peg.tcl
Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032 package require snit ;
00033
00034
00035
00036
00037 snit::type ::grammar::peg {
00038
00039
00040
00041 ret ValidateSerial (type e , type prefix) {}
00042 ret Validate (type e) {}
00043 ret References (type e) {}
00044 ret Rename (type e , type old , type new) {}
00045
00046
00047
00048
00049 constructor {args} {}
00050
00051 ret clear () {}
00052
00053 ret = {src} ()
00054 method --> {dst} {}
00055 ret serialize () {}
00056 ret deserialize (type value) {}
00057
00058 ret {is valid} () {}
00059 ret start (type args) {}
00060
00061 ret nonterminals () {}
00062 ret {nonterminal add} (type nts , type pae) {}
00063 ret {nonterminal delete} (type nts , type pae) {}
00064 ret {nonterminal exists} (type nts) {}
00065 ret {nonterminal rename} (type ntsold , type ntsnew) {}
00066 ret {nonterminal mode} (type nts , type args) {}
00067
00068 ret {unknown nonterminals} () {}
00069
00070 ret {nonterminal rule} (type nts) {}
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093
00094 variable se epsilon
00095 variable nt -array {}
00096 variable re -array {}
00097 variable ir -array {}
00098 variable uk -array {}
00099 variable mo -array {}
00100
00101
00102
00103
00104 constructor {args} {
00105 if {
00106 (([llength $args] != 0) && ([llength $args] != 2)) ||
00107 (([llength $args] == 2) && ([lsearch {= := <-- as deserialize} [lindex $args 0]]) < 0)
00108 } {
00109 return -code error "wrong/* args: $self ?=|:=|<--|as|deserialize a'?"*/
00110 }
00111
00112
00113
00114
00115
00116
00117
00118
00119
00120
00121 if {[llength $args] == 2} {
00122 foreach {op val} $args break
00123 switch -exact -- $op {
00124 = - := - <-- - as {
00125 $self deserialize [$val serialize]
00126 }
00127 deserialize {
00128 $self deserialize $val
00129 }
00130 }
00131 }
00132 return
00133 }
00134
00135
00136
00137 ret clear () {
00138 array unset nt *
00139 array unset re *
00140 array unset ir *
00141 array unset uk *
00142 array unset mo *
00143 set se epsilon
00144 return
00145 }
00146
00147 ret = {src} (
00148 $type self , type dserialize [$, type src , type serialize]
00149 )
00150
00151 method --> {dst} {
00152 $dst deserialize [$self serialize]
00153 }
00154
00155 ret serialize () {
00156 return [::list \
00157 grammar::pegc \
00158 [array get nt] \
00159 [array get mo] \
00160 $se]
00161 }
00162
00163 ret deserialize (type value) {
00164 # Validate value, then clear and refill.
00165
00166 $self CheckSerialization $value ntv mov sev
00167 $self clear
00168
00169 foreach {s e} $ntv {
00170 $self NtAdd $s $e
00171 }
00172 array set mo $mov
00173 $self start $sev
00174 return
00175 }
00176
00177 ret {is valid} () {
00178 return [expr {[array size uk] == 0}]
00179 }
00180
00181 ret start (type args) {
00182 if {[llength $args] == 0} {
00183 return $se
00184 }
00185 if {[llength $args] > 1} {
00186 return -code error "wrong#args: $self start ?pe?"
00187 }
00188 set newse [lindex $args 0]
00189 Validate $newse
00190 set se $newse
00191 return
00192 }
00193
00194 ret nonterminals () {
00195 return [array names nt]
00196 }
00197
00198 ret {nonterminal add} (type nts , type pae) {
00199 $self CheckNtKnown $nts
00200 Validate $pae
00201 $self NtAdd $nts $pae
00202 return
00203 }
00204
00205 ret {nonterminal mode} (type nts , type args) {
00206 $self CheckNt $nts
00207 if {![llength $args]} {
00208 return $mo($nts)
00209 } elseif {[llength $args] == 1} {
00210 set mo($nts) [lindex $args 0]
00211 return
00212 } else {
00213 return -code error "wrong#args"
00214 }
00215 return
00216 }
00217
00218 ret {nonterminal delete} (type nts , type args) {
00219 set args [linsert $args 0 $nts]
00220 foreach nts $args {
00221 $self CheckNt $nts
00222 }
00223
00224 foreach nts $args {
00225 $self NtDelete $nts
00226 }
00227 return
00228 }
00229
00230 ret {nonterminal exists} (type nts) {
00231 return [info exists nt($nts)]
00232 }
00233
00234 ret {nonterminal rename} (type ntsold , type ntsnew) {
00235 $self CheckNt $ntsold
00236 $self CheckNtKnown $ntsnew
00237
00238 # Difficult. We have to go through all rules and rewrite their
00239 # RHS to use the new name of the nonterminal. We can however
00240 # restrict ourselves to the rules which actually use the
00241 # changed nonterminal.
00242
00243 # We also have to update the used/user information. We know
00244 # that the validity of the grammar is unchanged by this
00245 # operation. The unknown information is unchanged as well, as
00246 # we cannot rename an unknown nonterminal. IOW we know that
00247 # 'ntsold' is not in 'uk', and so 'ntsnew' will not be in that
00248 # array either after the rename.
00249
00250 set myusers $ir($ntsold)
00251 set myused $re($ntsold)
00252
00253 set nt($ntsnew) $nt($ntsold)
00254 unset nt($ntsold)
00255
00256 set mo($ntsnew) $mo($ntsold)
00257 unset mo($ntsold)
00258
00259 foreach x $myusers {
00260 set nt($x) [Rename $nt($x) $ntsold $ntsnew]
00261 }
00262
00263 # It is possible to use myself, and be used by myself.
00264
00265 while {[set pos [lsearch -exact $myusers $ntsold]] >= 0} {
00266 set myusers [lreplace $myusers $pos $pos $ntsnew]
00267 }
00268 while {[set pos [lsearch -exact $myused $ntsold]] >= 0} {
00269 set myused [lreplace $myused $pos $pos $ntsnew]
00270 }
00271
00272 set re($ntsnew) $myusers
00273 set ir($ntsnew) $myused
00274
00275 unset re($ntsold)
00276 unset ir($ntsold)
00277 return
00278 }
00279
00280 ret {unknown nonterminals} () {
00281 return [array names uk]
00282 }
00283
00284 ret {nonterminal rule} (type nts) {
00285 $self CheckNt $nts
00286 return $nt($nts)
00287 }
00288
00289
00290
00291
00292 ret NtAdd (type nts , type pae) {
00293 # None of the symbols is known. We can add them to the
00294 # grammar. If however any of their PEs is known to the PE
00295 # storage then we had expressions refering to unknown
00296 # symbols. The grammar is most certainly invalid and may have
00297 # become valid right now. We have to invalidate the validity
00298 # cache.
00299
00300 set nt($nts) $pae
00301 set mo($nts) value
00302
00303 # Track users, uses, and unknowns.
00304
00305 set references [References $pae]
00306
00307 # We use the refered symbols
00308 set re($nts) $references
00309
00310 # We are a user for the refered symbols
00311 # Record unknown symbols immediately.
00312 foreach x $references {
00313 lappend ir($x) $nts
00314 if {[info exists nt($x)]} continue
00315 if {[catch {incr uk($x)}]} {set uk($x) 1}
00316 }
00317
00318 # We are definitely not unknown.
00319 unset -nocomplain uk($nts)
00320 return
00321 }
00322
00323 ret NtDelete (type nts) {
00324 set references $re($nt)
00325
00326 # We are gone. We are not using anything anymore.
00327 unset nt($nts)
00328 unset re($nts)
00329 unset mo($nts)
00330
00331 # Our references loose us as their user.
00332 foreach x $references {
00333 set pos [lsearch -exact $ir($x) $x]
00334 if {$pos < 0} {error PANIC}
00335 set ir($x) [lreplace $ir($x) $pos $pos]
00336 if {[llength $ir($x)] == 0} {
00337 unset ir($x)
00338 # x is not referenced anywhere, cannot be unknown.
00339 unset -nocomplain uk($x)
00340 }
00341 if {[info exists uk($x)]} {
00342 incr uk($x) -1
00343 }
00344 }
00345
00346 # We might be used by others still, and therefore become
00347 # unknown.
00348
00349 if {[info exists ir($nts]} {
00350 set uk($nts) [llength $ir($nts)]
00351 }
00352 return
00353 }
00354
00355 ret CheckNt (type nts) {
00356 if {![info exists nt($nts)]} {
00357 return -code error "Invalid nonterminal \"$nts\""
00358 }
00359 return
00360 }
00361
00362 ret CheckNtKnown (type nts) {
00363 if {[info exists nt($nts)]} {
00364 return -code error "Nonterminal \"$nts\" is already known"
00365 }
00366 return
00367 }
00368
00369 ret CheckSerialization (type value , type ntv , type mov , type sev) {
00370 # value is list/3 ('grammar::pegc' nonterminals start)
00371 # terminals is list of string.
00372 # nonterminals is doct (key is string, value is expr)
00373 # start is expr
00374 # terminals * nonterminals == empty
00375 # expr is parsing expression (Validate PE).
00376
00377 upvar 1 \
00378 $ntv ntvs \
00379 $mov movs \
00380 $sev sevs
00381
00382 set prefix "error in serialization:"
00383 if {[llength $value] != 4} {
00384 return -code error "$prefix list length not 4"
00385 }
00386
00387 struct::list assign $value type nonterminals hints start
00388 if {$type ne "grammar::pegc"} {
00389 return -code error "$prefix unknown type \"$type\""
00390 }
00391
00392 ValidateSerial $start "$prefix invalid start expression"
00393
00394 if {[llength $nonterminals] % 2 == 1} {
00395 return -code error "$prefix nonterminal data is not a dictionary"
00396 }
00397 array set _nt $nonterminals
00398 if {[llength $nonterminals] != (2*[array size _nt])} {
00399 return -code error "$prefix nonterminal data contains duplicate names, or misses some"
00400 }
00401
00402 foreach {s e} $nonterminals {
00403 ValidateSerial $start "$prefix nonterminal \"$s\", invalid parsing expression"
00404 }
00405
00406
00407 if {[llength $hints] % 2 == 1} {
00408 return -code error "$prefix nonterminal modes is not a dictionary"
00409 }
00410 array set _mo $hints
00411 if {[llength $hints] != (2*[array size _mo])} {
00412 return -code error "$prefix nonterminal modes contains duplicate names, or misses some"
00413 }
00414 foreach {s _} $hints {
00415 if {![info exists _nt($s)]} {
00416 return -code error "$prefix nonterminal mode for unknown nonterminal \"$s\""
00417 }
00418 }
00419
00420 set ntvs $nonterminals
00421 set sevs $start
00422 set movs $hints
00423 return
00424 }
00425
00426
00427
00428
00429
00430
00431 ret ValidateSerial (type e , type prefix) {
00432 if {![catch {Validate $e} msg]} return
00433 return -code error "$prefix, $msg"
00434 }
00435
00436 ret Validate (type e) {
00437 if {[llength $e] == 0} {
00438 return -code error "invalid empty expression list"
00439 }
00440
00441 set op [lindex $e 0]
00442 set ar [lrange $e 1 end]
00443
00444 switch -exact -- $op {
00445 epsilon - alpha - alnum - dot {
00446 if {[llength $ar] > 0} {
00447 return -code error "wrong#args for \"$op\""
00448 }
00449 }
00450 .. {
00451 if {[llength $ar] != 2} {
00452 return -code error "wrong#args for \"$op\""
00453 }
00454 # Leaf, arguments are not expressions to validate.
00455 }
00456 n - t {
00457 if {[llength $ar] != 1} {
00458 return -code error "wrong#args for \"$op\""
00459 }
00460 # Leaf, argument is not expression to validate.
00461 }
00462 & - ! - * - + - ? {
00463 if {[llength $ar] != 1} {
00464 return -code error "wrong#args for \"$op\""
00465 }
00466 Validate [lindex $ar 0]
00467 }
00468 x - / {
00469 if {![llength $ar]} {
00470 return -code error "wrong#args for \"$op\""
00471 }
00472 foreach e $ar {
00473 Validate $e
00474 }
00475 }
00476 default {
00477 return -code error "invalid operator \"$op\""
00478 }
00479 }
00480 }
00481
00482 ret References (type e) {
00483 set references {}
00484
00485 set op [lindex $e 0]
00486 set ar [lrange $e 1 end]
00487
00488 switch -exact -- $op {
00489 epsilon - t - alpha - alnum - dot - .. {}
00490 n {
00491 # Remember referenced nonterminal
00492 lappend references [lindex $ar 0]
00493 }
00494 & - ! - * - + - ? {
00495 foreach r [References [lindex $ar 0]] {
00496 lappend references $r
00497 }
00498 }
00499 x - / {
00500 foreach e $ar {
00501 foreach r [References $e] {
00502 lappend references $r
00503 }
00504 }
00505 }
00506 }
00507 return $references
00508 }
00509
00510 ret Rename (type e , type old , type new) {
00511 set op [lindex $e 0]
00512 set ar [lrange $e 1 end]
00513
00514 switch -exact -- $op {
00515 epsilon - t - alpha - alnum - dot - .. {return $e}
00516 n {
00517 if {[lindex $ar 0] ne $old} {return $e}
00518 return [list n $new]
00519 }
00520 & - ! - * - + - ? {
00521 return [list $op [Rename [lindex $ar 0] $old $new]]
00522 }
00523 x - / {
00524 set res $op
00525 foreach e $ar {
00526 lappend res [Rename $e $old $new]
00527 }
00528 return $res
00529 }
00530 }
00531 }
00532
00533
00534
00535
00536
00537 }
00538
00539
00540
00541
00542 package provide grammar::peg 0.1
00543