00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 namespace ::grammar::me::cpu::core {}
00014
00015
00016
00017
00018 ret ::grammar::me::cpu::core (type cmd , type args) {
00019 # Dispatcher for the ensemble command.
00020 variable core::cmds
00021 return [uplevel 1 [linsert $args 0 $cmds($cmd)]]
00022 }
00023
00024 namespace grammar::me::cpu::core {
00025 variable cmds
00026
00027
00028
00029
00030 array cmds = {
00031 disasm ::grammar::me::cpu::core::disasm
00032 asm ::grammar::me::cpu::core::asm
00033 new ::grammar::me::cpu::core::new
00034 lc ::grammar::me::cpu::core::lc
00035 tok ::grammar::me::cpu::core::tok
00036 pc ::grammar::me::cpu::core::pc
00037 iseof ::grammar::me::cpu::core::iseof
00038 at ::grammar::me::cpu::core::at
00039 cc ::grammar::me::cpu::core::cc
00040 sv ::grammar::me::cpu::core::sv
00041 ok ::grammar::me::cpu::core::ok
00042 error ::grammar::me::cpu::core::error
00043 lstk ::grammar::me::cpu::core::lstk
00044 astk ::grammar::me::cpu::core::astk
00045 mstk ::grammar::me::cpu::core::mstk
00046 estk ::grammar::me::cpu::core::estk
00047 rstk ::grammar::me::cpu::core::rstk
00048 nc ::grammar::me::cpu::core::nc
00049 ast ::grammar::me::cpu::core::ast
00050 halted ::grammar::me::cpu::core::halted
00051 code ::grammar::me::cpu::core::code
00052 eof ::grammar::me::cpu::core::eof
00053 put ::grammar::me::cpu::core::put
00054 run ::grammar::me::cpu::core::run
00055 }
00056 }
00057
00058
00059
00060
00061 ret ::grammar::me::cpu::core::disasm (type code) {
00062 variable iname
00063 variable tclass
00064 variable anum
00065
00066 Validate $code ord dst jmp
00067
00068 set label 0
00069 foreach k [array names jmp] {
00070 set jmp($k) bra$label
00071 incr label
00072 }
00073 foreach k [array names dst] {
00074 if {![info exists jmp($k)]} {
00075 set jmp($k) {}
00076 }
00077 }
00078
00079 set result {}
00080 foreach {asm pool tokmap} $code break
00081
00082 set pc 0
00083 set pcend [llength $asm]
00084
00085 while {$pc < $pcend} {
00086 set base $pc
00087 set insn [lindex $asm $pc] ; incr pc
00088 set an [lindex $anum $insn]
00089
00090 if {$an == 1} {
00091 set a [lindex $asm $pc] ; incr pc
00092 } elseif {$an == 2} {
00093 set a [lindex $asm $pc] ; incr pc
00094 set b [lindex $asm $pc] ; incr pc
00095 } elseif {$an == 3} {
00096 set a [lindex $asm $pc] ; incr pc
00097 set b [lindex $asm $pc] ; incr pc
00098 set c [lindex $asm $pc] ; incr pc
00099 }
00100
00101 set instruction {}
00102 lappend instruction $jmp($base)
00103 lappend instruction $iname($insn)
00104
00105 switch -exact $insn {
00106 0 - 5 - 20 - 24 - 25 - 26 -
00107 a/string {
00108 lappend instruction [lindex $pool $a]
00109 }
00110 1 {
00111 # a/tok b/string
00112 if {![llength $tokmap]} {
00113 lappend instruction [lindex $pool $a]
00114 } else {
00115 lappend instruction ${a}:$ord($a)
00116 }
00117 lappend instruction [lindex $pool $b]
00118 }
00119 2 {
00120 # a/tokstart b/tokend c/string
00121 if {![llength $tokmap]} {
00122 lappend instruction [lindex $pool $a]
00123 lappend instruction [lindex $pool $b]
00124 } else {
00125 # tokmap defined: a = b = order rank.
00126 lappend instruction ${a}:$ord($a)
00127 lappend instruction ${b}:$ord($b)
00128 }
00129 lappend instruction [lindex $pool $c]
00130 }
00131 3 {
00132 # a/class(0-5) b/string
00133 lappend instruction [lindex $tclass $a]
00134 lappend instruction [lindex $pool $b]
00135 }
00136 4 {
00137 # a/branch b/string
00138 lappend instruction $jmp($a)
00139 lappend instruction [lindex $pool $b]
00140 }
00141 6 - 11 - 12 - 13 -
00142 a/branch {
00143 lappend instruction $jmp($a)
00144 }
00145 default {}
00146 }
00147
00148 lappend result $instruction
00149 }
00150
00151 return $result
00152 }
00153
00154 ret ::grammar::me::cpu::core::asm (type code) {
00155 variable iname
00156 variable anum
00157 variable tccode
00158
00159 # code = list(insn), insn = list (label insn-name ...)
00160
00161 # I. Indices for the labels, based on instruction sizes.
00162
00163 array set jmp {}
00164 set off 0
00165 foreach insn $code {
00166 foreach {label name} $insn break
00167 # Ignore embedded comments, except for labels
00168 if {$label ne ""} {
00169 set jmp($label) $off
00170 }
00171 if {$name eq ".C"} continue
00172 if {![info exists iname($name)]} {
00173 return -code error "Bad instruction \"$insn\", unknown command \"$name\""
00174 }
00175 set an [lindex $anum $iname($name)]
00176 if {[llength $insn] != ($an+2)} {
00177 return -code error "Bad instruction \"$insn\", expected $an argument[expr {$an == 1 ? "" : "s"}]"
00178 }
00179 incr off
00180 incr off [lindex $anum $iname($name)]
00181 }
00182
00183 set asm {}
00184 set pool {}
00185 array set poolh {}
00186 array set tokmap {}
00187 array set ord {}
00188 set plain 0
00189
00190 foreach insn $code {
00191 foreach {label name} $insn break
00192 # Ignore embedded comments
00193 if {$name eq ".C"} continue
00194 set an [lindex $anum $iname($name)]
00195
00196 # Instruction code to assembly ...
00197 lappend asm $iname($name)
00198
00199 # Encode arguments ...
00200 switch -exact -- $name {
00201 ict_advance -
00202 inc_save -
00203 ier_nonterminal -
00204 isv_nonterminal_leaf -
00205 isv_nonterminal_range -
00206 isv_nonterminal_reduce {
00207 lappend asm [Str [lindex $insn 2]]
00208 }
00209 ict_match_token {
00210 lappend asm [Tok [lindex $insn 2]]
00211 lappend asm [Str [lindex $insn 3]]
00212 }
00213 ict_match_tokrange {
00214 lappend asm [Tok [lindex $insn 2]]
00215 lappend asm [Tok [lindex $insn 3]]
00216 lappend asm [Str [lindex $insn 4]]
00217 }
00218 ict_match_tokclass {
00219 set ccode [lindex $insn 2]
00220 if {![info exists tccode($ccode)]} {
00221 return -code error "Bad instruction \"$insn\", unknown class code \"$ccode\""
00222 }
00223 lappend asm $tccode($ccode)
00224 lappend asm [Str [lindex $insn 3]]
00225
00226 }
00227 inc_restore {
00228 set jmpto [lindex $insn 2]
00229 if {![info exists jmp($jmpto)]} {
00230 return -code error "Bad instruction \"$insn\", unknown branch destination \"$jmpto\""
00231 }
00232 lappend asm $jmp($jmpto)
00233 lappend asm [Str [lindex $insn 3]]
00234 }
00235 icf_ntcall -
00236 icf_jalways -
00237 icf_jok -
00238 icf_jfail {
00239 set jmpto [lindex $insn 2]
00240 if {![info exists jmp($jmpto)]} {
00241 return -code error "Bad instruction \"$insn\", unknown branch destination \"$jmpto\""
00242 }
00243 lappend asm $jmp($jmpto)
00244 }
00245 }
00246 }
00247
00248 return [list $asm $pool [array get tokmap]]
00249 }
00250
00251 ret ::grammar::me::cpu::core::new (type code) {
00252 # The code generating the state is drawn out to integrate a
00253 # specification of how the machine state is mapped to Tcl as well.
00254
00255 Validate $code
00256
00257 set state {} ; # The state is representend as a Tcl list.
00258 # ### ### ### ######### ######### #########
00259 lappend state $code ; # [_0] code - list - code to run (-)
00260 lappend state 0 ; # [_1] pc - int - Program counter
00261 lappend state 0 ; # [_2] halt - bool - Flag, set (internal) when machine was halted (icf_halt).
00262 lappend state 0 ; # [_3] eof - bool - Flag, set (external) when where will be no more input.
00263 lappend state {} ; # [_4] tc - list - Terminal cache, pending and processed tokens.
00264 lappend state -1 ; # [_5] cl - int - Current Location
00265 lappend state {} ; # [_6] ct - token - Current Character
00266 lappend state 0 ; # [_7] ok - bool - Match Status
00267 lappend state {} ; # [_8] sv - any - Semantic Value
00268 lappend state {} ; # [_9] er - list - Error status (*)
00269 lappend state {} ; # [10] ls - list - Location Stack (x)
00270 lappend state {} ; # [11] as - list - Ast Stack
00271 lappend state {} ; # [12] ms - list - Ast Marker Stack
00272 lappend state {} ; # [13] es - list - Error Stack
00273 lappend state {} ; # [14] rs - list - Return Stack
00274 lappend state {} ; # [15] nc - dict - Nonterminal Cache (backtracking)
00275 # ### ### ### ######### ######### #########
00276
00277 # tc = list(token)
00278 # token = list(str lexeme line col)
00279
00280
00281 # (-) See manpage of this package for the representation.
00282
00283 # (*) 2 elements, first is error location, second is list of
00284 # ... strings, the error messages. The strings are actually
00285 # ... represented by references into the pool element of the code.
00286
00287 # (x) Regarding the various stacks maintained in the state, their
00288 # top element is always at the right end, i.e. the last
00289 # element in the list representing it.
00290
00291 return $state
00292 }
00293
00294 ret ::grammar::me::cpu::core::ntok (type state) {
00295 return [llength [lindex $state 4]]
00296 }
00297
00298 ret ::grammar::me::cpu::core::lc (type state , type loc) {
00299 set tc [lindex $state 4]
00300 set loc [INDEX $tc $loc "Illegal location"]
00301 return [lrange [lindex $tc $loc] 2 3]
00302 # result = list(line col)
00303 }
00304
00305 ret ::grammar::me::cpu::core::tok (type state , type args) {
00306 if {[llength $args] > 2} {
00307 return -code error {wrong # args: should be "grammar::me::cpu::core::tok state ?from ?to??"}
00308 }
00309 set tc [lindex $state 4]
00310 if {[llength $args] == 0} {
00311 return $tc
00312 } elseif {[llength $args] == 1} {
00313 set at [INDEX $tc [lindex $args 0] "Illegal location"]
00314 return [lrange $tc $at $at]
00315 } else {
00316 set from [INDEX $tc [lindex $args 0] "Illegal start location"]
00317 set to [INDEX $tc [lindex $args 1] "Illegal end location"]
00318 if {$from > $to} {
00319 return -code error "Illegal empty location range $from .. $to"
00320 }
00321 return [lrange $tc $from $to]
00322 }
00323 # result = list(token), token = list(str lex line col)
00324 }
00325
00326 ret ::grammar::me::cpu::core::pc (type state) {
00327 return [lindex $state 1]
00328 }
00329
00330 ret ::grammar::me::cpu::core::iseof (type state) {
00331 return [lindex $state 3]
00332 }
00333
00334 ret ::grammar::me::cpu::core::at (type state) {
00335 return [lindex $state 5]
00336 }
00337
00338 ret ::grammar::me::cpu::core::cc (type state) {
00339 return [lindex $state 6]
00340 }
00341
00342 ret ::grammar::me::cpu::core::sv (type state) {
00343 return [lindex $state 8]
00344 }
00345
00346 ret ::grammar::me::cpu::core::ok (type state) {
00347 return [lindex $state 7]
00348 }
00349
00350 ret ::grammar::me::cpu::core::error (type state) {
00351 set er [lindex $state 9]
00352 if {[llength $er]} {
00353 foreach {l m} $er break
00354
00355 set pool [lindex $state 0 1] ; # state ->/0 code ->/1 pool
00356 set mx {}
00357 foreach id $m {
00358 lappend mx [lindex $pool $id]
00359 }
00360 set er [list $l $mx]
00361 }
00362 return $er
00363 }
00364
00365 ret ::grammar::me::cpu::core::lstk (type state) {
00366 return [lindex $state 10]
00367 }
00368
00369 ret ::grammar::me::cpu::core::astk (type state) {
00370 return [lindex $state 11]
00371 }
00372
00373 ret ::grammar::me::cpu::core::mstk (type state) {
00374 return [lindex $state 12]
00375 }
00376
00377 ret ::grammar::me::cpu::core::estk (type state) {
00378 return [lindex $state 13]
00379 }
00380
00381 ret ::grammar::me::cpu::core::rstk (type state) {
00382 return [lindex $state 14]
00383 }
00384
00385 ret ::grammar::me::cpu::core::nc (type state) {
00386 return [lindex $state 15]
00387 }
00388
00389 ret ::grammar::me::cpu::core::ast (type state) {
00390 return [lindex $state 11 end]
00391 }
00392
00393 ret ::grammar::me::cpu::core::halted (type state) {
00394 return [lindex $state 2]
00395 }
00396
00397 ret ::grammar::me::cpu::core::code (type state) {
00398 return [lindex $state 0]
00399 }
00400
00401 ret ::grammar::me::cpu::core::eof (type statevar) {
00402 upvar 1 $statevar state
00403 lset state 3 1
00404 return
00405 }
00406
00407 ret ::grammar::me::cpu::core::put (type statevar , type tok , type lex , type line , type col) {
00408 upvar 1 $statevar state
00409 if {[lindex $state 3]} {
00410 return -code error "Cannot add input data after eof"
00411 }
00412 set tc [K [lindex $state 4] [lset state 4 {}]]
00413 lappend tc [list $tok $lex $line $col]
00414 lset state 4 $tc
00415 return
00416 }
00417
00418 ret ::grammar::me::cpu::core::run (type statevar , optional steps =-1) {
00419 # Execution loop. Should be instrumented for statistics about
00420 # dynamic instruction frequency. I.e. which instructions are
00421 # executed the most => put them at the front of the if/switch for
00422 # quicker selection. I.e. frequency coding of the branches for
00423 # speed.
00424
00425 # A C implementation can shimmer the state into a directly
00426 # accessible data structure. And the asm instructions can shimmer
00427 # into an integer index upon which we can switch fast.
00428
00429 variable anum
00430 variable tclass
00431 upvar 1 $statevar state
00432 variable iname ; # For debug output
00433
00434 # Do nothing for a stopped machine (halt flag set).
00435 if {[lindex $state 2]} {return $state}
00436
00437 # Fail if there are no instruction to execute
00438 if {![llength [lindex $state 0 0]]} {
00439 # No instructions to execute
00440 return -code error "No instructions to execute"
00441 }
00442
00443 # Unpack state into locally accessible variables
00444 # 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
00445 foreach {code pc halt eof tc cl ct ok sv er ls as ms es rs nc} $state break
00446
00447 # Unpack match program for easy access as well.
00448 # 0 1 2
00449 foreach {asm pool tokmap} $code break
00450
00451 if 0 {
00452 puts ________________________
00453 puts [join [disasm $code] \n]
00454 puts ________________________
00455 }
00456
00457 # Ensure that the unpacked information is not shared
00458 unset state
00459
00460 # Internal flags for optimal handling of the nonterminal
00461 # cache. Avoid multiple unpacking of the dictionary, and avoid
00462 # repacking if it was not modified.
00463
00464 set ncunpacked 0
00465 set ncmodified 0
00466 set tmunpacked 0
00467
00468 while {1} {
00469 # Stop execution if the specified number of instructions have
00470 # been executed. Ignore if infinity was specified.
00471 if {$steps == 0} break
00472 if {$steps > 0} {incr steps -1}
00473
00474 # Get current instruction ...
00475
00476 if 0 {puts .$pc:\t$iname([lindex $asm $pc])}
00477 if 0 {puts -nonewline .$pc:\t$iname([lindex $asm $pc])}
00478
00479 set insn [lindex $asm $pc] ; incr pc
00480
00481 # And its arguments ...
00482
00483 set an [lindex $anum $insn]
00484 if {$an == 1} {
00485 set a [lindex $asm $pc] ; incr pc
00486 if 0 {puts \t<$a>}
00487 } elseif {$an == 2} {
00488 set a [lindex $asm $pc] ; incr pc
00489 set b [lindex $asm $pc] ; incr pc
00490 if 0 {puts \t<$a|$b>}
00491 } elseif {$an == 3} {
00492 set a [lindex $asm $pc] ; incr pc
00493 set b [lindex $asm $pc] ; incr pc
00494 set c [lindex $asm $pc] ; incr pc
00495 if 0 {puts \t<$a|$b|$c>}
00496 } ;# else {puts ""}
00497
00498 # Dispatch to implementation of the instruction ...
00499
00500 # Separate if commands are used for easier ordering of the
00501 # dispatch. The order of the branches should be frequency
00502 # coded to have the most frequently used instructions first.
00503
00504 # ict_advance <a:message>
00505 if {$insn == 0} {
00506 if 0 {puts \t\[$cl|[llength $tc]|$eof\]}
00507 incr cl
00508 if {$cl < [llength $tc]} {
00509 if 0 {puts \tConsume}
00510
00511 set ct [lindex $tc $cl 0]
00512 set ok 1
00513 set er {}
00514 } elseif {$eof} {
00515 if 0 {puts \tFail<Eof>}
00516
00517 # We have no input, and there won't be more coming in
00518 # either. Fail the advance. We do _not_ stop the match
00519 # loop, the program has to complete. The failure might
00520 # be no such, revealed during backtracking. The current
00521 # location is not rewound automatically, this is the
00522 # responsibility of any backtracking.
00523
00524 set er [list $cl [list $a]]
00525 set ok 0
00526 } else {
00527 if 0 {puts \tSuspend&Wait}
00528
00529 # We have no input, stop matching and wait for
00530 # more. We reset the machine into a state
00531 # which will restart this instruction when
00532 # execution resumes.
00533
00534 incr cl -1
00535 incr pc -2 ; # code and message argument
00536 break
00537 }
00538 if 0 {puts .Next}
00539 continue
00540 }
00541
00542 # ict_match_token <a:token> <b:message>
00543 if {$insn == 1} {
00544 if {[llength $tokmap]} {
00545 if {!$tmunpacked} {
00546 array set tm $tokmap
00547 set tmunpacked 1
00548 }
00549 set ok [expr {$a == $tm($ct)}]
00550 } else {
00551 set xch [lindex $pool $a]
00552 set ok [expr {$xch eq $ct}]
00553 }
00554 if {!$ok} {
00555 set er [list $cl [list $b]]
00556 } else {
00557 set er {}
00558 }
00559 continue
00560 }
00561
00562 # ict_match_tokrange <a:tokstart> <b:tokend> <c:message>
00563 if {$insn == 2} {
00564 if {[llength $tokmap]} {
00565 if {!$tmunpacked} {
00566 array set tm $tokmap
00567 set tmunpacked 1
00568 }
00569 set x $tm($ct)
00570 set ok [expr {($a <= $x) && ($x <= $b)}]
00571 } else {
00572 set a [lindex $pool $a]
00573 set b [lindex $pool $b]
00574 set ok [expr {
00575 ([string compare $a $ct] <= 0) &&
00576 ([string compare $ct $b] <= 0)
00577 }] ; # {}
00578 }
00579 if {!$ok} {
00580 set er [list $cl [list $c]]
00581 } else {
00582 set er {}
00583 }
00584 continue
00585 }
00586
00587 # ict_match_tokclass <a:code> <b:message>
00588 if {$insn == 3} {
00589 set strcode [lindex $tclass $a]
00590 set ok [string is $strcode -strict $ct]
00591 if {!$ok} {
00592 set er [list $cl [list $b]]
00593 } else {
00594 set er {}
00595 }
00596 continue
00597 }
00598
00599 # inc_restore <a:branchtarget> <b:nonterminal>
00600 if {$insn == 4} {
00601 set sym [lindex $pool $b]
00602
00603 # Unpack the cache dict, only here.
00604 # 8.5 - Use dict operations instead.
00605
00606 if {!$ncunpacked} {
00607 array set ncc $nc
00608 set ncunpacked 1
00609 }
00610
00611 if {[info exists ncc($cl,$sym)]} {
00612 foreach {go ok error sv} $ncc($cl,$sym) break
00613
00614 # Go forward, as the nonterminal matches (or not).
00615 set cl $go
00616 set pc $a
00617 }
00618 continue
00619 }
00620
00621 # inc_save <a:nonterminal>
00622 if {$insn == 5} {
00623 set sym [lindex $pool $a]
00624 set at [lindex $ls end]
00625 set ls [lrange $ls 0 end-1]
00626
00627 # Unpack, modify, only here.
00628 # 8.5 - Use dict operations instead.
00629
00630 if {!$ncunpacked} {
00631 array set ncc $nc
00632 set ncunpacked 1
00633 }
00634
00635 set ncc($at,$sym) [list $cl $ok $er $sv]
00636 set ncmodified 1
00637 continue
00638 }
00639
00640 # icf_ntcall <a:branchtarget>
00641 if {$insn == 6} {
00642 lappend rs $pc
00643 set pc $a
00644 continue
00645 }
00646
00647 # icf_ntreturn
00648 if {$insn == 7} {
00649 set pc [lindex $rs end]
00650 set rs [lrange $rs 0 end-1]
00651 continue
00652 }
00653
00654 # iok_ok
00655 if {$insn == 8} {
00656 set ok 1
00657 continue
00658 }
00659
00660 # iok_fail
00661 if {$insn == 9} {
00662 set ok 0
00663 continue
00664 }
00665
00666 # iok_negate
00667 if {$insn == 10} {
00668 set ok [expr {!$ok}]
00669 continue
00670 }
00671
00672 # icf_jalways <a:branchtarget>
00673 if {$insn == 11} {
00674 set pc $a
00675 continue
00676 }
00677
00678 # icf_jok <a:branchtarget>
00679 if {$insn == 12} {
00680 if {$ok} {set pc $a}
00681 # !ok => pc is already on next instruction.
00682 continue
00683 }
00684
00685 # icf_jfail <a:branchtarget>
00686 if {$insn == 13} {
00687 if {!$ok} {set pc $a}
00688 # ok => pc is already on next instruction.
00689 continue
00690 }
00691
00692 # icf_halt
00693 if {$insn == 14} {
00694 set halt 1
00695 break
00696 }
00697
00698 # icl_push
00699 if {$insn == 15} {
00700 lappend ls $cl
00701 continue
00702 }
00703
00704 # icl_rewind
00705 if {$insn == 16} {
00706 set cl [lindex $ls end]
00707 set ls [lrange $ls 0 end-1]
00708 continue
00709 }
00710
00711 # icl_pop
00712 if {$insn == 17} {
00713 set ls [lrange $ls 0 end-1]
00714 continue
00715 }
00716
00717 # ier_push
00718 if {$insn == 18} {
00719 lappend es $er
00720 continue
00721 }
00722
00723 # ier_clear
00724 if {$insn == 19} {
00725 set er {}
00726 continue
00727 }
00728
00729 # ier_nonterminal <a:nonterminal>
00730 if {$insn == 20} {
00731 if {[llength $er]} {
00732 set pos [lindex $ls end]
00733 incr pos
00734 set eloc [lindex $er 0]
00735 if {$eloc == $pos} {
00736 set er [list $eloc [list $a]]
00737 }
00738 }
00739 continue
00740 }
00741
00742 # ier_merge
00743 if {$insn == 21} {
00744 set old [lindex $es end]
00745 set es [lrange $es 0 end-1]
00746
00747 # We have either old or current error data, keep it.
00748
00749 if {![llength $er]} {
00750 # No current data, keep old
00751 set er $old
00752 } elseif {[llength $old]} {
00753 # If one of the errors is further on in the input
00754 # choose that as the information to propagate.
00755
00756 foreach {loe msgse} $er break
00757 foreach {lon msgsn} $old break
00758
00759 if {$lon > $loe} {
00760 set er $old
00761 } elseif {$loe == $lon} {
00762 # Equal locations, merge the message lists.
00763
00764 foreach m $msgsn {lappend msgse $m}
00765 set er [list $loe [lsort -uniq $msgse]]
00766 }
00767 # else lon < loe - er is better - nothing
00768 }
00769 # else - !old, but er - nothing
00770
00771 continue
00772 }
00773
00774 # isv_clear
00775 if {$insn == 22} {
00776 set sv {}
00777 continue
00778 }
00779
00780 # isv_terminal (implied ias_push)
00781 if {$insn == 23} {
00782 set sv [list {} $cl $cl]
00783 lappend as $sv
00784 continue
00785 }
00786
00787 # isv_nonterminal_leaf <a:nonterminal>
00788 if {$insn == 24} {
00789 set pos [lindex $ls end]
00790 set sv [list $a $pos $cl]
00791 continue
00792 }
00793
00794 # isv_nonterminal_range <a:nonterminal>
00795 if {$insn == 25} {
00796 set pos [lindex $ls end]
00797 set sv [list $a $pos $cl [list {} $pos $cl]]
00798 continue
00799 }
00800
00801 # isv_nonterminal_reduce <a:nonterminal>
00802 if {$insn == 26} {
00803 set pos [lindex $ls end]
00804 if {[llength $ms]} {
00805 set mrk [lindex $ms end]
00806 incr mrk
00807 } else {
00808 set mrk 0
00809 }
00810 set sv [lrange $as $mrk end]
00811 set sv [linsert $sv 0 $a $pos $cl]
00812 continue
00813 }
00814
00815 # ias_push
00816 if {$insn == 27} {
00817 lappend as $sv
00818 continue
00819 }
00820
00821 # ias_mark
00822 if {$insn == 28} {
00823 set mark [llength $as]
00824 incr mark -1
00825 lappend ms $mark
00826 continue
00827 }
00828
00829 # ias_mrewind
00830 if {$insn == 29} {
00831 set mark [lindex $ms end]
00832 set ms [lrange $ms 0 end-1]
00833 set as [lrange $as 0 $mark]
00834 continue
00835 }
00836
00837 # ias_mpop
00838 if {$insn == 30} {
00839 set ms [lrange $ms 0 end-1]
00840 continue
00841 }
00842
00843 return -code error "Illegal instruction $insn"
00844 }
00845
00846 # Repack a modified cache dictionary, then repack and store the
00847 # updated state value.
00848
00849 if 0 {puts .Repackage\ state}
00850
00851 if {$ncmodified} {set nc [array get ncc]}
00852 set state [list $code $pc $halt $eof $tc $cl $ct $ok $sv $er $ls $as $ms $es $rs $nc]
00853 return
00854 }
00855
00856 namespace grammar::me::cpu::core {
00857
00858 variable tclass {}
00859 variable tccode
00860
00861 foreach {x code} {
00862 0 alnum
00863 1 alpha
00864 2 digit
00865 3 xdigit
00866 4 punct
00867 5 space
00868 } {
00869 lappend tclass $code
00870 tccode = ($code) $x
00871 }
00872
00873
00874
00875 variable anum {}
00876
00877
00878 variable iname
00879
00880 foreach {z insn x notes} {
00881 0 ict_advance 1 {-- TESTED}
00882 1 ict_match_token 2 {-- TESTED}
00883 2 ict_match_tokrange 3 {-- TESTED}
00884 3 ict_match_tokclass 2 {-- TESTED}
00885 4 inc_restore 2 {-- TESTED}
00886 5 inc_save 1 {-- TESTED}
00887 6 icf_ntcall 1 {-- TESTED}
00888 7 icf_ntreturn 0 {-- TESTED}
00889 8 iok_ok 0 {-- TESTED}
00890 9 iok_fail 0 {-- TESTED}
00891 10 iok_negate 0 {-- TESTED}
00892 11 icf_jalways 1 {-- TESTED}
00893 12 icf_jok 1 {-- TESTED}
00894 13 icf_jfail 1 {-- TESTED}
00895 14 icf_halt 0 {-- TESTED}
00896 15 icl_push 0 {-- TESTED}
00897 16 icl_rewind 0 {-- TESTED}
00898 17 icl_pop 0 {-- TESTED}
00899 18 ier_push 0 {-- TESTED}
00900 19 ier_clear 0 {-- TESTED}
00901 20 ier_nonterminal 1 {-- TESTED}
00902 21 ier_merge 0 {-- TESTED}
00903 22 isv_clear 0 {-- TESTED}
00904 23 isv_terminal 0 {-- TESTED}
00905 24 isv_nonterminal_leaf 1 {-- TESTED}
00906 25 isv_nonterminal_range 1 {-- TESTED}
00907 26 isv_nonterminal_reduce 1 {-- TESTED}
00908 27 ias_push 0 {-- TESTED}
00909 28 ias_mark 0 {-- TESTED}
00910 29 ias_mrewind 0 {-- TESTED}
00911 30 ias_mpop 0 {-- TESTED}
00912 } {
00913 lappend anum $x
00914 iname = ($z) $insn
00915 iname = ($insn) $z
00916 }
00917 }
00918
00919
00920
00921
00922 ret ::grammar::me::cpu::core::INDEX (type list , type i , type label) {
00923 if {$i eq "end"} {
00924 set i [expr {[llength $list] - 1}]
00925 } elseif {[regexp {^end-([0-9]+)$} $i -> n]} {
00926 set i [expr {[llength $list] - $n -1}]
00927 }
00928 if {
00929 ![string is integer -strict $i] ||
00930 ($i < 0) ||
00931 ($i >= [llength $list])
00932 } {
00933 return -code error "$label $i"
00934 }
00935 return $i
00936 }
00937
00938 ret ::grammar::me::cpu::core::K (type x , type y) {set x}
00939
00940 ret ::grammar::me::cpu::core::Str (type str) {
00941 upvar 1 pool pool poolh poolh
00942 if {![info exists poolh($str)]} {
00943 set poolh($str) [llength $pool]
00944 lappend pool $str
00945 }
00946 return $poolh($str)
00947 }
00948
00949 ret ::grammar::me::cpu::core::Tok (type str) {
00950 upvar 1 tokmap tokmap ord ord plain plain
00951
00952 if {[regexp {^([^:]+):(.+)$} $str -> id name]} {
00953 if {$plain} {
00954 return -code error "Bad assembly, mixing plain and ranked tokens"
00955 }
00956 if {[info exists ord($id)]} {
00957 return -code error "Bad assembly, non-total ordering for $name and $ord($id), at rank $id"
00958 }
00959 set ord($id) $name
00960 set tokmap($name) $id
00961
00962 return $id
00963 } else {
00964 if {[array size ord]} {
00965 return -code error "Bad assembly, mixing plain and ranked tokens"
00966 }
00967 set plain 1
00968 return [uplevel 1 [list Str $str]]
00969 }
00970 }
00971
00972 ret ::grammar::me::cpu::core::Validate (type code , optional ovar ={) {tvar {}} {jvar {}}} {
00973 variable anum
00974 variable iname
00975
00976
00977
00978 if {[llength $code] != 3} {
00979 return -code error "Bad length"
00980 }
00981
00982 foreach {asm pool tokmap} $code break
00983
00984 if {[llength $tokmap] % 2 == 1} {
00985 return -code error "Bad tokmap, expected a dictionary"
00986 }
00987
00988 array ord = {}
00989 if {[llength $tokmap] > 0} {
00990 foreach {tok rank} $tokmap {
00991 if {[info exists ord($rank)]} {
00992 return -code error "Bad tokmap, non-total ordering for $tok and $ord($rank), at rank $rank"
00993 }
00994 ord = ($rank) $tok
00995 }
00996 }
00997
00998
00999
01000
01001 array target = {}
01002
01003 pc = 0
01004 pcend = [llength $asm]
01005 poolend = [llength $pool]
01006
01007 while {$pc < $pcend} {
01008 target = ($pc) .
01009
01010 insn = [lindex $asm $pc]
01011 if {($insn < 0) || ($insn > 30)} {
01012 return -code error "Invalid instruction $insn at PC $pc"
01013 }
01014
01015 incr pc
01016 incr pc [lindex $anum $insn]
01017 }
01018
01019 if {$pc > $pcend} {
01020 return -code error "Bad program, last instruction $insn ($iname($insn)) is truncated"
01021 }
01022
01023
01024
01025
01026 if {$jvar ne ""} {
01027 upvar 1 $jvar jmp
01028 }
01029 array jmp = {}
01030
01031 pc = 0
01032 while {$pc < $pcend} {
01033 base = $pc
01034 insn = [lindex $asm $pc] ; incr pc
01035 an = [lindex $anum $insn]
01036
01037 if {$an == 1} {
01038 a = [lindex $asm $pc] ; incr pc
01039 } elseif {$an == 2} {
01040 a = [lindex $asm $pc] ; incr pc
01041 b = [lindex $asm $pc] ; incr pc
01042 } elseif {$an == 3} {
01043 a = [lindex $asm $pc] ; incr pc
01044 b = [lindex $asm $pc] ; incr pc
01045 c = [lindex $asm $pc] ; incr pc
01046 }
01047
01048 switch -exact $insn {
01049 0 - 5 - 20 - 24 - 25 - 26 -
01050 a/string {
01051 if {($a < 0) || ($a >= $poolend)} {
01052 return -code error "Invalid string reference $a for instruction $insn ($iname($insn)) at $base"
01053 }
01054 }
01055 1 {
01056
01057 if {![llength $tokmap]} {
01058 if {($a < 0) || ($a >= $poolend)} {
01059 return -code error "Invalid string reference $a for instruction $insn ($iname($insn)) at $base"
01060 }
01061 } else {
01062 if {![info exists ord($a)]} {
01063 return -code error "Invalid token rank $a for instruction $insn ($iname($insn)) at $base"
01064 }
01065 }
01066 if {($b < 0) || ($b >= $poolend)} {
01067 return -code error "Invalid string reference $b for instruction $insn ($iname($insn)) at $base"
01068 }
01069 }
01070 2 {
01071
01072
01073 if {![llength $tokmap]} {
01074
01075 if {($a < 0) || ($a >= $poolend)} {
01076 return -code error "Invalid string reference $a for instruction $insn ($iname($insn)) at $base"
01077 }
01078 if {($b < 0) || ($b >= $poolend)} {
01079 return -code error "Invalid string reference $b for instruction $insn ($iname($insn)) at $base"
01080 }
01081 if {$a == $b} {
01082 return -code error "Invalid single-token range for instruction $insn ($iname($insn)) at $base"
01083 }
01084 if {[string compare [lindex $pool $a] [lindex $pool $b]] > 0} {
01085 return -code error "Invalid empty range for instruction $insn ($iname($insn)) at $base"
01086 }
01087 } else {
01088
01089 if {![info exists ord($a)]} {
01090 return -code error "Invalid token rank $a for instruction $insn ($iname($insn)) at $base"
01091 }
01092 if {![info exists ord($b)]} {
01093 return -code error "Invalid token rank $b for instruction $insn ($iname($insn)) at $base"
01094 }
01095 if {$a == $b} {
01096 return -code error "Invalid single-token range for instruction $insn ($iname($insn)) at $base"
01097 }
01098 if {$a > $b} {
01099 return -code error "Invalid empty range for instruction $insn ($iname($insn)) at $base"
01100 }
01101 }
01102 if {($c < 0) || ($c >= $poolend)} {
01103 return -code error "Invalid string reference $c for instruction $insn ($iname($insn)) at $base"
01104 }
01105 }
01106 3 {
01107
01108 if {($a < 0) || ($a > 5)} {
01109 return -code error "Invalid token-class $a for instruction $insn ($iname($insn)) at $base"
01110 }
01111 if {($b < 0) || ($b >= $poolend)} {
01112 return -code error "Invalid string reference $b for instruction $insn ($iname($insn)) at $base"
01113 }
01114 }
01115 4 {
01116
01117 if {![info exists target($a)]} {
01118 return -code error "Invalid branch target $a for instruction $insn ($iname($insn)) at $base"
01119 } else {
01120 jmp = ($a) .
01121 }
01122 if {($b < 0) || ($b >= $poolend)} {
01123 return -code error "Invalid string reference $b for instruction $insn ($iname($insn)) at $base"
01124 }
01125 }
01126 6 - 11 - 12 - 13 -
01127 a/branch {
01128 if {![info exists target($a)]} {
01129 return -code error "Invalid branch target $a for instruction $insn ($iname($insn)) at $base"
01130 } else {
01131 jmp = ($base) $a
01132 }
01133 }
01134 default {}
01135 }
01136 }
01137
01138
01139
01140
01141
01142 if {$ovar ne ""} {
01143 upvar 1 $ovar o
01144 array o = [array get ord]
01145 }
01146 if {$tvar ne ""} {
01147 upvar 1 $tvar t
01148 array t = [array get target]
01149 }
01150 return
01151 }
01152
01153
01154
01155
01156 package provide grammar::me::cpu::core 0.2
01157