00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016 package require grammar::fa::op ;
00017 package require snit ;
00018 package require struct::list ;
00019 package require struct:: ; # Extended = operations = .
00020
00021
00022
00023
00024 snit::type ::grammar::fa {
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034 ret = {b} ()
00035 method --> {b} {}
00036
00037 ret serialize () {}
00038 ret deserialize (type value) {}
00039 ret deserialize_merge (type value) {}
00040
00041 ret states () {}
00042 ret state (type cmd , type s , type args) {}
00043
00044 ret startstates () {}
00045 ret start? (type s) {}
00046 ret start?set (type states) {}
00047 ret start (type cmd , type args) {}
00048
00049 ret finalstates () {}
00050 ret final? (type s) {}
00051 ret final?set (type states) {}
00052 ret final (type cmd , type args) {}
00053
00054 ret symbols () {}
00055 ret symbols@ (type state) {}
00056 ret symbols@set (type states) {}
00057 ret symbol (type cmd , type sym) {}
00058
00059 ret next (type s , type sym , type args) {}
00060 ret !next (type s , type sym , type args) {}
00061 ret nextset (type states , type sym) {}
00062
00063 ret is (type cmd) {}
00064
00065 ret reachable_states () {}
00066 ret unreachable_states () {}
00067 ret reachable (type s) {}
00068
00069 ret useful_states () {}
00070 ret unuseful_states () {}
00071 ret useful (type s) {}
00072
00073 ret epsilon_closure (type s) {}
00074
00075 ret clear () {}
00076
00077
00078
00079
00080
00081 ret reverse () {op::reverse $self}
00082 ret complete (optional sink ={)} {op::complete $self $sink}
00083 method remove_eps {} {op::remove_eps $self}
00084 method trim {{what !reachable|!useful}} {op::trim $self $what}
00085 ret complement () {op::complement $self}
00086 ret kleene () {op::kleene $self}
00087 ret optional () {op::optional $self}
00088 ret fromRegex (type regex , optional over ={)} {op::fromRegex $self $regex $over}
00089
00090 method determinize {{mapvar {}}} {
00091 if {$mapvar ne ""} {upvar 1 $mapvar map}
00092 op::determinize $self map
00093 }
00094
00095 ret minimize (optional mapvar ={)} {
00096 if {$mapvar ne ""} {upvar 1 $mapvar map}
00097 op::minimize $self map
00098 }
00099
00100 ret union (type fa , optional mapvar ={)} {
00101 if {$mapvar ne ""} {upvar 1 $mapvar map}
00102 op::union $self $fa map
00103 }
00104
00105 ret intersect (type fa , optional mapvar ={)} {
00106 if {$mapvar ne ""} {upvar 1 $mapvar map}
00107 op::intersect $self $fa map
00108 }
00109
00110 ret difference (type fa , optional mapvar ={)} {
00111 if {$mapvar ne ""} {upvar 1 $mapvar map}
00112 op::difference $self $fa map
00113 }
00114
00115 ret concatenate (type fa , optional mapvar ={)} {
00116 if {$mapvar ne ""} {upvar 1 $mapvar map}
00117 op::concatenate $self $fa map
00118 }
00119
00120
00121
00122
00123
00124
00125
00126
00127
00128
00129
00130
00131
00132
00133
00134
00135
00136 variable order ;
00137 variable final ;
00138 variable start ;
00139 variable transinv ;
00140
00141
00142
00143
00144 variable scount 0 ;
00145
00146
00147
00148
00149 variable symbol ;
00150
00151
00152
00153
00154
00155
00156
00157
00158
00159
00160
00161 variable transym ;
00162
00163
00164
00165
00166
00167
00168
00169
00170
00171
00172 variable reach {} ;
00173 variable reachvalid 0 ;
00174
00175 variable useful {} ;
00176 variable usefulvalid 0 ;
00177
00178 variable nondete {} ;
00179 variable nondets ;
00180
00181 variable ec ;
00182
00183
00184
00185
00186
00187 constructor {args} {
00188 alen = [llength $args]
00189 if {($alen != 2) && ($alen != 0) && ($alen != 3)} {
00190 return -code error "wrong/* args: $self ?=|:=|<--|as|deserialize a'|fromRegex re ?over??"*/
00191 }
00192
00193 array order = {} ; nondete = {}
00194 array start = {} ; scount = 0
00195 array final = {} ; reach = {}
00196 array symbol = {} ; reachvalid = 0
00197 array transym = {} ; useful = {}
00198 array transinv = {} ; usefulvalid = 0
00199 array nondets = {}
00200 array ec = {}
00201
00202 if {$alen == 0} return
00203
00204 foreach {cmd object} $args break
00205 switch -exact -- $cmd {
00206 = - := - <-- - as {
00207 if {$alen != 2} {
00208 return -code error "wrong/* args: $self ?=|:=|<--|as|deserialize a'|fromRegex re ?over??"*/
00209 }
00210 $self = $object
00211 }
00212 deserialize {
00213 if {$alen != 2} {
00214 return -code error "wrong/* args: $self ?=|:=|<--|as|deserialize a'|fromRegex re ?over??"*/
00215 }
00216
00217 $self deserialize $object
00218 }
00219 fromRegex {
00220
00221 if {$alen == 2} {
00222 $self fromRegex $object
00223 } else {
00224 $self fromRegex $object [lindex $args 2]
00225 }
00226 }
00227 default {
00228 return -code error "bad assignment: $self ?=|:=|<--|as|deserialize a'|fromRegex re ?over??"
00229 }
00230 }
00231 return
00232 }
00233
00234
00235
00236
00237
00238 ret = {b} (
00239 $type self , type deserialize [$, type b , type serialize]
00240 )
00241
00242 method --> {b} {
00243 $b deserialize [$self serialize]
00244 }
00245
00246
00247
00248 ret serialize () {
00249 set ord {}
00250 foreach {s n} [array get order] {
00251 lappend ord [list $s $n]
00252 }
00253 set states {} ; # Dictionary
00254 foreach item [lsort -index 1 -integer -increasing $ord] {
00255 set s [lindex $item 0]
00256 set sdata {}
00257
00258 # Dict data per state :
00259
00260 lappend sdata [info exists start($s)]
00261 lappend sdata [info exists final($s)]
00262
00263 # Transitions from the state.
00264
00265 upvar #0 ${selfns}::trans_$order($s) jump
00266
00267 if {![info exists jump]} {
00268 lappend sdata {}
00269 } else {
00270 lappend sdata [array get jump]
00271 }
00272
00273 # ----------------------
00274 lappend states $s $sdata
00275 }
00276
00277 return [::list \
00278 grammar::fa \
00279 [array names symbol] \
00280 $states \
00281 ]
00282 }
00283
00284 ret deserialize (type value) {
00285 $self CheckSerialization $value st states acc tr newsymbols
00286 $self clear
00287
00288 foreach s $states {set order($s) [incr scount]}
00289 foreach sym $newsymbols {set symbol($sym) .}
00290 foreach s $acc {set final($s) .}
00291 foreach s $st {set start($s) .}
00292
00293 foreach {sa sym se} $tr {$self Next $sa $sym $se}
00294 return
00295 }
00296
00297 ret deserialize_merge (type value) {
00298 $self CheckSerialization $value st states acc tr newsymbols
00299
00300 foreach s $states {set order($s) [incr scount]}
00301 foreach sym $newsymbols {set symbol($sym) .}
00302 foreach s $acc {set final($s) .}
00303 foreach s $st {set start($s) .}
00304
00305 foreach {sa sym se} $tr {$self Next $sa $sym $se}
00306 return
00307 }
00308
00309
00310
00311 ret states () {
00312 return [array names order]
00313 }
00314
00315 ret state (type cmd , type s , type args) {
00316 switch -exact -- $cmd {
00317 add {
00318 set args [linsert $args 0 $s]
00319 foreach s $args {
00320 if {[info exists order($s)]} {
00321 return -code error "State \"$s\" is already known"
00322 }
00323 }
00324 foreach s $args {set order($s) [incr scount]}
00325 return
00326 }
00327 delete {
00328 set args [linsert $args 0 $s]
00329 $self StateCheckSet $args
00330
00331 foreach s $args {
00332 unset -nocomplain start($s) ; # Start/Initial indicator
00333 unset -nocomplain final($s) ; # Final/Accept indicator
00334
00335 # Remove all inbound transitions.
00336 if {[info exists transinv($s)]} {
00337 set src $transinv($s)
00338 unset transinv($s)
00339
00340 foreach srcitem $src {
00341 struct::list assign $srcitem sin sym
00342 $self !Next $sin $sym $s
00343 }
00344 }
00345
00346 # We remove transition data only after the inbound
00347 # ones. Otherwise we screw up the removal of
00348 # looping transitions. We have to consider the
00349 # backpointers to us in transinv as well.
00350
00351 upvar #0 ${selfns}::trans_$order($s) jump
00352 if {[info exists jump]} {
00353 foreach sym [array names jump] {
00354 $self !Transym $s $sym
00355 foreach nexts $jump($sym) {
00356 $self !Transinv $s $sym $nexts
00357 }
00358 }
00359
00360 unset ${selfns}::trans_$order($s) ; # Transitions from s
00361 }
00362 unset order($s) ; # State ordering
00363
00364 # Removal of a state may break the automaton into
00365 # disconnected pieces. This means that the set of
00366 # reachable and useful states may change, and the
00367 # cache cannot be used from now on.
00368
00369 $self InvalidateReach
00370 $self InvalidateUseful
00371 }
00372 return
00373 }
00374 rename {
00375 set alen [llength $args]
00376 if {($alen != 1)} {
00377 return -code error "wrong#args: [list $self] state rename s s'"
00378 }
00379 $self StateCheck $s
00380 set snew [lindex $args 0]
00381 if {[info exists order($snew)]} {
00382 return -code error "State \"$snew\" is already known"
00383 }
00384
00385 set o $order($s)
00386 unset order($s) ; # State ordering
00387 set order($snew) $o
00388
00389 # Start/Initial indicator
00390 if {[info exists start($s)]} {
00391 set start($snew) $start($s)
00392 unset start($s)
00393 }
00394 # Final/Accept indicator
00395 if {[info exists final($s)]} {
00396 set final($snew) $final($s)
00397 unset final($s)
00398 }
00399 # Update all inbound transitions.
00400 if {[info exists transinv($s)]} {
00401 set transinv($snew) $transinv($s)
00402 unset transinv($s)
00403
00404 # We have to perform a bit more here. We have to
00405 # go through the inbound transitions and chane the
00406 # listed destination state to the new name.
00407
00408 foreach srcitem $transinv($snew) {
00409 struct::list assign $srcitem sin sym
00410 upvar #0 ${selfns}::trans_$order($sin) jump
00411 upvar 0 jump($sym) destinations
00412 set pos [lsearch -exact $destinations $s]
00413 set destinations [lreplace $destinations $pos $pos $snew]
00414 }
00415 }
00416
00417 # Another place to change are the back pointers from
00418 # all the states we have transitions to, i.e. transinv
00419 # for all outbound states.
00420
00421 upvar #0 ${selfns}::trans_$o jump
00422 if {[info exists jump]} {
00423 foreach sym [array names jump] {
00424 foreach sout $jump($sym) {
00425 upvar 0 transinv($sout) backpointer
00426 set pos [lsearch -exact $backpointer [list $s $sym]]
00427 set backpointer [lreplace $backpointer $pos $pos [list $snew $sym]]
00428 }
00429
00430 # And also to update: Transym information for the symbol.
00431 upvar 0 transym($sym) users
00432 set pos [lsearch -exact $users $s]
00433 set users [lreplace $users $pos $pos $snew]
00434 }
00435 }
00436
00437 # Changing the name of a state does not change the
00438 # reachables / useful states per se. We just may have
00439 # to replace the name in the caches as well.
00440
00441 # - Invalidation will do the same, at the expense of a
00442 # - larger computation later.
00443
00444 $self InvalidateReach
00445 $self InvalidateUseful
00446 }
00447 exists {
00448 set alen [llength $args]
00449 if {$alen != 0} {
00450 return -code error "wrong#args: [list $self] state exists s"
00451 }
00452 return [info exists order($s)]
00453 }
00454 default {
00455 return -code error "Expected add, exists, delete, or rename, got \"$cmd\""
00456 }
00457 }
00458 return
00459 }
00460
00461
00462
00463 ret startstates () {
00464 return [array names start]
00465 }
00466
00467 ret start? (type s) {
00468 $self StateCheck $s
00469 return [info exists start($s)]
00470 }
00471
00472 ret start?set (type states) {
00473 $self StateCheckSet $states
00474 foreach s $states {
00475 if {[info exists start($s)]} {return 1}
00476 }
00477 return 0
00478 }
00479
00480 ret start (type cmd , type args) {
00481 # Note: Adding or removing start states does not change
00482 # usefulness, only reachability
00483 switch -exact -- $cmd {
00484 add {
00485 if {[llength $args] < 1} {
00486 return -code error "wrong#args: start add state ..."
00487 }
00488 $self StateCheckSet $args
00489 foreach s $args {set start($s) .}
00490 $self InvalidateReach
00491 }
00492 set {
00493 if {[llength $args] != 1} {
00494 return -code error "wrong#args: start set states"
00495 }
00496 set states [lindex $args 0]
00497 $self StateCheckSet $states
00498 array unset start
00499 foreach s $states {set start($s) .}
00500 $self InvalidateReach
00501 }
00502 remove {
00503 if {[llength $args] < 1} {
00504 return -code error "wrong#args: start remove state ..."
00505 }
00506 $self StateCheckSet $args
00507 foreach s $args {
00508 unset -nocomplain start($s)
00509 }
00510 $self InvalidateReach
00511 }
00512 clear {
00513 if {[llength $args] != 0} {
00514 return -code error "wrong#args: start clear"
00515 }
00516 array unset start
00517 $self InvalidateReach
00518 }
00519 default {
00520 return -code error "Expected add, clear, remove, or set, got \"$cmd\""
00521 }
00522 }
00523 return
00524 }
00525
00526
00527
00528 ret finalstates () {
00529 return [array names final]
00530 }
00531
00532 ret final? (type s) {
00533 $self StateCheck $s
00534 return [info exists final($s)]
00535 }
00536
00537 ret final?set (type states) {
00538 $self StateCheckSet $states
00539 foreach s $states {
00540 if {[info exists final($s)]} {return 1}
00541 }
00542 return 0
00543 }
00544
00545 ret final (type cmd , type args) {
00546 # Note: Adding or removing start states does not change
00547 # reachability, only usefulness
00548 switch -exact -- $cmd {
00549 add {
00550 if {[llength $args] < 1} {
00551 return -code error "wrong#args: final add state ..."
00552 }
00553 $self StateCheckSet $args
00554 foreach s $args {set final($s) .}
00555 $self InvalidateUseful
00556 }
00557 set {
00558 if {[llength $args] != 1} {
00559 return -code error "wrong#args: final set states"
00560 }
00561 set states [lindex $args 0]
00562 $self StateCheckSet $states
00563 array unset final
00564 foreach s $states {set final($s) .}
00565 $self InvalidateReach
00566 }
00567 remove {
00568 if {[llength $args] < 1} {
00569 return -code error "wrong#args: final remove state ..."
00570 }
00571 $self StateCheckSet $args
00572 foreach s $args {
00573 unset -nocomplain final($s)
00574 }
00575 $self InvalidateUseful
00576 }
00577 clear {
00578 if {[llength $args] != 0} {
00579 return -code error "wrong#args: final clear"
00580 }
00581 array unset final
00582 $self InvalidateReach
00583 }
00584 default {
00585 return -code error "Expected add, clear, remove, or set, got \"$cmd\""
00586 }
00587 }
00588 return
00589 }
00590
00591
00592
00593 ret symbols () {
00594 return [array names symbol]
00595 }
00596
00597 ret symbols@ (type s , optional t ={)} {
00598 $self StateCheck $s
00599 if {$t ne ""} { $self StateCheck $t}
00600 upvar
00601 if {![info exists jump]} {return {}}
00602 if {$t eq ""} {
00603
00604 return [array names jump]
00605 }
00606
00607 result = {}
00608 foreach sym [array names jump] {
00609 if {[lsearch -exact $jump($sym) $t] < 0} continue
00610 lappend result $sym
00611 }
00612 return [lsort -uniq $result]
00613 }
00614
00615 ret symbols@set (type states) {
00616 # Union (fa symbol@ s, f.a. s in states)
00617
00618 $self StateCheckSet $states
00619 set result {}
00620 foreach s $states {
00621 upvar #0 ${selfns}::trans_$order($s) jump
00622 if {![info exists jump]} continue
00623 foreach sym [array names jump] {
00624 lappend result $sym
00625 }
00626 }
00627 return [lsort -uniq $result]
00628 }
00629
00630 ret symbol (type cmd , type sym , type args) {
00631 switch -exact -- $cmd {
00632 add {
00633 set args [linsert $args 0 $sym]
00634 foreach sym $args {
00635 if {$sym eq ""} {
00636 return -code error "Cannot add illegal empty symbol \"\""
00637 }
00638 if {[info exists symbol($sym)]} {
00639 return -code error "Symbol \"$sym\" is already known"
00640 }
00641 }
00642 foreach sym $args {set symbol($sym) .}
00643 }
00644 delete {
00645 set args [linsert $args 0 $sym]
00646 $self SymbolCheckSetNE $args
00647 foreach sym $args {
00648 unset symbol($sym)
00649
00650 # Delete all transitions using the removed symbol.
00651
00652 if {[info exists transym($sym)]} {
00653 foreach s $transym($sym) {
00654 $self !Next $s $sym
00655 }
00656 }
00657 }
00658 }
00659 rename {
00660 set alen [llength $args]
00661 if {$alen != 1} {
00662 return -code error "wrong#args: [list $self] symbol rename sym newsym"
00663 }
00664 $self SymbolCheckNE $sym
00665 set newsym [lindex $args 0]
00666
00667 if {$newsym eq ""} {
00668 return -code error "Cannot add illegal empty symbol \"\""
00669 }
00670 if {[info exists symbol($newsym)]} {
00671 return -code error "Symbol \"$newsym\" is already known"
00672 }
00673
00674 unset symbol($sym)
00675 set symbol($newsym) .
00676
00677 if {[info exists transym($sym)]} {
00678 set transym($newsym) [set states $transym($sym)]
00679 unset transym($sym)
00680
00681 foreach s $states {
00682 # Update the jump tables for each of the states
00683 # using this symbol, and the reverse tables as
00684 # well.
00685
00686 upvar #0 ${selfns}::trans_$order($s) jump
00687 set jump($newsym) [set destinations $jump($sym)]
00688 unset jump($sym)
00689
00690 foreach sd $destinations {
00691 upvar 0 transinv($sd) backpointer
00692 set pos [lsearch -exact $backpointer [list $s $sym]]
00693 set backpointer [lreplace $backpointer $pos $pos [list $s $newsym]]
00694 }
00695 }
00696 }
00697 }
00698 exists {
00699 set alen [llength $args]
00700 if {$alen != 0} {
00701 return -code error "wrong#args: [list $self] symbol exists sym"
00702 }
00703 return [info exists symbol($sym)]
00704 }
00705 default {
00706 return -code error "Expected add, delete, exists, or rename, got \"$cmd\""
00707 }
00708 }
00709 return
00710 }
00711
00712
00713
00714 ret next (type s , type sym , type args) {
00715 ## Split into checking and functionality ...
00716
00717 set alen [llength $args]
00718 if {($alen != 2) && ($alen != 0)} {
00719 return -code error "wrong#args: [list $self] next s sym ?--> s'?"
00720 }
00721 $self StateCheck $s
00722 $self SymbolCheck $sym
00723
00724 if {($alen == 2) && [set cmd [lindex $args 0]] ne "-->"} {
00725 return -code error "Expected -->, got \"$cmd\""
00726 }
00727
00728 if {$alen == 0} {
00729 # Query transition table.
00730 upvar #0 ${selfns}::trans_$order($s) jump
00731 if {![info exists jump($sym)]} {return {}}
00732 return $jump($sym)
00733 }
00734
00735 set nexts [lindex $args 1]
00736 $self StateCheck $nexts
00737
00738 upvar #0 ${selfns}::trans_$order($s) jump
00739 if {[info exists jump($sym)] && [struct::set contains $jump($sym) $nexts]} {
00740 return -code error "Transition \"($s, ($sym)) --> $nexts\" is already known"
00741 }
00742
00743 $self Next $s $sym $nexts
00744 return
00745 }
00746
00747 ret !next (type s , type sym , type args) {
00748 set alen [llength $args]
00749 if {($alen != 2) && ($alen != 0)} {
00750 return -code error "wrong#args: [list $self] !next s sym ?--> s'?"
00751 }
00752 $self StateCheck $s
00753 $self SymbolCheck $sym
00754
00755 if {$alen == 2} {
00756 if {[lindex $args 0] ne "-->"} {
00757 return -code error "Expected -->, got \"[lindex $args 0]\""
00758 }
00759 set nexts [lindex $args 1]
00760 $self StateCheck $nexts
00761 $self !Next $s $sym $nexts
00762 } else {
00763 $self !Next $s $sym
00764 }
00765 }
00766
00767 ret nextset (type states , type sym) {
00768 $self SymbolCheck $sym
00769 $self StateCheckSet $states
00770
00771 set result {}
00772 foreach s $states {
00773 upvar #0 ${selfns}::trans_$order($s) jump
00774 if {![info exists jump($sym)]} continue
00775 struct::set add result $jump($sym)
00776 }
00777 return $result
00778 }
00779
00780
00781
00782 ret is (type cmd) {
00783 switch -exact -- $cmd {
00784 complete {
00785 # The FA is complete if Trans(State, Sym) != {} for all
00786 # states and symbols (Not counting epsilon transitions).
00787 # Without symbols the FA is deemed complete. Note:
00788 # States with epsilon transitions can use symbols
00789 # indirectly! Need their closures for exact
00790 # computation.
00791
00792 set nsymbols [llength [array names symbol]]
00793 if {$nsymbols == 0} {return 1}
00794 foreach s [array names order] {
00795 upvar #0 ${selfns}::trans_$order($s) jump
00796 if {![info exists jump]} {return 0}
00797 set njsym [array size jump]
00798 if {[info exists jump()]} {
00799 set njsym [llength [$self symbols@set [$self epsilon_closure $s]]]
00800 incr njsym -1
00801 }
00802 if {$njsym != $nsymbols} {return 0}
00803 }
00804 return 1
00805 }
00806 deterministic {
00807 # The FA is deterministic if it has on start state, no
00808 # epsilon transitions, and the transition function is
00809 # State x Symbol -> State, and not
00810 # State x Symbol -> P(State).
00811
00812 return [expr {
00813 ([array size start] == 1) &&
00814 ![llength $nondete] &&
00815 ![array size nondets]
00816 }] ;#{}
00817 }
00818 epsilon-free {
00819 # FA is epsion-free if there are no states having epsilon transitions.
00820 return [expr {![llength $nondete]}]
00821 }
00822 useful {
00823 # The FA is useful if and only if we have states and
00824 # all states are reachable and useful.
00825
00826 set states [$self states]
00827 return [expr {
00828 [struct::set size $states] &&
00829 [struct::set equal $states [$self reachable_states]] &&
00830 [struct::set equal $states [$self useful_states]]
00831 }] ;# {}
00832 }
00833 }
00834 return -code error "Expected complete, deterministic, epsilon-free, or useful, got \"$cmd\""
00835 }
00836
00837
00838
00839 ret reachable_states () {
00840 if {$reachvalid} {return $reach}
00841 if {![array size start]} {
00842 set reach {}
00843 } else {
00844 # Basic algorithm like for epsilon_closure, except that we
00845 # process all transitions, not only epsilons, and that
00846 # the initial state is fixed to start.
00847
00848 set reach [array names start]
00849 set pending $reach
00850 array set visited {}
00851 while {[llength $pending]} {
00852 set s [struct::list shift pending]
00853 if {[info exists visited($s)]} continue
00854 set visited($s) .
00855 upvar #0 ${selfns}::trans_$order($s) jump
00856 if {![info exists jump]} continue
00857 if {![array size jump]} continue
00858 foreach sym [array names jump] {
00859 struct::set add reach $jump($sym)
00860 struct::set add pending $jump($sym)
00861 }
00862 }
00863 }
00864 set reachvalid 1
00865 return $reach
00866 }
00867
00868 ret unreachable_states () {
00869 # unreachable = states - reachables
00870 return [struct::set difference \
00871 [$self states] [$self reachable_states]]
00872 }
00873
00874 ret reachable (type s) {
00875 $self StateCheck $s
00876 return [struct::set contains [$self reachable_states] $s]
00877 }
00878
00879
00880
00881 ret useful_states () {
00882 if {$usefulvalid} {return $useful}
00883
00884 # A state is useful if a final state
00885 # can be reached from it.
00886
00887 if {![array size final]} {
00888 set useful {}
00889 } else {
00890 # Basic algorithm like for epsilon_closure, except that we
00891 # process all transitions, not only epsilons, and that
00892 # the initial set of states is fixed to final.
00893
00894 set useful [array names final]
00895 array set known [array get final]
00896 set pending $useful
00897 array set visited {}
00898 while {[llength $pending]} {
00899 set s [struct::list shift pending]
00900 if {[info exists visited($s)]} continue
00901 set visited($s) .
00902
00903 # All predecessors are useful, and have to be visited as well.
00904 # We get the predecessors from the transinv structure.
00905
00906 if {![info exists transinv($s)]} continue
00907 foreach before $transinv($s) {
00908 set before [lindex $before 0]
00909 if {[info exists visited($before)]} continue
00910 lappend pending $before
00911 if {[info exists known($before)]} continue
00912 lappend useful $before
00913 set known($before) .
00914 }
00915 }
00916 }
00917 set usefulvalid 1
00918 return $useful
00919 }
00920
00921 ret unuseful_states () {
00922 # unuseful = states - useful
00923 return [struct::set difference \
00924 [$self states] [$self useful_states]]
00925 }
00926
00927 ret useful (type s) {
00928 $self StateCheck $s
00929 return [struct::set contains [$self useful_states] $s]
00930 }
00931
00932
00933
00934 ret epsilon_closure (type s) {
00935 # Iterative graph traversal. Keeps a set of states to look at,
00936 # and adds to them everything it can reach from the current
00937 # state via epsilon-transitions. Loops are handled through the
00938 # visited array to weed out all the states already processed.
00939
00940 $self StateCheck $s
00941
00942 # Prefer cached information
00943 if {[info exists ec($s)]} {
00944 return $ec($s)
00945 }
00946
00947 set closure [list $s]
00948 set pending [list $s]
00949 array set visited {}
00950 while {[llength $pending]} {
00951 set t [struct::list shift pending]
00952 if {[info exists visited($t)]} continue
00953 set visited($t) .
00954 upvar #0 ${selfns}::trans_$order($t) jump
00955 if {![info exists jump()]} continue
00956 struct::set add closure $jump()
00957 struct::set add pending $jump()
00958 }
00959 set ec($s) $closure
00960 return $closure
00961 }
00962
00963
00964
00965 ret clear () {
00966 array unset order ; set nondete {}
00967 array unset start ; set scount 0
00968 array unset final ; set reach {}
00969 array unset symbol ; set reachvalid 0
00970 array unset transym ; set useful {}
00971 array unset transinv ; set usefulvalid 0
00972 array unset nondets
00973 array unset ec
00974
00975 # Locate all 'trans_' arrays and remove them as well.
00976
00977 foreach v [info vars ${selfns}::trans_*] {
00978 unset $v
00979 }
00980 return
00981 }
00982
00983
00984
00985
00986 ret StateCheck (type s) {
00987 if {![info exists order($s)]} {
00988 return -code error "Illegal state \"$s\""
00989 }
00990 }
00991
00992 ret StateCheckSet (type states) {
00993 foreach s $states {
00994 if {![info exists order($s)]} {
00995 return -code error "Illegal state \"$s\""
00996 }
00997 }
00998 }
00999
01000 ret SymbolCheck (type sym) {
01001 if {$sym eq ""} return
01002 if {![info exists symbol($sym)]} {
01003 return -code error "Illegal symbol \"$sym\""
01004 }
01005 }
01006
01007 ret SymbolCheckNE (type sym) {
01008 if {($sym eq "") || ![info exists symbol($sym)]} {
01009 return -code error "Illegal symbol \"$sym\""
01010 }
01011 }
01012
01013 if 0 {
01014
01015 ret SymbolCheckSet (type symbols) {
01016 foreach sym $symbols {
01017 if {$sym eq ""} continue
01018 if {![info exists symbol($sym)]} {
01019 return -code error "Illegal symbol \"$sym\""
01020 }
01021 }
01022 }
01023 }
01024
01025 ret SymbolCheckSetNE (type symbols) {
01026 foreach sym $symbols {
01027 if {($sym eq "") || ![info exists symbol($sym)]} {
01028 return -code error "Illegal symbol \"$sym\""
01029 }
01030 }
01031 }
01032
01033 ret Next (type s , type sym , type nexts) {
01034 # Modify transition table. May update the set of
01035 # non-deterministic states. Invalidates reachable
01036 # cache, as states may become reachable. Updates
01037 # the transym and transinv mappings.
01038
01039 upvar #0 ${selfns}::trans_$order($s) jump
01040
01041 $self InvalidateReach
01042 $self InvalidateUseful
01043 # Clear closure cache when epsilons change.
01044 if {$sym eq ""} {array unset ec}
01045
01046 if {[info exists transym($sym)]} {
01047 struct::set include transym($sym) $s
01048 } else {
01049 set transym($sym) [list $s]
01050 }
01051
01052 if {[info exists transinv($nexts)]} {
01053 struct::set include transinv($nexts) [list $s $sym]
01054 } else {
01055 set transinv($nexts) [list [list $s $sym]]
01056 }
01057
01058 if {![info exists jump($sym)]} {
01059 set jump($sym) [list $nexts]
01060 } else {
01061 struct::set include jump($sym) $nexts
01062 }
01063 $self NonDeterministic $s $sym
01064 return
01065 }
01066
01067 ret !Next (type s , type sym , type args) {
01068 upvar #0 ${selfns}::trans_$order($s) jump
01069 # Anything to do at all ?
01070 if {![info exists jump($sym)]} return
01071 $self InvalidateReach
01072 $self InvalidateUseful
01073 # Clear closure cache when epsilons change.
01074 if {$sym eq ""} {array unset ec}
01075
01076 if {![llength $args]} {
01077 # Unset all transitions for (s, sym)
01078 # Update transym and transinv mappings as well, if existing.
01079
01080 $self !Transym $s $sym
01081 foreach nexts $jump($sym) {
01082 $self !Transinv $s $sym $nexts
01083 }
01084
01085 unset jump($sym)
01086 } else {
01087 # Remove the single transition (s, sym) -> nexts
01088 set nexts [lindex $args 0]
01089
01090 struct::set exclude jump($sym) $nexts
01091 $self !Transinv $s $sym $nexts
01092
01093 if {![struct::set size $jump($sym)]} {
01094 $self !Transym $s $sym
01095 unset jump($sym)
01096 if {![array size jump]} {
01097 unset jump
01098 }
01099 }
01100 }
01101
01102 $self NonDeterministic $s $sym
01103 return
01104 }
01105
01106 ret !Transym (type s , type sym) {
01107 struct::set exclude transym($sym) $s
01108 if {![struct::set size $transym($sym)]} {
01109 unset transym($sym)
01110 }
01111 }
01112
01113 ret !Transinv (type s , type sym , type nexts) {
01114 if {[info exists transinv($nexts)]} {
01115 struct::set exclude transinv($nexts) [list $s $sym]
01116 if {![struct::set size $transinv($nexts)]} {
01117 unset transinv($nexts)
01118 }
01119 }
01120 }
01121
01122 ret InvalidateReach () {
01123 set reachvalid 0
01124 set reach {}
01125 return
01126 }
01127
01128 ret InvalidateUseful () {
01129 set usefulvalid 0
01130 set useful {}
01131 return
01132 }
01133
01134 ret NonDeterministic (type s , type sym) {
01135 upvar #0 ${selfns}::trans_$order($s) jump
01136
01137 # Epsilon rule, whole state check. Epslion present <=> Not a DFA.
01138
01139 if {[info exists jump()]} {
01140 struct::set include nondete $s
01141 } else {
01142 struct::set exclude nondete $s
01143 }
01144
01145 # Non-determinism over a symbol.
01146
01147 upvar #0 ${selfns}::trans_$order($s) jump
01148
01149 if {[info exists jump($sym)] && [struct::set size $jump($sym)] > 1} {
01150 if {![info exists nondets($s)]} {
01151 set nondets($s) [list $sym]
01152 } else {
01153 struct::set include nondets($s) $sym
01154 }
01155 return
01156 } else {
01157 if {![info exists nondets($s)]} return
01158 struct::set exclude nondets($s) $sym
01159 if {![struct::set size $nondets($s)]} {
01160 unset nondets($s)
01161 }
01162 }
01163 return
01164 }
01165
01166 ret CheckSerialization (type value , type startst , type states , type acc , type trans , type syms) {
01167 # value is list/3 ('grammar::fa' symbols states)
01168 # !("" in symbols)
01169 # states is ordered dict (key is state, value is statedata)
01170 # statedata is list/3 (start final trans|"")
01171 # start is boolean
01172 # final is boolean
01173 # trans is dict (key in symbols, value is destinations)
01174 # destinations is set of states
01175
01176 upvar 1 $startst startstates \
01177 $states sts \
01178 $acc a \
01179 $trans t \
01180 $syms symbols
01181
01182 set prefix "error in serialization:"
01183 if {[llength $value] != 3} {
01184 return -code error "$prefix list length not 3"
01185 }
01186
01187 struct::list assign $value stype symbols statedata
01188
01189 if {$stype ne "grammar::fa"} {
01190 return -code error "$prefix unknown type \"$stype\""
01191 }
01192 if {[struct::set contains $symbols ""]} {
01193 return -code error "$prefix empty symbol is not legal"
01194 }
01195
01196 if {[llength $statedata] % 2 == 1} {
01197 return -code error "$prefix state data is not a dictionary"
01198 }
01199 array set _states $statedata
01200 if {[llength $statedata] != (2*[array size _states])} {
01201 return -code error "$prefix state data contains duplicate states"
01202 }
01203 set startstates {}
01204 set sts {}
01205 set p {}
01206 set a {}
01207 set e {}
01208 set l {}
01209 set m {}
01210 set t {}
01211 foreach {k v} $statedata {
01212 lappend sts $k
01213 if {[llength $v] != 3} {
01214 return -code error "$prefix state list length not 3"
01215 }
01216
01217 struct::list assign $v begin accept trans
01218
01219 if {![string is boolean -strict $begin]} {
01220 return -code error "$prefix expected boolean for start, got \"$begin\""
01221 }
01222 if {$begin} {lappend startstates $k}
01223 if {![string is boolean -strict $accept]} {
01224 return -code error "$prefix expected boolean for final, got \"$accept\""
01225 }
01226 if {$accept} {lappend a $k}
01227
01228 if {[llength $trans] % 2 == 1} {
01229 return -code error "$prefix transition data is not a dictionary"
01230 }
01231 array set _trans $trans
01232 if {[llength $trans] != (2*[array size _trans])} {
01233 return -code error "$prefix transition data contains duplicate symbols"
01234 }
01235 unset _trans
01236
01237 foreach {sym destinations} $trans {
01238 # destinations = list of state
01239 if {($sym ne "") && ![struct::set contains $symbols $sym]} {
01240 return -code error "$prefix illegal symbol \"$sym\" in transition"
01241 }
01242 foreach dest $destinations {
01243 if {![info exists _states($dest)]} {
01244 return -code error "$prefix illegal destination state \"$dest\""
01245 }
01246 lappend t $k $sym $dest
01247 }
01248 }
01249 }
01250 return
01251 }
01252
01253
01254
01255
01256
01257
01258
01259
01260 }
01261
01262
01263
01264
01265
01266 ::grammar::fa::op::constructor ::grammar::fa
01267
01268
01269
01270
01271 package provide grammar::fa 0.3
01272