fa.tcl

Go to the documentation of this file.
00001 /*  -*- tcl -*-*/
00002 /*  (c) 2004-1007 Andreas Kupries*/
00003 /*  Grammar / Finite Automatons / Container*/
00004 
00005 /*  ### ### ### ######### ######### #########*/
00006 /*  Package description*/
00007 
00008 /*  A class whose instances hold all the information describing a*/
00009 /*  single finite automaton (states, symbols, start state, set of*/
00010 /*  accepting states, transition function), and operations to define,*/
00011 /*  manipulate, and query this information.*/
00012 
00013 /*  ### ### ### ######### ######### #########*/
00014 /*  Requisites*/
00015 
00016 package require grammar::fa::op ; /*  Heavy FA operations.*/
00017 package require snit            ; /*  OO system in use*/
00018 package require struct::list    ; /*  Extended list operations.*/
00019 package require struct::     ; # Extended =   operations = .
00020 
00021 /*  ### ### ### ######### ######### #########*/
00022 /*  Implementation*/
00023 
00024 snit::type ::grammar::fa {
00025     /*  ### ### ### ######### ######### #########*/
00026     /*  Type API. A number of operations on FAs*/
00027 
00028     /*  ### ### ### ######### ######### #########*/
00029     /*  Instance API*/
00030 
00031     /* constructor {args} {}*/
00032     /* destructor  {}*/
00033 
00034     ret  =   {b} ()
00035     method --> {b} {}
00036 
00037     ret  serialize () {}
00038     ret  deserialize (type value) {}
00039     ret  deserialize_merge (type value) {}
00040 
00041     ret  states () {}
00042     ret  state (type cmd , type s , type args) {}
00043 
00044     ret  startstates () {}
00045     ret  start?      (type s) {}
00046     ret  start?set   (type states) {}
00047     ret  start       (type cmd , type args) {}
00048 
00049     ret  finalstates () {}
00050     ret  final?      (type s) {}
00051     ret  final?set   (type states) {}
00052     ret  final       (type cmd , type args) {}
00053 
00054     ret  symbols     () {}
00055     ret  symbols@    (type state) {}
00056     ret  symbols@set (type states) {}
00057     ret  symbol      (type cmd , type sym) {}
00058 
00059     ret  next  (type s , type sym , type args) {}
00060     ret  !next (type s , type sym , type args) {}
00061     ret  nextset (type states , type sym) {}
00062 
00063     ret  is (type cmd) {}
00064 
00065     ret  reachable_states   () {}
00066     ret  unreachable_states () {}
00067     ret  reachable          (type s) {}
00068 
00069     ret  useful_states   () {}
00070     ret  unuseful_states () {}
00071     ret  useful          (type s) {}
00072 
00073     ret  epsilon_closure (type s) {}
00074 
00075     ret  clear () {}
00076 
00077     /*  ### ### ### ######### ######### #########*/
00078     /*  Instance API. Complex FA operations.*/
00079     /*  The heavy lifting is done by the operations package.*/
00080 
00081     ret  reverse    ()                          {op::reverse    $self}
00082     ret  complete   (optional sink ={)}                 {op::complete   $self $sink}
00083     method remove_eps {}                          {op::remove_eps $self}
00084     method trim       {{what !reachable|!useful}} {op::trim       $self $what}
00085     ret  complement ()                          {op::complement $self}
00086     ret  kleene     ()                          {op::kleene     $self}
00087     ret  optional   ()                          {op::optional   $self}
00088     ret  fromRegex  (type regex , optional over ={)}           {op::fromRegex  $self $regex $over}
00089 
00090     method determinize {{mapvar {}}} {
00091     if {$mapvar ne ""} {upvar 1 $mapvar map}
00092     op::determinize $self map
00093     }
00094 
00095     ret  minimize (optional mapvar ={)} {
00096     if {$mapvar ne ""} {upvar 1 $mapvar map}
00097     op::minimize $self map
00098     }
00099 
00100     ret  union (type fa , optional mapvar ={)} {
00101     if {$mapvar ne ""} {upvar 1 $mapvar map}
00102     op::union $self $fa map
00103     }
00104 
00105     ret  intersect (type fa , optional mapvar ={)} {
00106     if {$mapvar ne ""} {upvar 1 $mapvar map}
00107     op::intersect $self $fa map
00108     }
00109 
00110     ret  difference (type fa , optional mapvar ={)} {
00111     if {$mapvar ne ""} {upvar 1 $mapvar map}
00112     op::difference $self $fa map
00113     }
00114 
00115     ret  concatenate (type fa , optional mapvar ={)} {
00116     if {$mapvar ne ""} {upvar 1 $mapvar map}
00117     op::concatenate $self $fa map
00118     }
00119 
00120     /*  ### ### ### ######### ######### #########*/
00121     /*  Internal data structures.*/
00122 
00123     /*  State information:*/
00124     /*  - Order    : Defined for all states, values provide creation order.*/
00125     /*  - Start    : Defined for states which are "start" (Input processing begins in).*/
00126     /*  - Final    : Defined for states which are "final" ("accept" input).*/
00127     /*  - Transinv : Inverse transitions. Per state the set of (state,sym)'s*/
00128     /*               which have transitions into the state. Defined only for*/
00129     /*               states which have inbound transitions.*/
00130     /** 
00131      *# Transinv is maintained to make state deletion easier: Direct
00132      *# access to the states and transitions which are inbound, for
00133      *# their deletion.
00134  */
00135 
00136     variable order        ; /*  Map : State -> Order of creation*/
00137     variable final        ; /*  Map : State -> .   Exists <=> Is a final State*/
00138     variable start        ; /*  Map : State -> .   Exists <=> Is a start State*/
00139     variable transinv     ; /*  Map : State -> {(State, Sym)}*/
00140 
00141     /*  Global information:*/
00142     /*  - Scount     : Counter for creation order of states.*/
00143 
00144     variable scount     0  ; /*  Counter for orderering states.*/
00145 
00146     /*  Symbol information:*/
00147     /*  - Symbol : Defined for all symbols, values irrelevant.*/
00148 
00149     variable symbol       ; /*  Map : Symbol -> . Exists = Symbol declared.*/
00150 
00151     /*  Transition data:*/
00152     /*  - TransN  : Dynamically created instance variables. Transition tables*/
00153     /*              for single states. Defined only for states which have*/
00154     /*              transitions.*/
00155     /*  - Transym : List of states having transitions on that symbol.*/
00156 
00157     /*  Transym is maintained for symbol deletion. Direct access to the transitions*/
00158     /*  we have to delete as well.*/
00159 
00160     /*  selfns::trans_$order(state) : Per state map : symbol -> list of destinations.*/
00161     variable transym      ; /*  Map : Sym -> {State}*/
00162 
00163     /*  Derived information:*/
00164     /*  - Reach       : Cache for set of states reachable from start.*/
00165     /*  - Reachvalid  : Boolean flag. True iff the reach cache contains valid data*/
00166     /*  - Useful      : Cache for set of states able to reach final.*/
00167     /*  - Usefulvalid : Boolean flag. True iff the useful cache contains valid data*/
00168     /*  - Nondete     : Set of states which are non-deterministic, because they have*/
00169     /*                   epsilon-transitions.*/
00170     /*  -  EC          : Cache of epsilon-closures*/
00171 
00172     variable reach      {} ; /*  Set of states reachable from 'start'.*/
00173     variable reachvalid 0  ; /*  Boolean flag, if 'reach' is valid.*/
00174 
00175     variable useful      {} ; /*  Set of states able to reach 'final'.*/
00176     variable usefulvalid 0  ; /*  Boolean flag, if 'useful' is valid.*/
00177 
00178     variable nondete    {} ; /*  Set of non-deterministic states, by epsilon/non-epsilon.*/
00179     variable nondets       ; /*  Per non-det state the set of symbols it is non-det in.*/
00180 
00181     variable ec            ; /*  Cache of epsilon-closures for states.*/
00182 
00183 
00184     /*  ### ### ### ######### ######### #########*/
00185     /*  Instance API Implementation.*/
00186 
00187     constructor {args} {
00188      alen =  [llength $args]
00189     if {($alen != 2) && ($alen != 0) && ($alen != 3)} {
00190         return -code error "wrong/* args: $self ?=|:=|<--|as|deserialize a'|fromRegex re ?over??"*/
00191     }
00192 
00193     array  order =     {} ;  nondete =      {}
00194     array  start =     {} ;  scount =       0
00195     array  final =     {} ;  reach =        {}
00196     array  symbol =    {} ;  reachvalid =   0
00197     array  transym =   {} ;  useful =       {}
00198     array  transinv =  {} ;  usefulvalid =  0
00199     array  nondets =   {}
00200     array  ec =        {}
00201 
00202     if {$alen == 0} return
00203 
00204     foreach {cmd object} $args break
00205     switch -exact -- $cmd {
00206         = - := - <-- - as {
00207         if {$alen != 2} {
00208             return -code error "wrong/* args: $self ?=|:=|<--|as|deserialize a'|fromRegex re ?over??"*/
00209         }
00210         $self = $object
00211         }
00212         deserialize {
00213         if {$alen != 2} {
00214             return -code error "wrong/* args: $self ?=|:=|<--|as|deserialize a'|fromRegex re ?over??"*/
00215         }
00216         /*  Object is actually a value, the deserialization to use.*/
00217         $self deserialize $object
00218         }
00219         fromRegex {
00220         /*  Object is actually a value, the regular expression to use.*/
00221         if {$alen == 2} {
00222             $self fromRegex $object
00223         } else {
00224             $self fromRegex $object [lindex $args 2]
00225         }
00226         }
00227         default {
00228         return -code error "bad assignment: $self ?=|:=|<--|as|deserialize a'|fromRegex re ?over??"
00229         }
00230     }
00231     return
00232     }
00233 
00234     /*  destructor {}*/
00235 
00236     /*  --- --- --- --------- --------- ---------*/
00237 
00238     ret  = {b} (
00239     $type self , type deserialize [$, type b , type serialize]
00240     )
00241 
00242     method --> {b} {
00243     $b deserialize [$self serialize]
00244     }
00245 
00246     /*  --- --- --- --------- --------- ---------*/
00247 
00248     ret  serialize () {
00249     set ord {}
00250     foreach {s n} [array get order] {
00251         lappend ord [list $s $n]
00252     }
00253     set states {} ; # Dictionary
00254     foreach item [lsort -index 1 -integer -increasing $ord] {
00255         set s [lindex $item 0]
00256         set sdata {}
00257 
00258         # Dict data per state :
00259 
00260         lappend sdata [info exists start($s)]
00261         lappend sdata [info exists final($s)]
00262 
00263         # Transitions from the state.
00264 
00265         upvar #0 ${selfns}::trans_$order($s) jump
00266 
00267         if {![info exists jump]} {
00268         lappend sdata {}
00269         } else {
00270         lappend sdata [array get jump]
00271         }
00272 
00273         # ----------------------
00274         lappend states $s $sdata
00275     }
00276 
00277     return [::list \
00278         grammar::fa \
00279         [array names symbol] \
00280         $states \
00281         ]
00282     }
00283 
00284     ret  deserialize (type value) {
00285     $self CheckSerialization $value st states acc tr newsymbols
00286     $self clear
00287 
00288     foreach s   $states     {set order($s)    [incr scount]}
00289     foreach sym $newsymbols {set symbol($sym) .}
00290     foreach s   $acc        {set final($s)    .}
00291     foreach s   $st         {set start($s)    .}
00292 
00293     foreach {sa sym se} $tr {$self Next $sa $sym $se}
00294     return
00295     }
00296 
00297     ret  deserialize_merge (type value) {
00298     $self CheckSerialization $value st states acc tr newsymbols
00299 
00300     foreach s   $states     {set order($s)    [incr scount]}
00301     foreach sym $newsymbols {set symbol($sym) .}
00302     foreach s   $acc        {set final($s)    .}
00303     foreach s   $st         {set start($s)    .}
00304 
00305     foreach {sa sym se} $tr {$self Next $sa $sym $se}
00306     return
00307     }
00308 
00309     /*  --- --- --- --------- --------- ---------*/
00310 
00311     ret  states () {
00312     return [array names order]
00313     }
00314 
00315     ret  state (type cmd , type s , type args) {
00316     switch -exact -- $cmd {
00317         add {
00318         set args [linsert $args 0 $s]
00319         foreach s $args {
00320             if {[info exists order($s)]} {
00321             return -code error "State \"$s\" is already known"
00322             }
00323         }
00324         foreach s $args {set order($s) [incr scount]}
00325         return
00326         }
00327         delete {
00328         set args [linsert $args 0 $s]
00329         $self StateCheckSet $args
00330 
00331         foreach s $args {
00332             unset -nocomplain start($s)                   ; # Start/Initial indicator
00333             unset -nocomplain final($s)                   ; # Final/Accept indicator
00334 
00335             # Remove all inbound transitions.
00336             if {[info exists transinv($s)]} {
00337             set src $transinv($s)
00338             unset    transinv($s)
00339 
00340             foreach srcitem $src {
00341                 struct::list assign $srcitem sin sym
00342                 $self !Next $sin $sym $s
00343             }
00344             }
00345 
00346             # We remove transition data only after the inbound
00347             # ones. Otherwise we screw up the removal of
00348             # looping transitions. We have to consider the
00349             # backpointers to us in transinv as well.
00350 
00351             upvar #0  ${selfns}::trans_$order($s) jump
00352             if {[info exists jump]} {
00353             foreach sym [array names jump] {
00354                 $self !Transym $s $sym
00355                 foreach nexts $jump($sym) {
00356                 $self !Transinv $s $sym $nexts
00357                 }
00358             }
00359 
00360             unset ${selfns}::trans_$order($s) ; # Transitions from s
00361             }
00362             unset order($s)                               ; # State ordering
00363 
00364             # Removal of a state may break the automaton into
00365             # disconnected pieces. This means that the set of
00366             # reachable and useful states may change, and the
00367             # cache cannot be used from now on.
00368 
00369             $self InvalidateReach
00370             $self InvalidateUseful
00371         }
00372         return
00373         }
00374         rename {
00375         set alen [llength $args]
00376         if {($alen != 1)} {
00377             return -code error "wrong#args: [list $self] state rename s s'"
00378         }
00379         $self StateCheck $s
00380         set snew [lindex $args 0]
00381         if {[info exists order($snew)]} {
00382             return -code error "State \"$snew\" is already known"
00383         }
00384 
00385         set o $order($s)
00386         unset order($s)                               ; # State ordering
00387         set   order($snew) $o
00388 
00389                 # Start/Initial indicator
00390         if {[info exists start($s)]} {
00391             set   start($snew) $start($s)
00392             unset start($s)
00393         }
00394                 # Final/Accept indicator
00395         if {[info exists final($s)]} {
00396             set   final($snew) $final($s)
00397             unset final($s)
00398         }
00399         # Update all inbound transitions.
00400         if {[info exists transinv($s)]} {
00401             set   transinv($snew) $transinv($s)
00402             unset transinv($s)
00403 
00404             # We have to perform a bit more here. We have to
00405             # go through the inbound transitions and chane the
00406             # listed destination state to the new name.
00407 
00408             foreach srcitem $transinv($snew) {
00409             struct::list assign $srcitem sin sym
00410             upvar #0 ${selfns}::trans_$order($sin) jump
00411             upvar 0 jump($sym) destinations
00412             set pos [lsearch -exact $destinations $s]
00413             set destinations [lreplace $destinations $pos $pos $snew]
00414             }
00415         }
00416 
00417         # Another place to change are the back pointers from
00418         # all the states we have transitions to, i.e. transinv
00419         # for all outbound states.
00420 
00421         upvar #0 ${selfns}::trans_$o jump
00422         if {[info exists jump]} {
00423             foreach sym [array names jump] {
00424             foreach sout $jump($sym) {
00425                 upvar 0 transinv($sout) backpointer
00426                 set pos [lsearch -exact $backpointer [list $s $sym]]
00427                 set backpointer [lreplace $backpointer $pos $pos [list $snew $sym]]
00428             }
00429 
00430             # And also to update: Transym information for the symbol.
00431             upvar 0 transym($sym) users
00432             set pos [lsearch -exact $users $s]
00433             set users [lreplace $users $pos $pos $snew]
00434             }
00435         }
00436 
00437         # Changing the name of a state does not change the
00438         # reachables / useful states per se. We just may have
00439         # to replace the name in the caches as well.
00440 
00441         # - Invalidation will do the same, at the expense of a
00442         # - larger computation later.
00443 
00444         $self InvalidateReach
00445         $self InvalidateUseful
00446         }
00447         exists {
00448         set alen [llength $args]
00449         if {$alen != 0} {
00450             return -code error "wrong#args: [list $self] state exists s"
00451         }
00452         return [info exists order($s)]
00453         }
00454         default {
00455         return -code error "Expected add, exists, delete, or rename, got \"$cmd\""
00456         }
00457     }
00458     return
00459     }
00460 
00461     /*  --- --- --- --------- --------- ---------*/
00462 
00463     ret  startstates () {
00464     return [array names start]
00465     }
00466 
00467     ret  start? (type s) {
00468     $self StateCheck $s
00469     return [info exists start($s)]
00470     }
00471 
00472     ret  start?set (type states) {
00473     $self StateCheckSet $states
00474     foreach s $states {
00475         if {[info exists start($s)]} {return 1}
00476     }
00477     return 0
00478     }
00479 
00480     ret  start (type cmd , type args) {
00481     # Note: Adding or removing start states does not change
00482     # usefulness, only reachability
00483     switch -exact -- $cmd {
00484         add {
00485         if {[llength $args] < 1} {
00486             return -code error "wrong#args: start add state ..."
00487         }
00488         $self StateCheckSet $args
00489         foreach s $args {set start($s) .}
00490         $self InvalidateReach
00491         }
00492         set {
00493         if {[llength $args] != 1} {
00494             return -code error "wrong#args: start set states"
00495         }
00496         set states [lindex $args 0]
00497         $self StateCheckSet $states
00498         array unset start
00499         foreach s $states {set start($s) .}
00500         $self InvalidateReach
00501         }
00502         remove {
00503         if {[llength $args] < 1} {
00504             return -code error "wrong#args: start remove state ..."
00505         }
00506         $self StateCheckSet $args
00507         foreach s $args {
00508             unset -nocomplain start($s)
00509         }
00510         $self InvalidateReach
00511         }
00512         clear {
00513         if {[llength $args] != 0} {
00514             return -code error "wrong#args: start clear"
00515         }
00516         array unset start
00517         $self InvalidateReach
00518         }
00519         default {
00520         return -code error "Expected add, clear, remove, or set, got \"$cmd\""
00521         }
00522     }
00523     return
00524     }
00525 
00526     /*  --- --- --- --------- --------- ---------*/
00527 
00528     ret  finalstates () {
00529     return [array names final]
00530     }
00531 
00532     ret  final? (type s) {
00533     $self StateCheck $s
00534     return [info exists final($s)]
00535     }
00536 
00537     ret  final?set (type states) {
00538     $self StateCheckSet $states
00539     foreach s $states {
00540         if {[info exists final($s)]} {return 1}
00541     }
00542     return 0
00543     }
00544 
00545     ret  final (type cmd , type args) {
00546     # Note: Adding or removing start states does not change
00547     # reachability, only usefulness
00548     switch -exact -- $cmd {
00549         add {
00550         if {[llength $args] < 1} {
00551             return -code error "wrong#args: final add state ..."
00552         }
00553         $self StateCheckSet $args
00554         foreach s $args {set final($s) .}
00555         $self InvalidateUseful
00556         }
00557         set {
00558         if {[llength $args] != 1} {
00559             return -code error "wrong#args: final set states"
00560         }
00561         set states [lindex $args 0]
00562         $self StateCheckSet $states
00563         array unset final
00564         foreach s $states {set final($s) .}
00565         $self InvalidateReach
00566         }
00567         remove {
00568         if {[llength $args] < 1} {
00569             return -code error "wrong#args: final remove state ..."
00570         }
00571         $self StateCheckSet $args
00572         foreach s $args {
00573             unset -nocomplain final($s)
00574         }
00575         $self InvalidateUseful
00576         }
00577         clear {
00578         if {[llength $args] != 0} {
00579             return -code error "wrong#args: final clear"
00580         }
00581         array unset final
00582         $self InvalidateReach
00583         }
00584         default {
00585         return -code error "Expected add, clear, remove, or set, got \"$cmd\""
00586         }
00587     }
00588     return
00589     }
00590 
00591     /*  --- --- --- --------- --------- ---------*/
00592 
00593     ret  symbols () {
00594     return [array names symbol]
00595     }
00596 
00597     ret  symbols@ (type s , optional t ={)} {
00598     $self StateCheck $s
00599     if {$t ne ""} { $self StateCheck $t}
00600     upvar /* 0 ${selfns}::trans_$order($s) jump*/
00601     if {![info exists jump]} {return {}}
00602     if {$t eq ""} {
00603         /*  No destination, all symbols.*/
00604         return [array names jump]
00605     }
00606     /*  Specific destination, locate the symbols going there.*/
00607      result =  {}
00608     foreach sym [array names jump] {
00609         if {[lsearch -exact $jump($sym) $t] < 0} continue
00610         lappend result $sym
00611     }
00612     return [lsort -uniq $result]
00613     }
00614 
00615     ret  symbols@set (type states) {
00616     # Union (fa symbol@ s, f.a. s in states)
00617 
00618     $self StateCheckSet $states
00619     set result {}
00620     foreach s $states {
00621         upvar #0 ${selfns}::trans_$order($s) jump
00622         if {![info exists jump]} continue
00623         foreach sym [array names jump] {
00624         lappend result $sym
00625         }
00626     }
00627     return [lsort -uniq $result]
00628     }
00629 
00630     ret  symbol (type cmd , type sym , type args) {
00631     switch -exact -- $cmd {
00632         add {
00633         set args [linsert $args 0 $sym]
00634         foreach sym $args {
00635             if {$sym eq ""} {
00636             return -code error "Cannot add illegal empty symbol \"\""
00637             }
00638             if {[info exists symbol($sym)]} {
00639             return -code error "Symbol \"$sym\" is already known"
00640             }
00641         }
00642         foreach sym $args {set symbol($sym) .}
00643         }
00644         delete {
00645         set args [linsert $args 0 $sym]
00646         $self SymbolCheckSetNE $args
00647         foreach sym $args {
00648             unset symbol($sym)
00649 
00650             # Delete all transitions using the removed symbol.
00651 
00652             if {[info exists transym($sym)]} {
00653             foreach s $transym($sym) {
00654                 $self !Next $s $sym
00655             }
00656             }
00657         }
00658         }
00659         rename {
00660         set alen [llength $args]
00661         if {$alen != 1} {
00662             return -code error "wrong#args: [list $self] symbol rename sym newsym"
00663         }
00664         $self SymbolCheckNE $sym
00665         set newsym [lindex $args 0]
00666 
00667         if {$newsym eq ""} {
00668             return -code error "Cannot add illegal empty symbol \"\""
00669         }
00670         if {[info exists symbol($newsym)]} {
00671             return -code error "Symbol \"$newsym\" is already known"
00672         }
00673 
00674         unset symbol($sym)
00675         set symbol($newsym) .
00676 
00677         if {[info exists transym($sym)]} {
00678             set   transym($newsym) [set states $transym($sym)]
00679             unset transym($sym)
00680 
00681             foreach s $states {
00682             # Update the jump tables for each of the states
00683             # using this symbol, and the reverse tables as
00684             # well.
00685 
00686             upvar #0 ${selfns}::trans_$order($s) jump
00687             set   jump($newsym) [set destinations $jump($sym)]
00688             unset jump($sym)
00689 
00690             foreach sd $destinations {
00691                 upvar 0 transinv($sd) backpointer
00692                 set pos [lsearch -exact $backpointer [list $s $sym]]
00693                 set backpointer [lreplace $backpointer $pos $pos [list $s $newsym]]
00694             }
00695             }
00696         }
00697         }
00698         exists {
00699         set alen [llength $args]
00700         if {$alen != 0} {
00701             return -code error "wrong#args: [list $self] symbol exists sym"
00702         }
00703         return [info exists symbol($sym)]
00704         }
00705         default {
00706         return -code error "Expected add, delete, exists, or rename, got \"$cmd\""
00707         }
00708     }
00709     return
00710     }
00711 
00712     /*  --- --- --- --------- --------- ---------*/
00713 
00714     ret  next (type s , type sym , type args) {
00715     ## Split into checking and functionality ...
00716 
00717     set alen [llength $args]
00718     if {($alen != 2) && ($alen != 0)} {
00719         return -code error "wrong#args: [list $self] next s sym ?--> s'?"
00720     }
00721     $self StateCheck  $s
00722     $self SymbolCheck $sym
00723 
00724     if {($alen == 2) && [set cmd [lindex $args 0]] ne "-->"} {
00725         return -code error "Expected -->, got \"$cmd\""
00726     }
00727 
00728     if {$alen == 0} {
00729         # Query transition table.
00730         upvar #0 ${selfns}::trans_$order($s) jump
00731         if {![info exists jump($sym)]} {return {}}
00732         return $jump($sym)
00733     }
00734 
00735     set nexts [lindex $args 1]
00736     $self StateCheck $nexts
00737 
00738     upvar #0 ${selfns}::trans_$order($s) jump
00739     if {[info exists jump($sym)] && [struct::set contains $jump($sym) $nexts]} {
00740         return -code error "Transition \"($s, ($sym)) --> $nexts\" is already known"
00741     }
00742 
00743     $self Next $s $sym $nexts
00744     return
00745     }
00746 
00747     ret  !next (type s , type sym , type args) {
00748     set alen [llength $args]
00749     if {($alen != 2) && ($alen != 0)} {
00750         return -code error "wrong#args: [list $self] !next s sym ?--> s'?"
00751     }
00752     $self StateCheck  $s
00753     $self SymbolCheck $sym
00754 
00755     if {$alen == 2} {
00756         if {[lindex $args 0] ne "-->"} {
00757         return -code error "Expected -->, got \"[lindex $args 0]\""
00758         }
00759         set nexts [lindex $args 1]
00760         $self StateCheck $nexts
00761         $self !Next $s $sym $nexts
00762     } else {
00763         $self !Next $s $sym
00764     }
00765     }
00766 
00767     ret  nextset (type states , type sym) {
00768     $self SymbolCheck   $sym
00769     $self StateCheckSet $states
00770 
00771     set result {}
00772     foreach s $states {
00773         upvar #0 ${selfns}::trans_$order($s) jump
00774         if {![info exists jump($sym)]} continue
00775         struct::set add result $jump($sym)
00776     }
00777     return $result
00778     }
00779 
00780     /*  --- --- --- --------- --------- ---------*/
00781 
00782     ret  is (type cmd) {
00783     switch -exact -- $cmd {
00784         complete {
00785         # The FA is complete if Trans(State, Sym) != {} for all
00786         # states and symbols (Not counting epsilon transitions).
00787         # Without symbols the FA is deemed complete. Note:
00788         # States with epsilon transitions can use symbols
00789         # indirectly! Need their closures for exact
00790         # computation.
00791 
00792         set nsymbols [llength [array names symbol]]
00793         if {$nsymbols == 0} {return 1}
00794         foreach s [array names order] {
00795             upvar #0 ${selfns}::trans_$order($s) jump
00796             if {![info exists jump]} {return 0}
00797             set njsym [array size jump]
00798             if {[info exists jump()]} {
00799             set  njsym [llength [$self symbols@set [$self epsilon_closure $s]]]
00800             incr njsym -1
00801             }
00802             if {$njsym != $nsymbols}  {return 0}
00803         }
00804         return 1
00805         }
00806         deterministic {
00807         # The FA is deterministic if it has on start state, no
00808         # epsilon transitions, and the transition function is
00809         # State x Symbol -> State, and not
00810         # State x Symbol -> P(State).
00811 
00812         return [expr {
00813             ([array size start] == 1) &&
00814             ![llength $nondete] &&
00815             ![array size nondets]
00816         }] ;#{}
00817         }
00818         epsilon-free {
00819         # FA is epsion-free if there are no states having epsilon transitions.
00820         return [expr {![llength $nondete]}]
00821         }
00822         useful {
00823         # The FA is useful if and only if we have states and
00824         # all states are reachable and useful.
00825 
00826         set states [$self states]
00827         return [expr {
00828             [struct::set size $states] &&
00829             [struct::set equal $states [$self reachable_states]] &&
00830             [struct::set equal $states [$self useful_states]]
00831         }] ;# {}
00832         }
00833     }
00834     return -code error "Expected complete, deterministic, epsilon-free, or useful, got \"$cmd\""
00835     }
00836 
00837     /*  --- --- --- --------- --------- ---------*/
00838 
00839     ret  reachable_states () {
00840     if {$reachvalid} {return $reach}
00841     if {![array size start]} {
00842         set reach {}
00843     } else {
00844         # Basic algorithm like for epsilon_closure, except that we
00845         # process all transitions, not only epsilons, and that
00846         # the initial state is fixed to start.
00847 
00848         set reach   [array names start]
00849         set pending $reach
00850         array set visited {}
00851         while {[llength $pending]} {
00852         set s [struct::list shift pending]
00853         if {[info exists visited($s)]} continue
00854         set visited($s) .
00855         upvar #0 ${selfns}::trans_$order($s) jump
00856         if {![info exists jump]} continue
00857         if {![array size  jump]} continue
00858         foreach sym [array names jump] {
00859             struct::set add reach   $jump($sym)
00860             struct::set add pending $jump($sym)
00861         }
00862         }
00863     }
00864     set reachvalid 1
00865     return $reach
00866     }
00867 
00868     ret  unreachable_states () {
00869     # unreachable = states - reachables
00870     return [struct::set difference \
00871         [$self states] [$self reachable_states]]
00872     }
00873 
00874     ret  reachable (type s) {
00875     $self StateCheck $s
00876     return [struct::set contains [$self reachable_states] $s]
00877     }
00878 
00879     /*  --- --- --- --------- --------- ---------*/
00880 
00881     ret  useful_states () {
00882     if {$usefulvalid} {return $useful}
00883 
00884     # A state is useful if a final state
00885     # can be reached from it.
00886 
00887     if {![array size final]} {
00888         set useful {}
00889     } else {
00890         # Basic algorithm like for epsilon_closure, except that we
00891         # process all transitions, not only epsilons, and that
00892         # the initial set of states is fixed to final.
00893 
00894         set useful      [array names final]
00895         array set known [array get final]
00896         set pending $useful
00897         array set visited {}
00898         while {[llength $pending]} {
00899         set s [struct::list shift pending]
00900         if {[info exists visited($s)]} continue
00901         set visited($s) .
00902 
00903         # All predecessors are useful, and have to be visited as well.
00904         # We get the predecessors from the transinv structure.
00905 
00906         if {![info exists transinv($s)]} continue
00907         foreach before $transinv($s) {
00908             set before [lindex $before 0]
00909             if {[info exists visited($before)]} continue
00910             lappend pending $before
00911             if {[info exists known($before)]} continue
00912             lappend useful $before
00913             set known($before) .
00914         }
00915         }
00916     }
00917     set usefulvalid 1
00918     return $useful
00919     }
00920 
00921     ret  unuseful_states () {
00922     # unuseful = states - useful
00923     return [struct::set difference \
00924         [$self states] [$self useful_states]]
00925     }
00926 
00927     ret  useful (type s) {
00928     $self StateCheck $s
00929     return [struct::set contains [$self useful_states] $s]
00930     }
00931 
00932     /*  --- --- --- --------- --------- ---------*/
00933 
00934     ret  epsilon_closure (type s) {
00935     # Iterative graph traversal. Keeps a set of states to look at,
00936     # and adds to them everything it can reach from the current
00937     # state via epsilon-transitions. Loops are handled through the
00938     # visited array to weed out all the states already processed.
00939 
00940     $self StateCheck $s
00941 
00942     # Prefer cached information
00943     if {[info exists ec($s)]} {
00944         return $ec($s)
00945     }
00946 
00947     set closure [list $s]
00948     set pending [list $s]
00949     array set visited {}
00950     while {[llength $pending]} {
00951         set t [struct::list shift pending]
00952         if {[info exists visited($t)]} continue
00953         set visited($t) .
00954         upvar #0 ${selfns}::trans_$order($t) jump
00955         if {![info exists jump()]} continue
00956         struct::set add closure $jump()
00957         struct::set add pending $jump()
00958     }
00959     set ec($s) $closure
00960     return $closure
00961     }
00962 
00963     /*  --- --- --- --------- --------- ---------*/
00964 
00965     ret  clear () {
00966     array unset order    ; set nondete     {}
00967     array unset start    ; set scount      0
00968     array unset final    ; set reach       {}
00969     array unset symbol   ; set reachvalid  0
00970     array unset transym  ; set useful      {}
00971     array unset transinv ; set usefulvalid 0
00972     array unset nondets
00973     array unset ec
00974 
00975     # Locate all 'trans_' arrays and remove them as well.
00976 
00977     foreach v [info vars ${selfns}::trans_*] {
00978         unset $v
00979     }
00980     return
00981     }
00982 
00983     /*  ### ### ### ######### ######### #########*/
00984     /*  Instance Internals.*/
00985 
00986     ret  StateCheck (type s) {
00987     if {![info exists order($s)]} {
00988         return -code error "Illegal state \"$s\""
00989     }
00990     }
00991 
00992     ret  StateCheckSet (type states) {
00993     foreach s $states {
00994         if {![info exists order($s)]} {
00995         return -code error "Illegal state \"$s\""
00996         }
00997     }
00998     }
00999 
01000     ret  SymbolCheck (type sym) {
01001     if {$sym eq ""} return
01002     if {![info exists symbol($sym)]} {
01003         return -code error "Illegal symbol \"$sym\""
01004     }
01005     }
01006 
01007     ret  SymbolCheckNE (type sym) {
01008     if {($sym eq "") || ![info exists symbol($sym)]} {
01009         return -code error "Illegal symbol \"$sym\""
01010     }
01011     }
01012 
01013     if 0 {
01014     /*  Unused. Activate when needed.*/
01015     ret  SymbolCheckSet (type symbols) {
01016         foreach sym $symbols {
01017         if {$sym eq ""} continue
01018         if {![info exists symbol($sym)]} {
01019             return -code error "Illegal symbol \"$sym\""
01020         }
01021         }
01022     }
01023     }
01024 
01025     ret  SymbolCheckSetNE (type symbols) {
01026     foreach sym $symbols {
01027         if {($sym eq "") || ![info exists symbol($sym)]} {
01028         return -code error "Illegal symbol \"$sym\""
01029         }
01030     }
01031     }
01032 
01033     ret  Next (type s , type sym , type nexts) {
01034     # Modify transition table. May update the set of
01035     # non-deterministic states. Invalidates reachable
01036     # cache, as states may become reachable. Updates
01037     # the transym and transinv mappings.
01038 
01039     upvar #0 ${selfns}::trans_$order($s) jump
01040 
01041     $self InvalidateReach
01042     $self InvalidateUseful
01043     # Clear closure cache when epsilons change.
01044     if {$sym eq ""} {array unset ec}
01045 
01046     if {[info exists transym($sym)]} {
01047         struct::set include transym($sym) $s
01048     } else {
01049         set transym($sym) [list $s]
01050     }
01051 
01052     if {[info exists transinv($nexts)]} {
01053         struct::set include transinv($nexts) [list $s $sym]
01054     } else {
01055         set transinv($nexts) [list [list $s $sym]]
01056     }
01057 
01058     if {![info exists jump($sym)]} {
01059         set jump($sym) [list $nexts]
01060     } else {
01061         struct::set include jump($sym) $nexts
01062     }
01063     $self NonDeterministic $s $sym
01064     return
01065     }
01066 
01067     ret  !Next (type s , type sym , type args) {
01068     upvar #0 ${selfns}::trans_$order($s) jump
01069     # Anything to do at all ?
01070     if {![info exists jump($sym)]} return
01071     $self InvalidateReach
01072     $self InvalidateUseful
01073     # Clear closure cache when epsilons change.
01074     if {$sym eq ""} {array unset ec}
01075 
01076     if {![llength $args]} {
01077         # Unset all transitions for (s, sym)
01078         # Update transym and transinv mappings as well, if existing.
01079 
01080         $self !Transym $s $sym
01081         foreach nexts $jump($sym) {
01082         $self !Transinv $s $sym $nexts
01083         }
01084 
01085         unset jump($sym)
01086     } else {
01087         # Remove the single transition (s, sym) -> nexts
01088         set nexts [lindex $args 0]
01089 
01090         struct::set exclude jump($sym) $nexts
01091         $self !Transinv $s $sym $nexts
01092 
01093         if {![struct::set size $jump($sym)]} {
01094         $self !Transym $s $sym
01095         unset jump($sym)
01096         if {![array size jump]} {
01097             unset jump
01098         }
01099         }
01100     }
01101 
01102     $self NonDeterministic $s $sym
01103     return
01104     }
01105 
01106     ret  !Transym (type s , type sym) {
01107     struct::set exclude transym($sym) $s
01108     if {![struct::set size $transym($sym)]} {
01109         unset transym($sym)
01110     }
01111     }
01112 
01113     ret  !Transinv (type s , type sym , type nexts) {
01114     if {[info exists transinv($nexts)]} {
01115         struct::set exclude transinv($nexts) [list $s $sym]
01116         if {![struct::set size $transinv($nexts)]} {
01117         unset transinv($nexts)
01118         }
01119     }
01120     }
01121 
01122     ret  InvalidateReach () {
01123     set reachvalid 0
01124     set reach      {}
01125     return
01126     }
01127 
01128     ret  InvalidateUseful () {
01129     set usefulvalid 0
01130     set useful     {}
01131     return
01132     }
01133 
01134     ret  NonDeterministic (type s , type sym) {
01135     upvar #0 ${selfns}::trans_$order($s) jump
01136 
01137     # Epsilon rule, whole state check. Epslion present <=> Not a DFA.
01138 
01139     if {[info exists jump()]} {
01140         struct::set include nondete $s
01141     } else {
01142         struct::set exclude nondete $s
01143     }
01144 
01145     # Non-determinism over a symbol.
01146 
01147     upvar #0 ${selfns}::trans_$order($s) jump
01148 
01149     if {[info exists jump($sym)] && [struct::set size $jump($sym)] > 1} {
01150         if {![info exists nondets($s)]} {
01151         set nondets($s) [list $sym]
01152         } else {
01153         struct::set include nondets($s) $sym
01154         }
01155         return
01156     } else {
01157         if {![info exists nondets($s)]} return
01158         struct::set exclude nondets($s) $sym
01159         if {![struct::set size $nondets($s)]} {
01160         unset nondets($s)
01161         }
01162     }
01163     return
01164     }
01165 
01166     ret  CheckSerialization (type value , type startst , type states , type acc , type trans , type syms) {
01167     # value is list/3 ('grammar::fa' symbols states)
01168     # !("" in symbols)
01169     # states is ordered dict (key is state, value is statedata)
01170     # statedata is list/3 (start final trans|"")
01171     # start is boolean
01172     # final is boolean
01173     # trans is dict (key in symbols, value is destinations)
01174     # destinations is set of states
01175 
01176     upvar 1 $startst startstates \
01177         $states  sts \
01178         $acc     a \
01179         $trans   t \
01180         $syms    symbols
01181 
01182     set prefix "error in serialization:"
01183     if {[llength $value] != 3} {
01184         return -code error "$prefix list length not 3"
01185     }
01186 
01187     struct::list assign $value   stype symbols statedata
01188 
01189     if {$stype ne "grammar::fa"} {
01190         return -code error "$prefix unknown type \"$stype\""
01191     }
01192     if {[struct::set contains $symbols ""]} {
01193         return -code error "$prefix empty symbol is not legal"
01194     }
01195 
01196     if {[llength $statedata] % 2 == 1} {
01197         return -code error "$prefix state data is not a dictionary"
01198     }
01199     array set _states $statedata
01200     if {[llength $statedata] != (2*[array size _states])} {
01201         return -code error "$prefix state data contains duplicate states"
01202     }
01203     set startstates {}
01204     set sts {}
01205     set p   {}
01206     set a   {}
01207     set e   {}
01208     set l   {}
01209     set m   {}
01210     set t   {}
01211     foreach {k v} $statedata {
01212         lappend sts $k
01213         if {[llength $v] != 3} {
01214         return -code error "$prefix state list length not 3"
01215         }
01216 
01217         struct::list assign $v begin accept trans
01218 
01219         if {![string is boolean -strict $begin]} {
01220         return -code error "$prefix expected boolean for start, got \"$begin\""
01221         }
01222         if {$begin} {lappend startstates $k}
01223         if {![string is boolean -strict $accept]} {
01224         return -code error "$prefix expected boolean for final, got \"$accept\""
01225         }
01226         if {$accept} {lappend a $k}
01227 
01228         if {[llength $trans] % 2 == 1} {
01229         return -code error "$prefix transition data is not a dictionary"
01230         }
01231         array set _trans $trans
01232         if {[llength $trans] != (2*[array size _trans])} {
01233         return -code error "$prefix transition data contains duplicate symbols"
01234         }
01235         unset _trans
01236 
01237         foreach {sym destinations} $trans {
01238         # destinations = list of state
01239         if {($sym ne "") && ![struct::set contains $symbols $sym]} {
01240             return -code error "$prefix illegal symbol \"$sym\" in transition"
01241         }
01242         foreach dest $destinations {
01243             if {![info exists _states($dest)]} {
01244             return -code error "$prefix illegal destination state \"$dest\""
01245             }
01246             lappend t $k $sym $dest
01247         }
01248         }
01249     }
01250     return
01251     }
01252 
01253     /*  ### ### ### ######### ######### #########*/
01254     /*  Type API implementation.*/
01255 
01256     /*  ### ### ### ######### ######### #########*/
01257     /*  Type Internals.*/
01258 
01259     /*  ### ### ### ######### ######### #########*/
01260 }
01261 
01262 /*  ### ### ### ######### ######### #########*/
01263 /*  Initialization. Specify the container constructor command to use by*/
01264 /*  the operations package.*/
01265 
01266 ::grammar::fa::op::constructor ::grammar::fa
01267 
01268 /*  ### ### ### ######### ######### #########*/
01269 /*  Package Management*/
01270 
01271 package provide grammar::fa 0.3
01272 

Generated on 21 Sep 2010 for Gui by  doxygen 1.6.1