faop.tcl

Go to the documentation of this file.
00001 /*  -*- tcl -*-*/
00002 /*  Grammar / FA / Operations*/
00003 
00004 /*  ### ### ### ######### ######### #########*/
00005 /*  Package description*/
00006 
00007 /*  ### ### ### ######### ######### #########*/
00008 /*  Requisites*/
00009 
00010 package require struct::list ; /*  Extended list operations.*/
00011 package require struct::  ; # Extended =   operations = .
00012 
00013 /*  ### ### ### ######### ######### #########*/
00014 /*  Implementation*/
00015 
00016 namespace ::grammar::fa::op {
00017 
00018     /*  ### ### ### ######### ######### #########*/
00019     /*  API. Structure / Language / Compilation*/
00020 
00021     ret  reverse     (type fa) {}
00022     ret  complete    (type fa , optional sink ={)} {}
00023     proc remove_eps  {fa} {}
00024     proc trim        {fa {what !reachable|!useful}} {}
00025     ret  determinize (type fa , optional mapvar ={) {idstart 0}} {}
00026     ret  minimize    (type fa , optional mapvar ={)} {}
00027 
00028     proc complement  {fa} {}
00029     proc kleene      {fa} {}
00030     proc optional    {fa} {}
00031     proc union       {fa fb {mapvar {}}} {}
00032     ret  intersect   (type fa , type fb , optional mapvar ={) {idstart 0}} {}
00033     ret  difference  (type fa , type fb , optional mapvar ={)} {}
00034     proc concatenate {fa fb {mapvar {}}} {}
00035 
00036     ret  fromRegex   (type fa , type regex , optional over ={)} {}
00037 
00038     proc toRegexp    {fa} {}
00039     proc toRegexp2   {fa} {}
00040 
00041     proc simplifyRegexp {rex} {}
00042     proc toTclRegexp    {rex symdict} {}
00043 
00044     # ### ### ### ######### ######### #########
00045 
00046     namespace export reverse complete remove_eps trim \
00047         determinize minimize complement kleene \
00048         optional union intersect difference \
00049         concatenate fromRegex toRegexp toRegexp2 \
00050         simplifyRegexp toTclRegexp
00051 
00052     # ### ### ### ######### ######### #########
00053     ## Internal data structures.
00054 
00055     variable cons {}
00056 
00057     # ### ### ### ######### ######### #########
00058 }
00059 
00060 # ### ### ### ######### ######### #########
00061 ## API implementation. Structure
00062 
00063 proc ::grammar::fa::op::reverse {fa} {
00064     # Reversal means that all transitions change their direction
00065     # and start and final states are swapped.
00066 
00067     # Note that reversed FA might not be deterministic, even if the FA
00068     # itself was.
00069 
00070     # One loop is not enough for this. If we reverse the
00071     # transitions for a state immediately we may modify a state
00072     # which has not been processed yet. And when we come to this
00073     # state we reverse already reversed transitions, creating a
00074     # complete mess. Thus two loops, one to collect the current
00075     # transitions (and also remove them), and a second to insert
00076     # the reversed transitions.
00077 
00078     set tmp [$fa finalstates]
00079     $fa final set [$fa startstates]
00080     $fa start set $tmp
00081 
00082     # FUTURE : Method to retrieve all transitions
00083     # FUTURE : Method to delete all transitions
00084 
00085     set trans {}
00086     foreach s [$fa states] {
00087     foreach sym [$fa symbols@ $s] {
00088         lappend trans $s $sym [$fa next $s $sym]
00089         $fa !next $s $sym
00090     }
00091     }
00092     foreach {s sym destinations} $trans {
00093     foreach d $destinations {
00094         $fa next $d $sym --> $s
00095     }
00096     }
00097     return
00098 }
00099 
00100 /*  --- --- --- --------- --------- ---------*/
00101 
00102 ret  ::grammar::fa::op::complete (type fa , optional sink ={)} {
00103     if {[$fa is complete]} return
00104 
00105     /*  We have an incomplete FA.*/
00106 
00107     if {$sink eq ""} {
00108      sink =  [FindNewState $fa sink]
00109     } elseif {[$fa state exists $sink]} {
00110     return -code error "The chosen sink state exists already"
00111     }
00112     $fa state add $sink
00113 
00114     /*  Add transitions to it from all states which are not*/
00115     /*  complete. The sink state itself loops on all inputs. IOW it is a*/
00116     /*  non-useful state.*/
00117 
00118      symbols =  [$fa symbols]
00119     foreach sym $symbols {
00120     $fa next $sink $sym --> $sink  
00121     }
00122 
00123     if {[$fa is epsilon-free]} {
00124     foreach s [$fa states] {
00125         foreach missing [struct:: difference =  \
00126             $symbols \
00127             [$fa symbols@ $s]] {
00128         $fa next $s $missing --> $sink
00129         }
00130     }
00131     } else {
00132     /*  For an FA with epsilon-transitions we cannot simply look at*/
00133     /*  the direct transitions to find the used symbols. We have to*/
00134     /*  determine this for the epsilon-closure of the state in*/
00135     /*  question. Oh, and we have to defer actually adding the*/
00136     /*  transitions after we have picked them all, or otherwise the*/
00137     /*  newly added transitions throw the symbol calculations for*/
00138     /*  epsilon closures off.*/
00139 
00140      new =  {}
00141     foreach s [$fa states] {
00142         foreach missing [struct:: difference =  \
00143             $symbols \
00144             [$fa symbols@ [$fa =  epsilon_closure $s]]] {
00145         lappend new $s $missing
00146         }
00147     }
00148 
00149     foreach {s missing} $new {
00150         $fa next $s $missing --> $sink
00151     }
00152     }
00153     return
00154 }
00155 
00156 /*  --- --- --- --------- --------- ---------*/
00157 
00158 ret  ::grammar::fa::op::remove_eps (type fa) {
00159     # We eliminate all epsilon transitions by duplicating a number
00160     # of regular transitions, which we get through the epsilon
00161     # closure of the states having epsilon transitions. We do
00162     # nothing if the FA is epsilon free to begin with.
00163 
00164     if {[$fa is epsilon-free]} return
00165 
00166     # Note: Epsilon transitions touching start and final states
00167     # propagate the start markers forward and final markers
00168     # backward. We do this first by propagating start markers twice,
00169     # once with a reversed FA. This also gives us some
00170     # epsilon-closures as well.
00171 
00172     foreach n {1 2} {
00173     foreach s [$fa startstates] {
00174         foreach e [$fa epsilon_closure $s] {
00175         $fa start add $e
00176         }
00177     }
00178     reverse $fa
00179     }
00180 
00181     # Now duplicate all transitions which are followed or preceeded by
00182     # epsilon transitions of any number greater than zero.
00183 
00184     # Note: The closure computations done by the FA are cached in the
00185     # FA, so doing it multiple times is no big penalty.
00186 
00187     # FUTURE : Retrieve all transitions on one command.
00188 
00189     # FUTURE : Different algorithm ...
00190     # Retrieve non-eps transitions for all states ...
00191     # Iterate this list. Compute e-closures for endpoints, cache
00192     # them. Duplicate the transition if needed, in that case add it to
00193     # the end of the list, for possible more duplication (may touch
00194     # different e-closures). Stop when the list is empty again.
00195 
00196     set changed 1
00197     while {$changed} {
00198     set changed 0
00199     foreach s [$fa states] {
00200         foreach sym [$fa symbols@ $s] {
00201         set dest [$fa next $s $sym]
00202         if {$sym eq ""} {
00203             # Epsilon transitions.
00204 
00205             # Get the closure, and duplicate all transitions for all
00206             # non-empty symbols as transitions of the original state.
00207             # This may lead to parallel transitions between states, hence
00208             # the catch. It prevents the generated error from stopping the
00209             # action, and no actual parallel transitions are created.
00210 
00211             set clos [$fa epsilon_closure $s]
00212             foreach csym [$fa symbols@set $clos] {
00213             if {$csym eq ""} continue
00214             foreach d [$fa nextset $clos $csym] {
00215                 if {![catch {$fa next $s $csym --> $d} msg]} {
00216                 set changed 1
00217                 }
00218             }
00219             }
00220         } else {
00221             # Regular transition. Go through all destination
00222             # states, compute their closures and replicate the
00223             # transition if the closure contains more than the
00224             # destination itself, to all states in the closure.
00225 
00226             foreach d $dest {
00227             set clos [$fa epsilon_closure $d]
00228             if {[llength $clos] > 1} {
00229                 foreach e $clos {
00230                 if {![catch {$fa next $s $sym --> $e}]} {
00231                     set changed 1
00232                 }
00233                 }
00234             }
00235             }
00236         }
00237         }
00238     }
00239     }
00240 
00241     # At last, drop the epsilons for all states. Only now is this
00242     # possible because otherwise we might compute bad epsilon
00243     # closures in the previous loop.
00244 
00245     foreach s [$fa states] {
00246     $fa !next $s ""
00247     }
00248     return
00249 }
00250 
00251 /*  --- --- --- --------- --------- ---------*/
00252 
00253 ret  ::grammar::fa::op::trim (type fa , optional what =!reachable|!useful) {
00254     # Remove various unwanted pices from the FA.
00255 
00256     switch -exact -- $what {
00257     !reachable {
00258         set remove [$fa unreachable_states]
00259     }
00260     !useful {
00261         set remove [$fa unuseful_states]
00262     }
00263     !reachable&!useful -
00264     !(reachable|useful) {
00265         set remove [struct::set intersect [$fa unreachable_states] [$fa unuseful_states]]
00266     }
00267     !reachable|!useful -
00268     !(reachable&useful) {
00269         set remove [struct::set union [$fa unreachable_states] [$fa unuseful_states]]
00270     }
00271     default {
00272         return -code error "Expected !reachable, !useful, !reachable&!useful, !(reachable|useful), !reachable|!useful, or !(reachable&useful), got \"$what\""
00273     }
00274     }
00275 
00276     foreach s $remove {
00277     $fa state delete $s
00278     }
00279     return
00280 }
00281 
00282 /*  --- --- --- --------- --------- ---------*/
00283 
00284 ret  ::grammar::fa::op::determinize (type fa , optional mapvar ={) {idstart 0}} {
00285     /*  We do the operation in several stages instead of jumping*/
00286     /*  directly in the subset construction. Basically we try the less*/
00287     /*  expensive operations first to see if they are enough. It does*/
00288     /*  help that they will us also bring nearer to the ultimate goal*/
00289     /*  even if they are not enough.*/
00290 
00291      hasmap =  0
00292     if {$mapvar ne ""} {
00293     upvar 1 $mapvar map ;  hasmap =  1
00294     }
00295 
00296     /*  First, is the input already deterministic ?*/
00297     /*  There is nothing to do in that case.*/
00298 
00299     if {[$fa is deterministic]} {
00300     if {$hasmap} { map =  {}}
00301     return
00302     }
00303 
00304     /*  Second, trim unreachable and unuseables. We are done if only*/
00305     /*  they carried the non-determinism. Otherwise we might have made*/
00306     /*  the FA smaller and was less time consuming to convert.*/
00307 
00308     if {[llength [$fa startstates]]} {trim $fa !reachable}
00309     if {[llength [$fa finalstates]]} {trim $fa !useful}
00310     if {[$fa is deterministic]} {
00311     if {$hasmap} { map =  {}}
00312     return
00313     }
00314 
00315     /*  Third, remove any epsilon transitions, and stop if that was*/
00316     /*  enough. Of course, weed out again states which have become*/
00317     /*  irrelevant. The removal of the epsilons will at least ensure*/
00318     /*  that the subset construction won't have to deal with*/
00319     /*  closures. I.e. simpler.*/
00320 
00321     remove_eps $fa
00322     if {[llength [$fa startstates]]} {trim $fa !reachable}
00323     if {[llength [$fa finalstates]]} {trim $fa !useful}
00324     if {[$fa is deterministic]} {
00325     if {$hasmap} { map =  {}}
00326     return
00327     }
00328 
00329     /*  Fourth. There is no way to avoid the subset construction.*/
00330     /*  Dive in. This is the only part of the algorithm which requires*/
00331     /*  us to keep a map. We construct the dfa in a transient container*/
00332     /*  and copy the result back to fa when completed.*/
00333 
00334     array  subsets =  {}
00335      id =       $idstart
00336      pending =  {}
00337      dfa =  [[cons] %AUTO%]
00338     /*  FUTURE : $dfa symbol set [$fa symbols]*/
00339     foreach sym [$fa symbols] {$dfa symbol add $sym}
00340 
00341     /*  If we have start states we can initialize the algorithm with*/
00342     /*  their set. Otherwise we have to the single-element sets of all*/
00343     /*  states as the beginning.*/
00344 
00345      starts =  [$fa startstates]
00346     if {[llength $starts] > 0} {
00347     /*  Make the set of start states the initial stae of the result.*/
00348 
00349      starts =  [lsort $starts] ; /*  Sort to get canonical form.*/
00350     $dfa state add $id
00351     $dfa start add $id
00352 
00353     /*  The start may also be a final state*/
00354     if {[$fa final? $starts = ]} {
00355         $dfa final add $id
00356     }
00357 
00358      subsets = (dfa,$starts) $id
00359      subsets = (nfa,$id) $starts
00360     
00361     lappend pending $id
00362     incr id
00363     } else {
00364     /*  Convert all states of the input into sets (of one element)*/
00365     /*  in the output. Do not forget to mark all final states we*/
00366     /*  come by. No start states, otherwise we wouldn't be here.*/
00367 
00368     foreach s [$fa states] {
00369          nfaset =  [list $s]
00370 
00371         $dfa state add $id
00372         if {[$fa final? $s]} {
00373         $dfa final add $id
00374         }
00375 
00376          subsets = (dfa,$nfa) $id = 
00377          subsets = (nfa,$id) $nfa
00378         lappend =  pending $id
00379         incr id
00380     }
00381     }
00382 
00383     while {[llength $pending]} {
00384      dfastate =  [struct::list shift pending]
00385 
00386     /*  We have to compute the transition function for this dfa state.*/
00387 
00388      nfaset =  $subs = (nfa,$dfastate)
00389 
00390     foreach sym [$fa symbols@ $nfaset = ] {
00391          nfanext =  [lsort [$fa next $nfaset =  $sym]]
00392 
00393         if {![info exists subs = (dfa,$nfanext)]} {
00394         /*  Unknown destination. Add it as a new state.*/
00395 
00396         $dfa state add $id
00397         if {[$fa final? $nfanext = ]} {
00398             $dfa final add $id
00399         }
00400 
00401          subsets = (dfa,$nfanext) $id
00402          subsets = (nfa,$id) $nfanext
00403 
00404         /*  Schedule the calculation of the transition function*/
00405         /*  of the new state.*/
00406 
00407         lappend pending $id
00408         incr id
00409         }
00410 
00411         /*  Add the transition*/
00412         $dfa next $dfastate $sym --> $subs = (dfa,$nfanext)
00413     }
00414     }
00415 
00416     if {[llength [$fa startstates]]} {trim $fa !reachable}
00417     if {[llength [$fa finalstates]]} {trim $fa !useful}
00418 
00419     if {$hasmap} {
00420     /*  The map is from new dfa states to the sets of nfa states.*/
00421 
00422      map =  {}
00423     foreach s [$dfa states] {
00424         lappend map $s $subs = (nfa,$s)
00425     }
00426     }
00427 
00428     $fa = $dfa
00429     $dfa destroy
00430 
00431     /*  ASSERT : $fa is deterministic*/
00432     return
00433 }
00434 
00435 /*  --- --- --- --------- --------- ---------*/
00436 
00437 ret  ::grammar::fa::op::minimize (type fa , optional mapvar ={)} {
00438     # Brzozowski's method:
00439     # Reverse, determinize, reverse again, determinize again.
00440 
00441     reverse     $fa
00442     determinize $fa mapa
00443     reverse     $fa
00444     determinize $fa mapb
00445 
00446     if {$mapvar ne ""} {
00447     upvar 1 $mapvar map
00448 
00449     if {![llength $mapa] && ![llength $mapb]} {
00450         /*  No state reorganizations, signal up*/
00451          map =  {}
00452     } elseif {[llength $mapa] && ![llength $mapb]} {
00453         /*  Only one reorg, this is the combined reorg as well.*/
00454          map =  $mapa
00455     } elseif {![llength $mapa] && [llength $mapb]} {
00456         /*  Only one reorg, this is the combined reorg as well.*/
00457          map =  $mapb
00458     } else {
00459         /*  Two reorgs. Compose the maps into the final map signaled*/
00460         /*  up.*/
00461 
00462         /*  mapb : final state -> set of states in mapa -> sets of original states.*/
00463 
00464          map =  {}
00465         array  tmp =  $mapa
00466         foreach {b a} $mapb =  {
00467          compose =  {}
00468         foreach a $a {foreach =  o $tmp($a) {lappend compose $o}}
00469         lappend map $b [lsort -uniq $compose]
00470         }
00471     }
00472     }
00473 
00474     /*  The FA is implicitly trimmed by the determinize's.*/
00475     return
00476 }
00477 
00478 /*  ### ### ### ######### ######### #########*/
00479 /*  API implementation. Language.*/
00480 
00481 ret  ::grammar::fa::op::complement (type fa) {
00482     # Complementing is possible if and only if the FA is complete,
00483     # and accomplished by swapping the final and non-final states.
00484 
00485     if {![$fa is complete]} {
00486     return -code error "Unable to complement incomplete FA"
00487     }
00488     if {![$fa is deterministic]} {
00489     return -code error "Unable to complement non-deterministic FA"
00490     }
00491 
00492     set newfinal [struct::set difference [$fa states] [$fa finalstates]]
00493     $fa final set $newfinal
00494     return
00495 }
00496 
00497 /*  --- --- --- --------- --------- ---------*/
00498 
00499 ret  ::grammar::fa::op::kleene (type fa) {
00500     # The Kleene Closure of the FA makes no sense if we don't have
00501     # start and final states we can work from.
00502 
00503     set start [$fa startstates]
00504     set final [$fa finalstates]
00505 
00506     if {![llength $start] || ![llength $final]} {
00507     return -code error "Unable to add Kleene's closure to a FA without start/final states"
00508     }
00509 
00510     # FUTURE :: If final states have no outgoing transitions, and start
00511     # FUTURE :: states have no input transitions, then place the new
00512     # FUTURE :: transitions directly between start and final
00513     # FUTURE :: states. In that case we don't need new states.
00514 
00515     # We need new start/final states, like for optional (see below)
00516 
00517     set ns [NewState $fa s]
00518     set nf [NewState $fa f]
00519 
00520     foreach s $start {$fa next $ns "" --> $s}
00521     foreach f $final {$fa next $f  "" --> $nf}
00522 
00523     $fa start clear ; $fa start add $ns
00524     $fa final clear ; $fa final add $nf
00525 
00526     $fa next $ns "" --> $nf ; # Optionality
00527     $fa next $nf "" --> $ns ; # Loop for closure
00528     return
00529 }
00530 
00531 /*  --- --- --- --------- --------- ---------*/
00532 
00533 ret  ::grammar::fa::op::optional (type fa) {
00534     # The Optionality of the FA makes no sense if we don't have
00535     # start and final states we can work from.
00536 
00537     set start [$fa startstates]
00538     set final [$fa finalstates]
00539 
00540     if {![llength $start] || ![llength $final]} {
00541     return -code error "Unable to make a FA without start/final states optional"
00542     }
00543 
00544     # We have to introduce new start and final states to ensure
00545     # that we do not get additional recognized words from the FA
00546     # due to epsilon transitions. IOW just placing epsilons from
00547     # all start to all final states is wrong. Consider unreachable
00548     # final states, they become reachable. Or final states able to
00549     # reach final states from. Again the epsilons would extend the
00550     # language. We have to detach our optional epsilon from anything
00551     # in the existing start/final states. Hence the new start/final.
00552 
00553     # FUTURE : Recognize if there are no problems with placing direct
00554     # FUTURE : epsilons from start to final.
00555 
00556     set ns [NewState $fa s]
00557     set nf [NewState $fa f]
00558 
00559     foreach s $start {$fa next $ns "" --> $s}
00560     foreach f $final {$fa next $f  "" --> $nf}
00561 
00562     $fa start clear ; $fa start add $ns
00563     $fa final clear ; $fa final add $nf
00564 
00565     $fa next $ns "" --> $nf ; # This is the transition which creates the optionality.
00566     return
00567 }
00568 
00569 /*  --- --- --- --------- --------- ---------*/
00570 
00571 ret  ::grammar::fa::op::union (type fa , type fb , optional mapvar ={)} {
00572     # We union the input symbols, then add the states and
00573     # transitions of the second FA to the first, adding in
00574     # epsilons for the start and final states as well. When
00575     # adding states we make sure that the new states do not
00576     # intersect with the existing states.
00577 
00578     struct::list assign \
00579         [MergePrepare $fa $fb union smap] \
00580         astart afinal bstart bfinal
00581 
00582     if {$mapvar ne ""} {
00583     upvar 1 $mapvar map
00584      map =  $smap
00585     }
00586 
00587     /*  And now the new start & final states*/
00588 
00589      ns =  [NewState $fa s]
00590      nf =  [NewState $fa f]
00591 
00592     eLink1N $fa $ns $astart
00593     eLink1N $fa $ns $bstart
00594 
00595     eLinkN1 $fa $afinal $nf
00596     eLinkN1 $fa $bfinal $nf
00597 
00598     $fa start clear ; $fa start add $ns
00599     $fa final clear ; $fa final add $nf
00600     return
00601 }
00602 
00603 /*  --- --- --- --------- --------- ---------*/
00604 
00605 ret  ::grammar::fa::op::intersect (type fa , type fb , optional mapvar ={) {idstart 0}} {
00606     /*  Intersection has to run the two automata in parallel, using*/
00607     /*  paired states. If we have start states we begin the*/
00608     /*  construction with them. This leads to a smaller result as we*/
00609     /*  do not have create a full cross-crossproduct. The latter is*/
00610     /*  unfortunately required if there are no start states.*/
00611 
00612     struct::list assign [CrossPrepare $fa $fb intersection] tmp res
00613 
00614     /*  The start states of the new FA consist of the cross-product of*/
00615     /*  the start states of fa with fb. These are also the states used*/
00616     /*  to seed DoCross.*/
00617 
00618      id =  $idstart
00619      smap =  {}
00620      bstart =  [$tmp startstates]
00621     foreach a [$fa startstates] {
00622     foreach b $bstart {
00623          pair =  [list $a $b]
00624         lappend smap    $id $pair
00625         lappend pending $pair $id
00626         $res state add $id
00627         $res start add $id
00628         incr id
00629     }
00630     }
00631 
00632      cp =  [DoCross $fa $tmp $res $id $pending smap]
00633 
00634     foreach {id pair} $smap {
00635     struct::list assign $pair a b
00636     if {[$fa final? $a] && [$tmp final? $b]} {
00637         $res final add $id
00638     }
00639     }
00640 
00641     /*  Remove excess states (generated because of the sinks).*/
00642     trim $res
00643     if {$mapvar ne ""} {
00644     upvar 1 $mapvar map
00645     /*  The loop is required to filter out the mappings for all*/
00646     /*  states which were trimmed off.*/
00647      map =  {}
00648     foreach {id pair} $smap {
00649         if {![$res state exists $id]} continue
00650         lappend map $id $pair
00651     }
00652     }
00653 
00654     /*  Copy result into permanent storage and delete all intermediaries*/
00655     $fa = $res
00656     $res destroy
00657     if {$tmp ne $fb} {$tmp destroy}
00658     return
00659 }
00660 
00661 /*  --- --- --- --------- --------- ---------*/
00662 
00663 ret  ::grammar::fa::op::difference (type fa , type fb , optional mapvar ={)} {
00664     # Difference has to run the two automata in parallel, using
00665     # paired states. Only the final states are defined differently
00666     # than for intersection. It has to be final in fa and _not_ final
00667     # in fb to be a final state of the result. <=> Accepted by A, but
00668     # not B, to be in the difference.
00669 
00670     struct::list assign [CrossPrepare $fa $fb difference] tmp res
00671 
00672     # The start states of the new FA consist of the cross-product of
00673     # the start states of fa with fb. These are also the states used
00674     # to seed DoCross.
00675 
00676     set id 0
00677     set smap {}
00678      bstart =  [$tmp startstates]
00679     foreach a [$fa startstates] {
00680     foreach b $bstart {
00681          pair =  [list $a $b]
00682         lappend smap    $id $pair
00683         lappend pending $pair $id
00684         $res state add $id
00685         $res start add $id
00686         incr id
00687     }
00688     }
00689 
00690      cp =  [DoCross $fa $tmp $res $id $pending smap]
00691 
00692     foreach {id pair} $smap {
00693     struct::list assign $pair a b
00694     if {[$fa final? $a] && ![$tmp final? $b]} {
00695         $res final add $id
00696     }
00697     }
00698 
00699     /*  Remove excess states (generated because of the sinks).*/
00700     trim $res
00701     if {$mapvar ne ""} {
00702     upvar 1 $mapvar map
00703     /*  The loop is required to filter out the mappings for all*/
00704     /*  states which were trimmed off.*/
00705      map =  {}
00706     foreach {id pair} $smap {
00707         if {![$res state exists $id]} continue
00708         lappend map $id $pair
00709     }
00710     }
00711 
00712     /*  Copy result into permanent storage and delete all intermediaries*/
00713     $fa = $res
00714     $res destroy
00715     if {$tmp ne $fb} {$tmp destroy}
00716     return
00717 }
00718 
00719 /*  --- --- --- --------- --------- ---------*/
00720 
00721 ret  ::grammar::fa::op::concatenate (type fa , type fb , optional mapvar ={)} {
00722     # Like union, only the interconnect between existing and new FA is different.
00723 
00724     struct::list assign \
00725         [MergePrepare $fa $fb concatenate smap] \
00726         astart afinal bstart bfinal
00727 
00728     if {$mapvar ne ""} {
00729     upvar 1 $mapvar map
00730      map =  $smap
00731     }
00732 
00733      ns =  [NewState $fa s]
00734      nm =  [NewState $fa m] ;/*  Midpoint.*/
00735      nf =  [NewState $fa f]
00736 
00737     eLink1N $fa $ns $astart
00738     eLinkN1 $fa $afinal $nm
00739 
00740     eLink1N $fa $nm $bstart
00741     eLinkN1 $fa $bfinal $nf
00742 
00743     $fa start clear ; $fa start add $ns
00744     $fa final clear ; $fa final add $nf
00745     return
00746 }
00747 
00748 /*  ### ### ### ######### ######### #########*/
00749 /*  API implementation. Compilation (regexp -> FA).*/
00750 
00751 ret  ::grammar::fa::op::fromRegex (type fa , type regex , optional over ={)} {
00752     # Convert a regular expression into a FA. The regex is given as
00753     # parse tree in the form of a nested list.
00754 
00755     # {. A B ...} ... Concatenation (accepts zero|one arguments).
00756     /*  {| A B ...} ... Alternatives  (accepts zero|one arguments).*/
00757     /*  {? A}       ... Optional.*/
00758     /*  {* A}       ... Kleene.*/
00759     /*  {+ A}       ... Pos.Kleene.*/
00760     /*  {! A}       ... Complement/Negation.*/
00761     /*  {S Symbol}  ... Atom, Symbol*/
00762     /* */
00763     /*  Recursive descent with a helper ...*/
00764 
00765     if {![llength $regex]} {
00766     $fa clear
00767     return
00768     }
00769 
00770      tmp =  [[cons] %AUTO%]
00771 
00772     if {![llength $over]} {
00773      over =  [lsort -uniq [RESymbols $regex]]
00774     }
00775     foreach sym $over {
00776     $tmp symbol add $sym
00777     }
00778 
00779      id =  0
00780     struct::list assign [Regex $tmp $regex id] s f
00781     $tmp start  [list =  $s]
00782     $tmp final  [list =  $f]
00783 
00784     $fa = $tmp
00785     $tmp destroy
00786     return
00787 }
00788 
00789 /*  ### ### ### ######### ######### #########*/
00790 /*  Internal helpers.*/
00791 
00792 ret  ::grammar::fa::op::RESymbols (type regex) {
00793     set cmd [lindex $regex 0]
00794     switch -exact -- $cmd {
00795     ? - * - ! - + {
00796         return [RESymbols [lindex $regex 1]]
00797     }
00798     . - | - & {
00799         set res {}
00800         foreach sub [lrange $regex 1 end] {
00801         foreach sym [RESymbols $sub] {lappend res $sym}
00802         }
00803         return $res
00804     }
00805     S {
00806         return [list [lindex $regex 1]]
00807     }
00808     default {
00809         return -code error "Expected . ! ? * | &, or S, got \"$cmd\""
00810     }
00811     }
00812 }
00813 
00814 ret  ::grammar::fa::op::Regex (type fa , type regex , type idvar) {
00815     upvar 1 $idvar id
00816     set cmd [lindex $regex 0]
00817     switch -exact -- $cmd {
00818     ? {
00819         # Optional
00820         set a $id ; incr id ; $fa state add $a
00821         set b $id ; incr id ; $fa state add $b
00822 
00823         struct::list assign [Regex $fa [lindex $regex 1] id] s f
00824         $fa next $a "" --> $s
00825         $fa next $f "" --> $b
00826         $fa next $a "" --> $b
00827     }
00828     * {
00829         # Kleene
00830         set a $id ; incr id ; $fa state add $a
00831         set b $a
00832 
00833         struct::list assign [Regex $fa [lindex $regex 1] id] s f
00834         $fa next $a "" --> $s
00835         $fa next $f "" --> $a ;# == b
00836     }
00837     + {
00838         # Pos. Kleene
00839         set a $id ; incr id ; $fa state add $a
00840         set b $id ; incr id ; $fa state add $b
00841 
00842         struct::list assign [Regex $fa [lindex $regex 1] id] s f
00843         $fa next $a "" --> $s
00844         $fa next $f "" --> $b
00845         $fa next $b "" --> $a
00846     }
00847     ! {
00848         # Complement.
00849         # Build up in a temp FA, complement, and
00850         # merge nack into the current
00851 
00852         set a $id ; incr id ; $fa state add $a
00853         set b $id ; incr id ; $fa state add $b
00854 
00855         set tmp [[cons] %AUTO%]
00856         foreach sym [$fa symbols] {$tmp symbol add $sym}
00857         struct::list assign [Regex $tmp [lindex $regex 1] id] s f
00858         $tmp start add $s
00859         $tmp final add $f
00860 
00861         determinize $tmp {} $id
00862         incr id [llength [$tmp states]]
00863         if {![$tmp is complete]} {
00864         complete    $tmp $id
00865         incr id
00866         }
00867         complement  $tmp
00868 
00869         # Merge and link.
00870         $fa deserialize_merge [$tmp serialize]
00871 
00872         eLink1N $fa $a [$tmp startstates]
00873         eLinkN1 $fa [$tmp finalstates] $b
00874         $tmp destroy
00875     }
00876     & {
00877         # Intersection ... /And
00878 
00879         if {[llength $regex] < 3} {
00880         # Optimized path. Intersection of one sub-expression
00881         # is the sub-expression itself.
00882 
00883         struct::list assign [Regex $fa [lindex $regex 1] id] a b
00884         } else {
00885         set a $id ; incr id ; $fa state add $a
00886         set b $id ; incr id ; $fa state add $b
00887 
00888         set tmp [[cons] %AUTO%]
00889         foreach sym [$fa symbols] {$tmp symbol add $sym}
00890         set idsub 0
00891         struct::list assign [Regex $tmp [lindex $regex 1] idsub] s f
00892         $tmp start add $s
00893         $tmp final add $f
00894 
00895         set beta [[cons] %AUTO%]
00896         foreach sub [lrange $regex 2 end] {
00897             foreach sym [$fa symbols] {$beta symbol add $sym}
00898             struct::list assign [Regex $beta $sub idsub] s f
00899             $beta start add $s
00900             $beta final add $f
00901             intersect $tmp $beta {} $id
00902         }
00903         $beta destroy
00904         determinize $tmp {} $id
00905         incr id [llength [$tmp states]]
00906 
00907         # Merge and link.
00908         $fa deserialize_merge [$tmp serialize]
00909 
00910         eLink1N $fa $a [$tmp startstates]
00911         eLinkN1 $fa [$tmp finalstates] $b
00912         $tmp destroy
00913         }
00914     }
00915     . {
00916         # Concatenation ...
00917 
00918         if {[llength $regex] == 1} {
00919         # Optimized path. No sub-expressions. This represents
00920         # language containing only the empty string, aka
00921         # epsilon.
00922 
00923         set a $id ; incr id ; $fa state add $a
00924         set b $id ; incr id ; $fa state add $b
00925         $fa next $a "" --> $b
00926 
00927         } elseif {[llength $regex] == 2} {
00928         # Optimized path. Concatenation of one sub-expression
00929         # is the sub-expression itself.
00930 
00931         struct::list assign [Regex $fa [lindex $regex 1] id] a b
00932         } else {
00933         set first 1
00934         set last {}
00935         foreach sub [lrange $regex 1 end] {
00936             struct::list assign [Regex $fa $sub id] s f
00937             if {$first} {set first 0 ; set a $s}
00938             if {$last != {}} {
00939             $fa next $last "" --> $s
00940             }
00941             set last $f
00942         }
00943         set b $f
00944         }
00945     }
00946     | {
00947         # Alternatives ... (Union)
00948 
00949         if {[llength $regex] == 1} {
00950         # Optimized path. No sub-expressions. This represents
00951         # the empty language, i.e. the language without words.
00952 
00953         set a $id ; incr id ; $fa state add $a
00954         set b $id ; incr id ; $fa state add $b
00955 
00956         } elseif {[llength $regex] == 2} {
00957         # Optimized path. Choice/Union of one sub-expression
00958         # is the sub-expression itself.
00959 
00960         struct::list assign [Regex $fa [lindex $regex 1] id] a b
00961         } else {
00962         set a $id ; incr id ; $fa state add $a
00963         set b $id ; incr id ; $fa state add $b
00964         foreach sub [lrange $regex 1 end] {
00965             struct::list assign [Regex $fa $sub id] s f
00966             $fa next $a "" --> $s
00967             $fa next $f "" --> $b
00968         }
00969         }
00970     }
00971     S {
00972         # Atom, base transition.
00973         set sym [lindex $regex 1]
00974         set a $id ; incr id ; $fa state add $a
00975         set b $id ; incr id ; $fa state add $b
00976         $fa next $a $sym --> $b
00977     }
00978     default {
00979         return -code error "Expected . ! ? * | &, or S, got \"$cmd\""
00980     }
00981     }
00982     return [list $a $b]
00983 }
00984 
00985 /*  --- --- --- --------- --------- ---------*/
00986 
00987 ret  ::grammar::fa::op::CrossPrepare (type fa , type fb , type label) {
00988     set starta [$fa startstates]
00989     set finala [$fa finalstates]
00990     set startb [$fb startstates]
00991     set finalb [$fb finalstates]
00992     if {
00993     ![llength $starta] || ![llength $finala] ||
00994     ![llength $startb] || ![llength $finalb]
00995     } {
00996     return -code error "Unable to perform the $label of two FAs without start/final states"
00997     }
00998 
00999     # The inputs are made complete over the union of their symbol
01000     # sets. A temp. container is used for the second input if necessary.
01001 
01002     set totals [struct::set union [$fa symbols] [$fb symbols]]
01003     foreach sym [struct::set difference $totals [$fa symbols]] {
01004     $fa symbol add $sym
01005     }
01006     if {![$fa is epsilon-free]} {
01007     remove_eps $fa
01008     trim       $fa
01009     }
01010     if {![$fa is complete]} {
01011     complete $fa
01012     }
01013     set tmp $fb
01014     set bnew [struct::set difference $totals [$fb symbols]]
01015     if {[llength $bnew]} {
01016     set tmp [[cons] %AUTO% = $fb]
01017     foreach sym $bnew {
01018         $tmp symbol add $sym
01019     }    
01020     }
01021     if {![$fb is epsilon-free]} {
01022     if {$tmp eq $fb} {set tmp [[cons] %AUTO% = $fb]}
01023     remove_eps $tmp
01024     trim       $tmp
01025     }
01026     if {![$fb is complete]} {
01027     if {$tmp eq $fb} {set tmp [[cons] %AUTO% = $fb]}
01028     complete $tmp
01029     }
01030 
01031     set res [[cons] %AUTO%]
01032     foreach sym $totals {
01033     $res symbol add $sym
01034     } 
01035 
01036     return [list $tmp $res]
01037 }
01038 
01039 /*  --- --- --- --------- --------- ---------*/
01040 
01041 ret  ::grammar::fa::op::DoCross (type fa , type fb , type res , type id , type seed , type smapvar) {
01042     upvar 1 $smapvar smap
01043 
01044     set symbols [$fa symbols]
01045     array set tmp $seed
01046 
01047     set pending $seed
01048     while {[llength $pending]} {
01049     set cpair [struct::list shift pending]
01050     set cid   [struct::list shift pending]
01051 
01052     struct::list assign $cpair a b
01053 
01054     # ASSERT: /res state exists /cid
01055 
01056     # Generate the transitions for the pair, add the resulting
01057     # destinations to the FA, and schedule them for a visit if
01058     # they are new.
01059 
01060     foreach sym $symbols {
01061         set adestinations [$fa next $a $sym]
01062         set bdestinations [$fb next $b $sym]
01063 
01064         foreach ad $adestinations {
01065         foreach bd $bdestinations {
01066             set dest [list $ad $bd]
01067 
01068             if {![info exists tmp($dest)]} {
01069             $res state add $id
01070             lappend smap $id $dest
01071             lappend pending $dest $id
01072             set tmp($dest) $id
01073             incr id
01074             }
01075             $res next $cid $sym --> $tmp($dest)
01076         }
01077         }
01078     }
01079     }
01080     return
01081 }
01082 
01083 /*  --- --- --- --------- --------- ---------*/
01084 
01085 ret  ::grammar::fa::op::MergePrepare (type fa , type fb , type label , type mapvar) {
01086     upvar 1 $mapvar map
01087 
01088     set starta [$fa startstates]
01089     set finala [$fa finalstates]
01090     set startb [$fb startstates]
01091     set finalb [$fb finalstates]
01092     if {
01093     ![llength $starta] || ![llength $finala] ||
01094     ![llength $startb] || ![llength $finalb]
01095     } {
01096     return -code error "Unable to $label FAs without start/final states"
01097     }
01098 
01099     # FUTURE: add {*}[symbols], ignore dup's
01100     foreach sym [$fb symbols] {catch {$fa symbol add $sym}}
01101 
01102     set dup [struct::set intersect [$fa states] [$fb states]]
01103     if {![llength $dup]} {
01104     # The states do not overlap. A plain merge of fb is enough to
01105     # copy the information.
01106 
01107     $fa deserialize_merge [$fb serialize]
01108     set map {}
01109     } else {
01110     # We have duplicate states, therefore we have to remap fb to
01111     # prevent interference between the two.
01112 
01113     set map {}
01114     set tmp [[cons] %AUTO% = $fb]
01115     set id 0
01116     foreach s $dup {
01117         # The renaming process has to ensure that the new name is
01118         # in neither fa, nor already in fb as well.
01119         while {
01120         [$fa  state exists $id] ||
01121         [$tmp state exists $id]
01122         } {incr id}
01123         $tmp state rename $s $id
01124         lappend map $id $s
01125         incr id
01126     }
01127 
01128     set startb [$tmp startstates]
01129     set finalb [$tmp finalstates]
01130 
01131     $fa deserialize_merge [$tmp serialize]
01132     $tmp destroy
01133     }
01134 
01135     return [list $starta $finala $startb $finalb]
01136 }
01137 
01138 /*  --- --- --- --------- --------- ---------*/
01139 
01140 ret  ::grammar::fa::op::eLink1N (type fa , type from , type states) {
01141     foreach s $states {
01142     $fa next $from "" --> $s
01143     }
01144     return
01145 }
01146 
01147 /*  --- --- --- --------- --------- ---------*/
01148 
01149 ret  ::grammar::fa::op::eLinkN1 (type fa , type states , type to) {
01150     foreach s $states {
01151     $fa next $s "" --> $to
01152     }
01153     return
01154 }
01155 
01156 /*  --- --- --- --------- --------- ---------*/
01157 
01158 ret  ::grammar::fa::op::NewState (type fa , type prefix) {
01159     set newstate [FindNewState $fa $prefix]
01160     $fa state add $newstate
01161     return $newstate
01162 }
01163 
01164 /*  --- --- --- --------- --------- ---------*/
01165 
01166 ret  ::grammar::fa::op::FindNewState (type fa , type prefix) {
01167     #if {![$fa state exists $prefix]} {return $prefix}
01168     set n 0
01169     while {[$fa state exists ${prefix}.$n]} {incr n}
01170     return ${prefix}.$n
01171 }
01172 
01173 /*  ### ### ### ######### ######### #########*/
01174 /*  API implementation. Decompilation (FA -> regexp).*/
01175 
01176 ret  ::grammar::fa::op::toRegexp (type fa) {
01177     # NOTE: FUTURE - Do not go through the serialization, nor through
01178     # a matrix. The algorithm can be expressed more directly as
01179     # operations on the automaton (states and transitions).
01180 
01181     set ET [ser_to_ematrix [$fa serialize]]
01182     while {[llength $ET] > 2} {
01183     set ET [matrix_drop_state $ET]
01184     }
01185     return [lindex $ET 0 1]
01186 }
01187 
01188 ret  ::grammar::fa::op::toRegexp2 (type fa) {
01189     # NOTE: FUTURE - See above.
01190     set ET [ser_to_ematrix [$fa serialize]]
01191     while {[llength $ET] > 2} {
01192     set ET [matrix_drop_state $ET re2]
01193     }
01194     return [lindex $ET 0 1]
01195 }
01196 
01197 /*  ### ### ### ######### ######### #########*/
01198 /*  Internal helpers.*/
01199 
01200 ret  ::grammar::fa::op::ser_to_ematrix (type ser) {
01201     if {[lindex $ser 0] ne "grammar::fa"} then {
01202     error "Expected grammar::fa automaton serialisation"
01203     }
01204     set stateL {}
01205     set n 2; foreach {state des} [lindex $ser 2] {
01206     lappend stateL $state
01207     set N($state) $n
01208     incr n
01209     }
01210     set row0 {}
01211     for {set k 0} {$k<$n} {incr k} {lappend row0 [list |]}
01212     set res [list $row0 $row0]
01213     foreach {from des} [lindex $ser 2] {
01214     set row [lrange $row0 0 1]
01215     if {[lindex $des 0]} then {lset res 0 $N($from) [list .]}
01216     if {[lindex $des 1]} then {lset row 1 [list .]}
01217     foreach to $stateL {set S($to) [list |]}
01218     foreach {symbol targetL} [lindex $des 2] {
01219         if {$symbol eq ""} then {
01220         set atom [list .]
01221         } else {
01222         set atom [list S $symbol]
01223         }
01224         foreach to $targetL {lappend S($to) $atom}
01225     }
01226     foreach to $stateL {
01227         if {[llength $S($to)] == 2} then {
01228         lappend row [lindex $S($to) 1]
01229         } else {
01230         lappend row $S($to)
01231         }
01232     }
01233     lappend res $row
01234     }
01235     return $res
01236 }
01237 
01238 ret  ::grammar::fa::op::matrix_drop_state (type T_, type in , optional ns =re1) {
01239     set sumcmd ${ns}::|
01240     set prodcmd ${ns}::.
01241     set T1 {}
01242     set lastcol {}
01243     foreach row $T_in {
01244     lappend T1 [lreplace $row end end]
01245     lappend lastcol [lindex $row end]
01246     }
01247     set lastrow [lindex $T1 end]
01248     set T1 [lreplace $T1 end end]
01249     set b [${ns}::* [lindex $lastcol end]]
01250     set lastcol [lreplace $lastcol end end]
01251     set res {}
01252     foreach row $T1 a $lastcol {
01253     set newrow {}
01254     foreach pos $row c $lastrow {
01255         lappend newrow [$sumcmd $pos [$prodcmd $a $b $c]]
01256     }
01257     lappend res $newrow
01258     }
01259     return $res
01260 }
01261 
01262 /*  ### ### ### ######### ######### #########*/
01263 /*  Internal helpers. Regexp simplification I.*/
01264 
01265 namespace ::grammar::fa::op::re1 {
01266     namespace export | . {\*}
01267 }
01268 
01269 ret  ::grammar::fa::op::re1::| (type args) {
01270     set L {}
01271 
01272     # | = Choices.
01273     # Sub-choices are lifted into the top expression (foreach).
01274     # Identical choices are reduced to a single term (lsort -uniq).
01275 
01276     foreach re $args {
01277     switch -- [lindex $re 0] "|" {
01278         foreach term [lrange $re 1 end] {lappend L $term}
01279     } default {
01280         lappend L $re
01281     }
01282     }
01283     set L [lsort -unique $L]
01284     if {[llength $L] == 1} then {
01285     return [lindex $L 0]
01286     } else {
01287     return [linsert $L 0 |]
01288     }
01289 }
01290 
01291 ret  ::grammar::fa::op::re1::. (type args) {
01292     set L {}
01293 
01294     # . = Sequence.
01295     # One element sub-choices are lifted into the top expression.
01296     # Sub-sequences are lifted into the top expression.
01297 
01298     foreach re $args {
01299     switch -- [lindex $re 0] "." {
01300         foreach term [lrange $re 1 end] {lappend L $term}
01301     } "|" {
01302         if {[llength $re] == 1} then {return $re}
01303         lappend L $re
01304     } default {
01305         lappend L $re
01306     }
01307     }
01308     if {[llength $L] == 1} then {
01309     return [lindex $L 0]
01310     } else {
01311     return [linsert $L 0 .]
01312     }
01313 }
01314 
01315 ret  ::grammar::fa::op::re1::* (type re) {
01316     # * = Kleene closure.
01317     # Sub-closures are lifted into the top expression.
01318     # One-element sub-(choices,sequences) are lifted into the top expression.
01319 
01320     switch -- [lindex $re 0] "|" - "." {
01321     if {[llength $re] == 1} then {
01322         return [list .]
01323     } else {
01324         return [list * $re]
01325     }
01326     } "*" {
01327     return $re
01328     } default {
01329     return [list * $re]
01330     }
01331 }
01332 
01333 /*  ### ### ### ######### ######### #########*/
01334 /*  Internal helpers. Regexp simplification II.*/
01335 
01336 namespace ::grammar::fa::op::re2 {
01337     /*  Inherit choices and kleene-closure from the basic simplifier.*/
01338 
01339     namespace import [namespace parent]::re1::|
01340     namespace import [namespace parent]::re1::\\*
01341 }
01342 
01343 ret  ::grammar::fa::op::re2::. (type args) {
01344 
01345     # . = Sequences
01346     # Sub-sequences are lifted into the top expression.
01347     # Sub-choices are multiplied out.
01348     # <Example a(b|c) => ab|ac >
01349 
01350     set L {}
01351     set n -1
01352     foreach re $args {
01353     incr n
01354     switch -- [lindex $re 0] "." {
01355         foreach term [lrange $re 1 end] {lappend L $term}
01356     } "|" {
01357         set res [list |]
01358         set L2 [lreplace $args 0 $n]
01359         foreach term [lrange $re 1 end] {
01360         lappend res [eval [list .] $L [list $term] $L2]
01361         }
01362         return [eval $res]
01363     } default {
01364         lappend L $re
01365     }
01366     }
01367     if {[llength $L] == 1} then {
01368     return [lindex $L 0]
01369     } else {
01370     return [linsert $L 0 .]
01371     }
01372 }
01373 
01374 /*  ### ### ### ######### ######### #########*/
01375 /*  API. Simplification of regular expressions.*/
01376 
01377 ret  ::grammar::fa::op::simplifyRegexp (type RE0) {
01378     set RE1 [namespace inscope nonnull $RE0]
01379     if {[lindex $RE1 0] eq "S" || $RE1 eq "." || $RE1 eq "|"} then {
01380     return $RE1
01381     }
01382     set tmp [grammar::fa %AUTO% fromRegex $RE1]
01383     $tmp minimize
01384     set RE1 [toRegexp $tmp]
01385     $tmp destroy
01386     if {[string length $RE1] < [string length $RE0]} then {
01387     set RE0 $RE1
01388     }
01389     if {[lindex $RE0 0] eq "S"} then {return $RE0}
01390     set res [lrange $RE0 0 0]
01391     foreach branch [lrange $RE0 1 end] {
01392     lappend res [simplifyRegexp $branch]
01393     }
01394     return $res
01395 }
01396 
01397 /*  ### ### ### ######### ######### #########*/
01398 /*  Internal helpers.*/
01399 
01400 namespace ::grammar::fa::op::nonnull {}
01401 
01402 ret  ::grammar::fa::op::nonnull::| (type args) {
01403     set also_empty false
01404     set res [list |]
01405     foreach branch $args {
01406     set RE [eval $branch]
01407     if {[lindex $RE 0] eq "?"} then {
01408         set also_empty true
01409         set RE [lindex $RE 1]
01410     }
01411     switch -- [lindex $RE 0] "|" {
01412         eval [lreplace $RE 0 0 lappend res]
01413     } "." {
01414         if {[llength $RE] == 1} then {
01415         set also_empty true
01416         } else {
01417         lappend res $RE
01418         }
01419     } default {
01420         lappend res $RE
01421     }
01422     }
01423     if {!$also_empty} then {return $res}
01424     foreach branch [lrange $res 1 end] {
01425     if {[lindex $branch 0] eq "*"} then {return $res}
01426     }
01427     if {[llength $res] == 1} then {
01428     return [list .]
01429     } elseif {[llength $res] == 2} then {
01430     return [lreplace $res 0 0 ?]
01431     } else {
01432     return [list ? $res]
01433     }
01434 }
01435 
01436 ret  ::grammar::fa::op::nonnull::. (type args) {
01437     set res [list .]
01438     foreach branch $args {
01439     set RE [eval $branch]
01440     switch -- [lindex $RE 0] "|" {
01441         if {[llength $RE] == 1} then {return $RE}
01442         lappend res $RE
01443     } "." {
01444         eval [lreplace $RE 0 0 lappend res]
01445     } default {
01446         lappend res $RE
01447     }
01448     }
01449     return $res
01450 }
01451 
01452 ret  ::grammar::fa::op::nonnull::* (type sub) {
01453     set RE [eval $sub]
01454     switch -- [lindex $RE 0] "*" - "?" - "+" {
01455     return [lreplace $RE 0 0 *]
01456     } default {
01457     return [list * $RE]
01458     }
01459 }
01460 
01461 ret  ::grammar::fa::op::nonnull::+ (type sub) {
01462     set RE [eval $sub]
01463     switch -- [lindex $RE 0] "+" {
01464     return $RE
01465     } "*" - "?" {
01466     return [lreplace $RE 0 0 *]
01467     } default {
01468     return [list * $RE]
01469     }
01470 }
01471 
01472 ret  ::grammar::fa::op::nonnull::? (type sub) {
01473     set RE [eval $sub]
01474     switch -- [lindex $RE 0] "?" - "*" {
01475     return $RE
01476     } "+" {
01477     return [lreplace $RE 0 0 *]
01478     } default {
01479     return [list ? $RE]
01480     }
01481 }
01482 
01483 ret  ::grammar::fa::op::nonnull::S (type name) {
01484     return [list S $name]
01485 }
01486 
01487 /*  ### ### ### ######### ######### #########*/
01488 /*  API. Translate RE of this package to Tcl REs*/
01489 
01490 ret  ::grammar::fa::op::toTclRegexp (type re , type symdict) {
01491     return [lindex [namespace inscope tclre $re $symdict] 1]
01492 }
01493 
01494 /*  ### ### ### ######### ######### #########*/
01495 /*  Internal helpers.*/
01496 
01497 namespace ::grammar::fa::op::tclre {}
01498 
01499 ret  ::grammar::fa::op::tclre::S (type name , type dict) {
01500     array set A $dict
01501     if {[info exists A($name)]} then {
01502     return $A($name)
01503     } elseif {[string length $name] == 1} then {
01504     if {[regexp {[\\\[\]{}.()*+?^$]} $name]} then {
01505         return [list char \\$name]
01506     } else {
01507         return [list char $name]
01508     }
01509     } else {
01510     return [list class "\[\[:${name}:\]\]"]
01511     }
01512 }
01513 
01514 ret  ::grammar::fa::op::tclre::. (type args) {
01515     set suffix [lrange $args end end]
01516     set L {}
01517     foreach factor [lrange $args 0 end-1] {
01518     set pair [eval $factor $suffix]
01519     switch -- [lindex $pair 0] "sum" {
01520         lappend L ([lindex $pair 1])
01521     } default {
01522         lappend L [lindex $pair 1]
01523     }
01524     }
01525     return [list prod [join $L ""]]
01526 }
01527 
01528 ret  ::grammar::fa::op::tclre::* (type re , type dict) {
01529     set pair [eval $re [list $dict]]
01530     switch -- [lindex $pair 0] "sum" - "prod" {
01531     return [list prod "([lindex $pair 1])*"]
01532     } default {
01533     return [list prod "[lindex $pair 1]*"]
01534     }
01535 }
01536 
01537 ret  ::grammar::fa::op::tclre::+ (type re , type dict) {
01538     set pair [eval $re [list $dict]]
01539     switch -- [lindex $pair 0] "sum" - "prod" {
01540     return [list prod "([lindex $pair 1])+"]
01541     } default {
01542     return [list prod "[lindex $pair 1]+"]
01543     }
01544 }
01545 
01546 ret  ::grammar::fa::op::tclre::? (type re , type dict) {
01547     set pair [eval $re [list $dict]]
01548     switch -- [lindex $pair 0] "sum" - "prod" {
01549     return [list prod "([lindex $pair 1])?"]
01550     } default {
01551     return [list prod "[lindex $pair 1]?"]
01552     }
01553 }
01554 
01555 ret  ::grammar::fa::op::tclre::| (type args) {
01556     set suffix [lrange $args end end]
01557     set charL {}
01558     set classL {}
01559     set prodL {}
01560     foreach factor [lrange $args 0 end-1] {
01561     set pair [eval $factor $suffix]
01562     switch -- [lindex $pair 0] "char" {
01563         lappend charL [lindex $pair 1]
01564     } "class" {
01565         lappend classL [string range [lindex $pair 1] 1 end-1]
01566     } default {
01567         lappend prodL [lindex $pair 1]
01568     }
01569     }
01570     if {[llength $charL]>1 || [llength $classL]>0} then {
01571     while {[set n [lsearch $charL -]] >= 0} {
01572         lset charL $n {\-}
01573     }
01574     set bracket "\[[join $charL ""][join $classL ""]\]"
01575     if {![llength $prodL]} then {
01576         return [list atom $bracket]
01577     }
01578     lappend prodL $bracket
01579     } else {
01580     eval [list lappend prodL] $charL
01581     }
01582     return [list sum [join $prodL |]]
01583 }
01584 
01585 ret  ::grammar::fa::op::tclre::& (type args) {
01586     error "Cannot express language intersection in Tcl-RE's"
01587 
01588     # Note: This can be translated by constructing an automaton for
01589     # the intersection, and then translating its conversion to a
01590     # regular expression.
01591 }
01592 
01593 ret  ::grammar::fa::op::tclre::! (type args) {
01594     error "Cannot express language complementation in Tcl-RE's"
01595 
01596     # Note: This can be translated by constructing an automaton for
01597     # the complement, and then translating its conversion to a regular
01598     # expression. This however requires knowledge regarding the set of
01599     # symbols. Large (utf-8) for Tcl regexes.
01600 }
01601 
01602 /*  ### ### ### ######### ######### #########*/
01603 
01604 ret  ::grammar::fa::op::constructor (type cmd) {
01605     variable cons $cmd
01606     return
01607 }
01608 
01609 ret  ::grammar::fa::op::cons () {
01610     variable cons
01611     if {$cons ne ""} {return $cons}
01612     return -code errror "No constructor for FA container was established."
01613 }
01614 
01615 /*  ### ### ### ######### ######### #########*/
01616 /*  Package Management*/
01617 
01618 package provide grammar::fa::op 0.4
01619