util_norm_lemon.tcl

Go to the documentation of this file.
00001 /*  -*- tcl -*-*/
00002 /* */
00003 /*  Copyright (c) 2005 by Andreas Kupries <andreas_kupries@users.sourceforge.net>*/
00004 /*  Parser Generator / Transformation - Normalize PEG AST for later.*/
00005 
00006 /*  This package assumes to be used from within a PAGE plugin. It uses*/
00007 /*  the API commands listed below. These are identical across the major*/
00008 /*  types of PAGE plugins, allowing this package to be used in reader,*/
00009 /*  transform, and writer plugins. It cannot be used in a configuration*/
00010 /*  plugin, and this makes no sense either.*/
00011 /* */
00012 /*  To ensure that our assumption is ok we require the relevant pseudo*/
00013 /*  package setup by the PAGE plugin management code.*/
00014 /* */
00015 /*  -----------------+--*/
00016 /*  page_info        | Reporting to the user.*/
00017 /*  page_warning     |*/
00018 /*  page_error       |*/
00019 /*  -----------------+--*/
00020 /*  page_log_error   | Reporting of internals.*/
00021 /*  page_log_warning |*/
00022 /*  page_log_info    |*/
00023 /*  -----------------+--*/
00024 
00025 /*  ### ### ### ######### ######### #########*/
00026 /*  Requisites*/
00027 
00028 /*  @mdgen NODEP: page::plugin*/
00029 
00030 package require page::plugin ; /*  S.a. pseudo-package.*/
00031 package require treeql
00032 package require page::util::quote
00033 package require page::util::peg
00034 
00035 namespace ::page::util::norm::lemon {
00036     /*  Get the peg char de/encoder commands.*/
00037     /*  (unquote, quote'tcl)*/
00038 
00039     namespace import ::page::util::quote::*
00040     namespace import ::page::util::peg::*
00041 }
00042 
00043 /*  ### ### ### ######### ######### #########*/
00044 /*  API*/
00045 
00046 ret  ::page::util::norm::lemon (type t) {
00047     set q [treeql q -tree $t]
00048 
00049     page_info {[Lemon Normalization]}
00050 
00051     # Retrieve grammar name out of one directive.
00052     # Or from LHS of first rule.
00053 
00054     page_log_info ..Startsymbol
00055 
00056     set start {}
00057 
00058     $q query tree \
00059         withatt type nonterminal \
00060         withatt detail StartSymbol \
00061         descendants \
00062         withatt type terminal \
00063         over n {
00064 
00065     lemon::TokReduce $t $n detail
00066     set start [$t get $n detail]
00067 
00068     page_info "  StartSymbol: $start"
00069     }
00070 
00071     $q query tree \
00072         withatt type   nonterminal \
00073         withatt detail Name \
00074         descendants \
00075         withatt type terminal \
00076         over n {
00077 
00078     lemon::TokReduce $t $n detail
00079     set name [$t get $n detail]
00080 
00081     page_info "  Name:        $name"
00082 
00083     $t set root name $name
00084     }
00085 
00086     page_log_info ..Drop        ; lemon::Drop        $q $t
00087     page_log_info ..Terminals   ; lemon::Terminals   $q $t
00088     page_log_info ..Definitions ; lemon::Definitions $q $t
00089     page_log_info ..Rules       ; lemon::Rules       $q $t start
00090     page_log_info ..Epsilon     ; lemon::ElimEpsilon $q $t
00091     page_log_info ..Autoclass   ; lemon::AutoClassId $q $t
00092     page_log_info ..Chains
00093 
00094     # Find and cut operator chains, very restricted. Cut only chains
00095     # of x- and /-operators. The other operators have only one child
00096     # by definition and are thus not chains.
00097 
00098     #set q [treeql q -tree $t]
00099     # q query tree over n
00100     foreach n [$t children -all root] {
00101     if {[$t keyexists $n symbol]}        continue
00102     if {[llength [$t children $n]] != 1} continue
00103 
00104     set op [$t get $n op]
00105     if {($op ne "/") && ($op ne "x")} continue
00106     $t cut $n
00107     }
00108 
00109     page_log_info ..Flatten
00110 
00111     lemon::flatten $q $t
00112 
00113     # Analysis: Left recursion, and where.
00114     # Manual: Definitions for terminals.
00115     #         Definitions for space, comments.
00116     #         Integration of this into the grammar.
00117 
00118     # Sentinel for PE algorithms.
00119     $t set root symbol <StartExpression>
00120 
00121     if {$start eq ""} {
00122     page_error "  Startsymbol missing"
00123     } else {
00124     set s [$t insert root end]
00125     $t set $s op  n
00126     $t set $s sym $start
00127     $t set root start $s
00128 
00129     array set def [$t get root definitions]
00130 
00131     if {![info exists def($start)]} {
00132         page_error "  Startsymbol is undefined"
00133         $t set $s def ""
00134     } else {
00135         $t set $s def $def($start)
00136     }
00137     unset def
00138     }
00139 
00140     $q destroy
00141 
00142     page_log_info Ok
00143     return
00144 }
00145 
00146 /*  ### ### ### ######### ######### #########*/
00147 /*  Documentation*/
00148 /* */
00149 /*  See doc_normalize.txt for the specification of the publicly visible*/
00150 /*  attributes.*/
00151 /** 
00152  *# Internal attributes
00153  *# - DATA - Transient storage for terminal data.
00154  */
00155 
00156 /*  ### ### ### ######### ######### #########*/
00157 /*  Internal. Helpers*/
00158 
00159 ret  ::page::util::norm::lemon::Drop (type q , type t) {
00160     # Simple normalization.
00161     # All lemon specific data is dropped completely.
00162 
00163     foreach drop {
00164     Directive Codeblock Label Precedence
00165     } {
00166     $q query tree withatt type nonterminal \
00167         withatt detail $drop over n {
00168         $t delete $n
00169         }
00170     }
00171 
00172     # Some nodes can be dropped, but not their children.
00173 
00174     $q query tree withatt type nonterminal \
00175     withatt detail Statement over n {
00176         $t cut $n
00177     }
00178 
00179     # Cut the ALL and LemonGrammar nodes, direct access, no search
00180     # needed.
00181 
00182     $t cut [lindex [$t children root] 0]
00183     $t cut [lindex [$t children root] 0]
00184 
00185     return
00186 }
00187 
00188 ret  ::page::util::norm::lemon::Terminals (type q , type t) {
00189     # The data for all terminals is stored in their grandparental
00190     # nodes. We get rid of both terminals and their parents.
00191 
00192     $q query tree withatt type terminal over n {
00193     set p  [$t parent $n]
00194     set gp [$t parent $p]
00195 
00196     CopyLocation $t $n $gp
00197     AttrCopy     $t $n detail $gp DATA
00198     TokReduce    $t           $gp DATA
00199     $t delete $p
00200     }
00201 
00202     # We can now drop the type attribute, as all the remaining nodes
00203     # (which have it) will contain the value 'nonterminal'.
00204 
00205     $q query tree hasatt type over n {
00206     $t unset $n type
00207     }
00208     return
00209 }
00210 
00211 ret  ::page::util::norm::lemon::Definitions (type q , type t) {
00212     # Convert 'Definition' into the sequences they are.
00213     # Sequences of length one will be flattened later.
00214     # Empty sequences (Length zero) are epsilon.
00215     # Epsilon will be later converted to ? of the
00216     # whole choice they are part of.
00217 
00218     $q query tree withatt detail Definition over n {
00219     $t unset $n detail
00220 
00221     if {[$t children $n] < 1} {
00222         $t set $n op epsilon
00223     } else {
00224         $t set $n op x
00225     }
00226     }
00227     return
00228 }
00229 
00230 ret  ::page::util::norm::lemon::Rules (type q , type t , type sv) {
00231     upvar $sv start
00232     # We move nonterminal hint information from nodes into attributes,
00233     # and delete the now irrelevant nodes.
00234 
00235     # Like with the global metadata we move definition specific
00236     # information out of nodes into attributes, get rid of the
00237     # superfluous nodes, and tag the definition roots with marker
00238     # attributes.
00239 
00240     array set defs {}
00241     $q query tree withatt detail Rule over n {
00242     set first [Child $t $n 0]
00243 
00244     set sym   [$t get $first DATA]
00245     $t set $n symbol $sym
00246     $t set $n label  $sym
00247     $t set $n users  {}
00248     $t set $n mode value
00249 
00250     if {$start eq ""} {
00251         page_info "  StartSymbol: $sym"
00252         set start $sym
00253     }
00254 
00255     # We get the left extend of the definition from the terminal
00256     # for the symbol it defines.
00257 
00258     MergeLocations $t $first [Rightmost $t $n] $n
00259     $t unset $n detail
00260 
00261     lappend defs($sym) $n
00262     $t cut $first
00263     }
00264 
00265     set d {}
00266     foreach sym [array names defs] {
00267     set nodes $defs($sym)
00268     if {[llength $nodes] == 1} {
00269         lappend d $sym [lindex $nodes 0]
00270     } else {
00271         # Merge multi-node definition together, under a choice.
00272 
00273         set r [$t insert root end]
00274         set c [$t insert $r end]
00275 
00276         $t set $r symbol $sym
00277         $t set $r label  $sym
00278         $t set $r users  {}
00279         $t set $r mode value
00280         $t set $c op     /
00281 
00282         foreach n $nodes {
00283         set seq [lindex [$t children $n] 0]
00284         $t move $c end $seq
00285         $t delete $n
00286         }
00287 
00288         lappend d $sym $r
00289     }
00290     }
00291 
00292     # We remember a mapping from nonterminal names to their defining
00293     # nodes in the root as well, for quick reference later, when we
00294     # build nonterminal usage references
00295 
00296     $t set root definitions $d
00297     return
00298 }
00299 
00300 ret  ::page::util::norm::lemon::Rightmost (type t , type n) {
00301     # Determine the rightmost leaf under the specified node.
00302 
00303     if {[$t isleaf $n]} {return $n}
00304     return [Rightmost $t [lindex [$t children $n] end]]
00305 }
00306 
00307 ret  ::page::util::norm::lemon::ElimEpsilon (type q , type t) {
00308     # We convert choices with an epsilon in them into
00309     # optional choices without an epsilon branch.
00310 
00311     $q query tree withatt op epsilon over n {
00312     set choice [$t parent $n]
00313 
00314     # Move branches into the epsilon, which becomes the new
00315     # choice. And the choice becomes an option.
00316     foreach c [$t children $choice] {
00317         if {$c eq $n} continue
00318         $t move $n end $c
00319     }
00320     $t set $n      op /
00321     $t set $choice op ?
00322     }
00323     return
00324 }
00325 
00326 ret  ::page::util::norm::lemon::AutoClassId (type q , type t) {
00327 
00328     array set defs [$t get root definitions]
00329     array set use {}
00330 
00331     $q query tree \
00332         withatt op x \
00333         children \
00334         hasatt DATA \
00335         over n {
00336     # All identifiers are nonterminals, and for the
00337     # undefined ones we create rules which define
00338     # them as terminal sequences.
00339 
00340     set sym  [$t get $n DATA]
00341     $t unset $n DATA
00342 
00343     $t set $n op  n
00344     $t set $n sym $sym
00345 
00346     if {![info exists defs($sym)]} {
00347         set defs($sym) [NewTerminal $t $sym]
00348     }
00349     $t set $n def $defs($sym)
00350 
00351     lappend use($sym) $n
00352     $t unset $n detail
00353     }
00354 
00355     $t set root definitions [array get defs]
00356 
00357     foreach sym [array names use] {
00358     $t set $defs($sym) users $use($sym)
00359     }
00360 
00361     $t set root undefined {}
00362     return
00363 }
00364 
00365 ret  ::page::util::norm::lemon::NewTerminal (type t , type sym) {
00366     page_log_info "  Terminal: $sym"
00367 
00368     set     r [$t insert root end]
00369     $t set $r symbol $sym
00370     $t set $r label  $sym
00371     $t set $r users  {}
00372     $t set $r mode   leaf
00373 
00374     set     s [$t insert $r end]
00375     $t set $s op x
00376 
00377     foreach ch [split $sym {}] {
00378     set c [$t insert $s end]
00379     $t set $c op   t
00380     $t set $c char $ch
00381     }
00382     return $r
00383 }
00384 
00385 /*  ### ### ### ######### ######### #########*/
00386 /*  Internal. Low-level helpers.*/
00387 
00388 ret  ::page::util::norm::lemon::CopyLocation (type t , type src , type dst) {
00389     $t set $dst range    [$t get $src range]
00390     $t set $dst range_lc [$t get $src range_lc]
00391     return
00392 }
00393 
00394 ret  ::page::util::norm::lemon::MergeLocations (type t , type srca , type srcb , type dst) {
00395     set ar   [$t get $srca range]
00396     set arlc [$t get $srca range_lc]
00397 
00398     set br   [$t get $srcb range]
00399     set brlc [$t get $srcb range_lc]
00400 
00401     $t set $dst range    [list [lindex $ar   0] [lindex $br   1]]
00402     $t set $dst range_lc [list [lindex $arlc 0] [lindex $brlc 1]]
00403     return
00404 }
00405 
00406 ret  ::page::util::norm::lemon::AttrCopy (type t , type src , type asrc , type dst , type adst) {
00407     $t set $dst $adst [$t get $src $asrc]
00408     return
00409 }
00410 
00411 ret  ::page::util::norm::lemon::Child (type t , type n , type index) {
00412     return [lindex [$t children $n] $index]
00413 }
00414 
00415 ret  ::page::util::norm::lemon::TokReduce (type t , type src , type attr) {
00416     set tokens [$t get $src $attr]
00417     set ch     {}
00418     foreach tok $tokens {
00419     lappend ch [lindex $tok 0]
00420     }
00421     $t set $src $attr [join $ch {}]
00422     return
00423 }
00424 
00425 /*  ### ### ### ######### ######### #########*/
00426 /*  Ready*/
00427 
00428 package provide page::util::norm::lemon 0.1
00429 

Generated on 21 Sep 2010 for Gui by  doxygen 1.6.1