util_norm_peg.tcl

Go to the documentation of this file.
00001 /*  -*- tcl -*-*/
00002 /* */
00003 /*  Copyright (c) 2005 by Andreas Kupries <andreas_kupries@users.sourceforge.net>*/
00004 /*  Parser Generator / Transformation - Normalize PEG AST for later.*/
00005 
00006 /*  This package assumes to be used from within a PAGE plugin. It uses*/
00007 /*  the API commands listed below. These are identical across the major*/
00008 /*  types of PAGE plugins, allowing this package to be used in reader,*/
00009 /*  transform, and writer plugins. It cannot be used in a configuration*/
00010 /*  plugin, and this makes no sense either.*/
00011 /* */
00012 /*  To ensure that our assumption is ok we require the relevant pseudo*/
00013 /*  package setup by the PAGE plugin management code.*/
00014 /* */
00015 /*  -----------------+--*/
00016 /*  page_info        | Reporting to the user.*/
00017 /*  page_warning     |*/
00018 /*  page_error       |*/
00019 /*  -----------------+--*/
00020 /*  page_log_error   | Reporting of internals.*/
00021 /*  page_log_warning |*/
00022 /*  page_log_info    |*/
00023 /*  -----------------+--*/
00024 
00025 /*  ### ### ### ######### ######### #########*/
00026 /*  Requisites*/
00027 
00028 /*  @mdgen NODEP: page::plugin*/
00029 
00030 package require page::plugin ; /*  S.a. pseudo-package.*/
00031 package require treeql
00032 package require page::util::quote
00033 
00034 namespace ::page::util::norm::peg {
00035     /*  Get the peg char de/encoder commands.*/
00036     /*  (unquote, quote'tcl)*/
00037 
00038     namespace import ::page::util::quote::*
00039 }
00040 
00041 /*  ### ### ### ######### ######### #########*/
00042 /*  API*/
00043 
00044 ret  ::page::util::norm::peg (type t) {
00045     set q [treeql q -tree $t]
00046 
00047     page_info {[PEG Normalization]}
00048     page_log_info ..Terminals   ; peg::Terminals   $q $t
00049     page_log_info ..Chains      ; peg::CutChains   $q $t
00050     page_log_info ..Metadata    ; peg::Metadata    $q $t
00051     page_log_info ..Definitions ; peg::Definitions $q $t
00052     page_log_info ..Expressions ; peg::Expressions $q $t
00053 
00054     # Sentinel for PE algorithms.
00055     $t set root symbol <StartExpression>
00056     $q destroy
00057 
00058     page_log_info Ok
00059     return
00060 }
00061 
00062 /*  ### ### ### ######### ######### #########*/
00063 /*  Documentation*/
00064 /* */
00065 /*  See doc_normalize.txt for the specification of the publicly visible*/
00066 /*  attributes.*/
00067 /** 
00068  *# Internal attributes
00069  *# - DATA - Transient storage for terminal data.
00070  */
00071 
00072 /*  ### ### ### ######### ######### #########*/
00073 /*  Internal. Helpers*/
00074 
00075 ret  ::page::util::norm::peg::Terminals (type q , type t) {
00076     # The data for all terminals is stored in their grandparental
00077     # nodes. We get rid of both terminals and their parents.
00078 
00079     $q query tree withatt type terminal over n {
00080     set p  [$t parent $n]
00081     set gp [$t parent $p]
00082 
00083     CopyLocation $t $n $gp
00084     AttrCopy     $t $n detail $gp DATA
00085     TokReduce    $t           $gp DATA
00086     $t delete $p
00087     }
00088 
00089     # We can now drop the type attribute, as all the remaining nodes
00090     # (which have it) will contain the value 'nonterminal'.
00091 
00092     $q query tree hasatt type over n {
00093     $t unset $n type
00094     }
00095     return
00096 }
00097 
00098 ret  ::page::util::norm::peg::CutChains (type q , type t) {
00099     # All nodes which have exactly one child are irrelevant. We get
00100     # rid of them. The root node is the sole exception. The immediate
00101     # child of the root however is superfluous as well.
00102 
00103     $q query tree notq {root} over n {
00104     if {[llength [$t children $n]] != 1} continue
00105     $t cut $n
00106     }
00107 
00108     foreach n [$t children root] {$t cut $n}
00109     return
00110 }
00111 
00112 ret  ::page::util::norm::peg::Metadata (type q , type t) {
00113     # Having the name of the grammar in a tree node is overkill. We
00114     # move this information into an attribute of the root node.
00115     # The node keeping the start expression separate is irrelevant as
00116     # well. We get rid of it, and tag the root of the start expression
00117     # with a marker attribute.
00118 
00119     $q query tree withatt detail Header over n {
00120     set tmp    [Child $t $n 0]
00121     set sexpr  [Child $t $n 1]
00122 
00123     AttrCopy $t $tmp DATA root name
00124     $t cut $tmp
00125     $t cut $n
00126     break
00127     }
00128 
00129     # Remember the node for the start expression in the root for quick
00130     # access by later stages.
00131 
00132     $t set root start $sexpr
00133     return
00134 }
00135 
00136 ret  ::page::util::norm::peg::Definitions (type q , type t) {
00137     # We move nonterminal hint information from nodes into attributes,
00138     # and delete the now irrelevant nodes.
00139 
00140     # NOTE: This transformation is dependent on the removal of all
00141     # nodes with exactly one child, as it removes the all 'Attribute'
00142     # nodes already. Otherwise this transformation would have to put
00143     # the information into the grandparental node.
00144 
00145     # The default mode for nonterminals is 'value'.
00146 
00147     $q query tree withatt detail Definition over n {
00148     $t set $n mode value
00149     }
00150 
00151     foreach {a mode} {
00152     VOID  discard
00153     MATCH match
00154     LEAF  leaf
00155     } {
00156     $q query tree withatt detail $a over n {
00157         set p [$t parent $n]
00158         $t set $p mode $mode
00159         $t delete $n
00160     }
00161     }
00162 
00163     # Like with the global metadata we move definition specific
00164     # information out of nodes into attributes, get rid of the
00165     # superfluous nodes, and tag the definition roots with marker
00166     # attributes.
00167 
00168     set defs {}
00169     $q query tree withatt detail Definition over n {
00170     # Define mode information for all nonterminals without an
00171     # explicit specification. We also save the mode information
00172     # from deletion when we redo the definition node.
00173 
00174     set first [Child $t $n 0]
00175 
00176     set sym [$t get $first DATA]
00177     $t set $n symbol $sym
00178     $t set $n label  $sym
00179     $t set $n users  {}
00180 
00181     # Now determine the range in the input covered by the
00182     # definition. The left extent comes from the terminal for the
00183     # nonterminal symbol it defines. The right extent comes from
00184     # the rightmost child under the definition. While this not an
00185     # expression tree yet the location data is sound already.
00186 
00187     MergeLocations $t $first [Rightmost $t $n] $n
00188     $t unset $n detail
00189 
00190     lappend defs $sym $n
00191     $t cut $first
00192     }
00193 
00194     # We remember a mapping from nonterminal names to their defining
00195     # nodes in the root as well, for quick reference later, when we
00196     # build nonterminal usage references
00197 
00198     $t set root definitions $defs
00199     return
00200 }
00201 
00202 ret  ::page::util::norm::peg::Rightmost (type t , type n) {
00203     # Determine the rightmost leaf under the specified node.
00204 
00205     if {[$t isleaf $n]} {return $n}
00206     return [Rightmost $t [lindex [$t children $n] end]]
00207 }
00208 
00209 ret  ::page::util::norm::peg::Expressions (type q , type t) {
00210     # We now transform the remaining nodes into proper expression
00211     # trees. The order matters, to shed as much nodes as possible
00212     # early, and to avoid unncessary work.
00213 
00214     ExprRanges       $q $t
00215     ExprUnaryOps     $q $t
00216     ExprChars        $q $t
00217     ExprNonterminals $q $t
00218     ExprOperators    $q $t
00219     ExprFlatten      $q $t
00220     return
00221 }
00222 
00223 ret  ::page::util::norm::peg::ExprRanges (type q , type t) {
00224     # Ranges = .. operator
00225 
00226     $q query tree withatt detail Range over n {
00227     # Two the children, both of text 'Char', their data is what we
00228     # take. The children become irrelevant and are removed.
00229 
00230     foreach {b e} [$t children $n] break
00231     set begin [unquote [$t get $b DATA]]
00232     set end   [unquote [$t get $e DATA]]
00233 
00234     $t set $n op ..
00235     $t set $n begin $begin
00236     $t set $n end   $end
00237 
00238     MergeLocations $t $b $e $n
00239 
00240     $t unset $n detail
00241 
00242     $t delete $b
00243     $t delete $e
00244     }
00245     return
00246 }
00247 
00248 ret  ::page::util::norm::peg::ExprUnaryOps (type q , type t) {
00249     # Unary operators ... Their transformation sheds more nodes.
00250 
00251     foreach {a op} {
00252     QUESTION ?
00253     STAR     *
00254     PLUS     +
00255     AND      &
00256     NOT      !
00257     } {
00258     $q query tree withatt detail $a over n {
00259         set p [$t parent $n]
00260 
00261         $t set $p op $op
00262         $t cut $n
00263 
00264         $t unset $p detail
00265     }
00266     }
00267     return
00268 }
00269 
00270 ret  ::page::util::norm::peg::ExprChars (type q , type t) {
00271     # Chars = t operator (The remaining Char'acters are plain terminal
00272     # symbols.
00273 
00274     $q query tree withatt detail Char over n {
00275     set ch [unquote [$t get $n DATA]]
00276 
00277     $t set $n op   t
00278     $t set $n char $ch
00279 
00280     $t unset $n detail
00281     $t unset $n DATA
00282     }
00283     return
00284 }
00285 
00286 ret  ::page::util::norm::peg::ExprNonterminals (type q , type t) {
00287     # Identifiers = n operator (nonterminal references) ...
00288 
00289     array set defs [$t get root definitions]
00290     array set undefined {}
00291 
00292     $q query tree withatt detail Identifier over n {
00293     set sym [$t get $n DATA]
00294 
00295     $t set $n op  n
00296     $t set $n sym $sym
00297 
00298     $t unset $n detail
00299     $t unset $n DATA
00300 
00301     # Create x-references between the users and the definition of
00302     # a nonterminal symbol.
00303 
00304     if {![info exists defs($sym)]} {
00305         $t set $n def {}
00306         lappend undefined($sym) $n
00307         continue
00308     } else {
00309         set def $defs($sym)
00310         $t set $n def $def
00311     }
00312 
00313     set users [$t get $def users]
00314     lappend users $n
00315     $t set $def users $users
00316     }
00317 
00318     $t set root undefined [array get undefined]
00319     return
00320 }
00321 
00322 ret  ::page::util::norm::peg::ExprOperators (type q , type t) {
00323     # The remaining operator nodes can be changed directly from node
00324     # text to operator. Se we do.
00325 
00326     foreach {a op} {
00327     EPSILON    epsilon
00328     ALNUM      alnum
00329     ALPHA      alpha
00330     DOT        dot
00331     Literal    x
00332     Class      /
00333     Sequence   x
00334     Expression /
00335     } {
00336     $q query tree withatt detail $a over n {
00337         $t set   $n op $op
00338         $t unset $n detail
00339     }
00340     }
00341     return
00342 }
00343 
00344 ret  ::page::util::norm::peg::ExprFlatten (type q , type t) {
00345     # Last tweaks of the expressions. Classes inside of Expressions,
00346     # and Literals in Sequences create nested / or x expressions. We
00347     # locate such and flatten the nested expression, cutting out the
00348     # superfluous operator.
00349 
00350     foreach op {x /} {
00351     # Locate all x operators, whose parents are x operators as
00352     # well, then go back to the child operators and cut them out.
00353 
00354     $q query tree withatt op $op \
00355         parent unique withatt op $op \
00356         children withatt op $op \
00357         over n {
00358         $t cut $n
00359     }
00360 
00361     # Locate all x operators without children and convert them
00362     # into epsilon operators. Because that is what they accept,
00363     # nothing.
00364 
00365     $q query tree withatt op $op over n {
00366         if {[$t numchildren $n]} continue
00367         $t set $n op epsilon
00368     }
00369     }
00370     return
00371 }
00372 
00373 /*  ### ### ### ######### ######### #########*/
00374 /*  Internal. Low-level helpers.*/
00375 
00376 ret  ::page::util::norm::peg::CopyLocation (type t , type src , type dst) {
00377     $t set $dst range    [$t get $src range]
00378     $t set $dst range_lc [$t get $src range_lc]
00379     return
00380 }
00381 
00382 ret  ::page::util::norm::peg::MergeLocations (type t , type srca , type srcb , type dst) {
00383     set ar   [$t get $srca range]
00384     set arlc [$t get $srca range_lc]
00385 
00386     set br   [$t get $srcb range]
00387     set brlc [$t get $srcb range_lc]
00388 
00389     $t set $dst range    [list [lindex $ar   0] [lindex $br   1]]
00390     $t set $dst range_lc [list [lindex $arlc 0] [lindex $brlc 1]]
00391     return
00392 }
00393 
00394 ret  ::page::util::norm::peg::TokReduce (type t , type src , type attr) {
00395     set tokens [$t get $src $attr]
00396     set ch     {}
00397     foreach tok $tokens {
00398     lappend ch [lindex $tok 0]
00399     }
00400     $t set $src $attr [join $ch {}]
00401     return
00402 }
00403 
00404 ret  ::page::util::norm::peg::AttrCopy (type t , type src , type asrc , type dst , type adst) {
00405     $t set $dst $adst [$t get $src $asrc]
00406     return
00407 }
00408 
00409 ret  ::page::util::norm::peg::Child (type t , type n , type index) {
00410     return [lindex [$t children $n] $index]
00411 }
00412 
00413 /*  ### ### ### ######### ######### #########*/
00414 /*  Ready*/
00415 
00416 package provide page::util::norm::peg 0.1
00417 

Generated on 21 Sep 2010 for Gui by  doxygen 1.6.1