util_norm_peg.tcl
Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030 package require page::plugin ;
00031 package require treeql
00032 package require page::util::quote
00033
00034 namespace ::page::util::norm::peg {
00035
00036
00037
00038 namespace import ::page::util::quote::*
00039 }
00040
00041
00042
00043
00044 ret ::page::util::norm::peg (type t) {
00045 set q [treeql q -tree $t]
00046
00047 page_info {[PEG Normalization]}
00048 page_log_info ..Terminals ; peg::Terminals $q $t
00049 page_log_info ..Chains ; peg::CutChains $q $t
00050 page_log_info ..Metadata ; peg::Metadata $q $t
00051 page_log_info ..Definitions ; peg::Definitions $q $t
00052 page_log_info ..Expressions ; peg::Expressions $q $t
00053
00054 # Sentinel for PE algorithms.
00055 $t set root symbol <StartExpression>
00056 $q destroy
00057
00058 page_log_info Ok
00059 return
00060 }
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075 ret ::page::util::norm::peg::Terminals (type q , type t) {
00076 # The data for all terminals is stored in their grandparental
00077 # nodes. We get rid of both terminals and their parents.
00078
00079 $q query tree withatt type terminal over n {
00080 set p [$t parent $n]
00081 set gp [$t parent $p]
00082
00083 CopyLocation $t $n $gp
00084 AttrCopy $t $n detail $gp DATA
00085 TokReduce $t $gp DATA
00086 $t delete $p
00087 }
00088
00089 # We can now drop the type attribute, as all the remaining nodes
00090 # (which have it) will contain the value 'nonterminal'.
00091
00092 $q query tree hasatt type over n {
00093 $t unset $n type
00094 }
00095 return
00096 }
00097
00098 ret ::page::util::norm::peg::CutChains (type q , type t) {
00099 # All nodes which have exactly one child are irrelevant. We get
00100 # rid of them. The root node is the sole exception. The immediate
00101 # child of the root however is superfluous as well.
00102
00103 $q query tree notq {root} over n {
00104 if {[llength [$t children $n]] != 1} continue
00105 $t cut $n
00106 }
00107
00108 foreach n [$t children root] {$t cut $n}
00109 return
00110 }
00111
00112 ret ::page::util::norm::peg::Metadata (type q , type t) {
00113 # Having the name of the grammar in a tree node is overkill. We
00114 # move this information into an attribute of the root node.
00115 # The node keeping the start expression separate is irrelevant as
00116 # well. We get rid of it, and tag the root of the start expression
00117 # with a marker attribute.
00118
00119 $q query tree withatt detail Header over n {
00120 set tmp [Child $t $n 0]
00121 set sexpr [Child $t $n 1]
00122
00123 AttrCopy $t $tmp DATA root name
00124 $t cut $tmp
00125 $t cut $n
00126 break
00127 }
00128
00129 # Remember the node for the start expression in the root for quick
00130 # access by later stages.
00131
00132 $t set root start $sexpr
00133 return
00134 }
00135
00136 ret ::page::util::norm::peg::Definitions (type q , type t) {
00137 # We move nonterminal hint information from nodes into attributes,
00138 # and delete the now irrelevant nodes.
00139
00140 # NOTE: This transformation is dependent on the removal of all
00141 # nodes with exactly one child, as it removes the all 'Attribute'
00142 # nodes already. Otherwise this transformation would have to put
00143 # the information into the grandparental node.
00144
00145 # The default mode for nonterminals is 'value'.
00146
00147 $q query tree withatt detail Definition over n {
00148 $t set $n mode value
00149 }
00150
00151 foreach {a mode} {
00152 VOID discard
00153 MATCH match
00154 LEAF leaf
00155 } {
00156 $q query tree withatt detail $a over n {
00157 set p [$t parent $n]
00158 $t set $p mode $mode
00159 $t delete $n
00160 }
00161 }
00162
00163 # Like with the global metadata we move definition specific
00164 # information out of nodes into attributes, get rid of the
00165 # superfluous nodes, and tag the definition roots with marker
00166 # attributes.
00167
00168 set defs {}
00169 $q query tree withatt detail Definition over n {
00170 # Define mode information for all nonterminals without an
00171 # explicit specification. We also save the mode information
00172 # from deletion when we redo the definition node.
00173
00174 set first [Child $t $n 0]
00175
00176 set sym [$t get $first DATA]
00177 $t set $n symbol $sym
00178 $t set $n label $sym
00179 $t set $n users {}
00180
00181 # Now determine the range in the input covered by the
00182 # definition. The left extent comes from the terminal for the
00183 # nonterminal symbol it defines. The right extent comes from
00184 # the rightmost child under the definition. While this not an
00185 # expression tree yet the location data is sound already.
00186
00187 MergeLocations $t $first [Rightmost $t $n] $n
00188 $t unset $n detail
00189
00190 lappend defs $sym $n
00191 $t cut $first
00192 }
00193
00194 # We remember a mapping from nonterminal names to their defining
00195 # nodes in the root as well, for quick reference later, when we
00196 # build nonterminal usage references
00197
00198 $t set root definitions $defs
00199 return
00200 }
00201
00202 ret ::page::util::norm::peg::Rightmost (type t , type n) {
00203 # Determine the rightmost leaf under the specified node.
00204
00205 if {[$t isleaf $n]} {return $n}
00206 return [Rightmost $t [lindex [$t children $n] end]]
00207 }
00208
00209 ret ::page::util::norm::peg::Expressions (type q , type t) {
00210 # We now transform the remaining nodes into proper expression
00211 # trees. The order matters, to shed as much nodes as possible
00212 # early, and to avoid unncessary work.
00213
00214 ExprRanges $q $t
00215 ExprUnaryOps $q $t
00216 ExprChars $q $t
00217 ExprNonterminals $q $t
00218 ExprOperators $q $t
00219 ExprFlatten $q $t
00220 return
00221 }
00222
00223 ret ::page::util::norm::peg::ExprRanges (type q , type t) {
00224 # Ranges = .. operator
00225
00226 $q query tree withatt detail Range over n {
00227 # Two the children, both of text 'Char', their data is what we
00228 # take. The children become irrelevant and are removed.
00229
00230 foreach {b e} [$t children $n] break
00231 set begin [unquote [$t get $b DATA]]
00232 set end [unquote [$t get $e DATA]]
00233
00234 $t set $n op ..
00235 $t set $n begin $begin
00236 $t set $n end $end
00237
00238 MergeLocations $t $b $e $n
00239
00240 $t unset $n detail
00241
00242 $t delete $b
00243 $t delete $e
00244 }
00245 return
00246 }
00247
00248 ret ::page::util::norm::peg::ExprUnaryOps (type q , type t) {
00249 # Unary operators ... Their transformation sheds more nodes.
00250
00251 foreach {a op} {
00252 QUESTION ?
00253 STAR *
00254 PLUS +
00255 AND &
00256 NOT !
00257 } {
00258 $q query tree withatt detail $a over n {
00259 set p [$t parent $n]
00260
00261 $t set $p op $op
00262 $t cut $n
00263
00264 $t unset $p detail
00265 }
00266 }
00267 return
00268 }
00269
00270 ret ::page::util::norm::peg::ExprChars (type q , type t) {
00271 # Chars = t operator (The remaining Char'acters are plain terminal
00272 # symbols.
00273
00274 $q query tree withatt detail Char over n {
00275 set ch [unquote [$t get $n DATA]]
00276
00277 $t set $n op t
00278 $t set $n char $ch
00279
00280 $t unset $n detail
00281 $t unset $n DATA
00282 }
00283 return
00284 }
00285
00286 ret ::page::util::norm::peg::ExprNonterminals (type q , type t) {
00287 # Identifiers = n operator (nonterminal references) ...
00288
00289 array set defs [$t get root definitions]
00290 array set undefined {}
00291
00292 $q query tree withatt detail Identifier over n {
00293 set sym [$t get $n DATA]
00294
00295 $t set $n op n
00296 $t set $n sym $sym
00297
00298 $t unset $n detail
00299 $t unset $n DATA
00300
00301 # Create x-references between the users and the definition of
00302 # a nonterminal symbol.
00303
00304 if {![info exists defs($sym)]} {
00305 $t set $n def {}
00306 lappend undefined($sym) $n
00307 continue
00308 } else {
00309 set def $defs($sym)
00310 $t set $n def $def
00311 }
00312
00313 set users [$t get $def users]
00314 lappend users $n
00315 $t set $def users $users
00316 }
00317
00318 $t set root undefined [array get undefined]
00319 return
00320 }
00321
00322 ret ::page::util::norm::peg::ExprOperators (type q , type t) {
00323 # The remaining operator nodes can be changed directly from node
00324 # text to operator. Se we do.
00325
00326 foreach {a op} {
00327 EPSILON epsilon
00328 ALNUM alnum
00329 ALPHA alpha
00330 DOT dot
00331 Literal x
00332 Class /
00333 Sequence x
00334 Expression /
00335 } {
00336 $q query tree withatt detail $a over n {
00337 $t set $n op $op
00338 $t unset $n detail
00339 }
00340 }
00341 return
00342 }
00343
00344 ret ::page::util::norm::peg::ExprFlatten (type q , type t) {
00345 # Last tweaks of the expressions. Classes inside of Expressions,
00346 # and Literals in Sequences create nested / or x expressions. We
00347 # locate such and flatten the nested expression, cutting out the
00348 # superfluous operator.
00349
00350 foreach op {x /} {
00351 # Locate all x operators, whose parents are x operators as
00352 # well, then go back to the child operators and cut them out.
00353
00354 $q query tree withatt op $op \
00355 parent unique withatt op $op \
00356 children withatt op $op \
00357 over n {
00358 $t cut $n
00359 }
00360
00361 # Locate all x operators without children and convert them
00362 # into epsilon operators. Because that is what they accept,
00363 # nothing.
00364
00365 $q query tree withatt op $op over n {
00366 if {[$t numchildren $n]} continue
00367 $t set $n op epsilon
00368 }
00369 }
00370 return
00371 }
00372
00373
00374
00375
00376 ret ::page::util::norm::peg::CopyLocation (type t , type src , type dst) {
00377 $t set $dst range [$t get $src range]
00378 $t set $dst range_lc [$t get $src range_lc]
00379 return
00380 }
00381
00382 ret ::page::util::norm::peg::MergeLocations (type t , type srca , type srcb , type dst) {
00383 set ar [$t get $srca range]
00384 set arlc [$t get $srca range_lc]
00385
00386 set br [$t get $srcb range]
00387 set brlc [$t get $srcb range_lc]
00388
00389 $t set $dst range [list [lindex $ar 0] [lindex $br 1]]
00390 $t set $dst range_lc [list [lindex $arlc 0] [lindex $brlc 1]]
00391 return
00392 }
00393
00394 ret ::page::util::norm::peg::TokReduce (type t , type src , type attr) {
00395 set tokens [$t get $src $attr]
00396 set ch {}
00397 foreach tok $tokens {
00398 lappend ch [lindex $tok 0]
00399 }
00400 $t set $src $attr [join $ch {}]
00401 return
00402 }
00403
00404 ret ::page::util::norm::peg::AttrCopy (type t , type src , type asrc , type dst , type adst) {
00405 $t set $dst $adst [$t get $src $asrc]
00406 return
00407 }
00408
00409 ret ::page::util::norm::peg::Child (type t , type n , type index) {
00410 return [lindex [$t children $n] $index]
00411 }
00412
00413
00414
00415
00416 package provide page::util::norm::peg 0.1
00417