util_norm_lemon.tcl
Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030 package require page::plugin ;
00031 package require treeql
00032 package require page::util::quote
00033 package require page::util::peg
00034
00035 namespace ::page::util::norm::lemon {
00036
00037
00038
00039 namespace import ::page::util::quote::*
00040 namespace import ::page::util::peg::*
00041 }
00042
00043
00044
00045
00046 ret ::page::util::norm::lemon (type t) {
00047 set q [treeql q -tree $t]
00048
00049 page_info {[Lemon Normalization]}
00050
00051 # Retrieve grammar name out of one directive.
00052 # Or from LHS of first rule.
00053
00054 page_log_info ..Startsymbol
00055
00056 set start {}
00057
00058 $q query tree \
00059 withatt type nonterminal \
00060 withatt detail StartSymbol \
00061 descendants \
00062 withatt type terminal \
00063 over n {
00064
00065 lemon::TokReduce $t $n detail
00066 set start [$t get $n detail]
00067
00068 page_info " StartSymbol: $start"
00069 }
00070
00071 $q query tree \
00072 withatt type nonterminal \
00073 withatt detail Name \
00074 descendants \
00075 withatt type terminal \
00076 over n {
00077
00078 lemon::TokReduce $t $n detail
00079 set name [$t get $n detail]
00080
00081 page_info " Name: $name"
00082
00083 $t set root name $name
00084 }
00085
00086 page_log_info ..Drop ; lemon::Drop $q $t
00087 page_log_info ..Terminals ; lemon::Terminals $q $t
00088 page_log_info ..Definitions ; lemon::Definitions $q $t
00089 page_log_info ..Rules ; lemon::Rules $q $t start
00090 page_log_info ..Epsilon ; lemon::ElimEpsilon $q $t
00091 page_log_info ..Autoclass ; lemon::AutoClassId $q $t
00092 page_log_info ..Chains
00093
00094 # Find and cut operator chains, very restricted. Cut only chains
00095 # of x- and /-operators. The other operators have only one child
00096 # by definition and are thus not chains.
00097
00098 #set q [treeql q -tree $t]
00099 # q query tree over n
00100 foreach n [$t children -all root] {
00101 if {[$t keyexists $n symbol]} continue
00102 if {[llength [$t children $n]] != 1} continue
00103
00104 set op [$t get $n op]
00105 if {($op ne "/") && ($op ne "x")} continue
00106 $t cut $n
00107 }
00108
00109 page_log_info ..Flatten
00110
00111 lemon::flatten $q $t
00112
00113 # Analysis: Left recursion, and where.
00114 # Manual: Definitions for terminals.
00115 # Definitions for space, comments.
00116 # Integration of this into the grammar.
00117
00118 # Sentinel for PE algorithms.
00119 $t set root symbol <StartExpression>
00120
00121 if {$start eq ""} {
00122 page_error " Startsymbol missing"
00123 } else {
00124 set s [$t insert root end]
00125 $t set $s op n
00126 $t set $s sym $start
00127 $t set root start $s
00128
00129 array set def [$t get root definitions]
00130
00131 if {![info exists def($start)]} {
00132 page_error " Startsymbol is undefined"
00133 $t set $s def ""
00134 } else {
00135 $t set $s def $def($start)
00136 }
00137 unset def
00138 }
00139
00140 $q destroy
00141
00142 page_log_info Ok
00143 return
00144 }
00145
00146
00147
00148
00149
00150
00151
00152
00153
00154
00155
00156
00157
00158
00159 ret ::page::util::norm::lemon::Drop (type q , type t) {
00160 # Simple normalization.
00161 # All lemon specific data is dropped completely.
00162
00163 foreach drop {
00164 Directive Codeblock Label Precedence
00165 } {
00166 $q query tree withatt type nonterminal \
00167 withatt detail $drop over n {
00168 $t delete $n
00169 }
00170 }
00171
00172 # Some nodes can be dropped, but not their children.
00173
00174 $q query tree withatt type nonterminal \
00175 withatt detail Statement over n {
00176 $t cut $n
00177 }
00178
00179 # Cut the ALL and LemonGrammar nodes, direct access, no search
00180 # needed.
00181
00182 $t cut [lindex [$t children root] 0]
00183 $t cut [lindex [$t children root] 0]
00184
00185 return
00186 }
00187
00188 ret ::page::util::norm::lemon::Terminals (type q , type t) {
00189 # The data for all terminals is stored in their grandparental
00190 # nodes. We get rid of both terminals and their parents.
00191
00192 $q query tree withatt type terminal over n {
00193 set p [$t parent $n]
00194 set gp [$t parent $p]
00195
00196 CopyLocation $t $n $gp
00197 AttrCopy $t $n detail $gp DATA
00198 TokReduce $t $gp DATA
00199 $t delete $p
00200 }
00201
00202 # We can now drop the type attribute, as all the remaining nodes
00203 # (which have it) will contain the value 'nonterminal'.
00204
00205 $q query tree hasatt type over n {
00206 $t unset $n type
00207 }
00208 return
00209 }
00210
00211 ret ::page::util::norm::lemon::Definitions (type q , type t) {
00212 # Convert 'Definition' into the sequences they are.
00213 # Sequences of length one will be flattened later.
00214 # Empty sequences (Length zero) are epsilon.
00215 # Epsilon will be later converted to ? of the
00216 # whole choice they are part of.
00217
00218 $q query tree withatt detail Definition over n {
00219 $t unset $n detail
00220
00221 if {[$t children $n] < 1} {
00222 $t set $n op epsilon
00223 } else {
00224 $t set $n op x
00225 }
00226 }
00227 return
00228 }
00229
00230 ret ::page::util::norm::lemon::Rules (type q , type t , type sv) {
00231 upvar $sv start
00232 # We move nonterminal hint information from nodes into attributes,
00233 # and delete the now irrelevant nodes.
00234
00235 # Like with the global metadata we move definition specific
00236 # information out of nodes into attributes, get rid of the
00237 # superfluous nodes, and tag the definition roots with marker
00238 # attributes.
00239
00240 array set defs {}
00241 $q query tree withatt detail Rule over n {
00242 set first [Child $t $n 0]
00243
00244 set sym [$t get $first DATA]
00245 $t set $n symbol $sym
00246 $t set $n label $sym
00247 $t set $n users {}
00248 $t set $n mode value
00249
00250 if {$start eq ""} {
00251 page_info " StartSymbol: $sym"
00252 set start $sym
00253 }
00254
00255 # We get the left extend of the definition from the terminal
00256 # for the symbol it defines.
00257
00258 MergeLocations $t $first [Rightmost $t $n] $n
00259 $t unset $n detail
00260
00261 lappend defs($sym) $n
00262 $t cut $first
00263 }
00264
00265 set d {}
00266 foreach sym [array names defs] {
00267 set nodes $defs($sym)
00268 if {[llength $nodes] == 1} {
00269 lappend d $sym [lindex $nodes 0]
00270 } else {
00271 # Merge multi-node definition together, under a choice.
00272
00273 set r [$t insert root end]
00274 set c [$t insert $r end]
00275
00276 $t set $r symbol $sym
00277 $t set $r label $sym
00278 $t set $r users {}
00279 $t set $r mode value
00280 $t set $c op /
00281
00282 foreach n $nodes {
00283 set seq [lindex [$t children $n] 0]
00284 $t move $c end $seq
00285 $t delete $n
00286 }
00287
00288 lappend d $sym $r
00289 }
00290 }
00291
00292 # We remember a mapping from nonterminal names to their defining
00293 # nodes in the root as well, for quick reference later, when we
00294 # build nonterminal usage references
00295
00296 $t set root definitions $d
00297 return
00298 }
00299
00300 ret ::page::util::norm::lemon::Rightmost (type t , type n) {
00301 # Determine the rightmost leaf under the specified node.
00302
00303 if {[$t isleaf $n]} {return $n}
00304 return [Rightmost $t [lindex [$t children $n] end]]
00305 }
00306
00307 ret ::page::util::norm::lemon::ElimEpsilon (type q , type t) {
00308 # We convert choices with an epsilon in them into
00309 # optional choices without an epsilon branch.
00310
00311 $q query tree withatt op epsilon over n {
00312 set choice [$t parent $n]
00313
00314 # Move branches into the epsilon, which becomes the new
00315 # choice. And the choice becomes an option.
00316 foreach c [$t children $choice] {
00317 if {$c eq $n} continue
00318 $t move $n end $c
00319 }
00320 $t set $n op /
00321 $t set $choice op ?
00322 }
00323 return
00324 }
00325
00326 ret ::page::util::norm::lemon::AutoClassId (type q , type t) {
00327
00328 array set defs [$t get root definitions]
00329 array set use {}
00330
00331 $q query tree \
00332 withatt op x \
00333 children \
00334 hasatt DATA \
00335 over n {
00336 # All identifiers are nonterminals, and for the
00337 # undefined ones we create rules which define
00338 # them as terminal sequences.
00339
00340 set sym [$t get $n DATA]
00341 $t unset $n DATA
00342
00343 $t set $n op n
00344 $t set $n sym $sym
00345
00346 if {![info exists defs($sym)]} {
00347 set defs($sym) [NewTerminal $t $sym]
00348 }
00349 $t set $n def $defs($sym)
00350
00351 lappend use($sym) $n
00352 $t unset $n detail
00353 }
00354
00355 $t set root definitions [array get defs]
00356
00357 foreach sym [array names use] {
00358 $t set $defs($sym) users $use($sym)
00359 }
00360
00361 $t set root undefined {}
00362 return
00363 }
00364
00365 ret ::page::util::norm::lemon::NewTerminal (type t , type sym) {
00366 page_log_info " Terminal: $sym"
00367
00368 set r [$t insert root end]
00369 $t set $r symbol $sym
00370 $t set $r label $sym
00371 $t set $r users {}
00372 $t set $r mode leaf
00373
00374 set s [$t insert $r end]
00375 $t set $s op x
00376
00377 foreach ch [split $sym {}] {
00378 set c [$t insert $s end]
00379 $t set $c op t
00380 $t set $c char $ch
00381 }
00382 return $r
00383 }
00384
00385
00386
00387
00388 ret ::page::util::norm::lemon::CopyLocation (type t , type src , type dst) {
00389 $t set $dst range [$t get $src range]
00390 $t set $dst range_lc [$t get $src range_lc]
00391 return
00392 }
00393
00394 ret ::page::util::norm::lemon::MergeLocations (type t , type srca , type srcb , type dst) {
00395 set ar [$t get $srca range]
00396 set arlc [$t get $srca range_lc]
00397
00398 set br [$t get $srcb range]
00399 set brlc [$t get $srcb range_lc]
00400
00401 $t set $dst range [list [lindex $ar 0] [lindex $br 1]]
00402 $t set $dst range_lc [list [lindex $arlc 0] [lindex $brlc 1]]
00403 return
00404 }
00405
00406 ret ::page::util::norm::lemon::AttrCopy (type t , type src , type asrc , type dst , type adst) {
00407 $t set $dst $adst [$t get $src $asrc]
00408 return
00409 }
00410
00411 ret ::page::util::norm::lemon::Child (type t , type n , type index) {
00412 return [lindex [$t children $n] $index]
00413 }
00414
00415 ret ::page::util::norm::lemon::TokReduce (type t , type src , type attr) {
00416 set tokens [$t get $src $attr]
00417 set ch {}
00418 foreach tok $tokens {
00419 lappend ch [lindex $tok 0]
00420 }
00421 $t set $src $attr [join $ch {}]
00422 return
00423 }
00424
00425
00426
00427
00428 package provide page::util::norm::lemon 0.1
00429