gen_peg_canon.tcl

Go to the documentation of this file.
00001 /*  -*- tcl -*-*/
00002 /* */
00003 /*  Copyright (c) 2005 by Andreas Kupries <andreas_kupries@users.sourceforge.net>*/
00004 /*  Parser Generator / Backend - PEG as ... PEG*/
00005 
00006 /*  ### ### ### ######### ######### #########*/
00007 /*  Dumping the input grammar. But not as Tcl or other code. In PEG*/
00008 /*  format again, pretty printing.*/
00009 
00010 /*  ### ### ### ######### ######### #########*/
00011 /*  Requisites*/
00012 
00013 package require textutil
00014 
00015 namespace ::page::gen::peg::canon {}
00016 
00017 /*  ### ### ### ######### ######### #########*/
00018 /*  API*/
00019 
00020 ret  ::page::gen::peg::canon (type t , type chan) {
00021 
00022     # Generate data for inherited attributes
00023     # used during synthesis.
00024     canon::Setup $t
00025 
00026     # Synthesize all text fragments we need.
00027     canon::Synth $t
00028 
00029     # And write the grammar text.
00030     puts $chan [$t get root TEXT]
00031     return
00032 }
00033 
00034 /*  ### ### ### ######### ######### #########*/
00035 /*  Internal. Helpers*/
00036 
00037 ret  ::page::gen::peg::canon::Setup (type t) {
00038     # Phase 1: Top-down, inherited attributes:
00039     #
00040     # - Max length of nonterminal symbols defined by the grammar.
00041     #
00042     # - Indentation put on all rules to get enough space for
00043     #   definition attributes.
00044 
00045     set       max   -1
00046     array set modes {}
00047 
00048     foreach {sym def} [$t get root definitions] {
00049     set l [string length $sym]
00050     if {$l > $max} {set max $l}
00051 
00052     set mode [string index [$t get $def mode] 0]
00053     set modes($mode) .
00054     }
00055     set modeset [join [lsort [array names modes]] ""]
00056     set mlen    [AttrFieldLength $modeset]
00057     set heading [expr {$max + $mlen + 4}]
00058     # The constant 4 is for ' <- ', see
00059     # SynthNode/Nonterminal
00060 
00061     # Save the computed information for access by the definitions and
00062     # other operators.
00063 
00064     $t set root SYM_FIELDLEN $max
00065     $t set root ATT_FIELDLEN $mlen
00066     $t set root ATT_BASE     $modeset
00067     $t set root HEADLEN      $heading
00068     return
00069 }
00070 
00071 ret  ::page::gen::peg::canon::Synth (type t) {
00072     # Phase 2: Bottom-up, synthesized attributes
00073     #
00074     # - Text block per node, length and height.
00075 
00076     $t walk root -order post -type dfs n {
00077     SynthNode $t $n
00078     }
00079     return
00080 }
00081 
00082 ret  ::page::gen::peg::canon::SynthNode (type t , type n) {
00083     if {$n eq "root"} {
00084     set code Root
00085     } elseif {[$t keyexists $n symbol]} {
00086     set code Nonterminal
00087     } elseif {[$t keyexists $n op]} {
00088     set code [$t get $n op]
00089     } else {
00090     return -code error "PANIC. Bad node $n, cannot classify"
00091     }
00092 
00093     #puts stderr "SynthNode/$code $t $n"
00094 
00095     SynthNode/$code $t $n
00096 
00097     #SHOW [$t get $n TEXT] 1 0
00098     #catch {puts stderr "\t.[$t get $n W]x[$t get $n H]"}
00099     return
00100 }
00101 
00102 ret  ::page::gen::peg::canon::SynthNode/Root (type t , type n) {
00103     # Root is the grammar itself.
00104 
00105     # Get the data we need from our children, which are start
00106     # expression and nonterminal definitions.
00107 
00108     set gname  [$t get root name]
00109     set gstart [$t get root start]
00110     if {$gstart ne ""} {
00111     set stext  [$t get $gstart TEXT]
00112     } else {
00113     puts stderr "No start expression."
00114     set stext ""
00115     }
00116     set rules  {}
00117     foreach {sym def} [$t get root definitions] {
00118     lappend rules [list $sym [$t get $def TEXT]]
00119     }
00120 
00121     # Combine them into a text for the whole grammar.
00122 
00123     set intro  "PEG $gname \("
00124     set ispace [::textutil::blank [string length $intro]]
00125 
00126     set    out ""
00127     append out "# -*- text -*-" \n
00128     append out "## Parsing Expression Grammar '$gname'." \n
00129     append out "## Layouted by the PG backend 'PEGwriter'." \n
00130     append out \n
00131     append out $intro[::textutil::indent $stext $ispace 1]\)
00132     append out \n
00133     append out \n
00134 
00135     foreach e [lsort -dict -index 0 $rules] {
00136     foreach {sym text} $e break
00137     append out $text \n
00138     append out \n
00139     }
00140 
00141     append out "END\;" \n
00142 
00143     $t set root TEXT $out
00144     return
00145 }
00146 
00147 ret  ::page::gen::peg::canon::SynthNode/Nonterminal (type t , type n) {
00148     # This is the root of a definition. We now
00149     # have to combine the text block for the
00150     # expression with nonterminal and attribute
00151     # data.
00152 
00153     variable ms
00154 
00155     set abase [$t get root ATT_BASE]
00156     set sfl   [$t get root SYM_FIELDLEN]
00157     set mode  [$t get $n mode]
00158     set sym   [$t get $n symbol]
00159     set etext [$t get [lindex [$t children $n] 0] TEXT]
00160 
00161     set    out ""
00162     append out $ms($abase,$mode)
00163     append out $sym
00164     append out [::textutil::blank [expr {$sfl - [string length $sym]}]]
00165     append out " <- "
00166 
00167     set ispace [::textutil::blank [string length $out]]
00168 
00169     append out [::textutil::indent $etext $ispace 1]
00170     append out " ;"
00171 
00172     $t set $n TEXT $out
00173     return
00174 }
00175 
00176 ret  ::page::gen::peg::canon::SynthNode/t (type t , type n) {
00177     # Terminal node. Primitive layout.
00178     # Put the char into single or double quotes.
00179 
00180     set ch [$t get $n char]
00181     if {$ch eq "'"} {set q "\""} else {set q '}
00182 
00183     set text $q$ch$q
00184 
00185     SetBlock $t $n $text
00186     return
00187 }
00188 
00189 ret  ::page::gen::peg::canon::SynthNode/n (type t , type n) {
00190     # Nonterminal node. Primitive layout. Text is the name of smybol
00191     # itself.
00192 
00193     SetBlock $t $n [$t get $n sym]
00194     return
00195 }
00196 
00197 ret  ::page::gen::peg::canon::SynthNode/.. (type t , type n) {
00198     # Range is [x-y]
00199     set b [$t get $n begin]
00200     set e [$t get $n end]
00201     SetBlock $t $n "\[${b}-${e}\]"
00202     return
00203 }
00204 
00205 ret  ::page::gen::peg::canon::SynthNode/alnum   (type t , type n) {SetBlock $t $n <alnum>}
00206 ret  ::page::gen::peg::canon::SynthNode/alpha   (type t , type n) {SetBlock $t $n <alpha>}
00207 ret  ::page::gen::peg::canon::SynthNode/dot     (type t , type n) {SetBlock $t $n .}
00208 ret  ::page::gen::peg::canon::SynthNode/epsilon (type t , type n) {SetBlock $t $n ""}
00209 
00210 ret  ::page::gen::peg::canon::SynthNode/? (type t , type n) {SynthSuffix $t $n ?}
00211 ret  ::page::gen::peg::canon::SynthNode/* (type t , type n) {SynthSuffix $t $n *}
00212 ret  ::page::gen::peg::canon::SynthNode/+ (type t , type n) {SynthSuffix $t $n +}
00213 
00214 ret  ::page::gen::peg::canon::SynthNode/! (type t , type n) {SynthPrefix $t $n !}
00215 ret  ::page::gen::peg::canon::SynthNode/& (type t , type n) {SynthPrefix $t $n &}
00216 
00217 ret  ::page::gen::peg::canon::SynthSuffix (type t , type n , type op) {
00218 
00219     set sub   [lindex [$t children $n] 0]
00220     set sop   [$t get $sub op]
00221     set etext [$t get $sub TEXT]
00222 
00223     WrapParens $op $sop etext
00224     SetBlock $t $n $etext$op
00225     return
00226 }
00227 
00228 ret  ::page::gen::peg::canon::SynthPrefix (type t , type n , type op) {
00229 
00230     set sub   [lindex [$t children $n] 0]
00231     set sop   [$t get $sub op]
00232     set etext [$t get $sub TEXT]
00233 
00234     WrapParens $op $sop etext
00235     SetBlock $t $n $op$etext
00236     return
00237 }
00238 
00239 ret  ::page::gen::peg::canon::SynthNode/x (type t , type n) {
00240     variable llen
00241 
00242     # Space given to us for an expression.
00243     set lend [expr {$llen - [$t get root HEADLEN]}]
00244 
00245     set clist [$t children $n]
00246     if {[llength $clist] == 1} {
00247     # Implicit cutting out of chains.
00248 
00249     CopyBlock $t $n [lindex $clist 0]
00250 
00251     #puts stderr <<implicit>>
00252     return
00253     }
00254 
00255     set out ""
00256 
00257     # We are not tracking the total width of the block, but only the
00258     # width of the current line, as that is where we may have to
00259     # wrap. The height however is the total height.
00260 
00261     #puts stderr <<$clist>>
00262     #puts stderr \t___________________________________
00263 
00264     set w 0
00265     set h 0
00266     foreach c $clist {
00267     set sop   [$t get $c op]
00268     set sub   [$t get $c TEXT]
00269     set sw    [$t get $c W]
00270     set slw   [$t get $c Wlast]
00271     set sh    [$t get $c H]
00272 
00273     #puts stderr \t<$sop/$sw/$slw/$sh>___________________________________
00274     #SHOW $sub $slw $sh
00275 
00276     if {[Paren x $sop]} {
00277         set sub "([::textutil::indent $sub " " 1])"
00278         incr slw 2
00279         incr sw  2
00280 
00281         #puts stderr /paren/
00282         #SHOW $sub $slw $sh
00283     }
00284 
00285     # Empty buffer ... Put element, and extend dimensions
00286 
00287     #puts stderr \t.=============================
00288     #SHOW $out $w $h
00289 
00290     if {$w == 0} {
00291         #puts stderr /init
00292         append out $sub
00293         set w $slw
00294         set h $sh
00295     } elseif {($w + $sw + 1) > $lend} {
00296         #puts stderr /wrap/[expr {($w + $sw + 1)}]/$lend
00297         # To large, wrap into next line.
00298         append out \n $sub
00299         incr h $sh
00300         set  w $slw
00301     } else {
00302         # We have still space to put the block in. Either by
00303         # simply appending, or by indenting a multiline block
00304         # properly so that its parts stay aligned with each other.
00305         if {$sh == 1} {
00306         #puts stderr /add/line
00307         append out " " $sub
00308         incr w ; incr w $slw
00309         } else {
00310         append out " "  ; incr w
00311         #puts stderr /add/block/$w
00312         append out [::textutil::indent $sub [::textutil::blank $w] 1]
00313         incr w $slw
00314         incr h $sh ; incr h -1
00315         }
00316     }
00317 
00318     #puts stderr \t.~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
00319     #SHOW $out $w $h
00320     }
00321 
00322     SetBlock $t $n $out
00323     return
00324 }
00325 
00326 ret  ::page::gen::peg::canon::SynthNode// (type t , type n) {
00327     # We take all branches and put them together, nicely aligned under
00328     # each other.
00329 
00330     set clist [$t children $n]
00331     if {[llength $clist] == 1} {
00332     # Implicit cutting out of chains.
00333 
00334     CopyBlock $t $n [lindex $clist 0]
00335     return
00336     }
00337 
00338     set out ""
00339     foreach c $clist {
00340     set sop   [$t get $c op]
00341     set sub   [$t get $c TEXT]
00342     WrapParens / $sop sub
00343     append out "/ [::textutil::indent $sub "  " 1]" \n
00344     }
00345 
00346     SetBlock $t $n " [string range $out 1 end]"
00347     return
00348 }
00349 
00350 ret  ::page::gen::peg::canon::WrapParens (type op , type sop , type tvar) {
00351     if {[Paren $op $sop]} {
00352     upvar 1 $tvar text
00353     set text "([::textutil::indent $text " " 1])"
00354     }
00355 }
00356 
00357 ret  ::page::gen::peg::canon::Paren (type op , type sop) {
00358     # sop is nested under op.
00359     # Parens are required if sop has a lower priority than op.
00360 
00361     return [expr {[Priority $sop] < [Priority $op]}]
00362 }
00363 
00364 ret  ::page::gen::peg::canon::Priority (type op) {
00365     switch -exact -- $op {
00366     t        -
00367     n        -
00368     ..       -
00369     alnum    -
00370     alpha    -
00371     dot      -
00372     epsilon  {return 4}
00373     ? -
00374     * -
00375     +        {return 3}
00376     ! -
00377     &        {return 2}
00378     x        {return 1}
00379     /        {return 0}
00380     }
00381     return -code error "Internal error, bad operator \"$op\""
00382 }
00383 
00384 ret  ::page::gen::peg::canon::CopyBlock (type t , type n , type src) {
00385     $t set $n TEXT  [$t get $src TEXT]
00386     $t set $n W     [$t get $src W]
00387     $t set $n Wlast [$t get $src Wlast]
00388     $t set $n H     [$t get $src H]
00389     return
00390 }
00391 
00392 ret  ::page::gen::peg::canon::SetBlock (type t , type n , type text) {
00393     set text   [string trimright $text]
00394     set lines  [split $text \n]
00395     set height [llength $lines]
00396 
00397     if {$height > 1} {
00398     set max -1
00399     set ntext {}
00400 
00401     foreach line $lines {
00402         set line [string trimright $line]
00403         set l [string length $line]
00404         if {$l > $max} {set max $l}
00405         lappend ntext $line
00406         set wlast $l
00407     }
00408     set text  [join $ntext \n]
00409     set width $max
00410     } else {
00411     set width [string length $text]
00412     set wlast $width
00413     }
00414 
00415     $t set $n TEXT $text
00416     $t set $n W     $width
00417     $t set $n Wlast $wlast
00418     $t set $n H     $height
00419     return
00420 }
00421 
00422 ret  ::page::gen::peg::canon::AttrFieldLength (type modeset) {
00423     variable ms
00424     return  $ms($modeset,*)
00425 }
00426 
00427 if {0} {
00428     ret  ::page::gen::peg::canon::SHOW (type text , type w , type h) {
00429     set wl $w ; incr wl -1
00430     puts stderr "\t/$h"
00431     puts stderr "[textutil::indent $text \t|]"
00432     puts stderr "\t\\[string repeat "-" $wl]^ ($w)"
00433     return
00434     }
00435 }
00436 
00437 /*  ### ### ### ######### ######### #########*/
00438 /*  Internal. Strings.*/
00439 
00440 namespace ::page::gen::peg::canon {
00441     variable llen 80
00442     variable ms ; array  ms =  {
00443     dlmv,discard {void:  }
00444     dlmv,leaf    {leaf:  }
00445     dlmv,match   {match: }
00446     dlmv,value   {       }
00447     dlmv,*       7
00448 
00449     dlm,discard  {void:  }      dlv,discard  {void: }
00450     dlm,leaf     {leaf:  }      dlv,leaf     {leaf: }
00451     dlm,match    {match: }      dlv,value    {      }
00452     dlm,*        7          dlv,*        6
00453 
00454     dmv,discard  {void:  }      lmv,leaf     {leaf:  }
00455     dmv,match    {match: }      lmv,match    {match: }
00456     dmv,value    {       }      lmv,value    {       }
00457     dmv,*        7          lmv,*        7
00458 
00459     dl,discard   {void: }       dm,discard   {void:  }
00460     dl,leaf      {leaf: }       dm,match     {match: }
00461     dl,*         6          dm,*         7
00462 
00463     lm,leaf      {leaf:  }      dv,discard   {void: }
00464     lm,match     {match: }      dv,value     {      }
00465     lm,*         7          dv,*         6
00466 
00467     lv,leaf      {leaf: }       mv,match     {match: }
00468     lv,value     {      }       mv,value     {       }
00469     lv,*         6          mv,*         7
00470 
00471     d,discard    {void: }       d,*       6
00472     l,leaf       {leaf: }       l,*       6
00473     m,match      {match: }      m,*       7
00474     v,value      {}         v,*       0
00475     }
00476 }
00477 
00478 /*  ### ### ### ######### ######### #########*/
00479 /*  Ready*/
00480 
00481 package provide page::gen::peg::canon 0.1
00482 

Generated on 21 Sep 2010 for Gui by  doxygen 1.6.1