peg_interp.tcl
Go to the documentation of this file.00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
00019 
00020 
00021 package require grammar::me::tcl
00022 
00023 
00024 
00025 
00026 namespace ::grammar::peg::interp {
00027     
00028 
00029     namespace import ::grammar::me::tcl::*
00030     upvar 
00031 }
00032 
00033 
00034 
00035 
00036 ret  ::grammar::peg::interp::setup (type peg) {
00037     variable ru
00038     variable mo
00039     variable se
00040 
00041     if {![$peg is valid]} {
00042         return -code error "Cannot initialize interpreter for invalid grammar"
00043     }
00044     set se [$peg start]
00045     foreach s [$peg nonterminals] {
00046         set ru($s) [$peg nonterminal rule $s]
00047         set mo($s) [$peg nonterminal mode $s]
00048     }
00049 
00050     #parray mo
00051     return
00052 }
00053 
00054 ret  ::grammar::peg::interp::parse (type nxcmd , type emvar , type astvar) {
00055     variable ok
00056     variable se
00057 
00058     upvar 1 $emvar emsg $astvar ast
00059 
00060     init $nxcmd
00061 
00062     MatchExpr $se
00063     isv_nonterminal_reduce ALL -1
00064     set ast [sv]
00065     if {!$ok} {
00066         foreach {l m} [ier_get] break
00067         lappend l [lc $l]
00068         set emsg [list $l $m]
00069     }
00070 
00071     return $ok
00072 }
00073 
00074 
00075 
00076 
00077 ret  ::grammar::peg::interp::MatchExpr (type e) {
00078     variable ok
00079     variable mode
00080     variable mo
00081     variable ru
00082 
00083     set op [lindex $e 0]
00084     set ar [lrange $e 1 end]
00085 
00086     switch -exact -- $op {
00087         epsilon {
00088             # No input to match, nor consume. Match always.
00089             iok_ok
00090         }
00091         dot {
00092             # Match and consume one character. No matter which
00093             # character. Fails only when reaching eof. Does not
00094             # consume input on failure.
00095             
00096             ict_advance "Expected any character (got EOF)"
00097             if {$ok && ($mode eq "value")} {isv_terminal}
00098         }
00099         alnum - alpha {
00100             ict_advance            "Expected <$op> (got EOF)"
00101             if {!$ok} return
00102 
00103             ict_match_tokclass $op "Expected <$op>"
00104             if {$ok && ($mode eq "value")} {isv_terminal}
00105         }
00106         t {
00107             # Match and consume one specific character. Fails if
00108             # the character at the location is not what was
00109             # expected. Does not consume input on failure.
00110 
00111             set ch [lindex $ar 0]
00112 
00113             ict_advance     "Expected $ch (got EOF)"
00114             if {!$ok} return
00115 
00116             ict_match_token "Expected $ch"
00117             if {$ok && ($mode eq "value")} {isv_terminal}
00118         }
00119         .. {
00120             # Match and consume one character, if in the specified
00121             # range. Fails if the read character is outside of the
00122             # range. Does not consume input on failure.
00123 
00124             foreach {chbegin chend} $ar break
00125 
00126             ict_advance                        "Expected \[$chbegin .. $chend\] (got EOF)"
00127             if {!$ok} return
00128 
00129             ict_match_tokrange $chbegin $chend "Expected \[$chbegin .. $chend\]"
00130             if {$ok && ($mode eq "value")} {isv_terminal}
00131         }
00132         n {
00133             # To match a nonterminal in the input we match its
00134             # parsing expression. This can be cut short if the
00135             # necessary information can be obtained from the memo
00136             # cache. Does not consume input on failure.
00137 
00138             set nt [lindex $ar 0]
00139             set savemode $mode
00140             set mode $mo($nt)
00141 
00142             if {[inc_restore $nt]} {
00143                 if {$ok && ($mode ne "discard")} ias_push
00144                 set mode $savemode
00145                 return
00146             }
00147 
00148             set pos [icl_get]
00149             set mrk [ias_mark]
00150 
00151             MatchExpr $ru($nt)
00152 
00153             # Generate semantic value, based on mode.
00154             if {$mode eq "value"} {
00155                 isv_nonterminal_reduce $nt $pos $mrk
00156             } elseif {$mode eq "match"} {
00157                 isv_nonterminal_range  $nt $pos
00158             } elseif {$mode eq "leaf"} {
00159                 isv_nonterminal_leaf   $nt $pos
00160             } else {
00161                 # mode eq "discard"
00162                 isv_clear
00163             }
00164             inc_save $nt $pos
00165 
00166             # AST operations ...
00167             ias_pop2mark $mrk
00168             if {$ok && ($mode ne "discard")} ias_push
00169 
00170             set mode $savemode
00171             # Even if match is ok.
00172         ier_nonterminal "Expected $nt" $pos
00173         }
00174         & {
00175             # Lookahead predicate. And. Matches the expression
00176             # against the input and returns match result. Never
00177             # consumes any input.
00178 
00179             set pos [icl_get]
00180 
00181             MatchExpr [lindex $ar 0]
00182 
00183             icl_rewind $pos
00184             return
00185         }
00186         ! {
00187             # Negated lookahead predicate. Matches the expression
00188             # against the input and returns the negated match
00189             # result. Never consumes any input.
00190 
00191             set pos [icl_get]
00192             set mrk [ias_mark]
00193             
00194             MatchExpr [lindex $ar 0]
00195 
00196             if {$ok} {ias_pop2mark $mrk}
00197             icl_rewind $pos
00198 
00199             iok_negate
00200             return
00201         }
00202         * {
00203             # Zero or more repetitions. This consumes as much
00204             # input as it was able to match the sub
00205             # expression. The expresion as a whole always matches,
00206             # even if the sub expression fails (zero repetition).
00207 
00208             set sub [lindex $ar 0]
00209 
00210             while {1} {
00211                 set pos [icl_get]
00212 
00213                 set old [ier_get]
00214                 MatchExpr $sub
00215                 ier_merge $old
00216 
00217                 if {$ok} continue
00218         break
00219             }
00220 
00221         icl_rewind $pos
00222         iok_ok
00223         return
00224         }
00225         + {
00226             # One or more repetition. Like *, except for one match
00227             # at the front which has to match for success. This
00228             # expression can fail. It will consume only as much
00229             # input as it was able to match.
00230 
00231             set sub [lindex $ar 0]
00232 
00233             set pos [icl_get]
00234 
00235             MatchExpr $sub
00236             if {!$ok} {
00237                 icl_rewind $pos
00238                 return
00239             }
00240 
00241             while {1} {
00242                 set pos [icl_get]
00243 
00244                 set old [ier_get]
00245                 MatchExpr $sub
00246                 ier_merge $old
00247 
00248                 if {$ok} continue
00249         break
00250             }
00251 
00252         icl_rewind $pos
00253         iok_ok
00254         return
00255         }
00256         ? {
00257             # Optional matching. Tries to match the sub
00258             # expression. Will never fail, even if the sub
00259             # expression is not matching. Consumes only input as
00260             # it could match in the sub expression. Like *, but
00261             # without the repetition.
00262 
00263             set pos [icl_get]
00264 
00265         set old [ier_get]
00266             MatchExpr [lindex $ar 0]
00267         ier_merge $old
00268 
00269             if {!$ok} {
00270                 icl_rewind $pos
00271                 iok_ok
00272             }
00273             return
00274         }
00275         x {
00276             # Sequence. Matches each sub expression in turn, each
00277             # consuming input. In case of failure by one of the
00278             # sequence elements nothing is consumed at all.
00279 
00280             set pos [icl_get]
00281             set mrk [ias_mark]
00282             ier_clear
00283 
00284             foreach e $ar {
00285 
00286                 set old [ier_get]
00287                 MatchExpr $e
00288                 ier_merge $old
00289 
00290                 if {!$ok} {
00291                     ias_pop2mark $mrk
00292                     icl_rewind $pos
00293                     return
00294                 }
00295             }
00296             # OK
00297             return
00298         }
00299         / {
00300             # Choice. Matches each sub expression in turn, always
00301             # starting from the current location. Nothing is
00302             # consumed if all branches fail. Consumes as much as
00303             # was consumed by the matching branch.
00304 
00305             set pos [icl_get]
00306             set mrk [ias_mark]
00307 
00308             ier_clear
00309             foreach e $ar {
00310 
00311                 set old [ier_get]
00312                 MatchExpr $e
00313                 ier_merge $old
00314 
00315                 if {!$ok} {
00316                     ias_pop2mark $mrk
00317                     icl_rewind $pos
00318                     continue
00319                 }
00320                 return
00321             }
00322             # FAIL
00323             iok_fail
00324             return
00325         }
00326     }
00327 }
00328 
00329 
00330 
00331 
00332 namespace ::grammar::peg::interp {
00333     
00334     
00335     
00336 
00337     variable se {} ; 
00338     variable ru    ; 
00339     variable mo    ; 
00340 
00341     variable mode value ; 
00342 
00343     array  ru =  {}
00344     array  mo =  {}
00345 }
00346 
00347 
00348 
00349 
00350 package provide grammar::peg::interp 0.1
00351