peg_interp.tcl

Go to the documentation of this file.
00001 /*  -*- tcl -*-*/
00002 /* */
00003 /*  Copyright (c) 2005 by Andreas Kupries <andreas_kupries@users.sourceforge.net>*/
00004 /*  Grammar / Parsing Expression Grammar / Interpreter (Namespace based)*/
00005 
00006 /*  ### ### ### ######### ######### #########*/
00007 /*  Package description*/
00008 
00009 /*  The instances of this class match an input provided by a buffer to*/
00010 /*  a parsing expression grammar provided by a peg container. The*/
00011 /*  matching process is interpretative, i.e. expressions are matched on*/
00012 /*  the fly and multiple as they are encountered. The interpreter*/
00013 /*  operates in pull-push mode, i.e. the interpreter object is in*/
00014 /*  charge and reads the character stream from the buffer as it needs,*/
00015 /*  and returns with the result of the match either when encountering*/
00016 /*  an error, or when the match was successful.*/
00017 
00018 /*  ### ### ### ######### ######### #########*/
00019 /*  Requisites*/
00020 
00021 package require grammar::me::tcl
00022 
00023 /*  ### ### ### ######### ######### #########*/
00024 /*  Implementation*/
00025 
00026 namespace ::grammar::peg::interp {
00027     /*  Import the virtual machine for matching.*/
00028 
00029     namespace import ::grammar::me::tcl::*
00030     upvar /* 0 ::grammar::me::tcl::ok ok*/
00031 }
00032 
00033 /*  ### ### ### ######### ######### #########*/
00034 /*  Instance API Implementation.*/
00035 
00036 ret  ::grammar::peg::interp::setup (type peg) {
00037     variable ru
00038     variable mo
00039     variable se
00040 
00041     if {![$peg is valid]} {
00042         return -code error "Cannot initialize interpreter for invalid grammar"
00043     }
00044     set se [$peg start]
00045     foreach s [$peg nonterminals] {
00046         set ru($s) [$peg nonterminal rule $s]
00047         set mo($s) [$peg nonterminal mode $s]
00048     }
00049 
00050     #parray mo
00051     return
00052 }
00053 
00054 ret  ::grammar::peg::interp::parse (type nxcmd , type emvar , type astvar) {
00055     variable ok
00056     variable se
00057 
00058     upvar 1 $emvar emsg $astvar ast
00059 
00060     init $nxcmd
00061 
00062     MatchExpr $se
00063     isv_nonterminal_reduce ALL -1
00064     set ast [sv]
00065     if {!$ok} {
00066         foreach {l m} [ier_get] break
00067         lappend l [lc $l]
00068         set emsg [list $l $m]
00069     }
00070 
00071     return $ok
00072 }
00073 
00074 /*  ### ### ### ######### ######### #########*/
00075 /*  Internal helper methods*/
00076 
00077 ret  ::grammar::peg::interp::MatchExpr (type e) {
00078     variable ok
00079     variable mode
00080     variable mo
00081     variable ru
00082 
00083     set op [lindex $e 0]
00084     set ar [lrange $e 1 end]
00085 
00086     switch -exact -- $op {
00087         epsilon {
00088             # No input to match, nor consume. Match always.
00089             iok_ok
00090         }
00091         dot {
00092             # Match and consume one character. No matter which
00093             # character. Fails only when reaching eof. Does not
00094             # consume input on failure.
00095             
00096             ict_advance "Expected any character (got EOF)"
00097             if {$ok && ($mode eq "value")} {isv_terminal}
00098         }
00099         alnum - alpha {
00100             ict_advance            "Expected <$op> (got EOF)"
00101             if {!$ok} return
00102 
00103             ict_match_tokclass $op "Expected <$op>"
00104             if {$ok && ($mode eq "value")} {isv_terminal}
00105         }
00106         t {
00107             # Match and consume one specific character. Fails if
00108             # the character at the location is not what was
00109             # expected. Does not consume input on failure.
00110 
00111             set ch [lindex $ar 0]
00112 
00113             ict_advance     "Expected $ch (got EOF)"
00114             if {!$ok} return
00115 
00116             ict_match_token "Expected $ch"
00117             if {$ok && ($mode eq "value")} {isv_terminal}
00118         }
00119         .. {
00120             # Match and consume one character, if in the specified
00121             # range. Fails if the read character is outside of the
00122             # range. Does not consume input on failure.
00123 
00124             foreach {chbegin chend} $ar break
00125 
00126             ict_advance                        "Expected \[$chbegin .. $chend\] (got EOF)"
00127             if {!$ok} return
00128 
00129             ict_match_tokrange $chbegin $chend "Expected \[$chbegin .. $chend\]"
00130             if {$ok && ($mode eq "value")} {isv_terminal}
00131         }
00132         n {
00133             # To match a nonterminal in the input we match its
00134             # parsing expression. This can be cut short if the
00135             # necessary information can be obtained from the memo
00136             # cache. Does not consume input on failure.
00137 
00138             set nt [lindex $ar 0]
00139             set savemode $mode
00140             set mode $mo($nt)
00141 
00142             if {[inc_restore $nt]} {
00143                 if {$ok && ($mode ne "discard")} ias_push
00144                 set mode $savemode
00145                 return
00146             }
00147 
00148             set pos [icl_get]
00149             set mrk [ias_mark]
00150 
00151             MatchExpr $ru($nt)
00152 
00153             # Generate semantic value, based on mode.
00154             if {$mode eq "value"} {
00155                 isv_nonterminal_reduce $nt $pos $mrk
00156             } elseif {$mode eq "match"} {
00157                 isv_nonterminal_range  $nt $pos
00158             } elseif {$mode eq "leaf"} {
00159                 isv_nonterminal_leaf   $nt $pos
00160             } else {
00161                 # mode eq "discard"
00162                 isv_clear
00163             }
00164             inc_save $nt $pos
00165 
00166             # AST operations ...
00167             ias_pop2mark $mrk
00168             if {$ok && ($mode ne "discard")} ias_push
00169 
00170             set mode $savemode
00171             # Even if match is ok.
00172         ier_nonterminal "Expected $nt" $pos
00173         }
00174         & {
00175             # Lookahead predicate. And. Matches the expression
00176             # against the input and returns match result. Never
00177             # consumes any input.
00178 
00179             set pos [icl_get]
00180 
00181             MatchExpr [lindex $ar 0]
00182 
00183             icl_rewind $pos
00184             return
00185         }
00186         ! {
00187             # Negated lookahead predicate. Matches the expression
00188             # against the input and returns the negated match
00189             # result. Never consumes any input.
00190 
00191             set pos [icl_get]
00192             set mrk [ias_mark]
00193             
00194             MatchExpr [lindex $ar 0]
00195 
00196             if {$ok} {ias_pop2mark $mrk}
00197             icl_rewind $pos
00198 
00199             iok_negate
00200             return
00201         }
00202         * {
00203             # Zero or more repetitions. This consumes as much
00204             # input as it was able to match the sub
00205             # expression. The expresion as a whole always matches,
00206             # even if the sub expression fails (zero repetition).
00207 
00208             set sub [lindex $ar 0]
00209 
00210             while {1} {
00211                 set pos [icl_get]
00212 
00213                 set old [ier_get]
00214                 MatchExpr $sub
00215                 ier_merge $old
00216 
00217                 if {$ok} continue
00218         break
00219             }
00220 
00221         icl_rewind $pos
00222         iok_ok
00223         return
00224         }
00225         + {
00226             # One or more repetition. Like *, except for one match
00227             # at the front which has to match for success. This
00228             # expression can fail. It will consume only as much
00229             # input as it was able to match.
00230 
00231             set sub [lindex $ar 0]
00232 
00233             set pos [icl_get]
00234 
00235             MatchExpr $sub
00236             if {!$ok} {
00237                 icl_rewind $pos
00238                 return
00239             }
00240 
00241             while {1} {
00242                 set pos [icl_get]
00243 
00244                 set old [ier_get]
00245                 MatchExpr $sub
00246                 ier_merge $old
00247 
00248                 if {$ok} continue
00249         break
00250             }
00251 
00252         icl_rewind $pos
00253         iok_ok
00254         return
00255         }
00256         ? {
00257             # Optional matching. Tries to match the sub
00258             # expression. Will never fail, even if the sub
00259             # expression is not matching. Consumes only input as
00260             # it could match in the sub expression. Like *, but
00261             # without the repetition.
00262 
00263             set pos [icl_get]
00264 
00265         set old [ier_get]
00266             MatchExpr [lindex $ar 0]
00267         ier_merge $old
00268 
00269             if {!$ok} {
00270                 icl_rewind $pos
00271                 iok_ok
00272             }
00273             return
00274         }
00275         x {
00276             # Sequence. Matches each sub expression in turn, each
00277             # consuming input. In case of failure by one of the
00278             # sequence elements nothing is consumed at all.
00279 
00280             set pos [icl_get]
00281             set mrk [ias_mark]
00282             ier_clear
00283 
00284             foreach e $ar {
00285 
00286                 set old [ier_get]
00287                 MatchExpr $e
00288                 ier_merge $old
00289 
00290                 if {!$ok} {
00291                     ias_pop2mark $mrk
00292                     icl_rewind $pos
00293                     return
00294                 }
00295             }
00296             # OK
00297             return
00298         }
00299         / {
00300             # Choice. Matches each sub expression in turn, always
00301             # starting from the current location. Nothing is
00302             # consumed if all branches fail. Consumes as much as
00303             # was consumed by the matching branch.
00304 
00305             set pos [icl_get]
00306             set mrk [ias_mark]
00307 
00308             ier_clear
00309             foreach e $ar {
00310 
00311                 set old [ier_get]
00312                 MatchExpr $e
00313                 ier_merge $old
00314 
00315                 if {!$ok} {
00316                     ias_pop2mark $mrk
00317                     icl_rewind $pos
00318                     continue
00319                 }
00320                 return
00321             }
00322             # FAIL
00323             iok_fail
00324             return
00325         }
00326     }
00327 }
00328 
00329 /*  ### ### ### ######### ######### #########*/
00330 /*  Interpreter data structures.*/
00331 
00332 namespace ::grammar::peg::interp {
00333     /*  Start expression.*/
00334     /*  Map from nonterminals to their expressions.*/
00335     /*  Reference to internal memo cache.*/
00336 
00337     variable se {} ; /*  Start expression.*/
00338     variable ru    ; /*  Nonterminals and rule map.*/
00339     variable mo    ; /*  Nonterminal modes.*/
00340 
00341     variable mode value ; /*  Matching mode.*/
00342 
00343     array  ru =  {}
00344     array  mo =  {}
00345 }
00346 
00347 /*  ### ### ### ######### ######### #########*/
00348 /*  Package Management*/
00349 
00350 package provide grammar::peg::interp 0.1
00351 

Generated on 21 Sep 2010 for Gui by  doxygen 1.6.1