peg_interp.tcl
Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 package require grammar::me::tcl
00022
00023
00024
00025
00026 namespace ::grammar::peg::interp {
00027
00028
00029 namespace import ::grammar::me::tcl::*
00030 upvar
00031 }
00032
00033
00034
00035
00036 ret ::grammar::peg::interp::setup (type peg) {
00037 variable ru
00038 variable mo
00039 variable se
00040
00041 if {![$peg is valid]} {
00042 return -code error "Cannot initialize interpreter for invalid grammar"
00043 }
00044 set se [$peg start]
00045 foreach s [$peg nonterminals] {
00046 set ru($s) [$peg nonterminal rule $s]
00047 set mo($s) [$peg nonterminal mode $s]
00048 }
00049
00050 #parray mo
00051 return
00052 }
00053
00054 ret ::grammar::peg::interp::parse (type nxcmd , type emvar , type astvar) {
00055 variable ok
00056 variable se
00057
00058 upvar 1 $emvar emsg $astvar ast
00059
00060 init $nxcmd
00061
00062 MatchExpr $se
00063 isv_nonterminal_reduce ALL -1
00064 set ast [sv]
00065 if {!$ok} {
00066 foreach {l m} [ier_get] break
00067 lappend l [lc $l]
00068 set emsg [list $l $m]
00069 }
00070
00071 return $ok
00072 }
00073
00074
00075
00076
00077 ret ::grammar::peg::interp::MatchExpr (type e) {
00078 variable ok
00079 variable mode
00080 variable mo
00081 variable ru
00082
00083 set op [lindex $e 0]
00084 set ar [lrange $e 1 end]
00085
00086 switch -exact -- $op {
00087 epsilon {
00088 # No input to match, nor consume. Match always.
00089 iok_ok
00090 }
00091 dot {
00092 # Match and consume one character. No matter which
00093 # character. Fails only when reaching eof. Does not
00094 # consume input on failure.
00095
00096 ict_advance "Expected any character (got EOF)"
00097 if {$ok && ($mode eq "value")} {isv_terminal}
00098 }
00099 alnum - alpha {
00100 ict_advance "Expected <$op> (got EOF)"
00101 if {!$ok} return
00102
00103 ict_match_tokclass $op "Expected <$op>"
00104 if {$ok && ($mode eq "value")} {isv_terminal}
00105 }
00106 t {
00107 # Match and consume one specific character. Fails if
00108 # the character at the location is not what was
00109 # expected. Does not consume input on failure.
00110
00111 set ch [lindex $ar 0]
00112
00113 ict_advance "Expected $ch (got EOF)"
00114 if {!$ok} return
00115
00116 ict_match_token "Expected $ch"
00117 if {$ok && ($mode eq "value")} {isv_terminal}
00118 }
00119 .. {
00120 # Match and consume one character, if in the specified
00121 # range. Fails if the read character is outside of the
00122 # range. Does not consume input on failure.
00123
00124 foreach {chbegin chend} $ar break
00125
00126 ict_advance "Expected \[$chbegin .. $chend\] (got EOF)"
00127 if {!$ok} return
00128
00129 ict_match_tokrange $chbegin $chend "Expected \[$chbegin .. $chend\]"
00130 if {$ok && ($mode eq "value")} {isv_terminal}
00131 }
00132 n {
00133 # To match a nonterminal in the input we match its
00134 # parsing expression. This can be cut short if the
00135 # necessary information can be obtained from the memo
00136 # cache. Does not consume input on failure.
00137
00138 set nt [lindex $ar 0]
00139 set savemode $mode
00140 set mode $mo($nt)
00141
00142 if {[inc_restore $nt]} {
00143 if {$ok && ($mode ne "discard")} ias_push
00144 set mode $savemode
00145 return
00146 }
00147
00148 set pos [icl_get]
00149 set mrk [ias_mark]
00150
00151 MatchExpr $ru($nt)
00152
00153 # Generate semantic value, based on mode.
00154 if {$mode eq "value"} {
00155 isv_nonterminal_reduce $nt $pos $mrk
00156 } elseif {$mode eq "match"} {
00157 isv_nonterminal_range $nt $pos
00158 } elseif {$mode eq "leaf"} {
00159 isv_nonterminal_leaf $nt $pos
00160 } else {
00161 # mode eq "discard"
00162 isv_clear
00163 }
00164 inc_save $nt $pos
00165
00166 # AST operations ...
00167 ias_pop2mark $mrk
00168 if {$ok && ($mode ne "discard")} ias_push
00169
00170 set mode $savemode
00171 # Even if match is ok.
00172 ier_nonterminal "Expected $nt" $pos
00173 }
00174 & {
00175 # Lookahead predicate. And. Matches the expression
00176 # against the input and returns match result. Never
00177 # consumes any input.
00178
00179 set pos [icl_get]
00180
00181 MatchExpr [lindex $ar 0]
00182
00183 icl_rewind $pos
00184 return
00185 }
00186 ! {
00187 # Negated lookahead predicate. Matches the expression
00188 # against the input and returns the negated match
00189 # result. Never consumes any input.
00190
00191 set pos [icl_get]
00192 set mrk [ias_mark]
00193
00194 MatchExpr [lindex $ar 0]
00195
00196 if {$ok} {ias_pop2mark $mrk}
00197 icl_rewind $pos
00198
00199 iok_negate
00200 return
00201 }
00202 * {
00203 # Zero or more repetitions. This consumes as much
00204 # input as it was able to match the sub
00205 # expression. The expresion as a whole always matches,
00206 # even if the sub expression fails (zero repetition).
00207
00208 set sub [lindex $ar 0]
00209
00210 while {1} {
00211 set pos [icl_get]
00212
00213 set old [ier_get]
00214 MatchExpr $sub
00215 ier_merge $old
00216
00217 if {$ok} continue
00218 break
00219 }
00220
00221 icl_rewind $pos
00222 iok_ok
00223 return
00224 }
00225 + {
00226 # One or more repetition. Like *, except for one match
00227 # at the front which has to match for success. This
00228 # expression can fail. It will consume only as much
00229 # input as it was able to match.
00230
00231 set sub [lindex $ar 0]
00232
00233 set pos [icl_get]
00234
00235 MatchExpr $sub
00236 if {!$ok} {
00237 icl_rewind $pos
00238 return
00239 }
00240
00241 while {1} {
00242 set pos [icl_get]
00243
00244 set old [ier_get]
00245 MatchExpr $sub
00246 ier_merge $old
00247
00248 if {$ok} continue
00249 break
00250 }
00251
00252 icl_rewind $pos
00253 iok_ok
00254 return
00255 }
00256 ? {
00257 # Optional matching. Tries to match the sub
00258 # expression. Will never fail, even if the sub
00259 # expression is not matching. Consumes only input as
00260 # it could match in the sub expression. Like *, but
00261 # without the repetition.
00262
00263 set pos [icl_get]
00264
00265 set old [ier_get]
00266 MatchExpr [lindex $ar 0]
00267 ier_merge $old
00268
00269 if {!$ok} {
00270 icl_rewind $pos
00271 iok_ok
00272 }
00273 return
00274 }
00275 x {
00276 # Sequence. Matches each sub expression in turn, each
00277 # consuming input. In case of failure by one of the
00278 # sequence elements nothing is consumed at all.
00279
00280 set pos [icl_get]
00281 set mrk [ias_mark]
00282 ier_clear
00283
00284 foreach e $ar {
00285
00286 set old [ier_get]
00287 MatchExpr $e
00288 ier_merge $old
00289
00290 if {!$ok} {
00291 ias_pop2mark $mrk
00292 icl_rewind $pos
00293 return
00294 }
00295 }
00296 # OK
00297 return
00298 }
00299 / {
00300 # Choice. Matches each sub expression in turn, always
00301 # starting from the current location. Nothing is
00302 # consumed if all branches fail. Consumes as much as
00303 # was consumed by the matching branch.
00304
00305 set pos [icl_get]
00306 set mrk [ias_mark]
00307
00308 ier_clear
00309 foreach e $ar {
00310
00311 set old [ier_get]
00312 MatchExpr $e
00313 ier_merge $old
00314
00315 if {!$ok} {
00316 ias_pop2mark $mrk
00317 icl_rewind $pos
00318 continue
00319 }
00320 return
00321 }
00322 # FAIL
00323 iok_fail
00324 return
00325 }
00326 }
00327 }
00328
00329
00330
00331
00332 namespace ::grammar::peg::interp {
00333
00334
00335
00336
00337 variable se {} ;
00338 variable ru ;
00339 variable mo ;
00340
00341 variable mode value ;
00342
00343 array ru = {}
00344 array mo = {}
00345 }
00346
00347
00348
00349
00350 package provide grammar::peg::interp 0.1
00351