00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 package require sgml 1.9
00022
00023 package require uri 1.1
00024
00025 package provide sgmlparser 1.1
00026
00027 namespace sgml {
00028 namespace export tokenise parseEvent
00029
00030 namespace export parseDTD
00031
00032
00033
00034
00035
00036 variable ParseEventNum
00037 if {![info exists ParseEventNum]} {
00038 ParseEventNum = 0
00039 }
00040 variable ParseDTDnum
00041 if {![info exists ParseDTDNum]} {
00042 ParseDTDNum = 0
00043 }
00044
00045 variable declExpr [cl $::sgml::Wsp]*([cl ^$::sgml::Wsp]+)[cl $::sgml::Wsp]*([cl ^>]*)
00046 variable EntityExpr [cl $::sgml::Wsp]*(%[cl $::sgml::Wsp])?[cl $::sgml::Wsp]*($::sgml::Name)[cl $::sgml::Wsp]+(.*)
00047
00048
00049
00050 variable MarkupDeclExpr <[cl $::sgml::Wsp]*([cl ^$::sgml::Wsp>]+)[cl $::sgml::Wsp]*([cl ^>]*)>
00051 variable MarkupDeclSub "\} {\\1} {\\2} \{"
00052
00053 variable ExternalEntityExpr ^(PUBLIC|SYSTEM)[cl $::sgml::Wsp]+("|')(.*?)\\2([cl $::sgml::Wsp]+("|')(.*?)\\2)?([cl $::sgml::Wsp]+NDATA[cl $::sgml::Wsp]+($::xml::Name))?\$
00054
00055 variable StdOptions
00056 array StdOptions = [list \
00057 -elementstartcommand [namespace current]::noop \
00058 -elementendcommand [namespace current]::noop \
00059 -characterdatacommand [namespace current]::noop \
00060 -ret essinginstructioncommand [namespace current]::noop \
00061 -externalentitycommand () \
00062 -xmldeclcommand [namespace current]::noop \
00063 -doctypecommand [namespace current]::noop \
00064 -commentcommand [namespace current]::noop \
00065 -entitydeclcommand [namespace current]::noop \
00066 -unparsedentitydeclcommand [namespace current]::noop \
00067 -parameterentitydeclcommand [namespace current]::noop \
00068 -notationdeclcommand [namespace current]::noop \
00069 -elementdeclcommand [namespace current]::noop \
00070 -attlistdeclcommand [namespace current]::noop \
00071 -paramentityparsing 1 \
00072 -defaultexpandinternalentities 1 \
00073 -startdoctypedeclcommand [namespace current]::noop \
00074 -enddoctypedeclcommand [namespace current]::noop \
00075 -entityreferencecommand {} \
00076 -warningcommand [namespace current]::noop \
00077 -errorcommand [namespace current]::Error \
00078 -final 1 \
00079 -validate 0 \
00080 -baseuri {} \
00081 -name {} \
00082 -cmd {} \
00083 -emptyelement [namespace current]::EmptyElement \
00084 -parseattributelistcommand [namespace current]::noop \
00085 -parseentitydeclcommand [namespace current]::noop \
00086 -normalize 1 \
00087 -internaldtd {} \
00088 -reportempty 0 \
00089 -ignorewhitespace 0 \
00090 ]
00091 }
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105
00106
00107
00108
00109
00110
00111 ret sgml::tokenise (type sgml , type elemExpr , type elemSub , type args) {
00112 array set options {-final 1}
00113 array set options $args
00114 set options(-final) [Boolean $options(-final)]
00115
00116 # If the data is not final then there must be a variable to store
00117 # unused data.
00118 if {!$options(-final) && ![info exists options(-statevariable)]} {
00119 return -code error {option "-statevariable" required if not final}
00120 }
00121
00122 # Pre-process stage
00123 #
00124 # Extract the internal DTD subset, if any
00125
00126 catch {upvar #0 $options(-internaldtdvariable) dtd}
00127 if {[regexp {<!DOCTYPE[^[<]+\[([^]]+)\]} $sgml discard dtd]} {
00128 regsub {(<!DOCTYPE[^[<]+)(\[[^]]+\])} $sgml {\1\&xml:intdtd;} sgml
00129 }
00130
00131 # Protect Tcl special characters
00132 regsub -all {([{}\\])} $sgml {\\\1} sgml
00133
00134 # Do the translation
00135
00136 if {[info exists options(-statevariable)]} {
00137 # Mats: Several rewrites here to handle -final 0 option.
00138 # If any cached unparsed xml (state(leftover)), prepend it.
00139 upvar #0 $options(-statevariable) state
00140 if {[string length $state(leftover)]} {
00141 regsub -all $elemExpr $state(leftover)$sgml $elemSub sgml
00142 set state(leftover) {}
00143 } else {
00144 regsub -all $elemExpr $sgml $elemSub sgml
00145 }
00146 set sgml "{} {} {} \{$sgml\}"
00147
00148 # Performance note (Tcl 8.0):
00149 # Use of lindex, lreplace will cause parsing to list object
00150
00151 # This RE only fixes chopped inside tags, not chopped text.
00152 if {[regexp {^([^<]*)(<[^>]*$)} [lindex $sgml end] x text rest]} {
00153 set sgml [lreplace $sgml end end $text]
00154 # Mats: unmatched stuff means that it is chopped off. Cache it for next round.
00155 set state(leftover) $rest
00156 }
00157
00158 # Patch from bug report #596959, Marshall Rose
00159 if {[string compare [lindex $sgml 4] ""]} {
00160 set sgml [linsert $sgml 0 {} {} {} {} {}]
00161 }
00162
00163 } else {
00164
00165 # Performance note (Tcl 8.0):
00166 # In this case, no conversion to list object is performed
00167
00168 # Mats: This fails if not -final and $sgml is chopped off right in a tag.
00169 regsub -all $elemExpr $sgml $elemSub sgml
00170 set sgml "{} {} {} \{$sgml\}"
00171 }
00172
00173 return $sgml
00174
00175 }
00176
00177
00178
00179
00180
00181
00182
00183
00184
00185
00186
00187
00188
00189
00190
00191
00192
00193
00194
00195
00196
00197
00198
00199
00200
00201
00202
00203
00204
00205
00206
00207
00208
00209
00210
00211
00212
00213
00214
00215
00216
00217
00218
00219
00220
00221
00222
00223
00224
00225
00226
00227
00228
00229
00230
00231
00232
00233 ret sgml::parseEvent (type sgml , type args) {
00234 variable Wsp
00235 variable noWsp
00236 variable Nmtoken
00237 variable Name
00238 variable ParseEventNum
00239 variable StdOptions
00240
00241 array set options [array get StdOptions]
00242 catch {array set options $args}
00243
00244 # Mats:
00245 # If the data is not final then there must be a variable to persistently store the parse state.
00246 if {!$options(-final) && ![info exists options(-statevariable)]} {
00247 return -code error {option "-statevariable" required if not final}
00248 }
00249
00250 foreach {opt value} [array get options *command] {
00251 if {[string compare $opt "-externalentitycommand"] && ![string length $value]} {
00252 set options($opt) [namespace current]::noop
00253 }
00254 }
00255
00256 if {![info exists options(-statevariable)]} {
00257 set options(-statevariable) [namespace current]::ParseEvent[incr ParseEventNum]
00258 }
00259 if {![info exists options(entities)]} {
00260 set options(entities) [namespace current]::Entities$ParseEventNum
00261 array set $options(entities) [array get [namespace current]::EntityPredef]
00262 }
00263 if {![info exists options(extentities)]} {
00264 set options(extentities) [namespace current]::ExtEntities$ParseEventNum
00265 }
00266 if {![info exists options(parameterentities)]} {
00267 set options(parameterentities) [namespace current]::ParamEntities$ParseEventNum
00268 }
00269 if {![info exists options(externalparameterentities)]} {
00270 set options(externalparameterentities) [namespace current]::ExtParamEntities$ParseEventNum
00271 }
00272 if {![info exists options(elementdecls)]} {
00273 set options(elementdecls) [namespace current]::ElementDecls$ParseEventNum
00274 }
00275 if {![info exists options(attlistdecls)]} {
00276 set options(attlistdecls) [namespace current]::AttListDecls$ParseEventNum
00277 }
00278 if {![info exists options(notationdecls)]} {
00279 set options(notationdecls) [namespace current]::NotationDecls$ParseEventNum
00280 }
00281 if {![info exists options(namespaces)]} {
00282 set options(namespaces) [namespace current]::Namespaces$ParseEventNum
00283 }
00284
00285 # For backward-compatibility
00286 catch {set options(-baseuri) $options(-baseurl)}
00287
00288 # Choose an external entity resolver
00289
00290 if {![string length $options(-externalentitycommand)]} {
00291 if {$options(-validate)} {
00292 set options(-externalentitycommand) [namespace code ResolveEntity]
00293 } else {
00294 set options(-externalentitycommand) [namespace code noop]
00295 }
00296 }
00297
00298 upvar #0 $options(-statevariable) state
00299 upvar #0 $options(entities) entities
00300
00301 # Mats:
00302 # The problem is that the state is not maintained when -final 0 !
00303 # I've switched back to an older version here.
00304
00305 if {![info exists state(line)]} {
00306 # Initialise the state variable
00307 array set state {
00308 mode normal
00309 haveXMLDecl 0
00310 haveDocElement 0
00311 inDTD 0
00312 context {}
00313 stack {}
00314 line 0
00315 defaultNS {}
00316 defaultNSURI {}
00317 }
00318 }
00319
00320 foreach {tag close param text} $sgml {
00321
00322 # Keep track of lines in the input
00323 incr state(line) [regsub -all \n $param {} discard]
00324 incr state(line) [regsub -all \n $text {} discard]
00325
00326 # If the current mode is cdata or comment then we must undo what the
00327 # regsub has done to reconstitute the data
00328
00329 set empty {}
00330 switch $state(mode) {
00331 comment {
00332 # This had "[string length $param] && " as a guard -
00333 # can't remember why :-(
00334 if {[regexp ([cl ^-]*)--\$ $tag discard comm1]} {
00335 # end of comment (in tag)
00336 set tag {}
00337 set close {}
00338 set state(mode) normal
00339 DeProtect1 $options(-commentcommand) $state(commentdata)<$comm1
00340 unset state(commentdata)
00341 } elseif {[regexp ([cl ^-]*)--\$ $param discard comm1]} {
00342 # end of comment (in attributes)
00343 DeProtect1 $options(-commentcommand) $state(commentdata)<$close$tag>$comm1
00344 unset state(commentdata)
00345 set tag {}
00346 set param {}
00347 set close {}
00348 set state(mode) normal
00349 } elseif {[regexp ([cl ^-]*)-->(.*) $text discard comm1 text]} {
00350 # end of comment (in text)
00351 DeProtect1 $options(-commentcommand) $state(commentdata)<$close$tag$param>$comm1
00352 unset state(commentdata)
00353 set tag {}
00354 set param {}
00355 set close {}
00356 set state(mode) normal
00357 } else {
00358 # comment continues
00359 append state(commentdata) <$close$tag$param>$text
00360 continue
00361 }
00362 }
00363 cdata {
00364 if {[string length $param] && [regexp ([cl ^\]]*)\]\][cl $Wsp]*\$ $tag discard cdata1]} {
00365 # end of CDATA (in tag)
00366 PCDATA [array get options] $state(cdata)<[subst -nocommand -novariable $close$cdata1]
00367 set text [subst -novariable -nocommand $text]
00368 set tag {}
00369 unset state(cdata)
00370 set state(mode) normal
00371 } elseif {[regexp ([cl ^\]]*)\]\][cl $Wsp]*\$ $param discard cdata1]} {
00372 # end of CDATA (in attributes)
00373 PCDATA [array get options] $state(cdata)<[subst -nocommand -novariable $close$tag$cdata1]
00374 set text [subst -novariable -nocommand $text]
00375 set tag {}
00376 set param {}
00377 unset state(cdata)
00378 set state(mode) normal
00379 } elseif {[regexp (.*)\]\][cl $Wsp]*>(.*) $text discard cdata1 text]} {
00380 # end of CDATA (in text)
00381 PCDATA [array get options] $state(cdata)<[subst -nocommand -novariable $close$tag$param>$cdata1]
00382 set text [subst -novariable -nocommand $text]
00383 set tag {}
00384 set param {}
00385 set close {}
00386 unset state(cdata)
00387 set state(mode) normal
00388 } else {
00389 # CDATA continues
00390 append state(cdata) [subst -nocommand -novariable <$close$tag$param>$text]
00391 continue
00392 }
00393 }
00394 continue {
00395 # We're skipping elements looking for the close tag
00396 switch -glob -- [string length $tag],[regexp {^\?|!.*} $tag],$close {
00397 0,* {
00398 continue
00399 }
00400 *,0, {
00401 if {![string compare $tag $state(continue:tag)]} {
00402 set empty [uplevel #0 $options(-emptyelement) [list $tag $param $empty]]
00403 if {![string length $empty]} {
00404 incr state(continue:level)
00405 }
00406 }
00407 continue
00408 }
00409 *,0,/ {
00410 if {![string compare $tag $state(continue:tag)]} {
00411 incr state(continue:level) -1
00412 }
00413 if {!$state(continue:level)} {
00414 unset state(continue:tag)
00415 unset state(continue:level)
00416 set state(mode) {}
00417 }
00418 }
00419 default {
00420 continue
00421 }
00422 }
00423 }
00424 default {
00425 # The trailing slash on empty elements can't be automatically separated out
00426 # in the RE, so we must do it here.
00427 regexp (.*)(/)[cl $Wsp]*$ $param discard param empty
00428 }
00429 }
00430
00431 # default: normal mode
00432
00433 # Bug: if the attribute list has a right angle bracket then the empty
00434 # element marker will not be seen
00435
00436 set empty [uplevel #0 $options(-emptyelement) [list $tag $param $empty]]
00437
00438 switch -glob -- [string length $tag],[regexp {^\?|!.*} $tag],$close,$empty {
00439
00440 0,0,, {
00441 # Ignore empty tag - dealt with non-normal mode above
00442 }
00443 *,0,, {
00444
00445 # Start tag for an element.
00446
00447 # Check if the internal DTD entity is in an attribute value
00448 regsub -all &xml:intdtd\; $param \[$options(-internaldtd)\] param
00449
00450 set code [catch {ParseEvent:ElementOpen $tag $param [array get options]} msg]
00451 set state(haveDocElement) 1
00452 switch $code {
00453 0 {# OK}
00454 3 {
00455 # break
00456 return {}
00457 }
00458 4 {
00459 # continue
00460 # Remember this tag and look for its close
00461 set state(continue:tag) $tag
00462 set state(continue:level) 1
00463 set state(mode) continue
00464 continue
00465 }
00466 default {
00467 return -code $code -errorinfo $::errorInfo $msg
00468 }
00469 }
00470
00471 }
00472
00473 *,0,/, {
00474
00475 # End tag for an element.
00476
00477 set code [catch {ParseEvent:ElementClose $tag [array get options]} msg]
00478 switch $code {
00479 0 {# OK}
00480 3 {
00481 # break
00482 return {}
00483 }
00484 4 {
00485 # continue
00486 # skip sibling nodes
00487 set state(continue:tag) [lindex $state(stack) end]
00488 set state(continue:level) 1
00489 set state(mode) continue
00490 continue
00491 }
00492 default {
00493 return -code $code -errorinfo $::errorInfo $msg
00494 }
00495 }
00496
00497 }
00498
00499 *,0,,/ {
00500
00501 # Empty element
00502
00503 # The trailing slash sneaks through into the param variable
00504 regsub -all /[cl $::sgml::Wsp]*\$ $param {} param
00505
00506 set code [catch {ParseEvent:ElementOpen $tag $param [array get options] -empty 1} msg]
00507 set state(haveDocElement) 1
00508 switch $code {
00509 0 {# OK}
00510 3 {
00511 # break
00512 return {}
00513 }
00514 4 {
00515 # continue
00516 # Pretty useless since it closes straightaway
00517 }
00518 default {
00519 return -code $code -errorinfo $::errorInfo $msg
00520 }
00521 }
00522 set code [catch {ParseEvent:ElementClose $tag [array get options] -empty 1} msg]
00523 switch $code {
00524 0 {# OK}
00525 3 {
00526 # break
00527 return {}
00528 }
00529 4 {
00530 # continue
00531 # skip sibling nodes
00532 set state(continue:tag) [lindex $state(stack) end]
00533 set state(continue:level) 1
00534 set state(mode) continue
00535 continue
00536 }
00537 default {
00538 return -code $code -errorinfo $::errorInfo $msg
00539 }
00540 }
00541
00542 }
00543
00544 *,1,* {
00545 # Processing instructions or XML declaration
00546 switch -glob -- $tag {
00547
00548 {\?xml} {
00549 # XML Declaration
00550 if {$state(haveXMLDecl)} {
00551 uplevel #0 $options(-errorcommand) [list illegalcharacter "unexpected characters \"<$tag\" around line $state(line)"]
00552 } elseif {![regexp {\?$} $param]} {
00553 uplevel #0 $options(-errorcommand) [list missingcharacters "XML Declaration missing characters \"?>\" around line $state(line)"]
00554 } else {
00555
00556 # We can do the parsing in one step with Tcl 8.1 RE's
00557 # This has the benefit of performing better WF checking
00558
00559 set adv_re [format {^[%s]*version[%s]*=[%s]*("|')(-+|[a-zA-Z0-9_.:]+)\1([%s]+encoding[%s]*=[%s]*("|')([A-Za-z][-A-Za-z0-9._]*)\4)?([%s]*standalone[%s]*=[%s]*("|')(yes|no)\7)?[%s]*\?$} $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp]
00560
00561 if {[catch {regexp $adv_re $param discard delimiter version discard delimiter encoding discard delimiter standalone} matches]} {
00562 # Otherwise we must fallback to 8.0.
00563 # This won't detect certain well-formedness errors
00564
00565 # Get the version number
00566 if {[regexp [format {[%s]*version[%s]*=[%s]*"(-+|[a-zA-Z0-9_.:]+)"[%s]*} $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp] $param discard version] || [regexp [format {[%s]*version[%s]*=[%s]*'(-+|[a-zA-Z0-9_.:]+)'[%s]*} $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp] $param discard version]} {
00567 if {[string compare $version "1.0"]} {
00568 # Should we support future versions?
00569 # At least 1.X?
00570 uplevel #0 $options(-errorcommand) [list versionincompatibility "document XML version \"$version\" is incompatible with XML version 1.0"]
00571 }
00572 } else {
00573 uplevel #0 $options(-errorcommand) [list missingversion "XML Declaration missing version information around line $state(line)"]
00574 }
00575
00576 # Get the encoding declaration
00577 set encoding {}
00578 regexp [format {[%s]*encoding[%s]*=[%s]*"([A-Za-z]([A-Za-z0-9._]|-)*)"[%s]*} $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp] $param discard encoding
00579 regexp [format {[%s]*encoding[%s]*=[%s]*'([A-Za-z]([A-Za-z0-9._]|-)*)'[%s]*} $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp] $param discard encoding
00580
00581 # Get the standalone declaration
00582 set standalone {}
00583 regexp [format {[%s]*standalone[%s]*=[%s]*"(yes|no)"[%s]*} $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp] $param discard standalone
00584 regexp [format {[%s]*standalone[%s]*=[%s]*'(yes|no)'[%s]*} $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp] $param discard standalone
00585
00586 # Invoke the callback
00587 uplevel #0 $options(-xmldeclcommand) [list $version $encoding $standalone]
00588
00589 } elseif {$matches == 0} {
00590 uplevel #0 $options(-errorcommand) [list illformeddeclaration "XML Declaration not well-formed around line $state(line)"]
00591 } else {
00592
00593 # Invoke the callback
00594 uplevel #0 $options(-xmldeclcommand) [list $version $encoding $standalone]
00595
00596 }
00597
00598 }
00599
00600 }
00601
00602 {\?*} {
00603 # Processing instruction
00604 set tag [string range $tag 1 end]
00605 if {[regsub {\?$} $tag {} tag]} {
00606 if {[string length [string trim $param]]} {
00607 uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$param\" in processing instruction around line $state(line)"]
00608 }
00609 } elseif {![regexp ^$Name\$ $tag]} {
00610 uplevel #0 $options(-errorcommand) [list illegalcharacter "illegal character in processing instruction target \"$tag\""]
00611 } elseif {[regexp {[xX][mM][lL]} $tag]} {
00612 uplevel #0 $options(-errorcommand) [list illegalcharacters "characters \"xml\" not permitted in processing instruction target \"$tag\""]
00613 } elseif {![regsub {\?$} $param {} param]} {
00614 uplevel #0 $options(-errorcommand) [list missingquestion "PI: expected '?' character around line $state(line)"]
00615 }
00616 set code [catch {uplevel #0 $options(-processinginstructioncommand) [list $tag [string trimleft $param]]} msg]
00617 switch $code {
00618 0 {# OK}
00619 3 {
00620 # break
00621 return {}
00622 }
00623 4 {
00624 # continue
00625 # skip sibling nodes
00626 set state(continue:tag) [lindex $state(stack) end]
00627 set state(continue:level) 1
00628 set state(mode) continue
00629 continue
00630 }
00631 default {
00632 return -code $code -errorinfo $::errorInfo $msg
00633 }
00634 }
00635 }
00636
00637 !DOCTYPE {
00638 # External entity reference
00639 # This should move into xml.tcl
00640 # Parse the params supplied. Looking for Name, ExternalID and MarkupDecl
00641 set matched [regexp ^[cl $Wsp]*($Name)[cl $Wsp]*(.*) $param x state(doc_name) param]
00642 set state(doc_name) [Normalize $state(doc_name) $options(-normalize)]
00643 set externalID {}
00644 set pubidlit {}
00645 set systemlit {}
00646 set externalID {}
00647 if {[regexp -nocase ^[cl $Wsp]*(SYSTEM|PUBLIC)(.*) $param x id param]} {
00648 switch [string toupper $id] {
00649 SYSTEM {
00650 if {[regexp ^[cl $Wsp]+"([cl ^"]*)"(.*) $param x systemlit param] || [regexp ^[cl $Wsp]+'([cl ^']*)'(.*) $param x systemlit param]} {
00651 set externalID [list SYSTEM $systemlit] ;# "
00652 } else {
00653 uplevel #0 $options(-errorcommand) {syntaxerror {syntax error: SYSTEM identifier not followed by literal}}
00654 }
00655 }
00656 PUBLIC {
00657 if {[regexp ^[cl $Wsp]+"([cl ^"]*)"(.*) $param x pubidlit param] || [regexp ^[cl $Wsp]+'([cl ^']*)'(.*) $param x pubidlit param]} {
00658 if {[regexp ^[cl $Wsp]+"([cl ^"]*)"(.*) $param x systemlit param] || [regexp ^[cl $Wsp]+'([cl ^']*)'(.*) $param x systemlit param]} {
00659 set externalID [list PUBLIC $pubidlit $systemlit]
00660 } else {
00661 uplevel #0 $options(-errorcommand) [list syntaxerror "syntax error: PUBLIC identifier not followed by system literal around line $state(line)"]
00662 }
00663 } else {
00664 uplevel #0 $options(-errorcommand) [list syntaxerror "syntax error: PUBLIC identifier not followed by literal around line $state(line)"]
00665 }
00666 }
00667 }
00668 if {[regexp -nocase ^[cl $Wsp]+NDATA[cl $Wsp]+($Name)(.*) $param x notation param]} {
00669 lappend externalID $notation
00670 }
00671 }
00672
00673 set state(inDTD) 1
00674
00675 ParseEvent:DocTypeDecl [array get options] $state(doc_name) $pubidlit $systemlit $options(-internaldtd)
00676
00677 set state(inDTD) 0
00678
00679 }
00680
00681 !--* {
00682
00683 # Start of a comment
00684 # See if it ends in the same tag, otherwise change the
00685 # parsing mode
00686
00687 regexp {!--(.*)} $tag discard comm1
00688 if {[regexp ([cl ^-]*)--[cl $Wsp]*\$ $comm1 discard comm1_1]} {
00689 # processed comment (end in tag)
00690 uplevel #0 $options(-commentcommand) [list $comm1_1]
00691 } elseif {[regexp ([cl ^-]*)--[cl $Wsp]*\$ $param discard comm2]} {
00692 # processed comment (end in attributes)
00693 uplevel #0 $options(-commentcommand) [list $comm1$comm2]
00694 } elseif {[regexp ([cl ^-]*)-->(.*) $text discard comm2 text]} {
00695 # processed comment (end in text)
00696 uplevel #0 $options(-commentcommand) [list $comm1$param$empty>$comm2]
00697 } else {
00698 # start of comment
00699 set state(mode) comment
00700 set state(commentdata) "$comm1$param$empty>$text"
00701 continue
00702 }
00703 }
00704
00705 {!\[CDATA\[*} {
00706
00707 regexp {!\[CDATA\[(.*)} $tag discard cdata1
00708 if {[regexp {(.*)]]$} $cdata1 discard cdata2]} {
00709 # processed CDATA (end in tag)
00710 PCDATA [array get options] [subst -novariable -nocommand $cdata2]
00711 set text [subst -novariable -nocommand $text]
00712 } elseif {[regexp {(.*)]]$} $param discard cdata2]} {
00713 # processed CDATA (end in attribute)
00714 # Backslashes in param are quoted at this stage
00715 PCDATA [array get options] $cdata1[subst -novariable -nocommand $cdata2]
00716 set text [subst -novariable -nocommand $text]
00717 } elseif {[regexp {(.*)]]>(.*)} $text discard cdata2 text]} {
00718 # processed CDATA (end in text)
00719 # Backslashes in param and text are quoted at this stage
00720 PCDATA [array get options] $cdata1[subst -novariable -nocommand $param]$empty>[subst -novariable -nocommand $cdata2]
00721 set text [subst -novariable -nocommand $text]
00722 } else {
00723 # start CDATA
00724 set state(cdata) "$cdata1$param>$text"
00725 set state(mode) cdata
00726 continue
00727 }
00728
00729 }
00730
00731 !ELEMENT -
00732 !ATTLIST -
00733 !ENTITY -
00734 !NOTATION {
00735 uplevel #0 $options(-errorcommand) [list illegaldeclaration "[string range $tag 1 end] declaration not expected in document instance around line $state(line)"]
00736 }
00737
00738 default {
00739 uplevel #0 $options(-errorcommand) [list unknowninstruction "unknown processing instruction \"<$tag>\" around line $state(line)"]
00740 }
00741 }
00742 }
00743 *,1,* -
00744 *,0,/,/ {
00745 # Syntax error
00746 uplevel #0 $options(-errorcommand) [list syntaxerror "syntax error: closed/empty tag: tag $tag param $param empty $empty close $close around line $state(line)"]
00747 }
00748 }
00749
00750 # Process character data
00751
00752 if {$state(haveDocElement) && [llength $state(stack)]} {
00753
00754 # Check if the internal DTD entity is in the text
00755 regsub -all &xml:intdtd\; $text \[$options(-internaldtd)\] text
00756
00757 # Look for entity references
00758 if {([array size entities] || \
00759 [string length $options(-entityreferencecommand)]) && \
00760 $options(-defaultexpandinternalentities) && \
00761 [regexp {&[^;]+;} $text]} {
00762
00763 # protect Tcl specials
00764 # NB. braces and backslashes may already be protected
00765 regsub -all {\\({|}|\\)} $text {\1} text
00766 regsub -all {([][$\\{}])} $text {\\\1} text
00767
00768 # Mark entity references
00769 regsub -all {&([^;]+);} $text [format {%s; %s {\1} ; %s %s} \}\} [namespace code [list Entity [array get options] $options(-entityreferencecommand) [namespace code [list PCDATA [array get options]]] $options(entities)]] [namespace code [list DeProtect [namespace code [list PCDATA [array get options]]]]] \{\{] text
00770 set text "uplevel #0 [namespace code [list DeProtect1 [namespace code [list PCDATA [array get options]]]]] {{$text}}"
00771 eval $text
00772 } else {
00773
00774 # Restore protected special characters
00775 regsub -all {\\([][{}\\])} $text {\1} text
00776 PCDATA [array get options] $text
00777 }
00778 } elseif {[string length [string trim $text]]} {
00779 uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$text\" in document prolog around line $state(line)"]
00780 }
00781
00782 }
00783
00784 # If this is the end of the document, close all open containers
00785 if {$options(-final) && [llength $state(stack)]} {
00786 eval $options(-errorcommand) [list unclosedelement "element [lindex $state(stack) end] remains unclosed around line $state(line)"]
00787 }
00788
00789 return {}
00790 }
00791
00792 /* sgml::DeProtect --*/
00793 /* */
00794 /* Invoke given command after removing protecting backslashes*/
00795 /* from given text.*/
00796 /* */
00797 /* Arguments:*/
00798 /* cmd Command to invoke*/
00799 /* text Text to deprotect*/
00800 /* */
00801 /* Results:*/
00802 /* Depends on command*/
00803
00804 ret sgml::DeProtect1 (type cmd , type text) {
00805 if {[string compare {} $text]} {
00806 regsub -all {\\([]$[{}\\])} $text {\1} text
00807 uplevel #0 $cmd [list $text]
00808 }
00809 }
00810 ret sgml::DeProtect (type cmd , type text) {
00811 set text [lindex $text 0]
00812 if {[string compare {} $text]} {
00813 regsub -all {\\([]$[{}\\])} $text {\1} text
00814 uplevel #0 $cmd [list $text]
00815 }
00816 }
00817
00818 /* sgml::ParserDelete --*/
00819 /* */
00820 /* Free all memory associated with parser*/
00821 /* */
00822 /* Arguments:*/
00823 /* var global state array*/
00824 /* */
00825 /* Results:*/
00826 /* Variables unset*/
00827
00828 ret sgml::ParserDelete var (
00829 type upvar #0 $, type var , type state
00830
00831 , type if , optional ![info =exists state] , optional
00832 return =-code error ="unknown parser"
00833
00834
00835 , type catch , optional unset =$state(entities)
00836 , type catch , optional unset =$state(parameterentities)
00837 , type catch , optional unset =$state(elementdecls)
00838 , type catch , optional unset =$state(attlistdecls)
00839 , type catch , optional unset =$state(notationdecls)
00840 , type catch , optional unset =$state(namespaces)
00841
00842 , type unset , type state
00843
00844 , type return , optional
00845 )
00846
00847 # sgml::ParseEvent:ElementOpen --
00848 #
00849 # Start of an element.
00850 #
00851 # Arguments:
00852 # tag Element name
00853 # attr Attribute list
00854 # opts Options
00855 # args further configuration options
00856 #
00857 # Options:
00858 # -empty boolean
00859 # indicates whether the element was an empty element
00860 #
00861 # Results:
00862 # Modify state and invoke callback
00863
00864 proc sgml::ParseEvent:ElementOpen {tag attr opts args} {
00865 variable Name
00866 variable Wsp
00867
00868 array options = $opts
00869 upvar /* 0 $options(-statevariable) state*/
00870 array cfg = {-empty 0}
00871 array cfg = $args
00872 handleEmpty = 0
00873
00874 if {$options(-normalize)} {
00875 tag = [string toupper $tag]
00876 }
00877
00878 /* Update state*/
00879 lappend state(stack) $tag
00880
00881 /* Parse attribute list into a key-value representation*/
00882 if {[string compare $options(-parseattributelistcommand) {}]} {
00883 if {[catch {uplevel /* 0 $options(-parseattributelistcommand) [list $opts $attr]} attr]} {*/
00884 if {[string compare [lindex $attr 0] "unterminated attribute value"]} {
00885 uplevel /* 0 $options(-errorcommand) [list unterminatedattribute "$attr around line $state(line)"]*/
00886 attr = {}
00887 } else {
00888
00889 /* It is most likely that a ">" character was in an attribute value.*/
00890 /* This manifests itself by ">" appearing in the element's text.*/
00891
00892
00893
00894
00895 foreach {msg attlist brokenattr} $attr break
00896
00897 upvar text elemText
00898 if {[string first > $elemText] >= 0} {
00899
00900
00901 regexp [cl $Wsp]*($Name)[cl $Wsp]*=[cl $Wsp]*("|')(.*) $brokenattr discard attname delimiter attvalue
00902 regexp (.*)>([cl ^>]*)\$ $elemText discard remattlist elemText
00903 regexp ([cl ^$delimiter]*)${delimiter}(.*) $remattlist discard remattvalue remattlist
00904
00905 /* Gotcha: watch out for empty element syntax*/
00906 if {[string match */ [string trimright $remattlist]]} {
00907 remattlist = [string range $remattlist 0 end-1]
00908 handleEmpty = 1
00909 cfg = (-empty) 1
00910 }
00911
00912 append attvalue >$remattvalue
00913 lappend attlist $attname $attvalue
00914
00915 /* Complete parsing the attribute list*/
00916 if {[catch {uplevel /* 0 $options(-parseattributelistcommand) [list $options(-statevariable) $remattlist]} attr]} {*/
00917 uplevel /* 0 $options(-errorcommand) [list unterminatedattribute "$attr around line $state(line)"]*/
00918 attr = {}
00919 attlist = {}
00920 } else {
00921 eval lappend attlist $attr
00922 }
00923
00924 attr = $attlist
00925
00926 } else {
00927 uplevel /* 0 $options(-errorcommand) [list unterminatedattribute "$attr around line $state(line)"]*/
00928 attr = {}
00929 }
00930 }
00931 }
00932 }
00933
00934 empty = {}
00935 if {$cfg(-empty) && $options(-reportempty)} {
00936 empty = {-empty 1}
00937 }
00938
00939 /* Check for namespace declarations*/
00940 upvar /* 0 $options(namespaces) namespaces*/
00941 nsdecls = {}
00942 if {[llength $attr]} {
00943 array attrlist = $attr
00944 foreach {attrName attrValue} [array get attrlist xmlns*] {
00945 un attrlist = ($attrName)
00946 colon = [ prefix = {}]
00947 if {[regexp {^xmlns(:(.+))?$} $attrName discard colon prefix]} {
00948 switch -glob [string length $colon],[string length $prefix] {
00949 0,0 {
00950 /* default NS declaration*/
00951 lappend state(defaultNSURI) $attrValue
00952 lappend state(defaultNS) [llength $state(stack)]
00953 lappend nsdecls $attrValue {}
00954 }
00955 0,* {
00956 /* Huh?*/
00957 }
00958 *,0 {
00959 /* Error*/
00960 uplevel /* 0 $state(-warningcommand) "no prefix specified for namespace URI \"$attrValue\" in element \"$tag\""*/
00961 }
00962 default {
00963 namespaces = ($prefix,[llength $state(stack)]) $attrValue
00964 lappend nsdecls $attrValue $prefix
00965 }
00966 }
00967 }
00968 }
00969 if {[llength $nsdecls]} {
00970 nsdecls = [list -namespacedecls $nsdecls]
00971 }
00972 attr = [array get attrlist]
00973 }
00974
00975
00976 ns = {}
00977 if {[regexp {([^:]+):(.*)$} $tag discard prefix tag]} {
00978 nsspec = [lsort -dictionary -decreasing [array names namespaces $prefix,*]]
00979 if {[llength $nsspec]} {
00980 nsuri = $namespaces([lindex $nsspec 0])
00981 ns = [list -namespace $nsuri]
00982 } else {
00983 uplevel
00984 }
00985 } elseif {[llength $state(defaultNSURI)]} {
00986 ns = [list -namespace [lindex $state(defaultNSURI) end]]
00987 }
00988
00989
00990 code = [catch {uplevel
00991
00992
00993 if {$code == 0 && $handleEmpty} {
00994 ParseEvent:ElementClose $tag $opts -empty 1
00995 }
00996
00997 return -code $code -errorinfo $::errorInfo $msg
00998 }
00999
01000
01001
01002
01003
01004
01005
01006
01007
01008
01009
01010
01011
01012
01013
01014
01015
01016 ret sgml::ParseEvent:ElementClose (type tag , type opts , type args) {
01017 array set options $opts
01018 upvar #0 $options(-statevariable) state
01019 array set cfg {-empty 0}
01020 array set cfg $args
01021
01022 # WF check
01023 if {[string compare $tag [lindex $state(stack) end]]} {
01024 uplevel #0 $options(-errorcommand) [list illegalendtag "end tag \"$tag\" does not match open element \"[lindex $state(stack) end]\" around line $state(line)"]
01025 return
01026 }
01027
01028 # Check whether this element has an expanded name
01029 upvar #0 $options(namespaces) namespaces
01030 set ns {}
01031 if {[regexp {([^:]+):(.*)$} $tag discard prefix tag]} {
01032 set nsuri $namespaces([lindex [lsort -dictionary -decreasing [array names namespaces $prefix,*]] 0])
01033 set ns [list -namespace $nsuri]
01034 } elseif {[llength $state(defaultNSURI)]} {
01035 set ns [list -namespace [lindex $state(defaultNSURI) end]]
01036 }
01037
01038 # Pop namespace stacks, if any
01039 if {[llength $state(defaultNS)]} {
01040 if {[llength $state(stack)] == [lindex $state(defaultNS) end]} {
01041 set state(defaultNS) [lreplace $state(defaultNS) end end]
01042 }
01043 }
01044 foreach nsspec [array names namespaces *,[llength $state(stack)]] {
01045 unset namespaces($nsspec)
01046 }
01047
01048 # Update state
01049 set state(stack) [lreplace $state(stack) end end]
01050
01051 set empty {}
01052 if {$cfg(-empty) && $options(-reportempty)} {
01053 set empty {-empty 1}
01054 }
01055
01056 # Invoke callback
01057 # Mats: Shall be same as sgml::ParseEvent:ElementOpen to handle exceptions in callback.
01058 set code [catch {uplevel #0 $options(-elementendcommand) [list $tag] $empty $ns} msg]
01059 return -code $code -errorinfo $::errorInfo $msg
01060 }
01061
01062
01063
01064
01065
01066
01067
01068
01069
01070
01071
01072
01073
01074 ret sgml::PCDATA (type opts , type pcdata) {
01075 array set options $opts
01076
01077 if {$options(-ignorewhitespace) && \
01078 ![string length [string trim $pcdata]]} {
01079 return {}
01080 }
01081
01082 if {![regexp ^[cl $::sgml::Char]*\$ $pcdata]} {
01083 upvar \#0 $options(-statevariable) state
01084 uplevel \#0 $options(-errorcommand) [list illegalcharacters "illegal, non-Unicode characters found in text \"$pcdata\" around line $state(line)"]
01085 }
01086
01087 uplevel \#0 $options(-characterdatacommand) [list $pcdata]
01088 }
01089
01090
01091
01092
01093
01094
01095
01096
01097
01098
01099
01100
01101 ret sgml::Normalize (type name , type req) {
01102 if {$req} {
01103 return [string toupper $name]
01104 } else {
01105 return $name
01106 }
01107 }
01108
01109
01110
01111
01112
01113
01114
01115
01116
01117
01118
01119
01120
01121
01122
01123 ret sgml::Entity (type opts , type entityrefcmd , type pcdatacmd , type entities , type ref) {
01124 array set options $opts
01125 upvar #0 $options(-statevariable) state
01126
01127 if {![string length $entities]} {
01128 set entities [namespace current]::EntityPredef
01129 }
01130
01131 # SRB: Bug fix 2008-11-18 #812051: surround case labels in braces for compatibility with Freewrap
01132 switch -glob -- $ref {
01133 {%*} {
01134 # Parameter entity - not recognised outside of a DTD
01135 }
01136 {#x*} {
01137 # Character entity - hex
01138 if {[catch {format %c [scan [string range $ref 2 end] %x tmp; set tmp]} char]} {
01139 return -code error "malformed character entity \"$ref\""
01140 }
01141 uplevel #0 $pcdatacmd [list $char]
01142
01143 return {}
01144
01145 }
01146 {#*} {
01147 # Character entity - decimal
01148 if {[catch {format %c [scan [string range $ref 1 end] %d tmp; set tmp]} char]} {
01149 return -code error "malformed character entity \"$ref\""
01150 }
01151 uplevel #0 $pcdatacmd [list $char]
01152
01153 return {}
01154
01155 }
01156 default {
01157 # General entity
01158 upvar #0 $entities map
01159 if {[info exists map($ref)]} {
01160
01161 if {![regexp {<|&} $map($ref)]} {
01162
01163 # Simple text replacement - optimise
01164 uplevel #0 $pcdatacmd [list $map($ref)]
01165
01166 return {}
01167
01168 }
01169
01170 # Otherwise an additional round of parsing is required.
01171 # This only applies to XML, since HTML doesn't have general entities
01172
01173 # Must parse the replacement text for start & end tags, etc
01174 # This text must be self-contained: balanced closing tags, and so on
01175
01176 set tokenised [tokenise $map($ref) $::xml::tokExpr $::xml::substExpr]
01177 set options(-final) 0
01178 eval parseEvent [list $tokenised] [array get options]
01179
01180 return {}
01181
01182 } elseif {[string compare $entityrefcmd "::sgml::noop"]} {
01183
01184 set result [uplevel #0 $entityrefcmd [list $ref]]
01185
01186 if {[string length $result]} {
01187 uplevel #0 $pcdatacmd [list $result]
01188 }
01189
01190 return {}
01191
01192 } else {
01193
01194 # Reconstitute entity reference
01195
01196 uplevel #0 $options(-errorcommand) [list illegalentity "undefined entity reference \"$ref\""]
01197
01198 return {}
01199
01200 }
01201 }
01202 }
01203
01204 # If all else fails leave the entity reference untouched
01205 uplevel #0 $pcdatacmd [list &$ref\;]
01206
01207 return {}
01208 }
01209
01210
01211
01212
01213
01214
01215
01216
01217
01218
01219
01220
01221
01222
01223
01224
01225
01226
01227
01228
01229
01230 ret sgml::ParseEvent:DocTypeDecl (type opts , type docEl , type pubId , type sysId , type intSSet) {
01231 array set options {}
01232 array set options $opts
01233
01234 set code [catch {uplevel #0 $options(-doctypecommand) [list $docEl $pubId $sysId $intSSet]} err]
01235 switch $code {
01236 3 {
01237 # break
01238 return {}
01239 }
01240 0 -
01241 4 {
01242 # continue
01243 }
01244 default {
01245 return -code $code $err
01246 }
01247 }
01248
01249 # Otherwise we'll parse the DTD and report it piecemeal
01250
01251 # The internal DTD subset is processed first (XML 2.8)
01252 # During this stage, parameter entities are only allowed
01253 # between markup declarations
01254
01255 ParseDTD:Internal [array get options] $intSSet
01256
01257 # The external DTD subset is processed last (XML 2.8)
01258 # During this stage, parameter entities may occur anywhere
01259
01260 # We must resolve the external identifier to obtain the
01261 # DTD data. The application may supply its own resolver.
01262
01263 if {[string length $pubId] || [string length $sysId]} {
01264 uplevel #0 $options(-externalentitycommand) [list $options(-cmd) $options(-baseuri) $sysId $pubId]
01265 }
01266
01267 return {}
01268 }
01269
01270
01271
01272
01273
01274
01275
01276
01277
01278
01279
01280
01281
01282
01283 ret sgml::ParseDTD:Internal (type opts , type dtd) {
01284 variable MarkupDeclExpr
01285 variable MarkupDeclSub
01286
01287 array set options {}
01288 array set options $opts
01289
01290 upvar #0 $options(-statevariable) state
01291 upvar #0 $options(parameterentities) PEnts
01292 upvar #0 $options(externalparameterentities) ExtPEnts
01293
01294 # Bug 583947: remove comments before further processing
01295 regsub -all {<!--.*?-->} $dtd {} dtd
01296
01297 # Tokenize the DTD
01298
01299 # Protect Tcl special characters
01300 regsub -all {([{}\\])} $dtd {\\\1} dtd
01301
01302 regsub -all $MarkupDeclExpr $dtd $MarkupDeclSub dtd
01303
01304 # Entities may have angle brackets in their replacement
01305 # text, which breaks the RE processing. So, we must
01306 # use a similar technique to processing doc instances
01307 # to rebuild the declarations from the pieces
01308
01309 set mode {} ;# normal
01310 set delimiter {}
01311 set name {}
01312 set param {}
01313
01314 set state(inInternalDTD) 1
01315
01316 # Process the tokens
01317 foreach {decl value text} [lrange "{} {} \{$dtd\}" 3 end] {
01318
01319 # Keep track of line numbers
01320 incr state(line) [regsub -all \n $text {} discard]
01321
01322 ParseDTD:EntityMode [array get options] mode replText decl value text $delimiter $name $param
01323
01324 ParseDTD:ProcessMarkupDecl [array get options] decl value delimiter name mode replText text param
01325
01326 # There may be parameter entity references between markup decls
01327
01328 if {[regexp {%.*;} $text]} {
01329
01330 # Protect Tcl special characters
01331 regsub -all {([{}\\])} $text {\\\1} text
01332
01333 regsub -all %($::sgml::Name)\; $text "\} {\\1} \{" text
01334
01335 set PElist "\{$text\}"
01336 set PElist [lreplace $PElist end end]
01337 foreach {text entref} $PElist {
01338 if {[string length [string trim $text]]} {
01339 uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text in internal DTD subset around line $state(line)"]
01340 }
01341
01342 # Expand parameter entity and recursively parse
01343 # BUG: no checks yet for recursive entity references
01344
01345 if {[info exists PEnts($entref)]} {
01346 set externalParser [$options(-cmd) entityparser]
01347 $externalParser parse $PEnts($entref) -dtdsubset internal
01348 } elseif {[info exists ExtPEnts($entref)]} {
01349 set externalParser [$options(-cmd) entityparser]
01350 $externalParser parse $ExtPEnts($entref) -dtdsubset external
01351 #$externalParser free
01352 } else {
01353 uplevel #0 $options(-errorcommand) [list illegalreference "reference to undeclared parameter entity \"$entref\""]
01354 }
01355 }
01356
01357 }
01358
01359 }
01360
01361 return {}
01362 }
01363
01364
01365
01366
01367
01368
01369
01370
01371
01372
01373
01374
01375
01376
01377
01378
01379
01380
01381
01382 ret sgml::ParseDTD:EntityMode (type opts , type modeVar , type replTextVar , type declVar , type valueVar , type textVar , type delimiter , type name , type param) {
01383 upvar 1 $modeVar mode
01384 upvar 1 $replTextVar replText
01385 upvar 1 $declVar decl
01386 upvar 1 $valueVar value
01387 upvar 1 $textVar text
01388 array set options $opts
01389
01390 switch $mode {
01391 {} {
01392 # Pass through to normal processing section
01393 }
01394 entity {
01395 # Look for closing delimiter
01396 if {[regexp ([cl ^$delimiter]*)${delimiter}(.*) $decl discard val1 remainder]} {
01397 append replText <$val1
01398 DTD:ENTITY [array get options] $name [string trim $param] $delimiter$replText$delimiter
01399 set decl /
01400 set text $remainder\ $value>$text
01401 set value {}
01402 set mode {}
01403 } elseif {[regexp ([cl ^$delimiter]*)${delimiter}(.*) $value discard val2 remainder]} {
01404 append replText <$decl\ $val2
01405 DTD:ENTITY [array get options] $name [string trim $param] $delimiter$replText$delimiter
01406 set decl /
01407 set text $remainder>$text
01408 set value {}
01409 set mode {}
01410 } elseif {[regexp ([cl ^$delimiter]*)${delimiter}(.*) $text discard val3 remainder]} {
01411 append replText <$decl\ $value>$val3
01412 DTD:ENTITY [array get options] $name [string trim $param] $delimiter$replText$delimiter
01413 set decl /
01414 set text $remainder
01415 set value {}
01416 set mode {}
01417 } else {
01418
01419 # Remain in entity mode
01420 append replText <$decl\ $value>$text
01421 return -code continue
01422
01423 }
01424 }
01425
01426 ignore {
01427 upvar #0 $options(-statevariable) state
01428
01429 if {[regexp {]](.*)$} $decl discard remainder]} {
01430 set state(condSections) [lreplace $state(condSections) end end]
01431 set decl $remainder
01432 set mode {}
01433 } elseif {[regexp {]](.*)$} $value discard remainder]} {
01434 set state(condSections) [lreplace $state(condSections) end end]
01435 regexp <[cl $::sgml::Wsp]*($::sgml::Name)(.*) $remainder discard decl value
01436 set mode {}
01437 } elseif {[regexp {]]>(.*)$} $text discard remainder]} {
01438 set state(condSections) [lreplace $state(condSections) end end]
01439 set decl /
01440 set value {}
01441 set text $remainder
01442 #regexp <[cl $::sgml::Wsp]*($::sgml::Name)([cl ^>]*)>(.*) $remainder discard decl value text
01443 set mode {}
01444 } else {
01445 set decl /
01446 }
01447
01448 }
01449
01450 comment {
01451 # Look for closing comment delimiter
01452
01453 upvar #0 $options(-statevariable) state
01454
01455 if {[regexp (.*?)--(.*)\$ $decl discard data1 remainder]} {
01456 } elseif {[regexp (.*?)--(.*)\$ $value discard data1 remainder]} {
01457 } elseif {[regexp (.*?)--(.*)\$ $text discard data1 remainder]} {
01458 } else {
01459 # comment continues
01460 append state(commentdata) <$decl\ $value>$text
01461 set decl /
01462 set value {}
01463 set text {}
01464 }
01465 }
01466
01467 }
01468
01469 return {}
01470 }
01471
01472
01473
01474
01475
01476
01477
01478
01479
01480
01481
01482
01483
01484
01485
01486
01487
01488
01489
01490 ret sgml::ParseDTD:ProcessMarkupDecl (type opts , type declVar , type valueVar , type delimiterVar , type nameVar , type modeVar , type replTextVar , type textVar , type paramVar) {
01491 upvar 1 $modeVar mode
01492 upvar 1 $replTextVar replText
01493 upvar 1 $textVar text
01494 upvar 1 $declVar decl
01495 upvar 1 $valueVar value
01496 upvar 1 $nameVar name
01497 upvar 1 $delimiterVar delimiter
01498 upvar 1 $paramVar param
01499
01500 variable declExpr
01501 variable ExternalEntityExpr
01502
01503 array set options $opts
01504 upvar #0 $options(-statevariable) state
01505
01506 switch -glob -- $decl {
01507
01508 / {
01509 # continuation from entity processing
01510 }
01511
01512 !ELEMENT {
01513 # Element declaration
01514 if {[regexp $declExpr $value discard tag cmodel]} {
01515 DTD:ELEMENT [array get options] $tag $cmodel
01516 } else {
01517 uplevel #0 $options(-errorcommand) [list illegaldeclaration "malformed element declaration around line $state(line)"]
01518 }
01519 }
01520
01521 !ATTLIST {
01522 # Attribute list declaration
01523 variable declExpr
01524 if {[regexp $declExpr $value discard tag attdefns]} {
01525 if {[catch {DTD:ATTLIST [array get options] $tag $attdefns} err]} {
01526 #puts stderr "Stack trace: $::errorInfo\n***\n"
01527 # Atttribute parsing has bugs at the moment
01528 #return -code error "$err around line $state(line)"
01529 return {}
01530 }
01531 } else {
01532 uplevel #0 $options(-errorcommand) [list illegaldeclaration "malformed attribute list declaration around line $state(line)"]
01533 }
01534 }
01535
01536 !ENTITY {
01537 # Entity declaration
01538 variable EntityExpr
01539
01540 if {[regexp $EntityExpr $value discard param name value]} {
01541
01542 # Entity replacement text may have a '>' character.
01543 # In this case, the real delimiter will be in the following
01544 # text. This is complicated by the possibility of there
01545 # being several '<','>' pairs in the replacement text.
01546 # At this point, we are searching for the matching quote delimiter.
01547
01548 if {[regexp $ExternalEntityExpr $value]} {
01549 DTD:ENTITY [array get options] $name [string trim $param] $value
01550 } elseif {[regexp ("|')(.*?)\\1(.*) $value discard delimiter replText value]} {
01551
01552 if {[string length [string trim $value]]} {
01553 uplevel #0 $options(-errorcommand) [list illegaldeclaration "malformed entity declaration around line $state(line)"]
01554 } else {
01555 DTD:ENTITY [array get options] $name [string trim $param] $delimiter$replText$delimiter
01556 }
01557 } elseif {[regexp ("|')(.*) $value discard delimiter replText]} {
01558 append replText >$text
01559 set text {}
01560 set mode entity
01561 } else {
01562 uplevel #0 $options(-errorcommand) [list illegaldeclaration "no delimiter for entity declaration around line $state(line)"]
01563 }
01564
01565 } else {
01566 uplevel #0 $options(-errorcommand) [list illegaldeclaration "malformed entity declaration around line $state(line)"]
01567 }
01568 }
01569
01570 !NOTATION {
01571 # Notation declaration
01572 if {[regexp $declExpr param discard tag notation]} {
01573 DTD:ENTITY [array get options] $tag $notation
01574 } else {
01575 uplevel #0 $options(-errorcommand) [list illegaldeclaration "malformed entity declaration around line $state(line)"]
01576 }
01577 }
01578
01579 !--* {
01580 # Start of a comment
01581
01582 if {[regexp !--(.*?)--\$ $decl discard data]} {
01583 if {[string length [string trim $value]]} {
01584 uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$value\""]
01585 }
01586 uplevel #0 $options(-commentcommand) [list $data]
01587 set decl /
01588 set value {}
01589 } elseif {[regexp -- ^(.*?)--\$ $value discard data2]} {
01590 regexp !--(.*)\$ $decl discard data1
01591 uplevel #0 $options(-commentcommand) [list $data1\ $data2]
01592 set decl /
01593 set value {}
01594 } elseif {[regexp (.*?)-->(.*)\$ $text discard data3 remainder]} {
01595 regexp !--(.*)\$ $decl discard data1
01596 uplevel #0 $options(-commentcommand) [list $data1\ $value>$data3]
01597 set decl /
01598 set value {}
01599 set text $remainder
01600 } else {
01601 regexp !--(.*)\$ $decl discard data1
01602 set state(commentdata) $data1\ $value>$text
01603 set decl /
01604 set value {}
01605 set text {}
01606 set mode comment
01607 }
01608 }
01609
01610 !*INCLUDE* -
01611 !*IGNORE* {
01612 if {$state(inInternalDTD)} {
01613 uplevel #0 $options(-errorcommand) [list illegalsection "conditional section not permitted in internal DTD subset around line $state(line)"]
01614 }
01615
01616 if {[regexp {^!\[INCLUDE\[(.*)} $decl discard remainder]} {
01617 # Push conditional section stack, popped by ]]> sequence
01618
01619 if {[regexp {(.*?)]]$} $remainder discard r2]} {
01620 # section closed immediately
01621 if {[string length [string trim $r2]]} {
01622 uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$r2\" in conditional section"]
01623 }
01624 } elseif {[regexp {(.*?)]](.*)} $value discard r2 r3]} {
01625 # section closed immediately
01626 if {[string length [string trim $r2]]} {
01627 uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$r2\" in conditional section"]
01628 }
01629 if {[string length [string trim $r3]]} {
01630 uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$r3\" in conditional section"]
01631 }
01632 } else {
01633
01634 lappend state(condSections) INCLUDE
01635
01636 set parser [$options(-cmd) entityparser]
01637 $parser parse $remainder\ $value> -dtdsubset external
01638 #$parser free
01639
01640 if {[regexp {(.*?)]]>(.*)} $text discard t1 t2]} {
01641 if {[string length [string trim $t1]]} {
01642 uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$t1\""]
01643 }
01644 if {![llength $state(condSections)]} {
01645 uplevel #0 $options(-errorcommand) [list illegalsection "extraneous conditional section close"]
01646 }
01647 set state(condSections) [lreplace $state(condSections) end end]
01648 set text $t2
01649 }
01650
01651 }
01652 } elseif {[regexp {^!\[IGNORE\[(.*)} $decl discard remainder]} {
01653 # Set ignore mode. Still need a stack
01654 set mode ignore
01655
01656 if {[regexp {(.*?)]]$} $remainder discard r2]} {
01657 # section closed immediately
01658 if {[string length [string trim $r2]]} {
01659 uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$r2\" in conditional section"]
01660 }
01661 } elseif {[regexp {(.*?)]](.*)} $value discard r2 r3]} {
01662 # section closed immediately
01663 if {[string length [string trim $r2]]} {
01664 uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$r2\" in conditional section"]
01665 }
01666 if {[string length [string trim $r3]]} {
01667 uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$r3\" in conditional section"]
01668 }
01669 } else {
01670
01671 lappend state(condSections) IGNORE
01672
01673 if {[regexp {(.*?)]]>(.*)} $text discard t1 t2]} {
01674 if {[string length [string trim $t1]]} {
01675 uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$t1\""]
01676 }
01677 if {![llength $state(condSections)]} {
01678 uplevel #0 $options(-errorcommand) [list illegalsection "extraneous conditional section close"]
01679 }
01680 set state(condSections) [lreplace $state(condSections) end end]
01681 set text $t2
01682 }
01683
01684 }
01685 } else {
01686 uplevel #0 $options(-errorcommand) [list illegaldeclaration "illegal markup declaration \"$decl\" around line $state(line)"]
01687 }
01688
01689 }
01690
01691 default {
01692 if {[regexp {^\?(.*)} $decl discard target]} {
01693 # Processing instruction
01694 } else {
01695 uplevel #0 $options(-errorcommand) [list illegaldeclaration "illegal markup declaration \"$decl\""]
01696 }
01697 }
01698 }
01699
01700 return {}
01701 }
01702
01703 /* sgml::ParseDTD:External --*/
01704 /* */
01705 /* Parse the external DTD subset.*/
01706 /* */
01707 /* Parameter entities are allowed anywhere.*/
01708 /* */
01709 /* Arguments:*/
01710 /* opts configuration options*/
01711 /* dtd DTD data*/
01712 /* */
01713 /* Results:*/
01714 /* Markup declarations parsed may cause callback invocation*/
01715
01716 ret sgml::ParseDTD:External (type opts , type dtd) {
01717 variable MarkupDeclExpr
01718 variable MarkupDeclSub
01719 variable declExpr
01720
01721 array set options $opts
01722 upvar #0 $options(parameterentities) PEnts
01723 upvar #0 $options(externalparameterentities) ExtPEnts
01724 upvar #0 $options(-statevariable) state
01725
01726 # As with the internal DTD subset, watch out for
01727 # entities with angle brackets
01728 set mode {} ;# normal
01729 set delimiter {}
01730 set name {}
01731 set param {}
01732
01733 set oldState 0
01734 catch {set oldState $state(inInternalDTD)}
01735 set state(inInternalDTD) 0
01736
01737 # Initialise conditional section stack
01738 if {![info exists state(condSections)]} {
01739 set state(condSections) {}
01740 }
01741 set startCondSectionDepth [llength $state(condSections)]
01742
01743 while {[string length $dtd]} {
01744 set progress 0
01745 set PEref {}
01746 if {![string compare $mode "ignore"]} {
01747 set progress 1
01748 if {[regexp {]]>(.*)} $dtd discard dtd]} {
01749 set remainder {}
01750 set mode {} ;# normal
01751 set state(condSections) [lreplace $state(condSections) end end]
01752 continue
01753 } else {
01754 uplevel #0 $options(-errorcommand) [list missingdelimiter "IGNORE conditional section closing delimiter not found"]
01755 }
01756 } elseif {[regexp ^(.*?)%($::sgml::Name)\;(.*)\$ $dtd discard data PEref remainder]} {
01757 set progress 1
01758 } else {
01759 set data $dtd
01760 set dtd {}
01761 set remainder {}
01762 }
01763
01764 # Tokenize the DTD (so far)
01765
01766 # Protect Tcl special characters
01767 regsub -all {([{}\\])} $data {\\\1} dataP
01768
01769 set n [regsub -all $MarkupDeclExpr $dataP $MarkupDeclSub dataP]
01770
01771 if {$n} {
01772 set progress 1
01773 # All but the last markup declaration should have no text
01774 set dataP [lrange "{} {} \{$dataP\}" 3 end]
01775 if {[llength $dataP] > 3} {
01776 foreach {decl value text} [lrange $dataP 0 [expr [llength $dataP] - 4]] {
01777 ParseDTD:EntityMode [array get options] mode replText decl value text $delimiter $name $param
01778 ParseDTD:ProcessMarkupDecl [array get options] decl value delimiter name mode repltextVar text param
01779
01780 if {[string length [string trim $text]]} {
01781 # check for conditional section close
01782 if {[regexp {]]>(.*)$} $text discard text]} {
01783 if {[string length [string trim $text]]} {
01784 uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$text\""]
01785 }
01786 if {![llength $state(condSections)]} {
01787 uplevel #0 $options(-errorcommand) [list illegalsection "extraneous conditional section close"]
01788 }
01789 set state(condSections) [lreplace $state(condSections) end end]
01790 if {![string compare $mode "ignore"]} {
01791 set mode {} ;# normal
01792 }
01793 } else {
01794 uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$text\""]
01795 }
01796 }
01797 }
01798 }
01799 # Do the last declaration
01800 foreach {decl value text} [lrange $dataP [expr [llength $dataP] - 3] end] {
01801 ParseDTD:EntityMode [array get options] mode replText decl value text $delimiter $name $param
01802 ParseDTD:ProcessMarkupDecl [array get options] decl value delimiter name mode repltextVar text param
01803 }
01804 }
01805
01806 # Now expand the PE reference, if any
01807 switch -glob $mode,[string length $PEref],$n {
01808 ignore,0,* {
01809 set dtd $text
01810 }
01811 ignore,*,* {
01812 set dtd $text$remainder
01813 }
01814 *,0,0 {
01815 set dtd $data
01816 }
01817 *,0,* {
01818 set dtd $text
01819 }
01820 *,*,0 {
01821 if {[catch {append data $PEnts($PEref)}]} {
01822 if {[info exists ExtPEnts($PEref)]} {
01823 set externalParser [$options(-cmd) entityparser]
01824 $externalParser parse $ExtPEnts($PEref) -dtdsubset external
01825 #$externalParser free
01826 } else {
01827 uplevel #0 $options(-errorcommand) [list entityundeclared "parameter entity \"$PEref\" not declared"]
01828 }
01829 }
01830 set dtd $data$remainder
01831 }
01832 default {
01833 if {[catch {append text $PEnts($PEref)}]} {
01834 if {[info exists ExtPEnts($PEref)]} {
01835 set externalParser [$options(-cmd) entityparser]
01836 $externalParser parse $ExtPEnts($PEref) -dtdsubset external
01837 #$externalParser free
01838 } else {
01839 uplevel #0 $options(-errorcommand) [list entityundeclared "parameter entity \"$PEref\" not declared"]
01840 }
01841 }
01842 set dtd $text$remainder
01843 }
01844 }
01845
01846 # Check whether a conditional section has been terminated
01847 if {[regexp {^(.*?)]]>(.*)$} $dtd discard t1 t2]} {
01848 if {![regexp <.*> $t1]} {
01849 if {[string length [string trim $t1]]} {
01850 uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$t1\""]
01851 }
01852 if {![llength $state(condSections)]} {
01853 uplevel #0 $options(-errorcommand) [list illegalsection "extraneous conditional section close"]
01854 }
01855 set state(condSections) [lreplace $state(condSections) end end]
01856 if {![string compare $mode "ignore"]} {
01857 set mode {} ;# normal
01858 }
01859 set dtd $t2
01860 set progress 1
01861 }
01862 }
01863
01864 if {!$progress} {
01865 # No parameter entity references were found and
01866 # the text does not contain a well-formed markup declaration
01867 # Avoid going into an infinite loop
01868 upvar #0 $options(-errorcommand) [list syntaxerror "external entity does not contain well-formed markup declaration"]
01869 break
01870 }
01871 }
01872
01873 set state(inInternalDTD) $oldState
01874
01875 # Check that conditional sections have been closed properly
01876 if {[llength $state(condSections)] > $startCondSectionDepth} {
01877 uplevel #0 $options(-errorcommand) [list syntaxerror "[lindex $state(condSections) end] conditional section not closed"]
01878 }
01879 if {[llength $state(condSections)] < $startCondSectionDepth} {
01880 uplevel #0 $options(-errorcommand) [list syntaxerror "too many conditional section closures"]
01881 }
01882
01883 return {}
01884 }
01885
01886 /* Procedures for handling the various declarative elements in a DTD.*/
01887 /* New elements may be added by creating a procedure of the form*/
01888 /* parse:DTD:_element_*/
01889
01890 /* For each of these procedures, the various regular expressions they use*/
01891 /* are created outside of the proc to avoid overhead at runtime*/
01892
01893 /* sgml::DTD:ELEMENT --*/
01894 /* */
01895 /* <!ELEMENT ...> defines an element.*/
01896 /* */
01897 /* The content model for the element is stored in the contentmodel array,*/
01898 /* indexed by the element name. The content model is parsed into the*/
01899 /* following list form:*/
01900 /* */
01901 /* {} Content model is EMPTY.*/
01902 /* Indicated by an empty list.*/
01903 /* * Content model is ANY.*/
01904 /* Indicated by an asterix.*/
01905 /* {ELEMENT ...}*/
01906 /* Content model is element-only.*/
01907 /* {MIXED {element1 element2 ...}}*/
01908 /* Content model is mixed (PCDATA and elements).*/
01909 /* The second element of the list contains the */
01910 /* elements that may occur. #PCDATA is assumed */
01911 /* (ie. the list is normalised).*/
01912 /* */
01913 /* Arguments:*/
01914 /* opts configuration options*/
01915 /* name element GI*/
01916 /* modspec unparsed content model specification*/
01917
01918 ret sgml::DTD:ELEMENT (type opts , type name , type modspec) {
01919 variable Wsp
01920 array set options $opts
01921
01922 upvar #0 $options(elementdecls) elements
01923
01924 if {$options(-validate) && [info exists elements($name)]} {
01925 eval $options(-errorcommand) [list elementdeclared "element \"$name\" already declared"]
01926 } else {
01927 switch -- $modspec {
01928 EMPTY {
01929 set elements($name) {}
01930 uplevel #0 $options(-elementdeclcommand) $name {{}}
01931 }
01932 ANY {
01933 set elements($name) *
01934 uplevel #0 $options(-elementdeclcommand) $name *
01935 }
01936 default {
01937 # Don't parse the content model for now,
01938 # just pass the model to the application
01939 if {0 && [regexp [format {^\([%s]*#PCDATA[%s]*(\|([^)]+))?[%s]*\)*[%s]*$} $Wsp $Wsp $Wsp $Wsp] discard discard mtoks]} {
01940 set cm($name) [list MIXED [split $mtoks |]]
01941 } elseif {0} {
01942 if {[catch {CModelParse $state(state) $value} result]} {
01943 eval $options(-errorcommand) [list element? $result]
01944 } else {
01945 set cm($id) [list ELEMENT $result]
01946 }
01947 } else {
01948 set elements($name) $modspec
01949 uplevel #0 $options(-elementdeclcommand) $name [list $modspec]
01950 }
01951 }
01952 }
01953 }
01954 }
01955
01956
01957
01958
01959
01960
01961
01962
01963
01964
01965
01966
01967
01968
01969
01970
01971 ret sgml::CModelParse (type state , type value) {
01972 upvar #0 $state var
01973
01974 # First build syntax tree
01975 set syntaxTree [CModelMakeSyntaxTree $state $value]
01976
01977 # Build transition table
01978 set transitionTable [CModelMakeTransitionTable $state $syntaxTree]
01979
01980 return [list $syntaxTree $transitionTable]
01981 }
01982
01983
01984
01985
01986
01987
01988
01989
01990
01991
01992
01993
01994
01995
01996
01997
01998
01999
02000
02001
02002
02003
02004
02005
02006
02007
02008 ret sgml::CModelMakeSyntaxTree (type state , type spec) {
02009 upvar #0 $state var
02010 variable Wsp
02011 variable name
02012
02013 # Translate the spec into a Tcl list.
02014
02015 # None of the Tcl special characters are allowed in a content model spec.
02016 if {[regexp {\$|\[|\]|\{|\}} $spec]} {
02017 return -code error "illegal characters in specification"
02018 }
02019
02020 regsub -all [format {(%s)[%s]*(\?|\*|\+)?[%s]*(,|\|)?} $name $Wsp $Wsp] $spec [format {%sCModelSTname %s {\1} {\2} {\3}} \n $state] spec
02021 regsub -all {\(} $spec "\nCModelSTopenParen $state " spec
02022 regsub -all [format {\)[%s]*(\?|\*|\+)?[%s]*(,|\|)?} $Wsp $Wsp] $spec [format {%sCModelSTcloseParen %s {\1} {\2}} \n $state] spec
02023
02024 array set var {stack {} state start}
02025 eval $spec
02026
02027 # Peel off the outer seq, its redundant
02028 return [lindex [lindex $var(stack) 1] 0]
02029 }
02030
02031
02032
02033
02034
02035
02036
02037
02038
02039
02040
02041
02042
02043
02044 ret sgml::CModelSTname (type state , type name , type rep , type cs , type args) {
02045 if {[llength $args]} {
02046 return -code error "syntax error in specification: \"$args\""
02047 }
02048
02049 CModelSTcp $state $name $rep $cs
02050 }
02051
02052
02053
02054
02055
02056
02057
02058
02059
02060
02061
02062
02063
02064
02065 ret sgml::CModelSTcp (type state , type cp , type rep , type cs) {
02066 upvar #0 $state var
02067
02068 switch -glob -- [lindex $var(state) end]=$cs {
02069 start= {
02070 set var(state) [lreplace $var(state) end end end]
02071 # Add (dummy) grouping, either choice or sequence will do
02072 CModelSTcsSet $state ,
02073 CModelSTcpAdd $state $cp $rep
02074 }
02075 :choice= -
02076 :seq= {
02077 set var(state) [lreplace $var(state) end end end]
02078 CModelSTcpAdd $state $cp $rep
02079 }
02080 start=| -
02081 start=, {
02082 set var(state) [lreplace $var(state) end end [expr {$cs == "," ? ":seq" : ":choice"}]]
02083 CModelSTcsSet $state $cs
02084 CModelSTcpAdd $state $cp $rep
02085 }
02086 :choice=| -
02087 :seq=, {
02088 CModelSTcpAdd $state $cp $rep
02089 }
02090 :choice=, -
02091 :seq=| {
02092 return -code error "syntax error in specification: incorrect delimiter after \"$cp\", should be \"[expr {$cs == "," ? "|" : ","}]\""
02093 }
02094 end=* {
02095 return -code error "syntax error in specification: no delimiter before \"$cp\""
02096 }
02097 default {
02098 return -code error "syntax error"
02099 }
02100 }
02101
02102 }
02103
02104
02105
02106
02107
02108
02109
02110
02111
02112
02113
02114
02115 ret sgml::CModelSTcsSet (type state , type cs) {
02116 upvar #0 $state var
02117
02118 set cs [expr {$cs == "," ? ":seq" : ":choice"}]
02119
02120 if {[llength $var(stack)]} {
02121 set var(stack) [lreplace $var(stack) end end $cs]
02122 } else {
02123 set var(stack) [list $cs {}]
02124 }
02125 }
02126
02127
02128
02129
02130
02131
02132
02133
02134
02135
02136
02137
02138
02139 ret sgml::CModelSTcpAdd (type state , type cp , type rep) {
02140 upvar #0 $state var
02141
02142 if {[llength $var(stack)]} {
02143 set top [lindex $var(stack) end]
02144 lappend top [list $rep $cp]
02145 set var(stack) [lreplace $var(stack) end end $top]
02146 } else {
02147 set var(stack) [list $rep $cp]
02148 }
02149 }
02150
02151
02152
02153
02154
02155
02156
02157
02158
02159
02160
02161 ret sgml::CModelSTopenParen (type state , type args) {
02162 upvar #0 $state var
02163
02164 if {[llength $args]} {
02165 return -code error "syntax error in specification: \"$args\""
02166 }
02167
02168 lappend var(state) start
02169 lappend var(stack) [list {} {}]
02170 }
02171
02172
02173
02174
02175
02176
02177
02178
02179
02180
02181
02182
02183
02184 ret sgml::CModelSTcloseParen (type state , type rep , type cs , type args) {
02185 upvar #0 $state var
02186
02187 if {[llength $args]} {
02188 return -code error "syntax error in specification: \"$args\""
02189 }
02190
02191 set cp [lindex $var(stack) end]
02192 set var(stack) [lreplace $var(stack) end end]
02193 set var(state) [lreplace $var(state) end end]
02194 CModelSTcp $state $cp $rep $cs
02195 }
02196
02197
02198
02199
02200
02201
02202
02203
02204
02205
02206
02207
02208
02209
02210
02211
02212 ret sgml::CModelMakeTransitionTable (type state , type st) {
02213 upvar #0 $state var
02214
02215 # Construct nullable, firstpos and lastpos functions
02216 array set var {number 0}
02217 foreach {nullable firstpos lastpos} [ \
02218 TraverseDepth1st $state $st {
02219 # Evaluated for leaf nodes
02220 # Compute nullable(n)
02221 # Compute firstpos(n)
02222 # Compute lastpos(n)
02223 set nullable [nullable leaf $rep $name]
02224 set firstpos [list {} $var(number)]
02225 set lastpos [list {} $var(number)]
02226 set var(pos:$var(number)) $name
02227 } {
02228 # Evaluated for nonterminal nodes
02229 # Compute nullable, firstpos, lastpos
02230 set firstpos [firstpos $cs $firstpos $nullable]
02231 set lastpos [lastpos $cs $lastpos $nullable]
02232 set nullable [nullable nonterm $rep $cs $nullable]
02233 } \
02234 ] break
02235
02236 set accepting [incr var(number)]
02237 set var(pos:$accepting) #
02238
02239 # var(pos:N) maps from position to symbol.
02240 # Construct reverse map for convenience.
02241 # NB. A symbol may appear in more than one position.
02242 # var is about to be reset, so use different arrays.
02243
02244 foreach {pos symbol} [array get var pos:*] {
02245 set pos [lindex [split $pos :] 1]
02246 set pos2symbol($pos) $symbol
02247 lappend sym2pos($symbol) $pos
02248 }
02249
02250 # Construct the followpos functions
02251 catch {unset var}
02252 followpos $state $st $firstpos $lastpos
02253
02254 # Construct transition table
02255 # Dstates is [union $marked $unmarked]
02256 set unmarked [list [lindex $firstpos 1]]
02257 while {[llength $unmarked]} {
02258 set T [lindex $unmarked 0]
02259 lappend marked $T
02260 set unmarked [lrange $unmarked 1 end]
02261
02262 # Find which input symbols occur in T
02263 set symbols {}
02264 foreach pos $T {
02265 if {$pos != $accepting && [lsearch $symbols $pos2symbol($pos)] < 0} {
02266 lappend symbols $pos2symbol($pos)
02267 }
02268 }
02269 foreach a $symbols {
02270 set U {}
02271 foreach pos $sym2pos($a) {
02272 if {[lsearch $T $pos] >= 0} {
02273 # add followpos($pos)
02274 if {$var($pos) == {}} {
02275 lappend U $accepting
02276 } else {
02277 eval lappend U $var($pos)
02278 }
02279 }
02280 }
02281 set U [makeSet $U]
02282 if {[llength $U] && [lsearch $marked $U] < 0 && [lsearch $unmarked $U] < 0} {
02283 lappend unmarked $U
02284 }
02285 set Dtran($T,$a) $U
02286 }
02287
02288 }
02289
02290 return [list [array get Dtran] [array get sym2pos] $accepting]
02291 }
02292
02293
02294
02295
02296
02297
02298
02299
02300
02301
02302
02303
02304
02305
02306
02307 ret sgml::followpos (type state , type st , type firstpos , type lastpos) {
02308 upvar #0 $state var
02309
02310 switch -- [lindex [lindex $st 1] 0] {
02311 :seq {
02312 for {set i 1} {$i < [llength [lindex $st 1]]} {incr i} {
02313 followpos $state [lindex [lindex $st 1] $i] \
02314 [lindex [lindex $firstpos 0] [expr $i - 1]] \
02315 [lindex [lindex $lastpos 0] [expr $i - 1]]
02316 foreach pos [lindex [lindex [lindex $lastpos 0] [expr $i - 1]] 1] {
02317 eval lappend var($pos) [lindex [lindex [lindex $firstpos 0] $i] 1]
02318 set var($pos) [makeSet $var($pos)]
02319 }
02320 }
02321 }
02322 :choice {
02323 for {set i 1} {$i < [llength [lindex $st 1]]} {incr i} {
02324 followpos $state [lindex [lindex $st 1] $i] \
02325 [lindex [lindex $firstpos 0] [expr $i - 1]] \
02326 [lindex [lindex $lastpos 0] [expr $i - 1]]
02327 }
02328 }
02329 default {
02330 # No action at leaf nodes
02331 }
02332 }
02333
02334 switch -- [lindex $st 0] {
02335 ? {
02336 # We having nothing to do here ! Doing the same as
02337 # for * effectively converts this qualifier into the other.
02338 }
02339 * {
02340 foreach pos [lindex $lastpos 1] {
02341 eval lappend var($pos) [lindex $firstpos 1]
02342 set var($pos) [makeSet $var($pos)]
02343 }
02344 }
02345 }
02346
02347 }
02348
02349
02350
02351
02352
02353
02354
02355
02356
02357
02358
02359
02360
02361
02362
02363 ret sgml::TraverseDepth1st (type state , type t , type leaf , type nonTerm) {
02364 upvar #0 $state var
02365
02366 set nullable {}
02367 set firstpos {}
02368 set lastpos {}
02369
02370 switch -- [lindex [lindex $t 1] 0] {
02371 :seq -
02372 :choice {
02373 set rep [lindex $t 0]
02374 set cs [lindex [lindex $t 1] 0]
02375
02376 foreach child [lrange [lindex $t 1] 1 end] {
02377 foreach {childNullable childFirstpos childLastpos} \
02378 [TraverseDepth1st $state $child $leaf $nonTerm] break
02379 lappend nullable $childNullable
02380 lappend firstpos $childFirstpos
02381 lappend lastpos $childLastpos
02382 }
02383
02384 eval $nonTerm
02385 }
02386 default {
02387 incr var(number)
02388 set rep [lindex [lindex $t 0] 0]
02389 set name [lindex [lindex $t 1] 0]
02390 eval $leaf
02391 }
02392 }
02393
02394 return [list $nullable $firstpos $lastpos]
02395 }
02396
02397
02398
02399
02400
02401
02402
02403
02404
02405
02406
02407
02408
02409 ret sgml::firstpos (type cs , type firstpos , type nullable) {
02410 switch -- $cs {
02411 :seq {
02412 set result [lindex [lindex $firstpos 0] 1]
02413 for {set i 0} {$i < [llength $nullable]} {incr i} {
02414 if {[lindex [lindex $nullable $i] 1]} {
02415 eval lappend result [lindex [lindex $firstpos [expr $i + 1]] 1]
02416 } else {
02417 break
02418 }
02419 }
02420 }
02421 :choice {
02422 foreach child $firstpos {
02423 eval lappend result $child
02424 }
02425 }
02426 }
02427
02428 return [list $firstpos [makeSet $result]]
02429 }
02430
02431
02432
02433
02434
02435
02436
02437
02438
02439
02440
02441
02442
02443
02444 ret sgml::lastpos (type cs , type lastpos , type nullable) {
02445 switch -- $cs {
02446 :seq {
02447 set result [lindex [lindex $lastpos end] 1]
02448 for {set i [expr [llength $nullable] - 1]} {$i >= 0} {incr i -1} {
02449 if {[lindex [lindex $nullable $i] 1]} {
02450 eval lappend result [lindex [lindex $lastpos $i] 1]
02451 } else {
02452 break
02453 }
02454 }
02455 }
02456 :choice {
02457 foreach child $lastpos {
02458 eval lappend result $child
02459 }
02460 }
02461 }
02462
02463 return [list $lastpos [makeSet $result]]
02464 }
02465
02466
02467
02468
02469
02470
02471
02472
02473
02474
02475
02476 ret sgml::makeSet s (
02477 type foreach , type r $, type s , optional
02478 if ={[llength $r] , optional
02479 set =unique($r) {
02480 )
02481 }
02482 return [array names unique]
02483 }
02484
02485 # sgml::nullable --
02486 #
02487 # Compute the nullable function for a node.
02488 #
02489 # Arguments:
02490 # nodeType leaf or nonterminal
02491 # rep repetition applying to this node
02492 # name leaf node: symbol for this node, nonterm node: choice or seq node
02493 # subtree nonterm node: nullable functions for the subtree
02494 #
02495 # Results:
02496 # Returns nullable function for this branch of the tree.
02497
02498 proc sgml::nullable {nodeType rep name {subtree {}}} {
02499 switch -glob -- $rep:$nodeType {
02500 :leaf -
02501 +:leaf {
02502 return [list {} 0]
02503 }
02504 \\*:leaf -
02505 \\?:leaf {
02506 return [list {} 1]
02507 }
02508 \\*:nonterm -
02509 \\?:nonterm {
02510 return [list $subtree 1]
02511 }
02512 :nonterm -
02513 +:nonterm {
02514 switch -- $name {
02515 :choice {
02516 result = 0
02517 foreach child $subtree {
02518 result = [expr $result || [lindex $child 1]]
02519 }
02520 }
02521 :seq {
02522 result = 1
02523 foreach child $subtree {
02524 result = [expr $result && [lindex $child 1]]
02525 }
02526 }
02527 }
02528 return [list $subtree $result]
02529 }
02530 }
02531 }
02532
02533
02534
02535
02536
02537
02538
02539
02540
02541
02542
02543
02544
02545 ret sgml::DTD:ATTLIST (type opts , type name , type attspec) {
02546 variable attlist_exp
02547 variable attlist_enum_exp
02548 variable attlist_fixed_exp
02549
02550 array set options $opts
02551
02552 # Parse the attribute list. If it were regular, could just use foreach,
02553 # but some attributes may have values.
02554 regsub -all {([][$\\])} $attspec {\\\1} attspec
02555 regsub -all $attlist_exp $attspec "\}\nDTDAttribute {$options(-attlistdeclcommand)} $name $options(attlistdecls) {\\1} {\\2} {\\3} {} \{" attspec
02556 regsub -all $attlist_enum_exp $attspec "\}\nDTDAttribute {$options(-attlistdeclcommand)} $name $options(attlistdecls) {\\1} {\\2} {} {\\4} \{" attspec
02557 regsub -all $attlist_fixed_exp $attspec "\}\nDTDAttribute {$options(-attlistdeclcommand)} $name $options(attlistdecls) {\\1} {\\2} {\\3} {\\4} \{" attspec
02558
02559 eval "noop \{$attspec\}"
02560
02561 return {}
02562 }
02563
02564
02565
02566
02567
02568
02569
02570
02571
02572
02573
02574
02575
02576
02577
02578
02579
02580
02581 ret sgml::DTDAttribute args (
02582 # type BUG: , type Some , type problems , type with , type parameter , type passing - , type deal , type with , type it , type later
02583 , type foreach , optional callback =name var =att type =default value =text $, type args , type break
02584
02585 , type upvar #0 $, type var , type atts
02586
02587 , type if , optional [string =length [string =trim $text]] , optional
02588 return =-code error ="unexpected text =\"$text\" in =attribute definition"
02589
02590
02591 # , type What , type about , type overridden , type attribute , type defns?
02592 # , type A , type non-, type validating , type app , type may , type want , type to , type know , type about , type them
02593 # (, type eg. , type an , type editor)
02594 , type if , optional ![info =exists atts($name/$att)] , optional
02595 set =atts($name/$att) [list =$type $default =$value]
02596 uplevel =#0 $callback =[list $name =$att $type =$default $value]
02597
02598
02599 , type return , optional
02600 )
02601
02602 # sgml::DTD:ENTITY --
02603 #
02604 # <!ENTITY ...> declaration.
02605 #
02606 # Callbacks:
02607 # -entitydeclcommand for general entity declaration
02608 # -unparsedentitydeclcommand for unparsed external entity declaration
02609 # -parameterentitydeclcommand for parameter entity declaration
02610 #
02611 # Arguments:
02612 # opts configuration options
02613 # name name of entity being defined
02614 # param whether a parameter entity is being defined
02615 # value unparsed replacement text
02616 #
02617 # Results:
02618 # Modifies the caller's entities array variable
02619
02620 proc sgml::DTD:ENTITY {opts name param value} {
02621
02622 array options = $opts
02623
02624 if {[string compare % $param]} {
02625
02626 upvar
02627 upvar
02628
02629 if {[info exists ents($name)] || [info exists externals($name)]} {
02630 eval $options(-warningcommand) entity [list "entity \"$name\" already declared"]
02631 } else {
02632 if {[catch {uplevel
02633 return -code error "unable to parse entity declaration due to \"$value\""
02634 }
02635 switch -glob [lindex $value 0],[lindex $value 3] {
02636 internal, {
02637 ents = ($name) [EntitySubst [array get options] [lindex $value 1]]
02638 uplevel
02639 }
02640 internal,* {
02641 return -code error "unexpected NDATA declaration"
02642 }
02643 external, {
02644 externals = ($name) [lrange $value 1 2]
02645 uplevel
02646 }
02647 external,* {
02648 externals = ($name) [lrange $value 1 3]
02649 uplevel
02650 }
02651 default {
02652 return -code error "internal error: unexpected parser state"
02653 }
02654 }
02655 }
02656 } else {
02657
02658 upvar
02659 upvar
02660
02661 if {[info exists PEnts($name)] || [info exists ExtPEnts($name)]} {
02662 eval $options(-warningcommand) parameterentity [list "parameter entity \"$name\" already declared"]
02663 } else {
02664 if {[catch {uplevel
02665 return -code error "unable to parse parameter entity declaration due to \"$value\""
02666 }
02667 if {[string length [lindex $value 3]]} {
02668 return -code error "NDATA illegal in parameter entity declaration"
02669 }
02670 switch [lindex $value 0] {
02671 internal {
02672
02673 value = [EntitySubst [array get options] [lindex $value 1]]
02674
02675 PEnts = ($name) $value
02676 uplevel
02677 }
02678 external -
02679 default {
02680
02681
02682
02683
02684 token = [uri::geturl [uri::resolve $options(-baseuri) [lindex $value 1]]]
02685
02686 ExtPEnts = ($name) [lindex [array get $token data] 1]
02687 uplevel
02688 }
02689 }
02690 }
02691 }
02692 }
02693
02694
02695
02696
02697
02698
02699
02700
02701
02702
02703
02704
02705
02706
02707
02708
02709 ret sgml::EntitySubst (type opts , type value) {
02710 array set options $opts
02711
02712 # Protect Tcl special characters
02713 regsub -all {([{}\\])} $value {\\\1} value
02714
02715 # Find entity references
02716 regsub -all (&#\[0-9\]+|&#x\[0-9a-fA-F\]+|%${::sgml::Name})\; $value "\[EntitySubstValue [list $options(parameterentities)] {\\1}\]" value
02717
02718 set result [subst $value]
02719
02720 return $result
02721 }
02722
02723
02724
02725
02726
02727
02728
02729
02730
02731
02732
02733
02734 ret sgml::EntitySubstValue (type PEvar , type ref) {
02735 # SRB: Bug fix 2008-11-18 #812051: surround case labels in braces for compatibility with Freewrap
02736 switch -glob -- $ref {
02737 {&#x*} {
02738 scan [string range $ref 3 end] %x hex
02739 return [format %c $hex]
02740 }
02741 {&#*} {
02742 return [format %c [string range $ref 2 end]]
02743 }
02744 {%*} {
02745 upvar #0 $PEvar PEs
02746 set ref [string range $ref 1 end]
02747 if {[info exists PEs($ref)]} {
02748 return $PEs($ref)
02749 } else {
02750 return -code error "parameter entity \"$ref\" not declared"
02751 }
02752 }
02753 default {
02754 return -code error "internal error - unexpected entity reference"
02755 }
02756 }
02757 return {}
02758 }
02759
02760
02761
02762
02763
02764
02765
02766
02767
02768
02769 ret sgml::DTD:NOTATION (type opts , type name , type value) {
02770 return {}
02771
02772 variable notation_exp
02773 upvar opts state
02774
02775 if {[regexp $notation_exp $value x scheme data] == 2} {
02776 } else {
02777 eval $state(-errorcommand) [list notationvalue "notation value \"$value\" incorrectly specified"]
02778 }
02779 }
02780
02781
02782
02783
02784
02785
02786
02787
02788
02789
02790
02791 ret sgml::ResolveEntity (type cmd , type base , type sysId , type pubId) {
02792 variable ParseEventNum
02793
02794 if {[catch {uri::resolve $base $sysId} url]} {
02795 return -code error "unable to resolve system identifier \"$sysId\""
02796 }
02797 if {[catch {uri::geturl $url} token]} {
02798 return -code error "unable to retrieve external entity \"$url\" for system identifier \"$sysId\""
02799 }
02800
02801 upvar #0 $token data
02802
02803 set parser [uplevel #0 $cmd entityparser]
02804
02805 set body {}
02806 catch {set body $data(body)}
02807 catch {set body $data(data)}
02808 if {[string length $body]} {
02809 uplevel #0 $parser parse [list $body] -dtdsubset external
02810 }
02811 $parser free
02812
02813 return {}
02814 }
02815