sgmlparser.tcl

Go to the documentation of this file.
00001 /*  sgmlparser.tcl --*/
00002 /* */
00003 /*  This file provides the generic part of a parser for SGML-based*/
00004 /*  languages, namely HTML and XML.*/
00005 /* */
00006 /*  NB.  It is a misnomer.  There is no support for parsing*/
00007 /*  arbitrary SGML as such.*/
00008 /* */
00009 /*  See sgml.tcl for variable definitions.*/
00010 /* */
00011 /*  Copyright (c) 2008 Explain*/
00012 /*  http://www.explain.com.au/*/
00013 /*  Copyright (c) 1998-2003 Zveno Pty Ltd*/
00014 /*  http://www.zveno.com/*/
00015 /* */
00016 /*  See the file "LICENSE" in this distribution for information on usage and*/
00017 /*  redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.*/
00018 /* */
00019 /*  $Id: sgmlparser.tcl,v 1.32 2003/12/09 04:43:15 balls Exp $*/
00020 
00021 package require sgml 1.9
00022 
00023 package require uri 1.1
00024 
00025 package provide sgmlparser 1.1
00026 
00027 namespace sgml {
00028     namespace export tokenise parseEvent
00029 
00030     namespace export parseDTD
00031 
00032     /*  NB. Most namespace variables are defined in sgml-8.[01].tcl*/
00033     /*  to account for differences between versions of Tcl.*/
00034     /*  This especially includes the regular expressions used.*/
00035 
00036     variable ParseEventNum
00037     if {![info exists ParseEventNum]} {
00038      ParseEventNum =  0
00039     }
00040     variable ParseDTDnum
00041     if {![info exists ParseDTDNum]} {
00042      ParseDTDNum =  0
00043     }
00044 
00045     variable declExpr [cl $::sgml::Wsp]*([cl ^$::sgml::Wsp]+)[cl $::sgml::Wsp]*([cl ^>]*)
00046     variable EntityExpr [cl $::sgml::Wsp]*(%[cl $::sgml::Wsp])?[cl $::sgml::Wsp]*($::sgml::Name)[cl $::sgml::Wsp]+(.*)
00047 
00048     /* variable MarkupDeclExpr <([cl ^$::sgml::Wsp>]+)[cl $::sgml::Wsp]*([cl ^$::sgml::Wsp]+)[cl $::sgml::Wsp]*([cl ^>]*)>*/
00049     /* variable MarkupDeclSub "} {\\1} {\\2} {\\3} {"*/
00050     variable MarkupDeclExpr <[cl $::sgml::Wsp]*([cl ^$::sgml::Wsp>]+)[cl $::sgml::Wsp]*([cl ^>]*)>
00051     variable MarkupDeclSub "\} {\\1} {\\2} \{"
00052 
00053     variable ExternalEntityExpr ^(PUBLIC|SYSTEM)[cl $::sgml::Wsp]+("|')(.*?)\\2([cl $::sgml::Wsp]+("|')(.*?)\\2)?([cl $::sgml::Wsp]+NDATA[cl $::sgml::Wsp]+($::xml::Name))?\$
00054 
00055     variable StdOptions
00056     array  StdOptions =  [list \
00057     -elementstartcommand        [namespace current]::noop   \
00058     -elementendcommand      [namespace current]::noop   \
00059     -characterdatacommand       [namespace current]::noop   \
00060     -ret essinginstructioncommand   [namespace current]::noop   \
00061     -externalentitycommand      ()              \
00062     -xmldeclcommand         [namespace current]::noop   \
00063     -doctypecommand         [namespace current]::noop   \
00064     -commentcommand         [namespace current]::noop   \
00065     -entitydeclcommand      [namespace current]::noop   \
00066     -unparsedentitydeclcommand  [namespace current]::noop   \
00067     -parameterentitydeclcommand [namespace current]::noop   \
00068     -notationdeclcommand        [namespace current]::noop   \
00069     -elementdeclcommand     [namespace current]::noop   \
00070     -attlistdeclcommand     [namespace current]::noop   \
00071     -paramentityparsing     1               \
00072     -defaultexpandinternalentities  1               \
00073     -startdoctypedeclcommand    [namespace current]::noop   \
00074     -enddoctypedeclcommand      [namespace current]::noop   \
00075     -entityreferencecommand     {}              \
00076     -warningcommand         [namespace current]::noop   \
00077     -errorcommand           [namespace current]::Error  \
00078     -final              1               \
00079     -validate           0               \
00080     -baseuri            {}              \
00081     -name               {}              \
00082     -cmd                {}              \
00083     -emptyelement           [namespace current]::EmptyElement   \
00084     -parseattributelistcommand  [namespace current]::noop   \
00085     -parseentitydeclcommand     [namespace current]::noop   \
00086     -normalize          1               \
00087     -internaldtd            {}              \
00088     -reportempty            0               \
00089     -ignorewhitespace       0               \
00090     ]
00091 }
00092 
00093 /*  sgml::tokenise --*/
00094 /* */
00095 /*  Transform the given HTML/XML text into a Tcl list.*/
00096 /* */
00097 /*  Arguments:*/
00098 /*  sgml        text to tokenize*/
00099 /*  elemExpr    RE to recognise tags*/
00100 /*  elemSub     transform for matched tags*/
00101 /*  args        options*/
00102 /* */
00103 /*  Valid Options:*/
00104 /*        -internaldtdvariable*/
00105 /*  -final      boolean     True if no more data is to be supplied*/
00106 /*  -statevariable  varName     Name of a variable used to store info*/
00107 /* */
00108 /*  Results:*/
00109 /*  Returns a Tcl list representing the document.*/
00110 
00111 ret  sgml::tokenise (type sgml , type elemExpr , type elemSub , type args) {
00112     array set options {-final 1}
00113     array set options $args
00114     set options(-final) [Boolean $options(-final)]
00115 
00116     # If the data is not final then there must be a variable to store
00117     # unused data.
00118     if {!$options(-final) && ![info exists options(-statevariable)]} {
00119     return -code error {option "-statevariable" required if not final}
00120     }
00121 
00122     # Pre-process stage
00123     #
00124     # Extract the internal DTD subset, if any
00125 
00126     catch {upvar #0 $options(-internaldtdvariable) dtd}
00127     if {[regexp {<!DOCTYPE[^[<]+\[([^]]+)\]} $sgml discard dtd]} {
00128     regsub {(<!DOCTYPE[^[<]+)(\[[^]]+\])} $sgml {\1\&xml:intdtd;} sgml
00129     }
00130 
00131     # Protect Tcl special characters
00132     regsub -all {([{}\\])} $sgml {\\\1} sgml
00133 
00134     # Do the translation
00135 
00136     if {[info exists options(-statevariable)]} {
00137     # Mats: Several rewrites here to handle -final 0 option.
00138     # If any cached unparsed xml (state(leftover)), prepend it.
00139     upvar #0 $options(-statevariable) state
00140     if {[string length $state(leftover)]} {
00141         regsub -all $elemExpr $state(leftover)$sgml $elemSub sgml
00142         set state(leftover) {}
00143     } else {
00144         regsub -all $elemExpr $sgml $elemSub sgml
00145     }
00146     set sgml "{} {} {} \{$sgml\}"
00147 
00148     # Performance note (Tcl 8.0):
00149     #   Use of lindex, lreplace will cause parsing to list object
00150 
00151     # This RE only fixes chopped inside tags, not chopped text.
00152     if {[regexp {^([^<]*)(<[^>]*$)} [lindex $sgml end] x text rest]} {
00153         set sgml [lreplace $sgml end end $text]
00154         # Mats: unmatched stuff means that it is chopped off. Cache it for next round.
00155         set state(leftover) $rest
00156     }
00157 
00158     # Patch from bug report #596959, Marshall Rose
00159     if {[string compare [lindex $sgml 4] ""]} {
00160         set sgml [linsert $sgml 0 {} {} {} {} {}]
00161     }
00162 
00163     } else {
00164 
00165     # Performance note (Tcl 8.0):
00166     #   In this case, no conversion to list object is performed
00167 
00168     # Mats: This fails if not -final and $sgml is chopped off right in a tag.   
00169     regsub -all $elemExpr $sgml $elemSub sgml
00170     set sgml "{} {} {} \{$sgml\}"
00171     }
00172 
00173     return $sgml
00174 
00175 }
00176 
00177 /*  sgml::parseEvent --*/
00178 /* */
00179 /*  Produces an event stream for a XML/HTML document,*/
00180 /*  given the Tcl list format returned by tokenise.*/
00181 /* */
00182 /*  This procedure checks that the document is well-formed,*/
00183 /*  and throws an error if the document is found to be not*/
00184 /*  well formed.  Warnings are passed via the -warningcommand script.*/
00185 /* */
00186 /*  The procedure only check for well-formedness,*/
00187 /*  no DTD is required.  However, facilities are provided for entity expansion.*/
00188 /* */
00189 /*  Arguments:*/
00190 /*  sgml        Instance data, as a Tcl list.*/
00191 /*  args        option/value pairs*/
00192 /* */
00193 /*  Valid Options:*/
00194 /*  -final          Indicates end of document data*/
00195 /*  -validate       Boolean to enable validation*/
00196 /*  -baseuri        URL for resolving relative URLs*/
00197 /*  -elementstartcommand    Called when an element starts*/
00198 /*  -elementendcommand  Called when an element ends*/
00199 /*  -characterdatacommand   Called when character data occurs*/
00200 /*  -entityreferencecommand Called when an entity reference occurs*/
00201 /*  -processinginstructioncommand   Called when a PI occurs*/
00202 /*  -externalentitycommand  Called for an external entity reference*/
00203 /* */
00204 /*  -xmldeclcommand     Called when the XML declaration occurs*/
00205 /*  -doctypecommand     Called when the document type declaration occurs*/
00206 /*  -commentcommand     Called when a comment occurs*/
00207 /*  -entitydeclcommand  Called when a parsed entity is declared*/
00208 /*  -unparsedentitydeclcommand  Called when an unparsed external entity is declared*/
00209 /*  -parameterentitydeclcommand Called when a parameter entity is declared*/
00210 /*  -notationdeclcommand    Called when a notation is declared*/
00211 /*  -elementdeclcommand Called when an element is declared*/
00212 /*  -attlistdeclcommand Called when an attribute list is declared*/
00213 /*  -paramentityparsing Boolean to enable/disable parameter entity substitution*/
00214 /*  -defaultexpandinternalentities  Boolean to enable/disable expansion of entities declared in internal DTD subset*/
00215 /* */
00216 /*  -startdoctypedeclcommand    Called when the Doc Type declaration starts (see also -doctypecommand)*/
00217 /*  -enddoctypedeclcommand  Called when the Doc Type declaration ends (see also -doctypecommand)*/
00218 /* */
00219 /*  -errorcommand       Script to evaluate for a fatal error*/
00220 /*  -warningcommand     Script to evaluate for a reportable warning*/
00221 /*  -statevariable      global state variable*/
00222 /*  -normalize      whether to normalize names*/
00223 /*  -reportempty        whether to include an indication of empty elements*/
00224 /*  -ignorewhitespace   whether to automatically strip whitespace*/
00225 /* */
00226 /*  Results:*/
00227 /*  The various callback scripts are invoked.*/
00228 /*  Returns empty string.*/
00229 /* */
00230 /*  BUGS:*/
00231 /*  If command options are set to empty string then they should not be invoked.*/
00232 
00233 ret  sgml::parseEvent (type sgml , type args) {
00234     variable Wsp
00235     variable noWsp
00236     variable Nmtoken
00237     variable Name
00238     variable ParseEventNum
00239     variable StdOptions
00240 
00241     array set options [array get StdOptions]
00242     catch {array set options $args}
00243 
00244     # Mats:
00245     # If the data is not final then there must be a variable to persistently store the parse state.
00246     if {!$options(-final) && ![info exists options(-statevariable)]} {
00247     return -code error {option "-statevariable" required if not final}
00248     }
00249     
00250     foreach {opt value} [array get options *command] {
00251     if {[string compare $opt "-externalentitycommand"] && ![string length $value]} {
00252         set options($opt) [namespace current]::noop
00253     }
00254     }
00255 
00256     if {![info exists options(-statevariable)]} {
00257     set options(-statevariable) [namespace current]::ParseEvent[incr ParseEventNum]
00258     }
00259     if {![info exists options(entities)]} {
00260     set options(entities) [namespace current]::Entities$ParseEventNum
00261     array set $options(entities) [array get [namespace current]::EntityPredef]
00262     }
00263     if {![info exists options(extentities)]} {
00264     set options(extentities) [namespace current]::ExtEntities$ParseEventNum
00265     }
00266     if {![info exists options(parameterentities)]} {
00267     set options(parameterentities) [namespace current]::ParamEntities$ParseEventNum
00268     }
00269     if {![info exists options(externalparameterentities)]} {
00270     set options(externalparameterentities) [namespace current]::ExtParamEntities$ParseEventNum
00271     }
00272     if {![info exists options(elementdecls)]} {
00273     set options(elementdecls) [namespace current]::ElementDecls$ParseEventNum
00274     }
00275     if {![info exists options(attlistdecls)]} {
00276     set options(attlistdecls) [namespace current]::AttListDecls$ParseEventNum
00277     }
00278     if {![info exists options(notationdecls)]} {
00279     set options(notationdecls) [namespace current]::NotationDecls$ParseEventNum
00280     }
00281     if {![info exists options(namespaces)]} {
00282     set options(namespaces) [namespace current]::Namespaces$ParseEventNum
00283     }
00284 
00285     # For backward-compatibility
00286     catch {set options(-baseuri) $options(-baseurl)}
00287 
00288     # Choose an external entity resolver
00289 
00290     if {![string length $options(-externalentitycommand)]} {
00291     if {$options(-validate)} {
00292         set options(-externalentitycommand) [namespace code ResolveEntity]
00293     } else {
00294         set options(-externalentitycommand) [namespace code noop]
00295     }
00296     }
00297 
00298     upvar #0 $options(-statevariable) state
00299     upvar #0 $options(entities) entities
00300 
00301     # Mats:
00302     # The problem is that the state is not maintained when -final 0 !
00303     # I've switched back to an older version here. 
00304     
00305     if {![info exists state(line)]} {
00306     # Initialise the state variable
00307     array set state {
00308         mode normal
00309         haveXMLDecl 0
00310         haveDocElement 0
00311         inDTD 0
00312         context {}
00313         stack {}
00314         line 0
00315         defaultNS {}
00316         defaultNSURI {}
00317     }
00318     }
00319 
00320     foreach {tag close param text} $sgml {
00321 
00322     # Keep track of lines in the input
00323     incr state(line) [regsub -all \n $param {} discard]
00324     incr state(line) [regsub -all \n $text {} discard]
00325 
00326     # If the current mode is cdata or comment then we must undo what the
00327     # regsub has done to reconstitute the data
00328 
00329     set empty {}
00330     switch $state(mode) {
00331         comment {
00332         # This had "[string length $param] && " as a guard -
00333         # can't remember why :-(
00334         if {[regexp ([cl ^-]*)--\$ $tag discard comm1]} {
00335             # end of comment (in tag)
00336             set tag {}
00337             set close {}
00338             set state(mode) normal
00339             DeProtect1 $options(-commentcommand) $state(commentdata)<$comm1
00340             unset state(commentdata)
00341         } elseif {[regexp ([cl ^-]*)--\$ $param discard comm1]} {
00342             # end of comment (in attributes)
00343             DeProtect1 $options(-commentcommand) $state(commentdata)<$close$tag>$comm1
00344             unset state(commentdata)
00345             set tag {}
00346             set param {}
00347             set close {}
00348             set state(mode) normal
00349         } elseif {[regexp ([cl ^-]*)-->(.*) $text discard comm1 text]} {
00350             # end of comment (in text)
00351             DeProtect1 $options(-commentcommand) $state(commentdata)<$close$tag$param>$comm1
00352             unset state(commentdata)
00353             set tag {}
00354             set param {}
00355             set close {}
00356             set state(mode) normal
00357         } else {
00358             # comment continues
00359             append state(commentdata) <$close$tag$param>$text
00360             continue
00361         }
00362         }
00363         cdata {
00364         if {[string length $param] && [regexp ([cl ^\]]*)\]\][cl $Wsp]*\$ $tag discard cdata1]} {
00365             # end of CDATA (in tag)
00366             PCDATA [array get options] $state(cdata)<[subst -nocommand -novariable $close$cdata1]
00367             set text [subst -novariable -nocommand $text]
00368             set tag {}
00369             unset state(cdata)
00370             set state(mode) normal
00371         } elseif {[regexp ([cl ^\]]*)\]\][cl $Wsp]*\$ $param discard cdata1]} {
00372             # end of CDATA (in attributes)
00373             PCDATA [array get options] $state(cdata)<[subst -nocommand -novariable $close$tag$cdata1]
00374             set text [subst -novariable -nocommand $text]
00375             set tag {}
00376             set param {}
00377             unset state(cdata)
00378             set state(mode) normal
00379         } elseif {[regexp (.*)\]\][cl $Wsp]*>(.*) $text discard cdata1 text]} {
00380             # end of CDATA (in text)
00381             PCDATA [array get options] $state(cdata)<[subst -nocommand -novariable $close$tag$param>$cdata1]
00382             set text [subst -novariable -nocommand $text]
00383             set tag {}
00384             set param {}
00385             set close {}
00386             unset state(cdata)
00387             set state(mode) normal
00388         } else {
00389             # CDATA continues
00390             append state(cdata) [subst -nocommand -novariable <$close$tag$param>$text]
00391             continue
00392         }
00393         }
00394         continue {
00395         # We're skipping elements looking for the close tag
00396         switch -glob -- [string length $tag],[regexp {^\?|!.*} $tag],$close {
00397             0,* {
00398             continue
00399             }
00400             *,0, {
00401             if {![string compare $tag $state(continue:tag)]} {
00402                 set empty [uplevel #0 $options(-emptyelement) [list $tag $param $empty]]
00403                 if {![string length $empty]} {
00404                 incr state(continue:level)
00405                 }
00406             }
00407             continue
00408             }
00409             *,0,/ {
00410             if {![string compare $tag $state(continue:tag)]} {
00411                 incr state(continue:level) -1
00412             }
00413             if {!$state(continue:level)} {
00414                 unset state(continue:tag)
00415                 unset state(continue:level)
00416                 set state(mode) {}
00417             }
00418             }
00419             default {
00420             continue
00421             }
00422         }
00423         }
00424         default {
00425         # The trailing slash on empty elements can't be automatically separated out
00426         # in the RE, so we must do it here.
00427         regexp (.*)(/)[cl $Wsp]*$ $param discard param empty
00428         }
00429     }
00430 
00431     # default: normal mode
00432 
00433     # Bug: if the attribute list has a right angle bracket then the empty
00434     # element marker will not be seen
00435 
00436     set empty [uplevel #0 $options(-emptyelement) [list $tag $param $empty]]
00437 
00438     switch -glob -- [string length $tag],[regexp {^\?|!.*} $tag],$close,$empty {
00439 
00440         0,0,, {
00441         # Ignore empty tag - dealt with non-normal mode above
00442         }
00443         *,0,, {
00444 
00445         # Start tag for an element.
00446 
00447         # Check if the internal DTD entity is in an attribute value
00448         regsub -all &xml:intdtd\; $param \[$options(-internaldtd)\] param
00449 
00450         set code [catch {ParseEvent:ElementOpen $tag $param [array get options]} msg]
00451         set state(haveDocElement) 1
00452         switch $code {
00453             0 {# OK}
00454             3 {
00455             # break
00456             return {}
00457             }
00458             4 {
00459             # continue
00460             # Remember this tag and look for its close
00461             set state(continue:tag) $tag
00462             set state(continue:level) 1
00463             set state(mode) continue
00464             continue
00465             }
00466             default {
00467             return -code $code -errorinfo $::errorInfo $msg
00468             }
00469         }
00470 
00471         }
00472 
00473         *,0,/, {
00474 
00475         # End tag for an element.
00476 
00477         set code [catch {ParseEvent:ElementClose $tag [array get options]} msg]
00478         switch $code {
00479             0 {# OK}
00480             3 {
00481             # break
00482             return {}
00483             }
00484             4 {
00485             # continue
00486             # skip sibling nodes
00487             set state(continue:tag) [lindex $state(stack) end]
00488             set state(continue:level) 1
00489             set state(mode) continue
00490             continue
00491             }
00492             default {
00493             return -code $code -errorinfo $::errorInfo $msg
00494             }
00495         }
00496 
00497         }
00498 
00499         *,0,,/ {
00500 
00501         # Empty element
00502 
00503         # The trailing slash sneaks through into the param variable
00504         regsub -all /[cl $::sgml::Wsp]*\$ $param {} param
00505 
00506         set code [catch {ParseEvent:ElementOpen $tag $param [array get options] -empty 1} msg]
00507         set state(haveDocElement) 1
00508         switch $code {
00509             0 {# OK}
00510             3 {
00511             # break
00512             return {}
00513             }
00514             4 {
00515             # continue
00516             # Pretty useless since it closes straightaway
00517             }
00518             default {
00519             return -code $code -errorinfo $::errorInfo $msg
00520             }
00521         }
00522         set code [catch {ParseEvent:ElementClose $tag [array get options] -empty 1} msg]
00523         switch $code {
00524             0 {# OK}
00525             3 {
00526             # break
00527             return {}
00528             }
00529             4 {
00530             # continue
00531             # skip sibling nodes
00532             set state(continue:tag) [lindex $state(stack) end]
00533             set state(continue:level) 1
00534             set state(mode) continue
00535             continue
00536             }
00537             default {
00538             return -code $code -errorinfo $::errorInfo $msg
00539             }
00540         }
00541 
00542         }
00543 
00544         *,1,* {
00545         # Processing instructions or XML declaration
00546         switch -glob -- $tag {
00547 
00548             {\?xml} {
00549             # XML Declaration
00550             if {$state(haveXMLDecl)} {
00551                 uplevel #0 $options(-errorcommand) [list illegalcharacter "unexpected characters \"<$tag\" around line $state(line)"]
00552             } elseif {![regexp {\?$} $param]} {
00553                 uplevel #0 $options(-errorcommand) [list missingcharacters "XML Declaration missing characters \"?>\" around line $state(line)"]
00554             } else {
00555 
00556                 # We can do the parsing in one step with Tcl 8.1 RE's
00557                 # This has the benefit of performing better WF checking
00558 
00559                 set adv_re [format {^[%s]*version[%s]*=[%s]*("|')(-+|[a-zA-Z0-9_.:]+)\1([%s]+encoding[%s]*=[%s]*("|')([A-Za-z][-A-Za-z0-9._]*)\4)?([%s]*standalone[%s]*=[%s]*("|')(yes|no)\7)?[%s]*\?$} $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp]
00560 
00561                 if {[catch {regexp $adv_re $param discard delimiter version discard delimiter encoding discard delimiter standalone} matches]} {
00562                 # Otherwise we must fallback to 8.0.
00563                 # This won't detect certain well-formedness errors
00564 
00565                 # Get the version number
00566                 if {[regexp [format {[%s]*version[%s]*=[%s]*"(-+|[a-zA-Z0-9_.:]+)"[%s]*} $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp] $param discard version] || [regexp [format {[%s]*version[%s]*=[%s]*'(-+|[a-zA-Z0-9_.:]+)'[%s]*} $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp] $param discard version]} {
00567                     if {[string compare $version "1.0"]} {
00568                     # Should we support future versions?
00569                     # At least 1.X?
00570                     uplevel #0 $options(-errorcommand) [list versionincompatibility "document XML version \"$version\" is incompatible with XML version 1.0"]
00571                     }
00572                 } else {
00573                     uplevel #0 $options(-errorcommand) [list missingversion "XML Declaration missing version information around line $state(line)"]
00574                 }
00575 
00576                 # Get the encoding declaration
00577                 set encoding {}
00578                 regexp [format {[%s]*encoding[%s]*=[%s]*"([A-Za-z]([A-Za-z0-9._]|-)*)"[%s]*} $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp] $param discard encoding
00579                 regexp [format {[%s]*encoding[%s]*=[%s]*'([A-Za-z]([A-Za-z0-9._]|-)*)'[%s]*} $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp] $param discard encoding
00580 
00581                 # Get the standalone declaration
00582                 set standalone {}
00583                 regexp [format {[%s]*standalone[%s]*=[%s]*"(yes|no)"[%s]*} $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp] $param discard standalone
00584                 regexp [format {[%s]*standalone[%s]*=[%s]*'(yes|no)'[%s]*} $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp] $param discard standalone
00585 
00586                 # Invoke the callback
00587                 uplevel #0 $options(-xmldeclcommand) [list $version $encoding $standalone]
00588 
00589                 } elseif {$matches == 0} {
00590                 uplevel #0 $options(-errorcommand) [list illformeddeclaration "XML Declaration not well-formed around line $state(line)"]
00591                 } else {
00592 
00593                 # Invoke the callback
00594                 uplevel #0 $options(-xmldeclcommand) [list $version $encoding $standalone]
00595 
00596                 }
00597 
00598             }
00599 
00600             }
00601 
00602             {\?*} {
00603             # Processing instruction
00604             set tag [string range $tag 1 end]
00605             if {[regsub {\?$} $tag {} tag]} {
00606                 if {[string length [string trim $param]]} {
00607                 uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$param\" in processing instruction around line $state(line)"]
00608                 }
00609             } elseif {![regexp ^$Name\$ $tag]} {
00610                 uplevel #0 $options(-errorcommand) [list illegalcharacter "illegal character in processing instruction target \"$tag\""]
00611             } elseif {[regexp {[xX][mM][lL]} $tag]} {
00612                 uplevel #0 $options(-errorcommand) [list illegalcharacters "characters \"xml\" not permitted in processing instruction target \"$tag\""]
00613             } elseif {![regsub {\?$} $param {} param]} {
00614                 uplevel #0 $options(-errorcommand) [list missingquestion "PI: expected '?' character around line $state(line)"]
00615             }
00616             set code [catch {uplevel #0 $options(-processinginstructioncommand) [list $tag [string trimleft $param]]} msg]
00617             switch $code {
00618                 0 {# OK}
00619                 3 {
00620                 # break
00621                 return {}
00622                 }
00623                 4 {
00624                 # continue
00625                 # skip sibling nodes
00626                 set state(continue:tag) [lindex $state(stack) end]
00627                 set state(continue:level) 1
00628                 set state(mode) continue
00629                 continue
00630                 }
00631                 default {
00632                 return -code $code -errorinfo $::errorInfo $msg
00633                 }
00634             }
00635             }
00636 
00637             !DOCTYPE {
00638             # External entity reference
00639             # This should move into xml.tcl
00640             # Parse the params supplied.  Looking for Name, ExternalID and MarkupDecl
00641             set matched [regexp ^[cl $Wsp]*($Name)[cl $Wsp]*(.*) $param x state(doc_name) param]
00642             set state(doc_name) [Normalize $state(doc_name) $options(-normalize)]
00643             set externalID {}
00644             set pubidlit {}
00645             set systemlit {}
00646             set externalID {}
00647             if {[regexp -nocase ^[cl $Wsp]*(SYSTEM|PUBLIC)(.*) $param x id param]} {
00648                 switch [string toupper $id] {
00649                 SYSTEM {
00650                     if {[regexp ^[cl $Wsp]+"([cl ^"]*)"(.*) $param x systemlit param] || [regexp ^[cl $Wsp]+'([cl ^']*)'(.*) $param x systemlit param]} {
00651                     set externalID [list SYSTEM $systemlit] ;# "
00652                     } else {
00653                     uplevel #0 $options(-errorcommand) {syntaxerror {syntax error: SYSTEM identifier not followed by literal}}
00654                     }
00655                 }
00656                 PUBLIC {
00657                     if {[regexp ^[cl $Wsp]+"([cl ^"]*)"(.*) $param x pubidlit param] || [regexp ^[cl $Wsp]+'([cl ^']*)'(.*) $param x pubidlit param]} {
00658                     if {[regexp ^[cl $Wsp]+"([cl ^"]*)"(.*) $param x systemlit param] || [regexp ^[cl $Wsp]+'([cl ^']*)'(.*) $param x systemlit param]} {
00659                         set externalID [list PUBLIC $pubidlit $systemlit]
00660                     } else {
00661                         uplevel #0 $options(-errorcommand) [list syntaxerror "syntax error: PUBLIC identifier not followed by system literal around line $state(line)"]
00662                     }
00663                     } else {
00664                     uplevel #0 $options(-errorcommand) [list syntaxerror "syntax error: PUBLIC identifier not followed by literal around line $state(line)"]
00665                     }
00666                 }
00667                 }
00668                 if {[regexp -nocase ^[cl $Wsp]+NDATA[cl $Wsp]+($Name)(.*) $param x notation param]} {
00669                 lappend externalID $notation
00670                 }
00671             }
00672 
00673             set state(inDTD) 1
00674 
00675             ParseEvent:DocTypeDecl [array get options] $state(doc_name) $pubidlit $systemlit $options(-internaldtd)
00676 
00677             set state(inDTD) 0
00678 
00679             }
00680 
00681             !--* {
00682 
00683             # Start of a comment
00684             # See if it ends in the same tag, otherwise change the
00685             # parsing mode
00686 
00687             regexp {!--(.*)} $tag discard comm1
00688             if {[regexp ([cl ^-]*)--[cl $Wsp]*\$ $comm1 discard comm1_1]} {
00689                 # processed comment (end in tag)
00690                 uplevel #0 $options(-commentcommand) [list $comm1_1]
00691             } elseif {[regexp ([cl ^-]*)--[cl $Wsp]*\$ $param discard comm2]} {
00692                 # processed comment (end in attributes)
00693                 uplevel #0 $options(-commentcommand) [list $comm1$comm2]
00694             } elseif {[regexp ([cl ^-]*)-->(.*) $text discard comm2 text]} {
00695                 # processed comment (end in text)
00696                 uplevel #0 $options(-commentcommand) [list $comm1$param$empty>$comm2]
00697             } else {
00698                 # start of comment
00699                 set state(mode) comment
00700                 set state(commentdata) "$comm1$param$empty>$text"
00701                 continue
00702             }
00703             }
00704 
00705             {!\[CDATA\[*} {
00706 
00707             regexp {!\[CDATA\[(.*)} $tag discard cdata1
00708             if {[regexp {(.*)]]$} $cdata1 discard cdata2]} {
00709                 # processed CDATA (end in tag)
00710                 PCDATA [array get options] [subst -novariable -nocommand $cdata2]
00711                 set text [subst -novariable -nocommand $text]
00712             } elseif {[regexp {(.*)]]$} $param discard cdata2]} {
00713                 # processed CDATA (end in attribute)
00714                 # Backslashes in param are quoted at this stage
00715                 PCDATA [array get options] $cdata1[subst -novariable -nocommand $cdata2]
00716                 set text [subst -novariable -nocommand $text]
00717             } elseif {[regexp {(.*)]]>(.*)} $text discard cdata2 text]} {
00718                 # processed CDATA (end in text)
00719                 # Backslashes in param and text are quoted at this stage
00720                 PCDATA [array get options] $cdata1[subst -novariable -nocommand $param]$empty>[subst -novariable -nocommand $cdata2]
00721                 set text [subst -novariable -nocommand $text]
00722             } else {
00723                 # start CDATA
00724                 set state(cdata) "$cdata1$param>$text"
00725                 set state(mode) cdata
00726                 continue
00727             }
00728 
00729             }
00730 
00731             !ELEMENT -
00732             !ATTLIST -
00733             !ENTITY -
00734             !NOTATION {
00735             uplevel #0 $options(-errorcommand) [list illegaldeclaration "[string range $tag 1 end] declaration not expected in document instance around line $state(line)"]
00736             }
00737 
00738             default {
00739             uplevel #0 $options(-errorcommand) [list unknowninstruction "unknown processing instruction \"<$tag>\" around line $state(line)"]
00740             }
00741         }
00742         }
00743         *,1,* -
00744         *,0,/,/ {
00745         # Syntax error
00746             uplevel #0 $options(-errorcommand) [list syntaxerror "syntax error: closed/empty tag: tag $tag param $param empty $empty close $close around line $state(line)"]
00747         }
00748     }
00749 
00750     # Process character data
00751 
00752     if {$state(haveDocElement) && [llength $state(stack)]} {
00753 
00754         # Check if the internal DTD entity is in the text
00755         regsub -all &xml:intdtd\; $text \[$options(-internaldtd)\] text
00756 
00757         # Look for entity references
00758         if {([array size entities] || \
00759             [string length $options(-entityreferencecommand)]) && \
00760             $options(-defaultexpandinternalentities) && \
00761             [regexp {&[^;]+;} $text]} {
00762 
00763         # protect Tcl specials
00764         # NB. braces and backslashes may already be protected
00765         regsub -all {\\({|}|\\)} $text {\1} text
00766         regsub -all {([][$\\{}])} $text {\\\1} text
00767 
00768         # Mark entity references
00769         regsub -all {&([^;]+);} $text [format {%s; %s {\1} ; %s %s} \}\} [namespace code [list Entity [array get options] $options(-entityreferencecommand) [namespace code [list PCDATA [array get options]]] $options(entities)]] [namespace code [list DeProtect [namespace code [list PCDATA [array get options]]]]] \{\{] text
00770         set text "uplevel #0 [namespace code [list DeProtect1 [namespace code [list PCDATA [array get options]]]]] {{$text}}"
00771         eval $text
00772         } else {
00773 
00774         # Restore protected special characters
00775         regsub -all {\\([][{}\\])} $text {\1} text
00776         PCDATA [array get options] $text
00777         }
00778     } elseif {[string length [string trim $text]]} {
00779         uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$text\" in document prolog around line $state(line)"]
00780     }
00781 
00782     }
00783 
00784     # If this is the end of the document, close all open containers
00785     if {$options(-final) && [llength $state(stack)]} {
00786     eval $options(-errorcommand) [list unclosedelement "element [lindex $state(stack) end] remains unclosed around line $state(line)"]
00787     }
00788 
00789     return {}
00790 }
00791 
00792 /*  sgml::DeProtect --*/
00793 /* */
00794 /*  Invoke given command after removing protecting backslashes*/
00795 /*  from given text.*/
00796 /* */
00797 /*  Arguments:*/
00798 /*  cmd Command to invoke*/
00799 /*  text    Text to deprotect*/
00800 /* */
00801 /*  Results:*/
00802 /*  Depends on command*/
00803 
00804 ret  sgml::DeProtect1 (type cmd , type text) {
00805     if {[string compare {} $text]} {
00806     regsub -all {\\([]$[{}\\])} $text {\1} text
00807     uplevel #0 $cmd [list $text]
00808     }
00809 }
00810 ret  sgml::DeProtect (type cmd , type text) {
00811     set text [lindex $text 0]
00812     if {[string compare {} $text]} {
00813     regsub -all {\\([]$[{}\\])} $text {\1} text
00814     uplevel #0 $cmd [list $text]
00815     }
00816 }
00817 
00818 /*  sgml::ParserDelete --*/
00819 /* */
00820 /*  Free all memory associated with parser*/
00821 /* */
00822 /*  Arguments:*/
00823 /*  var global state array*/
00824 /* */
00825 /*  Results:*/
00826 /*  Variables unset*/
00827 
00828 ret  sgml::ParserDelete var (
00829     type upvar #0 $, type var , type state
00830 
00831     , type if , optional ![info =exists state] , optional 
00832     return =-code error ="unknown parser"
00833     
00834 
00835     , type catch , optional unset =$state(entities)
00836     , type catch , optional unset =$state(parameterentities)
00837     , type catch , optional unset =$state(elementdecls)
00838     , type catch , optional unset =$state(attlistdecls)
00839     , type catch , optional unset =$state(notationdecls)
00840     , type catch , optional unset =$state(namespaces)
00841 
00842     , type unset , type state
00843 
00844     , type return , optional 
00845 )
00846 
00847 # sgml::ParseEvent:ElementOpen --
00848 #
00849 #   Start of an element.
00850 #
00851 # Arguments:
00852 #   tag Element name
00853 #   attr    Attribute list
00854 #   opts    Options
00855 #   args    further configuration options
00856 #
00857 # Options:
00858 #   -empty boolean
00859 #       indicates whether the element was an empty element
00860 #
00861 # Results:
00862 #   Modify state and invoke callback
00863 
00864 proc sgml::ParseEvent:ElementOpen {tag attr opts args} {
00865     variable Name
00866     variable Wsp
00867 
00868     array  options =  $opts
00869     upvar /* 0 $options(-statevariable) state*/
00870     array  cfg =  {-empty 0}
00871     array  cfg =  $args
00872      handleEmpty =  0
00873 
00874     if {$options(-normalize)} {
00875      tag =  [string toupper $tag]
00876     }
00877 
00878     /*  Update state*/
00879     lappend state(stack) $tag
00880 
00881     /*  Parse attribute list into a key-value representation*/
00882     if {[string compare $options(-parseattributelistcommand) {}]} {
00883     if {[catch {uplevel /* 0 $options(-parseattributelistcommand) [list $opts $attr]} attr]} {*/
00884         if {[string compare [lindex $attr 0] "unterminated attribute value"]} {
00885         uplevel /* 0 $options(-errorcommand) [list unterminatedattribute "$attr around line $state(line)"]*/
00886          attr =  {}
00887         } else {
00888 
00889         /*  It is most likely that a ">" character was in an attribute value.*/
00890         /*  This manifests itself by ">" appearing in the element's text.*/
00891         /*  In this case the callback should return a three element list;*/
00892         /*  the message "unterminated attribute value", the attribute list it*/
00893         /*  did manage to parse and the remainder of the attribute list.*/
00894 
00895         foreach {msg attlist brokenattr} $attr break
00896 
00897         upvar text elemText
00898         if {[string first > $elemText] >= 0} {
00899 
00900             /*  Now piece the attribute list back together*/
00901             regexp [cl $Wsp]*($Name)[cl $Wsp]*=[cl $Wsp]*("|')(.*) $brokenattr discard attname delimiter attvalue
00902             regexp (.*)>([cl ^>]*)\$ $elemText discard remattlist elemText
00903             regexp ([cl ^$delimiter]*)${delimiter}(.*) $remattlist discard remattvalue remattlist
00904 
00905             /*  Gotcha: watch out for empty element syntax*/
00906             if {[string match */ [string trimright $remattlist]]} {
00907              remattlist =  [string range $remattlist 0 end-1]
00908              handleEmpty =  1
00909              cfg = (-empty) 1
00910             }
00911 
00912             append attvalue >$remattvalue
00913             lappend attlist $attname $attvalue
00914 
00915             /*  Complete parsing the attribute list*/
00916             if {[catch {uplevel /* 0 $options(-parseattributelistcommand) [list $options(-statevariable) $remattlist]} attr]} {*/
00917             uplevel /* 0 $options(-errorcommand) [list unterminatedattribute "$attr around line $state(line)"]*/
00918              attr =  {}
00919              attlist =  {}
00920             } else {
00921             eval lappend attlist $attr
00922             }
00923 
00924              attr =  $attlist
00925 
00926         } else {
00927             uplevel /* 0 $options(-errorcommand) [list unterminatedattribute "$attr around line $state(line)"]*/
00928              attr =  {}
00929         }
00930         }
00931     }
00932     }
00933 
00934      empty =  {}
00935     if {$cfg(-empty) && $options(-reportempty)} {
00936      empty =  {-empty 1}
00937     }
00938 
00939     /*  Check for namespace declarations*/
00940     upvar /* 0 $options(namespaces) namespaces*/
00941      nsdecls =  {}
00942     if {[llength $attr]} {
00943     array  attrlist =  $attr
00944     foreach {attrName attrValue} [array get attrlist xmlns*] {
00945         un attrlist = ($attrName)
00946          colon =  [ prefix =  {}]
00947         if {[regexp {^xmlns(:(.+))?$} $attrName discard colon prefix]} {
00948         switch -glob [string length $colon],[string length $prefix] {
00949             0,0 {
00950             /*  default NS declaration*/
00951             lappend state(defaultNSURI) $attrValue
00952             lappend state(defaultNS) [llength $state(stack)]
00953             lappend nsdecls $attrValue {}
00954             }
00955             0,* {
00956             /*  Huh?*/
00957             }
00958             *,0 {
00959             /*  Error*/
00960             uplevel /* 0 $state(-warningcommand) "no prefix specified for namespace URI \"$attrValue\" in element \"$tag\""*/
00961             }
00962             default {
00963              namespaces = ($prefix,[llength $state(stack)]) $attrValue
00964             lappend nsdecls $attrValue $prefix
00965             }
00966         }
00967         }
00968     }
00969     if {[llength $nsdecls]} {
00970          nsdecls =  [list -namespacedecls $nsdecls]
00971     }
00972      attr =  [array get attrlist]
00973     }
00974 
00975     /*  Check whether this element has an expanded name*/
00976      ns =  {}
00977     if {[regexp {([^:]+):(.*)$} $tag discard prefix tag]} {
00978      nsspec =  [lsort -dictionary -decreasing [array names namespaces $prefix,*]]
00979     if {[llength $nsspec]} {
00980          nsuri =  $namespaces([lindex $nsspec 0])
00981          ns =  [list -namespace $nsuri]
00982     } else {
00983         uplevel /* 0 $options(-errorcommand) [list namespaceundeclared "no namespace declared for prefix \"$prefix\" in element $tag"]*/
00984     }
00985     } elseif {[llength $state(defaultNSURI)]} {
00986      ns =  [list -namespace [lindex $state(defaultNSURI) end]]
00987     }
00988 
00989     /*  Invoke callback*/
00990      code =  [catch {uplevel /* 0 $options(-elementstartcommand) [list $tag $attr] $empty $ns $nsdecls} msg]*/
00991 
00992     /*  Sometimes empty elements must be handled here (see above)*/
00993     if {$code == 0 && $handleEmpty} {
00994     ParseEvent:ElementClose $tag $opts -empty 1
00995     }
00996 
00997     return -code $code -errorinfo $::errorInfo $msg
00998 }
00999 
01000 /*  sgml::ParseEvent:ElementClose --*/
01001 /* */
01002 /*  End of an element.*/
01003 /* */
01004 /*  Arguments:*/
01005 /*  tag Element name*/
01006 /*  opts    Options*/
01007 /*  args    further configuration options*/
01008 /* */
01009 /*  Options:*/
01010 /*  -empty boolean*/
01011 /*      indicates whether the element as an empty element*/
01012 /* */
01013 /*  Results:*/
01014 /*  Modify state and invoke callback*/
01015 
01016 ret  sgml::ParseEvent:ElementClose (type tag , type opts , type args) {
01017     array set options $opts
01018     upvar #0 $options(-statevariable) state
01019     array set cfg {-empty 0}
01020     array set cfg $args
01021 
01022     # WF check
01023     if {[string compare $tag [lindex $state(stack) end]]} {
01024     uplevel #0 $options(-errorcommand) [list illegalendtag "end tag \"$tag\" does not match open element \"[lindex $state(stack) end]\" around line $state(line)"]
01025     return
01026     }
01027 
01028     # Check whether this element has an expanded name
01029     upvar #0 $options(namespaces) namespaces
01030     set ns {}
01031     if {[regexp {([^:]+):(.*)$} $tag discard prefix tag]} {
01032     set nsuri $namespaces([lindex [lsort -dictionary -decreasing [array names namespaces $prefix,*]] 0])
01033     set ns [list -namespace $nsuri]
01034     } elseif {[llength $state(defaultNSURI)]} {
01035     set ns [list -namespace [lindex $state(defaultNSURI) end]]
01036     }
01037 
01038     # Pop namespace stacks, if any
01039     if {[llength $state(defaultNS)]} {
01040     if {[llength $state(stack)] == [lindex $state(defaultNS) end]} {
01041         set state(defaultNS) [lreplace $state(defaultNS) end end]
01042     }
01043     }
01044     foreach nsspec [array names namespaces *,[llength $state(stack)]] {
01045     unset namespaces($nsspec)
01046     }
01047 
01048     # Update state
01049     set state(stack) [lreplace $state(stack) end end]
01050 
01051     set empty {}
01052     if {$cfg(-empty) && $options(-reportempty)} {
01053     set empty {-empty 1}
01054     }
01055 
01056     # Invoke callback
01057     # Mats: Shall be same as sgml::ParseEvent:ElementOpen to handle exceptions in callback.
01058     set code [catch {uplevel #0 $options(-elementendcommand) [list $tag] $empty $ns} msg]
01059     return -code $code -errorinfo $::errorInfo $msg
01060 }
01061 
01062 /*  sgml::PCDATA --*/
01063 /* */
01064 /*  Process PCDATA before passing to application*/
01065 /* */
01066 /*  Arguments:*/
01067 /*  opts    options*/
01068 /*  pcdata  Character data to be processed*/
01069 /* */
01070 /*  Results:*/
01071 /*  Checks that characters are legal,*/
01072 /*  checks -ignorewhitespace setting.*/
01073 
01074 ret  sgml::PCDATA (type opts , type pcdata) {
01075     array set options $opts
01076 
01077     if {$options(-ignorewhitespace) && \
01078         ![string length [string trim $pcdata]]} {
01079     return {}
01080     }
01081 
01082     if {![regexp ^[cl $::sgml::Char]*\$ $pcdata]} {
01083     upvar \#0 $options(-statevariable) state
01084     uplevel \#0 $options(-errorcommand) [list illegalcharacters "illegal, non-Unicode characters found in text \"$pcdata\" around line $state(line)"]
01085     }
01086 
01087     uplevel \#0 $options(-characterdatacommand) [list $pcdata]
01088 }
01089 
01090 /*  sgml::Normalize --*/
01091 /* */
01092 /*  Perform name normalization if required*/
01093 /* */
01094 /*  Arguments:*/
01095 /*  name    name to normalize*/
01096 /*  req normalization required*/
01097 /* */
01098 /*  Results:*/
01099 /*  Name returned as upper-case if normalization required*/
01100 
01101 ret  sgml::Normalize (type name , type req) {
01102     if {$req} {
01103     return [string toupper $name]
01104     } else {
01105     return $name
01106     }
01107 }
01108 
01109 /*  sgml::Entity --*/
01110 /* */
01111 /*  Resolve XML entity references (syntax: &xxx;).*/
01112 /* */
01113 /*  Arguments:*/
01114 /*  opts        options*/
01115 /*  entityrefcmd    application callback for entity references*/
01116 /*  pcdatacmd   application callback for character data*/
01117 /*  entities    name of array containing entity definitions.*/
01118 /*  ref     entity reference (the "xxx" bit)*/
01119 /* */
01120 /*  Results:*/
01121 /*  Returns substitution text for given entity.*/
01122 
01123 ret  sgml::Entity (type opts , type entityrefcmd , type pcdatacmd , type entities , type ref) {
01124     array set options $opts
01125     upvar #0 $options(-statevariable) state
01126 
01127     if {![string length $entities]} {
01128     set entities [namespace current]::EntityPredef
01129     }
01130 
01131     # SRB: Bug fix 2008-11-18 #812051: surround case labels in braces for compatibility with Freewrap
01132     switch -glob -- $ref {
01133     {%*} {
01134         # Parameter entity - not recognised outside of a DTD
01135     }
01136     {#x*} {
01137         # Character entity - hex
01138         if {[catch {format %c [scan [string range $ref 2 end] %x tmp; set tmp]} char]} {
01139         return -code error "malformed character entity \"$ref\""
01140         }
01141         uplevel #0 $pcdatacmd [list $char]
01142 
01143         return {}
01144 
01145     }
01146     {#*} {
01147         # Character entity - decimal
01148         if {[catch {format %c [scan [string range $ref 1 end] %d tmp; set tmp]} char]} {
01149         return -code error "malformed character entity \"$ref\""
01150         }
01151         uplevel #0 $pcdatacmd [list $char]
01152 
01153         return {}
01154 
01155     }
01156     default {
01157         # General entity
01158         upvar #0 $entities map
01159         if {[info exists map($ref)]} {
01160 
01161         if {![regexp {<|&} $map($ref)]} {
01162 
01163             # Simple text replacement - optimise
01164             uplevel #0 $pcdatacmd [list $map($ref)]
01165 
01166             return {}
01167 
01168         }
01169 
01170         # Otherwise an additional round of parsing is required.
01171         # This only applies to XML, since HTML doesn't have general entities
01172 
01173         # Must parse the replacement text for start & end tags, etc
01174         # This text must be self-contained: balanced closing tags, and so on
01175 
01176         set tokenised [tokenise $map($ref) $::xml::tokExpr $::xml::substExpr]
01177         set options(-final) 0
01178         eval parseEvent [list $tokenised] [array get options]
01179 
01180         return {}
01181 
01182         } elseif {[string compare $entityrefcmd "::sgml::noop"]} {
01183 
01184         set result [uplevel #0 $entityrefcmd [list $ref]]
01185 
01186         if {[string length $result]} {
01187             uplevel #0 $pcdatacmd [list $result]
01188         }
01189 
01190         return {}
01191 
01192         } else {
01193 
01194         # Reconstitute entity reference
01195 
01196         uplevel #0 $options(-errorcommand) [list illegalentity "undefined entity reference \"$ref\""]
01197 
01198         return {}
01199 
01200         }
01201     }
01202     }
01203 
01204     # If all else fails leave the entity reference untouched
01205     uplevel #0 $pcdatacmd [list &$ref\;]
01206 
01207     return {}
01208 }
01209 
01210 /* */
01211 /* */
01212 /*  DTD parser for SGML (XML).*/
01213 /* */
01214 /*  This DTD actually only handles XML DTDs.  Other language's*/
01215 /*  DTD's, such as HTML, must be written in terms of a XML DTD.*/
01216 /* */
01217 /* */
01218 
01219 /*  sgml::ParseEvent:DocTypeDecl --*/
01220 /* */
01221 /*  Entry point for DTD parsing*/
01222 /* */
01223 /*  Arguments:*/
01224 /*  opts    configuration options*/
01225 /*  docEl   document element name*/
01226 /*  pubId   public identifier*/
01227 /*  sysId   system identifier (a URI)*/
01228 /*  intSSet internal DTD subset*/
01229 
01230 ret  sgml::ParseEvent:DocTypeDecl (type opts , type docEl , type pubId , type sysId , type intSSet) {
01231     array set options {}
01232     array set options $opts
01233 
01234     set code [catch {uplevel #0 $options(-doctypecommand) [list $docEl $pubId $sysId $intSSet]} err]
01235     switch $code {
01236     3 {
01237         # break
01238         return {}
01239     }
01240     0 -
01241     4 {
01242         # continue
01243     }
01244     default {
01245         return -code $code $err
01246     }
01247     }
01248 
01249     # Otherwise we'll parse the DTD and report it piecemeal
01250 
01251     # The internal DTD subset is processed first (XML 2.8)
01252     # During this stage, parameter entities are only allowed
01253     # between markup declarations
01254 
01255     ParseDTD:Internal [array get options] $intSSet
01256 
01257     # The external DTD subset is processed last (XML 2.8)
01258     # During this stage, parameter entities may occur anywhere
01259 
01260     # We must resolve the external identifier to obtain the
01261     # DTD data.  The application may supply its own resolver.
01262 
01263     if {[string length $pubId] || [string length $sysId]} {
01264     uplevel #0 $options(-externalentitycommand) [list $options(-cmd) $options(-baseuri) $sysId $pubId]
01265     }
01266 
01267     return {}
01268 }
01269 
01270 /*  sgml::ParseDTD:Internal --*/
01271 /* */
01272 /*  Parse the internal DTD subset.*/
01273 /* */
01274 /*  Parameter entities are only allowed between markup declarations.*/
01275 /* */
01276 /*  Arguments:*/
01277 /*  opts    configuration options*/
01278 /*  dtd DTD data*/
01279 /* */
01280 /*  Results:*/
01281 /*  Markup declarations parsed may cause callback invocation*/
01282 
01283 ret  sgml::ParseDTD:Internal (type opts , type dtd) {
01284     variable MarkupDeclExpr
01285     variable MarkupDeclSub
01286 
01287     array set options {}
01288     array set options $opts
01289 
01290     upvar #0 $options(-statevariable) state
01291     upvar #0 $options(parameterentities) PEnts
01292     upvar #0 $options(externalparameterentities) ExtPEnts
01293 
01294     # Bug 583947: remove comments before further processing
01295     regsub -all {<!--.*?-->} $dtd {} dtd
01296 
01297     # Tokenize the DTD
01298 
01299     # Protect Tcl special characters
01300     regsub -all {([{}\\])} $dtd {\\\1} dtd
01301 
01302     regsub -all $MarkupDeclExpr $dtd $MarkupDeclSub dtd
01303 
01304     # Entities may have angle brackets in their replacement
01305     # text, which breaks the RE processing.  So, we must
01306     # use a similar technique to processing doc instances
01307     # to rebuild the declarations from the pieces
01308 
01309     set mode {} ;# normal
01310     set delimiter {}
01311     set name {}
01312     set param {}
01313 
01314     set state(inInternalDTD) 1
01315 
01316     # Process the tokens
01317     foreach {decl value text} [lrange "{} {} \{$dtd\}" 3 end] {
01318 
01319     # Keep track of line numbers
01320     incr state(line) [regsub -all \n $text {} discard]
01321 
01322     ParseDTD:EntityMode [array get options] mode replText decl value text $delimiter $name $param
01323 
01324     ParseDTD:ProcessMarkupDecl [array get options] decl value delimiter name mode replText text param
01325 
01326     # There may be parameter entity references between markup decls
01327 
01328     if {[regexp {%.*;} $text]} {
01329 
01330         # Protect Tcl special characters
01331         regsub -all {([{}\\])} $text {\\\1} text
01332 
01333         regsub -all %($::sgml::Name)\; $text "\} {\\1} \{" text
01334 
01335         set PElist "\{$text\}"
01336         set PElist [lreplace $PElist end end]
01337         foreach {text entref} $PElist {
01338         if {[string length [string trim $text]]} {
01339             uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text in internal DTD subset around line $state(line)"]
01340         }
01341 
01342         # Expand parameter entity and recursively parse
01343         # BUG: no checks yet for recursive entity references
01344 
01345         if {[info exists PEnts($entref)]} {
01346             set externalParser [$options(-cmd) entityparser]
01347             $externalParser parse $PEnts($entref) -dtdsubset internal
01348         } elseif {[info exists ExtPEnts($entref)]} {
01349             set externalParser [$options(-cmd) entityparser]
01350             $externalParser parse $ExtPEnts($entref) -dtdsubset external
01351             #$externalParser free
01352         } else {
01353             uplevel #0 $options(-errorcommand) [list illegalreference "reference to undeclared parameter entity \"$entref\""]
01354         }
01355         }
01356 
01357     }
01358 
01359     }
01360 
01361     return {}
01362 }
01363 
01364 /*  sgml::ParseDTD:EntityMode --*/
01365 /* */
01366 /*  Perform special processing for various parser modes*/
01367 /* */
01368 /*  Arguments:*/
01369 /*  opts    configuration options*/
01370 /*  modeVar pass-by-reference mode variable*/
01371 /*  replTextVar pass-by-ref*/
01372 /*  declVar pass-by-ref*/
01373 /*  valueVar    pass-by-ref*/
01374 /*  textVar pass-by-ref*/
01375 /*  delimiter   delimiter currently in force*/
01376 /*  name*/
01377 /*  param*/
01378 /* */
01379 /*  Results:*/
01380 /*  Depends on current mode*/
01381 
01382 ret  sgml::ParseDTD:EntityMode (type opts , type modeVar , type replTextVar , type declVar , type valueVar , type textVar , type delimiter , type name , type param) {
01383     upvar 1 $modeVar mode
01384     upvar 1 $replTextVar replText
01385     upvar 1 $declVar decl
01386     upvar 1 $valueVar value
01387     upvar 1 $textVar text
01388     array set options $opts
01389 
01390     switch $mode {
01391     {} {
01392         # Pass through to normal processing section
01393     }
01394     entity {
01395         # Look for closing delimiter
01396         if {[regexp ([cl ^$delimiter]*)${delimiter}(.*) $decl discard val1 remainder]} {
01397         append replText <$val1
01398         DTD:ENTITY [array get options] $name [string trim $param] $delimiter$replText$delimiter
01399         set decl /
01400         set text $remainder\ $value>$text
01401         set value {}
01402         set mode {}
01403         } elseif {[regexp ([cl ^$delimiter]*)${delimiter}(.*) $value discard val2 remainder]} {
01404         append replText <$decl\ $val2
01405         DTD:ENTITY [array get options] $name [string trim $param] $delimiter$replText$delimiter
01406         set decl /
01407         set text $remainder>$text
01408         set value {}
01409         set mode {}
01410         } elseif {[regexp ([cl ^$delimiter]*)${delimiter}(.*) $text discard val3 remainder]} {
01411         append replText <$decl\ $value>$val3
01412         DTD:ENTITY [array get options] $name [string trim $param] $delimiter$replText$delimiter
01413         set decl /
01414         set text $remainder
01415         set value {}
01416         set mode {}
01417         } else {
01418 
01419         # Remain in entity mode
01420         append replText <$decl\ $value>$text
01421         return -code continue
01422 
01423         }
01424     }
01425 
01426     ignore {
01427         upvar #0 $options(-statevariable) state
01428 
01429         if {[regexp {]](.*)$} $decl discard remainder]} {
01430         set state(condSections) [lreplace $state(condSections) end end]
01431         set decl $remainder
01432         set mode {}
01433         } elseif {[regexp {]](.*)$} $value discard remainder]} {
01434         set state(condSections) [lreplace $state(condSections) end end]
01435         regexp <[cl $::sgml::Wsp]*($::sgml::Name)(.*) $remainder discard decl value
01436         set mode {}
01437         } elseif {[regexp {]]>(.*)$} $text discard remainder]} {
01438         set state(condSections) [lreplace $state(condSections) end end]
01439         set decl /
01440         set value {}
01441         set text $remainder
01442         #regexp <[cl $::sgml::Wsp]*($::sgml::Name)([cl ^>]*)>(.*) $remainder discard decl value text
01443         set mode {}
01444         } else {
01445         set decl /
01446         }
01447 
01448     }
01449 
01450     comment {
01451         # Look for closing comment delimiter
01452 
01453         upvar #0 $options(-statevariable) state
01454 
01455         if {[regexp (.*?)--(.*)\$ $decl discard data1 remainder]} {
01456         } elseif {[regexp (.*?)--(.*)\$ $value discard data1 remainder]} {
01457         } elseif {[regexp (.*?)--(.*)\$ $text discard data1 remainder]} {
01458         } else {
01459         # comment continues
01460         append state(commentdata) <$decl\ $value>$text
01461         set decl /
01462         set value {}
01463         set text {}
01464         }
01465     }
01466 
01467     }
01468 
01469     return {}
01470 }
01471 
01472 /*  sgml::ParseDTD:ProcessMarkupDecl --*/
01473 /* */
01474 /*  Process a single markup declaration*/
01475 /* */
01476 /*  Arguments:*/
01477 /*  opts    configuration options*/
01478 /*  declVar pass-by-ref*/
01479 /*  valueVar    pass-by-ref*/
01480 /*  delimiterVar    pass-by-ref for current delimiter in force*/
01481 /*  nameVar pass-by-ref*/
01482 /*  modeVar pass-by-ref for current parser mode*/
01483 /*  replTextVar pass-by-ref*/
01484 /*  textVar pass-by-ref*/
01485 /*  paramVar    pass-by-ref*/
01486 /* */
01487 /*  Results:*/
01488 /*  Depends on markup declaration.  May change parser mode*/
01489 
01490 ret  sgml::ParseDTD:ProcessMarkupDecl (type opts , type declVar , type valueVar , type delimiterVar , type nameVar , type modeVar , type replTextVar , type textVar , type paramVar) {
01491     upvar 1 $modeVar mode
01492     upvar 1 $replTextVar replText
01493     upvar 1 $textVar text
01494     upvar 1 $declVar decl
01495     upvar 1 $valueVar value
01496     upvar 1 $nameVar name
01497     upvar 1 $delimiterVar delimiter
01498     upvar 1 $paramVar param
01499 
01500     variable declExpr
01501     variable ExternalEntityExpr
01502 
01503     array set options $opts
01504     upvar #0 $options(-statevariable) state
01505 
01506     switch -glob -- $decl {
01507 
01508     / {
01509         # continuation from entity processing
01510     }
01511 
01512     !ELEMENT {
01513         # Element declaration
01514         if {[regexp $declExpr $value discard tag cmodel]} {
01515         DTD:ELEMENT [array get options] $tag $cmodel
01516         } else {
01517         uplevel #0 $options(-errorcommand) [list illegaldeclaration "malformed element declaration around line $state(line)"]
01518         }
01519     }
01520 
01521     !ATTLIST {
01522         # Attribute list declaration
01523         variable declExpr
01524         if {[regexp $declExpr $value discard tag attdefns]} {
01525         if {[catch {DTD:ATTLIST [array get options] $tag $attdefns} err]} {
01526             #puts stderr "Stack trace: $::errorInfo\n***\n"
01527             # Atttribute parsing has bugs at the moment
01528             #return -code error "$err around line $state(line)"
01529             return {}
01530         }
01531         } else {
01532         uplevel #0 $options(-errorcommand) [list illegaldeclaration "malformed attribute list declaration around line $state(line)"]
01533         }
01534     }
01535 
01536     !ENTITY {
01537         # Entity declaration
01538         variable EntityExpr
01539 
01540         if {[regexp $EntityExpr $value discard param name value]} {
01541 
01542         # Entity replacement text may have a '>' character.
01543         # In this case, the real delimiter will be in the following
01544         # text.  This is complicated by the possibility of there
01545         # being several '<','>' pairs in the replacement text.
01546         # At this point, we are searching for the matching quote delimiter.
01547 
01548         if {[regexp $ExternalEntityExpr $value]} {
01549             DTD:ENTITY [array get options] $name [string trim $param] $value
01550         } elseif {[regexp ("|')(.*?)\\1(.*) $value discard delimiter replText value]} {
01551 
01552             if {[string length [string trim $value]]} {
01553             uplevel #0 $options(-errorcommand) [list illegaldeclaration "malformed entity declaration around line $state(line)"]
01554             } else {
01555             DTD:ENTITY [array get options] $name [string trim $param] $delimiter$replText$delimiter
01556             }
01557         } elseif {[regexp ("|')(.*) $value discard delimiter replText]} {
01558             append replText >$text
01559             set text {}
01560             set mode entity
01561         } else {
01562             uplevel #0 $options(-errorcommand) [list illegaldeclaration "no delimiter for entity declaration around line $state(line)"]
01563         }
01564 
01565         } else {
01566         uplevel #0 $options(-errorcommand) [list illegaldeclaration "malformed entity declaration around line $state(line)"]
01567         }
01568     }
01569 
01570     !NOTATION {
01571         # Notation declaration
01572         if {[regexp $declExpr param discard tag notation]} {
01573         DTD:ENTITY [array get options] $tag $notation
01574         } else {
01575         uplevel #0 $options(-errorcommand) [list illegaldeclaration "malformed entity declaration around line $state(line)"]
01576         }
01577     }
01578 
01579     !--* {
01580         # Start of a comment
01581 
01582         if {[regexp !--(.*?)--\$ $decl discard data]} {
01583         if {[string length [string trim $value]]} {
01584             uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$value\""]
01585         }
01586         uplevel #0 $options(-commentcommand) [list $data]
01587         set decl /
01588         set value {}
01589         } elseif {[regexp -- ^(.*?)--\$ $value discard data2]} {
01590         regexp !--(.*)\$ $decl discard data1
01591         uplevel #0 $options(-commentcommand) [list $data1\ $data2]
01592         set decl /
01593         set value {}
01594         } elseif {[regexp (.*?)-->(.*)\$ $text discard data3 remainder]} {
01595         regexp !--(.*)\$ $decl discard data1
01596         uplevel #0 $options(-commentcommand) [list $data1\ $value>$data3]
01597         set decl /
01598         set value {}
01599         set text $remainder
01600         } else {
01601         regexp !--(.*)\$ $decl discard data1
01602         set state(commentdata) $data1\ $value>$text
01603         set decl /
01604         set value {}
01605         set text {}
01606         set mode comment
01607         }
01608     }
01609 
01610     !*INCLUDE* -
01611     !*IGNORE* {
01612         if {$state(inInternalDTD)} {
01613         uplevel #0 $options(-errorcommand) [list illegalsection "conditional section not permitted in internal DTD subset around line $state(line)"]
01614         }
01615 
01616         if {[regexp {^!\[INCLUDE\[(.*)} $decl discard remainder]} {
01617         # Push conditional section stack, popped by ]]> sequence
01618 
01619         if {[regexp {(.*?)]]$} $remainder discard r2]} {
01620             # section closed immediately
01621             if {[string length [string trim $r2]]} {
01622             uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$r2\" in conditional section"]
01623             }
01624         } elseif {[regexp {(.*?)]](.*)} $value discard r2 r3]} {
01625             # section closed immediately
01626             if {[string length [string trim $r2]]} {
01627             uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$r2\" in conditional section"]
01628             }
01629             if {[string length [string trim $r3]]} {
01630             uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$r3\" in conditional section"]
01631             }
01632         } else {
01633 
01634             lappend state(condSections) INCLUDE
01635 
01636             set parser [$options(-cmd) entityparser]
01637             $parser parse $remainder\ $value> -dtdsubset external
01638             #$parser free
01639 
01640             if {[regexp {(.*?)]]>(.*)} $text discard t1 t2]} {
01641             if {[string length [string trim $t1]]} {
01642                 uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$t1\""]
01643             }
01644             if {![llength $state(condSections)]} {
01645                 uplevel #0 $options(-errorcommand) [list illegalsection "extraneous conditional section close"]
01646             }
01647             set state(condSections) [lreplace $state(condSections) end end]
01648             set text $t2
01649             }
01650 
01651         }
01652         } elseif {[regexp {^!\[IGNORE\[(.*)} $decl discard remainder]} {
01653         # Set ignore mode.  Still need a stack
01654         set mode ignore
01655 
01656         if {[regexp {(.*?)]]$} $remainder discard r2]} {
01657             # section closed immediately
01658             if {[string length [string trim $r2]]} {
01659             uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$r2\" in conditional section"]
01660             }
01661         } elseif {[regexp {(.*?)]](.*)} $value discard r2 r3]} {
01662             # section closed immediately
01663             if {[string length [string trim $r2]]} {
01664             uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$r2\" in conditional section"]
01665             }
01666             if {[string length [string trim $r3]]} {
01667             uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$r3\" in conditional section"]
01668             }
01669         } else {
01670             
01671             lappend state(condSections) IGNORE
01672 
01673             if {[regexp {(.*?)]]>(.*)} $text discard t1 t2]} {
01674             if {[string length [string trim $t1]]} {
01675                 uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$t1\""]
01676             }
01677             if {![llength $state(condSections)]} {
01678                 uplevel #0 $options(-errorcommand) [list illegalsection "extraneous conditional section close"]
01679             }
01680             set state(condSections) [lreplace $state(condSections) end end]
01681             set text $t2
01682             }
01683 
01684         }
01685         } else {
01686         uplevel #0 $options(-errorcommand) [list illegaldeclaration "illegal markup declaration \"$decl\" around line $state(line)"]
01687         }
01688 
01689     }
01690 
01691     default {
01692         if {[regexp {^\?(.*)} $decl discard target]} {
01693         # Processing instruction
01694         } else {
01695         uplevel #0 $options(-errorcommand) [list illegaldeclaration "illegal markup declaration \"$decl\""]
01696         }
01697     }
01698     }
01699 
01700     return {}
01701 }
01702 
01703 /*  sgml::ParseDTD:External --*/
01704 /* */
01705 /*  Parse the external DTD subset.*/
01706 /* */
01707 /*  Parameter entities are allowed anywhere.*/
01708 /* */
01709 /*  Arguments:*/
01710 /*  opts    configuration options*/
01711 /*  dtd DTD data*/
01712 /* */
01713 /*  Results:*/
01714 /*  Markup declarations parsed may cause callback invocation*/
01715 
01716 ret  sgml::ParseDTD:External (type opts , type dtd) {
01717     variable MarkupDeclExpr
01718     variable MarkupDeclSub
01719     variable declExpr
01720 
01721     array set options $opts
01722     upvar #0 $options(parameterentities) PEnts
01723     upvar #0 $options(externalparameterentities) ExtPEnts
01724     upvar #0 $options(-statevariable) state
01725 
01726     # As with the internal DTD subset, watch out for
01727     # entities with angle brackets
01728     set mode {} ;# normal
01729     set delimiter {}
01730     set name {}
01731     set param {}
01732 
01733     set oldState 0
01734     catch {set oldState $state(inInternalDTD)}
01735     set state(inInternalDTD) 0
01736 
01737     # Initialise conditional section stack
01738     if {![info exists state(condSections)]} {
01739     set state(condSections) {}
01740     }
01741     set startCondSectionDepth [llength $state(condSections)]
01742 
01743     while {[string length $dtd]} {
01744     set progress 0
01745     set PEref {}
01746     if {![string compare $mode "ignore"]} {
01747         set progress 1
01748         if {[regexp {]]>(.*)} $dtd discard dtd]} {
01749         set remainder {}
01750         set mode {} ;# normal
01751         set state(condSections) [lreplace $state(condSections) end end]
01752         continue
01753         } else {
01754         uplevel #0 $options(-errorcommand) [list missingdelimiter "IGNORE conditional section closing delimiter not found"]
01755         }
01756     } elseif {[regexp ^(.*?)%($::sgml::Name)\;(.*)\$ $dtd discard data PEref remainder]} {
01757         set progress 1
01758     } else {
01759         set data $dtd
01760         set dtd {}
01761         set remainder {}
01762     }
01763 
01764     # Tokenize the DTD (so far)
01765 
01766     # Protect Tcl special characters
01767     regsub -all {([{}\\])} $data {\\\1} dataP
01768 
01769     set n [regsub -all $MarkupDeclExpr $dataP $MarkupDeclSub dataP]
01770 
01771     if {$n} {
01772         set progress 1
01773         # All but the last markup declaration should have no text
01774         set dataP [lrange "{} {} \{$dataP\}" 3 end]
01775         if {[llength $dataP] > 3} {
01776         foreach {decl value text} [lrange $dataP 0 [expr [llength $dataP] - 4]] {
01777             ParseDTD:EntityMode [array get options] mode replText decl value text $delimiter $name $param
01778             ParseDTD:ProcessMarkupDecl [array get options] decl value delimiter name mode repltextVar text param
01779 
01780             if {[string length [string trim $text]]} {
01781             # check for conditional section close
01782             if {[regexp {]]>(.*)$} $text discard text]} {
01783                 if {[string length [string trim $text]]} {
01784                 uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$text\""]
01785                 }
01786                 if {![llength $state(condSections)]} {
01787                 uplevel #0 $options(-errorcommand) [list illegalsection "extraneous conditional section close"]
01788                 }
01789                 set state(condSections) [lreplace $state(condSections) end end]
01790                 if {![string compare $mode "ignore"]} {
01791                 set mode {} ;# normal
01792                 }
01793             } else {
01794                 uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$text\""]
01795             }
01796             }
01797         }
01798         }
01799         # Do the last declaration
01800         foreach {decl value text} [lrange $dataP [expr [llength $dataP] - 3] end] {
01801         ParseDTD:EntityMode [array get options] mode replText decl value text $delimiter $name $param
01802         ParseDTD:ProcessMarkupDecl [array get options] decl value delimiter name mode repltextVar text param
01803         }
01804     }
01805 
01806     # Now expand the PE reference, if any
01807     switch -glob $mode,[string length $PEref],$n {
01808         ignore,0,* {
01809         set dtd $text
01810         }
01811         ignore,*,* {
01812         set dtd $text$remainder
01813         }
01814         *,0,0 {
01815         set dtd $data
01816         }
01817         *,0,* {
01818         set dtd $text
01819         }
01820         *,*,0 {
01821         if {[catch {append data $PEnts($PEref)}]} {
01822             if {[info exists ExtPEnts($PEref)]} {
01823             set externalParser [$options(-cmd) entityparser]
01824             $externalParser parse $ExtPEnts($PEref) -dtdsubset external
01825             #$externalParser free
01826             } else {
01827             uplevel #0 $options(-errorcommand) [list entityundeclared "parameter entity \"$PEref\" not declared"]
01828             }
01829         }
01830         set dtd $data$remainder
01831         }
01832         default {
01833         if {[catch {append text $PEnts($PEref)}]} {
01834             if {[info exists ExtPEnts($PEref)]} {
01835             set externalParser [$options(-cmd) entityparser]
01836             $externalParser parse $ExtPEnts($PEref) -dtdsubset external
01837             #$externalParser free
01838             } else {
01839             uplevel #0 $options(-errorcommand) [list entityundeclared "parameter entity \"$PEref\" not declared"]
01840             }
01841         }
01842         set dtd $text$remainder
01843         }
01844     }
01845 
01846     # Check whether a conditional section has been terminated
01847     if {[regexp {^(.*?)]]>(.*)$} $dtd discard t1 t2]} {
01848         if {![regexp <.*> $t1]} {
01849         if {[string length [string trim $t1]]} {
01850             uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$t1\""]
01851         }
01852         if {![llength $state(condSections)]} {
01853             uplevel #0 $options(-errorcommand) [list illegalsection "extraneous conditional section close"]
01854         }
01855         set state(condSections) [lreplace $state(condSections) end end]
01856         if {![string compare $mode "ignore"]} {
01857             set mode {} ;# normal
01858         }
01859         set dtd $t2
01860         set progress 1
01861         }
01862     }
01863 
01864     if {!$progress} {
01865         # No parameter entity references were found and 
01866         # the text does not contain a well-formed markup declaration
01867         # Avoid going into an infinite loop
01868         upvar #0 $options(-errorcommand) [list syntaxerror "external entity does not contain well-formed markup declaration"]
01869         break
01870     }
01871     }
01872 
01873     set state(inInternalDTD) $oldState
01874 
01875     # Check that conditional sections have been closed properly
01876     if {[llength $state(condSections)] > $startCondSectionDepth} {
01877     uplevel #0 $options(-errorcommand) [list syntaxerror "[lindex $state(condSections) end] conditional section not closed"]
01878     }
01879     if {[llength $state(condSections)] < $startCondSectionDepth} {
01880     uplevel #0 $options(-errorcommand) [list syntaxerror "too many conditional section closures"]
01881     }
01882 
01883     return {}
01884 }
01885 
01886 /*  Procedures for handling the various declarative elements in a DTD.*/
01887 /*  New elements may be added by creating a procedure of the form*/
01888 /*  parse:DTD:_element_*/
01889 
01890 /*  For each of these procedures, the various regular expressions they use*/
01891 /*  are created outside of the proc to avoid overhead at runtime*/
01892 
01893 /*  sgml::DTD:ELEMENT --*/
01894 /* */
01895 /*  <!ELEMENT ...> defines an element.*/
01896 /* */
01897 /*  The content model for the element is stored in the contentmodel array,*/
01898 /*  indexed by the element name.  The content model is parsed into the*/
01899 /*  following list form:*/
01900 /* */
01901 /*      {}  Content model is EMPTY.*/
01902 /*          Indicated by an empty list.*/
01903 /*      *   Content model is ANY.*/
01904 /*          Indicated by an asterix.*/
01905 /*      {ELEMENT ...}*/
01906 /*          Content model is element-only.*/
01907 /*      {MIXED {element1 element2 ...}}*/
01908 /*          Content model is mixed (PCDATA and elements).*/
01909 /*          The second element of the list contains the */
01910 /*          elements that may occur.  #PCDATA is assumed */
01911 /*          (ie. the list is normalised).*/
01912 /* */
01913 /*  Arguments:*/
01914 /*  opts    configuration options*/
01915 /*  name    element GI*/
01916 /*  modspec unparsed content model specification*/
01917 
01918 ret  sgml::DTD:ELEMENT (type opts , type name , type modspec) {
01919     variable Wsp
01920     array set options $opts
01921 
01922     upvar #0 $options(elementdecls) elements
01923 
01924     if {$options(-validate) && [info exists elements($name)]} {
01925     eval $options(-errorcommand) [list elementdeclared "element \"$name\" already declared"]
01926     } else {
01927     switch -- $modspec {
01928         EMPTY {
01929             set elements($name) {}
01930         uplevel #0 $options(-elementdeclcommand) $name {{}}
01931         }
01932         ANY {
01933             set elements($name) *
01934         uplevel #0 $options(-elementdeclcommand) $name *
01935         }
01936         default {
01937         # Don't parse the content model for now,
01938         # just pass the model to the application
01939         if {0 && [regexp [format {^\([%s]*#PCDATA[%s]*(\|([^)]+))?[%s]*\)*[%s]*$} $Wsp $Wsp $Wsp $Wsp] discard discard mtoks]} {
01940             set cm($name) [list MIXED [split $mtoks |]]
01941         } elseif {0} {
01942             if {[catch {CModelParse $state(state) $value} result]} {
01943             eval $options(-errorcommand) [list element? $result]
01944             } else {
01945             set cm($id) [list ELEMENT $result]
01946             }
01947         } else {
01948             set elements($name) $modspec
01949             uplevel #0 $options(-elementdeclcommand) $name [list $modspec]
01950         }
01951         }
01952     }
01953     }
01954 }
01955 
01956 /*  sgml::CModelParse --*/
01957 /* */
01958 /*  Parse an element content model (non-mixed).*/
01959 /*  A syntax tree is constructed.*/
01960 /*  A transition table is built next.*/
01961 /* */
01962 /*  This is going to need alot of work!*/
01963 /* */
01964 /*  Arguments:*/
01965 /*  state   state array variable*/
01966 /*  value   the content model data*/
01967 /* */
01968 /*  Results:*/
01969 /*  A Tcl list representing the content model.*/
01970 
01971 ret  sgml::CModelParse (type state , type value) {
01972     upvar #0 $state var
01973 
01974     # First build syntax tree
01975     set syntaxTree [CModelMakeSyntaxTree $state $value]
01976 
01977     # Build transition table
01978     set transitionTable [CModelMakeTransitionTable $state $syntaxTree]
01979 
01980     return [list $syntaxTree $transitionTable]
01981 }
01982 
01983 /*  sgml::CModelMakeSyntaxTree --*/
01984 /* */
01985 /*  Construct a syntax tree for the regular expression.*/
01986 /* */
01987 /*  Syntax tree is represented as a Tcl list:*/
01988 /*  rep {:choice|:seq {{rep list1} {rep list2} ...}}*/
01989 /*  where:  rep is repetition character, *, + or ?. {} for no repetition*/
01990 /*      listN is nested expression or Name*/
01991 /* */
01992 /*  Arguments:*/
01993 /*  spec    Element specification*/
01994 /* */
01995 /*  Results:*/
01996 /*  Syntax tree for element spec as nested Tcl list.*/
01997 /* */
01998 /*  Examples:*/
01999 /*  (memo)*/
02000 /*      {} {:seq {{} memo}}*/
02001 /*  (front, body, back?)*/
02002 /*      {} {:seq {{} front} {{} body} {? back}}*/
02003 /*  (head, (p | list | note)*, div2*)*/
02004 /*      {} {:seq {{} head} {* {:choice {{} p} {{} list} {{} note}}} {* div2}}*/
02005 /*  (p | a | ul)+*/
02006 /*      + {:choice {{} p} {{} a} {{} ul}}*/
02007 
02008 ret  sgml::CModelMakeSyntaxTree (type state , type spec) {
02009     upvar #0 $state var
02010     variable Wsp
02011     variable name
02012 
02013     # Translate the spec into a Tcl list.
02014 
02015     # None of the Tcl special characters are allowed in a content model spec.
02016     if {[regexp {\$|\[|\]|\{|\}} $spec]} {
02017     return -code error "illegal characters in specification"
02018     }
02019 
02020     regsub -all [format {(%s)[%s]*(\?|\*|\+)?[%s]*(,|\|)?} $name $Wsp $Wsp] $spec [format {%sCModelSTname %s {\1} {\2} {\3}} \n $state] spec
02021     regsub -all {\(} $spec "\nCModelSTopenParen $state " spec
02022     regsub -all [format {\)[%s]*(\?|\*|\+)?[%s]*(,|\|)?} $Wsp $Wsp] $spec [format {%sCModelSTcloseParen %s {\1} {\2}} \n $state] spec
02023 
02024     array set var {stack {} state start}
02025     eval $spec
02026 
02027     # Peel off the outer seq, its redundant
02028     return [lindex [lindex $var(stack) 1] 0]
02029 }
02030 
02031 /*  sgml::CModelSTname --*/
02032 /* */
02033 /*  Processes a name in a content model spec.*/
02034 /* */
02035 /*  Arguments:*/
02036 /*  state   state array variable*/
02037 /*  name    name specified*/
02038 /*  rep repetition operator*/
02039 /*  cs  choice or sequence delimiter*/
02040 /* */
02041 /*  Results:*/
02042 /*  See CModelSTcp.*/
02043 
02044 ret  sgml::CModelSTname (type state , type name , type rep , type cs , type args) {
02045     if {[llength $args]} {
02046     return -code error "syntax error in specification: \"$args\""
02047     }
02048 
02049     CModelSTcp $state $name $rep $cs
02050 }
02051 
02052 /*  sgml::CModelSTcp --*/
02053 /* */
02054 /*  Process a content particle.*/
02055 /* */
02056 /*  Arguments:*/
02057 /*  state   state array variable*/
02058 /*  name    name specified*/
02059 /*  rep repetition operator*/
02060 /*  cs  choice or sequence delimiter*/
02061 /* */
02062 /*  Results:*/
02063 /*  The content particle is added to the current group.*/
02064 
02065 ret  sgml::CModelSTcp (type state , type cp , type rep , type cs) {
02066     upvar #0 $state var
02067 
02068     switch -glob -- [lindex $var(state) end]=$cs {
02069     start= {
02070         set var(state) [lreplace $var(state) end end end]
02071         # Add (dummy) grouping, either choice or sequence will do
02072         CModelSTcsSet $state ,
02073         CModelSTcpAdd $state $cp $rep
02074     }
02075     :choice= -
02076     :seq= {
02077         set var(state) [lreplace $var(state) end end end]
02078         CModelSTcpAdd $state $cp $rep
02079     }
02080     start=| -
02081     start=, {
02082         set var(state) [lreplace $var(state) end end [expr {$cs == "," ? ":seq" : ":choice"}]]
02083         CModelSTcsSet $state $cs
02084         CModelSTcpAdd $state $cp $rep
02085     }
02086     :choice=| -
02087     :seq=, {
02088         CModelSTcpAdd $state $cp $rep
02089     }
02090     :choice=, -
02091     :seq=| {
02092         return -code error "syntax error in specification: incorrect delimiter after \"$cp\", should be \"[expr {$cs == "," ? "|" : ","}]\""
02093     }
02094     end=* {
02095         return -code error "syntax error in specification: no delimiter before \"$cp\""
02096     }
02097     default {
02098         return -code error "syntax error"
02099     }
02100     }
02101     
02102 }
02103 
02104 /*  sgml::CModelSTcsSet --*/
02105 /* */
02106 /*  Start a choice or sequence on the stack.*/
02107 /* */
02108 /*  Arguments:*/
02109 /*  state   state array*/
02110 /*  cs  choice oir sequence*/
02111 /* */
02112 /*  Results:*/
02113 /*  state is modified: end element of state is appended.*/
02114 
02115 ret  sgml::CModelSTcsSet (type state , type cs) {
02116     upvar #0 $state var
02117 
02118     set cs [expr {$cs == "," ? ":seq" : ":choice"}]
02119 
02120     if {[llength $var(stack)]} {
02121     set var(stack) [lreplace $var(stack) end end $cs]
02122     } else {
02123     set var(stack) [list $cs {}]
02124     }
02125 }
02126 
02127 /*  sgml::CModelSTcpAdd --*/
02128 /* */
02129 /*  Append a content particle to the top of the stack.*/
02130 /* */
02131 /*  Arguments:*/
02132 /*  state   state array*/
02133 /*  cp  content particle*/
02134 /*  rep repetition*/
02135 /* */
02136 /*  Results:*/
02137 /*  state is modified: end element of state is appended.*/
02138 
02139 ret  sgml::CModelSTcpAdd (type state , type cp , type rep) {
02140     upvar #0 $state var
02141 
02142     if {[llength $var(stack)]} {
02143     set top [lindex $var(stack) end]
02144         lappend top [list $rep $cp]
02145     set var(stack) [lreplace $var(stack) end end $top]
02146     } else {
02147     set var(stack) [list $rep $cp]
02148     }
02149 }
02150 
02151 /*  sgml::CModelSTopenParen --*/
02152 /* */
02153 /*  Processes a '(' in a content model spec.*/
02154 /* */
02155 /*  Arguments:*/
02156 /*  state   state array*/
02157 /* */
02158 /*  Results:*/
02159 /*  Pushes stack in state array.*/
02160 
02161 ret  sgml::CModelSTopenParen (type state , type args) {
02162     upvar #0 $state var
02163 
02164     if {[llength $args]} {
02165     return -code error "syntax error in specification: \"$args\""
02166     }
02167 
02168     lappend var(state) start
02169     lappend var(stack) [list {} {}]
02170 }
02171 
02172 /*  sgml::CModelSTcloseParen --*/
02173 /* */
02174 /*  Processes a ')' in a content model spec.*/
02175 /* */
02176 /*  Arguments:*/
02177 /*  state   state array*/
02178 /*  rep repetition*/
02179 /*  cs  choice or sequence delimiter*/
02180 /* */
02181 /*  Results:*/
02182 /*  Stack is popped, and former top of stack is appended to previous element.*/
02183 
02184 ret  sgml::CModelSTcloseParen (type state , type rep , type cs , type args) {
02185     upvar #0 $state var
02186 
02187     if {[llength $args]} {
02188     return -code error "syntax error in specification: \"$args\""
02189     }
02190 
02191     set cp [lindex $var(stack) end]
02192     set var(stack) [lreplace $var(stack) end end]
02193     set var(state) [lreplace $var(state) end end]
02194     CModelSTcp $state $cp $rep $cs
02195 }
02196 
02197 /*  sgml::CModelMakeTransitionTable --*/
02198 /* */
02199 /*  Given a content model's syntax tree, constructs*/
02200 /*  the transition table for the regular expression.*/
02201 /* */
02202 /*  See "Compilers, Principles, Techniques, and Tools",*/
02203 /*  Aho, Sethi and Ullman.  Section 3.9, algorithm 3.5.*/
02204 /* */
02205 /*  Arguments:*/
02206 /*  state   state array variable*/
02207 /*  st  syntax tree*/
02208 /* */
02209 /*  Results:*/
02210 /*  The transition table is returned, as a key/value Tcl list.*/
02211 
02212 ret  sgml::CModelMakeTransitionTable (type state , type st) {
02213     upvar #0 $state var
02214 
02215     # Construct nullable, firstpos and lastpos functions
02216     array set var {number 0}
02217     foreach {nullable firstpos lastpos} [   \
02218     TraverseDepth1st $state $st {
02219         # Evaluated for leaf nodes
02220         # Compute nullable(n)
02221         # Compute firstpos(n)
02222         # Compute lastpos(n)
02223         set nullable [nullable leaf $rep $name]
02224         set firstpos [list {} $var(number)]
02225         set lastpos [list {} $var(number)]
02226         set var(pos:$var(number)) $name
02227     } {
02228         # Evaluated for nonterminal nodes
02229         # Compute nullable, firstpos, lastpos
02230         set firstpos [firstpos $cs $firstpos $nullable]
02231         set lastpos  [lastpos  $cs $lastpos  $nullable]
02232         set nullable [nullable nonterm $rep $cs $nullable]
02233     }   \
02234     ] break
02235 
02236     set accepting [incr var(number)]
02237     set var(pos:$accepting) #
02238 
02239     # var(pos:N) maps from position to symbol.
02240     # Construct reverse map for convenience.
02241     # NB. A symbol may appear in more than one position.
02242     # var is about to be reset, so use different arrays.
02243 
02244     foreach {pos symbol} [array get var pos:*] {
02245     set pos [lindex [split $pos :] 1]
02246     set pos2symbol($pos) $symbol
02247     lappend sym2pos($symbol) $pos
02248     }
02249 
02250     # Construct the followpos functions
02251     catch {unset var}
02252     followpos $state $st $firstpos $lastpos
02253 
02254     # Construct transition table
02255     # Dstates is [union $marked $unmarked]
02256     set unmarked [list [lindex $firstpos 1]]
02257     while {[llength $unmarked]} {
02258     set T [lindex $unmarked 0]
02259     lappend marked $T
02260     set unmarked [lrange $unmarked 1 end]
02261 
02262     # Find which input symbols occur in T
02263     set symbols {}
02264     foreach pos $T {
02265         if {$pos != $accepting && [lsearch $symbols $pos2symbol($pos)] < 0} {
02266         lappend symbols $pos2symbol($pos)
02267         }
02268     }
02269     foreach a $symbols {
02270         set U {}
02271         foreach pos $sym2pos($a) {
02272         if {[lsearch $T $pos] >= 0} {
02273             # add followpos($pos)
02274                 if {$var($pos) == {}} {
02275                     lappend U $accepting
02276                 } else {
02277                     eval lappend U $var($pos)
02278                 }
02279         }
02280         }
02281         set U [makeSet $U]
02282         if {[llength $U] && [lsearch $marked $U] < 0 && [lsearch $unmarked $U] < 0} {
02283         lappend unmarked $U
02284         }
02285         set Dtran($T,$a) $U
02286     }
02287     
02288     }
02289 
02290     return [list [array get Dtran] [array get sym2pos] $accepting]
02291 }
02292 
02293 /*  sgml::followpos --*/
02294 /* */
02295 /*  Compute the followpos function, using the already computed*/
02296 /*  firstpos and lastpos.*/
02297 /* */
02298 /*  Arguments:*/
02299 /*  state       array variable to store followpos functions*/
02300 /*  st      syntax tree*/
02301 /*  firstpos    firstpos functions for the syntax tree*/
02302 /*  lastpos     lastpos functions*/
02303 /* */
02304 /*  Results:*/
02305 /*  followpos functions for each leaf node, in name/value format*/
02306 
02307 ret  sgml::followpos (type state , type st , type firstpos , type lastpos) {
02308     upvar #0 $state var
02309 
02310     switch -- [lindex [lindex $st 1] 0] {
02311     :seq {
02312         for {set i 1} {$i < [llength [lindex $st 1]]} {incr i} {
02313             followpos $state [lindex [lindex $st 1] $i]         \
02314             [lindex [lindex $firstpos 0] [expr $i - 1]] \
02315             [lindex [lindex $lastpos 0] [expr $i - 1]]
02316             foreach pos [lindex [lindex [lindex $lastpos 0] [expr $i - 1]] 1] {
02317             eval lappend var($pos) [lindex [lindex [lindex $firstpos 0] $i] 1]
02318             set var($pos) [makeSet $var($pos)]
02319             }
02320         }
02321     }
02322     :choice {
02323         for {set i 1} {$i < [llength [lindex $st 1]]} {incr i} {
02324         followpos $state [lindex [lindex $st 1] $i]         \
02325             [lindex [lindex $firstpos 0] [expr $i - 1]] \
02326             [lindex [lindex $lastpos 0] [expr $i - 1]]
02327         }
02328     }
02329     default {
02330         # No action at leaf nodes
02331     }
02332     }
02333 
02334     switch -- [lindex $st 0] {
02335     ? {
02336         # We having nothing to do here ! Doing the same as
02337         # for * effectively converts this qualifier into the other.
02338     }
02339     * {
02340         foreach pos [lindex $lastpos 1] {
02341         eval lappend var($pos) [lindex $firstpos 1]
02342         set var($pos) [makeSet $var($pos)]
02343         }
02344     }
02345     }
02346 
02347 }
02348 
02349 /*  sgml::TraverseDepth1st --*/
02350 /* */
02351 /*  Perform depth-first traversal of a tree.*/
02352 /*  A new tree is constructed, with each node computed by f.*/
02353 /* */
02354 /*  Arguments:*/
02355 /*  state   state array variable*/
02356 /*  t   The tree to traverse, a Tcl list*/
02357 /*  leaf    Evaluated at a leaf node*/
02358 /*  nonTerm Evaluated at a nonterminal node*/
02359 /* */
02360 /*  Results:*/
02361 /*  A new tree is returned.*/
02362 
02363 ret  sgml::TraverseDepth1st (type state , type t , type leaf , type nonTerm) {
02364     upvar #0 $state var
02365 
02366     set nullable {}
02367     set firstpos {}
02368     set lastpos {}
02369 
02370     switch -- [lindex [lindex $t 1] 0] {
02371     :seq -
02372     :choice {
02373         set rep [lindex $t 0]
02374         set cs [lindex [lindex $t 1] 0]
02375 
02376         foreach child [lrange [lindex $t 1] 1 end] {
02377         foreach {childNullable childFirstpos childLastpos} \
02378             [TraverseDepth1st $state $child $leaf $nonTerm] break
02379         lappend nullable $childNullable
02380         lappend firstpos $childFirstpos
02381         lappend lastpos  $childLastpos
02382         }
02383 
02384         eval $nonTerm
02385     }
02386     default {
02387         incr var(number)
02388         set rep [lindex [lindex $t 0] 0]
02389         set name [lindex [lindex $t 1] 0]
02390         eval $leaf
02391     }
02392     }
02393 
02394     return [list $nullable $firstpos $lastpos]
02395 }
02396 
02397 /*  sgml::firstpos --*/
02398 /* */
02399 /*  Computes the firstpos function for a nonterminal node.*/
02400 /* */
02401 /*  Arguments:*/
02402 /*  cs      node type, choice or sequence*/
02403 /*  firstpos    firstpos functions for the subtree*/
02404 /*  nullable    nullable functions for the subtree*/
02405 /* */
02406 /*  Results:*/
02407 /*  firstpos function for this node is returned.*/
02408 
02409 ret  sgml::firstpos (type cs , type firstpos , type nullable) {
02410     switch -- $cs {
02411     :seq {
02412         set result [lindex [lindex $firstpos 0] 1]
02413         for {set i 0} {$i < [llength $nullable]} {incr i} {
02414             if {[lindex [lindex $nullable $i] 1]} {
02415                 eval lappend result [lindex [lindex $firstpos [expr $i + 1]] 1]
02416         } else {
02417             break
02418         }
02419         }
02420     }
02421     :choice {
02422         foreach child $firstpos {
02423         eval lappend result $child
02424         }
02425     }
02426     }
02427 
02428     return [list $firstpos [makeSet $result]]
02429 }
02430 
02431 /*  sgml::lastpos --*/
02432 /* */
02433 /*  Computes the lastpos function for a nonterminal node.*/
02434 /*  Same as firstpos, only logic is reversed*/
02435 /* */
02436 /*  Arguments:*/
02437 /*  cs      node type, choice or sequence*/
02438 /*  lastpos     lastpos functions for the subtree*/
02439 /*  nullable    nullable functions forthe subtree*/
02440 /* */
02441 /*  Results:*/
02442 /*  lastpos function for this node is returned.*/
02443 
02444 ret  sgml::lastpos (type cs , type lastpos , type nullable) {
02445     switch -- $cs {
02446     :seq {
02447         set result [lindex [lindex $lastpos end] 1]
02448         for {set i [expr [llength $nullable] - 1]} {$i >= 0} {incr i -1} {
02449         if {[lindex [lindex $nullable $i] 1]} {
02450             eval lappend result [lindex [lindex $lastpos $i] 1]
02451         } else {
02452             break
02453         }
02454         }
02455     }
02456     :choice {
02457         foreach child $lastpos {
02458         eval lappend result $child
02459         }
02460     }
02461     }
02462 
02463     return [list $lastpos [makeSet $result]]
02464 }
02465 
02466 /*  sgml::makeSet --*/
02467 /* */
02468 /*  Turn a list into a set, ie. remove duplicates.*/
02469 /* */
02470 /*  Arguments:*/
02471 /*  s   a list*/
02472 /* */
02473 /*  Results:*/
02474 /*  A set is returned, which is a list with duplicates removed.*/
02475 
02476 ret  sgml::makeSet s (
02477     type foreach , type r $, type s , optional 
02478     if ={[llength $r] , optional 
02479         set =unique($r) {
02480     )
02481     }
02482     return [array names unique]
02483 }
02484 
02485 # sgml::nullable --
02486 #
02487 #   Compute the nullable function for a node.
02488 #
02489 # Arguments:
02490 #   nodeType    leaf or nonterminal
02491 #   rep     repetition applying to this node
02492 #   name        leaf node: symbol for this node, nonterm node: choice or seq node
02493 #   subtree     nonterm node: nullable functions for the subtree
02494 #
02495 # Results:
02496 #   Returns nullable function for this branch of the tree.
02497 
02498 proc sgml::nullable {nodeType rep name {subtree {}}} {
02499     switch -glob -- $rep:$nodeType {
02500     :leaf -
02501     +:leaf {
02502         return [list {} 0]
02503     }
02504     \\*:leaf -
02505     \\?:leaf {
02506         return [list {} 1]
02507     }
02508     \\*:nonterm -
02509     \\?:nonterm {
02510         return [list $subtree 1]
02511     }
02512     :nonterm -
02513     +:nonterm {
02514         switch -- $name {
02515         :choice {
02516              result =  0
02517             foreach child $subtree {
02518              result =  [expr $result || [lindex $child 1]]
02519             }
02520         }
02521         :seq {
02522              result =  1
02523             foreach child $subtree {
02524              result =  [expr $result && [lindex $child 1]]
02525             }
02526         }
02527         }
02528         return [list $subtree $result]
02529     }
02530     }
02531 }
02532 
02533 /*  sgml::DTD:ATTLIST --*/
02534 /* */
02535 /*  <!ATTLIST ...> defines an attribute list.*/
02536 /* */
02537 /*  Arguments:*/
02538 /*  opts    configuration opions*/
02539 /*  name    Element GI*/
02540 /*  attspec unparsed attribute definitions*/
02541 /* */
02542 /*  Results:*/
02543 /*  Attribute list variables are modified.*/
02544 
02545 ret  sgml::DTD:ATTLIST (type opts , type name , type attspec) {
02546     variable attlist_exp
02547     variable attlist_enum_exp
02548     variable attlist_fixed_exp
02549 
02550     array set options $opts
02551 
02552     # Parse the attribute list.  If it were regular, could just use foreach,
02553     # but some attributes may have values.
02554     regsub -all {([][$\\])} $attspec {\\\1} attspec
02555     regsub -all $attlist_exp $attspec "\}\nDTDAttribute {$options(-attlistdeclcommand)} $name $options(attlistdecls) {\\1} {\\2} {\\3} {} \{" attspec
02556     regsub -all $attlist_enum_exp $attspec "\}\nDTDAttribute {$options(-attlistdeclcommand)} $name $options(attlistdecls) {\\1} {\\2} {} {\\4} \{" attspec
02557     regsub -all $attlist_fixed_exp $attspec "\}\nDTDAttribute {$options(-attlistdeclcommand)} $name $options(attlistdecls) {\\1} {\\2} {\\3} {\\4} \{" attspec
02558 
02559     eval "noop \{$attspec\}"
02560 
02561     return {}
02562 }
02563 
02564 /*  sgml::DTDAttribute --*/
02565 /* */
02566 /*  Parse definition of a single attribute.*/
02567 /* */
02568 /*  Arguments:*/
02569 /*  callback    attribute defn callback*/
02570 /*  name    element name*/
02571 /*  var array variable*/
02572 /*  att attribute name*/
02573 /*  type    type of this attribute*/
02574 /*  default default value of the attribute*/
02575 /*  value   other information*/
02576 /*  text    other text (should be empty)*/
02577 /* */
02578 /*  Results:*/
02579 /*  Attribute defn added to array, unless it already exists*/
02580 
02581 ret  sgml::DTDAttribute args (
02582     # type BUG: , type Some , type problems , type with , type parameter , type passing - , type deal , type with , type it , type later
02583     , type foreach , optional callback =name var =att type =default value =text $, type args , type break
02584 
02585     , type upvar #0 $, type var , type atts
02586 
02587     , type if , optional [string =length [string =trim $text]] , optional 
02588     return =-code error ="unexpected text =\"$text\" in =attribute definition"
02589     
02590 
02591     # , type What , type about , type overridden , type attribute , type defns?
02592     # , type A , type non-, type validating , type app , type may , type want , type to , type know , type about , type them
02593     # (, type eg. , type an , type editor)
02594     , type if , optional ![info =exists atts($name/$att)] , optional 
02595     set =atts($name/$att) [list =$type $default =$value]
02596     uplevel =#0 $callback =[list $name =$att $type =$default $value]
02597     
02598 
02599     , type return , optional 
02600 )
02601 
02602 # sgml::DTD:ENTITY --
02603 #
02604 #   <!ENTITY ...> declaration.
02605 #
02606 #   Callbacks:
02607 #   -entitydeclcommand for general entity declaration
02608 #   -unparsedentitydeclcommand for unparsed external entity declaration
02609 #   -parameterentitydeclcommand for parameter entity declaration
02610 #
02611 # Arguments:
02612 #   opts    configuration options
02613 #   name    name of entity being defined
02614 #   param   whether a parameter entity is being defined
02615 #   value   unparsed replacement text
02616 #
02617 # Results:
02618 #   Modifies the caller's entities array variable
02619 
02620 proc sgml::DTD:ENTITY {opts name param value} {
02621 
02622     array  options =  $opts
02623 
02624     if {[string compare % $param]} {
02625     /*  Entity declaration - general or external*/
02626     upvar /* 0 $options(entities) ents*/
02627     upvar /* 0 $options(extentities) externals*/
02628 
02629     if {[info exists ents($name)] || [info exists externals($name)]} {
02630         eval $options(-warningcommand) entity [list "entity \"$name\" already declared"]
02631     } else {
02632         if {[catch {uplevel /* 0 $options(-parseentitydeclcommand) [list $value]} value]} {*/
02633         return -code error "unable to parse entity declaration due to \"$value\""
02634         }
02635         switch -glob [lindex $value 0],[lindex $value 3] {
02636         internal, {
02637              ents = ($name) [EntitySubst [array get options] [lindex $value 1]]
02638             uplevel /* 0 $options(-entitydeclcommand) [list $name $ents($name)]*/
02639         }
02640         internal,* {
02641             return -code error "unexpected NDATA declaration"
02642         }
02643         external, {
02644              externals = ($name) [lrange $value 1 2]
02645             uplevel /* 0 $options(-entitydeclcommand) [eval list $name [lrange $value 1 2]]*/
02646         }
02647         external,* {
02648              externals = ($name) [lrange $value 1 3]
02649             uplevel /* 0 $options(-unparsedentitydeclcommand) [eval list $name [lrange $value 1 3]]*/
02650         }
02651         default {
02652             return -code error "internal error: unexpected parser state"
02653         }
02654         }
02655     }
02656     } else {
02657     /*  Parameter entity declaration*/
02658     upvar /* 0 $options(parameterentities) PEnts*/
02659     upvar /* 0 $options(externalparameterentities) ExtPEnts*/
02660 
02661     if {[info exists PEnts($name)] || [info exists ExtPEnts($name)]} {
02662         eval $options(-warningcommand) parameterentity [list "parameter entity \"$name\" already declared"]
02663     } else {
02664         if {[catch {uplevel /* 0 $options(-parseentitydeclcommand) [list $value]} value]} {*/
02665         return -code error "unable to parse parameter entity declaration due to \"$value\""
02666         }
02667         if {[string length [lindex $value 3]]} {
02668         return -code error "NDATA illegal in parameter entity declaration"
02669         }
02670         switch [lindex $value 0] {
02671         internal {
02672             /*  Substitute character references and PEs (XML: 4.5)*/
02673              value =  [EntitySubst [array get options] [lindex $value 1]]
02674 
02675              PEnts = ($name) $value
02676             uplevel /* 0 $options(-parameterentitydeclcommand) [list $name $value]*/
02677         }
02678         external -
02679         default {
02680             /*  Get the replacement text now.*/
02681             /*  Could wait until the first reference, but easier*/
02682             /*  to just do it now.*/
02683 
02684              token =  [uri::geturl [uri::resolve $options(-baseuri) [lindex $value 1]]]
02685 
02686              ExtPEnts = ($name) [lindex [array get $token data] 1]
02687             uplevel /* 0 $options(-parameterentitydeclcommand) [eval list $name [lrange $value 1 2]]*/
02688         }
02689         }
02690     }
02691     }
02692 }
02693 
02694 /*  sgml::EntitySubst --*/
02695 /* */
02696 /*  Perform entity substitution on an entity replacement text.*/
02697 /*  This differs slightly from other substitution procedures,*/
02698 /*  because only parameter and character entity substitution*/
02699 /*  is performed, not general entities.*/
02700 /*  See XML Rec. section 4.5.*/
02701 /* */
02702 /*  Arguments:*/
02703 /*  opts    configuration options*/
02704 /*  value   Literal entity value*/
02705 /* */
02706 /*  Results:*/
02707 /*  Expanded replacement text*/
02708 
02709 ret  sgml::EntitySubst (type opts , type value) {
02710     array set options $opts
02711 
02712     # Protect Tcl special characters
02713     regsub -all {([{}\\])} $value {\\\1} value
02714 
02715     # Find entity references
02716     regsub -all (&#\[0-9\]+|&#x\[0-9a-fA-F\]+|%${::sgml::Name})\; $value "\[EntitySubstValue [list $options(parameterentities)] {\\1}\]" value
02717 
02718     set result [subst $value]
02719 
02720     return $result
02721 }
02722 
02723 /*  sgml::EntitySubstValue --*/
02724 /* */
02725 /*  Handle a single character or parameter entity substitution*/
02726 /* */
02727 /*  Arguments:*/
02728 /*  PEvar   array variable containing PE declarations*/
02729 /*  ref character or parameter entity reference*/
02730 /* */
02731 /*  Results:*/
02732 /*  Replacement text*/
02733 
02734 ret  sgml::EntitySubstValue (type PEvar , type ref) {
02735     # SRB: Bug fix 2008-11-18 #812051: surround case labels in braces for compatibility with Freewrap
02736     switch -glob -- $ref {
02737     {&#x*} {
02738         scan [string range $ref 3 end] %x hex
02739         return [format %c $hex]
02740     }
02741     {&#*} {
02742         return [format %c [string range $ref 2 end]]
02743     }
02744     {%*} {
02745         upvar #0 $PEvar PEs
02746         set ref [string range $ref 1 end]
02747         if {[info exists PEs($ref)]} {
02748         return $PEs($ref)
02749         } else {
02750         return -code error "parameter entity \"$ref\" not declared"
02751         }
02752     }
02753     default {
02754         return -code error "internal error - unexpected entity reference"
02755     }
02756     }
02757     return {}
02758 }
02759 
02760 /*  sgml::DTD:NOTATION --*/
02761 /* */
02762 /*  Process notation declaration*/
02763 /* */
02764 /*  Arguments:*/
02765 /*  opts    configuration options*/
02766 /*  name    notation name*/
02767 /*  value   unparsed notation spec*/
02768 
02769 ret  sgml::DTD:NOTATION (type opts , type name , type value) {
02770     return {}
02771 
02772     variable notation_exp
02773     upvar opts state
02774 
02775     if {[regexp $notation_exp $value x scheme data] == 2} {
02776     } else {
02777     eval $state(-errorcommand) [list notationvalue "notation value \"$value\" incorrectly specified"]
02778     }
02779 }
02780 
02781 /*  sgml::ResolveEntity --*/
02782 /* */
02783 /*  Default entity resolution routine*/
02784 /* */
02785 /*  Arguments:*/
02786 /*  cmd command of parent parser*/
02787 /*  base    base URL for relative URLs*/
02788 /*  sysId   system identifier*/
02789 /*  pubId   public identifier*/
02790 
02791 ret  sgml::ResolveEntity (type cmd , type base , type sysId , type pubId) {
02792     variable ParseEventNum
02793 
02794     if {[catch {uri::resolve $base $sysId} url]} {
02795     return -code error "unable to resolve system identifier \"$sysId\""
02796     }
02797     if {[catch {uri::geturl $url} token]} {
02798     return -code error "unable to retrieve external entity \"$url\" for system identifier \"$sysId\""
02799     }
02800 
02801     upvar #0 $token data
02802 
02803     set parser [uplevel #0 $cmd entityparser]
02804 
02805     set body {}
02806     catch {set body $data(body)}
02807     catch {set body $data(data)}
02808     if {[string length $body]} {
02809     uplevel #0 $parser parse [list $body] -dtdsubset external
02810     }
02811     $parser free
02812 
02813     return {}
02814 }
02815 

Generated on 21 Sep 2010 for Gui by  doxygen 1.6.1