00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019 package require Tcl 8.1
00020
00021 package provide xml::tclparser 3.2
00022
00023 package require xmldefs 3.2
00024
00025 package require sgmlparser 1.0
00026
00027 namespace xml::tclparser {
00028
00029 namespace export create createexternal externalentity parse configure get delete
00030
00031
00032
00033 variable tokExpr $::xml::tokExpr
00034 variable substExpr $::xml::substExpr
00035
00036
00037
00038 ::xml::parserclass create tcl \
00039 -createcommand [namespace code create] \
00040 -createentityparsercommand [namespace code createentityparser] \
00041 -parsecommand [namespace code parse] \
00042 -configurecommand [namespace code configure] \
00043 -deletecommand [namespace code delete] \
00044 -recommand = [namespace code re]
00045 }
00046
00047 # xml = ::tclparser::create --
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057 ret xml::tclparser::create name (
00058
00059 # type Initialise , type state , type variable
00060 , type upvar \#0 [, type namespace , type current]::$, type name , type parser
00061 , type array , type set , type parser [, type list -, type name $, type name \
00062 -, type cmd [, type uplevel 3 , type namespace , type current]::$, type name \
00063 -, type final 1 \
00064 -, type validate 0 \
00065 -, type statevariable [, type namespace , type current]::$, type name \
00066 -, type baseuri , optional \
00067 , type internaldtd , optional \
00068 , type entities [, type namespace , type current]::, type Entities$, type name \
00069 , type extentities [, type namespace , type current]::, type ExtEntities$, type name \
00070 , type parameterentities [, type namespace , type current]::, type PEntities$, type name \
00071 , type externalparameterentities [, type namespace , type current]::, type ExtPEntities$, type name \
00072 , type elementdecls [, type namespace , type current]::, type ElDecls$, type name \
00073 , type attlistdecls [, type namespace , type current]::, type AttlistDecls$, type name \
00074 , type notationdecls [, type namespace , type current]::, type NotDecls$, type name \
00075 , type depth 0 \
00076 , type leftover , optional \
00077 ]
00078
00079 # , type Initialise , type entities , type with , type predefined , type set
00080 , type array , type set [, type namespace , type current]::, type Entities$, type name [, type array , type get ::, type sgml::, type EntityPredef]
00081
00082 , type return $, type parser(-, type cmd)
00083 )
00084
00085 # xml::tclparser::createentityparser --
00086 #
00087 # Creates XML parser object for an entity.
00088 #
00089 # Arguments:
00090 # name name for the new parser
00091 # parent name of parent parser
00092 #
00093 # Results:
00094 # The state variable is initialised.
00095
00096 proc xml::tclparser::createentityparser {parent name} {
00097 upvar
00098
00099
00100 upvar \
00101 array external = [array get p]
00102
00103 regsub $parent $p(-cmd) {} parentns
00104
00105 array external = [list -name $name \
00106 -cmd $parentns$name \
00107 -statevariable [namespace current]::$name \
00108 internaldtd {} \
00109 line 0 \
00110 ]
00111 incr external(depth)
00112
00113 return $external(-cmd)
00114 }
00115
00116
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126
00127 ret xml::tclparser::configure (type name , type args) {
00128 upvar \#0 [namespace current]::$name parser
00129
00130 # BUG: very crude, no checks for illegal args
00131 # Mats: Should be synced with sgmlparser.tcl
00132 set options {-elementstartcommand -elementendcommand \
00133 -characterdatacommand -processinginstructioncommand \
00134 -externalentitycommand -xmldeclcommand \
00135 -doctypecommand -commentcommand \
00136 -entitydeclcommand -unparsedentitydeclcommand \
00137 -parameterentitydeclcommand -notationdeclcommand \
00138 -elementdeclcommand -attlistdeclcommand \
00139 -paramentityparsing -defaultexpandinternalentities \
00140 -startdoctypedeclcommand -enddoctypedeclcommand \
00141 -entityreferencecommand -warningcommand \
00142 -defaultcommand -unknownencodingcommand -notstandalonecommand \
00143 -startcdatasectioncommand -endcdatasectioncommand \
00144 -errorcommand -final \
00145 -validate -baseuri -baseurl \
00146 -name -cmd -emptyelement \
00147 -parseattributelistcommand -parseentitydeclcommand \
00148 -normalize -internaldtd -dtdsubset \
00149 -reportempty -ignorewhitespace \
00150 -reportempty \
00151 }
00152 set usage [join $options ", "]
00153 regsub -all -- - $options {} options
00154 set pat ^-([join $options |])$
00155 foreach {flag value} $args {
00156 if {[regexp $pat $flag]} {
00157 # Validate numbers
00158 if {[info exists parser($flag)] && \
00159 [string is integer -strict $parser($flag)] && \
00160 ![string is integer -strict $value]} {
00161 return -code error "Bad value for $flag ($value), must be integer"
00162 }
00163 set parser($flag) $value
00164 } else {
00165 return -code error "Unknown option $flag, can be: $usage"
00166 }
00167 }
00168
00169 # Backward-compatibility: -baseuri is a synonym for -baseurl
00170 catch {set parser(-baseuri) $parser(-baseurl)}
00171
00172 return {}
00173 }
00174
00175
00176
00177
00178
00179
00180
00181
00182
00183
00184
00185
00186
00187 ret xml::tclparser::parse (type name , type xml , type args) {
00188
00189 array set options $args
00190 upvar \#0 [namespace current]::$name parser
00191 variable tokExpr
00192 variable substExpr
00193
00194 # Mats:
00195 if {[llength $args]} {
00196 eval {configure $name} $args
00197 }
00198
00199 set parseOptions [list \
00200 -emptyelement [namespace code ParseEmpty] \
00201 -parseattributelistcommand [namespace code ParseAttrs] \
00202 -parseentitydeclcommand [namespace code ParseEntity] \
00203 -normalize 0]
00204 eval lappend parseOptions \
00205 [array get parser -*command] \
00206 [array get parser -reportempty] \
00207 [array get parser -ignorewhitespace] \
00208 [array get parser -name] \
00209 [array get parser -cmd] \
00210 [array get parser -baseuri] \
00211 [array get parser -validate] \
00212 [array get parser -final] \
00213 [array get parser -defaultexpandinternalentities] \
00214 [array get parser entities] \
00215 [array get parser extentities] \
00216 [array get parser parameterentities] \
00217 [array get parser externalparameterentities] \
00218 [array get parser elementdecls] \
00219 [array get parser attlistdecls] \
00220 [array get parser notationdecls]
00221
00222 # Mats:
00223 # If -final 0 we also need to maintain the state with a -statevariable !
00224 if {!$parser(-final)} {
00225 eval lappend parseOptions [array get parser -statevariable]
00226 }
00227
00228 set dtdsubset no
00229 catch {set dtdsubset $options(-dtdsubset)}
00230 switch -- $dtdsubset {
00231 internal {
00232 # Bypass normal parsing
00233 lappend parseOptions -statevariable $parser(-statevariable)
00234 array set intOptions [array get ::sgml::StdOptions]
00235 array set intOptions $parseOptions
00236 ::sgml::ParseDTD:Internal [array get intOptions] $xml
00237 return {}
00238 }
00239 external {
00240 # Bypass normal parsing
00241 lappend parseOptions -statevariable $parser(-statevariable)
00242 array set intOptions [array get ::sgml::StdOptions]
00243 array set intOptions $parseOptions
00244 ::sgml::ParseDTD:External [array get intOptions] $xml
00245 return {}
00246 }
00247 default {
00248 # Pass through to normal processing
00249 }
00250 }
00251
00252 lappend tokenOptions \
00253 -internaldtdvariable [namespace current]::${name}(internaldtd)
00254
00255 # Mats: If -final 0 we also need to maintain the state with a -statevariable !
00256 if {!$parser(-final)} {
00257 eval lappend tokenOptions [array get parser -statevariable] \
00258 [array get parser -final]
00259 }
00260
00261 # Mats:
00262 # Why not the first four? Just padding? Lrange undos \n interp.
00263 # It is necessary to have the first four as well if chopped off in
00264 # middle of pcdata.
00265 set tokenised [lrange \
00266 [eval {::sgml::tokenise $xml $tokExpr $substExpr} $tokenOptions] \
00267 0 end]
00268
00269 lappend parseOptions -internaldtd [list $parser(internaldtd)]
00270 eval ::sgml::parseEvent [list $tokenised] $parseOptions
00271
00272 return {}
00273 }
00274
00275
00276
00277
00278
00279
00280
00281
00282
00283
00284
00285
00286
00287
00288
00289 ret xml::tclparser::ParseEmpty (type tag , type attr , type e) {
00290 switch -glob [string length $e],[regexp "/[::xml::cl $::xml::Wsp]*$" $attr] {
00291 0,0 {
00292 return {}
00293 }
00294 0,* {
00295 return /
00296 }
00297 default {
00298 return $e
00299 }
00300 }
00301 }
00302
00303
00304
00305
00306
00307
00308
00309
00310
00311
00312
00313
00314
00315
00316
00317
00318
00319
00320
00321
00322
00323
00324
00325
00326
00327 ret xml::tclparser::ParseAttrs (type opts , type attrs) {
00328
00329 set result {}
00330
00331 while {[string length [string trim $attrs]]} {
00332 if {[regexp [::sgml::cl $::xml::Wsp]*($::xml::Name)[::sgml::cl $::xml::Wsp]*=[::sgml::cl $::xml::Wsp]*("|')([::sgml::cl ^<]*?)\\2(.*) $attrs discard attrName delimiter value attrs]} {
00333 lappend result $attrName [NormalizeAttValue $opts $value]
00334 } elseif {[regexp [::sgml::cl $::xml::Wsp]*$::xml::Name[::sgml::cl $::xml::Wsp]*=[::sgml::cl $::xml::Wsp]*("|')[::sgml::cl ^<]*\$ $attrs]} {
00335 return -code error [list {unterminated attribute value} $result $attrs]
00336 } else {
00337 return -code error "invalid attribute list"
00338 }
00339 }
00340
00341 return $result
00342 }
00343
00344
00345
00346
00347
00348
00349
00350
00351
00352
00353
00354
00355
00356
00357
00358
00359 ret xml::tclparser::NormalizeAttValue (type opts , type value) {
00360
00361 # sgmlparser already has backslashes protected
00362 # Protect Tcl specials
00363 regsub -all {([][$])} $value {\\\1} value
00364
00365 # Deal with white space
00366 regsub -all "\[$::xml::Wsp\]" $value { } value
00367
00368 # Find entity refs
00369 regsub -all {&([^;]+);} $value {[NormalizeAttValue:DeRef $opts {\1}]} value
00370
00371 return [subst $value]
00372 }
00373
00374
00375
00376
00377
00378
00379
00380
00381
00382
00383
00384
00385 ret xml::tclparser::NormalizeAttValue:DeRef (type opts , type ref) {
00386 # SRB: Bug fix 2008-11-18 #812051: surround case labels in braces for compatibility with Freewrap
00387 switch -glob -- $ref {
00388 {#x*} {
00389 scan [string range $ref 2 end] %x value
00390 set char [format %c $value]
00391 # Check that the char is legal for XML
00392 if {[regexp [format {^[%s]$} $::xml::Char] $char]} {
00393 return $char
00394 } else {
00395 return -code error "illegal character"
00396 }
00397 }
00398 {#*} {
00399 scan [string range $ref 1 end] %d value
00400 set char [format %c $value]
00401 # Check that the char is legal for XML
00402 if {[regexp [format {^[%s]$} $::xml::Char] $char]} {
00403 return $char
00404 } else {
00405 return -code error "illegal character"
00406 }
00407 }
00408 lt -
00409 gt -
00410 amp -
00411 quot -
00412 apos {
00413 array set map {lt < gt > amp & quot \" apos '}
00414 return $map($ref)
00415 }
00416 default {
00417 # A general entity. Must resolve to a text value - no element structure.
00418
00419 array set options $opts
00420 upvar #0 $options(entities) map
00421
00422 if {[info exists map($ref)]} {
00423
00424 if {[regexp < $map($ref)]} {
00425 return -code error "illegal character \"<\" in attribute value"
00426 }
00427
00428 if {![regexp & $map($ref)]} {
00429 # Simple text replacement
00430 return $map($ref)
00431 }
00432
00433 # There are entity references in the replacement text.
00434 # Can't use child entity parser since must catch element structures
00435
00436 return [NormalizeAttValue $opts $map($ref)]
00437
00438 } elseif {[string compare $options(-entityreferencecommand) "::sgml::noop"]} {
00439
00440 set result [uplevel #0 $options(-entityreferencecommand) [list $ref]]
00441
00442 return $result
00443
00444 } else {
00445 return -code error "unable to resolve entity reference \"$ref\""
00446 }
00447 }
00448 }
00449 }
00450
00451
00452
00453
00454
00455
00456
00457
00458
00459
00460
00461 ret xml::tclparser::ParseEntity data (
00462 type set , type data [, type string , type trim $, type data]
00463 , type if , optional [regexp =$::sgml::ExternalEntityExpr $data =discard type =delimiter1 id1 =discard delimiter2 =id2 optNDATA =ndata] , optional
00464 switch =$type {
00465 =PUBLIC {
00466 =return [list =external $id2 =$id1 $ndata]
00467
00468 , type SYSTEM , optional
00469 return =[list external =$id1 { $, type ndata]
00470 )
00471 }
00472 } elseif {[regexp {^("|')(.*?)\1$} $data discard delimiter value]} {
00473 return [list internal $value]
00474 } else {
00475 return -code error "badly formed entity declaration"
00476 }
00477 }
00478
00479 # xml::tclparser::delete --
00480 #
00481 # Destroy parser data
00482 #
00483 # Arguments:
00484 # name parser object
00485 #
00486 # Results:
00487 # Parser data structure destroyed
00488
00489 proc xml::tclparser::delete name {
00490 upvar \#0 [namespace current]::$name parser
00491 catch {::sgml::ParserDelete $parser(-statevariable)}
00492 catch {unset parser}
00493 return {}
00494 }
00495
00496 # xml::tclparser::get --
00497 #
00498 # Retrieve additional information from the parser
00499 #
00500 # Arguments:
00501 # name parser object
00502 # method info to retrieve
00503 # args additional arguments for method
00504 #
00505 # Results:
00506 # Depends on method
00507
00508 proc xml::tclparser::get {name method args} {
00509 upvar #0 [namespace current]::$name parser
00510
00511 switch -- $method {
00512
00513 elementdecl {
00514 switch [llength $args] {
00515
00516 0 {
00517 # Return all element declarations
00518 upvar #0 $parser(elementdecls) elements
00519 return [array get elements]
00520 }
00521
00522 1 {
00523 # Return specific element declaration
00524 upvar #0 $parser(elementdecls) elements
00525 if {[info exists elements([lindex $args 0])]} {
00526 return [array get elements [lindex $args 0]]
00527 } else {
00528 return -code error "element \"[lindex $args 0]\" not declared"
00529 }
00530 }
00531
00532 default {
00533 return -code error "wrong number of arguments: should be \"elementdecl ?element?\""
00534 }
00535 }
00536 }
00537
00538 attlist {
00539 if {[llength $args] != 1} {
00540 return -code error "wrong number of arguments: should be \"get attlist element\""
00541 }
00542
00543 upvar
00544
00545 return {}
00546 }
00547
00548 entitydecl {
00549 }
00550
00551 parameterentitydecl {
00552 }
00553
00554 notationdecl {
00555 }
00556
00557 default {
00558 return -code error "unknown ret \"$method\""
00559 }
00560 }
00561
00562 return ()
00563 }
00564
00565 # xml::tclparser::ExternalEntity --
00566 #
00567 # Resolve and parse external entity
00568 #
00569 # Arguments:
00570 # name parser object
00571 # base base URL
00572 # sys system identifier
00573 # pub public identifier
00574 #
00575 # Results:
00576 # External entity is fetched and parsed
00577
00578 proc xml::tclparser::ExternalEntity {name base sys pub} {
00579 }
00580
00581 # xml::tclparser:: --
00582 #
00583 # Reset a parser instance, ready to parse another document
00584 #
00585 # Arguments:
00586 # name parser object
00587 #
00588 # Results:
00589 # Variables unset
00590
00591 proc xml::tclparser::reset {name} {
00592 upvar \#0 [namespace current]::$name parser
00593
00594 # Has this parser object been properly initialised?
00595 if {![info exists parser] || \
00596 ![info exists parser(-name)]} {
00597 return [create $name]
00598 }
00599
00600 array parser = {
00601 -final 1
00602 depth 0
00603 leftover {}
00604 }
00605
00606 foreach var {Entities ExtEntities PEntities ExtPEntities ElDecls AttlistDecls NotDecls} {
00607 catch {un [namespace = current]::${var}$name}
00608 }
00609
00610
00611 array [namespace = current]::Entities$name [array get ::sgml::EntityPredef]
00612
00613 return {}
00614 }
00615