xmlswitch.tcl
Go to the documentation of this file.00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 package provide xmlswitch 3.2
00019 
00020 
00021 
00022 package require xml 3.2
00023 package require dom 3.2
00024 package require xpath 1.0
00025 
00026 namespace xmlswitch {
00027     namespace export xmlswitch xmlswitchcont xmlswitchend
00028     namespace export domswitch
00029     namespace export free rootnode
00030 
00031     variable counter 0
00032 
00033     variable typemap
00034     array  typemap =  {
00035     text textNode
00036     comment comment
00037     ret essing-instruction processingInstruction
00038     }
00039 }
00040 
00041 # xmlswitch::xmlswitch --
00042 #
00043 #   Parse XML data, matching for XPath locations along the way
00044 #   and (possibly) triggering callbacks.
00045 #
00046 #   A DOM tree is built as a side-effect (necessary for resolving
00047 #   XPath location paths).
00048 #
00049 # Arguments:
00050 #   xml XML document
00051 #   args    configuration options,
00052 #       plus a single path/script expression, or multiple expressions
00053 #
00054 # Results:
00055 #   Tcl callbacks may be invoked.
00056 #   If -async option is true returns a token for this "process".
00057 
00058 proc xmlswitch::xmlswitch (type xml , type args) {
00059     variable counter
00060 
00061     set stateVarName [namespace current]::State[incr counter]
00062     upvar #0 $stateVarName state
00063     set state(stateVarName) $stateVarName
00064     set state(-async) 0
00065 
00066     set state(pathArray) ${stateVarName}Paths
00067     upvar #0 $state(pathArray) paths
00068     array set paths {}
00069 
00070     set cleanup {
00071     unset state
00072     unset paths
00073     }
00074 
00075     # Find configuration options and remove
00076     set numOpts 0
00077     foreach {opt value} $args {
00078     switch -glob -- $opt {
00079         -* {
00080         set state($opt) $value
00081         incr numOpts 2
00082         }
00083         default {
00084         set args [lrange $args $numOpts end]
00085         break
00086         }
00087     }
00088     }
00089 
00090     switch -- [llength $args] {
00091     0 {
00092         # Nothing to do
00093         eval $cleanup
00094         return $stateVarName
00095     }
00096     1 {
00097         foreach {path script} [lindex $args 0] {
00098         set paths([xpath::split $path]) $script
00099         }
00100     }
00101     default {
00102         if {[llength $args] % 2} {
00103         eval $cleanup
00104         return -code error "no script matching location path \"[lindex $args end]\""
00105         }
00106         foreach {path script} $args {
00107         set paths([xpath::split $path]) $script
00108         }
00109     }
00110     }
00111 
00112     set root [set state(root) [dom::DOMImplementation create]]
00113     set state(current) $root
00114 
00115     # Parse the document
00116     # We're going to do this incrementally, so the caller can
00117     # break at any time
00118     set state(parser) [eval xml::parser [array get state -parser]]
00119     #append cleanup "\n $parser destroy\n"
00120     $state(parser) configure \
00121         -elementstartcommand [namespace code [list ParseElementStart $stateVarName]]    \
00122         -elementendcommand [namespace code [list ParseElementEnd $stateVarName]]        \
00123         -characterdatacommand [namespace code [list ParseCharacterData $stateVarName]]  \
00124         -final 0
00125 
00126 #       -processinginstructioncommand [namespace code [list ParsePI $stateVarName]]     \
00127 #       -commentcommand [namespace code [list ParseComment]]
00128 
00129     if {[catch {$state(parser) parse $xml} err]} {
00130     eval $cleanup
00131     return -code error $err
00132     }
00133 
00134     if {$state(-async)} {
00135     return $stateVarName
00136     } else {
00137     eval $cleanup
00138     return {}
00139     }
00140 }
00141 
00142 
00143 
00144 
00145 
00146 
00147 
00148 
00149 
00150 
00151 
00152 
00153 ret  xmlswitch::xmlswitchcont (type token , type xml) {
00154     upvar #0 $token state
00155 
00156     $state(parser) parse $xml
00157 
00158     return {}
00159 }
00160 
00161 
00162 
00163 
00164 
00165 
00166 
00167 
00168 
00169 
00170 
00171 ret  xmlswitch::xmlswitchend token (
00172     type upvar #0 $, type token , type state
00173 
00174     $, type state(, type parser) , type configure -, type final , type true
00175 
00176     , type return , optional 
00177 )
00178 
00179 # xmlswitch::rootnode --
00180 #
00181 #   Get the root node
00182 #
00183 # Arguments:
00184 #   token   state array
00185 #
00186 # Results:
00187 #   Returns root node token
00188 
00189 proc xmlswitch::rootnode token {
00190     upvar #0 $token state
00191 
00192     return $state(root)
00193 }
00194 
00195 
00196 
00197 
00198 
00199 
00200 
00201 
00202 
00203 
00204 
00205 
00206 
00207 ret  xmlswitch::free (type token , type args) {
00208     upvar #0 $token state
00209 
00210     if {[lsearch $args "-all"] >= 0} {
00211     dom::DOMImplementation destroy $state(root)
00212     }
00213 
00214     catch {unset $state(pathArray)}
00215     catch {unset state}
00216 
00217     catch {$state(parser) free}
00218 
00219     return {}
00220 }
00221 
00222 
00223 
00224 
00225 
00226 
00227 
00228 
00229 
00230 
00231 
00232 
00233 
00234 
00235 
00236 ret  xmlswitch::ParseElementStart:dbgdisabled (type token , type name , type attrList , type args) {
00237     if {[catch {eval ParseElementStart:dbg [list $token $name $attrList] $args} msg]} {
00238     puts stderr [list ParseElementStart failed with msg $msg]
00239     puts stderr $::errorInfo
00240     return -code error $msg
00241     } else {
00242     puts stderr [list ParseElementStart returned OK]
00243     }
00244     return $msg
00245 }
00246 ret  xmlswitch::ParseElementStart (type token , type name , type attrList , type args) {
00247 
00248     upvar #0 $token state
00249     array set opts $args
00250 
00251     #puts stderr [list xmlswitch::ParseElementStart $token $name $attrList $args]
00252 
00253     lappend state(current) \
00254         [dom::document createElement [lindex $state(current) end] $name]
00255     foreach {name value} $attrList {
00256     dom::element setAttribute [lindex $state(current) end] $name $value
00257     }
00258 
00259     MatchTemplates $token [lindex $state(current) end]
00260 
00261     return {}
00262 }
00263 
00264 
00265 
00266 
00267 
00268 
00269 
00270 
00271 
00272 
00273 
00274 
00275 ret  xmlswitch::ParseElementEnd (type token , type name , type args) {
00276     upvar #0 $token state
00277 
00278     set state(current) [lreplace $state(current) end end]
00279 
00280     return {}
00281 }
00282 
00283 
00284 
00285 
00286 
00287 
00288 
00289 
00290 
00291 
00292 
00293 
00294 
00295 
00296 ret  xmlswitch::ParseCharacterData (type token , type data) {
00297     upvar #0 $token state
00298 
00299     lappend state(current) \
00300         [dom::document createTextNode [lindex $state(current) end] $data]
00301 
00302     MatchTemplates $token [lindex $state(current) end]
00303 
00304     set state(current) [lreplace $state(current) end end]
00305 
00306     return {}
00307 }
00308 
00309 
00310 
00311 
00312 
00313 
00314 
00315 
00316 
00317 
00318 
00319 
00320 
00321 ret  xmlswitch::domswitch (type xml , type args) {
00322 }
00323 
00324 
00325 
00326 
00327 
00328 
00329 
00330 
00331 
00332 
00333 
00334 
00335 
00336 ret  xmlswitch::MatchTemplates (type token , type node) {
00337     upvar #0 $token state
00338     upvar #0 $state(pathArray) paths
00339 
00340     #puts stderr [list xmlswitch::MatchTemplates $token $node (type: [dom::node cget $node -nodeType]) (name: [dom::node cget $node -nodeName])]
00341 
00342     set matches {}
00343 
00344     foreach {path script} [array get paths] {
00345 
00346     #puts stderr [list checking path $path for a match]
00347 
00348     set context $node
00349 
00350     # Work backwards along the path, reversing each axis
00351     set match 0
00352     set i [llength $path]
00353     #puts stderr [list $i steps to be tested]
00354     while {[incr i -1] >= 0} {
00355         #puts stderr [list step $i [lindex $path $i]]
00356         switch -glob [llength [lindex $path $i]],$i {
00357         0,0 {
00358             #puts stderr [list absolute path, end of steps - am I at the root?]
00359             if {![string length [dom::node parent $context]]} {
00360             #puts stderr [list absolute path matched]
00361             lappend matches [list $path $script]
00362             } else {
00363             #puts stderr [list absolute path did not match]
00364             }
00365         }
00366         *,0 {
00367             #puts stderr [list last step, relative path]
00368             switch [lindex [lindex $path $i] 0] {
00369             child {
00370                 if {[NodeTest [lindex $path $i] $context] && \
00371                     [CheckPredicates [lindex $path $i] $context]} {
00372                 #puts stderr [list relative path matched]
00373                 lappend matches [list $path $script]
00374                 } else {
00375                 #puts stderr [list relative path did not match]
00376                 }
00377             }
00378             default {
00379                 return -code error "axis \"[lindex [lindex $path $i] 0]\" not supported"
00380             }
00381             }
00382         }
00383         default {
00384             #puts stderr [list continuing checking steps]
00385             switch [lindex [lindex $path $i] 0] {
00386             child {
00387                 if {[NodeTest [lindex $path $i] $context] && \
00388                     [CheckPredicates [lindex $path $i] $context]} {
00389                 set context [dom::node parent $context]
00390                 } else {
00391                 #puts stderr [list no match]
00392                 }
00393             }
00394             default {
00395                 return -code error "axis \"[lindex [lindex $path $i] 0]\" not supported"
00396             }
00397             }
00398         }
00399         }
00400     }
00401     }
00402 
00403     # TODO: If there are multiple matches then we must pick the
00404     # most specific match
00405 
00406     if {[llength $matches] > 1} {
00407     # For the moment we'll just take the first match
00408     set matches [list [lindex $matches 0]]
00409     }
00410 
00411     if {[llength $matches]} {
00412     #puts stderr [list evaluating callback at level [info level]]
00413     uplevel 3 [lindex [lindex $matches 0] 1]
00414     }
00415 
00416     return {}
00417 }
00418 
00419 
00420 
00421 
00422 
00423 
00424 
00425 
00426 
00427 
00428 
00429 
00430 ret  xmlswitch::NodeTest (type step , type node) {
00431 
00432     if {[llength [lindex $step 1]] > 1} {
00433     switch -glob -- [lindex [lindex $step 1] 0],[dom::node cget $node -nodeType] {
00434         node,* -
00435         text,textNode -
00436         comment,comment -
00437         processing-instruction,processingInstruction {
00438         return 1
00439         }
00440         default {
00441         return 0
00442         }
00443     }
00444     } elseif {![string compare [lindex $step 1] "*"]} {
00445     return 1
00446     } elseif {![string compare [lindex $step 1] [dom::node cget $node -nodeName]]} {
00447     return 1
00448     } else {
00449     return 0
00450     }
00451 }
00452 
00453 
00454 
00455 
00456 
00457 
00458 
00459 
00460 
00461 
00462 
00463 
00464 ret  xmlswitch::CheckPredicates (type step , type node) {
00465     variable typemap
00466 
00467     set predicates [lindex $step 2]
00468     # Shortcut: no predicates means everything passes
00469     if {![llength $predicates]} {
00470     return 1
00471     }
00472 
00473     # Get the context node set
00474     switch [lindex $step 0] {
00475     child {
00476         set nodeset {}
00477         if {[llength [lindex $step 1]]} {
00478         foreach {name typetest} [lindex $step 1] break
00479         switch -- $name {
00480             node {
00481             set nodeset [dom::node children [dom::node parent $node]]
00482             }
00483             text -
00484             comment -
00485             processing-instruction {
00486             foreach child [dom::node children [dom::node parent $node]] {
00487                 if {![string compare [dom::node cget $child -nodeType] $typemap($name)]} {
00488                 lappend nodeset $child
00489                 }
00490             }
00491             }
00492             default {
00493             # Error
00494             }
00495         }
00496         } else {
00497         foreach child [dom::node children [dom::node parent $node]] {
00498             if {![string compare [lindex $step 1] [dom::node cget $child -nodeName]]} {
00499             lappend nodeset $child
00500             }
00501         }
00502         }
00503     }
00504     default {
00505         return -code error "axis \"[lindex $step 0]\" not supported"
00506     }
00507     }
00508 
00509     foreach predicate $predicates {
00510     # position() is the only supported predicate
00511     if {[lsearch $nodeset $node] + 1 == $predicate} {
00512         # continue
00513     } else {
00514         return 0
00515     }
00516     }
00517 
00518     return 1
00519 }
00520 
00521