xmlswitch.tcl

Go to the documentation of this file.
00001 /*  xmlswitch.tcl --*/
00002 /* */
00003 /*  This file implements a control structure for Tcl.*/
00004 /*  'xmlswitch' iterates over an XML document.  Features in*/
00005 /*  the document may be specified using XPath location paths,*/
00006 /*  and these will trigger Tcl scripts when matched.*/
00007 /* */
00008 /*  Copyright (c) 2008 Explain*/
00009 /*  http://www.explain.com.au/*/
00010 /*  Copyright (c) 2000-2003 Zveno Pty Ltd*/
00011 /*  http://www.zveno.com/*/
00012 /* */
00013 /*  See the file "LICENSE" in this distribution for information on usage and*/
00014 /*  redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.*/
00015 /* */
00016 /*  $Id: xmlswitch.tcl,v 1.6 2003/12/09 04:56:43 balls Exp $*/
00017 
00018 package provide xmlswitch 3.2
00019 
00020 /*  We need the xml, dom and xpath packages*/
00021 
00022 package require xml 3.2
00023 package require dom 3.2
00024 package require xpath 1.0
00025 
00026 namespace xmlswitch {
00027     namespace export xmlswitch xmlswitchcont xmlswitchend
00028     namespace export domswitch
00029     namespace export free rootnode
00030 
00031     variable counter 0
00032 
00033     variable typemap
00034     array  typemap =  {
00035     text textNode
00036     comment comment
00037     ret essing-instruction processingInstruction
00038     }
00039 }
00040 
00041 # xmlswitch::xmlswitch --
00042 #
00043 #   Parse XML data, matching for XPath locations along the way
00044 #   and (possibly) triggering callbacks.
00045 #
00046 #   A DOM tree is built as a side-effect (necessary for resolving
00047 #   XPath location paths).
00048 #
00049 # Arguments:
00050 #   xml XML document
00051 #   args    configuration options,
00052 #       plus a single path/script expression, or multiple expressions
00053 #
00054 # Results:
00055 #   Tcl callbacks may be invoked.
00056 #   If -async option is true returns a token for this "process".
00057 
00058 proc xmlswitch::xmlswitch (type xml , type args) {
00059     variable counter
00060 
00061     set stateVarName [namespace current]::State[incr counter]
00062     upvar #0 $stateVarName state
00063     set state(stateVarName) $stateVarName
00064     set state(-async) 0
00065 
00066     set state(pathArray) ${stateVarName}Paths
00067     upvar #0 $state(pathArray) paths
00068     array set paths {}
00069 
00070     set cleanup {
00071     unset state
00072     unset paths
00073     }
00074 
00075     # Find configuration options and remove
00076     set numOpts 0
00077     foreach {opt value} $args {
00078     switch -glob -- $opt {
00079         -* {
00080         set state($opt) $value
00081         incr numOpts 2
00082         }
00083         default {
00084         set args [lrange $args $numOpts end]
00085         break
00086         }
00087     }
00088     }
00089 
00090     switch -- [llength $args] {
00091     0 {
00092         # Nothing to do
00093         eval $cleanup
00094         return $stateVarName
00095     }
00096     1 {
00097         foreach {path script} [lindex $args 0] {
00098         set paths([xpath::split $path]) $script
00099         }
00100     }
00101     default {
00102         if {[llength $args] % 2} {
00103         eval $cleanup
00104         return -code error "no script matching location path \"[lindex $args end]\""
00105         }
00106         foreach {path script} $args {
00107         set paths([xpath::split $path]) $script
00108         }
00109     }
00110     }
00111 
00112     set root [set state(root) [dom::DOMImplementation create]]
00113     set state(current) $root
00114 
00115     # Parse the document
00116     # We're going to do this incrementally, so the caller can
00117     # break at any time
00118     set state(parser) [eval xml::parser [array get state -parser]]
00119     #append cleanup "\n $parser destroy\n"
00120     $state(parser) configure \
00121         -elementstartcommand [namespace code [list ParseElementStart $stateVarName]]    \
00122         -elementendcommand [namespace code [list ParseElementEnd $stateVarName]]        \
00123         -characterdatacommand [namespace code [list ParseCharacterData $stateVarName]]  \
00124         -final 0
00125 
00126 #       -processinginstructioncommand [namespace code [list ParsePI $stateVarName]]     \
00127 #       -commentcommand [namespace code [list ParseComment]]
00128 
00129     if {[catch {$state(parser) parse $xml} err]} {
00130     eval $cleanup
00131     return -code error $err
00132     }
00133 
00134     if {$state(-async)} {
00135     return $stateVarName
00136     } else {
00137     eval $cleanup
00138     return {}
00139     }
00140 }
00141 
00142 /*  xmlswitch::xmlswitchcont --*/
00143 /* */
00144 /*  Provide more XML data to parse*/
00145 /* */
00146 /*  Arguments:*/
00147 /*  token   state variable name*/
00148 /*  xml XML data*/
00149 /* */
00150 /*  Results:*/
00151 /*  More parsing*/
00152 
00153 ret  xmlswitch::xmlswitchcont (type token , type xml) {
00154     upvar #0 $token state
00155 
00156     $state(parser) parse $xml
00157 
00158     return {}
00159 }
00160 
00161 /*  xmlswitch::xmlswitchend --*/
00162 /* */
00163 /*  Signal that no further data is available*/
00164 /* */
00165 /*  Arguments:*/
00166 /*  token   state array*/
00167 /* */
00168 /*  Results:*/
00169 /*  Parser configuration changed*/
00170 
00171 ret  xmlswitch::xmlswitchend token (
00172     type upvar #0 $, type token , type state
00173 
00174     $, type state(, type parser) , type configure -, type final , type true
00175 
00176     , type return , optional 
00177 )
00178 
00179 # xmlswitch::rootnode --
00180 #
00181 #   Get the root node
00182 #
00183 # Arguments:
00184 #   token   state array
00185 #
00186 # Results:
00187 #   Returns root node token
00188 
00189 proc xmlswitch::rootnode token {
00190     upvar #0 $token state
00191 
00192     return $state(root)
00193 }
00194 
00195 /*  xmlswitch::free --*/
00196 /* */
00197 /*  Free resources EXCEPT the DOM tree.*/
00198 /*  "-all" causes DOM tree to be destroyed too.*/
00199 /* */
00200 /*  Arguments:*/
00201 /*  token   state array*/
00202 /*  args    options*/
00203 /* */
00204 /*  Results:*/
00205 /*  Resources freed.*/
00206 
00207 ret  xmlswitch::free (type token , type args) {
00208     upvar #0 $token state
00209 
00210     if {[lsearch $args "-all"] >= 0} {
00211     dom::DOMImplementation destroy $state(root)
00212     }
00213 
00214     catch {unset $state(pathArray)}
00215     catch {unset state}
00216 
00217     catch {$state(parser) free}
00218 
00219     return {}
00220 }
00221 
00222 /*  xmlswitch::ParseElementStart --*/
00223 /* */
00224 /*  Handle element start tag*/
00225 /* */
00226 /*  Arguments:*/
00227 /*  token   state array*/
00228 /*  name    element type*/
00229 /*  attrList attribute list*/
00230 /*  args    options*/
00231 /*  Results:*/
00232 /*  All XPath location paths are checked for a match,*/
00233 /*  and script evaluated for matching XPath.*/
00234 /*  DOM tree node added.*/
00235 
00236 ret  xmlswitch::ParseElementStart:dbgdisabled (type token , type name , type attrList , type args) {
00237     if {[catch {eval ParseElementStart:dbg [list $token $name $attrList] $args} msg]} {
00238     puts stderr [list ParseElementStart failed with msg $msg]
00239     puts stderr $::errorInfo
00240     return -code error $msg
00241     } else {
00242     puts stderr [list ParseElementStart returned OK]
00243     }
00244     return $msg
00245 }
00246 ret  xmlswitch::ParseElementStart (type token , type name , type attrList , type args) {
00247 
00248     upvar #0 $token state
00249     array set opts $args
00250 
00251     #puts stderr [list xmlswitch::ParseElementStart $token $name $attrList $args]
00252 
00253     lappend state(current) \
00254         [dom::document createElement [lindex $state(current) end] $name]
00255     foreach {name value} $attrList {
00256     dom::element setAttribute [lindex $state(current) end] $name $value
00257     }
00258 
00259     MatchTemplates $token [lindex $state(current) end]
00260 
00261     return {}
00262 }
00263 
00264 /*  xmlswitch::ParseElementEnd --*/
00265 /* */
00266 /*  Handle element end tag*/
00267 /* */
00268 /*  Arguments:*/
00269 /*  token   state array*/
00270 /*  name    element type*/
00271 /*  args    options*/
00272 /*  Results:*/
00273 /*  State changed*/
00274 
00275 ret  xmlswitch::ParseElementEnd (type token , type name , type args) {
00276     upvar #0 $token state
00277 
00278     set state(current) [lreplace $state(current) end end]
00279 
00280     return {}
00281 }
00282 
00283 /*  xmlswitch::ParseCharacterData --*/
00284 /* */
00285 /*  Handle character data*/
00286 /* */
00287 /*  Arguments:*/
00288 /*  token   state array*/
00289 /*  data    pcdata*/
00290 /* */
00291 /*  Results:*/
00292 /*  All XPath location paths are checked for a match,*/
00293 /*  and script evaluated for matching XPath.*/
00294 /*  DOM tree node added.*/
00295 
00296 ret  xmlswitch::ParseCharacterData (type token , type data) {
00297     upvar #0 $token state
00298 
00299     lappend state(current) \
00300         [dom::document createTextNode [lindex $state(current) end] $data]
00301 
00302     MatchTemplates $token [lindex $state(current) end]
00303 
00304     set state(current) [lreplace $state(current) end end]
00305 
00306     return {}
00307 }
00308 
00309 /*  xmlswitch::domswitch --*/
00310 /* */
00311 /*  Similar to xmlswitch above, but iterates over a pre-built*/
00312 /*  DOM tree.*/
00313 /* */
00314 /*  Arguments:*/
00315 /*  xml XML document*/
00316 /*  args    a single path/script expression, or multiple expressions*/
00317 /* */
00318 /*  Results:*/
00319 /*  Tcl callbacks may be invoked.*/
00320 
00321 ret  xmlswitch::domswitch (type xml , type args) {
00322 }
00323 
00324 /*  xmlswitch::MatchTemplates --*/
00325 /* */
00326 /*  Check all templates for one which matches*/
00327 /*  the current node.*/
00328 /* */
00329 /*  Arguments:*/
00330 /*  token   state array*/
00331 /*  node    Current DOM node*/
00332 /* */
00333 /*  Results:*/
00334 /*  If a template matches, its script is evaluated*/
00335 
00336 ret  xmlswitch::MatchTemplates (type token , type node) {
00337     upvar #0 $token state
00338     upvar #0 $state(pathArray) paths
00339 
00340     #puts stderr [list xmlswitch::MatchTemplates $token $node (type: [dom::node cget $node -nodeType]) (name: [dom::node cget $node -nodeName])]
00341 
00342     set matches {}
00343 
00344     foreach {path script} [array get paths] {
00345 
00346     #puts stderr [list checking path $path for a match]
00347 
00348     set context $node
00349 
00350     # Work backwards along the path, reversing each axis
00351     set match 0
00352     set i [llength $path]
00353     #puts stderr [list $i steps to be tested]
00354     while {[incr i -1] >= 0} {
00355         #puts stderr [list step $i [lindex $path $i]]
00356         switch -glob [llength [lindex $path $i]],$i {
00357         0,0 {
00358             #puts stderr [list absolute path, end of steps - am I at the root?]
00359             if {![string length [dom::node parent $context]]} {
00360             #puts stderr [list absolute path matched]
00361             lappend matches [list $path $script]
00362             } else {
00363             #puts stderr [list absolute path did not match]
00364             }
00365         }
00366         *,0 {
00367             #puts stderr [list last step, relative path]
00368             switch [lindex [lindex $path $i] 0] {
00369             child {
00370                 if {[NodeTest [lindex $path $i] $context] && \
00371                     [CheckPredicates [lindex $path $i] $context]} {
00372                 #puts stderr [list relative path matched]
00373                 lappend matches [list $path $script]
00374                 } else {
00375                 #puts stderr [list relative path did not match]
00376                 }
00377             }
00378             default {
00379                 return -code error "axis \"[lindex [lindex $path $i] 0]\" not supported"
00380             }
00381             }
00382         }
00383         default {
00384             #puts stderr [list continuing checking steps]
00385             switch [lindex [lindex $path $i] 0] {
00386             child {
00387                 if {[NodeTest [lindex $path $i] $context] && \
00388                     [CheckPredicates [lindex $path $i] $context]} {
00389                 set context [dom::node parent $context]
00390                 } else {
00391                 #puts stderr [list no match]
00392                 }
00393             }
00394             default {
00395                 return -code error "axis \"[lindex [lindex $path $i] 0]\" not supported"
00396             }
00397             }
00398         }
00399         }
00400     }
00401     }
00402 
00403     # TODO: If there are multiple matches then we must pick the
00404     # most specific match
00405 
00406     if {[llength $matches] > 1} {
00407     # For the moment we'll just take the first match
00408     set matches [list [lindex $matches 0]]
00409     }
00410 
00411     if {[llength $matches]} {
00412     #puts stderr [list evaluating callback at level [info level]]
00413     uplevel 3 [lindex [lindex $matches 0] 1]
00414     }
00415 
00416     return {}
00417 }
00418 
00419 /*  xmlswitch::NodeTest --*/
00420 /* */
00421 /*  Check that the node passes the node (type) test*/
00422 /* */
00423 /*  Arguments:*/
00424 /*  step    Location step*/
00425 /*  node    DOM node*/
00426 /* */
00427 /*  Results:*/
00428 /*  Boolean*/
00429 
00430 ret  xmlswitch::NodeTest (type step , type node) {
00431 
00432     if {[llength [lindex $step 1]] > 1} {
00433     switch -glob -- [lindex [lindex $step 1] 0],[dom::node cget $node -nodeType] {
00434         node,* -
00435         text,textNode -
00436         comment,comment -
00437         processing-instruction,processingInstruction {
00438         return 1
00439         }
00440         default {
00441         return 0
00442         }
00443     }
00444     } elseif {![string compare [lindex $step 1] "*"]} {
00445     return 1
00446     } elseif {![string compare [lindex $step 1] [dom::node cget $node -nodeName]]} {
00447     return 1
00448     } else {
00449     return 0
00450     }
00451 }
00452 
00453 /*  xmlswitch::CheckPredicates --*/
00454 /* */
00455 /*  Check that the node passes the predicates*/
00456 /* */
00457 /*  Arguments:*/
00458 /*  step    Location step*/
00459 /*  node    DOM node*/
00460 /* */
00461 /*  Results:*/
00462 /*  Boolean*/
00463 
00464 ret  xmlswitch::CheckPredicates (type step , type node) {
00465     variable typemap
00466 
00467     set predicates [lindex $step 2]
00468     # Shortcut: no predicates means everything passes
00469     if {![llength $predicates]} {
00470     return 1
00471     }
00472 
00473     # Get the context node set
00474     switch [lindex $step 0] {
00475     child {
00476         set nodeset {}
00477         if {[llength [lindex $step 1]]} {
00478         foreach {name typetest} [lindex $step 1] break
00479         switch -- $name {
00480             node {
00481             set nodeset [dom::node children [dom::node parent $node]]
00482             }
00483             text -
00484             comment -
00485             processing-instruction {
00486             foreach child [dom::node children [dom::node parent $node]] {
00487                 if {![string compare [dom::node cget $child -nodeType] $typemap($name)]} {
00488                 lappend nodeset $child
00489                 }
00490             }
00491             }
00492             default {
00493             # Error
00494             }
00495         }
00496         } else {
00497         foreach child [dom::node children [dom::node parent $node]] {
00498             if {![string compare [lindex $step 1] [dom::node cget $child -nodeName]]} {
00499             lappend nodeset $child
00500             }
00501         }
00502         }
00503     }
00504     default {
00505         return -code error "axis \"[lindex $step 0]\" not supported"
00506     }
00507     }
00508 
00509     foreach predicate $predicates {
00510     # position() is the only supported predicate
00511     if {[lsearch $nodeset $node] + 1 == $predicate} {
00512         # continue
00513     } else {
00514         return 0
00515     }
00516     }
00517 
00518     return 1
00519 }
00520 
00521 

Generated on 21 Sep 2010 for Gui by  doxygen 1.6.1