xpath.tcl

Go to the documentation of this file.
00001 /*  xpath.tcl --*/
00002 /* */
00003 /*  Provides an XPath parser for Tcl,*/
00004 /*  plus various support procedures*/
00005 /* */
00006 /*  Copyright (c) 2000-2003 Zveno Pty Ltd*/
00007 /* */
00008 /*  See the file "LICENSE" in this distribution for information on usage and*/
00009 /*  redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.*/
00010 /* */
00011 /*  $Id: xpath.tcl,v 1.8 2003/12/09 04:43:15 balls Exp $*/
00012 
00013 package provide xpath 1.0
00014 
00015 /*  We need the XML package for definition of Names*/
00016 package require xml
00017 
00018 namespace xpath {
00019     namespace export split join createnode
00020 
00021     variable axes {
00022     ancestor
00023     ancestor-or-self
00024     attribute
00025     child
00026     descendant
00027     descendant-or-self
00028     following
00029     following-sibling
00030     namespace
00031     parent
00032     preceding
00033     preceding-sibling
00034     self
00035     }
00036 
00037     variable nodeTypes {
00038     comment
00039     text
00040     ret essing-instruction
00041     node
00042     }
00043 
00044     # NB. QName has parens for prefix
00045 
00046     variable nodetestExpr ^($(::type xml::, type QName))${::xml::allWsp}(\\(${::xml::allWsp}(("|')(.*?)\\5)?${::xml::allWsp}\\))?${::xml::allWsp}(.*)
00047 
00048     variable nodetestExpr2 ((($::xml::QName)${::xml::allWsp}(\\(${::xml::allWsp}(("|')(.*?)\\7)?${::xml::allWsp}\\))?)|${::xml::allWsp}(\\*))${::xml::allWsp}(.*)
00049 }
00050 
00051 /*  xpath::split --*/
00052 /* */
00053 /*  Parse an XPath location path*/
00054 /* */
00055 /*  Arguments:*/
00056 /*  locpath location path*/
00057 /* */
00058 /*  Results:*/
00059 /*  A Tcl list representing the location path.*/
00060 /*  The list has the form: {{axis node-test {predicate predicate ...}} ...}*/
00061 /*  Where each list item is a location step.*/
00062 
00063 ret  xpath::split locpath (
00064     type set , type leftover , optional 
00065 
00066     , type set , type result [, type InnerSplit $, type locpath , type leftover]
00067 
00068     , type if , optional [string =length [string =trim $leftover]] , optional 
00069     return =-code error ="unexpected text =\"$leftover\""
00070     
00071 
00072     , type return $, type result
00073 )
00074 
00075 proc xpath::InnerSplit {locpath leftoverVar} {
00076     upvar $leftoverVar leftover
00077 
00078     variable axes
00079     variable nodetestExpr
00080     variable nodetestExpr2
00081 
00082     /*  First determine whether we have an absolute location path*/
00083     if {[regexp {^/(.*)} $locpath discard locpath]} {
00084      path =  {{}}
00085     } else {
00086      path =  {}
00087     }
00088 
00089     while {[string length [string trimleft $locpath]]} {
00090     if {[regexp {^\.\.(.*)} $locpath discard locpath]} {
00091         /*  .. abbreviation*/
00092          axis =  parent
00093          nodetest =  *
00094     } elseif {[regexp {^/(.*)} $locpath discard locpath]} {
00095         /*  // abbreviation*/
00096          axis =  descendant-or-self
00097         if {[regexp ^$nodetestExpr2 [string trimleft $locpath] discard discard discard nodetest discard typetest discard discard literal wildcard locpath]} {
00098          nodetest =  [ResolveWildcard $nodetest $typetest $wildcard $literal]
00099         } else {
00100          leftover =  $locpath
00101         return $path
00102         }
00103     } elseif {[regexp ^\\.${::xml::allWsp}(.*) $locpath discard locpath]} {
00104         /*  . abbreviation*/
00105          axis =  self
00106          nodetest =  *
00107     } elseif {[regexp ^@($::xml::QName)${::xml::allWsp}=${::xml::allWsp}"(\[^"\])"(.*) $locpath discard attrName discard attrValue locpath]} {
00108         /*  @ abbreviation*/
00109          axis =  attribute
00110          nodetest =  $attrName
00111     } elseif {[regexp ^@($::xml::QName)${::xml::allWsp}=${::xml::allWsp}'(\[^'\])'(.*) $locpath discard attrName discard attrValue locpath]} {
00112         /*  @ abbreviation*/
00113          axis =  attribute
00114          nodetest =  $attrName
00115     } elseif {[regexp ^@($::xml::QName)(.*) $locpath discard attrName discard2 locpath]} {
00116         /*  @ abbreviation*/
00117          axis =  attribute
00118          nodetest =  $attrName
00119     } elseif {[regexp ^((${::xml::QName})${::xml::allWsp}::${::xml::allWsp})?\\*(.*) $locpath discard discard axis discard locpath]} {
00120         /*  wildcard specified*/
00121          nodetest =  *
00122         if {![string length $axis]} {
00123          axis =  child
00124         }
00125     } elseif {[regexp ^((${::xml::QName})${::xml::allWsp}::${::xml::allWsp})?$nodetestExpr2 $locpath discard discard axis discard discard discard nodetest discard typetest discard discard literal wildcard locpath]} {
00126         /*  nodetest, with or without axis*/
00127         if {![string length $axis]} {
00128          axis =  child
00129         }
00130          nodetest =  [ResolveWildcard $nodetest $typetest $wildcard $literal]
00131     } else {
00132          leftover =  $locpath
00133         return $path
00134     }
00135 
00136     /*  ParsePredicates*/
00137      predicates =  {}
00138      locpath =  [string trimleft $locpath]
00139     while {[regexp {^\[(.*)} $locpath discard locpath]} {
00140         if {[regexp {^([0-9]+)(\].*)} [string trim $locpath] discard posn locpath]} {
00141          predicate =  [list = {function position {}} [list number $posn]]
00142         } else {
00143          leftover2 =  {}
00144          predicate =  [ParseExpr $locpath leftover2]
00145          locpath =  $leftover2
00146         un leftover2 = 
00147         }
00148 
00149         if {[regexp {^\](.*)} [string trimleft $locpath] discard locpath]} {
00150         lappend predicates $predicate
00151         } else {
00152         return -code error "unexpected text in predicate \"$locpath\""
00153         }
00154     }
00155 
00156      axis =  [string trim $axis]
00157      nodetest =  [string trim $nodetest]
00158 
00159     /*  This step completed*/
00160     if {[lsearch $axes $axis] < 0} {
00161         return -code error "invalid axis \"$axis\""
00162     }
00163     lappend path [list $axis $nodetest $predicates]
00164 
00165     /*  Move to next step*/
00166 
00167     if {[string length $locpath] && ![regexp ^/(.*) $locpath discard locpath]} {
00168              leftover =  $locpath
00169         return $path
00170     }
00171 
00172     }
00173 
00174     return $path
00175 }
00176 
00177 /*  xpath::ParseExpr --*/
00178 /* */
00179 /*  Parse one expression in a predicate*/
00180 /* */
00181 /*  Arguments:*/
00182 /*  locpath location path to parse*/
00183 /*  leftoverVar Name of variable in which to store remaining path*/
00184 /* */
00185 /*  Results:*/
00186 /*  Returns parsed expression as a Tcl list*/
00187 
00188 ret  xpath::ParseExpr (type locpath , type leftoverVar) {
00189     upvar $leftoverVar leftover
00190     variable nodeTypes
00191 
00192     set expr {}
00193     set mode expr
00194     set stack {}
00195 
00196     while {[string index [string trimleft $locpath] 0] != "\]"} {
00197     set locpath [string trimleft $locpath]
00198     switch $mode {
00199         expr {
00200         # We're looking for a term
00201         if {[regexp ^-(.*) $locpath discard locpath]} {
00202             # UnaryExpr
00203             lappend stack "-"
00204         } elseif {[regexp ^\\\$({$::xml::QName})(.*) $locpath discard varname discard locpath]} {
00205             # VariableReference
00206             lappend stack [list varRef $varname]
00207             set mode term
00208         } elseif {[regexp {^\((.*)} $locpath discard locpath]} {
00209             # Start grouping
00210             set leftover2 {}
00211             lappend stack [list group [ParseExpr $locpath leftover2]]
00212             set locpath $leftover2
00213             unset leftover2
00214 
00215             if {[regexp {^\)(.*)} [string trimleft $locpath] discard locpath]} {
00216             set mode term
00217             } else {
00218             return -code error "unexpected text \"$locpath\", expected \")\""
00219             }
00220 
00221         } elseif {[regexp {^"([^"]*)"(.*)} $locpath discard literal locpath]} {
00222             # Literal (" delimited)
00223             lappend stack [list literal $literal]
00224             set mode term
00225         } elseif {[regexp {^'([^']*)'(.*)} $locpath discard literal locpath]} {
00226             # Literal (' delimited)
00227             lappend stack [list literal $literal]
00228             set mode term
00229         } elseif {[regexp {^([0-9]+(\.[0-9]+)?)(.*)} $locpath discard number discard locpath]} {
00230             # Number
00231             lappend stack [list number $number]
00232             set mode term
00233         } elseif {[regexp {^(\.[0-9]+)(.*)} $locpath discard number locpath]} {
00234             # Number
00235             lappend stack [list number $number]
00236             set mode term
00237         } elseif {[regexp ^(${::xml::QName})\\(${::xml::allWsp}(.*) $locpath discard functionName discard locpath]} {
00238             # Function call start or abbreviated node-type test
00239 
00240             if {[lsearch $nodeTypes $functionName] >= 0} {
00241             # Looking like a node-type test
00242             if {[regexp ^\\)${::xml::allWsp}(.*) $locpath discard locpath]} {
00243                 lappend stack [list path [list child [list $functionName ()] {}]]
00244                 set mode term
00245             } else {
00246                 return -code error "invalid node-type test \"$functionName\""
00247             }
00248             } else {
00249             if {[regexp ^\\)${::xml::allWsp}(.*) $locpath discard locpath]} {
00250                 set parameters {}
00251             } else {
00252                 set leftover2 {}
00253                 set parameters [ParseExpr $locpath leftover2]
00254                 set locpath $leftover2
00255                 unset leftover2
00256                 while {[regexp {^,(.*)} $locpath discard locpath]} {
00257                 set leftover2 {}
00258                 lappend parameters [ParseExpr $locpath leftover2]
00259                 set locpath $leftover2
00260                 unset leftover2
00261                 }
00262 
00263                 if {![regexp ^\\)${::xml::allWsp}(.*) [string trimleft $locpath] discard locpath]} {
00264                 return -code error "unexpected text \"locpath\" - expected \")\""
00265                 }
00266                 }
00267 
00268             lappend stack [list function $functionName $parameters]
00269             set mode term
00270             }
00271 
00272         } else {
00273             # LocationPath
00274             set leftover2 {}
00275             lappend stack [list path [InnerSplit $locpath leftover2]]
00276             set locpath $leftover2
00277             unset leftover2
00278             set mode term
00279         }
00280         }
00281         term {
00282         # We're looking for an expression operator
00283         if {[regexp ^-(.*) $locpath discard locpath]} {
00284             # UnaryExpr
00285             set stack [linsert $stack 0 expr "-"]
00286             set mode expr
00287         } elseif {[regexp ^(and|or|\\=|!\\=|<|>|<\\=|>\\=|\\||\\+|\\-|\\*|div|mod)(.*) $locpath discard exprtype locpath]} {
00288             # AndExpr, OrExpr, EqualityExpr, RelationalExpr or UnionExpr
00289             set stack [linsert $stack 0 $exprtype]
00290             set mode expr
00291         } else {
00292             return -code error "unexpected text \"$locpath\", expecting operator"
00293         }
00294         }
00295         default {
00296         # Should never be here!
00297         return -code error "internal error"
00298         }
00299     }
00300     }
00301 
00302     set leftover $locpath
00303     return $stack
00304 }
00305 
00306 /*  xpath::ResolveWildcard --*/
00307 
00308 ret  xpath::ResolveWildcard (type nodetest , type typetest , type wildcard , type literal) {
00309     variable nodeTypes
00310 
00311     switch -glob -- [string length $nodetest],[string length $typetest],[string length $wildcard],[string length $literal] {
00312     0,0,0,* {
00313         return -code error "bad location step (nothing parsed)"
00314     }
00315     0,0,* {
00316         # Name wildcard specified
00317         return *
00318     }
00319     *,0,0,* {
00320         # Element type test - nothing to do
00321         return $nodetest
00322     }
00323     *,0,*,* {
00324         # Internal error?
00325         return -code error "bad location step (found both nodetest and wildcard)"
00326     }
00327     *,*,0,0 {
00328         # Node type test
00329         if {[lsearch $nodeTypes $nodetest] < 0} {
00330         return -code error "unknown node type \"$typetest\""
00331         }
00332         return [list $nodetest $typetest]
00333     }
00334     *,*,0,* {
00335         # Node type test
00336         if {[lsearch $nodeTypes $nodetest] < 0} {
00337         return -code error "unknown node type \"$typetest\""
00338         }
00339         return [list $nodetest $literal]
00340     }
00341     default {
00342         # Internal error?
00343         return -code error "bad location step"
00344     }
00345     }
00346 }
00347 
00348 /*  xpath::join --*/
00349 /* */
00350 /*  Reconstitute an XPath location path from a*/
00351 /*  Tcl list representation.*/
00352 /* */
00353 /*  Arguments:*/
00354 /*  spath   split path*/
00355 /* */
00356 /*  Results:*/
00357 /*  Returns an Xpath location path*/
00358 
00359 ret  xpath::join spath (
00360     type return -, type code , type error ", type not , type yet , type implemented"
00361 )
00362 
00363 

Generated on 21 Sep 2010 for Gui by  doxygen 1.6.1