tclparser-8.0.tcl

Go to the documentation of this file.
00001 /*  tclparser-8.0.tcl --*/
00002 /* */
00003 /*  This file provides a Tcl implementation of a XML parser.*/
00004 /*  This file supports Tcl 8.0.*/
00005 /* */
00006 /*  See xml-8.[01].tcl for definitions of character sets and*/
00007 /*  regular expressions.*/
00008 /* */
00009 /*  Copyright (c) 2005-2008 by Explain.*/
00010 /*  http://www.explain.com.au/*/
00011 /*  Copyright (c) 1998-2004 Zveno Pty Ltd*/
00012 /*  http://www.zveno.com/*/
00013 /*  */
00014 /*  See the file "LICENSE" in this distribution for information on usage and*/
00015 /*  redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.*/
00016 /* */
00017 /*  $Id: tclparser-8.0.tcl,v 1.10.2.1 2005/12/28 06:49:51 balls Exp $*/
00018 
00019 package require -exact Tcl 8.0
00020 
00021 package require xmldefs 3.2
00022 
00023 package require sgmlparser 1.0
00024 
00025 package provide xml::tclparser 3.2
00026 
00027 namespace xml {
00028 
00029     /*  Procedures for parsing XML documents*/
00030     namespace export parser
00031     /*  Procedures for parsing XML DTDs*/
00032     namespace export DTDparser
00033 
00034     /*  Counter for creating unique parser objects*/
00035     variable ParserCounter 0
00036 
00037 }
00038 
00039 /*  xml::parser --*/
00040 /* */
00041 /*  Creates XML parser object.*/
00042 /* */
00043 /*  Arguments:*/
00044 /*  args    Unique name for parser object*/
00045 /*      plus option/value pairs*/
00046 /* */
00047 /*  Recognised Options:*/
00048 /*  -final          Indicates end of document data*/
00049 /*  -elementstartcommand    Called when an element starts*/
00050 /*  -elementendcommand  Called when an element ends*/
00051 /*  -characterdatacommand   Called when character data occurs*/
00052 /*  -processinginstructioncommand   Called when a PI occurs*/
00053 /*  -externalentityrefcommand   Called for an external entity reference*/
00054 /* */
00055 /*  (Not compatible with expat)*/
00056 /*  -xmldeclcommand     Called when the XML declaration occurs*/
00057 /*  -doctypecommand     Called when the document type declaration occurs*/
00058 /* */
00059 /*  -errorcommand       Script to evaluate for a fatal error*/
00060 /*  -warningcommand     Script to evaluate for a reportable warning*/
00061 /*  -statevariable      global state variable*/
00062 /*  -reportempty        whether to provide empty element indication*/
00063 /* */
00064 /*  Results:*/
00065 /*  The state variable is initialised.*/
00066 
00067 ret  xml::parser (type args) {
00068     variable ParserCounter
00069 
00070     if {[llength $args] > 0} {
00071     set name [lindex $args 0]
00072     set args [lreplace $args 0 0]
00073     } else {
00074     set name parser[incr ParserCounter]
00075     }
00076 
00077     if {[info command [namespace current]::$name] != {}} {
00078     return -code error "unable to create parser object \"[namespace current]::$name\" command"
00079     }
00080 
00081     # Initialise state variable and object command
00082     upvar \#0 [namespace current]::$name parser
00083     set sgml_ns [namespace parent]::sgml
00084     array set parser [list name $name           \
00085     -final 1                    \
00086     -elementstartcommand ${sgml_ns}::noop       \
00087     -elementendcommand ${sgml_ns}::noop     \
00088     -characterdatacommand ${sgml_ns}::noop      \
00089     -processinginstructioncommand ${sgml_ns}::noop  \
00090     -externalentityrefcommand ${sgml_ns}::noop  \
00091     -xmldeclcommand ${sgml_ns}::noop        \
00092     -doctypecommand ${sgml_ns}::noop        \
00093     -warningcommand ${sgml_ns}::noop        \
00094     -statevariable [namespace current]::$name   \
00095     -reportempty 0                  \
00096     internaldtd {}                  \
00097     ]
00098 
00099     proc [namespace current]::$name {method args} \
00100     "eval ParseCommand $name \$method \$args"
00101 
00102     eval ParseCommand [list $name] configure $args
00103 
00104     return [namespace current]::$name
00105 }
00106 
00107 /*  xml::ParseCommand --*/
00108 /* */
00109 /*  Handles parse object command invocations*/
00110 /* */
00111 /*  Valid Methods:*/
00112 /*  cget*/
00113 /*  configure*/
00114 /*  parse*/
00115 /*  reset*/
00116 /* */
00117 /*  Arguments:*/
00118 /*  parser  parser object*/
00119 /*  method  minor command*/
00120 /*  args    other arguments*/
00121 /* */
00122 /*  Results:*/
00123 /*  Depends on method*/
00124 
00125 ret  xml::ParseCommand (type parser , type method , type args) {
00126     upvar \#0 [namespace current]::$parser state
00127 
00128     switch -- $method {
00129     cget {
00130         return $state([lindex $args 0])
00131     }
00132     configure {
00133         foreach {opt value} $args {
00134         set state($opt) $value
00135         }
00136     }
00137     parse {
00138         ParseCommand_parse $parser [lindex $args 0]
00139     }
00140     reset {
00141         if {[llength $args]} {
00142         return -code error "too many arguments"
00143         }
00144         ParseCommand_reset $parser
00145     }
00146     default {
00147         return -code error "unknown method \"$method\""
00148     }
00149     }
00150 
00151     return {}
00152 }
00153 
00154 /*  xml::ParseCommand_parse --*/
00155 /* */
00156 /*  Parses document instance data*/
00157 /* */
00158 /*  Arguments:*/
00159 /*  object  parser object*/
00160 /*  xml data*/
00161 /* */
00162 /*  Results:*/
00163 /*  Callbacks are invoked, if any are defined*/
00164 
00165 ret  xml::ParseCommand_parse (type object , type xml) {
00166     upvar \#0 [namespace current]::$object parser
00167     variable Wsp
00168     variable tokExpr
00169     variable substExpr
00170 
00171     set parent [namespace parent]
00172     if {![string compare :: $parent]} {
00173     set parent {}
00174     }
00175 
00176     set tokenised [lrange \
00177         [${parent}::sgml::tokenise $xml \
00178         $tokExpr \
00179         $substExpr \
00180         -internaldtdvariable [namespace current]::${object}(internaldtd)] \
00181     4 end]
00182 
00183     eval ${parent}::sgml::parseEvent \
00184     [list $tokenised \
00185         -emptyelement [namespace code ParseEmpty] \
00186         -parseattributelistcommand [namespace code ParseAttrs]] \
00187     [array get parser -*command] \
00188     [array get parser -entityvariable] \
00189     [array get parser -reportempty] \
00190     [array get parser -final] \
00191     -normalize 0 \
00192     -internaldtd [list $parser(internaldtd)]
00193 
00194     return {}
00195 }
00196 
00197 /*  xml::ParseEmpty --  Tcl 8.0 version*/
00198 /* */
00199 /*        Used by parser to determine whether an element is empty.*/
00200 /*        This should be dead easy in XML.  The only complication is*/
00201 /*        that the RE above can't catch the trailing slash, so we have*/
00202 /*        to dig it out of the tag name or attribute list.*/
00203 /* */
00204 /*        Tcl 8.1 REs should fix this.*/
00205 /* */
00206 /*  Arguments:*/
00207 /*        tag     element name*/
00208 /*        attr    attribute list (raw)*/
00209 /*        e       End tag delimiter.*/
00210 /* */
00211 /*  Results:*/
00212 /*        "/" if the trailing slash is found.  Optionally, return a list*/
00213 /*        containing new values for the tag name and/or attribute list.*/
00214 
00215 ret  xml::ParseEmpty (type tag , type attr , type e) {
00216 
00217     if {[string match */ [string trimright $tag]] && \
00218             ![string length $attr]} {
00219         regsub {/$} $tag {} tag
00220         return [list / $tag $attr]
00221     } elseif {[string match */ [string trimright $attr]]} {
00222         regsub {/$} [string trimright $attr] {} attr
00223         return [list / $tag $attr]
00224     } else {
00225         return {}
00226     }
00227 
00228 }
00229 
00230 /*  xml::ParseAttrs --*/
00231 /* */
00232 /*  Parse element attributes.*/
00233 /* */
00234 /*  There are two forms for name-value pairs:*/
00235 /* */
00236 /*  name="value"*/
00237 /*  name='value'*/
00238 /* */
00239 /*  Watch out for the trailing slash on empty elements.*/
00240 /* */
00241 /*  Arguments:*/
00242 /*  attrs   attribute string given in a tag*/
00243 /* */
00244 /*  Results:*/
00245 /*  Returns a Tcl list representing the name-value pairs in the */
00246 /*  attribute string*/
00247 
00248 ret  xml::ParseAttrs attrs (
00249     type variable , type Wsp
00250     , type variable , type Name
00251 
00252     # , type First , type check , type whether , type there', type s , type any , type work , type to , type do
00253     , type if , optional ![string =compare { [, type string , type trim $, type attrs]]) {
00254     return {}
00255     }
00256 
00257     /*  Strip the trailing slash on empty elements*/
00258     regsub [format {/[%s]*$} " \t\n\r"] $attrs {} atList
00259 
00260      mode =  name
00261      result =  {}
00262     foreach component [split $atList =] {
00263     switch $mode {
00264         name {
00265          component =  [string trim $component]
00266         if {[regexp $Name $component]} {
00267             lappend result $component
00268         } else {
00269             return -code error "invalid attribute name \"$component\""
00270         }
00271          mode =  value:start
00272         }
00273         value:start {
00274          component =  [string trimleft $component]
00275          delimiter =  [string index $component 0]
00276          value =  {}
00277         switch -- $delimiter {
00278             \" -
00279             ' {
00280             if {[regexp [format {%s([^%s]*)%s(.*)} $delimiter $delimiter $delimiter] $component discard value remainder]} {
00281                 lappend result $value
00282                  remainder =  [string trim $remainder]
00283                 if {[string length $remainder]} {
00284                 if {[regexp $Name $remainder]} {
00285                     lappend result $remainder
00286                      mode =  value:start
00287                 } else {
00288                     return -code error "invalid attribute name \"$remainder\""
00289                 }
00290                 } else {
00291                  mode =  end
00292                 }
00293             } else {
00294                  value =  [string range $component 1 end]
00295                  mode =  value:continue
00296             }
00297             }
00298             default {
00299             return -code error "invalid value for attribute \"[lindex $result end]\""
00300             }
00301         }
00302         }
00303         value:continue {
00304         if {[regexp [format {([^%s]*)%s(.*)} $delimiter $delimiter] $component discard valuepart remainder]} {
00305             append value = $valuepart
00306             lappend result $value
00307              remainder =  [string trim $remainder]
00308             if {[string length $remainder]} {
00309             if {[regexp $Name $remainder]} {
00310                 lappend result $remainder
00311                  mode =  value:start
00312             } else {
00313                 return -code error "invalid attribute name \"$remainder\""
00314             }
00315             } else {
00316              mode =  end
00317             }
00318         } else {
00319             append value = $component
00320         }
00321         }
00322         end {
00323         return -code error "unexpected data found after end of attribute list"
00324         }
00325     }
00326     }
00327 
00328     switch $mode {
00329     name -
00330     end {
00331         /*  This is normal*/
00332     }
00333     default {
00334         return -code error "unexpected end of attribute list"
00335     }
00336     }
00337 
00338     return $result
00339 }
00340 
00341 /*  xml::ParseCommand_reset --*/
00342 /* */
00343 /*  Initialize parser data*/
00344 /* */
00345 /*  Arguments:*/
00346 /*  object  parser object*/
00347 /* */
00348 /*  Results:*/
00349 /*  Parser data structure initialised*/
00350 
00351 ret  xml::ParseCommand_reset object (
00352     type upvar \#0 [, type namespace , type current]::$, type object , type parser
00353 
00354     , type array , type set , type parser [, type list \
00355         -, type final 1     \
00356         , type internaldtd , optional   \
00357     ]
00358 )
00359 
00360 

Generated on 21 Sep 2010 for Gui by  doxygen 1.6.1