uri.tcl

Go to the documentation of this file.
00001 /*  uri.tcl --*/
00002 /* */
00003 /*  URI parsing and fetch*/
00004 /* */
00005 /*  Copyright (c) 2000 Zveno Pty Ltd*/
00006 /*  Copyright (c) 2006 Pierre DAVID <Pierre.David@crc.u-strasbg.fr>*/
00007 /*  Copyright (c) 2006 Andreas Kupries <andreas_kupries@users.sourceforge.net>*/
00008 /*  Steve Ball, http://www.zveno.com/*/
00009 /*  Derived from urls.tcl by Andreas Kupries*/
00010 /* */
00011 /*  TODO:*/
00012 /*  Handle www-url-encoding details*/
00013 /* */
00014 /*  CVS: $Id: uri.tcl,v 1.35 2007/01/11 19:35:23 andreas_kupries Exp $*/
00015 
00016 package require Tcl 8.2
00017 
00018 namespace ::uri {
00019 
00020     namespace export split join
00021     namespace export resolve isrelative
00022     namespace export geturl
00023     namespace export canonicalize
00024     namespace export register
00025 
00026     variable file:counter 0
00027 
00028     /*  extend these variable in the coming namespaces*/
00029     variable schemes       {}
00030     variable schemePattern ""
00031     variable url           ""
00032     variable url2part
00033     array  url2part =      {}
00034 
00035     /*  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
00036     /*  basic regular expressions used in URL syntax.*/
00037 
00038     namespace basic {
00039     variable    loAlpha     {[a-z]}
00040     variable    hiAlpha     {[A-Z]}
00041     variable    digit       {[0-9]}
00042     variable    alpha       {[a-zA-Z]}
00043     variable    safe        {[$_.+-]}
00044     variable    extra       {[!*'(,)]}
00045     /*  danger in next pattern, order important for []*/
00046     variable    national    {[][|\}\{\^~`]}
00047     variable    punctuation {[<>/* %"]} ;#" fake emacs hilit*/
00048     variable    reserved    {[;/?:@&=]}
00049     variable    hex     {[0-9A-Fa-f]}
00050     variable    alphaDigit  {[A-Za-z0-9]}
00051     variable    alphaDigitMinus {[A-Za-z0-9-]}
00052 
00053     /*  next is <national | punctuation>*/
00054     variable    unsafe      {[][<>"/* %\{\}|\\^~`]} ;#" emacs hilit*/
00055     variable    escape      "%${hex}${hex}"
00056 
00057     /*  unreserved  = alpha | digit | safe | extra*/
00058     /*  xchar       = unreserved | reserved | escape*/
00059 
00060     variable    unreserved  {[a-zA-Z0-9$_.+!*'(,)-]}
00061     variable    uChar       "(${unreserved}|${escape})"
00062     variable    xCharN      {[a-zA-Z0-9$_.+!*'(,);/?:@&=-]}
00063     variable    xChar       "(${xCharN}|${escape})"
00064     variable    digits      "${digit}+"
00065 
00066     variable    toplabel    \
00067         "(${alpha}${alphaDigitMinus}*${alphaDigit}|${alpha})"
00068     variable    domainlabel \
00069         "(${alphaDigit}${alphaDigitMinus}*${alphaDigit}|${alphaDigit})"
00070 
00071     variable    hostname    \
00072         "((${domainlabel}\\.)*${toplabel})"
00073     variable    hostnumber  \
00074         "(${digits}\\.${digits}\\.${digits}\\.${digits})"
00075 
00076     variable    host        "(${hostname}|${hostnumber})"
00077 
00078     variable    port        $digits
00079     variable    hostOrPort  "${host}(:${port})?"
00080 
00081     variable    usrCharN    {[a-zA-Z0-9$_.+!*'(,);?&=-]}
00082     variable    usrChar     "(${usrCharN}|${escape})"
00083     variable    user        "${usrChar}*"
00084     variable    password    $user
00085     variable    login       "(${user}(:${password})?@)?${hostOrPort}"
00086     } ;/*  basic {}*/
00087 }
00088 
00089 
00090 /*  ::uri::register --*/
00091 /* */
00092 /*  Register a scheme (and aliases) in the package. The command*/
00093 /*  creates a namespace below "::uri" with the same name as the*/
00094 /*  scheme and executes the script declaring the pattern variables*/
00095 /*  for this scheme in the new namespace. At last it updates the*/
00096 /*  uri variables keeping track of overall scheme information.*/
00097 /* */
00098 /*  The script has to declare at least the variable "schemepart",*/
00099 /*  the pattern for an url of the registered scheme after the*/
00100 /*  scheme declaration. Not declaring this variable is an error.*/
00101 /* */
00102 /*  Arguments:*/
00103 /*  schemeList  Name of the scheme to register, plus aliases*/
00104 /*        script        Script declaring the scheme patterns*/
00105 /* */
00106 /*  Results:*/
00107 /*  None.*/
00108 
00109 ret  ::uri::register (type schemeList , type script) {
00110     variable schemes
00111     variable schemePattern
00112     variable url
00113     variable url2part
00114 
00115     # Check scheme and its aliases for existence.
00116     foreach scheme $schemeList {
00117     if {[lsearch -exact $schemes $scheme] >= 0} {
00118         return -code error \
00119             "trying to register scheme (\"$scheme\") which is already known"
00120     }
00121     }
00122 
00123     # Get the main scheme
00124     set scheme  [lindex $schemeList 0]
00125 
00126     if {[catch {namespace eval $scheme $script} msg]} {
00127     catch {namespace delete $scheme}
00128     return -code error \
00129         "error while evaluating scheme script: $msg"
00130     }
00131 
00132     if {![info exists ${scheme}::schemepart]} {
00133     namespace delete $scheme
00134     return -code error \
00135         "Variable \"schemepart\" is missing."
00136     }
00137 
00138     # Now we can extend the variables which keep track of the registered schemes.
00139 
00140     eval [linsert $schemeList 0 lappend schemes]
00141     set schemePattern   "([::join $schemes |]):"
00142 
00143     foreach s $schemeList {
00144     # FRINK: nocheck
00145     set url2part($s) "${s}:[set ${scheme}::schemepart]"
00146     # FRINK: nocheck
00147     append url "(${s}:[set ${scheme}::schemepart])|"
00148     }
00149     set url [string trimright $url |]
00150     return
00151 }
00152 
00153 /*  ::uri::split --*/
00154 /* */
00155 /*  Splits the given <a url> into its constituents.*/
00156 /* */
00157 /*  Arguments:*/
00158 /*  url the URL to split*/
00159 /* */
00160 /*  Results:*/
00161 /*  Tcl list containing constituents, suitable for 'array set'.*/
00162 
00163 ret  ::uri::split (type url , optional defaultscheme =http) {
00164 
00165     set url [string trim $url]
00166     set scheme {}
00167 
00168     # RFC 1738: scheme = 1*[ lowalpha | digit | "+" | "-" | "." ]
00169     regexp -- {^([a-z0-9+.-][a-z0-9+.-]*):} $url dummy scheme
00170 
00171     if {$scheme == {}} {
00172     set scheme $defaultscheme
00173     }
00174 
00175     # ease maintenance: dynamic dispatch, able to handle all schemes
00176     # added in future!
00177 
00178     if {[::info procs Split[string totitle $scheme]] == {}} {
00179     error "unknown scheme '$scheme' in '$url'"
00180     }
00181 
00182     regsub -- "^${scheme}:" $url {} url
00183 
00184     set       parts(scheme) $scheme
00185     array set parts [Split[string totitle $scheme] $url]
00186 
00187     # should decode all encoded characters!
00188 
00189     return [array get parts]
00190 }
00191 
00192 ret  ::uri::SplitFtp (type url) {
00193     # @c Splits the given ftp-<a url> into its constituents.
00194     # @a url: The url to split, without! scheme specification.
00195     # @r List containing the constituents, suitable for 'array set'.
00196 
00197     # general syntax:
00198     # //<user>:<password>@<host>:<port>/<cwd1>/.../<cwdN>/<name>;type=<typecode>
00199     #
00200     # additional rules:
00201     #
00202     # <user>:<password> are optional, detectable by presence of @.
00203     # <password> is optional too.
00204     #
00205     # "//" [ <user> [":" <password> ] "@"] <host> [":" <port>] "/"
00206     #   <cwd1> "/" ..."/" <cwdN> "/" <name> [";type=" <typecode>]
00207 
00208     upvar \#0 [namespace current]::ftp::typepart ftptype
00209 
00210     array set parts {user {} pwd {} host {} port {} path {} type {}}
00211 
00212     # slash off possible type specification
00213 
00214     if {[regexp -indices -- "${ftptype}$" $url dummy ftype]} {
00215 
00216     set from    [lindex $ftype 0]
00217     set to      [lindex $ftype 1]
00218 
00219     set parts(type) [string range   $url $from $to]
00220 
00221     set from    [lindex $dummy 0]
00222     set url     [string replace $url $from end]
00223     }
00224 
00225     # Handle user, password, host and port
00226 
00227     if {[string match "//*" $url]} {
00228     set url [string range $url 2 end]
00229 
00230     array set parts [GetUPHP url]
00231     }
00232 
00233     set parts(path) [string trimleft $url /]
00234 
00235     return [array get parts]
00236 }
00237 
00238 ret  ::uri::JoinFtp args (
00239     type array , type set , type components , optional 
00240     user ={ , type pwd , optional  , type host , optional  , type port , optional 
00241     , type path , optional  , type type , optional 
00242     )
00243     array set components $args
00244 
00245     set userPwd {}
00246     if {[string length $components(user)] || [string length $components(pwd)]} {
00247      userPwd =  $components(user)[expr {[string length $components(pwd)] ? ":$components(pwd)" : {}}]@
00248     }
00249 
00250      port =  {}
00251     if {[string length $components(port)]} {
00252      port =  :$components(port)
00253     }
00254 
00255      type =  {}
00256     if {[string length $components(type)]} {
00257      type =  \;type=$components(type)
00258     }
00259 
00260     return ftp://${userPwd}$components(host)${port}/[string trimleft $components(path) /]$type
00261 }
00262 
00263 ret  ::uri::SplitHttps (type url) {
00264     return [SplitHttp $url]
00265 }
00266 
00267 ret  ::uri::SplitHttp (type url) {
00268     # @c Splits the given http-<a url> into its constituents.
00269     # @a url: The url to split, without! scheme specification.
00270     # @r List containing the constituents, suitable for 'array set'.
00271 
00272     # general syntax:
00273     # //<host>:<port>/<path>?<searchpart>
00274     #
00275     #   where <host> and <port> are as described in Section 3.1. If :<port>
00276     #   is omitted, the port defaults to 80.  No user name or password is
00277     #   allowed.  <path> is an HTTP selector, and <searchpart> is a query
00278     #   string. The <path> is optional, as is the <searchpart> and its
00279     #   preceding "?". If neither <path> nor <searchpart> is present, the "/"
00280     #   may also be omitted.
00281     #
00282     #   Within the <path> and <searchpart> components, "/", ";", "?" are
00283     #   reserved.  The "/" character may be used within HTTP to designate a
00284     #   hierarchical structure.
00285     #
00286     # path == <cwd1> "/" ..."/" <cwdN> "/" <name> ["#" <fragment>]
00287 
00288     upvar #0 [namespace current]::http::search  search
00289     upvar #0 [namespace current]::http::segment segment
00290 
00291     array set parts {host {} port {} path {} query {}}
00292 
00293     set searchPattern   "\\?(${search})\$"
00294     set fragmentPattern "#(${segment})\$"
00295 
00296     # slash off possible query. the 'search' regexp, while official,
00297     # is not good enough. We have apparently lots of urls in the wild
00298     # which contain unquoted urls with queries in a query. The RE
00299     # finds the embedded query, not the actual one. Using string first
00300     # now instead of a RE
00301 
00302     if {[set pos [string first ? $url]] >= 0} {
00303     incr pos
00304     set parts(query) [string range   $url $pos end]
00305     incr pos -1
00306     set url          [string replace $url $pos end]
00307     }
00308 
00309     # slash off possible fragment
00310 
00311     if {[regexp -indices -- $fragmentPattern $url match fragment]} {
00312     set from [lindex $fragment 0]
00313     set to   [lindex $fragment 1]
00314 
00315     set parts(fragment) [string range $url $from $to]
00316 
00317     set url [string replace $url [lindex $match 0] end]
00318     }
00319 
00320     if {[string match "//*" $url]} {
00321     set url [string range $url 2 end]
00322 
00323     array set parts [GetUPHP url]
00324     }
00325 
00326     set parts(path) [string trimleft $url /]
00327 
00328     return [array get parts]
00329 }
00330 
00331 ret  ::uri::JoinHttp (type args) {
00332     return [eval [linsert $args 0 ::uri::JoinHttpInner http 80]]
00333 }
00334 
00335 ret  ::uri::JoinHttps (type args) {
00336     return [eval [linsert $args 0 ::uri::JoinHttpInner https 443]]
00337 }
00338 
00339 ret  ::uri::JoinHttpInner (type scheme , type defport , type args) {
00340     array set components {host {} path {} query {}}
00341     set       components(port) $defport
00342     array set components $args
00343 
00344     set port {}
00345     if {[string length $components(port)] && $components(port) != $defport} {
00346     set port :$components(port)
00347     }
00348 
00349     set query {}
00350     if {[string length $components(query)]} {
00351     set query ?$components(query)
00352     }
00353 
00354     regsub -- {^/} $components(path) {} components(path)
00355 
00356     if { [info exists components(fragment)] && $components(fragment) != "" } {
00357     set components(fragment) "#$components(fragment)"
00358     } else {
00359     set components(fragment) ""
00360     }
00361 
00362     return $scheme://$components(host)$port/$components(path)$components(fragment)$query
00363 }
00364 
00365 ret  ::uri::SplitFile (type url) {
00366     # @c Splits the given file-<a url> into its constituents.
00367     # @a url: The url to split, without! scheme specification.
00368     # @r List containing the constituents, suitable for 'array set'.
00369 
00370     upvar #0 [namespace current]::basic::hostname   hostname
00371     upvar #0 [namespace current]::basic::hostnumber hostnumber
00372 
00373     if {[string match "//*" $url]} {
00374     set url [string range $url 2 end]
00375 
00376     set hostPattern "^($hostname|$hostnumber)"
00377     switch -exact -- $::tcl_platform(platform) {
00378         windows {
00379         # Catch drive letter
00380         append hostPattern :?
00381         }
00382         default {
00383         # Proceed as usual
00384         }
00385     }
00386 
00387     if {[regexp -indices -- $hostPattern $url match host]} {
00388         set fh  [lindex $host 0]
00389         set th  [lindex $host 1]
00390 
00391         set parts(host) [string range $url $fh $th]
00392 
00393         set  matchEnd   [lindex $match 1]
00394         incr matchEnd
00395 
00396         set url [string range $url $matchEnd end]
00397     }
00398     }
00399 
00400     set parts(path) $url
00401 
00402     return [array get parts]
00403 }
00404 
00405 ret  ::uri::JoinFile args (
00406     type array , type set , type components , optional 
00407     host ={ , type port , optional  , type path , optional 
00408     )
00409     array set components $args
00410 
00411     switch -exact -- $::tcl_platform(platform) {
00412     windows {
00413         if {[string length $components(host)]} {
00414         return file://$components(host):$components(path)
00415         } else {
00416         return file://$components(path)
00417         }
00418     }
00419     default {
00420         return file://$components(host)$components(path)
00421     }
00422     }
00423 }
00424 
00425 ret  ::uri::SplitMailto (type url) {
00426     # @c Splits the given mailto-<a url> into its constituents.
00427     # @a url: The url to split, without! scheme specification.
00428     # @r List containing the constituents, suitable for 'array set'.
00429 
00430     if {[string match "*@*" $url]} {
00431     set url [::split $url @]
00432     return [list user [lindex $url 0] host [lindex $url 1]]
00433     } else {
00434     return [list user $url]
00435     }
00436 }
00437 
00438 ret  ::uri::JoinMailto args (
00439     type array , type set , type components , optional 
00440     user ={ , type host , optional 
00441     )
00442     array set components $args
00443 
00444     return mailto:$components(user)@$components(host)
00445 }
00446 
00447 proc ::uri::SplitNews {url} {
00448     if { [string first @ $url] >= 0 } {
00449     return [list message-id $url]
00450     } else {
00451     return [list newsgroup-name $url]
00452     }
00453 }
00454 
00455 ret  ::uri::JoinNews args (
00456     type array , type set , type components , optional 
00457     message-id ={ , type newsgroup-, type name , optional 
00458     )
00459     array set components $args
00460     return news:$components(message-id)$components(newsgroup-name)
00461 }
00462 
00463 proc ::uri::SplitLdaps {url} {
00464     ::uri::SplitLdap $url
00465 }
00466 
00467 proc ::uri::SplitLdap {url} {
00468     # @c Splits the given Ldap-<a url> into its constituents.
00469     # @a url: The url to split, without! scheme specification.
00470     # @r List containing the constituents, suitable for 'array set'.
00471 
00472     # general syntax:
00473     # //<host>:<port>/<dn>?<attrs>?<scope>?<filter>?<extensions>
00474     #
00475     #   where <host> and <port> are as described in Section 5 of RFC 1738.
00476     #   No user name or password is allowed.
00477     #   If omitted, the port defaults to 389 for ldap, 636 for ldaps
00478     #   <dn> is the base DN for the search
00479     #   <attrs> is a comma separated list of attributes description
00480     #   <scope> is either "base", "one" or "sub".
00481     #   <filter> is a RFC 2254 filter specification
00482     #   <extensions> are documented in RFC 2255
00483     #
00484 
00485     array set parts {host {} port {} dn {} attrs {} scope {} filter {} extensions {}}
00486 
00487     /*           host        port           dn          attrs       scope               filter     extns*/
00488      re =  {//([^:?/]+)(?::([0-9]+))?(?:/([^?]+)(?:\?([^?]*)(?:\?(base|one|sub)?(?:\?([^?]*)(?:\?(.*))?)?)?)?)?}
00489 
00490     if {! [regexp $re $url match parts(host) parts(port) \
00491         parts(dn) parts(attrs) parts(scope) parts(filter) \
00492         parts(extensions)]} then {
00493     return -code error "unable to match URL \"$url\""
00494     }
00495 
00496      parts = (attrs) [::split $parts(attrs) ","]
00497 
00498     return [array get parts]
00499 }
00500 
00501 ret  ::uri::JoinLdap (type args) {
00502     return [eval [linsert $args 0 ::uri::JoinLdapInner ldap 389]]
00503 }
00504 
00505 ret  ::uri::JoinLdaps (type args) {
00506     return [eval [linsert $args 0 ::uri::JoinLdapInner ldaps 636]]
00507 }
00508 
00509 ret  ::uri::JoinLdapInner (type scheme , type defport , type args) {
00510     array set components {host {} port {} dn {} attrs {} scope {} filter {} extensions {}}
00511     set       components(port) $defport
00512     array set components $args
00513 
00514     set port {}
00515     if {[string length $components(port)] && $components(port) != $defport} {
00516     set port :$components(port)
00517     }
00518 
00519     set url "$scheme://$components(host)$port"
00520 
00521     set components(attrs) [::join $components(attrs) ","]
00522 
00523     set s ""
00524     foreach c {dn attrs scope filter extensions} {
00525     if {[string equal $c "dn"]} then {
00526         append s "/"
00527     } else {
00528         append s "?"
00529     }
00530     if {! [string equal $components($c) ""]} then {
00531         append url "${s}$components($c)"
00532         set s ""
00533     }
00534     }
00535 
00536     return $url
00537 }
00538 
00539 ret  ::uri::GetUPHP (type urlvar) {
00540     # @c Parse user, password host and port out of the url stored in
00541     # @c variable <a urlvar>.
00542     # @d Side effect: The extracted information is removed from the given url.
00543     # @r List containing the extracted information in a format suitable for
00544     # @r 'array set'.
00545     # @a urlvar: Name of the variable containing the url to parse.
00546 
00547     upvar \#0 [namespace current]::basic::user      user
00548     upvar \#0 [namespace current]::basic::password  password
00549     upvar \#0 [namespace current]::basic::hostname  hostname
00550     upvar \#0 [namespace current]::basic::hostnumber    hostnumber
00551     upvar \#0 [namespace current]::basic::port      port
00552 
00553     upvar $urlvar url
00554 
00555     array set parts {user {} pwd {} host {} port {}}
00556 
00557     # syntax
00558     # "//" [ <user> [":" <password> ] "@"] <host> [":" <port>] "/"
00559     # "//" already cut off by caller
00560 
00561     set upPattern "^(${user})(:(${password}))?@"
00562 
00563     if {[regexp -indices -- $upPattern $url match theUser c d thePassword]} {
00564     set fu  [lindex $theUser 0]
00565     set tu  [lindex $theUser 1]
00566 
00567     set fp  [lindex $thePassword 0]
00568     set tp  [lindex $thePassword 1]
00569 
00570     set parts(user) [string range $url $fu $tu]
00571     set parts(pwd)  [string range $url $fp $tp]
00572 
00573     set  matchEnd   [lindex $match 1]
00574     incr matchEnd
00575 
00576     set url [string range $url $matchEnd end]
00577     }
00578 
00579     set hpPattern "^($hostname|$hostnumber)(:($port))?"
00580 
00581     if {[regexp -indices -- $hpPattern $url match theHost c d e f g h thePort]} {
00582     set fh  [lindex $theHost 0]
00583     set th  [lindex $theHost 1]
00584 
00585     set fp  [lindex $thePort 0]
00586     set tp  [lindex $thePort 1]
00587 
00588     set parts(host) [string range $url $fh $th]
00589     set parts(port) [string range $url $fp $tp]
00590 
00591     set  matchEnd   [lindex $match 1]
00592     incr matchEnd
00593 
00594     set url [string range $url $matchEnd end]
00595     }
00596 
00597     return [array get parts]
00598 }
00599 
00600 ret  ::uri::GetHostPort (type urlvar) {
00601     # @c Parse host and port out of the url stored in variable <a urlvar>.
00602     # @d Side effect: The extracted information is removed from the given url.
00603     # @r List containing the extracted information in a format suitable for
00604     # @r 'array set'.
00605     # @a urlvar: Name of the variable containing the url to parse.
00606 
00607     upvar #0 [namespace current]::basic::hostname   hostname
00608     upvar #0 [namespace current]::basic::hostnumber hostnumber
00609     upvar #0 [namespace current]::basic::port       port
00610 
00611     upvar $urlvar url
00612 
00613     set pattern "^(${hostname}|${hostnumber})(:(${port}))?"
00614 
00615     if {[regexp -indices -- $pattern $url match host c d e f g h thePort]} {
00616     set fromHost    [lindex $host 0]
00617     set toHost  [lindex $host 1]
00618 
00619     set fromPort    [lindex $thePort 0]
00620     set toPort  [lindex $thePort 1]
00621 
00622     set parts(host) [string range $url $fromHost $toHost]
00623     set parts(port) [string range $url $fromPort $toPort]
00624 
00625     set  matchEnd   [lindex $match 1]
00626     incr matchEnd
00627 
00628     set url [string range $url $matchEnd end]
00629     }
00630 
00631     return [array get parts]
00632 }
00633 
00634 /*  ::uri::resolve --*/
00635 /* */
00636 /*  Resolve an arbitrary URL, given a base URL*/
00637 /* */
00638 /*  Arguments:*/
00639 /*  base    base URL (absolute)*/
00640 /*  url arbitrary URL*/
00641 /* */
00642 /*  Results:*/
00643 /*  Returns a URL*/
00644 
00645 ret  ::uri::resolve (type base , type url) {
00646     if {[string length $url]} {
00647     if {[isrelative $url]} {
00648 
00649         array set baseparts [split $base]
00650 
00651         switch -- $baseparts(scheme) {
00652         http -
00653         https -
00654         ftp -
00655         file {
00656             array set relparts [split $url]
00657             if { [string match /* $url] } {
00658             catch { set baseparts(path) $relparts(path) }
00659             } elseif { [string match */ $baseparts(path)] } {
00660             set baseparts(path) "$baseparts(path)$relparts(path)"
00661             } else {
00662             if { [string length $relparts(path)] > 0 } {
00663                 set path [lreplace [::split $baseparts(path) /] end end]
00664                 set baseparts(path) "[::join $path /]/$relparts(path)"
00665             }
00666             }
00667             catch { set baseparts(query) $relparts(query) }
00668             catch { set baseparts(fragment) $relparts(fragment) }
00669             return [eval [linsert [array get baseparts] 0 join]]
00670         }
00671         default {
00672             return -code error "unable to resolve relative URL \"$url\""
00673         }
00674         }
00675 
00676     } else {
00677         return $url
00678     }
00679     } else {
00680     return $base
00681     }
00682 }
00683 
00684 /*  ::uri::isrelative --*/
00685 /* */
00686 /*  Determines whether a URL is absolute or relative*/
00687 /* */
00688 /*  Arguments:*/
00689 /*  url URL to check*/
00690 /* */
00691 /*  Results:*/
00692 /*  Returns 1 if the URL is relative, 0 otherwise*/
00693 
00694 ret  ::uri::isrelative url (
00695     type return [, type expr , optional ![regexp =-- {^[a-z0-9+-.][a-z0-9+-.]*: $, type url])]
00696 }
00697 
00698 # ::uri::geturl --
00699 #
00700 #   Fetch the data from an arbitrary URL.
00701 #
00702 #   This package provides a handler for the file:
00703 #   scheme, since this conflicts with the file command.
00704 #
00705 # Arguments:
00706 #   url address of data resource
00707 #   args    configuration options
00708 #
00709 # Results:
00710 #   Depends on scheme
00711 
00712 proc ::uri::geturl {url args} {
00713     array set urlparts [split $url]
00714 
00715     switch -- $urlparts(scheme) {
00716     file {
00717         return [eval [linsert $args 0 file_geturl $url]]
00718     }
00719     default {
00720         # Load a geturl package for the scheme first and only if
00721         # that fails the scheme package itself. This prevents
00722         # cyclic dependencies between packages.
00723         if {[catch {package require $urlparts(scheme)::geturl}]} {
00724         package require $urlparts(scheme)
00725         }
00726         return [eval [linsert $args 0 $urlparts(scheme)::geturl $url]]
00727     }
00728     }
00729 }
00730 
00731 /*  ::uri::file_geturl --*/
00732 /* */
00733 /*  geturl implementation for file: scheme*/
00734 /* */
00735 /*  TODO:*/
00736 /*  This is an initial, basic implementation.*/
00737 /*  Eventually want to support all options for geturl.*/
00738 /* */
00739 /*  Arguments:*/
00740 /*  url URL to fetch*/
00741 /*  args    configuration options*/
00742 /* */
00743 /*  Results:*/
00744 /*  Returns data from file*/
00745 
00746 ret  ::uri::file_geturl (type url , type args) {
00747     variable file:counter
00748 
00749     set var [namespace current]::file[incr file:counter]
00750     upvar #0 $var state
00751     array set state {data {}}
00752 
00753     array set parts [split $url]
00754 
00755     set ch [open $parts(path)]
00756     # Could determine text/binary from file extension,
00757     # except on Macintosh
00758     # fconfigure $ch -translation binary
00759     set state(data) [read $ch]
00760     close $ch
00761 
00762     return $var
00763 }
00764 
00765 /*  ::uri::join --*/
00766 /* */
00767 /*  Format a URL*/
00768 /* */
00769 /*  Arguments:*/
00770 /*  args    components, key-value format*/
00771 /* */
00772 /*  Results:*/
00773 /*  A URL*/
00774 
00775 ret  ::uri::join args (
00776     type array , type set , type components $, type args
00777 
00778     , type return [, type eval [, type linsert $, type args 0 , type Join[, type string , type totitle $, type components(, type scheme)]]]
00779 )
00780 
00781 # ::uri::canonicalize --
00782 #
00783 #   Canonicalize a URL
00784 #
00785 # Acknowledgements:
00786 #   Andreas Kupries <andreas_kupries@users.sourceforge.net>
00787 #
00788 # Arguments:
00789 #   uri URI (which contains a path component)
00790 #
00791 # Results:
00792 #   The canonical form of the URI
00793 
00794 proc ::uri::canonicalize uri {
00795 
00796     # Make uri canonical with respect to dots (path changing commands)
00797     #
00798     # Remove single dots (.)  => pwd not changing
00799     # Remove double dots (..) => gobble previous segment of path
00800     #
00801     # Fixes for this command:
00802     #
00803     # * Ignore any url which cannot be split into components by this
00804     #   module. Just assume that such urls do not have a path to
00805     #   canonicalize.
00806     #
00807     # * Ignore any url which could be split into components, but does
00808     #   not have a path component.
00809     #
00810     # In the text above 'ignore' means
00811     # 'return the url unchanged to the caller'.
00812 
00813     if {[catch {array set u [::uri::split $uri]}]} {
00814     return $uri
00815     }
00816     if {![info exists u(path)]} {
00817     return $uri
00818     }
00819 
00820     set uri $u(path)
00821 
00822     # Remove leading "./" "../" "/.." (and "/../")
00823     regsub -all -- {^(\./)+}    $uri {}  uri
00824     regsub -all -- {^/(\.\./)+} $uri {/} uri
00825     regsub -all -- {^(\.\./)+}  $uri {}  uri
00826 
00827     # Remove inner /./ and /../
00828     while {[regsub -all -- {/\./}         $uri {/} uri]} {}
00829     while {[regsub -all -- {/[^/]+/\.\./} $uri {/} uri]} {}
00830     while {[regsub -all -- {^[^/]+/\.\./} $uri {}  uri]} {}
00831     # Munge trailing /..
00832     while {[regsub -all -- {/[^/]+/\.\.} $uri {/} uri]} {}
00833     if { $uri == ".." } { set uri "/" }
00834 
00835     set u(path) $uri
00836     set uri [eval [linsert [array get u] 0 ::uri::join]]
00837 
00838     return $uri
00839 }
00840 
00841 /*  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
00842 /*  regular expressions covering various url schemes*/
00843 
00844 /*  Currently known URL schemes:*/
00845 /* */
00846 /*  (RFC 1738)*/
00847 /*  ------------------------------------------------*/
00848 /*  scheme  basic syntax of scheme specific part*/
00849 /*  ------------------------------------------------*/
00850 /*  ftp     //<user>:<password>@<host>:<port>/<cwd1>/.../<cwdN>/<name>;type=<typecode>*/
00851 /* */
00852 /*  http        //<host>:<port>/<path>?<searchpart>*/
00853 /* */
00854 /*  gopher  //<host>:<port>/<gophertype><selector>*/
00855 /*              <gophertype><selector>%09<search>*/
00856 /*      <gophertype><selector>%09<search>%09<gopher+_string>*/
00857 /* */
00858 /*  mailto  <rfc822-addr-spec>*/
00859 /*  news        <newsgroup-name>*/
00860 /*      <message-id>*/
00861 /*  nntp        //<host>:<port>/<newsgroup-name>/<article-number>*/
00862 /*  telnet  //<user>:<password>@<host>:<port>/*/
00863 /*  wais        //<host>:<port>/<database>*/
00864 /*      //<host>:<port>/<database>?<search>*/
00865 /*      //<host>:<port>/<database>/<wtype>/<wpath>*/
00866 /*  file        //<host>/<path>*/
00867 /*  prospero    //<host>:<port>/<hsoname>;<field>=<value>*/
00868 /*  ------------------------------------------------*/
00869 /* */
00870 /*  (RFC 2111)*/
00871 /*  ------------------------------------------------*/
00872 /*  scheme  basic syntax of scheme specific part*/
00873 /*  ------------------------------------------------*/
00874 /*  mid message-id*/
00875 /*      message-id/content-id*/
00876 /*  cid content-id*/
00877 /*  ------------------------------------------------*/
00878 /* */
00879 /*  (RFC 2255)*/
00880 /*  ------------------------------------------------*/
00881 /*  scheme  basic syntax of scheme specific part*/
00882 /*  ------------------------------------------------*/
00883 /*  ldap        //<host>:<port>/<dn>?<attrs>?<scope>?<filter>?<extensions>*/
00884 /*  ------------------------------------------------*/
00885 
00886 /*  FTP*/
00887 uri::register ftp {
00888     variable escape [ [namespace =  parent [namespace current]]::basic::escape]
00889     variable login  [ [namespace =  parent [namespace current]]::basic::login]
00890 
00891     variable    charN   {[a-zA-Z0-9$_.+!*'(,)?:@&=-]}
00892     variable    char    "(${charN}|${escape})"
00893     variable    segment "${char}*"
00894     variable    path    "${segment}(/${segment})*"
00895 
00896     variable    type        {[AaDdIi]}
00897     variable    typepart    ";type=(${type})"
00898     variable    schemepart  \
00899             "//${login}(/${path}(${typepart})?)?"
00900 
00901     variable    url     "ftp:${schemepart}"
00902 }
00903 
00904 /*  FILE*/
00905 uri::register file {
00906     variable    host [ [namespace =  parent [namespace current]]::basic::host]
00907     variable    path [ [namespace =  parent [namespace current]]::ftp::path]
00908 
00909     variable    schemepart  "//(${host}|localhost)?/${path}"
00910     variable    url     "file:${schemepart}"
00911 }
00912 
00913 /*  HTTP*/
00914 uri::register http {
00915     variable    escape \
00916         [ [namespace =  parent [namespace current]]::basic::escape]
00917     variable    hostOrPort  \
00918         [ [namespace =  parent [namespace current]]::basic::hostOrPort]
00919 
00920     variable    charN       {[a-zA-Z0-9$_.+!*'(,);:@&=-]}
00921     variable    char        "($charN|${escape})"
00922     variable    segment     "${char}*"
00923 
00924     variable    path        "${segment}(/${segment})*"
00925     variable    search      $segment
00926     variable    schemepart  \
00927         "//${hostOrPort}(/${path}(\\?${search})?)?"
00928 
00929     variable    url     "http:${schemepart}"
00930 }
00931 
00932 /*  GOPHER*/
00933 uri::register gopher {
00934     variable    xChar \
00935         [ [namespace =  parent [namespace current]]::basic::xChar]
00936     variable    hostOrPort \
00937         [ [namespace =  parent [namespace current]]::basic::hostOrPort]
00938     variable    search \
00939         [ [namespace =  parent [namespace current]]::http::search]
00940 
00941     variable    type        $xChar
00942     variable    selector    "$xChar*"
00943     variable    string      $selector
00944     variable    schemepart  \
00945         "//${hostOrPort}(/(${type}(${selector}(%09${search}(%09${string})?)?)?)?)?"
00946     variable    url     "gopher:${schemepart}"
00947 }
00948 
00949 /*  MAILTO*/
00950 uri::register mailto {
00951     variable xChar [ [namespace =  parent [namespace current]]::basic::xChar]
00952     variable host  [ [namespace =  parent [namespace current]]::basic::host]
00953 
00954     variable schemepart "$xChar+(@${host})?"
00955     variable url    "mailto:${schemepart}"
00956 }
00957 
00958 /*  NEWS*/
00959 uri::register news {
00960     variable escape [ [namespace =  parent [namespace current]]::basic::escape]
00961     variable alpha  [ [namespace =  parent [namespace current]]::basic::alpha]
00962     variable host   [ [namespace =  parent [namespace current]]::basic::host]
00963 
00964     variable    aCharN      {[a-zA-Z0-9$_.+!*'(,);/?:&=-]}
00965     variable    aChar       "($aCharN|${escape})"
00966     variable    gChar       {[a-zA-Z0-9$_.+-]}
00967     variable    newsgroup-name  "${alpha}${gChar}*"
00968     variable    message-id  "${aChar}+@${host}"
00969     variable    schemepart  "\\*|${newsgroup-name}|${message-id}"
00970     variable    url     "news:${schemepart}"
00971 }
00972 
00973 /*  WAIS*/
00974 uri::register wais {
00975     variable    uChar \
00976         [ [namespace =  parent [namespace current]]::basic::xChar]
00977     variable    hostOrPort \
00978         [ [namespace =  parent [namespace current]]::basic::hostOrPort]
00979     variable    search \
00980         [ [namespace =  parent [namespace current]]::http::search]
00981 
00982     variable    db      "${uChar}*"
00983     variable    type        "${uChar}*"
00984     variable    path        "${uChar}*"
00985 
00986     variable    database    "//${hostOrPort}/${db}"
00987     variable    index       "//${hostOrPort}/${db}\\?${search}"
00988     variable    doc     "//${hostOrPort}/${db}/${type}/${path}"
00989 
00990     /* variable schemepart  "${doc}|${index}|${database}"*/
00991 
00992     variable    schemepart \
00993         "//${hostOrPort}/${db}((\\?${search})|(/${type}/${path}))?"
00994 
00995     variable    url     "wais:${schemepart}"
00996 }
00997 
00998 /*  PROSPERO*/
00999 uri::register prospero {
01000     variable    escape \
01001         [ [namespace =  parent [namespace current]]::basic::escape]
01002     variable    hostOrPort \
01003         [ [namespace =  parent [namespace current]]::basic::hostOrPort]
01004     variable    path \
01005         [ [namespace =  parent [namespace current]]::ftp::path]
01006 
01007     variable    charN       {[a-zA-Z0-9$_.+!*'(,)?:@&-]}
01008     variable    char        "(${charN}|$escape)"
01009 
01010     variable    fieldname   "${char}*"
01011     variable    fieldvalue  "${char}*"
01012     variable    fieldspec   ";${fieldname}=${fieldvalue}"
01013 
01014     variable    schemepart  "//${hostOrPort}/${path}(${fieldspec})*"
01015     variable    url     "prospero:$schemepart"
01016 }
01017 
01018 /*  LDAP*/
01019 uri::register ldap {
01020     variable    hostOrPort \
01021         [ [namespace =  parent [namespace current]]::basic::hostOrPort]
01022 
01023     /*  very crude parsing*/
01024     variable    dn      {[^?]*}
01025     variable    attrs       {[^?]*}
01026     variable    scope       "base|one|sub"
01027     variable    filter      {[^?]*}
01028     /*  extensions are not handled yet*/
01029 
01030     variable    schemepart  "//${hostOrPort}(/${dn}(\?${attrs}(\?(${scope})(\?${filter})?)?)?)?"
01031     variable    url     "ldap:$schemepart"
01032 }
01033 
01034 package provide uri 1.2.1
01035 

Generated on 21 Sep 2010 for Gui by  doxygen 1.6.1