00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016 package require Tcl 8.2
00017
00018 namespace ::uri {
00019
00020 namespace export split join
00021 namespace export resolve isrelative
00022 namespace export geturl
00023 namespace export canonicalize
00024 namespace export register
00025
00026 variable file:counter 0
00027
00028
00029 variable schemes {}
00030 variable schemePattern ""
00031 variable url ""
00032 variable url2part
00033 array url2part = {}
00034
00035
00036
00037
00038 namespace basic {
00039 variable loAlpha {[a-z]}
00040 variable hiAlpha {[A-Z]}
00041 variable digit {[0-9]}
00042 variable alpha {[a-zA-Z]}
00043 variable safe {[$_.+-]}
00044 variable extra {[!*'(,)]}
00045
00046 variable national {[][|\}\{\^~`]}
00047 variable punctuation {[<>
00048 variable reserved {[;/?:@&=]}
00049 variable hex {[0-9A-Fa-f]}
00050 variable alphaDigit {[A-Za-z0-9]}
00051 variable alphaDigitMinus {[A-Za-z0-9-]}
00052
00053
00054 variable unsafe {[][<>"/* %\{\}|\\^~`]} ;#" emacs hilit*/
00055 variable escape "%${hex}${hex}"
00056
00057
00058
00059
00060 variable unreserved {[a-zA-Z0-9$_.+!*'(,)-]}
00061 variable uChar "(${unreserved}|${escape})"
00062 variable xCharN {[a-zA-Z0-9$_.+!*'(,);/?:@&=-]}
00063 variable xChar "(${xCharN}|${escape})"
00064 variable digits "${digit}+"
00065
00066 variable toplabel \
00067 "(${alpha}${alphaDigitMinus}*${alphaDigit}|${alpha})"
00068 variable domainlabel \
00069 "(${alphaDigit}${alphaDigitMinus}*${alphaDigit}|${alphaDigit})"
00070
00071 variable hostname \
00072 "((${domainlabel}\\.)*${toplabel})"
00073 variable hostnumber \
00074 "(${digits}\\.${digits}\\.${digits}\\.${digits})"
00075
00076 variable host "(${hostname}|${hostnumber})"
00077
00078 variable port $digits
00079 variable hostOrPort "${host}(:${port})?"
00080
00081 variable usrCharN {[a-zA-Z0-9$_.+!*'(,);?&=-]}
00082 variable usrChar "(${usrCharN}|${escape})"
00083 variable user "${usrChar}*"
00084 variable password $user
00085 variable login "(${user}(:${password})?@)?${hostOrPort}"
00086 } ;/* basic {}*/
00087 }
00088
00089
00090 /* ::uri::register --*/
00091 /* */
00092 /* Register a scheme (and aliases) in the package. The command*/
00093 /* creates a namespace below "::uri" with the same name as the*/
00094 /* scheme and executes the script declaring the pattern variables*/
00095 /* for this scheme in the new namespace. At last it updates the*/
00096 /* uri variables keeping track of overall scheme information.*/
00097 /* */
00098 /* The script has to declare at least the variable "schemepart",*/
00099 /* the pattern for an url of the registered scheme after the*/
00100 /* scheme declaration. Not declaring this variable is an error.*/
00101 /* */
00102 /* Arguments:*/
00103 /* schemeList Name of the scheme to register, plus aliases*/
00104 /* script Script declaring the scheme patterns*/
00105 /* */
00106 /* Results:*/
00107 /* None.*/
00108
00109 ret ::uri::register (type schemeList , type script) {
00110 variable schemes
00111 variable schemePattern
00112 variable url
00113 variable url2part
00114
00115 # Check scheme and its aliases for existence.
00116 foreach scheme $schemeList {
00117 if {[lsearch -exact $schemes $scheme] >= 0} {
00118 return -code error \
00119 "trying to register scheme (\"$scheme\") which is already known"
00120 }
00121 }
00122
00123 # Get the main scheme
00124 set scheme [lindex $schemeList 0]
00125
00126 if {[catch {namespace eval $scheme $script} msg]} {
00127 catch {namespace delete $scheme}
00128 return -code error \
00129 "error while evaluating scheme script: $msg"
00130 }
00131
00132 if {![info exists ${scheme}::schemepart]} {
00133 namespace delete $scheme
00134 return -code error \
00135 "Variable \"schemepart\" is missing."
00136 }
00137
00138 # Now we can extend the variables which keep track of the registered schemes.
00139
00140 eval [linsert $schemeList 0 lappend schemes]
00141 set schemePattern "([::join $schemes |]):"
00142
00143 foreach s $schemeList {
00144 # FRINK: nocheck
00145 set url2part($s) "${s}:[set ${scheme}::schemepart]"
00146 # FRINK: nocheck
00147 append url "(${s}:[set ${scheme}::schemepart])|"
00148 }
00149 set url [string trimright $url |]
00150 return
00151 }
00152
00153 /* ::uri::split --*/
00154 /* */
00155 /* Splits the given <a url> into its constituents.*/
00156 /* */
00157 /* Arguments:*/
00158 /* url the URL to split*/
00159 /* */
00160 /* Results:*/
00161 /* Tcl list containing constituents, suitable for 'array set'.*/
00162
00163 ret ::uri::split (type url , optional defaultscheme =http) {
00164
00165 set url [string trim $url]
00166 set scheme {}
00167
00168 # RFC 1738: scheme = 1*[ lowalpha | digit | "+" | "-" | "." ]
00169 regexp -- {^([a-z0-9+.-][a-z0-9+.-]*):} $url dummy scheme
00170
00171 if {$scheme == {}} {
00172 set scheme $defaultscheme
00173 }
00174
00175 # ease maintenance: dynamic dispatch, able to handle all schemes
00176 # added in future!
00177
00178 if {[::info procs Split[string totitle $scheme]] == {}} {
00179 error "unknown scheme '$scheme' in '$url'"
00180 }
00181
00182 regsub -- "^${scheme}:" $url {} url
00183
00184 set parts(scheme) $scheme
00185 array set parts [Split[string totitle $scheme] $url]
00186
00187 # should decode all encoded characters!
00188
00189 return [array get parts]
00190 }
00191
00192 ret ::uri::SplitFtp (type url) {
00193 # @c Splits the given ftp-<a url> into its constituents.
00194 # @a url: The url to split, without! scheme specification.
00195 # @r List containing the constituents, suitable for 'array set'.
00196
00197 # general syntax:
00198 # //<user>:<password>@<host>:<port>/<cwd1>/.../<cwdN>/<name>;type=<typecode>
00199 #
00200 # additional rules:
00201 #
00202 # <user>:<password> are optional, detectable by presence of @.
00203 # <password> is optional too.
00204 #
00205 # "//" [ <user> [":" <password> ] "@"] <host> [":" <port>] "/"
00206 # <cwd1> "/" ..."/" <cwdN> "/" <name> [";type=" <typecode>]
00207
00208 upvar \#0 [namespace current]::ftp::typepart ftptype
00209
00210 array set parts {user {} pwd {} host {} port {} path {} type {}}
00211
00212 # slash off possible type specification
00213
00214 if {[regexp -indices -- "${ftptype}$" $url dummy ftype]} {
00215
00216 set from [lindex $ftype 0]
00217 set to [lindex $ftype 1]
00218
00219 set parts(type) [string range $url $from $to]
00220
00221 set from [lindex $dummy 0]
00222 set url [string replace $url $from end]
00223 }
00224
00225 # Handle user, password, host and port
00226
00227 if {[string match "//*" $url]} {
00228 set url [string range $url 2 end]
00229
00230 array set parts [GetUPHP url]
00231 }
00232
00233 set parts(path) [string trimleft $url /]
00234
00235 return [array get parts]
00236 }
00237
00238 ret ::uri::JoinFtp args (
00239 type array , type set , type components , optional
00240 user ={ , type pwd , optional , type host , optional , type port , optional
00241 , type path , optional , type type , optional
00242 )
00243 array set components $args
00244
00245 set userPwd {}
00246 if {[string length $components(user)] || [string length $components(pwd)]} {
00247 userPwd = $components(user)[expr {[string length $components(pwd)] ? ":$components(pwd)" : {}}]@
00248 }
00249
00250 port = {}
00251 if {[string length $components(port)]} {
00252 port = :$components(port)
00253 }
00254
00255 type = {}
00256 if {[string length $components(type)]} {
00257 type = \;type=$components(type)
00258 }
00259
00260 return ftp://${userPwd}$components(host)${port}/[string trimleft $components(path) /]$type
00261 }
00262
00263 ret ::uri::SplitHttps (type url) {
00264 return [SplitHttp $url]
00265 }
00266
00267 ret ::uri::SplitHttp (type url) {
00268 # @c Splits the given http-<a url> into its constituents.
00269 # @a url: The url to split, without! scheme specification.
00270 # @r List containing the constituents, suitable for 'array set'.
00271
00272 # general syntax:
00273 # //<host>:<port>/<path>?<searchpart>
00274 #
00275 # where <host> and <port> are as described in Section 3.1. If :<port>
00276 # is omitted, the port defaults to 80. No user name or password is
00277 # allowed. <path> is an HTTP selector, and <searchpart> is a query
00278 # string. The <path> is optional, as is the <searchpart> and its
00279 # preceding "?". If neither <path> nor <searchpart> is present, the "/"
00280 # may also be omitted.
00281 #
00282 # Within the <path> and <searchpart> components, "/", ";", "?" are
00283 # reserved. The "/" character may be used within HTTP to designate a
00284 # hierarchical structure.
00285 #
00286 # path == <cwd1> "/" ..."/" <cwdN> "/" <name> ["#" <fragment>]
00287
00288 upvar #0 [namespace current]::http::search search
00289 upvar #0 [namespace current]::http::segment segment
00290
00291 array set parts {host {} port {} path {} query {}}
00292
00293 set searchPattern "\\?(${search})\$"
00294 set fragmentPattern "#(${segment})\$"
00295
00296 # slash off possible query. the 'search' regexp, while official,
00297 # is not good enough. We have apparently lots of urls in the wild
00298 # which contain unquoted urls with queries in a query. The RE
00299 # finds the embedded query, not the actual one. Using string first
00300 # now instead of a RE
00301
00302 if {[set pos [string first ? $url]] >= 0} {
00303 incr pos
00304 set parts(query) [string range $url $pos end]
00305 incr pos -1
00306 set url [string replace $url $pos end]
00307 }
00308
00309 # slash off possible fragment
00310
00311 if {[regexp -indices -- $fragmentPattern $url match fragment]} {
00312 set from [lindex $fragment 0]
00313 set to [lindex $fragment 1]
00314
00315 set parts(fragment) [string range $url $from $to]
00316
00317 set url [string replace $url [lindex $match 0] end]
00318 }
00319
00320 if {[string match "//*" $url]} {
00321 set url [string range $url 2 end]
00322
00323 array set parts [GetUPHP url]
00324 }
00325
00326 set parts(path) [string trimleft $url /]
00327
00328 return [array get parts]
00329 }
00330
00331 ret ::uri::JoinHttp (type args) {
00332 return [eval [linsert $args 0 ::uri::JoinHttpInner http 80]]
00333 }
00334
00335 ret ::uri::JoinHttps (type args) {
00336 return [eval [linsert $args 0 ::uri::JoinHttpInner https 443]]
00337 }
00338
00339 ret ::uri::JoinHttpInner (type scheme , type defport , type args) {
00340 array set components {host {} path {} query {}}
00341 set components(port) $defport
00342 array set components $args
00343
00344 set port {}
00345 if {[string length $components(port)] && $components(port) != $defport} {
00346 set port :$components(port)
00347 }
00348
00349 set query {}
00350 if {[string length $components(query)]} {
00351 set query ?$components(query)
00352 }
00353
00354 regsub -- {^/} $components(path) {} components(path)
00355
00356 if { [info exists components(fragment)] && $components(fragment) != "" } {
00357 set components(fragment) "#$components(fragment)"
00358 } else {
00359 set components(fragment) ""
00360 }
00361
00362 return $scheme://$components(host)$port/$components(path)$components(fragment)$query
00363 }
00364
00365 ret ::uri::SplitFile (type url) {
00366 # @c Splits the given file-<a url> into its constituents.
00367 # @a url: The url to split, without! scheme specification.
00368 # @r List containing the constituents, suitable for 'array set'.
00369
00370 upvar #0 [namespace current]::basic::hostname hostname
00371 upvar #0 [namespace current]::basic::hostnumber hostnumber
00372
00373 if {[string match "//*" $url]} {
00374 set url [string range $url 2 end]
00375
00376 set hostPattern "^($hostname|$hostnumber)"
00377 switch -exact -- $::tcl_platform(platform) {
00378 windows {
00379 # Catch drive letter
00380 append hostPattern :?
00381 }
00382 default {
00383 # Proceed as usual
00384 }
00385 }
00386
00387 if {[regexp -indices -- $hostPattern $url match host]} {
00388 set fh [lindex $host 0]
00389 set th [lindex $host 1]
00390
00391 set parts(host) [string range $url $fh $th]
00392
00393 set matchEnd [lindex $match 1]
00394 incr matchEnd
00395
00396 set url [string range $url $matchEnd end]
00397 }
00398 }
00399
00400 set parts(path) $url
00401
00402 return [array get parts]
00403 }
00404
00405 ret ::uri::JoinFile args (
00406 type array , type set , type components , optional
00407 host ={ , type port , optional , type path , optional
00408 )
00409 array set components $args
00410
00411 switch -exact -- $::tcl_platform(platform) {
00412 windows {
00413 if {[string length $components(host)]} {
00414 return file://$components(host):$components(path)
00415 } else {
00416 return file://$components(path)
00417 }
00418 }
00419 default {
00420 return file://$components(host)$components(path)
00421 }
00422 }
00423 }
00424
00425 ret ::uri::SplitMailto (type url) {
00426 # @c Splits the given mailto-<a url> into its constituents.
00427 # @a url: The url to split, without! scheme specification.
00428 # @r List containing the constituents, suitable for 'array set'.
00429
00430 if {[string match "*@*" $url]} {
00431 set url [::split $url @]
00432 return [list user [lindex $url 0] host [lindex $url 1]]
00433 } else {
00434 return [list user $url]
00435 }
00436 }
00437
00438 ret ::uri::JoinMailto args (
00439 type array , type set , type components , optional
00440 user ={ , type host , optional
00441 )
00442 array set components $args
00443
00444 return mailto:$components(user)@$components(host)
00445 }
00446
00447 proc ::uri::SplitNews {url} {
00448 if { [string first @ $url] >= 0 } {
00449 return [list message-id $url]
00450 } else {
00451 return [list newsgroup-name $url]
00452 }
00453 }
00454
00455 ret ::uri::JoinNews args (
00456 type array , type set , type components , optional
00457 message-id ={ , type newsgroup-, type name , optional
00458 )
00459 array set components $args
00460 return news:$components(message-id)$components(newsgroup-name)
00461 }
00462
00463 proc ::uri::SplitLdaps {url} {
00464 ::uri::SplitLdap $url
00465 }
00466
00467 proc ::uri::SplitLdap {url} {
00468 # @c Splits the given Ldap-<a url> into its constituents.
00469 # @a url: The url to split, without! scheme specification.
00470 # @r List containing the constituents, suitable for 'array set'.
00471
00472 # general syntax:
00473 # //<host>:<port>/<dn>?<attrs>?<scope>?<filter>?<extensions>
00474 #
00475 # where <host> and <port> are as described in Section 5 of RFC 1738.
00476 # No user name or password is allowed.
00477 # If omitted, the port defaults to 389 for ldap, 636 for ldaps
00478 # <dn> is the base DN for the search
00479 # <attrs> is a comma separated list of attributes description
00480 # <scope> is either "base", "one" or "sub".
00481 # <filter> is a RFC 2254 filter specification
00482 # <extensions> are documented in RFC 2255
00483 #
00484
00485 array set parts {host {} port {} dn {} attrs {} scope {} filter {} extensions {}}
00486
00487 /* host port dn attrs scope filter extns*/
00488 re = {//([^:?/]+)(?::([0-9]+))?(?:/([^?]+)(?:\?([^?]*)(?:\?(base|one|sub)?(?:\?([^?]*)(?:\?(.*))?)?)?)?)?}
00489
00490 if {! [regexp $re $url match parts(host) parts(port) \
00491 parts(dn) parts(attrs) parts(scope) parts(filter) \
00492 parts(extensions)]} then {
00493 return -code error "unable to match URL \"$url\""
00494 }
00495
00496 parts = (attrs) [::split $parts(attrs) ","]
00497
00498 return [array get parts]
00499 }
00500
00501 ret ::uri::JoinLdap (type args) {
00502 return [eval [linsert $args 0 ::uri::JoinLdapInner ldap 389]]
00503 }
00504
00505 ret ::uri::JoinLdaps (type args) {
00506 return [eval [linsert $args 0 ::uri::JoinLdapInner ldaps 636]]
00507 }
00508
00509 ret ::uri::JoinLdapInner (type scheme , type defport , type args) {
00510 array set components {host {} port {} dn {} attrs {} scope {} filter {} extensions {}}
00511 set components(port) $defport
00512 array set components $args
00513
00514 set port {}
00515 if {[string length $components(port)] && $components(port) != $defport} {
00516 set port :$components(port)
00517 }
00518
00519 set url "$scheme://$components(host)$port"
00520
00521 set components(attrs) [::join $components(attrs) ","]
00522
00523 set s ""
00524 foreach c {dn attrs scope filter extensions} {
00525 if {[string equal $c "dn"]} then {
00526 append s "/"
00527 } else {
00528 append s "?"
00529 }
00530 if {! [string equal $components($c) ""]} then {
00531 append url "${s}$components($c)"
00532 set s ""
00533 }
00534 }
00535
00536 return $url
00537 }
00538
00539 ret ::uri::GetUPHP (type urlvar) {
00540 # @c Parse user, password host and port out of the url stored in
00541 # @c variable <a urlvar>.
00542 # @d Side effect: The extracted information is removed from the given url.
00543 # @r List containing the extracted information in a format suitable for
00544 # @r 'array set'.
00545 # @a urlvar: Name of the variable containing the url to parse.
00546
00547 upvar \#0 [namespace current]::basic::user user
00548 upvar \#0 [namespace current]::basic::password password
00549 upvar \#0 [namespace current]::basic::hostname hostname
00550 upvar \#0 [namespace current]::basic::hostnumber hostnumber
00551 upvar \#0 [namespace current]::basic::port port
00552
00553 upvar $urlvar url
00554
00555 array set parts {user {} pwd {} host {} port {}}
00556
00557 # syntax
00558 # "//" [ <user> [":" <password> ] "@"] <host> [":" <port>] "/"
00559 # "//" already cut off by caller
00560
00561 set upPattern "^(${user})(:(${password}))?@"
00562
00563 if {[regexp -indices -- $upPattern $url match theUser c d thePassword]} {
00564 set fu [lindex $theUser 0]
00565 set tu [lindex $theUser 1]
00566
00567 set fp [lindex $thePassword 0]
00568 set tp [lindex $thePassword 1]
00569
00570 set parts(user) [string range $url $fu $tu]
00571 set parts(pwd) [string range $url $fp $tp]
00572
00573 set matchEnd [lindex $match 1]
00574 incr matchEnd
00575
00576 set url [string range $url $matchEnd end]
00577 }
00578
00579 set hpPattern "^($hostname|$hostnumber)(:($port))?"
00580
00581 if {[regexp -indices -- $hpPattern $url match theHost c d e f g h thePort]} {
00582 set fh [lindex $theHost 0]
00583 set th [lindex $theHost 1]
00584
00585 set fp [lindex $thePort 0]
00586 set tp [lindex $thePort 1]
00587
00588 set parts(host) [string range $url $fh $th]
00589 set parts(port) [string range $url $fp $tp]
00590
00591 set matchEnd [lindex $match 1]
00592 incr matchEnd
00593
00594 set url [string range $url $matchEnd end]
00595 }
00596
00597 return [array get parts]
00598 }
00599
00600 ret ::uri::GetHostPort (type urlvar) {
00601 # @c Parse host and port out of the url stored in variable <a urlvar>.
00602 # @d Side effect: The extracted information is removed from the given url.
00603 # @r List containing the extracted information in a format suitable for
00604 # @r 'array set'.
00605 # @a urlvar: Name of the variable containing the url to parse.
00606
00607 upvar #0 [namespace current]::basic::hostname hostname
00608 upvar #0 [namespace current]::basic::hostnumber hostnumber
00609 upvar #0 [namespace current]::basic::port port
00610
00611 upvar $urlvar url
00612
00613 set pattern "^(${hostname}|${hostnumber})(:(${port}))?"
00614
00615 if {[regexp -indices -- $pattern $url match host c d e f g h thePort]} {
00616 set fromHost [lindex $host 0]
00617 set toHost [lindex $host 1]
00618
00619 set fromPort [lindex $thePort 0]
00620 set toPort [lindex $thePort 1]
00621
00622 set parts(host) [string range $url $fromHost $toHost]
00623 set parts(port) [string range $url $fromPort $toPort]
00624
00625 set matchEnd [lindex $match 1]
00626 incr matchEnd
00627
00628 set url [string range $url $matchEnd end]
00629 }
00630
00631 return [array get parts]
00632 }
00633
00634 /* ::uri::resolve --*/
00635 /* */
00636 /* Resolve an arbitrary URL, given a base URL*/
00637 /* */
00638 /* Arguments:*/
00639 /* base base URL (absolute)*/
00640 /* url arbitrary URL*/
00641 /* */
00642 /* Results:*/
00643 /* Returns a URL*/
00644
00645 ret ::uri::resolve (type base , type url) {
00646 if {[string length $url]} {
00647 if {[isrelative $url]} {
00648
00649 array set baseparts [split $base]
00650
00651 switch -- $baseparts(scheme) {
00652 http -
00653 https -
00654 ftp -
00655 file {
00656 array set relparts [split $url]
00657 if { [string match /* $url] } {
00658 catch { set baseparts(path) $relparts(path) }
00659 } elseif { [string match */ $baseparts(path)] } {
00660 set baseparts(path) "$baseparts(path)$relparts(path)"
00661 } else {
00662 if { [string length $relparts(path)] > 0 } {
00663 set path [lreplace [::split $baseparts(path) /] end end]
00664 set baseparts(path) "[::join $path /]/$relparts(path)"
00665 }
00666 }
00667 catch { set baseparts(query) $relparts(query) }
00668 catch { set baseparts(fragment) $relparts(fragment) }
00669 return [eval [linsert [array get baseparts] 0 join]]
00670 }
00671 default {
00672 return -code error "unable to resolve relative URL \"$url\""
00673 }
00674 }
00675
00676 } else {
00677 return $url
00678 }
00679 } else {
00680 return $base
00681 }
00682 }
00683
00684 /* ::uri::isrelative --*/
00685 /* */
00686 /* Determines whether a URL is absolute or relative*/
00687 /* */
00688 /* Arguments:*/
00689 /* url URL to check*/
00690 /* */
00691 /* Results:*/
00692 /* Returns 1 if the URL is relative, 0 otherwise*/
00693
00694 ret ::uri::isrelative url (
00695 type return [, type expr , optional ![regexp =-- {^[a-z0-9+-.][a-z0-9+-.]*: $, type url])]
00696 }
00697
00698 # ::uri::geturl --
00699 #
00700 # Fetch the data from an arbitrary URL.
00701 #
00702 # This package provides a handler for the file:
00703 # scheme, since this conflicts with the file command.
00704 #
00705 # Arguments:
00706 # url address of data resource
00707 # args configuration options
00708 #
00709 # Results:
00710 # Depends on scheme
00711
00712 proc ::uri::geturl {url args} {
00713 array set urlparts [split $url]
00714
00715 switch -- $urlparts(scheme) {
00716 file {
00717 return [eval [linsert $args 0 file_geturl $url]]
00718 }
00719 default {
00720 # Load a geturl package for the scheme first and only if
00721 # that fails the scheme package itself. This prevents
00722 # cyclic dependencies between packages.
00723 if {[catch {package require $urlparts(scheme)::geturl}]} {
00724 package require $urlparts(scheme)
00725 }
00726 return [eval [linsert $args 0 $urlparts(scheme)::geturl $url]]
00727 }
00728 }
00729 }
00730
00731 /* ::uri::file_geturl --*/
00732 /* */
00733 /* geturl implementation for file: scheme*/
00734 /* */
00735 /* TODO:*/
00736 /* This is an initial, basic implementation.*/
00737 /* Eventually want to support all options for geturl.*/
00738 /* */
00739 /* Arguments:*/
00740 /* url URL to fetch*/
00741 /* args configuration options*/
00742 /* */
00743 /* Results:*/
00744 /* Returns data from file*/
00745
00746 ret ::uri::file_geturl (type url , type args) {
00747 variable file:counter
00748
00749 set var [namespace current]::file[incr file:counter]
00750 upvar #0 $var state
00751 array set state {data {}}
00752
00753 array set parts [split $url]
00754
00755 set ch [open $parts(path)]
00756 # Could determine text/binary from file extension,
00757 # except on Macintosh
00758 # fconfigure $ch -translation binary
00759 set state(data) [read $ch]
00760 close $ch
00761
00762 return $var
00763 }
00764
00765 /* ::uri::join --*/
00766 /* */
00767 /* Format a URL*/
00768 /* */
00769 /* Arguments:*/
00770 /* args components, key-value format*/
00771 /* */
00772 /* Results:*/
00773 /* A URL*/
00774
00775 ret ::uri::join args (
00776 type array , type set , type components $, type args
00777
00778 , type return [, type eval [, type linsert $, type args 0 , type Join[, type string , type totitle $, type components(, type scheme)]]]
00779 )
00780
00781 # ::uri::canonicalize --
00782 #
00783 # Canonicalize a URL
00784 #
00785 # Acknowledgements:
00786 # Andreas Kupries <andreas_kupries@users.sourceforge.net>
00787 #
00788 # Arguments:
00789 # uri URI (which contains a path component)
00790 #
00791 # Results:
00792 # The canonical form of the URI
00793
00794 proc ::uri::canonicalize uri {
00795
00796 # Make uri canonical with respect to dots (path changing commands)
00797 #
00798 # Remove single dots (.) => pwd not changing
00799 # Remove double dots (..) => gobble previous segment of path
00800 #
00801 # Fixes for this command:
00802 #
00803 # * Ignore any url which cannot be split into components by this
00804 # module. Just assume that such urls do not have a path to
00805 # canonicalize.
00806 #
00807 # * Ignore any url which could be split into components, but does
00808 # not have a path component.
00809 #
00810 # In the text above 'ignore' means
00811 # 'return the url unchanged to the caller'.
00812
00813 if {[catch {array set u [::uri::split $uri]}]} {
00814 return $uri
00815 }
00816 if {![info exists u(path)]} {
00817 return $uri
00818 }
00819
00820 set uri $u(path)
00821
00822 # Remove leading "./" "../" "/.." (and "/../")
00823 regsub -all -- {^(\./)+} $uri {} uri
00824 regsub -all -- {^/(\.\./)+} $uri {/} uri
00825 regsub -all -- {^(\.\./)+} $uri {} uri
00826
00827 # Remove inner /./ and /../
00828 while {[regsub -all -- {/\./} $uri {/} uri]} {}
00829 while {[regsub -all -- {/[^/]+/\.\./} $uri {/} uri]} {}
00830 while {[regsub -all -- {^[^/]+/\.\./} $uri {} uri]} {}
00831 # Munge trailing /..
00832 while {[regsub -all -- {/[^/]+/\.\.} $uri {/} uri]} {}
00833 if { $uri == ".." } { set uri "/" }
00834
00835 set u(path) $uri
00836 set uri [eval [linsert [array get u] 0 ::uri::join]]
00837
00838 return $uri
00839 }
00840
00841 /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
00842 /* regular expressions covering various url schemes*/
00843
00844 /* Currently known URL schemes:*/
00845 /* */
00846 /* (RFC 1738)*/
00847 /* ------------------------------------------------*/
00848 /* scheme basic syntax of scheme specific part*/
00849 /* ------------------------------------------------*/
00850 /* ftp //<user>:<password>@<host>:<port>/<cwd1>/.../<cwdN>/<name>;type=<typecode>*/
00851 /* */
00852 /* http //<host>:<port>/<path>?<searchpart>*/
00853 /* */
00854 /* gopher //<host>:<port>/<gophertype><selector>*/
00855 /* <gophertype><selector>%09<search>*/
00856 /* <gophertype><selector>%09<search>%09<gopher+_string>*/
00857 /* */
00858 /* mailto <rfc822-addr-spec>*/
00859 /* news <newsgroup-name>*/
00860 /* <message-id>*/
00861 /* nntp //<host>:<port>/<newsgroup-name>/<article-number>*/
00862 /* telnet //<user>:<password>@<host>:<port>/*/
00863 /* wais //<host>:<port>/<database>*/
00864 /* //<host>:<port>/<database>?<search>*/
00865 /* //<host>:<port>/<database>/<wtype>/<wpath>*/
00866 /* file //<host>/<path>*/
00867 /* prospero //<host>:<port>/<hsoname>;<field>=<value>*/
00868 /* ------------------------------------------------*/
00869 /* */
00870 /* (RFC 2111)*/
00871 /* ------------------------------------------------*/
00872 /* scheme basic syntax of scheme specific part*/
00873 /* ------------------------------------------------*/
00874 /* mid message-id*/
00875 /* message-id/content-id*/
00876 /* cid content-id*/
00877 /* ------------------------------------------------*/
00878 /* */
00879 /* (RFC 2255)*/
00880 /* ------------------------------------------------*/
00881 /* scheme basic syntax of scheme specific part*/
00882 /* ------------------------------------------------*/
00883 /* ldap //<host>:<port>/<dn>?<attrs>?<scope>?<filter>?<extensions>*/
00884 /* ------------------------------------------------*/
00885
00886 /* FTP*/
00887 uri::register ftp {
00888 variable escape [ [namespace = parent [namespace current]]::basic::escape]
00889 variable login [ [namespace = parent [namespace current]]::basic::login]
00890
00891 variable charN {[a-zA-Z0-9$_.+!*'(,)?:@&=-]}
00892 variable char "(${charN}|${escape})"
00893 variable segment "${char}*"
00894 variable path "${segment}(/${segment})*"
00895
00896 variable type {[AaDdIi]}
00897 variable typepart ";type=(${type})"
00898 variable schemepart \
00899 "//${login}(/${path}(${typepart})?)?"
00900
00901 variable url "ftp:${schemepart}"
00902 }
00903
00904
00905 uri::register file {
00906 variable host [ [namespace = parent [namespace current]]::basic::host]
00907 variable path [ [namespace = parent [namespace current]]::ftp::path]
00908
00909 variable schemepart "
00910 variable url "file:${schemepart}"
00911 }
00912
00913 /* HTTP*/
00914 uri::register http {
00915 variable escape \
00916 [ [namespace = parent [namespace current]]::basic::escape]
00917 variable hostOrPort \
00918 [ [namespace = parent [namespace current]]::basic::hostOrPort]
00919
00920 variable charN {[a-zA-Z0-9$_.+!*'(,);:@&=-]}
00921 variable char "($charN|${escape})"
00922 variable segment "${char}*"
00923
00924 variable path "${segment}(/${segment})*"
00925 variable search $segment
00926 variable schemepart \
00927 "
00928
00929 variable url "http:${schemepart}"
00930 }
00931
00932
00933 uri::register gopher {
00934 variable xChar \
00935 [ [namespace = parent [namespace current]]::basic::xChar]
00936 variable hostOrPort \
00937 [ [namespace = parent [namespace current]]::basic::hostOrPort]
00938 variable search \
00939 [ [namespace = parent [namespace current]]::http::search]
00940
00941 variable type $xChar
00942 variable selector "$xChar*"
00943 variable string $selector
00944 variable schemepart \
00945 "
00946 variable url "gopher:${schemepart}"
00947 }
00948
00949 /* MAILTO*/
00950 uri::register mailto {
00951 variable xChar [ [namespace = parent [namespace current]]::basic::xChar]
00952 variable host [ [namespace = parent [namespace current]]::basic::host]
00953
00954 variable schemepart "$xChar+(@${host})?"
00955 variable url "mailto:${schemepart}"
00956 }
00957
00958 /* NEWS*/
00959 uri::register news {
00960 variable escape [ [namespace = parent [namespace current]]::basic::escape]
00961 variable alpha [ [namespace = parent [namespace current]]::basic::alpha]
00962 variable host [ [namespace = parent [namespace current]]::basic::host]
00963
00964 variable aCharN {[a-zA-Z0-9$_.+!*'(,);/?:&=-]}
00965 variable aChar "($aCharN|${escape})"
00966 variable gChar {[a-zA-Z0-9$_.+-]}
00967 variable newsgroup-name "${alpha}${gChar}*"
00968 variable message-id "${aChar}+@${host}"
00969 variable schemepart "\\*|${newsgroup-name}|${message-id}"
00970 variable url "news:${schemepart}"
00971 }
00972
00973 /* WAIS*/
00974 uri::register wais {
00975 variable uChar \
00976 [ [namespace = parent [namespace current]]::basic::xChar]
00977 variable hostOrPort \
00978 [ [namespace = parent [namespace current]]::basic::hostOrPort]
00979 variable search \
00980 [ [namespace = parent [namespace current]]::http::search]
00981
00982 variable db "${uChar}*"
00983 variable type "${uChar}*"
00984 variable path "${uChar}*"
00985
00986 variable database "
00987 variable index "//${hostOrPort}/${db}\\?${search}"
00988 variable doc "//${hostOrPort}/${db}/${type}/${path}"
00989
00990
00991
00992 variable schemepart \
00993 "//${hostOrPort}/${db}((\\?${search})|(/${type}/${path}))?"
00994
00995 variable url "wais:${schemepart}"
00996 }
00997
00998
00999 uri::register prospero {
01000 variable escape \
01001 [ [namespace = parent [namespace current]]::basic::escape]
01002 variable hostOrPort \
01003 [ [namespace = parent [namespace current]]::basic::hostOrPort]
01004 variable path \
01005 [ [namespace = parent [namespace current]]::ftp::path]
01006
01007 variable charN {[a-zA-Z0-9$_.+!*'(,)?:@&-]}
01008 variable char "(${charN}|$escape)"
01009
01010 variable fieldname "${char}*"
01011 variable fieldvalue "${char}*"
01012 variable fieldspec ";${fieldname}=${fieldvalue}"
01013
01014 variable schemepart "//${hostOrPort}/${path}(${fieldspec})*"
01015 variable url "prospero:$schemepart"
01016 }
01017
01018 /* LDAP*/
01019 uri::register ldap {
01020 variable hostOrPort \
01021 [ [namespace = parent [namespace current]]::basic::hostOrPort]
01022
01023 /* very crude parsing*/
01024 variable dn {[^?]*}
01025 variable attrs {[^?]*}
01026 variable scope "base|one|sub"
01027 variable filter {[^?]*}
01028 /* extensions are not handled yet*/
01029
01030 variable schemepart "//${hostOrPort}(/${dn}(\?${attrs}(\?(${scope})(\?${filter})?)?)?)?"
01031 variable url "ldap:$schemepart"
01032 }
01033
01034 package provide uri 1.2.1
01035