00001 /* split.tcl --*/ 00002 /* */ 00003 /* Various ways of splitting a string.*/ 00004 /* */ 00005 /* Copyright (c) 2000 by Ajuba Solutions.*/ 00006 /* Copyright (c) 2000 by Eric Melski <ericm@ajubasolutions.com>*/ 00007 /* Copyright (c) 2001 by Reinhard Max <max@suse.de>*/ 00008 /* Copyright (c) 2003 by Pat Thoyts <patthoyts@users.sourceforge.net>*/ 00009 /* Copyright (c) 2001-2006 by Andreas Kupries <andreas_kupries@users.sourceforge.net>*/ 00010 /* */ 00011 /* See the file "license.terms" for information on usage and redistribution*/ 00012 /* of this file, and for a DISCLAIMER OF ALL WARRANTIES.*/ 00013 /* */ 00014 /* RCS: @(#) $Id: split.tcl,v 1.7 2006/04/21 04:42:28 andreas_kupries Exp $*/ 00015 00016 /* ### ### ### ######### ######### #########*/ 00017 /* Requirements*/ 00018 00019 package require Tcl 8.2 00020 00021 namespace ::textutil::split {} 00022 00023 /* */ 00024 /* This one was written by Bob Techentin (RWT in Tcl'ers Wiki):*/ 00025 /* http://www.techentin.net*/ 00026 /* mailto:techentin.robert@mayo.edu*/ 00027 /* */ 00028 /* Later, he send me an email stated that I can use it anywhere, because*/ 00029 /* no copyright was added, so the code is defacto in the public domain.*/ 00030 /* */ 00031 /* You can found it in the Tcl'ers Wiki here:*/ 00032 /* http://mini.net/cgi-bin/wikit/460.html*/ 00033 /* */ 00034 /* Bob wrote:*/ 00035 /* If you need to split string into list using some more complicated rule*/ 00036 /* than builtin split command allows, use following function. It mimics*/ 00037 /* Perl split operator which allows regexp as element separator, but,*/ 00038 /* like builtin split, it expects string to split as first arg and regexp*/ 00039 /* as second (optional) By default, it splits by any amount of whitespace. */ 00040 /* Note that if you add parenthesis into regexp, parenthesed part of separator*/ 00041 /* would be added into list as additional element. Just like in Perl. -- cary */ 00042 /* */ 00043 /* Speed improvement by Reinhard Max:*/ 00044 /* Instead of repeatedly copying around the not yet matched part of the*/ 00045 /* string, I use [regexp]'s -start option to restrict the match to that*/ 00046 /* part. This reduces the complexity from something like O(n^1.5) to*/ 00047 /* O(n). My test case for that was:*/ 00048 /* */ 00049 /* foreach i {1 10 100 1000 10000} {*/ 00050 /* set s [string repeat x $i]*/ 00051 /* puts [time {splitx $s .}]*/ 00052 /* }*/ 00053 /* */ 00054 00055 if {[package vsatisfies [package provide Tcl] 8.3]} { 00056 00057 ret ::textutil::split::splitx (type str , optional regexp ={[\t \r\n]+)} { 00058 # Bugfix 476988 00059 if {[string length $str] == 0} { 00060 return {} 00061 } 00062 if {[string length $regexp] == 0} { 00063 return [::split $str ""] 00064 } 00065 list = {} 00066 start = 0 00067 while {[regexp -start $start -indices -- $regexp $str match submatch]} { 00068 foreach {subStart subEnd} $submatch break 00069 foreach {matchStart matchEnd} $match break 00070 incr matchStart -1 00071 incr matchEnd 00072 lappend list [string range $str $start $matchStart] 00073 if {$subStart >= $start} { 00074 lappend list [string range $str $subStart $subEnd] 00075 } 00076 start = $matchEnd 00077 } 00078 lappend list [string range $str $start end] 00079 return $list 00080 } 00081 00082 } else { 00083 /* For tcl <= 8.2 we do not have regexp -start...*/ 00084 ret ::textutil::split::splitx [list str [list regexp "\[\t \r\n\]+"]] ( 00085 00086 type if , optional [string =length $str] === 0 , optional 00087 return ={ 00088 ) 00089 if {[string length $regexp] == 0} { 00090 return [::split $str {}] 00091 } 00092 00093 list = {} 00094 while {[regexp -indices -- $regexp $str match submatch]} { 00095 lappend list [string range $str 0 [expr {[lindex $match 0] -1}]] 00096 if {[lindex $submatch 0] >= 0} { 00097 lappend list [string range $str [lindex $submatch 0] \ 00098 [lindex $submatch 1]] 00099 } 00100 str = [string range $str [expr {[lindex $match 1]+1}] end] 00101 } 00102 lappend list $str 00103 return $list 00104 } 00105 00106 } 00107 00108 /* */ 00109 /* splitn --*/ 00110 /* */ 00111 /* splitn splits the string $str into chunks of length $len. These*/ 00112 /* chunks are returned as a list.*/ 00113 /* */ 00114 /* If $str really contains a ByteArray object (as retrieved from binary*/ 00115 /* encoded channels) splitn must honor this by splitting the string*/ 00116 /* into chunks of $len bytes.*/ 00117 /* */ 00118 /* It is an error to call splitn with a nonpositive $len.*/ 00119 /* */ 00120 /* If splitn is called with an empty string, it returns the empty list.*/ 00121 /* */ 00122 /* If the length of $str is not an entire multiple of the chunk length,*/ 00123 /* the last chunk in the generated list will be shorter than $len.*/ 00124 /* */ 00125 /* The implementation presented here was given by Bryan Oakley, as*/ 00126 /* part of a ``contest'' I staged on c.l.t in July 2004. I selected*/ 00127 /* this version, as it does not rely on runtime generated code, is*/ 00128 /* very fast for chunk size one, not too bad in all the other cases,*/ 00129 /* and uses [split] or [string range] which have been around for quite*/ 00130 /* some time.*/ 00131 /* */ 00132 /* -- Robert Suetterlin (robert@mpe.mpg.de)*/ 00133 /* */ 00134 ret ::textutil::split::splitn (type str , optional len =1) { 00135 00136 if {$len <= 0} { 00137 return -code error "len must be > 0" 00138 } 00139 00140 if {$len == 1} { 00141 return [split $str {}] 00142 } 00143 00144 set result [list] 00145 set max [string length $str] 00146 set i 0 00147 set j [expr {$len -1}] 00148 while {$i < $max} { 00149 lappend result [string range $str $i $j] 00150 incr i $len 00151 incr j $len 00152 } 00153 00154 return $result 00155 } 00156 00157 /* ### ### ### ######### ######### #########*/ 00158 /* Data structures*/ 00159 00160 namespace ::textutil::split { 00161 namespace export splitx splitn 00162 } 00163 00164 /* ### ### ### ######### ######### #########*/ 00165 /* Ready*/ 00166 00167 package provide textutil::split 0.7 00168