split.tcl

Go to the documentation of this file.
00001 /*  split.tcl --*/
00002 /* */
00003 /*  Various ways of splitting a string.*/
00004 /* */
00005 /*  Copyright (c) 2000      by Ajuba Solutions.*/
00006 /*  Copyright (c) 2000      by Eric Melski <ericm@ajubasolutions.com>*/
00007 /*  Copyright (c) 2001      by Reinhard Max <max@suse.de>*/
00008 /*  Copyright (c) 2003      by Pat Thoyts <patthoyts@users.sourceforge.net>*/
00009 /*  Copyright (c) 2001-2006 by Andreas Kupries <andreas_kupries@users.sourceforge.net>*/
00010 /* */
00011 /*  See the file "license.terms" for information on usage and redistribution*/
00012 /*  of this file, and for a DISCLAIMER OF ALL WARRANTIES.*/
00013 /*  */
00014 /*  RCS: @(#) $Id: split.tcl,v 1.7 2006/04/21 04:42:28 andreas_kupries Exp $*/
00015 
00016 /*  ### ### ### ######### ######### #########*/
00017 /*  Requirements*/
00018 
00019 package require Tcl 8.2
00020 
# Create the package namespace (a no-op if it already exists) so that the
# fully-qualified proc definitions in this file have a namespace to live in.
namespace eval ::textutil::split {}
00022 
00023 /* */
00024 /*  This one was written by Bob Techentin (RWT in Tcl'ers Wiki):*/
00025 /*  http://www.techentin.net*/
00026 /*  mailto:techentin.robert@mayo.edu*/
00027 /* */
00028 /*  Later, he sent me an email stating that I can use it anywhere; because*/
00029 /*  no copyright was added, the code is de facto in the public domain.*/
00030 /* */
00031 /*  You can find it in the Tcl'ers Wiki here:*/
00032 /*  http://mini.net/cgi-bin/wikit/460.html*/
00033 /* */
00034 /*  Bob wrote:*/
00035 /*  If you need to split string into list using some more complicated rule*/
00036 /*  than builtin split command allows, use following function. It mimics*/
00037 /*  Perl split operator which allows regexp as element separator, but,*/
00038 /*  like builtin split, it expects string to split as first arg and regexp*/
00039 /*  as second (optional) By default, it splits by any amount of whitespace. */
00040 /*  Note that if you add parenthesis into regexp, parenthesed part of separator*/
00041 /*  would be added into list as additional element. Just like in Perl. -- cary */
00042 /* */
00043 /*  Speed improvement by Reinhard Max:*/
00044 /*  Instead of repeatedly copying around the not yet matched part of the*/
00045 /*  string, I use [regexp]'s -start option to restrict the match to that*/
00046 /*  part. This reduces the complexity from something like O(n^1.5) to*/
00047 /*  O(n). My test case for that was:*/
00048 /*  */
00049 /*  foreach i {1 10 100 1000 10000} {*/
00050 /*      set s [string repeat x $i]*/
00051 /*      puts [time {splitx $s .}]*/
00052 /*  }*/
00053 /* */
00054 
# ::textutil::split::splitx --
#
#   Split a string into a list, using a regular expression as the
#   element separator.
#
# Arguments:
#   str     The string to split.
#   regexp  Separator pattern; defaults to one or more of tab, space,
#           carriage return or newline. An empty pattern splits the
#           string into single characters.
#
# Results:
#   A list of the pieces between separator matches. If the pattern has a
#   parenthesized subexpression that took part in a match, the text it
#   matched is added to the list after the piece preceding the separator.
#   An empty input string yields an empty list.
#
# Errors:
#   Raises an error when the pattern matches the empty string at the
#   current scan position. Such a match consumes nothing, so the scan
#   could never advance and the loop would never terminate.
#
# Two implementations are provided; which one is defined depends on whether
# the running interpreter satisfies Tcl 8.3 (availability of regexp -start).

if {[package vsatisfies [package provide Tcl] 8.3]} {

    proc ::textutil::split::splitx {str {regexp {[\t \r\n]+}}} {
        # Bugfix 476988
        if {[string length $str] == 0} {
            return {}
        }
        if {[string length $regexp] == 0} {
            return [::split $str ""]
        }
        set list  {}
        set start 0
        # -start restricts each search to the not yet consumed tail of $str,
        # so the string never has to be copied. Reported indices stay
        # relative to the whole string.
        while {[regexp -start $start -indices -- $regexp $str match submatch]} {
            foreach {subStart subEnd} $submatch break
            foreach {matchStart matchEnd} $match break
            # matchStart becomes the last index of the piece before the
            # separator, matchEnd the first index after the separator.
            incr matchStart -1
            incr matchEnd
            if {$matchEnd <= $start} {
                # Empty match exactly at the scan position: the next
                # iteration would repeat the very same search forever.
                return -code error \
                    "splitting on regexp \"$regexp\" would cause infinite loop"
            }
            lappend list [string range $str $start $matchStart]
            # A subexpression that did not take part in the match is
            # reported as {-1 -1} and is skipped by this test.
            if {$subStart >= $start} {
                lappend list [string range $str $subStart $subEnd]
            }
            set start $matchEnd
        }
        # Whatever follows the last separator is the final element.
        lappend list [string range $str $start end]
        return $list
    }

} else {
    # For Tcl <= 8.2 we do not have regexp -start, so the already processed
    # front of the string is cut off after every match instead.
    proc ::textutil::split::splitx [list str [list regexp "\[\t \r\n\]+"]] {

        if {[string length $str] == 0} {
            return {}
        }
        if {[string length $regexp] == 0} {
            return [::split $str {}]
        }

        set list  {}
        while {[regexp -indices -- $regexp $str match submatch]} {
            if {[lindex $match 1] < 0} {
                # Empty match at index 0: cutting the string would leave it
                # unchanged and the loop would never terminate.
                return -code error \
                    "splitting on regexp \"$regexp\" would cause infinite loop"
            }
            lappend list [string range $str 0 [expr {[lindex $match 0] -1}]]
            # A subexpression that did not take part in the match is
            # reported as {-1 -1} and is skipped by this test.
            if {[lindex $submatch 0] >= 0} {
                lappend list [string range $str [lindex $submatch 0] \
                                  [lindex $submatch 1]]
            }
            # Continue with the text following the separator.
            set str [string range $str [expr {[lindex $match 1]+1}] end]
        }
        lappend list $str
        return $list
    }

}
00107 
00108 /* */
00109 /*  splitn --*/
00110 /* */
00111 /*  splitn splits the string $str into chunks of length $len.  These*/
00112 /*  chunks are returned as a list.*/
00113 /* */
00114 /*  If $str really contains a ByteArray object (as retrieved from binary*/
00115 /*  encoded channels) splitn must honor this by splitting the string*/
00116 /*  into chunks of $len bytes.*/
00117 /* */
00118 /*  It is an error to call splitn with a nonpositive $len.*/
00119 /* */
00120 /*  If splitn is called with an empty string, it returns the empty list.*/
00121 /* */
00122 /*  If the length of $str is not an entire multiple of the chunk length,*/
00123 /*  the last chunk in the generated list will be shorter than $len.*/
00124 /* */
00125 /*  The implementation presented here was given by Bryan Oakley, as*/
00126 /*  part of a ``contest'' I staged on c.l.t in July 2004.  I selected*/
00127 /*  this version, as it does not rely on runtime generated code, is*/
00128 /*  very fast for chunk size one, not too bad in all the other cases,*/
00129 /*  and uses [split] or [string range] which have been around for quite*/
00130 /*  some time.*/
00131 /* */
00132 /*  -- Robert Suetterlin (robert@mpe.mpg.de)*/
00133 /* */
# ::textutil::split::splitn --
#
#   Split a string into consecutive chunks of a fixed length.
#
# Arguments:
#   str   The string to split.
#   len   Chunk length; defaults to 1.
#
# Results:
#   A list of the chunks, in order. The last chunk is shorter than $len
#   when the string length is not a multiple of $len. An empty string
#   yields an empty list.
#
# Errors:
#   Raises "len must be > 0" when $len is zero or negative.

proc ::textutil::split::splitn {str {len 1}} {

    if {$len <= 0} {
        return -code error "len must be > 0"
    }

    # Chunk size one is exactly what the builtin split does with an empty
    # separator set. It is called fully qualified, as in splitx.
    if {$len == 1} {
        return [::split $str {}]
    }

    set result [list]
    set max [string length $str]
    # i and j are the first and last index of the current chunk; both
    # advance by $len per iteration. string range clips j at the end of
    # the string, which produces the shorter final chunk.
    set i 0
    set j [expr {$len -1}]
    while {$i < $max} {
        lappend result [string range $str $i $j]
        incr i $len
        incr j $len
    }

    return $result
}
00156 
00157 /*  ### ### ### ######### ######### #########*/
00158 /*  Data structures*/
00159 
namespace eval ::textutil::split {
    # Commands made available to [namespace import].
    namespace export splitx splitn
}
00163 
00164 /*  ### ### ### ######### ######### #########*/
00165 /*  Ready*/
00166 
00167 package provide textutil::split 0.7
00168