#
# As the author of the procs 'tabify2' and 'untabify2' I suggest that the
# comments explaining their behaviour be kept in this file.
# 1) Beginners in any programming language (I am new to Tcl so I know what I
#    am talking about) can profit enormously from studying 'correct' code.
#    Of course comments will help a lot in this regard.
# 2) Many problems newbies face can be solved by directing them towards
#    available libraries - after all, libraries have been written to solve
#    recurring problems. Then they can just use them, or have a closer look
#    to see and to discover how things are done the 'Tcl way'.
# 3) And if ever a proc from a library should be less than perfect, having
#    comments explaining the behaviour of the code will surely help.
#
# This said, I will welcome any error reports or suggestions for improvements
# (especially on the 'doing things the Tcl way' aspect).
#
# Use of these sources is licensed under the same conditions as is Tcl.
#
# June 2001, Helmut Giese (hgiese@ratiosoft.com)
#
# ----------------------------------------------------------------------------
#
# The original procs 'tabify' and 'untabify' each work with complete blocks
# of $num spaces ('num' holding the tab size). While this is certainly useful
# in some circumstances, it does not reflect the way an editor works:
#   Counting columns from 1, assuming a tab size of 8 and entering '12345'
#   followed by a tab, you expect to advance to column 9. Your editor might
#   put a tab into the file or 3 spaces, depending on its configuration.
#   Now, on 'tabifying' you will expect to see those 3 spaces converted to a
#   tab (and on the other hand expect the tab *at this position* to be
#   converted to 3 spaces).
#
# This behaviour is mimicked by the new procs 'tabify2' and 'untabify2'.
# Both have one feature in common: They accept multi-line strings (a whole
# file if you want to) but in order to make life simpler for the programmer,
# they split the incoming string into individual lines and hand each line to
# a proc that does the real work.
#
# One design decision worth mentioning here:
# A single space is never converted to a tab even if its position would
# allow it.
# Single spaces occur very often, say in arithmetic expressions like
# [expr (($a + $b) * $c) < $d]. If we didn't follow the above rule we might
# need to replace one or more of them by tabs. However if the tab size gets
# changed, this expression would be formatted quite differently - which is
# probably not a good idea.
#
# 'untabifying' on the other hand might need to replace a tab with a single
# space: If the current position requires it, what else to do?
# As a consequence those two procs are asymmetric in this aspect, but I
# couldn't think of a better solution. Could you?
#
# ----------------------------------------------------------------------------
#

# ### ### ### ######### ######### #########
## Requirements

package require Tcl 8.2
package require textutil::repeat

namespace eval ::textutil::tabify {}

# ### ### ### ######### ######### #########
## API implementation

namespace eval ::textutil::tabify {
    namespace import -force ::textutil::repeat::strRepeat
}

# tabify: Replaces every block of exactly $num consecutive spaces in 'string'
# with one tab, regardless of the column at which the block starts.
#   string - text to convert (any number of lines)
#   num    - tab size, defaults to 8
# Returns the converted text.
#
proc ::textutil::tabify::tabify { string { num 8 } } {
    return [string map [list [MakeTabStr $num] \t] $string]
}

# untabify: Replaces every tab in 'string' with exactly $num spaces,
# regardless of the column at which the tab occurs.
#   string - text to convert (any number of lines)
#   num    - tab size, defaults to 8
# Returns the converted text.
#
proc ::textutil::tabify::untabify { string { num 8 } } {
    return [string map [list \t [MakeTabStr $num]] $string]
}

# MakeTabStr: Returns a string of $num spaces. The most recently built string
# is cached in the namespace variables 'TabStr' / 'TabLen' and is only rebuilt
# when a different tab size is requested.
#
proc ::textutil::tabify::MakeTabStr { num } {
    variable TabStr
    variable TabLen

    if { $TabLen != $num } then {
        set TabLen $num
        set TabStr [strRepeat " " $num]
    }

    return $TabStr
}

# ----------------------------------------------------------------------------
#
# tabifyLine: Works on a single line of text, replacing 'spaces at correct
# positions' with tabs. $num is the requested tab size.
# Returns the (possibly modified) line.
#
# 'spaces at correct positions': Only spaces which 'fill the space' between
# an arbitrary position and the next tab stop can be replaced.
# Example: With tab size 8, spaces at positions 11 - 13 will *not* be replaced,
# because an expansion of a tab at position 11 will jump up to 16.
# See also the comment at the beginning of this file why single spaces are
# *never* replaced by a tab.
#
# The proc works backwards, from the end of the string up to the beginning:
# - Set the position to start the search from ('lastPos') to 'end'.
# - Find the last occurrence of ' ' in 'line' with respect to 'lastPos'
#   ('currPos' below). This is a candidate for replacement.
# - Find the tab stop following 'currPos' using the expression
#     set nextTab [expr ($currPos + $num) - ($currPos % $num)]
#   and get the previous tab stop as well (this will be the starting
#   point for the next iteration).
# - The ' ' at 'currPos' is only a candidate for replacement if
#   1) it is just one position before a tab stop *and*
#   2) there is at least one space at its left *within the same tab cell*
#      (see comment above on not touching an isolated space).
#   Continue, if any of these conditions is not met.
# - Determine where to put the tab (that is: how many spaces to replace?)
#   by stepping up to the beginning until
#   -- you hit a non-space or
#   -- you are at the previous tab position
# - Do the replacement and continue.
#
# This algorithm only works, if $line does not contain tabs. Otherwise our
# interpretation of any position beyond the tab will be wrong. (Imagine you
# find a ' ' at position 4 in $line. If you got 3 leading tabs, your *real*
# position might be 25 (tab size of 8).) Since in real life some strings might
# already contain tabs, we test for it (and eventually call untabifyLine).
#

proc ::textutil::tabify::tabifyLine { line num } {
    if { [string first \t $line] != -1 } {
        # assure array 'Spaces' is set up 'comme il faut'
        checkArr $num
        # remove existing tabs
        set line [untabifyLine $line $num]
    }

    # 'end' is not numeric, so the loop test below compares it as a string
    # and is true on the first pass; from then on 'lastPos' is an integer.
    set lastPos end

    while { $lastPos > 0 } {
        set currPos [string last " " $line $lastPos]
        if { $currPos == -1 } {
            # no more spaces
            break
        }

        set nextTab [expr {($currPos + $num) - ($currPos % $num)}]
        set prevTab [expr {$nextTab - $num}]

        # prepare for next round: continue at 'previous tab stop - 1'
        set lastPos [expr {$prevTab - 1}]

        if { ($currPos + 1) != $nextTab } {
            continue                 ;# crit. (1)
        }

        # crit. (2): the left neighbour must be a space AND belong to the
        # same tab cell. Without the cell check a tab size of 1 would merge
        # two one-column cells into a single tab and shift the text behind it.
        # ('!=' rather than 'ne': the package still supports Tcl 8.2.)
        set leftPos [expr {$currPos - 1}]
        if { ($leftPos < $prevTab) || ([string index $line $leftPos] != " ") } {
            continue                 ;# crit. (2)
        }

        # now step backwards while there are spaces
        for {set pos [expr {$currPos - 2}]} {$pos >= $prevTab} {incr pos -1} {
            if { [string index $line $pos] != " " } {
                break
            }
        }

        # ... and replace them
        set line [string replace $line [expr {$pos + 1}] $currPos \t]
    }
    return $line
}

#
# Helper proc for 'untabifyLine': Checks if all needed elements of array
# 'Spaces' exist and creates the missing ones if needed.
# After the call Spaces(0) .. Spaces($num) are available; 'TabLen2' records
# the largest tab size prepared so far, so repeated calls do no extra work.
#

proc ::textutil::tabify::checkArr { num } {
    variable TabLen2
    variable Spaces

    if { $num > $TabLen2 } {
        for { set i [expr {$TabLen2 + 1}] } { $i <= $num } { incr i } {
            set Spaces($i) [strRepeat " " $i]
        }
        set TabLen2 $num
    }
}


# untabifyLine: Works on a single line of text, replacing tabs with enough
# spaces to get to the next tab position.
# Returns the (possibly modified) line.
#
# The procedure is straight forward:
# - Find the next tab.
# - Calculate the next tab position following it.
# - Delete the tab and insert as many spaces as needed to get there.
#
# The caller has to run 'checkArr $num' beforehand: the replacement strings
# are read from the array 'Spaces' and are not created here.
#

proc ::textutil::tabify::untabifyLine { line num } {
    variable Spaces

    set currPos 0
    while { 1 } {
        set currPos [string first \t $line $currPos]
        if { $currPos == -1 } {
            # no more tabs
            break
        }

        # how far is the next tab position ?
        set dist [expr {$num - ($currPos % $num)}]
        # replace '\t' at $currPos with $dist spaces
        set line [string replace $line $currPos $currPos $Spaces($dist)]

        # set up for next round (not absolutely necessary but maybe a trifle
        # more efficient)
        incr currPos $dist
    }
    return $line
}

# tabify2: Replace all 'appropriate' spaces as discussed above with tabs.
# 'string' might hold any number of lines, 'num' is the requested tab size.
# Returns (possibly modified) 'string'.
#
proc ::textutil::tabify::tabify2 { string { num 8 } } {

    # split string into individual lines
    set inLst [split $string \n]

    # now work on each line
    set outLst [list]
    foreach line $inLst {
        lappend outLst [tabifyLine $line $num]
    }

    # return all as one string
    return [join $outLst \n]
}


# untabify2: Replace all tabs with the appropriate number of spaces.
# 'string' might hold any number of lines, 'num' is the requested tab size.
# Returns (possibly modified) 'string'.
#
proc ::textutil::tabify::untabify2 { string { num 8 } } {

    # assure array 'Spaces' is set up 'comme il faut'
    checkArr $num

    set inLst [split $string \n]

    set outLst [list]
    foreach line $inLst {
        lappend outLst [untabifyLine $line $num]
    }

    return [join $outLst \n]
}



# ### ### ### ######### ######### #########
## Data structures

namespace eval ::textutil::tabify {
    # Cache used by 'MakeTabStr': the last requested tab size and the
    # matching string of spaces.
    variable TabLen  8
    variable TabStr  [strRepeat " " $TabLen]

    namespace export tabify untabify tabify2 untabify2

    # The proc 'untabify2' uses the following variables for efficiency.
    # Since a tab can be replaced by one up to 'tab size' spaces, it is handy
    # to have the appropriate 'space strings' available. This is the use of
    # the array 'Spaces', where 'Spaces(n)' contains just 'n' spaces.
    # The variable 'TabLen2' remembers the biggest tab size used.

    variable TabLen2 0
    variable Spaces
    array set Spaces {0 ""}
}

# ### ### ### ######### ######### #########
## Ready

package provide textutil::tabify 0.7