tabify.tcl

Go to the documentation of this file.
00001 /* */
00002 /*  As the author of the procs 'tabify2' and 'untabify2' I suggest that the*/
00003 /*  comments explaining their behaviour be kept in this file.*/
00004 /*  1) Beginners in any programming language (I am new to Tcl so I know what I*/
00005 /*     am talking about) can profit enormously from studying 'correct' code.*/
00006 /*     Of course comments will help a lot in this regard.*/
00007 /*  2) Many problems newbies face can be solved by directing them towards*/
00008 /*     available libraries - after all, libraries have been written to solve*/
00009 /*     recurring problems. Then they can just use them, or have a closer look*/
00010 /*     to see and to discover how things are done the 'Tcl way'.*/
00011 /*  3) And if ever a proc from a library should be less than perfect, having*/
00012 /*     comments explaining the behaviour of the code will surely help.*/
00013 /* */
00014 /*  This said, I will welcome any error reports or suggestions for improvements*/
00015 /*  (especially on the 'doing things the Tcl way' aspect).*/
00016 /* */
00017 /*  Use of these sources is licensed under the same conditions as is Tcl.*/
00018 /* */
00019 /*  June 2001, Helmut Giese (hgiese@ratiosoft.com)*/
00020 /* */
00021 /*  ----------------------------------------------------------------------------*/
00022 /* */
00023 /*  The original procs 'tabify' and 'untabify' each work with complete blocks*/
00024 /*  of $num spaces ('num' holding the tab size). While this is certainly useful*/
00025 /*  in some circumstances, it does not reflect the way an editor works:*/
00026 /*      Counting columns from 1, assuming a tab size of 8 and entering '12345'*/
00027 /*    followed by a tab, you expect to advance to column 9. Your editor might*/
00028 /*    put a tab into the file or 3 spaces, depending on its configuration.*/
00029 /*  Now, on 'tabifying' you will expect to see those 3 spaces converted to a*/
00030 /*  tab (and on the other hand expect the tab *at this position* to be*/
00031 /*  converted to 3 spaces).*/
00032 /* */
00033 /*  This behaviour is mimicked by the new procs 'tabify2' and 'untabify2'.*/
00034 /*    Both have one feature in common: They accept multi-line strings (a whole*/
00035 /*    file if you want to) but in order to make life simpler for the programmer,*/
00036 /*    they split the incoming string into individual lines and hand each line to*/
00037 /*    a proc that does the real work.*/
00038 /* */
00039 /*    One design decision worth mentioning here:*/
00040 /*       A single space is never converted to a tab even if its position would*/
00041 /*       allow it.*/
00042 /*    Single spaces occur very often, say in arithmetic expressions like*/
00043 /*    [expr (($a + $b) * $c) < $d]. If we didn't follow the above rule we might*/
00044 /*    need to replace one or more of them with tabs. However, if the tab size gets*/
00045 /*    changed, this expression would be formatted quite differently - which is*/
00046 /*    probably not a good idea.*/
00047 /* */
00048 /*    'untabifying' on the other hand might need to replace a tab with a single*/
00049 /*    space: If the current position requires it, what else to do?*/
00050 /*    As a consequence, those two procs are asymmetric in this respect, but I*/
00051 /*    couldn't think of a better solution. Could you?*/
00052 /* */
00053 /*  ----------------------------------------------------------------------------*/
00054 /* */
00055 
00056 /*  ### ### ### ######### ######### #########*/
00057 /*  Requirements*/
00058 
00059 package require Tcl 8.2
00060 package require textutil::repeat
00061 
      # Create the namespace up front so that the procs defined below can be
      # placed in it.
      # NOTE(review): the listing shows 'namespace NAME {...}'; Tcl itself needs
      # 'namespace eval NAME {...}'. The 'eval' subcommand was presumably dropped
      # by the documentation filter - confirm against the original source.
00062 namespace ::textutil::tabify {}
00063 
00064 /*  ### ### ### ######### ######### #########*/
00065 /*  API implementation*/
00066 
      # Make 'strRepeat' from textutil::repeat callable by its short name inside
      # this namespace; '-force' lets the import replace an existing command of
      # the same name.
00067 namespace ::textutil::tabify {
00068     namespace import -force ::textutil::repeat::strRepeat
00069 }
00070 
00071 ret  ::textutil::tabify::tabify ( type string , optional num =8  ) {
          # tabify: Replaces each block of $num consecutive spaces in 'string'
          # with one tab and returns the result. 'num' is the tab size
          # (default 8).
          # The replacement is a plain 'string map': blocks are matched wherever
          # they occur, without regard to the column they start in.
00072     return [string map [list [MakeTabStr $num] \t] $string]
00073 }
00074 
00075 ret  ::textutil::tabify::untabify ( type string , optional num =8  ) {
          # untabify: Replaces each tab in 'string' with exactly $num spaces and
          # returns the result. 'num' is the tab size (default 8).
          # Every tab gets the same fixed-width replacement; the column at which
          # the tab occurs is not taken into account.
00076     return [string map [list \t [MakeTabStr $num]] $string]
00077 }
00078 
00079 ret  ::textutil::tabify::MakeTabStr ( type num ) {
          # MakeTabStr: Returns a string of $num spaces.
          # The most recently built string is cached in the namespace variables
          # 'TabStr' (the string) and 'TabLen' (its length), so repeated calls
          # with the same tab size do not rebuild it.
00080     variable TabStr
00081     variable TabLen
00082 
          # rebuild the cached string only when a different size is requested
00083     if { $TabLen != $num } then {
00084     set TabLen $num
00085     set TabStr [strRepeat " " $num]
00086     }
00087 
00088     return $TabStr
00089 }
00090 
00091 /*  ----------------------------------------------------------------------------*/
00092 /* */
00093 /*  tabifyLine: Works on a single line of text, replacing 'spaces at correct*/
00094 /*          positions' with tabs. $num is the requested tab size.*/
00095 /*      Returns the (possibly modified) line.*/
00096 /* */
00097 /*  'spaces at correct positions': Only spaces which 'fill the space' between*/
00098 /*  an arbitrary position and the next tab stop can be replaced. */
00099 /*  Example: With tab size 8, spaces at positions 11 - 13 will *not* be replaced,*/
00100 /*           because an expansion of a tab at position 11 will jump up to 16.*/
00101 /*  See also the comment at the beginning of this file why single spaces are*/
00102 /*  *never* replaced by a tab.*/
00103 /* */
00104 /*  The proc works backwards, from the end of the string up to the beginning:*/
00105 /*  - Set the position to start the search from ('lastPos') to 'end'.*/
00106 /*  - Find the last occurrence of ' ' in 'line' with respect to 'lastPos'*/
00107 /*          ('currPos' below). This is a candidate for replacement.*/
00108 /*        - Find the tab stop following 'currPos' using the expression*/
00109 /*            set nextTab [expr ($currPos + $num) - ($currPos % $num)]*/
00110 /*          and get the previous tab stop as well (this will be the starting */
00111 /*          point for the next iteration).*/
00112 /*  - The ' ' at 'currPos' is only a candidate for replacement if*/
00113 /*    1) it is just one position before a tab stop *and**/
00114 /*    2) there is at least one space at its left (see comment above on not*/
00115 /*       touching an isolated space).*/
00116 /*    Continue, if any of these conditions is not met.*/
00117 /*  - Determine where to put the tab (that is: how many spaces to replace?)*/
00118 /*    by stepping up to the beginning until*/
00119 /*      -- you hit a non-space or*/
00120 /*      -- you are at the previous tab position*/
00121 /*  - Do the replacement and continue.*/
00122 /* */
00123 /*  This algorithm only works if $line does not contain tabs. Otherwise our */
00124 /*  interpretation of any position beyond the tab will be wrong. (Imagine you */
00125 /*  find a ' ' at position 4 in $line. If you got 3 leading tabs, your *real**/
00126 /*  position might be 25 (tab size of 8).) Since in real life some strings might */
00127 /*  already contain tabs, we test for it (and, if so, call untabifyLine first).*/
00128 /* */
00129 
00130 ret  ::textutil::tabify::tabifyLine ( type line , type num ) {
          # tabifyLine: Converts runs of two or more spaces that end exactly at
          # a tab stop into a single tab. 'line' is one line of text, 'num' the
          # tab size. Returns the (possibly modified) line.
          # If the line already contains tabs they are expanded first, because
          # the column arithmetic below treats string indices as columns.
00131     if { [string first \t $line] != -1 } {      
00132     # make sure array 'Spaces' holds the space strings untabifyLine needs
00133     checkArr $num
00134     # remove existing tabs
00135     set line [untabifyLine $line $num]
00136     }
00137 
          # 'lastPos' starts as the index keyword 'end' and afterwards holds
          # integer positions; the scan runs from the end towards the beginning
00138     set lastPos end
00139 
00140     while { $lastPos > 0 } {
00141     set currPos [string last " " $line $lastPos]
00142     if { $currPos == -1 } {
00143         # no more spaces
00144         break;
00145     }
00146 
          # tab stops enclosing currPos: prevTab <= currPos < nextTab
00147     set nextTab [expr {($currPos + $num) - ($currPos % $num)}]
00148     set prevTab [expr {$nextTab - $num}]
00149 
00150     # prepare for next round: continue at 'previous tab stop - 1'
00151     set lastPos [expr {$prevTab - 1}]
00152 
00153     if { ($currPos + 1) != $nextTab } {
00154         continue            ;# crit. (1)
00155     }
00156 
          # crit. (2): the left neighbour must be a space inside the same tab
          # cell, i.e. at or after prevTab. Without the cell check a tab size
          # of 1 would fold two one-column cells into one tab and shift the
          # rest of the line to the left.
00157     if { ($currPos - 1) < $prevTab || [string index $line [expr {$currPos - 1}]] != " " } {
00158         continue            ;# crit. (2)
00159     }
00160 
00161     # now step backwards while there are spaces
00162     for {set pos [expr {$currPos - 2}]} {$pos >= $prevTab} {incr pos -1} {
00163         if { [string index $line $pos] != " " } {
00164         break;
00165         }
00166     }
00167 
00168     # ... and replace them ('pos' is the index just left of the run)
00169     set line [string replace $line [expr {$pos + 1}] $currPos \t]
00170     }
00171     return $line
00172 }
00173 
00174 /* */
00175 /*  Helper proc for 'untabifyLine': Checks if all needed elements of array*/
00176 /*  'Spaces' exist and creates the missing ones if needed.*/
00177 /* */
00178 
00179 ret  ::textutil::tabify::checkArr ( type num ) {
          # checkArr: Makes sure Spaces(1) .. Spaces($num) exist, where
          # Spaces(i) is a string of i spaces. 'TabLen2' records the largest
          # size filled in so far, so only the missing elements are created and
          # the array never shrinks. Returns nothing of interest.
00180     variable TabLen2
00181     variable Spaces
00182 
00183     if { $num > $TabLen2 } {
          # create only the elements above the current high-water mark
00184     for { set i [expr {$TabLen2 + 1}] } { $i <= $num } { incr i } {
00185         set Spaces($i) [strRepeat " " $i]
00186     }
00187     set TabLen2 $num
00188     }
00189 }
00190 
00191 
00192 /*  untabifyLine: Works on a single line of text, replacing tabs with enough*/
00193 /*      spaces to get to the next tab position.*/
00194 /*      Returns the (possibly modified) line.*/
00195 /* */
00196 /*  The procedure is straightforward:*/
00197 /*  - Find the next tab.*/
00198 /*  - Calculate the next tab position following it.*/
00199 /*  - Delete the tab and insert as many spaces as needed to get there.*/
00200 /* */
00201 
00202 ret  ::textutil::tabify::untabifyLine ( type line , type num ) {
          # untabifyLine: Replaces every tab in 'line' with the number of spaces
          # needed to reach the next tab stop for tab size 'num' and returns the
          # resulting line.
          # Reads the space strings from array 'Spaces' and does not create
          # them itself; both callers in this file run 'checkArr $num' first.
00203     variable Spaces
00204 
00205     set currPos 0
00206     while { 1 } {
00207     set currPos [string first \t $line $currPos]
00208     if { $currPos == -1 } {
00209         # no more tabs
00210         break
00211     }
00212 
00213     # how far is the next tab position ? (1 .. $num for a positive $num)
00214     set dist [expr {$num - ($currPos % $num)}]
00215     # replace '\t' at $currPos with $dist spaces
00216     set line [string replace $line $currPos $currPos $Spaces($dist)]
00217 
00218     # resume the search just behind the inserted spaces (not absolutely
00219     # necessary but maybe a trifle more efficient)
00220     incr currPos $dist
00221     }
00222     return $line
00223 }
00224 
00225 /*  tabify2: Replace all 'appropriate' spaces as discussed above with tabs.*/
00226 /*  'string' might hold any number of lines, 'num' is the requested tab size.*/
00227 /*  Returns (possibly modified) 'string'.*/
00228 /* */
00229 ret  ::textutil::tabify::tabify2 ( type string , optional num =8  ) {
          # Splits 'string' at newlines, passes every line through tabifyLine
          # with tab size 'num' (default 8) and joins the results with newlines
          # again, so the number of lines is unchanged.
00230 
00231     # split string into individual lines
00232     set inLst [split $string \n]
00233 
00234     # now work on each line
00235     set outLst [list]
00236     foreach line $inLst {
00237     lappend outLst [tabifyLine $line $num]
00238     }
00239 
00240     # return all as one string
00241     return [join $outLst \n]
00242 }
00243 
00244 
00245 /*  untabify2: Replace all tabs with the appropriate number of spaces.*/
00246 /*  'string' might hold any number of lines, 'num' is the requested tab size.*/
00247 /*  Returns (possibly modified) 'string'.*/
00248 /* */
00249 ret  ::textutil::tabify::untabify2 ( type string , optional num =8  ) {
          # Splits 'string' at newlines, passes every line through untabifyLine
          # with tab size 'num' (default 8) and joins the results with newlines
          # again, so the number of lines is unchanged.
00250 
00251     # make sure array 'Spaces' is set up properly (once, before the loop)
00252     checkArr $num
00253 
          # split string into individual lines
00254     set inLst [split $string \n]
00255 
          # expand the tabs of each line
00256     set outLst [list]
00257     foreach line $inLst {
00258     lappend outLst [untabifyLine $line $num]
00259     }
00260 
          # return all as one string
00261     return [join $outLst \n]
00262 }
00263 
00264 
00265 
00266 /*  ### ### ### ######### ######### #########*/
00267 /*  Data structures*/
00268 
      # Namespace-level state and the list of exported commands.
      # NOTE(review): as elsewhere in this listing, 'namespace NAME {...}'
      # presumably stands for 'namespace eval NAME {...}' - confirm.
00269 namespace ::textutil::tabify {
          # Cache used by MakeTabStr: 'TabStr' is a string of 'TabLen' spaces.
00270     variable TabLen  8
00271     variable TabStr  [strRepeat " " $TabLen]
00272 
          # Public commands; the remaining procs of this namespace are not
          # exported.
00273     namespace export tabify untabify tabify2 untabify2
00274     
00275     /*  The proc 'untabify2' uses the following variables for efficiency.*/
00276     /*  Since a tab can be replaced by one up to 'tab size' spaces, it is handy*/
00277     /*  to have the appropriate 'space strings' available. This is the use of*/
00278     /*  the array 'Spaces', where 'Spaces(n)' contains just 'n' spaces.*/
00279     /*  The variable 'TabLen2' remembers the biggest tab size used.*/
00280 
          # TabLen2 starts at 0, so the first call of checkArr fills in
          # Spaces(1) .. Spaces(num).
00281     variable  TabLen2 0
00282     variable  Spaces
          # Seed the array with the zero-length entry.
          # NOTE(review): 'array Spaces = {0 ""}' presumably stands for
          # 'array set Spaces {0 ""}' in the original source - confirm.
00283     array  Spaces =  {0 ""}
00284 }
00285 
00286 /*  ### ### ### ######### ######### #########*/
00287 /*  Ready*/
00288 
      # Announce this file as package 'textutil::tabify', version 0.7.
00289 package provide textutil::tabify 0.7
00290