00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 package require Tcl 8.2
00019 package require textutil::repeat
00020 package require textutil::string
00021
00022 namespace ::textutil::adjust {}
00023
00024
00025
00026
00027 namespace ::textutil::adjust {
00028 namespace import -force ::textutil::repeat::strRepeat
00029 }
00030
00031 ret ::textutil::adjust::adjust (type text , type args) {
00032 if {[string length [string trim $text]] == 0} {
00033 return ""
00034 }
00035
00036 Configure $args
00037 Adjust text newtext
00038
00039 return $newtext
00040 }
00041
00042 ret ::textutil::adjust::Configure (type args) {
00043 variable Justify left
00044 variable Length 72
00045 variable FullLine 0
00046 variable StrictLength 0
00047 variable Hyphenate 0
00048 variable HyphPatterns ; # hyphenation patterns (TeX)
00049
00050 set args [ lindex $args 0 ]
00051 foreach { option value } $args {
00052 switch -exact -- $option {
00053 -full {
00054 if { ![ string is boolean -strict $value ] } then {
00055 error "expected boolean but got \"$value\""
00056 }
00057 set FullLine [ string is true $value ]
00058 }
00059 -hyphenate {
00060 # the word exceeding the length of line is tried to be
00061 # hyphenated; if a word cannot be hyphenated to fit into
00062 # the line processing stops! The length of the line should
00063 # be set to a reasonable value!
00064
00065 if { ![ string is boolean -strict $value ] } then {
00066 error "expected boolean but got \"$value\""
00067 }
00068 set Hyphenate [string is true $value]
00069 if { $Hyphenate && ![info exists HyphPatterns(_LOADED_)]} {
00070 error "hyphenation patterns not loaded!"
00071 }
00072 }
00073 -justify {
00074 set lovalue [ string tolower $value ]
00075 switch -exact -- $lovalue {
00076 left -
00077 right -
00078 center -
00079 plain {
00080 set Justify $lovalue
00081 }
00082 default {
00083 error "bad value \"$value\": should be center, left, plain or right"
00084 }
00085 }
00086 }
00087 -length {
00088 if { ![ string is integer $value ] } then {
00089 error "expected positive integer but got \"$value\""
00090 }
00091 if { $value < 1 } then {
00092 error "expected positive integer but got \"$value\""
00093 }
00094 set Length $value
00095 }
00096 -strictlength {
00097 # the word exceeding the length of line is moved to the
00098 # next line without hyphenation; words longer than given
00099 # line length are cut into smaller pieces
00100
00101 if { ![ string is boolean -strict $value ] } then {
00102 error "expected boolean but got \"$value\""
00103 }
00104 set StrictLength [ string is true $value ]
00105 }
00106 default {
00107 error "bad option \"$option\": must be -full, -hyphenate, \
00108 -justify, -length, or -strictlength"
00109 }
00110 }
00111 }
00112
00113 return ""
00114 }
00115
00116
00117
00118
00119
00120
00121 ret ::textutil::adjust::Adjust ( type varOrigName , type varNewName ) {
00122 variable Length
00123 variable FullLine
00124 variable StrictLength
00125 variable Hyphenate
00126
00127 upvar $varOrigName orig
00128 upvar $varNewName text
00129
00130 set pos 0; # Cursor after writing
00131 set line ""
00132 set text ""
00133
00134
00135 if {!$FullLine} {
00136 regsub -all -- "(\n)|(\t)" $orig " " orig
00137 regsub -all -- " +" $orig " " orig
00138 regsub -all -- "(^ *)|( *\$)" $orig "" orig
00139 }
00140
00141 set words [split $orig]
00142 set numWords [llength $words]
00143 set numline 0
00144
00145 for {set cnt 0} {$cnt < $numWords} {incr cnt} {
00146
00147 set w [lindex $words $cnt]
00148 set wLen [string length $w]
00149
00150 # the word $w doesn't fit into the present line
00151 # case #1: we try to hyphenate
00152
00153 if {$Hyphenate && ($pos+$wLen >= $Length)} {
00154 # Hyphenation instructions
00155 set w2 [textutil::adjust::Hyphenation $w]
00156
00157 set iMax [llength $w2]
00158 if {$iMax == 1 && [string length $w] > $Length} {
00159 # word cannot be hyphenated and exceeds linesize
00160
00161 error "Word \"$w2\" can\'t be hyphenated\
00162 and exceeds linesize $Length!"
00163 } else {
00164 # hyphenating of $w was successfull, but we have to look
00165 # that every sylable would fit into the line
00166
00167 foreach x $w2 {
00168 if {[string length $x] >= $Length} {
00169 error "Word \"$w\" can\'t be hyphenated\
00170 to fit into linesize $Length!"
00171 }
00172 }
00173 }
00174
00175 for {set i 0; set w3 ""} {$i < $iMax} {incr i} {
00176 set syl [lindex $w2 $i]
00177 if {($pos+[string length " $w3$syl-"]) > $Length} {break}
00178 append w3 $syl
00179 }
00180 for {set w4 ""} {$i < $iMax} {incr i} {
00181 set syl [lindex $w2 $i]
00182 append w4 $syl
00183 }
00184
00185 if {[string length $w3] && [string length $w4]} {
00186 # hyphenation was successfull: redefine
00187 # list of words w => {"$w3-" "$w4"}
00188
00189 set x [lreplace $words $cnt $cnt "$w4"]
00190 set words [linsert $x $cnt "$w3-"]
00191 set w [lindex $words $cnt]
00192 set wLen [string length $w]
00193 incr numWords
00194 }
00195 }
00196
00197 # the word $w doesn't fit into the present line
00198 # case #2: we try to cut the word into pieces
00199
00200 if {$StrictLength && ([string length $w] > $Length)} {
00201 # cut word into two pieces
00202 set w2 $w
00203
00204 set over [expr {$pos+2+$wLen-$Length}]
00205 set w3 [string range $w2 0 $Length]
00206 set w4 [string range $w2 [expr {$Length+1}] end]
00207
00208 set x [lreplace $words $cnt $cnt $w4]
00209 set words [linsert $x $cnt $w3 ]
00210 set w [lindex $words $cnt]
00211 set wLen [string length $w]
00212 incr numWords
00213 }
00214
00215 # continuing with the normal procedure
00216
00217 if {($pos+$wLen < $Length)} {
00218 # append word to current line
00219
00220 if {$pos} {append line " "; incr pos}
00221 append line $w
00222 incr pos $wLen
00223 } else {
00224 # line full => write buffer and begin a new line
00225
00226 if {[string length $text]} {append text "\n"}
00227 append text [Justification $line [incr numline]]
00228 set line $w
00229 set pos $wLen
00230 }
00231 }
00232
00233 # write buffer and return!
00234
00235 if {[string length $text]} {append text "\n"}
00236 append text [Justification $line end]
00237 return $text
00238 }
00239
00240
00241
00242
00243
00244
00245
00246
00247
00248
00249
00250
00251
00252
00253
00254
00255 ret ::textutil::adjust::Justification ( type line , type index ) {
00256 variable Justify
00257 variable Length
00258 variable FullLine
00259
00260 set len [string length $line]; # length of current line
00261
00262 if { $Length <= $len } then {
00263 # the length of current line ($len) is equal as or greater than
00264 # the value provided for text formatting ($Length) => to avoid
00265 # inifinite loops we leave $line unchanged and return!
00266
00267 return $line
00268 }
00269
00270 # Special case:
00271 # for the last line, and if the justification is set to 'plain'
00272 # the real justification is 'left' if the length of the line
00273 # is less than 90% (rounded) of the max length allowed. This is
00274 # to avoid expansion of this line when it is too small: without
00275 # it, the added spaces will 'unbeautify' the result.
00276 #
00277
00278 set justify $Justify
00279 if { ( "$index" == "end" ) && \
00280 ( "$Justify" == "plain" ) && \
00281 ( $len < round($Length * 0.90) ) } then {
00282 set justify left
00283 }
00284
00285 # For a left justification, nothing to do, but to
00286 # add some spaces at the end of the line if requested
00287
00288 if { "$justify" == "left" } then {
00289 set jus ""
00290 if { $FullLine } then {
00291 set jus [strRepeat " " [ expr { $Length - $len } ]]
00292 }
00293 return "${line}${jus}"
00294 }
00295
00296 # For a right justification, just add enough spaces
00297 # at the beginning of the line
00298
00299 if { "$justify" == "right" } then {
00300 set jus [strRepeat " " [ expr { $Length - $len } ]]
00301 return "${jus}${line}"
00302 }
00303
00304 # For a center justification, add half of the needed spaces
00305 # at the beginning of the line, and the rest at the end
00306 # only if needed.
00307
00308 if { "$justify" == "center" } then {
00309 set mr [ expr { ( $Length - $len ) / 2 } ]
00310 set ml [ expr { $Length - $len - $mr } ]
00311 set jusl [strRepeat " " $ml]
00312 set jusr [strRepeat " " $mr]
00313 if { $FullLine } then {
00314 return "${jusl}${line}${jusr}"
00315 } else {
00316 return "${jusl}${line}"
00317 }
00318 }
00319
00320 # For a plain justification, it's a little bit complex:
00321 #
00322 # if some spaces are missing, then
00323 #
00324 # 1) sort the list of words in the current line by decreasing size
00325 # 2) foreach word, add one space before it, except if it's the
00326 # first word, until enough spaces are added
00327 # 3) rebuild the line
00328
00329 if { "$justify" == "plain" } then {
00330 set miss [ expr { $Length - [ string length $line ] } ]
00331
00332 # Bugfix tcllib-bugs-860753 (jhv)
00333
00334 set words [split $line]
00335 set numWords [llength $words]
00336
00337 if {$numWords < 2} {
00338 # current line consists of less than two words - we can't
00339 # insert blanks to achieve a plain justification => leave
00340 # $line unchanged and return!
00341
00342 return $line
00343 }
00344
00345 for {set i 0; set totalLen 0} {$i < $numWords} {incr i} {
00346 set w($i) [lindex $words $i]
00347 if {$i > 0} {set w($i) " $w($i)"}
00348 set wLen($i) [string length $w($i)]
00349 set totalLen [expr {$totalLen+$wLen($i)}]
00350 }
00351
00352 set miss [expr {$Length - $totalLen}]
00353
00354 # len walks through all lengths of words of the line under
00355 # consideration
00356
00357 for {set len 1} {$miss > 0} {incr len} {
00358 for {set i 1} {($i < $numWords) && ($miss > 0)} {incr i} {
00359 if {$wLen($i) == $len} {
00360 set w($i) " $w($i)"
00361 incr wLen($i)
00362 incr miss -1
00363 }
00364 }
00365 }
00366
00367 set line ""
00368 for {set i 0} {$i < $numWords} {incr i} {
00369 set line "$line$w($i)"
00370 }
00371
00372 # End of bugfix
00373
00374 return "${line}"
00375 }
00376
00377 error "Illegal justification key \"$justify\""
00378 }
00379
00380 ret ::textutil::adjust::SortList ( type list , type dir , type index ) {
00381
00382 if { [ catch { lsort -integer -$dir -index $index $list } sl ] != 0 } then {
00383 error "$sl"
00384 }
00385
00386 return $sl
00387 }
00388
00389
00390
00391
00392
00393
00394
00395
00396
00397
00398
00399
00400
00401
00402
00403
00404 ret ::textutil::adjust::Hyphenation ( type str ) {
00405
00406 # if there are manual set hyphenation marks e.g. "Recht\-schrei\-bung"
00407 # use these for hyphenation and return
00408
00409 if {[regexp {[^\\-]*[\\-][.]*} $str]} {
00410 regsub -all {(\\)(-)} $str {-} tmp
00411 return [split $tmp -]
00412 }
00413
00414 # Don't hyphenate very short words! Minimum length for hyphenation
00415 # is set to 3 characters!
00416
00417 if { [string length $str] < 4 } then { return $str }
00418
00419 # otherwise follow Knuth's algorithm
00420
00421 variable HyphPatterns; # hyphenation patterns (TeX)
00422
00423 set w ".[string tolower $str]."; # transform to lower case
00424 set wLen [string length $w]; # and add delimiters
00425
00426 # Initialize hyphenation weights
00427
00428 set s {}
00429 for {set i 0} {$i < $wLen} {incr i} {
00430 lappend s 0
00431 }
00432
00433 for {set i 0} {$i < $wLen} {incr i} {
00434 set kmax [expr {$wLen-$i}]
00435 for {set k 1} {$k < $kmax} {incr k} {
00436 set sw [string range $w $i [expr {$i+$k}]]
00437 if {[info exists HyphPatterns($sw)]} {
00438 set hw $HyphPatterns($sw)
00439 set hwLen [string length $hw]
00440 for {set l1 0; set l2 0} {$l1 < $hwLen} {incr l1} {
00441 set c [string index $hw $l1]
00442 if {[string is digit $c]} {
00443 set sPos [expr {$i+$l2}]
00444 if {$c > [lindex $s $sPos]} {
00445 set s [lreplace $s $sPos $sPos $c]
00446 }
00447 } else {
00448 incr l2
00449 }
00450 }
00451 }
00452 }
00453 }
00454
00455 # Replace all even hyphenation weigths by zero
00456
00457 for {set i 0} {$i < [llength $s]} {incr i} {
00458 set c [lindex $s $i]
00459 if {!($c%2)} { set s [lreplace $s $i $i 0] }
00460 }
00461
00462 # Don't start with a hyphen! Take also care of words enclosed in quotes
00463 # or that someone has forgotten to put a blank between a punctuation
00464 # character and the following word etc.
00465
00466 for {set i 1} {$i < ($wLen-1)} {incr i} {
00467 set c [string range $w $i end]
00468 if {[regexp {^[:alpha:][.]*} $c]} {
00469 for {set k 1} {$k < ($i+1)} {incr k} {
00470 set s [lreplace $s $k $k 0]
00471 }
00472 break
00473 }
00474 }
00475
00476 # Don't separate the last character of a word with a hyphen
00477
00478 set max [expr {[llength $s]-2}]
00479 if {$max} {set s [lreplace $s $max end 0]}
00480
00481 # return the syllabels of the hyphenated word as a list!
00482
00483 set ret ""
00484 set w ".$str."
00485 for {set i 1} {$i < ($wLen-1)} {incr i} {
00486 if {[lindex $s $i]} { append ret - }
00487 append ret [string index $w $i]
00488 }
00489 return [split $ret -]
00490 }
00491
00492
00493
00494
00495
00496
00497
00498
00499
00500
00501
00502 ret ::textutil::adjust::listPredefined () {
00503 variable here
00504 return [glob -type f -directory $here -tails *.tex]
00505 }
00506
00507
00508
00509
00510
00511
00512
00513
00514
00515
00516
00517
00518
00519 ret ::textutil::adjust::getPredefined (type name) {
00520 variable here
00521
00522 if {![string match *.tex $name]} {
00523 return -code error \
00524 "Illegal hyphenation file \"$name\""
00525 }
00526 set path [file join $here $name]
00527 if {![file exists $path]} {
00528 return -code error \
00529 "Unknown hyphenation file \"$path\""
00530 }
00531 return $path
00532 }
00533
00534
00535
00536
00537
00538
00539
00540
00541 ret ::textutil::adjust::readPatterns ( type filNam ) {
00542
00543 variable HyphPatterns; # hyphenation patterns (TeX)
00544
00545 # HyphPatterns(_LOADED_) is used as flag for having loaded
00546 # hyphenation patterns from the respective file (TeX format)
00547
00548 if {[info exists HyphPatterns(_LOADED_)]} {
00549 unset HyphPatterns(_LOADED_)
00550 }
00551
00552 # the array xlat provides translation from TeX encoded characters
00553 # to those of the ISO-8859-1 character set
00554
00555 set xlat(\"s) \337; # 223 := sharp s "
00556 set xlat(\`a) \340; # 224 := a, grave
00557 set xlat(\'a) \341; # 225 := a, acute
00558 set xlat(\^a) \342; # 226 := a, circumflex
00559 set xlat(\"a) \344; # 228 := a, diaeresis "
00560 set xlat(\`e) \350; # 232 := e, grave
00561 set xlat(\'e) \351; # 233 := e, acute
00562 set xlat(\^e) \352; # 234 := e, circumflex
00563 set xlat(\`i) \354; # 236 := i, grave
00564 set xlat(\'i) \355; # 237 := i, acute
00565 set xlat(\^i) \356; # 238 := i, circumflex
00566 set xlat(\~n) \361; # 241 := n, tilde
00567 set xlat(\`o) \362; # 242 := o, grave
00568 set xlat(\'o) \363; # 243 := o, acute
00569 set xlat(\^o) \364; # 244 := o, circumflex
00570 set xlat(\"o) \366; # 246 := o, diaeresis "
00571 set xlat(\`u) \371; # 249 := u, grave
00572 set xlat(\'u) \372; # 250 := u, acute
00573 set xlat(\^u) \373; # 251 := u, circumflex
00574 set xlat(\"u) \374; # 252 := u, diaeresis "
00575
00576 set fd [open $filNam RDONLY]
00577 set status 0
00578
00579 while {[gets $fd line] >= 0} {
00580
00581 switch -exact $status {
00582 PATTERNS {
00583 if {[regexp {^\}[.]*} $line]} {
00584 # End of patterns encountered: set status
00585 # and ignore that line
00586 set status 0
00587 continue
00588 } else {
00589 # This seems to be pattern definition line; to process it
00590 # we have first to do some editing
00591 #
00592 # 1) eat comments in a pattern definition line
00593 # 2) eat braces and coded linefeeds
00594
00595 set z [string first "%" $line]
00596 if {$z > 0} { set line [string range $line 0 [expr {$z-1}]] }
00597
00598 regsub -all {(\\n|\{|\})} $line {} tmp
00599 set line $tmp
00600
00601 # Now $line should consist only of hyphenation patterns
00602 # separated by white space
00603
00604 # Translate TeX encoded characters to ISO-8859-1 characters
00605 # using the array xlat defined above
00606
00607 foreach x [array names xlat] {
00608 regsub -all {$x} $line $xlat($x) tmp
00609 set line $tmp
00610 }
00611
00612 # split the line and create a lookup array for
00613 # the repective hyphenation patterns
00614
00615 foreach item [split $line] {
00616 if {[string length $item]} {
00617 if {![string match {\\} $item]} {
00618 # create index for hyphenation patterns
00619
00620 set var $item
00621 regsub -all {[0-9]} $var {} idx
00622 # store hyphenation patterns as elements of an array
00623
00624 set HyphPatterns($idx) $item
00625 }
00626 }
00627 }
00628 }
00629 }
00630 EXCEPTIONS {
00631 if {[regexp {^\}[.]*} $line]} {
00632 # End of patterns encountered: set status
00633 # and ignore that line
00634 set status 0
00635 continue
00636 } else {
00637 # to be done in the future
00638 }
00639 }
00640 default {
00641 if {[regexp {^\\endinput[.]*} $line]} {
00642 # end of data encountered, stop processing and
00643 # ignore all the following text ..
00644 break
00645 } elseif {[regexp {^\\patterns[.]*} $line]} {
00646 # begin of patterns encountered: set status
00647 # and ignore that line
00648 set status PATTERNS
00649 continue
00650 } elseif {[regexp {^\\hyphenation[.]*} $line]} {
00651 # some particular cases to be treated separately
00652 set status EXCEPTIONS
00653 continue
00654 } else {
00655 set status 0
00656 }
00657 }
00658 }
00659 }
00660
00661 close $fd
00662 HyphPatterns = (_LOADED_) 1
00663
00664 return
00665 }
00666
00667 /* */
00668
00669 /* @c The specified <a text>block is indented*/
00670 /* @c by <a prefix>ing each line. The first*/
00671 /* @c <a hang> lines ares skipped.*/
00672 /* */
00673 /* @a text: The paragraph to indent.*/
00674 /* @a prefix: The string to use as prefix for each line*/
00675 /* @a prefix: of <a text> with.*/
00676 /* @a skip: The number of lines at the beginning to leave untouched.*/
00677 /* */
00678 /* @r Basically <a text>, but indented a certain amount.*/
00679 /* */
00680 /* @i indent*/
00681 /* @n This procedure is not checked by the testsuite.*/
00682
00683 ret ::textutil::adjust::indent (type text , type prefix , optional skip =0) {
00684 set text [string trimright $text]
00685
00686 set res [list]
00687 foreach line [split $text \n] {
00688 if {[string compare "" [string trim $line]] == 0} {
00689 lappend res {}
00690 } else {
00691 set line [string trimright $line]
00692 if {$skip <= 0} {
00693 lappend res $prefix$line
00694 } else {
00695 lappend res $line
00696 }
00697 }
00698 if {$skip > 0} {incr skip -1}
00699 }
00700 return [join $res \n]
00701 }
00702
00703 /* Undent the block of text: Compute LCP (restricted to whitespace!)*/
00704 /* and remove that from each line. Note that this preverses the*/
00705 /* shaping of the paragraph (i.e. hanging indent are _not_ flattened)*/
00706 /* We ignore empty lines !!*/
00707
00708 ret ::textutil::adjust::undent (type text) {
00709
00710 if {$text == {}} {return {}}
00711
00712 set lines [split $text \n]
00713 set ne [list]
00714 foreach l $lines {
00715 if {[string length [string trim $l]] == 0} continue
00716 lappend ne $l
00717 }
00718 set lcp [::textutil::string::longestCommonPrefixList $ne]
00719
00720 if {[string length $lcp] == 0} {return $text}
00721
00722 regexp {^([ ]*)} $lcp -> lcp
00723
00724 if {[string length $lcp] == 0} {return $text}
00725
00726 set len [string length $lcp]
00727
00728 set res [list]
00729 foreach l $lines {
00730 if {[string length [string trim $l]] == 0} {
00731 lappend res {}
00732 } else {
00733 lappend res [string range $l $len end]
00734 }
00735 }
00736 return [join $res \n]
00737 }
00738
00739 /* ### ### ### ######### ######### #########*/
00740 /* Data structures*/
00741
00742 namespace ::textutil::adjust {
00743 variable here [file dirname [info script]]
00744
00745 variable Justify left
00746 variable Length 72
00747 variable FullLine 0
00748 variable StrictLength 0
00749 variable Hyphenate 0
00750 variable HyphPatterns
00751
00752 namespace export adjust indent undent
00753 }
00754
00755 /* ### ### ### ######### ######### #########*/
00756 /* Ready*/
00757
00758 package provide textutil::adjust 0.7
00759