00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 package require Tcl 8.3
00014 package provide csv 0.7
00015
00016 namespace ::csv {
00017 namespace export join joinlist read2matrix read2queue report
00018 namespace export split split2matrix split2queue writematrix writequeue
00019 }
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032 ret ::csv::join (type values , optional sepChar =, , optional delChar =\") {
00033 set out ""
00034 set sep {}
00035 foreach val $values {
00036 if {[string match "*\[${delChar}$sepChar\]*" $val]} {
00037 append out $sep${delChar}[string map [list $delChar ${delChar}${delChar}] $val]${delChar}
00038 } else {
00039 append out $sep${val}
00040 }
00041 set sep $sepChar
00042 }
00043 return $out
00044 }
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061 ret ::csv::joinlist (type values , optional sepChar =, , optional delChar =\") {
00062 set out ""
00063 foreach record $values {
00064 # note that this is ::csv::join
00065 append out "[join $record $sepChar $delChar]\n"
00066 }
00067 return $out
00068 }
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085 ret ::csv::joinmatrix (type matrix , optional sepChar =, , optional delChar =\") {
00086 return [joinlist [$matrix get rect 0 0 end end] $sepChar $delChar]
00087 }
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100 ret ::csv::iscomplete (type data) {
00101 expr {1 - [regexp -all \" $data] % 2}
00102 }
00103
00104
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114
00115
00116
00117
00118
00119 ret ::csv::read2matrix (type args) {
00120 # FR #481023
00121 # See 'split2matrix' for the available expansion modes.
00122
00123 # Argument syntax:
00124 #
00125 #2) chan m
00126 #3) chan m sepChar
00127 #3) -alternate chan m
00128 #4) -alternate chan m sepChar
00129 #4) chan m sepChar expand
00130 #5) -alternate chan m sepChar expand
00131
00132 set alternate 0
00133 set sepChar ,
00134 set expand none
00135
00136 switch -exact -- [llength $args] {
00137 2 {
00138 foreach {chan m} $args break
00139 }
00140 3 {
00141 foreach {a b c} $args break
00142 if {[string equal $a "-alternate"]} {
00143 set alternate 1
00144 set chan $b
00145 set m $c
00146 } else {
00147 set chan $a
00148 set m $b
00149 set sepChar $c
00150 }
00151 }
00152 4 {
00153 foreach {a b c d} $args break
00154 if {[string equal $a "-alternate"]} {
00155 set alternate 1
00156 set chan $b
00157 set m $c
00158 set sepChar $d
00159 } else {
00160 set chan $a
00161 set m $b
00162 set sepChar $c
00163 set expand $d
00164 }
00165 }
00166 5 {
00167 foreach {a b c d e} $args break
00168 if {![string equal $a "-alternate"]} {
00169 return -code error "wrong#args: Should be ?-alternate? chan m ?separator? ?expand?"
00170 }
00171 set alternate 1
00172
00173 set chan $b
00174 set m $c
00175 set sepChar $d
00176 set expand $e
00177 }
00178 0 - 1 -
00179 default {
00180 return -code error "wrong#args: Should be ?-alternate? chan m ?separator? ?expand?"
00181 }
00182 }
00183
00184 set data ""
00185 while {![eof $chan]} {
00186 if {[gets $chan line] < 0} {continue}
00187
00188 # Why skip empty lines? They may be in data. Except if the
00189 # buffer is empty, i.e. we are between records.
00190 if {$line == {} && $data == {}} {continue}
00191
00192 append data $line
00193 if {![iscomplete $data]} {
00194 # Odd number of quotes - must have embedded newline
00195 append data \n
00196 continue
00197 }
00198
00199 Split2matrix $alternate $m $data $sepChar $expand
00200 set data ""
00201 }
00202 return
00203 }
00204
00205
00206
00207
00208
00209
00210
00211
00212
00213
00214
00215
00216
00217
00218
00219 ret ::csv::read2queue (type args) {
00220 # Argument syntax:
00221 #
00222 #2) chan q
00223 #3) chan q sepChar
00224 #3) -alternate chan q
00225 #4) -alternate chan q sepChar
00226
00227 set alternate 0
00228 set sepChar ,
00229
00230 switch -exact -- [llength $args] {
00231 2 {
00232 foreach {chan q} $args break
00233 }
00234 3 {
00235 foreach {a b c} $args break
00236 if {[string equal $a "-alternate"]} {
00237 set alternate 1
00238 set chan $b
00239 set q $c
00240 } else {
00241 set chan $a
00242 set q $b
00243 set sepChar $c
00244 }
00245 }
00246 4 {
00247 foreach {a b c d} $args break
00248 if {![string equal $a "-alternate"]} {
00249 return -code error "wrong#args: Should be ?-alternate? chan q ?separator?"
00250 }
00251 set alternate 1
00252 set chan $b
00253 set q $c
00254 set sepChar $d
00255 }
00256 0 - 1 -
00257 default {
00258 return -code error "wrong#args: Should be ?-alternate? chan q ?separator?"
00259 }
00260 }
00261
00262 set data ""
00263 while {![eof $chan]} {
00264 if {[gets $chan line] < 0} {continue}
00265
00266 # Why skip empty lines? They may be in data. Except if the
00267 # buffer is empty, i.e. we are between records.
00268 if {$line == {} && $data == {}} {continue}
00269
00270 append data $line
00271 if {![iscomplete $data]} {
00272 # Odd number of quotes - must have embedded newline
00273 append data \n
00274 continue
00275 }
00276
00277 $q put [Split $alternate $line $sepChar]
00278 set data ""
00279 }
00280 return
00281 }
00282
00283
00284
00285
00286
00287
00288
00289
00290
00291
00292
00293
00294
00295
00296
00297
00298
00299
00300 ret ::csv::report (type cmd , type matrix , type args) {
00301 switch -exact -- $cmd {
00302 printmatrix {
00303 if {[llength $args] > 0} {
00304 return -code error "wrong # args:\
00305 ::csv::report printmatrix matrix"
00306 }
00307 return [joinlist [$matrix get rect 0 0 end end]]
00308 }
00309 printmatrix2channel {
00310 if {[llength $args] != 1} {
00311 return -code error "wrong # args:\
00312 ::csv::report printmatrix2channel matrix chan"
00313 }
00314 writematrix $matrix [lindex $args 0]
00315 return ""
00316 }
00317 default {
00318 return -code error "Unknown method $cmd"
00319 }
00320 }
00321 }
00322
00323
00324
00325
00326
00327
00328
00329
00330
00331
00332
00333
00334
00335 ret ::csv::split (type args) {
00336 # Argument syntax:
00337 #
00338 # (1) line
00339 # (2) line sepChar
00340 # (2) -alternate line
00341 # (3) -alternate line sepChar
00342
00343 # (3) line sepChar delChar
00344 # (4) -alternate line sepChar delChar
00345
00346 set alternate 0
00347 set sepChar ,
00348 set delChar \"
00349
00350 switch -exact -- [llength $args] {
00351 1 {
00352 set line [lindex $args 0]
00353 }
00354 2 {
00355 foreach {a b} $args break
00356 if {[string equal $a "-alternate"]} {
00357 set alternate 1
00358 set line $b
00359 } else {
00360 set line $a
00361 set sepChar $b
00362 }
00363 }
00364 3 {
00365 foreach {a b c} $args break
00366 if {[string equal $a "-alternate"]} {
00367 set alternate 1
00368 set line $b
00369 set sepChar $c
00370 } else {
00371 set line $a
00372 set sepChar $b
00373 set delChar $c
00374 }
00375 }
00376 4 {
00377 foreach {a b c d} $args break
00378 if {![string equal $a "-alternate"]} {
00379 return -code error "wrong#args: Should be ?-alternate? line ?separator? ?delimiter?"
00380 }
00381 set alternate 1
00382 set line $b
00383 set sepChar $c
00384 set delChar $d
00385 }
00386 0 -
00387 default {
00388 return -code error "wrong#args: Should be ?-alternate? line ?separator? ?delimiter?"
00389 }
00390 }
00391
00392 if {[string length $sepChar] < 1} {
00393 return -code error "illegal separator character ${delChar}$sepChar${delChar}, is empty"
00394 } elseif {[string length $sepChar] > 1} {
00395 return -code error "illegal separator character ${delChar}$sepChar${delChar}, is a string"
00396 }
00397
00398 if {[string length $delChar] < 1} {
00399 return -code error "illegal separator character \"$delChar\", is empty"
00400 } elseif {[string length $delChar] > 1} {
00401 return -code error "illegal separator character \"$delChar\", is a string"
00402 }
00403
00404 return [Split $alternate $line $sepChar $delChar]
00405 }
00406
00407 ret ::csv::Split (type alternate , type line , type sepChar , optional delChar =\") {
00408 # Protect the sepchar from special interpretation by
00409 # the regex calls below.
00410
00411 set sepRE \\$sepChar
00412 set delRE \\$delChar
00413
00414 if {$alternate} {
00415 # The alternate syntax requires a different parser.
00416 # A variation of the string map / regsub parser for the
00417 # regular syntax was tried but does not handle embedded
00418 # doubled " well (testcase csv-91.3 was 'knownBug', sole
00419 # one, still a bug). Now we just tokenize the input into
00420 # the primary parts (sep char, "'s and the rest) and then
00421 # use an explicitly coded state machine (DFA) to parse
00422 # and convert token sequences.
00423
00424 ## puts 1->>$line<<
00425 set line [string map [list \
00426 $sepChar \0$sepChar\0 \
00427 $delChar \0${delChar}\0 \
00428 ] $line]
00429
00430 ## puts 2->>$line<<
00431 set line [string map [list \0\0 \0] $line]
00432 regsub "^\0" $line {} line
00433 regsub "\0$" $line {} line
00434
00435 ## puts 3->>$line<<
00436
00437 set val ""
00438 set res ""
00439 set state base
00440
00441 ## puts 4->>[::split $line \0]
00442 foreach token [::split $line \0] {
00443
00444 ## puts "\t*= $state\t>>$token<<"
00445 switch -exact -- $state {
00446 base {
00447 if {[string equal $token "${delChar}"]} {
00448 set state qvalue
00449 continue
00450 }
00451 if {[string equal $token $sepChar]} {
00452 lappend res $val
00453 set val ""
00454 continue
00455 }
00456 append val $token
00457 }
00458 qvalue {
00459 if {[string equal $token "${delChar}"]} {
00460 # May end value, may be a doubled "
00461 set state endordouble
00462 continue
00463 }
00464 append val $token
00465 }
00466 endordouble {
00467 if {[string equal $token "${delChar}"]} {
00468 # Doubled ", append to current value
00469 append val ${delChar}
00470 set state qvalue
00471 continue
00472 }
00473 # Last " was end of quoted value. Close it.
00474 # We expect current as $sepChar
00475
00476 lappend res $val
00477 set val ""
00478 set state base
00479
00480 if {[string equal $token $sepChar]} {continue}
00481
00482 # Undoubled " in middle of text. Just assume that
00483 # remainder is another qvalue.
00484 set state qvalue
00485 }
00486 default {
00487 return -code error "Internal error, illegal parsing state"
00488 }
00489 }
00490 }
00491
00492 ## puts "/= $state\t>>$val<<"
00493
00494 lappend res $val
00495
00496 ## puts 5->>$res<<
00497 return $res
00498 } else {
00499 regsub -- "$sepRE${delRE}${delRE}$" $line $sepChar\0${delChar}${delChar}\0 line
00500 regsub -- "^${delRE}${delRE}$sepRE" $line \0${delChar}${delChar}\0$sepChar line
00501 regsub -all -- {(^${delChar}|${delChar}$)} $line \0 line
00502
00503 set line [string map [list \
00504 $sepChar${delChar}${delChar}${delChar} $sepChar\0${delChar} \
00505 ${delChar}${delChar}${delChar}$sepChar ${delChar}\0$sepChar \
00506 ${delChar}${delChar} ${delChar} \
00507 ${delChar} \0 \
00508 ] $line]
00509
00510 set end 0
00511 while {[regexp -indices -start $end -- {(\0)[^\0]*(\0)} $line \
00512 -> start end]} {
00513 set start [lindex $start 0]
00514 set end [lindex $end 0]
00515 set range [string range $line $start $end]
00516 if {[string first $sepChar $range] >= 0} {
00517 set line [string replace $line $start $end \
00518 [string map [list $sepChar \1] $range]]
00519 }
00520 incr end
00521 }
00522 set line [string map [list $sepChar \0 \1 $sepChar \0 {} ] $line]
00523 return [::split $line \0]
00524
00525 }
00526 }
00527
00528
00529
00530
00531
00532
00533
00534
00535
00536
00537
00538
00539
00540
00541
00542
00543
00544
00545
00546 ret ::csv::split2matrix (type args) {
00547 # FR #481023
00548
00549 # Argument syntax:
00550 #
00551 #2) m line
00552 #3) m line sepChar
00553 #3) -alternate m line
00554 #4) -alternate m line sepChar
00555 #4) m line sepChar expand
00556 #5) -alternate m line sepChar expand
00557
00558 set alternate 0
00559 set sepChar ,
00560 set expand none
00561
00562 switch -exact -- [llength $args] {
00563 2 {
00564 foreach {m line} $args break
00565 }
00566 3 {
00567 foreach {a b c} $args break
00568 if {[string equal $a "-alternate"]} {
00569 set alternate 1
00570 set m $b
00571 set line $c
00572 } else {
00573 set m $a
00574 set line $b
00575 set sepChar $c
00576 }
00577 }
00578 4 {
00579 foreach {a b c d} $args break
00580 if {[string equal $a "-alternate"]} {
00581 set alternate 1
00582 set m $b
00583 set line $c
00584 set sepChar $d
00585 } else {
00586 set m $a
00587 set line $b
00588 set sepChar $c
00589 set expand $d
00590 }
00591 }
00592 4 {
00593 foreach {a b c d e} $args break
00594 if {![string equal $a "-alternate"]} {
00595 return -code error "wrong#args: Should be ?-alternate? m line ?separator? ?expand?"
00596 }
00597 set alternate 1
00598
00599 set m $b
00600 set line $c
00601 set sepChar $d
00602 set expand $e
00603 }
00604 0 - 1 -
00605 default {
00606 return -code error "wrong#args: Should be ?-alternate? m line ?separator? ?expand?"
00607 }
00608 }
00609
00610 Split2matrix $alternate $m $line $sepChar $expand
00611 return
00612 }
00613
00614 ret ::csv::Split2matrix (type alternate , type m , type line , type sepChar , type expand) {
00615 set csv [Split $alternate $line $sepChar]
00616
00617 # Expansion modes
00618 # - none : default, behaviour of original implementation.
00619 # no expansion is done, lines are silently truncated
00620 # to the number of columns in the matrix.
00621 #
00622 # - empty : A matrix without columns is expanded to the number
00623 # of columns in the first line added to it. All
00624 # following lines are handled as if "mode == none"
00625 # was set.
00626 #
00627 # - auto : Full auto-mode. The matrix is expanded as needed to
00628 # hold all columns of all lines.
00629
00630 switch -exact -- $expand {
00631 none {}
00632 empty {
00633 if {[$m columns] == 0} {
00634 $m add columns [llength $csv]
00635 }
00636 }
00637 auto {
00638 if {[$m columns] < [llength $csv]} {
00639 $m add columns [expr {[llength $csv] - [$m columns]}]
00640 }
00641 }
00642 }
00643 $m add row $csv
00644 return
00645 }
00646
00647
00648
00649
00650
00651
00652
00653
00654
00655
00656
00657
00658
00659
00660
00661
00662
00663
00664
00665 ret ::csv::split2queue (type args) {
00666 # Argument syntax:
00667 #
00668 #2) q line
00669 #3) q line sepChar
00670 #3) -alternate q line
00671 #4) -alternate q line sepChar
00672
00673 set alternate 0
00674 set sepChar ,
00675
00676 switch -exact -- [llength $args] {
00677 2 {
00678 foreach {q line} $args break
00679 }
00680 3 {
00681 foreach {a b c} $args break
00682 if {[string equal $a "-alternate"]} {
00683 set alternate 1
00684 set q $b
00685 set line $c
00686 } else {
00687 set q $a
00688 set line $b
00689 set sepChar $c
00690 }
00691 }
00692 4 {
00693 foreach {a b c d} $args break
00694 if {![string equal $a "-alternate"]} {
00695 return -code error "wrong#args: Should be ?-alternate? q line ?separator?"
00696 }
00697 set alternate 1
00698
00699 set q $b
00700 set line $c
00701 set sepChar $d
00702 }
00703 0 - 1 -
00704 default {
00705 return -code error "wrong#args: Should be ?-alternate? q line ?separator?"
00706 }
00707 }
00708
00709 $q put [Split $alternate $line $sepChar]
00710 return
00711 }
00712
00713
00714
00715
00716
00717
00718
00719
00720
00721
00722
00723
00724
00725
00726 ret ::csv::writematrix (type m , type chan , optional sepChar =, , optional delChar =\") {
00727 set n [$m rows]
00728 for {set r 0} {$r < $n} {incr r} {
00729 puts $chan [join [$m get row $r] $sepChar $delChar]
00730 }
00731
00732 # Memory intensive alternative:
00733 # puts $chan [joinlist [m get rect 0 0 end end] $sepChar $delChar]
00734 return
00735 }
00736
00737
00738
00739
00740
00741
00742
00743
00744
00745
00746
00747
00748
00749
00750 ret ::csv::writequeue (type q , type chan , optional sepChar =, , optional delChar =\") {
00751 while {[$q size] > 0} {
00752 puts $chan [join [$q get] $sepChar $delChar]
00753 }
00754
00755 # Memory intensive alternative:
00756 # puts $chan [joinlist [$q get [$q size]] $sepChar $delChar]
00757 return
00758 }
00759
00760