soundex.tcl
Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 package require Tcl 8.2
00013
# Create the package namespace up front so that later definitions can be
# placed in it by fully-qualified name.
namespace eval ::soundex {}
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
namespace eval ::soundex {
    # Letter -> soundex digit table used by ::soundex::knuth.
    # A code of 0 marks letters that never contribute a digit to the key
    # (a e h i o u w y); they only act as separators between coded letters.
    variable soundexKnuthCode
    array set soundexKnuthCode {
        a 0 b 1 c 2 d 3 e 0 f 1 g 2 h 0 i 0 j 2 k 2 l 4 m 5
        n 5 o 0 p 1 q 2 r 6 s 2 t 3 u 0 v 1 w 0 x 2 y 0 z 2
    }
}

# ::soundex::knuth --
#
#   Compute the four-character soundex key of a string using the code
#   table in ::soundex::soundexKnuthCode.
#
# Arguments:
#   in      The string to encode. Case is ignored and every character
#           outside a-z (after lower-casing) is discarded.
#
# Results:
#   A four-character key: the upper-cased first letter followed by up to
#   three digits, padded on the right with "0". Returns "Z000" when the
#   input contains no a-z letters at all.

proc ::soundex::knuth {in} {
    variable soundexKnuthCode

    # The key is built from lower-case ASCII letters only.  Stripping every
    # other character also removes any surrounding white space and
    # punctuation, so no separate trimming pass is needed.
    regsub -all {[^a-z]} [string tolower $in] {} TempIn
    if {[string length $TempIn] == 0} {
        return Z000
    }

    # The first letter is kept verbatim (upper-cased).  Its code is
    # remembered so that an immediately following letter with the same
    # code is folded into it (e.g. "Pf..." contributes only "P").
    set last [string index $TempIn 0]
    set key  [string toupper $last]
    set last $soundexKnuthCode($last)

    # Scan the rest of the string; stop at the end of the string or as
    # soon as the key holds four characters.
    set count 1
    set MaxIndex [string length $TempIn]

    for {set index 1} {($count < 4) && ($index < $MaxIndex)} {incr index} {
        set chcode $soundexKnuthCode([string index $TempIn $index])

        # Fold together adjacent letters sharing the same code.
        if {![string equal $last $chcode]} {
            set last $chcode

            # Code-0 letters emit nothing, but because they reset $last
            # they separate equal codes on either side of them, so both
            # of those codes are emitted.
            if {$last != 0} {
                append key $last
                incr count
            }
        }
    }

    # Pad short keys with zeros to exactly four characters.
    return [string range ${key}0000 0 3]
}
00096
# Announce the package and its version to the Tcl package loader.
package provide soundex 1.0
00098