soundex.tcl

Go to the documentation of this file.
00001 /*  soundex.tcl --*/
00002 /* */
00003 /*  Implementation of soundex in Tcl*/
00004 /* */
00005 /*  Copyright (c) 2003 by Andreas Kupries <andreas_kupries@users.sourceforge.net>*/
00006 /* */
00007 /*  See the file "license.terms" for information on usage and redistribution*/
00008 /*  of this file, and for a DISCLAIMER OF ALL WARRANTIES.*/
00009 /*  */
00010 /*  RCS: @(#) $Id: soundex.tcl,v 1.3 2004/01/15 06:36:14 andreas_kupries Exp $*/
00011 
00012 package require Tcl 8.2
00013 
00014 namespace ::soundex {}
00015 
00016 /*  ------------------------------------------------------------*/
00017 /** 
00018  *# I. Soundex by Knuth.
00019  */
00020 
00021 /*  This implementation of the Soundex algorithm is released to the public*/
00022 /*  domain: anyone may use it for any purpose.  See if I care.*/
00023 
00024 /*  N. Dean Pentcheff 1/13/89 Dept. of Zoology University of California Berkeley,*/
00025 /*     CA  94720 dean@violet.berkeley.edu*/
00026 /*  TCL port by Evan Rempel 2/10/98 Dept Comp Services University of Victoria.*/
00027 /*  erempel@uvic.ca*/
00028 
00029 /*  proc ::soundex::knuth ( string )*/
00030 /* */
00031 /*    Given as argument: a character string. Returns: a static string, 4 characters long*/
00032 /*    This string is the Soundex key for the argument string.*/
00033 /*    Side effects and limitations:*/
00034 /*    Does not clobber the string passed in as the argument. No limit on*/
00035 /*    argument string length. Assumes a character set with continuously*/
00036 /*    ascending and contiguous letters within each case and within the digits*/
00037 /*    (e.g. this works for ASCII and bombs in EBCDIC. But then, most things*/
00038 /*    do.). Reference: Adapted from Knuth, D.E. (1973) The art of computer*/
00039 /*    programming; Volume 3: Sorting and searching.  Addison-Wesley Publishing*/
00040 /*    Company: Reading, Mass. Page 392.*/
00041 /*    Special cases: Leading or embedded spaces, numerals, or punctuation are squeezed*/
00042 /*    out before encoding begins.*/
00043 /* */
00044 /*    Null strings or those with no encodable letters return the code 'Z000'.*/
00045 /* */
00046 /*    Test data from Knuth (1973):*/
00047 /*    Euler   Gauss   Hilbert Knuth   Lloyd   Lukasiewicz*/
00048 /*    E460    G200    H416    K530    L300    L222*/
00049 
00050 namespace ::soundex {
00051     variable  soundexKnuthCode
00052     array  soundexKnuthCode =  {
00053     a 0 b 1 c 2 d 3 e 0 f 1 g 2 h 0 i 0 j 2 k 2 l 4 m 5
00054     n 5 o 0 p 1 q 2 r 6 s 2 t 3 u 0 v 1 w 0 x 2 y 0 z 2
00055     }
00056 }
00057 ret  ::soundex::knuth (type in) {
00058     variable soundexKnuthCode
00059     set key ""
00060 
00061     # Remove the leading/trailing white space punctuation etc.
00062 
00063     set TempIn [string trim $in "\t\n\r .,'-"]
00064 
00065     # Only use alphabetic characters, so strip out all others
00066     # also, soundex index uses only lower case chars, so force to lower
00067 
00068     regsub -all {[^a-z]} [string tolower $TempIn] {} TempIn
00069     if {[string length $TempIn] == 0} {
00070     return Z000
00071     }
00072     set last [string index $TempIn 0]
00073     set key  [string toupper $last]
00074     set last $soundexKnuthCode($last)
00075 
00076     # Scan rest of string, stop at end of string or when the key is
00077     # full
00078 
00079     set count    1
00080     set MaxIndex [string length $TempIn]
00081 
00082     for {set index 1} {(($count < 4) && ($index < $MaxIndex))} {incr index } {
00083     set chcode $soundexKnuthCode([string index $TempIn $index])
00084     # Fold together adjacent letters sharing the same code
00085     if {![string equal $last $chcode]} {
00086         set last $chcode
00087         # Ignore code==0 letters except as separators
00088         if {$last != 0} then {
00089         set key $key$last
00090         incr count
00091         }
00092     }
00093     }
00094     return [string range ${key}0000 0 3]
00095 }
00096 
00097 package provide soundex 1.0
00098 

Generated on 21 Sep 2010 for Gui by  doxygen 1.6.1