cfront.tcl

Go to the documentation of this file.
00001 /*  cfront.tcl --*/
00002 /* */
00003 /*  Generator frontend for compiler of magic(5) files into recognizers*/
00004 /*  based on the 'rtcore'. Parses magic(5) into a basic 'script'.*/
00005 /* */
00006 /*  Copyright (c) 2004-2005 Colin McCormack <coldstore@users.sourceforge.net>*/
00007 /*  Copyright (c) 2005      Andreas Kupries <andreas_kupries@users.sourceforge.net>*/
00008 /* */
00009 /*  See the file "license.terms" for information on usage and redistribution*/
00010 /*  of this file, and for a DISCLAIMER OF ALL WARRANTIES.*/
00011 /*  */
00012 /*  RCS: @(#) $Id: cfront.tcl,v 1.6 2007/06/23 03:39:34 andreas_kupries Exp $*/
00013 
00014 /* */
00015 /* */
00016 /*  "mime type recognition in pure tcl"*/
00017 /*  http://wiki.tcl.tk/12526*/
00018 /* */
00019 /*  Tcl code harvested on:  10 Feb 2005, 04:06 GMT*/
00020 /*  Wiki page last updated: ???*/
00021 /* */
00022 /* */
00023 
00024 /*  ### ### ### ######### ######### #########*/
00025 /*  Requirements*/
00026 
00027 package require Tcl 8.4
00028 
00029 /*  file to compile the magic file from magic(5) into a tcl program*/
00030 package require fileutil              ; /*  File processing (input)*/
00031 package require fileutil::magic::cgen ; /*  Code generator.*/
00032 package require fileutil::magic::rt   ; /*  Runtime (typemap)*/
00033 package require struct::list          ; /*  lrepeat.*/
00034 
00035 package provide fileutil::magic::cfront 1.0
00036 
00037 /*  ### ### ### ######### ######### #########*/
00038 /*  Implementation*/
00039 
00040 namespace ::fileutil::magic::cfront {
00041     /*  Configuration flag. (De)activate debugging output.*/
00042     /*  This is done during initialization.*/
00043     /*  Changes at runtime have no effect.*/
00044 
00045     variable debug 0
00046 
00047     /*  Constants*/
00048 
00049     variable hashprotection  [list "\/* " "\\#" \" \\\" \{ \\\{ \} \\\}]      ;#"*/
00050     variable hashprotectionB [list "\/* " "\\\#" \" \\\" \} \\\} ( \\( ) \\)] ;#"*/
00051 
00052     /*  Make backend functionality accessible*/
00053     namespace import ::fileutil::magic::cgen::*
00054 
00055     namespace export compile prodef install
00056 }
00057 
00058 /*  parse an individual line*/
00059 ret  ::fileutil::magic::cfront::parseline (type line , optional maxlevel =10000) {
00060     # calculate the line's level
00061     set unlevel [string trimleft $line >]
00062     set level   [expr {[string length $line] - [string length $unlevel]}]
00063     if {$level > $maxlevel} {
00064     return -code continue "Skip - too high a level"
00065     }
00066 
00067     # regexp parse line into (offset, type, value, command)
00068     set parse [regexp -expanded -inline {^(\S+)\s+(\S+)\s*((\S|(\B\s))*)\s*(.*)$} $unlevel]
00069     if {$parse == {}} {
00070     error "Can't parse: '$unlevel'"
00071     }
00072 
00073     # unpack parsed line
00074     set value   ""
00075     set command ""
00076     foreach {junk offset type value junk1 junk2 command} $parse break
00077 
00078     # handle trailing spaces
00079     if {[string index $value end] eq "\\"} {
00080     append value " "
00081     }
00082     if {[string index $command end] eq "\\"} {
00083     append command " "
00084     }
00085 
00086     if {$value eq ""} {
00087     # badly formatted line
00088     return -code error "no value"
00089     }
00090 
00091     ::fileutil::magic::cfront::Debug {
00092     puts "level:$level offset:$offset type:$type value:'$value' command:'$command'"
00093     }
00094 
00095     # return the line's fields
00096     return [list $level $offset $type $value $command]
00097 }
00098 
00099 /*  process a magic file*/
00100 ret  ::fileutil::magic::cfront::process (type file , optional maxlevel =10000) {
00101     variable hashprotection
00102     variable hashprotectionB
00103     variable level  ;# level of line
00104     variable linenum    ;# line number
00105 
00106     set level  0
00107     set script {}
00108 
00109     set linenum 0
00110     ::fileutil::foreachLine line $file {
00111     incr linenum
00112     set line [string trim $line " "]
00113     if {[string index $line 0] eq "#"} {
00114         continue    ;# skip comments
00115     } elseif {$line == ""} {
00116         continue    ;# skip blank lines
00117     } else {
00118         # parse line
00119         if {[catch {parseline $line $maxlevel} parsed]} {
00120         continue    ;# skip erroring lines
00121         }
00122 
00123         # got a valid line
00124         foreach {level offset type value message} $parsed break
00125 
00126         # strip comparator out of value field,
00127         # (they are combined)
00128         set compare [string index $value 0]
00129         switch -glob --  $value {
00130         [<>]=* {
00131             set compare [string range $value 0 1]
00132             set value   [string range $value 2 end]
00133         }
00134 
00135         <* - >* - &* - ^* {
00136             set value [string range $value 1 end]
00137         }
00138 
00139         =* {
00140             set compare "=="
00141             set value   [string range $value 1 end]
00142         }
00143 
00144         !* {
00145             set compare "!="
00146             set value   [string range $value 1 end]
00147         }
00148 
00149         x {
00150             # this is the 'don't care' match
00151             # used for collecting values
00152             set value ""
00153         }
00154 
00155         default {
00156             # the default comparator is equals
00157             set compare "=="
00158             if {[string match {\\[<!>=]*} $value]} {
00159             set value [string range $value 1 end]
00160             }
00161         }
00162         }
00163 
00164         # process type field
00165         set qual ""
00166         switch -glob -- $type {
00167         pstring* - string* {
00168             # String or Pascal string type
00169 
00170             # extract string match qualifiers
00171             foreach {type qual} [split $type /] break
00172 
00173             # convert pstring to string + qualifier
00174             if {$type eq "pstring"} {
00175             append qual "p"
00176             set type "string"
00177             }
00178 
00179             # protect hashes in output script value
00180             set value [string map $hashprotection $value]
00181 
00182             if {($value eq "\\0") && ($compare eq ">")} {
00183             # record 'any string' match
00184             set value   ""
00185             set compare x
00186             } elseif {$compare eq "!="} {
00187             # string doesn't allow !match
00188             set value   !$value
00189             set compare "=="
00190             }
00191 
00192             if {$type ne "string"} {
00193             # don't let any odd string types sneak in
00194             puts stderr "Reject String: ${file}:$linenum $type - $line"
00195             continue
00196             }
00197         }
00198 
00199         regex {
00200             # I am *not* going to handle regex
00201             puts stderr "Reject Regex: ${file}:$linenum $type - $line"
00202             continue
00203         }
00204 
00205         *byte* - *short* - *long* - *date* {
00206             # Numeric types
00207 
00208             # extract numeric match &qualifiers
00209             set type [split  $type &]
00210             set qual [lindex $type 1]
00211 
00212             if {$qual ne ""} {
00213             # this is an &-qualifier
00214             set qual &$qual
00215             } else {
00216             # extract -qualifier from type
00217             set type [split  $type -]
00218             set qual [lindex $type 1]
00219             if {$qual ne ""} {
00220                 set qual -$qual
00221             }
00222             }
00223             set type [lindex $type 0]
00224 
00225             # perform value adjustments
00226             if {$compare ne "x"} {
00227             # trim redundant Long value qualifier
00228             set value [string trimright $value L]
00229 
00230             if {[catch {set value [expr $value]} x]} {
00231                 upvar #0 errorInfo eo
00232                 # check that value is representable in tcl
00233                 puts stderr "Reject Value Error: ${file}:$linenum '$value' '$line' - $eo"
00234                 continue;
00235             }
00236 
00237             # coerce numeric value into hex
00238             set value [format "0x%x" $value]
00239             }
00240         }
00241 
00242         default {
00243             # this is not a type we can handle
00244             puts stderr "Reject Unknown Type: ${file}:$linenum $type - $line"
00245             continue
00246         }
00247         }
00248     }
00249 
00250     # collect some summaries
00251     ::fileutil::magic::cfront::Debug {
00252         variable types
00253         set types($type) $type
00254         variable quals
00255         set quals($qual) $qual
00256     }
00257 
00258     #puts $linenum level:$level offset:$offset type:$type
00259     #puts qual:$qual compare:$compare value:'$value' message:'$message'
00260 
00261     # protect hashes in output script message
00262     set message [string map $hashprotectionB $message]
00263 
00264     if {![string match "(*)" $offset]} {
00265         catch {set offset [expr $offset]}
00266     }
00267 
00268     # record is the complete match command,
00269     # encoded for tcl code generation
00270     set record [list $linenum $type $qual $compare $offset $value $message]
00271     if {$script == {}} {
00272         # the original script has level 0,
00273         # regardless of what the script says
00274         set level 0
00275     }
00276 
00277     if {$level == 0} {
00278         # add a new 0-level record
00279         lappend script $record
00280     } else {
00281         # find the growing edge of the script
00282         set depth [::struct::list repeat [expr $level] end]
00283         while {[catch {
00284         # get the insertion point
00285         set insertion [eval [linsert $depth 0 lindex $script]]
00286         # 8.5 # set insertion [lindex $script {*}$depth]
00287         }]} {
00288         # handle scripts which jump levels,
00289         # reduce depth to current-depth+1
00290         set depth [lreplace $depth end end]
00291         }
00292 
00293         # add the record at the insertion point
00294         lappend insertion $record
00295 
00296         # re-insert the record into its correct position
00297         eval [linsert [linsert $depth 0 lset script] end $insertion]
00298         # 8.5 # lset script {*}$depth $insertion
00299     }
00300     }
00301     #puts "Script: $script"
00302     return $script
00303 }
00304 
00305 /*  compile up magic files or directories of magic files into a single recognizer.*/
00306 ret  ::fileutil::magic::cfront::compile (type args) {
00307     set tcl ""
00308     set script {}
00309     foreach arg $args {
00310     if {[file type $arg] == "directory"} {
00311         foreach file [glob [file join $arg *]] {
00312         set script1 [process $file]
00313         eval [linsert $script1 0 lappend script [list file $file]]
00314         # 8.5 # lappend script [list file $file] {*}$script1
00315 
00316         #append tcl "magic::file_start $file" \n
00317         #append tcl [run $script1] \n
00318         }
00319     } else {
00320         set file $arg
00321         set script1 [process $file]
00322          eval [linsert $script1 0 lappend script [list file $file]]
00323         # 8.5 # lappend script [list file $file] {*}$script1
00324 
00325         #append tcl "magic::file_start $file" \n
00326         #append tcl [run $script1] \n
00327     }
00328     }
00329 
00330     #puts stderr $script
00331     ::fileutil::magic::cfront::Debug {puts "\# $args"}
00332 
00333     set    t   [2tree $script]
00334     set    tcl [treegen $t root]
00335     append tcl "\nreturn \{\}"
00336 
00337     ::fileutil::magic::cfront::Debug {puts [treedump $t]}
00338     #set tcl [run $script]
00339 
00340     return $tcl
00341 }
00342 
00343 ret  ::fileutil::magic::cfront::procdef (type procname , type args) {
00344 
00345     set pspace [namespace qualifiers $procname]
00346 
00347     if {$pspace eq ""} {
00348     return -code error "Cannot generate recognizer in the global namespace"
00349     }
00350 
00351     set     script {}
00352     lappend script "package require fileutil::magic::rt"
00353     lappend script "namespace eval [list ${pspace}] \{"
00354     lappend script "    namespace import ::fileutil::magic::rt::*"
00355     lappend script "\}"
00356     lappend script ""
00357     lappend script [list proc ${procname} {} \n[eval [linsert $args 0 compile]]\n]
00358     return [join $script \n]
00359 }
00360 
00361 ret  ::fileutil::magic::cfront::install (type args) {
00362     foreach arg $args {
00363     set path [file tail $arg]
00364     eval [procdef ::fileutil::magic::/${path}::run $arg]
00365     }
00366     return
00367 }
00368 
00369 /*  ### ### ### ######### ######### #########*/
00370 /*  Internal, debugging.*/
00371 
00372 if {!$::fileutil::magic::cfront::debug} {
00373     /*  This procedure definition is optimized out of using code by the*/
00374     /*  core bcc. It knows that neither argument checks are required,*/
00375     /*  nor is anything done. So neither results, nor errors are*/
00376     /*  possible, a true no-operation.*/
00377     ret  ::fileutil::magic::cfront::Debug (type args) {}
00378 
00379 } else {
00380     ret  ::fileutil::magic::cfront::Debug (type script) {
00381     # Run the commands in the debug script. This usually generates
00382     # some output. The uplevel is required to ensure the proper
00383     # resolution of all variables found in the script.
00384     uplevel 1 $script
00385     return
00386     }
00387 }
00388 
00389 /* set script [magic::compile {} /usr/share/misc/file/magic]*/
00390 /* puts "\# types:[array names magic::types]"*/
00391 /* puts "\# quals:[array names magic::quals]"*/
00392 /* puts "Script: $script"*/
00393 
00394 /*  ### ### ### ######### ######### #########*/
00395 /*  Ready for use.*/
00396 /*  EOF*/
00397 

Generated on 21 Sep 2010 for Gui by  doxygen 1.6.1