rtcore.tcl

Go to the documentation of this file.
00001 /*  rtcore.tcl --*/
00002 /* */
00003 /*  Runtime core for file type recognition engines written in pure Tcl.*/
00004 /* */
00005 /*  Copyright (c) 2004-2005 Colin McCormack <coldstore@users.sourceforge.net>*/
00006 /*  Copyright (c) 2005      Andreas Kupries <andreas_kupries@users.sourceforge.net>*/
00007 /* */
00008 /*  See the file "license.terms" for information on usage and redistribution*/
00009 /*  of this file, and for a DISCLAIMER OF ALL WARRANTIES.*/
00010 /*  */
00011 /*  RCS: @(#) $Id: rtcore.tcl,v 1.5 2005/09/28 04:51:19 andreas_kupries Exp $*/
00012 
00013 /* */
00014 /* */
00015 /*  "mime type recognition in pure tcl"*/
00016 /*  http://wiki.tcl.tk/12526*/
00017 /* */
00018 /*  Tcl code harvested on:  10 Feb 2005, 04:06 GMT*/
00019 /*  Wiki page last updated: ???*/
00020 /* */
00021 /* */
00022 
00023 /*  TODO - Required Functionality:*/
00024 
00025 /*  implement full offset language*/
00026 /*  implement pstring (pascal string, blerk)*/
00027 /*  implement regex form (blerk!)*/
00028 /*  implement string qualifiers*/
00029 
00030 /*  ### ### ### ######### ######### #########*/
00031 /*  Requirements*/
00032 
00033 package require Tcl 8.4
00034 
00035 /*  ### ### ### ######### ######### #########*/
00036 /*  Implementation*/
00037 
00038 namespace ::fileutil::magic::rt {
00039     /*  Configuration flag. (De)activate debugging output.*/
00040     /*  This is done during initialization.*/
00041     /*  Changes at runtime have no effect.*/
00042 
00043     variable debug 0
00044 
00045     /*  Runtime state.*/
00046 
00047     variable fd     {}     ; /*  Channel to file under scrutiny*/
00048     variable strbuf {}     ; /*  Input cache [*].*/
00049     variable cache         ; /*  Cache of fetched and decoded numeric*/
00050     array  cache =  {}     ; /*  values.*/
00051     variable result {}     ; /*  Accumulated recognition result.*/
00052     variable string {}     ; /*  Last recognized string | For substitution*/
00053     variable numeric -9999 ; /*  Last recognized number | into the message*/
00054 
00055     variable  last         ; /*  Behind last fetch locations,*/
00056     array  last =  {}      ; /*  per nesting level.*/
00057 
00058     /*  [*] The vast majority of magic strings are in the first 4k of the file.*/
00059 
00060     /*  Export APIs (full public, recognizer public)*/
00061     namespace export open close file_start result
00062     namespace export emit off Nv =  N S Nvx Nx Sx L R I
00063 }
00064 
00065 /*  ### ### ### ######### ######### #########*/
00066 /*  Public API, general use.*/
00067 
00068 /*  open the file to be scanned*/
00069 ret  ::fileutil::magic::rt::open (type file) {
00070     variable result {}
00071     variable string {}
00072     variable numeric -9999
00073     variable strbuf
00074     variable fd
00075     variable cache
00076 
00077     set fd [::open $file]
00078     ::fconfigure $fd -translation binary
00079 
00080     # fill the string cache
00081     set strbuf [::read $fd 4096]
00082 
00083     # clear the fetch cache
00084     catch {unset cache}
00085     array set cache {}
00086 
00087     return $fd
00088 }
00089 
00090 ret  ::fileutil::magic::rt::close () {
00091     variable fd
00092     ::close $fd
00093     return
00094 }
00095 
00096 /*  mark the start of a magic file in debugging*/
00097 ret  ::fileutil::magic::rt::file_start (type name) {
00098     ::fileutil::magic::rt::Debug {puts stderr "File: $name"}
00099 }
00100 
00101 /*  return the emitted result*/
00102 ret  ::fileutil::magic::rt::result (optional msg ="") {
00103     variable result
00104     if {$msg ne ""} {emit $msg}
00105     return -code return $result
00106 }
00107 
00108 ret  ::fileutil::magic::rt::resultv (optional msg ="") {
00109     variable result
00110     if {$msg ne ""} {emit $msg}
00111     return $result
00112 }
00113 
00114 /*  ### ### ### ######### ######### #########*/
00115 /*  Public API, for use by a recognizer.*/
00116 
00117 /*  emit a message*/
00118 ret  ::fileutil::magic::rt::emit (type msg) {
00119     variable string
00120     variable numeric
00121     variable result
00122 
00123     set map [list \
00124         \\b "" \
00125         %s  $string \
00126         %ld $numeric \
00127         %d  $numeric \
00128         ]
00129 
00130     lappend result [::string map $map $msg]
00131     return
00132 }
00133 
00134 /*  handle complex offsets - TODO*/
00135 ret  ::fileutil::magic::rt::offset (type where) {
00136     ::fileutil::magic::rt::Debug {puts stderr "OFFSET: $where"}
00137     return 0
00138 }
00139 
00140 ret  ::fileutil::magic::rt::Nv (type type , type offset , optional qual ="") {
00141     variable typemap
00142     variable numeric
00143 
00144     # unpack the type characteristics
00145     foreach {size scan} $typemap($type) break
00146 
00147     # fetch the numeric field from the file
00148     set numeric [Fetch $offset $size $scan]
00149 
00150     if {$qual ne ""} {
00151     # there's a mask to be applied
00152     set numeric [expr $numeric $qual]
00153     }
00154 
00155     ::fileutil::magic::rt::Debug {puts stderr "NV $type $offset $qual: $numeric"}
00156     return $numeric
00157 }
00158 
00159 /*  Numeric - get bytes of $type at $offset and $compare to $val*/
00160 /*  qual might be a mask*/
00161 ret  ::fileutil::magic::rt::N (type type , type offset , type comp , type val , optional qual ="") {
00162     variable typemap
00163     variable numeric
00164 
00165     # unpack the type characteristics
00166     foreach {size scan} $typemap($type) break
00167 
00168     # fetch the numeric field
00169     set numeric [Fetch $offset $size $scan]
00170 
00171     # Would moving this before the fetch an optimisation ? The
00172     # tradeoff is that we give up filling the cache, and it is unclear
00173     # how often that value would be used. -- Profile!
00174     if {$comp eq "x"} {
00175     # anything matches - don't care
00176     return 1
00177     }
00178 
00179     # get value in binary form, then back to numeric
00180     # this avoids problems with sign, as both values are
00181     # [binary scan]-converted identically
00182     binary scan [binary format $scan $val] $scan val
00183 
00184     if {$qual ne ""} {
00185     # there's a mask to be applied
00186     set numeric [expr $numeric $qual]
00187     }
00188 
00189     # perform comparison
00190     set c [expr $val $comp $numeric]
00191 
00192     ::fileutil::magic::rt::Debug {puts stderr "numeric $type: $val $comp $numeric / $qual - $c"}
00193     return $c
00194 }
00195 
00196 ret  ::fileutil::magic::rt::S (type offset , type comp , type val , optional qual ="") {
00197     variable fd
00198     variable string
00199 
00200     # convert any backslashes
00201     set val [subst -nocommands -novariables $val]
00202 
00203     if {$comp eq "x"} {
00204     # match anything - don't care, just get the value
00205     set string ""
00206 
00207     # Query: Can we use GetString here ?
00208     # Or at least the strbuf cache ?
00209 
00210     # move to the offset
00211     ::seek $fd $offset
00212     while {
00213         ([::string length $string] < 100) &&
00214         [::string is print [set c [::read $fd 1]]]
00215     } {
00216         if {[::string is space $c]} {
00217         break
00218         }
00219         append string $c
00220     }
00221 
00222     return 1
00223     }
00224 
00225     # get the string and compare it
00226     set string [GetString $offset [::string length $val]]
00227     set cmp    [::string compare $val $string]
00228     set c      [expr $cmp $comp 0]
00229 
00230     ::fileutil::magic::rt::Debug {
00231     puts "String '$val' $comp '$string' - $c"
00232     if {$c} {
00233         puts "offset $offset - $string"
00234     }
00235     }
00236     return $c
00237 }
00238 
00239 ret  ::fileutil::magic::rt::Nvx (type atlevel , type type , type offset , optional qual ="") {
00240     variable typemap
00241     variable numeric
00242     variable last
00243 
00244     upvar 1 level l
00245     set  l $atlevel
00246 
00247     # unpack the type characteristics
00248     foreach {size scan} $typemap($type) break
00249 
00250     # fetch the numeric field from the file
00251     set numeric [Fetch $offset $size $scan]
00252 
00253     set last($atlevel) [expr {$offset + $size}]
00254 
00255     if {$qual ne ""} {
00256     # there's a mask to be applied
00257     set numeric [expr $numeric $qual]
00258     }
00259 
00260     ::fileutil::magic::rt::Debug {puts stderr "NV $type $offset $qual: $numeric"}
00261     return $numeric
00262 }
00263 
00264 /*  Numeric - get bytes of $type at $offset and $compare to $val*/
00265 /*  qual might be a mask*/
00266 ret  ::fileutil::magic::rt::Nx (type atlevel , type type , type offset , type comp , type val , optional qual ="") {
00267     variable typemap
00268     variable numeric
00269     variable last
00270 
00271     upvar 1 level l
00272     set  l $atlevel
00273 
00274     # unpack the type characteristics
00275     foreach {size scan} $typemap($type) break
00276 
00277     set last($atlevel) [expr {$offset + $size}]
00278 
00279     # fetch the numeric field
00280     set numeric [Fetch $offset $size $scan]
00281 
00282     if {$comp eq "x"} {
00283     # anything matches - don't care
00284     return 1
00285     }
00286 
00287     # get value in binary form, then back to numeric
00288     # this avoids problems with sign, as both values are
00289     # [binary scan]-converted identically
00290     binary scan [binary format $scan $val] $scan val
00291 
00292     if {$qual ne ""} {
00293     # there's a mask to be applied
00294     set numeric [expr $numeric $qual]
00295     }
00296 
00297     # perform comparison
00298     set c [expr $val $comp $numeric]
00299 
00300     ::fileutil::magic::rt::Debug {puts stderr "numeric $type: $val $comp $numeric / $qual - $c"}
00301     return $c
00302 }
00303 
00304 ret  ::fileutil::magic::rt::Sx (type atlevel , type offset , type comp , type val , optional qual ="") {
00305     variable fd
00306     variable string
00307     variable last
00308 
00309     upvar 1 level l
00310     set  l $atlevel
00311 
00312     # convert any backslashes
00313     set val [subst -nocommands -novariables $val]
00314 
00315     if {$comp eq "x"} {
00316     # match anything - don't care, just get the value
00317     set string ""
00318 
00319     # Query: Can we use GetString here ?
00320     # Or at least the strbuf cache ?
00321 
00322     # move to the offset
00323     ::seek $fd $offset
00324     while {
00325         ([::string length $string] < 100) &&
00326         [::string is print [set c [::read $fd 1]]]
00327     } {
00328         if {[::string is space $c]} {
00329         break
00330         }
00331         append string $c
00332     }
00333 
00334     set last($atlevel) [expr {$offset + [string length $string]}]
00335 
00336     return 1
00337     }
00338 
00339     set len [::string length $val]
00340     set last($atlevel) [expr {$offset + $len}]
00341 
00342     # get the string and compare it
00343     set string [GetString $offset $len]
00344     set cmp    [::string compare $val $string]
00345     set c      [expr $cmp $comp 0]
00346 
00347     ::fileutil::magic::rt::Debug {
00348     puts "String '$val' $comp '$string' - $c"
00349     if {$c} {
00350         puts "offset $offset - $string"
00351     }
00352     }
00353     return $c
00354 }
00355 ret  ::fileutil::magic::rt::L (type newlevel) {
00356     # Regenerate level information in the calling context.
00357     upvar 1 level l ; set l $newlevel
00358     return
00359 }
00360 
00361 ret  ::fileutil::magic::rt::I (type base , type type , type delta) {
00362     # Handling of base locations specified indirectly through the
00363     # contents of the inspected file.
00364 
00365     variable typemap
00366     foreach {size scan} $typemap($type) break
00367     return [expr {[Fetch $base $size $scan] + $delta}]
00368 }
00369 
00370 ret  ::fileutil::magic::rt::R (type base) {
00371     # Handling of base locations specified relative to the end of the
00372     # last field one level above.
00373 
00374     variable last   ; # Remembered locations.
00375     upvar 1 level l ; # The level to get data from.
00376     return [expr {$last($l) + $base}]
00377 }
00378 
00379 /*  ### ### ### ######### ######### #########*/
00380 /*  Internal. Retrieval of the data used in comparisons.*/
00381 
00382 /*  fetch and cache a numeric value from the file*/
00383 ret  ::fileutil::magic::rt::Fetch (type where , type what , type scan) {
00384     variable cache
00385     variable numeric
00386     variable fd
00387 
00388     if {![info exists cache($where,$what,$scan)]} {
00389     ::seek $fd $where
00390     binary scan [::read $fd $what] $scan numeric
00391     set cache($where,$what,$scan) $numeric
00392 
00393     # Optimization: If we got 4 bytes, i.e. long we implicitly
00394     # know the short and byte data as well. Should put them into
00395     # the cache. -- Profile: How often does such an overlap truly
00396     # happen ?
00397 
00398     } else {
00399     set numeric $cache($where,$what,$scan)
00400     }
00401     return $numeric
00402 }
00403 
00404 ret  ::fileutil::magic::rt::GetString (type offset , type len) {
00405     # We have the first 1k of the file cached
00406     variable string
00407     variable strbuf
00408     variable fd
00409 
00410     set end [expr {$offset + $len - 1}]
00411     if {$end < 4096} {
00412     # in the string cache, copy the requested part.
00413     set string [::string range $strbuf $offset $end]
00414     } else {
00415     # an unusual one, move to the offset and read directly from
00416     # the file.
00417     ::seek $fd $offset
00418     set string [::read $fd $len]
00419     }
00420     return $string
00421 }
00422 
00423 /*  ### ### ### ######### ######### #########*/
00424 /*  Internal, debugging.*/
00425 
00426 if {!$::fileutil::magic::rt::debug} {
00427     /*  This procedure definition is optimized out of using code by the*/
00428     /*  core bcc. It knows that neither argument checks are required,*/
00429     /*  nor is anything done. So neither results, nor errors are*/
00430     /*  possible, a true no-operation.*/
00431     ret  ::fileutil::magic::rt::Debug (type args) {}
00432 
00433 } else {
00434     ret  ::fileutil::magic::rt::Debug (type script) {
00435     # Run the commands in the debug script. This usually generates
00436     # some output. The uplevel is required to ensure the proper
00437     # resolution of all variables found in the script.
00438     uplevel 1 $script
00439     return
00440     }
00441 }
00442 
00443 /*  ### ### ### ######### ######### #########*/
00444 /*  Initialize constants*/
00445 
00446 namespace ::fileutil::magic::rt {
00447     /*  maps magic typenames to field characteristics: size (#byte),*/
00448     /*  binary scan format*/
00449 
00450     variable typemap
00451 }
00452 
00453 ret  ::fileutil::magic::rt::Init () {
00454     variable typemap
00455     global tcl_platform
00456 
00457     # Set the definitions for all types which have their endianess
00458     # explicitly specified n their name.
00459 
00460     array set typemap {
00461     byte    {1 c}  ubyte    {1 c}
00462     beshort {2 S}  ubeshort {2 S}
00463     leshort {2 s}  uleshort {2 s}
00464     belong  {4 I}  ubelong  {4 I}
00465     lelong  {4 i}  ulelong  {4 i}  
00466     bedate  {4 S}  ledate   {4 s}
00467     beldate {4 I}  leldate  {4 i}
00468 
00469     long  {4 Q} ulong  {4 Q} date  {4 Q} ldate {4 Q}
00470     short {2 Y} ushort {2 Y}
00471     }
00472 
00473     # Now set the definitions for the types without explicit
00474     # endianess. They assume/use 'native' byteorder. We also put in
00475     # special forms for the compiler, so that it can use short names
00476     # for the native-endian types as well.
00477 
00478     # generate short form names
00479     foreach {n v} [array get typemap] {
00480     foreach {len scan} $v break
00481     #puts stderr "Adding $scan - [list $len $scan]"
00482     set typemap($scan) [list $len $scan]
00483     }
00484 
00485     # The special Q and Y short forms are incorrect, correct now to
00486     # use the proper native endianess.
00487 
00488     if {$tcl_platform(byteOrder) eq "littleEndian"} {
00489     array set typemap {Q {4 i} Y {2 s}}
00490     } else {
00491     array set typemap {Q {4 I} Y {2 S}}
00492     }
00493 }
00494 
00495 ::fileutil::magic::rt::Init
00496 /*  ### ### ### ######### ######### #########*/
00497 /*  Ready for use.*/
00498 
00499 package provide fileutil::magic::rt 1.0
00500 /*  EOF*/
00501 

Generated on 21 Sep 2010 for Gui by  doxygen 1.6.1