00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027 package require Tcl 8.4
00028 package require fileutil::magic::rt ;
00029 package require struct::list ;
00030 package require struct::tree ;
00031
00032 package provide fileutil::magic::cgen 1.0
00033
00034
00035
00036
00037 namespace ::fileutil::magic::cgen {
00038
00039 variable ::fileutil::magic::rt::typemap
00040
00041
00042 variable tree {}
00043
00044
00045 variable regions
00046
00047
00048
00049
00050 array otmap = {
00051 .b c .B c
00052 .s s .S S
00053 .l i .L I
00054 {} Q
00055 }
00056
00057
00058 namespace export 2tree treedump treegen
00059 }
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095 ret ::fileutil::magic::cgen::path (type tree) {
00096 # Annotates the tree. In each node we store the path from the root
00097 # to this node, as list of nodes, with the current node the last
00098 # element. The root node is never stored in the path.
00099
00100 $tree set root path {}
00101 foreach child [$tree children root] {
00102 $tree walk $child -type dfs node {
00103 set path [$tree get [$tree parent $node] path]
00104 lappend path [$tree index $node]
00105 $tree set $node path $path
00106 }
00107 }
00108 return
00109 }
00110
00111 ret ::fileutil::magic::cgen::tree_el (type tree , type parent , type file , type line , type type , type qual , type comp , type offset , type val , type message , type args) {
00112
00113 # Recursively creates and annotates a node for the specified
00114 # tests, and its sub-tests (args).
00115
00116 set node [$tree insert $parent end]
00117 set path [$tree get $parent path]
00118 lappend path [$tree index $node]
00119 $tree set $node path $path
00120
00121 # generate a proc call type for the type, Numeric or String
00122 variable ::fileutil::magic::rt::typemap
00123
00124 switch -glob -- $type {
00125 *byte* -
00126 *short* -
00127 *long* -
00128 *date* {
00129 set otype N
00130 set type [lindex $typemap($type) 1]
00131 }
00132 *string {
00133 set otype S
00134 }
00135 default {
00136 puts stderr "Unknown type: '$type'"
00137 }
00138 }
00139
00140 # Stores the type determined above, and the arguments into
00141 # attributes of the new node.
00142
00143 foreach key {line type qual comp offset val message file otype} {
00144 if {[catch {
00145 $tree set $node $key [set $key]
00146 } result]} {
00147 upvar ::errorInfo eo
00148 puts "Tree: $eo - $file $line $type"
00149 }
00150 }
00151
00152 # now add children
00153 foreach el $args {
00154 eval [linsert $el 0 tree_el $tree $node $file]
00155 # 8.5 # tree_el $tree $node $file {*}$el
00156 }
00157 return $node
00158 }
00159
00160 ret ::fileutil::magic::cgen::2tree (type script) {
00161
00162 # Converts a recognizer which is in a simple script form into a
00163 # tree.
00164
00165 variable tree
00166 set tree [::struct::tree]
00167
00168 $tree set root path ""
00169 $tree set root otype Root
00170 $tree set root type root
00171 $tree set root message "unknown"
00172
00173 # generate a test for each match
00174 set file "unknown"
00175 foreach el $script {
00176 #puts "EL: $el"
00177 if {[lindex $el 0] eq "file"} {
00178 set file [lindex $el 1]
00179 } else {
00180 set node [eval [linsert $el 0 tree_el $tree root $file]]
00181 # 8.5 # set more [tree_el $tree root $file {*}$el]
00182 append result $node
00183 }
00184 }
00185 optNum $tree root
00186 #optStr $tree root
00187 puts stderr "Script contains [llength [$tree children root]] discriminators"
00188 path $tree
00189
00190 # Decoding the offsets, determination if we have to handle
00191 # relative offsets, and where. The less, the better.
00192 Offsets $tree
00193
00194 return $tree
00195 }
00196
00197 ret ::fileutil::magic::cgen::isStr (type tree , type node) {
00198 return [expr {"S" eq [$tree get $node otype]}]
00199 }
00200
00201 ret ::fileutil::magic::cgen::sortRegion (type r1 , type r2) {
00202 set cmp 0
00203 if {[catch {
00204 if {[string match (*) $r1] || [string match (*) $r2]} {
00205 set cmp [string compare $r1 $r2]
00206 } else {
00207 set cmp [expr {[lindex $r1 0] - [lindex $r2 0]}]
00208 if {!$cmp} {
00209 set cmp 0
00210 set cmp [expr {[lindex $r1 1] - [lindex $r2 1]}]
00211 }
00212 }
00213 } result]} {
00214 set cmp [string compare $r1 $r2]
00215 }
00216 return $cmp
00217 }
00218
00219 ret ::fileutil::magic::cgen::optStr (type tree , type node) {
00220 variable regions
00221 catch {unset regions}
00222 array set regions {}
00223
00224 optStr1 $tree $node
00225
00226 puts stderr "Regions [array statistics regions]"
00227 foreach region [lsort \
00228 -index 0 \
00229 -command ::fileutil::magic::cgen::sortRegion \
00230 [array name regions]] {
00231 puts "$region - $regions($region)"
00232 }
00233 }
00234
00235 ret ::fileutil::magic::cgen::optStr1 (type tree , type node) {
00236 variable regions
00237
00238 # traverse each numeric element of this node's children,
00239 # categorising them
00240
00241 set kids [$tree children $node]
00242 foreach child $kids {
00243 optStr1 $tree $child
00244 }
00245
00246 set strings [$tree children $node filter ::fileutil::magic::cgen::isStr]
00247 #puts stderr "optstr: $node: $strings"
00248
00249 foreach el $strings {
00250 #if {[$tree get $el otype] eq "String"} {puts "[$tree getall $el] - [string length [$tree get $el val]]"}
00251 if {[$tree get $el comp] eq "x"} {
00252 continue
00253 }
00254
00255 set offset [$tree get $el offset]
00256 set len [string length [$tree get $el val]]
00257 lappend regions([list $offset $len]) $el
00258 }
00259 }
00260
00261 ret ::fileutil::magic::cgen::isNum (type tree , type node) {
00262 return [expr {"N" eq [$tree get $node otype]}]
00263 }
00264
00265 ret ::fileutil::magic::cgen::switchNSort (type tree , type n1 , type n2) {
00266 return [expr {[$tree get $n1 val] - [$tree get $n1 val]}]
00267 }
00268
00269 ret ::fileutil::magic::cgen::optNum (type tree , type node) {
00270 array set offsets {}
00271
00272 # traverse each numeric element of this node's children,
00273 # categorising them
00274
00275 set kids [$tree children $node]
00276 foreach child $kids {
00277 optNum $tree $child
00278 }
00279
00280 set numerics [$tree children $node filter ::fileutil::magic::cgen::isNum]
00281 #puts stderr "optNum: $node: $numerics"
00282 if {[llength $numerics] < 2} {
00283 return
00284 }
00285
00286 foreach el $numerics {
00287 if {[$tree get $el comp] ne "=="} {
00288 continue
00289 }
00290 lappend offsets([$tree get $el type],[$tree get $el offset],[$tree get $el qual]) $el
00291 }
00292
00293 #puts "Offset: stderr [array get offsets]"
00294 foreach {match nodes} [array get offsets] {
00295 if {[llength $nodes] < 2} {
00296 continue
00297 }
00298
00299 catch {unset matcher}
00300 foreach n $nodes {
00301 set nv [expr [$tree get $n val]]
00302 if {[info exists matcher($nv)]} {
00303 puts stderr "*====================================="
00304 puts stderr "* Node <[$tree getall $n]>"
00305 puts stderr "* clashes with <[$tree getall $matcher($nv)]>"
00306 puts stderr "*====================================="
00307 } else {
00308 set matcher($nv) $n
00309 }
00310 }
00311
00312 foreach {type offset qual} [split $match ,] break
00313 set switch [$tree insert $node [$tree index [lindex $nodes 0]]]
00314 $tree set $switch otype Switch
00315 $tree set $switch message $match
00316 $tree set $switch offset $offset
00317 $tree set $switch type $type
00318 $tree set $switch qual $qual
00319
00320 set nodes [lsort -command [list ::fileutil::magic::cgen::switchNSort $tree] $nodes]
00321
00322 eval [linsert $nodes 0 $tree move $switch end]
00323 # 8.5 # $tree move $switch end {*}$nodes
00324 set path [$tree get [$tree parent $switch] path]
00325 lappend path [$tree index $switch]
00326 $tree set $switch path $path
00327 }
00328 }
00329
00330 ret ::fileutil::magic::cgen::Offsets (type tree) {
00331
00332 # Indicator if a node has to save field location information for
00333 # relative addressing. The 'kill' attribute is an accumulated
00334 # 'save' over the whole subtree. It will be used to determine when
00335 # level information was destroyed by subnodes and has to be
00336 # regenerated at the current level.
00337
00338 $tree walk root -type dfs node {
00339 $tree set $node save 0
00340 $tree set $node kill 0
00341 }
00342
00343 # We walk from the leafs up to the root, synthesizing the data
00344 # needed, as we go.
00345 $tree walk root -type dfs -order post node {
00346 if {$node eq "root"} continue
00347 DecodeOffset $tree $node [$tree get $node offset]
00348
00349 # If the current node's parent is a switch, and the node has
00350 # to save, then the switch has to save. Because the current
00351 # node is not relevant during code generation anymore, the
00352 # switch is.
00353
00354 if {[$tree get $node save]} {
00355 # We save, therefore we kill.
00356 $tree set $node kill 1
00357 if {[$tree get [$tree parent $node] otype] eq "Switch"} {
00358 $tree set [$tree parent $node] save 1
00359 }
00360 } else {
00361 # We don't save i.e. kill, but we may inherit it from
00362 # children which kill.
00363
00364 foreach c [$tree children $node] {
00365 if {[$tree get $c kill]} {
00366 $tree set $node kill 1
00367 break
00368 }
00369 }
00370 }
00371 }
00372 }
00373
00374 ret ::fileutil::magic::cgen::DecodeOffset (type tree , type node , type offset) {
00375 if {[string match "(*)" $offset]} {
00376 # Indirection offset. (Decoding is non-trivial, therefore
00377 # packed into a proc).
00378
00379 set ind 1 ; # Indirect location
00380 foreach {rel base itype idelta} [DecodeIndirectOffset $offset] break
00381
00382 } elseif {[string match "&*" $offset]} {
00383 # Direct relative offset. (Decoding is trivial)
00384
00385 set ind 0 ; # Direct location
00386 set rel 1 ; # Relative
00387 set base [string range $offset 1 end] ; # Base Delta
00388 set itype {} ; # No data for indirect
00389 set idelta {} ; # s.a.
00390
00391 } else {
00392 set ind 0 ; # Direct location
00393 set rel 0 ; # Absolute
00394 set base $offset ; # Here!
00395 set itype {} ; # No data for indirect
00396 set idelta {} ; # s.a.
00397 }
00398
00399 # Store the expanded data back into the tree.
00400
00401 foreach v {ind rel base itype idelta} {
00402 $tree set $node $v [set $v]
00403 }
00404
00405 # For nodes with adressing relative to last field above the latter
00406 # has to save this information.
00407
00408 if {$rel} {
00409 $tree set [$tree parent $node] save 1
00410 }
00411 return
00412 }
00413
00414 ret ::fileutil::magic::cgen::DecodeIndirectOffset (type offset) {
00415 variable otmap ; # Offset typemap.
00416
00417 # Offset parser.
00418 # Syntax:
00419 # ( ?&? number ?.[bslBSL]? ?[+-]? ?number? )
00420
00421 set n {(([0-9]+)|(0x[0-9A-Fa-f]+))}
00422 set o "\\((&?)(${n})((\\.\[bslBSL])?)(\[+-]?)(${n}?)\\)"
00423 # | | ||| || | | |||
00424 # 1 2 345 67 8 9 012
00425 # ^ ^ ^ ^ ^
00426 # rel base type sign index
00427 #
00428 # 1 2 3 4 5 6 7 8 9 0 1 2
00429 set ok [regexp $o $offset -> rel base _ _ _ type _ sign idx _ _ _]
00430
00431 if {!$ok} {
00432 return -code error "Bad offset \"$offset\""
00433 }
00434
00435 # rel is in {"", &}, map to 0|1
00436 if {$rel eq ""} {set rel 0} else {set rel 1}
00437
00438 # base is a number, enforce decimal. Not optional.
00439 set base [expr $base]
00440
00441 # Type is in .b .s .l .B .S .L, and "". Map to a regular magic
00442 # type code.
00443 set type $otmap($type)
00444
00445 # sign is in {+,-,""}. Map to -|"" (Becomes sign of index)
00446 if {$sign eq "+"} {set sign ""}
00447
00448 # Index is optional number. Enforce decimal, empty is zero. Add in
00449 # the sign as well for a proper signed index.
00450
00451 if {$idx eq ""} {set idx 0}
00452 set idx $sign[expr $idx]
00453
00454 return [list $rel $base $type $idx]
00455 }
00456
00457 ret ::fileutil::magic::cgen::treedump (type tree) {
00458 set result ""
00459 $tree walk root -type dfs node {
00460 set path [$tree get $node path]
00461 set depth [llength $path]
00462
00463 append result [string repeat " " $depth] [list $path] ": " [$tree get $node type]:
00464
00465 if {[$tree keyexists $node offset]} {
00466 append result " ,O|[$tree get $node offset]|"
00467
00468 set x {}
00469 foreach v {ind rel base itype idelta} {lappend x [$tree get $node $v]}
00470 append result "=<[join $x !]>"
00471 }
00472 if {[$tree keyexists $node qual]} {
00473 set q [$tree get $node qual]
00474 if {$q ne ""} {
00475 append result " ,q/$q/"
00476 }
00477 }
00478
00479 if {[$tree keyexists $node comp]} {
00480 append result " " C([$tree get $node comp])
00481 }
00482 if {[$tree keyexists $node val]} {
00483 append result " " V([$tree get $node val])
00484 }
00485
00486 if {[$tree keyexists $node otype]} {
00487 append result " " [$tree get $node otype]/[$tree get $node save]
00488 }
00489
00490 if {$depth == 1} {
00491 set msg [$tree get $node message]
00492 set n $node
00493 while {($n != {}) && ($msg == "")} {
00494 set n [lindex [$tree children $n] 0]
00495 if {$n != {}} {
00496 set msg [$tree get $n message]
00497 }
00498 }
00499 append result " " ( $msg )
00500 if {[$tree keyexists $node file]} {
00501 append result " - " [$tree get $node file]
00502 }
00503 }
00504
00505 #append result " <" [$tree getall $node] >
00506 append result \n
00507 }
00508 return $result
00509 }
00510
00511 ret ::fileutil::magic::cgen::treegen (type tree , type node) {
00512 return "[treegen1 $tree $node]\nresult\n"
00513 }
00514
00515 ret ::fileutil::magic::cgen::treegen1 (type tree , type node) {
00516 variable ::fileutil::magic::rt::typemap
00517
00518 set result ""
00519 foreach k {otype type offset comp val qual message save path} {
00520 if {[$tree keyexists $node $k]} {
00521 set $k [$tree get $node $k]
00522 }
00523 }
00524
00525 set level [llength $path]
00526
00527 # Generate code for each node per its type.
00528
00529 switch $otype {
00530 N -
00531 S {
00532 if {$save} {
00533 # We have to save field data for relative adressing under this
00534 # leaf.
00535 if {$otype eq "N"} {
00536 set type [list Nx $level $type]
00537 } elseif {$otype eq "S"} {
00538 set type [list Sx $level]
00539 }
00540 } else {
00541 # Regular fetching of information.
00542 if {$otype eq "N"} {
00543 set type [list N $type]
00544 } elseif {$otype eq "S"} {
00545 set type S
00546 }
00547 }
00548
00549 set offset [GenerateOffset $tree $node]
00550
00551 if {$qual eq ""} {
00552 append result "if \{\[$type $offset $comp [list $val]\]\} \{"
00553 } else {
00554 append result "if \{\[$type $offset $comp [list $val] $qual\]\} \{"
00555 }
00556
00557 if {[$tree isleaf $node]} {
00558 if {$message ne ""} {
00559 append result "emit [list $message]"
00560 } else {
00561 append result "emit [$tree get $node path]"
00562 }
00563 } else {
00564 # If we saved data the child branches may destroy
00565 # level information. We regenerate it if needed.
00566
00567 if {$message ne ""} {
00568 append result "emit [list $message]\n"
00569 }
00570
00571 set killed 0
00572 foreach child [$tree children $node] {
00573 if {$save && $killed && [$tree get $child rel]} {
00574 # This location already does not regenerate if
00575 # the killing subnode was last. We also do not
00576 # need to regenerate if the current subnode
00577 # does not use relative adressing.
00578 append result "L $level;"
00579 set killed 0
00580 }
00581 append result [treegen1 $tree $child]
00582 set killed [expr {$killed || [$tree get $child kill]}]
00583 }
00584 #append result "\nreturn \$result"
00585 }
00586
00587 append result "\}\n"
00588 }
00589 Root {
00590 foreach child [$tree children $node] {
00591 append result [treegen1 $tree $child]
00592 }
00593 }
00594 Switch {
00595 set offset [GenerateOffset $tree $node]
00596
00597 if {$save} {
00598 set fetch "Nvx $level"
00599 } else {
00600 set fetch Nv
00601 }
00602
00603 append fetch " " $type " " $offset
00604 if {$qual ne ""} {
00605 append fetch " " $qual
00606 }
00607 append result "switch -- \[$fetch\] "
00608
00609 set scan [lindex $typemap($type) 1]
00610
00611 set ckilled 0
00612 foreach child [$tree children $node] {
00613 binary scan [binary format $scan [$tree get $child val]] $scan val
00614 append result "$val \{"
00615
00616 if {$save && $ckilled} {
00617 # This location already does not regenerate if
00618 # the killing subnode was last. We also do not
00619 # need to regenerate if the current subnode
00620 # does not use relative adressing.
00621 append result "L $level;"
00622 set ckilled 0
00623 }
00624
00625 if {[$tree isleaf $child]} {
00626 append result "emit [list [$tree get $child message]]"
00627 } else {
00628 set killed 0
00629 append result "emit [list [$tree get $child message]]\n"
00630 foreach grandchild [$tree children $child] {
00631 if {$save && $killed && [$tree get $grandchild rel]} {
00632 # This location already does not regenerate if
00633 # the killing subnode was last. We also do not
00634 # need to regenerate if the current subnode
00635 # does not use relative adressing.
00636 append result "L $level;"
00637 set killed 0
00638 }
00639 append result [treegen1 $tree $grandchild]
00640 set killed [expr {$killed || [$tree get $grandchild kill]}]
00641 }
00642 }
00643
00644 set ckilled [expr {$ckilled || [$tree get $child kill]}]
00645 append result "\} "
00646 }
00647 append result "\n"
00648 }
00649 }
00650 return $result
00651 }
00652
00653 proc ::fileutil::magic::cgen::GenerateOffset {tree node} {
00654 # Examples:
00655 # direct absolute: 45 -> 45
00656 # direct relative: &45 -> [R 45]
00657 # indirect absolute: (45.s+1) -> [I 45 s 1]
00658 # indirect relative: (&45.s+1) -> [I [R 45] s 1]
00659
00660 foreach v {ind rel base itype idelta} {
00661 set $v [$tree get $node $v]
00662 }
00663
00664 if {$rel} {set base "\[R $base\]"}
00665 if {$ind} {set base "\[I $base $itype $idelta\]"}
00666 return $base
00667 }
00668
00669 # ### ### ### ######### ######### #########
00670 ## Ready for use.
00671 # EOF
00672