webviewer.tcl

Go to the documentation of this file.
00001 /*  webviewer.tcl - Copyright (C) 2004 Pat Thoyts <patthoyts@users.sourceforge.net>*/
00002 /* */
00003 /*  This is a sample application to demonstrate the use of the htmlparse package.*/
00004 /* */
00005 /*  Given the URL of a web page, this application will display just the text of*/
00006 /*  the page - that is the contents of header, paragraph and pre tags.*/
00007 /* */
00008 /*  As an aside, this also illustrates the use of the autoproxy package to */
00009 /*  cope with http proxy servers (if present) and handles HTTP redirections and*/
00010 /*  so on.*/
00011 /* */
00012 /*  Usage: webviewer.tcl http://tip.tcl.tk/2*/
00013 /* */
00014 /*  $Id: webviewer.tcl,v 1.1 2004/10/13 11:42:33 patthoyts Exp $*/
00015 
00016 package require htmlparse;              /*  tcllib*/
00017 package require http;                   /*  tcl*/
00018 package require autoproxy;              /*  tcllib*/
00019 autoproxy::init
00020 
00021 /*  -------------------------------------------------------------------------*/
00022 /*  The driver.*/
00023 /*  - Fetch the page*/
00024 /*  - parse it to extract the text*/
00025 /*  - sort out the html escaped chars*/
00026 /*  - eliminate excessive newlines*/
00027 /* */
00028 ret  webview (type url) {
00029     set html [fetchurl $url]
00030     if {[string length $html] > 0} {
00031         variable parsed ""
00032         htmlparse::parse -cmd [list parser [namespace current]::parsed] $html
00033         set parsed [htmlparse::mapEscapes $parsed]
00034         set parsed [regsub -all -line "\n{2,}" $parsed "\n\n"]
00035         Display $parsed
00036     } else {
00037         Error "error: no data available from \"$url\""
00038     }
00039 }
00040 
00041 /*  -------------------------------------------------------------------------*/
00042 /*  This implements our text extracting parser. This will pretty much turn */
00043 /*  an HTML page into an outline-mode text file.*/
00044 /* */
00045 ret  parser (type outvar , type tag , type end , type attr , type text) {
00046     upvar \#0 $outvar out
00047     set tag [string tolower $tag]
00048     set end [string length $end]
00049     if {$end == 0} {
00050         if {[string equal "hmstart" $tag]} {
00051             set out ""
00052         } elseif {[regexp {h(\d+)} $tag -> level]} {
00053             append out "\n\n" [string repeat * $level] " " $text
00054         } elseif {[lsearch -exact {p pre td} $tag] != -1} {
00055             append out "\n" $text
00056         } elseif {[lsearch -exact {a span i b} $tag] != -1} { 
00057             append out $text
00058         }
00059     }
00060 }
00061 
00062 /*  -------------------------------------------------------------------------*/
00063 /*  Fetch the target page and cope with HTTP problems. This*/
00064 /*  deals with server errors and proxy authentication failure*/
00065 /*  and handles HTTP redirection.*/
00066 /* */
00067 ret  fetchurl (type url) {
00068     set html ""
00069     set err ""
00070     set tok [http::geturl $url -timeout 30000]
00071     if {[string equal [http::status $tok] "ok"]} {
00072         if {[http::ncode $tok] >= 500} {
00073             set err "server error: [http::code $tok]"
00074         } elseif {[http::ncode $tok] >= 400} {
00075             set err "authentication error: [http::code $tok]"
00076         } elseif {[http::ncode $tok] >= 300} {
00077             upvar \#0 $tok state
00078             array set meta $state(meta)
00079             if {[info exists meta(Location)]} {
00080                 return [fetchurl $meta(Location)]
00081             } else {
00082                 set err [http::code $tok]
00083             }
00084         } else {
00085             set html [http::data $tok]
00086         }
00087     } else {
00088         set err [http::error $tok]
00089     }
00090     http::cleanup $tok
00091 
00092     if {[string length $err] > 0} {
00093         Error $err
00094     }
00095     return $html
00096 }
00097 
00098 /*  -------------------------------------------------------------------------*/
00099 /*  Abstract out the display functions so we can run this using either wish or*/
00100 /*  tclsh. This makes life easier on windows where the default is to use wish*/
00101 /*  for tcl files.*/
00102 /* */
00103 ret  Display (type msg) {
00104     if {[string length [package provide Tk]] > 0} {
00105         toplevel .dlg -class Dialog
00106         wm title .dlg "webview output."
00107         text .dlg.txt -yscrollcommand {.dlg.sb set}
00108         scrollbar .dlg.sb -command {.dlg.txt yview}
00109         button .dlg.b -command {destroy .dlg} -text Exit -underline 1
00110         .dlg.txt insert 0.0 $msg
00111         bind .dlg <Control-F2> {console show}
00112         bind .dlg <Escape> {.dlg.b invoke}
00113         grid .dlg.txt .dlg.sb -sticky news
00114         grid .dlg.b  - -sticky e -pady {3 0} -ipadx 4
00115         grid rowconfigure .dlg 0 -weight 1
00116         grid columnconfigure .dlg 0 -weight 1
00117         tkwait window .dlg
00118     } else {
00119         puts $msg
00120     }
00121 }
00122 
00123 ret  Error (type msg) {
00124     if {[string length [package provide Tk]] > 0} {
00125         tk_messageBox -title "webviewer error" -icon error -message $msg
00126     } else {
00127         puts stderr $msg
00128     }
00129     exit 1
00130 }
00131 
00132 /*  -------------------------------------------------------------------------*/
00133 
00134 if {!$tcl_interactive} {
00135     if {[string length [package provide Tk]] > 0} {
00136         wm withdraw .
00137         if {$argc < 1} {
00138             toplevel .dlg -class Dialog
00139             wm title .dlg "Enter URL"
00140             label .dlg.l -text "Enter a URL"
00141             entry .dlg.e -textvariable argv -width 40
00142             button .dlg.ok -text OK -default active -command {destroy .dlg}
00143             button .dlg.ca -text Cancel -command { ::argv =  ""; destroy .dlg}
00144             bind .dlg <Return> {.dlg.ok invoke}
00145             bind .dlg <Escape> {.dlg.ca invoke}
00146             bind .dlg <Control-F2> {console show}
00147             grid .dlg.l - -sticky nws
00148             grid .dlg.e - -sticky news
00149             grid .dlg.ok .dlg.ca -sticky news
00150             tkwait window .dlg
00151             if {[llength $argv] < 1} {
00152                 exit 1
00153             }
00154         }
00155     } else {
00156     
00157         if {$argc != 1} {
00158             Error "usage: webviewer URL"
00159         }
00160 
00161     }
00162     eval [linsert $argv 0 webview]
00163 }
00164 
00165 

Generated on 21 Sep 2010 for Gui by  doxygen 1.6.1