# urlcopy.tcl
# 
# Copy a URL hierarchy, transforming links and image tags as necessary.
#
proc UrlCopy_Setup {} {
    global urlcopy
    catch {destroy .urlcopy}
    set f [Dialog_Shell . .urlcopy "URL Copy Hierarchy" \
"Please specify a URL and a local directory that will contain the pages
to be copied.  The copy recursively copies the URL and all the pages
that are referenced from that URL and served by the same server.
Select \"Copy URLS and Images\" if you want to copy the image files too.
Otherwise, those image tags are converted to absolute references." \
	{{"Copy URLS only" {set urlcopy(how) urlonly}}
	 {"Copy URLS and Images" {set urlcopy(how) url&image}}
	 {"Cancel" {
	     set urlcopy(how) {}
	     if {$urlcopy(stop)} {
		 destroy .urlcopy
	     } else {
		 set urlcopy(stop) 1
		 .urlcopy.but.2 config -text "Dismiss"
	     }
	 }}}]

    set g [frame $f.url]
    entry $g.entry -textvariable urlcopy(url)
    label $g.label -text URL: -width 5 -anchor e
    pack $g.label -side left
    pack $g.entry -side top -fill x
    pack $g -side top -fill x -padx 10

    set g [frame $f.bar -width 1 -height 10]
    pack $g -side top -fill x -padx 10 -pady 2
    frame $f.bar.blue -width 1 -height 10 -background blue
    # Placed later in UrlCopyProgress

    set g [frame $f.dir]
    entry $g.entry -textvariable urlcopy(dir)
    label $g.label -text Dir: -width 5 -anchor e
    pack $g.label -side left
    pack $g.entry -side top -fill x
    pack $g -side top -fill x -padx 10

    set urlcopy(stop) 0

    tkwait variable urlcopy(how)

    if {[string length $urlcopy(how)] == 0} {
	catch {destroy .urlcopy}
	return
    }
    set urlcopy(nqueue)  1
    set g [frame $f.nqueue]
    label $g.label -text "URLS to copy:" -width 0 -anchor e
    label $g.entry -width 0 -textvariable urlcopy(nqueue)
    pack $g -side top -fill both -expand true -padx 10
    pack $g.label $g.entry -side left

    set g [frame $f.queue]
    listbox $g.list -height 5 -yscrollcommand "$g.scroll set"
    scrollbar $g.scroll -orient vertical -command "$g.list yview"
    pack $g -side top -fill both -expand true -padx 10
    pack $g.scroll -side right -fill y
    pack $g.list -side left -fill both -expand true

    set urlcopy(ndone) 0
    set g [frame $f.ndone]
    label $g.label -text "URLS copied:" -width 0 -anchor e
    label $g.entry -width 0 -textvariable urlcopy(ndone)
    pack $g -side top -fill both -expand true -padx 10
    pack $g.label $g.entry -side left
    set urlcopy(total) 1
    label $g.label2 -text " of " -width 0 -anchor e
    label $g.entry2 -width 0 -textvariable urlcopy(total)
    pack $g.label2 $g.entry2 -side left

    trace variable urlcopy(ndone) w UrlCopyTraceTotal
    trace variable urlcopy(nqueue) w UrlCopyTraceTotal

    set g [frame $f.done]
    listbox $g.list -height 5 -yscrollcommand "$g.scroll set"
    scrollbar $g.scroll -orient vertical -command "$g.list yview"
    pack $g -side top -fill both -expand true -padx 10
    pack $g.scroll -side right -fill y
    pack $g.list -side left -fill both -expand true

    .urlcopy.but.2 config -text Stop
    Url_Copy $urlcopy(url) $urlcopy(dir) $urlcopy(how) $f.bar.blue \
	    $f.queue.list $f.done.list
}
proc UrlCopyTraceTotal {args} {
    global urlcopy
    set urlcopy(total) [expr $urlcopy(ndone) + $urlcopy(nqueue)]
}

proc Url_Copy {url dir how barlabel qlist dlist} {
    global urlstate urlcopy urlworkI

    set urlcopy(dir) $dir	;# Main directory
    set urlcopy(how) $how	;# either "urlonly" or "url&image"
    set urlcopy(url) [string trim $url]	;# Main/current URL
    set urlcopy(stop) 0
    set urlcopy(server) ""
    set urlcopy(ext) ""

    catch {close $urlcopy(logfile)}
    set urlcopy(logfile) [open /tmp/copy.log w]

    set urlcopy(dir) [makedir $urlcopy(dir)]
    catch {unset urlstate}	;# Per URL copy state
    catch {eval unset [info global urlwork*]}
    set urlworkI 0

    if ![regexp {^http:} $urlcopy(url)] {
	set urlcopy(url) http://[string trimleft $urlcopy(url) /]
    }
    regsub {#.*$} $urlcopy(url) {} urlcopy(url)

    regexp {^http://([^/]*)(.*)} $urlcopy(url) x urlcopy(server) urlcopy(ext)

    $qlist delete 0 end
    $qlist insert end $urlcopy(url)
    $qlist see end
    Http_get $url [list UrlCopyDone $urlcopy(url) $urlworkI \
			$barlabel $qlist 0 $dlist] \
	    [list UrlCopyProgress $urlcopy(url) $barlabel]

}
proc UrlCopyDone {url I barlabel qlist ix dlist} {
    upvar #0 $url data		;# get_http state variable
    global urlcopy urlstate urlwork urlworkI

    # Move from queue list to Done list
    set s [$qlist get $ix]
    $qlist delete $ix
    $dlist insert end "DONE $s"
    $dlist yview moveto 1.0
    incr urlcopy(nqueue) -1
    incr urlcopy(ndone) 1
if [catch {
    regsub ^http://$urlcopy(server)$urlcopy(ext) $url {} path
    if {[string length $path] == 0} {
	set path "index.html"
    }
    set path [eval {file join $urlcopy(dir)} [split $path /]]
    UrlCopyLog "COPY $url TO $path"
    makedir [file dirname $path]

    if {[info exists data(message)]} {
	set urlstate($url) error
	set urlstate($url,message) $message
    } elseif {[info exists data(html)]} {
	set urlstate($url) ok
	if [catch {open $path w} out] {
	    UrlCopyLog "COPY_ERR $x => $out"
	} else {
	    upvar #0 urlwork$I urlwork
	    set urlwork {}
	    HMparse_html $data(html) [list UrlCopyScan $url urlwork$I $out] {}
	    close $out
	    if !$urlcopy(stop) {
		foreach x $urlwork {
		    $qlist insert end $x
		    $qlist yview moveto 1.0
		    incr urlworkI
		    UrlCopyLog "QUEUE $x"
		    incr urlcopy(nqueue)
		    Http_get $x [list UrlCopyDone $x urlwork$urlworkI \
			$barlabel $qlist \
			[$qlist index end] $dlist] \
			[list UrlCopyProgress $x $barlabel]
		}
	    }
	    unset urlwork
	}
    } elseif {[catch {exec mv -f $data(file) $path} err]} {
	UrlCopyLog "MV_ERROR $data(file) TO $path"
	set urlstate($url) error
	set urlstate($url,message) $err
    } else {
	UrlCopyLog "RENAME $data(file) TO $path"
	set urlstate($url) ok
    }
    if [info exists data(file)] {
	exec rm -f $data(file)
    }
    Http_kill $url		;# Blow away cache state
} err] {
    Stderr "CopyDone $err"
}
    if {$urlcopy(nqueue) == 0} {
	.urlcopy.but.2 config -text "Dismiss" -command {destroy .urlcopy}
    }
}
proc UrlCopyProgress { url bar state current total} {
    global urlcopy
    if $urlcopy(stop) {
	Http_kill $url
    }
    set urlcopy(url) $url
    if {$total > 0} {
	set fract [expr $current.0/$total]
	place $bar -relw $fract -relheight 1.0 -anchor sw -x 0 -rely 1.0
    } else {
	set fract 0
	place $bar -relw 0.0 -relheight 1.0 -anchor sw -x 0 -rely 1.0
    }

}

proc UrlCopyScan { url urlworkVar out htag not param text } {
    global urlcopy urlstate
    upvar #0 $urlworkVar urlwork

    if [regexp {^(a|img)$} [string tolower $not$htag]] {
	set href {}
	if {[HMextract_param $param href] ||
		[HMextract_param $param src href]} {
	    set hreforig $href
	    UrlResolve $url href
	    regsub #.* $href {} hrefkey
	    UrlCopyLog "RESOLVE $hreforig $hrefkey"
	    if [regexp ^http://$urlcopy(server)$urlcopy(ext) $href] {
		if {[string tolower $htag] == "img" &&
			$urlcopy(how) == "urlonly"} {
		    # Re-write to reference original image
		    regsub $hreforig $param $href param
		} else {
		    # Schedule the copy of the element
		    if ![info exists urlstate($hrefkey)] {
			set urlstate($hrefkey) $htag
			lappend urlwork $hrefkey
		    }
		    # Re-write to reference local page
		    UrlRelative $url href 0
		    regsub $hreforig $param $href param
		}
	    } else {
		if ![info exists urlstate($hrefkey)] {
		    set urlstate($hrefkey) external
		}
	    }
	}
    }
    set tag [string trim "$not$htag $param"]
    if {[string length $tag]} {
	puts -nonewline $out <$tag>
    }
    puts -nonewline $out $text
}
proc UrlCopyLog {string} {
    global urlcopy
    catch {
	puts $urlcopy(logfile) $string
	flush $urlcopy(logfile) 
    }
}
