#
# cache.tcl,v 1.6 1995/11/17 00:42:01 steve Exp
#
#	PASTIME Project
#	Cooperative Research Centre for Advanced Computational Systems
#	COPYRIGHT NOTICE AND DISCLAIMER.
#
#	Copyright (c) 1995 ANU and CSIRO
#	on behalf of the participants in
#	the CRC for Advanced Computational Systems (ACSys)
#
# This software and all associated data and documentation ("Software")
# was developed for research purposes and ACSys does not warrant that 
# it is error free or fit for any purpose.  ACSys disclaims all liability
# for all claims, expenses, losses, damages and costs any user may incur 
# as a result of using, copying or modifying the Software.
#
# You may make copies of the Software but you must include all of this
# notice on any copy.
###
# cache.tcl provides routines for managing the cache of images and hyperdocs.
# All procedures and global variables used in this module are prepended with
# "Cache".
#
# There are three levels of cache: in-memory, image and on-disk.
#
# The global array Cache_info is used to store information about documents in
# the cache.  It is indexed by URL.  Elements are lists {level {attr} size info}
# where "level" indicates how the cache has the document stored (PRdata for in-memory
# data, PRimage for in-image and PRfile for on-disk), and "attr" is a list containing
# attribute,value pairs to be set in state variables.
#
# The cache is persistent and is intended to support use by multiple browsers.
# Each document has two files: one for the data and one for meta-data 
# (ie. HTTP headers).
# Browsers download data into a temporary file.  When the download has been
# completed the file is copied into the persistent cache.
# A mapping is defined between URLs and filenames, using character stuffing for
# the `\' character, and mapping '/' characters to '\057'.  Hence the URL
# http://pastime/ would be stored in the file http:\057\057pastime\057
# The meta-data file has a '.' prepended.
# NB. URLs may map to filenames which are too long.  In this case the document will
# not be cached.

# Initialisation

# Set the various cache defaults
# For resource limits, a value of -1 implies no limit and a value of 0
# implies no caching (or just immediate expiration?).

if {![info exists surfit(cachedir)]} {
    if {[array get env HOME] != {}} {
	set surfit(cachedir) $env(HOME)/.surfit-cache
    } else {
	puts stderr "unable to find cache directory.  Set surfit(cachedir)"
    }
}
array set surfit {
    cachecnt 0
    cachedisklimit -1 cachedatalimit -1 cacheimagelimit -1
}

# Create cache directory if it doesn't already exist
if {![file isdirectory $surfit(cachedir)]} {
    if {[file exists $surfit(cachedir)]} {
	# Find an unused name
	for {set counter 0} {[file exists $surfit(cachedir)$counter]} {incr counter} {}
	append surfit(cachedir) $counter
    }
    puts stderr "Creating cache directory \"$surfit(cachedir)\"..."
    exec mkdir $surfit(cachedir)
}

# Cache_get_document checks whether a document is in the cache.  If so then
# the supplied state array is initialised with the details of the document
# and the appropriate return value passed back, see loadData.
# Otherwise an empty string is returned.

proc Cache_get_document {state data} {
    global Cache_info surfit
    upvar #0 $state var
    upvar 2 $data data_return

    if {[set hit [array get Cache_info $var(url)]] == {}} {
	# Check whether it is in the on-disk cache
	if {![catch {file exists $surfit(cachedir)/.[Cache_map_url $var(url)]} exists] && \
	    $exists} {
	    # Read the file into the in-memory or in-image cache
	    # First, read the headers
	    if {![catch {open $surfit(cachedir)/.[Cache_map_url $var(url)] r} filed]} {
		array set var [gets $filed]
		while {[set hdr [gets $filed]] != "" && $hdr != "\r"} {
		    regexp "(\[^:\]+): (\[^\r\]*)\r?" $hdr all key value
		    set var(HDR[string tolower $key]) $value
		}
		close $filed

		# Override protocol handler to use file
		set data_return $surfit(cachedir)/[Cache_map_url $var(url)]
		set var(read_handler) PRfile_read
		set var(file) $data_return
		set var(readsize) [file size $data_return]
		return PRfile
	    } else {return {}}
	} else {return {}}
    } else {
	set hit [lindex $hit 1]
	set data_return [lindex $hit 3]
	array set var [lindex $hit 1]
	set var(readsize) [lindex $hit 2]
	set var(eof) 1
	if {[lindex $hit 0] == "PRfile"} {
	    # Override protocol handler to use file
	    set var(read_handler) PRfile_read
	    set var(file) [lindex $hit 3]
	}
	return [lindex $hit 0]
    }
}

proc Cache_map_url {url} {
    regsub -all {#} $url {##} url
    regsub -all {/} $url {#057} url
    return $url
}

# Return the filename for the cache file for the given URL
# Probably should first check that it exists...

proc Cache_get_file {url} {
    global surfit
    return $surfit(cachedir)/[Cache_map_url $url]
}

#
# Add an image to the cache
#
proc Cache_add_image {url img type} {
    global Cache_info
    # Determine the size of the image
    if {[image type $img] == "bitmap"} {set depth 1} else {set depth 24}
    set size [expr [image height $img] * [image width $img] * $depth]
    set Cache_info($url) \
	[list PRimage "HDRcontent-type $type HDRcontent-length $size" $size $img]
}

#
# Add a new document to the cache.
#
proc Cache_add_document {type state data} {
    global Cache_info
    upvar #0 $state var

    # Construct the list of attributes to store in the cache
    if {![info exists var(HDRcontent-length)]} {
	# We need to know the size of the document for expiration
	set var(HDRcontent-length) 0
    }
    set attr [array get var HDR*]
    append attr " " [array get var HTTP*]

    switch $type {
	PRfd	{
	    # Open a file to receive the data
	    global surfit
	    set filename "$surfit(cachedir)/[Cache_map_url $var(url)]"
	    if {![catch {open $filename w} var(cachefd)]} {
		set Cache_info($var(url)) [list PRfile $attr $var(HDRcontent-length) $filename]
	    } else {unset var(cachefd)}
	}

	PRdata	-
	PRfile	{set Cache_info($var(url)) [list $type $attr $var(HDRcontent-length) $data]}

	PRimage	{Cache_add_image $var(url) $data}

	default {error "improper data type \"$type\""}
    }
}

proc Cache_append {state data} {
    global Cache_info
    upvar #0 $state var

    if {[set hit [array get Cache_info $var(url)]] == {}} {return} ;# document not in cache
    set hit [lindex $hit 1]

    switch [lindex $hit 0] {
	PRdata	{
	    set new [lindex $hit end]
	    append new $data
	    set Cache_info($var(url)) [lreplace $hit 2 3 \
		[string length $new] $new]
	}

	PRfile	{
	    if {[set cid [array get var cachefd]] != {}} {
		# Cache file is already open for writing
		set cid [lindex $cid 1]
		set cleanup {}
	    } else {
		# Have to open cache file for appending
		if {![catch {open [lindex $Cache_info($var(url)) end] a} cid]} {
		    return
		} else {set cleanup {}}
	    }
	    puts -nonewline $cid $data
	    # Update size
	    eval $cleanup
	}

	PRfd	{error "can't append data to a file descriptor"}
	PRimage	{error "can't append data to an image"}
	default {error "improper cache data type \"[lindex $hit 0]\" value"}
    }
}

# Change the attributes for a document in the cache

proc Cache_set_header {url hdr value} {
    global Cache_info

    if {![info exists Cache_info($url)]} {return} ;# Not there
    array set tmp [lindex $Cache_info($url) 1]
    set tmp(HDR$hdr) $value
    set Cache_info($url) [lreplace $Cache_info($url) 1 1 [array get tmp]]
}

proc Cache_set_content_type {url type} {
    Cache_set_header $url content-type $type
}

proc Cache_set_content_length {url length} {
    Cache_set_header $url content-length $length
}

# Cache_close informs the cache that EOF has been detected while
# downloading data for the document.

proc Cache_close {state} {
    global Cache_info surfit
    upvar #0 $state var

    if {[set cid [array get var cachefd]] != {}} {
	close [lindex $cid 1]
	unset var(cachefd)
    }
    # Create the meta data file
    if {![catch {open $surfit(cachedir)/.[Cache_map_url $var(url)] w} filed]} {
	puts $filed [array get var HTTP*]
	foreach hdr [array names var HDR*] {
	    regexp {HDR(.*)} $hdr all key
	    puts $filed "$key: $var($hdr)"
	}
	close $filed
    }
}

# Cache_delete removes a document from the in-memory cache

proc Cache_delete {url} {
    global Cache_info
    if {[set hit [array get Cache_info $url]] != {}} {
	set hit [lindex $hit 1]
	if {[lindex $hit 0] == "PRimage"} {
	    image delete [lindex $hit 3]
	}
    }
    catch "unset Cache_info($url)"
}

# Flush all entries from the in-memory cache

proc Cache_flush {} {
    global Cache_info
    foreach url [array names Cache_info] {Cache_delete $url}
}

# Cache_expire manages the removal of cached documents from each of the
# different cache levels to meet resource usage constraints. 
# The application will probably want to run this in the background,
# so it should be careful not to do too much processing at a time.

proc Cache_expire {} {
    error "not yet implemented"
}

# Vaporise all traces of cached documents

proc Cache_expire_all {} {
    global surfit

    Cache_flush
    catch {foreach f [glob $surfit(cachedir)/*] {exec rm $f}}
    catch {foreach f [glob $surfit(cachedir)/.??*] {exec rm $f}}
}

# Cache_expire_document removes the given document from the cache

proc Cache_expire_document {url} {
    global surfit
    Cache_delete $url
    catch {exec rm -f $surfit(cachedir)/.[Cache_map_url $url]}
    catch {exec rm -f $surfit(cachedir)/[Cache_map_url $url]}
}
