: # *-*-perl-*-*
    eval 'exec perl -S $0 "$@"'
    if $running_under_some_shell;  

# $Id: ftpget.pl,v 1.24 1995/05/26 23:50:47 wessels Exp $

# ftpget.pl beaten (not hacked) to stuff HTTP and MIME on top of an FTP 
# retrieval.  Works like this:
#
#   The -htmlify option is only valid when $lfile (arg 1) is '-' (stdout).
#   In this case, the FTP object is written to a tmpfile.  If the transfer
#   was successful, then an HTTP success return code plus mime headers are
#   written, followed by the file data.  If the FTP object is a directory,
#   it will be converted to HTML.
#
#   Because the FTP data is written to a tmpfile we can return an accurate
#   content-length.  The content-type is decided from the filename
#   extention.  The table of extension mappings is included 'inline' at the
#   bottom of this file.  We also write the content-encoding, FWIW.
#
#   One big change is minimal use of 'die'.  Instead call &fail which
#   outputs an HTTP '500 Internal Failure' response.   This way, we
#   won't be caching failed FTP requests, plus the user gets to see
#   the real reason a request failed.
#
#   README files in FTP directories are included before the listing.
#
#   Is it safe to use 'gopher-internal-foo' icons?  Is this widely 
#   supported outside of Mosaic & Netscape?
#
#   -htmlify should really be -httpify
#
#   All this HTTP/MIME/HTML stuff should really be in the proxy.
#
#  D. Wessels 01/28/95

#
# Re-hacked to not buffer FTP data in a tmpfile.  Default behaviour
# is to NOT write to a tmpfile.  The HTTP response header will not 
# contain any ``content-length'' line.  If we get logged in to the
# FTP host, then an ``HTTP 200 OK'' response is sent.  If the transfer
# then fails, a brief message is written to stderr, but the HTTP code
# indicates success.
#
# A new option '-tmpfile' is provided to allow the previous behaviour and
# send valid a content-length.  This option is only valid if -htmlify is
# given.  Similarly, '-htmlify' is only valid if the output filename is
# '-' indicating stdout.
#
# [ This is because the ftp.pl library routines assume that the output    ]
# [ file will be identical to the source file.  Not true in our case      ]
# [ where we htmlify it.  The library routines do a stat on the output    ]
# [ file to get the file size.  If the size is non-zero, but less than    ]
# [ it should be, it tries to use the sometimes-implemented REST command. ]
#
# Note, the -tmpfile option MUST come before '-htmlify'.  We should look
# into using 'getopts.pl'...
#
# If the '-tmpfile' option never gets used, lets kill it.
#
# -DW

# 22-Feb-95
#
# Now we try the object as a directory before we try it as a file.
# if the 'CWD $rfile' command succeeds, its a directory, otherwise
# we assume it is a file that exists.  
#
# Also implemented the SIZE and MDTM commands.  If the server supports
# these (as wu-ftpd does) then we can return the content-length and
# Last-Modified times in the http header before getting any of the file
# data.  


$| = 1;
$ENV{'HARVEST_HOME'} = "/usr/local/harvest" unless defined $ENV{'HARVEST_HOME'};
unshift(@INC, "$ENV{'HARVEST_HOME'}/lib");	# use local files 

# Message sent in MIME headers
$ServerMsg = 'HarvestCache/1.2';

#
$ls_pattern = '^([ldrwxsSt-]+)\s*(\d+)\s*(\w+)\s*(\w+)\s*(\d+)\s*(\w+\s*\d+\s*[\d:]+)\s*(.*)$';

require 'ftp.pl';
require 'timelocal.pl';

@weekday = (	"Sunday", "Monday", "Tuesday", "Wednesday",
		"Thursday", "Friday", "Saturday"		);

@month = (	"Jan", "Feb", "Mar", "Apr", "May", "Jun",
		"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"	);


$debug		= 0;
$htmlify	= 0;
$did_directory  = 0;
$do_tmpfile	= 0;
$sent_header	= 0;  # have we sent HTTP reply header yet?

if ($#ARGV > 0 && $ARGV[0] eq "-fulldebug") {
	shift(@ARGV);
	$debug	= 1;
}

if ($#ARGV > 0 && $ARGV[0] eq "-tmpfile") {
	shift(@ARGV);
	$do_tmpfile = 1;
}

if ($#ARGV > 0 && $ARGV[0] eq "-htmlify") {
	shift(@ARGV);
	$htmlify = 1 if ($ARGV[0] eq "-");	# htmlify only if stdout
}

&usage() if ($#ARGV != 5);

$lfile = shift(@ARGV);		# local filename
$host = shift(@ARGV);		# FTP host
$rfile = shift(@ARGV);		# remote filename
$mode = shift(@ARGV);		# binary vs. ascii mode
$user = shift(@ARGV);		# user name
$password = shift(@ARGV);	# password

# $do_tmpfile = 1 is only allowed if $htmlify also == 1.
#
$do_tmpfile = 0 unless ($htmlify);
$lfile = &tempnam if ($htmlify && $do_tmpfile);


#  Fixes the Parent Directory link by removing traling /'s
$rfile =~ s/\/+$// if ($rfile =~ /[^\/]+\/+$/o);

$ftp_port	= 21;
$retry_call	= $htmlify ? 0 : 1;	# retry failed connect(2)?
$attempts	= $htmlify ? 1 : 3;	# how many times...

if ($debug) {
	$ftp'showfd = STDERR;
	$ftp'ftp_show = 1;
} else {
	open(DEVNULL, "> /dev/null") || &fail ("Cannot write to /dev/null");
	$ftp'showfd = DEVNULL;
	$ftp'ftp_show = 0;
}


&fail ("Cannot connect to $host: $ftp'response")
	if (&ftp'open($host, $ftp_port, $retry_call, $attempts) != 1);

&fail ("Cannot login to $host as $user: $ftp'response")
	if (!&ftp'login($user, $password));

&fail ("Cannot set mode to $mode")
	if (&ftp'type($mode) == 0);

# OK, so we made it this far.  Lets see if we can 'CWD' to the source
# filename.  If so, then its a directory and we generate a listing.
#
if (&ftp'cwd($rfile)) {
	if ($htmlify) {
		unless ($do_tmpfile) {
			$did_directory = 1;
    			($type, $enc, $ver) = &get_mime ($rfile);
    			&http_header ($type, $enc, $ver);
		}
		&fail ("Cannot retrieve file $rfile from $host")
			if (&try_dir_listing_html($rfile, $lfile));
	} else {
		&fail ("Cannot retrieve file $rfile from $host")
			if (&try_dir_listing($rfile, $lfile));
	}
} else {
	if ($htmlify && (! $do_tmpfile)) {
		# If we are here, the requested object is NOT a
		# directory.  We assume the transfer will succeed and
		# send an HTML success header.
		#
		($code, $size) = split (/\s+/, $ftp'response)
			if (&ftp'quote ("SIZE $rfile"));
		($code, $mdtm) = split (/\s+/, $ftp'response)
			if (&ftp'quote ("MDTM $rfile"));
		($type, $enc, $ver) = &get_mime ($rfile);
		&http_header ($type, $enc, $ver, $size, $mdtm);
	}
	# Retrieve the file
	#
	&fail ("Cannot retrieve file $rfile from $host: $ftp'response")
		if (!&ftp'get($rfile, $lfile, 0));
}

&ftp'quit();

# were done if we don't have to put HTTP/MIME headers on this thing
# were also done if we didn't write to tmpfile.
exit(0) unless ($htmlify && $do_tmpfile);

($type, $enc, $ver) = &get_mime ($rfile);
$size = &get_file_size ($lfile);

# $lfile contains the FTP object just retrieved
#
open (F, $lfile)	|| &fail ("$lfile: $!");
unlink ($lfile);	# its a tmpfile

&http_header ($type, $enc, $ver, $size);

print STDOUT $buf while (read (F, $buf, 4096));
close (F);

exit (0);


# ========================================================================
# SUBROUTINES


sub usage {
	print STDERR "Usage: ftpget.pl [-tmpfile] [-htmlify] localfile hostname filename A,I username password\n";
	exit(1);
}


#  Gives a listing of the current directory.  Should only be called
#  if &ftp'cwd($dir) was successful.
#
sub try_dir_listing {
	local($dir, $lfile) = @_;
	local($x);

	## return 1 if (!&ftp'cwd($dir));
	$x = &ftp'dir_open("-l");
	$x = &ftp'dir_open if ($x == 2);
	&fail ("Cannot list directory $dir on $host: $ftp'response")
		if ($x != 1);
	$rls = "ftp'NS";
	if ($lfile eq "-") {
		$OUT = STDOUT;
	} else {
		open ($OUT, ">$lfile") || &fail ("$lfile: $!");
	}
	print $OUT $_  while (<$rls>);
	close ($OUT);
	&ftp'dir_close();
	0;
}

#
#  Put hacks in here to HTML-ify FTP directories. 
#
#   Assume symlinks are pointers to files (mainly for the icons)
#
#  This routine should never be called if the ultimate destination
#  of the FTP object is stdout.
#
#  Before producting the listing, try to open a README file in
#  the requested directory.  If the open is successful, show it.
#
#  Gives a listing of the current directory.  Should only be called
#  if &ftp'cwd($dir) was successful.
#
sub try_dir_listing_html {
	local($dir, $lfile) = @_;
	local($x);
	## &fail ("Cannot cwd to directory $dir: $ftp'response")
	## 	if (!&ftp'cwd($dir));

	$did_directory = 1;

        open (OUT, ">$lfile") || &fail ("$lfile: $!");

	$dir = "/" . $dir if ($dir !~ /^\//);
	$URL = "ftp://" . $host . $dir . "/";

	print OUT "<TITLE>FTP Directory: $URL</TITLE>\n";
	print OUT "<H2>FTP Directory: $URL</H2>\n";
	$README = &tempnam;
	if (&ftp'get ("$dir/README", $README, 0) != 0) {
		open README;
		unlink ($README);
		print OUT "<HR>\n";
		print OUT "<H4>README file from $URL</H4>\n";
		print OUT "<PRE>\n";
		print OUT while (<README>);
		close README;
		print OUT "</PRE>\n";
		print OUT "<HR>\n";
	}

	$x = &ftp'dir_open("-l");
        $x = &ftp'dir_open if ($x == 2);
        &fail ("Cannot list directory $dir on $host: $ftp'response")
                if ($x != 1);
	$rls = "ftp'NS";
	print OUT "<PRE>\n";
	print OUT &htmlize_list_entry ("..\n");
	while (<$rls>) {
		s/\r//g;
		next if (/\s+\.\.?$/);	# skip . and ..
	        print OUT &htmlize_list_entry ($_);
	}
	print OUT "</PRE>\n";
	close (OUT);
	&ftp'dir_close();
	0;
}

sub htmlize_list_entry {
	$_ = shift;
	chop;

	if ($_ eq ".." && $dir ne '/') {
		$pd = $dir;
		$pd =~ s'/[^/]+$'/';
		$icon = sprintf ("<IMG SRC=\"%s\" ALT=\"[DIR] \">",
			"internal-gopher-menu");
		$link = sprintf ("<A HREF=\"%s\">%-24s",
			"ftp://".$host.$pd, "Parent Directory</A>");
		$buf =  sprintf ("%s %s\n",
			$icon, $link);
		return $buf;
	} 

	return "" unless (/$ls_pattern/);
	$p	= $1;
	#$nl	= $2;
	#$u	= $3;
	#$g	= $4;
	$s	= $5;
	$d	= $6;
	@f	= split (/\s+/, $7);

	$f = join (" ", @f);
	$f1 = $f[0];
	$f2 = $f[$#f];
	$f3 = substr ($f1, 0, 20);
	#$s /= 1024;				# rounds down
	$s = ($s + 1023) / 1024;		# rounds up
	$buf = "";

	if ($p =~ /^-/) {
		$icon = sprintf ("<IMG SRC=\"%s\" ALT=\"[FILE]\">",
			"internal-gopher-text");
		$link = sprintf ("<A HREF=\"%s%s\">%-24s",
			$URL, $f2, "$f3</A>");
		$buf =  sprintf ("%s %s  [%s] %6dk\n",
			$icon, $link, $d,$s);
	} elsif ($p =~ /^d/) {
		$icon = sprintf ("<IMG SRC=\"%s\" ALT=\"[DIR] \">",
			"internal-gopher-menu");
		$link = sprintf ("<A HREF=\"%s%s\">%-24s",
			$URL, "$f2/", "$f3</A>");
		$buf =  sprintf ("%s %s  [%s]\n",
			$icon, $link, $d);
	} elsif ($p =~ /^l/) {
		$icon = sprintf ("<IMG SRC=\"%s\" ALT=\"[LINK]\">",
			"internal-gopher-menu");
		$link = sprintf ("<A HREF=\"%s%s\">%-24s",
			$URL, "$f2", "$f3</A>");
		$buf =  sprintf ("%s %s  [%s]\n",
			$icon, $link, $d);
	}
	$buf;
}

sub tempnam {
	local ($f) = sprintf ("ftpget%d%06x", $$, rand (0xFFFFFF));
	return "$ENV{'TMPDIR'}/$f"
		if (-d $ENV{'TMPDIR'} && $ENV{'TMPDIR'} ne "");
	return "/usr/tmp/$f"
		if (-d "/usr/tmp");
	return "/tmp/$f";
}

sub get_file_size {
	local ($f) = shift;
	local (@sb) = stat ($f);
	return -1 if (@sb == ());
	return $sb[7];
}

sub fail {
	local ($msg) = shift;

        # If this is a 'raw' transfer, don't write a HTTP error header,
        # also if we're not putting the FTP data into a tmpfile.
        #
	if (! $htmlify || $sent_header) {
		print STDERR "ftpget.pl: ", $msg, "\n";
		exit(1);
	}

	$rfile = '/' . $rfile unless ($rfile =~ /^\//);
	$html = <<EOF;
<HTML>
<HEAD>
<TITLE>Error Message</TITLE>
</HEAD>
<BODY>
<H1>Fatal Error 500</H1>
<P>
Can not access URL: ftp://$host$rfile
<P>
<B>Reason:</B> $msg
</BODY>
</HTML>
EOF

	$len = length ($html);
	print <<EOF;
HTTP/1.0 500 Internal error
MIME-Version: 1.0
Server: $ServerMsg
Content-Type: text/html
Content-Length: $len

$html
EOF
exit (1);
}


sub init_mime {
	local ($i) = 0;
	undef %MIMEType;
	undef %MIMEEnc;
	while (<DATA>) {
		next if (/^#/);
		chop;
		($ext, $type, $enc, $qual) = split;
		$MIMEType{$ext} = $type;
		$MIMEEnc{$ext}  = $enc;
		$i++;
	}
	$i;
}

sub get_mime {
	local ($filename) = shift;
	local (@parts) = ();
	local ($ext1) = 'default';
	local ($ext2) = undef;
	&init_mime;

	@parts = split ('\.', $filename);
	$ext1 = pop @parts if ($#parts > $[);
	if ($ext1 eq 'gz' || $ext1 eq 'Z') {
		$ext2 = $ext1;
		$ext1 = 'default';
		$ext1 = pop @parts if ($#parts > $[);
	}

	$ext1 = $ext2 = 'html'
		if ($did_directory && defined($MIMEType{'.html'}));
	($MIMEType{".$ext1"}, $MIMEEnc{".$ext2"}, '1.0');
}

sub http_header {
	local ($type, $enc, $ver, $size, $mdtm) = @_;

# $mdtm is an ISO 3307 style time: YYYYMMDDHHMMSS or YYYYMMDDHHMMSS.xxx
# We need to convert it to http time: Weekday, 00-Mon-00 00:00:00 GMT

	local ($http_time) = undef;
	if ($mdtm =~ /^(\d\d\d\d)(\d\d)(\d\d)(\d\d)(\d\d)(\d\d).*$/) {
		local (@T) = gmtime (&timegm ($6,$5,$4,$3,$2-1,$1-1900));
		$http_time = sprintf ("%s, %02d-%s-%02d %02d:%02d:%02d GMT",
			$weekday[$T[6]],
			$T[3],
			$month[$T[4]],
			$T[5] % 100,
			$T[2], $T[1], $T[0]);
	}

	print "HTTP/1.0 200 Gatewaying\n";
	print "Server: $ServerMsg\n";
	print "MIME-Version: $ver\n"		if ($ver  ne '');
	print "Content-Length: $size\n"		if ($size ne '');
	print "Content-Type: $type\n"		if ($type ne '');
	print "Content-Encoding: $enc\n"	if ($enc  ne '');
	print "Last-Modified: $http_time\n"	if ($http_time ne '');
	print "\n";
	$sent_header = 1;
}

#
# The following section will be read by the <DATA> filehandle.
#
# Note, we may be doing encodings slightly wrong.  The way NCSA httpd_1.3
# works is that 'image.xbm.gz' comes back with MIME headers such as:
#     content-type: image/x-xbitmap
#     content-encoding: x-gzip
# For us to do this, we'd need to look at two levels of filename extensions

__END__

.mime      www/mime                         8bit     1.0
.bin       application/octet-stream         binary   1.0
.EXE       application/octet-stream         binary   1.0
.exe       application/octet-stream         binary   1.0
.oda       application/oda                  binary   1.0
.pdf       application/pdf                  binary   1.0
.ai        application/postscript           8bit     0.5
.PS        application/postscript           8bit     0.8
.eps       application/postscript           8bit     0.8
.ps        application/postscript           8bit     0.8
.rtf       application/x-rtf                7bit     1.0
.Z         application/x-compressed         x-compress  1.0
.gz        application/x-gzip               x-gzip   1.0
.tgz       application/x-gzip               x-gzip   1.0
.csh       application/x-csh                7bit     0.5
.dvi       application/x-dvi                binary   1.0
.hdf       application/x-hdf                binary   1.0
.latex     application/x-latex              8bit     1.0
.nc        application/x-netcdf             binary   1.0
.cdf       application/x-netcdf             binary   1.0
.sh        application/x-sh                 7bit     0.5
.tcl       application/x-tcl                7bit     0.5
.tex       application/x-tex                8bit     1.0
.texi      application/x-texinfo            7bit     1.0
.texinfo   application/x-texinfo            7bit     1.0
.t         application/x-troff              7bit     0.5
.roff      application/x-troff              7bit     0.5
.tr        application/x-troff              7bit     0.5
.man       application/x-troff-man          7bit     0.5
.me        application/x-troff-me           7bit     0.5
.ms        application/x-troff-ms           7bit     0.5
.src       application/x-wais-source        7bit     1.0
.zip       application/zip                  binary   1.0
.bcpio     application/x-bcpio              binary   1.0
.cpio      application/x-cpio               binary   1.0
.gtar      application/x-gtar               binary   1.0
.shar      application/x-shar               8bit     1.0
.sv4cpio   application/x-sv4cpio            binary   1.0
.sv4crc    application/x-sv4crc             binary   1.0
.tar       application/x-tar                binary   1.0
.ustar     application/x-ustar              binary   1.0
.snd       audio/basic                      binary   1.0
.au        audio/basic                      binary   1.0
.aiff      audio/x-aiff                     binary   1.0
.aifc      audio/x-aiff                     binary   1.0
.aif       audio/x-aiff                     binary   1.0
.wav       audio/x-wav                      binary   1.0
.gif       image/gif                        binary   1.0
.ief       image/ief                        binary   1.0
.jpg       image/jpeg                       binary   1.0
.JPG       image/jpeg                       binary   1.0
.JPE       image/jpeg                       binary   1.0
.jpe       image/jpeg                       binary   1.0
.JPEG      image/jpeg                       binary   1.0
.jpeg      image/jpeg                       binary   1.0
.tif       image/tiff                       binary   1.0
.tiff      image/tiff                       binary   1.0
.ras       image/cmu-raster                 binary   1.0
.pnm       image/x-portable-anymap          binary   1.0
.pbm       image/x-portable-bitmap          binary   1.0
.pgm       image/x-portable-graymap         binary   1.0
.ppm       image/x-portable-pixmap          binary   1.0
.rgb       image/x-rgb                      binary   1.0
.xbm       image/x-xbitmap                  binary   1.0
.xpm       image/x-xpixmap                  binary   1.0
.xwd       image/x-xwindowdump              binary   1.0
.html      text/html                        8bit     1.0
.htm       text/html                        8bit     1.0
.HTML      text/html                        8bit     1.0
.HTM       text/html                        8bit     1.0
.c         text/plain                       7bit     0.5
.h         text/plain                       7bit     0.5
.C         text/plain                       7bit     0.5
.cc        text/plain                       7bit     0.5
.hh        text/plain                       7bit     0.5
.m         text/plain                       7bit     0.5
.f90       text/plain                       7bit     0.5
.txt       text/plain                       7bit     0.5
.rtx       text/richtext                    7bit     1.0
.tsv       text/tab-separated-values        7bit     1.0
.etx       text/x-setext                    7bit     0.9
.MPG       video/mpeg                       binary   1.0
.mpg       video/mpeg                       binary   1.0
.MPE       video/mpeg                       binary   1.0
.mpe       video/mpeg                       binary   1.0
.MPEG      video/mpeg                       binary   1.0
.mpeg      video/mpeg                       binary   1.0
.qt        video/quicktime                  binary   1.0
.mov       video/quicktime                  binary   1.0
.avi       video/x-msvideo                  binary   1.0
.movie     video/x-sgi-movie                binary   1.0
#default    text/plain                       7bit     0.5
default    application/octet-stream         binary   1.0
