/*
    Gn: A Server for the Internet Gopher Protocol(*).
    File: gn/search.c
    Version 2.21
    
    Copyright (C) 1995  <by John Franks>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 1, or (at your option)
    any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

    (*) Gopher is a registered trademark of the Univ. of Minn.
*/

#include <sys/types.h>
#include <sys/stat.h>
#include <stdio.h>
#include <string.h>
#include "gn.h"
#include "search.h"
#include "reg.h"
#include "regi.h"

/* Status codes.  None of the three is referenced by the functions below. */
#define	ERROR	(-1)
#define MATCH	(0)
#define NOMATCH	(1)


/*
 * Make up for deficient stat.h's: supply S_ISDIR() where the system
 * header lacks it.  The file-type field has to be isolated with S_IFMT
 * and compared with S_IFDIR; and-ing the mode with S_IFREG would test
 * for the wrong file type and would not mask the type field at all.
 */

#ifndef S_ISDIR
#define S_ISDIR(mode) (((mode) & S_IFMT) == S_IFDIR)
#endif


/* Forward declarations of the file-local helpers defined later on. */
static void	range_search();
static void	dowais();
static void	dogrep();
static void	www_results();
static int	found_match();

/*
 * sendsearch( ip)  Answer a search request for the item ip.
 *
 * The second character of ip->gtype selects the kind of search:
 *	'c'	cache_search()
 *	'm'	range_search()
 *	'g'	dogrep()
 *	'w'	dowais()
 *	'\0'	run the program ip->filepath and send the items of the
 *		"virtual cachefile" read from its output
 * Any other character is reported with senderr2() and the process
 * exits with status 2.
 *
 * When the search string ip->extra is empty an http client gets a
 * query page from www_query(); any other client gets an error and the
 * process exits with status 2.
 */

void
sendsearch(  ip)
Item	*ip;

{
	FILE	*fp;
	char	linebuf[PATHLEN],
		searchargs[MAXLEN];

	int	qflag = FALSE,
		room;

	Cache_entry	entry;

	if ( *(ip->extra) == '\0') {
		if ( http )
			qflag = TRUE;
		else {
			senderr("Search string is empty!", ip);
			exit( 2);
		}
	}

	switch( ip->gtype[1]) {
		case 'c':
			if ( qflag)
				www_query( ip, KEYMSG);
			else
				cache_search( ip);
			return;

		case 'm':
			if ( qflag)
				www_query( ip, REGMSG);
			else
				range_search( ip);
			return;

		case 'g':
			if ( qflag)
				www_query( ip, REGMSG);
			else
				dogrep( ip);
			return;

		case 'w':
			if ( qflag)
				www_query( ip, WAISMSG);
			else
				dowais( ip);
			return;

		case '\0':
			break;

		default:
			senderr2( "Unknown selector type", ip->gtype);
			/*
			 * Stop here: leaving the switch would go on to run
			 * ip->filepath as a search program even though the
			 * selector type was just rejected.
			 */
			exit( 2);
	}
	
	if ( qflag) {
		www_query( ip, ASKWORD);
		return;
	}

	cgi_env( ip);

	/*
	 * Build the argument string handed to the search program.
	 * NOTE(review): this relies on MAXLEN having room for two
	 * MIDLEN-limited strings plus the separating blank -- confirm.
	 */
	if ( *(ip->args) ) {
		mystrncpy( searchargs, ip->args, MIDLEN);

		/* Append anything in ip->extra  to the args */
		if ( *(ip->extra) ) {
			strcat( searchargs, " ");
			mystrncat( searchargs, ip->extra, MIDLEN);
		}
	}
	else
		mystrncpy( searchargs, ip->extra, MIDLEN);

	if ( (fp = safer_popen( ip->filepath, searchargs, "r"))
					 == (FILE *) NULL ) {
		senderr2( "Can't exec", ip->filepath);
		exit( 2);
	}

	/*
	 * ip->extra is supplied by the client, so limit how much of it is
	 * formatted: linebuf must also hold RESPNSE, the 11 characters of
	 * markup in the format string and the terminating NUL.
	 */
	room = PATHLEN - (int) strlen( RESPNSE) - 12;
	if ( room < 0)
		room = 0;
	sprintf(linebuf, "%s <b>`%.*s'</b>:", RESPNSE, room, ip->extra);
	www_results( ip, "Response", linebuf);


	while ( read_cache( &entry, fp)) {
		if ( http)
			www_cache_line( &entry);
		else
			send_cache_line( &entry);
	}	
	pclose( fp);
	cache_epilog( http);
	writelog( ip->relpath, "Sent search results", ip->selector);
}

/*
 * www_results( ip, str1, str2)  For http clients, start the HTML page
 * that will carry search results: call http_prolog(), send a head
 * containing <ISINDEX> and a title, repeat the title as an <H2>
 * heading, then send the text str2 and a paragraph break.  The title
 * is ip->name when that is non-empty, otherwise str1.
 * Does nothing when the client is not http.
 */
static void
www_results(ip, str1, str2)
Item	*ip;
char	*str1,
	*str2;
{
	char	page[BUFSIZE],
		*title;

	if ( http) {
		http_prolog( ip);

		if ( *(ip->name) != '\0')
			title = ip->name;
		else
			title = str1;

		send_text_line( "<HTML>\n<HEAD>\n<ISINDEX>" );
		sprintf( page, "<TITLE>%s</TITLE></HEAD><BODY><H2> %s</H2>",
				title, title);
		send_text_line( page);
		send_text_line( str2);
		send_text_line( "<P>" );
	}
}


/*
 * www_query( ip, str)  Send a complete HTML page that asks for a search
 * term.  After http_prolog() the page has a title and an <H2> heading
 * (ip->name if it is non-empty, otherwise str), the text str itself,
 * and finally an <ISINDEX> tag.
 */
void
www_query( ip, str)
Item	*ip;
char	*str;
{
	char	line[BUFSIZE],
		*heading;

	http_prolog( ip);

	/* Use the item's own name as heading unless it has none */
	heading = str;
	if ( *(ip->name) != '\0')
		heading = ip->name;

	sprintf( line, "<HTML>\n<HEAD>\n<TITLE>%s</TITLE></HEAD>", heading);
	send_text_line( line);

	sprintf( line, "<BODY><H2> %s </H2>", heading);
	send_text_line( line);

	/* The prompt text is sent from a local copy */
	mystrncpy( line, str, BUFSIZE);
	send_text_line( line);

	send_text_line( "<P>\n<ISINDEX>\n</BODY>\n</HTML>" );
}






/*
 * range_search( ip)  This is type "7m",  we open the composite cachefile
 * (ip->filepath followed by "." and cfname) and also the composite file
 * ip->filepath itself.  For each cachefile line the corresponding range
 * in the composite file is searched for a line matching the search term
 * in ip->extra, which is compiled as a regular expression.  Both the
 * term and every line read are passed through strlower() first.
 * When a match is found the cachefile line is sent (once per entry).
 *
 * An http client gets the results page header before the first hit, or
 * a "miss" header if nothing matched.  A bad regular expression or a
 * file that cannot be opened is reported and the process exits with
 * status 2.
 */

static void
range_search( ip)
Item	*ip;

{
	Item		curitem,
			*item_p;

	Cache_entry	entry,
			*cep;

	FILE	*fp,
		*cfp;

	struct regprog	*regp;

	int	firsttime = TRUE;

	char	cachepath[PATHLEN],
		msgbuf[BUFSIZE],
		querybuf[MIDLEN],
		linebuf[BUFSIZE];

	item_p = &curitem;
	cep = &entry;

	mystrncpy( querybuf, ip->extra, MIDLEN);
	if ( http)
		www_unescape( querybuf, '+');
	strlower( querybuf);

	/* NOTE(review): regp is never released; no matching free routine
	 * is visible from this file. */
	if ( (regp = regcomp( querybuf)) == NULL ) {
		senderr2( "Search term regular expression error",
			ip->extra);
		exit( 2);
	}


	sprintf( cachepath, "%s.%s", ip->filepath, cfname);

	if ( (fp = fopen( ip->filepath, "r")) == (FILE *) NULL ) {
		senderr2( "Can't open file", ip->filepath);
		exit( 2);
	}
	if ( (cfp = fopen( cachepath, "r")) == (FILE *) NULL ) {
		senderr2( "Can't open file", cachepath);
		exit( 2);
	}

	sprintf( msgbuf, "%s `<b>%s</b>'.  %s", REGRES1, querybuf, REGRES2);

	while ( read_cache( &entry, cfp)) {
		parse_selector( item_p, cep->path, FULL);

		/*
		 * Seek from the start of the file (whence 0).  If the seek
		 * fails the lines read would belong to some other entry,
		 * so skip this one.
		 */
		if ( fseek( fp, item_p->range_start, 0) != 0)
			continue;

		while ( fgets( linebuf, BUFSIZE, fp) && 
				(ftell( fp) <= item_p->range_end)) {
			chop( linebuf);
			if ( regfind( regp, strlower( linebuf)) ) {
				if ( http ) {
					if (firsttime == TRUE) {
						firsttime = FALSE;
						www_results( ip,
						"Structured File Search",
							msgbuf);
					}
					www_cache_line( cep);
				}
				else
					send_cache_line( cep);
				/* one hit is enough for this entry */
				break;
			}
		}
	}	

	/* Both files are finished with; don't leave them open */
	fclose( cfp);
	fclose( fp);

	if ( firsttime == TRUE) {
		sprintf( msgbuf, "%s <b>`%s'</b>.  %s", REGMISS,
			querybuf, REGRES2);
		www_results( ip, "Structured File Search", msgbuf);
	}
	cache_epilog( http);
	writelog( ip->relpath, "Sent range search", ip->selector);
}


/*
 * dowais( ip)  Do WAIS search of index in directory  ip->filepath by
 * replacing this process with the program named by WAISGN.  The program
 * is passed ip->filepath, rootdir, cfname, myhost, myport, ip->gtype, a
 * title (ip->name, or a default when that is empty), the protocol name
 * ("http" or "gopher") and the unescaped query taken from ip->extra.
 *
 * If WAISGN is empty, or the exec fails, an error is sent and the
 * process exits with status 2.
 */

static void
dowais( ip)
Item	*ip;
{
	char	*title,
		*protocol,
		querybuf[MIDLEN],
		waisgnfile[PATHLEN];

	strcpy( waisgnfile, WAISGN);

	/* Without a program to run there is nothing more to do */
	if ( *waisgnfile == '\0') {
		senderr( "Waisgn is not installed", ip);
		exit( 2);
	}

	mystrncpy( querybuf, ip->extra, MIDLEN);
	www_unescape( querybuf, ' ');

	protocol = ( http ? "http" : "gopher");

	http_prolog( ip);
	title = ( (*(ip->name)) ? ip->name : "WAIS Index Search Results" );
	
	/* Log now: nothing after a successful execl() is executed */
	writelog( ip->relpath, "Sent WAIS search", ip->selector );
	execl( waisgnfile, "waisgn", ip->filepath, rootdir, cfname, myhost,
		myport, ip->gtype, title, protocol, querybuf, (char *) NULL);

	/* execl() returns only when it has failed */
	senderr2( "Couldn't exec waisgn", waisgnfile);
	exit( 2);
}

/* Do regexp search of directory pointed to by ip->filepath */

static void
dogrep( ip)
Item	*ip;
{
	FILE	*fp,
		*gfp;

	struct regprog	*regp;

	register char	*cp;
	char	cachefile[PATHLEN],
		tmpbuf[PATHLEN],
		querybuf[MIDLEN],
		msgbuf[BUFSIZE];

	int	firsttime = TRUE;

	Cache_entry	entry;

	mystrncpy( querybuf, ip->extra, MIDLEN);
	if (http)
		www_unescape( querybuf, '+');
	strlower (querybuf);

	if ( streq( ip->gtype, "7g") )
		strcpy(cachefile, ip->filepath); /* size ok */
	else
		sprintf( cachefile, "%s/%s", ip->filepath, cfname);

	if ( (fp = fopen( cachefile, "r")) == (FILE *) NULL ) {
		senderr2( "Can't open file", cachefile);
		exit( 2);
	}

	sprintf( msgbuf, "%s <b>`%s'</b>.  %s", REGRES1, querybuf, REGRES2);
	while ( read_cache( &entry, fp)) {

		if ( entry.type1 != '0' )	/* Quit if not type 0 (file) */
			continue;

		strcpy( tmpbuf, rootdir);

		cp = strchr( entry.path, '/');
		mystrncat( tmpbuf, cp, PATHLEN);

		if ( (cp = strrchr( tmpbuf, '(')) != (char *) NULL)
			*cp = '\0';

		/* handle 0h items */
		if ( (entry.path[1] == 'h') && http ) {
			if ( streq( entry.suffix, "txt")) {
				cp = strrchr( tmpbuf, '.');
				if ( cp)
					*cp = '\0';
			}
			strcat ( tmpbuf, HTML_EXT);
		}

                if ( (gfp = fopen( tmpbuf, "r")) == (FILE *) NULL ) {
                        /* File is no longer there, keep going */
                        continue;
                }

		if ( (regp = regcomp( querybuf)) == NULL ) {
			senderr2( "Search term regular expression error",
					ip->extra);
			break;
		}		
		if ( found_match( regp, gfp) ) {
			if ( http ) {
				if (firsttime == TRUE) {
					firsttime = FALSE;
					www_results( ip,
					"Directory Regular Expression Search",
						msgbuf);
				}
				www_cache_line( &entry);
			}
			else
				send_cache_line( &entry);

		}
		fclose( gfp);
	}
	if ( firsttime == TRUE) {
		sprintf( msgbuf, "%s <b>`%s'</b>.  %s", REGMISS,
			querybuf, REGRES2);
		www_results( ip,
			"Directory Regular Expression Search", msgbuf);
	}
	cache_epilog( http);
	writelog(  ip->relpath, "Sent grep search", ip->selector);
}

/*
 * found_match( rp, fp)  Read fp from its current position, at most
 * BUFSIZE-1 characters at a time, pass each piece through strlower()
 * and try it against the compiled expression rp with regfind().
 * Returns TRUE as soon as one matches, FALSE if the input runs out
 * first.  The stream is left open for the caller to close.
 */
static int
found_match( rp, fp)
struct regprog	*rp;
FILE	*fp;
{
	char	text[BUFSIZE];
	int	hit = FALSE;

	/* hit is tested first, so nothing more is read after a match */
	while ( hit == FALSE && fgets( text, BUFSIZE, fp) != NULL) {
		strlower( text);
		if ( regfind( rp, text) )
			hit = TRUE;
	}
	return hit;
}

