/*
 * has.c
 *
 * Library of article body searching routines.
 *
 */

 /*
  * Newsclip(TM) Library Source Code.
  * Copyright 1989 Looking Glass Software Limited.  All Rights Reserved.
  * Unless otherwise licenced, the only authorized use of this source
  * code is compilation into a binary of the newsclip library for the
  * use of licenced Newsclip customers.  Minor source code modifications
  * are allowed.
  * Use of this code for a short term evaluation of the product, as defined
  * in the associated file, 'Licence', is permitted.
  */

#include "nl.h"
#include "rei.h"
#include "body.h"

/* External routines called by the code below. */
extern char *arr_string AC(( array *, int, int ));
extern void  parse_body AC(( unsigned int, unsigned int ));
extern void  paragraphize AC(( area_type * ));
extern void  init_stats AC(( int ));

/* Fetch the compiled RE for a user pattern index.  The index is
 * one-based: index 1 selects user_rxps[0]. */
#define fetch_rxp( idx )	user_rxps[idx-1]

extern long time_now;		/* Used to update dbase access times. */

extern int  paragraph_scan;	/* Indicates whether in paragraph mode */

extern area_type *Article;	/* Ptr to the paragraphed article structures */
extern area_type *RawText;	/* Ptr to the raw article structures */

extern char *include_prefix;	/* User-defined prefix for included lines */
extern char *signature_start;	/* User-defined start for signature */
extern int pattern_count;	/* Number of expressions in user_patterns */
extern char *user_patterns[];	/* User's RE patterns, as typed. */
/* Parallel to user_patterns[]; allocated and filled by init_patterns(). */
static rxp_type *user_rxps = (rxp_type *) NULL;	/* Array of compiled user REs */

/* Forward declarations of the file-local scanning helpers. */
static int scan_text AC(( int, rxp_type ));
static int scan_array AC(( array *, rxp_type ));
static int scan_db AC(( dbptr, rxp_type ));

/* str_has_str() - "string has string" pattern matching routine.
 * The textual regular expression in pptr is compiled with REG_COMP_S()
 * and executed against sptr with REG_EXEC().
 *
 * Returns H_TRUE when REG_EXEC() reports a match, otherwise H_FALSE.
 *
 * NOTE(review): unlike scan_array(), scan_db() and scan_text(), the
 * compiled expression is not tested for null before REG_EXEC() is
 * called - confirm that REG_EXEC() tolerates a null expression. */

int
str_has_str( sptr, pptr )
char *sptr;			/* String in which to search */
char *pptr;			/* Regular expression, as text (not compiled) */
{
	if( REG_EXEC( REG_COMP_S( pptr ), sptr ) )
		return( H_TRUE );

	return( H_FALSE );
}

/* str_has_pat() - "string has pattern" pattern matching routine.
 * Performs as str_has_str(), but the pattern is given as a one-based
 * index into the array of compiled user patterns (see fetch_rxp()).
 *
 * sptr - the string to be searched.
 * pidx - one-based index of the pre-compiled user pattern.
 *
 * Returns H_TRUE if the pattern matches sptr, otherwise H_FALSE.
 * H_FALSE is also returned when the selected slot holds no compiled
 * expression, which is the same guard scan_array(), scan_db() and
 * scan_text() apply before executing an expression. */

int
str_has_pat( sptr, pidx )
char *sptr;			/* Pointer to the searched string */
int pidx;			/* One-based index of the pre-compiled R.E. */
{
	/* An empty slot can never match; do not hand it to REG_EXEC(). */
	if( !fetch_rxp( pidx ) )
		return( H_FALSE );

	return( REG_EXEC( fetch_rxp( pidx ), sptr ) ? H_TRUE : H_FALSE );
}

/* str_has_db() - "string has database" pattern matching routine.
 * Walks the records of the database with first_rec()/next_rec(),
 * compiling the name field of each record as a RE and executing it
 * against the given string.  On the first match the record's
 * access_date is set to time_now and H_TRUE is returned; H_FALSE is
 * returned when no record matches.
 *
 * NOTE(review): unlike scan_array(), scan_db() and scan_text(), the
 * compiled expression is not tested for null before REG_EXEC() is
 * called - confirm that REG_EXEC() tolerates a null expression. */

int
str_has_db( sptr, db )
char *sptr;
dbptr db;
{
	register userdb *entry = (userdb *) first_rec( db );

	while( entry ) {
		if( REG_EXEC( REG_COMP_S( entry->name ), sptr ) ) {
			/* Record that this entry was useful */
			entry->access_date = time_now;
			return( H_TRUE );
		}
		entry = (userdb *) next_rec( db, (dbrec *) entry );
	}

	return( H_FALSE );
}

/* str_has_arr() - "string has array" pattern matching routine.
 * Each element of the array, obtained as a string through arr_string(),
 * is compiled as a RE and executed against the given string.
 *
 * sptr - the string to be searched.
 * aptr - the array whose elements supply the patterns.
 *
 * Returns H_TRUE as soon as one element matches, H_FALSE if none does.
 * An element for which no compiled expression is obtained is skipped,
 * which is the same guard scan_array(), scan_db() and scan_text() apply
 * before executing an expression. */

int
str_has_arr( sptr, aptr )
char *sptr;
array *aptr;
{
	register rxp_type rxp;
	register int idx;
	int asize = aptr->arsize + AR_LOW_IDX;	/* One past the last index */
	int atype = aptr->artype;

	for( idx = AR_LOW_IDX; idx < asize; idx++ ) {
		rxp = REG_COMP_S( arr_string( aptr, atype, idx ) );
		/* Never hand a null expression to REG_EXEC(). */
		if( rxp && REG_EXEC( rxp, sptr ) )
			return( H_TRUE );
		}

	return( H_FALSE );
}

/* arr_has_str() - "array has string" pattern matching routine.
 * Compiles the given textual RE with REG_COMP_S() and hands it to
 * scan_array(), whose result (H_TRUE if some element of the array
 * matches, otherwise H_FALSE) is returned unchanged. */

int
arr_has_str( aptr, pptr )
array *aptr;		/* Array whose elements are searched */
char *pptr;		/* Regular expression, as text (not compiled) */
{
	int found;

	found = scan_array( aptr, REG_COMP_S( pptr ) );

	return( found );
}

/* arr_has_pat() - "array has pattern" pattern matching routine.
 * As arr_has_str(), except that the RE is selected by a one-based index
 * into the array of compiled user patterns (see fetch_rxp()).  The
 * result of scan_array() is returned unchanged. */

int
arr_has_pat( aptr, pidx )
array *aptr;		/* Array whose elements are searched */
int pidx;		/* One-based index of the pre-compiled R.E. */
{
	int found;

	found = scan_array( aptr, fetch_rxp( pidx ) );

	return( found );
}

/* arr_has_db() - "array has database" pattern matching routine.
 * Walks the records of the database, compiling the name field of each
 * as a RE and scanning the array for it with scan_array().  On the
 * first hit the record's access_date is set to time_now and H_TRUE is
 * returned; H_FALSE is returned if no record yields a match. */

int
arr_has_db( aptr, db )
array *aptr;
dbptr db;
{
	register userdb *entry = (userdb *) first_rec( db );

	while( entry ) {
		if( scan_array( aptr, REG_COMP_S( entry->name ) ) == H_TRUE ) {
			/* Record that this entry was useful */
			entry->access_date = time_now;
			return( H_TRUE );
		}
		entry = (userdb *) next_rec( db, (dbrec *) entry );
	}

	return( H_FALSE );
}

/* arr_has_arr() - "array has array" pattern matching routine.
 * Every element of the second array is fetched as a string through
 * arr_string(), compiled as a RE and looked for in the first array
 * with scan_array().  Returns H_TRUE at the first hit, otherwise
 * H_FALSE. */

int
arr_has_arr( aptr, apptr )
array *aptr;		/* Array that is searched */
array *apptr;		/* Array supplying the patterns */
{
	register rxp_type compiled;
	register int pos = AR_LOW_IDX;
	int limit = apptr->arsize + AR_LOW_IDX;	/* One past the last index */
	int kind = apptr->artype;

	while( pos < limit ) {
		compiled = REG_COMP_S( arr_string( apptr, kind, pos ) );
		if( scan_array( aptr, compiled ) == H_TRUE )
			return( H_TRUE );
		pos++;
	}

	return( H_FALSE );
}

/* scan_array() holds the code shared by the routines that search an
 * array: the already compiled RE is executed against each element of
 * the array in turn, the element being obtained as a string through
 * arr_string().  Returns H_TRUE at the first match.  H_FALSE is
 * returned if nothing matches or if the compiled RE is null. */

static int
scan_array( aptr, a_rxp )
array *aptr;
rxp_type a_rxp;
{
	register int pos;
	int limit = aptr->arsize + AR_LOW_IDX;	/* One past the last index */
	int kind = aptr->artype;

	/* Nothing can match without a compiled expression */
	if( !a_rxp )
		return( H_FALSE );

	for( pos = AR_LOW_IDX; pos < limit; pos++ ) {
		if( REG_EXEC( a_rxp, arr_string( aptr, kind, pos ) ) )
			return( H_TRUE );
	}

	return( H_FALSE );
}

/* db_has_pat() - "database has pattern" pattern matching routine.
 * Selects a compiled user pattern by its one-based index (see
 * fetch_rxp()) and passes it to scan_db(), which tests it against the
 * key of every record in the database.  The result of scan_db() is
 * returned unchanged. */

int
db_has_pat( db, pidx )
dbptr db;		/* Database whose keys are searched */
int pidx;		/* One-based index of the pre-compiled R.E. */
{
	int found;

	found = scan_db( db, fetch_rxp( pidx ) );

	return( found );
}

/* db_has_str() - "database has string" pattern matching routine.
 * As db_has_pat(), except that the RE arrives as text and is compiled
 * here with REG_COMP_S() before being passed to scan_db().  The result
 * of scan_db() is returned unchanged. */

int
db_has_str( db, pptr )
dbptr db;		/* Database whose keys are searched */
char *pptr;		/* Regular expression, as text (not compiled) */
{
	int found;

	found = scan_db( db, REG_COMP_S( pptr ) );

	return( found );
}

/* db_has_arr() - "database has array" pattern matching routine.
 * Every element of the array is fetched as a string through
 * arr_string(), compiled as a RE and looked for among the database
 * keys with scan_db().  Returns H_TRUE at the first hit, otherwise
 * H_FALSE. */

int
db_has_arr( db, apptr )
dbptr db;
array *apptr;
{
	register rxp_type compiled;
	register int pos = AR_LOW_IDX;
	int limit = apptr->arsize + AR_LOW_IDX;	/* One past the last index */
	int kind = apptr->artype;

	while( pos < limit ) {
		compiled = REG_COMP_S( arr_string( apptr, kind, pos ) );
		if( scan_db( db, compiled ) == H_TRUE )
			return( H_TRUE );
		pos++;
	}

	return( H_FALSE );
}

/* scan_db() holds the code shared by the database "has" routines: the
 * compiled RE is executed against the key of each record, the records
 * being visited with first_rec()/next_rec().  Returns H_TRUE at the
 * first match.  H_FALSE is returned if no key matches or if the
 * compiled RE is null. */

static int
scan_db( db, rxp )
dbptr db;
rxp_type rxp;
{
	register dbrec *entry;

	/* Nothing can match without a compiled expression */
	if( !rxp )
		return( H_FALSE );

	entry = first_rec( db );
	while( entry ) {
		if( REG_EXEC( rxp, entry->key ) )
			return( H_TRUE );
		entry = next_rec( db, entry );
	}

	return( H_FALSE );
}

/* text_has_str() - "text has string" pattern matching routine.
 * Compiles the given textual RE with REG_COMP_S() and passes it, with
 * the text type, to scan_text().  The result of scan_text() is
 * returned unchanged. */

int
text_has_str( tid, pptr )
int tid;		/* Type of text to be searched */
char *pptr;		/* Regular expression, as text (not compiled) */
{
	int found;

	found = scan_text( tid, REG_COMP_S( pptr ) );

	return( found );
}

/* text_has_pat() - "text has pattern" pattern matching routine.
 * As text_has_str(), except that the RE is selected by a one-based
 * index into the array of compiled user patterns (see fetch_rxp()).
 * The result of scan_text() is returned unchanged. */

int
text_has_pat( tid, pidx )
int tid;		/* Type of text to be searched */
int pidx;		/* One-based index of the pre-compiled R.E. */
{
	int found;

	found = scan_text( tid, fetch_rxp( pidx ) );

	return( found );
}

/* text_has_arr() - "text has array" pattern matching routine.
 * Every element of the array is fetched as a string through
 * arr_string(), compiled as a RE and looked for in the selected text
 * with scan_text().  Returns H_TRUE at the first hit, otherwise
 * H_FALSE. */

int
text_has_arr( tid, apptr )
int tid;		/* Type of text to be searched */
array *apptr;		/* Array supplying the patterns */
{
	register rxp_type compiled;
	register int pos = AR_LOW_IDX;
	int limit = apptr->arsize + AR_LOW_IDX;	/* One past the last index */
	int kind = apptr->artype;

	while( pos < limit ) {
		compiled = REG_COMP_S( arr_string( apptr, kind, pos ) );
		if( scan_text( tid, compiled ) == H_TRUE )
			return( H_TRUE );
		pos++;
	}

	return( H_FALSE );
}

/* text_has_db() - "text has database" pattern matching routine.
 * Walks the records of the database, compiling the name field of each
 * as a RE and looking for it in the selected text with scan_text().
 * On the first hit the record's access_date is set to time_now and
 * H_TRUE is returned; H_FALSE is returned if no record yields a match. */

int
text_has_db( tid, db )
int tid;		/* Type of text to be searched */
dbptr db;		/* Database supplying the patterns */
{
	register userdb *entry = (userdb *) first_rec( db );

	while( entry ) {
		if( scan_text( tid, REG_COMP_S( entry->name ) ) == H_TRUE ) {
			/* Record that this entry was useful */
			entry->access_date = time_now;
			return( H_TRUE );
		}
		entry = (userdb *) next_rec( db, (dbrec *) entry );
	}

	return( H_FALSE );
}

/* scan_text() holds the code shared by the text "has" routines.
 *
 * tid   - type of text to search; an area is examined only when its
 *         txt_typ has a bit in common with tid.
 * rxptr - the compiled RE; a null value yields H_FALSE at once.
 *
 * When tid is exactly LT_BODY and paragraph_scan is off, read_body()
 * is called and the RawText list is walked; in every other case
 * parse_body() is called and the Article list is walked.
 *
 * In paragraph mode each selected area is tested through its para
 * text, paragraphize() being called first if para is not yet set.
 * Otherwise every u_txt entry of every node on the area's list is
 * tested.  Returns H_TRUE at the first match, otherwise H_FALSE. */

static int
scan_text( tid, rxptr )
int tid;
rxp_type rxptr;
{
	register u_list *chunk;
	register int line;
	area_type *area;

	if( !rxptr )
		return( H_FALSE );

	if( tid == LT_BODY && !paragraph_scan ) {
		read_body( 1, MAXINT );
		area = RawText;
	}
	else {
		parse_body( 1, MAXINT );
		area = Article;
	}

	for( ; area; area = area->next ) {
		/* Skip areas of a type that was not asked for */
		if( !( area->txt_typ & tid ) )
			continue;

		if( paragraph_scan ) {
			if( !area->para )
				paragraphize( area );
			if( REG_EXEC( rxptr, area->para ) )
				return( H_TRUE );
			continue;
		}

		for( chunk = area->list; chunk; chunk = chunk->next )
			for( line = 0; line < chunk->size; line++ )
				if( REG_EXEC( rxptr, chunk->u_txt[line] ) )
					return( H_TRUE );
	}

	return( H_FALSE );
}

/* init_patterns() builds the array of compiled user patterns.  When
 * pattern_count is non-zero, room for that many compiled expressions
 * is obtained from perm_alloc() and each entry of user_patterns[] is
 * compiled with REG_COMP_P() into the slot of the same index.  The
 * include prefix and signature start are then passed on through
 * set_include_prefix() and set_signature_start(). */

void
init_patterns()
{
	register int slot;

	if( pattern_count ) {
		user_rxps = (rxp_type *)
			    perm_alloc( sizeof(rxp_type)*pattern_count );

		slot = 0;
		while( slot < pattern_count ) {
			user_rxps[slot] = REG_COMP_P( user_patterns[slot] );
			slot++;
		}
	}

	set_include_prefix( include_prefix );
	set_signature_start( signature_start );
}

/* line_count() reports the line count recorded for the given section
 * of the article body.  init_stats() is called for the section first;
 * the ID_LINES entry of ArticleStats for that section is then
 * converted with makeint() and returned. */

int
line_count( statid )
int statid;
{
	int lines;

	init_stats( statid );
	lines = makeint( ArticleStats[ID_LINES][statid] );

	return( lines );
}

/* byte_count() reports the byte count recorded for the given section
 * of the article body.  init_stats() is called for the section first;
 * the ID_BYTES entry of ArticleStats for that section is then
 * converted with makeint() and returned. */

int
byte_count( statid )
int statid;
{
	int bytes;

	init_stats( statid );
	bytes = makeint( ArticleStats[ID_BYTES][statid] );

	return( bytes );
}

/* literal_pattern() returns a newly allocated copy of the given string
 * in which every character found in REMagic (the regular expression
 * metacharacters, backslash included) is preceded by a backslash.
 *
 * ptr - the NUL-terminated string to be literalized; it is not modified.
 *
 * Returns a pointer to the escaped, NUL-terminated copy, which is
 * obtained from perm_alloc().
 *
 * No escape state is tracked while scanning: backslash is itself listed
 * in REMagic, so a backslash in the input is escaped like any other
 * metacharacter and never affects the character that follows it. */

static char *REMagic = ".*+|?[]()^$\\";

char *
literal_pattern( ptr )
char *ptr;			/* String to be literalized (escaped) */
{
	int len = 1;		/* Size of the escaped copy, NUL included */
	char *src;		/* Read position in the original string */
	char *retstr, *rptr;

	/* First pass: a metacharacter needs two bytes, anything else one. */
	for( src = ptr; *src; src++ )
		len += strchr( REMagic, *src ) ? 2 : 1;

	rptr = retstr = perm_alloc( len*sizeof(char) );

	/* Second pass: copy, inserting a backslash before metacharacters. */
	for( src = ptr; *src; src++ ) {
		if( strchr( REMagic, *src ) )
			*retstr++ = '\\';
		*retstr++ = *src;
		}

	*retstr = '\0';

	return( rptr );
}
