
/*
 * The master feeding program that looks for new articles and either
 * lists them or actually batches them out to the destination site.
 * Copyright 1990 by Looking Glass Software Limited.
 * See the file READ.ME for licence details.
 */

#include <stdio.h>
#include "db.h"
#include <sys/types.h>
#include <sys/stat.h>
#include "feed.h"
#include <ctype.h>

/* Default shell command each batch is piped into.  It is used as a
   printf format by open_fresh_batch(), with the feed site name as the
   single %s argument.  Replaced by the c= option and by +t. */

char *batch_command = "(echo '#! cunbatch';compress)| uux - -r -gd %s!rnews";

long time_now;				/* time program was run, set in main() */
int group_count = 0;			/* active, subscribed groups seen by
					   read_active(); also the source of
					   each such group's index */

long batch_size = 150000l;		/* default batch size (uncomp), bytes */
long max_output = 100000000l;		/* max output for one session, bytes */
long bytes_output = 0;			/* how many bytes output this batch */
long total_output = 0;			/* bytes output this session, all
					   batches; compared to max_output */
char *feed_sitename;			/* site being fed (untagged argument) */
char *newsrc_name;			/* name for .newsrc file; defaults to
					   feed_sitename */

boolean listmode = TRUE;		/* just list articles */
boolean give_size = FALSE;		/* give size of articles */
boolean give_mid = FALSE;		/* give message-ids */
boolean test = FALSE;			/* do not write out .newsrc */
boolean mid_only = FALSE;		/* message id only for ihave */

char *feedsites[100];			/* sites in path that don't get art:
					   the feed site plus any p= sites */
int num_feedsites;			/* how many there are */
FILE *batchout;				/* main output descriptor for batched
					   news; null while no batch is open */

/*
 * Entry point: parse the command line, load the site's newsrc and the
 * active file, feed (or list) every new article, then save the newsrc.
 *
 * Arguments take three forms:
 *	x=value	keyed on the first letter only: s (batch size, in units
 *		of 1000 bytes), m (max output this session, same units),
 *		p (extra site whose presence in a Path suppresses the
 *		article), n (newsrc file name), c (batching command)
 *	+x / -x	turn a flag on / off: l (list mode), s (show sizes),
 *		m (show message-ids), i (message-ids only), t (test mode)
 *	other	the name of the site to feed; exactly one is required
 *
 * Returns 0 on success.  Every failure leaves through error(), which
 * exits with status 1.
 */
int
main( argc, argv )
int argc;	/* arg count */
char **argv;	/* arg vector */
{
	int argnum;
	int max_sites;		/* capacity of feedsites[] */
	int write_failed;	/* newsrc could not be written in full */
	char *strchr();
	time_t now;
	FILE *nrc;

	max_sites = (int)( sizeof(feedsites) / sizeof(feedsites[0]) );

	init_bases();
	/* time() takes a time_t *, which need not be the same as long * */
	time( &now );
	time_now = (long)now;

	for( argnum = 1; argnum < argc; argnum++ ) {
		char *argline;
		char *argstr;		/* argument string */
		int argval;
		int isplus;		/* boolean tells +arg vs -arg */
		argline = argv[argnum];

		if( (argstr = strchr(argline, '=')) != NULL ) {
			argstr++;
			argval = atoi(argstr);
			switch( argline[0] ) {
				case 's':	/* size, in kilobytes */
					if( argval > 10000 )
						error( "Batch size of %d kilobytes too large\n", argval );
					batch_size = argval * 1000L;
					break;
				case 'm':	/* max output, in kilobytes */
					if( argval > 32000 )
						error( "Max output of %d kilobytes too large\n", argval );
					max_output = argval * 1000L;
					break;
				case 'p':	/* paths to avoid */
					/* never write past the end of the table */
					if( num_feedsites >= max_sites )
						error( "Too many sites to avoid (limit %d)\n", max_sites );
					feedsites[num_feedsites++] = argstr;
					break;
				case 'n':
					newsrc_name = argstr;
					break;
				case 'c':
					batch_command = argstr;
					break;

				default:
					error( "Bad Option %s\n", argline );
				}
			}
		else if( (isplus = argline[0] == '+') || argline[0] == '-' ) {
			switch( argline[1] ) {
				case 'l':
					listmode = isplus;
					break;
				case 's':
					give_size = isplus;
					break;
				case 'm':
					give_mid = isplus;
					break;
				case 'i':
					mid_only = isplus;
					break;
				case 't':
					test = isplus;
					/* test batches are left uncompressed
					   and are not handed to uux */
					if( test )
						batch_command = "(echo '#! cunbatch';cat)";
					break;
				default:
					error( "Bad Option %s\n", argline );
				}
			}
		else {
			/* code for untagged option -- feed site */
			if( feed_sitename )
				error( "Too many feed sites.\n" );
			if( num_feedsites >= max_sites )
				error( "Too many sites to avoid (limit %d)\n", max_sites );
			feed_sitename = argline;
			/* the fed site itself must not be in an article's path */
			feedsites[num_feedsites++] = argline;
			}
		}
	/* body of program */

	if( feed_sitename == NULL )
		error( "No site to feed.\n" );

	/* if no room, exit quietly */
	if( max_output <= 1000 )
		exit(0);

	/* default newsrc name to site being fed */
	if( !newsrc_name )
		newsrc_name = feed_sitename;

	nrc = fopen( newsrc_name, "r" );
	if( !nrc )
		error( "Could not open newsrc %s\n", newsrc_name );

	read_newsrc( nrc );
	fclose( nrc );
	read_active();
	do_feeding();
	/* write out revised .newsrc */

	if( !test ) {
		nrc = fopen( newsrc_name, "w" );
		if( !nrc )
			error( "Could not update newsrc %s\n", newsrc_name );
		write_newsrc( nrc );
		/* a short write would lose the record of what was fed,
		   so report it rather than exit as if all were well */
		write_failed = ferror( nrc );
		if( fclose( nrc ) == EOF )
			write_failed = 1;
		if( write_failed )
			error( "Error writing newsrc %s\n", newsrc_name );
		}
	return 0;
}

dbptr newsgroups;	/* groups named in the newsrc, keyed by group name */
dbptr distributions;	/* distributions from "options -D" newsrc lines */

/* prepare the databases */

/*
 * Create the two record databases used throughout the program.  Both
 * hold records of type group.
 * NOTE(review): the first init_db() argument (1200 and 25) looks like
 * an expected record count -- confirm against db.h.
 */
init_bases()
{
	newsgroups = init_db( 1200, sizeof( group ) );
	distributions = init_db( 25, sizeof( group ) );
}
	

/* feeder's list of patterns to exclude & include
   recipient's list of patterns to include & exclude
   recipient's list of specific groups desired
 */


/*
 * Print a printf-style message on stderr and exit with status 1.
 * Does not return.
 *
 * form is the format; a..h carry up to eight arguments through to
 * fprintf() untouched.  They are declared long rather than left as
 * implicit int so that a pointer argument (for %s) is not cut down to
 * int width on machines where pointers are wider than int.
 *
 * NOTE(review): passing arguments through like this is still not
 * strictly portable.  A <stdarg.h> version would be, but it needs a
 * prototype in scope at every call site, and the callers in this file
 * come before this definition.
 */
error( form, a, b, c, d, e, f, g, h )
char *form;
long a, b, c, d, e, f, g, h;
{
	fprintf( stderr, form, a,b,c,d,e,f,g,h );
	exit(1);
}


/*
 * Read the active file (ACTFILE) and record its figures against every
 * group that is already in the newsgroups database.
 *
 * Each line must hold at least four blank-separated fields: group name,
 * highest article number, lowest article number, and a flag field.
 * For a group found in the database this sets GF_ACTIVE and the
 * active_highest / active_lowest numbers; if the group is not marked
 * GF_UNSUB it is also given the next index from group_count.
 *
 * Reading stops at the first malformed line, so groups listed after it
 * are left without GF_ACTIVE.  Exits through error() if the file
 * cannot be opened.
 */
read_active()
{
	FILE *active;
	char actbuf[MAX_GNAME+40];
	char *groupp, *highp, *lowp, *flagp;
	group *grp;
	/* declared here, the way main() declares strchr(): left implicit
	   these would be taken to return int, which loses pointer (and
	   long) bits where those types are wider than int */
	char *strtok();
	long atol();

	active = fopen( ACTFILE, "r" );
	if( !active )
		error( "Could not open %s\n", ACTFILE );

	while( fgets( actbuf, sizeof(actbuf), active ) ) {
		groupp = strtok( actbuf, " \t" );
		highp = strtok( NULL, " \t" );
		lowp = strtok( NULL, " \t" );
		/* the flag field is only checked for presence */
		flagp = strtok( NULL, " \t" );
		if( !( groupp && highp && lowp && flagp ) ) {
			fprintf( stderr, "Invalid active file line\n" );
			break;
			}
		/* if group was in .newsrc, record stats for it */
		grp = (group *)get_rec( newsgroups, groupp );
		if( grp ) {
			grp->gflags |= GF_ACTIVE;
			grp->active_highest = atol( highp );
			grp->active_lowest = atol( lowp );
			/* if an active, subscribed group, give it number */
			if( !(grp->gflags & GF_UNSUB ) )
				grp->index = ++group_count;
			}
		}
	fclose( active );

}

/*
 * Set the first `bytes' bytes at `mem' to zero.  Always returns 0.
 *
 * The count is converted to size_t explicitly: memset() is called here
 * without a prototype in scope, so the argument would otherwise be
 * passed as an unsigned int where memset() expects a size_t.
 */
int
zero( mem, bytes )
char *mem;
unsigned int bytes;
{
	memset( mem, 0, (size_t)bytes );
	return 0;
}

/* Area for the file name built by make_spoolname().  Sized for
   SPOOLDIR (sizeof counts its terminating zero), a '/', a group name
   of MAX_GNAME characters, another '/', and the article number:
   3*sizeof(long) characters is enough for any long in decimal,
   sign included. */
static char statspool[sizeof(SPOOLDIR) + 1 + MAX_GNAME + 1 + 3*sizeof(long)];

/*
 * Build the spool file name for an article: SPOOLDIR/group/number,
 * with every '.' after the SPOOLDIR prefix turned into '/'.
 *
 * grname is the group name, article the article number.
 * Returns a pointer to a static buffer that the next call overwrites.
 */
char *
make_spoolname( grname, article )
char *grname;
artnum article;
{
	register char *p;
	sprintf( statspool, "%s/%s/%ld", SPOOLDIR, grname, (long)article );
	/* start just past "SPOOLDIR/" so dots in SPOOLDIR itself survive */
	for( p = statspool+sizeof(SPOOLDIR); *p; p++ )
		if( *p == '.' )
			*p = '/';
	return statspool;
}


/*
 * Remove one trailing newline from buf, in place, if there is one.
 * An empty string, or one that does not end in a newline, is left
 * alone.
 */
stripnl( buf )
char *buf;
{
	char *tail;

	/* tail starts at the terminating zero, then steps back one */
	tail = buf + strlen( buf );
	if( tail != buf && *--tail == '\n' )
		*tail = 0;
}

/*
 * Load a newsrc-style file from desc into the databases.
 *
 * Three kinds of line are understood:
 *	options -D a,b,c   each name is added to the distributions
 *	group: 1-N	   a subscribed group, fed up to article N
 *	group! 1-N	   an unsubscribed group (GF_UNSUB is set)
 * The range is optional.  When present, only the number after the
 * first '-' is kept, as newsrc_highest.  Every group read is flagged
 * GF_OLDRC and given index MAGIC_GROUP, plus GF_UNSUB when it is
 * unsubscribed.  Any other line is reported on stderr and skipped.
 */
read_newsrc( desc )
FILE *desc;
{
	char rclbuf[MAX_GNAME+40];	/* no long lines, please! */
	char *p;			/* pointer into rc line */
	int flag;
	group *grp;
	char *distr;			/* distribution from option line */
	/* declared here, the way main() declares strchr(): left implicit
	   these would be taken to return int, which loses pointer (and
	   long) bits where those types are wider than int */
	char *strchr();
	char *strtok();
	long atol();

	while( fgets( rclbuf, sizeof(rclbuf), desc ) ) {
		flag = 0;
		if( strncmp( rclbuf, "options -D ", 11 ) == 0 ) {
			/* a distribution */
			for( distr = strtok( rclbuf+10, " ,\t\n" ); distr;
					distr = strtok( NULL, " ,\t\n" ) )
				add_rec( distributions, distr, AR_CREATE );
			continue;
			}
		/* ':' marks a subscribed group; failing that, try '!' */
		p = strchr( rclbuf, ':' );
		if( p == NULL ) {
			p = strchr( rclbuf, '!' );
			flag |= GF_UNSUB;
			}
		if( p != NULL ) {
			/* isolate group name */
			*p = 0;
			grp = (group *)add_rec( newsgroups, rclbuf, AR_CREATE );
			if( !grp )
				continue;	/* unknown group? */
			/* is there a number?  If so it must be 1- */
			if( atoi( p + 1 ) > 0 ) {
				p = strchr( p + 1, '-' );
				if( p )
					grp->newsrc_highest = atol( p + 1 );
				}
			grp->gflags |= flag | GF_OLDRC;
			/* unsubscribed groups get more than magic number */
			grp->index = MAGIC_GROUP + (flag & GF_UNSUB);

			}
		 else
			fprintf( stderr, "Unknown line in newsrc %s", rclbuf );
		}
}

do_feeding()
{
	group *grp;
	artnum tofeed;		/* number of articles to feed */
	artnum art;
	boolean junk, control;

	for( grp = (group *)first_rec(newsgroups); grp;
					grp = next_rec(newsgroups,grp)) {
		/* do not do unsubscribed groups */
		if( grp->gflags & GF_UNSUB )
			continue;
		/* nor groups that were not in the active file */
		if( !(grp->gflags & GF_ACTIVE ) )
			continue;

		/* adjust to include existing articles only */
		if( grp->newsrc_highest < grp->active_lowest )
			grp->newsrc_highest = grp->active_lowest - 1;


		junk = strcmp( grp->name, "junk" ) == 0;
		control = strcmp( grp->name, "control" ) == 0;

		/* for each new article in the group */
		for( art = grp->newsrc_highest + 1; art <= grp->active_highest;
								art++ )  {
			process( grp, art, junk, control );
			grp->newsrc_highest = art;
			/* if we hit limit for this session, stop */
			if( total_output >= max_output )
				goto donesess;	/* 2 level break */

			}


		}
	/* close any batch that might have been open */
   donesess:
	close_old_batch();
}

/*
 * This routine takes a group descriptor and article number, builds the
 * spool file name for that article and opens it.  It then reads the
 * header, acting on four lines, and decides whether the article should
 * be fed -- assuming that the group in question is a subscribed group,
 * which it is.
 *
 *	Path:		the article is dropped if any name in feedsites[]
 *			appears in the path with another token after it,
 *			so that articles are not fed back the way they
 *			came.  A match on the last token is taken to be
 *			a user name and is ignored.
 *	Newsgroups:	used to feed crossposted articles only once: the
 *			lowest index among the listed groups is found.
 *			It's not a killer crime if a duplicate slips
 *			through rarely -- but we still don't like it.
 *	Distribution:	if present, at least one entry must match a
 *			distribution we carry, or the article is dropped.
 *	Message-id:	saved, with surrounding white space removed.
 *
 * An article with no Newsgroups entries or no message-id is dropped.
 * An article that cannot be opened is skipped silently.
 */

char ptok[] = "!:@% \t\n";		/* Path header delimiters */

process( grp, art, junk, control )
group *grp;			/* group descriptor */
artnum art;			/* article number */
boolean junk;			/* is the name 'junk' */
boolean control;		/* is this the control group */
{
	char *fname;
	FILE *artfile;
	group *ng;		/* newsgroups line group */
	char line[MAX_LLEN];
	int ng_count;
	char *tok;		/* loop through parts of header lines */
	char midbuf[MAX_LLEN];
	int min_group;		/* minimum group in group list */
	int toklen;		/* length of current newsgroup token */

	fname = make_spoolname( grp->name, art );

	artfile = fopen( fname, "r" );
	if( !artfile )
		return;		/* deleted or invalid article */

	ng_count = 0;
	midbuf[0] = 0;
	min_group = MAGIC_GROUP+1;

	/* news_token() is called without a prototype, so its arguments
	   are converted by hand to the types it declares: the buffer
	   size to int and the null "init" argument to char * */

	/* scan for Path:, Newsgroups:, Message-id:, Distribution: */
	while( fgets( line, sizeof(line), artfile ) ) {
		/* blank line is end of header */
		if( line[0] == '\n' )
			break;
		if( starts( line, "Path:" ) ) {
			/* loop through sites */
			/* skip this article if feed site in path */
			for( tok = news_token( line, (int)sizeof(line), artfile,
					ptok, line+5 ); tok; tok =
					news_token( line, (int)sizeof(line), artfile,
					ptok, (char *)NULL ) ) {
				int i;
				for( i = 0; i < num_feedsites; i++ )
					if( strcmp(tok,feedsites[i]) == 0 ) {
						/* call it one more time to
						   make sure it's not the user*/
						if( news_token( line,
						  (int)sizeof(line), artfile, ptok,
						  (char *)NULL ) != NULL ) {

							fclose( artfile );
							return;
							}
						 else
							break;
						}
				}

			}
		 else if( starts( line, "Newsgroups:" ) ) {
			/* find the lowest index among the listed groups */
			for( tok = news_token( line, (int)sizeof(line), artfile,
					", \t\n", line+11 ); tok; tok =
					news_token( line, (int)sizeof(line), artfile,
					", \t\n", (char *)NULL ) ) {
				/* delete ".ctl" from fake control groups */
				if( control ) {
					/* a name shorter than the suffix has
					   nothing to strip; looking 4 back
					   from its end would read before the
					   start of the buffer */
					toklen = strlen( tok );
					if( toklen >= 4 && strcmp( tok + toklen - 4,
								".ctl" ) == 0 )
						tok[toklen-4] = 0;
					/* accept anything matching a distr
					   in 'control' */
					if( match_distr( tok ) )
						min_group = MAGIC_GROUP;
					}
				ng = (group *)get_rec( newsgroups, tok );
				ng_count++;
				if( ng ) {
					if( ng->index < min_group )
						min_group = ng->index;
					}

				}
			}
		 else if( starts( line, "Distribution:" ) ) {
			boolean distr_matches;

			/* initially, reject the article */
			distr_matches = FALSE;
			for( tok = news_token( line, (int)sizeof(line), artfile,
					", \t\n", line+13 ); tok; tok =
					news_token( line, (int)sizeof(line), artfile,
					", \t\n", (char *)NULL ) ) {
				/* if this distribution is in our list, be
				   happy */
				if( match_distr( tok ) )
					distr_matches = TRUE;

				}
			if( !distr_matches ) {
				fclose( artfile );
				return;
				}
			}
		 else if( starts( line, "Message-id:" ) ) {
			/* save the message-id */
			strcpy( midbuf, whitestrip(line+11) );
			}
		}

	/* ok, we got the info */
	/* reject articles with bad headers */

	if( ng_count == 0 || midbuf[0] == 0 ) {
		fclose( artfile );
		return;
		}

	/* reject crosspostings, unless the current group is the lowest
	   numbered group on the newsgroups line */

	/* Possible situations:
		a) One of the groups is a valid (subscribed + existent) group,
			feed when doing the lowest indexed such group.
		b) One of the groups is subscribed, but we're in a special
			group.  Feed here, in the special group.
		c) None of the groups is valid, but one is subscribed
			Feed it here. */

	if( min_group == MAGIC_GROUP || min_group == grp->index ||
			(min_group <= MAGIC_GROUP && (junk || control) ) )
		feed_article( fname, artfile, midbuf );

	fclose( artfile );
	return;

}


/*
 * Deliver one article that has been accepted for feeding.
 *
 * fname is the spool file name, artfile the open article and mid its
 * message-id.  What happens depends on the mode flags:
 *	mid_only  print the message-id alone
 *	listmode  print the file name, then the size (if give_size and
 *		  the size is known and positive) and the message-id
 *		  (if give_mid)
 *	neither	  copy the whole article into the current batch behind
 *		  a "#! rnews <size>" line, starting a new batch first
 *		  when this article would take the current one past
 *		  batch_size
 * In batch mode an article whose size cannot be found is reported on
 * stderr and skipped.
 */
feed_article( fname, artfile, mid )
char *fname;
FILE *artfile;
char *mid;
{
	struct stat info;
	long nbytes;		/* size of this article, -1 if unknown */
	int byte;		/* current byte while copying */

	nbytes = fstat( fileno(artfile), &info ) == 0 ? (long)info.st_size : -1;

	/* the two listing modes produce text only, never a batch */

	if( mid_only ) {
		printf( "%s\n", mid );
		return;
		}

	if( listmode ) {
		printf( "%s", fname );
		if( give_size && nbytes > 0 )
			printf( " %ld", nbytes );
		if( give_mid )
			printf( " %s", mid );
		putchar( '\n' );
		return;
		}

	if( nbytes < 0 ) {
		fprintf( stderr, "Bad article size for %s\n", fname );
		return;
		}

	/* a batch holding 2000 bytes or less is never closed early */
	if( bytes_output > 2000 && bytes_output + nbytes > batch_size )
		close_old_batch();

	if( batchout == (FILE *)0 )
		open_fresh_batch();

	fprintf( batchout, "#! rnews %ld\n", nbytes );

	/* the header scan moved the read position; copy from the top */
	rewind( artfile );
	for( byte = getc(artfile); byte != EOF; byte = getc(artfile) )
		putc( byte, batchout );

	bytes_output += nbytes;
	total_output += nbytes;
}

/*
 * Close off the last batch of news, if one is open, and wait for the
 * batching command to finish.  Does nothing when no batch is open.
 *
 * A non-zero status from pclose() is reported on stderr so that a
 * failed batch command does not pass unnoticed.  The program carries
 * on regardless.
 */

close_old_batch()
{
	int status;

	if( batchout ) {
		status = pclose( batchout );
		batchout = (FILE *)0;
		if( status != 0 )
			fprintf( stderr,
			    "Batch command failed (pclose status %d)\n",
			    status );
		}
}

open_fresh_batch()
{
	char batchcom[MAX_LLEN];

	if( batchout ) {
		fprintf( stderr, "Can't open batch, one is open\n" );
		return;
		}

	sprintf( batchcom, batch_command, feed_sitename );

	batchout = popen( batchcom, "w" );
	if( !batchout )
		error( "Could not execute %s\n", batchcom );
	bytes_output = 0;
}

/* The special routines that read material from news header lines,
 * accounting for arbitrarily long lines and continuations.
 *
 * Characters come first from an in-memory line and then, when that
 * runs out, straight from the article file, so a header that did not
 * fit the caller's buffer, or that continues on following lines, is
 * still read to its end. */

static char *pos;	/* next unread character of the line, or null
			   once reading has moved on to the file */
static int lastc;	/* the character nextc() handed out last */

int nextc();

/*
 * Return the next token of the current header, or NULL at its end.
 *
 * The token is copied to buf (at most bsiz-1 characters, plus a zero
 * byte) and buf is returned.  buf may be the very buffer that init
 * points into.  A token that does not fit is cut at bsiz-1 characters
 * and the character read after that point is dropped.
 */
char *
news_token( buf, bsiz, desc, delims, init )
char *buf;		/* buffer to put stuff in, can be init */
int bsiz;		/* buffer size */
FILE *desc;		/* descriptor to keep reading from if need be */
char *delims;		/* delimiters in parsing */
char *init;		/* buffer that starts line */
{
	int c;
	char *bufp;
	/* declared here, the way main() declares strchr(): left implicit
	   its result would be compared as an int, not as a pointer */
	char *strchr();

	/* a non-null init starts a new header; null carries on */
	if( init )
		pos = init;

	c = nextc( desc );
	/* skip over initial delims */
	while( c && strchr( delims, c ) != NULL )
		c = nextc(desc);
	bufp = buf;
	if( c == 0 )
		return NULL;		/* reached the end */


	/* take off one to leave room for zero byte */

	bsiz--;
	while( c && strchr( delims, c ) == NULL && bufp-buf < bsiz ) {
		*bufp++ = c;
		c = nextc( desc );
		}
	*bufp++ = 0;

	return buf;
}

/* next character from a news spool line */

/*
 * Return the next character of the current header, or 0 at its end.
 *
 * The end is reached at end of file, or when the character after a
 * newline is neither a space nor a tab (that is, not a continuation
 * line).  In the second case the character is pushed back onto desc.
 */
int
nextc( desc )
FILE *desc;
{
	int nch;

	if( pos && *pos ) {		/* if still reading from line */
		return lastc = *pos++;
		}
	pos = NULL;
	nch = getc( desc );
	if( nch == EOF )
		return 0;
	/* if old newline, check for continuation, if so return nl, else 0 */
	if( lastc == '\n' )  {
		if( nch != ' ' && nch != '\t' ) {
			/* not white space: the next header starts here, so
			   put the character back for whoever reads next */
			ungetc( nch, desc );
			return 0;
			}
		}
	return lastc = nch;
}

/* Check to see if a given string matches the distributions we carry */

/*
 * Returns TRUE if str equals the name of some record in the
 * distributions database, or begins with such a name followed at once
 * by a dot.  Returns FALSE otherwise.
 */
match_distr( str )
char *str;
{
	group *entry;
	int dlen;		/* length of the name being tried */
	char tail;		/* what follows the matched prefix in str */

	entry = (group *)first_rec(distributions);
	while( entry ) {
		dlen = strlen( entry->name );
		if( strncmp( entry->name, str, dlen ) == 0 ) {
			/* the name must end here, or carry on as ".sub" */
			tail = str[dlen];
			if( tail == 0 || tail == '.' )
				return TRUE;
			}
		entry = next_rec(distributions,entry);
		}
	return FALSE;
}


/*
 * Strip white space (spaces, tabs and newlines) from a string's ends.
 *
 * Trailing white space is cut off by writing a zero byte into str.
 * Leading white space is skipped by returning a pointer past it.  The
 * result therefore points into str and is not a copy.  A string that
 * is empty, or holds nothing but white space, gives an empty string.
 */

char *
whitestrip( str  )
char *str;
{
	register char *fp;		/* first pointer */
	char *first;
	char *last;			/* last non-blank seen, if any */

	for( fp = str; *fp == ' ' || *fp == '\t' || *fp == '\n'; fp++ )
		;
	first = fp;
	last = (char *)0;
	for( fp = first; *fp; fp++ )
		if( *fp != ' ' && *fp != '\t' && *fp != '\n' )
			last = fp;
	/* with no non-blank at all there is nothing to cut, and first
	   already points at the terminating zero */
	if( last )
		last[1] = 0;
	return first;
}

/*
 * Does str1 begin with prefix?  The comparison is the one cleqn()
 * makes over the length of prefix, and the result is cleqn()'s.
 */
starts( str1, prefix )
char *str1;
char *prefix;
{
	int plen;

	plen = strlen( prefix );
	return cleqn( str1, prefix, plen );
}

/*
 * Compare the first n characters of s1 and s2 without regard to the
 * case of letters.
 *
 * Returns non-zero if they agree over n characters, or if both strings
 * end together before that; returns 0 otherwise.
 *
 * Letters are folded with tolower().  Or-ing in 0x20 folds case as
 * well, but it also makes unrelated characters compare equal -- a
 * carriage return with '-', or '@' with '`' -- so a mangled header
 * name could pass for a real one.
 */
int
cleqn(s1, s2, n)
register char *s1;
register char *s2;
int n;
{
	int count;

	for( count = 0; count < n && *s1; count++, s1++, s2++ ) {
		/* tolower() must not be handed a negative char value */
		if( tolower( (unsigned char)*s1 ) !=
					tolower( (unsigned char)*s2 ) )
			return 0;
		}
	/* either n characters matched, or s1 ran out: s2 must end too */
	return count >= n || *s2 == 0;
}

write_newsrc( desc )
FILE *desc;
{
	group *grp;
	group *dist;
	char sbchar;

	/* write out the distributions first */

	for( dist = (group *)first_rec(distributions); dist;
					dist = next_rec(distributions,dist))
		fprintf( desc, "options -D %s\n", dist->name );

	for( grp = (group *)first_rec(newsgroups); grp;
					grp = next_rec(newsgroups,grp)) {

		sbchar = grp->gflags & GF_UNSUB ? '!' : ':';
		fprintf( desc, "%s%c", grp->name, sbchar );
		if( grp->newsrc_highest > 0 )
			fprintf( desc, " 1-%ld\n", (long)grp->newsrc_highest );
		 else
			putc( '\n', desc );
		}
}
