/* wusage 2.5. Copyright 1993, 1994, Quest Protein Database Center,
   Cold Spring Harbor Labs. Permission granted to copy and distribute
   this work provided that this notice remains intact. Modified
   versions should be cleared through Quest first; if this is not
   done, any modified version of the program must be clearly labeled
   as such. 

   By Thomas Boutell, 11/93 - 4/94. Please contact boutell@cshl.org
   with any questions regarding this software. */
    
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>
#include "configure.h"

/* Date layout used for the -s/-e arguments, the status file and both
   old-style log formats (the layout ctime() produces, minus newline). */
#define TIME_FMT "%a %h %e %T %Y"
/* Date layout parsed from the bracketed field of a new-style log line. */
#define COMMON_TIME_FMT "%e/%h/%Y:%T"

/* Local replacement for strptime(); handles only %a %h %e %T %Y. */
char *mystrptime(char *buf, char *fmt, struct tm *tm);

/* Per-site / per-item tallying and reporting helpers defined below. */
void site_access(char *site);
void item_access(char *item);
void site_output();
void item_output();
/* Comparators over indices into the site / item tables. */
int site_compare(int *s1, int *s2);
int item_compare(int *s1, int *s2);

/* Heading text for each week (strdup'd ctime() output), indexed by
   week number minus one. */
char *weeks[WEEKS_MAX];

/* Set by -a: list every site instead of only the top ten. */
static int allsites = 0;
/* 1 while no start / end bound applies to the log entries. */
int start_none = 1;
int end_none = 1;
/* Broken-down start bound, end bound, and time of the current entry. */
struct tm start_t;
struct tm end_t;
struct tm item_t;
/* mktime() results for the three structures above. */
time_t start_t_gm;
time_t end_t_gm;
time_t item_t_gm;
/* Start of the week currently being accumulated; 0 means "not yet
   determined". */
time_t week_t_gm;

/* Emit the report for the current tallies / clear the tallies. */
void output();
void reset();

/* Abort with the usage text when the option at argv[i] is the last
   argument and so has no value after it. Relies on `i` and `argc` being
   in scope where it is used. Wrapped in do/while(0) so `checkbinary();`
   behaves as one statement in any context. */
#define checkbinary() do { if (i == (argc-1)) { \
	fprintf(stderr, "Missing argument.\n"); \
	usage(); \
	exit(-1); \
} } while (0)

/* Number of log entries counted so far (ignored sites/items excluded). */
int accesses = 0;
/* Set by -t: print only the access total. */
int totalonly = 0;
/* Set by -w: produce one report per week. */
int weekly = 0;
/* Number of processed entries whose item contains a '?'. */
int indexed = 0;
/* Set by -h: write HTML to files under filepath instead of stdout. */
int html = 0;
/* Current week number, starting at 1; reloaded from the status file. */
int week = 1;

/* HTML output stream; only opened when html is set. */
FILE *out;
/* Post-processing step that ends the program via exit(). */
void finish();

/* Parallel tables: site name and its access count; sites_t entries
   are in use. */
static char *sites[SITES_MAX];
static int site_accs[SITES_MAX];
static int sites_t = 0;

/* Returns 1 when item matches any wildcard pattern in table. */
int wildfind(char *item, char **table, int table_size);

/* wusage entry point.

   Parses the command line (-a, -s start, -e end, -t, -h, -w, -c file),
   calls configure(), then reads `logfile` line by line in the layout
   selected by `logtype`, tallying accesses per site and per item.
   In weekly mode the week number and week start are resumed from
   "<filepath>/status" when that file exists, and a report plus a
   "week<N>.total" file are produced each time an entry falls a full
   week past the current week start. Normal completion goes through
   finish(), which calls exit(); failures exit with status -1. */
int main(int argc, char *argv[]) {
	enum { WEEK_SECS = 86400*7 };
	char *site;
	char *date;
	char *method;
	char *item;
	char buf[513];
	char path[1024];
	FILE *in;
	int i;

	/* Placeholder bounds; the range test below ignores them while
	   start_none / end_none are still set. The day of month is padded
	   to two columns, as in ctime() output. */
	mystrptime("Tue Jan  1 00:00:01 1980", TIME_FMT, &start_t);
	mystrptime("Fri Jan  1 00:00:01 2010", TIME_FMT, &end_t);
	for (i=1; (i<argc); i++) {
		if (!strcmp(argv[i], "-a")) {
			allsites = 1;
		} else if (!strcmp(argv[i], "-s")) {
			checkbinary();
			mystrptime(argv[i+1], TIME_FMT, &start_t);
			start_none = 0;
			i++;
		} else if (!strcmp(argv[i], "-e")) {
			checkbinary();
			mystrptime(argv[i+1], TIME_FMT, &end_t);
			end_none = 0;
			i++;
		} else if (!strcmp(argv[i], "-t")) {
			totalonly = 1;
		} else if (!strcmp(argv[i], "-h")) {
			html = 1;
		} else if (!strcmp(argv[i], "-w")) {
			weekly = 1;
		} else if (!strcmp(argv[i], "-c")) {
			checkbinary();
			configfile = argv[i+1];
			i++;
		} else {
			usage();
			exit(-1);
		}
	}
	configure();
	if (weekly) {
		FILE *status;
		snprintf(path, sizeof path, "%s/status", filepath);
		status = fopen(path, "r");
		if (status) {
			char line[81];
			time_t now;
			time_t w_t_gm;
			int max_weeks = (int) (sizeof weeks / sizeof weeks[0]);
			/* First line: week number. Second line: week start. */
			if (!fgets(line, sizeof line, status)) {
				fprintf(stderr, "Couldn't read %s\n", path);
				exit(-1);
			}
			week = atoi(line);
			if (!fgets(line, sizeof line, status)) {
				fprintf(stderr, "Couldn't read %s\n", path);
				exit(-1);
			}
			mystrptime(line, TIME_FMT, &start_t);
			start_t_gm = mktime(&start_t);
			start_none = 0;
			fclose(status);
			/* Rebuild the headings of the weeks already reported */
			w_t_gm = start_t_gm - WEEK_SECS*(week-1);
			for (i=0; (i<week-1) && (i<max_weeks); i++) {
				weeks[i] = strdup(ctime(&w_t_gm));
				w_t_gm += WEEK_SECS;
			}
			time(&now);
			if (totalonly) {
				/* Replay the totals stored for earlier weeks */
				for (i=0; (i<week-1); i++) {
					FILE *total;
					int t = 0;
					snprintf(path, sizeof path, "%s/week%d.total",
						filepath, i);
					total = fopen(path, "r");
					if (!total) {
						fprintf(stderr, "Couldn't open %s\n", path);
						exit(-1);
					}
					if (fscanf(total, "%d", &t) != 1) {
						t = 0;
					}
					printf("%d\n", t);
					fclose(total);
				}
			}
			if ((now - start_t_gm) < WEEK_SECS) {
				/* Only full weeks! */
				reset();
				finish();
			}
		}
	}
	if (html) {
		if (weekly) {
			snprintf(path, sizeof path, "%s/week%d.html", filepath, week-1);
		} else {
			snprintf(path, sizeof path, "%s/index.html", filepath);
		}
		out = fopen(path, "w");
		if (!out) {
			fprintf(stderr,
	"Can't open file-- directory probably missing: %s\n", filepath);
			exit(-1);
		}
	}
	start_t_gm = mktime(&start_t);
	if (weekly) {
		if (start_none) {
			week_t_gm = 0;
		} else {
			week_t_gm = start_t_gm;
		}
	}
	end_t_gm = mktime(&end_t);
	in = fopen(logfile, "r");
	if (!in) {
		fprintf(stderr, "Couldn't open %s\n", logfile);
		exit(-1);
	}
	/* The plain (non-weekly) HTML page carries no tallies of its own,
	   so the log is not scanned in that mode. */
	if (!(html && (!weekly))) {
		while (fgets(buf, sizeof buf, in)) {
			char *sp;
			size_t item_len;
			int ignored, hidden, site_ignored;
			if (logtype == LOG_NCSA_OLD) {
				/* Old NCSA-style log */
				if (!(site = strtok(buf, " "))) {
					continue;
				}
				if (!(date = strtok(0, "]"))) {
					continue;
				}
				/* Step over the opening bracket */
				date++;
				mystrptime(date, TIME_FMT, &item_t);
				method = strtok(0, " ");
				if (!method) {
					continue;
				}
				item = strtok(0, "\n");
				if (!item) {
					continue;
				}
			} else if (logtype == LOG_CERN_OLD) {
				/* Old CERN-style log */
				char *s;
				size_t line_len = strlen(buf);
				s = mystrptime(buf, TIME_FMT, &item_t);
				/* A short line can leave the date parser beyond
				   the text that was actually read. */
				if ((s < buf) || ((size_t) (s - buf) > line_len)) {
					continue;
				}
				while (s[0] == ' ') {
					s++;
				}
				if (!(site = strtok(s, " "))) {
					continue;
				}
				method = strtok(0, " ");
				if (!method) {
					continue;
				}
				if (strcmp(method, "TESTCOMMAND") == 0) {
					continue;
				}
				item = strtok(0, "\n");
				if (!item) {
					continue;
				}
			} else {
				/* New-style log: site ... [time] "METHOD item ..." */
				char *site_s;
				char *time_s;
				char *request_s;
				char *sat;
				site_s = buf;
				sat = strchr(buf, ' ');
				if (!sat) {
					continue;
				}
				time_s = sat+1;
				*sat = '\0';
				time_s = strchr(time_s, '[');
				if (!time_s) {
					continue;
				}
				sat = strchr(time_s, ']');
				if (!sat) {
					continue;
				}
				time_s++;
				request_s = sat+1;
				*sat = '\0';
				request_s = strchr(request_s, '\"');
				if (!request_s) {
					continue;
				}
				request_s++;
				sat = strchr(request_s, '\"');
				if (!sat) {
					continue;
				}
				*sat = '\0';
				site = site_s;
				sat = strchr(request_s, ' ');
				if (!sat) {
					continue;
				}
				item = sat+1;
				sat = strchr(item, ' ');
				if (sat) {
					*sat = '\0';
				}
				mystrptime(time_s, COMMON_TIME_FMT, &item_t);
			}
			item_t_gm = mktime(&item_t);
			/* Skip anything not in the time range specified */
			if (((!start_none) && (item_t_gm < start_t_gm)) ||
					((!end_none) && (item_t_gm > end_t_gm))) {
				continue;
			}
			if (weekly && (week_t_gm == 0)) {
				/* Tick back to Sunday midnight */
				week_t_gm =
					(item_t_gm - item_t.tm_wday * 86400 -
					item_t.tm_hour * 3600 -
					item_t.tm_min * 60 -
					item_t.tm_sec);
			}
			if (weekly && (item_t_gm - week_t_gm >= WEEK_SECS)) {
				time_t now;
				FILE *total;
				output();
				snprintf(path, sizeof path, "%s/week%d.total",
					filepath, week-1);
				total = fopen(path, "w");
				if (!total) {
					fprintf(stderr,
		"Can't open file-- directory probably missing: %s\n", filepath);
					exit(-1);
				}
				fprintf(total, "%d\n", accesses);
				fclose(total);
				week_t_gm += WEEK_SECS;
				time(&now);
				if ((now - week_t_gm) < WEEK_SECS) {
					/* Only full weeks! finish() closes `out`
					   itself, so it must still be open here. */
					reset();
					finish();
				}
				if (html) {
					fclose(out);
				}
				week++;
				if (html) {
					snprintf(path, sizeof path, "%s/week%d.html",
						filepath, week-1);
					out = fopen(path, "w");
					if (!out) {
						fprintf(stderr,
			"Can't open file-- directory probably missing: %s\n", filepath);
						exit(-1);
					}
				}
				reset();
			}
			sp = strchr(item, ' ');
			/* Anything after space is uninteresting */
			if (sp) {
				*sp = '\0';
			}
			/* truncate "/index.html" so it merges with "/" */
			sp = strstr(item, "index.html");
			if (sp) {
				*sp = '\0';
			}
			/* Remove any trailing slash for further merges; the item
			   may already be empty at this point. */
			item_len = strlen(item);
			if ((item_len > 0) && (item[item_len-1] == '/')) {
				item[item_len-1] = '\0';
			}
			site_ignored = wildfind(site,
				ignored_sites, ignored_sites_t);
			ignored = wildfind(item,
				ignored_items, ignored_items_t);
			hidden = wildfind(item,
				hidden_items, hidden_items_t);
			if (!(ignored || hidden || site_ignored)) {
				site_access(site);
				item_access(item);
			}
			if (!(ignored || site_ignored)) {
				accesses++;
			}
			if (strstr(item, "?")) {
				indexed++;
			}
		}
	}
	fclose(in);
	output();
	reset();
	finish();
	return 0;
}		

/* Final step of every run; never returns (calls exit(0)).

   In HTML mode the output stream is closed first. For a weekly run,
   usagegraph is invoked, "<filepath>/list.html" is rewritten with one
   link per week, and, if any week start was established, the status
   file is updated. For a non-weekly run, a weekly wusage is spawned
   and its list.html is appended to index.html.

   NOTE: the shell commands are assembled from configuration values
   (binpath, configfile, filepath) without quoting. */
void finish() {
	char cmd[4096];
	if (html) {
		/* `out` is still unopened when main() bails out before
		   creating the page for this run. */
		if (out) {
			fclose(out);
			out = NULL;
		}
		if (weekly) {
			FILE *note;
			int i;
			snprintf(cmd, sizeof cmd, "%s/usagegraph -c %s",
				binpath, configfile);
			system(cmd);
			snprintf(cmd, sizeof cmd, "%s/list.html", filepath);
			note = fopen(cmd, "w");
			if (!note) {
				fprintf(stderr,
	"Can't open file-- directory probably missing: %s\n", filepath);
				exit(-1);
			}
			fprintf(note, "<H2>Usage by Week</H2>\n");
			fprintf(note, "<IMG SRC=\"usage.graph.gif\">\n");

			fprintf(note, "<H3>Weeks beginning...</H3>\n");
			fprintf(note, "<UL>\n");
			if (!week_t_gm) {
				/* If we never got started, we're
					a week ahead of where we
					should be */
				week--;
			}
			for (i=(week-1); (i>=0); i--) {
				fprintf(note,
			"<LI><A HREF=\"%s/week%d.html\">%s</A>\n",
					webpath, i, weeks[i] ? weeks[i] : "");
			}
			fprintf(note, "</UL>\n");
			fclose(note);
			if (week_t_gm) {
				/* Don't touch status unless work was done */
				snprintf(cmd, sizeof cmd, "%s/status", filepath);
				note = fopen(cmd, "w");
				if (!note) {
					fprintf(stderr,
		"Can't open file-- directory probably missing: %s\n", filepath);
					exit(-1);
				}
				fprintf(note, "%d\n", week+1);
				fprintf(note, "%s\n", ctime(&week_t_gm));
				fclose(note);
			}
		} else {
			snprintf(cmd, sizeof cmd, "%s/wusage -c %s -h -w",
				binpath, configfile);
			system(cmd);
			snprintf(cmd, sizeof cmd,
				"cat %s/index.html %s/list.html > %s/tmp.html",
				filepath, filepath, filepath);
			system(cmd);
			snprintf(cmd, sizeof cmd, "mv %s/tmp.html %s/index.html",
				filepath, filepath);
			system(cmd);
		}
	}
	exit(0);
}
					
/* Write the report for the tallies gathered so far.

   -t prints only the access count on stdout. HTML mode writes a
   title block to `out`, followed in weekly mode by the per-week
   section (which also records the week heading in weeks[]). Plain
   mode prints totals and the site/item rankings on stdout. In HTML
   mode the navigation links are appended in every case. */
void output() {
	if (totalonly) {
		printf("%d\n", accesses);
	} else if (!html) {
		printf("Total accesses: %d\n", accesses);
		printf("Total script accesses: %d\n", indexed);
		site_output();
		item_output();
	} else {
		fprintf(out, "<TITLE>%s Server Usage</TITLE>\n", servername);
		fprintf(out, "<H1>%s Server Usage</H1>\n", servername);
		/* Global totals are no longer written for the
		   non-weekly page. */
		if (weekly) {
			char *heading;
			fprintf(out, "<A NAME=week%d>\n", week);
			heading = strdup(ctime(&week_t_gm));
			weeks[week-1] = heading;
			fprintf(out, "<H2>Week of %s</H2>\n", heading);
			fprintf(out, "<UL>\n");
			fprintf(out, "<LI>Total accesses: %d\n", accesses);
			fprintf(out, "<LI>Total index accesses: %d\n", indexed);
			site_output();
			item_output();
			fprintf(out, "</UL>\n");
		}
	}
	if (!html) {
		return;
	}
	if (weekly) {
		fprintf(out,
	"<H4><A HREF=\"%s/index.html\">Up to usage page</A></H4>\n", webpath);
	}
	fprintf(out,
	"<H4><A HREF=\"%s\">Up to home page</A></H4>\n", webhome);
}

/* Discard all tallies: release every stored site and item name and
   zero the table sizes and the access counters. */
void reset() {
	while (sites_t > 0) {
		sites_t--;
		free(sites[sites_t]);
	}
	while (items_t > 0) {
		items_t--;
		free(items[items_t]);
	}
	accesses = 0;
	indexed = 0;
}


/* Record one access from `site`.

   Increments the counter of a site already in the table, otherwise
   appends a copy of the name with a count of 1. When the table is
   full the new site is dropped (with a single warning on stderr)
   rather than written past the end of the arrays. Exits with -1 if
   the name cannot be copied. */
void site_access(char *site) {
	static int warned = 0;
	int capacity = (int) (sizeof sites / sizeof sites[0]);
	char *copy;
	int i;
	for (i=0; (i<sites_t); i++) {
		if (!strcmp(sites[i], site)) {
			site_accs[i]++;
			return;
		}
	}
	if (sites_t >= capacity) {
		if (!warned) {
			fprintf(stderr,
				"Too many sites (limit %d); ignoring new ones\n",
				capacity);
			warned = 1;
		}
		return;
	}
	copy = strdup(site);
	if (!copy) {
		fprintf(stderr, "Out of memory\n");
		exit(-1);
	}
	sites[sites_t] = copy;
	site_accs[sites_t] = 1;
	sites_t++;
}
				
/* Record one access to `item`: bump the counter of a matching entry,
   or append a copy of the name with a count of 1.
   NOTE(review): nothing here limits items_t to the capacity of
   items[] / item_accs[], which are declared outside this file --
   confirm the bound and add a check. */
void item_access(char *item) {
	int slot = 0;
	while (slot < items_t) {
		if (strcmp(items[slot], item) == 0) {
			item_accs[slot]++;
			return;
		}
		slot++;
	}
	items[items_t] = strdup(item);
	item_accs[items_t] = 1;
	items_t++;
}

/* Index arrays filled and sorted by site_output() / item_output():
   after sorting, entry k holds the table index of the k-th busiest
   site or item. */
int site_o[SITES_MAX];
int item_o[ITEMS_MAX];
 
/* qsort() comparator over indices into the site table: the site with
   more accesses sorts first. Has the exact signature qsort() requires. */
static int site_order_cmp(const void *a, const void *b) {
	int first = *(const int *) a;
	int second = *(const int *) b;
	return - (site_accs[first] - site_accs[second]);
}

/* Print the busiest sites with their access counts: all of them when
   -a was given, otherwise at most ten. Output goes to `out` as an
   HTML list in HTML mode and to stdout as plain text otherwise. */
void site_output() {
	int i;
	int m;
	for (i=0; (i<sites_t); i++) {
		site_o[i] = i;
	}
	qsort(site_o, sites_t, sizeof(int), site_order_cmp);
	m = sites_t;
	if ((!allsites) && (m > 10)) {
		m = 10;
	}
	if (html) {
		fprintf(out, "<LI>%d sites accessing server most:\n", m);
		fprintf(out, "<UL>\n");
	} else {
		printf("%d sites accessing server most:\n", m);
	}
	for (i=0; (i<m); i++) {
		int o = site_o[i];
		if (html) {
			fprintf(out,
				"<LI>%s %d\n", sites[o], site_accs[o]);
		} else {
			printf("%s %d\n", sites[o], site_accs[o]);
		}
	}
	if (html) {
		fprintf(out, "</UL>\n");
	}
}

/* qsort() comparator over indices into the item table: the item with
   more accesses sorts first. Has the exact signature qsort() requires. */
static int item_order_cmp(const void *a, const void *b) {
	int first = *(const int *) a;
	int second = *(const int *) b;
	return - (item_accs[first] - item_accs[second]);
}

/* Print up to ten of the most accessed items with their counts. An
   empty item name is shown as "/". Output goes to `out` as an HTML
   list in HTML mode and to stdout as plain text otherwise. */
void item_output() {
	int i;
	int m;
	for (i=0; (i<items_t); i++) {
		item_o[i] = i;
	}
	qsort(item_o, items_t, sizeof(int), item_order_cmp);
	m = 10;
	if (m > items_t) {
		m = items_t;
	}
	if (html) {
		fprintf(out, "<LI>%d items accessed most:\n", m);
		fprintf(out, "<UL>\n");
	} else {
		printf("%d items accessed most:\n", m);
	}
	for (i=0; (i<m); i++) {
		int o = item_o[i];
		/* Make root look like root */
		if (strlen(items[o]) == 0) {
			if (html) {
				fprintf(out, "<LI>/ %d\n", item_accs[o]);
			} else {
				printf("/ %d\n", item_accs[o]);
			}
		} else {
			if (html) {
				fprintf(out, "<LI>%s %d\n",
					items[o], item_accs[o]);
			} else {
				printf("%s %d\n", items[o], item_accs[o]);
			}
		}
	}
	if (html) {
		fprintf(out, "</UL>\n");
	}
}

/* Order two site-table indices so that the site with the larger
   access count comes first (negative result when *s1 is busier). */
int site_compare(int *s1, int *s2) {
	int left = site_accs[*s1];
	int right = site_accs[*s2];
	return right - left;
}

/* Order two item-table indices so that the item with the larger
   access count comes first (negative result when *s1 is busier). */
int item_compare(int *s1, int *s2) {
	int left = item_accs[*s1];
	int right = item_accs[*s2];
	return right - left;
}

/* Print the command-line summary on stderr. Covers every option that
   main() accepts, with each option's continuation line directly
   under the option it belongs to. */
void usage() {
	fprintf(stderr,
		"Usage: wusage -c file [-t] [-a] [-w] [-h] [-s starttime] [-e endtime]\n");
	fprintf(stderr, "Where:\n");
	fprintf(stderr, "-c specifies the configuration file (required)\n");
	fprintf(stderr, "-a specifies that all sites should be listed\n");
	fprintf(stderr, "-t specifies that only the total # of accesses\n");
	fprintf(stderr, " should be output (useful for graphing)\n");
	fprintf(stderr, "-w specifies output for each week in range\n");
	fprintf(stderr, "-h specifies that HTML pages should be written\n");
	fprintf(stderr,
		"starttime and endtime are in date command format\n");
	fprintf(stderr, " MINUS THE TIMEZONE (no EST, etc)\n");
	fprintf(stderr, " Enclose times in quotes when running by hand\n");
	fprintf(stderr, "By default, entire usage log is examined\n");
}

/* Find the first occurrence in `text` of the `len` characters starting
   at `pat`; returns NULL when there is none. */
static const char *wild_locate(const char *text, const char *pat, size_t len) {
	for (; *text != '\0'; text++) {
		if (strncmp(text, pat, len) == 0) {
			return text;
		}
	}
	return NULL;
}

/* Return 1 if `item` matches any of the first `table_size` patterns
   in `table`, 0 otherwise.

   Pattern syntax: '?' matches exactly one character, '*' skips ahead
   to the first occurrence of the literal text that follows it, and
   every other character matches itself. A '*' that ends the pattern,
   or is directly followed by another wildcard, accepts the rest of
   the item. There is no backtracking: only the first occurrence of
   the text after a '*' is tried. */
int wildfind(char *item, char **table, int table_size) {
	int i;
	for (i=0; (i<table_size); i++) {
		const char *m = table[i];
		const char *it = item;
		int failed = 0;
		while ((*m) != '\0') {
			if ((*m) == '*') {
				const char *after;
				size_t span = strcspn(m+1, "*?");
				if (span == 0) {
					return 1;
				}
				after = wild_locate(it, m+1, span);
				if (!after) {
					failed = 1;
					break;
				}
				/* Resume literal matching at the text found */
				it = after;
				m++;
				continue;
			}
			if ((*m) == '?') {
				/* Must consume a real character, never the
				   terminator */
				if ((*it) == '\0') {
					failed = 1;
					break;
				}
			} else if ((*m) != (*it)) {
				failed = 1;
				break;
			}
			m++;
			it++;
		}
		if ((!failed) && ((*m) == '\0') && ((*it) == '\0')) {
			return 1;
		}
	}
	return 0;
}	


/* A very limited implementation to plug the hole in SysV. Should work
	fine for the format output by ctime(), which is our concern. 
	Only knows English month names, however; you will need to
	patch it for other locales. I am willing to improve on this
	but don't have the necessary information about locale support
	in C, and fear falling into yet another incompatibility hole. */

/* Days in each month of a non-leap year, January first. */
int monthlen[] = { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 };

/* Field parsers used by mystrptime(); each consumes one field from
   the buffer and returns the position after it. */
static char *parseweekday(char *buf, struct tm *tm);
static char *parsemonth(char *buf, struct tm *tm);
static char *parseday(char *buf, struct tm *tm);
static char *parsetimecolon(char *buf, struct tm *tm);
static char *parsefullyear(char *buf, struct tm *tm);

/* Minimal strptime() substitute.

   buf  text to parse
   fmt  format string; only the conversions %a %h %e %T %Y are
        understood. Characters outside a conversion are not matched
        against buf -- the field parsers step over separators.
   tm   receives the result. It is first filled from localtime() for
        the current time so that fields no conversion sets (zone and
        DST data included) hold usable values.

   Returns the position in buf after the last field consumed. An
   unsupported conversion is reported on stderr and skipped.
   tm_yday is recomputed from the month and day as a zero-based
   day of the year.

   NOTE(review): tm_isdst is inherited from the current time, not
   from the parsed date -- confirm this is acceptable for mktime(). */
char *mystrptime(char *buf, char *fmt, struct tm *tm) {
	int i;
	int s;
	int year;
	enum state {
		state_normal, state_escape
	};	

	/* Cheat to get the zone, et cetera, into the tm struct */
	time_t now;
	time(&now);
	*tm = *(localtime(&now));

	s = state_normal;	
	while ((*fmt) != '\0') {
		char c;
		c = *fmt;
		fmt++;
		switch(s) {
			case state_normal:
				if (c == '%') {
					s = state_escape;
				}
				break;
			case state_escape:
				switch(c) {
					case 'a':
						buf = parseweekday(buf, tm);
						break;
					case 'h':
						buf = parsemonth(buf, tm);
						break;
					case 'e':
						buf = parseday(buf, tm);
						break;
					case 'T':
						buf = parsetimecolon(buf, tm);
						break;
					case 'Y':
						buf = parsefullyear(buf, tm);
						break;
					default:
						fprintf(stderr,
	"mystrptime: sorry, unsupported format flag: %c\n", c);
						break;
				}
				s = state_normal;
				break;
			default:
				/* ???? */
				break;
		}	
	}
	/* Now the yday problem (oh such fun!) */
	tm->tm_yday = 0;
	for (i=0; (i<(tm->tm_mon)); i++) {
		tm->tm_yday += monthlen[i];
	}
	/* Leap years: tm_year counts from 1900, so test the real year */
	year = tm->tm_year + 1900;
	if (((year % 4) == 0) && (((year % 100) != 0) || ((year % 400) == 0))) {
		if (tm->tm_mon > 1) {
			tm->tm_yday += 1;
		}
	} 
	/* tm_yday counts days since January 1, so the 1st adds nothing */
	tm->tm_yday += tm->tm_mday - 1;
	return buf;
}

static char *daynames[] = { "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" };			
/* Parse a three-letter English weekday name at buf into tm->tm_wday
   (0 = Sunday). tm_wday is left untouched when no name matches.
   Returns buf advanced over the name and one separator, stopping
   early at the end of the string. */
static char *parseweekday(char *buf, struct tm *tm) {
	int i;
	for (i=0; (i<7); i++) {
		if (strncmp(buf, daynames[i], 3) == 0) {
			tm->tm_wday = i;
			break;
		}
	}
	for (i=0; (i<4) && ((*buf) != '\0'); i++) {
		buf++;
	}
	return buf;
}
		
static char *monthnames[] = {
	"Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug",
	"Sep", "Oct", "Nov", "Dec"
};

/* Parse a three-letter English month name at buf into tm->tm_mon
   (0 = January). tm_mon is left untouched when no name matches.
   Returns buf advanced over the name and one separator, stopping
   early at the end of the string. */
static char *parsemonth(char *buf, struct tm *tm) {
	int i;
	for (i=0; (i<12); i++) {
		if (strncmp(buf, monthnames[i], 3) == 0) {
			tm->tm_mon = i;
			break;
		}
	}
	for (i=0; (i<4) && ((*buf) != '\0'); i++) {
		buf++;
	}
	return buf;
}
		
/* Parse the day of the month at buf into tm->tm_mday. Leading blanks
   are skipped, so both the padded " 1" and the plain "1" or "01"
   forms are accepted. Returns the position after the number and one
   separator character. When no number is present tm_mday becomes 0
   and up to three characters are skipped, never passing the end of
   the string. */
static char *parseday(char *buf, struct tm *tm) {
	char *end;
	long day = strtol(buf, &end, 10);
	int i;
	if (end == buf) {
		tm->tm_mday = 0;
		for (i=0; (i<3) && ((*buf) != '\0'); i++) {
			buf++;
		}
		return buf;
	}
	tm->tm_mday = (int) day;
	if ((*end) != '\0') {
		end++;
	}
	return end;
}

/* Parse "HH:MM:SS" into tm_hour, tm_min and tm_sec. Anything before
   the first digit is skipped, so the ':' that precedes the time in
   COMMON_TIME_FMT input does not zero out the fields. Returns the
   position after the seconds and one separator character, never
   passing the end of the string. */
static char *parsetimecolon(char *buf, struct tm *tm) {
	int *field[3];
	int n;
	field[0] = &tm->tm_hour;
	field[1] = &tm->tm_min;
	field[2] = &tm->tm_sec;
	while (((*buf) != '\0') && (((*buf) < '0') || ((*buf) > '9'))) {
		buf++;
	}
	for (n=0; (n<3); n++) {
		char *end;
		*field[n] = (int) strtol(buf, &end, 10);
		buf = end;
		/* Step over the ':' between fields, or the separator
		   that follows the seconds */
		if ((*buf) != '\0') {
			buf++;
		}
	}
	return buf;
}

/* Parse a full (four-digit) year into tm->tm_year, stored as years
   since 1900. Anything before the first digit is skipped. Returns
   the position after the digits; when the string holds no digit,
   tm_year is left untouched and the end of the string is returned. */
static char *parsefullyear(char *buf, struct tm *tm) {
	char *end;
	while (((*buf) != '\0') && (((*buf) < '0') || ((*buf) > '9'))) {
		buf++;
	}
	if ((*buf) == '\0') {
		/* Bad news, but what can we do? */
		return buf;
	}
	tm->tm_year = (int) strtol(buf, &end, 10) - 1900;
	return end;
}
