#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <savant.h>
#include <savutil.h>
#include <template.h>
#include <savantio.h>
#include <main.h>

/* Standard headers for names used directly in this file.  They are listed
   after the project headers so that any feature-test macros those headers
   establish are already in effect. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Forward declarations for the functions defined in this file. */

/* Interactive command loop: reads commands from stdin until "quit" or EOF.
   The argument is the number of document vectors (windows) in the collection. */
void savant_retrieve(int); 
/* Reads a query from stdin (at most the given number of body characters),
   fills the caller's per-field bias array, and returns the merged DV_Tree. */
DV_Tree *get_query(int, int *);
/* Prints up to the given number of matches; returns how many were printed. */
int print_suggestions(DocSim *, int);
/* Prints the body of the document containing the given (0-based) window. */
int retrieve_and_print(int);
/* Prints the location string of the document containing the given window. */
int retrieve_and_print_loc(int);
/* Reads one line from the stream and returns it as a heap-allocated string
   (NULL if nothing could be read); the caller frees it. */
char *read_string(FILE *);

void savant_retrieve(int num_docvecs)
{
  int maxdoc=5, docnum;
  char command[4096], *cmdptr;
  int   query_bias[NUM_FIELD_TYPES];
  DocSim *best_matches=NULL;
  DV_Tree *query=NULL;
  DB_INT *all_dv_biases;

  all_dv_biases = (DB_INT *)malloc((NUM_FIELD_TYPES*num_docvecs)*sizeof(DB_INT));    /* With windowing, will be overkill */
  fread_big(all_dv_biases, sizeof(DB_INT), NUM_FIELD_TYPES*num_docvecs, BIAS_FILE);

  cmdptr = command;
  cmdptr[0] = '\0';
  while((strncasecmp(cmdptr,"quit",4)) && !(feof(stdin))) {
    if(SavantDebug) { 
      printf("%s", cmdptr);
    }
    if ((!(*cmdptr) || *cmdptr=='?') && (SavantVerbose)) {
      printf("query [n]    : Find n most relevant documents to a query.  Default is 5.\n"
	     "keyquery [n] : Find n most relevant documents to a query, using keywords.\n"
	     "               Default is 5.\n"
	     "repeat       : Repeat the results of most recent query.\n"
	     "print n      : Retrieve and print the nth document in the query listing.\n"
	     "loc-print n  : Retrieve and print the nth document location in the\n"
	     "               query listing.\n"
	     "retrieve n   : Retrieve and print the document containing the nth window\n"
	     "loc-retrieve n  : Retrieve and print the document location containing\n"
	     "               the nth window of the collection.\n"
	     "quit         : Quit.\n"
	     "?            : Display this help.\n");
      fflush(stdout);
    }
    else if (!strncasecmp(cmdptr,"query",5)) {
      if (query) { /* free old query and old results */
	destroy_dvtree(query);
	free(best_matches);
      }

      cmdptr+=5;
      maxdoc=atoi(cmdptr);
      if (!maxdoc)
	maxdoc=5;

      query = get_query(4096, query_bias);
      if (SavantVerbose) {
	print_dvtree(stdout, query);
	printf("\n------------------------------\n");
	fflush(stdout);
      }

      best_matches = find_matches(query, maxdoc, query_bias, 1, all_dv_biases);
      /* 3rd arg == 1 means with weights */
      maxdoc = print_suggestions(best_matches, maxdoc);

    }
    else if (!strncasecmp(cmdptr,"keyquery",8)) {
      if (query) { /* free old query and old results */
	destroy_dvtree(query);
	free(best_matches);
      }

      cmdptr+=5;
      maxdoc=atoi(cmdptr);
      if (!maxdoc)
	maxdoc=5;

      query = get_query(4096, query_bias);
      if (SavantVerbose) {
	print_dvtree(stdout, query);
	printf("\n------------------------------\n");
	fflush(stdout);
      }

      best_matches = find_matches(query, maxdoc, query_bias, 0, all_dv_biases);
      /* 3rd arg == 0 means without weights */
      maxdoc = print_suggestions(best_matches, maxdoc);
    }
    else if (!strncasecmp(cmdptr,"repeat",6)) {
      if (query == NULL) {
	if (SavantVerbose) {
	  printf("No current query! Use 'query'.\n");
	  fflush(stdout);
	}
      }
      else 
	print_suggestions(best_matches, maxdoc);
    }
    else if (!strncasecmp(cmdptr,"print",5)) {
      if (query == NULL) {
	if (SavantVerbose) {
	  printf("No current query! Use 'query'.\n");
	  fflush(stdout);
	}
      }
      else {
	cmdptr+=5;
	docnum=atoi(cmdptr);
	if (!docnum && SavantVerbose) {
	  printf("Try 'print n' to see the nth document for this query\n");
	  fflush(stdout);
	}
	else if (docnum>maxdoc && SavantVerbose) {
	  printf("There are only %d documents!", maxdoc);
	  fflush(stdout);
	}
	else {
	  retrieve_and_print(best_matches[docnum-1].vecnum);
	}
      }
    }
    else if (!strncasecmp(cmdptr,"loc-print",9)) {
      if (query == NULL) {
	if (SavantVerbose) {
	  printf("No current query! Use 'query'.\n");
	  fflush(stdout);
	}
      }
      else {
	cmdptr+=9;
	docnum=atoi(cmdptr);
	if (!docnum && SavantVerbose) {
	  printf("Try 'print n' to see the nth document for this query\n");
	  fflush(stdout);
	}
	else if (docnum>maxdoc && SavantVerbose) {
	  printf("There are only %d documents!", maxdoc);
	  fflush(stdout);
	}
	else {
	  retrieve_and_print_loc(best_matches[docnum-1].vecnum);
	}
      }
    }
    else if (!strncasecmp(cmdptr,"retrieve ",8)) {
      cmdptr+=8;
      docnum=atoi(cmdptr);
      if (0 < docnum && docnum < num_docvecs+1) {
	retrieve_and_print(docnum-1);
      }
      else if (SavantVerbose) {
	printf("Window numbers range from 1 to %d.\n", num_docvecs);
	fflush(stdout);
      }
    }
    else if (!strncasecmp(cmdptr,"loc-retrieve ",12)) {
      cmdptr+=12;
      docnum=atoi(cmdptr);
      if (0 < docnum && docnum < num_docvecs+1) {
	retrieve_and_print_loc(docnum-1);
      }
      else if (SavantVerbose) {
	printf("Window numbers range from 1 to %d.\n", num_docvecs);
	fflush(stdout);
      }
    }
    else if (SavantVerbose) {
      printf("I see no %s here.\n", cmdptr);
      fflush(stdout);
    }

    if (SavantVerbose) {
      printf("\nSavant: ");
      fflush(stdout);
    }
    fgets(command,60,stdin);
    cmdptr=command;
    while((*cmdptr) && !(feof(stdin))) {
      if (isspace(*cmdptr))
	cmdptr++;
      else 
	break;
    }
  }

  if (query) { /* free old query and old results */
    destroy_dvtree(query);
    free(best_matches);
  }
  if(all_dv_biases) {
    free(all_dv_biases);
  }


}

/* Print a listing of query matches to stdout.

   For each entry of best_matches, up to maxdoc entries or until an entry
   whose vecnum is -1, one line is written in the form
       "<rank> <similarity> |<window number, 1-based>|<title>"
   The title is found by reading the window's document number from WMAP_FILE,
   that document's title offset from TOFF_FILE, and the string at that offset
   in TITLE_FILE.  When SavantVerbose is set, a second indented line gives
   the string found in DOCLOC_FILE via DLOFF_FILE.  A blank line terminates
   the listing.

   best_matches  match array; a NULL array is treated as having no matches
   maxdoc        maximum number of entries to print

   Returns the number of matches printed. */
int print_suggestions(DocSim *best_matches,
                      int maxdoc)
{
  int match_num;
  DB_INT doc_num;
  DB_INT offset;
  char *fstring;

  for (match_num = 0; best_matches != NULL && match_num < maxdoc; match_num++) {
    if (best_matches[match_num].vecnum == -1) /* found less than maxdoc matches */
      break;

    /* Window map: two DB_INTs per window, the first being the document number. */
    fseek(WMAP_FILE, 2*sizeof(DB_INT)*best_matches[match_num].vecnum, SEEK_SET);
    fread_big(&doc_num, sizeof(DB_INT), 1, WMAP_FILE);

    fseek(TOFF_FILE, sizeof(DB_INT)*doc_num, SEEK_SET);
    fread_big(&offset, sizeof(DB_INT), 1, TOFF_FILE);
    fseek(TITLE_FILE, offset, SEEK_SET);
    fstring = read_string(TITLE_FILE);
    /* read_string returns NULL when nothing could be read; print an empty
       title rather than handing NULL to %s. */
    printf("%-3d %.2f |%d|%s\n", match_num+1,
           best_matches[match_num].sim, best_matches[match_num].vecnum+1,
           fstring ? fstring : "");
    fflush(stdout);
    free(fstring);

    if (SavantVerbose) {
      /* Location offsets: three DB_INTs per document, the first being the
         offset of the location string in DOCLOC_FILE. */
      fseek(DLOFF_FILE, 3*sizeof(DB_INT)*doc_num, SEEK_SET);
      fread_big(&offset, sizeof(DB_INT), 1, DLOFF_FILE);
      fseek(DOCLOC_FILE, offset, SEEK_SET);
      fstring = read_string(DOCLOC_FILE);
      printf("     %s\n", fstring ? fstring : "");
      fflush(stdout);
      free(fstring);
    }
  }
  printf("\n"); /* signals that all suggestions have been made */
  fflush(stdout);

  return(match_num);
}

/* Print the body of the document that contains window win_num (0-based).

   The window's document number and window offset come from WMAP_FILE; the
   document's location string, body start and body length come from
   DLOFF_FILE / DOCLOC_FILE.  On success the output is
       "<window offset>\n<body text>\n.\n"
   If the location cannot be read, the file cannot be opened, or memory runs
   out, a "Can't retrieve document ..." line is printed instead.

   Always returns 0. */
int retrieve_and_print(int win_num)
{
  DB_INT doc_num;
  DB_UINT window_offset, dl_offset, body_start, body_length;
  char *filename, *print_buf;
  FILE *doc_file;

  fseek(WMAP_FILE, 2*sizeof(DB_INT)*win_num, SEEK_SET);
  fread_big(&doc_num, sizeof(DB_INT), 1, WMAP_FILE);
  fread_big(&window_offset, sizeof(DB_INT), 1, WMAP_FILE);

  fseek(DLOFF_FILE, 3*sizeof(DB_INT)*doc_num, SEEK_SET);
  fread_big(&dl_offset, sizeof(DB_INT), 1, DLOFF_FILE);
  fseek(DOCLOC_FILE, dl_offset, SEEK_SET);
  filename = read_string(DOCLOC_FILE);

  /* read_string returns NULL when nothing could be read; fopen(NULL) and
     printf("%s", NULL) are both undefined, so bail out early. */
  if (filename == NULL) {
    printf("Can't retrieve document %d (containing window %d): can't read its location!\n",
           doc_num, win_num);
    fflush(stdout);
    return(0);
  }

  doc_file = fopen(filename, "r");
  if (doc_file == NULL) {
    printf("Can't retrieve document %d (containing window %d): can't open %s!\n",
           doc_num, win_num, filename);
    fflush(stdout);
  }
  else {
    /* DLOFF_FILE is still positioned just past dl_offset: the next two
       values of the record are the body start and body length. */
    fread_big(&body_start, sizeof(DB_INT), 1, DLOFF_FILE);
    fread_big(&body_length, sizeof(DB_INT), 1, DLOFF_FILE);

    /* Zero-filled so that a read shorter than body_length still leaves a
       terminated string; the size is widened before adding 1 so it cannot
       wrap in the narrower type. */
    print_buf = (char *)calloc((size_t)body_length + 1, sizeof(char));
    if (print_buf == NULL) {
      printf("Can't retrieve document %d (containing window %d): out of memory!\n",
             doc_num, win_num);
      fflush(stdout);
    }
    else {
      fseek(doc_file, body_start, SEEK_SET);
      fread_big(print_buf, sizeof(char), body_length, doc_file);
      print_buf[body_length] = '\0';
      printf("%d\n%s\n.\n", window_offset, print_buf);
      fflush(stdout);
      free(print_buf);
    }
    fclose(doc_file);
  }
  free(filename);

  return(0);
}

/* Print where the document containing window win_num (0-based) lives.

   The window's document number and window offset come from WMAP_FILE; the
   location string and body start come from DLOFF_FILE / DOCLOC_FILE.  The
   output is
       "<window offset + body start>\n<location string>\n.\n"
   If the location string cannot be read, an empty string is printed in its
   place so the three-line shape of the reply is kept.

   Always returns 0. */
int retrieve_and_print_loc(int win_num)
{
  DB_INT doc_num;
  DB_UINT window_offset, dl_offset, body_start;
  char *filename;

  fseek(WMAP_FILE, 2*sizeof(DB_INT)*win_num, SEEK_SET);
  fread_big(&doc_num, sizeof(DB_INT), 1, WMAP_FILE);
  fread_big(&window_offset, sizeof(DB_INT), 1, WMAP_FILE);

  fseek(DLOFF_FILE, 3*sizeof(DB_INT)*doc_num, SEEK_SET);
  fread_big(&dl_offset, sizeof(DB_INT), 1, DLOFF_FILE);
  fseek(DOCLOC_FILE, dl_offset, SEEK_SET);
  filename = read_string(DOCLOC_FILE);
  /* Second value of the DLOFF_FILE record: start of the body in the file. */
  fread_big(&body_start, sizeof(DB_INT), 1, DLOFF_FILE);

  /* read_string returns NULL when nothing could be read; never pass NULL
     to %s. */
  printf("%d\n%s\n.\n", window_offset + body_start, filename ? filename : "");
  fflush(stdout);
  free(filename);
  return(0);
}

DV_Tree *get_query(int maxlen, int *query_bias)
{
  int len, loclen, sourcelen, subjectlen, datelen;
  char *buf, *locbuf, *sourcebuf, *subjectbuf, *datebuf, line[128];
  DV_Tree *return_vector, *loc_vector, *source_vector, *subject_vector, *date_vector, *time_vector, *day_vector;
  enum Field_Types type;
  int query_bias_set[NUM_FIELD_TYPES];

  buf = (char *)malloc((maxlen+1)*sizeof(char));
  locbuf = (char *)malloc((maxlen+1)*sizeof(char));
  sourcebuf = (char *)malloc((maxlen+1)*sizeof(char));
  subjectbuf = (char *)malloc((maxlen+1)*sizeof(char));
  datebuf = (char *)malloc((maxlen+1)*sizeof(char));
  len = loclen = sourcelen = subjectlen = datelen = 0;
  buf[0] = '\0';
  locbuf[0] = '\0';
  sourcebuf[0] = '\0';
  subjectbuf[0] = '\0';
  datebuf[0] = '\0';
  for (type=BODY_FIELD; type < LAST_FIELD; type++) {
    query_bias_set[type] = 0; /* has it been set manually? */
    query_bias[type] = 0;     /* start'em at 0 */
  }

  if(SavantVerbose) {
    printf("Type your query now. End with a control-D or a dot on a line by itself.\n");
    fflush(stdout);
  }

  while(fgets(line,128,stdin) && !(feof(stdin))) {
    if(SavantDebug) {
      printf("%s", line);
    }
    if (strcmp(line,".\n") == 0) {
      break;
    }
    else if (!(strncmp(line,"!LOCATION: ", 11)) && (loclen < maxlen)) {
      strncat(locbuf, &(line[11]), maxlen-loclen);
      loclen += strlen(&(line[11]));
      if (!query_bias_set[LOCATION_FIELD]) query_bias[LOCATION_FIELD] = 1;
    }
    else if (!(strncmp(line,"!SOURCE: ", 9)) && (sourcelen < maxlen)) {
      strncat(sourcebuf, &(line[9]), maxlen-sourcelen);
      sourcelen += strlen(&(line[9]));
      if (!query_bias_set[SOURCE_FIELD]) query_bias[SOURCE_FIELD] = 1;
    }
    else if (!(strncmp(line,"!SUBJECT: ", 10)) && (subjectlen < maxlen)) {
      strncat(subjectbuf, &(line[10]), maxlen-subjectlen);
      subjectlen += strlen(&(line[10]));
      if (!query_bias_set[SUBJECT_FIELD]) query_bias[SUBJECT_FIELD] = 1;
    }
    else if (!(strncmp(line,"!DATE: ", 7)) && (datelen < maxlen)) {
      strncat(datebuf, &(line[7]), maxlen-datelen);
      datelen += strlen(&(line[7]));
      if (!query_bias_set[DATE_FIELD]) query_bias[DATE_FIELD] = 1;
      if (!query_bias_set[DAY_FIELD]) query_bias[DAY_FIELD] = 1;
      if (!query_bias_set[TIME_FIELD]) query_bias[TIME_FIELD] = 1;
    }
    else if (!(strncmp(line,"!LOCATION-BIAS: ", 16))) {
      query_bias[LOCATION_FIELD] = strtol(&(line[16]), NULL, 10);
      query_bias_set[LOCATION_FIELD] = 1;
    }
    else if (!(strncmp(line,"!SOURCE-BIAS: ", 14))) {
      query_bias[SOURCE_FIELD] = strtol(&(line[14]), NULL, 10);
      query_bias_set[SOURCE_FIELD] = 1;
    }
    else if (!(strncmp(line,"!SUBJECT-BIAS: ", 15))) {
      query_bias[SUBJECT_FIELD] = strtol(&(line[15]), NULL, 10);
      query_bias_set[SUBJECT_FIELD] = 1;
    }
    else if (!(strncmp(line,"!DATE-BIAS: ", 12))) {
      query_bias[DATE_FIELD] = strtol(&(line[12]), NULL, 10);
      query_bias_set[DATE_FIELD] = 1;
    }
    else if (!(strncmp(line,"!TIME-BIAS: ", 12))) {
      query_bias[TIME_FIELD] = strtol(&(line[12]), NULL, 10);
      query_bias_set[TIME_FIELD] = 1;
    }
    else if (!(strncmp(line,"!DAY-BIAS: ", 11))) {
      query_bias[DAY_FIELD] = strtol(&(line[11]), NULL, 10);
      query_bias_set[DAY_FIELD] = 1;
    }
    else if (!(strncmp(line,"!BODY-BIAS: ", 12))) {
      query_bias[BODY_FIELD] = strtol(&(line[12]), NULL, 10);
      query_bias_set[BODY_FIELD] = 1;
    }
    else {
      if (!query_bias_set[BODY_FIELD]) query_bias[BODY_FIELD] = 1;
      strncat(buf, line, maxlen-len);
      len += strlen(line);
    }
    if(len > maxlen)
      break;
  }

  return_vector = vectorize_buffer(buf, BODY_FIELD);
  loc_vector = vectorize_buffer(locbuf, LOCATION_FIELD);
  subject_vector = vectorize_buffer(subjectbuf, SUBJECT_FIELD);
  source_vector = vectorize_buffer(sourcebuf, SOURCE_FIELD);
  date_vector = vectorize_buffer(datebuf, DATE_FIELD);
  time_vector = vectorize_buffer(datebuf, TIME_FIELD);
  day_vector = vectorize_buffer(datebuf, DAY_FIELD);

  free(buf);free(locbuf);free(subjectbuf);free(sourcebuf);free(datebuf);

  merge_dvtrees(return_vector, loc_vector);
  if (return_vector == NULL) return_vector = loc_vector; 
  else { destroy_dvtree(loc_vector); }

  merge_dvtrees(return_vector, source_vector);
  if (return_vector == NULL) { return_vector = source_vector; }
  else { destroy_dvtree(source_vector); }

  merge_dvtrees(return_vector, subject_vector);
  if (return_vector == NULL) { return_vector = subject_vector; }
  else { destroy_dvtree(subject_vector); }

  merge_dvtrees(return_vector, date_vector);
  if (return_vector == NULL) { return_vector = date_vector; }
  else { destroy_dvtree(date_vector); }

  merge_dvtrees(return_vector, time_vector);
  if (return_vector == NULL) { return_vector = time_vector; }
  else { destroy_dvtree(time_vector); }

  merge_dvtrees(return_vector, day_vector);
  if (return_vector == NULL) { return_vector = day_vector; }
  else { destroy_dvtree(day_vector); }

  return(return_vector);
}

/* Read one line (at most 255 characters) from file and return a
   heap-allocated copy without its trailing newline.

   Only an actual newline is stripped: a final line with no newline, or a
   line longer than the buffer, keeps all the characters that were read.

   Returns NULL if nothing could be read (EOF or error) or if memory could
   not be allocated.  The caller frees the result. */
char *read_string(FILE *file)
{
  char temp[256];
  char *copy;
  size_t len;

  if (!fgets(temp, sizeof temp, file))
    return(NULL);

  /* Length up to, not including, the first newline (or the whole string
     when there is none). */
  len = strcspn(temp, "\n");
  temp[len] = '\0';

  copy = (char *)malloc(len + 1);
  if (copy != NULL)
    memcpy(copy, temp, len + 1);
  return(copy);
}
