/*** analog 1.9beta ***/
/* Please read Readme.html, or http://www.statslab.cam.ac.uk/~sret1/analog/  */

/*** alias.c; functions to cope with aliasing, etc. ***/

#include "analhea2.h"

flag doaliaslist(char *name, struct alias *listhead)
{                     /* do requested aliases on an object */
  /* Walks the alias list starting at listhead until the entry whose 'from'
     string is empty. Every entry whose 'from' pattern matches name (as
     judged by wildmatch()) rewrites name in place:
       - if 'to' contains no '*', name becomes a copy of 'to';
       - otherwise the text between the pointers p1 and p2 reported by
         wildmatch() is substituted for the first '*' in 'to'.
     Later entries are matched against the already rewritten name.
     Returns ON if any entry matched, OFF otherwise.
     NOTE(review): name is assumed to have room for MAXSTRINGLENGTH bytes;
     the size is not passed in, so this cannot be checked here. */

  extern flag wildmatch();       /* in utils.c */

  struct alias *aliasp;
  int anydone = OFF;
  char *p1, *p2, *c;
  char answer[MAXSTRINGLENGTH];

  for (aliasp = listhead; aliasp -> from[0] != '\0'; aliasp = aliasp -> next) {
    if (wildmatch(name, aliasp -> from, &p1, &p2)) {
      anydone = ON;
      if ((c = strchr(aliasp -> to, '*')) == NULL)
        strcpy(name, aliasp -> to);
      else {
        /* Temporarily split 'to' at the '*' so that the two halves can be
           handled as separate strings. */
        *c = '\0';
        if ((int)strlen(aliasp -> to) + (p2 - p1) + (int)strlen(c + 1) <
            MAXSTRINGLENGTH) {
          /* Build the result in a scratch buffer because p1 and p2 are used
             after name would otherwise have been overwritten. */
          strcpy(answer, aliasp -> to);
          strncat(answer, p1, (size_t)(p2 - p1));
          strcat(answer, c + 1);
          strcpy(name, answer);
        }
        /* Always put the '*' back, even when the expansion was too long to
           apply; otherwise the alias template stays truncated for every
           later call. */
        *c = '*';
      }
    }
  }

  return(anydone);

}

int doaliashost(char *hostn)
{   /* apply all required aliases to a hostname */
  /* hostn is rewritten in place: first every upper-case letter is folded to
     lower case, then the aliases in the list headed by hostaliashead are
     applied with doaliaslist(). Returns ON if either step changed or
     matched anything, OFF otherwise. */
  extern struct alias *hostaliashead;

  int anydone = OFF;
  char *p;

  /* convert to lower case */
  /* The <ctype.h> functions require a value representable as unsigned char
     (or EOF), so cast before calling them; a plain char may be negative. */

  for (p = hostn; *p != '\0'; p++) {
    if (isupper((unsigned char)*p)) {
      *p = (char)tolower((unsigned char)*p);
      anydone = ON;
    }
  }

  /* alias conversion from configuration file */

  if (doaliaslist(hostn, hostaliashead))
    anydone = ON;

  return(anydone);

}

static void reversespan(char *lo, char *hi)
{  /* reverse, in place, the characters in the half-open range [lo, hi) */

  char swap;

  while (hi - lo > 1) {
    hi--;
    swap = *lo;
    *lo = *hi;
    *hi = swap;
    lo++;
  }

}

char *reversehostname(char *hostn)
{  /* reverse an internet hostname (cam.ac.uk -> uk.ac.cam). Return a pointer
      to the reversed name, and leave the hostname changed on exit */
  /* The order of the '.'-separated components is reversed; the characters
     inside each component keep their order. Empty components (leading,
     trailing or doubled dots) are preserved, e.g. "a.b." -> ".b.a".
     The work is done in place: reversing the whole string puts the
     components in the right order but spells each one backwards, and
     reversing each component again restores its spelling. No scratch
     buffer is needed, so names of any length are handled. */

  char *compstart;
  char *p;

  reversespan(hostn, hostn + strlen(hostn));

  compstart = hostn;
  for (p = hostn; ; p++) {
    if (*p == '.' || *p == '\0') {
      reversespan(compstart, p);    /* un-reverse this component */
      if (*p == '\0')
        break;
      compstart = p + 1;
    }
  }

  return(hostn);

}


int doaliasurl(char *filename)
{  /* apply all required aliases to a filename or http: URL */
  /* filename is rewritten in place by four successive steps:
       1. %XX escapes for printable ASCII (0x20..0x7E) are decoded;
       2. a trailing DIRSUFFIX that follows a '/' is removed;
       3. "//", "/./" and "/dir/../" sequences are collapsed;
       4. the aliases in the list headed by filealiashead are applied.
     Returns ON if any step changed (or, for step 4, matched) something,
     OFF otherwise. Steps 1-3 only ever shorten the string. */

  extern int dirsufflength;
  extern struct alias *filealiashead;

  int anydone = OFF;
  char *tempp;
  char *prev;
  char *tail;
  unsigned int tempint;
  int len;

  /* First, change %7E to ~, etc. */
  /* Both characters after the '%' must be hex digits before anything is
     decoded. That keeps a '%' at the very end of the string from being
     read past the terminator, and stops a value left over from an earlier
     escape from being applied to a malformed one. */
  tempp = filename;
  while ((tempp = strchr(tempp, '%')) != NULL) {
    if (isxdigit((unsigned char)tempp[1]) &&
        isxdigit((unsigned char)tempp[2]) &&
        sscanf(tempp + 1, "%2x", &tempint) == 1 &&
        tempint >= 0x20 && tempint < 0x7F) {
      anydone = ON;
      *tempp = (char)tempint;
      /* close the two-character gap; memmove copes with the overlap */
      memmove(tempp + 1, tempp + 3, strlen(tempp + 3) + 1);
    }
    tempp++;   /* step past the '%' or the decoded char: no double decoding */
  }

  /* Secondly, if it ends with the DIRSUFFIX (often index.html), strip it */

  if (dirsufflength > 0) {
    len = (int)strlen(filename);
    if (len >= dirsufflength &&
        (STREQ(filename + (len - dirsufflength), DIRSUFFIX)) &&
        *(filename + MAX(len - dirsufflength - 1, 0)) == '/') {
      filename[len - dirsufflength] = '\0';
      anydone = ON;
    }
  }

  /* Thirdly, // -> /  ;  /./ -> /  ;  /spam/../ -> /  (except  at start) */
  /* We could worry about trailing /. and /.. but they should be 302 redirects
     anyway so shouldn't matter */
  /* NB On our server //Dept/ -> /Dept/, but //~sret1/ doesn't -> /~sret1/
     But that doesn't matter because the latter will be 404 not found, so
     won't ever get this far. Successes will have been interpreted in the
     following way, and that's all we care about. */

  tempp = filename;
  while ((tempp = strstr(tempp, "//")) != NULL) {
    if (tempp != filename && *(tempp - 1) == ':')
      tempp++;   /* Don't translate http:// ; just skip to next //  */
    else {
      anydone = ON;
      /* drop the second slash; tempp stays put so "///" is seen again */
      memmove(tempp + 1, tempp + 2, strlen(tempp + 2) + 1);
    }
  }

  tempp = filename;
  while ((tempp = strstr(tempp, "/./")) != NULL) {
    anydone = ON;
    memmove(tempp + 1, tempp + 3, strlen(tempp + 3) + 1);
  }

  tempp = filename;
  while ((tempp = strstr(tempp, "/../")) != NULL) {
    tail = tempp + 4;
    prev = tempp;
    if (prev != filename) {    /* go back to prev slash, but not past start */
      do {
        prev--;
      } while (prev > filename && *prev != '/');
    }
    if (*prev != '/') {
      /* No earlier slash exists (e.g. "a/../b"), so there is no directory
         to cancel. Leave the text alone and continue from the trailing
         slash, which may itself begin another "/../". */
      tempp += 3;
      continue;
    }
    anydone = ON;
    memmove(prev + 1, tail, strlen(tail) + 1);
    tempp = prev;
  }

  /* Fourthly, alias conversion from config. file */

  if(doaliaslist(filename, filealiashead))
    anydone = ON;

  return(anydone);

}

int doaliasref(char *name)
{        /* apply all required aliases to a referer */
         /* return 1 if any done, -1 if url corrupt or wrong type, 0 o/wise */
  /* name is rewritten in place. In order: anything from the first '#' on is
     cut off; %XX escapes for printable ASCII are decoded; the method (the
     text before the first ':') is lower-cased; for methods with a positive
     default port the "//" is checked, the hostname lower-cased, leading
     zeros stripped from the port, a port equal to the default removed, and
     a '/' appended when there is no pathname; finally the aliases in the
     list headed by refaliashead are applied.
     NOTE(review): appending the '/' makes the string one byte longer; the
     caller's buffer is assumed to have room for it.
     NOTE(review): when -1 is returned for "file" or "mailto", name is left
     cut off at the ':'. */

  extern struct alias *refaliashead;

  int anydone = OFF;
  int defaultport = -1;
  int lower;
  unsigned int tempint;
  char *tempp, *tempp2;

  /* First, strip off #'s */
  if ((tempp = strchr(name, '#')) != NULL)
    *tempp = '\0';

  /* Next, change %7E to ~, etc. */
  /* Both characters after the '%' must be hex digits before anything is
     decoded, so a '%' near the end of the string is never read past the
     terminator and no stale or uninitialised value is ever used. */
  tempp = name;
  while ((tempp = strchr(tempp, '%')) != NULL) {
    if (isxdigit((unsigned char)tempp[1]) &&
        isxdigit((unsigned char)tempp[2]) &&
        sscanf(tempp + 1, "%2x", &tempint) == 1 &&
        tempint >= 0x20 && tempint < 0x7F) {
      anydone = ON;
      *tempp = (char)tempint;
      /* close the two-character gap; memmove copes with the overlap */
      memmove(tempp + 1, tempp + 3, strlen(tempp + 3) + 1);
    }
    tempp++;
  }

  /* Coerce method to lower case */
  /* (cast to unsigned char: <ctype.h> functions are undefined for negative
     arguments other than EOF) */

  for (tempp = name; *tempp != ':' && *tempp != '\0'; tempp++) {
    lower = tolower((unsigned char)*tempp);
    if ((unsigned char)*tempp != lower) {
      *tempp = (char)lower;
      anydone = ON;
    }
  }
  if (*tempp != ':')
    return(-1);

  /* find out what sort of URL it is */

  *tempp = '\0';     /* isolate the method name for the comparisons */
  if (STREQ(name, "http"))
    defaultport = 80;
  else if (STREQ(name, "ftp"))
    defaultport = 21;
  else if (STREQ(name, "file"))
    return(-1);
  else if (STREQ(name, "news"))
    defaultport = 0;
  else if (STREQ(name, "gopher"))
    defaultport = 70;
  else if (STREQ(name, "telnet"))
    defaultport = 23;
  else if (STREQ(name, "wais"))
    defaultport = 210;
  else if (STREQ(name, "nntp"))
    defaultport = 119;
  else if (STREQ(name, "prospero"))
    defaultport = 1525;
  else if (STREQ(name, "mailto"))
    return(-1);
  *tempp = ':';

  /* Unless "news:" or unknown, check it has the // next and coerce hostname
     to lower case */

  if (defaultport > 0) {
    if (*(tempp + 1) != '/' || *(tempp + 2) != '/')
      return(-1);

    for (tempp += 3; *tempp != '/' && *tempp != ':' && *tempp != '\0';
         tempp++) {
      lower = tolower((unsigned char)*tempp);
      if ((unsigned char)*tempp != lower) {
        *tempp = (char)lower;
        anydone = ON;
      }
    }

    /* strip leading 0s from port numbers and cross out default port numbers */

    if (*tempp == ':') {
      while (*(tempp + 1) == '0')
        memmove(tempp + 1, tempp + 2, strlen(tempp + 2) + 1);

      /* Cut the string at the start of the pathname (if any) so that the
         port can be read on its own. */
      if ((tempp2 = strchr(tempp, '/')) != NULL)
        *tempp2 = '\0';

      if (defaultport == atoi(tempp + 1)) {
        if (tempp2 != NULL) {
          *tempp2 = '/';
          /* slide the pathname down over ":port" */
          memmove(tempp, tempp2, strlen(tempp2) + 1);
        }
        else
          strcpy(tempp, "/");
        anydone = ON;
      }
      else if (tempp2 != NULL)
        *tempp2 = '/';        /* restore slash */
      else
        strcpy(tempp + strlen(tempp), "/"); /* port but no pathname; + slash */
    }

    /* trailing slash on methods without portname or pathname */

    else if (*tempp == '\0')
      strcpy(tempp, "/");
  }

  /* We probably don't want to change /./ -> / etc. even in http protocol,
     because we don't want to make assumptions about other people's file
     systems. */

  /* Finally, alias conversion from config. file */

  if(doaliaslist(name, refaliashead))
    anydone = ON;

  return(anydone);

}

/* Now a function to run through all hosts, files or refs doing all aliases */

void allaliases(struct genstruct *objhead[], int hashsize, int *totalobjs,
		int *totalobjs7, int *totalnew7, char code)
{
  /* Visits every item in the hashsize lists objhead[0..hashsize-1]; each
     list ends at an item whose name is NULL. A copy of the item's name is
     passed through doaliashost() (code 'S'), doaliasurl() (code 'r') or
     doaliasref() (code 'f'). If that returns non-zero, the three counters
     are decremented as appropriate, the item is unlinked from its list and,
     when the result was exactly 1, its figures are handed to hashadd()
     under the new name. Any other code leaves all items untouched.
     NOTE(review): the unlinked item is not freed here; whether that is
     intentional cannot be told from this file. */

  extern void hashadd();          /* in hash.c */

  int onlist;
  struct genstruct *p, *lastp, *nextp;
  char name[MAXSTRINGLENGTH];
  int tempflag;

  if (hashsize <= 0)               /* no lists at all: nothing to do */
    return;

  onlist = 0;                      /* the list of items we are on */
  p = objhead[0];                  /* starting at list 0 */
  lastp = p;
  for ( ; onlist < hashsize; p = nextp) {  /* run through items */
    if (p -> name == NULL) {         /* then finished this list */
      onlist++;                      /* so start the next list, if any; */
      if (onlist < hashsize)         /* don't read objhead[hashsize] */
        nextp = objhead[onlist];
      else
        nextp = NULL;                /* loop ends before this is used */
      lastp = nextp;
    }
    else {
      strcpy(name, p -> name);
      tempflag = 0;                  /* unknown code => treat as no alias */
      if (code == 'S')
        tempflag = doaliashost(name);
      else if (code == 'r')
        tempflag = doaliasurl(name);
      else if (code == 'f')
        tempflag = doaliasref(name);
      if (tempflag != 0) {             /* if there was an alias to do */
        (*totalobjs)--;                /* this wasn't a real item */
        if (p -> last7)
          (*totalobjs7)--;
        if (p -> last7 && !(p -> pre7))
          (*totalnew7)--;
        if (tempflag == 1)   /* we are putting it elsewhere not rejecting it */
          hashadd(objhead, hashsize, name, p -> reqs, p -> bytes, p -> last7,
                  totalobjs, totalobjs7, totalnew7, ON);
        /* then take this item off list */
        nextp = p -> next;
        if (p == lastp) {    /* we are at the head of the list */
          objhead[onlist] = p -> next;
          lastp = nextp;     /* and we still are afterwards */
        }
        else
          lastp -> next = p -> next;  /* (and lastp is unchanged) */
      }
      else {  /* real host, but no alias */
        nextp = p -> next;
        lastp = p;
      }
    }   /* end else */

  }   /* end for all items */

}


int hosttodomcode(char *hostn)
{    /* find the domain code for a certain hostname */
  /* The domain is the text after the last '.' in hostn. Returns
       DOMHASHSIZE - 1  if that text starts with a digit (numerical host);
       DOMHASHSIZE - 2  if the host is unknown: it has no '.', or only a
                        leading one, or the domain is empty, hashes out of
                        range, or does not match the table entry's id;
       otherwise the index x into ohead[] computed from the domain's first
       two letters and whether it has a third character.
     hostn is not modified. */

  extern struct domain *ohead[];

  char *lastdot;
  char *domainname;
  int x;

  /* first change hostn into a domain */

  lastdot = strrchr(hostn, '.');
  domainname = (lastdot != NULL) ? lastdot + 1 : hostn;

  /* Test the first character of the last component. For a name without any
     '.' that is the first character of the name itself. The cast is needed
     because isdigit() is undefined for negative arguments. */
  if (isdigit((unsigned char)domainname[0]))
    return(DOMHASHSIZE - 1);   /* representing numerical hosts */

  if (lastdot == NULL || lastdot == hostn)
    return(DOMHASHSIZE - 2);   /* representing unknown hosts */

  if (domainname[0] == '\0')   /* name ends in '.': there is no domain */
    return(DOMHASHSIZE - 2);

  /* Only look at domainname[2] when domainname[1] is a real character, so
     that nothing beyond the terminator is ever read. */
  x = (domainname[0] - 'a') * 52 + (domainname[1] - 'a') * 2 +
    (domainname[1] != '\0' && domainname[2] != '\0');

  if (x < 0 || x > DOMHASHSIZE - 3)  /* some funny characters messed it up */
    return(DOMHASHSIZE - 2);

  if (ohead[x] -> name == NULL)
    return(DOMHASHSIZE - 2);      /* no domain at that domaincode */

  if (!STREQ(ohead[x] -> id, domainname))
    return(DOMHASHSIZE - 2);      /* right code, nevertheless wrong domain */

  return(x);

}

void urltodir(char *filename)
{  /* converts a filename to a dir for a level 'dirlevel' directory report */
  /* filename is overwritten in place with one of:
       - the name cut off just after its dirlevel'th directory slash (or
         after its last slash, if it has fewer than dirlevel levels);
       - "[root directory]" if that slash is the very first character;
       - "[no directory]" if no suitable slash is found (this includes the
         empty string).
     NOTE(review): the two bracketed texts may be longer than the original
     name; the caller's buffer is assumed to be big enough for them. */

  extern int dirlevel;

  register int i = 0, j;

  for (j = 0; j < dirlevel; j++) {
    if (filename[i] == '/')
      i++;
    for ( ; filename[i] != '/' && filename[i] != '\0'; i++)
      ;     /* run through to level'th slash, if any */
  }

  if (filename[i] == '\0') {  /* not j levels; run back */
    /* Check the index before using it, so that an empty name (i == 0 here)
       never looks at filename[-1]. */
    for ( i-- ; i > 0 && filename[i] != '/'; i--)
      ;
    if (i < 0)
      i = 0;
  }

  if (filename[i] != '/')
    strcpy(filename, "[no directory]");
  else {
    filename[i + 1] = '\0';     /* Terminate it straight after the / */
    if (i == 0)
      strcpy(filename, "[root directory]");
  }

}

/* whether a certain string is wanted given a list of includes and excludes */
/* NB Need to make sure aliases have been done before calling this */

flag included(char *name, struct include *listhead)
{
  /* Decides whether name is wanted. The list starting at listhead is read
     up to the entry whose 'in' field is UNSET. The starting verdict is the
     opposite of the first entry's type (FALSE if the first entry is an
     include, TRUE otherwise); after that, each entry whose pattern matches
     name overrides the verdict, so the last matching entry wins. */

  extern flag wildmatch();          /* in utils.c */

  struct include *entry;
  char *matchstart, *matchend;      /* out-arguments for wildmatch(); their
                                       values are not used here */
  flag wanted;

  wanted = (listhead -> in == TRUE) ? FALSE : TRUE;

  entry = listhead;
  while (entry -> in != UNSET) {
    if (wildmatch(name, entry -> name, &matchstart, &matchend))
      wanted = (entry -> in == TRUE) ? TRUE : FALSE;
    entry = entry -> next;
  }

  return(wanted);
}
