#!/usr/bin/gawk -f

####################
# vislength(str): width, in characters, that str occupies in the rendered
# table.  This is just length(str); it is kept as a separate function so
# every width calculation in the script goes through one place.
function vislength(str,    n) {
  n = length(str);
  return n;
}

####################
# alimg(wid): append the current cell text to the output line being built.
#
#   wid  - total width available for the cell (already including any
#          columns it spans).
#
# Reads the globals imline (cell text), iline (index of the output line),
# tnst/row/col (to look up the cell's alignment in align[]), spaces and
# vrule.  Appends the padded text followed by vrule to image[iline].
# When centring with an odd amount of padding, one space is appended to
# the global imline so that the extra column goes on the right.
#
# substr() is called with a start index of 1: a start of 0 is outside the
# 1-based range POSIX defines, and some awk implementations then return
# one character fewer than requested.
function alimg( wid,    how, pad ) {

  # wid becomes the amount of padding still needed
  wid -= vislength(imline);

  # exact fit: no padding, alignment is irrelevant
  if( !wid ) {
    image[iline] = image[iline] imline vrule;
    return;
  }

  how = align[tnst,row,col];

  if( substr(how,1,6) == "CENTER" ) {
    if( wid % 2 ) {
      imline = imline " ";
      wid--;
    }
    pad = substr( spaces, 1, wid / 2 );
    image[iline] = image[iline] pad imline pad vrule;
  }

  else if( substr(how,1,5) == "RIGHT" ) {
    image[iline] = image[iline] substr( spaces, 1, wid ) imline vrule;
  }

  # anything else is left aligned
  else {
    image[iline] = image[iline] imline substr( spaces, 1, wid ) vrule;
  }
}

####################
# printtab(): render the table at nesting level tnst into image[0..iline].
#
# Reads colwid[], rowhght[], tcs[], ttext[] and currow[] for level tnst,
# plus the box characters and the hrule/spaces filler strings.  On return
# image[0] is the top border, image[iline] the bottom border, and the
# lines in between are the data rows.  The cell text in ttext[] and the
# column widths in colwid[] are reset to empty/0 as they are consumed.
#
# row, col, iline and imline are deliberately global: alimg() reads them.
#
# substr() is called with a start index of 1: a start of 0 is outside the
# 1-based range POSIX defines, and some awk implementations then return
# one character fewer than requested.
function printtab()  {

#  sides = (tnst == 1 ); #suppress sides for nested tables
  sides = 1; #always include sides
#  sides = 0; #never include sides

  currow[tnst]++;

  # Drop trailing columns that never received a width.  The lower bound
  # matters: for a table without any cells every colwid is 0 and an
  # unbounded loop would count maxcol down forever.
  while( maxcol > 0 && !colwid[tnst,maxcol] )
    maxcol--;

#top line
  image[0] = sides ? boxtl : "";

  for( col = 1; col < maxcol; col++ )
    image[0] = image[0] substr( hrule, 1, colwid[tnst,col] ) boxt;
  image[0] = image[0] substr( hrule, 1, colwid[tnst,col] );

  if( sides )
    image[0] = image[0] boxtr;

  iline = 1;
#data rows
  for( row = 1; row < currow[tnst]; row++ ) {
#lines in row (valign and rowspan not handled)
    for( hght = 0; hght < rowhght[tnst,row]; hght++ ) {
#cols in line
      image[iline] = sides ? vrule : "";

      col = 1;
      while( col <= maxcol ) {

        imline = ttext[tnst,row,col,hght];
        ttext[tnst,row,col,hght] = "";

        if( tcs[tnst,row,col] == 0 ) {
          # A span of 0 means "no cell starts here".  In column 1 the
          # whole line is filled with blank cells; in any other column
          # nothing is emitted for this position.
          if( col == 1 ) {
            for( ; col <= maxcol; col++ )
              image[iline] = image[iline] substr( spaces, 1, colwid[tnst,col] ) vrule;
            col = 1;
          }
        }
        else {
          # width of the cell = widths of all spanned columns plus the
          # separators between them (hence the initial -1)
          len = -1;
          for( cnt = 0; cnt < tcs[tnst,row,col]; cnt++ )
            len += colwid[tnst,col+cnt] + 1;
          alimg(len);
        }

        col++;
      }

      # without sides, drop the vrule that closes the last cell
      if( !sides )
        image[iline] = substr( image[iline], 1, length(image[iline]) - 1 );
      iline++;
    }
  }

#bottom line
  image[iline] = sides ? boxbl : "";

  for( col = 1; col < maxcol; col++ ) {
    len = colwid[tnst,col];
    colwid[tnst,col] = 0;
    # NOTE(review): zero-width columns are skipped here but not in the
    # top line above - confirm whether that asymmetry is intended.
    if( len )
      image[iline] = image[iline] substr( hrule, 1, len ) boxb;
  }
  image[iline] = image[iline] substr( hrule, 1, colwid[tnst,col] );
  if( sides )
    image[iline] = image[iline] boxbr;
  colwid[tnst,col] = 0;
}

####################
# startentry(): begin a new cell for the <td>/<th> tag held in $1.
#
# Closes a still-open cell first, advances curcol[tnst] (skipping columns
# that are still covered by a rowspan from an earlier row), and records
# the cell's alignment, colspan and rowspan.  Sets tdflag[tnst].
#
# Attribute values may be written with or without quotes
# (colspan=2, colspan="2", align='right').
#
# Note that rowspan[] is indexed by column only, not by nesting level.
function startentry(    up, val ) {
#missing </td>
  if( tdflag[tnst] )
    endentry();

  curcol[tnst]++;
  ttext[tnst,currow[tnst],curcol[tnst],0] = "";

#this needs to be the previous value, colsp[curcol[tnst]], maybe tcs?
  colsp[tnst] = 1;

  # Skip columns that an earlier row's rowspan still occupies.  A column
  # is only skipped while its counter is above 1, so a counter that is 0
  # or was never set ends the loop instead of being decremented below
  # zero and looping forever.
  while( rowspan[curcol[tnst]] > 1 ) {
    rowspan[curcol[tnst]]--;
#see prev
    tcs[tnst,currow[tnst],curcol[tnst]] = 1;
    curcol[tnst]++;
  }

  line[tnst] = 0;

  up = toupper($1);

#grab alignment
  align[tnst,currow[tnst],curcol[tnst]] = defalign[tnst];
  if( substr(up " ",1,3) == "TH " )
    align[tnst,currow[tnst],curcol[tnst]] = "CENTER";
  if( match(up, " ALIGN=") ) {
    # take one extra character so that six remain after a leading quote
    val = substr($1,RSTART+7,7);
    sub(/^["']/, "", val);
    align[tnst,currow[tnst],curcol[tnst]] = toupper(substr(val,1,6));
  }

#grab colspan
  if( match(up, "COLSPAN=") ) {
    val = substr($1,RSTART+8,5);
    sub(/^["']/, "", val);
    colsp[tnst] = val;
  }
  colsp[tnst] += 0;
  # a span below 1 would be stored as tcs = 0, which the renderer treats
  # as "no cell here"
  if( colsp[tnst] < 1 )
    colsp[tnst] = 1;

#grab rowspan;
  val = 1;
  if( match(up, "ROWSPAN=") ) {
    val = substr($1,RSTART+8,5);
    sub(/^["']/, "", val);
  }
  rowspan[curcol[tnst]] = val + 0;

  tcs[tnst,currow[tnst],curcol[tnst]] = colsp[tnst];
  tdflag[tnst] = 1;
}

####################
# endentry(): finish the current cell of the table at nesting level tnst.
#
# For every text line of the cell (0..line[tnst]) the text is trimmed and
# its width is folded into colwid[]; a cell spanning several columns
# spreads its width evenly over them.  The row height is raised to the
# number of lines in the cell (ignoring trailing blank lines), the
# columns covered by a colspan are marked with tcs = 0, and the per-cell
# state (tdflag, line, tralready) is reset.
#
# lx, wid and k are locals.
function endentry(    lx, wid, k ) {

  if( colwid[tnst,curcol[tnst]] == 0 )
    colwid[tnst,curcol[tnst]] = 1;

  if( !colsp[tnst] )
    colsp[tnst] = 1;

  colsp[tnst] += 0;

  for( lx = 0; lx <= line[tnst]; lx++ ) {

#trim edge spaces
    sub(/ +$/, "", ttext[tnst,currow[tnst],curcol[tnst],lx]);
    sub(/^ +/, "", ttext[tnst,currow[tnst],curcol[tnst],lx]);

    wid = vislength( ttext[tnst,currow[tnst],curcol[tnst],lx] );

    # every line occupies at least one column
    if( !wid ) {
      ttext[tnst,currow[tnst],curcol[tnst],lx] = \
        ttext[tnst,currow[tnst],curcol[tnst],lx] " ";
      wid = 1;
    }
#print "row:" currow[tnst] " col:" curcol[tnst] " line:" lx " len:" wid ">" ttext[tnst,currow[tnst],curcol[tnst],lx] "<";

    if( colsp[tnst] == 1 ) {
      if( wid > colwid[tnst,curcol[tnst]] )
        colwid[tnst,curcol[tnst]] = wid;
    }
    else {
      # per-column share, rounded up; int() avoids depending on how the
      # quotient happens to be formatted as a string
      wid = int( (wid + colsp[tnst] - 1) / colsp[tnst] );
      for( k = 0; k < colsp[tnst]; k++ ) {
        if( wid > colwid[tnst,curcol[tnst]+k] )
          colwid[tnst,curcol[tnst]+k] = wid;
        # spanned columns inherit the rowspan of the cell's first column
        rowspan[curcol[tnst]+k] = rowspan[curcol[tnst]];
      }
    }
  }

  # lx is now line[tnst]+1; back up over trailing empty/blank lines, but
  # always keep at least one
  while( lx > 1 && (\
         !length( ttext[tnst,currow[tnst],curcol[tnst],lx-1] ) || \
         ttext[tnst,currow[tnst],curcol[tnst],lx-1] == " ") )
    lx--;

#FIXME need to spread rowhght among rowspan
  if( lx > rowhght[tnst,currow[tnst]] )
    rowhght[tnst,currow[tnst]] = lx;

  # mark the columns covered by this cell's colspan as "no cell here"
  while( colsp[tnst] > 1 ) {
    curcol[tnst]++;
    tcs[tnst,currow[tnst],curcol[tnst]] = 0;
    colsp[tnst]--;
  }

  tdflag[tnst] = 0;
  line[tnst] = 0;

  tralready=0;
}

####################
# fixrow(): close the current row of the table at nesting level tnst and
# prepare the next one.  An unterminated cell is closed first, maxcol is
# raised to the number of columns this row used, and the column/line
# counters are reset; the new row starts with a height of 1.
function fixrow () {
  if( tdflag[tnst] ) {
    # the </td> was omitted
    endentry();
  }

  if( maxcol < curcol[tnst] )
    maxcol = curcol[tnst];

  line[tnst] = 0;
  curcol[tnst] = 0;

  currow[tnst] = currow[tnst] + 1;
  rowhght[tnst,currow[tnst]] = 1;
}

####################
# startrow(): handle the <tr> tag held in $1.
#
# If the previous row was not closed by </tr> it is closed here.  The
# row's default cell alignment is taken from the tag's ALIGN attribute,
# or "default" when there is none.  The attribute value may be written
# with or without quotes (align=right, align="right").
function startrow (    val ) {
#omitted </tr>
  if( !tralready )
    fixrow();
  tralready = 0;
#valign?
  defalign[tnst] = "default";
  if( match(toupper($1), " ALIGN=") ) {
    # take one extra character so that six remain after a leading quote
    val = substr($1,RSTART+7,7);
    sub(/^["']/, "", val);
    defalign[tnst] = toupper(substr(val,1,6));
  }
}

####################
# endrow(): handle </tr>.  Closes the current row via fixrow() and then
# sets tralready so that the next startrow() does not close it again.
# The flag must be set after fixrow(), because fixrow() can reach
# endentry(), which clears it.
function endrow() {
  fixrow();

  tralready = 1;
}

####################
# Initialisation.
#
# The input is split on "<" into records and on ">" into fields, so for
# each record $1 is the inside of a tag and $2 the text that follows it.
# Also sets up the parser state, the box-drawing characters, and two
# 256-character filler strings (hrule and spaces) from which borders and
# padding are cut with substr().
BEGIN {
  # "<" and ">" need no backslash in a string constant; "\<" and "\>"
  # are not defined escape sequences.
  RS = "<";
  FS = ">";

  # tnst (table nesting level) must be set before it is used as an index
  tnst = 0;
  tdflag[tnst] = 0;
  colsp[tnst] = 0;
  dflg = 0;
  loff = 0;
  lofmx = 1;
  tralready = 0;

# PC Graphics characters (single);
  boxtl = "\332";  boxt = "\302";  boxtr = "\277";
  boxbl = "\300";  boxb = "\301";  boxbr = "\331";
  vrule = "\263";  hrule = "\304";  #cross = "\305";

# PC Graphics characters (double);
#  boxtl = "\311";  boxt = "\313";  boxtr = "\273";
#  boxbl = "\310";  boxb = "\312";  boxbr = "\274";
#  vrule = "\272";  hrule = "\315";  #cross = "\316";

# Ascii boxes
#  boxtl = "+";  boxt = "+";  boxtr = "+";
#  boxbl = "+";  boxb = "+";  boxbr = "+";
#  vrule = "|";  hrule = "-"; #cross = "+";

  # quadruple four times: 1 -> 4 -> 16 -> 64 -> 256 characters
  hrule = hrule hrule hrule hrule; #4
  hrule = hrule hrule hrule hrule; #16
  hrule = hrule hrule hrule hrule; #64
  hrule = hrule hrule hrule hrule; #256
  spaces = " ";
  spaces = spaces spaces spaces spaces; #4
  spaces = spaces spaces spaces spaces; #16
  spaces = spaces spaces spaces spaces; #64
  spaces = spaces spaces spaces spaces; #256
#  colwid = 12;
}

#################### MAIN
# Main action, run once per record.  With RS "<" and FS ">", $1 is the
# inside of a tag and $2 the text following it.
#
# Outside any table nothing is collected.  Inside a table the tag drives
# the row/cell state machine (startrow, startentry, endentry, endrow) and
# the text is appended to the current cell.  At </table> the table is
# rendered by printtab() and either printed (outermost table) or stored
# as the text of the enclosing cell (nested table).
#
# substr() is called with a start index of 1 throughout: a start of 0 is
# outside the 1-based range POSIX defines, and some awk implementations
# then return one character fewer than requested, which would make tests
# such as == "TABLE " impossible to satisfy.
{
  # upper-cased tag with a trailing space, so that "TD" and "TD ALIGN=.."
  # both start with "TD "
  utag = toupper($1) " ";

  if( substr(utag,1,6) == "TABLE " ) {

    # a nested table needs an open cell in the enclosing table
    if( tnst > 0 ) {
      if( !tdflag[tnst] ) {
        startrow();
        startentry();
      }
      rowhght[tnst,currow[tnst]] = 1;
    }
    tnst++;

    currow[tnst] = 1;
    curcol[tnst] = 0;
    line[tnst] = 0;

    maxcol = 0;
    tralready = 1;
  }

  if( tnst > 0 ) {
    if( substr(utag,1,3) == "TR " )
      startrow();

    if( substr(utag,1,3) == "TH " \
        || substr(utag,1,3) == "TD " )
      startentry();

    # tags that start a new text line inside the cell
    if( utag == "BR " || \
        substr(utag,1,3) == "HR " || \
        substr(utag,1,3) == "LI " || \
        utag == "P " || \
        substr(utag,1,7) == "OPTION " \
        ) {
      line[tnst]++;
#      if( substr(utag,1,7) == "OPTION "  )
#        ttext[tnst,currow[tnst],curcol[tnst],line[tnst]] = ">";
#      else
        ttext[tnst,currow[tnst],curcol[tnst],line[tnst]] = "";
    }

#extract ALT string
    # the second match() leaves RSTART at " ALT" within $1
    if( substr(utag,1,4) == "IMG " && match(toupper($1)," ALT") ) {
      name = substr($1,RSTART+4,length($1)-7);
      match( name , "=" );
      name = substr(name,RSTART+1,length(name)-1);
      sub(/^ +/, "", name);
      if( substr(name,1,1) == "\"" ) {
        # quoted value: keep what lies between the quotes
        name = substr(name,2,length(name)-1);
        match(name,"\"");
        name = substr(name,1,RSTART-1);
      }
      else if( match(name," ") )
        # unquoted value: ends at the first space
        name = substr(name,1,RSTART-1);
      gsub( ">" , "", name );
      $2 = " [" name "]" $2;
    }

#fix character formats
    if( NF > 1 && length($2) ) {
      gsub("\046amp;","+",$2);
      gsub("\046#169;","(C)",$2);
      gsub("\046#162;","cents",$2);
      # the ";" is optional so that "&nbsp;" does not leave a stray ";"
      gsub("\046nbsp;?"," ",$2);
      gsub("\n","",$2);
      gsub("\r","",$2);
      gsub("\t"," ",$2);
      # collapse runs of spaces to a single space
      gsub(/  +/," ",$2);
      if( !tdflag[tnst] )
        sub(" $","",$2);
    }

#mainly for forms - extract name in select
#    if( match(toupper($1), "NAME=") ) {
#        name = substr($1,RSTART+5,64);
#        if( match(name," ") )
#          name = substr(name,1,RSTART-1 );
#        gsub( ">" , "", name );
#        gsub( "\"" , "" , name );
#        $2 = name "=" $2;
#    }

    if( NF > 1 && length($2) )
      ttext[tnst,currow[tnst],curcol[tnst],line[tnst]] \
        = ttext[tnst,currow[tnst],curcol[tnst],line[tnst]] $2;

#print currow[tnst]","curcol[tnst]","line[tnst] "<" $1 ">" $2 ":"

    if( substr(utag,1,4) == "/TD " \
        || substr(utag,1,4) == "/TH " )
      endentry();

    if( substr(utag,1,4) == "/TR " )
      endrow();

    if( substr(utag,1,7) == "/TABLE " ) {

      if( !tralready )
        endrow();
      currow[tnst]--;
      tralready = 0;

      # render this table into image[0..iline]
      printtab();

      tnst--;
      rowhght[tnst,currow[tnst]] += iline + 1;

      colwid[tnst,curcol[tnst]] += 0;

      inrow = 0;
      if( !tnst )
        # outermost table: print it
        while( inrow <= iline )
          print image[inrow++];
      else {
        # nested table: its rendered lines become the text lines of the
        # enclosing cell, and its width a lower bound for that column
        while( inrow <= iline ) {
          ttext[tnst,currow[tnst],curcol[tnst],inrow] = image[inrow];
          inrow++;
        }
        col = vislength( image[inrow-1] );
        if( col > colwid[tnst,curcol[tnst]] )
          colwid[tnst,curcol[tnst]] = col;
      }
      line[tnst] += iline;
      inrow = 0;
      currow[tnst+1] = 0;
      if( tnst == 0 )
        system("");   # flushes buffered output in gawk

      if( !tdflag[tnst] ) {
        endentry();
        endrow();
      }
#for vertical stacking of all tables
#     else
#       endentry();
      tdflag[tnst] = 0;
    }
  }
}

# After all input has been processed, emit one final empty line.
END {
  print "";
}
