/*  GNU Ocrad - Optical Character Recognition program
    Copyright (C) 2003, 2004, 2005, 2006 Antonio Diaz Diaz.

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/

#include <algorithm>
#include <cctype>
#include <climits>
#include <cstdio>
#include <vector>

#include "common.h"
#include "rational.h"
#include "rectangle.h"
#include "bitmap.h"


namespace {

// Binarization threshold by Otsu's method: returns the gray level that
// maximizes the inter-class variance between the pixels at or below the
// level and the pixels above it.
//
// 'data' must be non-empty and every row must hold at least as many
// values as the first one. Every value is used as an index into a
// histogram of 'maxval' + 1 entries, so it must be in [0, maxval].
// The result is smaller than 'maxval', and is 0 when no level splits the
// pixels in two non-empty classes (for example a uniform image).
// If Ocrad::verbose is set, the result is also printed to stderr.
//
int otsu_th( const std::vector< std::vector< int > > & data, const int maxval ) throw()
  {
  const int rows = data.size(), cols = data[0].size(), size = rows * cols;

  std::vector< int > hist( maxval + 1, 0 );	// histogram of image data
  for( int row = 0; row < rows; ++row )
    for( int col = 0; col < cols; ++col )
      ++hist[data[row][col]];

  std::vector< int > chist;		// cumulative histogram
  chist.reserve( maxval + 1 );
  chist.push_back( hist[0] );
  std::vector< long long > cmom;	// cumulative moment
  cmom.reserve( maxval + 1 );
  cmom.push_back( 0 );			// 0 times hist[0] equals zero
  for( int i = 1; i <= maxval; ++i )
    {
    chist.push_back( chist[i-1] + hist[i] );
    // widen before multiplying: 'i * hist[i]' may not fit in an int
    cmom.push_back( cmom[i-1] + ( static_cast< long long >( i ) * hist[i] ) );
    }

  const double cmom_max = cmom[maxval];
  double bvar_max = 0;
  int threshold = 0;			// threshold for binarization
  for( int i = 0; i < maxval; ++i )
    if( chist[i] > 0 && chist[i] < size )	// both classes are non-empty
      {
      // (mean of lower class - mean of upper class)^2 * sizes of both classes
      double bvar = static_cast< double >( cmom[i] ) / chist[i];
      bvar -= ( cmom_max - cmom[i] ) / ( size - chist[i] );
      bvar *= bvar; bvar *= chist[i]; bvar *= ( size - chist[i] );
      if( bvar > bvar_max ) { bvar_max = bvar; threshold = i; }
      }

  if( Ocrad::verbose )
    std::fprintf( stderr, "maxval = %d, automatic threshold = %d (%s)\n",
                  maxval, threshold, Rational( threshold, maxval ).to_decimal() );
  return threshold;
  }


// Returns the next byte of 'f' without interpreting it.
// Throws Bitmap::Error if std::fgetc returns EOF.
//
unsigned char pnm_getrawbyte( FILE * f ) throw( Bitmap::Error )
  {
  const int byte = std::fgetc( f );

  if( byte != EOF ) return static_cast< unsigned char > (byte);
  throw Bitmap::Error( "end-of-file reading pnm file." );
  }


// Returns the next character of 'f' that is not part of a comment.
// A comment runs from a '#' up to the next newline; when one is found,
// the newline that ends it is the character returned.
// Throws Bitmap::Error (from pnm_getrawbyte) at end of file.
//
char pnm_getc( FILE * f ) throw( Bitmap::Error )
  {
  char ch = pnm_getrawbyte( f );

  if( ch == '#' )			// discard the rest of the comment line
    do ch = pnm_getrawbyte( f ); while( ch != '\n' );
  return ch;
  }


// Reads a non-negative decimal integer from the header or body of a pnm
// file. Leading whitespace (and comments, through pnm_getc) is skipped.
// The character that follows the last digit is also consumed.
// Throws Bitmap::Error if the first non-space character is not a digit,
// if the number does not fit in an int, or if end of file is reached,
// even right after the last digit.
//
int pnm_getint( FILE * f ) throw( Bitmap::Error )
  {
  char ch;
  int i = 0;

  // The <cctype> functions need a value representable as unsigned char;
  // a plain char may be negative for bytes above 127.
  do ch = pnm_getc( f );
  while( std::isspace( static_cast< unsigned char >( ch ) ) );
  if( !std::isdigit( static_cast< unsigned char >( ch ) ) )
    throw Bitmap::Error( "junk in pnm file where an integer should be." );
  do {
    const int digit = ch - '0';
    // same as 'i * 10 + digit > INT_MAX', but without overflowing
    if( ( INT_MAX - digit ) / 10 < i )
      throw Bitmap::Error( "number too big in pnm file." );
    i = (i * 10) + digit;
    ch = pnm_getc( f );
    }
  while( std::isdigit( static_cast< unsigned char >( ch ) ) );
  return i;
  }


// Reads one bit of a plain pbm ("P1") image: skips whitespace (and
// comments, through pnm_getc) and returns false for '0', true for '1'.
// Throws Bitmap::Error on any other character or at end of file.
//
bool pbm_getbit( FILE * f ) throw( Bitmap::Error )
  {
  char ch;

  // std::isspace needs a value representable as unsigned char; a plain
  // char may be negative for bytes above 127.
  do ch = pnm_getc( f );
  while( std::isspace( static_cast< unsigned char >( ch ) ) );

  if( ch == '0' ) return false;
  if( ch == '1' ) return true;
  throw Bitmap::Error( "junk in pbm file where bits should be." );
  }


// Reads the pixels of a plain pbm ("P1") image into 'data', which must
// already have its final size (at least one row).
// An element is set to true when the bit read differs from 'invert';
// elements are never cleared here.
//
void read_p1( std::vector< std::vector< char > > & data,
              FILE * f, const bool invert ) throw( Bitmap::Error )
  {
  const int height = data.size(), width = data[0].size();

  for( int y = 0; y < height; ++y )
    {
    std::vector< char > & line = data[y];
    for( int x = 0; x < width; ++x )
      {
      const bool bit = pbm_getbit( f );
      if( bit != invert ) line[x] = true;
      }
    }
  }


// Reads the pixels of a raw pbm ("P4") image into 'data', which must
// already have its final size (at least one row).
// Every row starts on a new byte and its bits are taken from the most
// significant one down; bits beyond the row width are ignored.
// An element is set to true when its bit differs from 'invert';
// elements are never cleared here.
//
void read_p4( std::vector< std::vector< char > > & data,
              FILE * f, const bool invert ) throw( Bitmap::Error )
  {
  const int height = data.size(), width = data[0].size();
  // xor-ing with this mask makes a set bit always mean "set the pixel"
  const unsigned char flip = invert ? 0xFF : 0;

  for( int y = 0; y < height; ++y )
    {
    std::vector< char > & line = data[y];
    int x = 0;
    while( x < width )
      {
      const unsigned char bits =
        static_cast< unsigned char >( pnm_getrawbyte( f ) ^ flip );
      if( bits == 0 ) { x += 8; continue; }	// nothing to set in this byte
      for( unsigned char mask = 0x80; mask > 0 && x < width; mask >>= 1, ++x )
        if( bits & mask ) line[x] = true;
      }
    }
  }


// Reads the maxval and the pixels of a plain pgm ("P2") image into
// 'data', which must already have its final size (at least one row).
// If 'th' is in the range [0, 1] the threshold is
// ( Rational( maxval ) * th ).trunc(); else it is computed by otsu_th
// from all the gray levels of the image.
// An element is set to true when '( level <= threshold )' differs from
// 'invert'; elements are never cleared here.
// Throws Bitmap::Error if maxval is zero or a level is larger than maxval.
//
void read_p2( std::vector< std::vector< char > > & data, FILE * f,
              const Rational & th, const bool invert ) throw( Bitmap::Error )
  {
  const int maxval = pnm_getint( f );
  if( maxval == 0 ) throw Bitmap::Error( "zero maxval in pgm file." );
  const int height = data.size(), width = data[0].size();
  const bool fixed_threshold = ( th >= 0 && th <= 1 );

  if( !fixed_threshold )
    {
    // the levels are kept until the threshold can be derived from them
    std::vector< std::vector< int > > gray( height );
    for( int y = 0; y < height; ++y )
      {
      std::vector< int > & line = gray[y];
      line.reserve( width );
      for( int x = 0; x < width; ++x )
        {
        const int level = pnm_getint( f );
        if( level > maxval )
          throw Bitmap::Error( "value > maxval in pgm file." );
        line.push_back( level );
        }
      }
    const int threshold = otsu_th( gray, maxval );
    for( int y = 0; y < height; ++y )
      for( int x = 0; x < width; ++x )
        {
        const bool dark = ( gray[y][x] <= threshold );
        if( dark != invert ) data[y][x] = true;
        }
    return;
    }

  const int threshold = ( Rational( maxval ) * th ).trunc();
  for( int y = 0; y < height; ++y )
    for( int x = 0; x < width; ++x )
      {
      const int level = pnm_getint( f );
      if( level > maxval )
        throw Bitmap::Error( "value > maxval in pgm file." );
      const bool dark = ( level <= threshold );
      if( dark != invert ) data[y][x] = true;
      }
  }


// Reads the maxval and the pixels (one byte each) of a raw pgm ("P5")
// image into 'data', which must already have its final size (at least
// one row).
// If 'th' is in the range [0, 1] the threshold is
// ( Rational( maxval ) * th ).trunc(); else it is computed by otsu_th
// from all the gray levels of the image.
// An element is set to true when '( level <= threshold )' differs from
// 'invert'; elements are never cleared here.
// Throws Bitmap::Error if maxval is zero or larger than 255, or if a
// level is larger than maxval.
//
void read_p5( std::vector< std::vector< char > > & data, FILE * f,
              const Rational & th, const bool invert ) throw( Bitmap::Error )
  {
  const int maxval = pnm_getint( f );
  if( maxval == 0 ) throw Bitmap::Error( "zero maxval in pgm file." );
  if( maxval > 255 ) throw Bitmap::Error( "maxval > 255 in pgm \"P5\" file." );
  const int height = data.size(), width = data[0].size();
  const bool automatic = !( th >= 0 && th <= 1 );

  if( automatic )
    {
    // the levels are kept until the threshold can be derived from them
    std::vector< std::vector< int > > gray( height );
    for( int y = 0; y < height; ++y )
      {
      std::vector< int > & line = gray[y];
      line.reserve( width );
      for( int x = 0; x < width; ++x )
        {
        const int level = pnm_getrawbyte( f );
        if( level > maxval )
          throw Bitmap::Error( "value > maxval in pgm file." );
        line.push_back( level );
        }
      }
    const int threshold = otsu_th( gray, maxval );
    for( int y = 0; y < height; ++y )
      {
      const std::vector< int > & line = gray[y];
      for( int x = 0; x < width; ++x )
        if( ( line[x] <= threshold ) != invert ) data[y][x] = true;
      }
    }
  else
    {
    const int threshold = ( Rational( maxval ) * th ).trunc();
    for( int y = 0; y < height; ++y )
      for( int x = 0; x < width; ++x )
        {
        const int level = pnm_getrawbyte( f );
        if( level > maxval )
          throw Bitmap::Error( "value > maxval in pgm file." );
        if( ( level <= threshold ) != invert ) data[y][x] = true;
        }
    }
  }


void read_p3( std::vector< std::vector< char > > & data, FILE * f,
              const Rational & th, const bool invert ) throw( Bitmap::Error )
  {
  const int maxval = pnm_getint( f );
  if( maxval == 0 ) throw Bitmap::Error( "zero maxval in ppm file." );
  const int rows = data.size(), cols = data[0].size();

  if( th >= 0 && th <= 1 )
    {
    const int threshold = ( Rational( maxval ) * th ).trunc();
    for( int row = 0; row < rows; ++row )
      for( int col = 0; col < cols; ++col )
        {
        int val = pnm_getint( f );			// Red value
        val = std::min( val, pnm_getint( f ) );		// Green value
        val = std::min( val, pnm_getint( f ) );		// Blue value
        if( val > maxval ) throw Bitmap::Error( "value > maxval in ppm file." );
        if( ( val <= threshold ) != invert ) data[row][col] = true;
        }
    }
  else
    {
    std::vector< std::vector< int > > tmp( rows );
    for( int row = 0; row < rows; ++row )
      {
      tmp[row].reserve( cols );
      std::vector< int > & tmprow = tmp[row];
      for( int col = 0; col < cols; ++col )
        {
        int val = pnm_getint( f );			// Red value
        val = std::min( val, (int)pnm_getint( f ) );	// Green value
        val = std::min( val, (int)pnm_getint( f ) );	// Blue value
        if( val > maxval ) throw Bitmap::Error( "value > maxval in ppm file." );
        tmprow.push_back( val );
        }
      }
    const int threshold = otsu_th( tmp, maxval );
    for( int row = 0; row < rows; ++row )
      for( int col = 0; col < cols; ++col )
        if( ( tmp[row][col] <= threshold ) != invert )
          data[row][col] = true;
    }
  }


void read_p6( std::vector< std::vector< char > > & data, FILE * f,
              const Rational & th, const bool invert ) throw( Bitmap::Error )
  {
  const int maxval = pnm_getint( f );
  if( maxval == 0 ) throw Bitmap::Error( "zero maxval in ppm file." );
  if( maxval > 255 ) throw Bitmap::Error( "maxval > 255 in ppm \"P6\" file." );
  const int rows = data.size(), cols = data[0].size();

  if( th >= 0 && th <= 1 )
    {
    const int threshold = ( Rational( maxval ) * th ).trunc();
    for( int row = 0; row < rows; ++row )
      for( int col = 0; col < cols; ++col )
        {
        int val = pnm_getrawbyte( f );				// Red value
        val = std::min( val, (int)pnm_getrawbyte( f ) );	// Green value
        val = std::min( val, (int)pnm_getrawbyte( f ) );	// Blue value
        if( val > maxval ) throw Bitmap::Error( "value > maxval in ppm file." );
        if( ( val <= threshold ) != invert ) data[row][col] = true;
        }
    }
  else
    {
    std::vector< std::vector< int > > tmp( rows );
    for( int row = 0; row < rows; ++row )
      {
      tmp[row].reserve( cols );
      std::vector< int > & tmprow = tmp[row];
      for( int col = 0; col < cols; ++col )
        {
        int val = pnm_getrawbyte( f );				// Red value
        val = std::min( val, (int)pnm_getrawbyte( f ) );	// Green value
        val = std::min( val, (int)pnm_getrawbyte( f ) );	// Blue value
        if( val > maxval ) throw Bitmap::Error( "value > maxval in ppm file." );
        tmprow.push_back( val );
        }
      }
    const int threshold = otsu_th( tmp, maxval );
    for( int row = 0; row < rows; ++row )
      for( int col = 0; col < cols; ++col )
        if( ( tmp[row][col] <= threshold ) != invert )
          data[row][col] = true;
    }
  }



} // end namespace


// Builds a Bitmap from the pbm, pgm or ppm image read from 'f'.
// The file formats "P1" (pbm), "P4" (pbm RAWBITS), "P2" (pgm),
// "P5" (pgm RAWBITS), "P3" (ppm) and "P6" (ppm RAWBITS) are recognized.
// 'invert' is passed to the reader of the format found; 'th' is passed
// only to the pgm and ppm readers.
// Throws Error on a bad magic number, on a zero width or height, or if
// width * height is larger than INT_MAX. Errors thrown while reading the
// header numbers or the pixels propagate to the caller.
//
Bitmap::Bitmap( FILE * f, const Rational & th, const bool invert ) throw( Bitmap::Error )
  : Rectangle( 0, 0, 0, 0 )
  {
  // magic number: a 'P' followed by a digit from '1' to '6'
  const bool magic_p = ( pnm_getrawbyte( f ) == 'P' );
  const char filetype = magic_p ? static_cast< char >( pnm_getrawbyte( f ) ) : 0;
  if( !magic_p || filetype < '1' || filetype > '6' )
    throw Error( "bad magic number - not a pbm, pgm or ppm file." );

  const int cols = pnm_getint( f );
  if( cols == 0 ) throw Error( "zero width in pnm file." );
  Rectangle::width( cols );
  const int rows = pnm_getint( f );
  if( rows == 0 ) throw Error( "zero height in pnm file." );
  Rectangle::height( rows );
  // the number of pixels is computed in long long so that it can't overflow
  if( static_cast< long long >( width() ) * height() > INT_MAX )
    throw Error( "image too big. `int' will overflow." );

  // all the pixels start as false; the readers only set pixels to true
  data.resize( height() );
  for( unsigned int i = 0; i < data.size(); ++i )
    data[i].resize( width(), false );

  if( Ocrad::verbose )
    std::fprintf( stderr, "file type is P%c\n", filetype );

  switch( filetype )
    {
    case '1': read_p1( data, f, invert ); break;
    case '2': read_p2( data, f, th, invert ); break;
    case '3': read_p3( data, f, th, invert ); break;
    case '4': read_p4( data, f, invert ); break;
    case '5': read_p5( data, f, th, invert ); break;
    case '6': read_p6( data, f, th, invert ); break;
    }
  }


// Writes the bitmap to 'f' as a pbm file: plain ("P1") if 'filetype' is
// '1', raw ("P4") for any other value of 'filetype'.
// In the plain format every row is written as a line of '0' and '1'
// characters. In the raw format every row is packed 8 pixels per byte,
// first pixel in the most significant bit, and the last byte of the row
// is padded with zero bits.
// Results of the output calls are not checked.
//
void Bitmap::save( FILE * f, char filetype ) const throw()
  {
  const bool plain = ( filetype == '1' );
  std::fprintf( f, "P%c\n%d %d\n", plain ? '1' : '4', width(), height() );

  if( plain )
    {
    for( int row = top(); row <= bottom(); ++row )
      {
      for( int col = left(); col <= right(); ++col )
        std::putc( get_bit( row, col ) ? '1' : '0', f );
      std::putc( '\n', f );
      }
    return;
    }

  for( int row = top(); row <= bottom(); ++row )
    {
    unsigned char packed = 0;		// pixels waiting to be written
    int pending = 0;			// number of pixels held in 'packed'
    for( int col = left(); col <= right(); ++col )
      {
      packed <<= 1;
      if( get_bit( row, col ) ) packed |= 1;
      if( ++pending == 8 ) { std::putc( packed, f ); packed = 0; pending = 0; }
      }
    // incomplete byte at end of row; move its pixels to the high bits
    if( pending > 0 ) std::putc( packed << ( 8 - pending ), f );
    }
  }
