
/* This module does the most essential part of every mandelbrot
 * program: it iterates points (pixels). I am trying to make this
 * part fast using 16-bit or 32-bit fixed point math whenever possible.
 * Unfortunately, this had to be done in somewhat unportable manner.
 * If your machine has problems with these, #undefine USE_INTEGER_MATH
 * from mb.h.
 */

/* And right :) You guessed it; because it is me that is writing this
 * program, the contour crawling algorithm is used, too.
 */

/* iterate() is the main callable function of this module:

int iterate(x0, y0, dx, dy, cx, cy, nx, ny, iter, flags, dest)
double x0, y0, dx, dy, cx, cy;
int nx, ny;
int iter;
int flags;
u_short **dest;

 * Where
 o x0+iy0 is the upper lefthand corner of the rectangular area,
 o dx and idy are deltas in two orthogonal directions,
 o cx+icy is the constant used in iterating Julia sets,
 o (nx, ny) is the size of the rectangle,
 o iter is the maximum number of iterations,
 o flags can be set to JULIA and

 o dest is an array of u_shorts that returns the result to the caller.
 o iterate() returns the number of usable u_shorts in the array.
 
 * dest will be allocated by iterate(), but it is callers responsibility
 * to free it. dest has the following format:

 o 16 bits for color of the first pixel (as returned by first_point(),
 o 16 bits for the number of entries in the chain of directions and
 o 2 bits for each direction code (see mb.h), rounded to next 16 bits.
 * NOTE: to further reduce the bandwidth needed, if color has highest
 * (bit 15 set), the number of directions is zero (0), that is, the
 * area consists of one (1) pixel.
 */

#include <sys/types.h>
#include <math.h>
#include <stdio.h>
#include "mb.h"
#include "pix.h"
#define PACKSIZE 512

#ifdef USE_INTEGER_MATH
/* First, the 16-bit version */
#define multi16(x,y) (((x)*(y)) >> BIT16)

int mb16(xl, yl, cx, cy, iter)
double xl, yl;
double cx, cy;
int iter;
{
register short x= xl * (1 << BIT16), y= yl * (1 << BIT16);
register short x0= cx * (1 << BIT16), y0= cy * (1 << BIT16);
register short x2, y2;
register short tmp;

/* It is painful for assembler programmer such as me to write
 * unefficient code such as this. And, worse still, this code
 * is not portable. (It counts on signed bit-shifts and 2's
 * complement math.
 */
while (iter--) {
  x2= multi16(x,x);
  y2= multi16(y,y);
  tmp= x2 + y2;
  if (x < -(2 << BIT16) || x > (2 << BIT16) ||
      y < -(2 << BIT16) || y > (2 << BIT16) ||
      tmp < 0 || tmp > ((4 << BIT16)-1) )
    return(iter);
  y= multi16(x,y);
  tmp= y + y;
  if ((tmp > 0 && y <= 0) || (tmp < 0 && y >= 0))
    return(iter?iter-1:0);
  y= tmp + y0;
  if ((tmp > 0 && y0 > 0 && y <= 0) || (tmp < 0 && y0 < 0 && y >= 0))
    return(iter?iter-1:0);
  tmp= x2 - y2;
  x= tmp + x0;
  if ((tmp > 0 && x0 > 0 && x <= 0) || (tmp < 0 && x0 < 0 && x >= 0))
    return(iter?iter-1:0);
  }
return(0);
}


/* Then a 32-bit one with even more complicated multiplication */
long multi32(x, y)
register long x,y;
{
register short sign= 1;
register unsigned short xl, yl, xh, yh;

if (x < 0) {sign= -1; x= -x;}
if (y < 0) {sign= -sign; y= -y;}
xl= x & 0xffff; xh= x >> 16;
yl= y & 0xffff; yh= y >> 16;
return(sign*(long)( ((xh*yh) << (32-BIT32)) + ((xl*yh) >> (BIT32-16)) +
        ((xh*yl) >> (BIT32-16)) + ((xl*yl) >> (BIT32))) );
}

int mb32(xl, yl, cx, cy, iter)
double xl, yl;
double cx, cy;
int iter;
{
long x= xl * (1 << BIT32), y= yl * (1 << BIT32);
long x0= cx * (1 << BIT32), y0= cy * (1 << BIT32);
long x2, y2;
long tmp;

while (iter--) {
  x2= multi32(x,x);
  y2= multi32(y,y);
  tmp= x2 + y2;
  if (x < -(2 << BIT32) || x > (2 << BIT32) ||
      y < -(2 << BIT32) || y > (2 << BIT32) ||
      tmp < 0 || tmp > ((4 << BIT32)-1) )
    return(iter);
  y= multi32(x,y);
  tmp= y + y;
  if ((tmp > 0 && y <= 0) || (tmp < 0 && y >= 0))
    return(iter?iter-1:0);
  y= tmp + y0;
  if ((tmp > 0 && y0 > 0 && y <= 0) || (tmp < 0 && y0 < 0 && y >= 0))
    return(iter?iter-1:0);
  tmp= x2 - y2;
  x= tmp + x0;
  if ((tmp > 0 && x0 > 0 && x <= 0 ) || (tmp < 0 && x0 < 0 && x >= 0))
    return(iter?iter-1:0);
  }
return(0);
}
#endif


/* Then, the simplest version of them all. (Slowest too.) */
int mbf(x0, y0, cx, cy, iter)
double x0, y0;
double cx, cy;
int iter;
{
double x= x0, y= y0;
double x2, y2;

x0= cx; y0= cy;
while (iter--) {
  x2= x*x;
  y2= y*y;
  if (x2 + y2 >= 4)
    return(iter);
  y= 2 * x * y + y0;
  x= x2 - y2 + x0;
  }
return(0);
}

int iterate(x0, y0, dx, dy, cx, cy, nx, ny, iter, flags, dest)
double x0, y0, dx, dy, cx, cy;
int nx, ny;
int iter;
int flags;
u_short **dest;
{
int px, py;
int **pixel;
int (*f)();
long index= 0;
long colorindex;
long len;
int bitindex= 7;

long size= PACKSIZE;

*dest= (u_short *)malloc(PACKSIZE * sizeof(u_short));

/* Determine which iterating function to use... */
f= mbf; /* default to highest precision */
#ifdef USE_INTEGER_MATH
if (flags & JULIA) {
  if (fabs(dx) > LIMIT_J32 && fabs(dy) > LIMIT_J32)
    f= mb32;
  if (fabs(dx) > LIMIT_J16 && fabs(dy) > LIMIT_J16)
    f= mb16;
  }
else {
  if (fabs(dx) > LIMIT_M32 && fabs(dy) > LIMIT_M32)
    f= mb32;
  if (fabs(dx) > LIMIT_M16 && fabs(dy) > LIMIT_M16)
    f= mb16;
  }
#endif

/* alloc space for map of iterated pixels... */
pixel= (int **)malloc(ny * sizeof(int *));
for (py= 0; py < ny; py++) {
  pixel[py]= (int *)malloc(nx*sizeof(int));
  for (px= 0; px < nx; px++)
    pixel[py][px]= -1;
  }

/* Move pixel forward (relative to dir) */
#define forward(dir) (ox= x, oy= y, qx= px,qy= py,\
         (dir&1 ? (dir&2?(x-= dx, --px) : (x+= dx, ++px)) : \
             dir&2?(y-= dy,++py) : (y+= dy,--py)))
/* This nullifies the effect of previous forward() */
#define stepback (x= ox, y= oy,px= qx,py= qy)
/* And these are used to rotate dir 90 deg clockwise or anticlockwice */
#define clockwise(dir) (dir==3 ? dir= 0 : ++dir)
#define anticlockwise(dir) (dir==0 ? dir= 3 : --dir)


/* First check if x and y are within limits; if not, return -1.
 * Then check if pixel already calculated; if so, return its value.
 * Otherwise calculate the pixel, store and return it.
 */
#define getpixel(px,py) (px < 0 || py < 0 || px >= nx || py >= ny ? -1 : \
      (pixel[py][px] >= 0 ? pixel[py][px] : \
      (pixel[py][px]= (flags & JULIA ? \
      f(x, y, cx, cy, iter) : f(x, y, x, y, iter)))))

initialize(nx, ny); /* pixel recording module (pix.c) */
for (;;) {
  double x, y, ox, oy;
  int qx, qy;
  int startx, starty;
  int prevpixel, newpixel;
  int dir;
  u_short color;

/* This tells us where to start from. */
  if (first_point(&px, &py) == 0)
    break;

  if (flags & FAST) {
    if (index+2 >= size) *dest= (u_short *)
      realloc(*dest, (size += PACKSIZE) * sizeof(u_short));
    (*dest)[index++]= px;
    (*dest)[index++]= py;
    }

  x= x0+dx*px; y= y0-dy*py; dir= RIGHT;
  startx= px; starty= py;
  color= prevpixel= getpixel(px,py);
  colorindex= index;
  index += 2; /* space for color & len */
  if (index >= size) *dest= (u_short *)
    realloc(*dest, (size += PACKSIZE) * sizeof(u_short));
  len= 0;

/* Another macro... the complexity of the crawling algorithm is hidden behind
 * the facade of simple-looking macros ;) (Just kidding)
 */
#define record_dir(dir) { \
if (bitindex <0 ) {bitindex= 7; index++;} \
if (index >= size) *dest= (u_short *) \
  realloc(*dest, (size += PACKSIZE) * sizeof(u_short)); \
if (bitindex == 7) (*dest)[index]= 0; \
(*dest)[index] |= ((u_short)dir) << (2*bitindex); \
bitindex--; \
}

/* This is the actual crawling algorithm. See how inherintly simple it is.
 */
  do {
    forward(dir);
    if ((newpixel= getpixel(px,py)) != prevpixel) {
      stepback;
      clockwise(dir);
      }
    else {
      prevpixel= newpixel;
      next_point(dir); /* every single pixel must be recorded */
      record_dir(dir); /* ...twice */
      len++;
      anticlockwise(dir);
      }
    }
  while (startx != px || starty != py || dir != RIGHT);

  if (bitindex != 7) {
    bitindex= 7;
    index++;
    }
  if (len==0) {
    (*dest)[colorindex]=  color | 0x8000;
    index--;
    }
  else {
    (*dest)[colorindex]= color & 0x7fff;
    (*dest)[colorindex+1]= len;
    }

  } /* for */

for (py= 0; py < ny; py++) {
  free(pixel[py]);
  }
free(pixel);
deinit(); /* free some memory (pix.c) */

if (flags & FAST) {
  if (index >= size) *dest= (u_short *)
    realloc(*dest, (size += PACKSIZE) * sizeof(u_short));
  (*dest)[index++]= LASTPIX;
  }
return(index);
}

