/* atoms.c: The Floo atom table.
    Designed by Andrew Plotkin <erkyrath@netcom.com>
    http://www.eblong.com/zarf/glk/floo/index.html
*/

#include "glk.h"
#include "floo.h"
#include <string.h>
#include <stdlib.h>

/* To aid fast lookup of name objects, Floo keeps a binary tree of every string
    which has been seen in a name. These are atoms, and name objects refer to
    atoms instead of directly to C strings. This allows two atoms to be compared
    by a simple pointer equality test, rather than by string comparison.
    
    The binary tree is a red-black tree. I happily stole the algorithm from
    Graham Nelson's Inform source code -- my eclectic CS textbook collection
    somehow failed to have any information on red-black trees at all.
*/

/* The actual strings which the atoms represent are stored in large memory chunks,
    allocated in size ATOM_CARGO_SIZE. (They're never freed, so we can forego the
    flexibility of keeping each one in its own malloc chunk.) Strings stored in
    these chunks are *not* null-terminated. An atom knows its own length. */
#define ATOM_CARGO_SIZE (512)
/* Atoms larger than ATOM_CARGO_LIMIT *do* get their own malloc chunk, to avoid
    wasting much space in padding. */
#define ATOM_CARGO_LIMIT (32)

/* Some macros useful for manipulating a binary tree. */
#define ATOM_BRANCH(atm, side)  (((side) == 0) ? ((atm)->left) : (atm)->right)
#define ATOM_BRANCH_PTR(atm, side)  (((side) == 0) ? &((atm)->left) : &(atm)->right)
#define ATOM_BRANCH_SET(atm, side, atm2)  \
    (((side) == 0) ? (((atm)->left) = (atm2)) : (((atm)->right) = (atm2)))

static atom_t *root = NULL; /* The tree. */
static char *cargo = NULL; /* The atom string chunk being filled out. */
static int cargopos = 0; /* How much of it is full. */

/* Prepare the atom table for use by allocating the first cargo chunk (the
    buffer that short atom strings are packed into). Returns TRUE on success,
    or FALSE if the chunk could not be allocated. The fill position is reset
    only when the allocation succeeds. */
int init_atomdict()
{
    cargo = (char *)malloc(ATOM_CARGO_SIZE * sizeof(char));

    if (cargo != NULL) {
        /* Fresh chunk: nothing has been stored in it yet. */
        cargopos = 0;
        return TRUE;
    }

    return FALSE;
}

/* Compare a counted string (not necessarily null-terminated) with the contents
    of an atom. Returns 0 if the two are identical, a negative value if str
    sorts before the atom, or a positive value if it sorts after. Ordering is
    by byte value (compared as unsigned char); when one string is a prefix of
    the other, the shorter one sorts first.
    
    The comparison uses memcmp rather than strncmp. Both operands carry an
    explicit length, so a zero byte is ordinary data and must not cut the
    comparison short -- otherwise two different strings that agree up to an
    embedded zero byte would be treated as the same atom. */
static int compare(char *str, int len, atom_t *at)
{
    int common = (len < at->len) ? len : at->len;
    int res = 0;
    
    /* Compare the part both strings have. (Skipped when that part is empty,
        so memcmp is never handed a zero-length range.) */
    if (common > 0)
        res = memcmp(str, at->str, (size_t)common);
    if (res != 0)
        return res;
    
    /* Equal over the common prefix: the lengths decide. */
    if (len == at->len)
        return 0;
    return (len < at->len) ? -1 : 1;
}

/* Find an atom with the given string. If there is none, this quietly creates
    one and returns it. 
    If len is non-negative, it gives the length of str (which is not necessarily
    null-terminated.) If it is negative, str is null-terminated; use that length.
    If copystr is TRUE, this makes its own copy of the string; if FALSE, it
    assumes the contents of str are static, and refers to them directly. */
atom_t *atomdict_find(char *str, int len, int copystr)
{
    atom_t *at = root;
    atom_t **atp;
    char *cx;
    int res;
    int r, fr, gfr, ggfr;
    atom_t *f, *gf, *ggf, *tmpf;
    
    if (len == -1)
        len = strlen(str);
    
    /* First, search for an existing atom. */
    while (at) {
        res = compare(str, len, at);
        if (res == 0) {
            return at; /* Return the atom found. */
        }
        if (res < 0) {
            at = at->left;
        }
        else {
            at = at->right;
        }
    }
    
    /* No existing atom. Find the right place to put a new one. */
    
    atp = &root;
    
    if (!root) {
        atp = &root;
    }
    else {
        at = root;
        f = NULL;
        gf = NULL;
        ggf = NULL;
        while (1) {
            res = compare(str, len, at);
            r = (res > 0);
            if (at->left && at->left->isred
                && at->right && at->right->isred) {
                at->left->isred = FALSE;
                at->right->isred = FALSE;
                at->isred = TRUE;
                if (f && gf && f->isred) {
                    if (fr == gfr) {
                        if (!ggf)
                            root = f;
                        else
                            ATOM_BRANCH_SET(ggf, ggfr, f);
                        tmpf = ATOM_BRANCH(f, !fr);
                        ATOM_BRANCH_SET(gf, gfr, tmpf);
                        ATOM_BRANCH_SET(f, !fr, gf);
                        f->isred = FALSE;
                        gf->isred = TRUE;
                        gf = ggf;
                        gfr = ggfr;
                    }
                    else {
                        if (!ggf)
                            root = at;
                        else
                            ATOM_BRANCH_SET(ggf, ggfr, at);
                        at->isred = FALSE;
                        gf->isred = TRUE;
                        tmpf = ATOM_BRANCH(at, gfr);
                        ATOM_BRANCH_SET(f, fr, tmpf);
                        tmpf = ATOM_BRANCH(at, fr);
                        ATOM_BRANCH_SET(gf, gfr, tmpf);
                        ATOM_BRANCH_SET(at, gfr, f);
                        ATOM_BRANCH_SET(at, fr, gf);
                        r = !r;
                        tmpf = at;
                        if (r == fr)
                            at = f;
                        else
                            at = gf;
                        f = tmpf;
                        gf = ggf;
                        fr = !r;
                        gfr = ggfr;
                    }
                }
            }
            if (ATOM_BRANCH(at, r) == NULL) {
                at->isred = TRUE;
                if (f && gf && f->isred) {
                    if (fr == gfr) {
                        if (!ggf)
                            root = f;
                        else
                            ATOM_BRANCH_SET(ggf, ggfr, f);
                        tmpf = ATOM_BRANCH(f, !fr);
                        ATOM_BRANCH_SET(gf, gfr, tmpf);
                        ATOM_BRANCH_SET(f, !fr, gf);
                        f->isred = FALSE;
                        gf->isred = TRUE;
                    }
                    else {
                        if (!ggf)
                            root = at;
                        else
                            ATOM_BRANCH_SET(ggf, ggfr, at);
                        at->isred = FALSE;
                        gf->isred = TRUE;
                        tmpf = ATOM_BRANCH(at, gfr);
                        ATOM_BRANCH_SET(f, fr, tmpf);
                        tmpf = ATOM_BRANCH(at, fr);
                        ATOM_BRANCH_SET(gf, gfr, tmpf);
                        ATOM_BRANCH_SET(at, gfr, f);
                        ATOM_BRANCH_SET(at, fr, gf);
                        r = !r;
                        tmpf = at;
                        if (r == fr)
                            at = f;
                        else
                            at = gf;
                        f = tmpf;
                        gf = ggf;
                    }
                }
                atp = ATOM_BRANCH_PTR(at, r);
                break;
            }
            ggf = gf;
            gf = f;
            f = at;
            at = ATOM_BRANCH(at, r);
            ggfr = gfr;
            gfr = fr;
            fr = r;
        }
    }
    
    /* Allocate an atom and store it. */
    at = (atom_t *)malloc(sizeof(atom_t));
    if (!at)
        return NULL;
    
    at->left = NULL;
    at->right = NULL;
    at->isred = FALSE;
    
    if (copystr) {
        if (len >= ATOM_CARGO_LIMIT) {
            cx = (char *)malloc(sizeof(char) * len);
            if (!cx)
                return NULL;
        }
        else {
            if (cargopos+len > ATOM_CARGO_SIZE) {
                cargo = (char *)malloc(sizeof(char) * ATOM_CARGO_SIZE);
                if (!cargo)
                    return NULL;
                cargopos = 0;
            }
            cx = &(cargo[cargopos]);
            cargopos += len;
        }
        if (len)
            memcpy(cx, str, len);
    }
    else {
        cx = str;
    }
    
    at->len = len;
    at->str = cx;
    at->bucket = floodict_string_to_key(str, len);
    *atp = at;
    
    return at;
}

/* Some debugging code to measure the number of atoms, and the depth of the
    binary tree. */

/* Recursive worker for the debugging statistics. Visits atom and everything
    below it. atom must not be NULL. data is a three-element array:
    data[0] is incremented once per node visited; data[1] and data[2] track the
    greatest and least depth at which a childless node was seen (-1 meaning
    "none seen yet"). depth is the depth of atom itself. */
static void debug_splot(atom_t *atom, int *data, int depth)
{
    atom_t *lchild = atom->left;
    atom_t *rchild = atom->right;
    
    data[0]++;
    
    if (lchild == NULL && rchild == NULL) {
        /* A leaf: fold its depth into the running maximum and minimum. */
        if (data[1] == -1 || depth > data[1])
            data[1] = depth;
        if (data[2] == -1 || depth < data[2])
            data[2] = depth;
        return;
    }
    
    /* An interior node: descend into whichever children exist. */
    if (lchild != NULL)
        debug_splot(lchild, data, depth+1);
    if (rchild != NULL)
        debug_splot(rchild, data, depth+1);
}

/* Report statistics about the atom tree. On return, *numatoms is the number
    of atoms in the tree, and *mindepth and *maxdepth are the least and greatest
    depth of any childless node (the root being at depth 0). If the tree is
    empty, *numatoms is 0 and both depths are -1. */
void atomdict_debugdata(int *numatoms, int *mindepth, int *maxdepth)
{
    int data[3];
    data[0] = 0;  /* node count */
    data[1] = -1; /* greatest leaf depth; -1 = none seen */
    data[2] = -1; /* least leaf depth; -1 = none seen */
    /* debug_splot dereferences the node it is given, so it must not be handed
        the NULL root of an empty tree. */
    if (root)
        debug_splot(root, data, 0);
    *numatoms = data[0];
    *mindepth = data[2];
    *maxdepth = data[1];
}

