#include "giz.ipp"


/* !! Word-separator characters of the dictionary currently loaded by */
/* !! zdict_init.  The separator count is read there from a single byte, */
/* !! so 256 byte entries is always enough room (absurdly generous). */
Array dict_separators -> 256;
/* !! Scratch word array handed to zscii_encode and then searched for by */
/* !! zdict_lookup in zdict_parse_word. */
Array encode_buffer --> 3;

/* !! The addr value zdict_init was last called with; used there to skip */
/* !! re-reading the same dictionary header. */
Global zdict_current;
/* !! Number of valid entries in dict_separators. */
Global zdict_num_separators;
/* !! Size of one dictionary entry; passed as the struct size to the */
/* !! Glulx search opcodes in zdict_lookup. */
Global zdict_entry_len;
/* !! Sign-extended 16-bit entry count.  zdict_lookup binary-searches */
/* !! when it is >= 0 and linear-searches over its negation otherwise. */
Global zdict_num_entries;

/* !! entry_base is an address in *glulx* memory (zmem + a Z-machine */
/* !! offset), not an offset into Z-machine memory */
Global zdict_entry_base;
        
/* !! Loads the dictionary header identified by addr into the zdict_* */
/* !! globals and dict_separators.  Does nothing when addr matches the */
/* !! value cached in zdict_current. */
/* !! */
/* !! Header layout as read below, starting at loc: */
/* !!   1 byte   number of separator characters */
/* !!   n bytes  the separator characters */
/* !!   1 byte   length of one entry */
/* !!   2 bytes  entry count, sign-extended from 16 bits */
/* !!   ...      the entries themselves */
/* !! */
/* !! loc, i and num are scratch locals. */
[zdict_init addr loc i num;
    /* !! Same dictionary as last time: the cached globals are still valid. */
    if (addr == zdict_current)
        rtrue;

    zdict_current = addr;

    /* !! NOTE(review): readw is defined outside this file; presumably it */
    /* !! loads the 16-bit word at Z-machine address addr into loc, so */
    /* !! addr would be the location of a word holding the header */
    /* !! address -- TODO confirm against giz.ipp and the callers. */
    readw(loc, addr);

    zdict_num_separators = zmem->loc;
    loc++;

    i = 0;
    while (i < zdict_num_separators)
    {
        dict_separators->i = zmem->loc;
        loc++;
        i++;
    }

    zdict_entry_len = zmem->loc;
    loc++;

    /* !! The count is a signed 16-bit quantity; widen it so that a */
    /* !! negative count survives as a negative Glulx value. */
    readw(num, loc);
    @sexs num num;
    zdict_num_entries = num;

    /* !! Entries begin right after the two-byte count.  Adding zmem */
    /* !! turns the position into a Glulx address for the search opcodes. */
    zdict_entry_base = zmem + loc + 2;
];
        
/* !! Returns 1 if character c ends a word, 0 otherwise.  A space always */
/* !! does; so does any of the first zdict_num_separators entries of */
/* !! dict_separators.  i is a scratch local. */
[zdict_isSeparator c i;
    if (c == ' ')
        rtrue;

    i = 0;
    while (i < zdict_num_separators)
    {
        if (dict_separators->i == c)
            rtrue;
        i++;
    }

    rfalse;
];
    
/* !! Searches the dictionary loaded by zdict_init for an entry whose */
/* !! first num_bytes bytes equal the bytes stored at encoded_buffer. */
/* !! */
/* !! Returns the matching entry's address relative to zmem, or 0 when */
/* !! the search opcode reports no match.  num_structs and res are */
/* !! scratch locals. */
[zdict_lookup encoded_buffer num_bytes num_structs res;

    /* !! Both opcodes get key offset $00 and options $01 (the key */
    /* !! operand is the address of the key bytes, not the key itself). */
    if (zdict_num_entries < 0)
    {
        /* !! Negative count: scan linearly over its magnitude. */
        num_structs = -zdict_num_entries;
        @linearsearch encoded_buffer num_bytes zdict_entry_base
            zdict_entry_len num_structs $00 $01 res;
    }
    else
    {
        @binarysearch encoded_buffer num_bytes zdict_entry_base zdict_entry_len
            zdict_num_entries $00 $01 res;
    }

    if (res == 0)
        return 0;

    /* !! The opcode result is a Glulx address; hand back the address */
    /* !! relative to Z-machine memory instead. */
    return res - zmem;
];
    
/* !! Helper for zdict_tokenize.  Encodes the str_len characters at */
/* !! str_start via zscii_encode, looks the result up with zdict_lookup */
/* !! and, if appropriate, fills in the 4-byte parse-buffer record at */
/* !! parse_buf_loc: */
/* !!   bytes 0-1  value returned by zdict_lookup (0 if not found) */
/* !!   byte  2    str_len */
/* !!   byte  3    text_buf_offset */
/* !! */
/* !! The record is written when the word was found, or unconditionally */
/* !! when leave_unknown is non-zero.  Returns 1 if the record was */
/* !! written, 0 if it was left untouched.  word_loc is a scratch local. */
[zdict_parse_word str_start text_buf_offset str_len encoding_len
    parse_buf_loc leave_unknown word_loc;

    zscii_encode(str_start, str_len, encoding_len, encode_buffer);

    /* !! encoding_len is doubled to give the key length in bytes. */
    word_loc = zdict_lookup(encode_buffer, encoding_len * 2);

    /* !! Unknown word and the caller does not want it recorded. */
    if (leave_unknown == 0 && word_loc == 0)
        rfalse;

    writew(parse_buf_loc, word_loc);
    zmem->(parse_buf_loc + 2) = str_len;
    zmem->(parse_buf_loc + 3) = text_buf_offset;
    rtrue;
];

/* !! Returns the offset, relative to text_buf, just past the last */
/* !! character of text held in the text buffer. */
/* !! */
/* !! For z_ver > 4, byte 1 of the buffer is taken as the character */
/* !! count and the text is assumed to start at byte 2.  Otherwise */
/* !! byte 0 is taken as the maximum length and the text is scanned */
/* !! from byte 1 for a zero byte.  max_len and i are scratch locals. */
[zdict_get_text_end_loc text_buf max_len i;

    if (z_ver <= 4)
    {
        max_len = zmem->text_buf;

        i = 1;
        while (i < max_len)
        {
            if (zmem->(text_buf + i) == 0)
                return i;
            i++;
        }

        /* !! No zero byte below max_len: treat max_len as the end. */
        return max_len;
    }

    return zmem->(text_buf + 1) + 2;
];
    
        
/* !! Splits the text in the text buffer at text_buf into words and */
/* !! records them in the parse buffer at parse_buf. */
/* !! */
/* !! Byte 0 of parse_buf is read as the maximum number of words. */
/* !! Byte 1 receives the number of words recorded, and 4-byte records */
/* !! (filled in by zdict_parse_word) follow from byte 2. */
/* !! */
/* !! Words are delimited by the characters zdict_isSeparator accepts. */
/* !! A space only delimits; any other separator is also recorded as a */
/* !! one-character word of its own.  leave_unknown is passed through */
/* !! to zdict_parse_word, which decides whether a word missing from */
/* !! the dictionary still gets a record. */
/* !! */
/* !! Everything after leave_unknown is a scratch local. */
[zdict_tokenize text_buf parse_buf leave_unknown
    max_words char_loc buf_size mark c num_words parse_loc
    encode_len word_len;

    /* !! Always start counting from zero, whatever leave_unknown is. */
    num_words = 0;
    parse_loc = parse_buf + 2;
    max_words = zmem->parse_buf;
    buf_size = zdict_get_text_end_loc(text_buf);

    /* !! Offset of the first text character within the text buffer. */
    if (z_ver > 4)
        char_loc = 2;
    else
        char_loc = 1;

    /* !! mark is the offset at which the word being scanned begins. */
    mark = char_loc;

    /* !! Encoded length handed to zscii_encode; zdict_parse_word */
    /* !! doubles it to get the number of key bytes to compare. */
    if (z_ver < 4)
        encode_len = 2;
    else
        encode_len = 3;

    while (num_words < max_words && char_loc < buf_size)
    {
        c = zmem->(text_buf + char_loc++);

        if (zdict_isSeparator(c))
        {
            /* !! char_loc has already moved past the separator. */
            word_len = (char_loc - 1) - mark;

            /* !! Record the word that the separator terminates, if any. */
            if (word_len > 0)
            {
                if (zdict_parse_word(text_buf + mark, mark,
                                     word_len, encode_len,
                                     parse_loc, leave_unknown))
                {
                    num_words++;
                    parse_loc = parse_loc + 4;
                }
            }

            /* !! A non-space separator is a one-character word itself. */
            if (num_words < max_words && c ~= ' ')
            {
                if (zdict_parse_word(text_buf + (char_loc - 1),
                                     char_loc - 1, 1,
                                     encode_len, parse_loc,
                                     leave_unknown))
                {
                    num_words++;
                    parse_loc = parse_loc + 4;
                }
            }

            mark = char_loc;
        }
    }

    /* !! Record a final word that ran up to the end of the scanned */
    /* !! text without a trailing separator. */
    if (num_words < max_words)
    {
        word_len = char_loc - mark;
        if (word_len > 0)
        {
            if (zdict_parse_word(text_buf + mark, mark,
                                 word_len, encode_len,
                                 parse_loc, leave_unknown))
                num_words++;
        }
    }

    zmem->(parse_buf + 1) = num_words;
];

