//  text.cpp: implementation of the Text class.
//  2019-10-30

//  The Tilton Macro Processor

//  Douglas Crockford
//  http://www.crockford.com/tilton.html

//  This program is Open Source and Public Domain.

//  Text wraps a string, and provides methods for setting and modifying the
//  string and for doing I/O with it. A Text can also have a name, which
//  is used as a macro name. A Text can have a link which can chain Texts
//  together. This is used to manage hash collisions.

//  The encoding of strings is UTF-8 (the 8-bit form of Unicode). A character
//  is between 1 and 4 bytes in length. The utfLength and utfSubstr methods
//  count multibyte characters. However, if a multibyte character appears to
//  be badly formed, they will interpret its first byte as a single-byte
//  character. So while expecting UTF-8 encoded strings, they will usually
//  do the right thing with Latin-1 and similar encodings.

#include <string.h>
#include <stdio.h>
#include "tilton.h"
#include "text.h"


// Default constructor: makes an empty Text. No buffer is allocated and the
// name, link and function are all NULL.

Text::Text() {
    this->init(NULL, 0);
}


// Capacity constructor: makes an empty Text whose buffer is preallocated
// with room for len bytes. The length of the new Text is still zero.

Text::Text(number len) {
    this->init(NULL, len);
}


// C-string constructor: copies the NUL-terminated string s (the terminator
// itself is not stored). A NULL pointer yields an empty Text instead of
// being handed to strlen, matching how append(const char*) and Text(Text*)
// treat NULL.

Text::Text(const char* s) {
    if (s) {
        init(s, strlen(s));
    } else {
        init(NULL, 0);
    }
}


// Counted-string constructor: copies len bytes starting at s. The bytes do
// not have to be NUL terminated. If s is NULL, a buffer of len bytes is
// allocated but the Text starts out empty.

Text::Text(const char* s, number len) {
    this->init(s, len);
}


// Text constructor: copies the string of another Text. Only the string is
// copied; the name, link and function of the new Text start out NULL.
// A NULL argument produces an empty Text.

Text::Text(Text* text) {
    if (!text) {
        init(NULL, 0);
        return;
    }
    init(text->string, text->length);
}


// Destructor: releases the string buffer and the name.
// Both are allocated with new char[...] (see init, setName and read), so
// they must be released with the array form of delete. Deleting NULL is
// harmless, so an empty or unnamed Text needs no special case.

Text::~Text() {
    delete[] this->string;
    delete[] this->name;
}


// Append one 8-bit code unit. The buffer is grown first if it is full, and
// the cached hash is invalidated.

void Text::append(number codeunit) {
    checkMaxLength(1);
    const number at = length;
    string[at] = static_cast<char>(codeunit);
    length = at + 1;
    myHash = 0;
}


// Append n copies of one 8-bit code unit. Nothing is appended when n is
// zero or negative, but the cached hash is invalidated either way.

void Text::append(number codeunit, number n) {
    checkMaxLength(n);
    const char fill = static_cast<char>(codeunit);
    for (number remaining = n; remaining > 0; remaining -= 1) {
        string[length] = fill;
        length += 1;
    }
    myHash = 0;
}


// Append a NUL-terminated string (without its terminator). A NULL pointer
// is ignored.

void Text::append(const char* s) {
    if (!s) {
        return;
    }
    append(s, strlen(s));
}


// append string

void Text::append(const char* s, number len) {
    if (s && len) {
        checkMaxLength(len);
        memmove(&string[length], s, len);
        length += len;
        myHash = 0;
    }
}


// Append the string of another Text. A NULL pointer is ignored.

void Text::append(Text* text) {
    if (!text) {
        return;
    }
    append(text->string, text->length);
}


// Append the decimal representation of a number. NAN appends nothing.
// A negative number is written as a minus sign followed by its magnitude.
// The leading digits are produced by recursion on n / 10, then the last
// digit is appended.

void Text::appendNumber(number n) {
    if (n == NAN) {
        return;
    }
    if (n < 0) {
        append('-');
        n = -n;
    }
    const number leading = n / 10;
    if (leading > 0) {
        appendNumber(leading);
    }
    append((number)(n % 10) + '0');
    myHash = 0;
}


//  Make sure that len more bytes fit in the buffer. If they do not fit within
//  the allocated max length, then increase the size of the string. The new
//  allocation will be at least twice the previous allocation, and at least
//  as large as the requested total.

void Text::checkMaxLength(number len) {
    const number req = length + len;
    if (maxLength >= req) {
        return;
    }
    number newMaxLength = maxLength * 2;
    if (newMaxLength < req) {
        newMaxLength = req;
    }
    char* newString = new char[newMaxLength];

//  An empty Text has no buffer at all, so only copy when there is one.
//  The old buffer came from new char[], so it is released with delete[].

    if (string) {
        memmove(newString, string, maxLength);
        delete[] string;
    }
    string = newString;
    maxLength = newMaxLength;
}


void Text::dump() {
    Text* text = this;
    while (text) {
        fwrite(text->name, sizeof(char), text->nameLength, stderr);
        if (text->length) {
            fputc('~', stderr);
            fwrite(text->string, sizeof(char), text->length, stderr);
        }
        fprintf(stderr, "\n");
        text = text->link;
    }
}


// Get the code unit at an index. An index outside of the string yields EOT.
// The byte is returned as a plain char value converted to number.

number Text::get(number index) {
    if (index < 0 || index >= length) {
        return EOT;
    }
    return string[index];
}


// Get number, ignoring leading and trailing whitespace.
//
// The accepted form is: optional whitespace, an optional '-', one or more
// decimal digits, optional whitespace. Anything else (including an empty
// or all-whitespace string) yields NAN.
//
// Every index is checked against length before the byte is read, so an
// empty Text (whose string may be NULL) and a string that ends right after
// the whitespace or the '-' are handled without reading past the end.
// Bytes are masked with 0xFF before the whitespace test so that bytes of
// 0x80 and above are not mistaken for whitespace where char is signed.

number Text::getNumber() {
    number i = 0;
    number value = 0;
    bool negative = false;
    bool ok = false;

//  Skip the leading whitespace.

    while (i < length && (string[i] & 0xFF) <= ' ') {
        i += 1;
    }
    if (i >= length) {
        return NAN;
    }

//  Optional sign.

    if (string[i] == '-') {
        negative = true;
        i += 1;
    }

//  Digits. A value that has gone negative is taken to mean that the
//  accumulation overflowed, and the result is NAN.

    while (i < length && string[i] >= '0' && string[i] <= '9') {
        value = (value * 10) + (string[i] - '0');
        if (value < 0) {
            return NAN;
        }
        ok = true;
        i += 1;
    }

//  Whatever remains must be whitespace.

    while (i < length) {
        if ((string[i] & 0xFF) > ' ') {
            return NAN;
        }
        i += 1;
    }
    if (!ok) {
        return NAN;
    }
    return negative ? -value : value;
}


// Find the first occurrence of a substring. The result is the byte index of
// the match, or -1 if there is no match or if the substring is empty.

number Text::indexOf(Text *text) {
    const number needleLength = text->length;
    const char* needle = text->string;
    if (needleLength == 0) {
        return -1;
    }
    const number lastStart = length - needleLength;
    number at = 0;
    while (at <= lastStart) {
        number matched = 0;
        while (matched < needleLength && string[at + matched] == needle[matched]) {
            matched += 1;
        }
        if (matched >= needleLength) {
            return at;
        }
        at += 1;
    }
    return -1;
}


// Common initialization used by all of the constructors. Clears the name,
// link, function and cached hash, then allocates a buffer of len bytes
// (no buffer at all when len is zero). If s is not NULL, len bytes are
// copied from it and become the string; otherwise the Text starts empty.

void Text::init(const char* s, number len) {
    function = NULL;
    link = NULL;
    name = NULL;
    string = NULL;
    myHash = 0;
    nameLength = 0;
    length = 0;
    maxLength = len;
    if (len != 0) {
        string = new char[len];
        if (s != NULL) {
            memmove(string, s, len);
            length = len;
        }
    }
}


// read from standard input

void Text::input() {
    char buffer[10240];
    number len;
    length = 0;
    myHash = 0;
    for (;;) {
        len = fread(buffer, sizeof(char), sizeof(buffer), stdin);
        if (len <= 0) {
            break;
        }
        append(buffer, len);
    }
}


// Is this string equal to a NUL-terminated c-string? A NULL pointer is
// never equal.

bool Text::is(const char* s) {
    if (!s) {
        return false;
    }
    for (number i = 0; i < length; i += 1) {

//  If s ends before this string does, they differ. Testing for the
//  terminator first also keeps the scan from running past the end of s
//  when this string holds a NUL byte at that same position.

        if (s[i] == 0 || string[i] != s[i]) {
            return false;
        }
    }
    return (s[length] == 0);
}


// Is this string equal, byte for byte, to the string of another Text?

bool Text::is(Text* text) {
    if (text->length != length) {
        return false;
    }
    const char* mine = string;
    const char* theirs = text->string;
    for (number remaining = length; remaining > 0; remaining -= 1) {
        if (*mine != *theirs) {
            return false;
        }
        mine += 1;
        theirs += 1;
    }
    return true;
}


// Is the name of this Text equal, byte for byte, to the string of another
// Text?

bool Text::isName(Text* text) {
    if (text->length != nameLength) {
        return false;
    }
    number i = 0;
    while (i < nameLength && name[i] == text->string[i]) {
        i += 1;
    }
    return i >= nameLength;
}


// Find the last occurrence of a substring. The result is the byte index of
// the match, or -1 if there is no match or if the substring is empty.

number Text::lastIndexOf(Text *text) {
    const number needleLength = text->length;
    const char* needle = text->string;
    if (needleLength == 0) {
        return -1;
    }
    number at = length - needleLength;
    while (at >= 0) {
        number matched = 0;
        while (matched < needleLength && string[at + matched] == needle[matched]) {
            matched += 1;
        }
        if (matched >= needleLength) {
            return at;
        }
        at -= 1;
    }
    return -1;
}


// Is this string less than the string of another Text? The strings are
// compared byte by byte as plain char values. If one string is a prefix of
// the other, the shorter one is the lesser; equal strings are not less.

bool Text::lt(Text* text) {
    const number common = (text->length > length) ? length : text->length;
    number i = 0;
    while (i < common && string[i] == text->string[i]) {
        i += 1;
    }
    if (i < common) {
        return (string[i] < text->string[i]);
    }
    return common != text->length;
}


// write to standard output

void Text::output() {
    fwrite(string, sizeof(char), length, stdout);
}


//  read filename -- read the file in 10K chunks.
//
//  The name of this Text becomes the file name, and the string is replaced
//  with the contents of the file. Returns true if the file could be opened.
//  If it could not, the result is false and the string is left empty (the
//  name is still set).

bool Text::read(Text* filename) {
    char buffer[10240];
    const number pathLength = filename->length;

//  fopen needs a NUL-terminated path. Build it in a buffer sized for this
//  particular name, so that a long name cannot overflow a fixed array.

    char* path = new char[pathLength + 1];
    if (pathLength > 0) {
        memmove(path, filename->string, pathLength);
    }
    path[pathLength] = 0;

//  Keep a copy of the file name as the name of this Text. The old name was
//  allocated with new char[], so it is released with delete[].

    char* newName = new char[pathLength];
    if (pathLength > 0) {
        memmove(newName, path, pathLength);
    }
    delete[] name;
    name = newName;
    nameLength = pathLength;

    myHash = 0;
    length = 0;
    FILE* fp = fopen(path, "rb");
    delete[] path;
    if (!fp) {
        return false;
    }
    for (;;) {
        const number len = fread(buffer, sizeof(char), sizeof(buffer), fp);
        if (len <= 0) {
            break;
        }
        append(buffer, len);
    }
    fclose(fp);
    return true;
}


// Set the code unit at an index. An index at or beyond the end appends the
// code unit instead; a negative index changes nothing. The cached hash is
// invalidated in every case.

void Text::set(number index, number codeunit) {
    myHash = 0;
    if (index >= length) {
        append(codeunit);
        return;
    }
    if (index < 0) {
        return;
    }
    string[index] = static_cast<char>(codeunit);
}


// Replace the string with a copy of the string of another Text. A NULL or
// empty Text makes this string empty (the buffer is kept). The buffer is
// only reallocated when the new string does not fit in it.

void Text::set(Text* text) {
    myHash = 0;
    if (!text || !text->length) {
        length = 0;
        return;
    }
    if (text->length > maxLength) {

//  Allocate the replacement before releasing the old buffer. The old buffer
//  came from new char[], so it is released with delete[].

        char* fresh = new char[text->length];
        delete[] string;
        string = fresh;
        maxLength = text->length;
    }
    length = text->length;
    memmove(string, text->string, length);
}


// set name with c-string

void Text::setName(const char* s) {
    setName(s, strlen(s));
}


// Set the name from len bytes starting at s. A NULL pointer or a negative
// length sets an empty name.

void Text::setName(const char* s, number len) {
    if (!s || len < 0) {
        len = 0;
    }

//  Copy into the new allocation before releasing the old name, so that a
//  source that points into the current name is still valid while it is
//  copied. The old name came from new char[], so it is released with
//  delete[].

    char* fresh = new char[len];
    if (len > 0) {
        memmove(fresh, s, len);
    }
    delete[] name;
    name = fresh;
    nameLength = len;
}


// Set the name from the string of another Text.

void Text::setName(Text* text) {
    this->setName(text->string, text->length);
}


//  substring -- reduce this string, in place, to the len bytes that begin
//  at byte index start. The range is clamped to the string: a negative
//  start is treated as zero, a start beyond the end as the end, and the
//  length is limited to the bytes that are actually available.

void Text::substr(number start, number len) {
    if (start < 0) {
        start = 0;
    }
    if (start > length) {
        start = length;
    }
    if (len < 0) {
        len = 0;
    }
    if (len > length - start) {
        len = length - start;
    }
    if (len > 0 && start > 0) {
        memmove(string, &string[start], len);
    }
    length = len;

//  The content has changed, so the cached hash is no longer valid.

    myHash = 0;
}


// Cut this string at a byte index and return the part that was cut off as
// a newly allocated Text. If the index is outside of the string, nothing
// is removed and a new empty Text is returned.

Text* Text::tail(number index) {
    if (index < 0 || index >= length) {
        return new Text();
    }
    const number tailLength = length - index;
    length = index;
    myHash = 0;
    return new Text(string + index, tailLength);
}


// trim is like append, except that it trims leading and trailing
// whitespace, and reduces each run of whitespace to a single space.
// Whitespace is any byte at or below ' '. Bytes are masked with 0xFF
// before that test, so the bytes of multibyte UTF-8 characters (0x80 and
// above) are kept even where char is signed. A NULL or empty Text appends
// nothing.

void Text::trim(Text* text) {
    if (!text || text->length <= 0) {
        return;
    }
    const char* s = text->string;
    const number l = text->length;
    bool b = false;     // true once this call has appended something
    bool gap = false;   // true when whitespace was seen since the last append
    for (number i = 0; i < l; i += 1) {
        if ((s[i] & 0xFF) > ' ') {

//  A run of whitespace becomes one space, but only between two words:
//  never before the first word, and never after the last.

            if (gap && b) {
                append(' ');
            }
            gap = false;
            append(s[i]);
            b = true;
        } else {
            gap = true;
        }
    }
}


// Determine the utf-8 length: the number of characters in the string,
// where a well-formed multibyte sequence counts as one character.
//
// The lead byte announces the length of the sequence (0xC0-0xDF: 2 bytes,
// 0xE0-0xEF: 3 bytes, 0xF0 and above: 4 bytes). The sequence is accepted
// only if all of its continuation bytes lie within the string and have the
// form 10xxxxxx. Otherwise the lead byte is counted as a single-byte
// character.

number Text::utfLength() {
    number i = 0;
    number num = 0;
    while (i < length) {
        const number codeunit = string[i] & 0xFF;
        number extra = 0;               // continuation bytes expected
        if (codeunit >= 0xF0) {
            extra = 3;
        } else if (codeunit >= 0xE0) {
            extra = 2;
        } else if (codeunit >= 0xC0) {
            extra = 1;
        }

//  The last continuation byte is at i + extra, and that index must be
//  within the string. A sequence that ends exactly at the end of the
//  string is complete and must be accepted.

        bool wellFormed = (i + extra) < length;
        for (number k = 1; wellFormed && k <= extra; k += 1) {
            wellFormed = ((string[i + k] & 0xC0) == 0x80);
        }
        i += wellFormed ? (1 + extra) : 1;
        num += 1;
    }
    return num;
}


// utf-8 substring

Text* Text::utfSubstr(number start, number len) {
    number codeunit;
    number i = 0;
    Text* text;
    while (start) {
        if (i >= length) {
            return NULL;
        }
        codeunit = string[i] & 0xFF;
        i += 1;
        if (codeunit >= 0xC0) {
            if (codeunit < 0xE0) { // 2-byte form
                if ((i + 1) < length && ((string[i] & 0xC0) == 0x80)) {
                    i += 1;
                }
            } else if (codeunit < 0xF0) { // 3-byte form
                if (
                    (i + 2) < length
                    && ((string[i] & 0xC0) == 0x80)
                    && ((string[i + 1] & 0xC0) == 0x80)
                ) {
                    i += 2;
                }
            } else { // 4-byte form
                if (
                    (i + 3) < length
                    && ((string[i] & 0xC0) == 0x80)
                    && ((string[i + 1] & 0xC0) == 0x80)
                    && ((string[i + 2] & 0xC0) == 0x80)
                ) {
                    i += 3;
                }
            }
        }
        start -= 1;
    }
    text = new Text(length - i);
    while (len && i < length) {
        codeunit = string[i] & 0xFF;
        i += 1;
        text->append(codeunit);
        if (codeunit >= 0xC0) {
            if (codeunit < 0xE0) { // 2-byte form
                if ((i + 1) < length && ((string[i] & 0xC0) == 0x80)) {
                    text->append(string[i]);
                    i += 1;
                }
            } else if (codeunit < 0xF0) { // 3-byte form
                if (
                    (i + 2) < length
                    && ((string[i] & 0xC0) == 0x80)
                    && ((string[i + 1] & 0xC0) == 0x80)
                ) {
                    text->append(&string[i], 2);
                    i += 2;
                }
            } else { // 4-byte form
                if (
                    (i + 3) < length
                    && ((string[i] & 0xC0) == 0x80)
                    && ((string[i + 1] & 0xC0) == 0x80)
                    && ((string[i + 2] & 0xC0) == 0x80)
                ) {
                    text->append(&string[i], 3);
                    i += 3;
                }
            }
        }
        len -= 1;
    }
    return text;
}


// write filename -- write the string to the named file, replacing any
// previous contents. Returns true only if the file could be opened, every
// byte was written, and the file closed without error.

bool Text::write(Text* filename) {
    const number pathLength = filename->length;

//  fopen needs a NUL-terminated path. Build it in a buffer sized for this
//  particular name, so that a long name cannot overflow a fixed array.

    char* path = new char[pathLength + 1];
    if (pathLength > 0) {
        memmove(path, filename->string, pathLength);
    }
    path[pathLength] = 0;
    FILE* fp = fopen(path, "wb");
    delete[] path;
    if (!fp) {
        return false;
    }
    bool ok = true;
    if (length > 0) {
        ok = ((number)fwrite(string, sizeof(char), length, fp) == length);
    }

//  Buffered data is flushed by fclose, so a failure there also means that
//  the file was not completely written.

    if (fclose(fp) != 0) {
        ok = false;
    }
    return ok;
}


// Return the hash of the string. The hash is cached in myHash; a value of
// zero means that it has not been computed since the string last changed.
// The computation starts from INFINITY and mixes in the bytes from the
// last to the first.

number Text::hash() {
    if (myHash) {
        return myHash;
    }
    myHash = INFINITY;
    for (number at = length - 1; at >= 0; at -= 1) {
        myHash ^= string[at];
        myHash ^= myHash << 11;
        myHash ^= myHash >> 1;
        myHash ^= myHash << 3;
    }
    return myHash;
}