Пример #1
0
/**
 * @brief  Get previous cell from string
 *
 * @param  s    : the string
 * @param  pos  : the position in string to get cell previous to
 * @param  cell : the output buffer
 * @param  is_decomp_am : whether SARA AM is to be decomposed into 
 *                        NIKHANIT and SARA AA and to be in separate cells
 *
 * @returns  total chars consumed by the cell
 *
 * Gets last cell from the string bounded by @a s and @a pos, and if @a cell
 * is not null, stores the cell data in it.
 */
size_t
th_prev_cell (const thchar_t *s, size_t pos,
              struct thcell_t *cell, int is_decomp_am)
{
    size_t n = 0;
    struct thcell_t acell;
    acell.base = acell.hilo = acell.top = 0;
    if (pos > 0) {
        do {
            thchar_t c = s[pos-1];
            switch (th_chlevel (c)) {
            case 0:
                if (is_decomp_am && c == TIS_SARA_AM) {
                    acell.hilo = c;
                } else {
                    acell.base = c;
                }
                break;
            case 1: 
                if (acell.hilo && th_chlevel(acell.hilo) == 3) {
                    acell.top = acell.hilo;
                }
                /* fall through */
            case -1:
                acell.hilo = c;
                break;
            case 2:
                acell.top  = c;
                break;
            case 3:
                if (!acell.hilo) {
                    acell.hilo = c;
                } else {
                    acell.top = c;
                }
                break;
            }
            ++n; --pos;
        } while (
            pos > 0 && (
                is_composible (s[pos-1], s[pos]) || (
                    acell.hilo == TIS_SARA_AM &&
                    !acell.base && (
                        (!acell.top && th_isthtone (s[pos-1])) ||
                        th_isthcons (s[pos-1])
                    )
                )
            )
        );
    }
    if (cell) {
        *cell = acell;
    }
    return n;
}
Пример #2
0
/**
 * @brief  Get first cell from string
 *
 * @param  s    : the string
 * @param  len  : the length of string
 * @param  cell : the output buffer
 * @param  is_decomp_am : whether SARA AM is to be decomposed into 
 *                        NIKHANIT and SARA AA and to be in separate cells
 *
 * @returns  total chars consumed by the cell
 *
 * Gets first cell from the string bounded by @a s and @a len, and, if @a cell
 * is not null, stores the cell data in it.
 */
size_t
th_next_cell (const thchar_t *s, size_t len,
              struct thcell_t *cell, int is_decomp_am)
{
    size_t n = 0;
    struct thcell_t acell;
    acell.base = acell.hilo = acell.top = 0;
    if (len > 0) {
        do {
            switch (th_chlevel (*s)) {
            case 0:
                if (is_decomp_am && *s == TIS_SARA_AM) {
                    acell.hilo = *s++;
                } else {
                    acell.base = *s++;
                }
                break;
            case -1:
            case 1:
                acell.hilo = *s++;
                break;
            case 2:
                acell.top  = *s++;
                break;
            case 3:
                if (!acell.hilo) {
                    acell.hilo = *s++;
                } else {
                    acell.top = *s++;
                }
                break;
            }
            ++n; --len;
        } while (
            len > 0 && (
                is_composible (s[-1], s[0]) || (
                    is_decomp_am && s[0] == TIS_SARA_AM
                    && th_isthcons (acell.base) && acell.hilo == 0
                )
            )
        );
    }
    if (cell) {
        *cell = acell;
    }
    return n;
}
Пример #3
0
/*
 * Fill hints[0..len-1] with 0/1 flags computed from the characters of str.
 *
 * str   : the text to scan
 * len   : number of characters in str; a negative value means "measure str
 *         with strlen()", so str must then be NUL-terminated
 * hints : output buffer of at least len bytes; every byte is first cleared
 *         and then set to 1 only at the positions selected below
 *
 * NOTE(review): judging by the function name, a 1 presumably marks a
 * position worth considering as a break position -- confirm against the
 * caller.
 *
 * The scan moves left to right in variable-sized steps.  Positions that are
 * stepped over keep their cleared (0) hint.  A step may carry i past len;
 * that is harmless because every write to hints[] happens at an index that
 * has just passed the loop's i < len test.
 */
static void
th_brkpos_hints (const thchar_t *str, int len, char *hints)
{
    int  i;

    /* negative length: take the length from the NUL terminator */
    if (len < 0)
        len = strlen ((const char *)str);

    /* start from "no hint anywhere" */
    memset (hints, 0, len);

    for (i = 0; i < len; /* nop */) {
        if (th_isthcons (str[i])) {
            /* --- str[i] is a consonant (per th_isthcons) --- */
            if (i+1 < len && str[i+1] == TIS_THANTHAKHAT) {
                /* no hint set for this group */
                i += 2; /* the cons + THANTHAKHAT */
            } else if (i+2 < len && str[i+2] == TIS_THANTHAKHAT) {
                /* no hint set for this group */
                i += 3; /* the cons + intermediate char + THANTHAKHAT */
            } else if (i+2 < len
                       && str[i] != TIS_KO_KAI && str[i+1] == TIS_MAITAIKHU
                       && (str[i+2] == TIS_O_ANG || str[i+2] == TIS_WO_WAEN))
            {
                /* hint on the leading consonant only; the following three
                 * characters are stepped over without being examined */
                hints[i] = 1;
                i += 4; /* the cons + MAITAIKHU + OANG/WOWAEN + cons */
            } else if ((i > 0
                        && (str[i-1] == TIS_MAI_HAN_AKAT
                            || str[i-1] == TIS_SARA_UEE))
                       || (i > 1 && th_isthtone (str[i-1])
                           && (str[i-2] == TIS_MAI_HAN_AKAT
                               || str[i-2] == TIS_SARA_UEE)))
            {
                /* consonant directly after MAI HAN-AKAT / SARA UEE, or after
                 * one of those plus a tone mark: no hint here */
                i++;
            } else {
                /* any other consonant gets a hint */
                hints[i++] = 1;
            }
        } else if (str[i] == TIS_SARA_E || str[i] == TIS_SARA_AE) {
            /* --- SARA E / SARA AE: hint on the vowel itself --- */
            hints[i] = 1; /* sara e/ae */
            i += 2; /* sara e/ae + the supposedly cons */
            /* nothing left to look at after the vowel + consonant */
            if (i >= len)
                break;
            /* From here on i points just past that consonant; the branches
             * below step over further characters so that none of them gets
             * a hint. */
            if (str[i] == TIS_MAITAIKHU) {
                i += 2; /* MAITAIKHU + the supposedly cons */
            } else if (th_isupvowel (str[i])) {
                i++; /* the upper vowel, as part of composite vowel */
                /* an optional tone mark after the upper vowel */
                if (i < len && th_isthtone (str[i]))
                    i++;
                i++; /* the supposedly cons */
            } else if (i+2 < len
                       && ((str[i+1] == TIS_SARA_AA && str[i+2] == TIS_SARA_A)
                            || (str[i] != TIS_KO_KAI
                                && str[i+1] == TIS_MAITAIKHU
                                && str[i+2] != TIS_O_ANG
                                && str[i+2] != TIS_WO_WAEN)))
            {
                i += 3; /* 2nd cons + SARA_AA + SARA_A, or
                         * 2nd cons + MAITAIKHU + final cons
                         */
            }
            /* otherwise: leave i where it is and let the main loop classify
             * str[i] on its next pass */
        } else if (th_isldvowel (str[i])) {
            /* --- other characters accepted by th_isldvowel: hint on the
             * vowel, then step over the character that follows it --- */
            hints[i] = 1; /* the ldvowel */
            i += 2; /* the ldvowel + the supposedly cons */
        } else if (str[i] == TIS_RU || str[i] == TIS_LU) {
            /* --- RU / LU get a hint of their own --- */
            hints[i++] = 1;
        } else {
            /* --- everything else: no hint, advance one character --- */
            i++;
        }
    }
}