Example #1
0
// Consume one run of plain text starting at `iter`.
//
// Scans forward until the end of the input or an unescaped delimiter
// (whitespace, '"', '(' or ')') is reached. A backslash causes the code point
// following it to be accepted without being tested as a delimiter. On return
// `iter` has been moved to the end of the consumed run; a terminating
// delimiter is left unconsumed.
//
// Returns a new TOKEN_STRING ParserElem built from the cropped text.
static ParserElem*
S_consume_text(StringIterator *iter) {
    StringIterator *cursor = StrIter_Clone(iter);

    for (;;) {
        int32_t cp = StrIter_Next(cursor);

        if (cp == STR_OOB) {
            // Ran off the end of the input.
            break;
        }

        if (cp == '\\') {
            // Escape: swallow the next code point, whatever it is.
            if (StrIter_Next(cursor) == STR_OOB) {
                break;
            }
            continue;
        }

        if (cp == '"'
            || cp == '('
            || cp == ')'
            || StrHelp_is_whitespace(cp)
           ) {
            // Leave the delimiter for the caller.
            StrIter_Recede(cursor, 1);
            break;
        }
    }

    // Everything between the original position and the cursor is the token.
    String *token_text = StrIter_crop(iter, cursor);
    StrIter_Assign(iter, cursor);
    DECREF(cursor);

    return ParserElem_new(TOKEN_STRING, (Obj*)token_text);
}
Example #2
0
// Try to consume `keyword` at the current position of `iter`.
//
// The keyword only matches when it is immediately followed by a delimiter:
// whitespace, '"', '(', ')', '+' or '-'. A keyword that runs up to the very
// end of the input does not match. On a match `iter` is moved to just past
// the keyword (the delimiter itself is not consumed) and a new ParserElem of
// the given `type` is returned; otherwise `iter` is left untouched and NULL
// is returned.
//
// NOTE(review): `keyword_len` is used both as the UTF-8 length for the prefix
// test and as the count passed to StrIter_Advance; these presumably agree
// only for ASCII keywords -- confirm against callers.
static ParserElem*
S_consume_keyword(StringIterator *iter, const char *keyword,
                  size_t keyword_len, int type) {
    // Cheap rejection before cloning a scratch iterator.
    if (!StrIter_Starts_With_Utf8(iter, keyword, keyword_len)) {
        return NULL;
    }

    StringIterator *probe = StrIter_Clone(iter);
    StrIter_Advance(probe, keyword_len);

    const int32_t following = StrIter_Next(probe);
    int matched = 0;

    if (following != STR_OOB) {
        switch (following) {
            case '"':
            case '(':
            case ')':
            case '+':
            case '-':
                matched = 1;
                break;
            default:
                matched = StrHelp_is_whitespace(following) ? 1 : 0;
                break;
        }
    }

    if (matched) {
        // Un-read the delimiter, then commit the new position.
        StrIter_Recede(probe, 1);
        StrIter_Assign(iter, probe);
    }

    DECREF(probe);
    return matched ? ParserElem_new(type, NULL) : NULL;
}
// Checks StrHelp_is_whitespace against a handful of code points that are
// expected to be classified as whitespace and a handful that are not.
static void
test_is_whitespace(TestBatchRunner *runner) {
    // Code points that have no convenient character literal.
    const int mongolian_vowel_separator = 0x180E;
    const int nul                       = 0;
    const int smiley                    = 0x263A;

    // Expected to be whitespace.
    TEST_TRUE(runner, StrHelp_is_whitespace(' '),
              "space is whitespace");
    TEST_TRUE(runner, StrHelp_is_whitespace('\n'),
              "newline is whitespace");
    TEST_TRUE(runner, StrHelp_is_whitespace('\t'),
              "tab is whitespace");
    TEST_TRUE(runner, StrHelp_is_whitespace('\v'),
              "vertical tab is whitespace");
    TEST_TRUE(runner, StrHelp_is_whitespace(mongolian_vowel_separator),
              "Mongolian vowel separator is whitespace");

    // Expected not to be whitespace.
    TEST_FALSE(runner, StrHelp_is_whitespace('a'),
               "'a' isn't whitespace");
    TEST_FALSE(runner, StrHelp_is_whitespace(nul),
               "NULL isn't whitespace");
    TEST_FALSE(runner, StrHelp_is_whitespace(smiley),
               "Smiley isn't whitespace");
}
Example #4
0
// Move the iterator backwards over any run of whitespace that immediately
// precedes its current position.
//
// Returns the number of whitespace code points stepped over. On return the
// iterator sits at the offset reached after the last whitespace code point
// that was stepped over, or at its original offset if there was none.
size_t
StrIter_Skip_Whitespace_Back_IMP(StringIterator *self) {
    size_t skipped       = 0;
    size_t resume_offset = self->byte_offset;

    for (;;) {
        int32_t cp = StrIter_Prev(self);

        if (cp == STR_OOB || !StrHelp_is_whitespace(cp)) {
            break;
        }

        // This code point was whitespace: remember how far we got.
        resume_offset = self->byte_offset;
        ++skipped;
    }

    // The final StrIter_Prev call may have moved past a non-whitespace code
    // point, so restore the last offset known to be good.
    self->byte_offset = resume_offset;
    return skipped;
}
Example #5
0
// Find an ending boundary at or before the current position given by the
// iterator.
//
// tail            - iterator marking the candidate end; updated in place to
//                   the boundary that was chosen.
// max_skip        - once at least this many code points have been stepped
//                   over, the backward scan stops at the next non-whitespace
//                   code point (whitespace never stops the scan).
// num_skipped_ptr - receives the number of code points skipped.
//
// Returns true if an ending edge (sentence) was found: either only whitespace
// lies between `tail` and the end of the string, or a period was found while
// scanning backwards. Returns false if the function fell back to a word
// boundary or, failing that, receded by a single code point.
bool
S_find_ending_boundary(StringIterator *tail, uint32_t max_skip,
                       uint32_t *num_skipped_ptr) {
    int32_t code_point;

    // Check if we're at an ending boundary already. Don't check for a word
    // boundary because we need space for a trailing ellipsis.

    StringIterator *iter = StrIter_Clone(tail);

    do {
        code_point = StrIter_Next(iter);

        if (code_point == STRITER_DONE) {
            // Skip remaining whitespace.
            *num_skipped_ptr = StrIter_Skip_Prev_Whitespace(tail);
            DECREF(iter);
            return true;
        }
    } while (StrHelp_is_whitespace(code_point));

    // Keep track of the first word boundary.
    StringIterator *word = NULL;
    uint32_t word_offset = 0;

    StrIter_Assign(iter, tail);

    for (uint32_t i = 0;
            STRITER_DONE != (code_point = StrIter_Prev(iter));
            ++i)
    {
        if (code_point == '.') {
            StrIter_Assign(tail, iter);
            StrIter_Advance(tail, 1); // Include period.
            *num_skipped_ptr = i;
            DECREF(word);
            DECREF(iter);
            return true;
        }

        if (StrHelp_is_whitespace(code_point)) {
            // Only the first whitespace seen (the one closest to the
            // original tail) is recorded as the word boundary.
            if (word == NULL) {
                word = StrIter_Clone(iter);
                word_offset = i + 1;
            }
        }
        else if (i >= max_skip) {
            // Break only at non-whitespace to allow another sentence
            // boundary to be found.
            break;
        }
    }

    if (word == NULL) {
        // Make space for ellipsis.
        *num_skipped_ptr = StrIter_Recede(tail, 1);
    }
    else {
        // Use word boundary if no sentence boundary was found.
        StrIter_Assign(tail, word);

        // Strip whitespace and punctuation that collides with an ellipsis.
        while (STRITER_DONE != (code_point = StrIter_Prev(tail))) {
            if (!StrHelp_is_whitespace(code_point)
                    && code_point != '.'
                    && code_point != ','
                    && code_point != ';'
                    && code_point != ':'
                    && code_point != '?'
                    && code_point != '!'
               ) {
                // Step forward again over the code point we want to keep.
                StrIter_Advance(tail, 1);
                break;
            }
            ++word_offset;
        }

        *num_skipped_ptr = word_offset;
    }

    DECREF(word);
    DECREF(iter);
    return false;
}
Example #6
0
// Find a starting boundary at or after the current position given by the
// iterator.
//
// top             - iterator marking the candidate start; updated in place
//                   to the boundary that was chosen.
// max_skip        - maximum number of code points to scan forward while
//                   looking for a sentence boundary; trailing whitespace is
//                   skipped in addition to this.
// num_skipped_ptr - receives the number of code points skipped.
//
// Returns true if a starting edge (sentence) was found, false if the
// function fell back to a word boundary or found no boundary at all.
bool
S_find_starting_boundary(StringIterator *top, uint32_t max_skip,
                         uint32_t *num_skipped_ptr) {
    // Keep track of the first word boundary.
    StringIterator *word = NULL;
    uint32_t word_offset = 0;

    // Check if we're at a starting boundary already.

    StringIterator *iter = StrIter_Clone(top);

    while (true) {
        int32_t code_point = StrIter_Prev(iter);

        if (code_point == STRITER_DONE || code_point == '.') {
            // Skip remaining whitespace.
            *num_skipped_ptr = StrIter_Skip_Next_Whitespace(top);
            // `word` may have been cloned on an earlier pass through this
            // loop (whitespace seen before the period or string start), so
            // it has to be released here as well as `iter`.
            DECREF(word);
            DECREF(iter);
            return true;
        }

        if (StrHelp_is_whitespace(code_point)) {
            // Preceded by whitespace: `top` itself is a word boundary.
            if (word == NULL) {
                word = StrIter_Clone(top);
            }
        }
        else {
            break;
        }
    }

    // Try to start on a boundary.

    uint32_t num_skipped = 0;
    bool     found_edge  = false;

    StrIter_Assign(iter, top);

    for (uint32_t i = 0; i < max_skip; ++i) {
        int32_t code_point = StrIter_Next(iter);

        if (code_point == STRITER_DONE || code_point == '.') {
            found_edge = true;
            StrIter_Assign(top, iter);
            num_skipped = i + 1;
            break;
        }

        // Remember only the first word boundary encountered.
        if (word == NULL && StrHelp_is_whitespace(code_point)) {
            word = StrIter_Clone(iter);
            word_offset = i + 1;
        }
    }

    // Try to use word boundary if no sentence boundary was found.
    if (!found_edge && word != NULL) {
        StrIter_Assign(top, word);
        num_skipped = word_offset;
    }

    // Skip remaining whitespace.
    num_skipped += StrIter_Skip_Next_Whitespace(top);
    *num_skipped_ptr = num_skipped;

    DECREF(word);
    DECREF(iter);
    return found_edge;
}