コード例 #1
0
ファイル: CharBuf.c プロジェクト: pavansondur/lucy
// Validate `size` bytes at `ptr` as UTF-8, then hand `ptr`, `size` and `cap`
// to CB_new_steal_from_trusted_str() and return whatever it produces.
// Invalid input is reported through DIE_INVALID_UTF8 (presumably that macro
// does not return -- confirm against its definition).
CharBuf*
CB_new_steal_str(char *ptr, size_t size, size_t cap) {
    const int utf8_ok = StrHelp_utf8_valid(ptr, size) ? 1 : 0;

    if (utf8_ok == 0) {
        DIE_INVALID_UTF8(ptr, size);
    }

    // The bytes have been checked, so the "trusted" constructor may be used.
    return CB_new_steal_from_trusted_str(ptr, size, cap);
}
コード例 #2
0
ファイル: String.c プロジェクト: srcclrapache1/lucy-clownfish
// Concatenation entry point for unverified UTF-8: check the `size` bytes at
// `ptr`, report bad input through DIE_INVALID_UTF8, and otherwise delegate
// to Str_Cat_Trusted_Utf8(), whose result is returned unchanged.
String*
Str_Cat_Utf8_IMP(String *self, const char* ptr, size_t size) {
    const int utf8_ok = StrHelp_utf8_valid(ptr, size) ? 1 : 0;

    if (utf8_ok == 0) {
        DIE_INVALID_UTF8(ptr, size);
    }

    // Input is now known to be well-formed; skip re-validation downstream.
    return Str_Cat_Trusted_Utf8(self, ptr, size);
}
コード例 #3
0
ファイル: CharBuf.c プロジェクト: pavansondur/lucy
// Create a CharBuf from `size` bytes of UTF-8 at `ptr`.  The bytes are
// validated first; malformed input is reported through DIE_INVALID_UTF8.
// Construction itself is delegated to CB_new_from_trusted_utf8().
CharBuf*
CB_new_from_utf8(const char *ptr, size_t size) {
    const int utf8_ok = StrHelp_utf8_valid(ptr, size) ? 1 : 0;

    if (utf8_ok == 0) {
        DIE_INVALID_UTF8(ptr, size);
    }

    return CB_new_from_trusted_utf8(ptr, size);
}
コード例 #4
0
ファイル: TextType.c プロジェクト: gitpan/KinoSearch
// Read one complete term text from `instream` into the stepper's value.
//
// The stream supplies a C32-encoded byte length followed by that many bytes
// of text.  The bytes are stored in the CharBuf held in self->value (created
// here on first use), checked for UTF-8 validity, and NUL-terminated.
// Throws ERR if the bytes are not valid UTF-8.
void
TextTermStepper_read_key_frame(TextTermStepper *self, InStream *instream)
{
    const uint32_t num_bytes = InStream_Read_C32(instream);
    CharBuf *charbuf;
    char    *buf;

    // Lazily create the backing CharBuf, then make sure it is big enough.
    if (!self->value) {
        self->value = (Obj*)CB_new(num_bytes);
    }
    charbuf = (CharBuf*)self->value;
    buf     = CB_Grow(charbuf, num_bytes);

    // Pull the raw text straight into the CharBuf's buffer and record its
    // length before validating.
    InStream_Read_Bytes(instream, buf, num_bytes);
    CB_Set_Size(charbuf, num_bytes);
    if (!StrHelp_utf8_valid(buf, num_bytes)) {
        THROW(ERR, "Invalid UTF-8 sequence in '%o' at byte %i64",
            InStream_Get_Filename(instream), 
            InStream_Tell(instream) - num_bytes);
    }

    // Terminate the text.  NOTE(review): relies on CB_Grow() leaving room for
    // one byte beyond the requested size -- confirm.
    buf[num_bytes] = '\0';
}
コード例 #5
0
ファイル: TextType.c プロジェクト: kidaa/lucy
// Read a prefix-compressed term from `instream` and apply it to the
// stepper's current value.
//
// The stream supplies two C32 values: the number of leading bytes kept from
// the current value (`text_overlap`) and the number of new bytes that follow
// (`finish_chars_len`).  The new bytes are written directly after the kept
// prefix, the result is validated as UTF-8 and NUL-terminated, and the
// cached ivars->string is released and cleared.
//
// Throws ERR if the two lengths overflow 32 bits when summed, or if the
// resulting text is not valid UTF-8.
void
TextTermStepper_Read_Delta_IMP(TextTermStepper *self, InStream *instream) {
    TextTermStepperIVARS *const ivars = TextTermStepper_IVARS(self);
    const uint32_t text_overlap     = InStream_Read_C32(instream);
    const uint32_t finish_chars_len = InStream_Read_C32(instream);
    const uint32_t total_text_len   = text_overlap + finish_chars_len;

    // Unsigned addition wraps silently.  If it did, the buffer below would
    // be grown to the (small) wrapped size while the read still targets
    // `ptr + text_overlap`, writing outside the allocation.  Reject such
    // input before touching the buffer.
    if (total_text_len < text_overlap) {
        THROW(ERR, "Term text length overflow in '%o' at byte %i64",
              InStream_Get_Filename(instream),
              InStream_Tell(instream));
    }

    // Allocate space.
    // NOTE(review): text_overlap is not checked against the current size of
    // the CharBuf here -- confirm whether callers guarantee that.
    CharBuf *charbuf = (CharBuf*)ivars->value;
    char    *ptr     = CB_Grow(charbuf, total_text_len);

    // Set the value text: keep the first `text_overlap` bytes, append the
    // freshly read tail.
    InStream_Read_Bytes(instream, ptr + text_overlap, finish_chars_len);
    CB_Set_Size(charbuf, total_text_len);
    if (!StrHelp_utf8_valid(ptr, total_text_len)) {
        THROW(ERR, "Invalid UTF-8 sequence in '%o' at byte %i64",
              InStream_Get_Filename(instream),
              InStream_Tell(instream) - finish_chars_len);
    }

    // Null-terminate.
    ptr[total_text_len] = '\0';

    // Invalidate string: the cached object no longer matches the value.
    DECREF(ivars->string);
    ivars->string = NULL;
}
コード例 #6
0
ファイル: String.c プロジェクト: srcclrapache1/lucy-clownfish
// Create a String from the `size` bytes at `utf8` after verifying that they
// are valid UTF-8.  Malformed input is reported through DIE_INVALID_UTF8.
// The object is allocated with Class_Make_Obj(STRING) and initialised by
// Str_init_steal_trusted_utf8(), whose result is returned.
String*
Str_new_steal_utf8(char *utf8, size_t size) {
    const int utf8_ok = StrHelp_utf8_valid(utf8, size) ? 1 : 0;

    if (utf8_ok == 0) {
        DIE_INVALID_UTF8(utf8, size);
    }

    // Allocate only once the input is known to be good.
    return Str_init_steal_trusted_utf8((String*)Class_Make_Obj(STRING),
                                       utf8, size);
}
コード例 #7
0
// Run both UTF-8 validators over `content` and record one test result.
//
// If StrHelp_utf8_valid() and S_utf8_valid_alt() disagree, the test fails
// with a "Disagreement" message.  If they agree, the test passes only when
// their shared verdict equals `expected`.
static void
S_test_validity(TestBatchRunner *runner, const char *content, size_t size,
                bool expected, const char *description) {
    const bool primary   = StrHelp_utf8_valid(content, size);
    const bool reference = S_utf8_valid_alt(content, size);

    if (primary == reference) {
        // Implementations agree; now compare against the expected verdict.
        TEST_TRUE(runner, primary == expected, "%s", description);
    }
    else {
        FAIL(runner, "Disagreement: %s", description);
    }
}
コード例 #8
0
ファイル: Freezer.c プロジェクト: rectang/lucy
// Deserialize a String from `instream` into the object `string`.
//
// The stream supplies a C32 byte count followed by that many bytes of text.
// The bytes are read into a freshly allocated, NUL-terminated buffer,
// validated as UTF-8, and then passed to Str_init_steal_trusted_utf8()
// together with `string`; that call's result is returned.
//
// Throws ERR if the count equals SIZE_MAX (size + 1 would overflow) or if
// the bytes are not valid UTF-8.
String*
Freezer_deserialize_string(String *string, InStream *instream) {
    size_t size = InStream_Read_C32(instream);
    if (size == SIZE_MAX) {
        THROW(ERR, "Can't deserialize SIZE_MAX bytes");
    }

    // One extra byte for the terminating NUL.
    char *buf = (char*)MALLOCATE(size + 1);
    // NOTE(review): if InStream_Read_Bytes() can throw (e.g. on a short
    // read), `buf` would leak on that path as well -- confirm.
    InStream_Read_Bytes(instream, buf, size);
    buf[size] = '\0';

    if (!StrHelp_utf8_valid(buf, size)) {
        // Nothing has taken ownership of the buffer yet, so release it
        // before raising the error; otherwise it would be leaked.
        FREEMEM(buf);
        THROW(ERR, "Attempt to deserialize invalid UTF-8");
    }
    return Str_init_steal_trusted_utf8(string, buf, size);
}
コード例 #9
0
ファイル: Json.c プロジェクト: pavansondur/lucy
// Parse a JSON string literal.
//
// On entry `*json_ptr` points at the opening double quote; `limit` is the
// end of the input.  On success `*json_ptr` is advanced to just past the
// closing quote and a new CharBuf holding the string's content is returned.
// Returns NULL with an error set if the string is unterminated, contains
// bad UTF-8, or (via S_unescape_text) contains a bad escape.
static CharBuf*
S_parse_string(char **json_ptr, char *const limit) {
    // Find terminating double quote, determine whether there are any escapes.
    char *top = *json_ptr + 1;
    char *end = NULL;
    bool_t saw_backslash = false;
    char *text = top;
    while (text < limit) {
        if (*text == '"') {
            end = text;
            break;
        }

        // Number of bytes to step over: 1 for an ordinary character, 2 for
        // a simple escape, 6 for a \uXXXX escape.
        size_t step = 1;
        if (*text == '\\') {
            saw_backslash = true;
            step = (limit - text > 1 && text[1] == 'u') ? 6 : 2;
        }

        // Never form a pointer beyond `limit`: if the step would overshoot,
        // the string cannot be terminated within the input.
        if (step > (size_t)(limit - text)) {
            break;
        }
        text += step;
    }
    if (!end) {
        SET_ERROR(CB_newf("Unterminated string"), *json_ptr, limit);
        return NULL;
    }

    // Advance the text buffer to just beyond the closing quote.
    *json_ptr = end + 1;

    if (saw_backslash) {
        return S_unescape_text(top, end);
    }
    else {
        // Optimize common case where there are no escapes.
        size_t len = end - top;
        if (!StrHelp_utf8_valid(top, len)) {
            CharBuf *mess = MAKE_MESS("Bad UTF-8 in JSON");
            Err_set_error(Err_new(mess));
            return NULL;
        }
        return CB_new_from_trusted_utf8(top, len);
    }
}
コード例 #10
0
// Exhaustively exercise the UTF-8 helpers for every code point from 0
// through 0x10FFFF.  For each one: encode it, then check the reported
// length against StrHelp_UTF8_COUNT, check that both validators agree,
// check StrHelp_back_utf8_char(), and check that decoding yields the
// original code point.  The scan stops at the first failing code point; a
// single PASS or FAIL is recorded at the end.
static void
test_utf8_round_trip(TestBatchRunner *runner) {
    int32_t code_point = 0;

    while (code_point <= 0x10FFFF) {
        char buffer[4];
        uint32_t size = StrHelp_encode_utf8_char(code_point, buffer);
        char *start = buffer;
        char *end   = start + size;

        // The checks run in order and stop at the first one that fails:
        // encoded length, validator agreement, backwards step, round trip.
        const int failed =
            size != StrHelp_UTF8_COUNT[(unsigned char)buffer[0]]
            || (!!StrHelp_utf8_valid(start, size)
                != !!S_utf8_valid_alt(start, size))
            || StrHelp_back_utf8_char(end, start) != start
            || StrHelp_decode_utf8_char(buffer) != code_point;
        if (failed) {
            break;
        }

        code_point++;
    }

    // Reaching one past the last code point means nothing failed.
    if (code_point != 0x110000) {
        FAIL(runner, "Failed round trip at 0x%.1X", (unsigned)code_point);
    }
    else {
        PASS(runner, "Successfully round tripped 0 - 0x10FFFF");
    }
}
コード例 #11
0
ファイル: CharBuf.c プロジェクト: pavansondur/lucy
void
CB_vcatf(CharBuf *self, const char *pattern, va_list args) {
    size_t      pattern_len   = strlen(pattern);
    const char *pattern_start = pattern;
    const char *pattern_end   = pattern + pattern_len;
    char        buf[64];

    for (; pattern < pattern_end; pattern++) {
        const char *slice_end = pattern;

        // Consume all characters leading up to a '%'.
        while (slice_end < pattern_end && *slice_end != '%') { slice_end++; }
        if (pattern != slice_end) {
            size_t size = slice_end - pattern;
            CB_Cat_Trusted_Str(self, pattern, size);
            pattern = slice_end;
        }

        if (pattern < pattern_end) {
            pattern++; // Move past '%'.

            switch (*pattern) {
                case '%': {
                        CB_Cat_Trusted_Str(self, "%", 1);
                    }
                    break;
                case 'o': {
                        Obj *obj = va_arg(args, Obj*);
                        if (!obj) {
                            CB_Cat_Trusted_Str(self, "[NULL]", 6);
                        }
                        else if (Obj_Is_A(obj, CHARBUF)) {
                            CB_Cat(self, (CharBuf*)obj);
                        }
                        else {
                            CharBuf *string = Obj_To_String(obj);
                            CB_Cat(self, string);
                            DECREF(string);
                        }
                    }
                    break;
                case 'i': {
                        int64_t val = 0;
                        size_t size;
                        if (pattern[1] == '8') {
                            val = va_arg(args, int32_t);
                            pattern++;
                        }
                        else if (pattern[1] == '3' && pattern[2] == '2') {
                            val = va_arg(args, int32_t);
                            pattern += 2;
                        }
                        else if (pattern[1] == '6' && pattern[2] == '4') {
                            val = va_arg(args, int64_t);
                            pattern += 2;
                        }
                        else {
                            S_die_invalid_pattern(pattern_start);
                        }
                        size = sprintf(buf, "%" I64P, val);
                        CB_Cat_Trusted_Str(self, buf, size);
                    }
                    break;
                case 'u': {
                        uint64_t val = 0;
                        size_t size;
                        if (pattern[1] == '8') {
                            val = va_arg(args, uint32_t);
                            pattern += 1;
                        }
                        else if (pattern[1] == '3' && pattern[2] == '2') {
                            val = va_arg(args, uint32_t);
                            pattern += 2;
                        }
                        else if (pattern[1] == '6' && pattern[2] == '4') {
                            val = va_arg(args, uint64_t);
                            pattern += 2;
                        }
                        else {
                            S_die_invalid_pattern(pattern_start);
                        }
                        size = sprintf(buf, "%" U64P, val);
                        CB_Cat_Trusted_Str(self, buf, size);
                    }
                    break;
                case 'f': {
                        if (pattern[1] == '6' && pattern[2] == '4') {
                            double num  = va_arg(args, double);
                            char bigbuf[512];
                            size_t size = sprintf(bigbuf, "%g", num);
                            CB_Cat_Trusted_Str(self, bigbuf, size);
                            pattern += 2;
                        }
                        else {
                            S_die_invalid_pattern(pattern_start);
                        }
                    }
                    break;
                case 'x': {
                        if (pattern[1] == '3' && pattern[2] == '2') {
                            unsigned long val = va_arg(args, uint32_t);
                            size_t size = sprintf(buf, "%.8lx", val);
                            CB_Cat_Trusted_Str(self, buf, size);
                            pattern += 2;
                        }
                        else {
                            S_die_invalid_pattern(pattern_start);
                        }
                    }
                    break;
                case 's': {
                        char *string = va_arg(args, char*);
                        if (string == NULL) {
                            CB_Cat_Trusted_Str(self, "[NULL]", 6);
                        }
                        else {
                            size_t size = strlen(string);
                            if (StrHelp_utf8_valid(string, size)) {
                                CB_Cat_Trusted_Str(self, string, size);
                            }
                            else {
                                CB_Cat_Trusted_Str(self, "[INVALID UTF8]", 14);
                            }
                        }
                    }
                    break;
                default: {
                        // Assume NULL-terminated pattern string, which
                        // eliminates the need for bounds checking if '%' is
                        // the last visible character.
                        S_die_invalid_pattern(pattern_start);
                    }
            }
コード例 #12
0
ファイル: Json.c プロジェクト: pavansondur/lucy
// Decode the JSON escape sequences in the text between `top` (inclusive)
// and `end` (exclusive) and return the result as a new CharBuf.
//
// Returns NULL with an error set if an escape is unknown or truncated, if
// a \u escape is not followed by exactly four hexadecimal digits, if a \u
// escape names a surrogate code point, or if the decoded text is not valid
// UTF-8.
static CharBuf*
S_unescape_text(char *const top, char *const end) {
    // The unescaped string will never be longer than the escaped string
    // because only a \u escape can theoretically be too long and
    // StrHelp_encode_utf8_char guards against sequences over 4 bytes.
    // Therefore we can allocate once and not worry about reallocating.
    size_t cap = end - top + 1;
    char *target_buf = (char*)MALLOCATE(cap);
    size_t target_size = 0;
    for (char *text = top; text < end; text++) {
        if (*text != '\\') {
            target_buf[target_size++] = *text;
            continue;
        }

        // Process escape.
        text++;
        if (text >= end) {
            // Backslash was the last character: nothing left to decode.
            FREEMEM(target_buf);
            SET_ERROR(CB_newf("Illegal escape"), text - 1, end);
            return NULL;
        }
        switch (*text) {
            case '"':
                target_buf[target_size++] = '"';
                break;
            case '\\':
                target_buf[target_size++] = '\\';
                break;
            case '/':
                target_buf[target_size++] = '/';
                break;
            case 'b':
                target_buf[target_size++] = '\b';
                break;
            case 'f':
                target_buf[target_size++] = '\f';
                break;
            case 'n':
                target_buf[target_size++] = '\n';
                break;
            case 'r':
                target_buf[target_size++] = '\r';
                break;
            case 't':
                target_buf[target_size++] = '\t';
                break;
            case 'u': {
                    // All four digits must lie before `end`.
                    if (end - text < 5) {
                        FREEMEM(target_buf);
                        SET_ERROR(CB_newf("Invalid \\u escape"), text - 1, end);
                        return NULL;
                    }

                    // Decode exactly four hex digits by hand.  strtol()
                    // would also accept leading whitespace, a sign, or a
                    // "0x" prefix, none of which is legal here.
                    uint32_t code_point = 0;
                    int num_digits = 0;
                    while (num_digits < 4) {
                        const char digit = text[1 + num_digits];
                        uint32_t nibble;
                        if (digit >= '0' && digit <= '9') {
                            nibble = (uint32_t)(digit - '0');
                        }
                        else if (digit >= 'a' && digit <= 'f') {
                            nibble = (uint32_t)(digit - 'a') + 10;
                        }
                        else if (digit >= 'A' && digit <= 'F') {
                            nibble = (uint32_t)(digit - 'A') + 10;
                        }
                        else {
                            break;
                        }
                        code_point = (code_point << 4) | nibble;
                        num_digits++;
                    }
                    text += 4;
                    if (num_digits != 4) {
                        FREEMEM(target_buf);
                        // text - 5 is the backslash that began the escape.
                        SET_ERROR(CB_newf("Invalid \\u escape"), text - 5, end);
                        return NULL;
                    }
                    if (code_point >= 0xD800 && code_point <= 0xDFFF) {
                        FREEMEM(target_buf);
                        SET_ERROR(CB_newf("Surrogate pairs not supported"),
                                  text - 5, end);
                        return NULL;
                    }
                    target_size += StrHelp_encode_utf8_char(code_point,
                                                            target_buf + target_size);
                }
                break;
            default:
                FREEMEM(target_buf);
                SET_ERROR(CB_newf("Illegal escape"), text - 1, end);
                return NULL;
        }
    }

    // NULL-terminate, sanity check, then return the unescaped string.
    target_buf[target_size] = '\0';
    if (!StrHelp_utf8_valid(target_buf, target_size)) {
        FREEMEM(target_buf);
        CharBuf *mess = MAKE_MESS("Bad UTF-8 in JSON");
        Err_set_error(Err_new(mess));
        return NULL;
    }
    return CB_new_steal_from_trusted_str(target_buf, target_size, cap);
}