Пример #1
0
/*
 * Appends `length` bytes of `buff` to `str`, mapping every byte through
 * myhtml_string_chars_lowercase_map and applying input preprocessing:
 *   - CR (0x0D) is stored as LF (0x0A); a CR immediately followed by LF
 *     is stored as a single LF;
 *   - NUL (0x00) is stored as the UTF-8 bytes of U+FFFD (EF BF BD).
 *
 * The string is terminated with '\0'; the terminator is not counted in
 * str->length.
 */
void myhtml_string_append_lowercase_with_preprocessing(myhtml_string_t* str, const char* buff, size_t length)
{
    MyHTML_STRING_REALLOC_IF_NEED(str, (length + 1), 32);
    
    unsigned char *data = (unsigned char*)str->data;
    const unsigned char *u_buff = (const unsigned char*)buff;
    
    for (size_t i = 0; i < length; i++)
    {
        if(u_buff[i] == 0x0D)
        {
            data[str->length] = 0x0A;
            
            /* CR LF pair: consume the LF as well */
            if((i+1) < length && u_buff[(i + 1)] == 0x0A) {
                i++;
            }
        }
        else if(u_buff[i] == 0x00)
        {
            MyHTML_STRING_REALLOC_IF_NEED(str, (length + 4), 32);
            
            /*
             * The growth check above may have replaced str->data, so the
             * cached pointer has to be re-read before writing through it.
             */
            data = (unsigned char*)str->data;
            
            // Unicode Character 'REPLACEMENT CHARACTER' (U+FFFD)
            data[str->length] = 0xEF; str->length++;
            data[str->length] = 0xBF; str->length++;
            data[str->length] = 0xBD;
        }
        else {
            data[str->length] = myhtml_string_chars_lowercase_map[ u_buff[i] ];
        }
        
        /* counts the byte (or the last replacement byte) stored above */
        str->length++;
    }
    
    str->data[str->length] = '\0';
}
Пример #2
0
/*
 * Appends `length` bytes of `buff` to `str`. Every byte is copied as-is
 * except NUL (0x00), which is written as the UTF-8 bytes of U+FFFD
 * (EF BF BD). The string is terminated with '\0' afterwards; the terminator
 * is not counted in str->length.
 */
void myhtml_string_append_with_replacement_null_characters_only(myhtml_string_t* str, const char* buff, size_t length)
{
    MyHTML_STRING_REALLOC_IF_NEED(str, (length + 1), 0);
    
    const unsigned char *src = (const unsigned char*)buff;
    unsigned char *dst = (unsigned char*)str->data;
    size_t pos = 0;
    
    while (pos < length)
    {
        const unsigned char byte = src[pos++];
        
        if(byte != 0x00) {
            dst[str->length++] = byte;
            continue;
        }
        
        /* the replacement takes three bytes where the input had one:
           grow the buffer and re-read the data pointer */
        myhtml_string_realloc(str, (str->size + 5));
        dst = (unsigned char*)str->data;
        
        // Unicode Character 'REPLACEMENT CHARACTER' (U+FFFD)
        dst[str->length++] = 0xEF;
        dst[str->length++] = 0xBF;
        dst[str->length++] = 0xBD;
    }
    
    str->data[str->length] = '\0';
}
Пример #3
0
/*
 * Feeds `length` bytes of `buff` to the decoder selected by `encoding` and
 * appends every decoded code point to `str` through
 * myhtml_encoding_codepoint_to_lowercase_ascii_utf_8. Results that occupy a
 * single byte are preprocessed:
 *   - '\r' is stored as '\n', and a '\n' byte directly following it in
 *     `buff` is skipped;
 *   - 0x00 is stored as the UTF-8 bytes of U+FFFD (EF BF BD) unless
 *     `emit_null_chars` is true.
 *
 * Returns str->length, WITHOUT writing a terminating '\0', when the last
 * byte of `buff` produced a '\r'. Otherwise stores '\0' at str->length and
 * returns 0.
 * NOTE(review): the non-zero return presumably lets the caller handle a
 * CR LF pair that is split across two chunks - confirm against callers.
 */
size_t myhtml_string_append_lowercase_chunk_with_convert_encoding_with_preprocessing(myhtml_string_t* str, myhtml_encoding_result_t* res, const char* buff, size_t length, myhtml_encoding_t encoding, bool emit_null_chars)
{
    MyHTML_STRING_REALLOC_IF_NEED(str, (length + 1), 0);
    
    unsigned const char* u_buff = (unsigned const char*)buff;
    const myhtml_encoding_custom_f func = myhtml_encoding_get_function_by_id(encoding);
    
    for (size_t i = 0; i < length; i++)
    {
        /* bytes for which the decoder does not report STATUS_OK produce no output */
        if(func(u_buff[i], res) == MyHTML_ENCODING_STATUS_OK) {
            MyHTML_STRING_REALLOC_IF_NEED(str, 5, 1);
            
            /* the encoded bytes are written at the current end of the string;
               str->length is only advanced further below */
            size_t len = myhtml_encoding_codepoint_to_lowercase_ascii_utf_8(res->result, &str->data[str->length]);
            
            if(len == 1) {
                if(str->data[str->length] == '\r') {
                    str->data[str->length] = '\n';
                    
                    if((i + 1) < length) {
                        /* looks at the next raw (undecoded) input byte */
                        if(buff[(i + 1)] == '\n')
                            i++;
                    }
                    else {
                        /* '\r' came from the last input byte: count it and
                           return early, no terminator is written */
                        str->length++;
                        return str->length;
                    }
                }
                else if(str->data[str->length] == 0x00 && emit_null_chars == false)
                {
                    myhtml_string_realloc(str, (str->size + 5));
                    
                    // Unicode Character 'REPLACEMENT CHARACTER' (U+FFFD)
                    str->data[str->length] = (char)0xEF; str->length++;
                    str->data[str->length] = (char)0xBF; str->length++;
                    str->data[str->length] = (char)0xBD;
                }
            }
            
            /* on the replacement path len is 1, which accounts for the third
               replacement byte after the two increments above */
            str->length += len;
        }
    }
    
    MyHTML_STRING_APPEND_BYTE_WITHOUT_INCREMENT('\0', str, 1);
    
    return 0;
}
Пример #4
0
/////////////////////////////////////////////////////////
//// Append API
////
/////////////////////////////////////////////////////////
/*
 * Appends `length` bytes of `buff` to `str` unchanged and terminates the
 * result with '\0'. The terminator is not counted in str->length.
 */
void myhtml_string_append(myhtml_string_t* str, const char* buff, size_t length)
{
    MyHTML_STRING_REALLOC_IF_NEED(str, (length + 1), 0);
    
    /* current end of the string, taken after the growth check */
    char *tail = &str->data[str->length];
    
    memcpy(tail, buff, length);
    tail[length] = '\0';
    
    str->length += length;
}
Пример #5
0
/*
 * Appends the single byte `sm` to `str` and stores a '\0' terminator right
 * after it. The terminator is not counted in str->length.
 */
void _myhtml_string_charef_append(myhtml_string_t* str, const char sm)
{
    MyHTML_STRING_REALLOC_IF_NEED(str, (str->length + 2), 32);
    
    str->data[str->length++] = sm;
    str->data[str->length] = '\0';
}
Пример #6
0
/*
 * Truncates `str` to chunk->begin, then writes the code point held in
 * chunk->l_data there via myhtml_encoding_codepoint_to_ascii_utf_8 and
 * terminates the string with '\0'.
 */
void _myhtml_string_append_char_references_state_end(myhtml_string_char_ref_chunk_t *chunk, myhtml_string_t* str)
{
    /* everything stored from chunk->begin onwards is overwritten */
    str->length = chunk->begin;
    
    /* up to 4 UTF-8 bytes plus the terminating \0 */
    MyHTML_STRING_REALLOC_IF_NEED(str, (chunk->begin + 5), 12);
    
    const size_t written = myhtml_encoding_codepoint_to_ascii_utf_8(chunk->l_data, &str->data[str->length]);
    
    str->length += written;
    str->data[str->length] = '\0';
}
Пример #7
0
/*
 * Feeds `length` bytes of `buff` to the decoder selected by `encoding` and
 * appends every decoded code point to `str` as UTF-8, with preprocessing on
 * results that occupy a single byte:
 *   - if the byte already stored before the current position is '\r'
 *     (possibly left there by an earlier call), it is rewritten to '\n';
 *     when the new byte is '\n' it is dropped, so CR LF becomes one LF;
 *   - 0x00 is stored as the UTF-8 bytes of U+FFFD (EF BF BD). This also
 *     applies when the NUL directly follows a '\r'.
 *
 * A '\r' that is the very last stored byte stays '\r' when this function
 * returns. The string is terminated with '\0', which is not counted in
 * str->length.
 */
void myhtml_string_append_chunk_with_convert_encoding_with_preprocessing(myhtml_string_t* str, myhtml_encoding_result_t* res, const char* buff, size_t length, myhtml_encoding_t encoding)
{
    unsigned const char* u_buff = (unsigned const char*)buff;
    myhtml_encoding_custom_f func = myhtml_encoding_get_function_by_id(encoding);
    
    for (size_t i = 0; i < length; i++)
    {
        /* bytes for which the decoder does not report STATUS_OK produce no output */
        if(func(u_buff[i], res) != MyHTML_ENCODING_STATUS_OK)
            continue;
        
        MyHTML_STRING_REALLOC_IF_NEED(str, 4, 32);
        
        /* written at the current end; str->length is advanced further below */
        size_t len = myhtml_encoding_codepoint_to_ascii_utf_8(res->result, &str->data[str->length]);
        
        if(len == 1) {
            // change \r\n to \n
            if(str->length > 0 && str->data[(str->length - 1)] == '\r') {
                str->data[(str->length - 1)] = '\n';
                
                /* the LF of a CR LF pair is dropped: leave str->length as is */
                if(str->data[str->length] == '\n')
                    continue;
            }
            
            /* checked even right after a '\r', so "\r\0" cannot store a raw NUL */
            if(str->data[str->length] == 0x00)
            {
                MyHTML_STRING_REALLOC_IF_NEED(str, (length + 4), 32);
                
                // Unicode Character 'REPLACEMENT CHARACTER' (U+FFFD)
                str->data[str->length] = (char)0xEF; str->length++;
                str->data[str->length] = (char)0xBF; str->length++;
                str->data[str->length] = (char)0xBD;
            }
        }
        
        str->length += len;
    }
    
    MyHTML_STRING_APPEND_BYTE_WITHOUT_INCREMENT('\0', str, 1);
}
Пример #8
0
/*
 * Feeds the single byte `data` to the decoder selected by `encoding`. When
 * the decoder reports MyHTML_ENCODING_STATUS_OK, the code point in
 * res->result is appended to `str` as UTF-8 and a '\0' is stored after it.
 * Otherwise `str` is left untouched.
 */
void myhtml_string_append_one_with_convert_encoding(myhtml_string_t* str, myhtml_encoding_result_t* res, const char data, myhtml_encoding_t encoding)
{
    const myhtml_encoding_custom_f decode = myhtml_encoding_get_function_by_id(encoding);
    
    if(decode((unsigned const char)data, res) != MyHTML_ENCODING_STATUS_OK)
        return;
    
    MyHTML_STRING_REALLOC_IF_NEED(str, 5, 0);
    
    /* current end of the string, taken after the growth check */
    char *tail = &str->data[str->length];
    str->length += myhtml_encoding_codepoint_to_ascii_utf_8(res->result, tail);
    
    MyHTML_STRING_APPEND_BYTE_WITHOUT_INCREMENT_REALLOC('\0', str);
}
Пример #9
0
size_t _myhtml_string_append_char_references_state_2(myhtml_string_char_ref_chunk_t *chunk, myhtml_string_t* str, const char* buff, size_t offset, size_t size)
{
    size_t tmp_offset = offset;
    
    const charef_entry_t *current_entry = myhtml_charef_find_by_pos(chunk->charef_res.curr_entry->next, buff, &offset, size, &chunk->charef_res);
    
    if(chunk->charef_res.is_done) {
        chunk->state = 0;
        
        if(buff[offset] == ';')
            offset++;
        else {
            if(chunk->is_attributes &&
               (buff[offset] == '=' || myhtml_string_alphanumeric_character[ (unsigned char)buff[offset] ] != 0xff))
            {
                if(chunk->encoding == MyHTML_ENCODING_UTF_8)
                    myhtml_string_append_with_preprocessing(str, &buff[tmp_offset], (offset - tmp_offset));
                else
                    myhtml_string_append_chunk_with_convert_encoding_with_preprocessing(str, &chunk->res, &buff[tmp_offset], (offset - tmp_offset), chunk->encoding);
                
                return offset;
            }
        }
        
        if(current_entry->codepoints_len)
        {
            for (size_t i = 0; i < current_entry->codepoints_len; i++) {
                MyHTML_STRING_REALLOC_IF_NEED(str, (chunk->begin + 4), 32);
                
                chunk->begin += myhtml_encoding_codepoint_to_ascii_utf_8(current_entry->codepoints[i], &str->data[chunk->begin]);
            }
            
            str->length = chunk->begin;
        }
        else {
            if(chunk->encoding == MyHTML_ENCODING_UTF_8)
                myhtml_string_append_with_preprocessing(str, &buff[tmp_offset], (offset - tmp_offset));
            else
                myhtml_string_append_chunk_with_convert_encoding_with_preprocessing(str, &chunk->res, &buff[tmp_offset], (offset - tmp_offset), chunk->encoding);
        }
        
        chunk->charef_res.last_entry = NULL;
    }
    else {
        if(chunk->encoding == MyHTML_ENCODING_UTF_8)
            myhtml_string_append_with_preprocessing(str, &buff[tmp_offset], (offset - tmp_offset));
        else
            myhtml_string_append_chunk_with_convert_encoding_with_preprocessing(str, &chunk->res, &buff[tmp_offset], (offset - tmp_offset), chunk->encoding);
    }
    
    return offset;
}
Пример #10
0
/*
 * Feeds `length` bytes of `buff` to the decoder selected by `encoding` and
 * appends every code point it completes to `str` as UTF-8. No CR/LF or NUL
 * preprocessing is applied. A '\0' is stored at str->length afterwards.
 */
void myhtml_string_append_chunk_with_convert_encoding(myhtml_string_t* str, myhtml_encoding_result_t* res, const char* buff, size_t length, myhtml_encoding_t encoding)
{
    const unsigned char* bytes = (const unsigned char*)buff;
    const myhtml_encoding_custom_f decode = myhtml_encoding_get_function_by_id(encoding);
    size_t pos = 0;
    
    while (pos < length)
    {
        /* bytes for which the decoder does not report STATUS_OK produce no output */
        if(decode(bytes[pos++], res) != MyHTML_ENCODING_STATUS_OK)
            continue;
        
        MyHTML_STRING_REALLOC_IF_NEED(str, 5, 0);
        str->length += myhtml_encoding_codepoint_to_ascii_utf_8(res->result, &str->data[str->length]);
    }
    
    MyHTML_STRING_APPEND_BYTE_WITHOUT_INCREMENT('\0', str, 1);
}
Пример #11
0
/*
 * Appends `length` bytes of `data` to `str`, mapping each byte through
 * myhtml_string_chars_lowercase_map, and terminates the result with '\0'.
 * The terminator is not counted in str->length.
 */
void myhtml_string_append_lowercase(myhtml_string_t* str, const char* data, size_t length)
{
    MyHTML_STRING_REALLOC_IF_NEED(str, (length + 1), 0);
    
    const unsigned char *src = (const unsigned char*)data;
    unsigned char *dst = (unsigned char*)&str->data[str->length];
    
    for(size_t pos = 0; pos < length; pos++)
        dst[pos] = myhtml_string_chars_lowercase_map[ src[pos] ];
    
    dst[length] = '\0';
    str->length += length;
}
Пример #12
0
/*
 * Appends `length` bytes of `buff` to `str` with input preprocessing:
 *   - CR (0x0D) is stored as LF (0x0A); a CR immediately followed by LF
 *     is stored as a single LF;
 *   - NUL (0x00) is stored as the UTF-8 bytes of U+FFFD (EF BF BD) unless
 *     `emit_null_chars` is true, in which case it is copied as-is.
 *
 * Returns str->length, WITHOUT writing a terminating '\0', when the last
 * byte of `buff` is a CR. Otherwise terminates the string and returns 0.
 */
size_t myhtml_string_append_with_preprocessing(myhtml_string_t* str, const char* buff, size_t length, bool emit_null_chars)
{
    MyHTML_STRING_REALLOC_IF_NEED(str, (length + 1), 0);
    
    const unsigned char *in = (const unsigned char*)buff;
    unsigned char *out = (unsigned char*)str->data;
    
    for (size_t pos = 0; pos < length; pos++)
    {
        const unsigned char byte = in[pos];
        
        if(byte == 0x0D) {                      /* \r */
            out[str->length++] = 0x0A;          /* \n */
            
            /* CR is the last byte of this buffer: stop here, unterminated */
            if((pos + 1) >= length)
                return str->length;
            
            /* CR LF pair: consume the LF as well */
            if(in[pos + 1] == 0x0A)
                pos++;
            
            continue;
        }
        
        if(byte == 0x00 && !emit_null_chars) {
            /* three bytes replace one: grow and re-read the data pointer */
            myhtml_string_realloc(str, (str->size + 5));
            out = (unsigned char*)str->data;
            
            // Unicode Character 'REPLACEMENT CHARACTER' (U+FFFD)
            out[str->length++] = 0xEF;
            out[str->length++] = 0xBF;
            out[str->length++] = 0xBD;
            
            continue;
        }
        
        out[str->length++] = byte;
    }
    
    str->data[str->length] = '\0';
    
    return 0;
}
Пример #13
0
/*
 * Truncates `str` to chunk->begin, remaps the code point in chunk->l_data
 * and writes it there as UTF-8, then terminates the string with '\0'.
 *
 * Remapping (the result is stored back into chunk->l_data):
 *   - values up to 0x9F are looked up in replacement_character[];
 *   - values in 0xD800..0xDFFF and values above 0x10FFFF become
 *     replacement_character[0].
 */
void _myhtml_string_append_char_references_state_end(myhtml_string_char_ref_chunk_t *chunk, myhtml_string_t* str)
{
    /* everything stored from chunk->begin onwards is overwritten */
    str->length = chunk->begin;
    
    /* up to 4 UTF-8 bytes plus the terminating \0 */
    MyHTML_STRING_REALLOC_IF_NEED(str, (chunk->begin + 5), 12);
    
    if(chunk->l_data <= 0x9F) {
        chunk->l_data = replacement_character[chunk->l_data];
    }
    else if((chunk->l_data >= 0xD800 && chunk->l_data <= 0xDFFF) || chunk->l_data > 0x10FFFF) {
        chunk->l_data = replacement_character[0];
    }
    
    const size_t written = myhtml_encoding_codepoint_to_ascii_utf_8(chunk->l_data, &str->data[chunk->begin]);
    
    str->length += written;
    str->data[str->length] = '\0';
}
Пример #14
0
/*
 * Continues a named character reference lookup in `buff` from `offset`
 * (myhtml_charef_find_by_pos advances `offset`; its result is stored in
 * chunk->entry).
 *
 * When the lookup is done, chunk->state is reset to 0 and, if the entry has
 * code points, they are written as UTF-8 starting at chunk->begin and
 * str->length is set to the new chunk->begin. In every other case (lookup
 * not done, or an entry without code points) the bytes consumed by this
 * call are appended to `str` as they are.
 *
 * Returns the updated offset.
 */
size_t _myhtml_string_append_char_references_state_2(myhtml_string_char_ref_chunk_t *chunk, myhtml_string_t* str, const char* buff, size_t offset, size_t size)
{
    const size_t start = offset;
    int finished = 0;
    int resolved = 0;   /* set once the entry's code points have been written */
    
    chunk->entry = myhtml_charef_find_by_pos(chunk->entry->next, buff, &offset, size, &finished);
    
    if(finished) {
        chunk->state = 0;
        
        if(chunk->entry->codepoints_len) {
            for (size_t idx = 0; idx < chunk->entry->codepoints_len; idx++) {
                MyHTML_STRING_REALLOC_IF_NEED(str, (chunk->begin + 4), 32);
                
                chunk->begin += myhtml_encoding_codepoint_to_ascii_utf_8(chunk->entry->codepoints[idx], &str->data[chunk->begin]);
            }
            
            str->length = chunk->begin;
            resolved = 1;
        }
    }
    
    if(!resolved) {
        /* nothing to substitute: keep the consumed source bytes */
        if(chunk->encoding == MyHTML_ENCODING_UTF_8)
            myhtml_string_append(str, &buff[start], (offset - start));
        else
            myhtml_string_append_chunk_with_convert_encoding(str, &chunk->res, &buff[start], (offset - start), chunk->encoding);
    }
    
    return offset;
}
Пример #15
0
/*
 * Finishes character reference processing for `chunk`:
 *   - in state 4 or 5 the work is delegated to
 *     _myhtml_string_append_char_references_state_end;
 *   - in state 2 with a non-NULL chunk->charef_res.last_entry, that entry's
 *     code points are written as UTF-8 starting at chunk->begin and
 *     str->length is set to the new chunk->begin;
 *   - any other state writes nothing.
 *
 * Afterwards, in every case, a '\r' that is the last byte of `str` is
 * rewritten to '\n'.
 */
void myhtml_string_append_charef_end(myhtml_string_char_ref_chunk_t *chunk, myhtml_string_t* str)
{
    switch (chunk->state) {
        case 4:
        case 5:
            _myhtml_string_append_char_references_state_end(chunk, str);
            break;
            
        case 2:
            if(chunk->charef_res.last_entry) {
                const charef_entry_t *last = chunk->charef_res.last_entry;
                
                for (size_t idx = 0; idx < last->codepoints_len; idx++) {
                    MyHTML_STRING_REALLOC_IF_NEED(str, (chunk->begin + 4), 32);
                    
                    chunk->begin += myhtml_encoding_codepoint_to_ascii_utf_8(last->codepoints[idx], &str->data[chunk->begin]);
                }
                
                str->length = chunk->begin;
            }
            break;
            
        default:
            break;
    }
    
    if(str->length && str->data[ (str->length - 1) ] == '\r')
        str->data[ (str->length - 1) ] = '\n';
}
Пример #16
0
/*
 * Appends the single byte `data` to `str`, followed by a '\0'.
 * NOTE(review): the three macros are defined outside this view. Judging by
 * their names, the first ensures capacity, the second stores the byte and
 * advances str->length, and the third stores the terminator without
 * advancing it - confirm against the macro definitions.
 */
void myhtml_string_append_one(myhtml_string_t* str, const char data)
{
    MyHTML_STRING_REALLOC_IF_NEED(str, 2, 1);
    MyHTML_STRING_APPEND_BYTE_WITHOUT_REALLOC(data, str);
    MyHTML_STRING_APPEND_BYTE_WITHOUT_INCREMENT_REALLOC('\0', str);
}