Example #1
0
/*
 * Decode `length` bytes of `buff` with the decoder selected by `encoding`
 * and append each completed code point to `str`.
 *
 * str      - destination string; `data` is written at `length`, which is
 *            advanced by whatever myhtml_encoding_codepoint_to_ascii_utf_8()
 *            returns (presumably the number of bytes it emitted).
 * res      - decoder state, passed to the decoder on every byte; its `result`
 *            field is read whenever the decoder reports
 *            MyHTML_ENCODING_STATUS_OK.
 * buff     - input bytes in the source encoding.
 * length   - number of bytes to read from `buff`.
 * encoding - id used to look up the per-byte decoder.
 *
 * A '\0' is stored after the appended data without advancing `str->length`.
 */
void myhtml_string_append_chunk_with_convert_encoding(myhtml_string_t* str, myhtml_encoding_result_t* res, const char* buff, size_t length, myhtml_encoding_t encoding)
{
    const myhtml_encoding_custom_f decode_byte = myhtml_encoding_get_function_by_id(encoding);
    
    // the decoder is handed raw byte values, so read the input as unsigned
    unsigned const char* raw = (unsigned const char*)buff;
    
    for (size_t pos = 0; pos < length; pos++)
    {
        // bytes that do not complete a code point produce no output
        if(decode_byte(raw[pos], res) != MyHTML_ENCODING_STATUS_OK)
            continue;
        
        // presumably guarantees room for the encoded code point -- see the macro
        MyHTML_STRING_REALLOC_IF_NEED(str, 5, 0);
        
        str->length += myhtml_encoding_codepoint_to_ascii_utf_8(res->result, &str->data[str->length]);
    }
    
    MyHTML_STRING_APPEND_BYTE_WITHOUT_INCREMENT('\0', str, 1);
}
Example #2
0
/*
 * Decode `length` bytes of `buff` with the decoder selected by `encoding`,
 * append each completed code point to `str` through
 * myhtml_encoding_codepoint_to_lowercase_ascii_utf_8(), and apply input
 * preprocessing to single-byte results:
 *
 *   - '\r' is stored as '\n'; when the next input byte is '\n' that byte is
 *     skipped, so "\r\n" collapses to a single '\n'.
 *   - 0x00 is replaced by U+FFFD (EF BF BD) unless `emit_null_chars` is true.
 *
 * str             - destination string (data/length/size are updated).
 * res             - decoder state; `result` is read after every
 *                   MyHTML_ENCODING_STATUS_OK.
 * buff, length    - input bytes in the source encoding.
 * encoding        - id used to look up the per-byte decoder.
 * emit_null_chars - when true, NUL characters are kept as-is.
 *
 * Returns the new `str->length` when the chunk ends with '\r' (the caller
 * cannot know yet whether a '\n' follows in the next chunk), otherwise 0.
 * On both paths a '\0' is stored after the data without advancing
 * `str->length`.
 */
size_t myhtml_string_append_lowercase_chunk_with_convert_encoding_with_preprocessing(myhtml_string_t* str, myhtml_encoding_result_t* res, const char* buff, size_t length, myhtml_encoding_t encoding, bool emit_null_chars)
{
    MyHTML_STRING_REALLOC_IF_NEED(str, (length + 1), 0);
    
    unsigned const char* u_buff = (unsigned const char*)buff;
    const myhtml_encoding_custom_f func = myhtml_encoding_get_function_by_id(encoding);
    
    for (size_t i = 0; i < length; i++)
    {
        if(func(u_buff[i], res) == MyHTML_ENCODING_STATUS_OK) {
            MyHTML_STRING_REALLOC_IF_NEED(str, 5, 1);
            
            size_t len = myhtml_encoding_codepoint_to_lowercase_ascii_utf_8(res->result, &str->data[str->length]);
            
            if(len == 1) {
                if(str->data[str->length] == '\r') {
                    str->data[str->length] = '\n';
                    
                    if((i + 1) < length) {
                        // NOTE(review): the look-ahead inspects the raw next byte and
                        // skips it without feeding it to the decoder -- fine for
                        // ASCII-compatible encodings; confirm for others.
                        if(buff[(i + 1)] == '\n')
                            i++;
                    }
                    else {
                        // Chunk ends on CR: keep the converted '\n' and report the
                        // position to the caller. Terminate the string here as well,
                        // exactly like the normal exit below does; previously this
                        // path returned with no '\0' after the data.
                        str->length++;
                        
                        MyHTML_STRING_APPEND_BYTE_WITHOUT_INCREMENT('\0', str, 1);
                        
                        return str->length;
                    }
                }
                else if(str->data[str->length] == 0x00 && emit_null_chars == false)
                {
                    myhtml_string_realloc(str, (str->size + 5));
                    
                    // Unicode Character 'REPLACEMENT CHARACTER' (U+FFFD)
                    // Two of the three bytes advance length here; the shared
                    // `str->length += len` below accounts for the third.
                    str->data[str->length] = (char)0xEF; str->length++;
                    str->data[str->length] = (char)0xBF; str->length++;
                    str->data[str->length] = (char)0xBD;
                }
            }
            
            str->length += len;
        }
    }
    
    MyHTML_STRING_APPEND_BYTE_WITHOUT_INCREMENT('\0', str, 1);
    
    return 0;
}
Example #3
0
/*
 * Decode `length` bytes of `buff` with the decoder selected by `encoding`,
 * append each completed code point to `str` as UTF-8, and apply input
 * preprocessing:
 *
 *   - A '\r' already stored at the end of `str` (from this call or from data
 *     appended earlier) is rewritten to '\n' as soon as the next character
 *     arrives; if that next character is '\n' it is dropped, so "\r\n"
 *     collapses to a single '\n'.
 *   - A NUL character is replaced by U+FFFD (EF BF BD).
 *
 * A '\r' that is the very last character appended stays as '\r' until a
 * later append sees it. A '\0' is stored after the data without advancing
 * `str->length`.
 *
 * str          - destination string (data/length are updated).
 * res          - decoder state; `result` is read after every
 *                MyHTML_ENCODING_STATUS_OK.
 * buff, length - input bytes in the source encoding.
 * encoding     - id used to look up the per-byte decoder.
 */
void myhtml_string_append_chunk_with_convert_encoding_with_preprocessing(myhtml_string_t* str, myhtml_encoding_result_t* res, const char* buff, size_t length, myhtml_encoding_t encoding)
{
    unsigned const char* u_buff = (unsigned const char*)buff;
    myhtml_encoding_custom_f func = myhtml_encoding_get_function_by_id(encoding);
    
    for (size_t i = 0; i < length; i++)
    {
        // bytes that do not complete a code point produce no output
        if(func(u_buff[i], res) != MyHTML_ENCODING_STATUS_OK)
            continue;
        
        MyHTML_STRING_REALLOC_IF_NEED(str, 4, 32);
        
        size_t len = myhtml_encoding_codepoint_to_ascii_utf_8(res->result, &str->data[str->length]);
        
        // change \r\n to \n, and a lone \r to \n.
        // This must run for every emitted character, not only single-byte
        // ones: otherwise a '\r' followed by a multi-byte character is never
        // normalized. `len > 0` keeps the check tied to a character that was
        // actually written.
        if(len > 0 && str->length > 0 && str->data[(str->length - 1)] == '\r') {
            str->data[(str->length - 1)] = '\n';
            
            // the '\n' of a "\r\n" pair is dropped: length is left untouched
            if(len == 1 && str->data[str->length] == '\n')
                continue;
        }
        
        // No early exit above for other characters, so a NUL that directly
        // follows a '\r' is still replaced.
        if(len == 1 && str->data[str->length] == 0x00)
        {
            // NOTE(review): the request is sized by the input `length`, not by
            // the 3 bytes written here -- looks over-generous but harmless.
            MyHTML_STRING_REALLOC_IF_NEED(str, (length + 4), 32);
            
            // Unicode Character 'REPLACEMENT CHARACTER' (U+FFFD)
            // Two of the three bytes advance length here; the shared
            // `str->length += len` below accounts for the third.
            str->data[str->length] = (char)0xEF; str->length++;
            str->data[str->length] = (char)0xBF; str->length++;
            str->data[str->length] = (char)0xBD;
        }
        
        str->length += len;
    }
    
    MyHTML_STRING_APPEND_BYTE_WITHOUT_INCREMENT('\0', str, 1);
}