/*
 * Append a chunk of bytes to `str`, passing every byte through the decoder
 * function looked up for `encoding`.
 *
 * str      - destination string; its buffer is grown through
 *            MyHTML_STRING_REALLOC_IF_NEED before each write
 * res      - decoder state handed to the decoder on every byte; res->result
 *            is read each time the decoder reports MyHTML_ENCODING_STATUS_OK
 * buff     - input bytes (read as unsigned char)
 * length   - number of bytes in buff
 * encoding - id used to select the decoder function
 *
 * Bytes for which the decoder does not report MyHTML_ENCODING_STATUS_OK
 * produce no output. After the loop a '\0' is stored behind the appended
 * data; judging by the macro name it is not counted in str->length.
 */
void myhtml_string_append_chunk_with_convert_encoding(myhtml_string_t* str, myhtml_encoding_result_t* res,
                                                      const char* buff, size_t length,
                                                      myhtml_encoding_t encoding)
{
    const myhtml_encoding_custom_f decode = myhtml_encoding_get_function_by_id(encoding);
    unsigned const char* bytes = (unsigned const char*)buff;
    size_t pos = 0;

    while (pos < length)
    {
        unsigned const char current = bytes[pos];
        pos++;

        // the decoder has not produced a complete result yet: nothing to emit
        if(decode(current, res) != MyHTML_ENCODING_STATUS_OK)
            continue;

        // make room before writing the converted code point at the tail
        MyHTML_STRING_REALLOC_IF_NEED(str, 5, 0);

        size_t written = myhtml_encoding_codepoint_to_ascii_utf_8(res->result, &str->data[str->length]);
        str->length += written;
    }

    MyHTML_STRING_APPEND_BYTE_WITHOUT_INCREMENT('\0', str, 1);
}
/*
 * Append a chunk of bytes to `str`, passing every byte through the decoder
 * function looked up for `encoding`, converting each decoded result with
 * myhtml_encoding_codepoint_to_lowercase_ascii_utf_8, and applying input
 * preprocessing to single-byte results:
 *   - '\r' is stored as '\n'; if the next raw input byte is '\n' it is skipped,
 *     so "\r\n" yields a single '\n';
 *   - 0x00 is replaced by U+FFFD (EF BF BD) unless emit_null_chars is true.
 *
 * str             - destination string, grown as needed
 * res             - decoder state; res->result is read after every byte for
 *                   which the decoder reports MyHTML_ENCODING_STATUS_OK
 * buff            - input bytes
 * length          - number of bytes in buff
 * encoding        - id used to select the decoder function
 * emit_null_chars - when true, NUL characters are appended unchanged
 *
 * Returns 0 when the whole chunk was processed. If the last input byte of the
 * chunk decodes to '\r', the function cannot look ahead for a '\n'; it stores
 * '\n', and returns the updated (non-zero) str->length instead, presumably so
 * the caller can deal with a '\n' at the start of the next chunk.
 *
 * On both exits a '\0' is stored behind the data without being counted in
 * str->length.
 */
size_t myhtml_string_append_lowercase_chunk_with_convert_encoding_with_preprocessing(myhtml_string_t* str, myhtml_encoding_result_t* res,
                                                                                   const char* buff, size_t length,
                                                                                   myhtml_encoding_t encoding, bool emit_null_chars)
{
    MyHTML_STRING_REALLOC_IF_NEED(str, (length + 1), 0);

    unsigned const char* u_buff = (unsigned const char*)buff;
    const myhtml_encoding_custom_f func = myhtml_encoding_get_function_by_id(encoding);

    for (size_t i = 0; i < length; i++)
    {
        if(func(u_buff[i], res) != MyHTML_ENCODING_STATUS_OK)
            continue;

        MyHTML_STRING_REALLOC_IF_NEED(str, 5, 1);

        size_t len = myhtml_encoding_codepoint_to_lowercase_ascii_utf_8(res->result, &str->data[str->length]);

        if(len == 1) {
            if(str->data[str->length] == '\r') {
                str->data[str->length] = '\n';

                if((i + 1) < length) {
                    // NOTE(review): the look-ahead inspects the raw next input byte,
                    // not a decoded character -- confirm this is intended for
                    // encodings that are not ASCII-compatible.
                    if(buff[(i + 1)] == '\n')
                        i++;
                }
                else {
                    str->length++;

                    // keep the string terminated on this exit as well; the
                    // terminator is not counted in the returned length
                    MyHTML_STRING_APPEND_BYTE_WITHOUT_INCREMENT('\0', str, 1);

                    return str->length;
                }
            }
            else if(str->data[str->length] == 0x00 && emit_null_chars == false)
            {
                myhtml_string_realloc(str, (str->size + 5));

                // Unicode Character 'REPLACEMENT CHARACTER' (U+FFFD);
                // the third byte is counted by the `str->length += len` below
                str->data[str->length] = (char)0xEF; str->length++;
                str->data[str->length] = (char)0xBF; str->length++;
                str->data[str->length] = (char)0xBD;
            }
        }

        str->length += len;
    }

    MyHTML_STRING_APPEND_BYTE_WITHOUT_INCREMENT('\0', str, 1);

    return 0;
}
/*
 * Append a chunk of bytes to `str`, passing every byte through the decoder
 * function looked up for `encoding`, converting each decoded result with
 * myhtml_encoding_codepoint_to_ascii_utf_8, and applying input preprocessing:
 *   - a '\r' sitting at the tail of `str` is rewritten to '\n' as soon as the
 *     next character is decoded; if that character is '\n' it is dropped, so
 *     "\r\n" yields a single '\n';
 *   - a decoded 0x00 is replaced by U+FFFD (EF BF BD).
 *
 * str      - destination string, grown as needed
 * res      - decoder state; res->result is read after every byte for which
 *            the decoder reports MyHTML_ENCODING_STATUS_OK
 * buff     - input bytes (read as unsigned char)
 * length   - number of bytes in buff
 * encoding - id used to select the decoder function
 *
 * A '\r' that is the last character produced by this call stays in `str`
 * unchanged; because the check looks at the tail of `str`, a following call
 * resolves it. A '\0' is stored behind the data on exit without being counted
 * in str->length.
 */
void myhtml_string_append_chunk_with_convert_encoding_with_preprocessing(myhtml_string_t* str, myhtml_encoding_result_t* res,
                                                                         const char* buff, size_t length,
                                                                         myhtml_encoding_t encoding)
{
    unsigned const char* u_buff = (unsigned const char*)buff;
    myhtml_encoding_custom_f func = myhtml_encoding_get_function_by_id(encoding);

    for (size_t i = 0; i < length; i++)
    {
        if(func(u_buff[i], res) != MyHTML_ENCODING_STATUS_OK)
            continue;

        MyHTML_STRING_REALLOC_IF_NEED(str, 4, 32);

        size_t len = myhtml_encoding_codepoint_to_ascii_utf_8(res->result, &str->data[str->length]);

        // change \r\n to \n, and a lone \r to \n.
        // Done for every decoded character (not only single-byte ones) so a
        // CR followed by a multi-byte character is normalized too.
        if(str->length > 0 && str->data[(str->length - 1)] == '\r') {
            str->data[(str->length - 1)] = '\n';

            // the LF of a CRLF pair is already represented by the rewritten CR
            if(len == 1 && str->data[str->length] == '\n')
                continue;
        }

        // No early exit after the CR fix-up: a NUL that directly follows a CR
        // must still be replaced.
        if(len == 1 && str->data[str->length] == 0x00) {
            MyHTML_STRING_REALLOC_IF_NEED(str, (length + 4), 32);

            // Unicode Character 'REPLACEMENT CHARACTER' (U+FFFD);
            // the third byte is counted by the `str->length += len` below
            str->data[str->length] = (char)0xEF; str->length++;
            str->data[str->length] = (char)0xBF; str->length++;
            str->data[str->length] = (char)0xBD;
        }

        str->length += len;
    }

    MyHTML_STRING_APPEND_BYTE_WITHOUT_INCREMENT('\0', str, 1);
}