/**
 * Append the token data found at absolute position `begin` (`length` bytes) in
 * the tree's incoming buffer chain to `str`, using the lowercase append helpers.
 *
 * The starting buffer is looked up with
 * myhtml_incoming_buffer_find_by_position() from tree->incoming_buf_first.
 * Data that does not fit in that buffer is read on from buffer->next, one
 * chunk per buffer.
 *
 * When tree->encoding is MyHTML_ENCODING_UTF_8 each chunk is passed to
 * myhtml_string_append_lowercase_with_preprocessing(); for any other encoding
 * the converting variant is called with &proc_entry->res and
 * proc_entry->encoding.
 *
 * @param tree        Tree; supplies incoming_buf_first and encoding.
 * @param str         Destination string.
 * @param proc_entry  Supplies emit_null_char, encoding and the `res` object
 *                    handed to the converting helper.
 * @param begin       Absolute position of the first byte of the token data.
 * @param length      Number of bytes of token data.
 * @return str->length after the call. If no buffer is found for `begin`,
 *         nothing is appended and the current str->length is returned.
 */
size_t myhtml_parser_token_data_to_string_lowercase(myhtml_tree_t *tree, myhtml_string_t* str, myhtml_data_process_entry_t* proc_entry, size_t begin, size_t length)
{
    myhtml_incoming_buffer_t *buffer = myhtml_incoming_buffer_find_by_position(tree->incoming_buf_first, begin);

    /* The lookup may yield no buffer; bail out instead of dereferencing NULL. */
    if(buffer == NULL) {
        return str->length;
    }

    size_t relative_begin = begin - buffer->offset;

    /* If the whole token lies inside one buffer, append it in a single call. */
    if((relative_begin + length) <= buffer->size) {
        if(tree->encoding == MyHTML_ENCODING_UTF_8) {
            myhtml_string_append_lowercase_with_preprocessing(str, &buffer->data[relative_begin], length, proc_entry->emit_null_char);
        }
        else {
            myhtml_string_append_lowercase_chunk_with_convert_encoding_with_preprocessing(str, &proc_entry->res, &buffer->data[relative_begin], length, proc_entry->encoding, proc_entry->emit_null_char);
        }

        return str->length;
    }

    /*
     * Return value of the previous chunk's append call; it is handed to
     * myhtml_string_before_append_any_preprocessing() before the next chunk.
     * NOTE(review): presumably this lets preprocessing deal with a sequence
     * split across a buffer boundary -- confirm against the helper.
     */
    size_t save_position = 0;

    /* The data is spread across several buffers: append it chunk by chunk. */
    while(buffer) {
        if((relative_begin + length) > buffer->size) {
            /* Token runs past this buffer: take everything up to its end. */
            size_t relative_end = (buffer->size - relative_begin);
            length -= relative_end;

            /* tmp_offset is the count of leading bytes skipped in this chunk. */
            size_t tmp_offset = myhtml_string_before_append_any_preprocessing(str, &buffer->data[relative_begin], relative_end, save_position);

            if(relative_end > 0) {
                if(tree->encoding == MyHTML_ENCODING_UTF_8) {
                    save_position = myhtml_string_append_lowercase_with_preprocessing(str, &buffer->data[(relative_begin + tmp_offset)], (relative_end - tmp_offset), proc_entry->emit_null_char);
                }
                else {
                    save_position = myhtml_string_append_lowercase_chunk_with_convert_encoding_with_preprocessing(str, &proc_entry->res, &buffer->data[(relative_begin + tmp_offset)], (relative_end - tmp_offset), proc_entry->encoding, proc_entry->emit_null_char);
                }
            }

            /* Every following buffer is read from its first byte. */
            relative_begin = 0;
            buffer = buffer->next;
        }
        else {
            /* Final chunk: the remaining bytes fit inside this buffer. */
            size_t tmp_offset = myhtml_string_before_append_any_preprocessing(str, &buffer->data[relative_begin], length, save_position);

            if(length > 0) {
                if(tree->encoding == MyHTML_ENCODING_UTF_8) {
                    myhtml_string_append_lowercase_with_preprocessing(str, &buffer->data[(relative_begin + tmp_offset)], (length - tmp_offset), proc_entry->emit_null_char);
                }
                else {
                    myhtml_string_append_lowercase_chunk_with_convert_encoding_with_preprocessing(str, &proc_entry->res, &buffer->data[(relative_begin + tmp_offset)], (length - tmp_offset), proc_entry->encoding, proc_entry->emit_null_char);
                }
            }

            break;
        }
    }

    return str->length;
}
/**
 * Append `length` bytes of incoming data, starting at absolute position
 * `begin`, to `string` using the lowercase append helpers.
 *
 * The first buffer is obtained from myhtml_parser_find_first_buf(tree, begin);
 * data that does not fit in it is read on from inc_buf->next. When
 * tree->encoding is MyHTML_ENCODING_UTF_8 each chunk is passed to
 * myhtml_string_append_lowercase_with_preprocessing(); otherwise
 * myhtml_string_append_chunk_lowercase_ascii_with_convert_encoding() is called
 * with a local, freshly cleaned myhtml_encoding_result_t shared by all chunks.
 *
 * NOTE(review): myhtml_string_append_lowercase_with_preprocessing() is called
 * here with three arguments, while elsewhere in this file it is called with a
 * fourth emit_null_char argument -- verify against the prototype in scope.
 *
 * @param tree    Tree; supplies the incoming buffers and the encoding.
 * @param string  Destination string.
 * @param text    Unused; kept so the signature stays compatible with callers.
 * @param begin   Absolute position of the first byte to append.
 * @param length  Number of bytes to append.
 * @return Growth of string->length caused by this call; 0 if no buffer is
 *         found for `begin`.
 */
size_t myhtml_parser_add_text_lowercase(myhtml_tree_t *tree, myhtml_string_t* string, const char *text, size_t begin, size_t length)
{
    (void)text;

    myhtml_incoming_buf_t *inc_buf = myhtml_parser_find_first_buf(tree, begin);

    /* The lookup may yield no buffer; bail out instead of dereferencing NULL. */
    if(inc_buf == NULL) {
        return 0;
    }

    myhtml_encoding_result_t str_res;
    myhtml_encoding_result_clean(&str_res);

    size_t current_buf_offset = begin - inc_buf->offset;
    size_t save_str_len = string->length;

    /* Whole range lies inside the first buffer: append it in a single call. */
    if((current_buf_offset + length) <= inc_buf->size) {
        if(tree->encoding == MyHTML_ENCODING_UTF_8) {
            myhtml_string_append_lowercase_with_preprocessing(string, &inc_buf->data[current_buf_offset], length);
        }
        else {
            myhtml_string_append_chunk_lowercase_ascii_with_convert_encoding(string, &str_res, &inc_buf->data[current_buf_offset], length, tree->encoding);
        }

        return (string->length - save_str_len);
    }

    /* Otherwise take the tail of the first buffer... */
    size_t buf_next_offset = inc_buf->size - current_buf_offset;

    if(tree->encoding == MyHTML_ENCODING_UTF_8) {
        myhtml_string_append_lowercase_with_preprocessing(string, &inc_buf->data[current_buf_offset], buf_next_offset);
    }
    else {
        myhtml_string_append_chunk_lowercase_ascii_with_convert_encoding(string, &str_res, &inc_buf->data[current_buf_offset], buf_next_offset, tree->encoding);
    }

    length = length - buf_next_offset;
    inc_buf = inc_buf->next;

    /* ...then walk the following buffers until `length` bytes are consumed. */
    while(inc_buf && length) {
        /* A whole buffer while more data remains, else just the remainder. */
        const size_t chunk = (length > inc_buf->size) ? inc_buf->size : length;

        if(tree->encoding == MyHTML_ENCODING_UTF_8) {
            myhtml_string_append_lowercase_with_preprocessing(string, inc_buf->data, chunk);
        }
        else {
            myhtml_string_append_chunk_lowercase_ascii_with_convert_encoding(string, &str_res, inc_buf->data, chunk, tree->encoding);
        }

        /* The remainder fitted into this buffer: nothing left to append. */
        if(chunk == length) {
            break;
        }

        length -= chunk;
        inc_buf = inc_buf->next;
    }

    return (string->length - save_str_len);
}