/*
 * One-shot form of the chunked "lowercase + convert encoding + preprocessing"
 * append.
 *
 * A fresh encoding result is created on the stack, reset with
 * myhtml_encoding_result_clean(), and handed to
 * myhtml_string_append_lowercase_chunk_with_convert_encoding_with_preprocessing()
 * together with all arguments, which are forwarded unchanged.
 *
 * Returns the value produced by the chunked function.
 */
size_t myhtml_string_append_lowercase_with_convert_encoding_with_preprocessing(myhtml_string_t* str, const char* buff, size_t length, myhtml_encoding_t encoding, bool emit_null_chars)
{
    myhtml_encoding_result_t enc_state;
    myhtml_encoding_result_clean(&enc_state);

    size_t result = myhtml_string_append_lowercase_chunk_with_convert_encoding_with_preprocessing(
        str, &enc_state, buff, length, encoding, emit_null_chars);

    return result;
}
/*
 * One-shot form of
 * myhtml_string_append_chunk_lowercase_ascii_with_convert_encoding().
 *
 * The chunked function needs an encoding result to carry state between calls;
 * here a throw-away one is reset with myhtml_encoding_result_clean() and used
 * for a single call. str, buff, length and encoding are forwarded unchanged.
 */
void myhtml_string_append_lowercase_ascii_with_convert_encoding(myhtml_string_t* str, const char* buff, size_t length, myhtml_encoding_t encoding)
{
    myhtml_encoding_result_t enc_state;

    myhtml_encoding_result_clean(&enc_state);
    myhtml_string_append_chunk_lowercase_ascii_with_convert_encoding(
        str, &enc_state, buff, length, encoding);
}
/*
 * State 0 of the character-reference appender: copy plain text up to the next
 * '&' (or to the end of the input when there is none).
 *
 * chunk   carries the source encoding, the encoding-conversion state (res) and
 *         the state-machine fields updated here (begin, state).
 * str     destination string.
 * buff    input bytes; [offset, size) is the range examined.
 *
 * The run of bytes before the '&' (or before `size`) is appended to str,
 * either verbatim when chunk->encoding is MyHTML_ENCODING_UTF_8 or through
 * myhtml_string_append_chunk_with_convert_encoding() otherwise.
 *
 * When a '&' is found, the conversion state is reset (non-UTF-8 only),
 * chunk->begin records str->length at that point, chunk->state becomes 1, the
 * '&' itself is pushed with _myhtml_string_charef_append(), and the offset of
 * the byte after the '&' is returned. Otherwise the offset where scanning
 * stopped is returned.
 */
size_t _myhtml_string_append_char_references_state_0(myhtml_string_char_ref_chunk_t *chunk, myhtml_string_t* str, const char* buff, size_t offset, size_t size)
{
    const size_t run_start = offset;
    const int source_is_utf8 = (chunk->encoding == MyHTML_ENCODING_UTF_8);

    /* Advance to the first '&' or to the end of the input. */
    while(offset < size && buff[offset] != '&')
        offset++;

    /* Flush everything scanned so far; both exits of the original do this first. */
    if(source_is_utf8) {
        myhtml_string_append(str, &buff[run_start], (offset - run_start));
    }
    else {
        myhtml_string_append_chunk_with_convert_encoding(str, &chunk->res, &buff[run_start],
                                                         (offset - run_start), chunk->encoding);
    }

    /* No '&' in range: stay in this state, keep the conversion state as is. */
    if(offset >= size)
        return offset;

    /* '&' found: the conversion state is reset only on this path. */
    if(!source_is_utf8)
        myhtml_encoding_result_clean(&chunk->res);

    chunk->begin = str->length;
    chunk->state = 1;

    _myhtml_string_charef_append(str, buff[offset]);

    return offset + 1;
}
/*
 * One-shot form of
 * myhtml_string_append_chunk_with_convert_encoding_with_preprocessing().
 *
 * Supplies a freshly cleaned, stack-local encoding result so the caller does
 * not have to manage one; str, buff, length and encoding are forwarded
 * unchanged. Nothing is returned.
 */
void myhtml_string_append_with_convert_encoding_with_preprocessing(myhtml_string_t* str, const char* buff, size_t length, myhtml_encoding_t encoding)
{
    myhtml_encoding_result_t enc_state;

    myhtml_encoding_result_clean(&enc_state);
    myhtml_string_append_chunk_with_convert_encoding_with_preprocessing(
        str, &enc_state, buff, length, encoding);
}
/*
 * Set the text stored in node's token from `text`, routing the input through
 * myhtml_data_process() / myhtml_data_process_end() with the given encoding
 * (presumably this is where character references are resolved; the processing
 * itself is not visible here).
 *
 * tree      supplies the token allocator and the mchar allocator.
 * node      target node; a token is allocated for it when it has none.
 * text      input bytes, `length` of them.
 * encoding  source encoding; must be below MyHTML_ENCODING_LAST_ENTRY.
 *
 * Returns a pointer to the token's string, or NULL when node is NULL, the
 * encoding is out of range, or the token allocation reports a non-zero status.
 * On success the token's raw_begin and raw_length are both set to 0.
 */
myhtml_string_t * myhtml_node_text_set_with_charef(myhtml_tree_t* tree, myhtml_tree_node_t *node, const char* text, size_t length, myhtml_encoding_t encoding)
{
    if(node == NULL || encoding >= MyHTML_ENCODING_LAST_ENTRY)
        return NULL;

    if(node->token == NULL) {
        mcobject_async_status_t alloc_status;

        node->token = (myhtml_token_node_t*)mcobject_async_malloc(tree->token->nodes_obj, tree->mcasync_token_id, &alloc_status);

        if(alloc_status)
            return NULL;

        myhtml_token_node_clean(node->token);
    }

    myhtml_string_t *target = &node->token->str;

    if(target->data == NULL) {
        /* No buffer yet: create one with two bytes of slack. */
        myhtml_string_init(tree->mchar, tree->mchar_node_id, target, (length + 2));
    }
    else if(target->size >= length) {
        /* Existing buffer is big enough: reuse it, just drop the old content. */
        target->length = 0;
    }
    else {
        /* Existing buffer is too small: release it and start over. */
        mchar_async_free(tree->mchar, target->node_idx, target->data);
        myhtml_string_init(tree->mchar, tree->mchar_node_id, target, length);
    }

    myhtml_data_process_entry_t proc_entry;
    myhtml_data_process_entry_clean(&proc_entry);

    proc_entry.encoding = encoding;
    myhtml_encoding_result_clean(&proc_entry.res);

    myhtml_data_process(&proc_entry, target, text, length);
    myhtml_data_process_end(&proc_entry, target);

    node->token->raw_begin  = 0;
    node->token->raw_length = 0;

    return target;
}
/*
 * Set the text stored in node's token from `text`, routing the input through
 * myhtml_string_append_charef() / myhtml_string_append_charef_end() with the
 * given encoding.
 *
 * tree      supplies the token allocator and the mchar allocator.
 * node      target node; a token is allocated for it when it has none.
 * text      input bytes, `length` of them.
 * encoding  source encoding; must be below MyHTML_ENCODING_LAST_ENTRY.
 *
 * Returns a pointer to the token's string (my_str_tm), or NULL when node is
 * NULL, the encoding is out of range, or the token allocation reports a
 * non-zero status. On success the token's begin is set to 0 and its length to
 * the resulting string length.
 */
myhtml_string_t * myhtml_node_text_set_with_charef(myhtml_tree_t* tree, myhtml_tree_node_t *node, const char* text, size_t length, myhtml_encoding_t encoding)
{
    if(node == NULL || encoding >= MyHTML_ENCODING_LAST_ENTRY)
        return NULL;

    if(node->token == NULL) {
        mcobject_async_status_t alloc_status;

        node->token = (myhtml_token_node_t*)mcobject_async_malloc(tree->token->nodes_obj, tree->mcasync_token_id, &alloc_status);

        if(alloc_status)
            return NULL;

        myhtml_token_node_clean(node->token);
    }

    myhtml_string_t *target = &node->token->my_str_tm;

    if(target->data == NULL) {
        /* No buffer yet: create one with two bytes of slack. */
        myhtml_string_init(tree->mchar, tree->mchar_node_id, target, (length + 2));
    }
    else if(target->size >= length) {
        /* Existing buffer is big enough: reuse it, just drop the old content. */
        target->length = 0;
    }
    else {
        /* Existing buffer is too small: release it and start over. */
        mchar_async_free(tree->mchar, target->node_idx, target->data);
        myhtml_string_init(tree->mchar, tree->mchar_node_id, target, length);
    }

    /* Positional initializer kept as in the original; the last value is the encoding. */
    myhtml_string_char_ref_chunk_t str_chunk = {0, 0, 0, {0}, false, encoding};
    myhtml_encoding_result_clean(&str_chunk.res);

    myhtml_string_append_charef(&str_chunk, target, text, length);
    myhtml_string_append_charef_end(&str_chunk, target);

    node->token->begin  = 0;
    node->token->length = target->length;

    return target;
}
/*
 * Append `length` bytes of the tree's incoming data, starting at absolute
 * position `begin`, to `string` through the character-reference appender
 * (myhtml_string_append_charef / myhtml_string_append_charef_end), using
 * tree->encoding as the source encoding.
 *
 * The data is read from the tree's incoming buffers, located with
 * myhtml_parser_find_first_buf(); the `text` parameter is not read by this
 * function. When the requested range extends past the end of the first
 * buffer, the following buffers (inc_buf->next) are consumed from their start
 * until `length` bytes have been fed or the buffer list ends.
 *
 * Returns the number of bytes by which string->length grew. Returns 0 without
 * touching `string` when no incoming buffer is found for `begin` (previously
 * this case dereferenced a NULL pointer).
 */
size_t myhtml_parser_add_text_with_charef(myhtml_tree_t *tree, myhtml_string_t* string, const char *text, size_t begin, size_t length)
{
    myhtml_incoming_buf_t *inc_buf = myhtml_parser_find_first_buf(tree, begin);

    /* The lookup result was used unchecked; the buffer list is nullable elsewhere in this function. */
    if(inc_buf == NULL)
        return 0;

    /* Positional initializer kept as in the original; the last value is the encoding. */
    myhtml_string_char_ref_chunk_t str_chunk = {0, 0, 0, NULL, tree->encoding};
    myhtml_encoding_result_clean(&str_chunk.res);

    const size_t current_buf_offset = begin - inc_buf->offset;
    const size_t save_str_len = string->length;

    /* Fast path: the whole range lies inside the first buffer. */
    if((current_buf_offset + length) <= inc_buf->size) {
        myhtml_string_append_charef(&str_chunk, string, &inc_buf->data[current_buf_offset], length);
        myhtml_string_append_charef_end(&str_chunk, string);

        return (string->length - save_str_len);
    }

    /* Range spans buffers: take the tail of the first one... */
    const size_t first_piece = inc_buf->size - current_buf_offset;

    myhtml_string_append_charef(&str_chunk, string, &inc_buf->data[current_buf_offset], first_piece);
    length -= first_piece;

    /* ...then whole buffers, and finally the leading part of the last one. */
    for(inc_buf = inc_buf->next; inc_buf && length; inc_buf = inc_buf->next) {
        const size_t piece = (length > inc_buf->size) ? inc_buf->size : length;

        myhtml_string_append_charef(&str_chunk, string, inc_buf->data, piece);
        length -= piece;
    }

    myhtml_string_append_charef_end(&str_chunk, string);

    return (string->length - save_str_len);
}
/*
 * Append `length` bytes of the tree's incoming data, starting at absolute
 * position `begin`, to `string`.
 *
 * When tree->encoding is MyHTML_ENCODING_UTF_8 the bytes are appended as they
 * are; otherwise they go through the encoding-converting append functions.
 * The data is read from the tree's incoming buffers, located with
 * myhtml_parser_find_first_buf(); the `text` parameter is not read by this
 * function. When the requested range extends past the end of the first
 * buffer, the following buffers (inc_buf->next) are consumed from their start
 * until `length` bytes have been appended or the buffer list ends, and a
 * single encoding result is shared across those pieces so conversion state
 * carries over buffer boundaries.
 *
 * Returns the number of bytes by which string->length grew. Returns 0 without
 * touching `string` when no incoming buffer is found for `begin` (previously
 * this case dereferenced a NULL pointer).
 */
size_t myhtml_parser_add_text(myhtml_tree_t *tree, myhtml_string_t* string, const char *text, size_t begin, size_t length)
{
    myhtml_incoming_buf_t *inc_buf = myhtml_parser_find_first_buf(tree, begin);

    /* The lookup result was used unchecked; the buffer list is nullable elsewhere in this function. */
    if(inc_buf == NULL)
        return 0;

    const size_t current_buf_offset = begin - inc_buf->offset;
    const size_t save_str_len = string->length;

    /* Fast path: the whole range lies inside the first buffer. */
    if((current_buf_offset + length) <= inc_buf->size) {
        if(tree->encoding == MyHTML_ENCODING_UTF_8)
            myhtml_string_append(string, &inc_buf->data[current_buf_offset], length);
        else
            myhtml_string_append_with_convert_encoding(string, &inc_buf->data[current_buf_offset], length, tree->encoding);

        return (string->length - save_str_len);
    }

    /* Range spans buffers: take the tail of the first one... */
    const size_t first_piece = inc_buf->size - current_buf_offset;

    myhtml_encoding_result_t res;
    myhtml_encoding_result_clean(&res);

    if(tree->encoding == MyHTML_ENCODING_UTF_8)
        myhtml_string_append(string, &inc_buf->data[current_buf_offset], first_piece);
    else
        myhtml_string_append_chunk_with_convert_encoding(string, &res, &inc_buf->data[current_buf_offset], first_piece, tree->encoding);

    length -= first_piece;

    /* ...then whole buffers, and finally the leading part of the last one. */
    for(inc_buf = inc_buf->next; inc_buf && length; inc_buf = inc_buf->next) {
        const size_t piece = (length > inc_buf->size) ? inc_buf->size : length;

        if(tree->encoding == MyHTML_ENCODING_UTF_8)
            myhtml_string_append(string, inc_buf->data, piece);
        else
            myhtml_string_append_chunk_with_convert_encoding(string, &res, inc_buf->data, piece, tree->encoding);

        length -= piece;
    }

    return (string->length - save_str_len);
}