/*
 * Replace the text stored on `node` with `text`, feeding it through
 * myhtml_data_process() under the given encoding.
 *
 * tree     - tree whose token and character allocators are used
 * node     - node to update; a token is allocated for it when it has none
 * text     - input handed to myhtml_data_process()
 * length   - number of bytes of `text` to process
 * encoding - must be below MyHTML_ENCODING_LAST_ENTRY
 *
 * Returns a pointer to the node's token string, or NULL when an argument is
 * rejected or an allocation fails.
 */
myhtml_string_t * myhtml_node_text_set_with_charef(myhtml_tree_t* tree, myhtml_tree_node_t *node, const char* text, size_t length, myhtml_encoding_t encoding)
{
    if(tree == NULL || node == NULL)
        return NULL;

    if(encoding >= MyHTML_ENCODING_LAST_ENTRY)
        return NULL;

    if(node->token == NULL) {
        mcobject_async_status_t mcstatus;
        node->token = (myhtml_token_node_t*)mcobject_async_malloc(tree->token->nodes_obj, tree->mcasync_token_id, &mcstatus);

        /* Guard against a NULL token even if the status claims success. */
        if(mcstatus || node->token == NULL)
            return NULL;

        /*
         * NOTE(review): the fresh token is only cleaned here; nothing in this
         * function marks it as processed. Confirm no consumer waits on that.
         */
        myhtml_token_node_clean(node->token);
    }

    /*
     * Both allocation paths request the same capacity.
     * NOTE(review): the two extra bytes are presumably headroom for a
     * terminator; confirm against the string implementation.
     */
    const size_t capacity = length + 2;

    if(node->token->str.data == NULL) {
        myhtml_string_init(tree->mchar, tree->mchar_node_id, &node->token->str, capacity);
    }
    else if(node->token->str.size < capacity) {
        /* Existing buffer is too small: release it and start over. */
        mchar_async_free(tree->mchar, node->token->str.node_idx, node->token->str.data);
        myhtml_string_init(tree->mchar, tree->mchar_node_id, &node->token->str, capacity);
    }
    else {
        /* Existing buffer is large enough: reuse it from the start. */
        node->token->str.length = 0;
    }

    /*
     * Do not process into a missing buffer. Assumes a failed init leaves
     * str.data NULL, the same condition used above to detect "no buffer".
     */
    if(node->token->str.data == NULL)
        return NULL;

    myhtml_data_process_entry_t proc_entry;
    myhtml_data_process_entry_clean(&proc_entry);

    proc_entry.encoding = encoding;
    myhtml_encoding_result_clean(&proc_entry.res);

    myhtml_data_process(&proc_entry, &node->token->str, text, length);
    myhtml_data_process_end(&proc_entry, &node->token->str);

    /* The string no longer corresponds to a span of the raw input. */
    node->token->raw_begin  = 0;
    node->token->raw_length = 0;

    return &node->token->str;
}
/*
 * Replace the text stored on `node` with `text`, feeding it through
 * myhtml_data_process() under the given encoding.
 *
 * node     - node to update; its owning tree supplies the allocators, and a
 *            token is created for it when it has none
 * text     - input handed to myhtml_data_process()
 * length   - number of bytes of `text` to process
 * encoding - must be below MyENCODING_LAST_ENTRY
 *
 * Returns a pointer to the node's token string, or NULL when an argument is
 * rejected or an allocation fails.
 */
mycore_string_t * myhtml_node_text_set_with_charef(myhtml_tree_node_t *node, const char* text, size_t length, myencoding_t encoding)
{
    if(node == NULL)
        return NULL;

    if(encoding >= MyENCODING_LAST_ENTRY)
        return NULL;

    myhtml_tree_t* tree = node->tree;

    if(node->token == NULL) {
        /*
         * NOTE(review): nothing in this function marks the new token as
         * processed. Confirm no consumer waits on that.
         */
        node->token = myhtml_token_node_create(tree->token, tree->mcasync_rules_token_id);

        if(node->token == NULL)
            return NULL;
    }

    /*
     * Both allocation paths request the same capacity.
     * NOTE(review): the two extra bytes are presumably headroom for a
     * terminator; confirm against the string implementation.
     */
    const size_t capacity = length + 2;

    if(node->token->str.data == NULL) {
        mycore_string_init(tree->mchar, tree->mchar_node_id, &node->token->str, capacity);
    }
    else if(node->token->str.size < capacity) {
        /* Existing buffer is too small: release it and start over. */
        mchar_async_free(tree->mchar, node->token->str.node_idx, node->token->str.data);
        mycore_string_init(tree->mchar, tree->mchar_node_id, &node->token->str, capacity);
    }
    else {
        /* Existing buffer is large enough: reuse it from the start. */
        node->token->str.length = 0;
    }

    /*
     * Do not process into a missing buffer. Assumes a failed init leaves
     * str.data NULL, the same condition used above to detect "no buffer".
     */
    if(node->token->str.data == NULL)
        return NULL;

    myhtml_data_process_entry_t proc_entry;
    myhtml_data_process_entry_clean(&proc_entry);

    proc_entry.encoding = encoding;
    myencoding_result_clean(&proc_entry.res);

    myhtml_data_process(&proc_entry, &node->token->str, text, length);
    myhtml_data_process_end(&proc_entry, &node->token->str);

    /* The string no longer corresponds to a span of the raw input. */
    node->token->raw_begin  = 0;
    node->token->raw_length = 0;

    return &node->token->str;
}
/*
 * Worker entry point for one queued token.
 *
 * thread_id - index into the tree's async_args, used to pick the character
 *             allocator node for this worker
 * qnode     - queue node carrying the tree and the token to handle
 *
 * The optional "before" callback runs first and the optional "after" callback
 * runs last; in between the token is marked MyHTML_TOKEN_TYPE_DONE. Unless the
 * tree has MyHTML_TREE_PARSE_FLAGS_WITHOUT_PROCESS_TOKEN set, the token's
 * string (for text/comment tokens) or its attribute keys and values are built
 * from the raw input before it is marked done.
 */
void myhtml_parser_worker(mythread_id_t thread_id, mythread_queue_node_t *qnode)
{
    myhtml_tree_t* tree = qnode->tree;
    myhtml_token_node_t* token = qnode->token;

    /* The "before" hook fires on every path, ahead of any work on the token. */
    if(tree->callback_before_token)
        tree->callback_before_token_ctx = tree->callback_before_token(tree, token, tree->callback_before_token_ctx);

    if((tree->parse_flags & MyHTML_TREE_PARSE_FLAGS_WITHOUT_PROCESS_TOKEN) == 0)
    {
        size_t mchar_node_id = tree->async_args[thread_id].mchar_node_id;

        if(token->tag_id == MyHTML_TAG__TEXT || token->tag_id == MyHTML_TAG__COMMENT)
        {
            /* Text and comment tokens carry a string and no attributes. */
            myhtml_string_init(tree->mchar, mchar_node_id, &token->str, (token->raw_length + 1));

            token->attr_first = NULL;
            token->attr_last  = NULL;

            myhtml_data_process_entry_t proc_entry;
            myhtml_data_process_entry_clean(&proc_entry);
            proc_entry.encoding = tree->encoding;

            if(token->type & (MyHTML_TOKEN_TYPE_DATA | MyHTML_TOKEN_TYPE_RCDATA | MyHTML_TOKEN_TYPE_CDATA))
            {
                /* Only plain DATA asks the processor to emit NUL characters. */
                if(token->type & MyHTML_TOKEN_TYPE_DATA)
                    proc_entry.emit_null_char = true;

                myhtml_parser_token_data_to_string_charef(tree, &token->str, &proc_entry, token->raw_begin, token->raw_length);
            }
            else
            {
                myhtml_parser_token_data_to_string(tree, &token->str, &proc_entry, token->raw_begin, token->raw_length);
            }
        }
        else if(token->attr_first)
        {
            /* Element token with attributes: no own string, build each key/value. */
            myhtml_string_clean_all(&token->str);

            myhtml_data_process_entry_t proc_entry;
            myhtml_data_process_entry_clean(&proc_entry);
            proc_entry.encoding = tree->encoding;

            for(myhtml_token_attr_t* attr = token->attr_first; attr != NULL; attr = attr->next)
            {
                if(attr->raw_key_length == 0) {
                    myhtml_string_clean_all(&attr->key);
                }
                else {
                    myhtml_string_init(tree->mchar, mchar_node_id, &attr->key, (attr->raw_key_length + 1));
                    myhtml_parser_token_data_to_string_lowercase(tree, &attr->key, &proc_entry, attr->raw_key_begin, attr->raw_key_length);
                }

                if(attr->raw_value_length == 0) {
                    myhtml_string_clean_all(&attr->value);
                }
                else {
                    myhtml_string_init(tree->mchar, mchar_node_id, &attr->value, (attr->raw_value_length + 1));

                    /* Stays set on the shared entry for the remaining attributes. */
                    proc_entry.is_attributes = true;
                    myhtml_parser_token_data_to_string_charef(tree, &attr->value, &proc_entry, attr->raw_value_begin, attr->raw_value_length);
                }
            }
        }
        else
        {
            /* Element token without attributes: reset everything. */
            token->attr_first = NULL;
            token->attr_last  = NULL;

            myhtml_string_clean_all(&token->str);
        }
    }

    token->type |= MyHTML_TOKEN_TYPE_DONE;

    if(tree->callback_after_token)
        tree->callback_after_token_ctx = tree->callback_after_token(tree, token, tree->callback_after_token_ctx);
}