/*
 * Queue worker: builds the string storage for one queued token and flags
 * the token as done.
 *
 * thread_id  index into qnode->tree->async_args; selects the mchar node id
 *            passed to every myhtml_string_init() call made here.
 * qnode      queue entry supplying the token, the owning tree, the source
 *            text and the begin/length of the token inside that text.
 *
 * Three cases are handled:
 *   - text / comment tokens: the token string is initialised and filled
 *     from the source text; attribute pointers are cleared;
 *   - tokens that carry attributes: the token string is zeroed and every
 *     attribute gets its own entry string holding name followed by value;
 *   - everything else: token string, offsets and attribute pointers are
 *     all zeroed.
 */
void myhtml_parser_worker(mythread_id_t thread_id, mythread_queue_node_t *qnode)
{
    myhtml_token_node_t *tok = qnode->token;
    size_t node_id = qnode->tree->async_args[thread_id].mchar_node_id;

    if(tok->tag_ctx_idx == MyHTML_TAG__TEXT ||
       tok->tag_ctx_idx == MyHTML_TAG__COMMENT)
    {
        myhtml_string_init(qnode->tree->mchar, node_id, &tok->my_str_tm,
                           (qnode->length + 2));

        /* The token's data starts at the string's current length. */
        tok->begin      = tok->my_str_tm.length;
        tok->length     = qnode->length;
        tok->attr_first = NULL;
        tok->attr_last  = NULL;

        /* NOTE(review): judging by the helper names, RCDATA/CDATA/DATA text
         * goes through character-reference handling and other text is
         * copied as is -- confirm against the helpers. */
        if(tok->type & (MyHTML_TOKEN_TYPE_RCDATA |
                        MyHTML_TOKEN_TYPE_CDATA  |
                        MyHTML_TOKEN_TYPE_DATA))
        {
            tok->length = myhtml_parser_add_text_with_charef(qnode->tree, &tok->my_str_tm,
                                                             qnode->text, qnode->begin,
                                                             qnode->length);
        }
        else {
            tok->length = myhtml_parser_add_text(qnode->tree, &tok->my_str_tm,
                                                 qnode->text, qnode->begin,
                                                 qnode->length);
        }
    }
    else {
        /* Non-text tokens never own string data of their own. */
        tok->begin  = 0;
        tok->length = 0;

        tok->my_str_tm.size     = 0;
        tok->my_str_tm.length   = 0;
        tok->my_str_tm.node_idx = 0;
        tok->my_str_tm.mchar    = NULL;
        tok->my_str_tm.data     = NULL;

        if(tok->attr_first == NULL) {
            tok->attr_first = NULL;
            tok->attr_last  = NULL;
        }
        else {
            myhtml_token_attr_t *cur;

            for(cur = tok->attr_first; cur != NULL; cur = cur->next) {
                myhtml_string_init(qnode->tree->mchar, node_id, &cur->entry,
                                   (cur->name_length + cur->value_length + 8));

                /* name_begin / value_begin are offsets into qnode->text on
                 * entry and are rewritten as offsets into cur->entry. */
                if(cur->name_length) {
                    size_t src_pos = cur->name_begin;

                    cur->name_begin  = cur->entry.length;
                    cur->name_length = myhtml_parser_add_text_lowercase(qnode->tree, &cur->entry,
                                                                        qnode->text, src_pos,
                                                                        cur->name_length);
                }

                if(cur->value_length) {
                    size_t src_pos = cur->value_begin;

                    cur->value_begin  = cur->entry.length;
                    cur->value_length = myhtml_parser_add_text_with_charef(qnode->tree, &cur->entry,
                                                                           qnode->text, src_pos,
                                                                           cur->value_length);
                }
            }
        }
    }

    tok->type |= MyHTML_TOKEN_TYPE_DONE;
}
/*
 * Queue worker: builds the string storage for one queued token and flags
 * the token as done.
 *
 * thread_id  index into qnode->tree->async_args; selects the mchar node id
 *            passed to every myhtml_string_init() call made here.
 * qnode      queue entry supplying the token, the owning tree, the source
 *            text and the begin/length of the token inside that text.
 *
 * Three cases are handled:
 *   - text / comment tokens: the token string is initialised and filled
 *     (a NULL-type token gets a single '\0' byte instead of source text);
 *     attribute pointers are cleared;
 *   - tokens that carry attributes: the token string is zeroed and every
 *     attribute gets its own entry string holding name followed by value,
 *     plus the HTML namespace;
 *   - everything else: token string, offsets and attribute pointers are
 *     all zeroed.
 */
void myhtml_parser_worker(mythread_id_t thread_id, mythread_queue_node_t *qnode)
{
    myhtml_token_node_t *tok = qnode->token;
    size_t node_id = qnode->tree->async_args[thread_id].mchar_node_id;

    if(tok->tag_ctx_idx == MyHTML_TAG__TEXT ||
       tok->tag_ctx_idx == MyHTML_TAG__COMMENT)
    {
        myhtml_string_init(qnode->tree->mchar, node_id, &tok->my_str_tm,
                           (qnode->length + 4));

        /* The token's data starts at the string's current length. */
        tok->begin      = tok->my_str_tm.length;
        tok->length     = qnode->length;
        tok->attr_first = NULL;
        tok->attr_last  = NULL;

        if(tok->type & MyHTML_TOKEN_TYPE_NULL) {
            /* A NULL token holds exactly one character, '\0'. Later rule
             * processing may turn it into 'REPLACEMENT CHARACTER' (U+FFFD). */
            tok->my_str_tm.data[0] = '\0';
            tok->my_str_tm.length  = 1;
            tok->length            = 1;
        }
        else if(tok->type & (MyHTML_TOKEN_TYPE_DATA   |
                             MyHTML_TOKEN_TYPE_RCDATA |
                             MyHTML_TOKEN_TYPE_CDATA))
        {
            /* NOTE(review): the trailing flag is false for token text and
             * true for attribute values; its meaning lives in the helper. */
            tok->length = myhtml_parser_add_text_with_charef(qnode->tree, &tok->my_str_tm,
                                                             qnode->text, qnode->begin,
                                                             qnode->length, false);
        }
        else {
            tok->length = myhtml_parser_add_text(qnode->tree, &tok->my_str_tm,
                                                 qnode->text, qnode->begin,
                                                 qnode->length);
        }
    }
    else {
        /* Non-text tokens never own string data of their own. */
        tok->begin  = 0;
        tok->length = 0;

        tok->my_str_tm.size     = 0;
        tok->my_str_tm.length   = 0;
        tok->my_str_tm.node_idx = 0;
        tok->my_str_tm.mchar    = NULL;
        tok->my_str_tm.data     = NULL;

        if(tok->attr_first == NULL) {
            tok->attr_first = NULL;
            tok->attr_last  = NULL;
        }
        else {
            myhtml_token_attr_t *cur;

            for(cur = tok->attr_first; cur != NULL; cur = cur->next) {
                myhtml_string_init(qnode->tree->mchar, node_id, &cur->entry,
                                   (cur->name_length + cur->value_length + 8));

                /* name_begin / value_begin are offsets into qnode->text on
                 * entry and are rewritten as offsets into cur->entry. */
                if(cur->name_length) {
                    size_t src_pos = cur->name_begin;

                    cur->name_begin  = cur->entry.length;
                    cur->name_length = myhtml_parser_add_text_lowercase(qnode->tree, &cur->entry,
                                                                        qnode->text, src_pos,
                                                                        cur->name_length);
                }

                if(cur->value_length) {
                    size_t src_pos = cur->value_begin;

                    cur->value_begin  = cur->entry.length;
                    cur->value_length = myhtml_parser_add_text_with_charef(qnode->tree, &cur->entry,
                                                                           qnode->text, src_pos,
                                                                           cur->value_length, true);
                }

                cur->my_namespace = MyHTML_NAMESPACE_HTML;
            }
        }
    }

    tok->type |= MyHTML_TOKEN_TYPE_DONE;
}