Ejemplo n.º 1
0
/**
 * Replace the text stored in a node's token with the processed form of `text`.
 *
 * The node's token is created on demand. Its string buffer is (re)allocated
 * when it is missing or smaller than `length`; otherwise the existing buffer
 * is reused and only its length is reset. The input is then fed through
 * myhtml_data_process() / myhtml_data_process_end() using `encoding`, and the
 * token's raw span (raw_begin / raw_length) is reset to 0.
 *
 * NOTE(review): judging by the function name, the data processor is expected
 * to resolve character references in `text` -- confirm against
 * myhtml_data_process().
 *
 * @param tree      tree that supplies the token and character allocators
 * @param node      node whose token text is replaced
 * @param text      input bytes to process
 * @param length    number of bytes in `text`
 * @param encoding  encoding of `text`; must be below MyHTML_ENCODING_LAST_ENTRY
 *
 * @return pointer to the node's token string, or NULL when `tree` or `node`
 *         is NULL, `encoding` is out of range, or an allocation fails.
 */
myhtml_string_t * myhtml_node_text_set_with_charef(myhtml_tree_t* tree, myhtml_tree_node_t *node, const char* text, size_t length, myhtml_encoding_t encoding)
{
    /* tree is dereferenced below just like node, so guard both. */
    if(tree == NULL || node == NULL)
        return NULL;
    
    if(encoding >= MyHTML_ENCODING_LAST_ENTRY)
        return NULL;
    
    if(node->token == NULL) {
        mcobject_async_status_t mcstatus;
        node->token = (myhtml_token_node_t*)mcobject_async_malloc(tree->token->nodes_obj, tree->mcasync_token_id, &mcstatus);
        
        if(mcstatus)
            return NULL;
        
        myhtml_token_node_clean(node->token);
    }
    
    myhtml_string_t *str = &node->token->str;
    
    /* Buffer too small: release it and fall through to a fresh allocation.
     * Clearing the pointer keeps the token from holding freed memory if the
     * allocation below fails. */
    if(str->data != NULL && str->size < length) {
        mchar_async_free(tree->mchar, str->node_idx, str->data);
        str->data = NULL;
    }
    
    if(str->data == NULL) {
        /* Same (length + 2) sizing for the first allocation and for the
         * re-allocation, so both paths leave identical head-room. */
        myhtml_string_init(tree->mchar, tree->mchar_node_id, str, (length + 2));
        
        /* Do not hand a NULL buffer to the data processor. */
        if(str->data == NULL)
            return NULL;
    }
    else {
        /* Existing buffer is large enough: reuse it from the start. */
        str->length = 0;
    }
    
    myhtml_data_process_entry_t proc_entry;
    myhtml_data_process_entry_clean(&proc_entry);
    
    proc_entry.encoding = encoding;
    myhtml_encoding_result_clean(&proc_entry.res);
    
    myhtml_data_process(&proc_entry, str, text, length);
    myhtml_data_process_end(&proc_entry, str);
    
    /* The text no longer corresponds to a span of the original input. */
    node->token->raw_begin  = 0;
    node->token->raw_length = 0;
    
    return str;
}
Ejemplo n.º 2
0
/**
 * Replace the text stored in a node's token with the processed form of `text`.
 *
 * The owning tree is taken from `node->tree`. The node's token is created on
 * demand. Its string buffer is (re)allocated when it is missing or smaller
 * than `length`; otherwise the existing buffer is reused and only its length
 * is reset. The input is then fed through myhtml_data_process() /
 * myhtml_data_process_end() using `encoding`, and the token's raw span
 * (raw_begin / raw_length) is reset to 0.
 *
 * NOTE(review): judging by the function name, the data processor is expected
 * to resolve character references in `text` -- confirm against
 * myhtml_data_process().
 *
 * @param node      node whose token text is replaced
 * @param text      input bytes to process
 * @param length    number of bytes in `text`
 * @param encoding  encoding of `text`; must be below MyENCODING_LAST_ENTRY
 *
 * @return pointer to the node's token string, or NULL when `node` (or its
 *         tree) is NULL, `encoding` is out of range, or an allocation fails.
 */
mycore_string_t * myhtml_node_text_set_with_charef(myhtml_tree_node_t *node, const char* text, size_t length, myencoding_t encoding)
{
    if(node == NULL)
        return NULL;
    
    if(encoding >= MyENCODING_LAST_ENTRY)
        return NULL;
    
    myhtml_tree_t* tree = node->tree;
    
    /* Every allocator used below hangs off the tree. */
    if(tree == NULL)
        return NULL;
    
    if(node->token == NULL) {
        node->token = myhtml_token_node_create(tree->token, tree->mcasync_rules_token_id);
        
        if(node->token == NULL)
            return NULL;
    }
    
    mycore_string_t *str = &node->token->str;
    
    /* Buffer too small: release it and fall through to a fresh allocation.
     * Clearing the pointer keeps the token from holding freed memory if the
     * allocation below fails. */
    if(str->data != NULL && str->size < length) {
        mchar_async_free(tree->mchar, str->node_idx, str->data);
        str->data = NULL;
    }
    
    if(str->data == NULL) {
        /* Same (length + 2) sizing for the first allocation and for the
         * re-allocation, so both paths leave identical head-room. */
        mycore_string_init(tree->mchar, tree->mchar_node_id, str, (length + 2));
        
        /* Do not hand a NULL buffer to the data processor. */
        if(str->data == NULL)
            return NULL;
    }
    else {
        /* Existing buffer is large enough: reuse it from the start. */
        str->length = 0;
    }
    
    myhtml_data_process_entry_t proc_entry;
    myhtml_data_process_entry_clean(&proc_entry);
    
    proc_entry.encoding = encoding;
    myencoding_result_clean(&proc_entry.res);
    
    myhtml_data_process(&proc_entry, str, text, length);
    myhtml_data_process_end(&proc_entry, str);
    
    /* The text no longer corresponds to a span of the original input. */
    node->token->raw_begin  = 0;
    node->token->raw_length = 0;
    
    return str;
}
Ejemplo n.º 3
0
/**
 * Worker entry point: turn the raw spans of one queued token into strings.
 *
 * Flow:
 *  - If the tree has MyHTML_TREE_PARSE_FLAGS_WITHOUT_PROCESS_TOKEN set, only
 *    the before/after token callbacks run and the token is marked DONE.
 *  - Text and comment tokens get a string of (raw_length + 1) bytes, have
 *    their attribute list cleared, and are converted with the charef variant
 *    for DATA (with emit_null_char set), RCDATA and CDATA tokens, or with the
 *    plain variant otherwise.
 *  - Tokens with attributes have their own string cleaned; every attribute
 *    key is converted with the lowercase variant and every attribute value
 *    with the charef variant (is_attributes set). Empty keys/values are
 *    cleaned instead of allocated.
 *  - Any other token has its attribute list and string cleared.
 *
 * Finally the token is flagged MyHTML_TOKEN_TYPE_DONE and the after-token
 * callback, if any, is invoked.
 *
 * @param thread_id  index into tree->async_args used to pick this worker's
 *                   mchar node id
 * @param qnode      queue node carrying the tree and the token to process
 */
void myhtml_parser_worker(mythread_id_t thread_id, mythread_queue_node_t *qnode)
{
    myhtml_tree_t* tree = qnode->tree;
    myhtml_token_node_t* token = qnode->token;
    
    if(tree->parse_flags & MyHTML_TREE_PARSE_FLAGS_WITHOUT_PROCESS_TOKEN) {
        if(tree->callback_before_token)
            tree->callback_before_token_ctx = tree->callback_before_token(tree, token, tree->callback_before_token_ctx);
        
        token->type |= MyHTML_TOKEN_TYPE_DONE;
        
        if(tree->callback_after_token)
            tree->callback_after_token_ctx = tree->callback_after_token(tree, token, tree->callback_after_token_ctx);
        
        return;
    }
    
    if(tree->callback_before_token)
        tree->callback_before_token_ctx = tree->callback_before_token(tree, token, tree->callback_before_token_ctx);
    
    size_t mchar_node_id = qnode->tree->async_args[thread_id].mchar_node_id;
    
    if(token->tag_id == MyHTML_TAG__TEXT ||
       token->tag_id == MyHTML_TAG__COMMENT)
    {
        myhtml_string_init(tree->mchar, mchar_node_id, &token->str, (token->raw_length + 1));
        
        token->attr_first = NULL;
        token->attr_last  = NULL;
        
        myhtml_data_process_entry_t proc_entry;
        myhtml_data_process_entry_clean(&proc_entry);
        
        proc_entry.encoding = tree->encoding;
        
        if(token->type & MyHTML_TOKEN_TYPE_DATA) {
            proc_entry.emit_null_char = true;
            
            myhtml_parser_token_data_to_string_charef(tree, &token->str, &proc_entry, token->raw_begin, token->raw_length);
        }
        else if(token->type & MyHTML_TOKEN_TYPE_RCDATA || token->type & MyHTML_TOKEN_TYPE_CDATA) {
            myhtml_parser_token_data_to_string_charef(tree, &token->str, &proc_entry, token->raw_begin, token->raw_length);
        }
        else
            myhtml_parser_token_data_to_string(tree, &token->str, &proc_entry, token->raw_begin, token->raw_length);
    }
    else if(token->attr_first)
    {
        myhtml_string_clean_all(&token->str);
        
        myhtml_data_process_entry_t proc_entry;
        
        for(myhtml_token_attr_t* attr = token->attr_first; attr != NULL; attr = attr->next)
        {
            /* The process entry is re-initialised before every conversion.
             * Sharing one entry across the loop let is_attributes (and any
             * other state left in the entry by a previous string) carry over
             * into the next key or value, so results could depend on the
             * attributes that came before. */
            if(attr->raw_key_length) {
                myhtml_data_process_entry_clean(&proc_entry);
                proc_entry.encoding = tree->encoding;
                
                myhtml_string_init(tree->mchar, mchar_node_id, &attr->key, (attr->raw_key_length + 1));
                myhtml_parser_token_data_to_string_lowercase(tree, &attr->key, &proc_entry, attr->raw_key_begin, attr->raw_key_length);
            }
            else
                myhtml_string_clean_all(&attr->key);
            
            if(attr->raw_value_length) {
                myhtml_data_process_entry_clean(&proc_entry);
                proc_entry.encoding = tree->encoding;
                proc_entry.is_attributes = true;
                
                myhtml_string_init(tree->mchar, mchar_node_id, &attr->value, (attr->raw_value_length + 1));
                myhtml_parser_token_data_to_string_charef(tree, &attr->value, &proc_entry, attr->raw_value_begin, attr->raw_value_length);
            }
            else
                myhtml_string_clean_all(&attr->value);
        }
    }
    else {
        token->attr_first = NULL;
        token->attr_last  = NULL;
        
        myhtml_string_clean_all(&token->str);
    }
    
    token->type |= MyHTML_TOKEN_TYPE_DONE;
    
    if(tree->callback_after_token)
        tree->callback_after_token_ctx = tree->callback_after_token(tree, token, tree->callback_after_token_ctx);
}