예제 #1
0
파일: myhtml.c 프로젝트: CSRedRat/myhtml
/*
 * Replace the string stored in node->token->str with `text`, feeding the
 * input through myhtml_data_process() / myhtml_data_process_end() using the
 * given encoding (presumably this is where character references get
 * resolved, per the function name -- confirm against myhtml_data_process).
 *
 * tree     - supplies the token allocator and the mchar string allocator
 * node     - node whose token string is rewritten; a token is allocated
 *            here when the node has none yet
 * text     - input bytes
 * length   - number of bytes in text
 * encoding - must be below MyHTML_ENCODING_LAST_ENTRY
 *
 * Returns a pointer to node->token->str, or NULL when node is NULL, the
 * encoding is out of range, or the token allocation reports a non-zero
 * status (node->token has already been overwritten with the allocator's
 * return value in that last case).
 */
myhtml_string_t * myhtml_node_text_set_with_charef(myhtml_tree_t* tree, myhtml_tree_node_t *node, const char* text, size_t length, myhtml_encoding_t encoding)
{
    if(node == NULL || encoding >= MyHTML_ENCODING_LAST_ENTRY)
        return NULL;

    /* Create the token on first use. */
    if(node->token == NULL) {
        mcobject_async_status_t alloc_status;
        node->token = (myhtml_token_node_t*)mcobject_async_malloc(tree->token->nodes_obj, tree->mcasync_token_id, &alloc_status);

        if(alloc_status)
            return NULL;

        myhtml_token_node_clean(node->token);
    }

    myhtml_token_node_t *token = node->token;
    myhtml_string_t *target = &token->str;

    if(target->data == NULL) {
        /* No buffer yet: allocate one with two spare bytes. */
        myhtml_string_init(tree->mchar, tree->mchar_node_id, target, (length + 2));
    }
    else if(target->size < length) {
        /* Existing buffer is too small: release it and start over. */
        mchar_async_free(tree->mchar, target->node_idx, target->data);
        myhtml_string_init(tree->mchar, tree->mchar_node_id, target, length);
    }
    else {
        /* Existing buffer is big enough: reuse it from the start. */
        target->length = 0;
    }

    myhtml_data_process_entry_t entry;
    myhtml_data_process_entry_clean(&entry);

    entry.encoding = encoding;
    myhtml_encoding_result_clean(&entry.res);

    myhtml_data_process(&entry, target, text, length);
    myhtml_data_process_end(&entry, target);

    /* Zero the raw begin/length fields of the token. */
    token->raw_begin  = 0;
    token->raw_length = 0;

    return target;
}
예제 #2
0
파일: myhtml.c 프로젝트: eriknstr/myhtml
/*
 * Replace the string stored in node->token->my_str_tm with `text`.
 *
 * Input whose encoding is MyHTML_ENCODING_UTF_8 is appended as-is; any other
 * encoding goes through myhtml_string_append_with_convert_encoding().
 *
 * tree     - supplies the token allocator and the mchar string allocator
 * node     - node whose token string is rewritten; a token is allocated
 *            here when the node has none yet
 * text     - input bytes
 * length   - number of bytes in text
 * encoding - must be below MyHTML_ENCODING_LAST_ENTRY
 *
 * On success token->begin is 0 and token->length equals the resulting string
 * length. Returns a pointer to node->token->my_str_tm, or NULL when node is
 * NULL, the encoding is out of range, or the token allocation reports a
 * non-zero status.
 */
myhtml_string_t * myhtml_node_text_set(myhtml_tree_t* tree, myhtml_tree_node_t *node, const char* text, size_t length, myhtml_encoding_t encoding)
{
    if(node == NULL || encoding >= MyHTML_ENCODING_LAST_ENTRY)
        return NULL;

    /* Create the token on first use. */
    if(node->token == NULL) {
        mcobject_async_status_t alloc_status;
        node->token = (myhtml_token_node_t*)mcobject_async_malloc(tree->token->nodes_obj, tree->mcasync_token_id, &alloc_status);

        if(alloc_status)
            return NULL;

        myhtml_token_node_clean(node->token);
    }

    myhtml_token_node_t *token = node->token;
    myhtml_string_t *target = &token->my_str_tm;

    if(target->data == NULL) {
        /* No buffer yet: allocate one with two spare bytes. */
        myhtml_string_init(tree->mchar, tree->mchar_node_id, target, (length + 2));
    }
    else if(target->size < length) {
        /* Existing buffer is too small: release it and start over. */
        mchar_async_free(tree->mchar, target->node_idx, target->data);
        myhtml_string_init(tree->mchar, tree->mchar_node_id, target, length);
    }
    else {
        /* Existing buffer is big enough: reuse it from the start. */
        target->length = 0;
    }

    if(encoding == MyHTML_ENCODING_UTF_8)
        myhtml_string_append(target, text, length);
    else
        myhtml_string_append_with_convert_encoding(target, text, length, encoding);

    /* The token now spans the whole freshly written string. */
    token->begin  = 0;
    token->length = target->length;

    return target;
}
예제 #3
0
파일: myhtml.c 프로젝트: eriknstr/myhtml
/*
 * Replace the string stored in node->token->my_str_tm with `text`, appending
 * it through myhtml_string_append_charef() and closing the chunk with
 * myhtml_string_append_charef_end() (presumably resolving character
 * references, per the helper names -- confirm against their definitions).
 *
 * tree     - supplies the token allocator and the mchar string allocator
 * node     - node whose token string is rewritten; a token is allocated
 *            here when the node has none yet
 * text     - input bytes
 * length   - number of bytes in text
 * encoding - must be below MyHTML_ENCODING_LAST_ENTRY
 *
 * On success token->begin is 0 and token->length equals the resulting string
 * length. Returns a pointer to node->token->my_str_tm, or NULL when node is
 * NULL, the encoding is out of range, or the token allocation reports a
 * non-zero status.
 */
myhtml_string_t * myhtml_node_text_set_with_charef(myhtml_tree_t* tree, myhtml_tree_node_t *node, const char* text, size_t length, myhtml_encoding_t encoding)
{
    if(node == NULL || encoding >= MyHTML_ENCODING_LAST_ENTRY)
        return NULL;

    /* Create the token on first use. */
    if(node->token == NULL) {
        mcobject_async_status_t alloc_status;
        node->token = (myhtml_token_node_t*)mcobject_async_malloc(tree->token->nodes_obj, tree->mcasync_token_id, &alloc_status);

        if(alloc_status)
            return NULL;

        myhtml_token_node_clean(node->token);
    }

    myhtml_token_node_t *token = node->token;
    myhtml_string_t *target = &token->my_str_tm;

    if(target->data == NULL) {
        /* No buffer yet: allocate one with two spare bytes. */
        myhtml_string_init(tree->mchar, tree->mchar_node_id, target, (length + 2));
    }
    else if(target->size < length) {
        /* Existing buffer is too small: release it and start over. */
        mchar_async_free(tree->mchar, target->node_idx, target->data);
        myhtml_string_init(tree->mchar, tree->mchar_node_id, target, length);
    }
    else {
        /* Existing buffer is big enough: reuse it from the start. */
        target->length = 0;
    }

    /* Positional initializer: the last member receives the encoding. */
    myhtml_string_char_ref_chunk_t chunk = {0, 0, 0, {0}, false, encoding};
    myhtml_encoding_result_clean(&chunk.res);

    myhtml_string_append_charef(&chunk, target, text, length);
    myhtml_string_append_charef_end(&chunk, target);

    /* The token now spans the whole freshly written string. */
    token->begin  = 0;
    token->length = target->length;

    return target;
}
예제 #4
0
파일: parser.c 프로젝트: roox/myhtml
/*
 * Worker routine for one queued token: builds the token's string data from
 * the raw input referenced by the queue node and then flags the token with
 * MyHTML_TOKEN_TYPE_DONE.
 *
 * thread_id - index into qnode->tree->async_args, used to pick the mchar
 *             node id for string allocations
 * qnode     - queue entry carrying the token, the tree, the source text and
 *             the begin/length of the span to process
 *
 * Three cases:
 *   - text/comment tokens: the span is copied into token->my_str_tm; RCDATA,
 *     CDATA and DATA tokens go through the "with_charef" helper, everything
 *     else through the plain one;
 *   - tokens with attributes: the token string is zeroed and every
 *     attribute gets its own `entry` string holding the lowercased name
 *     followed by the value;
 *   - anything else: the token string, offsets and attribute list are zeroed.
 */
void myhtml_parser_worker(mythread_id_t thread_id, mythread_queue_node_t *qnode)
{
    myhtml_token_node_t* token = qnode->token;
    myhtml_string_t* tok_str = &token->my_str_tm;

    size_t mchar_node_id = qnode->tree->async_args[thread_id].mchar_node_id;

    if(token->tag_ctx_idx == MyHTML_TAG__TEXT || token->tag_ctx_idx == MyHTML_TAG__COMMENT)
    {
        myhtml_string_init(qnode->tree->mchar, mchar_node_id, tok_str, (qnode->length + 2));

        token->begin      = tok_str->length;
        token->length     = qnode->length;
        token->attr_first = NULL;
        token->attr_last  = NULL;

        /* Any of the three flags selects the "with_charef" helper. */
        if(token->type & (MyHTML_TOKEN_TYPE_RCDATA | MyHTML_TOKEN_TYPE_CDATA | MyHTML_TOKEN_TYPE_DATA)) {
            token->length = myhtml_parser_add_text_with_charef(qnode->tree, tok_str, qnode->text, qnode->begin, qnode->length);
        }
        else {
            token->length = myhtml_parser_add_text(qnode->tree, tok_str, qnode->text, qnode->begin, qnode->length);
        }
    }
    else if(token->attr_first)
    {
        /* The token itself carries no text. */
        tok_str->data     = NULL;
        tok_str->mchar    = NULL;
        tok_str->node_idx = 0;
        tok_str->length   = 0;
        tok_str->size     = 0;

        token->begin  = 0;
        token->length = 0;

        for(myhtml_token_attr_t* attr = token->attr_first; attr; attr = attr->next)
        {
            myhtml_string_init(qnode->tree->mchar, mchar_node_id, &attr->entry, (attr->name_length + attr->value_length + 8));

            if(attr->name_length)
            {
                /* name_begin switches from an offset in the source text to an offset in attr->entry */
                size_t raw_name_begin = attr->name_begin;
                attr->name_begin = attr->entry.length;

                attr->name_length = myhtml_parser_add_text_lowercase(qnode->tree, &attr->entry, qnode->text, raw_name_begin, attr->name_length);
            }

            if(attr->value_length)
            {
                /* same switch for the value, which is appended after the name */
                size_t raw_value_begin = attr->value_begin;
                attr->value_begin = attr->entry.length;

                attr->value_length = myhtml_parser_add_text_with_charef(qnode->tree, &attr->entry, qnode->text, raw_value_begin, attr->value_length);
            }
        }
    }
    else
    {
        token->begin      = 0;
        token->length     = 0;
        token->attr_first = NULL;
        token->attr_last  = NULL;

        tok_str->data     = NULL;
        tok_str->mchar    = NULL;
        tok_str->node_idx = 0;
        tok_str->length   = 0;
        tok_str->size     = 0;
    }

    token->type |= MyHTML_TOKEN_TYPE_DONE;
}
예제 #5
0
파일: parser.c 프로젝트: CSRedRat/myhtml
/*
 * Worker routine for one queued token: converts the raw span(s) referenced
 * by the token into string data, marks the token MyHTML_TOKEN_TYPE_DONE and
 * invokes the tree's before/after token callbacks when they are set.
 *
 * thread_id - index into tree->async_args, used to pick the mchar node id
 *             for string allocations
 * qnode     - queue entry carrying the tree and the token to process
 *
 * When MyHTML_TREE_PARSE_FLAGS_WITHOUT_PROCESS_TOKEN is set in
 * tree->parse_flags, no string data is produced: only the callbacks run and
 * the token is marked done.
 *
 * Otherwise:
 *   - text/comment tokens are converted into token->str (DATA tokens with
 *     emit_null_char enabled; DATA, RCDATA and CDATA through the "charef"
 *     conversion; everything else through the plain conversion);
 *   - tokens with attributes get token->str cleared and each attribute's key
 *     (lowercased) and value converted into attr->key / attr->value;
 *   - any other token gets its string and attribute list cleared.
 */
void myhtml_parser_worker(mythread_id_t thread_id, mythread_queue_node_t *qnode)
{
    myhtml_tree_t* tree = qnode->tree;
    myhtml_token_node_t* token = qnode->token;
    
    if(tree->parse_flags & MyHTML_TREE_PARSE_FLAGS_WITHOUT_PROCESS_TOKEN) {
        if(tree->callback_before_token)
            tree->callback_before_token_ctx = tree->callback_before_token(tree, token, tree->callback_before_token_ctx);
        
        token->type |= MyHTML_TOKEN_TYPE_DONE;
        
        if(tree->callback_after_token)
            tree->callback_after_token_ctx = tree->callback_after_token(tree, token, tree->callback_after_token_ctx);
        
        return;
    }
    
    if(tree->callback_before_token)
        tree->callback_before_token_ctx = tree->callback_before_token(tree, token, tree->callback_before_token_ctx);
    
    size_t mchar_node_id = tree->async_args[thread_id].mchar_node_id;
    
    if(token->tag_id == MyHTML_TAG__TEXT ||
       token->tag_id == MyHTML_TAG__COMMENT)
    {
        myhtml_string_init(tree->mchar, mchar_node_id, &token->str, (token->raw_length + 1));
        
        token->attr_first = NULL;
        token->attr_last  = NULL;
        
        myhtml_data_process_entry_t proc_entry;
        myhtml_data_process_entry_clean(&proc_entry);
        
        proc_entry.encoding = tree->encoding;
        
        if(token->type & MyHTML_TOKEN_TYPE_DATA) {
            proc_entry.emit_null_char = true;
            
            myhtml_parser_token_data_to_string_charef(tree, &token->str, &proc_entry, token->raw_begin, token->raw_length);
        }
        else if(token->type & MyHTML_TOKEN_TYPE_RCDATA || token->type & MyHTML_TOKEN_TYPE_CDATA) {
            myhtml_parser_token_data_to_string_charef(tree, &token->str, &proc_entry, token->raw_begin, token->raw_length);
        }
        else
            myhtml_parser_token_data_to_string(tree, &token->str, &proc_entry, token->raw_begin, token->raw_length);
    }
    else if(token->attr_first)
    {
        myhtml_string_clean_all(&token->str);
        
        myhtml_token_attr_t* attr = token->attr_first;
        myhtml_data_process_entry_t proc_entry;
        
        while(attr)
        {
            if(attr->raw_key_length) {
                /*
                 * Start every conversion from a freshly cleaned entry.
                 * Reusing one entry for the whole loop left is_attributes
                 * set (and any other state the processor keeps in the
                 * entry) from the previous attribute's value while the next
                 * key was being converted.
                 */
                myhtml_data_process_entry_clean(&proc_entry);
                proc_entry.encoding = tree->encoding;
                
                myhtml_string_init(tree->mchar, mchar_node_id, &attr->key, (attr->raw_key_length + 1));
                myhtml_parser_token_data_to_string_lowercase(tree, &attr->key, &proc_entry, attr->raw_key_begin, attr->raw_key_length);
            }
            else
                myhtml_string_clean_all(&attr->key);
            
            if(attr->raw_value_length) {
                /* Values, and only values, are processed in attribute mode. */
                myhtml_data_process_entry_clean(&proc_entry);
                proc_entry.encoding = tree->encoding;
                proc_entry.is_attributes = true;
                
                myhtml_string_init(tree->mchar, mchar_node_id, &attr->value, (attr->raw_value_length + 1));
                myhtml_parser_token_data_to_string_charef(tree, &attr->value, &proc_entry, attr->raw_value_begin, attr->raw_value_length);
            }
            else
                myhtml_string_clean_all(&attr->value);
            
            attr = attr->next;
        }
    }
    else {
        token->attr_first = NULL;
        token->attr_last  = NULL;
        
        myhtml_string_clean_all(&token->str);
    }
    
    token->type |= MyHTML_TOKEN_TYPE_DONE;
    
    if(tree->callback_after_token)
        tree->callback_after_token_ctx = tree->callback_after_token(tree, token, tree->callback_after_token_ctx);
}
예제 #6
0
파일: parser.c 프로젝트: adrianhust/myhtml
/*
 * Worker routine for one queued token: builds the token's string data from
 * the raw input referenced by the queue node and then flags the token with
 * MyHTML_TOKEN_TYPE_DONE.
 *
 * thread_id - index into qnode->tree->async_args, used to pick the mchar
 *             node id for string allocations
 * qnode     - queue entry carrying the token, the tree, the source text and
 *             the begin/length of the span to process
 *
 * Three cases:
 *   - text/comment tokens: a NULL token becomes a one-byte string holding
 *     '\0'; DATA, RCDATA and CDATA tokens go through the "with_charef"
 *     helper (last argument false); everything else through the plain one;
 *   - tokens with attributes: the token string is zeroed and every
 *     attribute gets its own `entry` string holding the lowercased name
 *     followed by the value (converted with the last argument true), and
 *     its namespace is set to MyHTML_NAMESPACE_HTML;
 *   - anything else: the token string, offsets and attribute list are zeroed.
 */
void myhtml_parser_worker(mythread_id_t thread_id, mythread_queue_node_t *qnode)
{
    myhtml_token_node_t* token = qnode->token;
    myhtml_string_t* tok_str = &token->my_str_tm;

    size_t mchar_node_id = qnode->tree->async_args[thread_id].mchar_node_id;

    if(token->tag_ctx_idx == MyHTML_TAG__TEXT || token->tag_ctx_idx == MyHTML_TAG__COMMENT)
    {
        myhtml_string_init(qnode->tree->mchar, mchar_node_id, tok_str, (qnode->length + 4));

        token->begin      = tok_str->length;
        token->length     = qnode->length;
        token->attr_first = NULL;
        token->attr_last  = NULL;

        if(token->type & MyHTML_TOKEN_TYPE_NULL) {
            /*
             * A NULL token consists of exactly one '\0' character. Later
             * stages (rules processing) may turn it into
             * 'REPLACEMENT CHARACTER' (U+FFFD).
             */
            token->length = 1;

            tok_str->length  = 1;
            tok_str->data[0] = '\0';
        }
        else if(token->type & (MyHTML_TOKEN_TYPE_DATA | MyHTML_TOKEN_TYPE_RCDATA | MyHTML_TOKEN_TYPE_CDATA)) {
            token->length = myhtml_parser_add_text_with_charef(qnode->tree, tok_str, qnode->text, qnode->begin, qnode->length, false);
        }
        else {
            token->length = myhtml_parser_add_text(qnode->tree, tok_str, qnode->text, qnode->begin, qnode->length);
        }
    }
    else if(token->attr_first)
    {
        /* The token itself carries no text. */
        tok_str->data     = NULL;
        tok_str->mchar    = NULL;
        tok_str->node_idx = 0;
        tok_str->length   = 0;
        tok_str->size     = 0;

        token->begin  = 0;
        token->length = 0;

        for(myhtml_token_attr_t* attr = token->attr_first; attr; attr = attr->next)
        {
            myhtml_string_init(qnode->tree->mchar, mchar_node_id, &attr->entry, (attr->name_length + attr->value_length + 8));

            if(attr->name_length)
            {
                /* name_begin switches from an offset in the source text to an offset in attr->entry */
                size_t raw_name_begin = attr->name_begin;
                attr->name_begin = attr->entry.length;

                attr->name_length = myhtml_parser_add_text_lowercase(qnode->tree, &attr->entry, qnode->text, raw_name_begin, attr->name_length);
            }

            if(attr->value_length)
            {
                /* same switch for the value, which is appended after the name */
                size_t raw_value_begin = attr->value_begin;
                attr->value_begin = attr->entry.length;

                attr->value_length = myhtml_parser_add_text_with_charef(qnode->tree, &attr->entry, qnode->text, raw_value_begin, attr->value_length, true);
            }

            attr->my_namespace = MyHTML_NAMESPACE_HTML;
        }
    }
    else
    {
        token->begin      = 0;
        token->length     = 0;
        token->attr_first = NULL;
        token->attr_last  = NULL;

        tok_str->data     = NULL;
        tok_str->mchar    = NULL;
        tok_str->node_idx = 0;
        tok_str->length   = 0;
        tok_str->size     = 0;
    }

    token->type |= MyHTML_TOKEN_TYPE_DONE;
}