예제 #1
0
파일: mystring.c 프로젝트: roox/myhtml
/*
 * State 0 of the character-reference appender: copy ordinary text.
 *
 * Scans buff[offset .. size) for the first '&'. The bytes in front of it
 * (or the whole remaining span when no '&' is present) are appended to str,
 * either verbatim when chunk->encoding is UTF-8 or through the chunked
 * encoding converter otherwise.
 *
 * When an '&' is found:
 *   - the converter state in chunk->res is cleaned (non-UTF-8 path only),
 *   - chunk->begin records str->length as it is right before the '&',
 *   - chunk->state is switched to 1,
 *   - the '&' itself is pushed with _myhtml_string_charef_append().
 *
 * Returns the offset of the first byte not consumed: one past the '&' when
 * one was found, otherwise the offset at which scanning stopped.
 */
size_t _myhtml_string_append_char_references_state_0(myhtml_string_char_ref_chunk_t *chunk, myhtml_string_t* str, const char* buff, size_t offset, size_t size)
{
    const size_t start = offset;

    /* Skip forward to the first ampersand or to the end of the input. */
    while(offset < size && buff[offset] != '&')
        offset++;

    const size_t run_len = offset - start;

    if(offset >= size) {
        /* No ampersand in this span; note that chunk->res is NOT cleaned on this path. */
        if(chunk->encoding == MyHTML_ENCODING_UTF_8)
            myhtml_string_append(str, &buff[start], run_len);
        else
            myhtml_string_append_chunk_with_convert_encoding(str, &chunk->res, &buff[start], run_len, chunk->encoding);

        return offset;
    }

    /* buff[offset] is '&': flush the text that precedes it. */
    if(chunk->encoding != MyHTML_ENCODING_UTF_8) {
        myhtml_string_append_chunk_with_convert_encoding(str, &chunk->res, &buff[start], run_len, chunk->encoding);
        myhtml_encoding_result_clean(&chunk->res);
    }
    else {
        myhtml_string_append(str, &buff[start], run_len);
    }

    /* Must be taken after the flush so it points at where the '&' will land. */
    chunk->begin = str->length;
    chunk->state = 1;

    _myhtml_string_charef_append(str, buff[offset]);

    return offset + 1;
}
예제 #2
0
파일: mystring.c 프로젝트: roox/myhtml
/*
 * State 2 of the character-reference appender: continue matching a
 * reference against the charef table.
 *
 * Calls myhtml_charef_find_by_pos() starting from chunk->entry->next; offset
 * is handed over by pointer and the result replaces chunk->entry.
 *
 * If the lookup reports completion, chunk->state is reset to 0. When the
 * resulting entry carries code points, each is written as UTF-8 into
 * str->data starting at chunk->begin and str->length is set to the end of
 * what was written.
 *
 * In every other case (lookup not finished, or finished with an entry that
 * has no code points) the raw bytes buff[start .. offset) are appended to
 * str, converted from chunk->encoding when that is not UTF-8.
 *
 * Returns offset as left by the lookup.
 */
size_t _myhtml_string_append_char_references_state_2(myhtml_string_char_ref_chunk_t *chunk, myhtml_string_t* str, const char* buff, size_t offset, size_t size)
{
    const size_t start = offset;
    int finished = 0;

    chunk->entry = myhtml_charef_find_by_pos(chunk->entry->next, buff, &offset, size, &finished);

    if(finished) {
        chunk->state = 0;

        if(chunk->entry->codepoints_len) {
            size_t idx = 0;

            while(idx < chunk->entry->codepoints_len) {
                /* Ask for room for 4 more bytes at chunk->begin before encoding. */
                MyHTML_STRING_REALLOC_IF_NEED(str, (chunk->begin + 4), 32);

                chunk->begin += myhtml_encoding_codepoint_to_ascii_utf_8(chunk->entry->codepoints[idx], &str->data[chunk->begin]);
                idx++;
            }

            str->length = chunk->begin;
            return offset;
        }
    }

    /* Nothing was substituted: pass the scanned bytes through unchanged. */
    if(chunk->encoding != MyHTML_ENCODING_UTF_8)
        myhtml_string_append_chunk_with_convert_encoding(str, &chunk->res, &buff[start], (offset - start), chunk->encoding);
    else
        myhtml_string_append(str, &buff[start], (offset - start));

    return offset;
}
예제 #3
0
파일: myhtml.c 프로젝트: eriknstr/myhtml
/*
 * Replace the text stored in a node's token with `text`.
 *
 * tree     - supplies the token allocator and the character allocator
 * node     - node whose token string is (re)filled; NULL yields NULL
 * text     - source bytes, `length` of them
 * encoding - encoding of `text`; values >= MyHTML_ENCODING_LAST_ENTRY yield NULL
 *
 * A token is allocated and cleaned if the node has none yet; a non-zero
 * allocation status yields NULL. The token string is initialised when it has
 * no data, re-created when its size is below `length`, and otherwise just
 * rewound to length 0. The text is then appended, going through the encoding
 * converter unless `encoding` is UTF-8, and the token's begin/length are
 * updated to cover the new string.
 *
 * Returns a pointer to the token's string, or NULL on the failures above.
 */
myhtml_string_t * myhtml_node_text_set(myhtml_tree_t* tree, myhtml_tree_node_t *node, const char* text, size_t length, myhtml_encoding_t encoding)
{
    if(node == NULL || encoding >= MyHTML_ENCODING_LAST_ENTRY)
        return NULL;

    if(node->token == NULL) {
        mcobject_async_status_t mcstatus;
        node->token = (myhtml_token_node_t*)mcobject_async_malloc(tree->token->nodes_obj, tree->mcasync_token_id, &mcstatus);

        if(mcstatus)
            return NULL;

        myhtml_token_node_clean(node->token);
    }

    myhtml_string_t *str = &node->token->my_str_tm;

    if(str->data == NULL) {
        /* First use: initialise with length + 2. */
        myhtml_string_init(tree->mchar, tree->mchar_node_id, str, (length + 2));
    }
    else if(str->size < length) {
        /* Existing buffer is smaller than the new text: release and re-create it. */
        mchar_async_free(tree->mchar, str->node_idx, str->data);
        myhtml_string_init(tree->mchar, tree->mchar_node_id, str, length);
    }
    else {
        /* Existing buffer is kept; just rewind it. */
        str->length = 0;
    }

    if(encoding == MyHTML_ENCODING_UTF_8)
        myhtml_string_append(str, text, length);
    else
        myhtml_string_append_with_convert_encoding(str, text, length, encoding);

    node->token->begin  = 0;
    node->token->length = str->length;

    return str;
}
예제 #4
0
파일: mystring.c 프로젝트: CSRedRat/myhtml
/*
 * Append the contents of `target` (target->length bytes starting at
 * target->data) to `dest` via myhtml_string_append().
 *
 * Note: despite the name, nothing in this function resets `dest` first.
 */
void myhtml_string_copy(myhtml_string_t* dest, myhtml_string_t* target)
{
    const char *src_data = target->data;
    const size_t src_len = target->length;

    myhtml_string_append(dest, src_data, src_len);
}
예제 #5
0
파일: parser.c 프로젝트: roox/myhtml
/*
 * Append `length` bytes of incoming data, starting at absolute position
 * `begin`, to `string`.
 *
 * The data is read from the tree's incoming-buffer list: the buffer returned
 * by myhtml_parser_find_first_buf() for `begin`, followed by its ->next
 * successors when the requested range runs past the end of that buffer.
 * Bytes are appended verbatim when tree->encoding is UTF-8; otherwise they
 * go through the encoding converter (the chunked variant, sharing one
 * converter state, when more than one buffer is involved).
 *
 * The `text` parameter is not referenced by this function.
 *
 * Returns the number of bytes by which string->length grew.
 */
size_t myhtml_parser_add_text(myhtml_tree_t *tree, myhtml_string_t* string, const char *text, size_t begin, size_t length)
{
    myhtml_incoming_buf_t *buf = myhtml_parser_find_first_buf(tree, begin);

    const size_t len_before = string->length;
    const size_t local_off  = begin - buf->offset;
    const int    is_utf8    = (tree->encoding == MyHTML_ENCODING_UTF_8);

    /* Fast path: the whole range lives inside the first buffer. */
    if((local_off + length) <= buf->size)
    {
        if(is_utf8)
            myhtml_string_append(string, &buf->data[local_off], length);
        else
            myhtml_string_append_with_convert_encoding(string,
                                                       &buf->data[local_off],
                                                       length, tree->encoding);

        return (string->length - len_before);
    }

    /* Range spans several buffers: one converter state is shared across all chunks. */
    myhtml_encoding_result_t res;
    myhtml_encoding_result_clean(&res);

    /* Tail of the first buffer. */
    const size_t first_len = buf->size - local_off;

    if(is_utf8)
        myhtml_string_append(string, &buf->data[local_off], first_len);
    else
        myhtml_string_append_chunk_with_convert_encoding(string, &res, &buf->data[local_off],
                                                         first_len, tree->encoding);

    length -= first_len;

    /* Following buffers: take each one whole until the remainder fits. */
    for(buf = buf->next; buf && length; buf = buf->next)
    {
        const size_t take = (length > buf->size) ? buf->size : length;

        if(is_utf8)
            myhtml_string_append(string, buf->data, take);
        else
            myhtml_string_append_chunk_with_convert_encoding(string, &res, buf->data,
                                                             take, tree->encoding);

        /* take == length only when the remainder fit into this buffer. */
        if(take == length)
            break;

        length -= take;
    }

    return (string->length - len_before);
}