예제 #1
0
파일: mystring.c 프로젝트: roox/myhtml
/*
 * State 0 of the character-reference appender: copy plain text up to the
 * next '&'.
 *
 * Scans buff from offset towards size. The scanned run is appended to str,
 * directly when chunk->encoding is MyHTML_ENCODING_UTF_8, otherwise through
 * the chunked encoding converter using chunk->res.
 *
 * If an '&' is found: chunk->res is cleaned (non-UTF-8 path only),
 * chunk->begin is set to the current string length, chunk->state becomes 1,
 * the '&' itself is appended via _myhtml_string_charef_append, and the
 * offset just past the '&' is returned.
 *
 * If no '&' is found the whole remainder is appended and the end offset
 * (== size when offset started below it) is returned; chunk->res is left
 * as the converter produced it.
 */
size_t _myhtml_string_append_char_references_state_0(myhtml_string_char_ref_chunk_t *chunk, myhtml_string_t* str, const char* buff, size_t offset, size_t size)
{
    const size_t run_begin = offset;
    
    /* Advance to the first '&' or to the end of the input. */
    while(offset < size && buff[offset] != '&')
        offset++;
    
    const int found_amp = (offset < size);
    const size_t run_len = offset - run_begin;
    
    /* Flush the plain-text run that precedes the stop position. */
    if(chunk->encoding == MyHTML_ENCODING_UTF_8) {
        myhtml_string_append(str, &buff[run_begin], run_len);
    }
    else {
        myhtml_string_append_chunk_with_convert_encoding(str, &chunk->res, &buff[run_begin], run_len, chunk->encoding);
        
        /* The converter state is only reset when a reference starts. */
        if(found_amp)
            myhtml_encoding_result_clean(&chunk->res);
    }
    
    if(!found_amp)
        return offset;
    
    /* Remember where the reference begins in str and switch to state 1. */
    chunk->begin = str->length;
    chunk->state = 1;
    
    _myhtml_string_charef_append(str, buff[offset]);
    
    return offset + 1;
}
예제 #2
0
파일: mystring.c 프로젝트: CSRedRat/myhtml
/////////////////////////////////////////////////////////
//// Append With Convert Encoding without Preprocessing API
////
/////////////////////////////////////////////////////////
/*
 * One-shot convenience wrapper around the chunked converter.
 *
 * Sets up a local myhtml_encoding_result_t, resets it with
 * myhtml_encoding_result_clean, and forwards a single call to
 * myhtml_string_append_chunk_with_convert_encoding for the given
 * buff/length/encoding. The conversion state is discarded on return, so
 * nothing carries over between calls.
 */
void myhtml_string_append_with_convert_encoding(myhtml_string_t* str, const char* buff, size_t length, myhtml_encoding_t encoding)
{
    myhtml_encoding_result_t conv_state;
    
    myhtml_encoding_result_clean(&conv_state);
    myhtml_string_append_chunk_with_convert_encoding(str, &conv_state, buff, length, encoding);
}
예제 #3
0
파일: mystring.c 프로젝트: roox/myhtml
/*
 * State 2 of the character-reference appender: continue a named-reference
 * lookup from chunk->entry->next.
 *
 * myhtml_charef_find_by_pos advances offset and reports completion through
 * its last argument; its result replaces chunk->entry.
 *
 * - Lookup finished and the entry has code points: chunk->state returns to
 *   0, each code point is written as UTF-8 into str->data starting at
 *   chunk->begin, and str->length is set to the position after the last
 *   written byte.
 * - Lookup finished with no code points: chunk->state returns to 0 and the
 *   consumed input bytes are appended as-is (converted when chunk->encoding
 *   is not UTF-8).
 * - Lookup not finished: the consumed input bytes are appended the same
 *   way and chunk->state is left unchanged.
 *
 * Returns the offset reached by the lookup.
 */
size_t _myhtml_string_append_char_references_state_2(myhtml_string_char_ref_chunk_t *chunk, myhtml_string_t* str, const char* buff, size_t offset, size_t size)
{
    const size_t raw_begin = offset;
    int lookup_done = 0;
    
    chunk->entry = myhtml_charef_find_by_pos(chunk->entry->next, buff, &offset, size, &lookup_done);
    
    if(lookup_done) {
        chunk->state = 0;
        
        if(chunk->entry->codepoints_len) {
            size_t idx = 0;
            
            while(idx < chunk->entry->codepoints_len) {
                /* Ensure room for one encoded code point (up to 4 bytes). */
                MyHTML_STRING_REALLOC_IF_NEED(str, (chunk->begin + 4), 32);
                
                size_t written = myhtml_encoding_codepoint_to_ascii_utf_8(chunk->entry->codepoints[idx], &str->data[chunk->begin]);
                chunk->begin += written;
                
                idx++;
            }
            
            str->length = chunk->begin;
            return offset;
        }
    }
    
    /* No replacement available (yet): pass the consumed bytes through. */
    if(chunk->encoding == MyHTML_ENCODING_UTF_8) {
        myhtml_string_append(str, &buff[raw_begin], (offset - raw_begin));
    }
    else {
        myhtml_string_append_chunk_with_convert_encoding(str, &chunk->res, &buff[raw_begin], (offset - raw_begin), chunk->encoding);
    }
    
    return offset;
}
예제 #4
0
파일: parser.c 프로젝트: roox/myhtml
/*
 * Append `length` bytes of incoming data, starting at absolute position
 * `begin`, to `string`.
 *
 * The starting incoming buffer is obtained from
 * myhtml_parser_find_first_buf(tree, begin). When the requested range lies
 * entirely inside that buffer it is appended in one call. Otherwise the
 * remainder of the first buffer is appended and the function walks the
 * `next` links, appending from each following buffer until `length` bytes
 * have been consumed or the chain ends.
 *
 * Data is appended directly when tree->encoding is MyHTML_ENCODING_UTF_8;
 * otherwise it goes through the encoding converter, with one shared
 * conversion state across all pieces of a multi-buffer range.
 *
 * The `text` parameter is not used by this function.
 *
 * Returns the growth of string->length caused by this call.
 */
size_t myhtml_parser_add_text(myhtml_tree_t *tree, myhtml_string_t* string, const char *text, size_t begin, size_t length)
{
    myhtml_incoming_buf_t *buf = myhtml_parser_find_first_buf(tree, begin);
    
    const size_t rel_offset = begin - buf->offset;
    const size_t start_len = string->length;
    const int is_utf8 = (tree->encoding == MyHTML_ENCODING_UTF_8);
    
    /* Fast path: the whole range lives in a single incoming buffer. */
    if((rel_offset + length) <= buf->size)
    {
        if(is_utf8) {
            myhtml_string_append(string, &buf->data[rel_offset], length);
        }
        else {
            myhtml_string_append_with_convert_encoding(string,
                                                       &buf->data[rel_offset],
                                                       length, tree->encoding);
        }
        
        return (string->length - start_len);
    }
    
    /* Shared converter state so sequences split across buffers survive. */
    myhtml_encoding_result_t res;
    myhtml_encoding_result_clean(&res);
    
    /* First piece: from rel_offset to the end of the first buffer. */
    size_t src_offset = rel_offset;
    size_t piece = buf->size - rel_offset;
    
    for(;;)
    {
        if(is_utf8) {
            myhtml_string_append(string, &buf->data[src_offset], piece);
        }
        else {
            myhtml_string_append_chunk_with_convert_encoding(string, &res, &buf->data[src_offset],
                                                             piece, tree->encoding);
        }
        
        length -= piece;
        buf = buf->next;
        
        if(!buf || !length)
            break;
        
        /* Subsequent pieces start at the beginning of their buffer. */
        src_offset = 0;
        piece = (length > buf->size) ? buf->size : length;
    }
    
    return (string->length - start_len);
}