Exemple #1
0
size_t myhtml_parser_token_data_to_string_lowercase(myhtml_tree_t *tree, myhtml_string_t* str, myhtml_data_process_entry_t* proc_entry, size_t begin, size_t length)
{
    myhtml_incoming_buffer_t *buffer = myhtml_incoming_buffer_find_by_position(tree->incoming_buf_first, begin);
    size_t relative_begin = begin - buffer->offset;
    
    // if token data length in one buffer then print them all at once
    if((relative_begin + length) <= buffer->size) {
        if(tree->encoding == MyHTML_ENCODING_UTF_8)
            myhtml_string_append_lowercase_with_preprocessing(str, &buffer->data[relative_begin], length, proc_entry->emit_null_char);
        else
            myhtml_string_append_lowercase_chunk_with_convert_encoding_with_preprocessing(str, &proc_entry->res,
                                                                                          &buffer->data[relative_begin], length,
                                                                                          proc_entry->encoding, proc_entry->emit_null_char);
        
        return str->length;
    }
    
    size_t save_position = 0;
    // if the data are spread across multiple buffers that join them
    while(buffer) {
        if((relative_begin + length) > buffer->size)
        {
            size_t relative_end = (buffer->size - relative_begin);
            length -= relative_end;
            
            size_t tmp_offset = myhtml_string_before_append_any_preprocessing(str, &buffer->data[relative_begin], relative_end, save_position);
            
            if(relative_end > 0) {
                if(tree->encoding == MyHTML_ENCODING_UTF_8)
                    save_position = myhtml_string_append_lowercase_with_preprocessing(str, &buffer->data[(relative_begin + tmp_offset)], (relative_end - tmp_offset), proc_entry->emit_null_char);
                else
                    save_position = myhtml_string_append_lowercase_chunk_with_convert_encoding_with_preprocessing(str, &proc_entry->res,
                                                                                                                  &buffer->data[(relative_begin + tmp_offset)], (relative_end - tmp_offset),
                                                                                                                  proc_entry->encoding, proc_entry->emit_null_char);
            }
            
            relative_begin = 0;
            buffer         = buffer->next;
        }
        else {
            size_t tmp_offset = myhtml_string_before_append_any_preprocessing(str, &buffer->data[relative_begin], length, save_position);
            
            if(length > 0) {
                if(tree->encoding == MyHTML_ENCODING_UTF_8)
                    myhtml_string_append_lowercase_with_preprocessing(str, &buffer->data[(relative_begin + tmp_offset)], (length - tmp_offset), proc_entry->emit_null_char);
                else
                    myhtml_string_append_lowercase_chunk_with_convert_encoding_with_preprocessing(str, &proc_entry->res,
                                                                                                                  &buffer->data[(relative_begin + tmp_offset)], (length - tmp_offset),
                                                                                                                  proc_entry->encoding, proc_entry->emit_null_char);
            }
            
            break;
        }
    }
    
    return str->length;
}
Exemple #2
0
size_t myhtml_parser_add_text_lowercase(myhtml_tree_t *tree, myhtml_string_t* string, const char *text, size_t begin, size_t length)
{
    myhtml_incoming_buf_t *inc_buf = myhtml_parser_find_first_buf(tree, begin);
    
    myhtml_encoding_result_t str_res;
    myhtml_encoding_result_clean(&str_res);
    
    size_t current_buf_offset = begin - inc_buf->offset;
    size_t save_str_len = string->length;
    
    if((current_buf_offset + length) <= inc_buf->size)
    {
        if(tree->encoding == MyHTML_ENCODING_UTF_8)
            myhtml_string_append_lowercase_with_preprocessing(string, &inc_buf->data[current_buf_offset], length);
        else
            myhtml_string_append_chunk_lowercase_ascii_with_convert_encoding(string, &str_res, &inc_buf->data[current_buf_offset], length, tree->encoding);
        
        return (string->length - save_str_len);
    }
    
    size_t buf_next_offset = inc_buf->size - current_buf_offset;
    
    if(tree->encoding == MyHTML_ENCODING_UTF_8)
        myhtml_string_append_lowercase_with_preprocessing(string, &inc_buf->data[current_buf_offset], buf_next_offset);
    else
        myhtml_string_append_chunk_lowercase_ascii_with_convert_encoding(string, &str_res, &inc_buf->data[current_buf_offset], buf_next_offset, tree->encoding);
    
    length = length - buf_next_offset;
    inc_buf = inc_buf->next;
    
    if(tree->encoding == MyHTML_ENCODING_UTF_8) {
        while (inc_buf && length)
        {
            if(length > inc_buf->size) {
                myhtml_string_append_lowercase_with_preprocessing(string, inc_buf->data, inc_buf->size);
                length -= inc_buf->size;
            }
            else {
                myhtml_string_append_lowercase_with_preprocessing(string, inc_buf->data, length);
                break;
            }
            
            inc_buf = inc_buf->next;
        }
    }
    else {
        while (inc_buf && length)
        {
            if(length > inc_buf->size) {
                myhtml_string_append_chunk_lowercase_ascii_with_convert_encoding(string, &str_res, inc_buf->data, inc_buf->size, tree->encoding);
                length -= inc_buf->size;
            }
            else {
                myhtml_string_append_chunk_lowercase_ascii_with_convert_encoding(string, &str_res, inc_buf->data, length, tree->encoding);
                break;
            }
            
            inc_buf = inc_buf->next;
        }
    }
    
    return (string->length - save_str_len);
}