/*
 * Character-reference state 5: hexadecimal digits of a numeric reference.
 *
 * On entry the prefix "&#x" (or "&#X") is expected to have been appended to
 * str starting at chunk->begin by the earlier states, so the first digit
 * lands at chunk->begin + 3.
 *
 * Each hex digit is folded into chunk->l_data and echoed into str. The first
 * byte that is not a hex digit ends the state:
 *   - chunk->state is reset to 0;
 *   - if no digit was collected, the function returns immediately and the
 *     terminating byte is left unconsumed;
 *   - otherwise a terminating ';' is consumed and
 *     _myhtml_string_append_char_references_state_end() is called.
 *
 * If the buffer is exhausted first, chunk->state is left untouched so the
 * caller can resume with the next buffer.
 *
 * Returns the offset of the first byte that was not consumed.
 */
size_t _myhtml_string_append_char_references_state_5(myhtml_string_char_ref_chunk_t *chunk, myhtml_string_t* str, const char* buff, size_t offset, size_t size)
{
    /* 0x10FFFF, the largest Unicode code point, needs six hex digits. */
    const size_t max_hex_digits = 6;

    /* Index the lookup table with unsigned bytes; plain char may be signed. */
    const unsigned char *u_buff = (const unsigned char*)buff;

    /* Position in str of the first digit, i.e. just past "&#x". */
    size_t start_pos = chunk->begin + 3;

    while(offset < size)
    {
        if(myhtml_string_chars_hex_map[ u_buff[offset] ] == 0xff)
        {
            chunk->state = 0;

            /*
             * No digits collected. The count must be measured from start_pos
             * (begin + 3): measuring from begin + 2 can never yield zero in
             * this state because the 'x' has already been appended.
             */
            if((str->length - start_pos) == 0) {
                return offset;
            }

            if(buff[offset] == ';')
                offset++;

            _myhtml_string_append_char_references_state_end(chunk, str);
            break;
        }

        /* Digits beyond the cap are skipped: neither accumulated nor copied. */
        if((str->length - start_pos) < max_hex_digits) {
            chunk->l_data <<= 4;
            chunk->l_data |= myhtml_string_chars_hex_map[ u_buff[offset] ];

            _myhtml_string_charef_append(str, buff[offset]);
        }

        offset++;
    }

    return offset;
}
/*
 * Character-reference state 4: decimal digits of a numeric reference.
 *
 * On entry the prefix "&#" is expected to have been appended to str starting
 * at chunk->begin by the earlier states, so the first digit lands at
 * chunk->begin + 2.
 *
 * Each decimal digit is folded into chunk->l_data and echoed into str. The
 * first byte that is not a decimal digit ends the state:
 *   - chunk->state is reset to 0;
 *   - if no digit was collected, the function returns immediately and the
 *     terminating byte is left unconsumed;
 *   - otherwise a terminating ';' is consumed and
 *     _myhtml_string_append_char_references_state_end() is called.
 *
 * If the buffer is exhausted first, chunk->state is left untouched so the
 * caller can resume with the next buffer.
 *
 * Returns the offset of the first byte that was not consumed.
 */
size_t _myhtml_string_append_char_references_state_4(myhtml_string_char_ref_chunk_t *chunk, myhtml_string_t* str, const unsigned char *buff, size_t offset, size_t size)
{
    while(offset < size)
    {
        if(myhtml_string_chars_num_map[ buff[offset] ] == 0xff)
        {
            chunk->state = 0;

            /* Nothing after "&#": not a numeric reference. */
            if((str->length - (chunk->begin + 2)) == 0) {
                return offset;
            }

            if(buff[offset] == ';')
                offset++;

            _myhtml_string_append_char_references_state_end(chunk, str);
            break;
        }

        /*
         * Stop accumulating once the value is already past the last Unicode
         * code point (0x10FFFF). Without this guard an arbitrarily long digit
         * run overflows l_data and can wrap around to a valid-looking code
         * point. The value stays above 0x10FFFF, so it remains recognisable
         * as out of range.
         * NOTE(review): assumes the state_end helper range-checks l_data -
         * confirm.
         */
        if(chunk->l_data <= 0x10FFFF) {
            chunk->l_data = myhtml_string_chars_num_map[ buff[offset] ] + chunk->l_data * 10;
        }

        /* Every digit is still copied to str, as before. */
        _myhtml_string_charef_append(str, buff[offset]);
        offset++;
    }

    return offset;
}
/*
 * Character-reference state 0: plain text.
 *
 * Scans buff[offset, size) for the first '&'. The bytes in front of it (or
 * the whole remainder when there is none) are handed to str: through
 * myhtml_string_append() when chunk->encoding is MyHTML_ENCODING_UTF_8,
 * otherwise through myhtml_string_append_chunk_with_convert_encoding().
 *
 * When an '&' is found:
 *   - chunk->begin records the length of str at that moment;
 *   - the '&' is appended to str and chunk->state becomes 1;
 *   - the returned offset points just past the '&'.
 *
 * When no '&' is found the returned offset is where scanning stopped.
 */
size_t _myhtml_string_append_char_references_state_0(myhtml_string_char_ref_chunk_t *chunk, myhtml_string_t* str, const char* buff, size_t offset, size_t size)
{
    const size_t run_start = offset;
    int found_amp = 0;

    /* Locate the end of the plain-text run. */
    for(; offset < size; offset++) {
        if(buff[offset] == '&') {
            found_amp = 1;
            break;
        }
    }

    /* Flush the run that precedes the stop position. */
    if(chunk->encoding == MyHTML_ENCODING_UTF_8) {
        myhtml_string_append(str, &buff[run_start], (offset - run_start));
    }
    else {
        myhtml_string_append_chunk_with_convert_encoding(str, &chunk->res, &buff[run_start],
                                                         (offset - run_start), chunk->encoding);

        /*
         * The converter result is cleaned only when the run ended at '&',
         * never at the end of the buffer.
         * NOTE(review): presumably so converter state carries over into the
         * next buffer - confirm.
         */
        if(found_amp)
            myhtml_encoding_result_clean(&chunk->res);
    }

    if(!found_amp)
        return offset;

    /* Remember where the reference starts, then echo the '&' itself. */
    chunk->begin = str->length;
    chunk->state = 1;
    _myhtml_string_charef_append(str, buff[offset]);

    return offset + 1;
}
/*
 * Character-reference state 3: decide between a hexadecimal and a decimal
 * numeric reference by looking at buff[offset].
 *
 * 'x' / 'X': the byte is appended to str and consumed, chunk->state = 5.
 * Any other byte: nothing is consumed, chunk->state = 4.
 *
 * The size argument is not consulted; buff[offset] is read unconditionally.
 *
 * Returns the offset of the first byte that was not consumed.
 */
size_t _myhtml_string_append_char_references_state_3(myhtml_string_char_ref_chunk_t *chunk, myhtml_string_t* str, const char* buff, size_t offset, size_t size)
{
    switch(buff[offset]) {
        case 'x':
        case 'X':
            _myhtml_string_charef_append(str, buff[offset]);
            chunk->state = 5;
            return offset + 1;

        default:
            chunk->state = 4;
            return offset;
    }
}
/*
 * Character-reference state 1: classify the byte at buff[offset].
 *
 * '#' starts a numeric reference:
 *   - the '#' is appended to str and chunk->l_data is zeroed;
 *   - if the buffer ends right there, chunk->state = 3;
 *   - otherwise a following 'x' / 'X' is appended and consumed with
 *     chunk->state = 5, and any other byte gives chunk->state = 4 without
 *     being consumed.
 *
 * Any other byte starts a named-reference lookup:
 *   - chunk->charef_res.last_entry is set to NULL;
 *   - chunk->charef_res.curr_entry comes from
 *     myhtml_charef_get_first_position();
 *   - if that entry's ch is '\0', chunk->state = 0 and nothing is consumed;
 *   - otherwise chunk->state = 2 and the byte is appended and consumed.
 *
 * Returns the offset of the first byte that was not consumed.
 */
size_t _myhtml_string_append_char_references_state_1(myhtml_string_char_ref_chunk_t *chunk, myhtml_string_t* str, const char* buff, size_t offset, size_t size)
{
    if(buff[offset] != '#')
    {
        /* Named reference: look up the entry for the first byte. */
        chunk->charef_res.last_entry = NULL;
        chunk->charef_res.curr_entry = myhtml_charef_get_first_position(buff[offset]);

        if(chunk->charef_res.curr_entry->ch == '\0') {
            chunk->state = 0;
            return offset;
        }

        chunk->state = 2;
        _myhtml_string_charef_append(str, buff[offset]);
        return offset + 1;
    }

    /* Numeric reference: echo the '#' and reset the accumulator. */
    _myhtml_string_charef_append(str, buff[offset]);
    offset++;
    chunk->l_data = 0;

    /* Buffer ended right after '#': the radix decision is left to state 3. */
    if(offset >= size) {
        chunk->state = 3;
        return offset;
    }

    switch(buff[offset]) {
        case 'x':
        case 'X':
            _myhtml_string_charef_append(str, buff[offset]);
            chunk->state = 5;
            return offset + 1;

        default:
            chunk->state = 4;
            return offset;
    }
}