/*
 * Tokenizer state that looks at one character and expects a quote.
 * (Judging by the name, this is the opening quote of the XML declaration's
 * "standalone" value -- confirm against the state that transitions here.)
 *
 *   '"' or '\''  remember the matching quot/apos state in env->stored_state,
 *                switch to xml_decl_standalone_value_state2, consume the
 *                quote and mark the start of the next token.
 *   otherwise    consume the character and return INVALID_CHAR_IN_XML_DECL.
 *
 * read_char/next_char/LINE_ENDINGS are macros defined elsewhere; they may
 * contain their own control flow, so statement order is kept as-is.
 */
FAXPP_Error xml_decl_standalone_value_state1(FAXPP_TokenizerEnv *env)
{
  read_char(env);

  switch(env->current_char) {
  case '"':
  case '\'':
    /* Only the stored (closing-quote) state differs between the two quotes. */
    if(env->current_char == '"')
      env->stored_state = xml_decl_standalone_value_quot_state;
    else
      env->stored_state = xml_decl_standalone_value_apos_state;
    env->state = xml_decl_standalone_value_state2;
    next_char(env);
    token_start_position(env);
    return NO_ERROR;
  LINE_ENDINGS
  default:
    next_char(env);
    return INVALID_CHAR_IN_XML_DECL;
  }

  /* Reached only if a macro-expanded case leaves the switch. */
  return NO_ERROR;
}
/*
 * Tokenizer state that skips whitespace before an element declaration name
 * (purpose inferred from the state names -- confirm with the caller state).
 *
 *   WHITESPACE  consume the character and stay in this state.
 *   '%'         save the current state with store_state, switch to
 *               parameter_entity_reference_in_markup_state, consume the '%'
 *               and mark the token start.  Returns NO_ERROR only when one of
 *               env->external_subset, env->external_dtd_entity or
 *               env->external_in_markup_entity is set; otherwise returns
 *               PARAMETER_ENTITY_IN_INTERNAL_SUBSET (after the state change
 *               has already been made).
 *   otherwise   switch to elementdecl_name_state1 and mark the token start
 *               WITHOUT consuming the character, so the next state sees it.
 *
 * The inline remark uses a block comment so it cannot swallow the code that
 * follows it if the function is ever joined onto a single line.
 */
FAXPP_Error elementdecl_name_ws_state2(FAXPP_TokenizerEnv *env)
{
  read_char(env);

  switch(env->current_char) {
  WHITESPACE:
    next_char(env);
    break;
  case '%':
    store_state(env);
    env->state = parameter_entity_reference_in_markup_state;
    next_char(env);
    token_start_position(env);
    if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity)
      return NO_ERROR;
    return PARAMETER_ENTITY_IN_INTERNAL_SUBSET;
  default:
    env->state = elementdecl_name_state1;
    token_start_position(env);
    /* No next_char */
    break;
  }

  return NO_ERROR;
}
/*
 * Tokenizer state that dispatches on a single character to decide which
 * kind of markup follows (presumably the character after '<' -- confirm
 * with the state that transitions here).
 *
 *   '?'        -> xml_decl_or_pi_state1; consume, then mark token start.
 *   '!'        -> comment_start_state1;  consume, then mark token start.
 *   otherwise  -> env->start_element_name_state; set env->seen_doc_element,
 *                 mark the token start BEFORE consuming (the character is
 *                 part of the token), consume, and return
 *                 INVALID_CHAR_IN_ELEMENT_NAME when the character lacks the
 *                 env->ncname_start_char flag.
 */
FAXPP_Error xml_decl_or_markup_state(FAXPP_TokenizerEnv *env)
{
  read_char(env);

  switch(env->current_char) {
  case '?':
    env->state = xml_decl_or_pi_state1;
    next_char(env);
    token_start_position(env);
    return NO_ERROR;
  case '!':
    env->state = comment_start_state1;
    next_char(env);
    token_start_position(env);
    return NO_ERROR;
  LINE_ENDINGS
  default:
    env->state = env->start_element_name_state;
    env->seen_doc_element = 1;
    token_start_position(env);
    next_char(env);
    if(!(FAXPP_char_flags(env->current_char) & env->ncname_start_char))
      return INVALID_CHAR_IN_ELEMENT_NAME;
    return NO_ERROR;
  }

  /* Reached only if a macro-expanded case leaves the switch. */
  return NO_ERROR;
}
/*
 * Tokenizer state that scans the characters of an element declaration name
 * until a terminator is found.
 *
 * Loops reading one character at a time:
 *   WHITESPACE or '%'  end the token, report ELEMENTDECL_NAME_TOKEN and
 *                      switch to elementdecl_content_ws_state1 WITHOUT
 *                      consuming the terminator.
 *   ':'                end the token, report ELEMENTDECL_PREFIX_TOKEN,
 *                      switch to elementdecl_name_seen_colon_state1, consume
 *                      the colon and mark the start of the next token.
 *   otherwise          consume the character; return
 *                      INVALID_CHAR_IN_ELEMENTDECL_NAME if it lacks the
 *                      env->ncname_char flag, else keep scanning.
 *
 * Inline remarks use block comments so they cannot swallow the code that
 * follows them if the function is ever joined onto a single line.
 */
FAXPP_Error elementdecl_name_state2(FAXPP_TokenizerEnv *env)
{
  while(1) {
    read_char(env);

    switch(env->current_char) {
    WHITESPACE:
    case '%':
      env->state = elementdecl_content_ws_state1;
      token_end_position(env);
      report_token(ELEMENTDECL_NAME_TOKEN, env);
      /* No next_char */
      return NO_ERROR;
    case ':':
      env->state = elementdecl_name_seen_colon_state1;
      token_end_position(env);
      report_token(ELEMENTDECL_PREFIX_TOKEN, env);
      next_char(env);
      token_start_position(env);
      return NO_ERROR;
    default:
      break;
    }

    /* The validity check runs after the character has been consumed. */
    next_char(env);
    if((FAXPP_char_flags(env->current_char) & env->ncname_char) == 0)
      return INVALID_CHAR_IN_ELEMENTDECL_NAME;
  }

  /* Never happens */
  return NO_ERROR;
}
/*
 * Tokenizer state that inspects the first character of a character
 * reference (presumably the one after "&#" -- confirm with reference_state's
 * caller chain) and picks the hex or decimal sub-state.
 *
 *   'x'        -> hex_char_reference_state1; consume the 'x' and mark the
 *                 token start after it.
 *   '0'..'9'   -> dec_char_reference_state; consume the digit (the token
 *                 start is not touched here).
 *   otherwise  -> dec_char_reference_state as well, consume the character
 *                 and return INVALID_CHAR_IN_CHAR_REFERENCE.
 */
FAXPP_Error char_reference_state(FAXPP_TokenizerEnv *env)
{
  read_char(env);

  switch(env->current_char) {
  case 'x':
    env->state = hex_char_reference_state1;
    next_char(env);
    token_start_position(env);
    return NO_ERROR;

  case '0': case '1': case '2': case '3': case '4':
  case '5': case '6': case '7': case '8': case '9':
    env->state = dec_char_reference_state;
    next_char(env);
    return NO_ERROR;

  LINE_ENDINGS
  default:
    env->state = dec_char_reference_state;
    next_char(env);
    return INVALID_CHAR_IN_CHAR_REFERENCE;
  }

  /* Reached only if a macro-expanded case leaves the switch. */
  return NO_ERROR;
}
/*
 * Tokenizer state that scans the name of a parameter entity reference
 * appearing inside markup, up to the closing ';'.
 *
 * Loops reading one character at a time:
 *   ';'        restore the saved state with retrieve_state, end the token,
 *              report PE_REFERENCE_IN_MARKUP_TOKEN, consume the ';' and mark
 *              the start of the next token.
 *   otherwise  (including whatever LINE_ENDINGS matches) consume the
 *              character; return INVALID_CHAR_IN_ENTITY_REFERENCE if it
 *              lacks the env->ncname_char flag, else keep scanning.
 *
 * The trailing remark uses a block comment so it cannot swallow the final
 * return if the function is ever joined onto a single line.
 */
FAXPP_Error parameter_entity_reference_in_markup_state2(FAXPP_TokenizerEnv *env)
{
  while(1) {
    read_char(env);

    switch(env->current_char) {
    LINE_ENDINGS
      break;
    case ';':
      retrieve_state(env);
      token_end_position(env);
      report_token(PE_REFERENCE_IN_MARKUP_TOKEN, env);
      next_char(env);
      token_start_position(env);
      return NO_ERROR;
    }

    /* The validity check runs after the character has been consumed. */
    next_char(env);
    if((FAXPP_char_flags(env->current_char) & env->ncname_char) == 0)
      return INVALID_CHAR_IN_ENTITY_REFERENCE;
  }

  /* Never happens */
  return NO_ERROR;
}
/*
 * Tokenizer state that decides, one character at a time, whether the input
 * is still on track to be an XML declaration or is an ordinary processing
 * instruction name (reading of the state names -- presumably "xm" has been
 * seen so far; confirm with the preceding states).
 *
 *   'l'         -> xml_decl_or_pi_state4; consume.
 *   WHITESPACE  -> pi_ws_state; end the token, report PI_NAME_TOKEN,
 *                  consume.
 *   '?'         -> pi_content_seen_question_state; end the token, report
 *                  PI_NAME_TOKEN, mark the new token start at the '?' and
 *                  then consume it.
 *   otherwise   -> pi_name_m_state; the character is NOT consumed here.
 *
 * Always returns NO_ERROR unless one of the macros returns early.
 */
FAXPP_Error xml_decl_or_pi_state3(FAXPP_TokenizerEnv *env)
{
  read_char(env);

  switch(env->current_char) {
  case 'l':
    env->state = xml_decl_or_pi_state4;
    next_char(env);
    return NO_ERROR;
  WHITESPACE:
    env->state = pi_ws_state;
    token_end_position(env);
    report_token(PI_NAME_TOKEN, env);
    next_char(env);
    return NO_ERROR;
  case '?':
    env->state = pi_content_seen_question_state;
    token_end_position(env);
    report_token(PI_NAME_TOKEN, env);
    token_start_position(env);
    next_char(env);
    return NO_ERROR;
  default:
    env->state = pi_name_m_state;
    return NO_ERROR;
  }

  /* Reached only if a macro-expanded case leaves the switch. */
  return NO_ERROR;
}
/*
 * Tokenizer state inside the XML declaration that skips whitespace and then
 * dispatches on the first character of what follows.
 *
 *   WHITESPACE  consume and stay in this state.
 *   '?'         -> xml_decl_seen_question_state; mark the token start at the
 *                  '?' and then consume it.
 *   's'         -> xml_decl_standalone_state2; consume.
 *   'e'         -> xml_decl_encoding_state2;   consume.
 *   otherwise   consume and return INVALID_CHAR_IN_XML_DECL.
 */
FAXPP_Error xml_decl_encoding_state1(FAXPP_TokenizerEnv *env)
{
  read_char(env);

  switch(env->current_char) {
  WHITESPACE:
    next_char(env);
    return NO_ERROR;
  case '?':
    env->state = xml_decl_seen_question_state;
    token_start_position(env);
    next_char(env);
    return NO_ERROR;
  case 's':
    env->state = xml_decl_standalone_state2;
    next_char(env);
    return NO_ERROR;
  case 'e':
    env->state = xml_decl_encoding_state2;
    next_char(env);
    return NO_ERROR;
  default:
    next_char(env);
    return INVALID_CHAR_IN_XML_DECL;
  }

  /* Reached only if a macro-expanded case leaves the switch. */
  return NO_ERROR;
}
/*
 * Tokenizer state that scans the hexadecimal digits of a character
 * reference up to the closing ';'.
 *
 * Loops reading one character at a time:
 *   ';'                  restore the saved state with retrieve_state, end
 *                        the token, report HEX_CHAR_REFERENCE_TOKEN, consume
 *                        the ';' and mark the start of the next token.
 *   0-9, a-f, A-F        consume and keep scanning.
 *   otherwise            consume and return INVALID_CHAR_IN_CHAR_REFERENCE.
 *
 * The trailing remark uses a block comment so it cannot swallow the final
 * return if the function is ever joined onto a single line.
 */
FAXPP_Error hex_char_reference_state2(FAXPP_TokenizerEnv *env)
{
  while(1) {
    read_char(env);

    switch(env->current_char) {
    case ';':
      retrieve_state(env);
      token_end_position(env);
      report_token(HEX_CHAR_REFERENCE_TOKEN, env);
      next_char(env);
      token_start_position(env);
      return NO_ERROR;
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
      next_char(env);
      break;
    LINE_ENDINGS
    default:
      next_char(env);
      return INVALID_CHAR_IN_CHAR_REFERENCE;
    }
  }

  /* Never happens */
  return NO_ERROR;
}
/*
 * Tokenizer state that checks whether the current character is the ';'
 * completing a reference reported as AMP_ENTITY_REFERENCE_TOKEN
 * (presumably "&amp;" -- confirm with the preceding states).
 *
 *   ';'        restore the saved state with retrieve_state, end the token,
 *              report AMP_ENTITY_REFERENCE_TOKEN, consume the ';' and mark
 *              the start of the next token.
 *   otherwise  hand over to entity_reference_state without consuming the
 *              character.
 *
 * Returns NO_ERROR in both cases unless one of the macros returns early.
 */
FAXPP_Error amp_entity_reference_state2(FAXPP_TokenizerEnv *env)
{
  read_char(env);

  if(env->current_char == ';') {
    retrieve_state(env);
    token_end_position(env);
    report_token(AMP_ENTITY_REFERENCE_TOKEN, env);
    next_char(env);
    token_start_position(env);
    return NO_ERROR;
  }

  env->state = entity_reference_state;
  return NO_ERROR;
}
/*
 * Tokenizer state that expects the '>' closing the XML declaration
 * (presumably entered after a '?' -- confirm with the preceding states).
 *
 *   '>'        -> initial_misc_state; report an empty XML_DECL_END_TOKEN,
 *                 consume the '>' and mark the start of the next token.
 *   otherwise  consume the character and return INVALID_CHAR_IN_XML_DECL.
 */
FAXPP_Error xml_decl_seen_question_state(FAXPP_TokenizerEnv *env)
{
  read_char(env);

  switch(env->current_char) {
  case '>':
    env->state = initial_misc_state;
    report_empty_token(XML_DECL_END_TOKEN, env);
    next_char(env);
    token_start_position(env);
    return NO_ERROR;
  LINE_ENDINGS
  default:
    next_char(env);
    return INVALID_CHAR_IN_XML_DECL;
  }

  /* Reached only if a macro-expanded case leaves the switch. */
  return NO_ERROR;
}
/*
 * Reset a tokenizer so that it starts scanning a new in-memory buffer.
 *
 * env     tokenizer to (re)initialise; every field assigned below is
 *         overwritten.
 * buffer  start of the input data.
 * length  size of the input; the end pointer is computed as buffer + length
 *         in byte units.
 * done    stored verbatim in env->buffer_done (presumably non-zero when no
 *         further data will follow this buffer -- confirm with the callers).
 *
 * After the fields are reset (line 1, column 0, XML 1.0-style character
 * class masks, default element states, state = initial_state),
 * sniff_encoding is run; its error, if any, is returned unchanged.
 * Otherwise the token start is marked and NO_ERROR is returned.
 */
FAXPP_Error FAXPP_init_tokenize(FAXPP_Tokenizer *env, void *buffer, unsigned int length, unsigned int done)
{
  /* Input window. */
  env->buffer = buffer;
  /*
   * ISO C does not allow arithmetic on void *; go through char * so the
   * offset is in bytes on every compiler, then convert back to void * so the
   * assignment is valid whatever object pointer type buffer_end has.
   */
  env->buffer_end = (void *)((char *)buffer + length);
  env->position = buffer;

  /* Current character and position bookkeeping. */
  env->current_char = 0;
  env->char_len = 0;
  env->line = 1;
  env->column = 0;

  env->nesting_level = 0;
  env->do_encode = 1;
  env->seen_doc_element = 0;

  env->buffer_done = done;

  env->decode = 0;

  /* No token in progress. */
  env->token_buffer.cursor = 0;
  env->token_position1 = 0;
  env->token_position2 = 0;

  /* State machine entry points. */
  env->state = initial_state;
  env->stored_state = 0;

  env->start_element_name_state = default_start_element_name_state;
  env->element_content_state = default_element_content_state;

  /* Character class masks used by the name and content checks. */
  env->ncname_start_char = NCNAME_START_CHAR10;
  env->ncname_char = NCNAME_CHAR10;
  env->non_restricted_char = NON_RESTRICTED_CHAR10;

  FAXPP_Error err = sniff_encoding(env);
  if(err)
    return err;

  token_start_position(env);

  return NO_ERROR;
}
/*
 * Tokenizer state that inspects the first character of a reference
 * (presumably the one after '&' -- confirm with the states that transition
 * here) and picks the matching sub-state.
 *
 *   '#'        -> char_reference_state; consume the '#', then mark the token
 *                 start after it.
 *   'a'        -> a_entity_reference_state
 *   'g'        -> gt_entity_reference_state1
 *   'l'        -> lt_entity_reference_state1
 *   'q'        -> quot_entity_reference_state1
 *                 For these four the token start is marked AT the character
 *                 and the character is then consumed.
 *   otherwise  -> entity_reference_state; mark the token start, consume, and
 *                 return INVALID_CHAR_IN_ENTITY_REFERENCE when the character
 *                 lacks the env->ncname_start_char flag.
 */
FAXPP_Error reference_state(FAXPP_TokenizerEnv *env)
{
  read_char(env);

  switch(env->current_char) {
  case '#':
    env->state = char_reference_state;
    next_char(env);
    token_start_position(env);
    return NO_ERROR;
  case 'a':
    env->state = a_entity_reference_state;
    token_start_position(env);
    next_char(env);
    return NO_ERROR;
  case 'g':
    env->state = gt_entity_reference_state1;
    token_start_position(env);
    next_char(env);
    return NO_ERROR;
  case 'l':
    env->state = lt_entity_reference_state1;
    token_start_position(env);
    next_char(env);
    return NO_ERROR;
  case 'q':
    env->state = quot_entity_reference_state1;
    token_start_position(env);
    next_char(env);
    return NO_ERROR;
  LINE_ENDINGS
  default:
    env->state = entity_reference_state;
    token_start_position(env);
    next_char(env);
    if(!(FAXPP_char_flags(env->current_char) & env->ncname_start_char))
      return INVALID_CHAR_IN_ENTITY_REFERENCE;
    return NO_ERROR;
  }

  /* Reached only if a macro-expanded case leaves the switch. */
  return NO_ERROR;
}