Ejemplo n.º 1
0
// Tokenizer state that looks for the opening quote of the XML declaration's
// standalone value (judging by the state names involved).
//
// A '"' or '\'' stores the matching *_quot_state / *_apos_state in
// env->stored_state and moves to xml_decl_standalone_value_state2, returning
// NO_ERROR. The default branch returns INVALID_CHAR_IN_XML_DECL.
//
// NOTE(review): read_char, next_char, token_start_position and LINE_ENDINGS
// are defined outside this view; they look like macros and may hide returns
// or case labels - confirm before restructuring this function.
FAXPP_Error
xml_decl_standalone_value_state1(FAXPP_TokenizerEnv *env)
{
  read_char(env);

  switch(env->current_char) {
  case '"':
    // Record which quote style opened the value; presumably a later state
    // jumps to stored_state to match the closing quote - confirm.
    env->stored_state = xml_decl_standalone_value_quot_state;
    env->state = xml_decl_standalone_value_state2;
    // Step past the quote first, then mark the token start, presumably so
    // the token begins after the quote - confirm.
    next_char(env);
    token_start_position(env);
    break;
  case '\'':
    // Same as above, for a single-quoted value.
    env->stored_state = xml_decl_standalone_value_apos_state;
    env->state = xml_decl_standalone_value_state2;
    next_char(env);
    token_start_position(env);
    break;
  // Presumably expands to case labels for line-break characters that then
  // continue into the default branch - confirm against the macro definition.
  LINE_ENDINGS
  default:
    // The offending character is still stepped over before reporting.
    next_char(env);
    return INVALID_CHAR_IN_XML_DECL;
  }
  return NO_ERROR;  
}
Ejemplo n.º 2
0
// Tokenizer state handling the characters that precede an element
// declaration's name (judging by the state names involved).
//
// - WHITESPACE: steps over the character; env->state is left unchanged.
// - '%': saves the current state via store_state and switches to
//   parameter_entity_reference_in_markup_state. Returns NO_ERROR only when
//   one of the env->external_* flags is set, otherwise
//   PARAMETER_ENTITY_IN_INTERNAL_SUBSET.
// - anything else: switches to elementdecl_name_state1 without stepping over
//   the character.
//
// NOTE(review): read_char, next_char, store_state, token_start_position and
// WHITESPACE are defined outside this view (apparently macros) - confirm
// their exact behaviour before restructuring.
FAXPP_Error
elementdecl_name_ws_state2(FAXPP_TokenizerEnv *env)
{
  read_char(env);

  switch(env->current_char) {
  // Presumably expands to the case labels for whitespace characters; the
  // trailing ':' here completes the last label - confirm.
  WHITESPACE:
    next_char(env);
    break;
  case '%':
    store_state(env);
    env->state = parameter_entity_reference_in_markup_state;
    next_char(env);
    token_start_position(env);
    // The state change above happens in both outcomes; only the return code
    // differs depending on the external_* flags.
    if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity)
      return NO_ERROR;
    return PARAMETER_ENTITY_IN_INTERNAL_SUBSET;
  default:
    env->state = elementdecl_name_state1;
    token_start_position(env);
    // No next_char
    break;
  }
  return NO_ERROR;
}
Ejemplo n.º 3
0
// Tokenizer state that dispatches on the character read here to decide which
// kind of markup follows.
//
// - '?': moves to xml_decl_or_pi_state1.
// - '!': moves to comment_start_state1.
// - default: treats the character as the start of an element name: moves to
//   env->start_element_name_state, sets env->seen_doc_element, and returns
//   INVALID_CHAR_IN_ELEMENT_NAME if the character lacks the
//   env->ncname_start_char flag.
//
// NOTE(review): read_char, next_char, token_start_position, LINE_ENDINGS and
// FAXPP_char_flags are defined outside this view - confirm their behaviour
// before restructuring.
FAXPP_Error
xml_decl_or_markup_state(FAXPP_TokenizerEnv *env)
{
  read_char(env);

  switch(env->current_char) {
  case '?':
    env->state = xml_decl_or_pi_state1;
    next_char(env);
    token_start_position(env);
    break;
  case '!':
    env->state = comment_start_state1;
    next_char(env);
    token_start_position(env);
    break;
  // Presumably case labels for line-break characters that continue into the
  // default branch - confirm against the macro definition.
  LINE_ENDINGS
  default:
    env->state = (env)->start_element_name_state;
    env->seen_doc_element = 1;
    // Unlike the branches above, the token start is marked BEFORE stepping,
    // presumably so this character is part of the name token - confirm.
    token_start_position(env);
    next_char(env);
    // The state has already been switched when this error is returned.
    if((FAXPP_char_flags(env->current_char) & env->ncname_start_char) == 0)
      return INVALID_CHAR_IN_ELEMENT_NAME;
    break;
  }
  return NO_ERROR;
}
Ejemplo n.º 4
0
// Tokenizer state that loops over the characters of an element declaration
// name until a terminator is found.
//
// - WHITESPACE or '%': moves to elementdecl_content_ws_state1 and reports
//   ELEMENTDECL_NAME_TOKEN without stepping over the terminator.
// - ':': moves to elementdecl_name_seen_colon_state1, reports
//   ELEMENTDECL_PREFIX_TOKEN, steps over the colon and marks a new token
//   start.
// - anything else: steps over the character and returns
//   INVALID_CHAR_IN_ELEMENTDECL_NAME if it lacks the env->ncname_char flag;
//   otherwise the loop continues.
//
// NOTE(review): read_char, next_char, token_*_position, report_token and
// WHITESPACE are defined outside this view (apparently macros, possibly with
// hidden returns) - confirm before restructuring.
FAXPP_Error
elementdecl_name_state2(FAXPP_TokenizerEnv *env)
{
  while(1) {
    read_char(env);

    switch(env->current_char) {
    // Presumably expands to whitespace case labels; together with '%' these
    // end the name.
    WHITESPACE:
    case '%':
      env->state = elementdecl_content_ws_state1;
      token_end_position(env);
      report_token(ELEMENTDECL_NAME_TOKEN, env);
      // No next_char
      return NO_ERROR;
    case ':':
      env->state = elementdecl_name_seen_colon_state1;
      token_end_position(env);
      report_token(ELEMENTDECL_PREFIX_TOKEN, env);
      next_char(env);
      token_start_position(env);
      return NO_ERROR;
    default:
      // Leaves the switch only; validation happens below.
      break;
    }

    // The character is stepped over before its flags are checked.
    next_char(env);
    if((FAXPP_char_flags(env->current_char) & env->ncname_char) == 0)
      return INVALID_CHAR_IN_ELEMENTDECL_NAME;
  }

  // Never happens
  return NO_ERROR;  
}
Ejemplo n.º 5
0
// Tokenizer state that inspects the first character of a character reference
// (judging by the state names involved).
//
// - 'x': moves to hex_char_reference_state1, steps over the 'x' and marks a
//   new token start.
// - '0'..'9': moves to dec_char_reference_state and steps over the digit.
// - default: also moves to dec_char_reference_state and steps over the
//   character, but returns INVALID_CHAR_IN_CHAR_REFERENCE.
//
// NOTE(review): read_char, next_char, token_start_position and LINE_ENDINGS
// are defined outside this view (apparently macros) - confirm before
// restructuring.
FAXPP_Error
char_reference_state(FAXPP_TokenizerEnv *env)
{
  read_char(env);

  switch(env->current_char) {
  case 'x':
    env->state = hex_char_reference_state1;
    next_char(env);
    token_start_position(env);
    break;
  case '0':
  case '1':
  case '2':
  case '3':
  case '4':
  case '5':
  case '6':
  case '7':
  case '8':
  case '9':
    // No token_start_position here, unlike the 'x' branch; presumably the
    // token start set by the previous state already points at this digit -
    // confirm.
    env->state = dec_char_reference_state;
    next_char(env);
    break;
  // Presumably case labels for line-break characters that continue into the
  // default branch - confirm against the macro definition.
  LINE_ENDINGS
  default:
    // The state is switched even though an error is returned.
    env->state = dec_char_reference_state;
    next_char(env);
    return INVALID_CHAR_IN_CHAR_REFERENCE;
  }
  return NO_ERROR;
}
Ejemplo n.º 6
0
// Tokenizer state that loops over the characters of a parameter entity
// reference name inside markup until the terminating ';'.
//
// - ';': restores the saved state via retrieve_state, reports
//   PE_REFERENCE_IN_MARKUP_TOKEN, steps over the ';' and marks a new token
//   start, then returns NO_ERROR.
// - any other character: is stepped over, and
//   INVALID_CHAR_IN_ENTITY_REFERENCE is returned if it lacks the
//   env->ncname_char flag; otherwise the loop continues.
//
// NOTE(review): read_char, next_char, retrieve_state, token_*_position,
// report_token and LINE_ENDINGS are defined outside this view (apparently
// macros, possibly with hidden returns) - confirm before restructuring.
FAXPP_Error
parameter_entity_reference_in_markup_state2(FAXPP_TokenizerEnv *env)
{
  while(1) {
    read_char(env);

    switch(env->current_char) {
    // Presumably case labels for line-break characters; the break below
    // leaves the switch, so such a character still goes through the
    // ncname_char check after the switch - confirm.
    LINE_ENDINGS
      break;
    case ';':
      retrieve_state(env);
      token_end_position(env);
      report_token(PE_REFERENCE_IN_MARKUP_TOKEN, env);
      next_char(env);
      token_start_position(env);
      return NO_ERROR;
    }
    // There is no default label: every other character simply falls out of
    // the switch to the validation below.

    next_char(env);
    if((FAXPP_char_flags(env->current_char) & env->ncname_char) == 0)
      return INVALID_CHAR_IN_ENTITY_REFERENCE;
  }

  // Never happens
  return NO_ERROR;
}
Ejemplo n.º 7
0
// Tokenizer state that decides, from the character read here, whether the
// input continues towards an XML declaration or is a processing instruction
// (judging by the state names involved).
//
// - 'l': moves to xml_decl_or_pi_state4 and steps over the character.
// - WHITESPACE: moves to pi_ws_state, reports PI_NAME_TOKEN and steps over
//   the character.
// - '?': moves to pi_content_seen_question_state, reports PI_NAME_TOKEN,
//   marks a new token start and then steps over the '?'.
// - anything else: moves to pi_name_m_state without stepping over the
//   character.
//
// Always returns NO_ERROR from the visible code paths.
//
// NOTE(review): read_char, next_char, token_*_position, report_token and
// WHITESPACE are defined outside this view (apparently macros, possibly with
// hidden returns) - confirm before restructuring.
FAXPP_Error
xml_decl_or_pi_state3(FAXPP_TokenizerEnv *env)
{
  read_char(env);

  switch(env->current_char) {
  case 'l':
    env->state = xml_decl_or_pi_state4;
    next_char(env);
    break;
  // Presumably expands to whitespace case labels; the name ends here.
  WHITESPACE:
    env->state = pi_ws_state;
    token_end_position(env);
    report_token(PI_NAME_TOKEN, env);
    next_char(env);
    break;
  case '?':
    env->state = pi_content_seen_question_state;
    token_end_position(env);
    report_token(PI_NAME_TOKEN, env);
    // Token start is marked BEFORE stepping over the '?', presumably so the
    // '?' belongs to the following token if it turns out to be content -
    // confirm.
    token_start_position(env);
    next_char(env);
    break;
  default:
    // Character is not stepped over; the next state will read it again.
    env->state = pi_name_m_state;
    break;
  }
  return NO_ERROR;
}
Ejemplo n.º 8
0
// Tokenizer state inside the XML declaration that decides what follows
// (judging by the state names involved).
//
// - WHITESPACE: steps over the character; env->state is left unchanged.
// - '?': moves to xml_decl_seen_question_state.
// - 's': moves to xml_decl_standalone_state2.
// - 'e': moves to xml_decl_encoding_state2.
// - anything else: steps over the character and returns
//   INVALID_CHAR_IN_XML_DECL.
//
// NOTE(review): read_char, next_char, token_start_position and WHITESPACE
// are defined outside this view (apparently macros) - confirm before
// restructuring.
FAXPP_Error
xml_decl_encoding_state1(FAXPP_TokenizerEnv *env)
{
  read_char(env);

  switch(env->current_char) {
  // Presumably expands to the case labels for whitespace characters.
  WHITESPACE:
    next_char(env);
    break;
  case '?':
    env->state = xml_decl_seen_question_state;
    // Token start is marked on the '?' itself, before stepping over it.
    token_start_position(env);
    next_char(env);
    break;
  case 's':
    // Presumably the first letter of "standalone" - confirm.
    env->state = xml_decl_standalone_state2;
    next_char(env);
    break;
  case 'e':
    // Presumably the first letter of "encoding" - confirm.
    env->state = xml_decl_encoding_state2;
    next_char(env);
    break;
  default:
    next_char(env);
    return INVALID_CHAR_IN_XML_DECL;
  }
  return NO_ERROR;
}
Ejemplo n.º 9
0
// Tokenizer state that loops over the digits of a hexadecimal character
// reference until the terminating ';'.
//
// - ';': restores the saved state via retrieve_state, reports
//   HEX_CHAR_REFERENCE_TOKEN, steps over the ';' and marks a new token
//   start, then returns NO_ERROR.
// - '0'-'9', 'a'-'f', 'A'-'F': steps over the digit and keeps looping.
// - default: steps over the character and returns
//   INVALID_CHAR_IN_CHAR_REFERENCE.
//
// NOTE(review): read_char, next_char, retrieve_state, token_*_position,
// report_token and LINE_ENDINGS are defined outside this view (apparently
// macros, possibly with hidden returns) - confirm before restructuring.
FAXPP_Error
hex_char_reference_state2(FAXPP_TokenizerEnv *env)
{
  while(1) {
    read_char(env);

    switch(env->current_char) {
    case ';':
      retrieve_state(env);
      token_end_position(env);
      report_token(HEX_CHAR_REFERENCE_TOKEN, env);
      next_char(env);
      token_start_position(env);
      return NO_ERROR;
    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
    case '9':
    case 'a':
    case 'b':
    case 'c':
    case 'd':
    case 'e':
    case 'f':
    case 'A':
    case 'B':
    case 'C':
    case 'D':
    case 'E':
    case 'F':
      // Valid hex digit: consume it; the break leaves the switch and the
      // while loop reads the next character.
      next_char(env);
      break;
    // Presumably case labels for line-break characters that continue into
    // the default branch - confirm against the macro definition.
    LINE_ENDINGS
    default:
      next_char(env);
      return INVALID_CHAR_IN_CHAR_REFERENCE;
    }
  }

  // Never happens
  return NO_ERROR;
}
Ejemplo n.º 10
0
// Tokenizer state reached while matching the text of an "amp" entity
// reference (judging by the token name reported here).
//
// If the character read is ';' the reference is complete: the saved state is
// restored, AMP_ENTITY_REFERENCE_TOKEN is reported, the ';' is stepped over
// and a new token start is marked. Any other character hands over to
// entity_reference_state without being stepped over.
//
// Always returns NO_ERROR from the visible code paths.
//
// NOTE(review): read_char, next_char, retrieve_state, token_*_position and
// report_token are defined outside this view (apparently macros, possibly
// with hidden returns) - confirm before restructuring further.
FAXPP_Error
amp_entity_reference_state2(FAXPP_TokenizerEnv *env)
{
  read_char(env);

  switch(env->current_char) {
  case ';':
    retrieve_state(env);
    token_end_position(env);
    report_token(AMP_ENTITY_REFERENCE_TOKEN, env);
    next_char(env);
    token_start_position(env);
    break;
  default:
    // Not the predefined entity after all; continue as a general entity
    // reference. The character is left for the next state to read.
    env->state = entity_reference_state;
    break;
  }

  return NO_ERROR;
}
Ejemplo n.º 11
0
// Tokenizer state entered after a '?' inside the XML declaration (judging by
// the state name).
//
// - '>': moves to initial_misc_state, reports an empty XML_DECL_END_TOKEN,
//   steps over the '>' and marks a new token start; returns NO_ERROR.
// - default: steps over the character and returns INVALID_CHAR_IN_XML_DECL.
//
// NOTE(review): read_char, next_char, token_start_position,
// report_empty_token and LINE_ENDINGS are defined outside this view
// (apparently macros, possibly with hidden returns) - confirm before
// restructuring.
FAXPP_Error
xml_decl_seen_question_state(FAXPP_TokenizerEnv *env)
{
  read_char(env);

  switch(env->current_char) {
  case '>':
    env->state = initial_misc_state;
    // The end-of-declaration token is reported before the '>' is consumed.
    report_empty_token(XML_DECL_END_TOKEN, env);
    next_char(env);
    token_start_position(env);
    break;
  // Presumably case labels for line-break characters that continue into the
  // default branch - confirm against the macro definition.
  LINE_ENDINGS
  default:
    next_char(env);
    return INVALID_CHAR_IN_XML_DECL;
  }
  return NO_ERROR;
}
Ejemplo n.º 12
0
// Resets a tokenizer so that it starts tokenizing a fresh input buffer.
//
// env    - tokenizer whose fields are (re)initialized
// buffer - start of the input data
// length - size of the input; added to buffer byte-wise to form buffer_end
// done   - stored in env->buffer_done (presumably non-zero when this buffer
//          is the last chunk of input - confirm against callers)
//
// Returns the error reported by sniff_encoding() if it is non-zero,
// otherwise NO_ERROR.
FAXPP_Error
FAXPP_init_tokenize(FAXPP_Tokenizer *env, void *buffer, unsigned int length, unsigned int done)
{
  env->buffer = buffer;
  // Arithmetic directly on a void * is a GNU extension and is rejected by
  // strict ISO C compilers; go through char * so the byte offset is computed
  // portably, then convert back to void * as before.
  env->buffer_end = (void *)((char *)buffer + length);

  env->position = buffer;
  env->current_char = 0;
  env->char_len = 0;

  env->line = 1;
  env->column = 0;

  env->nesting_level = 0;
  env->do_encode = 1;
  env->seen_doc_element = 0;
  env->buffer_done = done;

  // Cleared here; presumably selected by sniff_encoding() below - confirm.
  env->decode = 0;

  env->token_buffer.cursor = 0;

  env->token_position1 = 0;
  env->token_position2 = 0;

  env->state = initial_state;
  env->stored_state = 0;

  env->start_element_name_state = default_start_element_name_state;
  env->element_content_state = default_element_content_state;

  // Character-class masks used by the state functions when validating
  // names; the "10" suffix presumably refers to XML 1.0 - confirm.
  env->ncname_start_char = NCNAME_START_CHAR10;
  env->ncname_char = NCNAME_CHAR10;
  env->non_restricted_char = NON_RESTRICTED_CHAR10;

  FAXPP_Error err = sniff_encoding(env);
  if(err) return err;

  // Only mark the first token's start once encoding sniffing has succeeded.
  token_start_position(env);

  return NO_ERROR;
}
Ejemplo n.º 13
0
// Tokenizer state that inspects the first character of a reference and
// dispatches to the matching follow-up state.
//
// - '#': moves to char_reference_state; the '#' is stepped over before the
//   token start is marked.
// - 'a', 'g', 'l', 'q': move to a_entity_reference_state,
//   gt_entity_reference_state1, lt_entity_reference_state1 and
//   quot_entity_reference_state1 respectively (presumably the first letters
//   of the predefined entities - confirm); the token start is marked on the
//   letter itself.
// - default: moves to entity_reference_state and returns
//   INVALID_CHAR_IN_ENTITY_REFERENCE if the character lacks the
//   env->ncname_start_char flag.
//
// NOTE(review): read_char, next_char, token_start_position, LINE_ENDINGS and
// FAXPP_char_flags are defined outside this view - confirm their behaviour
// before restructuring.
FAXPP_Error
reference_state(FAXPP_TokenizerEnv *env)
{
  read_char(env);

  switch(env->current_char) {
  case '#':
    env->state = char_reference_state;
    // Step first, then mark: the '#' is not part of the following token.
    next_char(env);
    token_start_position(env);
    break;
  case 'a':
    env->state = a_entity_reference_state;
    // Mark first, then step: the letter is part of the token.
    token_start_position(env);
    next_char(env);
    break;
  case 'g':
    env->state = gt_entity_reference_state1;
    token_start_position(env);
    next_char(env);
    break;
  case 'l':
    env->state = lt_entity_reference_state1;
    token_start_position(env);
    next_char(env);
    break;
  case 'q':
    env->state = quot_entity_reference_state1;
    token_start_position(env);
    next_char(env);
    break;
  // Presumably case labels for line-break characters that continue into the
  // default branch - confirm against the macro definition.
  LINE_ENDINGS
  default:
    env->state = entity_reference_state;
    token_start_position(env);
    next_char(env);
    // The state has already been switched when this error is returned.
    if((FAXPP_char_flags(env->current_char) & env->ncname_start_char) == 0)
      return INVALID_CHAR_IN_ENTITY_REFERENCE;
    break;
  }
  return NO_ERROR;
}