Пример #1
0
// --------------------------------------------------------------------------
// Handles the element type name of a start tag.
//
// Flushes any buffered character data, builds the element_info record for
// the element being opened, pushes it onto m_element_info and clears
// m_sax_attrs.
//
// type - element type name taken from the start tag.
//
// Throws semantic_error when validating and the first (root) element's type
// differs from a non-empty document type name. Throws not_implemented when
// validating and the element is nested inside another element.
void
processor::on_start_tag_type (
 icu::UnicodeString const& type
)
// --------------------------------------------------------------------------
{
  element_info info;

  // Hand over text collected before this tag.
  if (!m_character_data.isEmpty())
  {
    character_data(m_character_data);
    m_character_data.remove();
  }

  info.type = type;
  info.child_counter = 0;

  if (!m_element_info.empty())
  {
    // Nested element: validity checking of content is not available.
    if (m_validating)
    {
      throw not_implemented("Element validity checking.");
    }

    auto& parent = m_element_info.top();

    ++parent.child_counter;

    // The new element starts out with its parent's settings.
    info.xmlns = parent.xmlns;
    info.base = parent.base;
    info.lang = parent.lang;
    info.space = parent.space;
  }
  else
  {
    // First element of the document.
    if (m_validating)
    {
      icu::UnicodeString const& declared = m_document_type.m_root_type;

      if (!declared.isEmpty() && type != declared)
      {
        std::string msg =
         "Root element type does not match the document type.\n"
         "Document type name: ";

        declared.toUTF8String(msg);
        msg += "\nRoot element type: ";
        type.toUTF8String(msg);

        throw semantic_error(msg);
      }
    }

    info.xmlns[""] = uri();
    info.base = m_base_iri;
    info.space = false;
  }

  m_element_info.push(info);
  m_sax_attrs.clear();
}
Пример #2
0
// --------------------------------------------------------------------------
// Handles an end tag.
//
// Flushes any buffered character data, checks that the end tag's name
// matches the element on top of m_element_info, calls element_end(), pops
// that element and, if an enclosing element remains, reloads the
// current-element members from it.
//
// type - element type name taken from the end tag.
//
// Throws semantic_error when no element is open or when the name does not
// match the open element. On a name mismatch m_element_info is emptied while
// the element path for the message is built.
void
processor::on_end_tag (
 icu::UnicodeString const& type
)
// --------------------------------------------------------------------------
{
  if (!m_character_data.isEmpty())
  {
    character_data(m_character_data);
    m_character_data.remove();
  }

  // There is nothing to compare against when no element is open; reading
  // top() of an empty stack must be avoided.
  if (m_element_info.empty())
  {
    std::string msg;

    msg += "ETag without a matching STag.\n";
    msg += "ETag name: ";
    type.toUTF8String(msg);

    throw semantic_error(msg);
  }
  
  if (type != m_element_info.top().type)
  {
    std::string msg, tree;

    msg += "STag-ETag name mismatch.\n";
    msg += "ETag name: ";
    type.toUTF8String(msg);

    // Unwind the stack, prepending each element so the path reads from the
    // outermost element to the innermost.
    while (!m_element_info.empty())
    {
      std::string tmp;
      
      m_element_info.top().type.toUTF8String(tmp);
      m_element_info.pop();
      tree = "/" + tmp + tree;
    }
    
    msg += "\nElement tree: " + tree;

    throw semantic_error(msg);
  }

  element_end();

  if (!m_element_info.empty())
  {
    m_element_info.pop();
  }

  // Update current element's variables.
  if (!m_element_info.empty())
  {
    m_element.assign(m_element_info.top().type, m_element_info.top().xmlns);
    m_attributes.clear();
    m_base_iri = m_element_info.top().base;
    m_language = m_element_info.top().lang;
    m_preserve_space = m_element_info.top().space;
  }
}
Пример #3
0
// --------------------------------------------------------------------------
// Records one attribute of the start tag being read.
//
// name  - attribute name.
// value - attribute value; moved into m_sax_attrs.
//
// Throws semantic_error when an attribute with the same name has already
// been recorded (m_element_info is emptied while the element path for the
// message is built). Throws not_implemented when validating.
void
processor::on_attribute (
 icu::UnicodeString const& name,
 icu::UnicodeString&& value
)
// --------------------------------------------------------------------------
{
  // emplace() reports whether the name was new to this element.
  bool const inserted = m_sax_attrs.emplace(name, std::move(value)).second;

  if (!inserted)
  {
    std::string msg =
     "Duplicate attribute on an element.\n"
     "Attribute name: ";
    std::string path;

    name.toUTF8String(msg);

    // Walk from the innermost element outwards, putting each name in front
    // so the path ends up outermost-first.
    while (!m_element_info.empty())
    {
      std::string segment = "/";

      m_element_info.top().type.toUTF8String(segment);
      m_element_info.pop();
      path.insert(0, segment);
    }

    msg += "\nElement tree: ";
    msg += path;

    throw semantic_error(msg);
  }

  // Disabled draft of attribute-value normalization, kept as found.
  // NOTE(review): it returns a value and tests the result of on_reference(),
  // neither of which matches the current void signatures - looks stale.
  /*
  icu::UnicodeString normalized;
  UChar32 chr;
  int32_t const size = value.countChar32();

  for (int32_t i=0; i<size; i=value.moveIndex32(i, 1))
  {
    chr = value.char32At(i);

    if (chr == 0x20 || chr == 0x0D || chr == 0x0A || chr == 0x09)
    {
      normalized += 0x20;
    }
    else
    if (chr == '&')
    {
      int32_t end = value.indexOf(';', i);
      icu::UnicodeString name{value, i+1, end-i-1};

      i = end;
      
      if (name[0] == '#')
      {
        normalized += dereference_character(name.tempSubString(1));
      }
      else
      {
        if (on_reference(name, true))
        {
          icu::UnicodeString text;

          m_buffers.top().pipe(text);
          normalized += normalize_attvalue(text);
          m_buffers.pop();
        }
      }
    }
    else
    {
      normalized += chr;
    }
  }

  return normalized;
  */

  if (m_validating)
  {
    throw not_implemented("Attribute validity checking.");
  }
}
Пример #4
0
// --------------------------------------------------------------------------
// Expands a general entity reference.
//
// When called outside an attribute value, with no entity currently being
// expanded (m_ref_history empty) and m_auto_replace_general set, buffered
// character data is flushed and reference(name) is called first.
//
// The five predefined entities (lt, gt, amp, apos, quot) are expanded from
// built-in replacement text. Any other name is looked up in
// m_dtd.general_entities; its replacement text (internal) or resolved input
// (external) is pushed onto m_buffers, parsed, and popped again.
//
// name     - entity name without the surrounding '&' and ';'.
// attvalue - true when the reference occurs inside an attribute value; the
//            replacement is then parsed with parse_included_attvalue()
//            instead of parse_content().
//
// Throws semantic_error for an undeclared, unparsed or recursively
// referenced entity, and for an external entity referenced in an attribute
// value. Throws runtime_error when validating and an external entity cannot
// be resolved. Whenever an exception leaves the expansion of a declared
// entity, m_parsing_entity is restored and the name is removed from
// m_ref_history before the exception propagates.
void
processor::on_reference (
 icu::UnicodeString const& name,
 bool attvalue
)
// --------------------------------------------------------------------------
{
  if (!attvalue && m_ref_history.empty() && m_auto_replace_general)
  {
    if (!m_character_data.isEmpty())
    {
      character_data(m_character_data);
      m_character_data.remove();
    }

    reference(name);
  }

  // Parses the buffer on top of m_buffers in the mode matching the context.
  auto const parse_replacement = [&] ()
  {
    if (attvalue)
    {
      parse_included_attvalue();
    }
    else
    {
      parse_content();
    }
  };

  // Pushes a built-in replacement text, parses it and pops it again. The
  // text is taken by value so that it stays alive until the buffer is gone.
  auto const expand_predefined = [&] (
   char const* label,
   icu::UnicodeString text
  )
  {
    m_buffers.emplace(label, new io::uistring(text, false));
    parse_replacement();
    m_buffers.pop();
  };

  // Pre-defined entities.
  if (name == "lt")
  {
    expand_predefined("&lt;", "&#60;");
    return;
  }

  if (name == "gt")
  {
    expand_predefined("&gt;", ">");
    return;
  }

  if (name == "amp")
  {
    expand_predefined("&amp;", "&#38;");
    return;
  }

  if (name == "apos")
  {
    expand_predefined("&apos;", "'");
    return;
  }

  if (name == "quot")
  {
    expand_predefined("&quot;", '"');
    return;
  }

  // Look for the entity.
  auto const it = m_dtd.general_entities.find(name);

  if (it == m_dtd.general_entities.end())
  {
    std::string msg;

    msg += "Reference to undeclared general entity '";
    name.toUTF8String(msg);
    msg += "'.";

    throw semantic_error(msg);
  }

  auto& decl = *it->second;

  // Unparsed entity references are forbidden.
  if (decl.unparsed)
  {
    std::string msg;

    msg += "Reference to an unparsed general entity '";
    name.toUTF8String(msg);
    msg += "'.";

    throw semantic_error(msg);
  }

  // Check for recursive references: the name is already in the history
  // while an enclosing expansion of the same entity is in progress.
  auto const hist = m_ref_history.insert(name);

  if (!hist.second)
  {
    std::string msg;

    msg += "Recursive reference to general entity '";
    name.toUTF8String(msg);
    msg += "'.";

    throw semantic_error(msg);
  }

  // Process the entity. From here on the history entry and the
  // m_parsing_entity flag must be undone on every way out.
  bool const state = m_parsing_entity;

  m_parsing_entity = true;

  try
  {
    std::string nameutf8 = "&";

    name.toUTF8String(nameutf8);
    nameutf8 += ';';

    if (!decl.id.sys.isBogus() || !decl.id.pub.isBogus())
    {
      // External entity.
      if (attvalue)
      {
        std::string msg;

        msg += "Reference to an external parsed general entity '";
        name.toUTF8String(msg);
        msg += "' in attribute value.";

        throw semantic_error(msg);
      }

      io::input* input = nullptr;
      std::string encoding;

      resolve_id(decl.id, input, encoding);

      if (input == nullptr)
      {
        if (m_validating)
        {
          throw runtime_error(
          "Could not dereference external parsed general entity."
          );
        }
      }
      else
      {
        size_t const size = m_buffers.size();

        try
        {
          m_buffers.emplace(nameutf8, *input, true, encoding);

          // attvalue is always false here (rejected above).
          parse_content();

          m_buffers.pop();
        }
        catch (...)
        {
          // Drop the buffer (if it was pushed) before the input it was
          // built on is deleted.
          if (size < m_buffers.size())
          {
            m_buffers.pop();
          }

          delete input;
          throw;
        }

        delete input;
      }
    }
    else
    {
      // Internal entity: parse the stored replacement text.
      m_buffers.emplace(
       nameutf8, new io::uistring(decl.text_or_notation, false)
      );

      parse_replacement();

      m_buffers.pop();
    }
  }
  catch (...)
  {
    m_parsing_entity = state;
    m_ref_history.erase(hist.first);
    throw;
  }

  m_parsing_entity = state;
  m_ref_history.erase(hist.first);
}
Пример #5
0
// --------------------------------------------------------------------------
// Expands a parameter entity reference.
//
// When no entity is being parsed, the reference is not inside an entity
// value and m_ref_history is empty, the reference is first recorded as a
// dtd_element in m_document_type.m_subset.
//
// The entity is looked up in m_dtd.parameter_entities; its literal value
// (internal) or resolved input (external) is pushed onto m_buffers, parsed
// and popped again.
//
// name        - entity name without the surrounding '%' and ';'.
// entityvalue - true when the reference occurs inside an entity value; the
//               replacement is then parsed with parse_included_entityvalue()
//               instead of parse_ext_subset().
//
// Throws semantic_error for a recursive reference and, when validating, for
// an undeclared entity. Throws runtime_error when validating and an external
// entity cannot be resolved. When not validating, both of those situations
// set m_dtd_stop instead. Whenever an exception leaves the expansion of a
// declared entity, m_parsing_entity is restored and the name is removed from
// m_ref_history before the exception propagates.
void
processor::on_pe_reference (
 icu::UnicodeString const& name,
 bool entityvalue
)
// --------------------------------------------------------------------------
{
  if (!m_parsing_entity && !entityvalue && m_ref_history.empty())
  {
    dtd_element e;
    
    e.type = dtd_element::parameter_reference;
    e.text = new icu::UnicodeString(name);

    m_document_type.m_subset.emplace_back(std::move(e));
  }

  // Look for the entity.
  auto const it = m_dtd.parameter_entities.find(name);

  if (it == m_dtd.parameter_entities.end())
  {
    if (m_validating)
    {
      std::string msg;

      msg += "Reference to undeclared parameter entity '";
      name.toUTF8String(msg);
      msg += "'.";
      
      throw semantic_error(msg);
    }

    m_dtd_stop = true;
    return;
  }

  // Check for recursive references: the name is already in the history
  // while an enclosing expansion of the same entity is in progress.
  auto const hist = m_ref_history.insert(name);
  
  if (!hist.second)
  {
    std::string msg;

    msg += "Recursive reference to parameter entity '";
    name.toUTF8String(msg);
    msg += "'.";
    
    throw semantic_error(msg);
  }
  
  // Process the entity. From here on the history entry and the
  // m_parsing_entity flag must be undone on every way out.
  bool const state = m_parsing_entity;

  m_parsing_entity = true;

  try
  {
    std::string nameutf8 = "%";

    name.toUTF8String(nameutf8);
    nameutf8 += ';';

    if (it->second->external)
    {
      io::input* input = nullptr;
      std::string encoding;

      resolve_id(it->second->id, input, encoding);

      if (input == nullptr)
      {
        if (m_validating)
        {
          throw runtime_error(
          "Could not dereference external parameter entity."
          );
        }
        else
        {
          m_dtd_stop = true;
        }
      }
      else
      {
        size_t const size = m_buffers.size();

        try
        {
          if (entityvalue)
          {
            m_buffers.emplace(nameutf8, *input, true, encoding);
            parse_included_entityvalue();
            m_buffers.pop();
          }
          else
          {
            m_buffers.emplace(nameutf8, *input, true, encoding, true);
            parse_ext_subset();
            m_buffers.pop();
          }
        }
        catch (...)
        {
          // Drop the buffer (if it was pushed) before the input it was
          // built on is deleted.
          if (size < m_buffers.size())
          {
            m_buffers.pop();
          }

          delete input;
          throw;
        }
        
        delete input;
      }
    }
    else
    {
      // The literal value is stored as public ID.
      if (entityvalue)
      {
        m_buffers.emplace(
         nameutf8, new io::uistring(it->second->id.pub, false)
        );
        parse_included_entityvalue();
        m_buffers.pop();
      }
      else
      {
        m_buffers.emplace(
         nameutf8, new io::uistring(it->second->id.pub, false), false
        );
        parse_ext_subset();
        m_buffers.pop();
      }
    }
  }
  catch (...)
  {
    m_parsing_entity = state;
    m_ref_history.erase(hist.first);
    throw;
  }
  
  m_parsing_entity = state;
  m_ref_history.erase(hist.first);
}