// -------------------------------------------------------------------------- void processor::on_start_tag_type ( icu::UnicodeString const& type ) // -------------------------------------------------------------------------- { element_info info; if (!m_character_data.isEmpty()) { character_data(m_character_data); m_character_data.remove(); } info.type = type; info.child_counter = 0; if (m_element_info.empty()) { if (m_validating && !m_document_type.m_root_type.isEmpty() && type != m_document_type.m_root_type) { std::string msg; msg += "Root element type does not match the document type.\n"; msg += "Document type name: "; m_document_type.m_root_type.toUTF8String(msg); msg += "\nRoot element type: "; type.toUTF8String(msg); throw semantic_error(msg); } info.xmlns[""] = uri(); info.base = m_base_iri; info.space = false; } else { if (m_validating) { throw not_implemented("Element validity checking."); } ++m_element_info.top().child_counter; info.xmlns = m_element_info.top().xmlns; info.base = m_element_info.top().base; info.lang = m_element_info.top().lang; info.space = m_element_info.top().space; } m_element_info.push(info); m_sax_attrs.clear(); }
// -------------------------------------------------------------------------- void processor::on_end_tag ( icu::UnicodeString const& type ) // -------------------------------------------------------------------------- { if (!m_character_data.isEmpty()) { character_data(m_character_data); m_character_data.remove(); } if (type != m_element_info.top().type) { std::string msg, tree; msg += "STag-ETag name mismatch.\n"; msg += "ETag name: "; type.toUTF8String(msg); while (!m_element_info.empty()) { std::string tmp; m_element_info.top().type.toUTF8String(tmp); m_element_info.pop(); tree = "/" + tmp + tree; } msg += "\nElement tree: " + tree; throw semantic_error(msg); } element_end(); if (!m_element_info.empty()) { m_element_info.pop(); } // Update current element's variables. if (!m_element_info.empty()) { m_element.assign(m_element_info.top().type, m_element_info.top().xmlns); m_attributes.clear(); m_base_iri = m_element_info.top().base; m_language = m_element_info.top().lang; m_preserve_space = m_element_info.top().space; } }
// -------------------------------------------------------------------------- void processor::on_attribute ( icu::UnicodeString const& name, icu::UnicodeString&& value ) // -------------------------------------------------------------------------- { // Check for duplicates. if (!m_sax_attrs.emplace(name, std::move(value)).second) { std::string msg, tree; msg += "Duplicate attribute on an element.\n"; msg += "Attribute name: "; name.toUTF8String(msg); while (!m_element_info.empty()) { std::string tmp; m_element_info.top().type.toUTF8String(tmp); m_element_info.pop(); tree = "/" + tmp + tree; } msg += "\nElement tree: " + tree; throw semantic_error(msg); } /* icu::UnicodeString normalized; UChar32 chr; int32_t const size = value.countChar32(); for (int32_t i=0; i<size; i=value.moveIndex32(i, 1)) { chr = value.char32At(i); if (chr == 0x20 || chr == 0x0D || chr == 0x0A || chr == 0x09) { normalized += 0x20; } else if (chr == '&') { int32_t end = value.indexOf(';', i); icu::UnicodeString name{value, i+1, end-i-1}; i = end; if (name[0] == '#') { normalized += dereference_character(name.tempSubString(1)); } else { if (on_reference(name, true)) { icu::UnicodeString text; m_buffers.top().pipe(text); normalized += normalize_attvalue(text); m_buffers.pop(); } } } else { normalized += chr; } } return normalized; */ if (m_validating) { throw not_implemented("Attribute validity checking."); } }
// -------------------------------------------------------------------------- void processor::on_reference ( icu::UnicodeString const& name, bool attvalue ) // -------------------------------------------------------------------------- { if (!attvalue && m_ref_history.empty() && m_auto_replace_general) { if (!m_character_data.isEmpty()) { character_data(m_character_data); m_character_data.remove(); } reference(name); } // Pre-defined entities. if (name == "lt") { icu::UnicodeString entity = "<"; m_buffers.emplace("<", new io::uistring(entity, false)); if (attvalue) { parse_included_attvalue(); } else { parse_content(); } m_buffers.pop(); return; } else if (name == "gt") { icu::UnicodeString entity = ">"; m_buffers.emplace(">", new io::uistring(entity, false)); if (attvalue) { parse_included_attvalue(); } else { parse_content(); } m_buffers.pop(); return; } else if (name == "amp") { icu::UnicodeString entity = "&"; m_buffers.emplace("&", new io::uistring(entity, false)); if (attvalue) { parse_included_attvalue(); } else { parse_content(); } m_buffers.pop(); return; } else if (name == "apos") { icu::UnicodeString entity = "'"; m_buffers.emplace("'", new io::uistring(entity, false)); if (attvalue) { parse_included_attvalue(); } else { parse_content(); } m_buffers.pop(); return; } else if (name == "quot") { icu::UnicodeString entity = '"'; m_buffers.emplace(""", new io::uistring(entity, false)); if (attvalue) { parse_included_attvalue(); } else { parse_content(); } m_buffers.pop(); return; } std::pair<std::set<icu::UnicodeString>::iterator, bool> hist; std::map<icu::UnicodeString, general_entity_declaration*>::iterator it; // Look for the entity. it = m_dtd.general_entities.find(name); if (it == m_dtd.general_entities.end()) { std::string msg; msg += "Reference to undeclared general entity '"; name.toUTF8String(msg); msg += "'."; throw semantic_error(msg); } // Unparsed entity references are forbidden. if (it->second->unparsed) { std::string msg; msg += "Reference to an unparsed general entity '"; name.toUTF8String(msg); msg += "'."; throw semantic_error(msg); } // Check for recursive references. hist = m_ref_history.insert(name); if (!hist.second) { std::string msg; msg += "Recursive reference to general entity '"; name.toUTF8String(msg); msg += "'."; throw semantic_error(msg); } // Process the entity. bool const state = m_parsing_entity; std::string nameutf8 = "&"; name.toUTF8String(nameutf8); nameutf8 += ';'; m_parsing_entity = true; if (!it->second->id.sys.isBogus() || !it->second->id.pub.isBogus()) { if (attvalue) { std::string msg; msg += "Reference to an external parsed general entity '"; name.toUTF8String(msg); msg += "' in attribute value."; throw semantic_error(msg); } io::input* input = nullptr; std::string encoding; resolve_id(it->second->id, input, encoding); if (input == nullptr) { if (m_validating) { throw runtime_error( "Could not dereference external parsed general entity." ); } } else { size_t const size = m_buffers.size(); try { m_buffers.emplace(nameutf8, *input, true, encoding); if (attvalue) { parse_included_attvalue(); } else { parse_content(); } m_buffers.pop(); } catch (...) { if (size < m_buffers.size()) { m_buffers.pop(); } delete input; m_ref_history.erase(hist.first); throw; } delete input; } } else { m_buffers.emplace( nameutf8, new io::uistring(it->second->text_or_notation, false) ); if (attvalue) { parse_included_attvalue(); } else { parse_content(); } m_buffers.pop(); } m_parsing_entity = state; m_ref_history.erase(hist.first); }
// -------------------------------------------------------------------------- void processor::on_pe_reference ( icu::UnicodeString const& name, bool entityvalue ) // -------------------------------------------------------------------------- { icu::UnicodeString text; std::map<icu::UnicodeString, parameter_entity_declaration*>::iterator it; std::pair<std::set<icu::UnicodeString>::iterator, bool> hist; if (!m_parsing_entity && !entityvalue && m_ref_history.empty()) { dtd_element e; e.type = dtd_element::parameter_reference; e.text = new icu::UnicodeString(name); m_document_type.m_subset.emplace_back(std::move(e)); } // Look for the entity. it = m_dtd.parameter_entities.find(name); if (it == m_dtd.parameter_entities.end()) { if (m_validating) { std::string msg; msg += "Reference to undeclared parameter entity '"; name.toUTF8String(msg); msg += "'."; throw semantic_error(msg); } else { m_dtd_stop = true; return; } } // Check for recursive references. hist = m_ref_history.insert(name); if (!hist.second) { std::string msg; msg += "Recursive reference to parameter entity '"; name.toUTF8String(msg); msg += "'."; throw semantic_error(msg); } // Process the entity. bool const state = m_parsing_entity; std::string nameutf8 = "%"; name.toUTF8String(nameutf8); nameutf8 += ';'; m_parsing_entity = true; if (it->second->external) { io::input* input = nullptr; std::string encoding; resolve_id(it->second->id, input, encoding); if (input == nullptr) { if (m_validating) { throw runtime_error( "Could not dereference external parameter entity." ); } else { m_dtd_stop = true; } } else { size_t const size = m_buffers.size(); try { if (entityvalue) { m_buffers.emplace(nameutf8, *input, true, encoding); parse_included_entityvalue(); m_buffers.pop(); } else { m_buffers.emplace(nameutf8, *input, true, encoding, true); parse_ext_subset(); m_buffers.pop(); } } catch (...) { if (size < m_buffers.size()) { m_buffers.pop(); } delete input; m_ref_history.erase(hist.first); throw; } delete input; } } else { // The literal value is stored as public ID. if (entityvalue) { m_buffers.emplace( nameutf8, new io::uistring(it->second->id.pub, false) ); parse_included_entityvalue(); m_buffers.pop(); } else { m_buffers.emplace( nameutf8, new io::uistring(it->second->id.pub, false), false ); parse_ext_subset(); m_buffers.pop(); } } m_parsing_entity = state; m_ref_history.erase(hist.first); }