void KHTMLReader::completed() { kDebug(30503) << "KHTMLReader::completed"; qApp->exit_loop(); DOM::Document doc = _html->document(); // FIXME parse <HEAD> too DOM::NodeList list = doc.getElementsByTagName("body"); DOM::Node docbody = list.item(0); if (docbody.isNull()) { kWarning(30503) << "no <BODY>, giving up"; _it_worked = false; return; } parseNode(docbody); list = doc.getElementsByTagName("head"); DOM::Node dochead = list.item(0); if (!dochead.isNull()) parse_head(dochead); else kWarning(30503) << "WARNING: no html <HEAD> section"; _writer->cleanUpParagraph(state()->paragraph); _it_worked = _writer->writeDoc(); }
bool Parser::parse() { skip_newline(false); while (1) { ptr<Token> token = cur(); if (!token->type()) { break; } eat(); if (*token == '\n') { continue; } if (*token == TOKEN_INCLUDE) { bool old_skip_newline = skip_newline(); skip_newline(false); token = cur(); if (*token == TOKEN_CONST_STRING && look()->is_eol()) { eat(); eat(); skip_newline(old_skip_newline); ptr<Path> path = object<Path>(token->text()); if (_input.is_root()) { _symbols.exportSymbol(object<IncludeTree>(path)); } _input.load(path); } else { log_expect(token->loc(), "string eol"); } continue; } SegmentToken *seg = nullptr; if (token->type() == TOKEN_SEGMENT) { seg = static_cast<SegmentToken*>(token.get()); if (!seg->name()) { bool old_skip_newline = skip_newline(); skip_newline(false); if (!look()->is_eol()) { log_expect(token->loc(), "eol"); } skip_newline(old_skip_newline); eat(); } else { seg = nullptr; } } switch (_phase) { case PARSE_PHASE_HEAD: if (seg) { _phase = PARSE_PHASE_BODY; continue; } skip_newline(false); parse_head(token); break; case PARSE_PHASE_BODY: if (seg) { _phase = PARSE_PHASE_TAIL; continue; } skip_newline(true); parse_body(token); break; case PARSE_PHASE_TAIL: if (seg) { log_error(token->loc(), "too more segment declear."); } skip_newline(false); parse_tail(token); break; } } return true; }
inline bool xml_element::parse_body(const char *&data) { while(true) { if(!*data) return false; if(*data++ != '<') continue; if(*data == '/') return false; if(strbegin(data, "!DOCTYPE") == true) { parse_doctype(data); return true; } if(strbegin(data, "!--")) { if(optional<unsigned> offset = strpos(data, "-->")) { data += offset() + 3; continue; } else { throw "..."; } } if(strbegin(data, "![CDATA[")) { if(optional<unsigned> offset = strpos(data, "]]>")) { data += offset() + 3; continue; } else { throw "..."; } } optional<unsigned> offset = strpos(data, ">"); if(!offset) throw "..."; string tag = substr(data, 0, offset()); data += offset() + 1; const char *content_begin = data; bool self_terminating = false; if(strend(tag, "?") == true) { self_terminating = true; tag.rtrim_once("?"); } else if(strend(tag, "/") == true) { self_terminating = true; tag.rtrim_once("/"); } parse_head(tag); if(self_terminating) return true; while(*data) { unsigned index = element.size(); xml_element node; if(node.parse_body(data) == false) { if(*data == '/') { signed length = data - content_begin - 1; if(length > 0) content = substr(content_begin, 0, length); data++; optional<unsigned> offset = strpos(data, ">"); if(!offset) throw "..."; tag = substr(data, 0, offset()); data += offset() + 1; tag.replace("\t", " "); tag.replace("\r", " "); tag.replace("\n", " "); while(strpos(tag, " ")) tag.replace(" ", " "); tag.rtrim(); if(name != tag) throw "..."; return true; } } else { element.append(node); } } } }