Beispiel #1
0
void KHTMLReader::completed()
{
    kDebug(30503) << "KHTMLReader::completed";
    qApp->exit_loop();
    DOM::Document doc = _html->document(); // FIXME parse <HEAD> too
    DOM::NodeList list = doc.getElementsByTagName("body");
    DOM::Node docbody = list.item(0);

    if (docbody.isNull()) {
        kWarning(30503) << "no <BODY>, giving up";
        _it_worked = false;
        return;
    }


    parseNode(docbody);

    list = doc.getElementsByTagName("head");
    DOM::Node dochead = list.item(0);
    if (!dochead.isNull())
        parse_head(dochead);
    else
        kWarning(30503) << "WARNING: no html <HEAD> section";

    _writer->cleanUpParagraph(state()->paragraph);
    _it_worked = _writer->writeDoc();
}
Beispiel #2
0
bool Parser::parse() {
    skip_newline(false);

    while (1) {
        ptr<Token> token = cur();
        if (!token->type()) {
            break;
        }
        eat();
        if (*token == '\n') {
            continue;
        }

        if (*token == TOKEN_INCLUDE) {
            bool old_skip_newline = skip_newline();
            skip_newline(false);
            token = cur();
            if (*token == TOKEN_CONST_STRING && look()->is_eol()) {
                eat();
                eat();
                skip_newline(old_skip_newline);
                ptr<Path> path = object<Path>(token->text());
                if (_input.is_root()) {
                    _symbols.exportSymbol(object<IncludeTree>(path));
                }
                _input.load(path);
            }
            else {
                log_expect(token->loc(), "string eol");
            }
            continue;
        }
        SegmentToken *seg = nullptr; 
        if (token->type() == TOKEN_SEGMENT) {
            seg = static_cast<SegmentToken*>(token.get());
            if (!seg->name()) {
                bool old_skip_newline = skip_newline();
                skip_newline(false);
                if (!look()->is_eol()) {
                    log_expect(token->loc(), "eol");
                }
                skip_newline(old_skip_newline);
                eat();
            }
            else {
                seg = nullptr;
            }
        }

        switch (_phase) {
        case PARSE_PHASE_HEAD:
            if (seg) {
                _phase = PARSE_PHASE_BODY;
                continue;
            }
            skip_newline(false);
            parse_head(token); 
            break;
        case PARSE_PHASE_BODY:
            if (seg) {
                _phase = PARSE_PHASE_TAIL;
                continue;
            }
            skip_newline(true);
            parse_body(token);
            break;
        case PARSE_PHASE_TAIL:
            if (seg) {
                log_error(token->loc(), "too more segment declear.");
            }
            skip_newline(false);
            parse_tail(token);
            break;
        }
    }
    return true; 
}
Beispiel #3
0
inline bool xml_element::parse_body(const char *&data) {
  while(true) {
    if(!*data) return false;
    if(*data++ != '<') continue;
    if(*data == '/') return false;

    if(strbegin(data, "!DOCTYPE") == true) {
      parse_doctype(data);
      return true;
    }

    if(strbegin(data, "!--")) {
      if(optional<unsigned> offset = strpos(data, "-->")) {
        data += offset() + 3;
        continue;
      } else {
        throw "...";
      }
    }

    if(strbegin(data, "![CDATA[")) {
      if(optional<unsigned> offset = strpos(data, "]]>")) {
        data += offset() + 3;
        continue;
      } else {
        throw "...";
      }
    }

    optional<unsigned> offset = strpos(data, ">");
    if(!offset) throw "...";

    string tag = substr(data, 0, offset());
    data += offset() + 1;
    const char *content_begin = data;

    bool self_terminating = false;

    if(strend(tag, "?") == true) {
      self_terminating = true;
      tag.rtrim_once("?");
    } else if(strend(tag, "/") == true) {
      self_terminating = true;
      tag.rtrim_once("/");
    }

    parse_head(tag);
    if(self_terminating) return true;

    while(*data) {
      unsigned index = element.size();
      xml_element node;
      if(node.parse_body(data) == false) {
        if(*data == '/') {
          signed length = data - content_begin - 1;
          if(length > 0) content = substr(content_begin, 0, length);

          data++;
          optional<unsigned> offset = strpos(data, ">");
          if(!offset) throw "...";

          tag = substr(data, 0, offset());
          data += offset() + 1;

          tag.replace("\t", " ");
          tag.replace("\r", " ");
          tag.replace("\n", " ");
          while(strpos(tag, "  ")) tag.replace("  ", " ");
          tag.rtrim();

          if(name != tag) throw "...";
          return true;
        }
      } else {
        element.append(node);
      }
    }
  }
}