void SemanticMarkupParser::parse(const QString& text) { mCursorInsideTag = false; mOpenElements.clear(); mOpenElementsAtCursor.clear(); mNotClosedChildElements.clear(); QRegExp startTagRegExp("<\\s*(\\w+)\\s*((\\w+=\"[^\"]*\"\\s*)*)>"); QRegExp endTagRegExp("</\\s*(\\w+)\\s*>"); QRegExp emptyTagRegExp("<\\s*(\\w+)\\s*((\\w+=\"[^\"]*\"\\s*)*)/>"); QRegExp tagRegExp(startTagRegExp.pattern() + '|' + endTagRegExp.pattern() + '|' + emptyTagRegExp.pattern()); bool parsingReachedCursor = false; int parsingIndex = tagRegExp.indexIn(text); while (parsingIndex >= 0) { QString tag = tagRegExp.capturedTexts().at(0); if (mCursorIndex > parsingIndex && mCursorIndex < parsingIndex + tagRegExp.matchedLength()) { mCursorInsideTag = true; mOpenElementsAtCursor.clear(); return; } if (!parsingReachedCursor && parsingIndex >= mCursorIndex) { parsingReachedCursor = true; mOpenElementsAtCursor = mOpenElements; } if (startTagRegExp.exactMatch(tag)) { StartTag startTag; startTag.mName = startTagRegExp.capturedTexts().at(1); startTag.mAttributes = startTagRegExp.capturedTexts().at(2); startTag.mIndex = parsingIndex; mOpenElements.insert(0, startTag); } else if (endTagRegExp.exactMatch(tag)) { QString endTagName = endTagRegExp.capturedTexts().at(1); int endElementIndex = indexOf(mOpenElements, endTagName); if (endElementIndex >= 0) { int index = 0; while (index < endElementIndex) { mNotClosedChildElements.append(mOpenElements.first()); mOpenElements.removeFirst(); index++; } mOpenElements.removeFirst(); } } parsingIndex = parsingIndex + tagRegExp.matchedLength(); parsingIndex = tagRegExp.indexIn(text, parsingIndex); } if (!parsingReachedCursor) { mOpenElementsAtCursor = mOpenElements; } }
void cleanupXML(QTextStream &input, QTextStream &output) { QRegExp filenameRegExp("filename=\"[^\"]*\""), nameRegExp("name=\"[^\"]*\""), tagRegExp("^\\s*<([a-zA-Z]+) "), leadingSpaces("^ *"), closeTag("^\\s*</"); bool inComment = false, hasContents = false; while (!input.atEnd()) { QString line = input.readLine(); bool startComment = line.contains("<!--"); bool endComment = line.contains("-->"); if (startComment) inComment = true; if (endComment) inComment = false; if (inComment || endComment) { if (startComment) { /* Turn leading spaces into tabs */ if (leadingSpaces.indexIn(line) == 0) line = QString('\t').repeated(leadingSpaces.matchedLength()) + line.mid(leadingSpaces.matchedLength()); } output << line << endl; continue; } /* Make sure that the name attribute is always the first one */ int tagMatch = tagRegExp.indexIn(line), tagLength = tagRegExp.matchedLength(); int nameMatch = nameRegExp.indexIn(line), filenameMatch = filenameRegExp.indexIn(line), nameLength = nameRegExp.matchedLength(); if (tagMatch != -1 && nameMatch != -1 && filenameMatch == -1) { QString a = line.mid(tagLength, nameMatch-tagLength).trimmed(), b = line.mid(nameMatch+nameLength).trimmed(); line = line.left(tagLength) + line.mid(nameMatch, nameLength); if (a.length() > 0) line += " " + a; if (b.length() > 0) line += " " + b; } /* Add an extra newline if this is an object tag, and if there have been siblings before it */ if (tagMatch != -1) { const QString &el = tagRegExp.cap(1); bool isObject = true; isObject &= (el != "string"); isObject &= (el != "integer"); isObject &= (el != "float"); isObject &= (el != "boolean"); isObject &= (el != "vector"); isObject &= (el != "point"); isObject &= (el != "transform"); isObject &= (el != "spectrum"); isObject &= (el != "rgb"); isObject &= (el != "scale"); isObject &= (el != "translate"); isObject &= (el != "rotate"); isObject &= (el != "lookAt"); isObject &= (el != "matrix"); if (isObject && hasContents) { output << endl; hasContents = false; } if (!isObject) hasContents = true; } /* Turn leading spaces into tabs */ if (leadingSpaces.indexIn(line) == 0) line = QString('\t').repeated(leadingSpaces.matchedLength()) + line.mid(leadingSpaces.matchedLength()); /* Remove ugly spaces */ if (line.endsWith(" />")) { line = line.left(line.size()-4) + QString("/>"); hasContents = true; } else if (line.endsWith(" />")) { line = line.left(line.size()-3) + QString("/>"); hasContents = true; } else if (line.endsWith("/>")) { hasContents = true; } else if (line.endsWith(" >")) { line = line.left(line.size()-2) + QString(">"); } else if (line.endsWith("?>")) { hasContents = true; } if (closeTag.indexIn(line) == 0) hasContents = true; output << line << endl; } }