// Creates a "head" element under aParent and keeps reading tags with getTag()
// until the closing head tag is seen.
//
// Start tags are dispatched, in this order, on the parser's predicates:
//   isTitleTag / isStyleTag / isScriptTag               -> normalHeadTag()
//   isIsIndexTag / isBaseTag / isMetaTag / isLinkTag    -> emptyElementTag()
//   isCommentTag                                        -> commentTag()
//   anything else is reported on cout and handed to skipTag()
// A closing tag that is not the head tag is reported and otherwise ignored.
//
// aParent: node that receives the new "head" element as a child.
void HTMLParser::headTag(TElementShared aParent)
{
    cout << "head tag found\n";

    // Hook the new element into the tree before reading its content.
    TElementShared headNode = mDocument->createElement("head");
    aParent->appendChild(headNode);

    for (;;)
    {
        getTag();

        if (!isStartTag())
        {
            if (isHeadTag())
            {
                cout << "head closing tag found\n";
                return;
            }
            cout << "headTag: Unexpected closing tag found: " << mTag << ". Skipping...\n";
            continue;
        }

        if (isTitleTag() || isStyleTag() || isScriptTag())
        {
            normalHeadTag(headNode);
        }
        else if (isIsIndexTag() || isBaseTag() || isMetaTag() || isLinkTag())
        {
            emptyElementTag(headNode);
        }
        else if (isCommentTag())
        {
            commentTag(headNode);
        }
        else
        {
            // Not a tag this level knows about.
            cout << "headTag: Unexpected tag found: " << mTag << ". Skipping...\n";
            skipTag();
        }
    }
}
void HTMLParser :: htmlTag() { cout << "html tag found\n"; // Add to DOM tree TElementShared element = mDocument->createElement( "html" ); mDocument->appendChild( element ); bool insideHtml = true; while ( insideHtml ) { getTag(); if ( isStartTag() ) { if ( isHeadTag() ) { headTag( element ); continue; } if ( isBodyTag() ) { bodyStyleTag( element ); continue; } if ( isCommentTag() ) { commentTag( element ); continue; } cout << "htmlTag: Unexpected tag found: " << mTag << ". Skipping...\n"; skipTag(); } else { if ( !mTag.compare( "html" ) ) { cout << "html closing tag found\n"; insideHtml = false; } else { cout << "htmlTag: Unexpected closing tag found: " << mTag << ". Skipping...\n"; } } } }
// Handles a head-level element whose content is collected as one text run.
//
// An element named after the current mTag is created under aParent. Each
// pass appends getText() to an accumulator and then reads a tag with
// getTag(). The loop ends at the closing tag whose name equals the name
// saved on entry. Comment start tags go to commentTag(); any other start tag
// is reported and passed to skipTag(); any other closing tag is reported.
// After the loop the accumulated text is appended to the element as a single
// text node, so it comes after any comment children added meanwhile.
//
// aParent: node that receives the new element as a child.
void HTMLParser::normalHeadTag(TElementShared aParent)
{
    cout << mTag << " tag found\n";

    // mTag changes while reading, so keep the opening name for the
    // closing-tag comparison.
    string openedName = mTag;

    TElementShared node = mDocument->createElement(openedName);
    aParent->appendChild(node);

    string collected;
    for (;;)
    {
        collected += getText();
        getTag();

        if (isStartTag())
        {
            if (isCommentTag())
            {
                commentTag(node);
            }
            else
            {
                cout << "normalHead: Unexpected tag found: " << mTag << ". Skipping...\n";
                skipTag();
            }
            continue;
        }

        if (mTag.compare(openedName) == 0)
        {
            cout << mTag << " closing tag found\n";
            break;
        }
        cout << "normalHead: Unexpected closing tag found: " << mTag << ". Skipping...\n";
    }

    TTextShared textNode = mDocument->createText(collected);
    node->appendChild(textNode);
    cout << "Text is: " << collected << endl;
}
void XMLPathTraversalTest() { typedef std::map<std::basic_string<wchar_t>, std::basic_string<wchar_t> > AttributesType; TagElement<wchar_t> rootTag(L"root"); TagElement<wchar_t> childTag1(L"child1"); TagElement<wchar_t> childTag1_(L"child1"); TagElement<wchar_t> childTag2(L"child2"); StringElement<wchar_t> stringTag(L"string"); CommentElement<wchar_t> commentTag(L"comment"); rootTag.addChild(&childTag1); rootTag.addChild(&childTag2); rootTag.addChild(&childTag1_); childTag1.addChild(&stringTag); childTag1_.addChild(&commentTag); CPPUNIT_ASSERT(rootTag.children.size() == 3); CPPUNIT_ASSERT(dynamic_cast<TagElement<wchar_t>*> (rootTag.getChildElement(L"child1"))->getTagName() == L"child1"); CPPUNIT_ASSERT(dynamic_cast<TagElement<wchar_t>*> (rootTag.getChildElement(L"child2"))->getTagName() == L"child2"); std::vector<Element<wchar_t>*> result; XMLPath<wchar_t> path_root(L"/root"); result = path_root.evaluate(&rootTag); CPPUNIT_ASSERT(result.size() == 1); CPPUNIT_ASSERT(result[0] == &rootTag); XMLPath<wchar_t> path_roots(L"/root[]"); result = path_roots.evaluate(&rootTag); CPPUNIT_ASSERT(result.size() == 1); CPPUNIT_ASSERT(result[0] == &rootTag); XMLPath<wchar_t> path_child1(L"/root/child1"); result = path_child1.evaluate(&rootTag); CPPUNIT_ASSERT(result.size() == 1); CPPUNIT_ASSERT(result[0] == &childTag1); XMLPath<wchar_t> path_child1s(L"/root/child1[]"); result = path_child1s.evaluate(&rootTag); CPPUNIT_ASSERT(result.size() == 2); CPPUNIT_ASSERT(result[0] == &childTag1); CPPUNIT_ASSERT(result[1] == &childTag1_); XMLPath<wchar_t> path_text(L"/root/child1[]/#text"); result = path_text.evaluate(&rootTag); CPPUNIT_ASSERT(result.size() == 1); CPPUNIT_ASSERT(result[0] == &stringTag); XMLPath<wchar_t> path_comment(L"/root/child1[]/#comment"); result = path_comment.evaluate(&rootTag); CPPUNIT_ASSERT(result.size() == 1); CPPUNIT_ASSERT(result[0] == &commentTag); rootTag.removeChild(&childTag1); rootTag.removeChild(&childTag2); rootTag.removeChild(&childTag1_); 
childTag1.removeChild(&stringTag); childTag1_.removeChild(&commentTag); }
// Handles an element that may contain text and text-level children.
//
// An element named after the current mTag is created under aParent. The
// function then reads chunks with getString( aConserveSpaces ) and acts on
// mStringType:
//   ATTR       remember the attribute name; if mAttrNoValue is set, store it
//              immediately with an empty value
//   ATTRVALUE  store the value under the remembered name (ignored when no
//              name is pending)
//   TAG        closing tag equal to the saved opening name ends the loop;
//              other closing tags are reported; start tags go to
//              textLevelTag() / commentTag(), anything else to skipTag()
//   TEXT       non-empty text becomes a text node; a chunk that is exactly
//              " " is kept only when aConserveSpaces is true
//
// aParent:         node that receives the new element as a child.
// aConserveSpaces: passed to getString() and textLevelTag(); also decides
//                  whether a single-space text chunk is kept.
// aInsideAnchor:   forwarded unchanged to textLevelTag().
void HTMLParser::normalTextTag(TElementShared aParent, bool aConserveSpaces, bool aInsideAnchor)
{
    cout << mTag << " tag found\n";

    // mTag changes while children are read, so keep the opening name.
    string tag = mTag;

    TElementShared element = mDocument->createElement(mTag);
    aParent->appendChild(element);

    // Name of an attribute still waiting for its value ("" when none).
    // The former unused local "string text", which was shadowed by the text
    // node created in the TEXT case, has been dropped.
    string attribute;

    for (;;)
    {
        string data = getString(aConserveSpaces);

        switch (mStringType)
        {
            case ATTR:
            {
                attribute = data;
                if (mAttrNoValue)
                {
                    element->setAttribute(attribute, "");
                    attribute = "";
                }
                break;
            }
            case ATTRVALUE:
            {
                // A value is only accepted when a name is pending.
                if (!attribute.empty())
                {
                    element->setAttribute(attribute, data);
                    attribute = "";
                }
                break;
            }
            case TAG:
            {
                if (!isStartTag())
                {
                    if (mTag.compare(tag) == 0)
                    {
                        cout << tag << " closing tag found\n";
                        return;
                    }
                    cout << "normalText: Unexpected closing tag found: " << mTag << ". Skipping...\n";
                }
                else if (isTextLevelTag())
                {
                    textLevelTag(element, aConserveSpaces, aInsideAnchor);
                }
                else if (isCommentTag())
                {
                    commentTag(element);
                }
                else
                {
                    // Not a known tag
                    cout << "normalText: Unexpected tag found: " << mTag << ". Skipping...\n";
                    skipTag();
                }
                break;
            }
            case TEXT:
            {
                // Empty chunks are always dropped; a lone " " survives only
                // when spaces are being conserved.
                if (!data.empty() && (aConserveSpaces || data != " "))
                {
                    cout << "Text is:" << endl << data << endl;
                    TTextShared textNode = mDocument->createText(data);
                    element->appendChild(textNode);
                }
                break;
            }
        }
    }
}
// Creates a "map" element under aParent and reads chunks with getString()
// until the closing map tag (isMapTag() on a non-start tag) is seen.
//
// Per mStringType:
//   ATTR / ATTRVALUE  attributes are stored on the map element; a name with
//                     mAttrNoValue set is stored with an empty value
//   TAG               start tags: isAreaTag -> emptyElementTag(),
//                     isCommentTag -> commentTag(), else reported + skipTag();
//                     closing tags other than map are reported
//   TEXT              text other than "" or " " is reported as misplaced and
//                     dropped
//
// aParent: node that receives the new "map" element as a child.
void HTMLParser::mapTag(TElementShared aParent)
{
    cout << "map tag found\n";

    TElementShared mapNode = mDocument->createElement("map");
    aParent->appendChild(mapNode);

    // Attribute name waiting for its value ("" when none is pending).
    string pendingName;

    for (;;)
    {
        string chunk = getString();

        switch (mStringType)
        {
            case ATTR:
                pendingName = chunk;
                if (mAttrNoValue)
                {
                    mapNode->setAttribute(pendingName, "");
                    pendingName = "";
                }
                break;

            case ATTRVALUE:
                // Only a value that follows a name is accepted.
                if (!pendingName.empty())
                {
                    mapNode->setAttribute(pendingName, chunk);
                    pendingName = "";
                }
                break;

            case TAG:
                if (!isStartTag())
                {
                    if (isMapTag())
                    {
                        cout << "map closing tag found\n";
                        return;
                    }
                    cout << "map: Unexpected closing tag found: " << mTag << ". Skipping...\n";
                }
                else if (isAreaTag())
                {
                    emptyElementTag(mapNode);
                }
                else if (isCommentTag())
                {
                    commentTag(mapNode);
                }
                else
                {
                    cout << "map: Unexpected tag found: " << mTag << ". Skipping...\n";
                    skipTag();
                }
                break;

            case TEXT:
                if (chunk != " " && !chunk.empty())
                {
                    cout << "Text found in illegal place. Skipping...\n";
                }
                break;
        }
    }
}
// Handles an element whose only content is text (plus comments).
//
// An element named after the current mTag is created under aParent, then
// chunks are read with getString( aConserveSpaces ):
//   ATTR / ATTRVALUE  attributes are stored on the element; a name with
//                     mAttrNoValue set is stored with an empty value
//   TAG               a comment start tag goes to commentTag(); the closing
//                     tag equal to the saved opening name ends the element;
//                     every other tag (start or closing) ends the element
//                     implicitly and backPedal() is called
//                     (presumably so the caller can re-read that tag - confirm)
//   TEXT              non-empty text becomes a text node; a chunk that is
//                     exactly " " is kept only when aConserveSpaces is true
//
// aParent:         node that receives the new element as a child.
// aConserveSpaces: passed to getString(); also decides whether a
//                  single-space text chunk is kept.
void HTMLParser::pcDataTag(TElementShared aParent, bool aConserveSpaces)
{
    cout << mTag << " tag found\n";

    // Keep the opening name; mTag is overwritten as further tags are read.
    string openedName = mTag;

    TElementShared node = mDocument->createElement(mTag);
    aParent->appendChild(node);

    // Attribute name waiting for its value ("" when none is pending).
    string pendingName;

    for (;;)
    {
        string chunk = getString(aConserveSpaces);

        switch (mStringType)
        {
            case ATTR:
            {
                pendingName = chunk;
                if (mAttrNoValue)
                {
                    node->setAttribute(pendingName, "");
                    pendingName = "";
                }
                break;
            }
            case ATTRVALUE:
            {
                if (!pendingName.empty())
                {
                    node->setAttribute(pendingName, chunk);
                    pendingName = "";
                }
                break;
            }
            case TAG:
            {
                const bool opening = isStartTag();

                if (opening && isCommentTag())
                {
                    commentTag(node);
                    break;
                }
                if (!opening && mTag.compare(openedName) == 0)
                {
                    cout << mTag << " closing tag found\n";
                    return;
                }

                // Any other tag terminates this element without consuming it.
                cout << mTag << " closed implicitly\n";
                backPedal();
                return;
            }
            case TEXT:
            {
                if (!chunk.empty() && (aConserveSpaces || chunk != " "))
                {
                    cout << "Text is:" << endl << chunk << endl;
                    TTextShared textNode = mDocument->createText(chunk);
                    node->appendChild(textNode);
                }
                break;
            }
        }
    }
}
// Creates a "pre" element under aParent and reads chunks with
// getString( true ) (spaces conserved) until the closing pre tag is seen.
//
// Per mStringType:
//   ATTR / ATTRVALUE  attributes are stored on the pre element; a name with
//                     mAttrNoValue set is stored with an empty value
//   TAG               closing pre tag ends the loop, other closing tags are
//                     reported. Start tags are dispatched in this order:
//                       isFontStylePreTag / isPhraseTag -> normalTextTag( e, true )
//                       isFormFieldTag                  -> formFieldTag( e, true )
//                       isAnchorTag                     -> normalTextTag( e, true, true )
//                       isAppletTag                     -> appletTag( e, true )
//                       isEmptyTextPreTag               -> emptyElementTag( e )
//                       isMapTag                        -> mapTag( e )
//                       isCommentTag                    -> commentTag( e )
//                     anything else is reported and handed to skipTag()
//   TEXT              every non-empty chunk becomes a text node
//
// Unlike the previous version, a text chunk that is exactly " " is no longer
// dropped: this function always reads in conserve-spaces mode, and
// normalTextTag / pcDataTag keep such a chunk when they conserve spaces.
//
// aParent: node that receives the new "pre" element as a child.
void HTMLParser::preTag(TElementShared aParent)
{
    cout << "pre tag found\n";

    TElementShared element = mDocument->createElement("pre");
    aParent->appendChild(element);

    // Attribute name waiting for its value ("" when none is pending).
    string attribute;

    for (;;)
    {
        string data = getString(true);

        switch (mStringType)
        {
            case ATTR:
            {
                attribute = data;
                if (mAttrNoValue)
                {
                    element->setAttribute(attribute, "");
                    attribute = "";
                }
                break;
            }
            case ATTRVALUE:
            {
                // A value is only accepted when a name is pending.
                if (!attribute.empty())
                {
                    element->setAttribute(attribute, data);
                    attribute = "";
                }
                break;
            }
            case TAG:
            {
                if (!isStartTag())
                {
                    if (isPreTag())
                    {
                        cout << "pre closing tag found\n";
                        return;
                    }
                    cout << "pre: Unexpected closing tag found: " << mTag << ". Skipping...\n";
                }
                else if (isFontStylePreTag() || isPhraseTag())
                {
                    normalTextTag(element, true);
                }
                else if (isFormFieldTag())
                {
                    formFieldTag(element, true);
                }
                else if (isAnchorTag())
                {
                    normalTextTag(element, true, true);
                }
                else if (isAppletTag())
                {
                    appletTag(element, true);
                }
                else if (isEmptyTextPreTag())
                {
                    emptyElementTag(element);
                }
                else if (isMapTag())
                {
                    mapTag(element);
                }
                else if (isCommentTag())
                {
                    commentTag(element);
                }
                else
                {
                    // Not a known tag
                    cout << "pre: Unexpected tag found: " << mTag << ". Skipping...\n";
                    skipTag();
                }
                break;
            }
            case TEXT:
            {
                // Spaces are conserved here, so only empty chunks are dropped.
                if (!data.empty())
                {
                    cout << "Text is:" << endl << data << endl;
                    TTextShared textNode = mDocument->createText(data);
                    element->appendChild(textNode);
                }
                break;
            }
        }
    }
}
// Creates a "tr" element under aParent and reads chunks with getString()
// until the closing tr tag (isTrTag() on a non-start tag) is seen.
//
// Per mStringType:
//   ATTR / ATTRVALUE  attributes are stored on the tr element; a name with
//                     mAttrNoValue set is stored with an empty value
//   TAG               start tags: isThTag / isTdTag -> bodyStyleTag(),
//                     isCommentTag -> commentTag(), else reported + skipTag();
//                     closing tags other than tr are reported
//   TEXT              text other than "" or " " is reported as misplaced and
//                     dropped
//
// aParent: node that receives the new "tr" element as a child.
void HTMLParser::trTag(TElementShared aParent)
{
    cout << "tr tag found\n";

    TElementShared row = mDocument->createElement("tr");
    aParent->appendChild(row);

    // Attribute name waiting for its value ("" when none is pending).
    string pendingName;

    for (;;)
    {
        // getString() can deliver more than just a tag; mStringType tells
        // which kind of chunk was read.
        string chunk = getString();

        switch (mStringType)
        {
            case ATTR:
                pendingName = chunk;
                if (mAttrNoValue)
                {
                    row->setAttribute(pendingName, "");
                    pendingName = "";
                }
                break;

            case ATTRVALUE:
                if (!pendingName.empty())
                {
                    row->setAttribute(pendingName, chunk);
                    pendingName = "";
                }
                break;

            case TAG:
                if (!isStartTag())
                {
                    if (isTrTag())
                    {
                        cout << "tr closing tag found\n";
                        return;
                    }
                    cout << "tr: Unexpected closing tag found: " << mTag << ". Skipping...\n";
                }
                else if (isThTag() || isTdTag())
                {
                    bodyStyleTag(row);
                }
                else if (isCommentTag())
                {
                    commentTag(row);
                }
                else
                {
                    cout << "tr: Unexpected tag found: " << mTag << ". Skipping...\n";
                    skipTag();
                }
                break;

            case TEXT:
                if (chunk != " " && !chunk.empty())
                {
                    cout << "Text found in illegal place. Skipping...\n";
                }
                break;
        }
    }
}
// Handles a list container element.
//
// An element named after the current mTag is created under aParent. Whether
// the list takes "list items" is decided once on entry: true when isULTag()
// or isOLTag() holds for the opening tag, false otherwise.
//
// Chunks are then read with getString(); per mStringType:
//   ATTR / ATTRVALUE  attributes are stored on the list element; a name with
//                     mAttrNoValue set is stored with an empty value
//   TAG               start tags:
//                       isLITag in an item list, or isDDTag in a non-item
//                       list                       -> flowLevelTag()
//                       isDTTag in a non-item list -> textLevelTag()
//                       isCommentTag               -> commentTag()
//                       anything else is reported and handed to skipTag()
//                     the closing tag equal to the saved opening name ends
//                     the loop; other closing tags are reported
//   TEXT              text other than "" or " " is reported as misplaced and
//                     dropped
//
// aParent: node that receives the new list element as a child.
void HTMLParser::listTag(TElementShared aParent)
{
    cout << mTag << " tag found\n";

    // Keep the opening name; mTag is overwritten as children are read.
    string openedName = mTag;

    // Decided from the opening tag only.
    const bool takesListItems = isULTag() || isOLTag();

    TElementShared listNode = mDocument->createElement(mTag);
    aParent->appendChild(listNode);

    // Attribute name waiting for its value ("" when none is pending).
    string pendingName;

    for (;;)
    {
        // getString() can deliver more than just a tag; mStringType tells
        // which kind of chunk was read.
        string chunk = getString();

        switch (mStringType)
        {
            case ATTR:
                pendingName = chunk;
                if (mAttrNoValue)
                {
                    listNode->setAttribute(pendingName, "");
                    pendingName = "";
                }
                break;

            case ATTRVALUE:
                if (!pendingName.empty())
                {
                    listNode->setAttribute(pendingName, chunk);
                    pendingName = "";
                }
                break;

            case TAG:
                if (!isStartTag())
                {
                    if (mTag.compare(openedName) == 0)
                    {
                        cout << mTag << " closing tag found\n";
                        return;
                    }
                    cout << "list: Unexpected closing tag found: " << mTag << ". Skipping...\n";
                }
                else if ((isLITag() && takesListItems) || (isDDTag() && !takesListItems))
                {
                    flowLevelTag(listNode);
                }
                else if (isDTTag() && !takesListItems)
                {
                    textLevelTag(listNode);
                }
                else if (isCommentTag())
                {
                    commentTag(listNode);
                }
                else
                {
                    cout << "list: Unexpected tag found: " << mTag << ". Skipping...\n";
                    skipTag();
                }
                break;

            case TEXT:
                if (chunk != " " && !chunk.empty())
                {
                    cout << "Text found in illegal place. Skipping...\n";
                }
                break;
        }
    }
}
// Creates a "p" element under aParent and reads chunks with getString()
// until the paragraph is closed explicitly or implicitly.
//
// Per mStringType:
//   ATTR / ATTRVALUE  attributes are stored on the p element; a name with
//                     mAttrNoValue set is stored with an empty value
//   TAG, closing      isPTag ends the paragraph; a closing tag matching
//                     isAdressTag / isBodyStyleTag / isFormTag / isListTag
//                     ends it implicitly and backPedal() is called
//                     (presumably so the caller re-reads that tag - confirm);
//                     other closing tags are reported
//   TAG, start        isBlockLevelTag / isHeadingTag / isLITag end the
//                     paragraph implicitly with backPedal();
//                     isTextLevelTag -> textLevelTag(),
//                     isCommentTag -> commentTag(),
//                     anything else is reported and handed to skipTag()
//   TEXT              text other than "" or " " becomes a text node
//
// aParent: node that receives the new "p" element as a child.
void HTMLParser::pTag(TElementShared aParent)
{
    cout << "p tag found\n";

    TElementShared paragraph = mDocument->createElement("p");
    aParent->appendChild(paragraph);

    // Attribute name waiting for its value ("" when none is pending).
    string pendingName;

    for (;;)
    {
        string chunk = getString();

        switch (mStringType)
        {
            case ATTR:
                pendingName = chunk;
                if (mAttrNoValue)
                {
                    paragraph->setAttribute(pendingName, "");
                    pendingName = "";
                }
                break;

            case ATTRVALUE:
                if (!pendingName.empty())
                {
                    paragraph->setAttribute(pendingName, chunk);
                    pendingName = "";
                }
                break;

            case TAG:
                if (!isStartTag())
                {
                    if (isPTag())
                    {
                        cout << "p closing tag found\n";
                        return;
                    }
                    if (isAdressTag() || isBodyStyleTag() || isFormTag() || isListTag())
                    {
                        // An enclosing element is closing: leave the tag for it.
                        cout << "p closed implicitly\n";
                        backPedal();
                        return;
                    }
                    cout << "p: Unexpected closing tag found: " << mTag << ". Skipping...\n";
                }
                else if (isBlockLevelTag() || isHeadingTag() || isLITag())
                {
                    // These start tags terminate the paragraph.
                    cout << "p closed implicitly\n";
                    backPedal();
                    return;
                }
                else if (isTextLevelTag())
                {
                    textLevelTag(paragraph);
                }
                else if (isCommentTag())
                {
                    commentTag(paragraph);
                }
                else
                {
                    cout << "p: Unexpected tag found: " << mTag << ". Skipping...\n";
                    skipTag();
                }
                break;

            case TEXT:
                if (chunk != " " && !chunk.empty())
                {
                    cout << "Text is:" << endl << chunk << endl;
                    TTextShared textNode = mDocument->createText(chunk);
                    paragraph->appendChild(textNode);
                }
                break;
        }
    }
}
// Handles the address-style element the parser has just opened.
//
// The element is created under aParent using the name currently in mTag, and
// the loop ends at the closing tag carrying that same name. This is the
// approach listTag and bodyStyleTag use; the previous version hard-coded the
// literal "adress" for both the element name and the closing-tag comparison,
// which only works if the opening tag was spelled exactly that way.
// NOTE(review): presumably callers only enter here when isAdressTag() holds,
// so mTag carries whatever spelling that predicate accepts - confirm.
//
// Chunks are read with getString(); per mStringType:
//   ATTR / ATTRVALUE  attributes are stored on the element; a name with
//                     mAttrNoValue set is stored with an empty value
//   TAG               start tags: isPTag -> pTag(),
//                     isTextLevelTag -> textLevelTag(),
//                     isCommentTag -> commentTag(),
//                     anything else is reported and handed to skipTag();
//                     closing tags other than the opening name are reported
//   TEXT              text other than "" or " " becomes a text node
//
// aParent: node that receives the new element as a child.
void HTMLParser::adressTag(TElementShared aParent)
{
    cout << "adress tag found\n";

    // Keep the opening name; mTag is overwritten as children are read.
    string tag = mTag;

    TElementShared element = mDocument->createElement(tag);
    aParent->appendChild(element);

    // Attribute name waiting for its value ("" when none is pending).
    string attribute;

    for (;;)
    {
        string data = getString();

        switch (mStringType)
        {
            case ATTR:
            {
                attribute = data;
                if (mAttrNoValue)
                {
                    element->setAttribute(attribute, "");
                    attribute = "";
                }
                break;
            }
            case ATTRVALUE:
            {
                // A value is only accepted when a name is pending.
                if (!attribute.empty())
                {
                    element->setAttribute(attribute, data);
                    attribute = "";
                }
                break;
            }
            case TAG:
            {
                if (!isStartTag())
                {
                    if (mTag.compare(tag) == 0)
                    {
                        // Space added so the name and the message no longer
                        // run together in the log.
                        cout << mTag << " closing tag found\n";
                        return;
                    }
                    cout << "adress: Unexpected closing tag found: " << mTag << ". Skipping...\n";
                }
                else if (isPTag())
                {
                    pTag(element);
                }
                else if (isTextLevelTag())
                {
                    textLevelTag(element);
                }
                else if (isCommentTag())
                {
                    commentTag(element);
                }
                else
                {
                    // Not a known tag
                    cout << "adress: Unexpected tag found: " << mTag << ". Skipping...\n";
                    skipTag();
                }
                break;
            }
            case TEXT:
            {
                if (data != " " && !data.empty())
                {
                    cout << "Text is:" << endl << data << endl;
                    TTextShared textNode = mDocument->createText(data);
                    element->appendChild(textNode);
                }
                break;
            }
        }
    }
}
// Handles an element with body-style content (it is called for the body tag
// by htmlTag and for th/td tags by trTag).
//
// An element named after the current mTag is created under aParent, then
// chunks are read with getString(); per mStringType:
//   ATTR / ATTRVALUE  attributes are stored on the element; a name with
//                     mAttrNoValue set is stored with an empty value
//   TAG               start tags are dispatched in this order:
//                       isHeadingTag    -> normalTextTag()
//                       isAdressTag     -> adressTag()
//                       isBlockLevelTag -> blockLevelTag( e, aInsideForm )
//                       isTextLevelTag  -> textLevelTag()
//                       isCommentTag    -> commentTag()
//                       anything else is reported and handed to skipTag()
//                     the closing tag equal to the saved opening name ends
//                     the loop; other closing tags are reported
//   TEXT              text other than "" or " " becomes a text node
//
// aParent:     node that receives the new element as a child.
// aInsideForm: forwarded unchanged to blockLevelTag().
void HTMLParser::bodyStyleTag(TElementShared aParent, bool aInsideForm)
{
    cout << mTag << " tag found\n";

    // Keep the opening name; mTag is overwritten as children are read.
    string openedName = mTag;

    TElementShared node = mDocument->createElement(mTag);
    aParent->appendChild(node);

    // Attribute name waiting for its value ("" when none is pending).
    string pendingName;

    for (;;)
    {
        // getString() can deliver more than just a tag; mStringType tells
        // which kind of chunk was read.
        string chunk = getString();

        switch (mStringType)
        {
            case ATTR:
                pendingName = chunk;
                if (mAttrNoValue)
                {
                    node->setAttribute(pendingName, "");
                    pendingName = "";
                }
                break;

            case ATTRVALUE:
                if (!pendingName.empty())
                {
                    node->setAttribute(pendingName, chunk);
                    pendingName = "";
                }
                break;

            case TAG:
                if (!isStartTag())
                {
                    if (mTag.compare(openedName) == 0)
                    {
                        cout << mTag << " closing tag found\n";
                        return;
                    }
                    cout << "bodyStyle: Unexpected closing tag found: " << mTag << ". Skipping...\n";
                }
                else if (isHeadingTag())
                {
                    normalTextTag(node);
                }
                else if (isAdressTag())
                {
                    adressTag(node);
                }
                else if (isBlockLevelTag())
                {
                    blockLevelTag(node, aInsideForm);
                }
                else if (isTextLevelTag())
                {
                    textLevelTag(node);
                }
                else if (isCommentTag())
                {
                    commentTag(node);
                }
                else
                {
                    cout << "bodyStyle: Unexpected tag found: " << mTag << ". Skipping...\n";
                    skipTag();
                }
                break;

            case TEXT:
                if (chunk != " " && !chunk.empty())
                {
                    cout << "Text is:" << endl << chunk << endl;
                    TTextShared textNode = mDocument->createText(chunk);
                    node->appendChild(textNode);
                }
                break;
        }
    }
}