Example #1
0
// Handles a head element: creates a "head" node under aParent and then
// consumes tags until a closing tag for which isHeadTag() holds.
// Recognised start tags are dispatched to their handlers with the new
// node as parent; unknown start tags are skipped, stray closing tags
// are only reported.
void HTMLParser	::	headTag( TElementShared aParent )	{

	cout << "head tag found\n";

	// Create the node first so every child handler can attach to it
	TElementShared headElement = mDocument->createElement( "head" );
	aParent->appendChild( headElement );

	for ( ;; )	{
		getTag();

		if ( !isStartTag() )	{
			if ( isHeadTag() )	{
				// Matching closing tag: nothing left to do
				cout << "head closing tag found\n";
				return;
			}
			cout << "headTag: Unexpected closing tag found: " << mTag << ". Skipping...\n";
			continue;
		}

		if ( isTitleTag() || isStyleTag() || isScriptTag() )	{
			normalHeadTag( headElement );
		}
		else if ( isIsIndexTag() || isBaseTag() || isMetaTag() || isLinkTag() )	{
			emptyElementTag( headElement );
		}
		else if ( isCommentTag() )	{
			commentTag( headElement );
		}
		else	{
			// Start tag that is not handled inside head
			cout << "headTag: Unexpected tag found: " << mTag << ". Skipping...\n";
			skipTag();
		}
	}

}
Example #2
0
void HTMLParser	::	htmlTag()	{

	cout << "html tag found\n";

	// Add to DOM tree
	TElementShared element = mDocument->createElement( "html" );
	mDocument->appendChild( element );
	
	bool insideHtml = true;
	
	while ( insideHtml )	{
		getTag();

		if ( isStartTag() )	{
			if ( isHeadTag() )	{
				headTag( element );
				continue;
			}

			if ( isBodyTag() )	{
				bodyStyleTag( element );
				continue;
			}
			if ( isCommentTag() )	{
				commentTag( element );
				continue;
			}

			cout << "htmlTag: Unexpected tag found: " << mTag << ". Skipping...\n";
			skipTag();

		}
		else	{
			if ( !mTag.compare( "html" ) )	{
				cout << "html closing tag found\n";

				insideHtml = false;
									
			}
			else	{
				cout << "htmlTag: Unexpected closing tag found: " << mTag << ". Skipping...\n";
			}
		}
	}
	
}
Example #3
0
// Handles a head-level element that carries text: creates a node named
// after the current tag under aParent, then alternates getText() and
// getTag() until the matching closing tag is read. All text pieces are
// concatenated and appended as ONE text node after the loop, i.e. after
// any comment nodes that were added while scanning.
void HTMLParser	::	normalHeadTag( TElementShared aParent )	{

	cout << mTag << " tag found\n";

	// mTag changes while scanning, so remember which tag opened us
	string openedTag = mTag;

	TElementShared element = mDocument->createElement( openedTag );
	aParent->appendChild( element );

	string content;

	for ( ;; )	{

		content += getText();

		getTag();

		if ( isStartTag() )	{
			if ( isCommentTag() )	{
				commentTag( element );
			}
			else	{
				cout << "normalHead: Unexpected tag found: " << mTag << ". Skipping...\n";
				skipTag();
			}
			continue;
		}

		if ( mTag.compare( openedTag ) == 0 )	{
			cout << mTag << " closing tag found\n";
			break;
		}

		cout << "normalHead: Unexpected closing tag found: " << mTag << ". Skipping...\n";
	}

	// Store everything collected as a single text child
	TTextShared text = mDocument->createText( content );
	element->appendChild( text );

	cout << "Text is: " << content << endl;

}
Example #4
0
	void XMLPathTraversalTest()
	{
		typedef std::map<std::basic_string<wchar_t>,
			std::basic_string<wchar_t> > AttributesType;

		TagElement<wchar_t> rootTag(L"root");
		TagElement<wchar_t> childTag1(L"child1");
		TagElement<wchar_t> childTag1_(L"child1");
		TagElement<wchar_t> childTag2(L"child2");
		StringElement<wchar_t> stringTag(L"string");
		CommentElement<wchar_t> commentTag(L"comment");

		rootTag.addChild(&childTag1);
		rootTag.addChild(&childTag2);
		rootTag.addChild(&childTag1_);
		childTag1.addChild(&stringTag);
		childTag1_.addChild(&commentTag);

		CPPUNIT_ASSERT(rootTag.children.size() == 3);
		CPPUNIT_ASSERT(dynamic_cast<TagElement<wchar_t>*>
					   (rootTag.getChildElement(L"child1"))->getTagName() ==
					   L"child1");
		CPPUNIT_ASSERT(dynamic_cast<TagElement<wchar_t>*>
					   (rootTag.getChildElement(L"child2"))->getTagName() ==
					   L"child2");

		std::vector<Element<wchar_t>*> result;

		XMLPath<wchar_t> path_root(L"/root");
		result = path_root.evaluate(&rootTag);
		CPPUNIT_ASSERT(result.size() == 1);
		CPPUNIT_ASSERT(result[0] == &rootTag);

		XMLPath<wchar_t> path_roots(L"/root[]");
		result = path_roots.evaluate(&rootTag);
		CPPUNIT_ASSERT(result.size() == 1);
		CPPUNIT_ASSERT(result[0] == &rootTag);

		XMLPath<wchar_t> path_child1(L"/root/child1");
		result = path_child1.evaluate(&rootTag);
		CPPUNIT_ASSERT(result.size() == 1);
		CPPUNIT_ASSERT(result[0] == &childTag1);

		XMLPath<wchar_t> path_child1s(L"/root/child1[]");
		result = path_child1s.evaluate(&rootTag);
		CPPUNIT_ASSERT(result.size() == 2);
		CPPUNIT_ASSERT(result[0] == &childTag1);
		CPPUNIT_ASSERT(result[1] == &childTag1_);

		XMLPath<wchar_t> path_text(L"/root/child1[]/#text");
		result = path_text.evaluate(&rootTag);
		CPPUNIT_ASSERT(result.size() == 1);
		CPPUNIT_ASSERT(result[0] == &stringTag);

		XMLPath<wchar_t> path_comment(L"/root/child1[]/#comment");
		result = path_comment.evaluate(&rootTag);
		CPPUNIT_ASSERT(result.size() == 1);
		CPPUNIT_ASSERT(result[0] == &commentTag);

		rootTag.removeChild(&childTag1);
		rootTag.removeChild(&childTag2);
		rootTag.removeChild(&childTag1_);
		childTag1.removeChild(&stringTag);
		childTag1_.removeChild(&commentTag);
	}
Example #5
0
// Handles an element that may contain text and text-level children.
//
// aParent         - node the new element is appended to
// aConserveSpaces - forwarded to getString() and textLevelTag(); when
//                   true, a text run consisting of a single space is
//                   kept instead of dropped
// aInsideAnchor   - only forwarded to textLevelTag()
//
// Creates a node named after the current tag, then reads strings until
// the matching closing tag. Attribute names/values are stored on the
// node, text runs become text children, text-level and comment tags are
// dispatched to their handlers; other start tags are skipped and other
// closing tags are reported and ignored.
void HTMLParser	::	normalTextTag( TElementShared aParent, bool aConserveSpaces, bool aInsideAnchor )	{

	cout << mTag << " tag found\n";

	// Save the tag name; mTag changes while the content is scanned
	string tag = mTag;

	// Add to parent
	TElementShared element = mDocument->createElement( tag );
	aParent->appendChild( element );

	bool insideNormalText = true;
	// Name of an attribute whose value has not been read yet
	string attribute;

	while ( insideNormalText )	{

		string data = getString( aConserveSpaces );

		switch ( mStringType )	{
			case ATTR :	{
				attribute = data;
				if ( mAttrNoValue )	{
					// Attribute without a value: store it right away
					element->setAttribute( attribute, "" );
					attribute = "";
				}
				break;
			}
			case ATTRVALUE :	{
				// A value is only meaningful when a name is pending
				if ( !attribute.empty() )	{
					element->setAttribute( attribute, data );
					attribute = "";
				}
				break;
			}
			case TAG :	{
				if ( !isStartTag() )	{
					if ( !mTag.compare( tag ) )	{
						cout << tag << " closing tag found\n";
						insideNormalText = false;
						break;
					}

					cout << "normalText: Unexpected closing tag found: " << mTag << ". Skipping...\n";
					break;
				}

				if ( isTextLevelTag() )	{
					textLevelTag( element, aConserveSpaces, aInsideAnchor );
					break;
				}
				if ( isCommentTag() )	{
					commentTag( element );
					break;
				}

				// Not a known tag
				cout << "normalText: Unexpected tag found: " << mTag << ". Skipping...\n";
				skipTag();
				break;
			}
			case TEXT :	{
				// Empty runs are always dropped; a lone space is dropped
				// unless spaces are being conserved
				if ( !data.empty() && ( aConserveSpaces || data != " " ) )	{
					cout << "Text is:" << endl << data << endl;
					TTextShared text = mDocument->createText( data );
					element->appendChild( text );
				}
				break;
			}
		}
	}

}
Example #6
0
// Handles a map element: creates a "map" node under aParent and reads
// strings until a closing tag for which isMapTag() holds. Attributes
// are stored on the node; area tags go to emptyElementTag(), comments
// to commentTag(); other start tags are skipped. Text is not stored,
// only reported.
void HTMLParser	::	mapTag( TElementShared aParent )	{

	cout << "map tag found\n";

	TElementShared mapElement = mDocument->createElement( "map" );
	aParent->appendChild( mapElement );

	// Attribute name still waiting for its value
	string pendingName;

	for ( ;; )	{
		string data = getString();

		switch ( mStringType )	{
			case TAG :	{
				if ( !isStartTag() )	{
					if ( isMapTag() )	{
						cout <<  "map  closing tag found\n";
						return;
					}
					cout << "map: Unexpected closing tag found: " << mTag << ". Skipping...\n";
				}
				else if ( isAreaTag() )	{
					emptyElementTag( mapElement );
				}
				else if ( isCommentTag() )	{
					commentTag( mapElement );
				}
				else	{
					// Start tag that map does not handle
					cout << "map: Unexpected tag found: " << mTag << ". Skipping...\n";
					skipTag();
				}
				break;
			}
			case ATTR :	{
				if ( mAttrNoValue )	{
					// Value-less attribute: store immediately
					mapElement->setAttribute( data, "" );
					pendingName.clear();
				}
				else	{
					pendingName = data;
				}
				break;
			}
			case ATTRVALUE :	{
				// Ignore values that have no name in front of them
				if ( !pendingName.empty() )	{
					mapElement->setAttribute( pendingName, data );
					pendingName.clear();
				}
				break;
			}
			case TEXT :	{
				if ( !data.empty() && data != " " )	{
					cout << "Text found in illegal place. Skipping...\n";
				}
				break;
			}
		}
	}

}
Example #7
0
// Handles an element whose content is text only (plus comments).
//
// aParent         - node the new element is appended to
// aConserveSpaces - forwarded to getString(); when true, a text run
//                   consisting of a single space is kept
//
// Creates a node named after the current tag and reads strings until
// the element ends. It ends explicitly on the matching closing tag and
// implicitly on any other closing tag or any non-comment start tag; in
// the implicit case backPedal() is called before returning (presumably
// so the caller gets to see that tag again - confirm).
void HTMLParser	::	pcDataTag( TElementShared aParent, bool aConserveSpaces )	{

	cout << mTag << " tag found\n";

	// Save the tag name; mTag changes while the content is scanned
	string tag = mTag;

	// Add to parent
	TElementShared element = mDocument->createElement( tag );
	aParent->appendChild( element );

	bool insidePcData = true;
	// Name of an attribute whose value has not been read yet
	string attribute;

	while ( insidePcData )	{
		string data = getString( aConserveSpaces );

		switch ( mStringType )	{
			case ATTR :	{
				attribute = data;
				if ( mAttrNoValue )	{
					// Attribute without a value: store it right away
					element->setAttribute( attribute, "" );
					attribute = "";
				}
				break;
			}
			case ATTRVALUE :	{
				// A value is only meaningful when a name is pending
				if ( !attribute.empty() )	{
					element->setAttribute( attribute, data );
					attribute = "";
				}
				break;
			}
			case TAG :	{
				if ( isStartTag() )	{
					if ( isCommentTag() )	{
						commentTag( element );
						break;
					}
				}
				else if ( !mTag.compare( tag ) )	{
					cout << tag << " closing tag found\n";

					// End the while loop
					insidePcData = false;
					break;
				}

				// Any other tag ends this element. Report the element
				// being closed (tag), not the tag that caused it (mTag).
				cout << tag << " closed implicitly\n";

				// End the while loop
				insidePcData = false;
				backPedal();
				break;
			}
			case TEXT :	{
				// Empty runs are always dropped; a lone space is dropped
				// unless spaces are being conserved
				if ( !data.empty() && ( aConserveSpaces || data != " " ) )	{
					cout << "Text is:" << endl << data << endl;
					TTextShared text = mDocument->createText( data );
					element->appendChild( text );
				}
				break;
			}
		}
	}

}
Example #8
0
// Handles a pre element: creates a "pre" node under aParent and reads
// strings with space conservation switched on (getString( true )) until
// a closing tag for which isPreTag() holds.
//
// Attributes are stored on the node. Recognised start tags are
// dispatched to their handlers, passing `true` for the conserve-spaces
// argument where the handler takes one; other start tags are skipped
// and other closing tags are reported and ignored. Every non-empty text
// run becomes a text child - including a run that is exactly one space,
// since whitespace is significant inside pre and the child handlers
// called from here keep such runs as well.
void HTMLParser	::	preTag( TElementShared aParent )	{

	cout << "pre tag found\n";

	// Add to parent
	TElementShared element = mDocument->createElement( "pre" );
	aParent->appendChild( element );

	bool insidePre = true;
	// Name of an attribute whose value has not been read yet
	string attribute;

	while ( insidePre )	{

		string data = getString( true );

		switch ( mStringType )	{
			case ATTR :	{
				attribute = data;
				if ( mAttrNoValue )	{
					// Attribute without a value: store it right away
					element->setAttribute( attribute, "" );
					attribute = "";
				}
				break;
			}
			case ATTRVALUE :	{
				// A value is only meaningful when a name is pending
				if ( !attribute.empty() )	{
					element->setAttribute( attribute, data );
					attribute = "";
				}
				break;
			}
			case TAG :	{
				if ( !isStartTag() )	{
					if ( isPreTag() )	{
						cout << "pre closing tag found\n";
						insidePre = false;
						break;
					}

					cout << "pre: Unexpected closing tag found: " << mTag << ". Skipping...\n";
					break;
				}

				if ( isFontStylePreTag() ||
					 isPhraseTag() )	{
					normalTextTag( element, true );
					break;
				}
				if ( isFormFieldTag() )	{
					formFieldTag( element, true );
					break;
				}
				if ( isAnchorTag() )	{
					normalTextTag( element, true, true );
					break;
				}
				if ( isAppletTag() )	{
					appletTag( element, true );
					break;
				}
				if ( isEmptyTextPreTag() )	{
					emptyElementTag( element );
					break;
				}
				if ( isMapTag() )	{
					mapTag( element );
					break;
				}
				if ( isCommentTag() )	{
					commentTag( element );
					break;
				}

				// Not a known tag
				cout << "pre: Unexpected tag found: " << mTag << ". Skipping...\n";
				skipTag();
				break;
			}
			case TEXT :	{
				// Keep all non-empty text, a single space included
				if ( !data.empty() )	{
					cout << "Text is:" << endl << data << endl;
					TTextShared text = mDocument->createText( data );
					element->appendChild( text );
				}
				break;
			}
		}
	}

}
Example #9
0
// Handles a tr element: creates a "tr" node under aParent and reads
// strings until a closing tag for which isTrTag() holds. Attributes are
// stored on the node; th/td start tags go to bodyStyleTag(), comments
// to commentTag(); other start tags are skipped. Text is not stored,
// only reported.
void HTMLParser	::	trTag( TElementShared aParent )	{

	cout << "tr tag found\n";

	TElementShared rowElement = mDocument->createElement( "tr" );
	aParent->appendChild( rowElement );

	// Attribute name still waiting for its value
	string pendingName;

	for ( ;; )	{
		// getString() can deliver attributes and text as well as tags
		string data = getString();

		switch ( mStringType )	{
			case TAG :	{
				if ( !isStartTag() )	{
					if ( isTrTag() )	{
						cout <<  "tr closing tag found\n";
						return;
					}
					cout << "tr: Unexpected closing tag found: " << mTag << ". Skipping...\n";
				}
				else if ( isThTag() || isTdTag() )	{
					bodyStyleTag( rowElement );
				}
				else if ( isCommentTag() )	{
					commentTag( rowElement );
				}
				else	{
					// Start tag that tr does not handle
					cout << "tr: Unexpected tag found: " << mTag << ". Skipping...\n";
					skipTag();
				}
				break;
			}
			case ATTR :	{
				if ( mAttrNoValue )	{
					// Value-less attribute: store immediately
					rowElement->setAttribute( data, "" );
					pendingName.clear();
				}
				else	{
					pendingName = data;
				}
				break;
			}
			case ATTRVALUE :	{
				// Ignore values that have no name in front of them
				if ( !pendingName.empty() )	{
					rowElement->setAttribute( pendingName, data );
					pendingName.clear();
				}
				break;
			}
			case TEXT :	{
				if ( !data.empty() && data != " " )	{
					cout << "Text found in illegal place. Skipping...\n";
				}
				break;
			}
		}
	}

}
Example #10
0
// Handles a list element: creates a node named after the current tag
// under aParent and reads strings until the matching closing tag.
// When the list was opened by a ul/ol tag, li children are accepted
// (via flowLevelTag()); otherwise dd (flowLevelTag()) and dt
// (textLevelTag()) children are accepted. Comments are always accepted,
// other start tags are skipped. Text is not stored, only reported.
// NOTE(review): every list tag other than ul/ol is treated like a
// definition list here - confirm that is intended for all callers.
void HTMLParser	::	listTag( TElementShared aParent )	{

	cout << mTag << " tag found\n";

	// mTag changes while scanning, so remember which tag opened us
	string openedTag = mTag;

	// Decides which kind of item tags this list accepts
	const bool takesListItems = isULTag() || isOLTag();

	TElementShared listElement = mDocument->createElement( mTag );
	aParent->appendChild( listElement );

	// Attribute name still waiting for its value
	string pendingName;

	for ( ;; )	{
		// getString() can deliver attributes and text as well as tags
		string data = getString();

		switch ( mStringType )	{
			case TAG :	{
				if ( !isStartTag() )	{
					if ( mTag.compare( openedTag ) == 0 )	{
						cout << mTag << " closing tag found\n";
						return;
					}
					cout << "list: Unexpected closing tag found: " << mTag << ". Skipping...\n";
				}
				else if ( ( isLITag() && takesListItems ) ||
						  ( isDDTag() && !takesListItems ) )	{
					flowLevelTag( listElement );
				}
				else if ( isDTTag() && !takesListItems )	{
					textLevelTag( listElement );
				}
				else if ( isCommentTag() )	{
					commentTag( listElement );
				}
				else	{
					// Start tag that this list does not handle
					cout << "list: Unexpected tag found: " << mTag << ". Skipping...\n";
					skipTag();
				}
				break;
			}
			case ATTR :	{
				if ( mAttrNoValue )	{
					// Value-less attribute: store immediately
					listElement->setAttribute( data, "" );
					pendingName.clear();
				}
				else	{
					pendingName = data;
				}
				break;
			}
			case ATTRVALUE :	{
				// Ignore values that have no name in front of them
				if ( !pendingName.empty() )	{
					listElement->setAttribute( pendingName, data );
					pendingName.clear();
				}
				break;
			}
			case TEXT :	{
				if ( !data.empty() && data != " " )	{
					cout << "Text found in illegal place. Skipping...\n";
				}
				break;
			}
		}
	}

}
Example #11
0
// Handles a p element: creates a "p" node under aParent and reads
// strings until the paragraph ends. It ends explicitly on a closing
// tag for which isPTag() holds, and implicitly - after backPedal() - on
// a closing adress/body-style/form/list tag or on a block-level,
// heading or li start tag. Attributes and text are stored on the node;
// text-level and comment start tags are dispatched to their handlers;
// anything else is reported and skipped.
void HTMLParser	::	pTag( TElementShared aParent )	{

	cout << "p tag found\n";

	TElementShared paragraph = mDocument->createElement( "p" );
	aParent->appendChild( paragraph );

	// Attribute name still waiting for its value
	string pendingName;

	for ( ;; )	{

		string data = getString();

		switch ( mStringType )	{
			case TAG :	{
				if ( !isStartTag() )	{
					if ( isPTag() )	{
						cout << "p closing tag found\n";
						return;
					}
					if ( isAdressTag() ||
						 isBodyStyleTag() ||
						 isFormTag() ||
						 isListTag() )	{
						// An enclosing element is closing: end here too
						cout << "p closed implicitly\n";
						backPedal();
						return;
					}
					cout << "p: Unexpected closing tag found: " << mTag << ". Skipping...\n";
				}
				else if ( isBlockLevelTag() ||
						  isHeadingTag() ||
						  isLITag() )	{
					// These start tags end the paragraph
					cout << "p closed implicitly\n";
					backPedal();
					return;
				}
				else if ( isTextLevelTag() )	{
					textLevelTag( paragraph );
				}
				else if ( isCommentTag() )	{
					commentTag( paragraph );
				}
				else	{
					// Start tag that p does not handle
					cout << "p: Unexpected tag found: " << mTag << ". Skipping...\n";
					skipTag();
				}
				break;
			}
			case ATTR :	{
				if ( mAttrNoValue )	{
					// Value-less attribute: store immediately
					paragraph->setAttribute( data, "" );
					pendingName.clear();
				}
				else	{
					pendingName = data;
				}
				break;
			}
			case ATTRVALUE :	{
				// Ignore values that have no name in front of them
				if ( !pendingName.empty() )	{
					paragraph->setAttribute( pendingName, data );
					pendingName.clear();
				}
				break;
			}
			case TEXT :	{
				// Empty and single-space runs are not stored
				if ( !data.empty() && data != " " )	{
					cout << "Text is:" << endl << data << endl;
					TTextShared text = mDocument->createText( data );
					paragraph->appendChild( text );
				}
				break;
			}
		}
	}

}
Example #12
0
// Handles an address element: creates a node under aParent and reads
// strings until the matching closing tag. Attributes and text are
// stored on the node; p, text-level and comment start tags are
// dispatched to their handlers; other start tags are skipped and other
// closing tags are reported and ignored.
//
// The element name and the closing-tag match both use the tag name that
// is current on entry instead of a hard-coded spelling, so the function
// terminates on whatever tag name it was entered for.
void HTMLParser	::	adressTag( TElementShared aParent )	{

	// Save the tag name; mTag changes while the content is scanned
	string tag = mTag;

	cout << tag << " tag found\n";

	// Add to parent
	TElementShared element = mDocument->createElement( tag );
	aParent->appendChild( element );

	bool insideAdress = true;
	// Name of an attribute whose value has not been read yet
	string attribute;

	while ( insideAdress )	{

		string data = getString();

		switch ( mStringType )	{
			case ATTR :	{
				attribute = data;
				if ( mAttrNoValue )	{
					// Attribute without a value: store it right away
					element->setAttribute( attribute, "" );
					attribute = "";
				}
				break;
			}
			case ATTRVALUE :	{
				// A value is only meaningful when a name is pending
				if ( !attribute.empty() )	{
					element->setAttribute( attribute, data );
					attribute = "";
				}
				break;
			}
			case TAG :	{
				if ( !isStartTag() )	{
					if ( !mTag.compare( tag ) )	{
						cout << mTag << " closing tag found\n";

						// End the while loop
						insideAdress = false;
					}
					else	{
						cout << "adress: Unexpected closing tag found: " << mTag << ". Skipping...\n";
					}
					break;
				}

				if ( isPTag() )	{
					pTag( element );
					break;
				}
				if ( isTextLevelTag() )	{
					textLevelTag( element );
					break;
				}
				if ( isCommentTag() )	{
					commentTag( element );
					break;
				}

				// Not a known tag
				cout << "adress: Unexpected tag found: " << mTag << ". Skipping...\n";
				skipTag();
				break;
			}
			case TEXT :	{
				// Empty and single-space runs are not stored
				if ( !data.empty() && data != " " )	{
					cout << "Text is:" << endl << data << endl;
					TTextShared text = mDocument->createText( data );
					element->appendChild( text );
				}
				break;
			}
		}
	}

}
Example #13
0
// Handles an element with body-style content: creates a node named
// after the current tag under aParent and reads strings until the
// matching closing tag. Attributes and text are stored on the node.
// Heading, adress, block-level, text-level and comment start tags are
// dispatched to their handlers (aInsideForm is only forwarded to
// blockLevelTag()); other start tags are skipped and other closing tags
// are reported and ignored.
void HTMLParser	::	bodyStyleTag( TElementShared aParent, bool aInsideForm )	{

	cout << mTag << " tag found\n";

	// mTag changes while scanning, so remember which tag opened us
	string openedTag = mTag;

	TElementShared container = mDocument->createElement( mTag );
	aParent->appendChild( container );

	// Attribute name still waiting for its value
	string pendingName;

	for ( ;; )	{
		// getString() can deliver attributes and text as well as tags
		string data = getString();

		switch ( mStringType )	{
			case TAG :	{
				if ( !isStartTag() )	{
					if ( mTag.compare( openedTag ) == 0 )	{
						cout << mTag << " closing tag found\n";
						return;
					}
					cout << "bodyStyle: Unexpected closing tag found: " << mTag << ". Skipping...\n";
				}
				else if ( isHeadingTag() )	{
					normalTextTag( container );
				}
				else if ( isAdressTag() )	{
					adressTag( container );
				}
				else if ( isBlockLevelTag() )	{
					blockLevelTag( container, aInsideForm );
				}
				else if ( isTextLevelTag() )	{
					textLevelTag( container );
				}
				else if ( isCommentTag() )	{
					commentTag( container );
				}
				else	{
					// Start tag that is not handled here
					cout << "bodyStyle: Unexpected tag found: " << mTag << ". Skipping...\n";
					skipTag();
				}
				break;
			}
			case ATTR :	{
				if ( mAttrNoValue )	{
					// Value-less attribute: store immediately
					container->setAttribute( data, "" );
					pendingName.clear();
				}
				else	{
					pendingName = data;
				}
				break;
			}
			case ATTRVALUE :	{
				// Ignore values that have no name in front of them
				if ( !pendingName.empty() )	{
					container->setAttribute( pendingName, data );
					pendingName.clear();
				}
				break;
			}
			case TEXT :	{
				// Empty and single-space runs are not stored
				if ( !data.empty() && data != " " )	{
					cout << "Text is:" << endl << data << endl;
					TTextShared text = mDocument->createText( data );
					container->appendChild( text );
				}
				break;
			}
		}
	}

}