/**
 * Consumes the tokens that make up a title element.
 *
 * Every TEXT token's value is appended to `title` and also passed to
 * processText(). Consumption stops after the first token for which
 * isTitleEnd() is true, or when the tokenizer has no more tokens.
 *
 * @param tokenizer token source, positioned just after the title start token.
 */
void HTMLParser::processTitle(HTMLTokenizer & tokenizer){
    // Deliberately no assert on HasNextToken(): whether more tokens follow
    // depends on the fetched document, not on a programmer invariant. A
    // document that ends right after the title start tag simply skips the loop.
    while(tokenizer.HasNextToken()){
        HTMLToken titleToken = tokenizer.GetNextToken();

        if(titleToken.GetType() == TEXT){
            // Non-const local so it can be handed to processText() as an lvalue.
            string text = titleToken.GetValue();
            title += text;
            processText(text);
        }

        if(isTitleEnd(titleToken)){
            return;
        }
    }
}
/**
 * Dispatches the contents of an html element to the head and body handlers.
 *
 * Reads tokens until the tokenizer is exhausted. A head start token hands the
 * tokenizer to processHead(); a body start token hands it to processBody().
 * All other tokens are ignored.
 *
 * @param tokenizer token source, positioned just after the html start token.
 */
void HTMLParser::processHtml(HTMLTokenizer & tokenizer){
    // Deliberately no assert on HasNextToken(): a document that ends right
    // after the html start tag is an input condition, not a programming error,
    // and the loop below already copes with it by doing nothing.
    while(tokenizer.HasNextToken()){
        HTMLToken curToken = tokenizer.GetNextToken();

        if(isHeadStart(curToken)){
            TRACE("Head Start");
            processHead(tokenizer);
        }else if(isBodyStart(curToken)){
            processBody(tokenizer);
        }
    }
}
/**
 * Downloads and parses the page at `url`.
 *
 * Opens a URLInputStream for the URL (stored in the `inputStream` member),
 * tokenizes it, and calls processHtml() for every token that isHTMLStart()
 * accepts. On the success path the stream is closed once tokenizing is done.
 *
 * Exceptions raised while opening or parsing are caught here and reported on
 * cout instead of being propagated to the caller.
 *
 * @param url address of the page to parse.
 */
void HTMLParser::runParser(string url){
    try{
        inputStream = new URLInputStream(url);

        {
            // Automatic storage instead of new/delete: the tokenizer is now
            // destroyed even when parsing throws. Previously the delete was
            // skipped on every exception path and the tokenizer leaked.
            HTMLTokenizer tokenizer(inputStream);

            while(tokenizer.HasNextToken()){
                HTMLToken curToken = tokenizer.GetNextToken();
                if(isHTMLStart(curToken)){
                    processHtml(tokenizer);
                }
            }
        }   // tokenizer is destroyed here, before the stream is closed (same order as before)

        // Only reached when nothing above threw.
        // NOTE(review): on the exception paths the stream is neither closed
        // nor released here - confirm that the owner of `inputStream` handles it.
        inputStream->Close();
    }
    catch (const std::exception &e){
        cout << "Exception Occurred:" << e.what() << endl;
    }
    catch (CS240Exception &e){
        cout << "Exception Occurred:" << e.GetMessage() << endl;
    }
    catch (...){
        cout << "Unknown Exception Occurred" << endl;
    }
}
/**
 * Consumes the tokens that make up a head element.
 *
 * Reads tokens until one satisfies isHeadEnd() or the tokenizer is exhausted.
 * A title start token hands the tokenizer to processTitle(); every other
 * token inside the head is ignored.
 *
 * @param tokenizer token source, positioned just after the head start token.
 */
void HTMLParser::processHead(HTMLTokenizer & tokenizer){
    // Deliberately no assert on HasNextToken(): a document truncated right
    // after the head start tag is an input condition, and the loop below
    // already handles it by doing nothing.
    while(tokenizer.HasNextToken()){
        HTMLToken headToken = tokenizer.GetNextToken();

        if(isHeadEnd(headToken)){
            return;
        }

        if(isTitleStart(headToken)){
            // Trace before descending so the message marks the start of the
            // title rather than the point where it has already been consumed.
            TRACE("TITLE Start");
            processTitle(tokenizer);
        }
    } // End While Loop
}
/**
 * Skips over the contents of a script element.
 *
 * Tokens are read and discarded until one satisfies isScriptEnd() or the
 * tokenizer runs out of tokens. Nothing inside the script is recorded.
 *
 * @param tokenizer token source, positioned just after the script start token.
 */
void HTMLParser::processScript(HTMLTokenizer & tokenizer){
    bool scriptClosed = false;

    // The flag is tested first, so HasNextToken() is not queried again once
    // the script end token has been seen.
    while(!scriptClosed && tokenizer.HasNextToken()){
        HTMLToken discarded = tokenizer.GetNextToken();
        scriptClosed = isScriptEnd(discarded);
    }
}
/**
 * Consumes the tokens that make up a header element.
 *
 * Text tokens are appended to `header` and passed to processText(). Link
 * start tokens are passed to processLink(). Consumption stops at the first
 * token satisfying isHeaderEnd(), or when the tokenizer is exhausted.
 *
 * @param tokenizer token source, positioned just after the header start token.
 */
void HTMLParser::processHeader(HTMLTokenizer & tokenizer){
    while(tokenizer.HasNextToken()){
        HTMLToken token = tokenizer.GetNextToken();

        // Text contributes to the stored header and to the general text handling.
        if(isText(token)){
            header += token.GetValue();
            processText(token.GetValue());
            continue;
        }

        // End of the header: hand control back to the caller.
        if(isHeaderEnd(token)){
            return;
        }

        // Links nested inside the header are still handled.
        if(isLinkStart(token)){
            processLink(token);
        }
    }
}
/**
 * Consumes the tokens that make up a body element.
 *
 * For each token, the first matching rule applies:
 *   - text: its value goes to processDescription() and then processText();
 *   - link start: the token goes to processLink();
 *   - header start, while `header` is still empty: processHeader() consumes
 *     the header (so later headers are not captured once `header` is filled);
 *   - script start: processScript() skips the script;
 *   - body end: the function returns.
 * Any other token is ignored. The function also returns when the tokenizer
 * runs out of tokens.
 *
 * @param tokenizer token source, positioned just after the body start token.
 */
void HTMLParser::processBody(HTMLTokenizer & tokenizer){
    // Deliberately no assert on HasNextToken(): a document that ends right
    // after the body start tag is an input condition, not a programming error,
    // and the loop below already copes with it by doing nothing.
    while(tokenizer.HasNextToken()){
        HTMLToken curToken = tokenizer.GetNextToken();

        if(isText(curToken)){
            string tmpstr = curToken.GetValue();
            processDescription(tmpstr);
            processText(tmpstr);
        }else if(isLinkStart(curToken)){
            processLink(curToken);
        }else if(isHeaderStart(curToken) && header.empty()){
            processHeader(tokenizer);
        }else if(isScriptStart(curToken)){
            processScript(tokenizer);
        }else if(isBodyEnd(curToken)){
            return;
        }
    }
}