/**
 * Opens a URLInputStream on `url`, tokenizes it, and hands the tokenizer to
 * processHtml() each time an HTML-start token (per isHTMLStart) is read.
 *
 * Every exception raised while doing so is caught here and reported on
 * stdout; nothing propagates to the caller.
 *
 * @param url  location passed to the URLInputStream constructor.
 *
 * NOTE(review): `inputStream` is a member that is assigned here but never
 * deleted in this function, and it is not closed on the exception path --
 * confirm that its lifetime is handled elsewhere (e.g. in the destructor).
 */
void HTMLParser::runParser(string url){
	try{
		inputStream = new URLInputStream(url);
		{
			// Automatic storage instead of new/delete: the tokenizer is now
			// destroyed during stack unwinding as well, so it no longer leaks
			// when tokenizing or one of the process* calls throws.
			HTMLTokenizer tokenizer(inputStream);
			while(tokenizer.HasNextToken()){
				HTMLToken curToken = tokenizer.GetNextToken();
				if(isHTMLStart(curToken))
					processHtml(tokenizer);
			}
		} // tokenizer is destroyed here, before the stream is closed (same order as before)
		inputStream->Close();
	}
	// The project-specific handler comes first so it cannot be shadowed by the
	// std::exception handler should CS240Exception ever derive from it.
	// Caught by non-const reference because GetMessage() may not be const.
	catch (CS240Exception &e){
		cout << "Exception Occurred:" << e.GetMessage() << endl;
	}
	catch (const std::exception &e){
		cout << "Exception Occurred:" << e.what() << endl;
	}
	catch (...){
		cout << "Unknown Exception Occurred" << endl;
	}
}
/**
 * Discards tokens until one satisfying isScriptEnd() has been consumed or the
 * tokenizer has no more tokens.
 *
 * @param tokenizer  token source; advanced past the script-end token.
 */
void HTMLParser::processScript(HTMLTokenizer & tokenizer){
	bool scriptClosed = false;
	while(!scriptClosed && tokenizer.HasNextToken()){
		HTMLToken skipped = tokenizer.GetNextToken();
		scriptClosed = isScriptEnd(skipped);
	}
}
/**
 * Reads tokens up to and including the header-end token (per isHeaderEnd), or
 * until the tokenizer is exhausted.
 *
 * Text tokens are appended to `header` and forwarded to processText();
 * link-start tokens are forwarded to processLink(). Other tokens are ignored.
 *
 * @param tokenizer  token source, positioned just after the header-start token.
 */
void HTMLParser::processHeader(HTMLTokenizer & tokenizer){
	while(tokenizer.HasNextToken()){
		HTMLToken tok = tokenizer.GetNextToken();

		if(isText(tok)){
			header += tok.GetValue();
			processText(tok.GetValue());
			continue;
		}

		if(isHeaderEnd(tok))
			return;

		if(isLinkStart(tok))
			processLink(tok);
	}
}
/**
 * Collects the page title: every TEXT token is appended to `title` and passed
 * to processText(). Stops after the token for which isTitleEnd() is true, or
 * when the tokenizer has no more tokens.
 *
 * @param tokenizer  token source; asserted to have at least one token left.
 */
void HTMLParser::processTitle(HTMLTokenizer & tokenizer){
	assert(tokenizer.HasNextToken());

	while(tokenizer.HasNextToken()){
		HTMLToken tok = tokenizer.GetNextToken();

		const bool textToken = (tok.GetType() == TEXT);
		if(textToken){
			title += tok.GetValue();
			processText(tok.GetValue());
		}

		// Checked independently of the text test, as in the original.
		if(isTitleEnd(tok))
			break;
	}
}
/**
 * Walks the remaining tokens and dispatches on section starts: a head-start
 * token goes to processHead(), a body-start token to processBody().
 *
 * There is no early exit; the loop runs until the tokenizer is exhausted.
 *
 * @param tokenizer  token source; asserted to have at least one token left.
 */
void HTMLParser::processHtml(HTMLTokenizer & tokenizer){
	assert(tokenizer.HasNextToken());

	while(tokenizer.HasNextToken()){
		HTMLToken tok = tokenizer.GetNextToken();

		if(isHeadStart(tok)){
			TRACE("Head Start");
			processHead(tokenizer);
			continue;
		}

		if(isBodyStart(tok))
			processBody(tokenizer);
	}
}
void HTMLPage::load(const CL_String &page_url, const CL_String &refererer_url) { pageurl = HTMLUrl(page_url, refererer_url); webpage = download_url(page_url, refererer_url); HTMLTokenizer tokenizer; tokenizer.append(webpage); HTMLToken token; while (true) { tokenizer.tokenize(token); if (token.type == HTMLToken::type_null) break; if (token.type == HTMLToken::type_style_tag) { pagecss += load_css(token.value, page_url); } if (token.type == HTMLToken::type_tag_begin || token.type == HTMLToken::type_tag_single) { if (token.name == "link") { CL_String rel, href, media; for (size_t i = 0; i < token.attributes.size(); i++) { if (token.attributes[i].name == "rel") rel = token.attributes[i].value; if (token.attributes[i].name == "href") href = token.attributes[i].value; if (token.attributes[i].name == "media") media = token.attributes[i].value; } if (rel == "stylesheet" && !href.empty() && (media.empty() || media == "screen")) { CL_String css = download_url(href, page_url); pagecss += load_css(css, HTMLUrl(href, page_url).to_string()); } } } } CL_File fhtml("htmlpage.html", CL_File::create_always, CL_File::access_write); fhtml.write(webpage.data(), webpage.length()); fhtml.close(); CL_File fcss("htmlpage.css", CL_File::create_always, CL_File::access_write); fcss.write(pagecss.data(), pagecss.length()); fcss.close(); }
/**
 * Scans the head section: stops after the token for which isHeadEnd() is
 * true (or when tokens run out) and hands the tokenizer to processTitle()
 * whenever a title-start token is seen.
 *
 * @param tokenizer  token source; asserted to have at least one token left.
 */
void HTMLParser::processHead(HTMLTokenizer & tokenizer){
	assert(tokenizer.HasNextToken());

	while(tokenizer.HasNextToken()){
		HTMLToken tok = tokenizer.GetNextToken();

		if(isHeadEnd(tok))
			break;

		if(!isTitleStart(tok))
			continue;

		processTitle(tokenizer);
		TRACE("TITLE Start");
	}
}
bool HTMLElementImpl::setInnerHTML( const DOMString &html ) { // the following is in accordance with the definition as used by IE if( endTag[id()] == FORBIDDEN ) return false; // IE disallows innerHTML on inline elements. I don't see why we should have this restriction, as our // dhtml engine can cope with it. Lars //if ( isInline() ) return false; switch( id() ) { case ID_COL: case ID_COLGROUP: case ID_FRAMESET: case ID_HEAD: case ID_HTML: case ID_STYLE: case ID_TABLE: case ID_TBODY: case ID_TFOOT: case ID_THEAD: case ID_TITLE: case ID_TR: return false; default: break; } if ( !getDocument()->isHTMLDocument() ) return false; DocumentFragmentImpl *fragment = new DocumentFragmentImpl( docPtr() ); HTMLTokenizer *tok = new HTMLTokenizer( docPtr(), fragment ); tok->begin(); tok->write( html.string(), true ); tok->end(); delete tok; removeChildren(); int ec = 0; appendChild( fragment, ec ); delete fragment; return !ec; }
/**
 * Processes body tokens until the body-end token (per isBodyEnd) has been
 * consumed or the tokenizer is exhausted.
 *
 * Dispatch, first match wins:
 *  - text token: value goes to processDescription() and processText()
 *  - link start: processLink()
 *  - header start while `header` is still empty: processHeader()
 *  - script start: processScript()
 *  - body end: return
 *
 * @param tokenizer  token source; asserted to have at least one token left.
 */
void HTMLParser::processBody(HTMLTokenizer & tokenizer){
	assert(tokenizer.HasNextToken());

	while(tokenizer.HasNextToken()){
		HTMLToken tok = tokenizer.GetNextToken();

		if(isText(tok)){
			string text = tok.GetValue();
			processDescription(text);
			processText(text);
			continue;
		}

		if(isLinkStart(tok)){
			processLink(tok);
			continue;
		}

		// Only handled as a header while `header` is still empty.
		if(isHeaderStart(tok) && header.empty()){
			processHeader(tokenizer);
			continue;
		}

		if(isScriptStart(tok)){
			processScript(tokenizer);
			continue;
		}

		if(isBodyEnd(tok))
			return;
	}
}
/**
 * Reads the board section from the token stream and places its pieces.
 *
 * Tokens are skipped until one whose value is "board"; then every token whose
 * value is "piece" is turned into a SetPiece() call, until the next token
 * whose value is "board". CheckForEndToken() is called on every token that is
 * examined (presumably it aborts on end of input -- confirm).
 *
 * @param tokenizer  token source, advanced past the closing "board" token.
 * @param boardPtr   board that receives the pieces.
 */
void GameLoader::LoadBoard(HTMLTokenizer & tokenizer, Board * boardPtr) const
{
	// Skip ahead to the opening "board" token.
	HTMLToken token = tokenizer.GetNextToken();
	for (; token.GetValue() != "board"; token = tokenizer.GetNextToken())
		CheckForEndToken(token);

	// Handle everything up to the closing "board" token.
	for (token = tokenizer.GetNextToken();
	     token.GetValue() != "board";
	     token = tokenizer.GetNextToken())
	{
		CheckForEndToken(token);

		if (token.GetValue() != "piece")
			continue;

		int type = ConvertTypeStrToInt(token.GetAttribute("type"));
		int color = ConvertColorStrToInt(token.GetAttribute("color"));
		int column = atoi(token.GetAttribute("column").c_str());
		int row = atoi(token.GetAttribute("row").c_str());

		boardPtr->SetPiece(BoardPosition(row, column), type, color);
	}
}
void GameLoader::LoadMoveHistory(HTMLTokenizer & tokenizer, MoveHistory * gameHistory) const { HTMLToken currentToken = tokenizer.GetNextToken(); while (currentToken.GetValue() != "history") { CheckForEndToken(currentToken); currentToken = tokenizer.GetNextToken(); } currentToken = tokenizer.GetNextToken(); while (currentToken.GetValue() != "history") { CheckForEndToken(currentToken); if (currentToken.GetValue() == "move" && currentToken.GetType() == HTMLTokenType::TAG_START) { while (currentToken.GetValue() != "piece") { CheckForEndToken(currentToken); currentToken = tokenizer.GetNextToken(); } HTMLToken origin = currentToken; currentToken = tokenizer.GetNextToken(); while (currentToken.GetValue() != "piece") { CheckForEndToken(currentToken); currentToken = tokenizer.GetNextToken(); } HTMLToken destination = currentToken; currentToken = tokenizer.GetNextToken(); int originPieceType = ConvertTypeStrToInt(origin.GetAttribute("type")); int originPieceColor = ConvertColorStrToInt(origin.GetAttribute("color")); int originCol = atoi(origin.GetAttribute("column").c_str()); int originRow = atoi(origin.GetAttribute("row").c_str()); int destinationCol = atoi(destination.GetAttribute("column").c_str()); int destinationRow = atoi(destination.GetAttribute("row").c_str()); int capturedPieceType = -1; int capturedCol = -1; int capturedRow = -1; while (currentToken.GetValue() != "piece" && currentToken.GetValue() != "move") { CheckForEndToken(currentToken); currentToken = tokenizer.GetNextToken(); } if (currentToken.GetValue() == "piece") { capturedPieceType = ConvertTypeStrToInt(currentToken.GetAttribute("type")); capturedCol = atoi(currentToken.GetAttribute("column").c_str()); capturedRow = atoi(currentToken.GetAttribute("row").c_str()); } gameHistory->AddMove(Move(originPieceType, originPieceColor, BoardPosition(originRow, originCol), BoardPosition(destinationRow, destinationCol), capturedPieceType, BoardPosition(capturedRow, capturedCol))); } else { currentToken = 
tokenizer.GetNextToken(); } } }