Example #1
0
// Opens the document at `url`, tokenizes it, and calls processHtml() each
// time a token satisfying isHTMLStart() is read.
//
// Every exception raised while opening, tokenizing or processing is reported
// on standard output and swallowed here; nothing propagates to the caller.
//
// url: location handed to the URLInputStream constructor.
void HTMLParser::runParser(string url){
  
  try{
    inputStream = new URLInputStream(url);
    
    {
      // The tokenizer has automatic storage so that it is destroyed even
      // when tokenizing or processing throws; the previous new/delete pair
      // leaked it on that path. The inner scope ends before Close() so the
      // tokenizer is still destroyed before the stream is closed.
      HTMLTokenizer tokenizer(inputStream);
      
      while(tokenizer.HasNextToken()){
        HTMLToken curToken = tokenizer.GetNextToken();
        
        if(isHTMLStart(curToken))
          processHtml(tokenizer);
      }
    }
    
    inputStream->Close();
    // NOTE(review): when an exception is thrown above, inputStream is left
    // unclosed. Who releases the stream object is not visible from this
    // function -- confirm it is cleaned up elsewhere.
    
  }
  catch (const std::exception &e){
    cout << "Exception Occurred:" << e.what() << endl;
  }
  catch (CS240Exception &e){
    // Caught by non-const reference because GetMessage() is called on it and
    // its const-ness is not visible here.
    cout << "Exception Occurred:" << e.GetMessage() << endl;
  }
  catch (...){
    cout << "Unknown Exception Occurred" << endl;
  }

}
Example #2
0
// Consumes tokens without acting on them until one satisfies isScriptEnd()
// or the tokenizer has no more tokens.
void HTMLParser::processScript(HTMLTokenizer & tokenizer){

  for(bool scriptClosed = false; !scriptClosed && tokenizer.HasNextToken(); ){
    HTMLToken skipped = tokenizer.GetNextToken();

    // Nothing inside the script is examined beyond the end-of-script test.
    scriptClosed = isScriptEnd(skipped);
  }
}
Example #3
0
// Reads tokens until one satisfies isHeaderEnd() or the input runs out.
// Text tokens are appended to `header` and passed to processText(); tokens
// satisfying isLinkStart() are passed to processLink(). Anything else is
// ignored.
void HTMLParser::processHeader(HTMLTokenizer & tokenizer){
  while(tokenizer.HasNextToken()){
    HTMLToken token = tokenizer.GetNextToken();

    if(isText(token)){
      header += token.GetValue();
      processText(token.GetValue());
      continue;
    }

    // Non-text token: either the header is finished or it may be a link.
    if(isHeaderEnd(token))
      break;

    if(isLinkStart(token))
      processLink(token);
  }
}
Example #4
0
// Reads tokens until one satisfies isTitleEnd() or the input runs out.
// The value of every TEXT token seen on the way is appended to `title` and
// passed to processText().
//
// Asserts on entry that the tokenizer still has at least one token.
void HTMLParser::processTitle(HTMLTokenizer & tokenizer){
  assert(tokenizer.HasNextToken());

  while(tokenizer.HasNextToken()){
    HTMLToken token = tokenizer.GetNextToken();

    const bool carriesText = (token.GetType() == TEXT);
    if(carriesText){
      title += token.GetValue();
      processText(token.GetValue());
    }

    // The end-of-title test runs for every token, text tokens included.
    if(isTitleEnd(token))
      break;
  }
}
Example #5
0
// Drains the tokenizer, delegating to processHead() for each token that
// satisfies isHeadStart() and to processBody() for each token that satisfies
// isBodyStart(). All other tokens are skipped.
//
// Asserts on entry that the tokenizer still has at least one token.
void HTMLParser::processHtml(HTMLTokenizer & tokenizer){
  assert(tokenizer.HasNextToken());

  while(tokenizer.HasNextToken()){
    HTMLToken token = tokenizer.GetNextToken();

    if(isHeadStart(token)){
      TRACE("Head Start");
      processHead(tokenizer);
      continue;
    }

    if(isBodyStart(token))
      processBody(tokenizer);
  }
}
Example #6
0
void HTMLPage::load(const CL_String &page_url, const CL_String &refererer_url)
{
	pageurl = HTMLUrl(page_url, refererer_url);
	webpage = download_url(page_url, refererer_url);

	HTMLTokenizer tokenizer;
	tokenizer.append(webpage);
	HTMLToken token;
	while (true)
	{
		tokenizer.tokenize(token);
		if (token.type == HTMLToken::type_null)
			break;
		if (token.type == HTMLToken::type_style_tag)
		{
			pagecss += load_css(token.value, page_url);
		}
		if (token.type == HTMLToken::type_tag_begin || token.type == HTMLToken::type_tag_single)
		{
			if (token.name == "link")
			{
				CL_String rel, href, media;
				for (size_t i = 0; i < token.attributes.size(); i++)
				{
					if (token.attributes[i].name == "rel")
						rel = token.attributes[i].value;
					if (token.attributes[i].name == "href")
						href = token.attributes[i].value;
					if (token.attributes[i].name == "media")
						media = token.attributes[i].value;
				}

				if (rel == "stylesheet" && !href.empty() && (media.empty() || media == "screen"))
				{
					CL_String css = download_url(href, page_url);
					pagecss += load_css(css, HTMLUrl(href, page_url).to_string());
				}
			}
		}
	}

	CL_File fhtml("htmlpage.html", CL_File::create_always, CL_File::access_write);
	fhtml.write(webpage.data(), webpage.length());
	fhtml.close();

	CL_File fcss("htmlpage.css", CL_File::create_always, CL_File::access_write);
	fcss.write(pagecss.data(), pagecss.length());
	fcss.close();
}
Example #7
0
// Reads tokens until one satisfies isHeadEnd() or the input runs out.
// Each token satisfying isTitleStart() hands the tokenizer to processTitle();
// every other token is skipped.
//
// Asserts on entry that the tokenizer still has at least one token.
void HTMLParser::processHead(HTMLTokenizer & tokenizer){
  assert(tokenizer.HasNextToken());

  while(tokenizer.HasNextToken()){
    HTMLToken token = tokenizer.GetNextToken();

    if(isHeadEnd(token))
      break;

    if(!isTitleStart(token))
      continue;

    processTitle(tokenizer);
    TRACE("TITLE Start");
  }
}
bool HTMLElementImpl::setInnerHTML( const DOMString &html )
{
    // the following is in accordance with the definition as used by IE
    if( endTag[id()] == FORBIDDEN )
        return false;
    // IE disallows innerHTML on inline elements. I don't see why we should have this restriction, as our
    // dhtml engine can cope with it. Lars
    //if ( isInline() ) return false;
    switch( id() ) {
        case ID_COL:
        case ID_COLGROUP:
        case ID_FRAMESET:
        case ID_HEAD:
        case ID_HTML:
        case ID_STYLE:
        case ID_TABLE:
        case ID_TBODY:
        case ID_TFOOT:
        case ID_THEAD:
        case ID_TITLE:
        case ID_TR:
            return false;
        default:
            break;
    }
    if ( !getDocument()->isHTMLDocument() )
        return false;

    DocumentFragmentImpl *fragment = new DocumentFragmentImpl( docPtr() );
    HTMLTokenizer *tok = new HTMLTokenizer( docPtr(), fragment );
    tok->begin();
    tok->write( html.string(), true );
    tok->end();
    delete tok;

    removeChildren();
    int ec = 0;
    appendChild( fragment, ec );
    delete fragment;
    return !ec;
}
Example #9
0
// Reads tokens until one satisfies isBodyEnd() or the input runs out,
// dispatching each token to the first matching handler:
//   - text               -> processDescription() then processText()
//   - link start         -> processLink()
//   - header start, only while `header` is still empty -> processHeader()
//   - script start       -> processScript()
// Tokens matching none of these are skipped.
//
// Asserts on entry that the tokenizer still has at least one token.
void HTMLParser::processBody(HTMLTokenizer & tokenizer){
  assert(tokenizer.HasNextToken());

  while(tokenizer.HasNextToken()){
    HTMLToken token = tokenizer.GetNextToken();

    if(isText(token)){
      string text = token.GetValue();
      processDescription(text);
      processText(text);
      continue;
    }

    if(isLinkStart(token)){
      processLink(token);
      continue;
    }

    // Only the first header is captured; later ones fall through to the
    // remaining checks.
    if(isHeaderStart(token) && header.empty()){
      processHeader(tokenizer);
      continue;
    }

    if(isScriptStart(token)){
      processScript(tokenizer);
      continue;
    }

    if(isBodyEnd(token))
      break;
  }
}
Example #10
0
// Reads the board section from the tokenizer and places its pieces on
// *boardPtr.
//
// Tokens are skipped until one whose value is "board" is found. After that,
// every token whose value is "piece" is converted into a SetPiece() call
// using its "type", "color", "column" and "row" attributes, until the next
// token whose value is "board" is reached.
//
// CheckForEndToken() is invoked on every token examined along the way; its
// behavior is defined elsewhere.
void GameLoader::LoadBoard(HTMLTokenizer & tokenizer, Board * boardPtr) const
{
	// Advance to the token that opens the board section.
	HTMLToken token = tokenizer.GetNextToken();
	for (; token.GetValue() != "board"; token = tokenizer.GetNextToken())
		CheckForEndToken(token);

	// Walk the section until the matching "board" token.
	for (token = tokenizer.GetNextToken();
			token.GetValue() != "board";
			token = tokenizer.GetNextToken())
	{
		CheckForEndToken(token);
		if (token.GetValue() == "piece")
		{
			int type = ConvertTypeStrToInt(token.GetAttribute("type"));
			int color = ConvertColorStrToInt(token.GetAttribute("color"));
			int column = atoi(token.GetAttribute("column").c_str());
			int row = atoi(token.GetAttribute("row").c_str());
			boardPtr->SetPiece(BoardPosition(row, column), type, color);
		}
	}
}
Example #11
0
// Reads the history section from the tokenizer and appends each recorded
// move to *gameHistory.
//
// Tokens are skipped until one whose value is "history" is found; parsing
// then continues until the next token whose value is "history". Inside the
// section, each start tag whose value is "move" is expected to be followed by:
//   1. a "piece" token describing the moved piece at its origin,
//   2. a "piece" token whose column/row give the destination,
//   3. optionally a third "piece" token describing a captured piece, before
//      the next token whose value is "move".
// When no third piece is present, the captured type, column and row are
// passed to Move as -1.
//
// CheckForEndToken() is invoked on every token examined along the way; its
// behavior is defined elsewhere.
void GameLoader::LoadMoveHistory(HTMLTokenizer & tokenizer, MoveHistory * gameHistory) const
{
	// Advance to the token that opens the history section.
	HTMLToken token = tokenizer.GetNextToken();
	for (; token.GetValue() != "history"; token = tokenizer.GetNextToken())
		CheckForEndToken(token);

	token = tokenizer.GetNextToken();
	while (token.GetValue() != "history")
	{
		CheckForEndToken(token);

		const bool startsMove = token.GetValue() == "move"
				&& token.GetType() == HTMLTokenType::TAG_START;
		if (!startsMove)
		{
			token = tokenizer.GetNextToken();
			continue;
		}

		// First piece: the moved piece at its starting square.
		for (; token.GetValue() != "piece"; token = tokenizer.GetNextToken())
			CheckForEndToken(token);
		HTMLToken origin = token;

		// Second piece: only its position (the destination) is used.
		token = tokenizer.GetNextToken();
		for (; token.GetValue() != "piece"; token = tokenizer.GetNextToken())
			CheckForEndToken(token);
		HTMLToken destination = token;

		token = tokenizer.GetNextToken();

		int movedType = ConvertTypeStrToInt(origin.GetAttribute("type"));
		int movedColor = ConvertColorStrToInt(origin.GetAttribute("color"));
		int fromCol = atoi(origin.GetAttribute("column").c_str());
		int fromRow = atoi(origin.GetAttribute("row").c_str());
		int toCol = atoi(destination.GetAttribute("column").c_str());
		int toRow = atoi(destination.GetAttribute("row").c_str());

		// Defaults used when the move captured nothing.
		int capturedType = -1;
		int capturedCol = -1;
		int capturedRow = -1;

		// Stop at either an optional third piece or the next "move" token.
		for (; token.GetValue() != "piece" && token.GetValue() != "move";
				token = tokenizer.GetNextToken())
			CheckForEndToken(token);

		if (token.GetValue() == "piece")
		{
			capturedType = ConvertTypeStrToInt(token.GetAttribute("type"));
			capturedCol = atoi(token.GetAttribute("column").c_str());
			capturedRow = atoi(token.GetAttribute("row").c_str());
		}

		// The token is intentionally not advanced here: the outer loop
		// re-examines it, and since it is not a "move" start tag handled
		// above it is skipped on the next iteration unless it starts a move.
		gameHistory->AddMove(Move(movedType,
				movedColor, BoardPosition(fromRow, fromCol),
				BoardPosition(toRow, toCol),
				capturedType, BoardPosition(capturedRow, capturedCol)));
	}
}