Пример #1
0
/**
 * Consumes tokens that follow a title start tag.
 *
 * The value of every TEXT token is appended to the `title` member and also
 * handed to processText(). Reading stops at the first token for which
 * isTitleEnd() is true, or when the tokenizer has no more tokens.
 *
 * @param tokenizer token source; must have at least one token left
 *                  (checked with assert).
 */
void HTMLParser::processTitle(HTMLTokenizer & tokenizer){
  assert(tokenizer.HasNextToken());

  while(tokenizer.HasNextToken()){
    HTMLToken token = tokenizer.GetNextToken();

    if(token.GetType() == TEXT){
      string text = token.GetValue();
      title += text;
      processText(text);
    }

    // Checked for every token, including TEXT ones.
    if(isTitleEnd(token)){
      return;
    }
  }
}
Пример #2
0
/**
 * Walks the remaining tokens of the document and dispatches the head and
 * body sections.
 *
 * A token accepted by isHeadStart() triggers processHead(); otherwise a
 * token accepted by isBodyStart() triggers processBody(). All other tokens
 * are skipped. The loop runs until the tokenizer is exhausted.
 *
 * @param tokenizer token source; must have at least one token left
 *                  (checked with assert).
 */
void HTMLParser::processHtml(HTMLTokenizer & tokenizer){
  assert(tokenizer.HasNextToken());

  while(tokenizer.HasNextToken()){
    HTMLToken token = tokenizer.GetNextToken();

    if(isHeadStart(token)){
      TRACE("Head Start");
      processHead(tokenizer);
      continue;
    }

    if(isBodyStart(token)){
      processBody(tokenizer);
    }
  }
}
Пример #3
0
/**
 * Parses the document found at `url`.
 *
 * Creates a URLInputStream for `url` (stored in the `inputStream` member),
 * tokenizes it, and calls processHtml() for every token accepted by
 * isHTMLStart(). When tokenizing finishes normally the stream is closed.
 *
 * No exception escapes this function: std::exception, CS240Exception and
 * any other exception are reported on std::cout and swallowed.
 *
 * @param url location of the document to parse.
 */
void HTMLParser::runParser(string url){

  try{
    inputStream = new URLInputStream(url);

    {
      // The tokenizer has automatic storage so that it is destroyed even
      // when tokenizing or one of the process*() calls throws; a new/delete
      // pair would skip the delete during stack unwinding and leak it.
      HTMLTokenizer tokenizer(inputStream);

      while(tokenizer.HasNextToken()){
        HTMLToken curToken = tokenizer.GetNextToken();

        if(isHTMLStart(curToken))
          processHtml(tokenizer);
      }
    } // tokenizer is destroyed here, before the stream is closed

    // NOTE(review): the stream is only closed on the success path and is
    // never deleted in this function — confirm that the owner of
    // `inputStream` (e.g. the destructor) releases it.
    inputStream->Close();

  }
  catch (const std::exception &e){
    cout << "Exception Occurred:" << e.what() << endl;
  }
  catch (CS240Exception &e){
    cout << "Exception Occurred:" << e.GetMessage() << endl;
  }
  catch (...){
    cout << "Unknown Exception Occurred" << endl;
  }

}
Пример #4
0
/**
 * Consumes the tokens of the head section.
 *
 * Returns at the first token accepted by isHeadEnd(), or when the tokenizer
 * is exhausted. For each token accepted by isTitleStart(), processTitle()
 * is run and a trace message is emitted afterwards.
 *
 * @param tokenizer token source; must have at least one token left
 *                  (checked with assert).
 */
void HTMLParser::processHead(HTMLTokenizer & tokenizer){
  assert(tokenizer.HasNextToken());

  while(tokenizer.HasNextToken()){
    HTMLToken token = tokenizer.GetNextToken();

    if(isHeadEnd(token)){
      return;
    }

    if(!isTitleStart(token)){
      continue;
    }

    processTitle(tokenizer);
    TRACE("TITLE Start");
  }
}
Пример #5
0
/**
 * Skips over the contents of a script element.
 *
 * Tokens are read and discarded until one is accepted by isScriptEnd() or
 * the tokenizer has no more tokens.
 *
 * @param tokenizer token source to advance.
 */
void HTMLParser::processScript(HTMLTokenizer & tokenizer){
  while(tokenizer.HasNextToken()){
    HTMLToken token = tokenizer.GetNextToken();

    if(isScriptEnd(token)){
      break;
    }
  }
}
Пример #6
0
/**
 * Consumes the tokens of a header element.
 *
 * Text tokens are appended to the `header` member and passed to
 * processText(). A token accepted by isHeaderEnd() ends processing, and a
 * token accepted by isLinkStart() is forwarded to processLink(). The checks
 * are made in that order; any other token is ignored.
 *
 * @param tokenizer token source to advance.
 */
void HTMLParser::processHeader(HTMLTokenizer & tokenizer){
  while(tokenizer.HasNextToken()){
    HTMLToken token = tokenizer.GetNextToken();

    if(isText(token)){
      string text = token.GetValue();
      header += text;
      processText(text);
      continue;
    }

    if(isHeaderEnd(token)){
      return;
    }

    if(isLinkStart(token)){
      processLink(token);
    }
  }
}
Пример #7
0
/**
 * Consumes the tokens of the body section and dispatches each one.
 *
 * Checks are made in this order, and the first match wins:
 *   - text token: passed to processDescription() and processText();
 *   - link start: passed to processLink();
 *   - header start while `header` is still empty: processHeader();
 *   - script start: processScript();
 *   - body end: return.
 * Any other token is ignored. The loop also ends when the tokenizer is
 * exhausted.
 *
 * @param tokenizer token source; must have at least one token left
 *                  (checked with assert).
 */
void HTMLParser::processBody(HTMLTokenizer & tokenizer){
  assert(tokenizer.HasNextToken());

  while(tokenizer.HasNextToken()){
    HTMLToken token = tokenizer.GetNextToken();

    if(isText(token)){
      string text = token.GetValue();
      processDescription(text);
      processText(text);
      continue;
    }

    if(isLinkStart(token)){
      processLink(token);
      continue;
    }

    // Only a header start seen while no header text has been stored is
    // handled here; otherwise the token falls through to the later checks.
    if(isHeaderStart(token) && header.empty()){
      processHeader(tokenizer);
      continue;
    }

    if(isScriptStart(token)){
      processScript(tokenizer);
      continue;
    }

    if(isBodyEnd(token)){
      return;
    }
  }
}