Example #1
0
/// Tells whether an HTTP response is an HTML page containing a sufficiently
/// long run of continuous text.
///
/// The response body is tokenized as HTML and scanned token by token:
///  - string, number and single-symbol tokens found outside <SCRIPT>/<STYLE>
///    extend the current run by one;
///  - a begin-tag other than <P>, <B> or <I> resets the run (<SCRIPT> and
///    <STYLE> additionally switch the scanner into script/style mode);
///  - end-tags never reset the run; they only close script/style mode;
///  - every other token kind resets the run.
/// Scanning stops at the first point where the run is strictly longer than
/// MnCTxtToks.
///
/// @param Url        Url of the page; only used to print the url on a match.
/// @param HttpResp   Response to inspect.
/// @param MnCTxtToks Threshold; the run must contain MORE than this many
///                   text tokens for the page to qualify.
/// @return True iff HttpResp->IsStatusCd_Ok() holds, the content type matches
///         THttp::TextHtmlFldVal and a long-enough run was found. On a match
///         the url is also printed to stdout, followed by a newline.
bool IsCTxtHttpResp(const PUrl& Url, const PHttpResp& HttpResp, const int& MnCTxtToks){
  // only successful html responses are candidates
  if (!HttpResp->IsStatusCd_Ok()){return false;}
  if (!HttpResp->IsContType(THttp::TextHtmlFldVal)){return false;}
  // prepare html-tokens from the response body
  TMem BodyMem=HttpResp->GetBodyAsMem();
  PSIn BodyMemIn=TMemIn::New(BodyMem);
  PHtmlDoc HtmlDoc=THtmlDoc::New(BodyMemIn, hdtAll, false);
  const int Toks=HtmlDoc->GetToks();
  THtmlLxSym TokSym; TStr TokStr;
  // length of the current continuous-text run & final verdict
  // (only the token count matters, so the text itself is not collected)
  int CTxtToks=0; bool CTxtP=false;
  // script & style flags
  bool InScript=false; bool InStyle=false;
  // traverse tokens
  for (int TokN=0; TokN<Toks; TokN++){
    HtmlDoc->GetTok(TokN, TokSym, TokStr);
    switch (TokSym){
      case hsyStr:
      case hsyNum:
      case hsySSym:
        // text token - counts only outside script/style
        if (!InScript&&!InStyle){CTxtToks++;}
        break;
      case hsyBTag:
        // begin-tags inside script/style are ignored
        if (!InScript&&!InStyle){
          if (TokStr=="<SCRIPT>"){
            // start of script
            InScript=true; CTxtToks=0;
          } else
          if (TokStr=="<STYLE>"){
            // start of style
            InStyle=true; CTxtToks=0;
          } else
          if ((TokStr=="<P>")||(TokStr=="<B>")||(TokStr=="<I>")){
            // in-text-tags - do not break continuous-text
          } else {
            // non-text-tags - break continuous-text
            CTxtToks=0;
          }
        }
        break;
      case hsyETag:
        // end-tags are compared against the same spelling as begin-tags;
        // outside script/style they leave the run untouched
        if (InScript||InStyle){
          if (TokStr=="<SCRIPT>"){
            // end of script
            InScript=false;
          } else
          if (TokStr=="<STYLE>"){
            // end of style
            InStyle=false;
          }
        }
        break;
      default:
        // non-text-token - break continuous-text
        CTxtToks=0;
        break;
    }
    // stop if enough continuous-text (strictly more than the threshold)
    if (CTxtToks>MnCTxtToks){
      CTxtP=true; break;
    }
  }
  // report qualifying pages on stdout
  if (CTxtP){
    printf("%s\n", Url->GetUrlStr().CStr());
  }
  return CTxtP;
}