void TSkyGridBs::GetWordStrWgtPrVChA( const TStrFltPrV& WordStrWgtPrV, TChA& WordStrWgtPrVChA){ WordStrWgtPrVChA.Clr(); for (int WordN=0; WordN<WordStrWgtPrV.Len(); WordN++){ TStr WStr=WordStrWgtPrV[WordN].Val1; double WWgt=WordStrWgtPrV[WordN].Val2; if (WordN>0){WordStrWgtPrVChA+=' ';} WordStrWgtPrVChA+=TStr::Fmt("['%s':%.3f]", WStr.CStr(), WWgt); } }
// Reads the next line of the stream into LnChA (its old content is discarded).
// A line ends at '\n' or at the pair "\r\n"; the terminator is consumed but
// not stored. A '\r' that is not followed by '\n' is kept as line content.
// Returns true when a line terminator was consumed, or when the stream ended
// after at least one character was collected; returns false when the stream
// was already exhausted and LnChA stays empty.
bool TSIn::GetNextLn(TChA& LnChA){
  LnChA.Clr();
  while (!Eof()){
    const char Ch=GetCh();
    if (Ch=='\n'){return true;}
    // peek only when another character is available: a '\r' that is the very
    // last character of the stream has nothing behind it to look at
    if ((Ch=='\r')&&(!Eof())&&(PeekCh()=='\n')){GetCh(); return true;}
    LnChA.AddCh(Ch);
  }
  return !LnChA.Empty();
}
void TSkyGridBs::GetLinkWgtDstEntIdPrVChA( const TIntPrV& LinkWgtDstEntIdPrV, TChA& LinkWgtDstEntIdPrVChA){ LinkWgtDstEntIdPrVChA.Clr(); for (int DstEntN=0; DstEntN<LinkWgtDstEntIdPrV.Len(); DstEntN++){ int DstEntId=LinkWgtDstEntIdPrV[DstEntN].Val2; TStr DstEntNm=GetEntNm(DstEntId); int LinkWgt=LinkWgtDstEntIdPrV[DstEntN].Val1; if (DstEntN>0){LinkWgtDstEntIdPrVChA+=' ';} LinkWgtDstEntIdPrVChA+=TStr::Fmt("['%s':%d]", DstEntNm.CStr(), LinkWgt); } }
// Loads the bible text from the file FNm, reading it line by line.
// A line opens a new verse when it contains a '|' that is directly preceded by
// an alphanumeric character and directly followed by a digit, i.e. it has the
// shape "<chapter name>|<section>:<subsection><text>". Every other line that
// follows a verse header is appended (space-separated) to the current verse
// text. Each completed verse is handed to AddPsalm(); whenever the chapter
// name changes, (chapter name, PsalmV.Len()) is recorded in ChpNmSecIdKdV.
TBible::TBible(const TStr& FNm):
  TBook("Bible", "King James Version", "Religious"),
  ChpNmSecIdKdV(), PsalmV(), Bix(TBix::New()){
  // open file
  PSIn SIn=TFIn::New(FNm);
  TILx Lx(SIn, TFSet());
  // define state variables
  bool InPsalm=false; TStr ChpNm;
  TChA SecNChA; int SecN=0;
  TChA SSecNChA; int SSecN=0;
  TStr SSecStr;
  // parsing
  while (Lx.GetSym(syLn, syEof)!=syEof){
    TChA& Ln=Lx.Str;
    const int LnLen=Ln.Len();
    int VBarChN=Ln.SearchCh('|');
    // the characters on both sides of '|' are inspected, so the bar must be
    // neither the first nor the last character of the line
    if ((0<VBarChN)&&(VBarChN+1<LnLen)&&
     TCh::IsAlNum(Ln[VBarChN-1])&&TCh::IsNum(Ln[VBarChN+1])){
      // a new verse starts: store the one collected so far
      if (InPsalm){AddPsalm(ChpNm, SecN, SSecN, SSecStr);}
      InPsalm=true;
      // chapter name
      ChpNm=Ln.GetSubStr(0, VBarChN-1).GetTrunc();
      if (ChpNmSecIdKdV.Empty()||ChpNmSecIdKdV.Last().Key!=ChpNm){
        ChpNmSecIdKdV.Add(TStrIntKd(ChpNm, PsalmV.Len()));}
      // section number
      SecNChA.Clr(); int ChN=VBarChN+1;
      while ((ChN<LnLen)&&TCh::IsNum(Ln[ChN])){SecNChA+=Ln[ChN]; ChN++;}
      SecN=TStr(SecNChA).GetInt();
      IAssert((ChN<LnLen)&&(Ln[ChN]==':'));
      // subsection number
      SSecNChA.Clr(); ChN++;
      while ((ChN<LnLen)&&TCh::IsNum(Ln[ChN])){SSecNChA+=Ln[ChN]; ChN++;}
      SSecN=TStr(SSecNChA).GetInt();
      // text
      SSecStr=Ln.GetSubStr(ChN, Ln.Len()-1).GetTrunc();
    } else
    if (InPsalm){
      // continuation line of the current verse
      SSecStr=(SSecStr+' '+Ln).GetTrunc();
    }
  }
  // save last verse
  if (InPsalm){AddPsalm(ChpNm, SecN, SSecN, SSecStr);}
}
// Parses a time-of-day string of the form
//   <hour><TimeSepCh><minute><TimeSepCh><second><MSecSepCh><fraction>
// Fields missing at the end of the string are left empty. The fraction is
// brought to exactly three digits (cut after the third digit, or padded with
// zeros on the right) so that it reads as milliseconds. All fields are
// converted with GetInt(0) and the result is built with the four leading
// (date) constructor arguments set to -1.
TTm TTm::GetTmFromWebLogTimeStr(const TStr& TimeStr,
 const char TimeSepCh, const char MSecSepCh){
  const int TimeStrLen=TimeStr.Len();
  TChA FldChA; int ChN=0;
  // hour
  for (; (ChN<TimeStrLen)&&(TimeStr[ChN]!=TimeSepCh); ChN++){
    FldChA+=TimeStr[ChN];}
  TStr HourStr=FldChA;
  // minute
  FldChA.Clr();
  for (ChN++; (ChN<TimeStrLen)&&(TimeStr[ChN]!=TimeSepCh); ChN++){
    FldChA+=TimeStr[ChN];}
  TStr MinStr=FldChA;
  // second
  FldChA.Clr();
  for (ChN++; (ChN<TimeStrLen)&&(TimeStr[ChN]!=MSecSepCh); ChN++){
    FldChA+=TimeStr[ChN];}
  TStr SecStr=FldChA;
  // milli-second: everything that is left
  FldChA.Clr();
  for (ChN++; ChN<TimeStrLen; ChN++){
    FldChA+=TimeStr[ChN];}
  TStr MSecStr=FldChA;
  // normalize the fraction to three digits
  const int MSecStrLen=MSecStr.Len();
  if (MSecStrLen>3){
    MSecStr=MSecStr.GetSubStr(0, 2);
  } else if (MSecStrLen==1){
    MSecStr+="00";
  } else if (MSecStrLen==2){
    MSecStr+="0";
  }
  // transform to numbers
  const int HourN=HourStr.GetInt(0);
  const int MinN=MinStr.GetInt(0);
  const int SecN=SecStr.GetInt(0);
  const int MSecN=MSecStr.GetInt(0);
  // construct and return the time
  return TTm(-1, -1, -1, -1, HourN, MinN, SecN, MSecN);
}
// Parses a date-time string of the form
//   <month><DateSepCh><day><DateSepCh><year> <hour><TimeSepCh><min><TimeSepCh><sec> <AM|PM>
// When the hour field is not an integer the time of day is taken as 00:00:00.
// Otherwise the marker after the seconds must be "AM" or "PM" (anything else
// triggers Fail) and the hour is converted from the 12-hour to the 24-hour
// clock: 12 AM becomes hour 0 and 12 PM stays hour 12.
TSecTm TSecTm::GetDtTmFromMdyHmsPmStr(const TStr& MdyHmsPmStr,
 const char& DateSepCh, const char& TimeSepCh){
  const int MdyHmsPmStrLen=MdyHmsPmStr.Len();
  TChA ChA; int ChN=0;
  // month
  for (; (ChN<MdyHmsPmStrLen)&&(MdyHmsPmStr[ChN]!=DateSepCh); ChN++){
    ChA+=MdyHmsPmStr[ChN];}
  TStr MonthStr=ChA;
  // day
  ChA.Clr();
  for (ChN++; (ChN<MdyHmsPmStrLen)&&(MdyHmsPmStr[ChN]!=DateSepCh); ChN++){
    ChA+=MdyHmsPmStr[ChN];}
  TStr DayStr=ChA;
  // year
  ChA.Clr();
  for (ChN++; (ChN<MdyHmsPmStrLen)&&(MdyHmsPmStr[ChN]!=' '); ChN++){
    ChA+=MdyHmsPmStr[ChN];}
  TStr YearStr=ChA;
  // hour
  ChA.Clr();
  for (ChN++; (ChN<MdyHmsPmStrLen)&&(MdyHmsPmStr[ChN]!=TimeSepCh); ChN++){
    ChA+=MdyHmsPmStr[ChN];}
  TStr HourStr=ChA;
  // minute
  ChA.Clr();
  for (ChN++; (ChN<MdyHmsPmStrLen)&&(MdyHmsPmStr[ChN]!=TimeSepCh); ChN++){
    ChA+=MdyHmsPmStr[ChN];}
  TStr MinStr=ChA;
  // second
  ChA.Clr();
  for (ChN++; (ChN<MdyHmsPmStrLen)&&(MdyHmsPmStr[ChN]!=' '); ChN++){
    ChA+=MdyHmsPmStr[ChN];}
  TStr SecStr=ChA;
  // AM/PM: everything that is left
  ChA.Clr();
  for (ChN++; ChN<MdyHmsPmStrLen; ChN++){
    ChA+=MdyHmsPmStr[ChN];}
  TStr AmPmStr=ChA;
  // transform to numbers
  int MonthN=MonthStr.GetInt();
  int DayN=DayStr.GetInt();
  int YearN=YearStr.GetInt();
  int HourN; int MinN; int SecN;
  if (HourStr.IsInt()){
    HourN=HourStr.GetInt();
    MinN=MinStr.GetInt();
    SecN=SecStr.GetInt();
    if (AmPmStr=="AM"){
      // 12:xx AM is the first hour of the day
      if (HourN==12){HourN=0;}
    } else if (AmPmStr=="PM"){
      // 12:xx PM is noon and is already in 24-hour form
      if (HourN!=12){HourN+=12;}
    } else {
      Fail;
    }
  } else {
    HourN=0; MinN=0; SecN=0;
  }
  // construct the time
  TSecTm Tm=TSecTm::GetDtTm(YearN, MonthN, DayN);
  Tm.AddHours(HourN);
  Tm.AddMins(MinN);
  Tm.AddSecs(SecN);
  return Tm;
}
///////////////////////////////////////////////// // Translation-Evaluation-Scores void TEvalScore::Tokenize(const TStr& Str, TStrV& TokenV) { TStr LcStr = GetLc(Str); TokenV.Clr(); TChA WordChA; const int Chs = LcStr.Len(); for (int ChN = 0; ChN < Chs; ChN++) { const char Ch = LcStr[ChN]; if (IsChar(Ch)) { WordChA += Ch; } else if (!WordChA.Empty()) { TokenV.Add(WordChA); WordChA.Clr(); } } }
// Parses a date-time string of the form
//   <year><DateSepCh><month><DateSepCh><day><DateTimeSepCh>
//   <hour><TimeSepCh><minute><TimeSepCh><second><MSecSepCh><milli-second>
// Fields missing at the end of the string are left empty. Date fields are
// converted with GetInt(-1) and time fields with GetInt(0); when any date
// field comes out as -1 a default-constructed TTm is returned.
// NOTE(review): the milli-second field is used as written and is not
// normalized to three digits - confirm that inputs always carry three digits.
TTm TTm::GetTmFromWebLogDateTimeStr(const TStr& DateTimeStr,
 const char DateSepCh, const char TimeSepCh, const char MSecSepCh,
 const char DateTimeSepCh){
  const int StrLen=DateTimeStr.Len();
  TChA FldChA; int ChN=0;
  // year
  for (; (ChN<StrLen)&&(DateTimeStr[ChN]!=DateSepCh); ChN++){
    FldChA+=DateTimeStr[ChN];}
  TStr YearStr=FldChA;
  // month
  FldChA.Clr();
  for (ChN++; (ChN<StrLen)&&(DateTimeStr[ChN]!=DateSepCh); ChN++){
    FldChA+=DateTimeStr[ChN];}
  TStr MonthStr=FldChA;
  // day
  FldChA.Clr();
  for (ChN++; (ChN<StrLen)&&(DateTimeStr[ChN]!=DateTimeSepCh); ChN++){
    FldChA+=DateTimeStr[ChN];}
  TStr DayStr=FldChA;
  // hour
  FldChA.Clr();
  for (ChN++; (ChN<StrLen)&&(DateTimeStr[ChN]!=TimeSepCh); ChN++){
    FldChA+=DateTimeStr[ChN];}
  TStr HourStr=FldChA;
  // minute
  FldChA.Clr();
  for (ChN++; (ChN<StrLen)&&(DateTimeStr[ChN]!=TimeSepCh); ChN++){
    FldChA+=DateTimeStr[ChN];}
  TStr MinStr=FldChA;
  // second
  FldChA.Clr();
  for (ChN++; (ChN<StrLen)&&(DateTimeStr[ChN]!=MSecSepCh); ChN++){
    FldChA+=DateTimeStr[ChN];}
  TStr SecStr=FldChA;
  // milli-second: everything that is left
  FldChA.Clr();
  for (ChN++; ChN<StrLen; ChN++){
    FldChA+=DateTimeStr[ChN];}
  TStr MSecStr=FldChA;
  // transform to numbers
  const int YearN=YearStr.GetInt(-1);
  const int MonthN=MonthStr.GetInt(-1);
  const int DayN=DayStr.GetInt(-1);
  const int HourN=HourStr.GetInt(0);
  const int MinN=MinStr.GetInt(0);
  const int SecN=SecStr.GetInt(0);
  const int MSecN=MSecStr.GetInt(0);
  // without a complete date the default time is returned
  if ((YearN==-1)||(MonthN==-1)||(DayN==-1)){
    return TTm();
  }
  return TTm(YearN, MonthN, DayN, -1, HourN, MinN, SecN, MSecN);
}
void TStrUtil::RemoveHtmlTags(const TChA& HtmlStr, TChA& TextStr) { TextStr.Clr(); char *StrB, *StrE; // use full page html: skip till <body> //PageHtmlStr = "<script fdsfs> fsdfsd </script> jure"; /*if (UseFullHtml) { StrB = PageHtmlStr.CStr(); StrE = StrB+PageHtmlStr.Len(); char * NewB = strstr(StrB, "<body>"); if (NewB != NULL) { StrB = NewB+6; } char * NewE = strstr(StrB, "body>"); if (NewE != NULL) { while (true) { char *E=strstr(NewE+4, "body>"); if (E == NULL) { break; } NewE = E; } StrE = NewE; } } else { // only extracted post html*/ StrB = (char *) HtmlStr.CStr(); StrE = (char *) StrB+HtmlStr.Len(); //} for (char *e = StrB; e < StrE; ) { char* b = e; while (e<StrE && *e != '<') { e++; } // copy text char tmp=*e; *e = 0; TextStr+= b; TextStr.AddCh(' '); *e = tmp; if (e >= StrE) { return; } // if start of a comment: skip if (e[1]=='!' && e[2]=='-' && e[3]=='-') { // comment e += 3; while(e<StrE && !(*(e-2)=='-' && *(e-1)=='-' && *e=='>')) { e++; } e++; continue; } // if "<script" then skip if (e[1]=='s' && e[2]=='c' && e[3]=='r' && e[4]=='i' && e[5]=='p' && e[6]=='t') { e += 5; while(e<StrE && !(*(e-6)=='s' && *(e-5)=='c' && *(e-4)=='r' && *(e-3)=='i' && *(e-2)=='p' && *(e-1)=='t' && *e=='>')) { e++; } e++; continue; } // skip to end of tag while (e < StrE && *e != '>') { e++; } if (e>=StrE) { return; } e++; } }
// Gets the next line to LnChA. // Returns true, if LnChA contains a valid line. // Returns false, if LnChA is empty, such as end of file was encountered. bool TZipIn::GetNextLnBf(TChA& LnChA) { int Status; int BfN; // new pointer to the end of line int BfP; // previous pointer to the line start LnChA.Clr(); do { if (BfC >= BfL) { BfP = 0; } // reset the current pointer, FindEol() will read a new buffer else { BfP = BfC; } Status = FindEol(BfN); if (Status >= 0) { LnChA.AddBf(&Bf[BfP],BfN-BfP); if (Status == 1) { return true; } // got a complete line } // get more data, if the line is incomplete } while (Status == 0); // eof or the last line has no newline return !LnChA.Empty(); }
// get <TagNm>*</TagNm> (can be many tags inbetween bool TStrUtil::GetXmlTagNmVal2(TXmlLx& XmlLx, TChA& TagNm, TChA& TagVal, const bool& TakeTagNms) { if (XmlLx.GetSym() != xsySTag) { return false; } TagVal.Clr(); TagNm = XmlLx.TagNm; //const TXmlLxSym NextSym = XmlLx.GetSym(); while (XmlLx.Sym != xsyETag || XmlLx.TagNm != TagNm.CStr()) { if (TakeTagNms) { TagVal += XmlLx.TxtChA; } else if (XmlLx.Sym == xsyStr) { TagVal += XmlLx.TxtChA; } XmlLx.GetSym(); } return true; //if (NextSym == xsyStr) { // EAssertR(XmlLx.GetSym() == xsyETag, TagNm); //} else { // EAssertR(NextSym == xsyETag, TagNm); // empty tag //printf(" token: %s empty! %s\n", XmlLx.TagNm.CStr(), XmlLx.GetFPosStr().CStr()); //} }
void TWebTravelHomeNet::StartTravel(){ PutConstrs(); TStrV UrlStrV(300000, 0); TIntIntH UserIdToDocsH(1000); PSIn SIn=PSIn(new TFIn(InFNm)); TILx Lx(SIn, TFSet()|iloRetEoln); TChA UrlStr; Lx.GetSym(syInt, syEof); while ((Lx.Sym!=syEof)&&(Lx.SymLnN<200000)){ // while (Lx.Sym!=syEof){ int UserId=Lx.Int; Lx.GetSym(syComma); Lx.GetInt(); Lx.GetSym(syComma); Lx.GetInt(); Lx.GetSym(syComma); Lx.GetInt(); Lx.GetSym(syComma); Lx.GetInt(); Lx.GetSym(syComma); TStr Method=Lx.GetIdStr(); Lx.GetSym(syComma); // GET, POST UrlStr.Clr(); UrlStr+=Lx.GetIdStr(); Lx.GetSym(syComma); // http, ftp UrlStr+="://"; UrlStr+=Lx.GetStrToCh(','); Lx.GetSym(syComma); // domain name UrlStr+=Lx.GetStrToEoln(); Lx.GetEoln(); // path if ((UserId==TgUserId)&&IsUrlOk(UrlStr)&&(Method=="GET")){ UserIdToDocsH.AddDat(UserId)++; UrlStrV.Add(UrlStr); } Lx.GetSym(syInt, syEof); if (Lx.SymLnN%100000==0){OnNotify(TInt::GetStr(Lx.SymLnN)+ " docs");} } int UserIdToDocsP=UserIdToDocsH.FFirstKeyId(); while (UserIdToDocsH.FNextKeyId(UserIdToDocsP)){ int UserId=UserIdToDocsH.GetKey(UserIdToDocsP); int Docs=UserIdToDocsH[UserIdToDocsP]; TStr MsgStr=TStr("User ")+TInt::GetStr(UserId)+": "+ TInt::GetStr(Docs)+" Docs."; OnNotify(MsgStr); } UrlStrV.Shuffle(TRnd()); for (int UrlStrN=0; UrlStrN<UrlStrV.Len(); UrlStrN++){ Go(UrlStrV[UrlStrN]); } }
PSs TSs::LoadTxt( const TSsFmt& SsFmt, const TStr& FNm, const PNotify& Notify, const bool& IsExcelEoln, const int& MxY, const TIntV& AllowedColNV, const bool& IsQStr) { TNotify::OnNotify(Notify, ntInfo, TStr("Loading File ")+FNm+" ..."); PSIn SIn=TFIn::New(FNm); PSs Ss=TSs::New(); if (!SIn->Eof()) { int X=0; int Y=0; int PrevX=-1; int PrevY=-1; char Ch=SIn->GetCh(); TChA ChA; while (!SIn->Eof()) { // compose value ChA.Clr(); if (IsQStr&&(Ch=='"')) { // quoted string ('""' sequence means '"') Ch=SIn->GetCh(); forever { while ((!SIn->Eof())&&(Ch!='"')) { ChA+=Ch; Ch=SIn->GetCh(); } if (Ch=='"') { Ch=SIn->GetCh(); if (Ch=='"') { ChA+=Ch; Ch=SIn->GetCh(); } else { break; } } } } else { if (SsFmt==ssfTabSep) {
void TTokenizerUtil::Paragraphize(const PSIn& SIn, TStrV& Paragraphs) { TChA ParagraphBuf; int c; bool wasSpace = false; while (!SIn->Eof()) { c = SIn->GetCh(); // two consecutive spaces signal a new paragraph if (c == ' ' || c == '\t' || c == '\n') { if (wasSpace) { Paragraphs.Add(ParagraphBuf); ParagraphBuf.Clr(); continue; } wasSpace = true; } else { wasSpace = false; } ParagraphBuf += c; } if (ParagraphBuf.Len() > 0) { Paragraphs.Add(ParagraphBuf); } }
/////////////////////////////// // Tokenizer-Utils void TTokenizerUtil::Sentencize(const PSIn& SIn, TStrV& Sentences, const bool& SplitNewLineP) { TChA SentenceBuf; int c; while (!SIn->Eof()) { c = SIn->GetCh(); switch (c) { case '\r': case '\n': { if (!SplitNewLineP) { SentenceBuf += ' '; break; } } case '"' : case '.' : case '!' : case ':' : case ';' : case '?' : case '\t': { if (SentenceBuf.Len() > 2) { Sentences.Add(SentenceBuf); printf("%s\n", SentenceBuf.CStr()); SentenceBuf.Clr(); } break; } default: SentenceBuf += c; break; } } if (SentenceBuf.Len() > 0) { Sentences.Add(SentenceBuf); } }
// Builds an item from a fetched item web page by scanning its HTML symbols:
//   1. skips to the first <FONT> tag whose FACE argument is
//      "verdana,arial,helvetica" and whose SIZE argument is empty,
//   2. collects everything up to the closing FONT tag as the title,
//   3. collects the text of every <A> element up to the next closing FONT
//      tag as the author names,
//   4. skips to the text "Customers who bought",
//   5. collects the item ids of all <A HREF=...> links up to the closing UL
//      tag as the ids of the related items.
// Every step also stops at the end of the page, including the reading of a
// single author link that is never closed.
PAmazonItem TAmazonItem::GetFromWebPg(const PWebPg& WebPg){
  TStr UrlStr=WebPg->GetUrlStr();
  TStr ItemId=TAmazonItem::GetItemId(WebPg->GetUrl());
  TStr HtmlStr=WebPg->GetHttpBodyAsStr();
  PSIn HtmlSIn=TStrIn::New(HtmlStr);
  THtmlLx HtmlLx(HtmlSIn);
  THtmlLxSym Sym; TChA ChA;
  // move to title
  while (HtmlLx.GetSym()!=hsyEof){
    Sym=HtmlLx.Sym; ChA=HtmlLx.ChA;
    if ((Sym==hsyBTag)&&(ChA=="<FONT>")){
      TStr FaceArg=HtmlLx.GetArg("FACE", "");
      TStr SizeArg=HtmlLx.GetArg("SIZE", "");
      if ((FaceArg=="verdana,arial,helvetica")&&(SizeArg.Empty())){break;}
    }
  }
  // extract title
  TChA TitleChA;
  while (HtmlLx.GetSym()!=hsyEof){
    Sym=HtmlLx.Sym; ChA=HtmlLx.ChA;
    if ((Sym==hsyETag)&&(ChA=="<FONT>")){break;}
    if (!TitleChA.Empty()){TitleChA+=HtmlLx.GetPreSpaceStr();}
    TitleChA+=ChA;
  }
  TStr TitleStr=TitleChA;
  // extract authors
  TStrV AuthorNmV; TChA AuthorNmChA;
  while (HtmlLx.GetSym()!=hsyEof){
    Sym=HtmlLx.Sym; ChA=HtmlLx.ChA;
    if ((Sym==hsyBTag)&&(ChA=="<A>")){
      // collect the text of the link; the end-of-page test keeps the loop
      // from running forever when the closing tag of the link is missing
      do {
        HtmlLx.GetSym();
        Sym=HtmlLx.Sym; ChA=HtmlLx.ChA;
        if (Sym==hsyStr){
          if (!AuthorNmChA.Empty()){AuthorNmChA+=HtmlLx.GetPreSpaceStr();}
          AuthorNmChA+=ChA;
        }
      } while ((Sym!=hsyEof)&&!((Sym==hsyETag)&&(ChA=="<A>")));
      AuthorNmV.Add(AuthorNmChA);
      AuthorNmChA.Clr();
    }
    if ((Sym==hsyETag)&&(ChA=="<FONT>")){break;}
  }
  // move to x-sell: three consecutive strings "Customers who bought"
  TStrQ PrevStrQ(3);
  while (HtmlLx.GetSym()!=hsyEof){
    Sym=HtmlLx.Sym; ChA=HtmlLx.ChA;
    if (Sym==hsyStr){
      PrevStrQ.Push(ChA);
      if ((PrevStrQ.Len()==3)&&(PrevStrQ[0]=="Customers")
       &&(PrevStrQ[1]=="who")&&(PrevStrQ[2]=="bought")){break;}
    } else {
      PrevStrQ.Clr();
    }
  }
  // extract x-sell pointers
  TStrV NextItemIdV;
  while (HtmlLx.GetSym()!=hsyEof){
    Sym=HtmlLx.Sym; ChA=HtmlLx.ChA;
    if ((Sym==hsyBTag)&&(ChA=="<A>")){
      // link targets are resolved relative to the url of the page
      TStr RelUrlStr=HtmlLx.GetArg("HREF");
      PUrl Url=TUrl::New(RelUrlStr, UrlStr);
      TStr NextItemId=TAmazonItem::GetItemId(Url);
      NextItemIdV.Add(NextItemId);
    }
    if ((Sym==hsyETag)&&(ChA=="<UL>")){break;}
  }
  // construct item object
  PAmazonItem AmazonItem=PAmazonItem(new
   TAmazonItem(ItemId, TitleStr, AuthorNmV, NextItemIdV));
  return AmazonItem;
}
// Parses a date-time string of the form
//   <year><DateSepCh><month><DateSepCh><day> <hour><TimeSepCh><minute><TimeSepCh><second><MSecSepCh><milli-second>
// where a single space separates the date from the time. Fields missing at
// the end of the string are left empty. Date fields are converted with
// GetInt(-1) and time fields with GetInt(0); when any date field comes out as
// -1 a default-constructed TTm is returned.
TTm TTm::GetTmFromWebLogDateTimeStr(const TStr& DateTimeStr,
 const char DateSepCh, const char TimeSepCh, const char MSecSepCh){
  const int Chs=DateTimeStr.Len();
  int ChN=0; TChA BufChA;
  // year
  for (; (ChN<Chs)&&(DateTimeStr[ChN]!=DateSepCh); ChN++){
    BufChA+=DateTimeStr[ChN];}
  TStr YearStr=BufChA;
  // month
  BufChA.Clr();
  for (ChN++; (ChN<Chs)&&(DateTimeStr[ChN]!=DateSepCh); ChN++){
    BufChA+=DateTimeStr[ChN];}
  TStr MonthStr=BufChA;
  // day
  BufChA.Clr();
  for (ChN++; (ChN<Chs)&&(DateTimeStr[ChN]!=' '); ChN++){
    BufChA+=DateTimeStr[ChN];}
  TStr DayStr=BufChA;
  // hour
  BufChA.Clr();
  for (ChN++; (ChN<Chs)&&(DateTimeStr[ChN]!=TimeSepCh); ChN++){
    BufChA+=DateTimeStr[ChN];}
  TStr HourStr=BufChA;
  // minute
  BufChA.Clr();
  for (ChN++; (ChN<Chs)&&(DateTimeStr[ChN]!=TimeSepCh); ChN++){
    BufChA+=DateTimeStr[ChN];}
  TStr MinStr=BufChA;
  // second
  BufChA.Clr();
  for (ChN++; (ChN<Chs)&&(DateTimeStr[ChN]!=MSecSepCh); ChN++){
    BufChA+=DateTimeStr[ChN];}
  TStr SecStr=BufChA;
  // milli-second: everything that is left
  BufChA.Clr();
  for (ChN++; ChN<Chs; ChN++){
    BufChA+=DateTimeStr[ChN];}
  TStr MSecStr=BufChA;
  // transform to numbers
  int YearN=YearStr.GetInt(-1);
  int MonthN=MonthStr.GetInt(-1);
  int DayN=DayStr.GetInt(-1);
  int HourN=HourStr.GetInt(0);
  int MinN=MinStr.GetInt(0);
  int SecN=SecStr.GetInt(0);
  int MSecN=MSecStr.GetInt(0);
  // construct time
  /* //!!peter: convert month name to number and flip date/day (oracle: 10-FEB-05)
  if ((MonthN==-1)&&(isalpha(MonthStr.CStr()[0]))){
    if ((MonthN=MonthParser.GetMonthN(MonthStr))!=-1){
      int Y=DayN;
      DayN=YearN;
      YearN=Y<100?Y+2000:Y;
    }
  }
  */
  const bool DateOk=(YearN!=-1)&&(MonthN!=-1)&&(DayN!=-1);
  TTm Tm;
  if (DateOk){
    Tm=TTm(YearN, MonthN, DayN, -1, HourN, MinN, SecN, MSecN);
  }
  // return time
  return Tm;
}
bool IsCTxtHttpResp(const PUrl& Url, const PHttpResp& HttpResp, const int& MnCTxtToks){ if (HttpResp->IsStatusCd_Ok()){ PWebPg WebPg=TWebPg::New(Url->GetUrlStr(), HttpResp); if (HttpResp->IsContType(THttp::TextHtmlFldVal)){ TMem BodyMem=HttpResp->GetBodyAsMem(); PSIn BodyMemIn=TMemIn::New(BodyMem); // prepare html-tokens PHtmlDoc HtmlDoc=THtmlDoc::New(BodyMemIn, hdtAll, false); int Toks=HtmlDoc->GetToks(); THtmlLxSym TokSym; TStr TokStr; // prepare continuous-text indicators int CTxtToks=0; TChA CTxtChA; bool CTxtP=false; // prepare script & style flag bool InScript=false; bool InStyle=false; // traverse tokens for (int TokN=0; TokN<Toks; TokN++){ // get token data HtmlDoc->GetTok(TokN, TokSym, TokStr); switch (TokSym){ case hsyStr: case hsyNum: case hsySSym: if (!InScript&&!InStyle){ // text token CTxtToks++; CTxtChA+=TokStr; CTxtChA+=' '; } break; case hsyBTag: if (!InScript&&!InStyle){ if (TokStr=="<SCRIPT>"){ // start of script InScript=true; CTxtToks=0; CTxtChA.Clr(); } else if (TokStr=="<STYLE>"){ // start of style InStyle=true; CTxtToks=0; CTxtChA.Clr(); } else { if ((TokStr=="<P>")||(TokStr=="<B>")||(TokStr=="<I>")){ // skip in-text-tags } else { // non-text-tags - break continuous-text CTxtToks=0; CTxtChA.Clr(); } } } break; case hsyETag: if (InScript||InStyle){ if (TokStr=="<SCRIPT>"){ // end of script InScript=false; } else if (TokStr=="<STYLE>"){ // end of style InStyle=false; } } break; default: // non-text-token - break continuous-text CTxtToks=0; CTxtChA.Clr(); break; } // stop if enough continuous-text if (CTxtToks>MnCTxtToks){ CTxtP=true; break; } } if (CTxtP){ printf("%s\n", Url->GetUrlStr().CStr()); } return CTxtP; } } return false; }