示例#1
0
文件: skygrid.cpp 项目: Accio/snap
void TSkyGridBs::GetWordStrWgtPrVChA(
 const TStrFltPrV& WordStrWgtPrV, TChA& WordStrWgtPrVChA){
  WordStrWgtPrVChA.Clr();
  for (int WordN=0; WordN<WordStrWgtPrV.Len(); WordN++){
    TStr WStr=WordStrWgtPrV[WordN].Val1;
    double WWgt=WordStrWgtPrV[WordN].Val2;
    if (WordN>0){WordStrWgtPrVChA+=' ';}
    WordStrWgtPrVChA+=TStr::Fmt("['%s':%.3f]", WStr.CStr(), WWgt);
  }
}
示例#2
0
文件: fl.cpp 项目: FlyClover/movieinf
// Reads the next line from the stream into LnChA, without its terminator.
// A line ends at '\n' or at a "\r\n" pair; a '\r' that is not followed by
// '\n' is kept as ordinary line content.
// Returns true when a line terminator was consumed, or when the stream ended
// after at least one character was read; returns false only when nothing
// could be read.
bool TSIn::GetNextLn(TChA& LnChA){
  LnChA.Clr();
  while (!Eof()){
    const char Ch=GetCh();
    if (Ch=='\n'){return true;}
    // peek only when another character is available: a '\r' that is the very
    // last character of the stream must not trigger a read past the end
    if ((Ch=='\r')&&(!Eof())&&(PeekCh()=='\n')){GetCh(); return true;}
    LnChA.AddCh(Ch);
  }
  return !LnChA.Empty();
}
示例#3
0
文件: skygrid.cpp 项目: Accio/snap
void TSkyGridBs::GetLinkWgtDstEntIdPrVChA(
 const TIntPrV& LinkWgtDstEntIdPrV, TChA& LinkWgtDstEntIdPrVChA){
  LinkWgtDstEntIdPrVChA.Clr();
  for (int DstEntN=0; DstEntN<LinkWgtDstEntIdPrV.Len(); DstEntN++){
    int DstEntId=LinkWgtDstEntIdPrV[DstEntN].Val2;
    TStr DstEntNm=GetEntNm(DstEntId);
    int LinkWgt=LinkWgtDstEntIdPrV[DstEntN].Val1;
    if (DstEntN>0){LinkWgtDstEntIdPrVChA+=' ';}
    LinkWgtDstEntIdPrVChA+=TStr::Fmt("['%s':%d]", DstEntNm.CStr(), LinkWgt);
  }
}
示例#4
0
// Loads the text from file FNm and splits it into verses.
// A line starts a new verse when it contains '|' preceded by an alphanumeric
// character and followed by a digit, i.e. "ChapterName|Sec:SubSec text".
// Any other line met after the first verse is appended to the current verse.
// Every completed verse is handed to AddPsalm(). ChpNmSecIdKdV receives one
// entry each time the chapter name changes, keyed by the name and holding
// PsalmV.Len() at that moment.
TBible::TBible(const TStr& FNm):
  TBook("Bible", "King James Version", "Religious"),
  ChpNmSecIdKdV(), PsalmV(), Bix(TBix::New()){
  // open file
  PSIn SIn=TFIn::New(FNm);
  TILx Lx(SIn, TFSet());
  // define state variables
  bool InPsalm=false; TStr ChpNm; TChA SecNChA; int SecN=0;
  TChA SSecNChA; int SSecN=0; TStr SSecStr;
  // parsing
  while (Lx.GetSym(syLn, syEof)!=syEof){
    TChA& Ln=Lx.Str;
    const int LnLen=Ln.Len();
    int VBarChN=Ln.SearchCh('|');
    // both neighbours of '|' are inspected, so it has to be strictly inside
    // the line; SearchCh() reports "not found" as -1, which 0<VBarChN rejects
    if ((0<VBarChN)&&(VBarChN+1<LnLen)&&
     TCh::IsAlNum(Ln[VBarChN-1])&&TCh::IsNum(Ln[VBarChN+1])){
      // a new verse begins: flush the one collected so far
      if (InPsalm){AddPsalm(ChpNm, SecN, SSecN, SSecStr);}
      InPsalm=true;
      // chapter name
      ChpNm=Ln.GetSubStr(0, VBarChN-1).GetTrunc();
      if (ChpNmSecIdKdV.Empty()||ChpNmSecIdKdV.Last().Key!=ChpNm){
        ChpNmSecIdKdV.Add(TStrIntKd(ChpNm, PsalmV.Len()));}
      // section number (scan is bounded by the line length)
      SecNChA.Clr(); int ChN=VBarChN+1;
      while ((ChN<LnLen)&&TCh::IsNum(Ln[ChN])){SecNChA+=Ln[ChN]; ChN++;}
      SecN=TStr(SecNChA).GetInt();
      // the section number must be followed by ':'
      IAssert((ChN<LnLen)&&(Ln[ChN]==':'));
      // subsection number
      SSecNChA.Clr(); ChN++;
      while ((ChN<LnLen)&&TCh::IsNum(Ln[ChN])){SSecNChA+=Ln[ChN]; ChN++;}
      SSecN=TStr(SSecNChA).GetInt();
      // text
      SSecStr=Ln.GetSubStr(ChN, LnLen-1).GetTrunc();
    } else
    if (InPsalm){
      // continuation line of the current verse
      SSecStr=(SSecStr+' '+Ln).GetTrunc();
    }
  }
  // save last verse
  if (InPsalm){AddPsalm(ChpNm, SecN, SSecN, SSecStr);}
}
示例#5
0
// Parses a time of day written as
//   hour TimeSepCh minute TimeSepCh second MSecSepCh millisecond
// and returns it as a TTm whose year, month, day and day-of-week arguments
// are all -1. Fields that are missing or not numeric default to 0.
// The millisecond text is brought to exactly three digits before conversion:
// longer text is cut to its first three characters, one- and two-character
// text is right-padded with '0'.
TTm TTm::GetTmFromWebLogTimeStr(const TStr& TimeStr,
 const char TimeSepCh, const char MSecSepCh){
  const int Chs=TimeStr.Len();
  TChA FldChA; int ChN=0;
  // hour: everything up to the first time separator
  for (; (ChN<Chs)&&(TimeStr[ChN]!=TimeSepCh); ChN++){
    FldChA+=TimeStr[ChN];}
  TStr HourStr=FldChA;
  // minute: skip the separator, read up to the next time separator
  FldChA.Clr();
  for (ChN++; (ChN<Chs)&&(TimeStr[ChN]!=TimeSepCh); ChN++){
    FldChA+=TimeStr[ChN];}
  TStr MinStr=FldChA;
  // second: read up to the millisecond separator
  FldChA.Clr();
  for (ChN++; (ChN<Chs)&&(TimeStr[ChN]!=MSecSepCh); ChN++){
    FldChA+=TimeStr[ChN];}
  TStr SecStr=FldChA;
  // millisecond: the rest of the string
  FldChA.Clr();
  for (ChN++; ChN<Chs; ChN++){
    FldChA+=TimeStr[ChN];}
  TStr MSecStr=FldChA;
  // normalize the millisecond text to three digits
  const int MSecChs=MSecStr.Len();
  if (MSecChs>3){
    MSecStr=MSecStr.GetSubStr(0, 2);
  } else if (MSecChs==2){
    MSecStr+="0";
  } else if (MSecChs==1){
    MSecStr+="00";
  }
  // transform to numbers (0 when a field is not an integer)
  const int HourN=HourStr.GetInt(0);
  const int MinN=MinStr.GetInt(0);
  const int SecN=SecStr.GetInt(0);
  const int MSecN=MSecStr.GetInt(0);
  // construct and return the time
  return TTm(-1, -1, -1, -1, HourN, MinN, SecN, MSecN);
}
示例#6
0
// Parses a date-time written as
//   month DateSepCh day DateSepCh year ' ' hour TimeSepCh minute TimeSepCh second ' ' AM|PM
// and returns it as a TSecTm.
// If the hour field is not an integer, the time of day is taken as 00:00:00
// and the AM/PM marker is ignored. Otherwise the marker must be exactly "AM"
// or "PM" (anything else triggers Fail) and the hour is converted from the
// 12-hour to the 24-hour clock: 12 AM maps to hour 0 and 12 PM stays hour 12.
TSecTm TSecTm::GetDtTmFromMdyHmsPmStr(const TStr& MdyHmsPmStr,
 const char& DateSepCh, const char& TimeSepCh){
  int MdyHmsPmStrLen=MdyHmsPmStr.Len();
  // month
  TChA ChA; int ChN=0;
  while ((ChN<MdyHmsPmStrLen)&&(MdyHmsPmStr[ChN]!=DateSepCh)){
    ChA+=MdyHmsPmStr[ChN]; ChN++;}
  TStr MonthStr=ChA;
  // day
  ChA.Clr(); ChN++;
  while ((ChN<MdyHmsPmStrLen)&&(MdyHmsPmStr[ChN]!=DateSepCh)){
    ChA+=MdyHmsPmStr[ChN]; ChN++;}
  TStr DayStr=ChA;
  // year
  ChA.Clr(); ChN++;
  while ((ChN<MdyHmsPmStrLen)&&(MdyHmsPmStr[ChN]!=' ')){
    ChA+=MdyHmsPmStr[ChN]; ChN++;}
  TStr YearStr=ChA;
  // hour
  ChA.Clr(); ChN++;
  while ((ChN<MdyHmsPmStrLen)&&(MdyHmsPmStr[ChN]!=TimeSepCh)){
    ChA+=MdyHmsPmStr[ChN]; ChN++;}
  TStr HourStr=ChA;
  // minute
  ChA.Clr(); ChN++;
  while ((ChN<MdyHmsPmStrLen)&&(MdyHmsPmStr[ChN]!=TimeSepCh)){
    ChA+=MdyHmsPmStr[ChN]; ChN++;}
  TStr MinStr=ChA;
  // second
  ChA.Clr(); ChN++;
  while ((ChN<MdyHmsPmStrLen)&&(MdyHmsPmStr[ChN]!=' ')){
    ChA+=MdyHmsPmStr[ChN]; ChN++;}
  TStr SecStr=ChA;
  // AM/PM
  ChA.Clr(); ChN++;
  while (ChN<MdyHmsPmStrLen){
    ChA+=MdyHmsPmStr[ChN]; ChN++;}
  TStr AmPmStr=ChA;
  // transform to numbers
  int MonthN=MonthStr.GetInt();
  int DayN=DayStr.GetInt();
  int YearN=YearStr.GetInt();
  int HourN; int MinN; int SecN;
  if (HourStr.IsInt()){
    HourN=HourStr.GetInt();
    MinN=MinStr.GetInt();
    SecN=SecStr.GetInt();
    if (AmPmStr=="AM"){
      // 12:xx AM is the first hour of the day
      if (HourN==12){HourN=0;}
    } else if (AmPmStr=="PM"){
      // 12:xx PM is noon and already in 24-hour form; only 1..11 shift
      if (HourN<12){HourN+=12;}
    } else {Fail;}
  } else {
    // no usable time part: midnight
    HourN=0; MinN=0; SecN=0;
  }
  // construct the time
  TSecTm Tm=TSecTm::GetDtTm(YearN, MonthN, DayN);
  Tm.AddHours(HourN);
  Tm.AddMins(MinN);
  Tm.AddSecs(SecN);
  return Tm;
}
/////////////////////////////////////////////////
// Translation-Evaluation-Scores
// Splits Str into tokens and stores them in TokenV (cleared first).
// The input is first passed through GetLc() (presumably lower-casing; confirm
// against its definition). A token is a maximal run of characters accepted by
// IsChar(); every other character acts as a separator and is discarded.
void TEvalScore::Tokenize(const TStr& Str, TStrV& TokenV) {
    TStr LcStr = GetLc(Str);
    TokenV.Clr(); TChA WordChA;
    const int Chs = LcStr.Len();
    for (int ChN = 0; ChN < Chs; ChN++) {
        const char Ch = LcStr[ChN];
        if (IsChar(Ch)) {
            WordChA += Ch;
        } else if (!WordChA.Empty()) {
            TokenV.Add(WordChA);
            WordChA.Clr();
        }
    }
    // flush the token still being built when the input ends on a word
    // character; without this the final word would be lost
    if (!WordChA.Empty()) {
        TokenV.Add(WordChA);
    }
}
示例#8
0
文件: tm.cpp 项目: jethrotan/qminer
// Parses a date-time written as
//   year DateSepCh month DateSepCh day DateTimeSepCh
//   hour TimeSepCh minute TimeSepCh second MSecSepCh millisecond
// Year, month and day must all be integers, otherwise a default-constructed
// TTm is returned. Time fields that are missing or not numeric default to 0.
TTm TTm::GetTmFromWebLogDateTimeStr(const TStr& DateTimeStr,
 const char DateSepCh, const char TimeSepCh, const char MSecSepCh,
 const char DateTimeSepCh){
  const int Chs=DateTimeStr.Len();
  TChA FldChA; int ChN=0;
  // year: everything up to the first date separator
  for (; (ChN<Chs)&&(DateTimeStr[ChN]!=DateSepCh); ChN++){
    FldChA+=DateTimeStr[ChN];}
  TStr YearStr=FldChA;
  // month: skip the separator, read up to the next date separator
  FldChA.Clr();
  for (ChN++; (ChN<Chs)&&(DateTimeStr[ChN]!=DateSepCh); ChN++){
    FldChA+=DateTimeStr[ChN];}
  TStr MonthStr=FldChA;
  // day: read up to the date/time separator
  FldChA.Clr();
  for (ChN++; (ChN<Chs)&&(DateTimeStr[ChN]!=DateTimeSepCh); ChN++){
    FldChA+=DateTimeStr[ChN];}
  TStr DayStr=FldChA;
  // hour
  FldChA.Clr();
  for (ChN++; (ChN<Chs)&&(DateTimeStr[ChN]!=TimeSepCh); ChN++){
    FldChA+=DateTimeStr[ChN];}
  TStr HourStr=FldChA;
  // minute
  FldChA.Clr();
  for (ChN++; (ChN<Chs)&&(DateTimeStr[ChN]!=TimeSepCh); ChN++){
    FldChA+=DateTimeStr[ChN];}
  TStr MinStr=FldChA;
  // second: read up to the millisecond separator
  FldChA.Clr();
  for (ChN++; (ChN<Chs)&&(DateTimeStr[ChN]!=MSecSepCh); ChN++){
    FldChA+=DateTimeStr[ChN];}
  TStr SecStr=FldChA;
  // millisecond: the rest of the string
  FldChA.Clr();
  for (ChN++; ChN<Chs; ChN++){
    FldChA+=DateTimeStr[ChN];}
  TStr MSecStr=FldChA;
  // transform to numbers: -1 marks an unusable date field, time fields fall
  // back to 0
  const int YearN=YearStr.GetInt(-1);
  const int MonthN=MonthStr.GetInt(-1);
  const int DayN=DayStr.GetInt(-1);
  const int HourN=HourStr.GetInt(0);
  const int MinN=MinStr.GetInt(0);
  const int SecN=SecStr.GetInt(0);
  const int MSecN=MSecStr.GetInt(0);
  // without a complete date there is nothing to construct
  if ((YearN==-1)||(MonthN==-1)||(DayN==-1)){
    return TTm();
  }
  return TTm(YearN, MonthN, DayN, -1, HourN, MinN, SecN, MSecN);
}
示例#9
0
// Strips markup from HtmlStr and writes the remaining text to TextStr
// (cleared first). Every text run found between tags is appended followed by
// a single space. Comments ("<!--" up to "-->") are skipped as a whole, and
// for a tag starting with "<script" everything up to the next "script>" is
// skipped. Any other tag is skipped up to its closing '>'.
// The input is only read: it is never modified, not even temporarily.
// The look-ahead reads e[1]..e[6] rely on the NUL terminator at the end of
// the buffer returned by CStr() to stop before leaving the string.
// NOTE(review): for a bare "<script>" tag the first "script>" found is the
// opening tag itself, so the script body is still emitted as text; only
// opening tags with attributes skip through to "</script>". Confirm whether
// that is intended.
void TStrUtil::RemoveHtmlTags(const TChA& HtmlStr, TChA& TextStr) {
  TextStr.Clr();
  const char* StrB = HtmlStr.CStr();
  const char* StrE = StrB+HtmlStr.Len();
  for (const char* e = StrB; e < StrE; ) {
    const char* b = e;
    while (e<StrE && *e != '<') { e++; }
    // copy the text run [b, e) and separate it from the next one
    for (const char* c = b; c < e; c++) { TextStr.AddCh(*c); }
    TextStr.AddCh(' ');
    if (e >= StrE) { return; }
    // if start of a comment: skip to the terminating "-->"
    if (e[1]=='!' && e[2]=='-' && e[3]=='-') { // comment
      e += 3;
      while(e<StrE && !(*(e-2)=='-' && *(e-1)=='-' && *e=='>')) { e++; }
      e++;  continue;
    }
    // if "<script" then skip to the next "script>"
    if (e[1]=='s' && e[2]=='c' && e[3]=='r' && e[4]=='i' && e[5]=='p' && e[6]=='t') {
      // the earliest position at which "script>" can end is 7 characters
      // after '<'; starting there keeps the look-behind inside the buffer
      e += 7;
      while(e<StrE && !(*(e-6)=='s' && *(e-5)=='c' && *(e-4)=='r' && *(e-3)=='i' && *(e-2)=='p' && *(e-1)=='t' && *e=='>')) { e++; }
      e++;  continue;
    }
    // skip to end of tag
    while (e < StrE && *e != '>') { e++; }
    if (e>=StrE) { return; }
    e++;
  }
}
示例#10
0
// Reads the next line into LnChA (cleared first).
// The line is assembled from one or more buffer chunks located by FindEol():
//   status 1   - the chunk completes a line; it is appended and true returned
//   status 0   - the chunk is appended and more data is requested
//   status < 0 - nothing is appended and reading stops
//   other      - the chunk is appended and reading stops
// When reading stops without a complete line, the result is true only if
// some characters were collected (e.g. a last line without a newline).
bool TZipIn::GetNextLnBf(TChA& LnChA) {
  int ChunkEnd;   // filled in by FindEol(): buffer position ending the chunk
  LnChA.Clr();
  for (;;) {
    // the chunk starts at the current buffer position, or at 0 when the
    // buffer is used up (FindEol() is expected to load a new buffer then)
    int ChunkStart = 0;
    if (BfC < BfL) { ChunkStart = BfC; }
    const int Status = FindEol(ChunkEnd);
    if (Status < 0) { break; }
    LnChA.AddBf(&Bf[ChunkStart], ChunkEnd-ChunkStart);
    if (Status == 1) { return true; }  // got a complete line
    if (Status != 0) { break; }        // only status 0 asks for more data
  }
  // eof, or the last line has no newline
  return !LnChA.Empty();
}
示例#11
0
// Reads one element from the lexer: a start tag, then everything up to the
// end tag carrying the same name (other tags may occur in between).
// Returns false if the next symbol is not a start tag. Otherwise TagNm gets
// the element name and TagVal the collected content:
//   TakeTagNms == true  - TxtChA of every symbol met, the start tag included
//   TakeTagNms == false - TxtChA of string symbols only
// On return the lexer is positioned on the matching end tag.
bool TStrUtil::GetXmlTagNmVal2(TXmlLx& XmlLx, TChA& TagNm, TChA& TagVal, const bool& TakeTagNms) {
  const bool IsStartTag = (XmlLx.GetSym() == xsySTag);
  if (!IsStartTag) { return false; }
  TagVal.Clr();
  TagNm = XmlLx.TagNm;
  // walk the symbols until the end tag with the same name shows up
  while (XmlLx.Sym != xsyETag || XmlLx.TagNm != TagNm.CStr()) {
    const bool TakeTxt = TakeTagNms || (XmlLx.Sym == xsyStr);
    if (TakeTxt) { TagVal += XmlLx.TxtChA; }
    XmlLx.GetSym();
  }
  return true;
}
示例#12
0
文件: webtrv.cpp 项目: Accio/snap
void TWebTravelHomeNet::StartTravel(){
  PutConstrs();
  TStrV UrlStrV(300000, 0);
  TIntIntH UserIdToDocsH(1000);
  PSIn SIn=PSIn(new TFIn(InFNm));
  TILx Lx(SIn, TFSet()|iloRetEoln);
  TChA UrlStr;
  Lx.GetSym(syInt, syEof);
  while ((Lx.Sym!=syEof)&&(Lx.SymLnN<200000)){
//  while (Lx.Sym!=syEof){
    int UserId=Lx.Int; Lx.GetSym(syComma);
    Lx.GetInt(); Lx.GetSym(syComma);
    Lx.GetInt(); Lx.GetSym(syComma);
    Lx.GetInt(); Lx.GetSym(syComma);
    Lx.GetInt(); Lx.GetSym(syComma);
    TStr Method=Lx.GetIdStr(); Lx.GetSym(syComma);  // GET, POST
    UrlStr.Clr(); UrlStr+=Lx.GetIdStr(); Lx.GetSym(syComma); // http, ftp
    UrlStr+="://";
    UrlStr+=Lx.GetStrToCh(','); Lx.GetSym(syComma); // domain name
    UrlStr+=Lx.GetStrToEoln(); Lx.GetEoln(); // path
    if ((UserId==TgUserId)&&IsUrlOk(UrlStr)&&(Method=="GET")){
      UserIdToDocsH.AddDat(UserId)++;
      UrlStrV.Add(UrlStr);
    }
    Lx.GetSym(syInt, syEof);
    if (Lx.SymLnN%100000==0){OnNotify(TInt::GetStr(Lx.SymLnN)+ " docs");}
  }
  int UserIdToDocsP=UserIdToDocsH.FFirstKeyId();
  while (UserIdToDocsH.FNextKeyId(UserIdToDocsP)){
    int UserId=UserIdToDocsH.GetKey(UserIdToDocsP);
    int Docs=UserIdToDocsH[UserIdToDocsP];
    TStr MsgStr=TStr("User ")+TInt::GetStr(UserId)+": "+
     TInt::GetStr(Docs)+" Docs.";
    OnNotify(MsgStr);
  }
  UrlStrV.Shuffle(TRnd());
  for (int UrlStrN=0; UrlStrN<UrlStrV.Len(); UrlStrN++){
    Go(UrlStrV[UrlStrN]);
  }
}
示例#13
0
文件: ss.cpp 项目: amrsobhy/qminer
PSs TSs::LoadTxt(
    const TSsFmt& SsFmt, const TStr& FNm,
    const PNotify& Notify, const bool& IsExcelEoln,
    const int& MxY, const TIntV& AllowedColNV, const bool& IsQStr) {
    TNotify::OnNotify(Notify, ntInfo, TStr("Loading File ")+FNm+" ...");
    PSIn SIn=TFIn::New(FNm);
    PSs Ss=TSs::New();
    if (!SIn->Eof()) {
        int X=0;
        int Y=0;
        int PrevX=-1;
        int PrevY=-1;
        char Ch=SIn->GetCh();
        TChA ChA;
        while (!SIn->Eof()) {
            // compose value
            ChA.Clr();
            if (IsQStr&&(Ch=='"')) {
                // quoted string ('""' sequence means '"')
                Ch=SIn->GetCh();
                forever {
                    while ((!SIn->Eof())&&(Ch!='"')) {
                        ChA+=Ch;
                        Ch=SIn->GetCh();
                    }
                    if (Ch=='"') {
                        Ch=SIn->GetCh();
                        if (Ch=='"') {
                            ChA+=Ch;
                            Ch=SIn->GetCh();
                        }
                        else {
                            break;
                        }
                    }
                }
            } else {
                if (SsFmt==ssfTabSep) {
// Splits the stream into paragraphs and appends them to Paragraphs (which is
// not cleared). Two consecutive whitespace characters (' ', '\t' or '\n')
// end the current paragraph; the first of the two stays in the paragraph
// text, the second is dropped. Empty paragraphs are never emitted, so longer
// whitespace runs produce a single break.
void TTokenizerUtil::Paragraphize(const PSIn& SIn, TStrV& Paragraphs) {
	TChA ParagraphBuf;
	int c;
	bool wasSpace = false;
	while (!SIn->Eof()) {
		c = SIn->GetCh();
		// two consecutive spaces signal a new paragraph
		if (c == ' ' || c == '\t' || c == '\n') {
			if (wasSpace) {
				// the third and later characters of a whitespace run arrive here
				// with an empty buffer; skip them instead of emitting empty
				// paragraphs (the final flush below applies the same rule)
				if (ParagraphBuf.Len() > 0) {
					Paragraphs.Add(ParagraphBuf);
					ParagraphBuf.Clr();
				}
				continue;
			}
			wasSpace = true;
		} else {
			wasSpace = false;
		}
		ParagraphBuf += c;
	}
	// flush whatever is left after the last break
	if (ParagraphBuf.Len() > 0) {
		Paragraphs.Add(ParagraphBuf);
	}
}
///////////////////////////////
// Tokenizer-Utils
// Splits the stream into sentences and appends them to Sentences (which is
// not cleared). The characters " . ! : ; ? and tab end a sentence and are
// not stored. '\r' and '\n' also end a sentence when SplitNewLineP is true;
// otherwise each of them is replaced by a single space.
// A buffer of two characters or fewer is neither emitted nor cleared at a
// terminator, so its content is carried over into the following sentence.
// Text left after the last terminator is emitted if it is not empty.
void TTokenizerUtil::Sentencize(const PSIn& SIn, TStrV& Sentences, const bool& SplitNewLineP) {
	TChA SentenceBuf;
	int c;
	while (!SIn->Eof()) {
		c = SIn->GetCh();
		switch (c) {
			case '\r':
			case '\n':	{
				if (!SplitNewLineP) {
					SentenceBuf += ' ';
					break;
				}
			}
			// intentional fall-through: with SplitNewLineP set, a line break
			// is handled like any other sentence terminator
			case '"' :
			case '.' :
			case '!' :
			case ':' :
			case ';' :
			case '?' :
			case '\t': {
				if (SentenceBuf.Len() > 2) {
					Sentences.Add(SentenceBuf);
					SentenceBuf.Clr();
				}
				break;
			}
			default: 
				SentenceBuf += c;
				break;
		}
	}
	if (SentenceBuf.Len() > 0) {
		Sentences.Add(SentenceBuf);
	}	
}
示例#16
0
// Builds a TAmazonItem from a fetched item page by scanning its HTML tokens
// in five consecutive phases:
//   1. skip to the FONT tag with FACE="verdana,arial,helvetica" and no SIZE
//   2. collect the title: all tokens up to the closing FONT tag
//   3. collect author names: the string tokens inside each anchor, up to the
//      next closing FONT tag
//   4. skip to the three consecutive string tokens "Customers who bought"
//   5. collect the cross-sell item ids from the HREF of every anchor up to
//      the closing UL tag
// Every phase also stops at end of input, in which case the later phases
// simply find nothing.
PAmazonItem TAmazonItem::GetFromWebPg(const PWebPg& WebPg){
  TStr UrlStr=WebPg->GetUrlStr();
  TStr ItemId=TAmazonItem::GetItemId(WebPg->GetUrl());
  TStr HtmlStr=WebPg->GetHttpBodyAsStr();
  PSIn HtmlSIn=TStrIn::New(HtmlStr);
  THtmlLx HtmlLx(HtmlSIn);
  THtmlLxSym Sym; TChA ChA;

  // move to title
  while (HtmlLx.GetSym()!=hsyEof){
    Sym=HtmlLx.Sym; ChA=HtmlLx.ChA;
    if ((Sym==hsyBTag)&&(ChA=="<FONT>")){
      TStr FaceArg=HtmlLx.GetArg("FACE", "");
      TStr SizeArg=HtmlLx.GetArg("SIZE", "");
      if ((FaceArg=="verdana,arial,helvetica")&&(SizeArg.Empty())){break;}
    }
  }
  // extract title
  TChA TitleChA;
  while (HtmlLx.GetSym()!=hsyEof){
    Sym=HtmlLx.Sym; ChA=HtmlLx.ChA;
    if ((Sym==hsyETag)&&(ChA=="<FONT>")){break;}
    if (!TitleChA.Empty()){TitleChA+=HtmlLx.GetPreSpaceStr();}
    TitleChA+=ChA;
  }
  TStr TitleStr=TitleChA;
  // extract authors
  TStrV AuthorNmV;
  TChA AuthorNmChA;
  while (HtmlLx.GetSym()!=hsyEof){
    Sym=HtmlLx.Sym; ChA=HtmlLx.ChA;
    if ((Sym==hsyBTag)&&(ChA=="<A>")){
      // gather the anchor text; also stop at end of input so that an anchor
      // that is never closed cannot keep this loop running forever
      do {
        HtmlLx.GetSym();
        Sym=HtmlLx.Sym; ChA=HtmlLx.ChA;
        if (Sym==hsyStr){
          if (!AuthorNmChA.Empty()){AuthorNmChA+=HtmlLx.GetPreSpaceStr();}
          AuthorNmChA+=ChA;
        }
      } while ((Sym!=hsyEof)&&!((Sym==hsyETag)&&(ChA=="<A>")));
      AuthorNmV.Add(AuthorNmChA); AuthorNmChA.Clr();
    }
    if ((Sym==hsyETag)&&(ChA=="<FONT>")){break;}
  }
  // move to x-sell
  TStrQ PrevStrQ(3);
  while (HtmlLx.GetSym()!=hsyEof){
    Sym=HtmlLx.Sym; ChA=HtmlLx.ChA;
    if (Sym==hsyStr){
      PrevStrQ.Push(ChA);
      if ((PrevStrQ.Len()==3)&&(PrevStrQ[0]=="Customers")
       &&(PrevStrQ[1]=="who")&&(PrevStrQ[2]=="bought")){break;}
    } else {
      // any non-string token interrupts the phrase
      PrevStrQ.Clr();
    }
  }
  // extract x-sell pointers
  TStrV NextItemIdV;
  while (HtmlLx.GetSym()!=hsyEof){
    Sym=HtmlLx.Sym; ChA=HtmlLx.ChA;
    if ((Sym==hsyBTag)&&(ChA=="<A>")){
      // resolve the link against the page url before extracting the item id
      TStr RelUrlStr=HtmlLx.GetArg("HREF");
      PUrl Url=TUrl::New(RelUrlStr, UrlStr);
      TStr NextItemId=TAmazonItem::GetItemId(Url);
      NextItemIdV.Add(NextItemId);
    }
    if ((Sym==hsyETag)&&(ChA=="<UL>")){break;}
  }

  // construct item object
  PAmazonItem AmazonItem=PAmazonItem(new
   TAmazonItem(ItemId, TitleStr, AuthorNmV, NextItemIdV));
  return AmazonItem;
}
示例#17
0
// Parses a date-time written as
//   year DateSepCh month DateSepCh day ' '
//   hour TimeSepCh minute TimeSepCh second MSecSepCh millisecond
// Year, month and day must all be integers, otherwise a default-constructed
// TTm is returned. Time fields that are missing or not numeric default to 0.
TTm TTm::GetTmFromWebLogDateTimeStr(const TStr& DateTimeStr,
 const char DateSepCh, const char TimeSepCh, const char MSecSepCh){
  const int Chs=DateTimeStr.Len();
  TChA FldChA; int ChN=0;
  // year: everything up to the first date separator
  for (; (ChN<Chs)&&(DateTimeStr[ChN]!=DateSepCh); ChN++){
    FldChA+=DateTimeStr[ChN];}
  TStr YearStr=FldChA;
  // month: skip the separator, read up to the next date separator
  FldChA.Clr();
  for (ChN++; (ChN<Chs)&&(DateTimeStr[ChN]!=DateSepCh); ChN++){
    FldChA+=DateTimeStr[ChN];}
  TStr MonthStr=FldChA;
  // day: read up to the blank separating date and time
  FldChA.Clr();
  for (ChN++; (ChN<Chs)&&(DateTimeStr[ChN]!=' '); ChN++){
    FldChA+=DateTimeStr[ChN];}
  TStr DayStr=FldChA;
  // hour
  FldChA.Clr();
  for (ChN++; (ChN<Chs)&&(DateTimeStr[ChN]!=TimeSepCh); ChN++){
    FldChA+=DateTimeStr[ChN];}
  TStr HourStr=FldChA;
  // minute
  FldChA.Clr();
  for (ChN++; (ChN<Chs)&&(DateTimeStr[ChN]!=TimeSepCh); ChN++){
    FldChA+=DateTimeStr[ChN];}
  TStr MinStr=FldChA;
  // second: read up to the millisecond separator
  FldChA.Clr();
  for (ChN++; (ChN<Chs)&&(DateTimeStr[ChN]!=MSecSepCh); ChN++){
    FldChA+=DateTimeStr[ChN];}
  TStr SecStr=FldChA;
  // millisecond: the rest of the string
  FldChA.Clr();
  for (ChN++; ChN<Chs; ChN++){
    FldChA+=DateTimeStr[ChN];}
  TStr MSecStr=FldChA;
  // transform to numbers: -1 marks an unusable date field, time fields fall
  // back to 0
  const int YearN=YearStr.GetInt(-1);
  const int MonthN=MonthStr.GetInt(-1);
  const int DayN=DayStr.GetInt(-1);
  const int HourN=HourStr.GetInt(0);
  const int MinN=MinStr.GetInt(0);
  const int SecN=SecStr.GetInt(0);
  const int MSecN=MSecStr.GetInt(0);

/*
  // disabled: convert month name to number and flip date/day (oracle: 10-FEB-05)
  if ((MonthN==-1)&&(isalpha(MonthStr.CStr()[0]))){
    if ((MonthN=MonthParser.GetMonthN(MonthStr))!=-1){
      int Y=DayN; DayN=YearN; YearN=Y<100?Y+2000:Y;
    }
  }
*/

  // without a complete date there is nothing to construct
  if ((YearN==-1)||(MonthN==-1)||(DayN==-1)){
    return TTm();
  }
  return TTm(YearN, MonthN, DayN, -1, HourN, MinN, SecN, MSecN);
}
示例#18
0
bool IsCTxtHttpResp(const PUrl& Url, const PHttpResp& HttpResp, const int& MnCTxtToks){
  if (HttpResp->IsStatusCd_Ok()){
    PWebPg WebPg=TWebPg::New(Url->GetUrlStr(), HttpResp);
    if (HttpResp->IsContType(THttp::TextHtmlFldVal)){
      TMem BodyMem=HttpResp->GetBodyAsMem();
      PSIn BodyMemIn=TMemIn::New(BodyMem);
      // prepare html-tokens
      PHtmlDoc HtmlDoc=THtmlDoc::New(BodyMemIn, hdtAll, false);
      int Toks=HtmlDoc->GetToks(); THtmlLxSym TokSym; TStr TokStr;
      // prepare continuous-text indicators
      int CTxtToks=0; TChA CTxtChA; bool CTxtP=false;
      // prepare script & style flag
      bool InScript=false; bool InStyle=false; 
      // traverse tokens
      for (int TokN=0; TokN<Toks; TokN++){
        // get token data
        HtmlDoc->GetTok(TokN, TokSym, TokStr);
        switch (TokSym){
          case hsyStr:
          case hsyNum:
          case hsySSym:
            if (!InScript&&!InStyle){
              // text token
              CTxtToks++; CTxtChA+=TokStr; CTxtChA+=' '; 
            }
            break;
          case hsyBTag:
            if (!InScript&&!InStyle){
              if (TokStr=="<SCRIPT>"){
                // start of script
                InScript=true; CTxtToks=0; CTxtChA.Clr();
              } else 
              if (TokStr=="<STYLE>"){
                // start of style
                InStyle=true; CTxtToks=0; CTxtChA.Clr();
              } else {
                if ((TokStr=="<P>")||(TokStr=="<B>")||(TokStr=="<I>")){
                  // skip in-text-tags
                } else {
                  // non-text-tags - break continuous-text
                  CTxtToks=0; CTxtChA.Clr();
                }
              }
            }
            break;
          case hsyETag:
            if (InScript||InStyle){
              if (TokStr=="<SCRIPT>"){
                // end of script
                InScript=false;
              } else
              if (TokStr=="<STYLE>"){
                // end of style
                InStyle=false;
              }
            }
            break;
          default: 
            // non-text-token - break continuous-text
            CTxtToks=0; CTxtChA.Clr();
            break;
        }
        // stop if enough continuous-text
        if (CTxtToks>MnCTxtToks){
          CTxtP=true; break;
        }
      }
      if (CTxtP){
        printf("%s\n", Url->GetUrlStr().CStr());
      }
      return CTxtP;
    }
  }
  return false;
}