Beispiel #1
0
// A freshly default-constructed TStrIntH must report Empty(), contain zero
// entries, and return 0 from GetMxKeyIds().
TEST(TStrIntH, DefaultConstructor) {
  TStrIntH FreshTable;

  EXPECT_EQ(1, FreshTable.Empty());
  EXPECT_EQ(0, FreshTable.Len());
  EXPECT_EQ(0, FreshTable.GetMxKeyIds());
}
Beispiel #2
0
// Builds a TNodeJsRf24Radio from the JSON configuration passed as the first
// JavaScript argument.
//
// Fields read from the configuration object:
//   pinCE, pinCSN - integers forwarded to the constructor
//   id            - integer, narrowed to uint16
//   sensors       - array of objects, each providing "id" (string),
//                   "internalId" (int) and "nodeId" (int)
//   verbose       - optional bool (default false); selects TNotify::StdNotify
//                   instead of TNotify::NullNotify
//
// Returns a raw pointer to an object allocated with `new`.
TNodeJsRf24Radio* TNodeJsRf24Radio::NewFromArgs(const v8::FunctionCallbackInfo<v8::Value>& Args) {
	v8::Isolate* Isolate = v8::Isolate::GetCurrent();
	v8::HandleScope HandleScope(Isolate);

	PJsonVal ConfigJson = TNodeJsUtil::GetArgJson(Args, 0);

	// scalar settings (read in this order)
	const int PinCE = ConfigJson->GetObjInt("pinCE");
	const int PinCSN = ConfigJson->GetObjInt("pinCSN");
	const uint16 MyId = static_cast<uint16>(ConfigJson->GetObjInt("id"));
	const PJsonVal SensorArrJson = ConfigJson->GetObjKey("sensors");

	// logging target depends on the "verbose" switch
	const bool Verbose = ConfigJson->GetObjBool("verbose", false);
	const PNotify Notify = Verbose ? TNotify::StdNotify : TNotify::NullNotify;

	Notify->OnNotify(TNotifyType::ntInfo, "Parsing configuration ...");

	// both tables are keyed by the sensor's string id
	TStrIntH SensorNmIdH;
	TStrIntH SensorIdNodeIdH;

	int SensorIdx = 0;
	while (SensorIdx < SensorArrJson->GetArrVals()) {
		const PJsonVal OneSensorJson = SensorArrJson->GetArrVal(SensorIdx);
		const TStr& SensorKey = OneSensorJson->GetObjStr("id");
		SensorNmIdH.AddDat(SensorKey, OneSensorJson->GetObjInt("internalId"));
		SensorIdNodeIdH.AddDat(SensorKey, OneSensorJson->GetObjInt("nodeId"));
		SensorIdx++;
	}

	Notify->OnNotify(TNotifyType::ntInfo, "Calling cpp constructor ...");

	return new TNodeJsRf24Radio(MyId, PinCE, PinCSN, SensorNmIdH, SensorIdNodeIdH, Notify);
}
Beispiel #3
0
/////////////////////////////////////////////////
// SkyGrid-Document
void TSkyGridBinDoc::SaveBinDocV(
 const TStr& InXmlFPath, const TStr& OutBinFNm, const int& MxDocs){
  printf("Processing SkyGrid-News-Xml files from '%s'...\n", InXmlFPath.CStr());
  TFOut SOut(OutBinFNm);
  TFFile FFile(InXmlFPath, true); TStr FNm;
  int Docs=0; int DateDocs=0; uint64 PrevTm=0;
  while (FFile.Next(FNm)){
    if ((MxDocs!=-1)&&(Docs>=MxDocs)){break;}
    //printf("  Processing '%s' ...", FNm.CStr());
    PXmlDoc XmlDoc=TXmlDoc::LoadTxt(FNm);
    PXmlTok ContentTok=XmlDoc->GetTagTok("item|content");
    TStr SwIdStr=ContentTok->GetTagTok("swid")->GetArgVal("value");
    TStr UrlStr=ContentTok->GetTagTok("url")->GetTokStr(false);
    TStr TitleStr=ContentTok->GetTagTok("title")->GetTokStr(false);
    TStr FetchedValStr=ContentTok->GetTagTok("fetched")->GetArgVal("value");
    TXmlTokV EntityTokV; ContentTok->GetTagTokV("annotations|entity", EntityTokV);
    TStr BodyStr=ContentTok->GetTagTok("body")->GetTokStr(false);
    // extract date
    TStr DateStr=SwIdStr.GetSubStr(0, 7);
    TStr YearStr=DateStr.GetSubStr(0, 3);
    TStr MonthStr=DateStr.GetSubStr(4, 5);
    TStr DayStr=DateStr.GetSubStr(6, 7);
    TTm DateTm(YearStr.GetInt(), MonthStr.GetInt(), DayStr.GetInt());
    uint64 Tm=TTm::GetMSecsFromTm(DateTm);
    // extract entities
    TStrIntH EntNmToFqH;
    for (int EntityTokN=0; EntityTokN<EntityTokV.Len(); EntityTokN++){
      PXmlTok EntityTok=EntityTokV[EntityTokN];
      if (!EntityTok->IsTag("entity")){continue;}
      TStr CanonicalNm=EntityTok->GetArgVal("canonical", "");
      TStr TextStr=EntityTok->GetArgVal("text", "");
      TStr TypeNm=EntityTok->GetArgVal("type", "");
      TStr EntNm=CanonicalNm.Empty() ? TextStr : CanonicalNm;
      EntNmToFqH.AddDat(EntNm)++;
    }
    TIntStrPrV FqEntNmPrV; EntNmToFqH.GetDatKeyPrV(FqEntNmPrV); FqEntNmPrV.Sort(false);
    // extract headline
    TChA HeadlineChA=BodyStr.GetSubStr(0, 250);
    while ((HeadlineChA.Len()>0)&&(HeadlineChA.LastCh()!=' ')){
      HeadlineChA.Trunc(HeadlineChA.Len()-1);}
    HeadlineChA+="...";
    // create document
    TSkyGridBinDoc Doc(SwIdStr, Tm, TitleStr, HeadlineChA, FqEntNmPrV);
    // save document
    Doc.Save(SOut);
    // screen log
    if (PrevTm!=Tm){
      if (PrevTm!=0){printf("\n");}
      PrevTm=Tm; DateDocs=0;
    }
    Docs++; DateDocs++;
    printf("  %s [Day:%d / All:%d]\r", DateStr.CStr(), DateDocs, Docs);
  }
  printf("\nDone.\n");
}
Beispiel #4
0
void TSkyGridEnt::GetDocsPerDateV(
 const TSkyGridBs* SkyGridBs, TStrIntPrV& DateStrDocsPrV, int& Docs) const {
  TStrIntH DateStrToDocsH; Docs=0;
  for (int DocN=0; DocN<GetDocIds(); DocN++){
    int DocId=GetDocId(DocN);
    PSkyGridDoc Doc=SkyGridBs->GetDoc(DocId);
    uint64 DocTm=Doc->GetTm();
    TStr DocDateStr=TTm::GetTmFromMSecs(DocTm).GetWebLogDateStr();
    DateStrToDocsH.AddDat(DocDateStr)++; Docs++;
  }
  DateStrToDocsH.GetKeyDatPrV(DateStrDocsPrV);
  DateStrDocsPrV.Sort();
}
Beispiel #5
0
// Constructs the radio wrapper and fills the two id-conversion tables.
//
// NodeId, PinCE, PinCSN - forwarded to the Radio member
// ValueNmIdH            - value name -> value id
// ValueNmNodeIdH        - value name -> id of the node owning that value;
//                         looked up with GetDat() for every name in ValueNmIdH
// _Notify               - logger, also handed to the Radio member
//
// For every value name two entries are created:
//   ValNmNodeIdValIdPrH : name -> (node id, value id)
//   NodeIdValIdPrValNmH : (node id, value id) -> name
TNodeJsRf24Radio::TNodeJsRf24Radio(const uint16& NodeId, const int& PinCE, const int& PinCSN,
		const TStrIntH& ValueNmIdH, const TStrIntH& ValueNmNodeIdH,
		const PNotify& _Notify):
	Radio(NodeId, PinCE, PinCSN, BCM2835_SPI_SPEED_8MHZ, _Notify),
	ValNmNodeIdValIdPrH(),
	NodeIdValIdPrValNmH(),
	OnValueCallback(),
	Notify(_Notify) {

	Notify->OnNotify(TNotifyType::ntInfo, "Setting radio cpp callback ...");
	Radio.SetCallback(this);

	Notify->OnNotify(TNotifyType::ntInfo, "Initializing Id conversion structures ...");
	for (int NameKeyId = ValueNmIdH.FFirstKeyId(); ValueNmIdH.FNextKeyId(NameKeyId); ) {
		const TStr& ValueNm = ValueNmIdH.GetKey(NameKeyId);

		// named OwnerNodeId so it does not hide the constructor's NodeId parameter
		const int ValueId = ValueNmIdH[NameKeyId];
		const int OwnerNodeId = ValueNmNodeIdH.GetDat(ValueNm);

		ValNmNodeIdValIdPrH.AddDat(ValueNm, TIntPr(OwnerNodeId, ValueId));
		NodeIdValIdPrValNmH.AddDat(TIntPr(OwnerNodeId, ValueId), ValueNm);
	}
}
Beispiel #6
0
int main(int argc, char* argv[]){
  Try;
  // create environment
  Env=TEnv(argc, argv, TNotify::StdNotify);
  // get command line parameters
  Env.PrepArgs("Crawl-Base to Text", 0);
  TStr InCrawlBsFNm=Env.GetIfArgPrefixStr("-i:", "", "Crawl-Base-FileName");
  TStr OutTxtFNm=Env.GetIfArgPrefixStr("-ot:", "Crawl.Txt", "Output-Text-Filename");
  TStr OutStatFNm=Env.GetIfArgPrefixStr("-os:", "Crawl.Stat.Txt", "Output-Statistics-Text-Filename");
  bool SaveContP=Env.GetIfArgPrefixBool("-sc:", false, "Save-Content");
  bool SaveContOutUrlP=Env.GetIfArgPrefixBool("-scou:", true, "Save-Content-Outgoing-Urls");
  bool SaveContTagP=Env.GetIfArgPrefixBool("-sct:", true, "Save-Content-Tags");
  bool SaveOutUrlP=Env.GetIfArgPrefixBool("-sou:", false, "Save-Outgoing-Urls");
  bool SaveCTxtP=Env.GetIfArgPrefixBool("-sctc:", false, "Save-Continuos-Text-Content");
  int MnCTxtToks=Env.GetIfArgPrefixInt("-mctt:", 100, "Minimal-Continuos-Text-Tokens");
  TStrV BlockedDmNmV=Env.GetIfArgPrefixStrV("-bd:", "Blocked-Domain-Names (multiple)");
  if (Env.IsEndOfRun()){return 0;}
  // -i:si -sc:t -scou:n -sct:n -sctc:t -bd:.delo.si -bd:.dnevnik.si -bd:.vecer.si

  TStr BlobBsFMid=InCrawlBsFNm.GetFMid();
  // output file
  TFOut TxtFOut(OutTxtFNm); FILE* fTxt=TxtFOut.GetFileId();
  fprintf(fTxt, "Comment:input=%s\n", InCrawlBsFNm.CStr());
  fprintf(fTxt, "Comment:output=%s\n", OutTxtFNm.CStr());
  fprintf(fTxt, "BlobBaseName:%s\n", BlobBsFMid.CStr());
  // statistics
  TStrIntH HostNmToFqH;
  TStrIntH StatusCdToFqH;
  TStrIntH ContTypeToFqH;
  PMom HttpContLenMom=TMom::New();

  PBlobBs CrawlBBs=TMBlobBs::New(InCrawlBsFNm);
  TBlobPt TrvCrawlBPt=CrawlBBs->FFirstBlobPt();
  TBlobPt CrawlBPt; PSIn CrawlBlobSIn; int CrawlBlobN=0;
  while (CrawlBBs->FNextBlobPt(TrvCrawlBPt, CrawlBPt, CrawlBlobSIn)){
    CrawlBlobN++; printf("%d\r", CrawlBlobN);
    TStr DateTimeStr(*CrawlBlobSIn); //TStr DateTimeStr;
    TStr UrlStr(*CrawlBlobSIn);
    PUrl Url=TUrl::New(UrlStr); IAssert(Url->IsOk(usHttp));
    TMem HttpRespMem(*CrawlBlobSIn);
    PSIn HttpRespSIn=HttpRespMem.GetSIn();
    PHttpResp HttpResp=THttpResp::New(HttpRespSIn);
    // statistics
    HostNmToFqH.AddDat(Url->GetHostNm())++;
    StatusCdToFqH.AddDat(TInt::GetStr(HttpResp->GetStatusCd()))++;
    ContTypeToFqH.AddDat(HttpResp->GetFldVal(THttp::ContTypeFldNm))++;
    int ContLen=HttpResp->GetFldVal(THttp::ContLenFldNm).GetInt(-1);
    if (ContLen!=-1){
      HttpContLenMom->Add(ContLen);}
    // check blocked domain-names
    if (!BlockedDmNmV.Empty()){
      TStr DmNm=Url->GetDmNm(); int BlockedDmP=false;
      for (int BDmNmN=0; BDmNmN<BlockedDmNmV.Len(); BDmNmN++){
        if (DmNm.IsSuffix(BlockedDmNmV[BDmNmN])){
          BlockedDmP=true; break;
        }
      }
      if (BlockedDmP){
        continue;
      }
    }
    // check continuos-text
    if (SaveCTxtP&&IsCTxtHttpResp(Url, HttpResp, MnCTxtToks)){continue;}
    if (HttpResp->IsStatusCd_Ok()){
      PWebPg WebPg=TWebPg::New(UrlStr, HttpResp);
      fprintf(fTxt, "Start:HttpOk\n");
      fprintf(fTxt, "BlobBaseAddress:bb://%s/%d/%d\n",
       BlobBsFMid.CStr(), CrawlBPt.GetSeg(), CrawlBPt.GetAddr());
      fprintf(fTxt, "DateTime:%s\n", DateTimeStr.CStr());
      fprintf(fTxt, "Url:%s\n", UrlStr.CStr());
      fprintf(fTxt, "UrlMd5:%s\n", TMd5Sig(UrlStr).GetStr().CStr());
      fprintf(fTxt, "AtomName:%s\n", TUrl::GetTopDownDocNm(UrlStr).CStr());
      for (int FldN=0; FldN<HttpResp->GetFlds(); FldN++){
        TStr FldNm; TStr FldVal; HttpResp->GetFldNmVal(FldN, FldNm, FldVal);
        fprintf(fTxt, "HttpField:%s=%s\n", FldNm.CStr(), FldVal.CStr());
      }
      TMem BodyMem=HttpResp->GetBodyAsMem();
      fprintf(fTxt, "BodyMd5:%s\n", TMd5Sig(BodyMem).GetStr().CStr());
      // text
      if (SaveContP){
        if (HttpResp->IsContType(THttp::TextHtmlFldVal)){
          TStr HtmlStr=BodyMem.GetAsStr();
          TStr TxtStr=THtmlDoc::GetTxtLnDoc(HtmlStr, UrlStr, SaveContOutUrlP, SaveContTagP);
          fprintf(fTxt, "Content:%s\n", TxtStr.CStr());
        }
      }
      // outgoing-urls
      if (SaveOutUrlP){
        TUrlV OutUrlV; WebPg->GetOutUrlV(OutUrlV);
        for (int OutUrlN=0; OutUrlN<OutUrlV.Len(); OutUrlN++){
          TStr OutUrlStr=OutUrlV[OutUrlN]->GetUrlStr();
          fprintf(fTxt, "OutUrl:%s\n", OutUrlStr.CStr());
        }
      }
      fprintf(fTxt, "End:HttpOk\n");
    } else
    if (HttpResp->IsStatusCd_Redir()){
      TStr RedirUrlStr=HttpResp->GetFldVal(THttp::LocFldNm);
      PUrl RedirUrl=TUrl::New(RedirUrlStr, UrlStr);
      if (RedirUrl->IsOk(usHttp)){
        TStr RedirUrlStr=RedirUrl->GetUrlStr();
        fprintf(fTxt, "Start:HttpRedirection\n");
        fprintf(fTxt, "BlobBaseAddress:bb://%s/%d/%d\n",
         BlobBsFMid.CStr(), CrawlBPt.GetSeg(), CrawlBPt.GetAddr());
        fprintf(fTxt, "DateTime:%s\n", DateTimeStr.CStr());
        fprintf(fTxt, "Url:%s\n", UrlStr.CStr());
        fprintf(fTxt, "UrlMd5:%s\n", TMd5Sig(UrlStr).GetStr().CStr());
        fprintf(fTxt, "AtomName:%s\n", TUrl::GetTopDownDocNm(UrlStr).CStr());
        fprintf(fTxt, "RedirectionUrl:%s\n", RedirUrlStr.CStr());
        for (int FldN=0; FldN<HttpResp->GetFlds(); FldN++){
          TStr FldNm; TStr FldVal; HttpResp->GetFldNmVal(FldN, FldNm, FldVal);
          fprintf(fTxt, "HttpField:%s=%s\n", FldNm.CStr(), FldVal.CStr());
        }
        fprintf(fTxt, "End:HttpRedirection\n");
      }
    }
  }

  // statistics
  HttpContLenMom->Def();
  if (!OutStatFNm.Empty()){
    TFOut StatFOut(OutStatFNm); FILE* fStat=StatFOut.GetFileId();
    TIntStrPrV FqStatusCdPrV; StatusCdToFqH.GetDatKeyPrV(FqStatusCdPrV);
    TIntStrPrV FqContTypePrV; ContTypeToFqH.GetDatKeyPrV(FqContTypePrV);
    // hosts
    {fprintf(fStat, "================================================\n");
    TIntStrPrV FqHostNmPrV; HostNmToFqH.GetDatKeyPrV(FqHostNmPrV);
    FqHostNmPrV.Sort(false); int HostNmsSum=0;
    fprintf(fStat, "Hosts (%d):\n", FqHostNmPrV.Len());
    for (int HostNmN=0; HostNmN<FqHostNmPrV.Len(); HostNmN++){
      fprintf(fStat, "%7d   '%s'\n",
       FqHostNmPrV[HostNmN].Val1, FqHostNmPrV[HostNmN].Val2.CStr());
      HostNmsSum+=FqHostNmPrV[HostNmN].Val1;
    }
    fprintf(fStat, "----------\n");
    fprintf(fStat, "%7d   %s\n", HostNmsSum, "Sum");
    fprintf(fStat, "================================================\n");}
    // status-code
    {fprintf(fStat, "================================================\n");
    TIntStrPrV FqStatusCdPrV; StatusCdToFqH.GetDatKeyPrV(FqStatusCdPrV);
    FqStatusCdPrV.Sort(false); int StatusCdsSum=0;
    fprintf(fStat, "Status-Codes (%d):\n", FqStatusCdPrV.Len());
    for (int StatusCdN=0; StatusCdN<FqStatusCdPrV.Len(); StatusCdN++){
      fprintf(fStat, "%7d   '%s'\n",
       FqStatusCdPrV[StatusCdN].Val1, FqStatusCdPrV[StatusCdN].Val2.CStr());
      StatusCdsSum+=FqStatusCdPrV[StatusCdN].Val1;
    }
    fprintf(fStat, "----------\n");
    fprintf(fStat, "%7d   %s\n", StatusCdsSum, "Sum");
    fprintf(fStat, "================================================\n");}
    // content-type
    {fprintf(fStat, "================================================\n");
    TIntStrPrV FqContTypePrV; ContTypeToFqH.GetDatKeyPrV(FqContTypePrV);
    FqContTypePrV.Sort(false); int ContTypesSum=0;
    fprintf(fStat, "Content-Types (%d):\n", FqContTypePrV.Len());
    for (int ContTypeN=0; ContTypeN<FqContTypePrV.Len(); ContTypeN++){
      fprintf(fStat, "%7d   '%s'\n",
       FqContTypePrV[ContTypeN].Val1, FqContTypePrV[ContTypeN].Val2.CStr());
      ContTypesSum+=FqContTypePrV[ContTypeN].Val1;
    }
    fprintf(fStat, "----------\n");
    fprintf(fStat, "%7d   %s\n", ContTypesSum, "Sum");
    fprintf(fStat, "================================================\n");}
    // content-length
    {fprintf(fStat, "================================================\n");
    fprintf(fStat, "Content-length:\n");
    if (HttpContLenMom->IsUsable()){
      TStr MomStr=HttpContLenMom->GetStr('\n', ':', true, false, "%g");
      fprintf(fStat, "%s\n", MomStr.CStr());
    } else {
      fprintf(fStat, "Statistics not usable.\n");
    }
    fprintf(fStat, "================================================\n");}
  }

  return 0;
  Catch;
  return 1;
}
Beispiel #7
0
// Maps the textual form of a punctuation/operator symbol to its TLxSym code.
//
// The lookup table is a function-local static that is filled the first time
// the function runs (detected by the table still being empty). Strings that
// are not in the table yield syUndef.
TLxSym TLxSymStr::GetSSym(const TStr& Str){
  static TStrIntH SymByStrH(100);
  if (SymByStrH.Empty()){
    // punctuation
    SymByStrH.AddDat(PeriodStr, syPeriod);
    SymByStrH.AddDat(DPeriodStr, syDPeriod);
    SymByStrH.AddDat(CommaStr, syComma);
    SymByStrH.AddDat(ColonStr, syColon);
    SymByStrH.AddDat(DColonStr, syDColon);
    SymByStrH.AddDat(SemicolonStr, sySemicolon);
    // arithmetic and miscellaneous operators
    SymByStrH.AddDat(PlusStr, syPlus);
    SymByStrH.AddDat(MinusStr, syMinus);
    SymByStrH.AddDat(AsteriskStr, syAsterisk);
    SymByStrH.AddDat(SlashStr, sySlash);
    SymByStrH.AddDat(PercentStr, syPercent);
    SymByStrH.AddDat(ExclamationStr, syExclamation);
    SymByStrH.AddDat(VBarStr, syVBar);
    SymByStrH.AddDat(AmpersandStr, syAmpersand);
    SymByStrH.AddDat(QuestionStr, syQuestion);
    SymByStrH.AddDat(HashStr, syHash);
    // comparison operators
    SymByStrH.AddDat(EqStr, syEq);
    SymByStrH.AddDat(NEqStr, syNEq);
    SymByStrH.AddDat(LssStr, syLss);
    SymByStrH.AddDat(GtrStr, syGtr);
    SymByStrH.AddDat(LEqStr, syLEq);
    SymByStrH.AddDat(GEqStr, syGEq);
    // brackets
    SymByStrH.AddDat(LParenStr, syLParen);
    SymByStrH.AddDat(RParenStr, syRParen);
    SymByStrH.AddDat(LBracketStr, syLBracket);
    SymByStrH.AddDat(RBracketStr, syRBracket);
    SymByStrH.AddDat(LBraceStr, syLBrace);
    SymByStrH.AddDat(RBraceStr, syRBrace);
  }
  const int SymKeyId=SymByStrH.GetKeyId(Str);
  if (SymKeyId!=-1){
    return TLxSym(int(SymByStrH[SymKeyId]));
  }
  return syUndef;
}
Beispiel #8
0
// Table manipulations
TEST(TStrIntH, ManipulateTable) {
  const int64 NElems = 1000000;
  int DDist = 10;
  const char *FName = "test.hashstr.dat";
  TStrIntH TableStr;
  TStrIntH TableStr1;
  TStrIntH TableStr2;
  int i;
  int d;
  int n;
  int Id;
  int Key;
  TStr KeyStr;
  int64 KeySumVal;
  int64 DatSumVal;
  int64 KeySum;
  int64 DatSum;
  int64 KeySumDel;
  int64 DatSumDel;
  int DelCount;
  int Count;
  char s[32];

  // add table elements
  d = Prime(NElems);
  n = d;
  KeySumVal = 0;
  DatSumVal = 0;
  for (i = 0; i < NElems; i++) {
    sprintf(s,"%d",n);
    TStr Str = TStr(s);
    TableStr.AddDat(Str,n+1);
    KeySumVal += n;
    DatSumVal += (n+1);
    //printf("add %d %d\n", n, n+1);
    n = (n + d) % NElems;
  }
  EXPECT_EQ(0,TableStr.Empty());
  EXPECT_EQ(NElems,TableStr.Len());

  EXPECT_EQ(0,(NElems-1)*(NElems)/2 - KeySumVal);
  EXPECT_EQ(0,(NElems)*(NElems+1)/2 - DatSumVal);

  // verify elements by successive keys
  KeySum = 0;
  DatSum = 0;
  for (i = 0; i < NElems; i++) {
    sprintf(s,"%d",i);
    TStr Str = TStr(s);
    Id = TableStr.GetKeyId(s);
    EXPECT_EQ(1,Id >= 0);
    KeyStr = TableStr.GetKey(Id);
    Key = atoi(KeyStr.CStr());
    //printf("vrfy %d %s %d %s %d %d\n", i, Str.CStr(), Id, KeyStr.CStr(), Key, (int) TableStr.GetDat(KeyStr));
    EXPECT_EQ(0,TableStr.GetDat(KeyStr)-Key-1);
    KeySum += Key;
    DatSum += TableStr.GetDat(KeyStr);
  }

  EXPECT_EQ(0,KeySumVal - KeySum);
  EXPECT_EQ(0,DatSumVal - DatSum);

  // verify elements by distant keys
  KeySum = 0;
  DatSum = 0;
  n = Prime(d);
  for (i = 0; i < NElems; i++) {
    sprintf(s,"%d",i);
    TStr Str = TStr(s);
    Id = TableStr.GetKeyId(s);
    EXPECT_EQ(1,Id >= 0);
    KeyStr = TableStr.GetKey(Id);
    Key = atoi(KeyStr.CStr());
    EXPECT_EQ(0,TableStr.GetDat(KeyStr)-Key-1);
    KeySum += Key;
    DatSum += TableStr.GetDat(KeyStr);
    n = (n + d) % NElems;
  }

  EXPECT_EQ(0,KeySumVal - KeySum);
  EXPECT_EQ(0,DatSumVal - DatSum);

  // verify elements by iterator
  KeySum = 0;
  DatSum = 0;
  for (TStrIntH::TIter It = TableStr.BegI(); It < TableStr.EndI(); It++) {
    KeyStr = It.GetKey();
    Key = atoi(KeyStr.CStr());
    EXPECT_EQ(0,It.GetDat()-Key-1);
    KeySum += Key;
    DatSum += It.GetDat();
  }

  EXPECT_EQ(0,KeySumVal - KeySum);
  EXPECT_EQ(0,DatSumVal - DatSum);

  // verify elements by key index
  KeySum = 0;
  DatSum = 0;
  Id = TableStr.FFirstKeyId();
  while (TableStr.FNextKeyId(Id)) {
    EXPECT_EQ(1,Id >= 0);
    KeyStr = TableStr.GetKey(Id);
    Key = atoi(KeyStr.CStr());
    EXPECT_EQ(0,TableStr.GetDat(KeyStr)-Key-1);
    KeySum += Key;
    DatSum += TableStr.GetDat(KeyStr);
  }

  EXPECT_EQ(0,KeySumVal - KeySum);
  EXPECT_EQ(0,DatSumVal - DatSum);

  // delete elements
  DelCount = 0;
  KeySumDel = 0;
  DatSumDel = 0;
  for (n = 0; n < NElems; n += DDist) {
    sprintf(s,"%d",n);
    TStr Str = TStr(s);
    Id = TableStr.GetKeyId(Str);
    //printf("del %d %d %d\n", n, Id, (int) TableStr[Id]);
    KeySumDel += n;
    DatSumDel += TableStr[Id];
    TableStr.DelKeyId(Id);
    DelCount++;
  }
  EXPECT_EQ(0,TableStr.Empty());
  EXPECT_EQ(NElems-DelCount,TableStr.Len());

  // verify elements by iterator
  KeySum = 0;
  DatSum = 0;
  Count = 0;
  for (TStrIntH::TIter It = TableStr.BegI(); It < TableStr.EndI(); It++) {
    KeyStr = It.GetKey();
    Key = atoi(KeyStr.CStr());
    EXPECT_EQ(0,It.GetDat()-Key-1);
    //printf("get %d %d\n", (int) It.GetKey(), (int) It.GetDat());
    KeySum += Key;
    DatSum += It.GetDat();
    Count++;
  }

  EXPECT_EQ(NElems-DelCount,Count);
  EXPECT_EQ(0,KeySumVal - KeySumDel - KeySum);
  EXPECT_EQ(0,DatSumVal - DatSumDel - DatSum);

  // assignment
  TableStr1 = TableStr;
  EXPECT_EQ(0,TableStr1.Empty());
  EXPECT_EQ(NElems-DelCount,TableStr1.Len());

  // verify elements by iterator
  KeySum = 0;
  DatSum = 0;
  Count = 0;
  for (TStrIntH::TIter It = TableStr1.BegI(); It < TableStr1.EndI(); It++) {
    KeyStr = It.GetKey();
    Key = atoi(KeyStr.CStr());
    EXPECT_EQ(0,It.GetDat()-Key-1);
    //printf("get %d %d\n", (int) It.GetKey(), (int) It.GetDat());
    KeySum += Key;
    DatSum += It.GetDat();
    Count++;
  }

  EXPECT_EQ(NElems-DelCount,Count);
  EXPECT_EQ(0,KeySumVal - KeySumDel - KeySum);
  EXPECT_EQ(0,DatSumVal - DatSumDel - DatSum);

  // saving and loading
  {
    TFOut FOut(FName);
    TableStr.Save(FOut);
    FOut.Flush();
  }

  {
    TFIn FIn(FName);
    TableStr2.Load(FIn);
  }

  EXPECT_EQ(NElems-DelCount,TableStr2.Len());

  // verify elements by iterator
  KeySum = 0;
  DatSum = 0;
  Count = 0;
  for (TStrIntH::TIter It = TableStr2.BegI(); It < TableStr2.EndI(); It++) {
    KeyStr = It.GetKey();
    Key = atoi(KeyStr.CStr());
    EXPECT_EQ(0,It.GetDat()-Key-1);
    //printf("get %d %d\n", (int) It.GetKey(), (int) It.GetDat());
    KeySum += Key;
    DatSum += It.GetDat();
    Count++;
  }

  EXPECT_EQ(NElems-DelCount,Count);
  EXPECT_EQ(0,KeySumVal - KeySumDel - KeySum);
  EXPECT_EQ(0,DatSumVal - DatSumDel - DatSum);

  // remove all elements
  for (i = 0; i < Count; i++) {
    Id = TableStr.GetRndKeyId(TInt::Rnd, 0.5);
    TableStr.DelKeyId(Id);
  }
  EXPECT_EQ(0,TableStr.Len());
  EXPECT_EQ(1,TableStr.Empty());

  // verify elements by iterator
  KeySum = 0;
  DatSum = 0;
  Count = 0;
  for (TStrIntH::TIter It = TableStr.BegI(); It < TableStr.EndI(); It++) {
    KeyStr = It.GetKey();
    Key = atoi(KeyStr.CStr());
    EXPECT_EQ(0,It.GetDat()-Key-1);
    //printf("get %d %d\n", (int) It.GetKey(), (int) It.GetDat());
    KeySum += Key;
    DatSum += It.GetDat();
    Count++;
  }

  EXPECT_EQ(0,Count);
  EXPECT_EQ(0,KeySum);
  EXPECT_EQ(0,DatSum);

  // clear the table
  TableStr1.Clr();
  EXPECT_EQ(0,TableStr1.Len());
  EXPECT_EQ(1,TableStr1.Empty());

  // verify elements by iterator
  KeySum = 0;
  DatSum = 0;
  Count = 0;
  for (TStrIntH::TIter It = TableStr1.BegI(); It < TableStr1.EndI(); It++) {
    KeyStr = It.GetKey();
    Key = atoi(KeyStr.CStr());
    EXPECT_EQ(0,It.GetDat()-Key-1);
    //printf("get %d %d\n", (int) It.GetKey(), (int) It.GetDat());
    KeySum += Key;
    DatSum += It.GetDat();
    Count++;
  }

  EXPECT_EQ(0,Count);
  EXPECT_EQ(0,KeySum);
  EXPECT_EQ(0,DatSum);
}
Beispiel #9
0
// Builds a Cyc base from four XML dump files located in directory FPath:
//   lexicon-dump.xml   - <word string=... cycl=...>   -> #$nameString edges
//   taxonomy-dump.xml  - <term cycl=...> with <isa>/<genl> children
//                                                     -> #$isa / #$genls edges
//   relevance-dump.xml - <term cyc=... thcl/irrel/clarifying=...>
//                                                     -> vertex flags
//   kb-dump.xml        - <sentence cycl=...> with <arg> children
//                                                     -> predicate edges
// Each file is read as a sequence of XML documents terminated by an <end>
// tag. For every edge a back-link edge with a "~"-prefixed predicate is added
// as well. A document that fails to parse prints the previously read item
// and triggers Fail. As a side effect, head-predicate frequencies from the
// knowledge base are written to "CycKB-CycLFq.Stat.Txt".
// Returns the populated Cyc base.
PCycBs TCycBs::LoadCycXmlDump(const TStr& FPath){
  // file-names
  TStr NrFPath=TStr::GetNrFPath(FPath);
  TStr CycLexiconFNm=NrFPath+"lexicon-dump.xml";
  TStr CycTaxonomyFNm=NrFPath+"taxonomy-dump.xml";
  TStr CycRelevanceFNm=NrFPath+"relevance-dump.xml";
  TStr CycKBaseFNm=NrFPath+"kb-dump.xml";

  // create cyc-base
  PCycBs CycBs=TCycBs::New();

  // lexicon
  {printf("Processing Lexicon %s ...\n", CycLexiconFNm.CStr());
  PSIn CycLexiconSIn=TFIn::New(CycLexiconFNm);
  PXmlDoc XmlDoc; int XmlDocs=0;
  TStr PrevCycWStr; TStr PrevCycLStr;
  forever{
    // statistics
    XmlDocs++; if (XmlDocs%1000==0){printf("%d Docs\r", XmlDocs);}
    // load xml-tree
    XmlDoc=TXmlDoc::LoadTxt(CycLexiconSIn);
    if (!XmlDoc->IsOk()){
      // report the last entry that was read successfully
      printf("%s - %s\n", PrevCycWStr.CStr(), PrevCycLStr.CStr());
      Fail;
    }
    // extract fields from xml-tree
    PXmlTok TopTok=XmlDoc->GetTok();
    if (TopTok->IsTag("end")){break;}
    IAssert(TopTok->IsTag("word"));
    TStr CycWStr=TopTok->GetArgVal("string");
    TStr CycLStr=TopTok->GetArgVal("cycl");
    // remember both fields so the failure message above can show them
    PrevCycWStr=CycWStr; PrevCycLStr=CycLStr;
    // insert data
    CycBs->AddEdge(CycLStr, "#$nameString", CycWStr);
    CycBs->AddEdge(CycWStr, "~#$nameString", CycLStr);
  }
  printf("%d Docs\nDone.\n", XmlDocs);}

  // taxonomy
  {printf("Processing Taxonomy %s ...\n", CycTaxonomyFNm.CStr());
  PSIn CycTaxonomySIn=TFIn::New(CycTaxonomyFNm);
  PXmlDoc XmlDoc; int XmlDocs=0;
  TStr PrevSrcCycLStr;
  forever{
    // statistics
    XmlDocs++;
    if (XmlDocs%1000==0){
      printf("%d Docs\r", XmlDocs);}
    // load xml-tree
    XmlDoc=TXmlDoc::LoadTxt(CycTaxonomySIn);
    if (!XmlDoc->IsOk()){
      printf("%s\n", PrevSrcCycLStr.CStr());
      Fail;
    }
    // extract fields from xml-tree
    PXmlTok TopTok=XmlDoc->GetTok();
    if (TopTok->IsTag("end")){break;}
    IAssert(TopTok->IsTag("term"));
    TStr SrcCycLStr=TopTok->GetArgVal("cycl");
    PrevSrcCycLStr=SrcCycLStr;
    // only <isa> and <genl> children are accepted; anything else fails
    for (int SubTokN=0; SubTokN<TopTok->GetSubToks(); SubTokN++){
      PXmlTok SubTok=TopTok->GetSubTok(SubTokN);
      TStr DstCycLStr=SubTok->GetTagNm();
      if (SubTok->IsTag("isa")){
        DstCycLStr=SubTok->GetArgVal("value");
        CycBs->AddEdge(SrcCycLStr, "#$isa", DstCycLStr);
        CycBs->AddEdge(DstCycLStr, "~#$isa", SrcCycLStr);
      } else
      if (SubTok->IsTag("genl")){
        DstCycLStr=SubTok->GetArgVal("value");
        CycBs->AddEdge(SrcCycLStr, "#$genls", DstCycLStr);
        CycBs->AddEdge(DstCycLStr, "~#$genls", SrcCycLStr);
      } else {
        Fail;
      }
    }
  }
  printf("%d Docs\nDone.\n", XmlDocs);}

  // relevance
  {printf("Processing Relevance %s ...\n", CycRelevanceFNm.CStr());
  PSIn CycRelevanceSIn=TFIn::New(CycRelevanceFNm);
  PXmlDoc XmlDoc; int XmlDocs=0;
  TStr PrevCycStr;
  forever{
    // statistics
    XmlDocs++;
    if (XmlDocs%1000==0){
      printf("%d Docs\r", XmlDocs);}
    // load xml-tree
    XmlDoc=TXmlDoc::LoadTxt(CycRelevanceSIn);
    if (!XmlDoc->IsOk()){
      printf("%s\n", PrevCycStr.CStr());
      Fail;
    }
    // extract fields from xml-tree
    PXmlTok TopTok=XmlDoc->GetTok();
    if (TopTok->IsTag("end")){break;}
    IAssert(TopTok->IsTag("term"));
    TStr CycStr=TopTok->GetArgVal("cyc");
    PrevCycStr=CycStr;
    // terms that are not vertices of the base are silently ignored
    //IAssert(CycBs->IsVNm(CycStr));
    if (CycBs->IsVNm(CycStr)){
      if (TopTok->GetArgVal("thcl")=="T"){
        CycBs->GetVrtx(CycStr).SetFlag(cvfHumanRelevant, true);}
      if (TopTok->GetArgVal("irrel")=="T"){
        CycBs->GetVrtx(CycStr).SetFlag(cfvHumanIrrelevant, true);}
      if (TopTok->GetArgVal("clarifying")=="T"){
        CycBs->GetVrtx(CycStr).SetFlag(cfvHumanClarifying, true);}
      if ((TopTok->GetArgVal("thcl")=="T")||(TopTok->GetArgVal("clarifying")=="T")){
        CycBs->GetVrtx(CycStr).SetFlag(cvfHumanOk, true);}
    } else {
      //printf("%s\n", CycStr.CStr());
    }
  }
  printf("%d Docs\nDone.\n", XmlDocs);}

  // knowledge-base
  {printf("Processing KBase %s ...\n", CycKBaseFNm.CStr());
  PSIn CycKBaseSIn=TFIn::New(CycKBaseFNm);
  PXmlDoc XmlDoc; int XmlDocs=0;
  TStr PrevCycLStr; TStrV PrevArgCycLStrV;
  TStrIntH HdCycLToFq;
  forever{
    // statistics
    XmlDocs++;
    if (XmlDocs%1000==0){
      printf("%d Docs\r", XmlDocs);}
    //if (XmlDocs>10000){break;}
    // load xml-tree
    XmlDoc=TXmlDoc::LoadTxt(CycKBaseSIn);
    if (!XmlDoc->IsOk()){
      printf("%s\n", PrevCycLStr.CStr());
      for (int ArgN=0; ArgN<PrevArgCycLStrV.Len(); ArgN++){
        printf(" [%s]", PrevArgCycLStrV[ArgN].CStr());}
      printf("\n");
      Fail;
    }
    // extract fields from xml-tree
    PXmlTok TopTok=XmlDoc->GetTok();
    if (TopTok->IsTag("end")){break;}
    IAssert(TopTok->IsTag("sentence"));
    TStr CycLStr=TopTok->GetArgVal("cycl");
    TXmlTokV ArgXmlTokV; XmlDoc->GetTagTokV("sentence|arg", ArgXmlTokV);
    // arguments without a "cycl" attribute are recorded as "Empty"
    TStrV ArgCycLStrV;
    for (int ArgN=0; ArgN<ArgXmlTokV.Len(); ArgN++){
      PXmlTok Tok=ArgXmlTokV[ArgN];
      IAssert(Tok->IsTag("arg"));
      if (Tok->IsArg("cycl")){
        TStr ArgCycLStr=Tok->GetArgVal("cycl");
        ArgCycLStrV.Add(ArgCycLStr);
      } else {
        ArgCycLStrV.Add("Empty");
      }
    }
    PrevCycLStr=CycLStr;
    PrevArgCycLStrV=ArgCycLStrV;
    // frequency of "<head> - <number of remaining arguments>"
    if (ArgCycLStrV.Len()>0){
      HdCycLToFq.AddDat(ArgCycLStrV[0]+" - "+TInt::GetStr(ArgCycLStrV.Len()-1))++;}
    // insert: only sentences with exactly three arguments, and not the
    // predicates #$isa, #$termOfUnit and #$genls
    if (ArgCycLStrV.Len()==3){
      TStr PredNm=ArgCycLStrV[0];
      if ((PredNm!="#$isa")&&(PredNm!="#$termOfUnit")&&(PredNm!="#$genls")){
        TStr BackLinkPredNm=TStr("~")+PredNm;
        TStr Arg1=ArgCycLStrV[1];
        TStr Arg2=ArgCycLStrV[2];
        CycBs->AddEdge(Arg1, PredNm, Arg2);
        CycBs->AddEdge(Arg2, BackLinkPredNm, Arg1);
      }
    }
  }
  // output top cycl relations
  {TFOut CycLSOut("CycKB-CycLFq.Stat.Txt"); FILE* fCycL=CycLSOut.GetFileId();
  TIntStrPrV FqCycLStrPrV; HdCycLToFq.GetDatKeyPrV(FqCycLStrPrV);
  FqCycLStrPrV.Sort(false);
  for (int CycLN=0; CycLN<FqCycLStrPrV.Len(); CycLN++){
    // NOTE(review): this prints frequency+1 in a column formatted like a rank
    // ("%6d."); confirm whether 1+CycLN or the plain frequency was intended.
    fprintf(fCycL, "%6d. %s\n", 1+FqCycLStrPrV[CycLN].Val1, FqCycLStrPrV[CycLN].Val2.CStr());
  }}
  printf("%d Docs\nDone.\n", XmlDocs);}

  // return cyc-base
  return CycBs;
}