// Test the default constructor TEST(TStrIntH, DefaultConstructor) { TStrIntH TableStr; EXPECT_EQ(1,TableStr.Empty()); EXPECT_EQ(0,TableStr.Len()); EXPECT_EQ(0,TableStr.GetMxKeyIds()); }
/// Creates a TNodeJsRf24Radio from the JSON configuration object passed as the
/// first JavaScript argument.
///
/// Configuration keys read: "pinCE", "pinCSN", "id", "sensors" and the optional
/// "verbose" flag (defaults to false). Each element of "sensors" must provide
/// "id", "internalId" and "nodeId".
///
/// @param Args  V8 call arguments; argument 0 is parsed as JSON.
/// @return      Newly allocated radio wrapper (allocated with `new`).
TNodeJsRf24Radio* TNodeJsRf24Radio::NewFromArgs(const v8::FunctionCallbackInfo<v8::Value>& Args) {
  v8::Isolate* Isolate = v8::Isolate::GetCurrent();
  v8::HandleScope HandleScope(Isolate);

  PJsonVal ConfigJson = TNodeJsUtil::GetArgJson(Args, 0);

  // scalar settings
  const int PinCE = ConfigJson->GetObjInt("pinCE");
  const int PinCSN = ConfigJson->GetObjInt("pinCSN");
  const uint16 MyId = static_cast<uint16>(ConfigJson->GetObjInt("id"));
  const PJsonVal SensorJsonV = ConfigJson->GetObjKey("sensors");
  const bool Verbose = ConfigJson->GetObjBool("verbose", false);

  // verbose mode logs to the standard notifier, otherwise logging is discarded
  const PNotify Notify = Verbose ? TNotify::StdNotify : TNotify::NullNotify;
  Notify->OnNotify(TNotifyType::ntInfo, "Parsing configuration ...");

  // sensor-name -> internal-id and sensor-name -> node-id lookup tables
  TStrIntH SensorNmIdH;
  TStrIntH SensorIdNodeIdH;
  const int Sensors = SensorJsonV->GetArrVals();
  for (int SensorN = 0; SensorN < Sensors; SensorN++) {
    const PJsonVal SensorJson = SensorJsonV->GetArrVal(SensorN);
    const TStr& SensorId = SensorJson->GetObjStr("id");

    SensorNmIdH.AddDat(SensorId, SensorJson->GetObjInt("internalId"));
    SensorIdNodeIdH.AddDat(SensorId, SensorJson->GetObjInt("nodeId"));
  }

  Notify->OnNotify(TNotifyType::ntInfo, "Calling cpp constructor ...");
  return new TNodeJsRf24Radio(MyId, PinCE, PinCSN, SensorNmIdH, SensorIdNodeIdH, Notify);
}
///////////////////////////////////////////////// // SkyGrid-Document void TSkyGridBinDoc::SaveBinDocV( const TStr& InXmlFPath, const TStr& OutBinFNm, const int& MxDocs){ printf("Processing SkyGrid-News-Xml files from '%s'...\n", InXmlFPath.CStr()); TFOut SOut(OutBinFNm); TFFile FFile(InXmlFPath, true); TStr FNm; int Docs=0; int DateDocs=0; uint64 PrevTm=0; while (FFile.Next(FNm)){ if ((MxDocs!=-1)&&(Docs>=MxDocs)){break;} //printf(" Processing '%s' ...", FNm.CStr()); PXmlDoc XmlDoc=TXmlDoc::LoadTxt(FNm); PXmlTok ContentTok=XmlDoc->GetTagTok("item|content"); TStr SwIdStr=ContentTok->GetTagTok("swid")->GetArgVal("value"); TStr UrlStr=ContentTok->GetTagTok("url")->GetTokStr(false); TStr TitleStr=ContentTok->GetTagTok("title")->GetTokStr(false); TStr FetchedValStr=ContentTok->GetTagTok("fetched")->GetArgVal("value"); TXmlTokV EntityTokV; ContentTok->GetTagTokV("annotations|entity", EntityTokV); TStr BodyStr=ContentTok->GetTagTok("body")->GetTokStr(false); // extract date TStr DateStr=SwIdStr.GetSubStr(0, 7); TStr YearStr=DateStr.GetSubStr(0, 3); TStr MonthStr=DateStr.GetSubStr(4, 5); TStr DayStr=DateStr.GetSubStr(6, 7); TTm DateTm(YearStr.GetInt(), MonthStr.GetInt(), DayStr.GetInt()); uint64 Tm=TTm::GetMSecsFromTm(DateTm); // extract entities TStrIntH EntNmToFqH; for (int EntityTokN=0; EntityTokN<EntityTokV.Len(); EntityTokN++){ PXmlTok EntityTok=EntityTokV[EntityTokN]; if (!EntityTok->IsTag("entity")){continue;} TStr CanonicalNm=EntityTok->GetArgVal("canonical", ""); TStr TextStr=EntityTok->GetArgVal("text", ""); TStr TypeNm=EntityTok->GetArgVal("type", ""); TStr EntNm=CanonicalNm.Empty() ? 
TextStr : CanonicalNm; EntNmToFqH.AddDat(EntNm)++; } TIntStrPrV FqEntNmPrV; EntNmToFqH.GetDatKeyPrV(FqEntNmPrV); FqEntNmPrV.Sort(false); // extract headline TChA HeadlineChA=BodyStr.GetSubStr(0, 250); while ((HeadlineChA.Len()>0)&&(HeadlineChA.LastCh()!=' ')){ HeadlineChA.Trunc(HeadlineChA.Len()-1);} HeadlineChA+="..."; // create document TSkyGridBinDoc Doc(SwIdStr, Tm, TitleStr, HeadlineChA, FqEntNmPrV); // save document Doc.Save(SOut); // screen log if (PrevTm!=Tm){ if (PrevTm!=0){printf("\n");} PrevTm=Tm; DateDocs=0; } Docs++; DateDocs++; printf(" %s [Day:%d / All:%d]\r", DateStr.CStr(), DateDocs, Docs); } printf("\nDone.\n"); }
void TSkyGridEnt::GetDocsPerDateV( const TSkyGridBs* SkyGridBs, TStrIntPrV& DateStrDocsPrV, int& Docs) const { TStrIntH DateStrToDocsH; Docs=0; for (int DocN=0; DocN<GetDocIds(); DocN++){ int DocId=GetDocId(DocN); PSkyGridDoc Doc=SkyGridBs->GetDoc(DocId); uint64 DocTm=Doc->GetTm(); TStr DocDateStr=TTm::GetTmFromMSecs(DocTm).GetWebLogDateStr(); DateStrToDocsH.AddDat(DocDateStr)++; Docs++; } DateStrToDocsH.GetKeyDatPrV(DateStrDocsPrV); DateStrDocsPrV.Sort(); }
/// Constructs the radio wrapper and builds the two lookup tables that translate
/// between value names and (node-id, value-id) pairs.
///
/// @param NodeId          Id of this node, forwarded to the underlying radio.
/// @param PinCE           CE pin, forwarded to the underlying radio.
/// @param PinCSN          CSN pin, forwarded to the underlying radio.
/// @param ValueNmIdH      Value name -> value id.
/// @param ValueNmNodeIdH  Value name -> id of the node providing the value;
///                        must contain every key of ValueNmIdH.
/// @param _Notify         Logger used for progress messages.
TNodeJsRf24Radio::TNodeJsRf24Radio(const uint16& NodeId, const int& PinCE, const int& PinCSN,
    const TStrIntH& ValueNmIdH, const TStrIntH& ValueNmNodeIdH, const PNotify& _Notify):
    Radio(NodeId, PinCE, PinCSN, BCM2835_SPI_SPEED_8MHZ, _Notify),
    ValNmNodeIdValIdPrH(),
    NodeIdValIdPrValNmH(),
    OnValueCallback(),
    Notify(_Notify) {

  Notify->OnNotify(TNotifyType::ntInfo, "Setting radio cpp callback ...");
  Radio.SetCallback(this);

  Notify->OnNotify(TNotifyType::ntInfo, "Initializing Id conversion structures ...");
  for (int KeyId = ValueNmIdH.FFirstKeyId(); ValueNmIdH.FNextKeyId(KeyId); ) {
    const TStr& ValNm = ValueNmIdH.GetKey(KeyId);
    const int ValId = ValueNmIdH[KeyId];
    // named ValNodeId so it does not hide the constructor's NodeId parameter
    const int ValNodeId = ValueNmNodeIdH.GetDat(ValNm);

    // register the mapping in both directions
    ValNmNodeIdValIdPrH.AddDat(ValNm, TIntPr(ValNodeId, ValId));
    NodeIdValIdPrValNmH.AddDat(TIntPr(ValNodeId, ValId), ValNm);
  }
}
int main(int argc, char* argv[]){ Try; // create environment Env=TEnv(argc, argv, TNotify::StdNotify); // get command line parameters Env.PrepArgs("Crawl-Base to Text", 0); TStr InCrawlBsFNm=Env.GetIfArgPrefixStr("-i:", "", "Crawl-Base-FileName"); TStr OutTxtFNm=Env.GetIfArgPrefixStr("-ot:", "Crawl.Txt", "Output-Text-Filename"); TStr OutStatFNm=Env.GetIfArgPrefixStr("-os:", "Crawl.Stat.Txt", "Output-Statistics-Text-Filename"); bool SaveContP=Env.GetIfArgPrefixBool("-sc:", false, "Save-Content"); bool SaveContOutUrlP=Env.GetIfArgPrefixBool("-scou:", true, "Save-Content-Outgoing-Urls"); bool SaveContTagP=Env.GetIfArgPrefixBool("-sct:", true, "Save-Content-Tags"); bool SaveOutUrlP=Env.GetIfArgPrefixBool("-sou:", false, "Save-Outgoing-Urls"); bool SaveCTxtP=Env.GetIfArgPrefixBool("-sctc:", false, "Save-Continuos-Text-Content"); int MnCTxtToks=Env.GetIfArgPrefixInt("-mctt:", 100, "Minimal-Continuos-Text-Tokens"); TStrV BlockedDmNmV=Env.GetIfArgPrefixStrV("-bd:", "Blocked-Domain-Names (multiple)"); if (Env.IsEndOfRun()){return 0;} // -i:si -sc:t -scou:n -sct:n -sctc:t -bd:.delo.si -bd:.dnevnik.si -bd:.vecer.si TStr BlobBsFMid=InCrawlBsFNm.GetFMid(); // output file TFOut TxtFOut(OutTxtFNm); FILE* fTxt=TxtFOut.GetFileId(); fprintf(fTxt, "Comment:input=%s\n", InCrawlBsFNm.CStr()); fprintf(fTxt, "Comment:output=%s\n", OutTxtFNm.CStr()); fprintf(fTxt, "BlobBaseName:%s\n", BlobBsFMid.CStr()); // statistics TStrIntH HostNmToFqH; TStrIntH StatusCdToFqH; TStrIntH ContTypeToFqH; PMom HttpContLenMom=TMom::New(); PBlobBs CrawlBBs=TMBlobBs::New(InCrawlBsFNm); TBlobPt TrvCrawlBPt=CrawlBBs->FFirstBlobPt(); TBlobPt CrawlBPt; PSIn CrawlBlobSIn; int CrawlBlobN=0; while (CrawlBBs->FNextBlobPt(TrvCrawlBPt, CrawlBPt, CrawlBlobSIn)){ CrawlBlobN++; printf("%d\r", CrawlBlobN); TStr DateTimeStr(*CrawlBlobSIn); //TStr DateTimeStr; TStr UrlStr(*CrawlBlobSIn); PUrl Url=TUrl::New(UrlStr); IAssert(Url->IsOk(usHttp)); TMem HttpRespMem(*CrawlBlobSIn); PSIn HttpRespSIn=HttpRespMem.GetSIn(); PHttpResp 
HttpResp=THttpResp::New(HttpRespSIn); // statistics HostNmToFqH.AddDat(Url->GetHostNm())++; StatusCdToFqH.AddDat(TInt::GetStr(HttpResp->GetStatusCd()))++; ContTypeToFqH.AddDat(HttpResp->GetFldVal(THttp::ContTypeFldNm))++; int ContLen=HttpResp->GetFldVal(THttp::ContLenFldNm).GetInt(-1); if (ContLen!=-1){ HttpContLenMom->Add(ContLen);} // check blocked domain-names if (!BlockedDmNmV.Empty()){ TStr DmNm=Url->GetDmNm(); int BlockedDmP=false; for (int BDmNmN=0; BDmNmN<BlockedDmNmV.Len(); BDmNmN++){ if (DmNm.IsSuffix(BlockedDmNmV[BDmNmN])){ BlockedDmP=true; break; } } if (BlockedDmP){ continue; } } // check continuos-text if (SaveCTxtP&&IsCTxtHttpResp(Url, HttpResp, MnCTxtToks)){continue;} if (HttpResp->IsStatusCd_Ok()){ PWebPg WebPg=TWebPg::New(UrlStr, HttpResp); fprintf(fTxt, "Start:HttpOk\n"); fprintf(fTxt, "BlobBaseAddress:bb://%s/%d/%d\n", BlobBsFMid.CStr(), CrawlBPt.GetSeg(), CrawlBPt.GetAddr()); fprintf(fTxt, "DateTime:%s\n", DateTimeStr.CStr()); fprintf(fTxt, "Url:%s\n", UrlStr.CStr()); fprintf(fTxt, "UrlMd5:%s\n", TMd5Sig(UrlStr).GetStr().CStr()); fprintf(fTxt, "AtomName:%s\n", TUrl::GetTopDownDocNm(UrlStr).CStr()); for (int FldN=0; FldN<HttpResp->GetFlds(); FldN++){ TStr FldNm; TStr FldVal; HttpResp->GetFldNmVal(FldN, FldNm, FldVal); fprintf(fTxt, "HttpField:%s=%s\n", FldNm.CStr(), FldVal.CStr()); } TMem BodyMem=HttpResp->GetBodyAsMem(); fprintf(fTxt, "BodyMd5:%s\n", TMd5Sig(BodyMem).GetStr().CStr()); // text if (SaveContP){ if (HttpResp->IsContType(THttp::TextHtmlFldVal)){ TStr HtmlStr=BodyMem.GetAsStr(); TStr TxtStr=THtmlDoc::GetTxtLnDoc(HtmlStr, UrlStr, SaveContOutUrlP, SaveContTagP); fprintf(fTxt, "Content:%s\n", TxtStr.CStr()); } } // outgoing-urls if (SaveOutUrlP){ TUrlV OutUrlV; WebPg->GetOutUrlV(OutUrlV); for (int OutUrlN=0; OutUrlN<OutUrlV.Len(); OutUrlN++){ TStr OutUrlStr=OutUrlV[OutUrlN]->GetUrlStr(); fprintf(fTxt, "OutUrl:%s\n", OutUrlStr.CStr()); } } fprintf(fTxt, "End:HttpOk\n"); } else if (HttpResp->IsStatusCd_Redir()){ TStr 
RedirUrlStr=HttpResp->GetFldVal(THttp::LocFldNm); PUrl RedirUrl=TUrl::New(RedirUrlStr, UrlStr); if (RedirUrl->IsOk(usHttp)){ TStr RedirUrlStr=RedirUrl->GetUrlStr(); fprintf(fTxt, "Start:HttpRedirection\n"); fprintf(fTxt, "BlobBaseAddress:bb://%s/%d/%d\n", BlobBsFMid.CStr(), CrawlBPt.GetSeg(), CrawlBPt.GetAddr()); fprintf(fTxt, "DateTime:%s\n", DateTimeStr.CStr()); fprintf(fTxt, "Url:%s\n", UrlStr.CStr()); fprintf(fTxt, "UrlMd5:%s\n", TMd5Sig(UrlStr).GetStr().CStr()); fprintf(fTxt, "AtomName:%s\n", TUrl::GetTopDownDocNm(UrlStr).CStr()); fprintf(fTxt, "RedirectionUrl:%s\n", RedirUrlStr.CStr()); for (int FldN=0; FldN<HttpResp->GetFlds(); FldN++){ TStr FldNm; TStr FldVal; HttpResp->GetFldNmVal(FldN, FldNm, FldVal); fprintf(fTxt, "HttpField:%s=%s\n", FldNm.CStr(), FldVal.CStr()); } fprintf(fTxt, "End:HttpRedirection\n"); } } } // statistics HttpContLenMom->Def(); if (!OutStatFNm.Empty()){ TFOut StatFOut(OutStatFNm); FILE* fStat=StatFOut.GetFileId(); TIntStrPrV FqStatusCdPrV; StatusCdToFqH.GetDatKeyPrV(FqStatusCdPrV); TIntStrPrV FqContTypePrV; ContTypeToFqH.GetDatKeyPrV(FqContTypePrV); // hosts {fprintf(fStat, "================================================\n"); TIntStrPrV FqHostNmPrV; HostNmToFqH.GetDatKeyPrV(FqHostNmPrV); FqHostNmPrV.Sort(false); int HostNmsSum=0; fprintf(fStat, "Hosts (%d):\n", FqHostNmPrV.Len()); for (int HostNmN=0; HostNmN<FqHostNmPrV.Len(); HostNmN++){ fprintf(fStat, "%7d '%s'\n", FqHostNmPrV[HostNmN].Val1, FqHostNmPrV[HostNmN].Val2.CStr()); HostNmsSum+=FqHostNmPrV[HostNmN].Val1; } fprintf(fStat, "----------\n"); fprintf(fStat, "%7d %s\n", HostNmsSum, "Sum"); fprintf(fStat, "================================================\n");} // status-code {fprintf(fStat, "================================================\n"); TIntStrPrV FqStatusCdPrV; StatusCdToFqH.GetDatKeyPrV(FqStatusCdPrV); FqStatusCdPrV.Sort(false); int StatusCdsSum=0; fprintf(fStat, "Status-Codes (%d):\n", FqStatusCdPrV.Len()); for (int StatusCdN=0; StatusCdN<FqStatusCdPrV.Len(); 
StatusCdN++){ fprintf(fStat, "%7d '%s'\n", FqStatusCdPrV[StatusCdN].Val1, FqStatusCdPrV[StatusCdN].Val2.CStr()); StatusCdsSum+=FqStatusCdPrV[StatusCdN].Val1; } fprintf(fStat, "----------\n"); fprintf(fStat, "%7d %s\n", StatusCdsSum, "Sum"); fprintf(fStat, "================================================\n");} // content-type {fprintf(fStat, "================================================\n"); TIntStrPrV FqContTypePrV; ContTypeToFqH.GetDatKeyPrV(FqContTypePrV); FqContTypePrV.Sort(false); int ContTypesSum=0; fprintf(fStat, "Content-Types (%d):\n", FqContTypePrV.Len()); for (int ContTypeN=0; ContTypeN<FqContTypePrV.Len(); ContTypeN++){ fprintf(fStat, "%7d '%s'\n", FqContTypePrV[ContTypeN].Val1, FqContTypePrV[ContTypeN].Val2.CStr()); ContTypesSum+=FqContTypePrV[ContTypeN].Val1; } fprintf(fStat, "----------\n"); fprintf(fStat, "%7d %s\n", ContTypesSum, "Sum"); fprintf(fStat, "================================================\n");} // content-length {fprintf(fStat, "================================================\n"); fprintf(fStat, "Content-length:\n"); if (HttpContLenMom->IsUsable()){ TStr MomStr=HttpContLenMom->GetStr('\n', ':', true, false, "%g"); fprintf(fStat, "%s\n", MomStr.CStr()); } else { fprintf(fStat, "Statistics not usable.\n"); } fprintf(fStat, "================================================\n");} } return 0; Catch; return 1; }
// Maps the textual form of a lexical symbol (e.g. the period or comma string)
// to its TLxSym value; returns syUndef when the string is not a known symbol.
// The string -> symbol table is a function-local static, filled on the first
// call (detected by the table being empty).
TLxSym TLxSymStr::GetSSym(const TStr& Str){
  static TStrIntH StrToLxSymH(100);
  const bool TableReadyP=(StrToLxSymH.Len()!=0);
  if (!TableReadyP){
    // punctuation
    StrToLxSymH.AddDat(PeriodStr, syPeriod);
    StrToLxSymH.AddDat(DPeriodStr, syDPeriod);
    StrToLxSymH.AddDat(CommaStr, syComma);
    StrToLxSymH.AddDat(ColonStr, syColon);
    StrToLxSymH.AddDat(DColonStr, syDColon);
    StrToLxSymH.AddDat(SemicolonStr, sySemicolon);
    // operators and special characters
    StrToLxSymH.AddDat(PlusStr, syPlus);
    StrToLxSymH.AddDat(MinusStr, syMinus);
    StrToLxSymH.AddDat(AsteriskStr, syAsterisk);
    StrToLxSymH.AddDat(SlashStr, sySlash);
    StrToLxSymH.AddDat(PercentStr, syPercent);
    StrToLxSymH.AddDat(ExclamationStr, syExclamation);
    StrToLxSymH.AddDat(VBarStr, syVBar);
    StrToLxSymH.AddDat(AmpersandStr, syAmpersand);
    StrToLxSymH.AddDat(QuestionStr, syQuestion);
    StrToLxSymH.AddDat(HashStr, syHash);
    // comparisons
    StrToLxSymH.AddDat(EqStr, syEq);
    StrToLxSymH.AddDat(NEqStr, syNEq);
    StrToLxSymH.AddDat(LssStr, syLss);
    StrToLxSymH.AddDat(GtrStr, syGtr);
    StrToLxSymH.AddDat(LEqStr, syLEq);
    StrToLxSymH.AddDat(GEqStr, syGEq);
    // brackets
    StrToLxSymH.AddDat(LParenStr, syLParen);
    StrToLxSymH.AddDat(RParenStr, syRParen);
    StrToLxSymH.AddDat(LBracketStr, syLBracket);
    StrToLxSymH.AddDat(RBracketStr, syRBracket);
    StrToLxSymH.AddDat(LBraceStr, syLBrace);
    StrToLxSymH.AddDat(RBraceStr, syRBrace);
  }

  const int SymKeyId=StrToLxSymH.GetKeyId(Str);
  if (SymKeyId!=-1){
    return TLxSym(int(StrToLxSymH[SymKeyId]));
  }
  return syUndef;
}
// Table manipulations
// Exercises a string-keyed hash table end to end: bulk insertion, lookup by
// successive and by distant keys, traversal by iterator and by key id,
// deletion of every DDist-th key, assignment, save/load round trip, random
// removal of all remaining elements and Clr().
// Every key is the decimal string of an integer k in [0, NElems) and its
// datum is k+1, so sums over keys and data have closed forms to check against.
TEST(TStrIntH, ManipulateTable) {
  const int64 NElems = 1000000;
  int DDist = 10;
  const char *FName = "test.hashstr.dat";
  TStrIntH TableStr;
  TStrIntH TableStr1;
  TStrIntH TableStr2;
  int i;
  int d;
  int n;
  int Id;
  int Key;
  TStr KeyStr;
  int64 KeySumVal;
  int64 DatSumVal;
  int64 KeySum;
  int64 DatSum;
  int64 KeySumDel;
  int64 DatSumDel;
  int DelCount;
  int Count;
  char s[32];

  // add table elements
  // keys are generated as multiples of d modulo NElems; the sum checks below
  // confirm that this sequence visits every value in [0, NElems) exactly once
  d = Prime(NElems);
  n = d;
  KeySumVal = 0;
  DatSumVal = 0;
  for (i = 0; i < NElems; i++) {
    sprintf(s,"%d",n);
    TStr Str = TStr(s);
    TableStr.AddDat(Str,n+1);
    KeySumVal += n;
    DatSumVal += (n+1);
    //printf("add %d %d\n", n, n+1);
    n = (n + d) % NElems;
  }

  EXPECT_EQ(0,TableStr.Empty());
  EXPECT_EQ(NElems,TableStr.Len());

  EXPECT_EQ(0,(NElems-1)*(NElems)/2 - KeySumVal);
  EXPECT_EQ(0,(NElems)*(NElems+1)/2 - DatSumVal);

  // verify elements by successive keys
  KeySum = 0;
  DatSum = 0;
  for (i = 0; i < NElems; i++) {
    sprintf(s,"%d",i);
    TStr Str = TStr(s);
    Id = TableStr.GetKeyId(Str);
    EXPECT_EQ(1,Id >= 0);
    KeyStr = TableStr.GetKey(Id);
    Key = atoi(KeyStr.CStr());
    //printf("vrfy %d %s %d %s %d %d\n", i, Str.CStr(), Id, KeyStr.CStr(), Key, (int) TableStr.GetDat(KeyStr));
    EXPECT_EQ(0,TableStr.GetDat(KeyStr)-Key-1);
    KeySum += Key;
    DatSum += TableStr.GetDat(KeyStr);
  }

  EXPECT_EQ(0,KeySumVal - KeySum);
  EXPECT_EQ(0,DatSumVal - DatSum);

  // verify elements by distant keys
  // the looked-up key is n (not the loop counter), so consecutive lookups are
  // d apart; the start is reduced modulo NElems to guarantee a valid key
  KeySum = 0;
  DatSum = 0;
  n = Prime(d) % NElems;
  for (i = 0; i < NElems; i++) {
    sprintf(s,"%d",n);
    TStr Str = TStr(s);
    Id = TableStr.GetKeyId(Str);
    EXPECT_EQ(1,Id >= 0);
    KeyStr = TableStr.GetKey(Id);
    Key = atoi(KeyStr.CStr());
    EXPECT_EQ(0,TableStr.GetDat(KeyStr)-Key-1);
    KeySum += Key;
    DatSum += TableStr.GetDat(KeyStr);
    n = (n + d) % NElems;
  }

  EXPECT_EQ(0,KeySumVal - KeySum);
  EXPECT_EQ(0,DatSumVal - DatSum);

  // verify elements by iterator
  KeySum = 0;
  DatSum = 0;
  for (TStrIntH::TIter It = TableStr.BegI(); It < TableStr.EndI(); It++) {
    KeyStr = It.GetKey();
    Key = atoi(KeyStr.CStr());
    EXPECT_EQ(0,It.GetDat()-Key-1);
    KeySum += Key;
    DatSum += It.GetDat();
  }

  EXPECT_EQ(0,KeySumVal - KeySum);
  EXPECT_EQ(0,DatSumVal - DatSum);

  // verify elements by key index
  KeySum = 0;
  DatSum = 0;
  Id = TableStr.FFirstKeyId();
  while (TableStr.FNextKeyId(Id)) {
    EXPECT_EQ(1,Id >= 0);
    KeyStr = TableStr.GetKey(Id);
    Key = atoi(KeyStr.CStr());
    EXPECT_EQ(0,TableStr.GetDat(KeyStr)-Key-1);
    KeySum += Key;
    DatSum += TableStr.GetDat(KeyStr);
  }

  EXPECT_EQ(0,KeySumVal - KeySum);
  EXPECT_EQ(0,DatSumVal - DatSum);

  // delete elements (every DDist-th key), remembering what was removed
  DelCount = 0;
  KeySumDel = 0;
  DatSumDel = 0;
  for (n = 0; n < NElems; n += DDist) {
    sprintf(s,"%d",n);
    TStr Str = TStr(s);
    Id = TableStr.GetKeyId(Str);
    //printf("del %d %d %d\n", n, Id, (int) TableStr[Id]);
    KeySumDel += n;
    DatSumDel += TableStr[Id];
    TableStr.DelKeyId(Id);
    DelCount++;
  }

  EXPECT_EQ(0,TableStr.Empty());
  EXPECT_EQ(NElems-DelCount,TableStr.Len());

  // verify elements by iterator
  KeySum = 0;
  DatSum = 0;
  Count = 0;
  for (TStrIntH::TIter It = TableStr.BegI(); It < TableStr.EndI(); It++) {
    KeyStr = It.GetKey();
    Key = atoi(KeyStr.CStr());
    EXPECT_EQ(0,It.GetDat()-Key-1);
    //printf("get %d %d\n", (int) It.GetKey(), (int) It.GetDat());
    KeySum += Key;
    DatSum += It.GetDat();
    Count++;
  }

  EXPECT_EQ(NElems-DelCount,Count);
  EXPECT_EQ(0,KeySumVal - KeySumDel - KeySum);
  EXPECT_EQ(0,DatSumVal - DatSumDel - DatSum);

  // assignment
  TableStr1 = TableStr;

  EXPECT_EQ(0,TableStr1.Empty());
  EXPECT_EQ(NElems-DelCount,TableStr1.Len());

  // verify elements by iterator
  KeySum = 0;
  DatSum = 0;
  Count = 0;
  for (TStrIntH::TIter It = TableStr1.BegI(); It < TableStr1.EndI(); It++) {
    KeyStr = It.GetKey();
    Key = atoi(KeyStr.CStr());
    EXPECT_EQ(0,It.GetDat()-Key-1);
    //printf("get %d %d\n", (int) It.GetKey(), (int) It.GetDat());
    KeySum += Key;
    DatSum += It.GetDat();
    Count++;
  }

  EXPECT_EQ(NElems-DelCount,Count);
  EXPECT_EQ(0,KeySumVal - KeySumDel - KeySum);
  EXPECT_EQ(0,DatSumVal - DatSumDel - DatSum);

  // saving and loading
  // each stream lives in its own scope so the output is closed before it is read back
  {
    TFOut FOut(FName);
    TableStr.Save(FOut);
    FOut.Flush();
  }

  {
    TFIn FIn(FName);
    TableStr2.Load(FIn);
  }

  EXPECT_EQ(NElems-DelCount,TableStr2.Len());

  // verify elements by iterator
  KeySum = 0;
  DatSum = 0;
  Count = 0;
  for (TStrIntH::TIter It = TableStr2.BegI(); It < TableStr2.EndI(); It++) {
    KeyStr = It.GetKey();
    Key = atoi(KeyStr.CStr());
    EXPECT_EQ(0,It.GetDat()-Key-1);
    //printf("get %d %d\n", (int) It.GetKey(), (int) It.GetDat());
    KeySum += Key;
    DatSum += It.GetDat();
    Count++;
  }

  EXPECT_EQ(NElems-DelCount,Count);
  EXPECT_EQ(0,KeySumVal - KeySumDel - KeySum);
  EXPECT_EQ(0,DatSumVal - DatSumDel - DatSum);

  // remove all elements (Count still holds the number of remaining elements)
  for (i = 0; i < Count; i++) {
    Id = TableStr.GetRndKeyId(TInt::Rnd, 0.5);
    TableStr.DelKeyId(Id);
  }

  EXPECT_EQ(0,TableStr.Len());
  EXPECT_EQ(1,TableStr.Empty());

  // verify elements by iterator
  KeySum = 0;
  DatSum = 0;
  Count = 0;
  for (TStrIntH::TIter It = TableStr.BegI(); It < TableStr.EndI(); It++) {
    KeyStr = It.GetKey();
    Key = atoi(KeyStr.CStr());
    EXPECT_EQ(0,It.GetDat()-Key-1);
    //printf("get %d %d\n", (int) It.GetKey(), (int) It.GetDat());
    KeySum += Key;
    DatSum += It.GetDat();
    Count++;
  }

  EXPECT_EQ(0,Count);
  EXPECT_EQ(0,KeySum);
  EXPECT_EQ(0,DatSum);

  // clear the table
  TableStr1.Clr();

  EXPECT_EQ(0,TableStr1.Len());
  EXPECT_EQ(1,TableStr1.Empty());

  // verify elements by iterator
  KeySum = 0;
  DatSum = 0;
  Count = 0;
  for (TStrIntH::TIter It = TableStr1.BegI(); It < TableStr1.EndI(); It++) {
    KeyStr = It.GetKey();
    Key = atoi(KeyStr.CStr());
    EXPECT_EQ(0,It.GetDat()-Key-1);
    //printf("get %d %d\n", (int) It.GetKey(), (int) It.GetDat());
    KeySum += Key;
    DatSum += It.GetDat();
    Count++;
  }

  EXPECT_EQ(0,Count);
  EXPECT_EQ(0,KeySum);
  EXPECT_EQ(0,DatSum);
}
// Builds a cyc-base from the four XML dump files located in FPath:
// "lexicon-dump.xml", "taxonomy-dump.xml", "relevance-dump.xml" and
// "kb-dump.xml". Each file is read as a sequence of XML documents terminated
// by an <end> document. Every relation is inserted as a forward edge plus a
// back-link edge whose predicate name is prefixed with "~".
// As a side effect the frequencies of knowledge-base sentence heads are
// written to "CycKB-CycLFq.Stat.Txt".
// A document that fails to parse prints the previously processed item and
// triggers Fail.
PCycBs TCycBs::LoadCycXmlDump(const TStr& FPath){
  // file-names
  TStr NrFPath=TStr::GetNrFPath(FPath);
  TStr CycLexiconFNm=NrFPath+"lexicon-dump.xml";
  TStr CycTaxonomyFNm=NrFPath+"taxonomy-dump.xml";
  TStr CycRelevanceFNm=NrFPath+"relevance-dump.xml";
  TStr CycKBaseFNm=NrFPath+"kb-dump.xml";

  // create cyc-base
  PCycBs CycBs=TCycBs::New();

  // lexicon
  {printf("Processing Lexicon %s ...\n", CycLexiconFNm.CStr());
  PSIn CycLexiconSIn=TFIn::New(CycLexiconFNm);
  PXmlDoc XmlDoc; int XmlDocs=0;
  TStr PrevCycWStr; TStr PrevCycLStr;
  forever{
    // statistics
    XmlDocs++; if (XmlDocs%1000==0){printf("%d Docs\r", XmlDocs);}
    // load xml-tree
    XmlDoc=TXmlDoc::LoadTxt(CycLexiconSIn);
    if (!XmlDoc->IsOk()){
      // report the last successfully processed word to help locate the bad record
      printf("%s - %s\n", PrevCycWStr.CStr(), PrevCycLStr.CStr());
      Fail;
    }
    // extract fields from xml-tree
    PXmlTok TopTok=XmlDoc->GetTok();
    if (TopTok->IsTag("end")){break;}
    IAssert(TopTok->IsTag("word"));
    TStr CycWStr=TopTok->GetArgVal("string");
    TStr CycLStr=TopTok->GetArgVal("cycl");
    // remember both fields for the diagnostic above
    PrevCycWStr=CycWStr; PrevCycLStr=CycLStr;
    // insert data
    CycBs->AddEdge(CycLStr, "#$nameString", CycWStr);
    CycBs->AddEdge(CycWStr, "~#$nameString", CycLStr);
  }
  printf("%d Docs\nDone.\n", XmlDocs);}

  // taxonomy
  {printf("Processing Taxonomy %s ...\n", CycTaxonomyFNm.CStr());
  PSIn CycTaxonomySIn=TFIn::New(CycTaxonomyFNm);
  PXmlDoc XmlDoc; int XmlDocs=0;
  TStr PrevSrcCycLStr;
  forever{
    // statistics
    XmlDocs++;
    if (XmlDocs%1000==0){
      printf("%d Docs\r", XmlDocs);}
    // load xml-tree
    XmlDoc=TXmlDoc::LoadTxt(CycTaxonomySIn);
    if (!XmlDoc->IsOk()){
      printf("%s\n", PrevSrcCycLStr.CStr());
      Fail;
    }
    // extract fields from xml-tree
    PXmlTok TopTok=XmlDoc->GetTok();
    if (TopTok->IsTag("end")){break;}
    IAssert(TopTok->IsTag("term"));
    TStr SrcCycLStr=TopTok->GetArgVal("cycl");
    PrevSrcCycLStr=SrcCycLStr;
    // only <isa> and <genl> sub-tokens are accepted; anything else fails
    for (int SubTokN=0; SubTokN<TopTok->GetSubToks(); SubTokN++){
      PXmlTok SubTok=TopTok->GetSubTok(SubTokN);
      TStr DstCycLStr=SubTok->GetTagNm();
      if (SubTok->IsTag("isa")){
        DstCycLStr=SubTok->GetArgVal("value");
        CycBs->AddEdge(SrcCycLStr, "#$isa", DstCycLStr);
        CycBs->AddEdge(DstCycLStr, "~#$isa", SrcCycLStr);
      } else
      if (SubTok->IsTag("genl")){
        DstCycLStr=SubTok->GetArgVal("value");
        CycBs->AddEdge(SrcCycLStr, "#$genls", DstCycLStr);
        CycBs->AddEdge(DstCycLStr, "~#$genls", SrcCycLStr);
      } else {
        Fail;
      }
    }
  }
  printf("%d Docs\nDone.\n", XmlDocs);}

  // relevance
  {printf("Processing Relevance %s ...\n", CycRelevanceFNm.CStr());
  PSIn CycRelevanceSIn=TFIn::New(CycRelevanceFNm);
  PXmlDoc XmlDoc; int XmlDocs=0;
  TStr PrevCycStr;
  forever{
    // statistics
    XmlDocs++;
    if (XmlDocs%1000==0){
      printf("%d Docs\r", XmlDocs);}
    // load xml-tree
    XmlDoc=TXmlDoc::LoadTxt(CycRelevanceSIn);
    if (!XmlDoc->IsOk()){
      printf("%s\n", PrevCycStr.CStr());
      Fail;
    }
    // extract fields from xml-tree
    PXmlTok TopTok=XmlDoc->GetTok();
    if (TopTok->IsTag("end")){break;}
    IAssert(TopTok->IsTag("term"));
    TStr CycStr=TopTok->GetArgVal("cyc");
    PrevCycStr=CycStr;
    //IAssert(CycBs->IsVNm(CycStr));
    // terms unknown to the cyc-base are silently skipped
    if (CycBs->IsVNm(CycStr)){
      if (TopTok->GetArgVal("thcl")=="T"){
        CycBs->GetVrtx(CycStr).SetFlag(cvfHumanRelevant, true);}
      if (TopTok->GetArgVal("irrel")=="T"){
        CycBs->GetVrtx(CycStr).SetFlag(cfvHumanIrrelevant, true);}
      if (TopTok->GetArgVal("clarifying")=="T"){
        CycBs->GetVrtx(CycStr).SetFlag(cfvHumanClarifying, true);}
      if ((TopTok->GetArgVal("thcl")=="T")||(TopTok->GetArgVal("clarifying")=="T")){
        CycBs->GetVrtx(CycStr).SetFlag(cvfHumanOk, true);}
    } else {
      //printf("%s\n", CycStr.CStr());
    }
  }
  printf("%d Docs\nDone.\n", XmlDocs);}

  // knowledge-base
  {printf("Processing KBase %s ...\n", CycKBaseFNm.CStr());
  PSIn CycKBaseSIn=TFIn::New(CycKBaseFNm);
  PXmlDoc XmlDoc; int XmlDocs=0;
  TStr PrevCycLStr; TStrV PrevArgCycLStrV;
  TStrIntH HdCycLToFq;
  forever{
    // statistics
    XmlDocs++;
    if (XmlDocs%1000==0){
      printf("%d Docs\r", XmlDocs);}
    //if (XmlDocs>10000){break;}
    // load xml-tree
    XmlDoc=TXmlDoc::LoadTxt(CycKBaseSIn);
    if (!XmlDoc->IsOk()){
      printf("%s\n", PrevCycLStr.CStr());
      for (int ArgN=0; ArgN<PrevArgCycLStrV.Len(); ArgN++){
        printf(" [%s]", PrevArgCycLStrV[ArgN].CStr());}
      printf("\n");
      Fail;
    }
    // extract fields from xml-tree
    PXmlTok TopTok=XmlDoc->GetTok();
    if (TopTok->IsTag("end")){break;}
    IAssert(TopTok->IsTag("sentence"));
    TStr CycLStr=TopTok->GetArgVal("cycl");
    TXmlTokV ArgXmlTokV; XmlDoc->GetTagTokV("sentence|arg", ArgXmlTokV);
    // arguments without a "cycl" attribute are recorded as the placeholder "Empty"
    TStrV ArgCycLStrV;
    for (int ArgN=0; ArgN<ArgXmlTokV.Len(); ArgN++){
      PXmlTok Tok=ArgXmlTokV[ArgN];
      IAssert(Tok->IsTag("arg"));
      if (Tok->IsArg("cycl")){
        TStr ArgCycLStr=Tok->GetArgVal("cycl");
        ArgCycLStrV.Add(ArgCycLStr);
      } else {
        ArgCycLStrV.Add("Empty");
      }
    }
    PrevCycLStr=CycLStr;
    PrevArgCycLStrV=ArgCycLStrV;
    // count sentence heads together with their number of remaining arguments
    if (ArgCycLStrV.Len()>0){
      HdCycLToFq.AddDat(ArgCycLStrV[0]+" - "+TInt::GetStr(ArgCycLStrV.Len()-1))++;}
    // insert binary relations only; isa/termOfUnit/genls predicates are excluded here
    if (ArgCycLStrV.Len()==3){
      TStr PredNm=ArgCycLStrV[0];
      if ((PredNm!="#$isa")&&(PredNm!="#$termOfUnit")&&(PredNm!="#$genls")){
        TStr BackLinkPredNm=TStr("~")+PredNm;
        TStr Arg1=ArgCycLStrV[1];
        TStr Arg2=ArgCycLStrV[2];
        CycBs->AddEdge(Arg1, PredNm, Arg2);
        CycBs->AddEdge(Arg2, BackLinkPredNm, Arg1);
      }
    }
  }
  // output top cycl relations
  // NOTE(review): the printed number is frequency+1, not a rank or the raw
  // frequency - confirm the intended value before changing the output format.
  {TFOut CycLSOut("CycKB-CycLFq.Stat.Txt"); FILE* fCycL=CycLSOut.GetFileId();
  TIntStrPrV FqCycLStrPrV; HdCycLToFq.GetDatKeyPrV(FqCycLStrPrV); FqCycLStrPrV.Sort(false);
  for (int CycLN=0; CycLN<FqCycLStrPrV.Len(); CycLN++){
    fprintf(fCycL, "%6d. %s\n", 1+FqCycLStrPrV[CycLN].Val1, FqCycLStrPrV[CycLN].Val2.CStr());
  }}
  printf("%d Docs\nDone.\n", XmlDocs);}

  // return cyc-base
  return CycBs;
}