void TNmObjBs::GetNmObjStrFqPrV(TStrIntPrV& NmObjStrFqPrV, const int& MnFq) const { int NmObjs=GetNmObjs(); NmObjStrFqPrV.Gen(NmObjs, 0); for (int NmObjId=0; NmObjId<NmObjs; NmObjId++){ TStrIntPr NmObjStrFqPr; NmObjStrFqPr.Val1=GetNmObjStr(NmObjId); NmObjStrFqPr.Val2=GetNmObjDocs(NmObjId); if (NmObjStrFqPr.Val2>=MnFq){ NmObjStrFqPrV.Add(NmObjStrFqPr);} } }
TStr TStrUtil::GetStr(const TStrIntPrV& StrIntPrV, const TStr& FieldDelimiterStr, const TStr& DelimiterStr) { TChA ResChA; for (int EltN = 0; EltN < StrIntPrV.Len(); EltN++) { if (!ResChA.Empty()) { ResChA+=DelimiterStr; } ResChA+=StrIntPrV[EltN].Val1; ResChA+=FieldDelimiterStr; ResChA+=StrIntPrV[EltN].Val2.GetStr(); } return ResChA; }
void TSkyGridEnt::GetDocsPerDateV( const TSkyGridBs* SkyGridBs, TStrIntPrV& DateStrDocsPrV, int& Docs) const { TStrIntH DateStrToDocsH; Docs=0; for (int DocN=0; DocN<GetDocIds(); DocN++){ int DocId=GetDocId(DocN); PSkyGridDoc Doc=SkyGridBs->GetDoc(DocId); uint64 DocTm=Doc->GetTm(); TStr DocDateStr=TTm::GetTmFromMSecs(DocTm).GetWebLogDateStr(); DateStrToDocsH.AddDat(DocDateStr)++; Docs++; } DateStrToDocsH.GetKeyDatPrV(DateStrDocsPrV); DateStrDocsPrV.Sort(); }
void __fastcall TContexterF::NmObjSortRgClick(TObject *Sender){ // select radio-group & list-box TRadioGroup* NmObjSortRg=NULL; TListBox* NmObjLb=NULL; if (Sender==CtxNmObjSortRg){NmObjSortRg=CtxNmObjSortRg; NmObjLb=CtxNmObjLb;} else if (Sender==SrcNmObjSortRg){NmObjSortRg=SrcNmObjSortRg; NmObjLb=SrcNmObjLb;} else if (Sender==DstNmObjSortRg){NmObjSortRg=DstNmObjSortRg; NmObjLb=DstNmObjLb;} else {return;} // determine sort-order bool SortByNameP=false; if (NmObjSortRg->ItemIndex==0){SortByNameP=true;} else if (NmObjSortRg->ItemIndex==1){SortByNameP=false;} else {NmObjSortRg->ItemIndex=1; SortByNameP=false;} // fill NmObjLb NmObjLb->Clear(); EnConceptWordLb->Clear(); EnCoNmObjLb->Clear(); if (SortByNameP){ //...sorted by name TStrIntPrV NmObjStrFqPrV; State->NmObjBs->GetNmObjStrFqPrV(NmObjStrFqPrV, 3); NmObjStrFqPrV.Sort(); for (int NmObjN=0; NmObjN<NmObjStrFqPrV.Len(); NmObjN++){ TStr LbItemStr=NmObjStrFqPrV[NmObjN].Val1+ TInt::GetStr(NmObjStrFqPrV[NmObjN].Val2, " (%d)"); NmObjLb->Items->Add(LbItemStr.CStr()); } } else { //...sorted by frequency TIntStrPrV NmObjFqStrPrV; State->NmObjBs->GetNmObjFqStrPrV(NmObjFqStrPrV, 3); NmObjFqStrPrV.Sort(false); for (int NmObjN=0; NmObjN<NmObjFqStrPrV.Len(); NmObjN++){ TStr LbItemStr=NmObjFqStrPrV[NmObjN].Val2+ TInt::GetStr(NmObjFqStrPrV[NmObjN].Val1, " (%d)"); NmObjLb->Items->Add(LbItemStr.CStr()); } } }
void TSkyGridBs::SaveTxt(const TStr& FNm, const uint64& CurTm){ // time-limit TStr CurTmStr=TTm::GetTmFromMSecs(CurTm).GetWebLogDateTimeStr(); uint64 CurDateTm=TTm::GetMSecsFromTm(TTm::GetTmFromWebLogDateTimeStr(TTm::GetTmFromMSecs(CurTm).GetWebLogDateStr())); TStr CurDateTmStr=TTm::GetTmFromMSecs(CurDateTm).GetWebLogDateTimeStr(); TUInt64V MnTmV; MnTmV.Add(CurDateTm-0*TTmInfo::GetDayMSecs()); MnTmV.Add(CurDateTm-1*TTmInfo::GetDayMSecs()); MnTmV.Add(CurDateTm-2*TTmInfo::GetDayMSecs()); MnTmV.Add(CurDateTm-4*TTmInfo::GetDayMSecs()); MnTmV.Add(CurDateTm-8*TTmInfo::GetDayMSecs()); MnTmV.Add(CurDateTm-16*TTmInfo::GetDayMSecs()); MnTmV.Add(CurDateTm-32*TTmInfo::GetDayMSecs()); // get bow //PBowDocBs BowDocBs=GetBowDocBs(3, 5); PBowDocBs BowDocBs=GetBowDocBs(); PBowDocWgtBs BowDocWgtBs=GetBowDocWgtBs(BowDocBs); // open file TFOut FOut(FNm); FILE* fOut=FOut.GetFileId(); // get docs-entities sorted vector TIntPrV DocsEntIdPrV; GetSorted_DocsEntIdPrV(DocsEntIdPrV); // traverse entities for (int EntN=0; EntN<DocsEntIdPrV.Len(); EntN++){ int EntId=DocsEntIdPrV[EntN].Val2; TStr EntNm=GetEntNm(EntId); int EntDocs=DocsEntIdPrV[EntN].Val1; TSkyGridEnt& Ent=GetEnt(EntId); int LinkEnts=Ent.GetLinkEnts(); fprintf(fOut, "'%s' [%d docs] [%d ents]\n", EntNm.CStr(), EntDocs, LinkEnts); // output docs over dates {TStrIntPrV DateStrDocsPrV; int _EntDocs; Ent.GetDocsPerDateV(this, DateStrDocsPrV, _EntDocs); fprintf(fOut, " Docs per Date (%d docs):", _EntDocs); for (int DateN=0; DateN<DateStrDocsPrV.Len(); DateN++){ TStr DateStr=DateStrDocsPrV[DateN].Val1; int Docs=DateStrDocsPrV[DateN].Val2; fprintf(fOut, " [%s:%d]", DateStr.CStr(), Docs); } fprintf(fOut, "\n");} fprintf(fOut, " [Now: %s]\n", CurTmStr.CStr()); TIntPrV PrevLinkWgtDstEntIdPrV; TStrFltPrV PrevWordStrWgtPrV; for (int MnTmN=0; MnTmN<MnTmV.Len(); MnTmN++){ uint64 MnTm=MnTmV[MnTmN]; double PastDays=(CurDateTm-MnTm)/double(TTmInfo::GetDayMSecs()); TStr MnTmStr=TTm::GetTmFromMSecs(MnTm).GetWebLogDateTimeStr(); // get linked entities TIntPrV LinkWgtDstEntIdPrV; Ent.GetSorted_LinkWgtDstEntIdPrV(MnTm, 0.9, LinkWgtDstEntIdPrV); // output difference between previous and current centroid if (MnTmN>0){ TIntPrV NegDiffLinkWgtDstEntIdPrV; TIntPrV PosDiffLinkWgtDstEntIdPrV; GetLinkWgtDstEntIdPrVDiff(LinkWgtDstEntIdPrV, PrevLinkWgtDstEntIdPrV, NegDiffLinkWgtDstEntIdPrV, PosDiffLinkWgtDstEntIdPrV); // output positive change TChA PosDiffLinkWgtDstEntIdPrVChA; GetLinkWgtDstEntIdPrVChA(PosDiffLinkWgtDstEntIdPrV, PosDiffLinkWgtDstEntIdPrVChA); fprintf(fOut, " Pos-Diff: %s\n", PosDiffLinkWgtDstEntIdPrVChA.CStr()); // output negative change TChA NegDiffLinkWgtDstEntIdPrVChA; GetLinkWgtDstEntIdPrVChA(NegDiffLinkWgtDstEntIdPrV, NegDiffLinkWgtDstEntIdPrVChA); fprintf(fOut, " Neg-Diff: %s\n", NegDiffLinkWgtDstEntIdPrVChA.CStr()); } PrevLinkWgtDstEntIdPrV=LinkWgtDstEntIdPrV; // output linked entities int TopLinkEnts=LinkWgtDstEntIdPrV.Len(); TChA LinkWgtDstEntIdPrVChA; GetLinkWgtDstEntIdPrVChA(LinkWgtDstEntIdPrV, LinkWgtDstEntIdPrVChA); fprintf(fOut, " Entities (%d ents): %s\n", TopLinkEnts, LinkWgtDstEntIdPrVChA.CStr()); // get text centroid int CtrDocs; TStrFltPrV WordStrWgtPrV; Ent.GetDocCentroid(this, BowDocBs, BowDocWgtBs, MnTm, 150, 0.9, CtrDocs, WordStrWgtPrV); // output difference between previous and current centroid if (MnTmN>0){ TStrFltPrV NegDiffWordStrWgtPrV; TStrFltPrV PosDiffWordStrWgtPrV; GetWordStrWgtPrVDiff(WordStrWgtPrV, PrevWordStrWgtPrV, NegDiffWordStrWgtPrV, PosDiffWordStrWgtPrV); // output positive change TChA PosDiffWordStrWgtPrVChA; GetWordStrWgtPrVChA(PosDiffWordStrWgtPrV, PosDiffWordStrWgtPrVChA); fprintf(fOut, " Pos-Diff: %s\n", PosDiffWordStrWgtPrVChA.CStr()); // output negative change TChA NegDiffWordStrWgtPrVChA; GetWordStrWgtPrVChA(NegDiffWordStrWgtPrV, NegDiffWordStrWgtPrVChA); fprintf(fOut, " Neg-Diff: %s\n", NegDiffWordStrWgtPrVChA.CStr()); } PrevWordStrWgtPrV=WordStrWgtPrV; // output centroid TChA WordStrWgtPrVChA; GetWordStrWgtPrVChA(WordStrWgtPrV, WordStrWgtPrVChA); fprintf(fOut, " Centroid (%d docs, %d words): %s\n", CtrDocs, WordStrWgtPrV.Len(), WordStrWgtPrVChA.CStr()); // output time fprintf(fOut, " [-%.1f days: %s]\n", PastDays, MnTmStr.CStr()); } // entity clustering /*TVec<TStrFltPrV> EntNmWgtPrVV; Ent.GetEntClustV(this, MnTmV.Last(), 100, 1000, 10, EntNmWgtPrVV); for (int ClustN=0; ClustN<EntNmWgtPrVV.Len(); ClustN++){ TStrFltPrV& EntNmWgtPrV=EntNmWgtPrVV[ClustN]; fprintf(fOut, " Clust-%d:", ClustN); for (int EntN=0; EntN<EntNmWgtPrV.Len(); EntN++){ TStr EntNm=EntNmWgtPrV[EntN].Val1; double Wgt=EntNmWgtPrV[EntN].Val2; fprintf(fOut, " ['%s':%.3f]", EntNm.CStr(), Wgt); } fprintf(fOut, "\n"); }*/ fprintf(fOut, "\n"); } }