void TNmObjBs::GetFqNmObjIdPrV( const TStr& TargetNmObjStr, TIntPrV& FqNmObjIdPrV) const { //printf("Searching %s ...", TargetNmObjStr.CStr()); // get target named-object-id int TargetNmObjId=GetNmObjId(TargetNmObjStr); // collect target named-object frequencies TIntIntH NmObjIdToFqH; // traverse target named-object documents int NmObjDocs=GetNmObjDocs(TargetNmObjId); for (int NmObjDocIdN=0; NmObjDocIdN<NmObjDocs; NmObjDocIdN++){ // get document-id int DocId=GetNmObjDocId(TargetNmObjId, NmObjDocIdN); // traverse named-object in document int DocNmObjs=GetDocNmObjs(DocId); for (int DocNmObjN=0; DocNmObjN<DocNmObjs; DocNmObjN++){ // get named-object & frequency int NmObjId; int TermFq; GetDocNmObjId(DocId, DocNmObjN, NmObjId, TermFq); // increment named-object document frequency NmObjIdToFqH.AddDat(NmObjId)++; } } // get & sort frequency table FqNmObjIdPrV.Clr(); NmObjIdToFqH.GetDatKeyPrV(FqNmObjIdPrV); FqNmObjIdPrV.Sort(false); }
void TSubGraphsEnum::RecurBfs1(const int& NId, const int& Depth) { if (Depth == 0) { TIntPrV EdgeV; EdgeH.GetKeyV(EdgeV); EdgeV.Sort(); SgV.Add(EdgeV); return; } const TNGraph::TNodeI NI = NGraph ->GetNI(NId); for (int e = 0; e < NI.GetOutDeg(); e++) { const TIntPr Edge(NId, NI.GetOutNId(e)); if (! EdgeH.IsKey(Edge)) { EdgeH.AddKey(Edge); RecurBfs1(NI.GetOutNId(e), Depth-1); EdgeH.DelKey(Edge); } } for (int e = 0; e < NI.GetInDeg(); e++) { const TIntPr Edge(NI.GetInNId(e), NId); if (! EdgeH.IsKey(Edge)) { EdgeH.AddKey(Edge); RecurBfs1(NI.GetInNId(e), Depth-1); EdgeH.DelKey(Edge); } } }
void TSkyGridEnt::GetSorted_LinkWgtDstEntIdPrV( const uint64& MnTm, const double& TopWgtSumPrc, TIntPrV& LinkWgtDstEntIdPrV) const { double AllLinkWgtSum=0; TIntIntH DstEntIdLinkWgtH; int LinkEnts=GetLinkEnts(); for (int LinkEntN=0; LinkEntN<LinkEnts; LinkEntN++){ int DstEntId=GetLinkEntId(LinkEntN); int EntLinks=GetEntLinks(LinkEntN); int EntLinkWgtSum=0; for (int EntLinkN=0; EntLinkN<EntLinks; EntLinkN++){ const TSkyGridEntLinkCtx& EntLinkCtx=GetEntLinkCtx(LinkEntN, EntLinkN); if (EntLinkCtx.Tm>=MnTm){ EntLinkWgtSum+=EntLinkCtx.LinkWgt;} } DstEntIdLinkWgtH.AddDat(DstEntId, EntLinkWgtSum); AllLinkWgtSum+=EntLinkWgtSum; } LinkWgtDstEntIdPrV.Clr(); DstEntIdLinkWgtH.GetDatKeyPrV(LinkWgtDstEntIdPrV); LinkWgtDstEntIdPrV.Sort(false); // cut long-tail if ((TopWgtSumPrc>0.0)&&(LinkWgtDstEntIdPrV.Len()>0)){ int TopLinkWgt=LinkWgtDstEntIdPrV[0].Val1; if (TopLinkWgt>(3*AllLinkWgtSum)/LinkWgtDstEntIdPrV.Len()){ double CutWgtSum=AllLinkWgtSum*(1-TopWgtSumPrc); int LastValN=LinkWgtDstEntIdPrV.Len()-1; while ((LastValN>0)&&(CutWgtSum>0)){ CutWgtSum-=LinkWgtDstEntIdPrV[LastValN].Val1; LastValN--; } LinkWgtDstEntIdPrV.Trunc(LastValN+1); } } }
///////////////////////////////////////////////// // SkyGrid-Base void TSkyGridBs::GetSorted_DocsEntIdPrV(TIntPrV& DocsEntIdPrV){ TIntIntH EntIdToDocsH; for (int EntId=0; EntId<GetEnts(); EntId++){ int Docs=GetEnt(EntId).GetDocIds(); EntIdToDocsH.AddDat(EntId, Docs); } DocsEntIdPrV.Clr(); EntIdToDocsH.GetDatKeyPrV(DocsEntIdPrV); DocsEntIdPrV.Sort(false); }
void GetBiConSzCnt(const PUNGraph& Graph, TIntPrV& SzCntV) { TCnComV BiCnComV; GetBiCon(Graph, BiCnComV); TIntH SzCntH; for (int c =0; c < BiCnComV.Len(); c++) { SzCntH.AddDat(BiCnComV[c].Len()) += 1; } SzCntH.GetKeyDatPrV(SzCntV); SzCntV.Sort(); }
// Returns the maximum or minimum node degree of graph G.
// IsDir: "false" converts G to an undirected graph first; any other value
//        keeps G directed.
// IsIn:  "true" uses in-degrees; any other value uses out-degrees.
// IsMax: "true" returns the largest degree; any other value the smallest.
// Returns -1 when the degree table is empty (e.g. a graph without nodes);
// previously this case indexed into an empty vector.
int GetMaxMinDeg(const PNGraph& G, const TStr& IsDir, const TStr& IsIn, const TStr& IsMax){
  // (degree, node-count) pairs
  TIntPrV DegCnt;
  if (IsDir == "false"){
    PUNGraph U = TSnap::ConvertGraph<PUNGraph>(G);
    if (IsIn == "true")
      TSnap::GetInDegCnt(U, DegCnt);
    else
      TSnap::GetOutDegCnt(U, DegCnt);
  }
  else{
    if (IsIn == "true")
      TSnap::GetInDegCnt(G, DegCnt);
    else
      TSnap::GetOutDegCnt(G, DegCnt);
  }
  // no degrees collected: there is no first/last element to read
  if (DegCnt.Empty()) {
    return -1;
  }
  // sort in descending order: largest degree first, smallest last
  DegCnt.Sort(false);
  if (IsMax == "true")
    return DegCnt[0].Val1;
  else
    return DegCnt[DegCnt.Len()-1].Val1;
}
// Click handler of the context named-object list-box.
// Takes the selected item, stores the text left of the ' ' separator as the
// new root named-object (State->EnRootNmObjStr), and then refreshes:
//  - the concept-word list-box (word plus weight),
//  - the co-occurring named-object list-box (name plus frequency, root
//    itself skipped, built from the first 100 frequency pairs),
//  - the context tree (State->EnCtxTree), followed by a repaint.
// Does nothing when no valid item is selected.
void __fastcall TContexterF::CtxNmObjLbClick(TObject *Sender){
  TListBox* NmObjLb=CtxNmObjLb;
  // bail out unless the selection index points at an existing item
  const int SelItemN=NmObjLb->ItemIndex;
  if (!((0<=SelItemN)&&(SelItemN<NmObjLb->Items->Count))){
    return;}
  // split the item text on ' ': left part is the root named-object string
  TStr SelItemStr=NmObjLb->Items->Strings[SelItemN].c_str();
  TStr FqStr;
  SelItemStr.SplitOnCh(State->EnRootNmObjStr, ' ', FqStr);
  // get concept-vector of the root named-object
  PBowSpV ConceptSpV=State->NmObjBs->GetNmObjConcept(
   State->BowDocBs, State->BowDocWgtBs, State->EnRootNmObjStr);
  TStrFltPrV WordStrWgtPrV;
  ConceptSpV->GetWordStrWgtPrV(State->BowDocBs, 100, 0.66, WordStrWgtPrV);
  // fill concept-list-box with "word (weight)" items
  EnConceptWordLb->Clear();
  const int Words=WordStrWgtPrV.Len();
  for (int WordN=0; WordN<Words; WordN++){
    TStr WordItemStr=WordStrWgtPrV[WordN].Val1+
     TFlt::GetStr(WordStrWgtPrV[WordN].Val2, " (%.3f)");
    EnConceptWordLb->Items->Add(WordItemStr.CStr());
  }
  // get coref-named-objects as (frequency, id) pairs, keep the first 100
  TIntPrV FqNmObjIdPrV;
  State->NmObjBs->GetFqNmObjIdPrV(State->EnRootNmObjStr, FqNmObjIdPrV);
  FqNmObjIdPrV.Sort(false);
  FqNmObjIdPrV.Trunc(100);
  // fill coref-named-objects with "name (frequency)" items
  EnCoNmObjLb->Clear();
  const int CoNmObjs=FqNmObjIdPrV.Len();
  for (int CoNmObjN=0; CoNmObjN<CoNmObjs; CoNmObjN++){
    TStr CoNmObjStr=State->NmObjBs->GetNmObjStr(FqNmObjIdPrV[CoNmObjN].Val2);
    // the root named-object is not listed among its own co-references
    if (State->EnRootNmObjStr==CoNmObjStr){continue;}
    TStr CoItemStr=CoNmObjStr+TInt::GetStr(FqNmObjIdPrV[CoNmObjN].Val1, " (%d)");
    EnCoNmObjLb->Items->Add(CoItemStr.CStr());
  }
  // context-tree: rebuild for the new root and repaint
  State->EnCtxTree=GetCtxTreeGraph(State->NmObjBs, State->EnRootNmObjStr, State->EnDrawLevels-1);
  EnPbPaint(Sender);
}
/////////////////////////////////////////////////
// Context-Tree

// Builds a tree-shaped graph of named-objects around RootNmObjStr.
// Starting from the root, named-objects are taken from a queue of
// (named-object-id, distance) pairs; for each one its co-occurring
// named-objects (as returned by GetFqNmObjIdPrV) become child vertices if
// they have not been reached before.
//  NmObjBs      - named-object base used for id/string lookup and children
//  RootNmObjStr - string of the root named-object
//  MxDist       - named-objects with distance > MxDist are not expanded
//                 (vertices at distance MxDist+1 can still be created as
//                 children of a vertex at distance MxDist)
// Returns the graph after edge width, star layout and rescaling were applied.
// Besides the parameters, the function reads settings from the global form
// ContexterF (State->EnSubNodes, State->EnCtxLen, EnInterNmObjContextCb, ...).
// NOTE(review): RootNmObjId is used as a vector index without validation;
// presumably RootNmObjStr is always a known named-object - confirm.
PGraph GetCtxTreeGraph(
 const PNmObjBs& NmObjBs, const TStr& RootNmObjStr, const int& MxDist){
  // create distance graph
  PGraph Graph=TGraph::New();
  // create root node
  int RootNmObjId=NmObjBs->GetNmObjId(RootNmObjStr);
  PVrtx RootVrtx=TGVrtx::New(RootNmObjId, RootNmObjStr);
  Graph->AddVrtx(RootVrtx);
  // create distance vector (-1 marks a named-object not reached yet)
  TIntV NmObjDistV(NmObjBs->GetNmObjs());
  NmObjDistV.PutAll(-1);
  NmObjDistV[RootNmObjId]=0;
  // create queue of (named-object-id, distance) pairs
  TIntPrQ OpenNmObjIdDistPrQ;
  OpenNmObjIdDistPrQ.Push(TIntPr(RootNmObjId, 0));
  while (!OpenNmObjIdDistPrQ.Empty()){
    // get name-object-id from queue
    int NmObjId=OpenNmObjIdDistPrQ.Top().Val1;
    int NmObjDist=OpenNmObjIdDistPrQ.Top().Val2;
    OpenNmObjIdDistPrQ.Pop();
    IAssert(NmObjDistV[NmObjId]==NmObjDist);
    // get named-object string
    TStr NmObjStr=NmObjBs->GetNmObjStr(NmObjId);
    TStr UcNmObjStr=NmObjStr.GetUc();
    // trace of every dequeued named-object, written to stdout
    printf("[%s:%d] ", NmObjStr.CStr(), NmObjDist);
    // check distance
    if (NmObjDist>MxDist){continue;}
    // get named-object vertex
    PVrtx SrcVrtx=Graph->GetVrtx(NmObjId);
    // get named-object children as (frequency, named-object-id) pairs
    TIntPrV FqNmObjIdPrV;
    NmObjBs->GetFqNmObjIdPrV(NmObjStr, FqNmObjIdPrV);
    int SubNmObjs=FqNmObjIdPrV.Len();
    // traverse named-object children
    // (CreatedSubNmObjs counts only children that were not reached before)
    int CreatedSubNmObjs=0;
    for (int SubNmObjN=0; SubNmObjN<SubNmObjs; SubNmObjN++){
      // get child data
      int SubNmObjFq=FqNmObjIdPrV[SubNmObjN].Val1;
      int SubNmObjId=FqNmObjIdPrV[SubNmObjN].Val2;
      TStr SubNmObjStr=NmObjBs->GetNmObjStr(SubNmObjId);
      TStr UcSubNmObjStr=SubNmObjStr.GetUc();
      TStr SubNmObjVNm=SubNmObjStr;
      // calculate and add context string formed from coref-named-objects
      // NOTE(review): the Clr() below empties the vector that was just
      // filled, sorted and truncated, so the loop runs zero times, CtxChA
      // stays empty and SubNmObjVNm remains the plain SubNmObjStr. This looks
      // like a deliberately disabled feature, but GetFqNmObjIdPrV is still
      // evaluated for every child - confirm intent.
      // NOTE(review): this block goes through the global
      // ContexterF->State->NmObjBs instead of the NmObjBs parameter;
      // presumably both refer to the same base - confirm.
      {TChA CtxChA;
      TIntPrV FqNmObjIdPrV;
      ContexterF->State->NmObjBs->GetFqNmObjIdPrV(SubNmObjStr, FqNmObjIdPrV);
      FqNmObjIdPrV.Sort(false);
      FqNmObjIdPrV.Trunc(ContexterF->State->EnCtxLen);
      FqNmObjIdPrV.Clr();
      for (int NmObjN=0; NmObjN<FqNmObjIdPrV.Len(); NmObjN++){
        TStr CoNmObjStr=ContexterF->State->NmObjBs->GetNmObjStr(FqNmObjIdPrV[NmObjN].Val2);
        if (SubNmObjStr!=CoNmObjStr){
          CtxChA+='['; CtxChA+=CoNmObjStr; CtxChA+=']'; CtxChA+='\\';}
      }
      if (!CtxChA.Empty()){
        SubNmObjVNm=SubNmObjStr+"\\"+CtxChA;}}
      // push child named-object-id if necessary
      if (NmObjDistV[SubNmObjId]==-1){
        // check number of subnodes: the branching limit depends on the
        // distance of the parent (configurable only for the root)
        int MxCreatedSubNmObjs=0;
        switch (NmObjDist){
          case 0: MxCreatedSubNmObjs=/*20;*/ContexterF->State->EnSubNodes; break;
          case 1: MxCreatedSubNmObjs=4; break;
          case 2: MxCreatedSubNmObjs=2; break;
          case 3: MxCreatedSubNmObjs=1; break;
          case 4: MxCreatedSubNmObjs=1; break;
          default: MxCreatedSubNmObjs=0; break;
        }
        // check if stop creating branches
        // (this break leaves the children loop, not just the current child)
        CreatedSubNmObjs++;
        if (CreatedSubNmObjs>MxCreatedSubNmObjs){break;}
        // push edge
        OpenNmObjIdDistPrQ.Push(TIntPr(SubNmObjId, NmObjDist+1));
        NmObjDistV[SubNmObjId]=NmObjDist+1;
        // create vertex
        TStr VNm=SubNmObjVNm;
        PVrtx DstVrtx=TGVrtx::New(SubNmObjId, VNm);
        Graph->AddVrtx(DstVrtx);
        // create edge; the edge name starts with the child frequency
        //TStr ENm=TStr("_")+TInt::GetStr(NmObjId)+"-"+TInt::GetStr(SubNmObjId);
        TStr ENm=TInt::GetStr(SubNmObjFq);
        // calculate and add context string formed from the concept words
        // shared by source and destination named-objects (only when the
        // corresponding check-box on the form is checked)
        if (ContexterF->EnInterNmObjContextCb->Checked){
          TChA CtxChA;
          TStr SrcNmObjStr=NmObjStr;
          // use the vertex name only up to the '\\' character, if present
          // (presumably SearchCh returns the position of the first
          // occurrence - confirm)
          TChA DstNmObjChA=DstVrtx->GetVNm();
          if (DstNmObjChA.IsChIn('\\')){
            DstNmObjChA.Trunc(DstNmObjChA.SearchCh('\\'));}
          TStr DstNmObjStr=DstNmObjChA;
          PBowSpV ConceptSpV=ContexterF->State->NmObjBs->GetNmObjConcept(
           ContexterF->State->BowDocBs, ContexterF->State->BowDocWgtBs,
           SrcNmObjStr, DstNmObjStr);
          // NOTE(review): the meaning of the arguments -1 and 1 is not
          // visible here; presumably "no limit on words / full weight" -
          // confirm against GetWordStrWgtPrV.
          TStrFltPrV WordStrWgtPrV;
          ConceptSpV->GetWordStrWgtPrV(
           ContexterF->State->BowDocBs, -1, 1, WordStrWgtPrV);
          // words accepted so far for this edge
          TStrV UcWordStrSfV;
          for (int WordN=0; WordN<WordStrWgtPrV.Len(); WordN++){
            // get word
            // (no case conversion is done here; presumably the words are
            // already upper-case - confirm)
            TStr UcWordStr=WordStrWgtPrV[WordN].Val1;
            // remove duplicates: skip words that overlap (IsStrIn in either
            // direction) with the source or destination named-object string
            if (UcWordStr.IsStrIn(UcNmObjStr)){continue;}
            if (UcWordStr.IsStrIn(UcSubNmObjStr)){continue;}
            if (UcNmObjStr.IsStrIn(UcWordStr)){continue;}
            if (UcSubNmObjStr.IsStrIn(UcWordStr)){continue;}
            // ... or with any word accepted so far
            bool Ok=true;
            for (int WordSfN=0; WordSfN<UcWordStrSfV.Len(); WordSfN++){
              if (UcWordStrSfV[WordSfN].IsStrIn(UcWordStr)){Ok=false; break;}
              if (UcWordStr.IsStrIn(UcWordStrSfV[WordSfN])){Ok=false; break;}
            }
            if (!Ok){continue;}
            // add word
            UcWordStrSfV.Add(UcWordStr);
            CtxChA+='['; CtxChA+=UcWordStr; CtxChA+=']'; CtxChA+='\n';
            // finish if limit reached
            if (UcWordStrSfV.Len()>=ContexterF->State->EnCtxLen){break;}
          }
          ENm=ENm+"\n"+CtxChA;
        }
        // create and add edge to the graph; weight is 1+log(frequency)
        PEdge Edge=TGEdge::New(SrcVrtx, DstVrtx, ENm);
        Edge->PutWgt(1+log(SubNmObjFq));
        Graph->AddEdge(Edge);
      }
    }
  }
  // final drawing setup: edge width, star layout, rescaling
  Graph->SetEdgeWidth(5);
  Graph->PlaceTreeAsStar();
  Graph->RescaleXY(0.1, RootVrtx);
  // return graph
  return Graph;
}