void TNmObjBs::GetFqNmObjIdPrV( const TStr& TargetNmObjStr, TIntPrV& FqNmObjIdPrV) const { //printf("Searching %s ...", TargetNmObjStr.CStr()); // get target named-object-id int TargetNmObjId=GetNmObjId(TargetNmObjStr); // collect target named-object frequencies TIntIntH NmObjIdToFqH; // traverse target named-object documents int NmObjDocs=GetNmObjDocs(TargetNmObjId); for (int NmObjDocIdN=0; NmObjDocIdN<NmObjDocs; NmObjDocIdN++){ // get document-id int DocId=GetNmObjDocId(TargetNmObjId, NmObjDocIdN); // traverse named-object in document int DocNmObjs=GetDocNmObjs(DocId); for (int DocNmObjN=0; DocNmObjN<DocNmObjs; DocNmObjN++){ // get named-object & frequency int NmObjId; int TermFq; GetDocNmObjId(DocId, DocNmObjN, NmObjId, TermFq); // increment named-object document frequency NmObjIdToFqH.AddDat(NmObjId)++; } } // get & sort frequency table FqNmObjIdPrV.Clr(); NmObjIdToFqH.GetDatKeyPrV(FqNmObjIdPrV); FqNmObjIdPrV.Sort(false); }
void TSkyGridEnt::GetSorted_LinkWgtDstEntIdPrV( const uint64& MnTm, const double& TopWgtSumPrc, TIntPrV& LinkWgtDstEntIdPrV) const { double AllLinkWgtSum=0; TIntIntH DstEntIdLinkWgtH; int LinkEnts=GetLinkEnts(); for (int LinkEntN=0; LinkEntN<LinkEnts; LinkEntN++){ int DstEntId=GetLinkEntId(LinkEntN); int EntLinks=GetEntLinks(LinkEntN); int EntLinkWgtSum=0; for (int EntLinkN=0; EntLinkN<EntLinks; EntLinkN++){ const TSkyGridEntLinkCtx& EntLinkCtx=GetEntLinkCtx(LinkEntN, EntLinkN); if (EntLinkCtx.Tm>=MnTm){ EntLinkWgtSum+=EntLinkCtx.LinkWgt;} } DstEntIdLinkWgtH.AddDat(DstEntId, EntLinkWgtSum); AllLinkWgtSum+=EntLinkWgtSum; } LinkWgtDstEntIdPrV.Clr(); DstEntIdLinkWgtH.GetDatKeyPrV(LinkWgtDstEntIdPrV); LinkWgtDstEntIdPrV.Sort(false); // cut long-tail if ((TopWgtSumPrc>0.0)&&(LinkWgtDstEntIdPrV.Len()>0)){ int TopLinkWgt=LinkWgtDstEntIdPrV[0].Val1; if (TopLinkWgt>(3*AllLinkWgtSum)/LinkWgtDstEntIdPrV.Len()){ double CutWgtSum=AllLinkWgtSum*(1-TopWgtSumPrc); int LastValN=LinkWgtDstEntIdPrV.Len()-1; while ((LastValN>0)&&(CutWgtSum>0)){ CutWgtSum-=LinkWgtDstEntIdPrV[LastValN].Val1; LastValN--; } LinkWgtDstEntIdPrV.Trunc(LastValN+1); } } }
// Algorithm: Find all bridges, remove them from the graph, find largest component K // now add all bridges that do not touch K, find connected components void Get1CnComSzCnt(const PUNGraph& Graph, TIntPrV& SzCntV) { //TCnCom::GetWccCnt(Graph, SzCntV); IAssertR(SzCntV.Len() == 1, "Graph is not connected."); TIntPrV EdgeV; GetEdgeBridges(Graph, EdgeV); if (EdgeV.Empty()) { SzCntV.Clr(false); return; } PUNGraph TmpG = TUNGraph::New(); *TmpG = *Graph; for (int e = 0; e < EdgeV.Len(); e++) { TmpG->DelEdge(EdgeV[e].Val1, EdgeV[e].Val2); } TCnComV CnComV; GetWccs(TmpG, CnComV); IAssert(CnComV.Len() >= 2); const TIntV& MxWcc = CnComV[0].NIdV; TIntSet MxCcSet(MxWcc.Len()); for (int i = 0; i < MxWcc.Len(); i++) { MxCcSet.AddKey(MxWcc[i]); } // create new graph: bridges not touching MxCc of G with no bridges for (int e = 0; e < EdgeV.Len(); e++) { if (! MxCcSet.IsKey(EdgeV[e].Val1) && ! MxCcSet.IsKey(EdgeV[e].Val2)) { TmpG->AddEdge(EdgeV[e].Val1, EdgeV[e].Val2); } } GetWccSzCnt(TmpG, SzCntV); for (int c = 0; c < SzCntV.Len(); c++) { if (SzCntV[c].Val1 == MxCcSet.Len()) { SzCntV.Del(c); break; } } }
///////////////////////////////////////////////// // SkyGrid-Base void TSkyGridBs::GetSorted_DocsEntIdPrV(TIntPrV& DocsEntIdPrV){ TIntIntH EntIdToDocsH; for (int EntId=0; EntId<GetEnts(); EntId++){ int Docs=GetEnt(EntId).GetDocIds(); EntIdToDocsH.AddDat(EntId, Docs); } DocsEntIdPrV.Clr(); EntIdToDocsH.GetDatKeyPrV(DocsEntIdPrV); DocsEntIdPrV.Sort(false); }
// Test GetTriadParticip // Number of nodes with x number of triangles it participates in TEST(triad, TestGetTriadParticip) { // Test TUNGraph PUNGraph GraphTUN = TriadGetTestTUNGraph(); TIntPrV TriadCntV; TSnap::GetTriadParticip(GraphTUN, TriadCntV); VerifyGetTriadParticip(TriadCntV); // Test TNGraph which is same as undirected. PNGraph GraphTN = TriadGetTestTNGraph(); TriadCntV.Clr(); TSnap::GetTriadParticip(GraphTN, TriadCntV); VerifyGetTriadParticip(TriadCntV); // Test TNEGraph which is same as undirected. PNEGraph GraphTNE = TriadGetTestTNEGraph(); TriadCntV.Clr(); TSnap::GetTriadParticip(GraphTN, TriadCntV); VerifyGetTriadParticip(TriadCntV); }
void TNGramBs::GetNGramStrV( const TStr& HtmlStr, TStrV& NGramStrV, TIntPrV& NGramBEChXPrV) const { TIntV NGramIdV; NGramStrV.Clr(); NGramBEChXPrV.Clr(); TNGramBs::GetNGramIdV(HtmlStr, NGramIdV, NGramBEChXPrV); NGramStrV.Gen(NGramIdV.Len(), 0); for (int NGramIdN=0; NGramIdN<NGramIdV.Len(); NGramIdN++){ TStr NGramStr=GetNGramStr(NGramIdV[NGramIdN]); NGramStrV.Add(NGramStr); } }
// set actor's language and country void TImdbNet::SetActorCntryLangByMajority() { // set language TIntPrV NIdToVal; for (TNodeI NI = BegNI(); NI < EndNI(); NI++) { if (! NI().IsActor()) { continue; } IAssert(NI().GetLang() == 0); // no language set IAssert(NI.GetInDeg() == 0); // actors point to movies int Nbhs=0; TIntH LangCntH; for (int e = 0; e < NI.GetOutDeg(); e++) { LangCntH.AddDat(NI.GetOutNDat(e).GetLang()) += 1; Nbhs++; } if (LangCntH.IsKey(0)) { Nbhs-=LangCntH.GetDat(0); LangCntH.GetDat(0)=0; } LangCntH.SortByDat(false); if (LangCntH.GetKey(0) == 0) { continue; } if (LangCntH[0]*2 >= Nbhs) { NIdToVal.Add(TIntPr(NI.GetId(), LangCntH.GetKey(0))); } } for (int i = 0; i < NIdToVal.Len(); i++) { GetNDat(NIdToVal[i].Val1).Lang = NIdToVal[i].Val2; } printf(" language set: %d\n", NIdToVal.Len()); int cnt=0; for (TNodeI NI = BegNI(); NI < EndNI(); NI++) { if (NI().IsActor() && NI().GetLang()==0) cnt++; } printf(" Actors NO language: %d\n\n", cnt); // set country NIdToVal.Clr(true); for (TNodeI NI = BegNI(); NI < EndNI(); NI++) { if (! NI().IsActor()) { continue; } IAssert(NI().GetCntry() == 0); // no country set IAssert(NI.GetInDeg() == 0); // actors point to movies int Nbhs=0; TIntH CntryCntH; for (int e = 0; e < NI.GetOutDeg(); e++) { CntryCntH.AddDat(NI.GetOutNDat(e).GetCntry()) += 1; Nbhs++; } if (CntryCntH.IsKey(0)) { Nbhs-=CntryCntH.GetDat(0); CntryCntH.GetDat(0)=0; } CntryCntH.SortByDat(false); if (CntryCntH.GetKey(0) == 0) { continue; } if (CntryCntH[0]*2 >= Nbhs) { NIdToVal.Add(TIntPr(NI.GetId(), CntryCntH.GetKey(0))); } } for (int i = 0; i < NIdToVal.Len(); i++) { GetNDat(NIdToVal[i].Val1).Cntry = NIdToVal[i].Val2; } printf(" country set: %d\n", NIdToVal.Len()); cnt=0; for (TNodeI NI = BegNI(); NI < EndNI(); NI++) { if (NI().IsActor() && NI().GetCntry()==0) cnt++; } printf(" Actors NO country: %d\n\n", cnt); }
/////////////////////////////////////////////////
// Context-Tree

// Builds a tree-shaped graph of named-objects around RootNmObjStr.
// Starting from the root, named-objects are taken from a queue together with
// their distance from the root; for each one, its co-occurring named-objects
// (from NmObjBs->GetFqNmObjIdPrV) are attached as children as long as they
// have not been assigned a distance yet. The number of children created per
// node is limited by the node's distance (see the switch below). Edges are
// named with the co-occurrence frequency and, optionally, a list of context
// words.
// Parameters:
//   NmObjBs      - named-object base used for ids, strings and co-occurrences
//   RootNmObjStr - string of the named-object placed at the root
//   MxDist       - nodes whose distance exceeds this value are not expanded
// Returns the laid-out graph (edge width set, placed as star, rescaled).
// NOTE(review): the function also reads ContexterF, which is not declared in
// this function -- presumably a global form object; confirm it is valid
// whenever this is called.
PGraph GetCtxTreeGraph(
 const PNmObjBs& NmObjBs, const TStr& RootNmObjStr, const int& MxDist){
  // create distance graph
  PGraph Graph=TGraph::New();
  // create root node
  int RootNmObjId=NmObjBs->GetNmObjId(RootNmObjStr);
  PVrtx RootVrtx=TGVrtx::New(RootNmObjId, RootNmObjStr);
  Graph->AddVrtx(RootVrtx);
  // create distance vector (-1 marks a named-object that was not reached yet)
  TIntV NmObjDistV(NmObjBs->GetNmObjs()); NmObjDistV.PutAll(-1);
  NmObjDistV[RootNmObjId]=0;
  // create queue of (named-object-id, distance) pairs, seeded with the root
  TIntPrQ OpenNmObjIdDistPrQ; OpenNmObjIdDistPrQ.Push(TIntPr(RootNmObjId, 0));
  while (!OpenNmObjIdDistPrQ.Empty()){
    // get name-object-id from queue
    int NmObjId=OpenNmObjIdDistPrQ.Top().Val1;
    int NmObjDist=OpenNmObjIdDistPrQ.Top().Val2;
    OpenNmObjIdDistPrQ.Pop();
    // the queued distance must agree with the recorded one
    IAssert(NmObjDistV[NmObjId]==NmObjDist);
    // get named-object string
    TStr NmObjStr=NmObjBs->GetNmObjStr(NmObjId);
    TStr UcNmObjStr=NmObjStr.GetUc();
    // progress trace on stdout
    printf("[%s:%d] ", NmObjStr.CStr(), NmObjDist);
    // check distance (nodes beyond MxDist stay in the graph but get no children)
    if (NmObjDist>MxDist){continue;}
    // get named-object vertex
    PVrtx SrcVrtx=Graph->GetVrtx(NmObjId);
    // get named-object children as (frequency, named-object-id) pairs
    TIntPrV FqNmObjIdPrV; NmObjBs->GetFqNmObjIdPrV(NmObjStr, FqNmObjIdPrV);
    int SubNmObjs=FqNmObjIdPrV.Len();
    // traverse named-object children
    int CreatedSubNmObjs=0;
    for (int SubNmObjN=0; SubNmObjN<SubNmObjs; SubNmObjN++){
      // get child data
      int SubNmObjFq=FqNmObjIdPrV[SubNmObjN].Val1;
      int SubNmObjId=FqNmObjIdPrV[SubNmObjN].Val2;
      TStr SubNmObjStr=NmObjBs->GetNmObjStr(SubNmObjId);
      TStr UcSubNmObjStr=SubNmObjStr.GetUc();
      // vertex name defaults to the plain child string
      TStr SubNmObjVNm=SubNmObjStr;
      // calculate and add context string formed from coref-named-objects
      // (the inner FqNmObjIdPrV shadows the outer one inside this scope)
      {TChA CtxChA;
      TIntPrV FqNmObjIdPrV;
      ContexterF->State->NmObjBs->GetFqNmObjIdPrV(SubNmObjStr, FqNmObjIdPrV);
      FqNmObjIdPrV.Sort(false); FqNmObjIdPrV.Trunc(ContexterF->State->EnCtxLen);
      // NOTE(review): this Clr() empties the vector right after it was sorted
      // and truncated, so the loop below never runs and CtxChA stays empty,
      // leaving SubNmObjVNm unchanged. Looks like either a deliberate switch-off
      // of vertex context strings or a leftover -- confirm before touching.
      FqNmObjIdPrV.Clr();
      for (int NmObjN=0; NmObjN<FqNmObjIdPrV.Len(); NmObjN++){
        TStr CoNmObjStr=ContexterF->State->NmObjBs->GetNmObjStr(FqNmObjIdPrV[NmObjN].Val2);
        if (SubNmObjStr!=CoNmObjStr){
          CtxChA+='['; CtxChA+=CoNmObjStr; CtxChA+=']'; CtxChA+='\\';}
      }
      if (!CtxChA.Empty()){
        SubNmObjVNm=SubNmObjStr+"\\"+CtxChA;}}
      // push child named-object-id if necessary (only if not reached before)
      if (NmObjDistV[SubNmObjId]==-1){
        // check number of subnodes: the allowed fan-out depends on the
        // distance of the parent from the root
        int MxCreatedSubNmObjs=0;
        switch (NmObjDist){
          case 0: MxCreatedSubNmObjs=/*20;*/ContexterF->State->EnSubNodes; break;
          case 1: MxCreatedSubNmObjs=4; break;
          case 2: MxCreatedSubNmObjs=2; break;
          case 3: MxCreatedSubNmObjs=1; break;
          case 4: MxCreatedSubNmObjs=1; break;
          default: MxCreatedSubNmObjs=0; break;
        }
        // check if stop creating branches (leaves the children loop entirely)
        CreatedSubNmObjs++;
        if (CreatedSubNmObjs>MxCreatedSubNmObjs){break;}
        // push edge
        OpenNmObjIdDistPrQ.Push(TIntPr(SubNmObjId, NmObjDist+1));
        NmObjDistV[SubNmObjId]=NmObjDist+1;
        // create vertex
        TStr VNm=SubNmObjVNm;
        PVrtx DstVrtx=TGVrtx::New(SubNmObjId, VNm);
        Graph->AddVrtx(DstVrtx);
        // create edge; the edge name starts as the co-occurrence frequency
        //TStr ENm=TStr("_")+TInt::GetStr(NmObjId)+"-"+TInt::GetStr(SubNmObjId);
        TStr ENm=TInt::GetStr(SubNmObjFq);
        // calculate and add context string formed from coref-named-objects
        // (only when the corresponding checkbox is checked)
        if (ContexterF->EnInterNmObjContextCb->Checked){
          TChA CtxChA;
          TStr SrcNmObjStr=NmObjStr;
          // recover the plain destination name by cutting the vertex name at
          // the first backslash, if there is one
          TChA DstNmObjChA=DstVrtx->GetVNm();
          if (DstNmObjChA.IsChIn('\\')){
            DstNmObjChA.Trunc(DstNmObjChA.SearchCh('\\'));}
          TStr DstNmObjStr=DstNmObjChA;
          // concept vector for the (source, destination) pair and its words
          PBowSpV ConceptSpV=ContexterF->State->NmObjBs->GetNmObjConcept(
           ContexterF->State->BowDocBs, ContexterF->State->BowDocWgtBs,
           SrcNmObjStr, DstNmObjStr);
          TStrFltPrV WordStrWgtPrV;
          ConceptSpV->GetWordStrWgtPrV(
           ContexterF->State->BowDocBs, -1, 1, WordStrWgtPrV);
          // words accepted so far
          TStrV UcWordStrSfV;
          for (int WordN=0; WordN<WordStrWgtPrV.Len(); WordN++){
            // get word (presumably already upper-case, as the name suggests -- confirm)
            TStr UcWordStr=WordStrWgtPrV[WordN].Val1;
            // remove duplicates: skip words in a containment relation (either
            // direction) with the parent or child name, or with an accepted word
            if (UcWordStr.IsStrIn(UcNmObjStr)){continue;}
            if (UcWordStr.IsStrIn(UcSubNmObjStr)){continue;}
            if (UcNmObjStr.IsStrIn(UcWordStr)){continue;}
            if (UcSubNmObjStr.IsStrIn(UcWordStr)){continue;}
            bool Ok=true;
            for (int WordSfN=0; WordSfN<UcWordStrSfV.Len(); WordSfN++){
              if (UcWordStrSfV[WordSfN].IsStrIn(UcWordStr)){Ok=false; break;}
              if (UcWordStr.IsStrIn(UcWordStrSfV[WordSfN])){Ok=false; break;}
            }
            if (!Ok){continue;}
            // add word
            UcWordStrSfV.Add(UcWordStr);
            CtxChA+='['; CtxChA+=UcWordStr; CtxChA+=']'; CtxChA+='\n';
            // finish if limit reached
            if (UcWordStrSfV.Len()>=ContexterF->State->EnCtxLen){break;}
          }
          // append the word list below the frequency
          ENm=ENm+"\n"+CtxChA;
        }
        // create and add edge to the graph
        PEdge Edge=TGEdge::New(SrcVrtx, DstVrtx, ENm);
        // edge weight grows logarithmically with the frequency
        // (assumes SubNmObjFq >= 1 so that log() is defined -- TODO confirm)
        Edge->PutWgt(1+log(SubNmObjFq));
        Graph->AddEdge(Edge);
      }
    }
  }
  // final layout of the graph
  Graph->SetEdgeWidth(5);
  Graph->PlaceTreeAsStar();
  Graph->RescaleXY(0.1, RootVrtx);
  // return graph
  return Graph;
}