/// Returns the amount the flow can be augmented over the paths, 0 if no path can be found. ##TSnap::FindAugV int FindAugV (const PNEANet &Net, const int& CapIndex, TIntV &Flow, TIntQ &FwdNodeQ, TIntH &PredEdgeH, TIntQ &BwdNodeQ, TIntH &SuccEdgeH, TIntV &MidToSrcAugV, TIntV &MidToSnkAugV, const int& SrcNId, const int& SnkNId) { int MidPtNId = IntFlowBiDBFS(Net, CapIndex, Flow, FwdNodeQ, PredEdgeH, BwdNodeQ, SuccEdgeH, SrcNId, SnkNId); if (MidPtNId == -1) { return 0; } int MinAug = TInt::Mx, NId = MidPtNId, AugFlow = 0; // Build the path from the midpoint back to the source by tracing through the PredEdgeH for (int EId = PredEdgeH.GetDat(NId); NId != SrcNId; EId = PredEdgeH.GetDat(NId)) { MidToSrcAugV.Add(EId); const TNEANet::TEdgeI &EI = Net->GetEI(EId); if (EI.GetSrcNId() == NId) { NId = EI.GetDstNId(); AugFlow = Flow[EId]; } else { NId = EI.GetSrcNId(); AugFlow = Net->GetIntAttrIndDatE(EId, CapIndex) - Flow[EId]; } if (AugFlow < MinAug) { MinAug = AugFlow; } } NId = MidPtNId; // Build the path from the midpoint back to the sink by tracing through the SuccEdgeH for (int EId = SuccEdgeH.GetDat(NId); NId != SnkNId; EId = SuccEdgeH.GetDat(NId)) { MidToSnkAugV.Add(EId); const TNEANet::TEdgeI &EI = Net->GetEI(EId); if (EI.GetDstNId() == NId) { NId = EI.GetSrcNId(); AugFlow = Flow[EId]; } else { NId = EI.GetDstNId(); AugFlow = Net->GetIntAttrIndDatE(EId, CapIndex) - Flow[EId]; } if (AugFlow < MinAug) { MinAug = AugFlow; } } return MinAug; }
// Saves a table loaded from text to a file, reloads it through TShMIn / TTable::LoadShM,
// and checks that row counts and two sample integer cells agree.
TEST(SHMTest, LoadTables) {
  TStr Filename("test.graph");
  TTableContext Context;

  // Create schema: three string columns followed by three integer columns.
  Schema GradeS;
  const char* StrColNms[] = {"A", "B", "Quarter"};
  const char* IntColNms[] = {"Grade 2011", "Grade 2012", "Grade 2013"};
  for (int ColN = 0; ColN < 3; ColN++) {
    GradeS.Add(TPair<TStr,TAttrType>(StrColNms[ColN], atStr));
  }
  for (int ColN = 0; ColN < 3; ColN++) {
    GradeS.Add(TPair<TStr,TAttrType>(IntColNms[ColN], atInt));
  }

  // All six columns are relevant.
  TIntV RelevantCols;
  for (int ColN = 0; ColN < 6; ColN++) {
    RelevantCols.Add(ColN);
  }

  PTable p1 = TTable::LoadSS(GradeS, "table/grades.txt", &Context, RelevantCols);

  // Write the table out, then read it back from the same file.
  TFOut OutStream(Filename);
  p1->Save(OutStream);
  TShMIn Shmin(Filename);
  PTable p2 = TTable::LoadShM(Shmin, &Context);

  EXPECT_EQ(p1->GetNumRows().Val, p2->GetNumRows().Val);
  EXPECT_EQ(p1->GetNumValidRows().Val, p2->GetNumValidRows().Val);
  EXPECT_EQ(p1->GetIntVal("Grade 2011", 0).Val, p2->GetIntVal("Grade 2011", 0).Val);
  EXPECT_EQ(p1->GetIntVal("Grade 2013", 4).Val, p2->GetIntVal("Grade 2013", 4).Val);
}
int main(){ TTableContext Context; // create scheme Schema AnimalS; AnimalS.Add(TPair<TStr,TAttrType>("Animal", atStr)); AnimalS.Add(TPair<TStr,TAttrType>("Size", atStr)); AnimalS.Add(TPair<TStr,TAttrType>("Location", atStr)); AnimalS.Add(TPair<TStr,TAttrType>("Number", atInt)); TIntV RelevantCols; RelevantCols.Add(0); RelevantCols.Add(1); RelevantCols.Add(2); // create table PTable T = TTable::LoadSS("Animals", AnimalS, "tests/animals.txt", Context, RelevantCols); //PTable T = TTable::LoadSS("Animals", AnimalS, "animals.txt"); T->Unique("Animal"); TTable Ts = *T; // did we fix problem with copy-c'tor ? //PTable Ts = TTable::LoadSS("Animals_s", AnimalS, "../../testfiles/animals.txt", RelevantCols); //Ts->Unique(AnimalUnique); // test Select // create predicate tree: find all animals that are big and african or medium and Australian TPredicate::TAtomicPredicate A1(atStr, true, EQ, "Location", "", 0, 0, "Africa"); TPredicate::TPredicateNode N1(A1); // Location == "Africa" TPredicate::TAtomicPredicate A2(atStr, true, EQ, "Size", "", 0, 0, "big"); TPredicate::TPredicateNode N2(A2); // Size == "big" TPredicate::TPredicateNode N3(AND); N3.AddLeftChild(&N1); N3.AddRightChild(&N2); TPredicate::TAtomicPredicate A4(atStr, true, EQ, "Location", "", 0, 0, "Australia"); TPredicate::TPredicateNode N4(A4); TPredicate::TAtomicPredicate A5(atStr, true, EQ, "Size", "", 0, 0, "medium"); TPredicate::TPredicateNode N5(A5); TPredicate::TPredicateNode N6(AND); N6.AddLeftChild(&N4); N6.AddRightChild(&N5); TPredicate::TPredicateNode N7(OR); N7.AddLeftChild(&N3); N7.AddRightChild(&N6); TPredicate Pred(&N7); TIntV SelectedRows; Ts.Select(Pred, SelectedRows); TStrV GroupBy; GroupBy.Add("Location"); T->Group(GroupBy, "LocationGroup"); GroupBy.Add("Size"); T->Group(GroupBy, "LocationSizeGroup"); T->Count("LocationCount", "Location"); PTable Tj = T->Join("Location", Ts, "Location"); TStrV UniqueAnimals; UniqueAnimals.Add("Animals_1.Animal"); UniqueAnimals.Add("Animals_2.Animal"); 
Tj->Unique(UniqueAnimals, false); //print table T->SaveSS("tests/animals_out_T.txt"); Ts.SaveSS("tests/animals_out_Ts.txt"); Tj->SaveSS("tests/animals_out_Tj.txt"); return 0; }
// Test subgraphs void TestSubTNEGraphs() { PNEGraph Graph; PNEGraph Graph1; PNEGraph Graph2; PNEGraph Graph3; int i; TIntV NIdV; TIntV NIdV1; TIntV EIdV; Graph = GetTestTNEGraph(); PrintGraph("TNEGraph", Graph); for (i = 10; i < 15; i++) { NIdV.Add(i); } Graph1 = TSnap::GetSubGraph(Graph, NIdV); PrintGraph("TNEGraph1", Graph1); for (i = 0; i < 20; i += 2) { NIdV1.Add(i); } Graph2 = TSnap::GetSubGraph(Graph, NIdV1); PrintGraph("TNEGraph2", Graph2); for (i = 0; i < 120; i += 2) { EIdV.Add(i); } Graph3 = TSnap::GetESubGraph(Graph, EIdV); PrintGraph("TNEGraph3", Graph3); }
// Splits this string into words according to the flags produced by
// GetWordBoundPV and stores the words in WordUStrV (previous content is cleared).
void TUStr::GetWordUStrV(TUStrV& WordUStrV){
  // clear word vector
  WordUStrV.Clr();
  // create boundaries; the asserts require Len()+1 flags with the last one set
  TBoolV WordBoundPV; GetWordBoundPV(WordBoundPV);
  IAssert(Len()==WordBoundPV.Len()-1);
  IAssert((WordBoundPV.Len()>0)&&(WordBoundPV.Last()));
  // traverse characters and bounds; the extra pass at ChN==UniChs flushes the tail
  const int UniChs=Len();
  TIntV CurWordChV;
  for (int ChN=0; ChN<=UniChs; ChN++){
    const bool IsFinish=(ChN==UniChs);
    if (!(IsFinish||(WordBoundPV[ChN+1]))){
      // no boundary after this character: just collect it
      CurWordChV.Add(UniChV[ChN]);
      continue;
    }
    // finish or word-boundary
    if (!IsFinish){
      // keep the character if it ends a word in progress or is a single alphabetic character
      if ((!CurWordChV.Empty())||(IsAlphabetic(UniChV[ChN]))){
        CurWordChV.Add(UniChV[ChN]);
      }
    }
    if (CurWordChV.Empty()){continue;}
    // emit the collected word and reset the buffer
    TUStr WordUStr(CurWordChV);
    WordUStrV.Add(WordUStr);
    CurWordChV.Clr(false);
  }
}
// Pushes a three-element vector into a TQQueue constructed with (64, -1) and checks
// Front/Back/Len, GetSubValVec over the range 0..2, and indexed access.
TEST(TQQueueTest, Unlimited2) {
  try {
    TQQueue<TInt> Q(64, -1);
    ASSERT_TRUE(Q.Empty());

    TIntV Vec;
    Vec.Add(1);
    Vec.Add(2);
    Vec.Add(3);
    Q.PushV(Vec);

    ASSERT_EQ(Q.Front(), 1);
    ASSERT_EQ(Q.Back(), 3);
    ASSERT_EQ(Q.Len(), 3);

    // the extracted sub-range must reproduce the pushed vector
    TIntV Vec2;
    Q.GetSubValVec(0, 2, Vec2);
    ASSERT_EQ(Vec.Len(), Vec2.Len());
    for (int i = 0; i < 3; i++) {
      ASSERT_EQ(Vec[i], Vec2[i]);
      ASSERT_EQ(Q[i], Vec[i]);
    }
  } catch (PExcept& Except) {
    // printf's %s needs a C string; passing the string object returned by
    // GetStr() through varargs is not portable, so take its CStr() explicitly.
    printf("Error: %s", Except->GetStr().CStr());
    // rethrow the exception that was caught, without making a copy
    throw;
  }
}
int main(int argc, char* argv[]){ //test1(); TTableContext Context; // create scheme Schema PostS; PostS.Add(TPair<TStr,TAttrType>("Id", atInt)); PostS.Add(TPair<TStr,TAttrType>("OwnerUserId", atInt)); PostS.Add(TPair<TStr,TAttrType>("AcceptedAnswerId", atInt)); PostS.Add(TPair<TStr,TAttrType>("CreationDate", atStr)); PostS.Add(TPair<TStr,TAttrType>("Score", atInt)); TIntV RelevantCols; RelevantCols.Add(0); RelevantCols.Add(1); RelevantCols.Add(2); RelevantCols.Add(3); RelevantCols.Add(4); PTable P = TTable::LoadSS("Posts", PostS, "/dfs/ilfs2/0/ringo/StackOverflow_2/posts.tsv", Context, RelevantCols); printf("Load done\n"); TStrV cols; cols.Add("OwnerUserId"); struct timeval begin, end; gettimeofday(&begin, NULL); P->Aggregate(cols, aaSum, "Score", "Sum"); gettimeofday(&end, NULL); double diff = (end.tv_sec * 1000000 + end.tv_usec) - (begin.tv_sec * 1000000 + begin.tv_usec); printf("Elapsed time:%.3lfs\n", diff / 1000000); if (atoi(argv[1]) == 0) return 0; P->SaveSS("tests/p3.txt"); return 0; }
// Test edge subgraph conversion TEST(subgraph, TestConvertESubGraphs) { PNEGraph NEGraph; PNGraph NGraph; TIntV NIdV; TIntV EIdV; int i; NGraph = GetTestTNGraph(); EXPECT_EQ(20,NGraph->GetNodes()); EXPECT_EQ(60,NGraph->GetEdges()); for (i = 0; i < 20; i += 2) { NIdV.Add(i); } // TODO: fix TSnap::ConvertSubGraph<PUNGraph>(NGraph, NIdV, true), it fails // UNGraph = TSnap::ConvertSubGraph<PUNGraph>(NGraph, NIdV, true); NEGraph = TSnap::ConvertGraph<PNEGraph>(NGraph); EXPECT_EQ(20,NEGraph->GetNodes()); EXPECT_EQ(60,NEGraph->GetEdges()); // select every second edge i = 0; for (TNEGraph::TEdgeI EI = NEGraph->BegEI(); EI < NEGraph->EndEI(); EI++) { if (i == 0) { EIdV.Add(EI.GetId()); } i = (i + 1) % 2; } NGraph = TSnap::ConvertESubGraph<PNGraph>(NEGraph, EIdV); EXPECT_EQ(20,NGraph->GetNodes()); EXPECT_EQ(30,NGraph->GetEdges()); }
// Fills NIdV with the keys of LeftH followed by the keys of RightH.
void TBPGraph::GetNIdV(TIntV& NIdV) const {
  // reserve room for GetNodes() ids, start with length 0
  NIdV.Gen(GetNodes(), 0);
  int KeyId = LeftH.FFirstKeyId();
  while (LeftH.FNextKeyId(KeyId)) {
    NIdV.Add(LeftH.GetKey(KeyId));
  }
  KeyId = RightH.FFirstKeyId();
  while (RightH.FNextKeyId(KeyId)) {
    NIdV.Add(RightH.GetKey(KeyId));
  }
}
void TempMotifCounter::GetAllStaticTriangles(TIntV& Us, TIntV& Vs, TIntV& Ws) { Us.Clr(); Vs.Clr(); Ws.Clr(); // Get degree ordering of the graph int max_nodes = static_graph_->GetMxNId(); TVec<TIntPair> degrees(max_nodes); degrees.PutAll(TIntPair(0, 0)); // Set the degree of a node to be the number of nodes adjacent to the node in // the undirected graph. TIntV nodes; GetAllNodes(nodes); #pragma omp parallel for schedule(dynamic) for (int node_id = 0; node_id < nodes.Len(); node_id++) { int src = nodes[node_id]; TIntV nbrs; GetAllNeighbors(src, nbrs); degrees[src] = TIntPair(nbrs.Len(), src); } degrees.Sort(); TIntV order = TIntV(max_nodes); #pragma omp parallel for schedule(dynamic) for (int i = 0; i < order.Len(); i++) { order[degrees[i].Dat] = i; } // Get triangles centered at a given node where that node is the smallest in // the degree ordering. #pragma omp parallel for schedule(dynamic) for (int node_id = 0; node_id < nodes.Len(); node_id++) { int src = nodes[node_id]; int src_pos = order[src]; // Get all neighbors who come later in the ordering TIntV nbrs; GetAllNeighbors(src, nbrs); TIntV neighbors_higher; for (int i = 0; i < nbrs.Len(); i++) { int nbr = nbrs[i]; if (order[nbr] > src_pos) { neighbors_higher.Add(nbr); } } for (int ind1 = 0; ind1 < neighbors_higher.Len(); ind1++) { for (int ind2 = ind1 + 1; ind2 < neighbors_higher.Len(); ind2++) { int dst1 = neighbors_higher[ind1]; int dst2 = neighbors_higher[ind2]; // Check for triangle formation if (static_graph_->IsEdge(dst1, dst2) || static_graph_->IsEdge(dst2, dst1)) { #pragma omp critical { Us.Add(src); Vs.Add(dst1); Ws.Add(dst2); } } } } } }
// burn each link independently (forward with FwdBurnProb, backward with BckBurnProb) void TForestFire::BurnExpFire() { const double OldFwdBurnProb = FwdBurnProb; const double OldBckBurnProb = BckBurnProb; const int NInfect = InfectNIdV.Len(); const TNGraph& G = *Graph; TIntH BurnedNIdH; // burned nodes TIntV BurningNIdV = InfectNIdV; // currently burning nodes TIntV NewBurnedNIdV; // nodes newly burned in current step bool HasAliveNbrs; // has unburned neighbors int NBurned = NInfect, NDiedFire=0; for (int i = 0; i < InfectNIdV.Len(); i++) { BurnedNIdH.AddDat(InfectNIdV[i]); } NBurnedTmV.Clr(false); NBurningTmV.Clr(false); NewBurnedTmV.Clr(false); for (int time = 0; ; time++) { NewBurnedNIdV.Clr(false); // for each burning node for (int node = 0; node < BurningNIdV.Len(); node++) { const int& BurningNId = BurningNIdV[node]; const TNGraph::TNodeI Node = G.GetNI(BurningNId); HasAliveNbrs = false; NDiedFire = 0; // burn forward links (out-links) for (int e = 0; e < Node.GetOutDeg(); e++) { const int OutNId = Node.GetOutNId(e); if (! BurnedNIdH.IsKey(OutNId)) { // not yet burned HasAliveNbrs = true; if (Rnd.GetUniDev() < FwdBurnProb) { BurnedNIdH.AddDat(OutNId); NewBurnedNIdV.Add(OutNId); NBurned++; } } } // burn backward links (in-links) if (BckBurnProb > 0.0) { for (int e = 0; e < Node.GetInDeg(); e++) { const int InNId = Node.GetInNId(e); if (! BurnedNIdH.IsKey(InNId)) { // not yet burned HasAliveNbrs = true; if (Rnd.GetUniDev() < BckBurnProb) { BurnedNIdH.AddDat(InNId); NewBurnedNIdV.Add(InNId); NBurned++; } } } } if (! 
HasAliveNbrs) { NDiedFire++; } } NBurnedTmV.Add(NBurned); NBurningTmV.Add(BurningNIdV.Len() - NDiedFire); NewBurnedTmV.Add(NewBurnedNIdV.Len()); //BurningNIdV.AddV(NewBurnedNIdV); // node is burning eternally BurningNIdV.Swap(NewBurnedNIdV); // node is burning just 1 time step if (BurningNIdV.Empty()) break; FwdBurnProb = FwdBurnProb * ProbDecay; BckBurnProb = BckBurnProb * ProbDecay; } BurnedNIdV.Gen(BurnedNIdH.Len(), 0); for (int i = 0; i < BurnedNIdH.Len(); i++) { BurnedNIdV.Add(BurnedNIdH.GetKey(i)); } FwdBurnProb = OldFwdBurnProb; BckBurnProb = OldBckBurnProb; }
// Replaces nbrs with the neighbors of `node` in static_graph_: all out-neighbors
// first, then every in-neighbor that is not also an out-neighbor.
void TempMotifCounter::GetAllNeighbors(int node, TIntV& nbrs) {
  nbrs = TIntV();
  TNGraph::TNodeI NI = static_graph_->GetNI(node);

  const int out_deg = NI.GetOutDeg();
  for (int pos = 0; pos < out_deg; pos++) {
    nbrs.Add(NI.GetOutNId(pos));
  }

  const int in_deg = NI.GetInDeg();
  for (int pos = 0; pos < in_deg; pos++) {
    const int in_nbr = NI.GetInNId(pos);
    if (NI.IsOutNId(in_nbr)) { continue; }  // already added as an out-neighbor
    nbrs.Add(in_nbr);
  }
}
// Appends to AdjacentModes, for every key pair in Graphs that contains this
// node's mode, the other member of the pair. AdjacentModes is not cleared first.
void TMultimodalGraphImplB::TNodeI::GetAdjacentModes(TIntV &AdjacentModes) const {
  int ModeId = NodeToModeMapping->GetDat(GetId());
  for (TGraphs::TIter it = Graphs->BegI(); it < Graphs->EndI(); it++) {
    if (it.GetKey().GetVal1() == ModeId) {
      // our mode is the first member: the second one is adjacent
      AdjacentModes.Add(it.GetKey().GetVal2());
      continue;
    }
    if (it.GetKey().GetVal2() == ModeId) {
      // our mode is the second member: the first one is adjacent
      AdjacentModes.Add(it.GetKey().GetVal1());
    }
  }
}
// Compacts the table in place: every row whose Next entry is not Invalid is
// copied down to the lowest free slot (Next and all int/float/string columns),
// NumRows is decremented once per Invalid row, and the row indices stored in
// GroupMapping are rewritten through the old->new Mapping built on the way.
// NOTE(review): Next and the column vectors are not truncated here; the slots
// past the last compacted row keep their old contents — confirm this is intended.
void TTable::Defrag() {
  TInt FreeIndex = 0;  // next slot to copy a surviving row into
  TIntV Mapping;  // Mapping[old_index] = new_index/invalid
  for (TInt i = 0; i < Next.Len(); i++) {
    if (Next[i] != Invalid) {
      // "first row" properly set beforehand
      if (FreeIndex == 0) {
        Assert (i == FirstValidRow);
        FirstValidRow = 0;
      }
      // FreeIndex never exceeds i, so writing slot FreeIndex cannot overwrite a row not yet visited
      if (Next[i] != Last) {
        Next[FreeIndex] = FreeIndex + 1;
        Mapping.Add(FreeIndex);
      } else {
        // NOTE(review): the final row is mapped to Last rather than to its new
        // index (FreeIndex), so the group remapping below stores Last for it — confirm.
        Next[FreeIndex] = Last;
        Mapping.Add(Last);
      }
      // move the row's cells in every column to the compacted position
      for (TInt j = 0; j < IntCols.Len(); j++) {
        IntCols[j][FreeIndex] = IntCols[j][i];
      }
      for (TInt j = 0; j < FltCols.Len(); j++) {
        FltCols[j][FreeIndex] = FltCols[j][i];
      }
      for (TInt j = 0; j < StrColMaps.Len(); j++) {
        StrColMaps[j][FreeIndex] = StrColMaps[j][i];
      }
      FreeIndex++;
    } else {
      // row i is dropped
      NumRows--;
      Mapping.Add(Invalid);
    }
  }
  // rewrite the row indices stored in every group of every grouping
  for(THash<TStr,THash<TInt,TIntV> >::TIter it = GroupMapping.BegI(); it < GroupMapping.EndI(); it++){
    THash<TInt,TIntV>& G = it->Dat;
    for(THash<TInt,TIntV>::TIter iit = G.BegI(); iit < G.EndI(); iit++) {
      TIntV& Group = iit->Dat;
      // write cursor into Group; intentionally shadows the outer FreeIndex
      TInt FreeIndex = 0;
      for (TInt j=0; j < Group.Len(); j++) {
        // keep only members whose mapping is not Invalid, translated to new indices
        if (Mapping[Group[j]] != Invalid) {
          Group[FreeIndex] = Mapping[Group[j]];
          FreeIndex++;
        }
      }
      // resize to get rid of end values
      Group.Trunc(FreeIndex);
    }
  }
  // should match, or bug somewhere
  Assert (NumValidRows == NumRows);
}
/// estimate number of communities using cross validation int TAGMFast::FindComsByCV(const int NumThreads, const int MaxComs, const int MinComs, const int DivComs, const TStr OutFNm, const double StepAlpha, const double StepBeta) { double ComsGap = exp(TMath::Log((double) MaxComs / (double) MinComs) / (double) DivComs); TIntV ComsV; ComsV.Add(MinComs); while (ComsV.Len() < DivComs) { int NewComs = int(ComsV.Last() * ComsGap); if (NewComs == ComsV.Last().Val) { NewComs++; } ComsV.Add(NewComs); } if (ComsV.Last() < MaxComs) { ComsV.Add(MaxComs); } return FindComsByCV(ComsV, 0.1, NumThreads, OutFNm + ".CV.likelihood", StepAlpha, StepBeta); }
// Splits CStr into words with TStrUtil::SplitWords and stores in WIdV, for each
// word in order, the value StrH holds for it, or -1 when StrH has no such key.
void TStrUtil::GetWIdV(const TStrHash<TInt>& StrH, const char *CStr, TIntV& WIdV) {
  const int NotWId = -1;  // marker for words missing from StrH
  TChA ChA(CStr);
  TVec<char *> WrdV;
  TStrUtil::SplitWords(ChA, WrdV);
  WIdV.Clr(false);
  TInt WId;
  const int Wrds = WrdV.Len();
  for (int WrdN = 0; WrdN < Wrds; WrdN++) {
    if (!StrH.IsKeyGetDat(WrdV[WrdN], WId)) {
      WId = NotWId;
    }
    WIdV.Add(WId);
  }
}
// YES I COPIED AND PASTED CODE my section leader would be so ashamed :D void LSH::MinHash(THash<TMd5Sig, TIntSet>& ShingleToQuoteIds, TVec<THash<TIntV, TIntSet> >& SignatureBandBuckets) { TRnd RandomGenerator; // TODO: make this "more random" by incorporating time for (int i = 0; i < NumBands; ++i) { THash < TInt, TIntV > Inverted; // (QuoteID, QuoteSignatureForBand) THash < TIntV, TIntSet > BandBuckets; // (BandSignature, QuoteIDs) for (int j = 0; j < BandSize; ++j) { // Create new signature TVec < TMd5Sig > Signature; ShingleToQuoteIds.GetKeyV(Signature); Signature.Shuffle(RandomGenerator); // Place in bucket - not very efficient int SigLen = Signature.Len(); for (int k = 0; k < SigLen; ++k) { TIntSet CurSet = ShingleToQuoteIds.GetDat(Signature[k]); for (TIntSet::TIter l = CurSet.BegI(); l < CurSet.EndI(); l++) { TInt Key = l.GetKey(); if (Inverted.IsKey(Key)) { TIntV CurSignature = Inverted.GetDat(Key); if (CurSignature.Len() <= j) { CurSignature.Add(k); Inverted.AddDat(Key, CurSignature); } } else { TIntV NewSignature; NewSignature.Add(k); Inverted.AddDat(Key, NewSignature); } } } } TIntV InvertedKeys; Inverted.GetKeyV(InvertedKeys); TInt InvertedLen = InvertedKeys.Len(); for (int k = 0; k < InvertedLen; ++k) { TIntSet Bucket; TIntV Signature = Inverted.GetDat(InvertedKeys[k]); if (BandBuckets.IsKey(Signature)) { Bucket = BandBuckets.GetDat(Signature); } Bucket.AddKey(InvertedKeys[k]); BandBuckets.AddDat(Signature, Bucket); } SignatureBandBuckets.Add(BandBuckets); Err("%d out of %d band signatures computed\n", i + 1, NumBands); } Err("Minhash step complete!\n"); }
// improved version void GetMergeSortedV1(TIntV& NeighbourV, TNGraph::TNodeI NI) { int j = 0; int k = 0; int prev = -1; int indeg = NI.GetInDeg(); int outdeg = NI.GetOutDeg(); //while (j < NI.GetInDeg() && k < NI.GetOutDeg()) { if (indeg > 0 && outdeg > 0) { int v1 = NI.GetInNId(j); int v2 = NI.GetOutNId(k); while (1) { if (v1 <= v2) { if (prev != v1) { NeighbourV.Add(v1); prev = v1; } j += 1; if (j >= indeg) { break; } v1 = NI.GetInNId(j); } else { if (prev != v2) { NeighbourV.Add(v2); prev = v2; } k += 1; if (k >= outdeg) { break; } v2 = NI.GetOutNId(k); } } } while (j < indeg) { int v = NI.GetInNId(j); if (prev != v) { NeighbourV.Add(v); prev = v; } j += 1; } while (k < outdeg) { int v = NI.GetOutNId(k); if (prev != v) { NeighbourV.Add(v); prev = v; } k += 1; } }
///////////////////////////////////////////////// // Trawling the web for emerging communities // graph, left points to right TTrawling::TTrawling(const PNGraph& Graph, const int& MinSupport) : MinSup(MinSupport) { TIntH ItemCntH; for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { IAssert(NI.GetOutDeg()==0 || NI.GetInDeg()==0); // edges only point from left to right if (NI.GetOutDeg()==0) { continue; } for (int e = 0; e < NI.GetOutDeg(); e++) { ItemCntH.AddDat(NI.GetOutNId(e)) += 1; } } TIntV RightV; for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { IAssert(NI.GetOutDeg()==0 || NI.GetInDeg()==0); // edges only point from left to right if (NI.GetOutDeg()==0) { continue; } RightV.Clr(false); for (int e = 0; e < NI.GetOutDeg(); e++) { const int itm = NI.GetOutNId(e); // only include items that already are above minimum support if (ItemCntH.GetDat(itm) >= MinSup) { RightV.Add(itm); } } if (! RightV.Empty()) { NIdSetH.AddDat(NI.GetId(), RightV); } } // for (int n = 0; n < NIdSetH.Len(); n++) { const TIntV& Set = NIdSetH[n]; for (int s = 0; s < Set.Len(); s++) { SetNIdH.AddDat(Set[s]).Add(n); } } }
// Fills DIdV with the document id of every selected point, in SelPointV order.
void TVizMapContext::GetSelectDIdV(TIntV& DIdV) {
  const int SelPoints = SelPointV.Len();
  // reserve one slot per selected point, start with length 0
  DIdV.Gen(SelPoints, 0);
  int SelPointN = 0;
  while (SelPointN < SelPoints) {
    DIdV.Add(VizMapFrame->GetPoint(SelPointV[SelPointN])->GetDocId());
    SelPointN++;
  }
}
// Replaces `nodes` with the ids of all nodes of static_graph_, in iteration order.
void TempMotifCounter::GetAllNodes(TIntV& nodes) {
  nodes = TIntV();
  TNGraph::TNodeI node_it = static_graph_->BegNI();
  while (node_it < static_graph_->EndNI()) {
    nodes.Add(node_it.GetId());
    node_it++;
  }
}
// Test subgraphs void TestSubTUNGraphs() { PUNGraph Graph; PUNGraph Graph1; PUNGraph Graph2; PUNGraph Graph3; int i; TIntV NIdV; TIntV NIdV1; Graph = GetTestTUNGraph(); PrintGraph("TUNGraph", Graph); for (i = 10; i < 15; i++) { NIdV.Add(i); } Graph1 = TSnap::GetSubGraph(Graph, NIdV); PrintGraph("TUNGraph1", Graph1); Graph2 = TSnap::GetSubGraph(Graph, NIdV, true); PrintGraph("TUNGraph2", Graph2); for (i = 0; i < 20; i += 2) { NIdV1.Add(i); } Graph3 = TSnap::GetSubGraph(Graph, NIdV1, true); PrintGraph("TUNGraph3", Graph3); }
void TNmObjBs::GetNmObjDIdV( const PBowDocBs& BowDocBs, TIntV& BowDIdV, const TStr& NmObjStr1, const TStr& NmObjStr2) const { // get first named-object-id int NmObjId1=GetNmObjId(NmObjStr1); TIntV NmObjDocIdV1; GetNmObjDocIdV(NmObjId1, NmObjDocIdV1); NmObjDocIdV1.Sort(); // get second named-object-id TIntV NmObjDocIdV2; if (!NmObjStr2.Empty()){ int NmObjId2=GetNmObjId(NmObjStr2); GetNmObjDocIdV(NmObjId2, NmObjDocIdV2); NmObjDocIdV2.Sort(); } // create joint doc-id-vector TIntV NmObjDocIdV; if (NmObjDocIdV2.Empty()){ NmObjDocIdV=NmObjDocIdV1; } else { NmObjDocIdV1.Intrs(NmObjDocIdV2, NmObjDocIdV); } // traverse named-object-documents to collect bow-document-ids BowDIdV.Gen(NmObjDocIdV.Len(), 0); for (int NmObjDocIdN=0; NmObjDocIdN<NmObjDocIdV.Len(); NmObjDocIdN++){ TStr DocNm=GetDocNm(NmObjDocIdV[NmObjDocIdN]); int DId=BowDocBs->GetDId(DocNm); if (DId!=-1){ BowDIdV.Add(DId); } } }
// Test node subgraph conversion void TestConvertSubGraphs() { PNGraph NGraph; PUNGraph UNGraph; int N1, N2, N3; int E1, E2, E3; TIntV NIdV; int i; NGraph = GetTestTNGraph(); N1 = NGraph->GetNodes(); E1 = NGraph->GetEdges(); for (i = 0; i < 20; i += 2) { NIdV.Add(i); } // TODO: fix TSnap::ConvertSubGraph<PUNGraph>(NGraph, NIdV, true), it fails // UNGraph = TSnap::ConvertSubGraph<PUNGraph>(NGraph, NIdV, true); UNGraph = TSnap::ConvertSubGraph<PUNGraph>(NGraph, NIdV); N2 = UNGraph->GetNodes(); E2 = UNGraph->GetEdges(); NGraph = TSnap::ConvertSubGraph<PNGraph>(UNGraph, NIdV); N3 = NGraph->GetNodes(); E3 = NGraph->GetEdges(); printf("---- TestConvertSubGraphs -----\n"); printf("nodes: %d,%d,%d, edges: %d,%d,%d\n", N1, N2, N3, E1, E2, E3); printf("\n"); }
// Test subgraphs void TestEdgeSubNets() { TPt <TNodeEdgeNet<TInt, TInt> > Net; TPt <TNodeEdgeNet<TInt, TInt> > Net1; TPt <TNodeEdgeNet<TInt, TInt> > Net2; TPt <TNodeEdgeNet<TInt, TInt> > Net3; TPt <TNodeEdgeNet<TInt, TInt> > Net4; int i; TIntV NIdV; TIntV NIdV1; Net = GetTestTNodeEdgeNet(); PrintNet("TestEdgeSubNets", Net); for (i = 10; i < 15; i++) { NIdV.Add(i); } Net1 = TSnap::GetSubGraph(Net, NIdV); PrintNet("TestEdgeSubNets1", Net1); for (i = 0; i < 20; i += 2) { NIdV1.Add(i); } Net2 = TSnap::GetSubGraph(Net, NIdV1); PrintNet("TestEdgeSubNets2", Net2); Net3 = TSnap::GetEDatSubGraph(Net, 1, 0); PrintNet("TestEdgeSubNets3", Net3); Net4 = TSnap::GetEDatSubGraph(Net, 2, -1); PrintNet("TestEdgeSubNets4", Net4); }
////////////////////////////////////////////////////////////////////////// // String-To-Words void TStrParser::DocStrToWIdV(const TStr& _DocStr, TIntV& WordIdV, const bool& Stemm) { TStr DocStr = _DocStr.GetUc(); // to upper case TStrV WordV; DocStr.SplitOnWs(WordV); int WordN = WordV.Len(); WordIdV.Reserve(WordN, 0); PStemmer Stemmer = TStemmer::New(stmtPorter); TIntH WordsInDoc; for (int WordC = 0; WordC < WordN; WordC++) { TStr WordStr; if (Stemm) { WordStr = Stemmer->GetStem(WordV[WordC]); } else { WordStr = WordV[WordC]; } int WId = GetWId(WordStr); if (WId == -1) { WId = WordToIdH.AddKey(WordStr); WordToIdH[WId] = 0; } WordIdV.Add(WId); // is it first time we see this word in this doc? if (!WordsInDoc.IsKey(WId)) WordsInDoc.AddKey(WId); } //do some statistics for DF DocsParsed++; for (int i = 0, l = WordsInDoc.Len(); i < l; i++) WordToIdH[WordsInDoc.GetKey(i)]++; Assert(WordV.Len() == WordIdV.Len()); }
/// Shingles by words void LSH::HashShinglesOfClusters(TQuoteBase *QuoteBase, TClusterBase *ClusterBase, TIntV& ClusterIds, TInt ShingleLen, THash<TMd5Sig, TIntV>& ShingleToClusterIds) { Err("Hashing shingles of clusters...\n"); for (int i = 0; i < ClusterIds.Len(); i++) { if (i % 1000 == 0) { fprintf(stderr, "%d out of %d completed\n", i, ClusterIds.Len()); } TCluster C; ClusterBase->GetCluster(ClusterIds[i], C); //fprintf(stderr, "%d vs. %d\n", ClusterIds[i].Val, C.GetId().Val); // Put x-word shingles into hash table; x is specified by ShingleLen parameter THashSet < TMd5Sig > CHashedShingles; GetHashedShinglesOfCluster(QuoteBase, C, ShingleLen, CHashedShingles); for (THashSet<TMd5Sig>::TIter Hash = CHashedShingles.BegI(); Hash < CHashedShingles.EndI(); Hash++) { TIntV ShingleClusterIds; if (ShingleToClusterIds.IsKey(*Hash)) { ShingleClusterIds = ShingleToClusterIds.GetDat(*Hash); } ShingleClusterIds.Add(ClusterIds[i]); ShingleToClusterIds.AddDat(*Hash, ShingleClusterIds); } } Err("Done hashing!\n"); }
void TYInvIx::GetDocIdV( const PYWordDs& WordDs, const int& MnDocFq, TIntV& DocIdV){ IAssert(MnDocFq>=0); if (MnDocFq==0){ DocIdV=AllDocIdV; } else { TIntIntH DocIdFqH(100); int MxDocFq=0; int WordIdN=WordDs->FFirstWordId(); int WordId; double WordFq; while (WordDs->FNextWordId(WordIdN, WordId, WordFq)){ if (WordIdToFirstDocIdNH.IsKey(WordId)){ int DocIdN=FFirstDocId(WordId); int DocId; while (FNextWordId(DocIdN, DocId)){ DocIdFqH.AddDat(DocId)+=int(WordFq); MxDocFq=TInt::GetMx(MxDocFq, DocIdFqH.GetDat(DocId)); } } } int NewMnDocFq=(MnDocFq<=MxDocFq) ? MnDocFq : MxDocFq-3; DocIdV.Gen(DocIdFqH.Len(), 0); int DocIdP=DocIdFqH.FFirstKeyId(); while (DocIdFqH.FNextKeyId(DocIdP)){ int DocId=DocIdFqH.GetKey(DocIdP); int DocFq=DocIdFqH[DocIdP]; if (DocFq>=NewMnDocFq){DocIdV.Add(DocId);} } } }
void TYFSelBs::GetBestWordIdV( const int& DocId, const double& EstExp, const double& SumEstPrb, const PYWordDs& IntrsWordDs, TIntV& BestWordIdV){ TIntFltKdV& WordIdEstKdV=DocIdToWordIdEstVV[DocId]; TFltIntKdV WordEstIdKdV(WordIdEstKdV.Len(), 0); double MnWordEst=TFlt::Mx; for (int WordIdN=0; WordIdN<WordIdEstKdV.Len(); WordIdN++){ int WordId=WordIdEstKdV[WordIdN].Key; double WordEst=pow(WordIdEstKdV[WordIdN].Dat, EstExp); if (IntrsWordDs->IsWordId(WordId)){ WordEstIdKdV.Add(TFltIntKd(WordEst, WordId)); MnWordEst=TFlt::GetMn(WordEst, MnWordEst); } } double SumWordEst=0; {for (int WordIdN=0; WordIdN<WordEstIdKdV.Len(); WordIdN++){ SumWordEst+=(WordEstIdKdV[WordIdN].Key-=MnWordEst);}} WordEstIdKdV.Sort(false); {BestWordIdV.Gen(WordEstIdKdV.Len(), 0); SumWordEst*=SumEstPrb; int WordIdN=0; while ((SumWordEst>=0)&&(WordIdN<WordEstIdKdV.Len())){ double WordEst=WordEstIdKdV[WordIdN].Key; int WordId=WordEstIdKdV[WordIdN].Dat; SumWordEst-=WordEst; BestWordIdV.Add(WordId); WordIdN++; }} }
// Returns a JSON object mapping node names to arrays of sample quantiles
// (converted with TTm::GetUnixMSecsFromWinMSecs). NodeNmV selects the nodes;
// when it is empty all graph nodes are considered and unknown names are skipped.
// Only nodes that have a zero timestamp and a non-empty sample are reported.
// QuantileV must be non-empty with every value in [0.0, 1.0].
PJsonVal TGraphCascade::GetPosterior(const TStrV& NodeNmV, const TFltV& QuantileV) const {
  PJsonVal Result = TJsonVal::NewObj();

  // Step 1: gather the candidate node ids.
  TIntV CandNIdV;
  int Cands = 0;
  if (NodeNmV.Empty()) {
    // go over all zero timestamps for which samples exist
    Graph.GetNIdV(CandNIdV);
    Cands = Graph.GetNodes();
  } else {
    const int Names = NodeNmV.Len();
    for (int NameN = 0; NameN < Names; NameN++) {
      if (!NodeNmIdH.IsKey(NodeNmV[NameN])) { continue; }
      CandNIdV.Add(NodeNmIdH.GetDat(NodeNmV[NameN]));
    }
    Cands = CandNIdV.Len();
  }

  // Step 2: keep candidates with a zero timestamp and a non-empty sample.
  TIntV NodeIdV;
  for (int CandN = 0; CandN < Cands; CandN++) {
    const int NodeId = CandNIdV[CandN];
    if (Timestamps.IsKey(NodeId) && Sample.IsKey(NodeId) && !Sample.GetDat(NodeId).Empty() && Timestamps.GetDat(NodeId) == 0) {
      NodeIdV.Add(NodeId);
    }
  }

  // Step 3: validate the requested quantiles.
  EAssertR(QuantileV.Len() > 0, "TGraphCascade::GetPosterior quantiles should not be empty!");
  const int Quantiles = QuantileV.Len();
  for (int QuantileN = 0; QuantileN < Quantiles; QuantileN++) {
    EAssertR((QuantileV[QuantileN] >= 0.0) && (QuantileV[QuantileN] <= 1.0), "TGraphCascade::GetPosterior quantiles should be between 0.0 and 1.0");
  }

  // Step 4: read the quantiles off each node's sorted sample.
  const int Nodes = NodeIdV.Len();
  for (int NodeN = 0; NodeN < Nodes; NodeN++) {
    const int NodeId = NodeIdV[NodeN];
    TStr NodeNm = NodeIdNmH.GetDat(NodeId);
    // sort a copy so the stored sample is left untouched
    TUInt64V SampleV = Sample.GetDat(NodeId);
    SampleV.Sort(true);
    const int SampleSize = SampleV.Len();
    PJsonVal QuantilesArr = TJsonVal::NewArr();
    for (int QuantileN = 0; QuantileN < Quantiles; QuantileN++) {
      int Idx = (int)floor(QuantileV[QuantileN] * SampleSize);
      // quantile 1.0 would index one past the end; clamp to the last element
      Idx = MIN(Idx, SampleSize - 1);
      uint64 UnixTimestamp = TTm::GetUnixMSecsFromWinMSecs(SampleV[Idx]);
      QuantilesArr->AddToArr((double)UnixTimestamp);
    }
    Result->AddToObj(NodeNm, QuantilesArr);
  }
  return Result;
}