/// Average hop count over the connected unordered node pairs of an undirected graph.
///
/// One breadth-first search is run from every node; for each source the hop
/// counts to all nodes that precede it in the node list are accumulated, so
/// every unordered pair is inspected exactly once.
///
/// @param p  graph to measure.
/// @return   (sum of hop counts) / (number of connected pairs), or 0.0 when the
///           graph has no connected pair (fewer than two nodes, or no edges).
double ave_path_length (PUNGraph p) {
  // Collect node ids once; Add() appends in place instead of rebuilding the vector.
  TVec<TInt> ids;
  for (TUNGraph::TNodeI n = p->BegNI(); n != p->EndNI(); n++) {
    ids.Add(n.GetId());
  }

  TBreathFS<PUNGraph> bfs(p);
  double tot_lengths = 0.0;
  double tot_pairs = 0.0;

  // Walk the sources from the back of the list, pairing each with the ids before it.
  for (int s = ids.Len() - 1; s >= 0; s--) {
    const int src = ids[s];
    bfs.DoBfs(src, true, true);
    for (int d = 0; d < s; d++) {
      const int hops = bfs.GetHops(src, ids[d]);
      // GetHops signals "no path" with a negative value; such pairs must not
      // be averaged in (the former `length == length` test never filtered anything).
      if (hops < 0) { continue; }
      tot_lengths += hops;
      tot_pairs += 1;
    }
  }

  // Avoid 0/0 when nothing was counted.
  if (tot_pairs == 0.0) { return 0.0; }
  return tot_lengths / tot_pairs;
}
// I embarassingly don't know how templating works. void QuoteGraph::CompareUsingMinHash(TVec<THash<TMd5Sig, TIntSet> >& BucketsVector) { THashSet<TIntPr> EdgeCache; int Count = 0; int RealCount = 0; Err("Beginning edge creation step...\n"); for (int i = 0; i < BucketsVector.Len(); i++) { Err("Processing band signature %d of %d - %d signatures\n", i+1, BucketsVector.Len(), BucketsVector[i].Len()); TVec<TMd5Sig> Buckets; BucketsVector[i].GetKeyV(Buckets); TVec<TMd5Sig>::TIter BucketEnd = Buckets.EndI(); for (TVec<TMd5Sig>::TIter BucketSig = Buckets.BegI(); BucketSig < BucketEnd; BucketSig++) { TIntSet Bucket = BucketsVector[i].GetDat(*BucketSig); Count += Bucket.Len() * (Bucket.Len() - 1) / 2; for (TIntSet::TIter Quote1 = Bucket.BegI(); Quote1 < Bucket.EndI(); Quote1++) { TIntSet::TIter Quote1Copy = Quote1; Quote1Copy++; for (TIntSet::TIter Quote2 = Quote1Copy; Quote2 < Bucket.EndI(); Quote2++) { if (!EdgeCache.IsKey(TIntPr(Quote1.GetKey(), Quote2.GetKey())) && !EdgeCache.IsKey(TIntPr(Quote2.GetKey(), Quote1.GetKey()))) { EdgeCache.AddKey(TIntPr(Quote1.GetKey(), Quote2.GetKey())); EdgeCache.AddKey(TIntPr(Quote2.GetKey(), Quote1.GetKey())); RealCount++; AddEdgeIfSimilar(Quote1.GetKey(), Quote2.GetKey()); } } } } } fprintf(stderr, "NUMBER OF COMPARES: %d\n", Count); fprintf(stderr, "NUMBER OF REAL COMPARES: %d\n", RealCount); }
int main() { TLSHash LSH(7, 7, DIM, TLSHash::EUCLIDEAN); LSH.Init(); TRnd Gen; Gen.Randomize(); TVec<TFltV> DataV; for (int i=0; i<1000000; i++) { TFltV Datum; for (int j=0; j<3; j++) { Datum.Add(Gen.GetUniDev()*2100); } DataV.Add(Datum); } LSH.AddV(DataV); TVec<TPair<TFltV, TFltV> > NeighborsV = LSH.GetAllCandidatePairs(); printf("Number of Candidates: %d\n", NeighborsV.Len()); NeighborsV = LSH.GetAllNearPairs(); printf("Number of Close Pairs: %d\n", NeighborsV.Len()); for (int i=0; i<NeighborsV.Len(); i++) { outputPoint(NeighborsV[i].GetVal1()); printf(" "); outputPoint(NeighborsV[i].GetVal2()); printf("\n"); } return 0; }
///////////////////////////////////////////////////////////////////////////////
// Triad counting methods

/// Counts three-temporal-edge triangle motifs by processing every static
/// triangle independently.
///
/// For each triangle (u, v, w) returned by GetAllStaticTriangles(), the edges of
/// its six directed node pairs are gathered with AddStarEdges(), sorted, and fed
/// to a six-label ThreeTEdgeMotifCounter. The per-triangle result is then folded
/// into the 2x2x2 output table, summing the six relabelings of the triangle for
/// each of the eight motif types.
///
/// @param delta   time-window parameter forwarded unchanged to the motif counter.
/// @param counts  output; reset to a 2x2x2 Counter3D and then accumulated into.
void TempMotifCounter::Count3TEdgeTriadsNaive(double delta, Counter3D& counts) {
  TIntV Us, Vs, Ws;
  GetAllStaticTriangles(Us, Vs, Ws);
  counts = Counter3D(2, 2, 2);
  // Triangles are processed in parallel; only the final accumulation into
  // `counts` is serialized (see the critical section below).
  #pragma omp parallel for schedule(dynamic)
  for (int i = 0; i < Us.Len(); i++) {
    int u = Us[i];
    int v = Vs[i];
    int w = Ws[i];
    // Gather all edges in triangle (u, v, w)
    // Each directed node pair gets its own label 0..5, passed as the last
    // argument to AddStarEdges.
    int uv = 0, vu = 1, uw = 2, wu = 3, vw = 4, wv = 5;
    TVec<TIntPair> combined;
    AddStarEdges(combined, u, v, uv);
    AddStarEdges(combined, v, u, vu);
    AddStarEdges(combined, u, w, uw);
    AddStarEdges(combined, w, u, wu);
    AddStarEdges(combined, v, w, vw);
    AddStarEdges(combined, w, v, wv);
    // Get the counts for this triangle
    // Sorting orders the pairs by Key (used as the timestamp below) — presumably
    // chronological order; verify against TIntPair's comparison.
    combined.Sort();
    ThreeTEdgeMotifCounter counter(6);
    TIntV edge_id(combined.Len());
    TIntV timestamps(combined.Len());
    // Split the sorted (Key, Dat) pairs into parallel timestamp / label vectors.
    for (int k = 0; k < combined.Len(); k++) {
      edge_id[k] = combined[k].Dat;
      timestamps[k] = combined[k].Key;
    }
    Counter3D local;
    counter.Count(edge_id, timestamps, delta, local);
    // Update the global counter with the various symmetries
    // `counts` is shared by all threads, so the accumulation is serialized.
    #pragma omp critical
    {
      // i --> j, k --> j, i --> k
      counts(0, 0, 0) += local(uv, wv, uw) + local(vu, wu, vw) + local(uw, vw, uv) + local(wu, vu, wv) + local(vw, uw, vu) + local(wv, uv, wu);
      // i --> j, k --> j, k --> i
      counts(0, 0, 1) += local(uv, wv, wu) + local(vu, wu, wv) + local(uw, vw, vu) + local(wu, vu, vw) + local(vw, uw, uv) + local(wv, uv, uw);
      // i --> j, j --> k, i --> k
      counts(0, 1, 0) += local(uv, vw, uw) + local(vu, uw, vw) + local(uw, wv, uv) + local(wu, uv, wv) + local(vw, wu, vu) + local(wv, vu, wu);
      // i --> j, j --> k, k --> i
      counts(0, 1, 1) += local(uv, vw, wu) + local(vu, uw, wv) + local(uw, wv, vu) + local(wu, uv, vw) + local(vw, wu, uv) + local(wv, vu, uw);
      // i --> j, k --> i, j --> k
      counts(1, 0, 0) += local(uv, wu, vw) + local(vu, wv, uw) + local(uw, vu, wv) + local(wu, vw, uv) + local(vw, uv, wu) + local(wv, uw, vu);
      // i --> j, k --> i, k --> j
      counts(1, 0, 1) += local(uv, wu, wv) + local(vu, wv, wu) + local(uw, vu, vw) + local(wu, vw, vu) + local(vw, uv, uw) + local(wv, uw, uv);
      // i --> j, i --> k, j --> k
      counts(1, 1, 0) += local(uv, uw, vw) + local(vu, vw, uw) + local(uw, uv, wv) + local(wu, wv, uv) + local(vw, vu, wu) + local(wv, wu, vu);
      // i --> j, i --> k, k --> j
      counts(1, 1, 1) += local(uv, uw, wv) + local(vu, vw, wu) + local(uw, uv, vw) + local(wu, wv, vu) + local(vw, vu, uw) + local(wv, wu, uv);
    }
  }
}
void TFfGGen::GenFFGraphs(const double& FProb, const double& BProb, const TStr& FNm) { const int NRuns = 10; const int NNodes = 10000; TGStat::NDiamRuns = 10; //const double FProb = 0.35, BProb = 0.20; // ff1 //const double FProb = 0.37, BProb = 0.32; // ff2 //const double FProb = 0.37, BProb = 0.325; // ff22 //const double FProb = 0.37, BProb = 0.33; // ff3 //const double FProb = 0.37, BProb = 0.35; // ff4 //const double FProb = 0.38, BProb = 0.35; // ff5 TVec<PGStatVec> GAtTmV; TFfGGen FF(false, 1, FProb, BProb, 1.0, 0, 0); for (int r = 0; r < NRuns; r++) { PGStatVec GV = TGStatVec::New(tmuNodes, TGStat::AllStat()); FF.GenGraph(NNodes, GV, true); for (int i = 0; i < GV->Len(); i++) { if (i == GAtTmV.Len()) { GAtTmV.Add(TGStatVec::New(tmuNodes, TGStat::AllStat())); } GAtTmV[i]->Add(GV->At(i)); } IAssert(GAtTmV.Len() == GV->Len()); } PGStatVec AvgStat = TGStatVec::New(tmuNodes, TGStat::AllStat()); for (int i = 0; i < GAtTmV.Len(); i++) { AvgStat->Add(GAtTmV[i]->GetAvgGStat(false)); } AvgStat->PlotAllVsX(gsvNodes, FNm, TStr::Fmt("Forest Fire: F:%g B:%g (%d runs)", FProb, BProb, NRuns)); AvgStat->Last()->PlotAll(FNm, TStr::Fmt("Forest Fire: F:%g B:%g (%d runs)", FProb, BProb, NRuns)); }
///Generate graph using the AGM model. CProbV = vector of Pc PUNGraph TAGM::GenAGM(TVec<TIntV>& CmtyVV, const TFltV& CProbV, TRnd& Rnd, const double PNoCom) { PUNGraph G = TUNGraph::New(100 * CmtyVV.Len(), -1); printf("AGM begins\n"); for (int i = 0; i < CmtyVV.Len(); i++) { TIntV& CmtyV = CmtyVV[i]; for (int u = 0; u < CmtyV.Len(); u++) { if ( G->IsNode(CmtyV[u])) { continue; } G->AddNode(CmtyV[u]); } double Prob = CProbV[i]; RndConnectInsideCommunity(G, CmtyV, Prob, Rnd); } if (PNoCom > 0.0) { //if we want to connect nodes that do not share any community TIntSet NIDS; for (int c = 0; c < CmtyVV.Len(); c++) { for (int u = 0; u < CmtyVV[c].Len(); u++) { NIDS.AddKey(CmtyVV[c][u]); } } TIntV NIDV; NIDS.GetKeyV(NIDV); RndConnectInsideCommunity(G,NIDV,PNoCom,Rnd); } printf("AGM completed (%d nodes %d edges)\n",G->GetNodes(),G->GetEdges()); G->Defrag(); return G; }
/// Compares quotes that share a shingle.
///
/// For every shingle key, each unordered pair of quote ids in its bucket is
/// handed to AddEdgeIfSimilar() the first time the pair is encountered;
/// EdgeCache stores both orderings of every processed pair. Progress is
/// reported through Err() every 100 shingles, and two totals are written to
/// stderr at the end: pairs summed over all buckets (repeats included) and
/// pairs actually processed.
void QuoteGraph::CompareUsingShingles(THash<TMd5Sig, TIntSet>& Shingles) {
  int Count = 0;
  int RealCount = 0;

  TVec<TMd5Sig> ShingleKeys;
  Shingles.GetKeyV(ShingleKeys);
  THashSet<TIntPr> EdgeCache;

  for (int KeyN = 0; KeyN < ShingleKeys.Len(); KeyN++) {
    if (KeyN % 100 == 0) {
      Err("Processed %d out of %d shingles, count = %d\n", KeyN, ShingleKeys.Len(), Count);
    }

    TIntSet Bucket;
    Shingles.IsKeyGetDat(ShingleKeys[KeyN], Bucket);

    for (TIntSet::TIter ItA = Bucket.BegI(); ItA < Bucket.EndI(); ItA++) {
      TIntSet::TIter ItB = ItA;
      ItB++;
      for (; ItB < Bucket.EndI(); ItB++) {
        const TIntPr Forward(ItA.GetKey(), ItB.GetKey());
        const TIntPr Backward(ItB.GetKey(), ItA.GetKey());
        // Skip pairs already handled in either orientation.
        if (EdgeCache.IsKey(Forward) || EdgeCache.IsKey(Backward)) { continue; }
        EdgeCache.AddKey(Forward);
        EdgeCache.AddKey(Backward);
        RealCount++;
        AddEdgeIfSimilar(ItA.GetKey(), ItB.GetKey());
      }
    }

    // Number of unordered pairs in this bucket.
    Count += Bucket.Len() * (Bucket.Len() - 1) / 2;
  }

  fprintf(stderr, "NUMBER OF COMPARES: %d\n", Count);
  fprintf(stderr, "NUMBER OF REAL COMPARES: %d\n", RealCount);
}
void TStrFeatureSpace::ToStr(const TVec<TStrFSSize>& FeatureIds, TChA& ChA, char Sep) const { for (TStrFSSize i = 0; i < FeatureIds.Len(); i++) { ChA += ISpace.KeyFromOfs(Space[FeatureIds[i]]); if (i < FeatureIds.Len() - 1) { ChA += Sep; } } }
/// save bipartite community affiliation into gexf file void TAGMUtil::SaveBipartiteGephi(const TStr& OutFNm, const TIntV& NIDV, const TVec<TIntV>& CmtyVV, const double MaxSz, const double MinSz, const TIntStrH& NIDNameH, const THash<TInt, TIntTr>& NIDColorH, const THash<TInt, TIntTr>& CIDColorH ) { /// Plot bipartite graph if (CmtyVV.Len() == 0) { return; } double NXMin = 0.1, YMin = 0.1, NXMax = 250.00, YMax = 30.0; double CXMin = 0.3 * NXMax, CXMax = 0.7 * NXMax; double CStep = (CXMax - CXMin) / (double) CmtyVV.Len(), NStep = (NXMax - NXMin) / (double) NIDV.Len(); THash<TInt,TIntV> NIDComVH; TAGMUtil::GetNodeMembership(NIDComVH, CmtyVV); FILE* F = fopen(OutFNm.CStr(), "wt"); fprintf(F, "<?xml version='1.0' encoding='UTF-8'?>\n"); fprintf(F, "<gexf xmlns='http://www.gexf.net/1.2draft' xmlns:viz='http://www.gexf.net/1.1draft/viz' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xsi:schemaLocation='http://www.gexf.net/1.2draft http://www.gexf.net/1.2draft/gexf.xsd' version='1.2'>\n"); fprintf(F, "\t<graph mode='static' defaultedgetype='directed'>\n"); fprintf(F, "\t\t<nodes>\n"); for (int c = 0; c < CmtyVV.Len(); c++) { int CID = c; double XPos = c * CStep + CXMin; TIntTr Color = CIDColorH.IsKey(CID)? CIDColorH.GetDat(CID) : TIntTr(120, 120, 120); fprintf(F, "\t\t\t<node id='C%d' label='C%d'>\n", CID, CID); fprintf(F, "\t\t\t\t<viz:color r='%d' g='%d' b='%d'/>\n", Color.Val1.Val, Color.Val2.Val, Color.Val3.Val); fprintf(F, "\t\t\t\t<viz:size value='%.3f'/>\n", MaxSz); fprintf(F, "\t\t\t\t<viz:shape value='square'/>\n"); fprintf(F, "\t\t\t\t<viz:position x='%f' y='%f' z='0.0'/>\n", XPos, YMax); fprintf(F, "\t\t\t</node>\n"); } for (int u = 0; u < NIDV.Len(); u++) { int NID = NIDV[u]; TStr Label = NIDNameH.IsKey(NID)? NIDNameH.GetDat(NID): ""; double Size = MinSz; double XPos = NXMin + u * NStep; TIntTr Color = NIDColorH.IsKey(NID)? 
NIDColorH.GetDat(NID) : TIntTr(120, 120, 120); double Alpha = 1.0; fprintf(F, "\t\t\t<node id='%d' label='%s'>\n", NID, Label.CStr()); fprintf(F, "\t\t\t\t<viz:color r='%d' g='%d' b='%d' a='%.1f'/>\n", Color.Val1.Val, Color.Val2.Val, Color.Val3.Val, Alpha); fprintf(F, "\t\t\t\t<viz:size value='%.3f'/>\n", Size); fprintf(F, "\t\t\t\t<viz:shape value='square'/>\n"); fprintf(F, "\t\t\t\t<viz:position x='%f' y='%f' z='0.0'/>\n", XPos, YMin); fprintf(F, "\t\t\t</node>\n"); } fprintf(F, "\t\t</nodes>\n"); fprintf(F, "\t\t<edges>\n"); int EID = 0; for (int u = 0; u < NIDV.Len(); u++) { int NID = NIDV[u]; if (NIDComVH.IsKey(NID)) { for (int c = 0; c < NIDComVH.GetDat(NID).Len(); c++) { int CID = NIDComVH.GetDat(NID)[c]; fprintf(F, "\t\t\t<edge id='%d' source='C%d' target='%d'/>\n", EID++, CID, NID); } } } fprintf(F, "\t\t</edges>\n"); fprintf(F, "\t</graph>\n"); fprintf(F, "</gexf>\n"); }
/// Counts the words of ChA that are not stop words.
///
/// @param ChA        text to count words in; left unmodified.
/// @param StopWordH  words for which IsKey() is true are excluded from the count.
/// @return           number of words produced by SplitWords() minus the number
///                   of those found in StopWordH.
int TStrUtil::CountWords(const TChA& ChA, const TStrHash<TInt>& StopWordH) {
  // SplitWords takes a mutable buffer and yields char* words (presumably
  // pointing into that buffer), so it is given a private copy of the input.
  // The copy was previously left empty, so the input was never examined.
  TChA Tmp(ChA);
  TVec<char *> WrdV;
  SplitWords(Tmp, WrdV);
  int SWordCnt = 0;
  for (int w = 0; w < WrdV.Len(); w++) {
    if (StopWordH.IsKey(WrdV[w])) { SWordCnt++; }
  }
  return WrdV.Len() - SWordCnt;
}
/////////////////////////////////////////////////////////////////////// // BagOfWords-Column-Matrix TBowMatrix::TBowMatrix(const TVec<PBowSpV>& BowSpV): TMatrix() { RowN = 0; ColSpVV.Gen(BowSpV.Len(), 0); for (int i = 0; i < BowSpV.Len(); i++) { ColSpVV.Add(BowSpV[i]); if (BowSpV[i]->Len() > 0) { RowN = TInt::GetMx(RowN, BowSpV[i]->GetWId(BowSpV[i]->GetWIds()-1)+1); } } }
/// Replaces MainV with its intersection with JoinV.
///
/// Both vectors are walked in lock-step using the items' `<` / `>` operators
/// (so they are expected to be sorted consistently with those operators).
/// Items that compare neither less nor greater are kept, with Dat set to the
/// sum of both sides' Dat.
void TIndex::TQmGixSumMerger<TQmGixItem>::Intrs(TVec<TQmGixItem>& MainV, const TVec<TQmGixItem>& JoinV) const {
  TVec<TQmGixItem> ResV;
  int MainN = 0;
  int JoinN = 0;
  while (MainN < MainV.Len() && JoinN < JoinV.Len()) {
    const TQmGixItem& MainItem = MainV.GetVal(MainN);
    const TQmGixItem& JoinItem = JoinV.GetVal(JoinN);
    if (MainItem < JoinItem) {
      MainN++;
      continue;
    }
    if (MainItem > JoinItem) {
      JoinN++;
      continue;
    }
    // Match: keep the key and add up the two counts.
    ResV.Add(TQmGixItem(MainItem.Key, MainItem.Dat + JoinItem.Dat));
    MainN++;
    JoinN++;
  }
  MainV = ResV;
}
void TempMotifCounter::Count3TEdge3NodeStarsNaive( double delta, Counter3D& pre_counts, Counter3D& pos_counts, Counter3D& mid_counts) { TIntV centers; GetAllNodes(centers); pre_counts = Counter3D(2, 2, 2); pos_counts = Counter3D(2, 2, 2); mid_counts = Counter3D(2, 2, 2); // Get counts for each node as the center #pragma omp parallel for schedule(dynamic) for (int c = 0; c < centers.Len(); c++) { // Gather all adjacent events int center = centers[c]; TIntV nbrs; GetAllNeighbors(center, nbrs); for (int i = 0; i < nbrs.Len(); i++) { for (int j = i + 1; j < nbrs.Len(); j++) { int nbr1 = nbrs[i]; int nbr2 = nbrs[j]; TVec<TIntPair> combined; AddStarEdges(combined, center, nbr1, 0); AddStarEdges(combined, nbr1, center, 1); AddStarEdges(combined, center, nbr2, 2); AddStarEdges(combined, nbr2, center, 3); combined.Sort(); ThreeTEdgeMotifCounter counter(4); TIntV edge_id(combined.Len()); TIntV timestamps(combined.Len()); for (int k = 0; k < combined.Len(); k++) { edge_id[k] = combined[k].Dat; timestamps[k] = combined[k].Key; } Counter3D local; counter.Count(edge_id, timestamps, delta, local); #pragma omp critical { // Update with local counts for (int dir1 = 0; dir1 < 2; ++dir1) { for (int dir2 = 0; dir2 < 2; ++dir2) { for (int dir3 = 0; dir3 < 2; ++dir3) { pre_counts(dir1, dir2, dir3) += local(dir1, dir2, dir3 + 2) + local(dir1 + 2, dir2 + 2, dir3); pos_counts(dir1, dir2, dir3) += local(dir1, dir2 + 2, dir3 + 2) + local(dir1 + 2, dir2, dir3); mid_counts(dir1, dir2, dir3) += local(dir1, dir2 + 2, dir3) + local(dir1 + 2, dir2, dir3 + 2); } } } } } } } }
/// Collects the record ids of all tree entries within a value range.
///
/// The B-tree is queried from (RangeMinMax.Val1, 0) to
/// (RangeMinMax.Val2, TUInt64::Mx); RecIdV is reset and filled with the second
/// component of every returned entry, in the order the query returned them.
void TBTreeIndex<TVal>::SearchRange(const TPair<TVal, TVal>& RangeMinMax, TUInt64V& RecIdV) const {
  const TTreeVal Lower(RangeMinMax.Val1, 0);
  const TTreeVal Upper(RangeMinMax.Val2, TUInt64::Mx);

  TVec<TTreeVal> HitV;
  BTree.RangeQuery(Lower, Upper, HitV);

  // Strip the value part, keeping only the record ids.
  const int Hits = HitV.Len();
  RecIdV.Gen(Hits, 0);
  for (int HitN = 0; HitN < Hits; HitN++) {
    RecIdV.Add(HitV[HitN].Val2);
  }
}
int TNEANetMP::AddEdge(const int& SrcNId, const int& DstNId, int EId) { int i; if (EId == -1) { EId = MxEId; MxEId++; } else { MxEId = TMath::Mx(EId+1, MxEId()); } IAssertR(!IsEdge(EId), TStr::Fmt("EdgeId %d already exists", EId)); IAssertR(IsNode(SrcNId) && IsNode(DstNId), TStr::Fmt("%d or %d not a node.", SrcNId, DstNId).CStr()); EdgeH.AddDat(EId, TEdge(EId, SrcNId, DstNId)); GetNode(SrcNId).OutEIdV.AddSorted(EId); GetNode(DstNId).InEIdV.AddSorted(EId); // update attribute columns for (i = 0; i < VecOfIntVecsE.Len(); i++) { TVec<TInt>& IntVec = VecOfIntVecsE[i]; IntVec.Ins(EdgeH.GetKeyId(EId), TInt::Mn); } TVec<TStr> DefIntVec = TVec<TStr>(); IntDefaultsE.GetKeyV(DefIntVec); for (i = 0; i < DefIntVec.Len(); i++) { TStr attr = DefIntVec[i]; TVec<TInt>& IntVec = VecOfIntVecsE[KeyToIndexTypeE.GetDat(DefIntVec[i]).Val2]; IntVec[EdgeH.GetKeyId(EId)] = GetIntAttrDefaultE(attr); } for (i = 0; i < VecOfStrVecsE.Len(); i++) { TVec<TStr>& StrVec = VecOfStrVecsE[i]; StrVec.Ins(EdgeH.GetKeyId(EId), TStr::GetNullStr()); } TVec<TStr> DefStrVec = TVec<TStr>(); IntDefaultsE.GetKeyV(DefStrVec); for (i = 0; i < DefStrVec.Len(); i++) { TStr attr = DefStrVec[i]; TVec<TStr>& StrVec = VecOfStrVecsE[KeyToIndexTypeE.GetDat(DefStrVec[i]).Val2]; StrVec[EdgeH.GetKeyId(EId)] = GetStrAttrDefaultE(attr); } for (i = 0; i < VecOfFltVecsE.Len(); i++) { TVec<TFlt>& FltVec = VecOfFltVecsE[i]; FltVec.Ins(EdgeH.GetKeyId(EId), TFlt::Mn); } TVec<TStr> DefFltVec = TVec<TStr>(); FltDefaultsE.GetKeyV(DefFltVec); for (i = 0; i < DefFltVec.Len(); i++) { TStr attr = DefFltVec[i]; TVec<TFlt>& FltVec = VecOfFltVecsE[KeyToIndexTypeE.GetDat(DefFltVec[i]).Val2]; FltVec[NodeH.GetKeyId(EId)] = GetFltAttrDefaultE(attr); } return EId; }
// returns a set of clusters such that separate types containted in the input set of clusters static TVec<TCluster> ExpandClusters(const TVec<TCluster>& clusters, const THash<TInt,TVec<TInt> >& quotePages, THash<TInt,TWebpage> pageHash){ TVec<TCluster> types = TVec<TCluster>::TVec<TCluster>(); TVec<TCluster> curtypes; printf("[ExpandClustres]\texpanding clusters..\n"); for (int i = 0; i < clusters.Len(); i++) { curtypes = TCluster::GetSingleTypeClusters(clusters[i], quotePages, pageHash); if (curtypes.Len() != clusters[i].NoTypes()) printf("clusters[%d] doesn't match (%d/%d) \n",i, curtypes.Len(), clusters[i].NoTypes()); for (int j = 0; j < curtypes.Len(); j++) types.Add(curtypes[j]); } printf("[ExpandClustrs]\texpanded %d clusters into %d types\n",clusters.Len(),types.Len()); return types; }
void LSH::MinHash(THash<TMd5Sig, TShingleIdSet>& ShingleToQuoteIds, TVec<THash<TIntV, TIntSet> >& SignatureBandBuckets) { TRnd RandomGenerator; // TODO: make this "more random" by incorporating time for (int i = 0; i < NumBands; ++i) { THash<TShingleId, TIntV> Inverted; // (QuoteID, QuoteSignatureForBand) THash < TIntV, TIntSet > BandBuckets; // (BandSignature, QuoteIDs) for (int j = 0; j < BandSize; ++j) { // Create new signature TVec < TMd5Sig > Signature; ShingleToQuoteIds.GetKeyV(Signature); Signature.Shuffle(RandomGenerator); // Place in bucket - not very efficient int SigLen = Signature.Len(); for (int k = 0; k < SigLen; ++k) { TShingleIdSet CurSet = ShingleToQuoteIds.GetDat(Signature[k]); for (TShingleIdSet::TIter l = CurSet.BegI(); l < CurSet.EndI(); l++) { TShingleId Key = l.GetKey(); if (Inverted.IsKey(Key)) { TIntV CurSignature = Inverted.GetDat(Key); if (CurSignature.Len() <= j) { CurSignature.Add(k); Inverted.AddDat(Key, CurSignature); } } else { TIntV NewSignature; NewSignature.Add(k); Inverted.AddDat(Key, NewSignature); } } } } TVec<TShingleId> InvertedKeys; Inverted.GetKeyV(InvertedKeys); TInt InvertedLen = InvertedKeys.Len(); for (int k = 0; k < InvertedLen; ++k) { TIntSet Bucket; TIntV Signature = Inverted.GetDat(InvertedKeys[k]); if (BandBuckets.IsKey(Signature)) { Bucket = BandBuckets.GetDat(Signature); } Bucket.AddKey(InvertedKeys[k].Val1); BandBuckets.AddDat(Signature, Bucket); } SignatureBandBuckets.Add(BandBuckets); Err("%d out of %d band signatures computed\n", i + 1, NumBands); } Err("Minhash step complete!\n"); }
/// Appends the graph id of every graph in isoG to graphIds (in input order)
/// and returns the smallest of those ids. isoG must not be empty.
uint64 TGraphEnumUtils::GetMinAndGraphIds(const TVec<PNGraph> &isoG, TVec<uint64> &graphIds) {
  IAssert(isoG.Len() > 0);

  // Seed the minimum with the first graph.
  uint64 smallest = GraphId(isoG[0]);
  graphIds.Add(smallest);

  int g = 1;
  while (g < isoG.Len()) {
    const uint64 id = GraphId(isoG[g]);
    if (id < smallest) {
      smallest = id;
    }
    graphIds.Add(id);
    g++;
  }
  return smallest;
}
int main(int argc, char *argv[]) { TStr BaseString = "/lfs/1/tmp/curis/week/QBDB.bin"; TFIn BaseFile(BaseString); TQuoteBase *QB = new TQuoteBase; TDocBase *DB = new TDocBase; QB->Load(BaseFile); DB->Load(BaseFile); TIntV QuoteIds; QB->GetAllQuoteIds(QuoteIds); int NumQuotes = QuoteIds.Len(); THash<TInt, TStrSet> PeakCounts; for (int i = 0; i < NumQuotes; i++) { TQuote CurQuote; if (QB->GetQuote(QuoteIds[i], CurQuote)) { TVec<TSecTm> Peaks; CurQuote.GetPeaks(DB, Peaks); TStr QuoteString; CurQuote.GetParsedContentString(QuoteString); TStrSet StringSet; if (PeakCounts.IsKey(Peaks.Len())) { StringSet = PeakCounts.GetDat(Peaks.Len()); } StringSet.AddKey(QuoteString); PeakCounts.AddDat(Peaks.Len(), StringSet); } } TIntV PeakCountKeys; PeakCounts.GetKeyV(PeakCountKeys); PeakCountKeys.Sort(true); for (int i = 0; i < PeakCountKeys.Len(); i++) { TStrSet CurSet = PeakCounts.GetDat(PeakCountKeys[i]); if (CurSet.Len() > 0) { printf("QUOTES WITH %d PEAKS\n", PeakCountKeys[i].Val); printf("#########################################\n"); THashSet<TStr> StringSet = PeakCounts.GetDat(PeakCountKeys[i]); for (THashSet<TStr>::TIter l = StringSet.BegI(); l < StringSet.EndI(); l++) { printf("%s\n", l.GetKey().CStr()); } printf("\n"); } } delete QB; delete DB; return 0; }
// parse: // 10:16, 16 Sep 2004 // 10:20, 2004 Sep 16 // 2005-07-07 20:30:35 // 23:24:07, 2005-07-10 // 9 July 2005 14:38 // 21:16, July 9, 2005 // 06:02, 10 July 2005 bool TStrUtil::GetTmFromStr(const char* TmStr, TSecTm& Tm) { static TStrV MonthV1, MonthV2; if (MonthV1.Empty()) { TStr("january|february|march|april|may|june|july|august|september|october|november|december").SplitOnAllCh('|', MonthV1); TStr("jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec").SplitOnAllCh('|', MonthV2); } TChA Tmp(TmStr); Tmp.ToLc(); TVec<char *> WrdV; const char* End = Tmp.CStr()+Tmp.Len(); int Col = -1, Cols=0; for (char *b = Tmp.CStr(); b <End; ) { WrdV.Add(b); while (*b && ! (*b==' ' || *b=='-' || *b==':' || *b==',')) { b++; } if (*b==':') { if(Col==-1) { Col=WrdV.Len(); } Cols++; } *b=0; b++; while (*b && (*b==' ' || *b=='-' || *b==':' || *b==',')) { b++; } } if (Cols == 2) { if (Col+1 >= WrdV.Len()) { return false; } WrdV.Del(Col+1); } if (Col<1) { return false; } const int Hr = atoi(WrdV[Col-1]); const int Min = atoi(WrdV[Col]); WrdV.Del(Col); WrdV.Del(Col-1); if (WrdV.Len() != 3) { return false; } int y=0,m=1,d=2, Mon=-1; if (TCh::IsAlpha(WrdV[0][0])) { y=2; m=0; d=1; } else if (TCh::IsAlpha(WrdV[1][0])) { y=2; m=1; d=0; } else if (TCh::IsAlpha(WrdV[2][0])) { y=0; m=2; d=1; } else { y=0; m=1; d=2; Mon = atoi(WrdV[m]); } int Day = atoi(WrdV[d]); if (Mon <= 0) { Mon = MonthV1.SearchForw(WrdV[m])+1; } if (Mon <= 0) { Mon = MonthV2.SearchForw(WrdV[m])+1; } if (Mon == 0) { return false; } int Year = atoi(WrdV[y]); if (Day > Year) { ::Swap(Day, Year); } //printf("%d-%02d-%02d %02d:%02d\n", Year, Mon, Day, Hr, Min); Tm = TSecTm(Year, Mon, Day, Hr, Min, 0); return true; }
/// Vector-of-communities overload: keys every community by its position in
/// CmtyVV and delegates to the hash-based GetNodeMembership() overload.
void TAGM::GetNodeMembership(THash<TInt,TIntV >& NIDComVH, const TVec<TIntV>& CmtyVV) {
  THash<TInt,TIntV> CmtyByIdx;
  const int Cmtys = CmtyVV.Len();
  for (int CmtyN = 0; CmtyN < Cmtys; CmtyN++) {
    CmtyByIdx.AddDat(CmtyN, CmtyVV[CmtyN]);
  }
  GetNodeMembership(NIDComVH, CmtyByIdx);
}
/// Sweeps over the events once, maintaining a pre-window and a post-window
/// around the current event.
///
/// For event j with time tj: events older than tj - delta are popped from the
/// pre-window, events up to tj + delta are pushed onto the post-window, then
/// the current event is removed from the post-window, processed, and pushed
/// onto the pre-window.
///
/// @param events      event payloads, parallel to timestamps.
/// @param timestamps  event times; the window pointers only move forward, so
///                    they are expected in non-decreasing order.
/// @param delta       window half-width.
/// Throws (via TExcept::Throw) when the two vectors differ in length.
void StarTriad3TEdgeCounter<EdgeData>::Count(const TVec<EdgeData>& events,
                                            const TIntV& timestamps, double delta) {
  InitializeCounters();
  if (events.Len() != timestamps.Len()) {
    TExcept::Throw("Number of events must match number of timestamps.");
  }

  const int total = timestamps.Len();
  int pre_begin = 0;  // first event still inside the pre-window
  int pos_end = 0;    // one past the last event inside the post-window

  for (int cur = 0; cur < total; cur++) {
    const double tj = double(timestamps[cur]);

    // Adjust counts in pre-window [tj - delta, tj)
    for (; pre_begin < total && double(timestamps[pre_begin]) < tj - delta; pre_begin++) {
      PopPre(events[pre_begin]);
    }

    // Adjust counts in post-window (tj, tj + delta]
    for (; pos_end < total && double(timestamps[pos_end]) <= tj + delta; pos_end++) {
      PushPos(events[pos_end]);
    }

    // Move current event off post-window
    const EdgeData& current = events[cur];
    PopPos(current);
    ProcessCurrent(current);
    PushPre(current);
  }
}
void TBackupProfile::Restore(const TStr& BackupFolderName, const ERestoringMode& RestoringMode, const bool& ReportP) const { for (int N = 0; N < LogV.Len(); N++) { // find the folder that matches the BackupFolderName if (LogV[N].GetFolderName() == BackupFolderName) { const TVec<TBackupFolderInfo> Folders = GetFolders(); for (int N = 0; N < Folders.Len(); N++) { const TStr TargetFolder = Folders[N].Folder; TStrV PartV; TDir::SplitPath(TargetFolder, PartV); const TStr LastFolderNamePart = PartV[PartV.Len() - 1]; // do we want to first remove any existing data in the target folder? if (RestoringMode == RemoveExistingFirst) TDir::DelNonEmptyDir(TargetFolder); // copy data from backup to the destination folder const TStr SourceFolder = Destination + ProfileName + "/" + BackupFolderName + "/" + LastFolderNamePart; if (ReportP) TNotify::StdNotify->OnStatusFmt("Copying folder: %s", SourceFolder.CStr()); if (TDir::Exists(SourceFolder)) TDir::CopyDir(SourceFolder, TargetFolder, RestoringMode == OverwriteIfExisting); else TNotify::StdNotify->OnStatusFmt("WARNING: Unable to create a restore of the folder %s. The folder does not exist.", SourceFolder.CStr()); } } } }
void LogOutput::PrintClusterInformationToText(TDocBase *DB, TQuoteBase *QB, TClusterBase *CB, TIntV& ClusterIds, TSecTm PresentTime) { if (!ShouldLog) return; TStr CurDateString = PresentTime.GetDtYmdStr(); TStr TopFileName = Directory + "/text/top/topclusters_" + CurDateString + ".txt"; FILE *T = fopen(TopFileName.CStr(), "w"); for (int i = 0; i < ClusterIds.Len(); i++) { TCluster C; CB->GetCluster(ClusterIds[i], C); TStr CRepQuote; C.GetRepresentativeQuoteString(CRepQuote, QB); TIntV CQuoteIds; TVec<TUInt> CUniqueSources; C.GetQuoteIds(CQuoteIds); TCluster::GetUniqueSources(CUniqueSources, CQuoteIds, QB); fprintf(T, "%d\t%d\t%s\n", CUniqueSources.Len(), CQuoteIds.Len(), CRepQuote.CStr()); for (int j = 0; j < CQuoteIds.Len(); j++) { TQuote Q; if (QB->GetQuote(CQuoteIds[j], Q)) { TStr QuoteStr; Q.GetContentString(QuoteStr); fprintf(T, "\t%d\t%s\n", Q.GetNumSources().Val, QuoteStr.CStr()); } } } fclose(T); }
void TSQLCommand::Bind(const TVec<PSQLParameter> &ParamV) const { for(int i = 0; i < ParamV.Len(); i++) { Bind(ParamV[i], i); } }
/// Returns every pair of data points that share a bucket in at least one band.
///
/// Pairs are first collected as (smaller id, larger id) in a hash set, so a pair
/// colliding in several bands is reported once; the ids are then replaced by
/// the corresponding entries of DataV.
TVec<TPair<TFltV, TFltV> > TLSHash::GetAllCandidatePairs() {
  THashSet<TPair<TInt, TInt> > IdPairSet;

  for (int BandN = 0; BandN < Bands; BandN++) {
    TVec<TIntV> BucketVV;
    SigBucketVHV[BandN].GetDatV(BucketVV);

    for (int BucketN = 0; BucketN < BucketVV.Len(); BucketN++) {
      const TIntV& Members = BucketVV[BucketN];
      const int Size = Members.Len();
      for (int a = 0; a < Size; a++) {
        for (int b = a + 1; b < Size; b++) {
          const int IdA = Members[a];
          const int IdB = Members[b];
          // Normalize the order so (x, y) and (y, x) map to the same key.
          const int Lo = (IdA > IdB) ? IdB : IdA;
          const int Hi = (IdA > IdB) ? IdA : IdB;
          IdPairSet.AddKey(TPair<TInt, TInt> (Lo, Hi));
        }
      }
    }
  }

  // Translate id pairs into pairs of data vectors.
  TVec<TPair<TFltV, TFltV> > CandidatePairs;
  for (int KeyId = IdPairSet.FFirstKeyId(); IdPairSet.FNextKeyId(KeyId); ) {
    TPair<TInt, TInt> Ids = IdPairSet[KeyId];
    CandidatePairs.Add(TPair<TFltV, TFltV>(DataV[Ids.GetVal1()], DataV[Ids.GetVal2()]));
  }
  return CandidatePairs;
}
void TestEigSvd() { PNGraph G = TSnap::GenRndGnm<PNGraph>(100,1000, true); PUNGraph UG = TSnap::ConvertGraph<PUNGraph>(G); TSnap::SaveMatlabSparseMtx(G, "test1.mtx"); TSnap::SaveMatlabSparseMtx(UG, "test2.mtx"); TFltV SngValV; TVec<TFltV> LeftV, RightV; TSnap::GetSngVec(G, 20, SngValV, LeftV, RightV); printf("Singular Values:\n"); for (int i =0; i < SngValV.Len(); i++) { printf("%d\t%f\n", i, SngValV[i]()); } printf("LEFT Singular Vectors:\n"); for (int i=0; i < LeftV[0].Len(); i++) { printf("%d\t%f\t%f\t%f\t%f\t%f\n", i, LeftV[0][i](), LeftV[1][i](), LeftV[2][i](), LeftV[3][i](), LeftV[4][i]()); } printf("RIGHT Singular Vectors:\n"); for (int i=0; i < RightV[0].Len(); i++) { printf("%d\t%f\t%f\t%f\t%f\t%f\n", i, RightV[0][i](), RightV[1][i](), RightV[2][i](), RightV[3][i](), RightV[4][i]()); } TFltV EigValV; TVec<TFltV> EigV; TSnap::GetEigVec(UG, 20, EigValV, EigV); printf("Eigen Values:\n"); for (int i =0; i < EigValV.Len(); i++) { printf("%d\t%f\n", i, EigValV[i]()); } printf("Eigen Vectors %d:\n", EigV.Len()); for (int i =0; i < EigV[0].Len(); i++) { printf("%d\t%f\t%f\t%f\t%f\t%f\n", i, EigV[0][i](), EigV[1][i](), EigV[2][i](), EigV[3][i](), EigV[4][i]()); } }
bool TGraphKey::IsIsomorph(const TGraphKey& Key1, const TGraphKey& Key2, const TVec<TIntV>& NodeIdMapV, int& IsoPermId) { const TIntPrV& EdgeV1 = Key1.EdgeV; const TIntPrV& EdgeV2 = Key2.EdgeV; //for (int i = 0; i < EdgeV1.Len(); i++) printf("\t%d - %d\n", EdgeV1[i].Val1, EdgeV1[i].Val2); printf("\n"); //for (int i = 0; i < EdgeV2.Len(); i++) printf("\t%d - %d\n", EdgeV2[i].Val1, EdgeV2[i].Val2); if (Key1.Nodes != Key2.Nodes || EdgeV1.Len() != EdgeV2.Len()) return false; const int Nodes = NodeIdMapV[0].Len(); // fast adjecency matrix TIntV AdjMtx2(Nodes*Nodes); for (int i = 0; i < EdgeV2.Len(); i++) { AdjMtx2[EdgeV2[i].Val1*Nodes + EdgeV2[i].Val2] = 1; } for (int perm = 0; perm < NodeIdMapV.Len(); perm++) { const TIntV& NodeIdMap = NodeIdMapV[perm]; bool IsIso = true; for (int e1 = 0; e1 < EdgeV1.Len(); e1++) { const int NId1 = NodeIdMap[EdgeV1[e1].Val1]; const int NId2 = NodeIdMap[EdgeV1[e1].Val2]; if (AdjMtx2[NId1*Nodes + NId2] != 1) { IsIso = false; break; } } if (IsIso) { IsoPermId = perm; return true; } } IsoPermId = -1; return false; }
/// extract community affiliation from F_uc void TAGMFast::GetCmtyVV(TVec<TIntV>& CmtyVV, const double Thres, const int MinSz) { CmtyVV.Gen(NumComs, 0); TIntFltH CIDSumFH(NumComs); for (int c = 0; c < SumFV.Len(); c++) { CIDSumFH.AddDat(c, SumFV[c]); } CIDSumFH.SortByDat(false); for (int c = 0; c < NumComs; c++) { int CID = CIDSumFH.GetKey(c); TIntFltH NIDFucH(F.Len() / 10); TIntV CmtyV; IAssert(SumFV[CID] == CIDSumFH.GetDat(CID)); if (SumFV[CID] < Thres) { continue; } for (int u = 0; u < F.Len(); u++) { int NID = u; if (! NodesOk) { NID = NIDV[u]; } if (GetCom(u, CID) >= Thres) { NIDFucH.AddDat(NID, GetCom(u, CID)); } } NIDFucH.SortByDat(false); NIDFucH.GetKeyV(CmtyV); if (CmtyV.Len() >= MinSz) { CmtyVV.Add(CmtyV); } } if ( NumComs != CmtyVV.Len()) { printf("Community vector generated. %d communities are ommitted\n", NumComs.Val - CmtyVV.Len()); } }
/// Merges items with equal keys by summing their counts, then drops items
/// whose count is not positive.
///
/// @param ItemV    items to merge in place; sorted first if necessary.
/// @param IsLocal  when true, items with a negative count are kept; when false
///                 they are dropped and a warning is emitted. Items with a zero
///                 count are always dropped.
void TIndex::TQmGixSumItemHandler<TQmGixItem>::Merge(TVec<TQmGixItem>& ItemV, const bool& IsLocal) const {
  if (ItemV.Empty()) { return; } // nothing to do in this case
  if (!ItemV.IsSorted()) { ItemV.Sort(); } // sort if not yet sorted

  // merge counts
  int LastItemN = 0;
  // Seed with the first item: the loop below starts at index 1 and would
  // otherwise never notice a non-positive count on an unmerged first item.
  bool ZeroP = (ItemV[0].Dat <= 0);
  for (int ItemN = 1; ItemN < ItemV.Len(); ItemN++) {
    if (ItemV[ItemN].Key != ItemV[ItemN - 1].Key) {
      // new key: compact it into the next free slot
      LastItemN++;
      ItemV[LastItemN] = ItemV[ItemN];
    } else {
      // same key: accumulate into the slot holding that key
      ItemV[LastItemN].Dat += ItemV[ItemN].Dat;
    }
    ZeroP = ZeroP || (ItemV[LastItemN].Dat <= 0);
  }

  // remove items with zero count
  if (ZeroP) {
    int LastIndN = 0;
    for (int ItemN = 0; ItemN < LastItemN + 1; ItemN++) {
      const TQmGixItem& Item = ItemV[ItemN];
      if (Item.Dat.Val > 0 || (IsLocal && Item.Dat.Val < 0)) {
        ItemV[LastIndN] = Item;
        LastIndN++;
      } else if (Item.Dat.Val < 0) {
        TEnv::Error->OnStatusFmt("Warning: negative item count %d:%d!", (int)Item.Key, (int)Item.Dat);
      }
    }
    // shrink to the surviving items, keeping the reserved capacity
    ItemV.Reserve(ItemV.Reserved(), LastIndN);
  } else {
    ItemV.Reserve(ItemV.Reserved(), LastItemN + 1);
  }
}