// I embarassingly don't know how templating works. void QuoteGraph::CompareUsingMinHash(TVec<THash<TMd5Sig, TIntSet> >& BucketsVector) { THashSet<TIntPr> EdgeCache; int Count = 0; int RealCount = 0; Err("Beginning edge creation step...\n"); for (int i = 0; i < BucketsVector.Len(); i++) { Err("Processing band signature %d of %d - %d signatures\n", i+1, BucketsVector.Len(), BucketsVector[i].Len()); TVec<TMd5Sig> Buckets; BucketsVector[i].GetKeyV(Buckets); TVec<TMd5Sig>::TIter BucketEnd = Buckets.EndI(); for (TVec<TMd5Sig>::TIter BucketSig = Buckets.BegI(); BucketSig < BucketEnd; BucketSig++) { TIntSet Bucket = BucketsVector[i].GetDat(*BucketSig); Count += Bucket.Len() * (Bucket.Len() - 1) / 2; for (TIntSet::TIter Quote1 = Bucket.BegI(); Quote1 < Bucket.EndI(); Quote1++) { TIntSet::TIter Quote1Copy = Quote1; Quote1Copy++; for (TIntSet::TIter Quote2 = Quote1Copy; Quote2 < Bucket.EndI(); Quote2++) { if (!EdgeCache.IsKey(TIntPr(Quote1.GetKey(), Quote2.GetKey())) && !EdgeCache.IsKey(TIntPr(Quote2.GetKey(), Quote1.GetKey()))) { EdgeCache.AddKey(TIntPr(Quote1.GetKey(), Quote2.GetKey())); EdgeCache.AddKey(TIntPr(Quote2.GetKey(), Quote1.GetKey())); RealCount++; AddEdgeIfSimilar(Quote1.GetKey(), Quote2.GetKey()); } } } } } fprintf(stderr, "NUMBER OF COMPARES: %d\n", Count); fprintf(stderr, "NUMBER OF REAL COMPARES: %d\n", RealCount); }
/// Computes the average hop distance over all unordered pairs of distinct
/// nodes in the undirected graph p.
///
/// A breadth-first search is run from each node, and the hop counts to the
/// nodes that precede it in iteration order are accumulated, so every
/// unordered pair is counted exactly once.
///
/// Pairs with no connecting path are skipped. GetHops() reports these with a
/// negative value; the previous `length == length` test was always true for
/// an int, so such pairs were averaged in as negative lengths.
///
/// @param p  Graph to measure.
/// @return   Mean hop count over connected pairs, or 0.0 when there is no
///           such pair (fewer than two nodes, or no edges).
double ave_path_length (PUNGraph p) {
  // Snapshot the node ids so pairs can be addressed by position.
  TVec<TInt> ids;
  for (TUNGraph::TNodeI n = p->BegNI(); n != p->EndNI(); n++) {
    ids.Add(n.GetId());
  }

  TBreathFS<PUNGraph> bfs(p);
  double tot_lengths = 0.0;
  double tot_pairs = 0.0;

  // Walk sources from the back; each source is paired only with the ids that
  // come before it. Position 0 has no predecessors, so it needs no search.
  for (int src = ids.Len() - 1; src > 0; src--) {
    const TInt source = ids[src];
    bfs.DoBfs(source, true, true);
    for (int dst = 0; dst < src; dst++) {
      const int length = bfs.GetHops(source, ids[dst]);
      if (length >= 0) {
        tot_lengths += length;
        tot_pairs += 1;
      }
    }
  }

  // Avoid 0/0 (NaN) when no pair contributed.
  if (tot_pairs == 0.0) {
    return 0.0;
  }
  return tot_lengths / tot_pairs;
}
void TMultimodalGraphImplB::AddEdgeBatch(const TPair<TInt,TInt>& SrcNId, const TVec<TPair<TInt,TInt> >& DstNIds) { IAssertR(IsNode(SrcNId), TStr::Fmt("%d not a node.", SrcNId.GetVal2()).CStr()); for (TVec<TPair<TInt,TInt> >::TIter DstNId = DstNIds.BegI(); DstNId < DstNIds.EndI(); DstNId++) { AddEdge(SrcNId, *DstNId); } }
int main(int argc, char* argv[]) { TEnv Env(argc, argv); TStr PrefixPath = Env.GetArgs() > 1 ? Env.GetArg(1) : TStr(""); double ts1 = Tick(); TTableContext Context; TVec<PTable> NodeTblV = TVec<PTable>(); TVec<PTable> EdgeTblV = TVec<PTable>(); Schema NodeSchema = Schema(); Schema EdgeSchema = Schema(); LoadFlickrTables(PrefixPath, Context, NodeTblV, NodeSchema, EdgeTblV, EdgeSchema); double ts2 = Tick(); int ExpectedSz = 0; for (TVec<PTable>::TIter it = NodeTblV.BegI(); it < NodeTblV.EndI(); it++) { PTable Table = *it; ExpectedSz += Table->GetNumRows(); } THash<TStr, TInt> Hash(ExpectedSz); TStrV OriNIdV(ExpectedSz); MergeNodeTables(NodeTblV, NodeSchema, Hash, OriNIdV); PTable EdgeTable = MergeEdgeTables(EdgeTblV, EdgeSchema, Hash, Context); double ts3 = Tick(); TStrV V; TStrV VE; VE.Add(EdgeSchema.GetVal(2).GetVal1()); PNEANet Graph = TSnap::ToNetwork<PNEANet>(EdgeTable, EdgeSchema.GetVal(0).GetVal1(), EdgeSchema.GetVal(1).GetVal1(), V, V, VE, aaLast); double ts4 = Tick(); //int nExps = 1; int nExps = 40; TIntFltH PageRankResults; for (int i = 0; i < nExps; i++) { PageRankResults = TIntFltH(ExpectedSz); #ifdef USE_OPENMP TSnap::GetWeightedPageRankMP2(Graph, PageRankResults, EdgeSchema.GetVal(2).GetVal1(), 0.849999999999998, 0.0001, 10); #else TSnap::GetWeightedPageRank(Graph, PageRankResults, EdgeSchema.GetVal(2).GetVal1(), 0.849999999999998, 0.0001, 10); #endif } double ts5 = Tick(); PSOut ResultOut = TFOut::New(PrefixPath + TStr("page-rank-results.tsv")); for (TIntFltH::TIter it = PageRankResults.BegI(); it < PageRankResults.EndI(); it++) { ResultOut->PutStrFmtLn("%s\t%f9", OriNIdV[it.GetKey()].CStr(), it.GetDat().Val); } double ts6 = Tick(); bool isPar = false; #ifdef USE_OPENMP isPar = true; #endif // PSOut FeaturesOut = TFOut::New(PrefixPath + "features.txt"); // FeaturesOut->PutStrFmtLn("Photo %d", PPhotoTbl->GetNumRows().Val); // FeaturesOut->PutStrFmtLn("Users %d", PUserTbl->GetNumRows().Val); // FeaturesOut->PutStrFmtLn("Tags %d", 
PTagTbl->GetNumRows().Val); // FeaturesOut->PutStrFmtLn("Comments %d", PCommentTbl->GetNumRows().Val); // FeaturesOut->PutStrFmtLn("Locations %d", PLocationTbl->GetNumRows().Val); // FeaturesOut->PutStrFmtLn("Photo - Owner %d", PPhotoOwnerTbl->GetNumRows().Val); // FeaturesOut->PutStrFmtLn("Photo - Comment %d", PPhotoCommentTbl->GetNumRows().Val); // FeaturesOut->PutStrFmtLn("Photo - Location %d", PPhotoLocationTbl->GetNumRows().Val); // FeaturesOut->PutStrFmtLn("Comment - User %d", PCommentUserTbl->GetNumRows().Val); // FeaturesOut->PutStrFmtLn("Comment - User %d", PCommentUserTbl->GetNumRows().Val); //// FeaturesOut->PutStrFmtLn("Photo - Tagger %d", PPhotoTaggerTbl->GetNumRows().Val); // FeaturesOut->PutStrFmtLn("Tagger - Tag %d", PTaggerTagTbl->GetNumRows().Val); // FeaturesOut->PutStrFmtLn("Total number of nodes = %d", Graph->GetNodes()); // FeaturesOut->PutStrFmtLn("Total number of edges = %d", Graph->GetEdges()); PSOut TimeOut = TFOut::New(PrefixPath + TStr("time.txt"), true); TimeOut->PutStrFmtLn("Experiment Weighted - %s - %s", PrefixPath.CStr(), (isPar ? "Parallel" : "Sequential")); TimeOut->PutStrFmtLn("Input Time = %f", GetCPUTimeUsage(ts1, ts2)); TimeOut->PutStrFmtLn("Preprocessing Time = %f", GetCPUTimeUsage(ts2, ts3)); TimeOut->PutStrFmtLn("Conversion Time = %f", GetCPUTimeUsage(ts3, ts4)); TimeOut->PutStrFmtLn("Computing Time = %f", GetCPUTimeUsage(ts4, ts5)/nExps); TimeOut->PutStrFmtLn("Output Time = %f", GetCPUTimeUsage(ts5, ts6)); return 0; }