/// Clique Percolation method communities void TCliqueOverlap::GetCPMCommunities(const PUNGraph& G, int MinMaxCliqueSize, TVec<TIntV>& NIdCmtyVV) { printf("Clique Percolation Method\n"); TExeTm ExeTm; TVec<TIntV> MaxCliques; TCliqueOverlap::GetMaxCliques(G, MinMaxCliqueSize, MaxCliques); // op RS 2012/05/15, commented out next line, a parameter is missing, // creating a warning on OS X // printf("...%d cliques found\n"); // get clique overlap matrix (graph) PUNGraph OverlapGraph = TCliqueOverlap::CalculateOverlapMtx(MaxCliques, MinMaxCliqueSize-1); printf("...overlap matrix (%d, %d)\n", G->GetNodes(), G->GetEdges()); // connected components are communities TCnComV CnComV; TSnap::GetWccs(OverlapGraph, CnComV); NIdCmtyVV.Clr(false); TIntSet CmtySet; for (int c = 0; c < CnComV.Len(); c++) { CmtySet.Clr(false); for (int i = 0; i <CnComV[c].Len(); i++) { const TIntV& CliqueNIdV = MaxCliques[CnComV[c][i]]; CmtySet.AddKeyV(CliqueNIdV); } NIdCmtyVV.Add(); CmtySet.GetKeyV(NIdCmtyVV.Last()); NIdCmtyVV.Last().Sort(); } printf("done [%s].\n", ExeTm.GetStr()); }
double TAGMUtil::GetConductance(const PUNGraph& Graph, const TIntSet& CmtyS, const int Edges) { const int Edges2 = Edges >= 0 ? 2*Edges : Graph->GetEdges(); int Vol = 0, Cut = 0; double Phi = 0.0; for (int i = 0; i < CmtyS.Len(); i++) { if (! Graph->IsNode(CmtyS[i])) { continue; } TUNGraph::TNodeI NI = Graph->GetNI(CmtyS[i]); for (int e = 0; e < NI.GetOutDeg(); e++) { if (! CmtyS.IsKey(NI.GetOutNId(e))) { Cut += 1; } } Vol += NI.GetOutDeg(); } // get conductance if (Vol != Edges2) { if (2 * Vol > Edges2) { Phi = Cut / double (Edges2 - Vol); } else if (Vol == 0) { Phi = 0.0; } else { Phi = Cut / double(Vol); } } else { if (Vol == Edges2) { Phi = 1.0; } } return Phi; }
// Compute the empirical edge probability between a pair of nodes who share no community (epsilon), based on current community affiliations. double TAGMFit::CalcPNoComByCmtyVV(const int& SamplePairs) { TIntV NIdV; G->GetNIdV(NIdV); uint64 PairNoCom = 0, EdgesNoCom = 0; for (int u = 0; u < NIdV.Len(); u++) { for (int v = u + 1; v < NIdV.Len(); v++) { int SrcNID = NIdV[u], DstNID = NIdV[v]; TIntSet JointCom; TAGMUtil::GetIntersection(NIDComVH.GetDat(SrcNID),NIDComVH.GetDat(DstNID),JointCom); if(JointCom.Len() == 0) { PairNoCom++; if (G->IsEdge(SrcNID, DstNID)) { EdgesNoCom++; } if (SamplePairs > 0 && PairNoCom >= (uint64) SamplePairs) { break; } } } if (SamplePairs > 0 && PairNoCom >= (uint64) SamplePairs) { break; } } double DefaultVal = 1.0 / (double)G->GetNodes() / (double)G->GetNodes(); if (EdgesNoCom > 0) { PNoCom = (double) EdgesNoCom / (double) PairNoCom; } else { PNoCom = DefaultVal; } printf("%s / %s edges without joint com detected (PNoCom = %f)\n", TUInt64::GetStr(EdgesNoCom).CStr(), TUInt64::GetStr(PairNoCom).CStr(), PNoCom.Val); return PNoCom; }
void end( const TWallVertexVector& vb, TIntVector& polygon, TIntVector& ib ) { assert( polygonCount ); if( polygonCount == 1 ) { // trivial case, just output input polygon polygon.resize( 0 ); polygon.reserve( vertices.size() ); int idx0 = *vertices.begin(); int idx = idx0; do { polygon.push_back( idx ); const TIntVector& vnext = vertexNexts[idx]; assert( vnext.size() == 1 ); idx = vnext[0]; } while( idx != idx0 ); triangulator::process( vb, polygon, ib ); } else { // mark vertex types markVertexTypes(); // trace and triangulate the polygon(s) traceBorder( vb, polygon, ib ); } }
void LSH::ElCheapoHashing(TQuoteBase *QuoteBase, TInt ShingleLen, THash<TMd5Sig, TIntSet>& ShingleToQuoteIds) { fprintf(stderr, "Hashing shingles the el cheapo way...\n"); TIntV QuoteIds; QuoteBase->GetAllQuoteIds(QuoteIds); for (int qt = 0; qt < QuoteIds.Len(); qt++) { if (qt % 1000 == 0) { fprintf(stderr, "%d out of %d completed\n", qt, QuoteIds.Len()); } TQuote Q; QuoteBase->GetQuote(QuoteIds[qt], Q); // Put x-character (or x-word) shingles into hash table; x is specified by ShingleLen parameter TStr QContentStr; Q.GetParsedContentString(QContentStr); TChA QContentChA = TChA(QContentStr); for (int i = 0; i < QContentChA.Len() - ShingleLen + 1; i++) { TChA ShingleChA = TChA(); for (int j = 0; j < ShingleLen; j++) { ShingleChA.AddCh(QContentChA.GetCh(i + j)); } TStr Shingle = TStr(ShingleChA); const TMd5Sig ShingleMd5(Shingle); TIntSet ShingleQuoteIds; if (ShingleToQuoteIds.IsKey(ShingleMd5)) { ShingleQuoteIds = ShingleToQuoteIds.GetDat(ShingleMd5); } ShingleQuoteIds.AddKey(QuoteIds[qt]); ShingleToQuoteIds.AddDat(ShingleMd5, ShingleQuoteIds); } } Err("Done with el cheapo hashing!\n"); }
///Generate graph using the AGM model. CProbV = vector of Pc PUNGraph TAGM::GenAGM(TVec<TIntV>& CmtyVV, const TFltV& CProbV, TRnd& Rnd, const double PNoCom) { PUNGraph G = TUNGraph::New(100 * CmtyVV.Len(), -1); printf("AGM begins\n"); for (int i = 0; i < CmtyVV.Len(); i++) { TIntV& CmtyV = CmtyVV[i]; for (int u = 0; u < CmtyV.Len(); u++) { if ( G->IsNode(CmtyV[u])) { continue; } G->AddNode(CmtyV[u]); } double Prob = CProbV[i]; RndConnectInsideCommunity(G, CmtyV, Prob, Rnd); } if (PNoCom > 0.0) { //if we want to connect nodes that do not share any community TIntSet NIDS; for (int c = 0; c < CmtyVV.Len(); c++) { for (int u = 0; u < CmtyVV[c].Len(); u++) { NIDS.AddKey(CmtyVV[c][u]); } } TIntV NIDV; NIDS.GetKeyV(NIDV); RndConnectInsideCommunity(G,NIDV,PNoCom,Rnd); } printf("AGM completed (%d nodes %d edges)\n",G->GetNodes(),G->GetEdges()); G->Defrag(); return G; }
bool TBagOfWords::Update(const TStrV& TokenStrV) { // Generate Ngrams if necessary TStrV NgramStrV; GenerateNgrams(TokenStrV, NgramStrV); // process tokens to update DF counts bool UpdateP = false; if (IsHashing()) { // consolidate tokens and get their hashed IDs TIntSet TokenIdH; for (int TokenStrN = 0; TokenStrN < NgramStrV.Len(); TokenStrN++) { const TStr& TokenStr = NgramStrV[TokenStrN]; TInt TokenId = TokenStr.GetHashTrick() % HashDim; TokenIdH.AddKey(TokenId); if (IsStoreHashWords()) { HashWordV[TokenId].AddKey(TokenStr); } } // update document counts int KeyId = TokenIdH.FFirstKeyId(); while (TokenIdH.FNextKeyId(KeyId)) { const int TokenId = TokenIdH.GetKey(KeyId); // update DF DocFqV[TokenId]++; } } else { // consolidate tokens TStrH TokenStrH; for (int TokenStrN = 0; TokenStrN < NgramStrV.Len(); TokenStrN++) { const TStr& TokenStr = NgramStrV[TokenStrN]; TokenStrH.AddKey(TokenStr); } // update document counts and update vocabulary with new tokens int KeyId = TokenStrH.FFirstKeyId(); while (TokenStrH.FNextKeyId(KeyId)) { // get token const TStr& TokenStr = TokenStrH.GetKey(KeyId); // different processing for hashing int TokenId = TokenSet.GetKeyId(TokenStr); if (TokenId == -1) { // new token, remember the dimensionality change UpdateP = true; // remember the new token TokenId = TokenSet.AddKey(TokenStr); // increase document count table const int TokenDfId = DocFqV.Add(0); // increase also the old count table OldDocFqV.Add(0.0); // make sure we DF vector and TokenSet still in sync IAssert(TokenId == TokenDfId); IAssert(DocFqV.Len() == OldDocFqV.Len()); } // document count update DocFqV[TokenId]++; } } // update document count Docs++; // tell if dimension changed return UpdateP; }
double TAGMFit::SelectLambdaSum(const TFltV& NewLambdaV, const TIntSet& ComK) { double Result = 0.0; for (TIntSet::TIter SI = ComK.BegI(); SI < ComK.EndI(); SI++) { IAssert(NewLambdaV[SI.GetKey()] >= 0); Result += NewLambdaV[SI.GetKey()]; } return Result; }
void TAGMUtil::GetNbhCom(const PUNGraph& Graph, const int NID, TIntSet& NBCmtyS) { TUNGraph::TNodeI NI = Graph->GetNI(NID); NBCmtyS.Gen(NI.GetDeg()); NBCmtyS.AddKey(NID); for (int e = 0; e < NI.GetDeg(); e++) { NBCmtyS.AddKey(NI.GetNbrNId(e)); } }
// YES I COPIED AND PASTED CODE my section leader would be so ashamed :D void LSH::MinHash(THash<TMd5Sig, TIntSet>& ShingleToQuoteIds, TVec<THash<TIntV, TIntSet> >& SignatureBandBuckets) { TRnd RandomGenerator; // TODO: make this "more random" by incorporating time for (int i = 0; i < NumBands; ++i) { THash < TInt, TIntV > Inverted; // (QuoteID, QuoteSignatureForBand) THash < TIntV, TIntSet > BandBuckets; // (BandSignature, QuoteIDs) for (int j = 0; j < BandSize; ++j) { // Create new signature TVec < TMd5Sig > Signature; ShingleToQuoteIds.GetKeyV(Signature); Signature.Shuffle(RandomGenerator); // Place in bucket - not very efficient int SigLen = Signature.Len(); for (int k = 0; k < SigLen; ++k) { TIntSet CurSet = ShingleToQuoteIds.GetDat(Signature[k]); for (TIntSet::TIter l = CurSet.BegI(); l < CurSet.EndI(); l++) { TInt Key = l.GetKey(); if (Inverted.IsKey(Key)) { TIntV CurSignature = Inverted.GetDat(Key); if (CurSignature.Len() <= j) { CurSignature.Add(k); Inverted.AddDat(Key, CurSignature); } } else { TIntV NewSignature; NewSignature.Add(k); Inverted.AddDat(Key, NewSignature); } } } } TIntV InvertedKeys; Inverted.GetKeyV(InvertedKeys); TInt InvertedLen = InvertedKeys.Len(); for (int k = 0; k < InvertedLen; ++k) { TIntSet Bucket; TIntV Signature = Inverted.GetDat(InvertedKeys[k]); if (BandBuckets.IsKey(Signature)) { Bucket = BandBuckets.GetDat(Signature); } Bucket.AddKey(InvertedKeys[k]); BandBuckets.AddDat(Signature, Bucket); } SignatureBandBuckets.Add(BandBuckets); Err("%d out of %d band signatures computed\n", i + 1, NumBands); } Err("Minhash step complete!\n"); }
// Helper: Return GroupSet based on NodeID void GetGroupSet(int NId, TIntSet& GroupSet) { GroupSet.Clr(); switch (NId) { case 0: GroupSet.AddKey(2); GroupSet.AddKey(4); GroupSet.AddKey(5); break; case 1: // Empty Set break; case 2: GroupSet.AddKey(0); GroupSet.AddKey(3); GroupSet.AddKey(5); break; case 3: GroupSet.AddKey(0); break; case 4: GroupSet.AddKey(0); break; case 5: GroupSet.AddKey(0); GroupSet.AddKey(1); break; default: ASSERT_FALSE(true); // NId Outside Graph Construction FAIL break; } }
void TNetInfBs::SavePajek(const TStr& OutFNm) { TIntSet NIdSet; FILE *F = fopen(OutFNm.CStr(), "wt"); fprintf(F, "*Vertices %d\r\n", NIdSet.Len()); for (THash<TInt, TNodeInfo>::TIter NI = NodeNmH.BegI(); NI < NodeNmH.EndI(); NI++) { const TNodeInfo& I = NI.GetDat(); fprintf(F, "%d \"%s\" ic Blue x_fact %f y_fact %f\r\n", NI.GetKey().Val, I.Name.CStr(), TMath::Mx<double>(log((double)I.Vol)-5,1), TMath::Mx<double>(log((double)I.Vol)-5,1)); } fprintf(F, "*Arcs\r\n"); for (TNGraph::TEdgeI EI = Graph->BegEI(); EI < Graph->EndEI(); EI++) { fprintf(F, "%d %d 1\r\n", EI.GetSrcNId(), EI.GetDstNId()); } fclose(F); }
void begin( int totalVerts ) { polygonCount = 0; vertices.clear(); borderVertices.clear(); vertexNexts.clear(); vertexPrevs.clear(); vertexUseCount.resize( 0 ); vertexUseCount.resize( totalVerts, 0 ); vertexTraceID.resize( totalVerts, -1 ); vertexTypes.resize( 0 ); vertexTypes.resize( totalVerts, VTYPE_NONE ); }
void TGraphKey::TakeGraph(const PNGraph& Graph, TIntPrV& NodeMap) { TIntSet NodeIdH; int n = 0; NodeMap.Gen(Graph->GetNodes(), 0); for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++, n++) { NodeIdH.AddKey(NI.GetId()); NodeMap.Add(TIntPr(NI.GetId(), n)); } Nodes = Graph->GetNodes(); EdgeV.Gen(Nodes, 0); for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { const int NewNId = NodeIdH.GetKeyId(NI.GetId()); for (int i = 0; i < NI.GetOutDeg(); i++) { EdgeV.Add(TIntPr(NewNId, NodeIdH.GetKeyId(NI.GetOutNId(i)))); } } EdgeV.Sort(true); EdgeV.Pack(); }
// For each (u, v) in edges, precompute C_uv (the set of communities u and v share). void TAGMFit::GetEdgeJointCom() { ComEdgesV.Gen(CIDNSetV.Len()); EdgeComVH.Gen(G->GetEdges()); for (TUNGraph::TNodeI SrcNI = G->BegNI(); SrcNI < G->EndNI(); SrcNI++) { int SrcNID = SrcNI.GetId(); for (int v = 0; v < SrcNI.GetDeg(); v++) { int DstNID = SrcNI.GetNbrNId(v); if (SrcNID >= DstNID) { continue; } TIntSet JointCom; IAssert(NIDComVH.IsKey(SrcNID)); IAssert(NIDComVH.IsKey(DstNID)); TAGMUtil::GetIntersection(NIDComVH.GetDat(SrcNID), NIDComVH.GetDat(DstNID), JointCom); EdgeComVH.AddDat(TIntPr(SrcNID,DstNID),JointCom); for (int k = 0; k < JointCom.Len(); k++) { ComEdgesV[JointCom[k]]++; } } } IAssert(EdgeComVH.Len() == G->GetEdges()); }
void QuoteGraph::CompareUsingShingles(THash<TMd5Sig, TIntSet>& Shingles) { int Count = 0; int RealCount = 0; TVec<TMd5Sig> ShingleKeys; Shingles.GetKeyV(ShingleKeys); THashSet<TIntPr> EdgeCache; for (int i = 0; i < ShingleKeys.Len(); i++) { if (i % 100 == 0) { Err("Processed %d out of %d shingles, count = %d\n", i, ShingleKeys.Len(), Count); } TIntSet Bucket; Shingles.IsKeyGetDat(ShingleKeys[i], Bucket); for (TIntSet::TIter Quote1 = Bucket.BegI(); Quote1 < Bucket.EndI(); Quote1++) { TIntSet::TIter Quote1Copy = Quote1; Quote1Copy++; for (TIntSet::TIter Quote2 = Quote1Copy; Quote2 < Bucket.EndI(); Quote2++) { if (!EdgeCache.IsKey(TIntPr(Quote1.GetKey(), Quote2.GetKey())) && !EdgeCache.IsKey(TIntPr(Quote2.GetKey(), Quote1.GetKey()))) { EdgeCache.AddKey(TIntPr(Quote1.GetKey(), Quote2.GetKey())); EdgeCache.AddKey(TIntPr(Quote2.GetKey(), Quote1.GetKey())); RealCount++; AddEdgeIfSimilar(Quote1.GetKey(), Quote2.GetKey()); } } } int Len = Bucket.Len() * (Bucket.Len() - 1) / 2; Count += Len; } fprintf(stderr, "NUMBER OF COMPARES: %d\n", Count); fprintf(stderr, "NUMBER OF REAL COMPARES: %d\n", RealCount); }
// I embarassingly don't know how templating works. void QuoteGraph::CompareUsingMinHash(TVec<THash<TMd5Sig, TIntSet> >& BucketsVector) { THashSet<TIntPr> EdgeCache; int Count = 0; int RealCount = 0; Err("Beginning edge creation step...\n"); for (int i = 0; i < BucketsVector.Len(); i++) { Err("Processing band signature %d of %d - %d signatures\n", i+1, BucketsVector.Len(), BucketsVector[i].Len()); TVec<TMd5Sig> Buckets; BucketsVector[i].GetKeyV(Buckets); TVec<TMd5Sig>::TIter BucketEnd = Buckets.EndI(); for (TVec<TMd5Sig>::TIter BucketSig = Buckets.BegI(); BucketSig < BucketEnd; BucketSig++) { TIntSet Bucket = BucketsVector[i].GetDat(*BucketSig); Count += Bucket.Len() * (Bucket.Len() - 1) / 2; for (TIntSet::TIter Quote1 = Bucket.BegI(); Quote1 < Bucket.EndI(); Quote1++) { TIntSet::TIter Quote1Copy = Quote1; Quote1Copy++; for (TIntSet::TIter Quote2 = Quote1Copy; Quote2 < Bucket.EndI(); Quote2++) { if (!EdgeCache.IsKey(TIntPr(Quote1.GetKey(), Quote2.GetKey())) && !EdgeCache.IsKey(TIntPr(Quote2.GetKey(), Quote1.GetKey()))) { EdgeCache.AddKey(TIntPr(Quote1.GetKey(), Quote2.GetKey())); EdgeCache.AddKey(TIntPr(Quote2.GetKey(), Quote1.GetKey())); RealCount++; AddEdgeIfSimilar(Quote1.GetKey(), Quote2.GetKey()); } } } } } fprintf(stderr, "NUMBER OF COMPARES: %d\n", Count); fprintf(stderr, "NUMBER OF REAL COMPARES: %d\n", RealCount); }
void TGStatVec::SaveTxt(const TStr& FNmPref, const TStr& Desc) const { FILE *F = fopen(TStr::Fmt("growth.%s.tab", FNmPref.CStr()).CStr(), "wt"); fprintf(F, "# %s\n", Desc.CStr()); fprintf(F, "# %s", TTmInfo::GetTmUnitStr(TmUnit).CStr()); TIntSet StatValSet; for (int i = 0; i < Len(); i++) { for (int v = gsvNone; v < gsvMx; v++) { if (At(i)->HasVal(TGStatVal(v))) { StatValSet.AddKey(v); } } } TIntV StatValV; StatValSet.GetKeyV(StatValV); StatValV.Sort(); for (int sv = 0; sv < StatValV.Len(); sv++) { fprintf(F, "\t%s", TGStat::GetValStr(TGStatVal(StatValV[sv].Val)).CStr()); } fprintf(F, "Time\n"); for (int i = 0; i < Len(); i++) { const TGStat& G = *At(i); for (int sv = 0; sv < StatValV.Len(); sv++) { fprintf(F, "%g\t", G.GetVal(TGStatVal(StatValV[sv].Val))); } fprintf(F, "%s\n", G.GetTmStr().CStr()); } fclose(F); }
void markVertexTypes() { TIntSet::const_iterator vit, vitEnd = vertices.end(); // first pass: mark all interior and single-border vertices for( vit = vertices.begin(); vit != vitEnd; ++vit ) { int idx = *vit; if( vertexUseCount[idx] == 1 ) { vertexTypes[idx] = VTYPE_SINGLE; borderVertices.insert( idx ); } else if( isVertexInterior(idx) ) vertexTypes[idx] = VTYPE_INTERIOR; } // now, the unmarked vertices are all shared and on border for( vit = vertices.begin(); vit != vitEnd; ++vit ) { int idx = *vit; if( vertexTypes[idx] == VTYPE_NONE ) { vertexTypes[idx] = VTYPE_MULTI; borderVertices.insert( idx ); } } }
void addPolygon( const TIntVector& ib ) { ++polygonCount; TIntVector::const_iterator vit, vitEnd = ib.end(); for( vit = ib.begin(); vit != vitEnd; ++vit ) { int idx = *vit; assert( idx >= 0 && idx < vertexUseCount.size() ); vertices.insert( idx ); ++vertexUseCount[idx]; int idxNext = (vit==vitEnd-1) ? ib.front() : *(vit+1); int idxPrev = (vit==ib.begin()) ? ib.back() : *(vit-1); vertexNexts[idx].push_back( idxNext ); vertexPrevs[idx].push_back( idxPrev ); } }
void LSH::WordHashing(TQuoteBase *QuoteBase, THash<TMd5Sig, TIntSet>& ShingleToQuoteIds) { fprintf(stderr, "Hashing shingles using words...\n"); TIntV QuoteIds; QuoteBase->GetAllQuoteIds(QuoteIds); THash<TStr, TIntSet> Temp; for (int qt = 0; qt < QuoteIds.Len(); qt++) { if (qt % 1000 == 0) { fprintf(stderr, "%d out of %d completed\n", qt, QuoteIds.Len()); } TQuote Q; QuoteBase->GetQuote(QuoteIds[qt], Q); TStrV Content; Q.GetParsedContent(Content); int ContentLen = Content.Len(); for (int i = 0; i < ContentLen; i++) { const TMd5Sig ShingleMd5(Content[i]); TIntSet ShingleQuoteIds; ShingleToQuoteIds.IsKeyGetDat(ShingleMd5, ShingleQuoteIds); ShingleQuoteIds.AddKey(QuoteIds[qt]); ShingleToQuoteIds.AddDat(ShingleMd5, ShingleQuoteIds); ///// COMMENT OUT LATER TIntSet TempSet; Temp.IsKeyGetDat(Content[i], TempSet); TempSet.AddKey(QuoteIds[qt]); Temp.AddDat(Content[i], TempSet); } } TVec<TStr> ShingleKeys; Temp.GetKeyV(ShingleKeys); ShingleKeys.SortCmp(TCmpSetByLen(false, &Temp)); for (int i = 0; i < 100; i++) { TIntSet TempSet = Temp.GetDat(ShingleKeys[i]); Err("%d: %s - %d \n", i, ShingleKeys[i].CStr(), TempSet.Len()); } Err("Done with word hashing!\n"); }
/// Barabasi-Albert model of scale-free graphs. /// The graph has power-law degree distribution. /// See: Emergence of scaling in random networks by Barabasi and Albert. /// URL: http://arxiv.org/abs/cond-mat/9910332 PUNGraph GenPrefAttach(const int& Nodes, const int& NodeOutDeg, TRnd& Rnd) { PUNGraph GraphPt = PUNGraph::New(); TUNGraph& Graph = *GraphPt; Graph.Reserve(Nodes, NodeOutDeg*Nodes); TIntV NIdV(NodeOutDeg*Nodes, 0); // first edge Graph.AddNode(0); Graph.AddNode(1); NIdV.Add(0); NIdV.Add(1); Graph.AddEdge(0, 1); TIntSet NodeSet; for (int node = 2; node < Nodes; node++) { NodeSet.Clr(false); while (NodeSet.Len() < NodeOutDeg && NodeSet.Len() < node) { NodeSet.AddKey(NIdV[TInt::Rnd.GetUniDevInt(NIdV.Len())]); } const int N = Graph.AddNode(); for (int i = 0; i < NodeSet.Len(); i++) { Graph.AddEdge(N, NodeSet[i]); NIdV.Add(N); NIdV.Add(NodeSet[i]); } } return GraphPt; }
// Simple test for Defrag() TEST(THashSet, Defrag) { TIntSet* TestSet = new TIntSet(); // fragment the set (IsKeyIdEqKeyN() will be false) TestSet->AddKey(6); TestSet->AddKey(4); EXPECT_EQ(1,TestSet->IsKeyIdEqKeyN()); TestSet->AddKey(2); EXPECT_EQ(1,TestSet->IsKeyIdEqKeyN()); TestSet->DelKey(2); EXPECT_EQ(0,TestSet->IsKeyIdEqKeyN()); TestSet->Defrag(); EXPECT_EQ(1,TestSet->IsKeyIdEqKeyN()); TestSet->DelKey(4); EXPECT_EQ(0,TestSet->IsKeyIdEqKeyN()); TestSet->Defrag(); EXPECT_EQ(1,TestSet->IsKeyIdEqKeyN()); // this does not work with a fragmented set TestSet->DelKeyId(TestSet->GetRndKeyId(TInt::Rnd)); EXPECT_EQ(0,TestSet->IsKeyIdEqKeyN()); delete TestSet; }
void traceBorder( const TWallVertexVector& vb, TIntVector& polygon, TIntVector& ib ) { static int traceID = 0; ++traceID; int idx0 = getBorderIndex(); assert( idx0 >= 0 && idx0 < vertexTypes.size() ); ib.resize( 0 ); polygon.resize( 0 ); polygon.reserve( vertices.size()/2 ); TIntVector localPolygon; localPolygon.reserve( 128 ); TIntVector localIB; localIB.reserve( 128 ); int idxPrev = idx0; int idx = idx0; int debugCounter = 0; int debugLoopCounter = 0; do { localIB.resize( 0 ); bool willFormLoop; do{ localPolygon.push_back( idx ); borderVertices.erase( idx ); vertexTraceID[idx] = traceID; assert( ++debugCounter <= vertices.size()*2 ); // Next vertex is the neighbor of current, that is not interior // and that is not the previous one. // When there are many possible ones, trace based on angle. int idxNext = -1; SVector2 prevToCurr = vb[idx] - vb[idxPrev]; if( prevToCurr.lengthSq() < 1.0e-6f ) prevToCurr.set( -0.01f, -0.01f ); TIntIntsMap::const_iterator it; it = vertexNexts.find( idx ); assert( it != vertexNexts.end() ); const TIntVector& vnext = it->second; int n = vnext.size(); float bestAngle = 100.0f; for( int i = 0; i < n; ++i ) { int idx1 = vnext[i]; if( idx1 != idxPrev && vertexTypes[idx1] != VTYPE_INTERIOR ) { //if( idx1 != idxPrev ) { SVector2 currToNext = vb[idx1] - vb[idx]; float ang = signedAngle2D( prevToCurr, currToNext ); if( ang < bestAngle ) { bestAngle = ang; idxNext = idx1; } } } assert( bestAngle > -4.0f && bestAngle < 4.0f ); assert( idxNext >= 0 ); willFormLoop = (vertexTraceID[idxNext] == traceID); // Optimization: if best angle is zero, then we're walking // in a straight line. Optimize out the current vertex. if( bestAngle == 0.0f && idx != idx0 && !willFormLoop ) { localPolygon.pop_back(); } idxPrev = idx; idx = idxNext; } while( !willFormLoop ); assert( localPolygon.size() >= 3 ); //if( localPolygon.size() < 3 ) { // return; //} assert( ++debugLoopCounter < vertices.size() ); if( idx == idx0 ) { // The polygon is simple or we found the last loop. // Triangulate local and append to results. triangulator::process( vb, localPolygon, localIB ); polygon.insert( polygon.end(), localPolygon.begin(), localPolygon.end() ); ib.insert( ib.end(), localIB.begin(), localIB.end() ); // We can have separated other loops. Try fetching them as well. idx0 = getBorderIndex(); if( idx0 == -1 ) { return; } else { localPolygon.resize( 0 ); idxPrev = idx0; idx = idx0; } } else { // The polygon must be complex, and we just found a closed loop. // Take only the loop, triangulate it, append to results, continue. TIntVector::const_iterator itLoopStart = std::find( localPolygon.begin(), localPolygon.end(), idx ); assert( itLoopStart != localPolygon.end() ); // append to results TIntVector loopPolygon( itLoopStart, localPolygon.end() ); triangulator::process( vb, loopPolygon, localIB ); polygon.insert( polygon.end(), loopPolygon.begin(), loopPolygon.end() ); ib.insert( ib.end(), localIB.begin(), localIB.end() ); // continue - remove the looped polygon from local localPolygon.resize( itLoopStart - localPolygon.begin() ); } } while( true ); }
int getBorderIndex() { if( borderVertices.empty() ) return -1; return *borderVertices.begin(); }
void LSH::MinHash(TQuoteBase *QB, THashSet<TMd5Sig>& Shingles, TVec<THash<TMd5Sig, TIntSet> >& SignatureBandBuckets) { Err("Creating buckets...\n"); THash < TMd5Sig, TIntV > Signatures; ComputeSignatures(Shingles, Signatures, NumBands * BandSize); // bucket creation for (int i = 0; i < NumBands; ++i) { SignatureBandBuckets.Add(THash<TMd5Sig, TIntSet>()); } // bucket filling int NumShingles = Shingles.Len(); THash<TInt, TQuote> Quotes; QB->GetIdToTQuotes(Quotes); THash<TInt, TQuote>::TIter CurI = Quotes.BegI(); THash<TInt, TQuote>::TIter EndI = Quotes.EndI(); TQuote Q; // SKYFALL for (; CurI < EndI; CurI++) { Q = CurI.GetDat(); TStrV Content; Q.GetParsedContent(Content); TInt Id = Q.GetId(); // signature for quote int ContentLen = Content.Len(); TVec < TIntV > Signature; for (int i = 0; i < ContentLen; i++) { const TMd5Sig ShingleMd5(Content[i]); Signature.Add(Signatures.GetDat(ShingleMd5)); } // place in bucket if (ContentLen < WordWindow) { for (int i = 0; i < NumBands; ++i) { TStr Sig; for (int j = 0; j < BandSize; ++j) { int CurSig = i * BandSize + j; TInt min = NumShingles; for (int k = 0; k < ContentLen; k++) { if (Signature[k][CurSig] < min) { min = Signature[k][CurSig]; } } Sig += min.GetStr() + "-"; } //Err(Sig.CStr()); const TMd5Sig SigMd5(Sig); TIntSet Bucket; SignatureBandBuckets[i].IsKeyGetDat(SigMd5, Bucket); Bucket.AddKey(Id); SignatureBandBuckets[i].AddDat(SigMd5, Bucket); } } else { } } Err("Minhash step complete!\n"); }
/// Newton method: DEPRECATED int TAGMFast::MLENewton(const double& Thres, const int& MaxIter, const TStr PlotNm) { TExeTm ExeTm; int iter = 0, PrevIter = 0; TIntFltPrV IterLV; double PrevL = TFlt::Mn, CurL; TUNGraph::TNodeI UI; TIntV NIdxV; G->GetNIdV(NIdxV); int CID, UID, NewtonIter; double Fuc, PrevFuc, Grad, H; while(iter < MaxIter) { NIdxV.Shuffle(Rnd); for (int ui = 0; ui < F.Len(); ui++, iter++) { if (! PlotNm.Empty() && iter % G->GetNodes() == 0) { IterLV.Add(TIntFltPr(iter, Likelihood(false))); } UID = NIdxV[ui]; //find set of candidate c (we only need to consider c to which a neighbor of u belongs to) TIntSet CIDSet; UI = G->GetNI(UID); if (UI.GetDeg() == 0) { //if the node is isolated, clear its membership and skip if (! F[UID].Empty()) { F[UID].Clr(); } continue; } for (int e = 0; e < UI.GetDeg(); e++) { if (HOVIDSV[UID].IsKey(UI.GetNbrNId(e))) { continue; } TIntFltH& NbhCIDH = F[UI.GetNbrNId(e)]; for (TIntFltH::TIter CI = NbhCIDH.BegI(); CI < NbhCIDH.EndI(); CI++) { CIDSet.AddKey(CI.GetKey()); } } for (TIntFltH::TIter CI = F[UID].BegI(); CI < F[UID].EndI(); CI++) { //remove the community membership which U does not share with its neighbors if (! CIDSet.IsKey(CI.GetKey())) { DelCom(UID, CI.GetKey()); } } if (CIDSet.Empty()) { continue; } for (TIntSet::TIter CI = CIDSet.BegI(); CI < CIDSet.EndI(); CI++) { CID = CI.GetKey(); //optimize for UID, CID //compute constants TFltV AlphaKV(UI.GetDeg()); for (int e = 0; e < UI.GetDeg(); e++) { if (HOVIDSV[UID].IsKey(UI.GetNbrNId(e))) { continue; } AlphaKV[e] = (1 - PNoCom) * exp(- DotProduct(UID, UI.GetNbrNId(e)) + GetCom(UI.GetNbrNId(e), CID) * GetCom(UID, CID)); IAssertR(AlphaKV[e] <= 1.0, TStr::Fmt("AlphaKV=%f, %f, %f", AlphaKV[e].Val, PNoCom.Val, GetCom(UI.GetNbrNId(e), CID))); } Fuc = GetCom(UID, CID); PrevFuc = Fuc; Grad = GradientForOneVar(AlphaKV, UID, CID, Fuc), H = 0.0; if (Grad <= 1e-3 && Grad >= -0.1) { continue; } NewtonIter = 0; while (NewtonIter++ < 10) { Grad = GradientForOneVar(AlphaKV, UID, CID, Fuc), H = 0.0; H = HessianForOneVar(AlphaKV, UID, CID, Fuc); if (Fuc == 0.0 && Grad <= 0.0) { Grad = 0.0; } if (fabs(Grad) < 1e-3) { break; } if (H == 0.0) { Fuc = 0.0; break; } double NewtonStep = - Grad / H; if (NewtonStep < -0.5) { NewtonStep = - 0.5; } Fuc += NewtonStep; if (Fuc < 0.0) { Fuc = 0.0; } } if (Fuc == 0.0) { DelCom(UID, CID); } else { AddCom(UID, CID, Fuc); } } } if (iter - PrevIter >= 2 * G->GetNodes() && iter > 10000) { PrevIter = iter; CurL = Likelihood(); if (PrevL > TFlt::Mn && ! PlotNm.Empty()) { printf("\r%d iterations, Likelihood: %f, Diff: %f", iter, CurL, CurL - PrevL); } fflush(stdout); if (CurL - PrevL <= Thres * fabs(PrevL)) { break; } else { PrevL = CurL; } } } if (! PlotNm.Empty()) { printf("\nMLE for Lambda completed with %d iterations(%s)\n", iter, ExeTm.GetTmStr()); TGnuPlot::PlotValV(IterLV, PlotNm + ".likelihood_Q"); } return iter; }
void TAGMFast::GradientForRow(const int UID, TIntFltH& GradU, const TIntSet& CIDSet) { GradU.Gen(CIDSet.Len()); TFltV HOSumFV; //adjust for Fv of v hold out if (HOVIDSV[UID].Len() > 0) { HOSumFV.Gen(SumFV.Len()); for (int e = 0; e < HOVIDSV[UID].Len(); e++) { for (int c = 0; c < SumFV.Len(); c++) { HOSumFV[c] += GetCom(HOVIDSV[UID][e], c); } } } TUNGraph::TNodeI NI = G->GetNI(UID); int Deg = NI.GetDeg(); TFltV PredV(Deg), GradV(CIDSet.Len()); TIntV CIDV(CIDSet.Len()); if (DoParallel && Deg + CIDSet.Len() > 10) { #pragma omp parallel for schedule(static, 1) for (int e = 0; e < Deg; e++) { if (NI.GetNbrNId(e) == UID) { continue; } if (HOVIDSV[UID].IsKey(NI.GetNbrNId(e))) { continue; } PredV[e] = Prediction(UID, NI.GetNbrNId(e)); } #pragma omp parallel for schedule(static, 1) for (int c = 0; c < CIDSet.Len(); c++) { int CID = CIDSet.GetKey(c); double Val = 0.0; for (int e = 0; e < Deg; e++) { int VID = NI.GetNbrNId(e); if (VID == UID) { continue; } if (HOVIDSV[UID].IsKey(VID)) { continue; } Val += PredV[e] * GetCom(VID, CID) / (1.0 - PredV[e]) + NegWgt * GetCom(VID, CID); } double HOSum = HOVIDSV[UID].Len() > 0? HOSumFV[CID].Val: 0.0;//subtract Hold out pairs only if hold out pairs exist Val -= NegWgt * (SumFV[CID] - HOSum - GetCom(UID, CID)); CIDV[c] = CID; GradV[c] = Val; } } else { for (int e = 0; e < Deg; e++) { if (NI.GetNbrNId(e) == UID) { continue; } if (HOVIDSV[UID].IsKey(NI.GetNbrNId(e))) { continue; } PredV[e] = Prediction(UID, NI.GetNbrNId(e)); } for (int c = 0; c < CIDSet.Len(); c++) { int CID = CIDSet.GetKey(c); double Val = 0.0; for (int e = 0; e < Deg; e++) { int VID = NI.GetNbrNId(e); if (VID == UID) { continue; } if (HOVIDSV[UID].IsKey(VID)) { continue; } Val += PredV[e] * GetCom(VID, CID) / (1.0 - PredV[e]) + NegWgt * GetCom(VID, CID); } double HOSum = HOVIDSV[UID].Len() > 0? HOSumFV[CID].Val: 0.0;//subtract Hold out pairs only if hold out pairs exist Val -= NegWgt * (SumFV[CID] - HOSum - GetCom(UID, CID)); CIDV[c] = CID; GradV[c] = Val; } } //add regularization if (RegCoef > 0.0) { //L1 for (int c = 0; c < GradV.Len(); c++) { GradV[c] -= RegCoef; } } if (RegCoef < 0.0) { //L2 for (int c = 0; c < GradV.Len(); c++) { GradV[c] += 2 * RegCoef * GetCom(UID, CIDV[c]); } } for (int c = 0; c < GradV.Len(); c++) { if (GetCom(UID, CIDV[c]) == 0.0 && GradV[c] < 0.0) { continue; } if (fabs(GradV[c]) < 0.0001) { continue; } GradU.AddDat(CIDV[c], GradV[c]); } for (int c = 0; c < GradU.Len(); c++) { if (GradU[c] >= 10) { GradU[c] = 10; } if (GradU[c] <= -10) { GradU[c] = -10; } IAssert(GradU[c] >= -10); } }
namespace polygon_merger { int polygonCount; typedef std::set<int> TIntSet; TIntSet vertices; TIntSet borderVertices; typedef std::map< int, TIntVector > TIntIntsMap; TIntIntsMap vertexNexts; TIntIntsMap vertexPrevs; TIntVector vertexUseCount; TIntVector vertexTraceID; enum eVertexType { VTYPE_NONE, ///< Not computed yet VTYPE_INTERIOR, ///< Completely interior VTYPE_SINGLE, ///< On the border, belongs to single polygon VTYPE_MULTI, ///< On the border, belongs to multiple polygons VTYPE_DONE, ///< Already fully processed (traced into result polygon) }; TIntVector vertexTypes; bool isVertexInterior( int idx ) { assert( idx >= 0 && idx < vertexUseCount.size() ); // vertex is interior if it's use count is >1, AND // it's all neighbors' use counts are >1, AND it has exactly "usecount" // different neighbors. int useCount = vertexUseCount[idx]; assert( useCount >= 1 ); if( useCount < 2 ) return false; TIntIntsMap::const_iterator it; int i, n; it = vertexNexts.find( idx ); assert( it != vertexNexts.end() ); const TIntVector& vnext = it->second; n = vnext.size(); assert( n == useCount ); for( i = 0; i < n; ++i ) { if( vertexUseCount[vnext[i]] < 2 ) return false; } it = vertexPrevs.find( idx ); assert( it != vertexPrevs.end() ); const TIntVector& vprev = it->second; n = vprev.size(); assert( n == useCount ); for( i = 0; i < n; ++i ) { int nidx = vprev[i]; if( vertexUseCount[nidx] < 2 ) return false; if( std::find( vnext.begin(), vnext.end(), nidx ) == vnext.end() ) return false; } return true; // fall through: must be interior } void markVertexTypes() { TIntSet::const_iterator vit, vitEnd = vertices.end(); // first pass: mark all interior and single-border vertices for( vit = vertices.begin(); vit != vitEnd; ++vit ) { int idx = *vit; if( vertexUseCount[idx] == 1 ) { vertexTypes[idx] = VTYPE_SINGLE; borderVertices.insert( idx ); } else if( isVertexInterior(idx) ) vertexTypes[idx] = VTYPE_INTERIOR; } // now, the unmarked vertices are all shared and on border for( vit = vertices.begin(); vit != vitEnd; ++vit ) { int idx = *vit; if( vertexTypes[idx] == VTYPE_NONE ) { vertexTypes[idx] = VTYPE_MULTI; borderVertices.insert( idx ); } } } int getBorderIndex() { if( borderVertices.empty() ) return -1; return *borderVertices.begin(); } void traceBorder( const TWallVertexVector& vb, TIntVector& polygon, TIntVector& ib ) { static int traceID = 0; ++traceID; int idx0 = getBorderIndex(); assert( idx0 >= 0 && idx0 < vertexTypes.size() ); ib.resize( 0 ); polygon.resize( 0 ); polygon.reserve( vertices.size()/2 ); TIntVector localPolygon; localPolygon.reserve( 128 ); TIntVector localIB; localIB.reserve( 128 ); int idxPrev = idx0; int idx = idx0; int debugCounter = 0; int debugLoopCounter = 0; do { localIB.resize( 0 ); bool willFormLoop; do{ localPolygon.push_back( idx ); borderVertices.erase( idx ); vertexTraceID[idx] = traceID; assert( ++debugCounter <= vertices.size()*2 ); // Next vertex is the neighbor of current, that is not interior // and that is not the previous one. // When there are many possible ones, trace based on angle. int idxNext = -1; SVector2 prevToCurr = vb[idx] - vb[idxPrev]; if( prevToCurr.lengthSq() < 1.0e-6f ) prevToCurr.set( -0.01f, -0.01f ); TIntIntsMap::const_iterator it; it = vertexNexts.find( idx ); assert( it != vertexNexts.end() ); const TIntVector& vnext = it->second; int n = vnext.size(); float bestAngle = 100.0f; for( int i = 0; i < n; ++i ) { int idx1 = vnext[i]; if( idx1 != idxPrev && vertexTypes[idx1] != VTYPE_INTERIOR ) { //if( idx1 != idxPrev ) { SVector2 currToNext = vb[idx1] - vb[idx]; float ang = signedAngle2D( prevToCurr, currToNext ); if( ang < bestAngle ) { bestAngle = ang; idxNext = idx1; } } } assert( bestAngle > -4.0f && bestAngle < 4.0f ); assert( idxNext >= 0 ); willFormLoop = (vertexTraceID[idxNext] == traceID); // Optimization: if best angle is zero, then we're walking // in a straight line. Optimize out the current vertex. if( bestAngle == 0.0f && idx != idx0 && !willFormLoop ) { localPolygon.pop_back(); } idxPrev = idx; idx = idxNext; } while( !willFormLoop ); assert( localPolygon.size() >= 3 ); //if( localPolygon.size() < 3 ) { // return; //} assert( ++debugLoopCounter < vertices.size() ); if( idx == idx0 ) { // The polygon is simple or we found the last loop. // Triangulate local and append to results. triangulator::process( vb, localPolygon, localIB ); polygon.insert( polygon.end(), localPolygon.begin(), localPolygon.end() ); ib.insert( ib.end(), localIB.begin(), localIB.end() ); // We can have separated other loops. Try fetching them as well. idx0 = getBorderIndex(); if( idx0 == -1 ) { return; } else { localPolygon.resize( 0 ); idxPrev = idx0; idx = idx0; } } else { // The polygon must be complex, and we just found a closed loop. // Take only the loop, triangulate it, append to results, continue. TIntVector::const_iterator itLoopStart = std::find( localPolygon.begin(), localPolygon.end(), idx ); assert( itLoopStart != localPolygon.end() ); // append to results TIntVector loopPolygon( itLoopStart, localPolygon.end() ); triangulator::process( vb, loopPolygon, localIB ); polygon.insert( polygon.end(), loopPolygon.begin(), loopPolygon.end() ); ib.insert( ib.end(), localIB.begin(), localIB.end() ); // continue - remove the looped polygon from local localPolygon.resize( itLoopStart - localPolygon.begin() ); } } while( true ); } void begin( int totalVerts ) { polygonCount = 0; vertices.clear(); borderVertices.clear(); vertexNexts.clear(); vertexPrevs.clear(); vertexUseCount.resize( 0 ); vertexUseCount.resize( totalVerts, 0 ); vertexTraceID.resize( totalVerts, -1 ); vertexTypes.resize( 0 ); vertexTypes.resize( totalVerts, VTYPE_NONE ); } void addPolygon( const TIntVector& ib ) { ++polygonCount; TIntVector::const_iterator vit, vitEnd = ib.end(); for( vit = ib.begin(); vit != vitEnd; ++vit ) { int idx = *vit; assert( idx >= 0 && idx < vertexUseCount.size() ); vertices.insert( idx ); ++vertexUseCount[idx]; int idxNext = (vit==vitEnd-1) ? ib.front() : *(vit+1); int idxPrev = (vit==ib.begin()) ? ib.back() : *(vit-1); vertexNexts[idx].push_back( idxNext ); vertexPrevs[idx].push_back( idxPrev ); } } void end( const TWallVertexVector& vb, TIntVector& polygon, TIntVector& ib ) { assert( polygonCount ); if( polygonCount == 1 ) { // trivial case, just output input polygon polygon.resize( 0 ); polygon.reserve( vertices.size() ); int idx0 = *vertices.begin(); int idx = idx0; do { polygon.push_back( idx ); const TIntVector& vnext = vertexNexts[idx]; assert( vnext.size() == 1 ); idx = vnext[0]; } while( idx != idx0 ); triangulator::process( vb, polygon, ib ); } else { // mark vertex types markVertexTypes(); // trace and triangulate the polygon(s) traceBorder( vb, polygon, ib ); } } }; // namespace polygon_merger