int main() { TLSHash LSH(7, 7, DIM, TLSHash::EUCLIDEAN); LSH.Init(); TRnd Gen; Gen.Randomize(); TVec<TFltV> DataV; for (int i=0; i<1000000; i++) { TFltV Datum; for (int j=0; j<3; j++) { Datum.Add(Gen.GetUniDev()*2100); } DataV.Add(Datum); } LSH.AddV(DataV); TVec<TPair<TFltV, TFltV> > NeighborsV = LSH.GetAllCandidatePairs(); printf("Number of Candidates: %d\n", NeighborsV.Len()); NeighborsV = LSH.GetAllNearPairs(); printf("Number of Close Pairs: %d\n", NeighborsV.Len()); for (int i=0; i<NeighborsV.Len(); i++) { outputPoint(NeighborsV[i].GetVal1()); printf(" "); outputPoint(NeighborsV[i].GetVal2()); printf("\n"); } return 0; }
/// Generates a small-world graph using the Watts-Strogatz model. /// We assume a circle where each node creates links to NodeOutDeg other nodes. /// This way at the end each node is connected to 2*NodeOutDeg other nodes. /// See: Collective dynamics of 'small-world' networks. Watts and Strogatz. /// URL: http://research.yahoo.com/files/w_s_NATURE_0.pdf PUNGraph GenSmallWorld(const int& Nodes, const int& NodeOutDeg, const double& RewireProb, TRnd& Rnd) { THashSet<TIntPr> EdgeSet(Nodes*NodeOutDeg); IAssertR(Nodes > NodeOutDeg, TStr::Fmt("Insufficient nodes for out degree, %d!", NodeOutDeg)); for (int node = 0; node < Nodes; node++) { const int src = node; for (int edge = 1; edge <= NodeOutDeg; edge++) { int dst = (node+edge) % Nodes; // edge to next neighbor if (Rnd.GetUniDev() < RewireProb) { // random edge dst = Rnd.GetUniDevInt(Nodes); while (dst == src || EdgeSet.IsKey(TIntPr(src, dst))) { dst = Rnd.GetUniDevInt(Nodes); } } EdgeSet.AddKey(TIntPr(src, dst)); } } PUNGraph GraphPt = TUNGraph::New(); TUNGraph& Graph = *GraphPt; Graph.Reserve(Nodes, EdgeSet.Len()); int node; for (node = 0; node < Nodes; node++) { IAssert(Graph.AddNode(node) == node); } for (int edge = 0; edge < EdgeSet.Len(); edge++) { Graph.AddEdge(EdgeSet[edge].Val1, EdgeSet[edge].Val2); } Graph.Defrag(); return GraphPt; }
/// Generates a random bipartite graph with LeftNodes left nodes (ids
/// 0..LeftNodes-1), RightNodes right nodes (ids LeftNodes..LeftNodes+RightNodes-1)
/// and exactly Edges distinct left-right edges chosen uniformly at random.
/// Asserts that Edges does not exceed LeftNodes*RightNodes.
PBPGraph GenRndBipart(const int& LeftNodes, const int& RightNodes, const int& Edges, TRnd& Rnd) {
  // The capacity is computed in 64 bits: LeftNodes*RightNodes overflows int
  // for large partitions, which would make the check below meaningless.
  const long long MaxEdges = static_cast<long long>(LeftNodes) * static_cast<long long>(RightNodes);
  IAssertR(static_cast<long long>(Edges) <= MaxEdges, "Too many edges in the bipartite graph!");
  PBPGraph G = TBPGraph::New();
  for (int i = 0; i < LeftNodes; i++) { G->AddNode(i, true); }
  for (int i = 0; i < RightNodes; i++) { G->AddNode(LeftNodes+i, false); }
  for (int edges = 0; edges < Edges; ) {
    const int LNId = Rnd.GetUniDevInt(LeftNodes);
    const int RNId = LeftNodes + Rnd.GetUniDevInt(RightNodes);
    // AddEdge() reports -2 for an edge that already exists; only new edges count
    if (G->AddEdge(LNId, RNId) != -2) { edges++; }
  }
  return G;
}
/// Adds random edges among the members of one community.
/// The number of edges is a binomial deviate over the CNodes*(CNodes-1)/2
/// possible member pairs with success probability Prob; that many distinct
/// non-loop pairs are then drawn uniformly from CmtyV and added to Graph.
void TAGM::RndConnectInsideCommunity(PUNGraph& Graph, const TIntV& CmtyV, const double& Prob, TRnd& Rnd){
  const int CNodes = CmtyV.Len();
  const int CEdges = Rnd.GetBinomialDev(Prob,CNodes*(CNodes-1)/2);
  THashSet<TIntPr> NewEdgeSet(CEdges);
  int Added = 0;
  while (Added < CEdges) {
    const int NIdA = CmtyV[Rnd.GetUniDevInt(CNodes)];
    const int NIdB = CmtyV[Rnd.GetUniDevInt(CNodes)];
    if (NIdA == NIdB) { continue; } // self-loop, draw again
    // store the pair with the smaller id first so both orientations collide
    const int LoNId = (NIdA > NIdB) ? NIdB : NIdA;
    const int HiNId = (NIdA > NIdB) ? NIdA : NIdB;
    const TIntPr Edge(LoNId, HiNId);
    if (NewEdgeSet.IsKey(Edge)) { continue; } // already drawn in this call
    NewEdgeSet.AddKey(Edge);
    Graph->AddEdge(LoNId, HiNId);
    Added++;
  }
}
/// Returns the id of a random node. A uniform draw over the total node count
/// decides the side (left when the draw is below the left-node count), then
/// the id is picked by the matching per-side random selector.
int TBPGraph::GetRndNId(TRnd& Rnd) {
  const int Total = GetNodes();
  const bool PickLeft = Rnd.GetUniDevInt(Total) < GetLNodes();
  return PickLeft ? GetRndLNId(Rnd) : GetRndRNId(Rnd);
}
/// Picks up to `num` distinct seed nodes among the nodes of `g` whose entry in
/// `infection_state` differs from `infect`. Candidates are drawn by a uniform
/// integer below the candidates' total degree, mapped to a candidate through
/// the cumulative-degree table via find().
///
/// `infection_state` is indexed with (node id - 1).
/// Returns a heap-allocated hash (chosen node id -> 1); the caller owns it.
///
/// The number of seeds is capped at the number of candidates with non-zero
/// degree. Without the cap the sampling loop could never finish when `num`
/// exceeded what can be drawn.
/// NOTE(review): the cap assumes find() never maps a draw to a zero-degree
/// candidate (they add nothing to the cumulative weight) -- confirm against find().
THash<TInt, TInt> * choose_seeds (const PUNGraph g, const int num, const int * infection_state, const int infect) {
  THash<TInt, TInt> choices;   // candidate index -> cumulative degree up to and including it
  TIntV node_ids;              // candidate index -> node id
  THash<TInt, TInt> * output = new THash<TInt, TInt> ();
  TInt weight = 0;
  TInt num_total = 0;
  int num_selectable = 0;      // candidates that own a non-empty slice of the weight range
  for (TUNGraph::TNodeI n = g->BegNI(); n != g->EndNI(); n++) {
    if (infection_state[n.GetId () - 1] != infect) {
      if (n.GetDeg () > 0) { num_selectable++; }
      weight += n.GetDeg ();
      choices.AddDat (num_total, weight);
      node_ids.Add (n.GetId ());
      num_total++;
    }
  }
  // never ask for more distinct seeds than can possibly be drawn
  const int target = (num < num_selectable) ? num : num_selectable;
  TInt num_chosen = 0;
  while (num_chosen < target) {
    TInt choice = my_random.GetUniDevInt (weight);
    const TInt node_id = node_ids[find (choice, choices, 0, num_total-1)];
    if (!output->IsKey (node_id)) {   // repeated draws of the same node are ignored
      num_chosen++;
      output->AddDat (node_id, 1);
    }
  }
  return output;
}
/// Compares the hybrid BFS against the original BFS on `graph`.
/// Runs 10 rounds; each round picks a random start node, runs both searches
/// over the whole graph (target = -1), checks that the maximum distances and
/// the per-node results agree, and prints both execution times.
/// Returns false at the first mismatch, true if every round agrees.
bool test(PGraph &graph, bool followOut, bool followIn) {
  printf("\n================================\nFollowOut: %d, FollowIn: %d\n", followOut, followIn);
  const int iters = 10;
  // Seed once, outside the loop. Re-creating the generator from time(0) in
  // every round gave every round within the same second the same start node.
  TRnd rnd((int)time(0));
  for (int k = 0; k < iters; k++) {
    int start = graph->GetRndNId(rnd);
    int target = -1;  // no specific target: explore everything reachable
    printf("Start node: %d\n", start);
    struct timeval tv1, tv2;

    /* Hybrid */
    gettimeofday(&tv1, NULL);
    TBreathFS<PGraph> bfs_hybrid(graph, true);
    int maxDist_hybrid = bfs_hybrid.DoBfsHybrid(start, followOut, followIn, target);
    gettimeofday(&tv2, NULL);
    double time_hybrid = timeInSeconds(tv1, tv2);

    /* Original */
    gettimeofday(&tv1, NULL);
    TBreathFS<PGraph> bfs(graph, true);
    int maxDist = bfs.DoBfs(start, followOut, followIn, target);
    gettimeofday(&tv2, NULL);
    double time_orig = timeInSeconds(tv1, tv2);

    /* Check results */
    if (maxDist_hybrid != maxDist) {
      printf("MaxDist incorrect.\n");
      return false;
    }
    // per-node results are only compared for a full traversal
    if (target == -1) {
      if (!checkResults<PGraph>(bfs_hybrid, bfs)) {
        printf("NIdDistH values incorrect!\n");
        return false;
      }
    }
    printf("Execution times: Original: %.2f, Hybrid: %.2f\n", time_orig, time_hybrid);
  }
  return true;
}
//Initialize positive embeddings void InitPosEmb(TIntV& Vocab, int& Dimensions, TRnd& Rnd, TVVec<TFlt, int64>& SynPos) { SynPos = TVVec<TFlt, int64>(Vocab.Len(),Dimensions); for (int64 i = 0; i < SynPos.GetXDim(); i++) { for (int j = 0; j < SynPos.GetYDim(); j++) { SynPos(i,j) =(Rnd.GetUniDev()-0.5)/Dimensions; } } }
void TLSHash::Init() { TRnd Gen; Gen.Randomize(); for (int i=0; i<Bands*Rows; i++) { if (Type == JACCARD) { HashFuncV.Add(TPt<HashFunc>(new JaccardHash(Gen, Dim))); } else if (Type == COSINE) { HashFuncV.Add(TPt<HashFunc>(new CosineHash(Gen, Dim))); } else { HashFuncV.Add(TPt<HashFunc>(new EuclideanHash(Gen, Dim))); } } for (int i=0; i<Bands; i++) { SigBucketVHV.Add(THash<TInt, TIntV> (ExpectedSz, true)); } }
///Generate sequence from Power law void TAGMUtil::GenPLSeq(TIntV& SzSeq, const int& SeqLen, const double& Alpha, TRnd& Rnd, const int& Min, const int& Max) { SzSeq.Gen(SeqLen, 0); while (SzSeq.Len() < SeqLen) { int Sz = (int) TMath::Round(Rnd.GetPowerDev(Alpha)); if (Sz >= Min && Sz <= Max) { SzSeq.Add(Sz); } } }
/// Generates a random scale-free network using the Copying Model. /// The generating process operates as follows: Node u is added to a graph, it /// selects a random node v, and with prob Beta it links to v, with 1-Beta /// links u links to neighbor of v. The power-law degree exponent is -1/(1-Beta). /// See: Stochastic models for the web graph. /// Kumar, Raghavan, Rajagopalan, Sivakumar, Tomkins, Upfal. /// URL: http://snap.stanford.edu/class/cs224w-readings/kumar00stochastic.pdf PNGraph GenCopyModel(const int& Nodes, const double& Beta, TRnd& Rnd) { PNGraph GraphPt = TNGraph::New(); TNGraph& Graph = *GraphPt; Graph.Reserve(Nodes, Nodes); const int startNId = Graph.AddNode(); Graph.AddEdge(startNId, startNId); for (int n = 1; n < Nodes; n++) { const int rnd = Graph.GetRndNId(); const int NId = Graph.AddNode(); if (Rnd.GetUniDev() < Beta) { Graph.AddEdge(NId, rnd); } else { const TNGraph::TNodeI NI = Graph.GetNI(rnd); const int rnd2 = Rnd.GetUniDevInt(NI.GetOutDeg()); Graph.AddEdge(NId, NI.GetOutNId(rnd2)); } } return GraphPt; }
/// Sample random point from the surface of a Dim-dimensional unit sphere. void GetSphereDev(const int& Dim, TRnd& Rnd, TFltV& ValV) { if (ValV.Len() != Dim) { ValV.Gen(Dim); } double Length = 0.0; for (int i = 0; i < Dim; i++) { ValV[i] = Rnd.GetNrmDev(); Length += TMath::Sqr(ValV[i]); } Length = 1.0 / sqrt(Length); for (int i = 0; i < Dim; i++) { ValV[i] *= Length; } }
/// Generates a random undirect graph with a given degree sequence DegSeqV. /// Configuration model operates as follows. For each node N, of degree /// DeqSeqV[N] we create DeqSeqV[N] spokes (half-edges). We then pick two /// spokes at random, and connect the spokes endpoints. We continue this /// process until no spokes are left. Generally this generates a multigraph /// (i.e., spokes out of same nodes can be chosen multiple times).We ignore /// (discard) self-loops and multiple edges. Thus, the generated graph will /// only approximate follow the given degree sequence. The method is very fast! PUNGraph GenConfModel(const TIntV& DegSeqV, TRnd& Rnd) { const int Nodes = DegSeqV.Len(); PUNGraph GraphPt = TUNGraph::New(); TUNGraph& Graph = *GraphPt; Graph.Reserve(Nodes, -1); TIntV NIdDegV(DegSeqV.Len(), 0); int DegSum=0, edges=0; for (int node = 0; node < Nodes; node++) { Graph.AddNode(node); for (int d = 0; d < DegSeqV[node]; d++) { NIdDegV.Add(node); } DegSum += DegSeqV[node]; } NIdDegV.Shuffle(Rnd); TIntPrSet EdgeH(DegSum/2); // set of all edges, is faster than graph edge lookup if (DegSum % 2 != 0) { printf("Seg seq is odd [%d]: ", DegSeqV.Len()); for (int d = 0; d < TMath::Mn(100, DegSeqV.Len()); d++) { printf(" %d", (int)DegSeqV[d]); } printf("\n"); } int u=0, v=0; for (int c = 0; NIdDegV.Len() > 1; c++) { u = Rnd.GetUniDevInt(NIdDegV.Len()); while ((v = Rnd.GetUniDevInt(NIdDegV.Len())) == u) { } if (u > v) { Swap(u, v); } const int E1 = NIdDegV[u]; const int E2 = NIdDegV[v]; if (v == NIdDegV.Len()-1) { NIdDegV.DelLast(); } else { NIdDegV[v] = NIdDegV.Last(); NIdDegV.DelLast(); } if (u == NIdDegV.Len()-1) { NIdDegV.DelLast(); } else { NIdDegV[u] = NIdDegV.Last(); NIdDegV.DelLast(); } if (E1 == E2 || EdgeH.IsKey(TIntPr(E1, E2))) { continue; } EdgeH.AddKey(TIntPr(E1, E2)); Graph.AddEdge(E1, E2); edges++; if (c % (DegSum/100+1) == 0) { printf("\r configuration model: iter %d: edges: %d, left: %d", c, edges, NIdDegV.Len()/2); } } printf("\n"); return GraphPt; }
/// Generates a G(n,m) random graph: Nodes nodes with ids 0..Nodes-1 and Edges
/// edges between uniformly chosen distinct endpoints. Self-loops and pairs the
/// graph already contains are redrawn. When IsDir is false the reverse edge is
/// inserted as well.
PGraph GenRndGnm(const int& Nodes, const int& Edges, const bool& IsDir, TRnd& Rnd) {
  PGraph GraphPt = PGraph::New();
  typename PGraph::TObj& Graph = *GraphPt;
  Graph.Reserve(Nodes, Edges);
  for (int NId = 0; NId < Nodes; NId++) {
    IAssert(Graph.AddNode(NId) == NId);
  }
  int Placed = 0;
  while (Placed < Edges) {
    const int SrcNId = Rnd.GetUniDevInt(Nodes);
    const int DstNId = Rnd.GetUniDevInt(Nodes);
    if (SrcNId == DstNId) { continue; }                   // no self-loops
    if (Graph.AddEdge(SrcNId, DstNId) == -2) { continue; } // edge was already there
    if (! IsDir) { Graph.AddEdge(DstNId, SrcNId); }
    Placed++;
  }
  return GraphPt;
}
/// Generates a random scale-free graph using the Geometric Preferential /// Attachment model by Flexman, Frieze and Vera. /// See: A geometric preferential attachment model of networks by Flexman, /// Frieze and Vera. WAW 2004. /// URL: http://math.cmu.edu/~af1p/Texfiles/GeoWeb.pdf PUNGraph GenGeoPrefAttach(const int& Nodes, const int& OutDeg, const double& Beta, TRnd& Rnd) { PUNGraph G = TUNGraph::New(Nodes, Nodes*OutDeg); TFltTrV PointV(Nodes, 0); TFltV ValV; // points on a sphere of radius 1/(2*pi) const double Rad = 0.5 * TMath::Pi; for (int i = 0; i < Nodes; i++) { TSnapDetail::GetSphereDev(3, Rnd, ValV); PointV.Add(TFltTr(Rad*ValV[0], Rad*ValV[1], Rad*ValV[2])); } const double R2 = TMath::Sqr(log((double) Nodes) / (pow((double) Nodes, 0.5-Beta))); TIntV DegV, NIdV; int SumDeg; for (int t = 0; t < Nodes; t++) { const int pid = t; const TFltTr& P1 = PointV[pid]; // add node if (! G->IsNode(pid)) { G->AddNode(pid); } // find neighborhood DegV.Clr(false); NIdV.Clr(false); SumDeg=0; for (int p = 0; p < t; p++) { const TFltTr& P2 = PointV[p]; if (TMath::Sqr(P1.Val1-P2.Val1)+TMath::Sqr(P1.Val2-P2.Val2)+TMath::Sqr(P1.Val3-P2.Val3) < R2) { NIdV.Add(p); DegV.Add(G->GetNI(p).GetDeg()+1); SumDeg += DegV.Last(); } } // add edges for (int m = 0; m < OutDeg; m++) { const int rnd = Rnd.GetUniDevInt(SumDeg); int sum = 0, dst = -1; for (int s = 0; s < DegV.Len(); s++) { sum += DegV[s]; if (rnd < sum) { dst=s; break; } } if (dst != -1) { G->AddEdge(pid, NIdV[dst]); SumDeg -= DegV[dst]; NIdV.Del(dst); DegV.Del(dst); } } } return G; }
/// Performs min(population_size, sample_size) draws; draw i takes a uniform
/// integer below population_size - i and records it in a heap-allocated hash
/// mapped to population_size - i - 1. When the drawn value is already a key,
/// its current mapped value is first inserted as a key with that same new value.
/// The caller owns the returned hash.
THash <TInt, TInt> * choose (const TInt & population_size, const TInt & sample_size) {
  THash <TInt, TInt> * hits = new THash <TInt, TInt> ();
  const TInt draws = TMath::Mn<TInt> (population_size, sample_size);
  int i = 0;
  while (i < draws) {
    const TInt chosen = my_random.GetUniDevInt (population_size - i);
    const TInt last = population_size - i - 1;
    if (hits->IsKey (chosen)) {
      // the slot was hit before: forward its previous value as a key of its own
      const TInt previous = hits->GetDat (chosen);
      hits->AddDat (previous, last);
    }
    hits->AddDat (chosen, last);
    i++;
  }
  return hits;
}
/// rewire bipartite community affiliation graphs void TAGMUtil::RewireCmtyNID(THash<TInt,TIntV >& CmtyVH, TRnd& Rnd) { THash<TInt,TIntV > NewCmtyVH(CmtyVH.Len()); TIntV NDegV; TIntV CDegV; for (int i = 0; i < CmtyVH.Len(); i++) { int CID = CmtyVH.GetKey(i); for (int j = 0; j < CmtyVH[i].Len(); j++) { int NID = CmtyVH[i][j]; NDegV.Add(NID); CDegV.Add(CID); } } TIntPrSet CNIDSet(CDegV.Len()); int c=0; while (c++ < 15 && CDegV.Len() > 1) { for (int i = 0; i < CDegV.Len(); i++) { int u = Rnd.GetUniDevInt(CDegV.Len()); int v = Rnd.GetUniDevInt(NDegV.Len()); if (CNIDSet.IsKey(TIntPr(CDegV[u], NDegV[v]))) { continue; } CNIDSet.AddKey(TIntPr(CDegV[u], NDegV[v])); if (u == CDegV.Len() - 1) { CDegV.DelLast(); } else { CDegV[u] = CDegV.Last(); CDegV.DelLast(); } if ( v == NDegV.Len() - 1) { NDegV.DelLast(); } else { NDegV[v] = NDegV.Last(); NDegV.DelLast(); } } } for (int i = 0; i < CNIDSet.Len(); i++) { TIntPr CNIDPr = CNIDSet[i]; IAssert(CmtyVH.IsKey(CNIDPr.Val1)); NewCmtyVH.AddDat(CNIDPr.Val1); NewCmtyVH.GetDat(CNIDPr.Val1).Add(CNIDPr.Val2); } CmtyVH = NewCmtyVH; }
/// Generates a random scale-free graph with power-law degree distribution with /// exponent PowerExp. The method uses either the Configuration model (fast but /// the result is approximate) or the Edge Rewiring method (slow but exact). PUNGraph GenRndPowerLaw(const int& Nodes, const double& PowerExp, const bool& ConfModel, TRnd& Rnd) { TIntV DegSeqV; uint DegSum=0; for (int n = 0; n < Nodes; n++) { const int Val = (int) TMath::Round(Rnd.GetPowerDev(PowerExp)); if (! (Val >= 1 && Val < Nodes/2)) { n--; continue; } // skip nodes with too large degree DegSeqV.Add(Val); DegSum += Val; } printf("%d nodes, %u edges\n", Nodes, DegSum); if (DegSum % 2 == 1) { DegSeqV[0] += 1; } if (ConfModel) { // use configuration model -- fast but does not exactly obey the degree sequence return GenConfModel(DegSeqV, Rnd); } else { PUNGraph G = TSnap::GenDegSeq(DegSeqV, Rnd); return TSnap::GenRewire(G, 10, Rnd); } }
void TTransCorpus::Shuffle(const PTransCorpus& InFirstTransCorpus, const PTransCorpus& InSecondTransCorpus, TRnd& Rnd, const double& SwapProb, PTransCorpus& OutFirstTransCorpus, PTransCorpus& OutSecondTransCorpus) { // prepare new corpuses OutFirstTransCorpus = TTransCorpus::New(); OutSecondTransCorpus = TTransCorpus::New(); // swap sentences TIntV FirstSentIdV, SecondSentIdV; InFirstTransCorpus->GetSentIdV(FirstSentIdV); InSecondTransCorpus->GetSentIdV(SecondSentIdV); for (int SentIdN = 0; SentIdN < FirstSentIdV.Len(); SentIdN++) { // get sentence id const int SentId = FirstSentIdV[SentIdN]; // check if id same in both cases IAssert(SecondSentIdV[SentIdN] == SentId); // read sentences TStr OrgStr1 = InFirstTransCorpus->GetOrgStr(SentId); TStr OrgStr2 = InSecondTransCorpus->GetOrgStr(SentId); IAssert(OrgStr1 == OrgStr2 ); TStr RefTransStr1 = InFirstTransCorpus->GetRefTransStrV(SentId)[0]; TStr RefTransStr2 = InSecondTransCorpus->GetRefTransStrV(SentId)[0]; IAssert(RefTransStr1 == RefTransStr2); TStr FirstTransStr = InFirstTransCorpus->GetTransStr(SentId); TStr SecondTransStr = InSecondTransCorpus->GetTransStr(SentId); // swap sentences if (Rnd.GetUniDev() < SwapProb) { // we swap OutFirstTransCorpus->AddSentence(SentId, OrgStr1, SecondTransStr, RefTransStr1); OutSecondTransCorpus->AddSentence(SentId, OrgStr1, FirstTransStr, RefTransStr1); } else { // no swap OutFirstTransCorpus->AddSentence(SentId, OrgStr1, FirstTransStr, RefTransStr1); OutSecondTransCorpus->AddSentence(SentId, OrgStr1, SecondTransStr, RefTransStr1); } } }
///Generate bipartite community affiliation from given power law coefficients for membership distribution and community size distribution. void TAGMUtil::ConnectCmtyVV(TVec<TIntV>& CmtyVV, const TIntPrV& CIDSzPrV, const TIntPrV& NIDMemPrV, TRnd& Rnd) { const int Nodes = NIDMemPrV.Len(), Coms = CIDSzPrV.Len(); TIntV NDegV,CDegV; TIntPrSet CNIDSet; TIntSet HitNodes(Nodes); THash<TInt,TIntV> CmtyVH; for (int i = 0; i < CIDSzPrV.Len(); i++) { for (int j = 0; j < CIDSzPrV[i].Val2; j++) { CDegV.Add(CIDSzPrV[i].Val1); } } for (int i = 0; i < NIDMemPrV.Len(); i++) { for (int j = 0; j < NIDMemPrV[i].Val2; j++) { NDegV.Add(NIDMemPrV[i].Val1); } } while (CDegV.Len() < (int) (1.2 * Nodes)) { CDegV.Add(CIDSzPrV[Rnd.GetUniDevInt(Coms)].Val1); } while (NDegV.Len() < CDegV.Len()) { NDegV.Add(NIDMemPrV[Rnd.GetUniDevInt(Nodes)].Val1); } printf("Total Mem: %d, Total Sz: %d\n",NDegV.Len(), CDegV.Len()); int c=0; while (c++ < 15 && CDegV.Len() > 1) { for (int i = 0; i < CDegV.Len(); i++) { int u = Rnd.GetUniDevInt(CDegV.Len()); int v = Rnd.GetUniDevInt(NDegV.Len()); if (CNIDSet.IsKey(TIntPr(CDegV[u], NDegV[v]))) { continue; } CNIDSet.AddKey(TIntPr(CDegV[u], NDegV[v])); HitNodes.AddKey(NDegV[v]); if (u == CDegV.Len() - 1) { CDegV.DelLast(); } else { CDegV[u] = CDegV.Last(); CDegV.DelLast(); } if (v == NDegV.Len() - 1) { NDegV.DelLast(); } else { NDegV[v] = NDegV.Last(); NDegV.DelLast(); } } } //make sure that every node belongs to at least one community for (int i = 0; i < Nodes; i++) { int NID = NIDMemPrV[i].Val1; if (! HitNodes.IsKey(NID)) { CNIDSet.AddKey(TIntPr(CIDSzPrV[Rnd.GetUniDevInt(Coms)].Val1, NID)); HitNodes.AddKey(NID); } } IAssert(HitNodes.Len() == Nodes); for (int i = 0; i < CNIDSet.Len(); i++) { TIntPr CNIDPr = CNIDSet[i]; CmtyVH.AddDat(CNIDPr.Val1); CmtyVH.GetDat(CNIDPr.Val1).Add(CNIDPr.Val2); } CmtyVH.GetDatV(CmtyVV); }
// Draws an index using the two lookup tables: a uniform deviate selects an
// entry X of KTable, then a second uniform deviate Y decides between X itself
// (when Y < UTable[X]) and KTable[X].
int64 RndUnigramInt(TIntV& KTable, TFltV& UTable, TRnd& Rnd) {
  const int64 Slot = static_cast<int64>(Rnd.GetUniDev()*KTable.Len());
  TInt X = KTable[Slot];
  const double Y = Rnd.GetUniDev();
  if (Y < UTable[X]) {
    return X;
  }
  return KTable[X];
}
// Runs one training pass over a single walk (row CurrWalk of WalksVV) and
// updates the two embedding matrices in place.
//
// For every word of the walk, the words inside a randomly shrunk window
// around it are visited. For each such context word one positive example
// (the center word, label 1) and up to NegSamN sampled examples (label 0,
// drawn with RndUnigramInt) are scored against SynPos/SynNeg and both
// matrices are adjusted by the resulting gradient.
//
// WordCntAll is incremented once per processed word. Every 10000 words Alpha
// is recomputed from StartAlpha, decreasing with the share of processed words
// and never dropping below StartAlpha*0.0001; progress is printed if Verbose.
// StartAlpha, NegSamN, MaxExp, ExpTablePrecision and TableSize are defined
// outside this function.
void TrainModel(TVVec<TInt, int64>& WalksVV, int& Dimensions, int& WinSize, int& Iter, bool& Verbose, TIntV& KTable, TFltV& UTable, int64& WordCntAll, TFltV& ExpTable, double& Alpha, int64 CurrWalk, TRnd& Rnd, TVVec<TFlt, int64>& SynNeg, TVVec<TFlt, int64>& SynPos) {
  // Neu1V is only zeroed below and never read in this function.
  TFltV Neu1V(Dimensions);
  // Neu1eV accumulates the update for the current context word's SynPos row.
  TFltV Neu1eV(Dimensions);
  int64 AllWords = WalksVV.GetXDim()*WalksVV.GetYDim();
  // Copy the requested walk into a flat vector.
  TIntV WalkV(WalksVV.GetYDim());
  for (int j = 0; j < WalksVV.GetYDim(); j++) { WalkV[j] = WalksVV(CurrWalk,j); }
  for (int64 WordI=0; WordI<WalkV.Len(); WordI++) {
    // Periodic progress report and learning-rate update.
    if ( WordCntAll%10000 == 0 ) {
      if ( Verbose ) {
        printf("\rLearning Progress: %.2lf%% ",(double)WordCntAll*100/(double)(Iter*AllWords));
        fflush(stdout);
      }
      Alpha = StartAlpha * (1 - WordCntAll / static_cast<double>(Iter * AllWords + 1));
      // Lower bound for the learning rate.
      if ( Alpha < StartAlpha * 0.0001 ) { Alpha = StartAlpha * 0.0001; }
    }
    int64 Word = WalkV[WordI];
    for (int i = 0; i < Dimensions; i++) {
      Neu1V[i] = 0;
      Neu1eV[i] = 0;
    }
    // Random shrink of the window: positions Offset .. 2*WinSize-Offset are used.
    int Offset = Rnd.GetUniDevInt() % WinSize;
    for (int a = Offset; a < WinSize * 2 + 1 - Offset; a++) {
      // a == WinSize is the center word itself.
      if (a == WinSize) { continue; }
      int64 CurrWordI = WordI - WinSize + a;
      // Skip window positions that fall outside the walk.
      if (CurrWordI < 0){ continue; }
      if (CurrWordI >= WalkV.Len()){ continue; }
      int64 CurrWord = WalkV[CurrWordI];
      for (int i = 0; i < Dimensions; i++) { Neu1eV[i] = 0; }
      // Negative sampling: j == 0 is the positive example, j > 0 are sampled ones.
      for (int j = 0; j < NegSamN+1; j++) {
        int64 Target, Label;
        if (j == 0) {
          Target = Word;
          Label = 1;
        } else {
          Target = RndUnigramInt(KTable, UTable, Rnd);
          // A sample equal to the center word is dropped, not redrawn.
          if (Target == Word) { continue; }
          Label = 0;
        }
        // Dot product of the context row in SynPos and the target row in SynNeg.
        double Product = 0;
        for (int i = 0; i < Dimensions; i++) {
          Product += SynPos(CurrWord,i) * SynNeg(Target,i);
        }
        // Gradient multiplied by the learning rate. Outside [-MaxExp, MaxExp]
        // a fixed value is used instead of the ExpTable lookup.
        double Grad;
        if (Product > MaxExp) { Grad = (Label - 1) * Alpha; }
        else if (Product < -MaxExp) { Grad = Label * Alpha; }
        else {
          double Exp = ExpTable[static_cast<int>(Product*ExpTablePrecision)+TableSize/2];
          Grad = (Label - 1 + 1 / (1 + Exp)) * Alpha;
        }
        // Accumulate the SynPos update first (it uses the old SynNeg value),
        // then update SynNeg right away.
        for (int i = 0; i < Dimensions; i++) {
          Neu1eV[i] += Grad * SynNeg(Target,i);
          SynNeg(Target,i) += Grad * SynPos(CurrWord,i);
        }
      }
      // Apply the accumulated update to the context word's row.
      for (int i = 0; i < Dimensions; i++) {
        SynPos(CurrWord,i) += Neu1eV[i];
      }
    }
    WordCntAll++;
  }
}
/// R-MAT Generator. The model is based on the recursive descent into a 2x2
/// matrix [A,B; C, 1-(A+B+C)].
/// Creates Nodes nodes and Edges distinct, non-loop directed edges. Each edge
/// is placed by repeatedly halving the Nodes x Nodes index range and choosing
/// a quadrant at random; self-loops and already existing edges are counted as
/// collisions and redrawn. Progress and a final summary are printed to stdout.
/// Asserts that A+B+C < 1.
/// See: R-MAT Generator: A Recursive Model for Graph Mining.
/// D. Chakrabarti, Y. Zhan and C. Faloutsos, in SIAM Data Mining 2004.
/// URL: http://www.cs.cmu.edu/~deepay/mywww/papers/siam04.pdf
PNGraph GenRMat(const int& Nodes, const int& Edges, const double& A, const double& B, const double& C, TRnd& Rnd) {
  PNGraph GraphPt = TNGraph::New();
  TNGraph& Graph = *GraphPt;
  Graph.Reserve(Nodes, Edges);
  IAssert(A+B+C < 1.0);
  // rngX/rngY: size of the current index range; offX/offY: its start
  int rngX, rngY, offX, offY;
  int Depth=0, Collisions=0, Cnt=0, PctDone=0;
  // number of added edges between two progress messages
  const int EdgeGap = Edges / 100 + 1;
  // Per-depth cumulative quadrant probabilities, 128 levels
  // (up to 2^128 vertices ~ 3.4e38).
  TVec<double> sumA(128, 0), sumAB(128, 0), sumAC(128, 0), sumABC(128, 0);
  for (int i = 0; i < 128; i++) {
    // Each probability is scaled by (uniform deviate + 0.5) and the four
    // values are renormalized, so every depth gets slightly different ones.
    const double a = A * (Rnd.GetUniDev() + 0.5);
    const double b = B * (Rnd.GetUniDev() + 0.5);
    const double c = C * (Rnd.GetUniDev() + 0.5);
    const double d = (1.0 - (A+B+C)) * (Rnd.GetUniDev() + 0.5);
    const double abcd = a+b+c+d;
    sumA.Add(a / abcd);
    sumAB.Add((a+b) / abcd);
    sumAC.Add((a+c) / abcd);
    sumABC.Add((a+b+c) / abcd);
  }
  // nodes: ids are assigned by the graph and must come out as 0..Nodes-1
  for (int node = 0; node < Nodes; node++) {
    IAssert(Graph.AddNode(-1) == node);
  }
  // edges: the counter only advances when a new edge was actually added
  for (int edge = 0; edge < Edges; ) {
    rngX = Nodes;  rngY = Nodes;  offX = 0;  offY = 0;
    Depth = 0;
    // recurse the matrix until the range is narrowed to a single cell
    while (rngX > 1 || rngY > 1) {
      const double RndProb = Rnd.GetUniDev();
      if (rngX>1 && rngY>1) {
        // both dimensions can still be split: pick one of the four quadrants
        if (RndProb < sumA[Depth]) { rngX/=2; rngY/=2; }
        else if (RndProb < sumAB[Depth]) { offX+=rngX/2; rngX-=rngX/2; rngY/=2; }
        else if (RndProb < sumABC[Depth]) { offY+=rngY/2; rngX/=2; rngY-=rngY/2; }
        else { offX+=rngX/2; offY+=rngY/2; rngX-=rngX/2; rngY-=rngY/2; }
      } else if (rngX>1) { // row vector: only the X range can still be split
        if (RndProb < sumAC[Depth]) { rngX/=2; rngY/=2; }
        else { offX+=rngX/2; rngX-=rngX/2; rngY/=2; }
      } else if (rngY>1) { // column vector: only the Y range can still be split
        if (RndProb < sumAB[Depth]) { rngX/=2; rngY/=2; }
        else { offY+=rngY/2; rngX/=2; rngY-=rngY/2; }
      } else { Fail; } // not reachable while the loop condition holds
      Depth++;
    }
    // add edge: the two offsets are the endpoints
    const int NId1 = offX;
    const int NId2 = offY;
    if (NId1 != NId2 && ! Graph.IsEdge(NId1, NId2)) {
      Graph.AddEdge(NId1, NId2);
      if (++Cnt > EdgeGap) { Cnt=0; printf("\r %d%% edges", ++PctDone); }
      edge++;
    } else { Collisions++; } // self-loop or duplicate: draw again
  }
  printf("\r RMat: nodes:%d, edges:%d, Iterations:%d, Collisions:%d (%.1f%%).\n", Nodes, Edges, Edges+Collisions, Collisions, 100*Collisions/double(Edges+Collisions));
  Graph.Defrag();
  return GraphPt;
}
/// Builds a random Euclidean hash function: Line receives Dim normal deviates
/// followed by one uniform integer below Gap.
TLSHash::EuclideanHash::EuclideanHash(TRnd &Gen, int Dim) {
  int CoordN = 0;
  while (CoordN < Dim) {
    Line.Add(Gen.GetNrmDev());
    ++CoordN;
  }
  // last entry: the random integer drawn below Gap
  Line.Add(Gen.GetUniDevInt(Gap));
}