/// Generates a small-world graph using the Watts-Strogatz model. /// We assume a circle where each node creates links to NodeOutDeg other nodes. /// This way at the end each node is connected to 2*NodeOutDeg other nodes. /// See: Collective dynamics of 'small-world' networks. Watts and Strogatz. /// URL: http://research.yahoo.com/files/w_s_NATURE_0.pdf PUNGraph GenSmallWorld(const int& Nodes, const int& NodeOutDeg, const double& RewireProb, TRnd& Rnd) { THashSet<TIntPr> EdgeSet(Nodes*NodeOutDeg); IAssertR(Nodes > NodeOutDeg, TStr::Fmt("Insufficient nodes for out degree, %d!", NodeOutDeg)); for (int node = 0; node < Nodes; node++) { const int src = node; for (int edge = 1; edge <= NodeOutDeg; edge++) { int dst = (node+edge) % Nodes; // edge to next neighbor if (Rnd.GetUniDev() < RewireProb) { // random edge dst = Rnd.GetUniDevInt(Nodes); while (dst == src || EdgeSet.IsKey(TIntPr(src, dst))) { dst = Rnd.GetUniDevInt(Nodes); } } EdgeSet.AddKey(TIntPr(src, dst)); } } PUNGraph GraphPt = TUNGraph::New(); TUNGraph& Graph = *GraphPt; Graph.Reserve(Nodes, EdgeSet.Len()); int node; for (node = 0; node < Nodes; node++) { IAssert(Graph.AddNode(node) == node); } for (int edge = 0; edge < EdgeSet.Len(); edge++) { Graph.AddEdge(EdgeSet[edge].Val1, EdgeSet[edge].Val2); } Graph.Defrag(); return GraphPt; }
int main() { TLSHash LSH(7, 7, DIM, TLSHash::EUCLIDEAN); LSH.Init(); TRnd Gen; Gen.Randomize(); TVec<TFltV> DataV; for (int i=0; i<1000000; i++) { TFltV Datum; for (int j=0; j<3; j++) { Datum.Add(Gen.GetUniDev()*2100); } DataV.Add(Datum); } LSH.AddV(DataV); TVec<TPair<TFltV, TFltV> > NeighborsV = LSH.GetAllCandidatePairs(); printf("Number of Candidates: %d\n", NeighborsV.Len()); NeighborsV = LSH.GetAllNearPairs(); printf("Number of Close Pairs: %d\n", NeighborsV.Len()); for (int i=0; i<NeighborsV.Len(); i++) { outputPoint(NeighborsV[i].GetVal1()); printf(" "); outputPoint(NeighborsV[i].GetVal2()); printf("\n"); } return 0; }
//Initialize positive embeddings void InitPosEmb(TIntV& Vocab, int& Dimensions, TRnd& Rnd, TVVec<TFlt, int64>& SynPos) { SynPos = TVVec<TFlt, int64>(Vocab.Len(),Dimensions); for (int64 i = 0; i < SynPos.GetXDim(); i++) { for (int j = 0; j < SynPos.GetYDim(); j++) { SynPos(i,j) =(Rnd.GetUniDev()-0.5)/Dimensions; } } }
/// Generates a random scale-free network using the Copying Model. /// The generating process operates as follows: Node u is added to a graph, it /// selects a random node v, and with prob Beta it links to v, with 1-Beta /// links u links to neighbor of v. The power-law degree exponent is -1/(1-Beta). /// See: Stochastic models for the web graph. /// Kumar, Raghavan, Rajagopalan, Sivakumar, Tomkins, Upfal. /// URL: http://snap.stanford.edu/class/cs224w-readings/kumar00stochastic.pdf PNGraph GenCopyModel(const int& Nodes, const double& Beta, TRnd& Rnd) { PNGraph GraphPt = TNGraph::New(); TNGraph& Graph = *GraphPt; Graph.Reserve(Nodes, Nodes); const int startNId = Graph.AddNode(); Graph.AddEdge(startNId, startNId); for (int n = 1; n < Nodes; n++) { const int rnd = Graph.GetRndNId(); const int NId = Graph.AddNode(); if (Rnd.GetUniDev() < Beta) { Graph.AddEdge(NId, rnd); } else { const TNGraph::TNodeI NI = Graph.GetNI(rnd); const int rnd2 = Rnd.GetUniDevInt(NI.GetOutDeg()); Graph.AddEdge(NId, NI.GetOutNId(rnd2)); } } return GraphPt; }
void TTransCorpus::Shuffle(const PTransCorpus& InFirstTransCorpus, const PTransCorpus& InSecondTransCorpus, TRnd& Rnd, const double& SwapProb, PTransCorpus& OutFirstTransCorpus, PTransCorpus& OutSecondTransCorpus) { // prepare new corpuses OutFirstTransCorpus = TTransCorpus::New(); OutSecondTransCorpus = TTransCorpus::New(); // swap sentences TIntV FirstSentIdV, SecondSentIdV; InFirstTransCorpus->GetSentIdV(FirstSentIdV); InSecondTransCorpus->GetSentIdV(SecondSentIdV); for (int SentIdN = 0; SentIdN < FirstSentIdV.Len(); SentIdN++) { // get sentence id const int SentId = FirstSentIdV[SentIdN]; // check if id same in both cases IAssert(SecondSentIdV[SentIdN] == SentId); // read sentences TStr OrgStr1 = InFirstTransCorpus->GetOrgStr(SentId); TStr OrgStr2 = InSecondTransCorpus->GetOrgStr(SentId); IAssert(OrgStr1 == OrgStr2 ); TStr RefTransStr1 = InFirstTransCorpus->GetRefTransStrV(SentId)[0]; TStr RefTransStr2 = InSecondTransCorpus->GetRefTransStrV(SentId)[0]; IAssert(RefTransStr1 == RefTransStr2); TStr FirstTransStr = InFirstTransCorpus->GetTransStr(SentId); TStr SecondTransStr = InSecondTransCorpus->GetTransStr(SentId); // swap sentences if (Rnd.GetUniDev() < SwapProb) { // we swap OutFirstTransCorpus->AddSentence(SentId, OrgStr1, SecondTransStr, RefTransStr1); OutSecondTransCorpus->AddSentence(SentId, OrgStr1, FirstTransStr, RefTransStr1); } else { // no swap OutFirstTransCorpus->AddSentence(SentId, OrgStr1, FirstTransStr, RefTransStr1); OutSecondTransCorpus->AddSentence(SentId, OrgStr1, SecondTransStr, RefTransStr1); } } }
/// R-MAT Generator. The modes is based on the recursive descent into a 2x2 /// matrix [A,B; C, 1-(A+B+C)]. /// See: R-MAT Generator: A Recursive Model for Graph Mining. /// D. Chakrabarti, Y. Zhan and C. Faloutsos, in SIAM Data Mining 2004. /// URL: http://www.cs.cmu.edu/~deepay/mywww/papers/siam04.pdf PNGraph GenRMat(const int& Nodes, const int& Edges, const double& A, const double& B, const double& C, TRnd& Rnd) { PNGraph GraphPt = TNGraph::New(); TNGraph& Graph = *GraphPt; Graph.Reserve(Nodes, Edges); IAssert(A+B+C < 1.0); int rngX, rngY, offX, offY; int Depth=0, Collisions=0, Cnt=0, PctDone=0; const int EdgeGap = Edges / 100 + 1; // sum of parameters (probabilities) TVec<double> sumA(128, 0), sumAB(128, 0), sumAC(128, 0), sumABC(128, 0); // up to 2^128 vertices ~ 3.4e38 for (int i = 0; i < 128; i++) { const double a = A * (Rnd.GetUniDev() + 0.5); const double b = B * (Rnd.GetUniDev() + 0.5); const double c = C * (Rnd.GetUniDev() + 0.5); const double d = (1.0 - (A+B+C)) * (Rnd.GetUniDev() + 0.5); const double abcd = a+b+c+d; sumA.Add(a / abcd); sumAB.Add((a+b) / abcd); sumAC.Add((a+c) / abcd); sumABC.Add((a+b+c) / abcd); } // nodes for (int node = 0; node < Nodes; node++) { IAssert(Graph.AddNode(-1) == node); } // edges for (int edge = 0; edge < Edges; ) { rngX = Nodes; rngY = Nodes; offX = 0; offY = 0; Depth = 0; // recurse the matrix while (rngX > 1 || rngY > 1) { const double RndProb = Rnd.GetUniDev(); if (rngX>1 && rngY>1) { if (RndProb < sumA[Depth]) { rngX/=2; rngY/=2; } else if (RndProb < sumAB[Depth]) { offX+=rngX/2; rngX-=rngX/2; rngY/=2; } else if (RndProb < sumABC[Depth]) { offY+=rngY/2; rngX/=2; rngY-=rngY/2; } else { offX+=rngX/2; offY+=rngY/2; rngX-=rngX/2; rngY-=rngY/2; } } else if (rngX>1) { // row vector if (RndProb < sumAC[Depth]) { rngX/=2; rngY/=2; } else { offX+=rngX/2; rngX-=rngX/2; rngY/=2; } } else if (rngY>1) { // column vector if (RndProb < sumAB[Depth]) { rngX/=2; rngY/=2; } else { offY+=rngY/2; rngX/=2; rngY-=rngY/2; } } else { Fail; 
} Depth++; } // add edge const int NId1 = offX; const int NId2 = offY; if (NId1 != NId2 && ! Graph.IsEdge(NId1, NId2)) { Graph.AddEdge(NId1, NId2); if (++Cnt > EdgeGap) { Cnt=0; printf("\r %d%% edges", ++PctDone); } edge++; } else { Collisions++; } } printf("\r RMat: nodes:%d, edges:%d, Iterations:%d, Collisions:%d (%.1f%%).\n", Nodes, Edges, Edges+Collisions, Collisions, 100*Collisions/double(Edges+Collisions)); Graph.Defrag(); return GraphPt; }
// Draws one value using the two lookup tables (presumably alias-method tables
// for a unigram distribution -- confirm against the code that builds them).
// A slot of KTable is picked uniformly at random and its entry X is read; a
// second uniform draw Y then decides between returning X itself (when
// Y < UTable[X]) and returning KTable[X].
int64 RndUnigramInt(TIntV& KTable, TFltV& UTable, TRnd& Rnd) {
  const int64 Slot = static_cast<int64>(Rnd.GetUniDev()*KTable.Len());
  const TInt X = KTable[Slot];
  const double Y = Rnd.GetUniDev();
  if (Y < UTable[X]) {
    return X;
  }
  return KTable[X];
}