예제 #1
0
int main() {
  TLSHash LSH(7, 7, DIM, TLSHash::EUCLIDEAN);
  LSH.Init();

  TRnd Gen;
  Gen.Randomize();

  TVec<TFltV> DataV;
  for (int i=0; i<1000000; i++) {
    TFltV Datum;
    for (int j=0; j<3; j++) {
      Datum.Add(Gen.GetUniDev()*2100);
    }
    DataV.Add(Datum);
  }
  LSH.AddV(DataV);
  
  TVec<TPair<TFltV, TFltV> > NeighborsV = LSH.GetAllCandidatePairs();
  printf("Number of Candidates: %d\n", NeighborsV.Len());

  NeighborsV = LSH.GetAllNearPairs();
  printf("Number of Close Pairs: %d\n", NeighborsV.Len());
  for (int i=0; i<NeighborsV.Len(); i++) {
    outputPoint(NeighborsV[i].GetVal1());
    printf(" ");
    outputPoint(NeighborsV[i].GetVal2());
    printf("\n");
  }
  return 0;
}
예제 #2
0
/// Generates a small-world graph using the Watts-Strogatz model.
/// We assume a circle where each node creates links to NodeOutDeg other nodes. 
/// This way at the end each node is connected to 2*NodeOutDeg other nodes.
/// See: Collective dynamics of 'small-world' networks. Watts and Strogatz.
/// URL: http://research.yahoo.com/files/w_s_NATURE_0.pdf
PUNGraph GenSmallWorld(const int& Nodes, const int& NodeOutDeg, const double& RewireProb, TRnd& Rnd) {
  THashSet<TIntPr> EdgeSet(Nodes*NodeOutDeg);
  
  IAssertR(Nodes > NodeOutDeg, TStr::Fmt("Insufficient nodes for out degree, %d!", NodeOutDeg));
  for (int node = 0; node < Nodes; node++) {
    const int src = node;
    for (int edge = 1; edge <= NodeOutDeg; edge++) {
      int dst = (node+edge) % Nodes;      // edge to next neighbor
      if (Rnd.GetUniDev() < RewireProb) { // random edge
        dst = Rnd.GetUniDevInt(Nodes);
        while (dst == src || EdgeSet.IsKey(TIntPr(src, dst))) {
          dst = Rnd.GetUniDevInt(Nodes); }
      }
      EdgeSet.AddKey(TIntPr(src, dst));
    }
  }
  PUNGraph GraphPt = TUNGraph::New();
  TUNGraph& Graph = *GraphPt;
  Graph.Reserve(Nodes, EdgeSet.Len());
  int node;
  for (node = 0; node < Nodes; node++) {
    IAssert(Graph.AddNode(node) == node);
  }
  for (int edge = 0; edge < EdgeSet.Len(); edge++) {
    Graph.AddEdge(EdgeSet[edge].Val1, EdgeSet[edge].Val2);
  }
  Graph.Defrag();
  return GraphPt;
}
예제 #3
0
PBPGraph GenRndBipart(const int& LeftNodes, const int& RightNodes, const int& Edges, TRnd& Rnd) {
  PBPGraph G = TBPGraph::New();
  for (int i = 0; i < LeftNodes; i++) { G->AddNode(i, true); }
  for (int i = 0; i < RightNodes; i++) { G->AddNode(LeftNodes+i, false); }
  IAssertR(Edges <= LeftNodes*RightNodes, "Too many edges in the bipartite graph!");
  for (int edges = 0; edges < Edges; ) {
    const int LNId = Rnd.GetUniDevInt(LeftNodes);
    const int RNId = LeftNodes + Rnd.GetUniDevInt(RightNodes);
    if (G->AddEdge(LNId, RNId) != -2) { edges++; } // is new edge
  }
  return G;
}
예제 #4
0
void TAGM::RndConnectInsideCommunity(PUNGraph& Graph, const TIntV& CmtyV, const double& Prob, TRnd& Rnd){
	int CNodes = CmtyV.Len();
	int CEdges = Rnd.GetBinomialDev(Prob,CNodes*(CNodes-1)/2);
	THashSet<TIntPr> NewEdgeSet(CEdges);
	for (int edge = 0; edge < CEdges; ) {
		int SrcNId = CmtyV[Rnd.GetUniDevInt(CNodes)];
		int DstNId = CmtyV[Rnd.GetUniDevInt(CNodes)];
		if(SrcNId>DstNId){Swap(SrcNId,DstNId);}
		if (SrcNId != DstNId && !NewEdgeSet.IsKey(TIntPr(SrcNId,DstNId))) { // is new edge
			NewEdgeSet.AddKey(TIntPr(SrcNId,DstNId));
			Graph->AddEdge(SrcNId,DstNId);
			edge++; 
		} 
	}
}
예제 #5
0
파일: graph.cpp 프로젝트: arbenson/snap
int TBPGraph::GetRndNId(TRnd& Rnd) { 
  const int NNodes = GetNodes();
  if (Rnd.GetUniDevInt(NNodes) < GetLNodes()) {
    return GetRndLNId(Rnd); }
  else {
    return GetRndRNId(Rnd); }
}
예제 #6
0
THash<TInt, TInt> * choose_seeds (const PUNGraph g, const int num, const int * infection_state, const int infect) {

  THash<TInt, TInt> choices; 
  THash<TInt, TUNGraph::TNode> nodes;
  THash<TInt, TInt> * output = new THash<TInt, TInt> ();
  TInt weight = 0;
  TInt num_total = 0;
  for (TUNGraph::TNodeI n = g->BegNI(); n != g->EndNI(); n++) {
    //cout << "nodeID: " << n.GetId() << ",\tStatus: " << infection_state[n.GetId () - 1] << endl;
    if (infection_state[n.GetId () - 1] != infect) {
      weight += n.GetDeg ();
      choices.AddDat (num_total, weight);
      nodes.AddDat (num_total, n.GetId());
      num_total++;
    }
  }
  //  TRnd random ((int) time(NULL));
  // TRnd random (0);
  TInt num_chosen = 0;
  while (num_chosen < num) {
    TInt choice = my_random.GetUniDevInt (weight);
    TUNGraph::TNode node_choice = nodes[find (choice, choices, 0,  num_total-1)];
    if (!output->IsKey(node_choice.GetId())) {
      num_chosen++;
      // cout << node_choice.GetId () << "\n";
      output->AddDat(node_choice.GetId (), 1);
    }
  }
  return output;
}
예제 #7
0
파일: test.cpp 프로젝트: IsmaelAli/snap
bool test(PGraph &graph, bool followOut, bool followIn) {
  printf("\n================================\nFollowOut: %d, FollowIn: %d\n", followOut, followIn);
  int iters = 10;
  for (int k = 0; k < iters; k++) {
    TRnd rnd = TRnd((int)time(0));
    int start = graph->GetRndNId(rnd);
    rnd.PutSeed(0);
//    int target = graph->GetRndNId(rnd);
//    printf("Start node: %d, target node: %d\n", start, target);
    int target = -1;
    printf("Start node: %d\n", start);

    struct timeval tv1, tv2;
    gettimeofday(&tv1, NULL);

    /* Hybrid */
    TBreathFS<PGraph> bfs_hybrid(graph, true);
    int maxDist_hybrid = bfs_hybrid.DoBfsHybrid(start, followOut, followIn, target);

    gettimeofday(&tv2, NULL);
    double time_hybrid = timeInSeconds(tv1, tv2);

    /* Original */
    gettimeofday(&tv1, NULL);

    TBreathFS<PGraph> bfs(graph, true);
    int maxDist = bfs.DoBfs(start, followOut, followIn, target);

    gettimeofday(&tv2, NULL);
    double time = timeInSeconds(tv1, tv2);

    /* Check results */
    if (maxDist_hybrid != maxDist) {
      printf("MaxDist incorrect.\n");
      return false;
    }
    if (target == -1) {
      if (!checkResults<PGraph>(bfs_hybrid, bfs)) {
        printf("NIdDistH values incorrect!\n");
        return false;
      }
    }

    printf("Execution times: Original: %.2f, Hybrid: %.2f\n", time, time_hybrid);
  }
  return true;
}
예제 #8
0
파일: word2vec.cpp 프로젝트: jsw883/snap
//Initialize positive embeddings
void InitPosEmb(TIntV& Vocab, int& Dimensions, TRnd& Rnd, TVVec<TFlt, int64>& SynPos) {
  SynPos = TVVec<TFlt, int64>(Vocab.Len(),Dimensions);
  for (int64 i = 0; i < SynPos.GetXDim(); i++) {
    for (int j = 0; j < SynPos.GetYDim(); j++) {
      SynPos(i,j) =(Rnd.GetUniDev()-0.5)/Dimensions;
    }
  }
}
예제 #9
0
파일: lsh.cpp 프로젝트: EDzhangjianyu/snap
void TLSHash::Init() {
  TRnd Gen;
  Gen.Randomize();

  for (int i=0; i<Bands*Rows; i++) {
    if (Type == JACCARD) {
      HashFuncV.Add(TPt<HashFunc>(new JaccardHash(Gen, Dim)));
    } else if (Type == COSINE) {
      HashFuncV.Add(TPt<HashFunc>(new CosineHash(Gen, Dim)));
    } else {
      HashFuncV.Add(TPt<HashFunc>(new EuclideanHash(Gen, Dim)));
    }
  }

  for (int i=0; i<Bands; i++) {
    SigBucketVHV.Add(THash<TInt, TIntV> (ExpectedSz, true));
  }
}
예제 #10
0
///Generate sequence from Power law
void TAGMUtil::GenPLSeq(TIntV& SzSeq, const int& SeqLen, const double& Alpha, TRnd& Rnd, const int& Min, const int& Max) {
    SzSeq.Gen(SeqLen, 0);
    while (SzSeq.Len() < SeqLen) {
        int Sz = (int) TMath::Round(Rnd.GetPowerDev(Alpha));
        if (Sz >= Min && Sz <= Max) {
            SzSeq.Add(Sz);
        }
    }
}
예제 #11
0
/// Generates a random scale-free network using the Copying Model.
/// The generating process operates as follows: Node u is added to a graph, it
/// selects a random node v, and with prob Beta it links to v, with 1-Beta 
/// links u links to neighbor of v. The power-law degree exponent is -1/(1-Beta).
/// See: Stochastic models for the web graph.
/// Kumar, Raghavan, Rajagopalan, Sivakumar, Tomkins, Upfal.
/// URL: http://snap.stanford.edu/class/cs224w-readings/kumar00stochastic.pdf
PNGraph GenCopyModel(const int& Nodes, const double& Beta, TRnd& Rnd) {
  PNGraph GraphPt = TNGraph::New();
  TNGraph& Graph = *GraphPt;
  Graph.Reserve(Nodes, Nodes);
  const int startNId = Graph.AddNode();
  Graph.AddEdge(startNId, startNId);
  for (int n = 1; n < Nodes; n++) {
    const int rnd = Graph.GetRndNId();
    const int NId = Graph.AddNode();
    if (Rnd.GetUniDev() < Beta) {
      Graph.AddEdge(NId, rnd); }
    else {
      const TNGraph::TNodeI NI = Graph.GetNI(rnd);
      const int rnd2 = Rnd.GetUniDevInt(NI.GetOutDeg());
      Graph.AddEdge(NId, NI.GetOutNId(rnd2));
    }
  }
  return GraphPt;
}
예제 #12
0
/// Sample random point from the surface of a Dim-dimensional unit sphere.
void GetSphereDev(const int& Dim, TRnd& Rnd, TFltV& ValV) {
  if (ValV.Len() != Dim) { ValV.Gen(Dim); }
  double Length = 0.0;
  for (int i = 0; i < Dim; i++) {
    ValV[i] = Rnd.GetNrmDev();
    Length += TMath::Sqr(ValV[i]); }
  Length = 1.0 / sqrt(Length);
  for (int i = 0; i < Dim; i++) {
    ValV[i] *= Length;
  }
}
예제 #13
0
/// Generates a random undirect graph with a given degree sequence DegSeqV.
/// Configuration model operates as follows. For each node N, of degree
/// DeqSeqV[N] we create DeqSeqV[N] spokes (half-edges). We then pick two
/// spokes at random, and connect the spokes endpoints. We continue this
/// process until no spokes are left. Generally this generates a multigraph
/// (i.e., spokes out of same nodes can be chosen multiple times).We ignore
/// (discard) self-loops and multiple edges. Thus, the generated graph will
/// only approximate follow the given degree sequence. The method is very fast!
PUNGraph GenConfModel(const TIntV& DegSeqV, TRnd& Rnd) {
  const int Nodes = DegSeqV.Len();
  PUNGraph GraphPt = TUNGraph::New();
  TUNGraph& Graph = *GraphPt;
  Graph.Reserve(Nodes, -1);
  TIntV NIdDegV(DegSeqV.Len(), 0);
  int DegSum=0, edges=0;
  for (int node = 0; node < Nodes; node++) {
    Graph.AddNode(node);
    for (int d = 0; d < DegSeqV[node]; d++) { NIdDegV.Add(node); }
    DegSum += DegSeqV[node];
  }
  NIdDegV.Shuffle(Rnd);
  TIntPrSet EdgeH(DegSum/2); // set of all edges, is faster than graph edge lookup
  if (DegSum % 2 != 0) {
    printf("Seg seq is odd [%d]: ", DegSeqV.Len());
    for (int d = 0; d < TMath::Mn(100, DegSeqV.Len()); d++) { printf("  %d", (int)DegSeqV[d]); }
    printf("\n");
  }
  int u=0, v=0;
  for (int c = 0; NIdDegV.Len() > 1; c++) {
    u = Rnd.GetUniDevInt(NIdDegV.Len());
    while ((v = Rnd.GetUniDevInt(NIdDegV.Len())) == u) { }
    if (u > v) { Swap(u, v); }
    const int E1 = NIdDegV[u];
    const int E2 = NIdDegV[v];
    if (v == NIdDegV.Len()-1) { NIdDegV.DelLast(); } 
    else { NIdDegV[v] = NIdDegV.Last();  NIdDegV.DelLast(); }
    if (u == NIdDegV.Len()-1) { NIdDegV.DelLast(); } 
    else { NIdDegV[u] = NIdDegV.Last();  NIdDegV.DelLast(); }
    if (E1 == E2 || EdgeH.IsKey(TIntPr(E1, E2))) { continue; }
    EdgeH.AddKey(TIntPr(E1, E2));
    Graph.AddEdge(E1, E2);
    edges++;
    if (c % (DegSum/100+1) == 0) { printf("\r configuration model: iter %d: edges: %d, left: %d", c, edges, NIdDegV.Len()/2); }
  }
  printf("\n");
  return GraphPt;
}
예제 #14
0
PGraph GenRndGnm(const int& Nodes, const int& Edges, const bool& IsDir, TRnd& Rnd) {
  PGraph GraphPt = PGraph::New();
  typename PGraph::TObj& Graph = *GraphPt;
  Graph.Reserve(Nodes, Edges);
  for (int node = 0; node < Nodes; node++) {
    IAssert(Graph.AddNode(node) == node);
  }
  for (int edge = 0; edge < Edges; ) {
    const int SrcNId = Rnd.GetUniDevInt(Nodes);
    const int DstNId = Rnd.GetUniDevInt(Nodes);
    if (SrcNId != DstNId && Graph.AddEdge(SrcNId, DstNId) != -2) {
      if (! IsDir) { Graph.AddEdge(DstNId, SrcNId); }
      edge++;
    }
  }
  return GraphPt;
}
예제 #15
0
/// Generates a random scale-free graph using the Geometric Preferential
/// Attachment model by Flexman, Frieze and Vera.
/// See: A geometric preferential attachment model of networks by Flexman,
/// Frieze and Vera. WAW 2004.
/// URL: http://math.cmu.edu/~af1p/Texfiles/GeoWeb.pdf
PUNGraph GenGeoPrefAttach(const int& Nodes, const int& OutDeg, const double& Beta, TRnd& Rnd) {
  PUNGraph G = TUNGraph::New(Nodes, Nodes*OutDeg);
  TFltTrV PointV(Nodes, 0);
  TFltV ValV;
  // points on a sphere of radius 1/(2*pi)
  const double Rad = 0.5 * TMath::Pi;
  for (int i = 0; i < Nodes; i++) {
    TSnapDetail::GetSphereDev(3, Rnd, ValV);
    PointV.Add(TFltTr(Rad*ValV[0], Rad*ValV[1], Rad*ValV[2]));
  }
  const double R2 = TMath::Sqr(log((double) Nodes) / (pow((double) Nodes, 0.5-Beta)));
  TIntV DegV, NIdV;
  int SumDeg;
  for (int t = 0; t < Nodes; t++) {
    const int pid = t;
    const TFltTr& P1 = PointV[pid];
    // add node
    if (! G->IsNode(pid)) { G->AddNode(pid); }
    // find neighborhood
    DegV.Clr(false);  NIdV.Clr(false);  SumDeg=0;
    for (int p = 0; p < t; p++) {
      const TFltTr& P2 = PointV[p];
      if (TMath::Sqr(P1.Val1-P2.Val1)+TMath::Sqr(P1.Val2-P2.Val2)+TMath::Sqr(P1.Val3-P2.Val3) < R2) {
        NIdV.Add(p);
        DegV.Add(G->GetNI(p).GetDeg()+1);
        SumDeg += DegV.Last();
      }
    }
    // add edges
    for (int m = 0; m < OutDeg; m++) {
      const int rnd = Rnd.GetUniDevInt(SumDeg);
      int sum = 0, dst = -1;
      for (int s = 0; s < DegV.Len(); s++) {
        sum += DegV[s];
        if (rnd < sum) { dst=s;  break; }
      }
      if (dst != -1) {
        G->AddEdge(pid, NIdV[dst]);
        SumDeg -= DegV[dst];
        NIdV.Del(dst);  DegV.Del(dst);
      }
    }
  }
  return G;
}
예제 #16
0
  THash <TInt, TInt> * choose (const TInt & population_size, const TInt & sample_size) {

    THash <TInt, TInt> * hits = new THash <TInt, TInt> ();
    //TRnd random ((int)time(NULL));
    //TRnd random (0);
    TInt min = TMath::Mn<TInt> (population_size, sample_size);

    for (int i = 0; i < min; i++) {

      TInt chosen = my_random.GetUniDevInt (population_size - i);
      if (hits->IsKey (chosen)) {

        hits->AddDat((*hits)(chosen), population_size - i - 1);
      }
      hits->AddDat(chosen, population_size - i - 1);
    }  
    return hits; 
  }
예제 #17
0
/// rewire bipartite community affiliation graphs
void TAGMUtil::RewireCmtyNID(THash<TInt,TIntV >& CmtyVH, TRnd& Rnd) {
    THash<TInt,TIntV > NewCmtyVH(CmtyVH.Len());
    TIntV NDegV;
    TIntV CDegV;
    for (int i = 0; i < CmtyVH.Len(); i++) {
        int CID = CmtyVH.GetKey(i);
        for (int j = 0; j < CmtyVH[i].Len(); j++) {
            int NID = CmtyVH[i][j];
            NDegV.Add(NID);
            CDegV.Add(CID);
        }
    }
    TIntPrSet CNIDSet(CDegV.Len());
    int c=0;
    while (c++ < 15 && CDegV.Len() > 1) {
        for (int i = 0; i < CDegV.Len(); i++) {
            int u = Rnd.GetUniDevInt(CDegV.Len());
            int v = Rnd.GetUniDevInt(NDegV.Len());
            if (CNIDSet.IsKey(TIntPr(CDegV[u], NDegV[v]))) {
                continue;
            }
            CNIDSet.AddKey(TIntPr(CDegV[u], NDegV[v]));
            if (u == CDegV.Len() - 1) {
                CDegV.DelLast();
            }  else {
                CDegV[u] = CDegV.Last();
                CDegV.DelLast();
            }
            if ( v == NDegV.Len() - 1) {
                NDegV.DelLast();
            }  else {
                NDegV[v] = NDegV.Last();
                NDegV.DelLast();
            }
        }
    }
    for (int i = 0; i < CNIDSet.Len(); i++) {
        TIntPr CNIDPr = CNIDSet[i];
        IAssert(CmtyVH.IsKey(CNIDPr.Val1));
        NewCmtyVH.AddDat(CNIDPr.Val1);
        NewCmtyVH.GetDat(CNIDPr.Val1).Add(CNIDPr.Val2);
    }
    CmtyVH = NewCmtyVH;
}
예제 #18
0
/// Generates a random scale-free graph with power-law degree distribution with
/// exponent PowerExp. The method uses either the Configuration model (fast but
/// the result is approximate) or the Edge Rewiring method (slow but exact).
PUNGraph GenRndPowerLaw(const int& Nodes, const double& PowerExp, const bool& ConfModel, TRnd& Rnd) {
  TIntV DegSeqV;
  uint DegSum=0;
  for (int n = 0; n < Nodes; n++) {
    const int Val = (int) TMath::Round(Rnd.GetPowerDev(PowerExp));
    if (! (Val >= 1 && Val < Nodes/2)) { n--; continue; } // skip nodes with too large degree
    DegSeqV.Add(Val);
    DegSum += Val;
  }
  printf("%d nodes, %u edges\n", Nodes, DegSum);
  if (DegSum % 2 == 1) { DegSeqV[0] += 1; }
  if (ConfModel) {
    // use configuration model -- fast but does not exactly obey the degree sequence
    return GenConfModel(DegSeqV, Rnd);
  } else {
    PUNGraph G = TSnap::GenDegSeq(DegSeqV, Rnd);
    return TSnap::GenRewire(G, 10, Rnd);
  }
}
예제 #19
0
void TTransCorpus::Shuffle(const PTransCorpus& InFirstTransCorpus, 
        const PTransCorpus& InSecondTransCorpus, TRnd& Rnd, const double& SwapProb,
        PTransCorpus& OutFirstTransCorpus, PTransCorpus& OutSecondTransCorpus) {

    // prepare new corpuses
    OutFirstTransCorpus = TTransCorpus::New();
    OutSecondTransCorpus = TTransCorpus::New();
    // swap sentences
    TIntV FirstSentIdV, SecondSentIdV;
    InFirstTransCorpus->GetSentIdV(FirstSentIdV);
    InSecondTransCorpus->GetSentIdV(SecondSentIdV);
    for (int SentIdN = 0; SentIdN < FirstSentIdV.Len(); SentIdN++) {
        // get sentence id
        const int SentId = FirstSentIdV[SentIdN];
        // check if id same in both cases
        IAssert(SecondSentIdV[SentIdN] == SentId);
        // read sentences
        TStr OrgStr1 = InFirstTransCorpus->GetOrgStr(SentId);
        TStr OrgStr2 = InSecondTransCorpus->GetOrgStr(SentId);
        IAssert(OrgStr1 == OrgStr2 );
        TStr RefTransStr1 = InFirstTransCorpus->GetRefTransStrV(SentId)[0];
        TStr RefTransStr2 = InSecondTransCorpus->GetRefTransStrV(SentId)[0];
        IAssert(RefTransStr1 == RefTransStr2);
        TStr FirstTransStr = InFirstTransCorpus->GetTransStr(SentId);
        TStr SecondTransStr = InSecondTransCorpus->GetTransStr(SentId);
        // swap sentences
        if (Rnd.GetUniDev() < SwapProb) { 
            // we swap
            OutFirstTransCorpus->AddSentence(SentId, OrgStr1, SecondTransStr, RefTransStr1);
            OutSecondTransCorpus->AddSentence(SentId, OrgStr1, FirstTransStr, RefTransStr1);
        } else {
            // no swap
            OutFirstTransCorpus->AddSentence(SentId, OrgStr1, FirstTransStr, RefTransStr1);
            OutSecondTransCorpus->AddSentence(SentId, OrgStr1, SecondTransStr, RefTransStr1);
        }
    }
}
예제 #20
0
///Generate bipartite community affiliation from given power law coefficients for membership distribution and community size distribution.
void TAGMUtil::ConnectCmtyVV(TVec<TIntV>& CmtyVV, const TIntPrV& CIDSzPrV, const TIntPrV& NIDMemPrV, TRnd& Rnd) {
    const int Nodes = NIDMemPrV.Len(), Coms = CIDSzPrV.Len();
    TIntV NDegV,CDegV;
    TIntPrSet CNIDSet;
    TIntSet HitNodes(Nodes);
    THash<TInt,TIntV> CmtyVH;
    for (int i = 0; i < CIDSzPrV.Len(); i++) {
        for (int j = 0; j < CIDSzPrV[i].Val2; j++) {
            CDegV.Add(CIDSzPrV[i].Val1);
        }
    }
    for (int i = 0; i < NIDMemPrV.Len(); i++) {
        for (int j = 0; j < NIDMemPrV[i].Val2; j++) {
            NDegV.Add(NIDMemPrV[i].Val1);
        }
    }
    while (CDegV.Len() < (int) (1.2 * Nodes)) {
        CDegV.Add(CIDSzPrV[Rnd.GetUniDevInt(Coms)].Val1);
    }
    while (NDegV.Len() < CDegV.Len()) {
        NDegV.Add(NIDMemPrV[Rnd.GetUniDevInt(Nodes)].Val1);
    }
    printf("Total Mem: %d, Total Sz: %d\n",NDegV.Len(), CDegV.Len());
    int c=0;
    while (c++ < 15 && CDegV.Len() > 1) {
        for (int i = 0; i < CDegV.Len(); i++) {
            int u = Rnd.GetUniDevInt(CDegV.Len());
            int v = Rnd.GetUniDevInt(NDegV.Len());
            if (CNIDSet.IsKey(TIntPr(CDegV[u], NDegV[v]))) {
                continue;
            }
            CNIDSet.AddKey(TIntPr(CDegV[u], NDegV[v]));
            HitNodes.AddKey(NDegV[v]);
            if (u == CDegV.Len() - 1) {
                CDegV.DelLast();
            }
            else {
                CDegV[u] = CDegV.Last();
                CDegV.DelLast();
            }
            if (v == NDegV.Len() - 1) {
                NDegV.DelLast();
            }
            else {
                NDegV[v] = NDegV.Last();
                NDegV.DelLast();
            }
        }
    }
    //make sure that every node belongs to at least one community
    for (int i = 0; i < Nodes; i++) {
        int NID = NIDMemPrV[i].Val1;
        if (! HitNodes.IsKey(NID)) {
            CNIDSet.AddKey(TIntPr(CIDSzPrV[Rnd.GetUniDevInt(Coms)].Val1, NID));
            HitNodes.AddKey(NID);
        }
    }
    IAssert(HitNodes.Len() == Nodes);
    for (int i = 0; i < CNIDSet.Len(); i++) {
        TIntPr CNIDPr = CNIDSet[i];
        CmtyVH.AddDat(CNIDPr.Val1);
        CmtyVH.GetDat(CNIDPr.Val1).Add(CNIDPr.Val2);
    }
    CmtyVH.GetDatV(CmtyVV);
}
예제 #21
0
파일: word2vec.cpp 프로젝트: jsw883/snap
int64 RndUnigramInt(TIntV& KTable, TFltV& UTable, TRnd& Rnd) {
  TInt X = KTable[static_cast<int64>(Rnd.GetUniDev()*KTable.Len())];
  double Y = Rnd.GetUniDev();
  return Y < UTable[X] ? X : KTable[X];
}
예제 #22
0
파일: word2vec.cpp 프로젝트: jsw883/snap
void TrainModel(TVVec<TInt, int64>& WalksVV, int& Dimensions, int& WinSize, int& Iter, bool& Verbose,
   TIntV& KTable, TFltV& UTable, int64& WordCntAll, TFltV& ExpTable, double& Alpha,
   int64 CurrWalk, TRnd& Rnd, TVVec<TFlt, int64>& SynNeg, TVVec<TFlt, int64>& SynPos)  {
  TFltV Neu1V(Dimensions);
  TFltV Neu1eV(Dimensions);
  int64 AllWords = WalksVV.GetXDim()*WalksVV.GetYDim();
  TIntV WalkV(WalksVV.GetYDim());
  for (int j = 0; j < WalksVV.GetYDim(); j++) { WalkV[j] = WalksVV(CurrWalk,j); }
  for (int64 WordI=0; WordI<WalkV.Len(); WordI++) {
    if ( WordCntAll%10000 == 0 ) {
      if ( Verbose ) {
        printf("\rLearning Progress: %.2lf%% ",(double)WordCntAll*100/(double)(Iter*AllWords));
        fflush(stdout);
      }
      Alpha = StartAlpha * (1 - WordCntAll / static_cast<double>(Iter * AllWords + 1));
      if ( Alpha < StartAlpha * 0.0001 ) { Alpha = StartAlpha * 0.0001; }
    }
    int64 Word = WalkV[WordI];
    for (int i = 0; i < Dimensions; i++) {
      Neu1V[i] = 0;
      Neu1eV[i] = 0;
    }
    int Offset = Rnd.GetUniDevInt() % WinSize;
    for (int a = Offset; a < WinSize * 2 + 1 - Offset; a++) {
      if (a == WinSize) { continue; }
      int64 CurrWordI = WordI - WinSize + a;
      if (CurrWordI < 0){ continue; }
      if (CurrWordI >= WalkV.Len()){ continue; }
      int64 CurrWord = WalkV[CurrWordI];
      for (int i = 0; i < Dimensions; i++) { Neu1eV[i] = 0; }
      //negative sampling
      for (int j = 0; j < NegSamN+1; j++) {
        int64 Target, Label;
        if (j == 0) {
          Target = Word;
          Label = 1;
        } else {
          Target = RndUnigramInt(KTable, UTable, Rnd);
          if (Target == Word) { continue; }
          Label = 0;
        }
        double Product = 0;
        for (int i = 0; i < Dimensions; i++) {
          Product += SynPos(CurrWord,i) * SynNeg(Target,i);
        }
        double Grad;                     //Gradient multiplied by learning rate
        if (Product > MaxExp) { Grad = (Label - 1) * Alpha; }
        else if (Product < -MaxExp) { Grad = Label * Alpha; }
        else { 
          double Exp = ExpTable[static_cast<int>(Product*ExpTablePrecision)+TableSize/2];
          Grad = (Label - 1 + 1 / (1 + Exp)) * Alpha;
        }
        for (int i = 0; i < Dimensions; i++) { 
          Neu1eV[i] += Grad * SynNeg(Target,i);
          SynNeg(Target,i) += Grad * SynPos(CurrWord,i);
        }
      }
      for (int i = 0; i < Dimensions; i++) {
        SynPos(CurrWord,i) += Neu1eV[i];
      }
    }
    WordCntAll++;
  }
}
예제 #23
0
/// R-MAT Generator. The modes is based on the recursive descent into a 2x2
/// matrix [A,B; C, 1-(A+B+C)].
/// See: R-MAT Generator: A Recursive Model for Graph Mining. 
/// D. Chakrabarti, Y. Zhan and C. Faloutsos, in SIAM Data Mining 2004. 
/// URL: http://www.cs.cmu.edu/~deepay/mywww/papers/siam04.pdf
PNGraph GenRMat(const int& Nodes, const int& Edges, const double& A, const double& B, const double& C, TRnd& Rnd) {
  PNGraph GraphPt = TNGraph::New();
  TNGraph& Graph = *GraphPt;
  Graph.Reserve(Nodes, Edges);
  IAssert(A+B+C < 1.0);
  int rngX, rngY, offX, offY;
  int Depth=0, Collisions=0, Cnt=0, PctDone=0;
  const int EdgeGap = Edges / 100 + 1;
  // sum of parameters (probabilities)
  TVec<double> sumA(128, 0), sumAB(128, 0), sumAC(128, 0), sumABC(128, 0);  // up to 2^128 vertices ~ 3.4e38
  for (int i = 0; i < 128; i++) {
    const double a = A * (Rnd.GetUniDev() + 0.5);
    const double b = B * (Rnd.GetUniDev() + 0.5);
    const double c = C * (Rnd.GetUniDev() + 0.5);
    const double d = (1.0 - (A+B+C)) * (Rnd.GetUniDev() + 0.5);
    const double abcd = a+b+c+d;
    sumA.Add(a / abcd);
    sumAB.Add((a+b) / abcd);
    sumAC.Add((a+c) / abcd);
    sumABC.Add((a+b+c) / abcd);
  }
  // nodes
  for (int node = 0; node < Nodes; node++) {
    IAssert(Graph.AddNode(-1) == node);
  }
  // edges
  for (int edge = 0; edge < Edges; ) {
    rngX = Nodes;  rngY = Nodes;  offX = 0;  offY = 0;
    Depth = 0;
    // recurse the matrix
    while (rngX > 1 || rngY > 1) {
      const double RndProb = Rnd.GetUniDev();
      if (rngX>1 && rngY>1) {
        if (RndProb < sumA[Depth]) { rngX/=2; rngY/=2; }
        else if (RndProb < sumAB[Depth]) { offX+=rngX/2;  rngX-=rngX/2;  rngY/=2; }
        else if (RndProb < sumABC[Depth]) { offY+=rngY/2;  rngX/=2;  rngY-=rngY/2; }
        else { offX+=rngX/2;  offY+=rngY/2;  rngX-=rngX/2;  rngY-=rngY/2; }
      } else
      if (rngX>1) { // row vector
        if (RndProb < sumAC[Depth]) { rngX/=2; rngY/=2; }
        else { offX+=rngX/2;  rngX-=rngX/2;  rngY/=2; }
      } else
      if (rngY>1) { // column vector
        if (RndProb < sumAB[Depth]) { rngX/=2; rngY/=2; }
        else { offY+=rngY/2;  rngX/=2;  rngY-=rngY/2; }
      } else { Fail; }
      Depth++;
    }
    // add edge
    const int NId1 = offX;
    const int NId2 = offY;
    if (NId1 != NId2 && ! Graph.IsEdge(NId1, NId2)) {
      Graph.AddEdge(NId1, NId2);
      if (++Cnt > EdgeGap) {
        Cnt=0;  printf("\r  %d%% edges", ++PctDone); }
      edge++;
    } else {
      Collisions++; }
  }
  printf("\r  RMat: nodes:%d, edges:%d, Iterations:%d, Collisions:%d (%.1f%%).\n", Nodes, Edges,
    Edges+Collisions, Collisions, 100*Collisions/double(Edges+Collisions));
  Graph.Defrag();
  return GraphPt;
}
예제 #24
0
파일: lsh.cpp 프로젝트: EDzhangjianyu/snap
TLSHash::EuclideanHash::EuclideanHash(TRnd &Gen, int Dim) {
  for (int j=0; j<Dim; j++) {
    Line.Add(Gen.GetNrmDev());
  }
  Line.Add(Gen.GetUniDevInt(Gap));
}