示例#1
0
// to get first few eigenvectors
void GetEigVec(const PUNGraph& Graph, const int& EigVecs, TFltV& EigValV, TVec<TFltV>& EigVecV) {
  const int Nodes = Graph->GetNodes();
  // Lanczos
  TUNGraphMtx GraphMtx(Graph);
  int CalcVals = int(2*EigVecs);
  if (CalcVals > Nodes) { CalcVals = Nodes; }
  TFltVV EigVecVV;
  //while (EigValV.Len() < EigVecs && CalcVals < 10*EigVecs) {
  try {
    TSparseSVD::Lanczos(GraphMtx, EigVecs, 2*EigVecs, ssotFull, EigValV, EigVecVV, false); }
  catch(...) {
    printf("\n  ***EXCEPTION:  TRIED %d GOT %d values** \n", CalcVals, EigValV.Len()); }
  if (EigValV.Len() < EigVecs) {
    printf("  ***TRIED %d GOT %d values** \n", CalcVals, EigValV.Len()); }
  //  CalcVals += EigVecs;
  //}
  TFltIntPrV EigValIdV;
  for (int i = 0; i < EigValV.Len(); i++) {
    EigValIdV.Add(TFltIntPr(EigValV[i], i)); 
  }
  EigValIdV.Sort(false);
  EigValV.Sort(false);
  for (int v = 0; v < EigValIdV.Len(); v++) { // vector components are not sorted!!!
    EigVecV.Add();
    EigVecVV.GetCol(EigValIdV[v].Val2, EigVecV.Last());
  }
  IsAllValVNeg(EigVecV[0], true);
}
示例#2
0
void GetEigVals(const PUNGraph& Graph, const int& EigVals, TFltV& EigValV) {
  // Lanczos
  TUNGraphMtx GraphMtx(Graph);
  //const int Nodes = Graph->GetNodes();
  //int CalcVals = int(2*EigVals);
  //if (CalcVals > Nodes) { CalcVals = Nodes; }
  //while (EigValV.Len() < EigVals && CalcVals < 3*EigVals) {
  try {
    if (EigVals > 4) { 
      TSparseSVD::SimpleLanczos(GraphMtx, 2*EigVals, EigValV, false); }
    else { TFltVV EigVecVV; // this is much more precise, but also much slower
      TSparseSVD::Lanczos(GraphMtx, EigVals, 3*EigVals, ssotFull, EigValV, EigVecVV, false); }
  }
  catch(...) {
    printf("\n  ***EXCEPTION:  TRIED %d GOT %d values** \n", 2*EigVals, EigValV.Len()); }
  if (EigValV.Len() < EigVals) {
    printf("  ***TRIED %d GOT %d values** \n", 2*EigVals, EigValV.Len()); }
  //  CalcVals += EigVals;
  //}
  EigValV.Sort(false);
  /*if (EigValV.Len() > EigVals) {
    EigValV.Del(EigVals, EigValV.Len()-1); }
  else {
    while (EigValV.Len() < EigVals) EigValV.Add(1e-6); 
  }
  IAssert(EigValV.Len() == EigVals);*/
}
示例#3
0
文件: statplot.cpp 项目: Accio/snap
void PlotSngValRank(const PNGraph& Graph, const int& SngVals, const TStr& FNmPref, TStr DescStr) {
  TFltV SngValV;
  TSnap::GetSngVals(Graph, SngVals, SngValV);
  SngValV.Sort(false);
  if (DescStr.Empty()) { DescStr = FNmPref; }
  TGnuPlot::PlotValV(SngValV, "sngVal."+FNmPref, TStr::Fmt("%s. G(%d, %d). Largest eig val = %f",
    DescStr.CStr(), Graph->GetNodes(), Graph->GetEdges(), SngValV[0].Val), "Rank", "Singular value", gpsLog10XY, false, gpwLinesPoints);
}
示例#4
0
文件: anomaly.cpp 项目: blazs/qminer
void TNearestNeighbor::UpdateThreshold() {
    ThresholdV.Gen(RateV.Len(), 0);
    // sort distances
    TFltV SortedV = DistV; SortedV.Sort(true);
    // establish thrashold for each rate
    for (const double Rate : RateV) {
        // element Id corresponding to Rate-th percentile
        const int Elt = (int)floor((1.0 - Rate) * SortedV.Len());
        // remember the distance as threshold
        ThresholdV.Add(SortedV[Elt]);
    }
}
示例#5
0
void TGraphKey::TakeSig(const PNGraph& Graph, const int& MnSvdGraph, const int& MxSvdGraph) {
  const int Edges = Graph->GetEdges();
  Nodes = Graph->GetNodes();
  VariantId = 0;
  SigV.Gen(2+Nodes, 0);
  // degree sequence
  TIntPrV DegV(Nodes, 0);
  for (TNGraph::TNodeI NodeI = Graph->BegNI(); NodeI < Graph->EndNI(); NodeI++) {
    DegV.Add(TIntPr(NodeI.GetInDeg(), NodeI.GetOutDeg()));
  }
  DegV.Sort(false);
  SigV.Add(TFlt(Nodes));
  SigV.Add(TFlt(Edges));
  for (int i = 0; i < DegV.Len(); i++) {
    SigV.Add(DegV[i].Val1());
    SigV.Add(DegV[i].Val2());
  }
  // singular values signature
  //   it turns out that it is cheaper to do brute force isomorphism
  //   checking than to calculate SVD and then check isomorphism
  if (Nodes >= MnSvdGraph && Nodes < MxSvdGraph) {
    // perform full SVD
    TFltVV AdjMtx(Nodes+1, Nodes+1);
    TFltV SngValV;
    TFltVV LSingV, RSingV;
    TIntH NodeIdH;
    // create adjecency matrix
    for (TNGraph::TNodeI NodeI = Graph->BegNI(); NodeI < Graph->EndNI(); NodeI++) {
      NodeIdH.AddKey(NodeI.GetId());
    }
    for (TNGraph::TNodeI NodeI = Graph->BegNI(); NodeI < Graph->EndNI(); NodeI++) {
      const int NodeId = NodeIdH.GetKeyId(NodeI.GetId()) + 1;
      for (int e = 0; e < NodeI.GetOutDeg(); e++) {
        const int DstNId = NodeIdH.GetKeyId(NodeI.GetOutNId(e)) + 1;  // no self edges
        if (NodeId != DstNId) AdjMtx.At(NodeId, DstNId) = 1;
      }
    }
    try { // can fail to converge but results seem to be good
      TSvd::Svd(AdjMtx, LSingV, SngValV, RSingV);
    } catch(...) {
      printf("\n***No SVD convergence: G(%d, %d): SngValV.Len():%d\n", Nodes(), Graph->GetEdges(), SngValV.Len());
    }
    // round singular values
    SngValV.Sort(false);
    for (int i = 0; i < SngValV.Len(); i++) {
      SigV.Add(TMath::Round(SngValV[i], RoundTo));
    }
  }
  //printf("SIG:\n");  for (int i = 0; i < SigV.Len(); i++) { printf("\t%f\n", SigV[i]); }
  SigV.Pack();
}
示例#6
0
void GetSngVals(const PNGraph& Graph, const int& SngVals, TFltV& SngValV) {
  const int Nodes = Graph->GetNodes();
  IAssert(SngVals > 0);
  if (Nodes < 100) {
    // perform full SVD
    TFltVV AdjMtx(Nodes+1, Nodes+1);
    TFltVV LSingV, RSingV;
    TIntH NodeIdH;
    // create adjecency matrix
    for (TNGraph::TNodeI NodeI = Graph->BegNI(); NodeI < Graph->EndNI(); NodeI++) {
      NodeIdH.AddKey(NodeI.GetId()); }
    for (TNGraph::TNodeI NodeI = Graph->BegNI(); NodeI < Graph->EndNI(); NodeI++) {
      const int NodeId = NodeIdH.GetKeyId(NodeI.GetId()) + 1;
      for (int e = 0; e < NodeI.GetOutDeg(); e++) {
        const int DstNId = NodeIdH.GetKeyId(NodeI.GetOutNId(e)) + 1;  // no self edges
        if (NodeId != DstNId) AdjMtx.At(NodeId, DstNId) = 1;
      }
    }
    try { // can fail to converge but results seem to be good
      TSvd::Svd1Based(AdjMtx, LSingV, SngValV, RSingV); }
    catch(...) {
      printf("\n***No SVD convergence: G(%d, %d)\n", Nodes, Graph->GetEdges()); }
  } else {
    // Lanczos
    TNGraphMtx GraphMtx(Graph);
    int CalcVals = int(2*SngVals);
    //if (CalcVals > Nodes) { CalcVals = int(2*Nodes); }
    //if (CalcVals > Nodes) { CalcVals = Nodes; }
    //while (SngValV.Len() < SngVals && CalcVals < 10*SngVals) {
    try {
      if (SngVals > 4) { 
        TSparseSVD::SimpleLanczosSVD(GraphMtx, 2*SngVals, SngValV, false); }
      else { TFltVV LSingV, RSingV;  // this is much more precise, but also much slower
        TSparseSVD::LanczosSVD(GraphMtx, SngVals, 3*SngVals, ssotFull, SngValV, LSingV, RSingV); }
    }
    catch(...) {
      printf("\n  ***EXCEPTION:  TRIED %d GOT %d values** \n", 2*SngVals, SngValV.Len()); }
    if (SngValV.Len() < SngVals) {
      printf("  ***TRIED %d GOT %d values** \n", CalcVals, SngValV.Len()); }
    //  CalcVals += SngVals;
    //}
  }
  SngValV.Sort(false);
  //if (SngValV.Len() > SngVals) {
  //  SngValV.Del(SngVals, SngValV.Len()-1); }
  //else {
  //  while (SngValV.Len() < SngVals) SngValV.Add(1e-6); }
  //IAssert(SngValV.Len() == SngVals);
}
示例#7
0
double TNetInfBs::GetBound(const TIntPr& Edge, double& CurProb) {
	double Bound = 0;
	TFltV Bounds;

	// bound could be computed faster (using lazy evaluation, as in the optimization procedure)
	for (int e=0; e < EdgeGainV.Len(); e++) {
		const TIntPr& EE = EdgeGainV[e].Val2;
		if (EE != Edge && !Graph->IsEdge(EE.Val1, EE.Val2)) {
			const double EProb = GetAllCascProb(EE.Val1, EE.Val2);
			if (EProb > CurProb) Bounds.Add(EProb - CurProb); }
	}

	Bounds.Sort(false);
	for (int i=0; i<Graph->GetEdges() && i<Bounds.Len(); i++) Bound += Bounds[i];

	return Bound;
}
示例#8
0
void GetSngVec(const PNGraph& Graph, const int& SngVecs, TFltV& SngValV, TVec<TFltV>& LeftSV, TVec<TFltV>& RightSV) {
  const int Nodes = Graph->GetNodes();
  SngValV.Clr();
  LeftSV.Clr();
  RightSV.Clr();
  TFltVV LSingV, RSingV;
  if (Nodes < 100) {
    // perform full SVD
    TFltVV AdjMtx(Nodes+1, Nodes+1);
    TIntH NodeIdH;
    // create adjecency matrix (1-based)
    for (TNGraph::TNodeI NodeI = Graph->BegNI(); NodeI < Graph->EndNI(); NodeI++) {
      NodeIdH.AddKey(NodeI.GetId()); }
    for (TNGraph::TNodeI NodeI = Graph->BegNI(); NodeI < Graph->EndNI(); NodeI++) {
      const int NodeId = NodeIdH.GetKeyId(NodeI.GetId())+1;
      for (int e = 0; e < NodeI.GetOutDeg(); e++) {
        const int DstNId = NodeIdH.GetKeyId(NodeI.GetOutNId(e))+1;  // no self edges
        if (NodeId != DstNId) AdjMtx.At(NodeId, DstNId) = 1;
      }
    }
    try { // can fail to converge but results seem to be good
      TSvd::Svd1Based(AdjMtx, LSingV, SngValV, RSingV);
    } catch(...) {
      printf("\n***No SVD convergence: G(%d, %d)\n", Nodes, Graph->GetEdges()); 
    }
  } else { // Lanczos
    TNGraphMtx GraphMtx(Graph);
    TSparseSVD::LanczosSVD(GraphMtx, SngVecs, 2*SngVecs, ssotFull, SngValV, LSingV, RSingV);
    //TGAlg::SaveFullMtx(Graph, "adj_mtx.txt");
    //TLAMisc::DumpTFltVVMjrSubMtrx(LSingV, LSingV.GetRows(), LSingV.GetCols(), "LSingV2.txt"); // save MTX
  }
  TFltIntPrV SngValIdV;
  for (int i = 0; i < SngValV.Len(); i++) {
    SngValIdV.Add(TFltIntPr(SngValV[i], i)); 
  }
  SngValIdV.Sort(false);
  SngValV.Sort(false);
  for (int v = 0; v < SngValIdV.Len(); v++) { 
    LeftSV.Add();
    LSingV.GetCol(SngValIdV[v].Val2, LeftSV.Last());
    RightSV.Add();
    RSingV.GetCol(SngValIdV[v].Val2, RightSV.Last());
  }
  IsAllValVNeg(LeftSV[0], true);
  IsAllValVNeg(RightSV[0], true);
}
示例#9
0
// Computes GINI coefficient of egonet as a subset of the parent graph (edges into and out of the egonet ARE considered)
double TSnap::GetGiniCoefficient(const TIntFltH DegH, const TIntV NIdV) {
  typename TIntV::TIter VI;
  typename TFltV::TIter DI;
  TFltV DegV;
  const int n = NIdV.Len();
  // DegV.Gen(n); // NOTE: don't use Gen() and Sort() on the same object (!)
  for (VI = NIdV.BegI(); VI < NIdV.EndI(); VI++) {
    DegV.Add(DegH.GetDat(VI->Val)); // might need to change this (in / out / undirected)
  }
  DegV.Sort();
  int i = 0;
  double numerator = 0.0, denominator = 0.0;
  for (DI = DegV.BegI(); DI < DegV.EndI(); DI++, i++) {
    numerator += (i + 1)*DegV[i];
    denominator += DegV[i];
  }
  return(double(2*numerator) / double(n*denominator) - double(n + 1) / double(n));
}
示例#10
0
文件: statplot.cpp 项目: Accio/snap
void PlotSngValDistr(const PNGraph& Graph, const int& SngVals, const TStr& FNmPref, TStr DescStr) {
  const int NBuckets = 50;
  TFltV SngValV;
  for (int f = 1; SngValV.Empty() && f < 4; f++) {
    TSnap::GetSngVals(Graph, f*SngVals, SngValV);
  }
  SngValV.Sort(true);
  THash<TFlt, TFlt> BucketCntH;
  double Step = (SngValV.Last()-SngValV[0]) / double(NBuckets-1);
  for (int i = 0; i < NBuckets; i++) {
    BucketCntH.AddDat(SngValV[0]+Step*(i+0.5), 0);
  }
  for (int i = 0; i < SngValV.Len(); i++) {
    const int Bucket = (int) floor((SngValV[i]-SngValV[0]) / Step);
    BucketCntH[Bucket] += 1;
  }
  TFltPrV EigCntV;
  BucketCntH.GetKeyDatPrV(EigCntV);
  if (DescStr.Empty()) { DescStr = FNmPref; }
  TGnuPlot::PlotValV(EigCntV, "sngDistr."+FNmPref, TStr::Fmt("%s. G(%d, %d). Largest eig val = %f", DescStr.CStr(),
    Graph->GetNodes(), Graph->GetEdges(), SngValV.Last().Val), "Singular value", "Count", gpsAuto, false, gpwLinesPoints);
}
示例#11
0
void TNetInfBs::GreedyOpt(const int& MxEdges) {
    double CurProb = GetAllCascProb(-1, -1);
    double LastGain = TFlt::Mx;
    int attempts = 0;
    bool msort = false;

    for (int k = 0; k < MxEdges && EdgeGainV.Len() > 0; k++) {
      double prev = CurProb;

      const TIntPr BestE = GetBestEdge(CurProb, LastGain, msort, attempts);
      if (BestE == TIntPr(-1, -1)) // if we cannot add more edges, we stop
    	  break;

      if (CompareGroundTruth) {
    	  double precision = 0, recall = 0;
    	  if (PrecisionRecall.Len() > 1) {
    		  precision = PrecisionRecall[PrecisionRecall.Len()-1].Val2.Val;
    		  recall = PrecisionRecall[PrecisionRecall.Len()-1].Val1.Val;
    	  }
    	  if (GroundTruth->IsEdge(BestE.Val1, BestE.Val2)) {
			  recall++;
		  }	else {
			  precision++;
		  }

    	  PrecisionRecall.Add(TPair<TFlt, TFlt>(recall, precision));
      }

      Graph->AddEdge(BestE.Val1, BestE.Val2); // add edge to network

      double Bound = 0;
      if (BoundOn)
    	  Bound = GetBound(BestE, prev);

      // localized update!
      TIntV &CascsEdge = CascPerEdge.GetDat(BestE); // only check cascades that contain the edge
      for (int c = 0; c < CascsEdge.Len(); c++) {
    	  CascV[CascsEdge[c]].UpdateProb(BestE.Val1, BestE.Val2, true); // update probabilities
      }

      // some extra info for the added edge
      TInt Vol; TFlt AverageTimeDiff; TFltV TimeDiffs;
      Vol = 0; AverageTimeDiff = 0;
      for (int i=0; i< CascV.Len(); i++) {
    	  if (CascV[i].IsNode(BestE.Val2) && CascV[i].GetParent(BestE.Val2) == BestE.Val1) {
    		  Vol += 1; TimeDiffs.Add(CascV[i].GetTm(BestE.Val2)-CascV[i].GetTm(BestE.Val1));
    		  AverageTimeDiff += TimeDiffs[TimeDiffs.Len()-1]; }
      }
      AverageTimeDiff /= Vol;
      if (TimeDiffs.Len() > 0)
    	  TimeDiffs.Sort();
      else
    	  TimeDiffs.Add(0);

      // compute bound only if explicitly required
      EdgeInfoH.AddDat(BestE) = TEdgeInfo(Vol,
										  LastGain,
										  Bound,
										  TimeDiffs[(int)(TimeDiffs.Len()/2)],
										  AverageTimeDiff);
    }

    if (CompareGroundTruth) {
  	  for (int i=0; i<PrecisionRecall.Len(); i++) {
  		  PrecisionRecall[i].Val2 = 1.0 - PrecisionRecall[i].Val2/(PrecisionRecall[i].Val2+PrecisionRecall[i].Val1);
  		  PrecisionRecall[i].Val1 /= (double)GroundTruth->GetEdges();
  	  }
    }
}