Beispiel #1
0
int GetWeightedPageRankMP1(const PNEANet Graph, TIntFltH& PRankH, const TStr& Attr, const double& C, const double& Eps, const int& MaxIter) {
  if (!Graph->IsFltAttrE(Attr)) return -1;

  TFltV Weights = Graph->GetFltAttrVecE(Attr);

  int mxid = Graph->GetMxNId();
  TFltV OutWeights(mxid);
  Graph->GetWeightOutEdgesV(OutWeights, Weights);
  /*for (TNEANet::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
    OutWeights[NI.GetId()] = Graph->GetWeightOutEdges(NI, Attr);
  }*/


  /*TIntFltH Weights;
  for (TNEANet::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
    Weights.AddDat(NI.GetId(), Graph->GetWeightOutEdges(NI, Attr));
  }*/

  const int NNodes = Graph->GetNodes();
  TVec<TNEANet::TNodeI> NV;
  //const double OneOver = 1.0/double(NNodes);
  PRankH.Gen(NNodes);
  for (TNEANet::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
    NV.Add(NI);
    PRankH.AddDat(NI.GetId(), 1.0/NNodes);
    //IAssert(NI.GetId() == PRankH.GetKey(PRankH.Len()-1));
  }
  TFltV TmpV(NNodes);
  for (int iter = 0; iter < MaxIter; iter++) {
    #pragma omp parallel for schedule(dynamic,10000)
    for (int j = 0; j < NNodes; j++) {
      TNEANet::TNodeI NI = NV[j];
      TmpV[j] = 0;
      for (int e = 0; e < NI.GetInDeg(); e++) {
        const int InNId = NI.GetInNId(e);
        const TFlt OutWeight = OutWeights[InNId];
        int EId = Graph->GetEId(InNId, NI.GetId());
        const TFlt Weight = Weights[Graph->GetFltKeyIdE(EId)];
        if (OutWeight > 0) {
          TmpV[j] += PRankH.GetDat(InNId) * Weight / OutWeight; }
      }
      TmpV[j] =  C*TmpV[j]; // Berkhin (the correct way of doing it)
      //TmpV[j] =  C*TmpV[j] + (1.0-C)*OneOver; // iGraph
    }
    double diff=0, sum=0, NewVal;
    #pragma omp parallel for reduction(+:sum) schedule(dynamic,10000)
    for (int i = 0; i < TmpV.Len(); i++) { sum += TmpV[i]; }
    const double Leaked = (1.0-sum) / double(NNodes);
    #pragma omp parallel for reduction(+:diff) schedule(dynamic,10000)
    for (int i = 0; i < PRankH.Len(); i++) { // re-instert leaked PageRank
      NewVal = TmpV[i] + Leaked; // Berkhin
      //NewVal = TmpV[i] / sum;  // iGraph
      diff += fabs(NewVal-PRankH[i]);
      PRankH[i] = NewVal;
    }
    if (diff < Eps) { break; }
  }
  return 0;
}
Beispiel #2
0
double TAGMFast::LikelihoodForRow(const int UID, const TIntFltH& FU) {
  double L = 0.0;
  TFltV HOSumFV; //adjust for Fv of v hold out
  if (HOVIDSV[UID].Len() > 0) {
    HOSumFV.Gen(SumFV.Len());
    
    for (int e = 0; e < HOVIDSV[UID].Len(); e++) {
      for (int c = 0; c < SumFV.Len(); c++) {
        HOSumFV[c] += GetCom(HOVIDSV[UID][e], c);
      }
    }
  }

  TUNGraph::TNodeI NI = G->GetNI(UID);
  if (DoParallel && NI.GetDeg() > 10) {
#pragma omp parallel for schedule(static, 1)
    for (int e = 0; e < NI.GetDeg(); e++) {
      int v = NI.GetNbrNId(e);
      if (v == UID) { continue; }
      if (HOVIDSV[UID].IsKey(v)) { continue; }
      double LU = log (1.0 - Prediction(FU, F[v])) + NegWgt * DotProduct(FU, F[v]);
#pragma omp atomic
      L += LU;
    }
    for (TIntFltH::TIter HI = FU.BegI(); HI < FU.EndI(); HI++) {
      double HOSum = HOVIDSV[UID].Len() > 0?  HOSumFV[HI.GetKey()].Val: 0.0;//subtract Hold out pairs only if hold out pairs exist
      double LU = NegWgt * (SumFV[HI.GetKey()] - HOSum - GetCom(UID, HI.GetKey())) * HI.GetDat();
      L -= LU;
    }
  } else {
    for (int e = 0; e < NI.GetDeg(); e++) {
      int v = NI.GetNbrNId(e);
      if (v == UID) { continue; }
      if (HOVIDSV[UID].IsKey(v)) { continue; }
      L += log (1.0 - Prediction(FU, F[v])) + NegWgt * DotProduct(FU, F[v]);
    }
    for (TIntFltH::TIter HI = FU.BegI(); HI < FU.EndI(); HI++) {
      double HOSum = HOVIDSV[UID].Len() > 0?  HOSumFV[HI.GetKey()].Val: 0.0;//subtract Hold out pairs only if hold out pairs exist
      L -= NegWgt * (SumFV[HI.GetKey()] - HOSum - GetCom(UID, HI.GetKey())) * HI.GetDat();
    }
  }
  //add regularization
  if (RegCoef > 0.0) { //L1
    L -= RegCoef * Sum(FU);
  }
  if (RegCoef < 0.0) { //L2
    L += RegCoef * Norm2(FU);
  }

  return L;
}
Beispiel #3
0
void GetEigenVectorCentr(const PUNGraph& Graph, TIntFltH& EigenH, const double& Eps, const int& MaxIter) {
  const int NNodes = Graph->GetNodes();
  EigenH.Gen(NNodes);
  for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
    EigenH.AddDat(NI.GetId(), 1.0/NNodes);
    IAssert(NI.GetId() == EigenH.GetKey(EigenH.Len()-1));
  }
  TFltV TmpV(NNodes);
  double diff = TFlt::Mx;
  for (int iter = 0; iter < MaxIter; iter++) {
    int j = 0;
    for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++, j++) {
      TmpV[j] = 0;
      for (int e = 0; e < NI.GetOutDeg(); e++) {
        TmpV[j] += EigenH.GetDat(NI.GetOutNId(e)); }
    }
    double sum = 0;
    for (int i = 0; i < TmpV.Len(); i++) {
      EigenH[i] = TmpV[i];
      sum += EigenH[i];
    }
    for (int i = 0; i < EigenH.Len(); i++) {
      EigenH[i] /= sum; }
    if (fabs(diff-sum) < Eps) { break; }
    //printf("\tdiff:%f\tsum:%f\n", fabs(diff-sum), sum);
    diff = sum;
  }
}
Beispiel #4
0
// Test GetNodeClustCf (Vector)
TEST(triad, TestGetNodeClustCfVector) {
  // Test TUNGraph
  PUNGraph GraphTUN = TriadGetTestTUNGraph();
  TIntFltH NIdCCfH;

  TSnap::GetNodeClustCf(GraphTUN, NIdCCfH);
  for (int i = 0; i < GraphTUN->GetNodes(); i++) {
    double ClustCf = NIdCCfH.GetDat(i);
    VerifyNodeClustCf(i, ClustCf);
  }

  // TNGraph should be treated as TUNGraph for calculations
  PNGraph GraphTN = TriadGetTestTNGraph();
  NIdCCfH.Clr();

  TSnap::GetNodeClustCf(GraphTN, NIdCCfH);
  for (int i = 0; i < GraphTN->GetNodes(); i++) {
    double ClustCf = NIdCCfH.GetDat(i);
    VerifyNodeClustCf(i, ClustCf);
  }

  // TNEGraph should be treated as TUNGraph for calculations
  PNEGraph GraphTNE = TriadGetTestTNEGraph();
  NIdCCfH.Clr();

  TSnap::GetNodeClustCf(GraphTNE, NIdCCfH);
  for (int i = 0; i < GraphTNE->GetNodes(); i++) {
    double ClustCf = NIdCCfH.GetDat(i);
    VerifyNodeClustCf(i, ClustCf);
  }
}
Beispiel #5
0
void getPageRankFromVect(const PNGraph& graph, std::vector<int> srcIds, std::vector<int> dstIds, int sampleSz, char* fileName) {
	std::random_shuffle(srcIds.begin(), srcIds.end());
	std::random_shuffle(dstIds.begin(), dstIds.end());
	std::ofstream outputFile;
	outputFile.open(fileName);

	for (int i = 0; i < sampleSz; ) {
		int srcNodeId = srcIds[rand() % srcIds.size()];
		int dstNodeId = dstIds[rand() % dstIds.size()];
		if (!graph->IsNode(srcNodeId)) continue;
		if (!graph->IsNode(dstNodeId)) continue;
		int shortPath = TSnap::GetShortPath(graph, srcNodeId, dstNodeId, true);
		if (shortPath > 4 || shortPath <= 2) continue;

		PNGraph subgraph = getFourHopGraph(graph, srcNodeId, dstNodeId);
		TIntFltH pageRankScores;
		TSnap::GetPageRank(subgraph, pageRankScores);

		// Calculate total PR score;
		/*
		double totalPR = 0.0;
		for (TIntFltH::TIter itr = pageRankScores.BegI(); itr != pageRankScores.EndI(); itr++) {
			totalPR += itr.GetDat();
		}*/

		int numOfNodesInSubGraph = subgraph->GetNodes();
		double normalizedSrcPR = pageRankScores.GetDat(srcNodeId) * numOfNodesInSubGraph;
		double normalizedDstPR = pageRankScores.GetDat(dstNodeId) * numOfNodesInSubGraph;

		char buffer[100];
		printf("%d, %d\n", i, numOfNodesInSubGraph);
		sprintf(buffer, "%d\t%f\t%d\t%f", srcNodeId, normalizedSrcPR, dstNodeId, normalizedDstPR);
		std::cout << buffer << std::endl;
		outputFile << buffer << std::endl;
		++i;
	}
	outputFile.close();
}
Beispiel #6
0
double TAGMFast::GetStepSizeByLineSearch(const int UID, const TIntFltH& DeltaV, const TIntFltH& GradV, const double& Alpha, const double& Beta, const int MaxIter) {
  double StepSize = 1.0;
  double InitLikelihood = LikelihoodForRow(UID);
  TIntFltH NewVarV(DeltaV.Len());
  for(int iter = 0; iter < MaxIter; iter++) {
    for (int i = 0; i < DeltaV.Len(); i++){
      int CID = DeltaV.GetKey(i);
      double NewVal = GetCom(UID, CID) + StepSize * DeltaV.GetDat(CID);
      if (NewVal < MinVal) { NewVal = MinVal; }
      if (NewVal > MaxVal) { NewVal = MaxVal; }
      NewVarV.AddDat(CID, NewVal);
    }
    if (LikelihoodForRow(UID, NewVarV) < InitLikelihood + Alpha * StepSize * DotProduct(GradV, DeltaV)) {
      StepSize *= Beta;
    } else {
      break;
    }
    if (iter == MaxIter - 1) { 
      StepSize = 0.0;
      break;
    }
  }
  return StepSize;
}
Beispiel #7
0
void GetEigenVectorCentr(const PUNGraph& Graph, TIntFltH& NIdEigenH, const double& Eps, const int& MaxIter) {
  const int NNodes = Graph->GetNodes();
  NIdEigenH.Gen(NNodes);
  // initialize vector values
  for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
    NIdEigenH.AddDat(NI.GetId(), 1.0 / NNodes);
    IAssert(NI.GetId() == NIdEigenH.GetKey(NIdEigenH.Len() - 1));
  }
  TFltV TmpV(NNodes);
  for (int iter = 0; iter < MaxIter; iter++) {
    int j = 0;
    // add neighbor values
    for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++, j++) {
      TmpV[j] = 0;
      for (int e = 0; e < NI.GetOutDeg(); e++) {
        TmpV[j] += NIdEigenH.GetDat(NI.GetOutNId(e));
      }
    }

    // normalize
    double sum = 0;
    for (int i = 0; i < TmpV.Len(); i++) {
      sum += (TmpV[i] * TmpV[i]);
    }
    sum = sqrt(sum);
    for (int i = 0; i < TmpV.Len(); i++) {
      TmpV[i] /= sum;
    }

    // compute difference
    double diff = 0.0;
    j = 0;
    for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++, j++) {
      diff += fabs(NIdEigenH.GetDat(NI.GetId()) - TmpV[j]);
    }

    // set new values
    j = 0;
    for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++, j++) {
      NIdEigenH.AddDat(NI.GetId(), TmpV[j]);
    }

    if (diff < Eps) {
      break;
    }
  }
}
Beispiel #8
0
// Computes GINI coefficient of egonet as a subset of the parent graph (edges into and out of the egonet ARE considered)
double TSnap::GetGiniCoefficient(const TIntFltH DegH, const TIntV NIdV) {
  typename TIntV::TIter VI;
  typename TFltV::TIter DI;
  TFltV DegV;
  const int n = NIdV.Len();
  // DegV.Gen(n); // NOTE: don't use Gen() and Sort() on the same object (!)
  for (VI = NIdV.BegI(); VI < NIdV.EndI(); VI++) {
    DegV.Add(DegH.GetDat(VI->Val)); // might need to change this (in / out / undirected)
  }
  DegV.Sort();
  int i = 0;
  double numerator = 0.0, denominator = 0.0;
  for (DI = DegV.BegI(); DI < DegV.EndI(); DI++, i++) {
    numerator += (i + 1)*DegV[i];
    denominator += DegV[i];
  }
  return(double(2*numerator) / double(n*denominator) - double(n + 1) / double(n));
}
int main(int argc, char* argv[]) {
 Env = TEnv(argc, argv, TNotify::StdNotify);
 Env.PrepArgs(TStr::Fmt("Inverse PageRank. Build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm()));
 TExeTm ExeTm;
 Try
	const TStr Iput = Env.GetIfArgPrefixStr("-i:", "Input.txt", "Input File" );
	const TStr Oput = Env.GetIfArgPrefixStr("-o:", "Output.txt", "Output File");
	FILE* fpI = fopen(Iput.CStr(), "r");
	FILE* fpO = fopen(Oput.CStr(), "w");
	

	const double C    = 0.85;
	const int MaxIter = 50;
	const double Eps  = 1e-9;

	PNGraph Graph = TSnap::LoadEdgeList< PNGraph > (Iput);
	fprintf(fpO, "\nNodes: %d, Edges: %d\n\n", Graph->GetNodes(), Graph->GetEdges());
	const int NNodes = Graph->GetNodes();
	const double OneOver = (double) 1.0 / (double) NNodes;
	
	TIntFltH PRankH;
	PRankH.Gen(NNodes);
	
	for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++)
    	PRankH.AddDat(NI.GetId(), OneOver);
    
    TFltV TmpV(NNodes);
	for (int iter = 0; iter < MaxIter; iter++) {
    	int j = 0;
    	for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++, j++) {
			TmpV[j] = 0;
	        for (int e = 0; e < NI.GetOutDeg(); e++) {
				const int OutNId = NI.GetOutNId(e);
				const int InDeg = Graph->GetNI(OutNId).GetInDeg();
				if (InDeg > 0) 
					TmpV[j] += PRankH.GetDat(OutNId) / InDeg;
			}
			TmpV[j] =  C * TmpV[j]; 
    	}
    	
	for (int i = 0; i < PRankH.Len(); i++)
		PRankH[i] = TmpV[i];
    	/*
    	double diff = 0, sum = 0, NewVal;
		for (int i = 0; i < TmpV.Len(); i++)
			sum += TmpV[i];

		const double Leaked = (double) (1.0 - sum) / (double) NNodes;
		for (int i = 0; i < PRankH.Len(); i++) {
			NewVal = TmpV[i] + Leaked;
			diff += fabs(NewVal - PRankH[i]);
			PRankH[i] = NewVal;
		}
		if (diff < Eps)
			break;
		*/
	}
	
	fprintf(fpO, "Node ID\t\tInverse PageRank\n");
	for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++){
		int Id = NI.GetId();
		double ipr = PRankH.GetDat(Id);
		fprintf(fpO, "%d\t\t\t%.5lf\n", Id, ipr);
	}
 Catch
	printf("\nRun Time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
	
	return 0;
}
Beispiel #10
0
void TNetInfBs::GenCascade(TCascade& C, const int& TModel, const double &window, TIntPrIntH& EdgesUsed, const double& delta,
						   const double& std_waiting_time, const double& std_beta) {
	TIntFltH InfectedNIdH; TIntH InfectedBy;
	double GlobalTime; int StartNId;
	double alpha, beta;

	if (GroundTruth->GetNodes() == 0)
		return;

	while (C.Len() < 2) {
		C.Clr();
		InfectedNIdH.Clr();
		InfectedBy.Clr();
		GlobalTime = 0;

		StartNId = GroundTruth->GetRndNId();
		InfectedNIdH.AddDat(StartNId) = GlobalTime;

		while (true) {
			// sort by time & get the oldest node that did not run infection
			InfectedNIdH.SortByDat(true);
			const int& NId = InfectedNIdH.BegI().GetKey();
			GlobalTime = InfectedNIdH.BegI().GetDat();

			// all the nodes has run infection
			if (GlobalTime >= window)
				break;

			// add current oldest node to the network and set its time
			C.Add(NId, GlobalTime);

			// run infection from the current oldest node
			const TNGraph::TNodeI NI = GroundTruth->GetNI(NId);
			for (int e = 0; e < NI.GetOutDeg(); e++) {
				const int DstNId = NI.GetOutNId(e);

				beta = Betas.GetDat(TIntPr(NId, DstNId));

				// flip biased coin (set by beta)
				if (TInt::Rnd.GetUniDev() > beta+std_beta*TFlt::Rnd.GetNrmDev())
					continue;

				alpha = Alphas.GetDat(TIntPr(NId, DstNId));

				// not infecting the parent
				if (InfectedBy.IsKey(NId) && InfectedBy.GetDat(NId).Val == DstNId)
					continue;

				double sigmaT;
				switch (TModel) {
				case 0:
					// exponential with alpha parameter
					sigmaT = TInt::Rnd.GetExpDev(alpha);
					break;
				case 1:
					// power-law with alpha parameter
					sigmaT = TInt::Rnd.GetPowerDev(alpha);
					while (sigmaT < delta) { sigmaT = TInt::Rnd.GetPowerDev(alpha); }
					break;
				case 2:
					// rayleigh with alpha parameter
					sigmaT = TInt::Rnd.GetRayleigh(1/sqrt(alpha));
					break;
				default:
					sigmaT = 1;
					break;
				}

				// avoid negative time diffs in case of noise
				if (std_waiting_time > 0)
					sigmaT = TFlt::GetMx(0.0, sigmaT + std_waiting_time*TFlt::Rnd.GetNrmDev());

				double t1 = GlobalTime + sigmaT;

				if (InfectedNIdH.IsKey(DstNId)) {
					double t2 = InfectedNIdH.GetDat(DstNId);
					if (t2 > t1 && t2 != window) {
						InfectedNIdH.GetDat(DstNId) = t1;
						InfectedBy.GetDat(DstNId) = NId;
					}
				} else {
					InfectedNIdH.AddDat(DstNId) = t1;
					InfectedBy.AddDat(DstNId) = NId;
				}
			}

			// we cannot delete key (otherwise, we cannot sort), so we assign a big time (window cut-off)
			InfectedNIdH.GetDat(NId) = window;
		}

	}

	C.Sort();

	for (TIntH::TIter EI = InfectedBy.BegI(); EI < InfectedBy.EndI(); EI++) {
		TIntPr Edge(EI.GetDat().Val, EI.GetKey().Val);

		if (!EdgesUsed.IsKey(Edge)) EdgesUsed.AddDat(Edge) = 0;

		EdgesUsed.GetDat(Edge) += 1;
	}
}
Beispiel #11
0
 int GetMxQNId() const { return NIdQH.GetKey(MxQId); }
Beispiel #12
0
 void DelLink(const int& K) { const int NId=GetMxQNId();
   NIdQH.DelKey(K); if (NId==K) { UpdateMaxQ(); }  }
Beispiel #13
0
int main(int argc, char* argv[]) {
  
  setbuf(stdout, NULL); // disables the buffer so that print statements are not buffered and display immediately (?)
   
  Env = TEnv(argc, argv, TNotify::StdNotify);
  Env.PrepArgs(TStr::Fmt("Node centrality. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm()));
  
  TExeTm ExeTm;
  
  Try
  
  const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "", "input network");
  const TStr OutFNm = Env.GetIfArgPrefixStr("-o:", "", "output prefix (filename extensions added)");
  const TStr BseFNm = OutFNm.RightOfLast('/');
  const int k = Env.GetIfArgPrefixInt("-k:", 1, "depth of weighted degree distributions (1 / 2 / ...)");
  const bool c = Env.GetIfArgPrefixBool("-c:", false, "collate centralities into matrix (T / F)");
  
  // Load graph and create directed and undirected graphs (pointer to the same memory)
  printf("\nLoading %s...", InFNm.CStr());
  PFltWNGraph WGraph = TSnap::LoadFltWEdgeList<TWNGraph>(InFNm);
  printf(" DONE\n");
  printf("  nodes: %d\n", WGraph->GetNodes());
  printf("  edges: %d\n", WGraph->GetEdges());
  printf("  time elapsed: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
  
  // Declare variables
  TIntFltVH FirstWDegVH;
  TIntFltVH kWInDegVH, kWOutDegVH, kWDegVH;
  TIntFltVH WDegCentrVH, WEigCentrVH;
  TFltV WEigDiffV;
  TIntFltH WPgRH;
  double WPgRDiff;
  TFltWNGraph::TNodeI NI;
  TFltV::TIter VI;
  
  // CENTRALITY (computations)
  
  // Weighted first degree distributions
  
  printf("\nComputing weighted degree distributions...");
  TSnap::GetWDegVH(WGraph, FirstWDegVH);
  printf(" DONE (time elapsed: %s (%s))\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
  
  // 1:k degree distributions
  
  printf("Computing egonet degrees for k = 1 to %d (in / out / undirected)\n", k);
  TSnap::TFixedMemorykWDeg<TFlt, TWNGraph> FixedMemorykWDeg(WGraph, k);
  printf("  ...");
  FixedMemorykWDeg.GetkWInDegSeqH(kWInDegVH);
  printf(" DONE (time elapsed: %s (%s))\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
  printf("  ...");
  FixedMemorykWDeg.GetkWOutDegSeqH(kWOutDegVH);
  printf(" DONE (time elapsed: %s (%s))\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
  printf("  ...");
  FixedMemorykWDeg.GetkWDegSeqH(kWDegVH);
  printf(" DONE (time elapsed: %s (%s))\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
  
  // Centrality measures
  
  printf("Computing weighted degree centrality...");
  TSnap::GetWDegreeCentrVH(WGraph, WDegCentrVH, 0.5);
  printf(" DONE (time elapsed: %s (%s))\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
  
  printf("Computing weighted eigenvector centrality...");
  WEigDiffV = TSnap::GetWEigenVectorCentrVH<TFlt>(WGraph, WEigCentrVH, 1e-4, 1000);
  printf(" DONE (time elapsed: %s (%s))\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
  printf("  convergence differences (in / out / undirected)\n");
  printf("    %f\n", double(WEigDiffV[0]));
  printf("    %f\n", double(WEigDiffV[1]));
  printf("    %f\n", double(WEigDiffV[2]));
  
  printf("Computing weighted PageRank centrality...");
  WPgRDiff = TSnap::GetWPageRank<TFlt>(WGraph, WPgRH, 0.85, 1e-4, 1000);
  printf(" DONE (time elapsed: %s (%s))\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
  printf("  convergence difference: %f\n", double(WPgRDiff));
  
  // OUTPUTTING (mostly verbose printing statements, don't get scared)
  
  if (c) {
    
    printf("\nSaving %s.wcentr...", BseFNm.CStr());
    const TStr AggFNm = TStr::Fmt("%s.wcentr", OutFNm.CStr());
    FILE *F = fopen(AggFNm.CStr(), "wt");
    fprintf(F,"# Node centrality distributions on the directed / undirected graph (as applicable)\n");
    fprintf(F,"# Nodes: %d\tEdges: %d\n", WGraph->GetNodes(), WGraph->GetEdges());
    fprintf(F,"# NodeId\tWInDegCentr\tWOutDegCentr\tWDegCentr\tWInEigCentr\tWOutEigCentr\tWEigCentr\tWPgRCentr\n");
    for (NI = WGraph->BegNI(); NI < WGraph->EndNI(); NI++) {
      const int NId = NI.GetId(); fprintf(F, "%d", NId);
      const TFltV WDegCentrV = WDegCentrVH.GetDat(NId);
      for (VI = WDegCentrV.BegI(); VI < WDegCentrV.EndI(); VI++) { fprintf(F, "\t%f", VI->Val); }
      const TFltV WEigCentrV = WEigCentrVH.GetDat(NId);
      for (VI = WEigCentrV.BegI(); VI < WEigCentrV.EndI(); VI++) { fprintf(F, "\t%f", VI->Val); }
      const double WPgRCentr = WPgRH.GetDat(NId); fprintf(F, "\t%f", WPgRCentr);
      fprintf(F, "\n");
    }
    printf(" DONE\n");
    
  } else {
    
    printf("\nSaving %s.wdeg.centr...", BseFNm.CStr());
    TSnap::SaveTxt(WDegCentrVH, TStr::Fmt("%s.wdeg.centr", OutFNm.CStr()), "Weighted degree centrality (in / out / undirected)", "NodeId", "WInDegCentr\tWOutDegCentr\tWDegCentr");
    printf(" DONE\n");
    
    printf("Saving %s.weig...", BseFNm.CStr());
    TSnap::SaveTxt(WEigCentrVH, TStr::Fmt("%s.weig", OutFNm.CStr()), "Weighted eigenvector centrality (in / out / undirected)", "NodeId", "WInEigCentr\tWOutEigCentr\tWEigCentr");
    printf(" DONE\n");
    
    printf("Saving %s.wpgr...", BseFNm.CStr());
    TSnap::SaveTxt(WPgRH, TStr::Fmt("%s.wpgr", OutFNm.CStr()), "Weighted PageRank centrality (wpgr)", "NodeId", "WPageRank");
    printf(" DONE\n");
    
  }
  
  printf("Saving %s.wdeg...", BseFNm.CStr());
  TSnap::SaveTxt(FirstWDegVH, TStr::Fmt("%s.wdeg", OutFNm.CStr()), "Weighted degree distributions (in / out / undirected)", "NodeId", "WInDeg\tWOutDeg\tWDeg");
  printf(" DONE\n");
  
  printf("Saving %s.kwdeg.IN...", BseFNm.CStr());
  TSnap::SaveTxt(kWInDegVH, TStr::Fmt("%s.kwdeg.IN", OutFNm.CStr()), TStr::Fmt("1 to %d weighted in degree distributions (kdeg.IN)", k), "NodeId", "kWInDegH");
  printf(" DONE\n");
  
  printf("Saving %s.kwdeg.OUT...", BseFNm.CStr());
  TSnap::SaveTxt(kWOutDegVH, TStr::Fmt("%s.kwdeg.OUT", OutFNm.CStr()), TStr::Fmt("1 to %d weighted out degree distributions (kdeg.OUT)", k), "NodeId", "kWOutDegH");
  printf(" DONE\n");
  
  printf("Saving %s.kwdeg...", BseFNm.CStr());
  TSnap::SaveTxt(kWDegVH, TStr::Fmt("%s.kwdeg", OutFNm.CStr()), TStr::Fmt("1 to %d weighted degree distributions (kdeg)", k), "NodeId", "kWDegH");
  printf(" DONE\n");
  
  Catch
  
  printf("\nrun time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
  return 0;
  
}
Beispiel #14
0
int GetWeightedPageRankMP2(const PNEANet Graph, TIntFltH& PRankH, const TStr& Attr, const double& C, const double& Eps, const int& MaxIter) {
  if (!Graph->IsFltAttrE(Attr)) return -1;
  const int NNodes = Graph->GetNodes();
  TVec<TNEANet::TNodeI> NV;

  //const double OneOver = 1.0/double(NNodes);
  PRankH.Gen(NNodes);
  int MxId;

  for (TNEANet::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
    NV.Add(NI);
    PRankH.AddDat(NI.GetId(), 1.0/NNodes);
    int Id = NI.GetId();
    if (Id > MxId) {
      MxId = Id;
    }
  }

  TFltV PRankV(MxId+1);
  TFltV OutWeights(MxId+1);

  TFltV Weights = Graph->GetFltAttrVecE(Attr);

  #pragma omp parallel for schedule(dynamic,10000)
  for (int j = 0; j < NNodes; j++) {
    TNEANet::TNodeI NI = NV[j];
    int Id = NI.GetId();
    OutWeights[Id] = Graph->GetWeightOutEdges(NI, Attr);
    PRankV[Id] = 1/NNodes;
  }

  TFltV TmpV(NNodes);
  for (int iter = 0; iter < MaxIter; iter++) {

    #pragma omp parallel for schedule(dynamic,10000)
    for (int j = 0; j < NNodes; j++) {
      TNEANet::TNodeI NI = NV[j];
      TFlt Tmp = 0;
      for (int e = 0; e < NI.GetInDeg(); e++) {
        const int InNId = NI.GetInNId(e);

        const TFlt OutWeight = OutWeights[InNId];

        int EId = Graph->GetEId(InNId, NI.GetId());
        const TFlt Weight = Weights[Graph->GetFltKeyIdE(EId)];

        if (OutWeight > 0) {
          Tmp += PRankH.GetDat(InNId) * Weight / OutWeight; 
        }
      }
      TmpV[j] =  C*Tmp; // Berkhin (the correct way of doing it)
      //TmpV[j] =  C*TmpV[j] + (1.0-C)*OneOver; // iGraph
    }

    double sum = 0;
    #pragma omp parallel for reduction(+:sum) schedule(dynamic,10000)
    for (int i = 0; i < TmpV.Len(); i++) { sum += TmpV[i]; }
    const double Leaked = (1.0-sum) / double(NNodes);

    double diff = 0;
    #pragma omp parallel for reduction(+:diff) schedule(dynamic,10000)
    for (int i = 0; i < NNodes; i++) {
      TNEANet::TNodeI NI = NV[i];
      double NewVal = TmpV[i] + Leaked; // Berkhin
      //NewVal = TmpV[i] / sum;  // iGraph
      int Id = NI.GetId();
      diff += fabs(NewVal-PRankV[Id]);
      PRankV[Id] = NewVal;
    }
    if (diff < Eps) { break; }
  }

  #pragma omp parallel for schedule(dynamic,10000)
  for (int i = 0; i < NNodes; i++) {
    TNEANet::TNodeI NI = NV[i];
    PRankH[i] = PRankV[NI.GetId()];
  }

  return 0;
}
Beispiel #15
0
int TAGMFast::MLEGradAscentParallel(const double& Thres, const int& MaxIter, const int ChunkNum, const int ChunkSize, const TStr PlotNm, const double StepAlpha, const double StepBeta) {
  //parallel
  time_t InitTime = time(NULL);
  uint64 StartTm = TSecTm::GetCurTm().GetAbsSecs();
  TExeTm ExeTm, CheckTm;
  double PrevL = Likelihood(true);
  TIntFltPrV IterLV;
  int PrevIter = 0;
  int iter = 0;
  TIntV NIdxV(F.Len(), 0);
  for (int i = 0; i < F.Len(); i++) { NIdxV.Add(i); }
  TIntV NIDOPTV(F.Len()); //check if a node needs optimization or not 1: does not require optimization
  NIDOPTV.PutAll(0);
  TVec<TIntFltH> NewF(ChunkNum * ChunkSize);
  TIntV NewNIDV(ChunkNum * ChunkSize);
  for (iter = 0; iter < MaxIter; iter++) {
    NIdxV.Clr(false);
    for (int i = 0; i < F.Len(); i++) { 
      if (NIDOPTV[i] == 0) {  NIdxV.Add(i); }
    }
    IAssert (NIdxV.Len() <= F.Len());
    NIdxV.Shuffle(Rnd);
    // compute gradient for chunk of nodes
#pragma omp parallel for schedule(static, 1)
    for (int TIdx = 0; TIdx < ChunkNum; TIdx++) {
      TIntFltH GradV;
      for (int ui = TIdx * ChunkSize; ui < (TIdx + 1) * ChunkSize; ui++) {
        NewNIDV[ui] = -1;
        if (ui > NIdxV.Len()) { continue; }
        int u = NIdxV[ui]; //
        //find set of candidate c (we only need to consider c to which a neighbor of u belongs to)
        TUNGraph::TNodeI UI = G->GetNI(u);
        TIntSet CIDSet(5 * UI.GetDeg());
        TIntFltH CurFU = F[u];
        for (int e = 0; e < UI.GetDeg(); e++) {
          if (HOVIDSV[u].IsKey(UI.GetNbrNId(e))) { continue; }
          TIntFltH& NbhCIDH = F[UI.GetNbrNId(e)];
          for (TIntFltH::TIter CI = NbhCIDH.BegI(); CI < NbhCIDH.EndI(); CI++) {
            CIDSet.AddKey(CI.GetKey());
          }
        }
        if (CIDSet.Empty()) { 
          CurFU.Clr();
        }
        else {
          for (TIntFltH::TIter CI = CurFU.BegI(); CI < CurFU.EndI(); CI++) { //remove the community membership which U does not share with its neighbors
            if (! CIDSet.IsKey(CI.GetKey())) {
              CurFU.DelIfKey(CI.GetKey());
            }
          }
          GradientForRow(u, GradV, CIDSet);
          if (Norm2(GradV) < 1e-4) { NIDOPTV[u] = 1; continue; }
          double LearnRate = GetStepSizeByLineSearch(u, GradV, GradV, StepAlpha, StepBeta, 5);
          if (LearnRate <= 1e-5) { NewNIDV[ui] = -2; continue; }
          for (int ci = 0; ci < GradV.Len(); ci++) {
            int CID = GradV.GetKey(ci);
            double Change = LearnRate * GradV.GetDat(CID);
            double NewFuc = CurFU.IsKey(CID)? CurFU.GetDat(CID) + Change : Change;
            if (NewFuc <= 0.0) {
              CurFU.DelIfKey(CID);
            } else {
              CurFU.AddDat(CID) = NewFuc;
            }
          }
          CurFU.Defrag();
        }
        //store changes
        NewF[ui] = CurFU;
        NewNIDV[ui] = u;
      }
    }
    int NumNoChangeGrad = 0;
    int NumNoChangeStepSize = 0;
    for (int ui = 0; ui < NewNIDV.Len(); ui++) {
      int NewNID = NewNIDV[ui];
      if (NewNID == -1) { NumNoChangeGrad++; continue; }
      if (NewNID == -2) { NumNoChangeStepSize++; continue; }
      for (TIntFltH::TIter CI = F[NewNID].BegI(); CI < F[NewNID].EndI(); CI++) {
        SumFV[CI.GetKey()] -= CI.GetDat();
      }
    }
#pragma omp parallel for
    for (int ui = 0; ui < NewNIDV.Len(); ui++) {
      int NewNID = NewNIDV[ui];
      if (NewNID < 0) { continue; }
      F[NewNID] = NewF[ui];
    }
    for (int ui = 0; ui < NewNIDV.Len(); ui++) {
      int NewNID = NewNIDV[ui];
      if (NewNID < 0) { continue; }
      for (TIntFltH::TIter CI = F[NewNID].BegI(); CI < F[NewNID].EndI(); CI++) {
        SumFV[CI.GetKey()] += CI.GetDat();
      }
    }
    // update the nodes who are optimal
    for (int ui = 0; ui < NewNIDV.Len(); ui++) {
      int NewNID = NewNIDV[ui];
      if (NewNID < 0) { continue; }
      TUNGraph::TNodeI UI = G->GetNI(NewNID);
      NIDOPTV[NewNID] = 0;
      for (int e = 0; e < UI.GetDeg(); e++) {
        NIDOPTV[UI.GetNbrNId(e)] = 0;
      }
    }
    int OPTCnt = 0;
    for (int i = 0; i < NIDOPTV.Len(); i++) { if (NIDOPTV[i] == 1) { OPTCnt++; } }
    if (! PlotNm.Empty()) {
      printf("\r%d iterations [%s] %d secs", iter * ChunkSize * ChunkNum, ExeTm.GetTmStr(), int(TSecTm::GetCurTm().GetAbsSecs() - StartTm));
      if (PrevL > TFlt::Mn) { printf(" (%f) %d g %d s %d OPT", PrevL, NumNoChangeGrad, NumNoChangeStepSize, OPTCnt); }
      fflush(stdout);
    }
    if ((iter - PrevIter) * ChunkSize * ChunkNum >= G->GetNodes()) {
      PrevIter = iter;
      double CurL = Likelihood(true);
      IterLV.Add(TIntFltPr(iter * ChunkSize * ChunkNum, CurL));
      printf("\r%d iterations, Likelihood: %f, Diff: %f [%d secs]", iter, CurL,  CurL - PrevL, int(time(NULL) - InitTime));
       fflush(stdout);
      if (CurL - PrevL <= Thres * fabs(PrevL)) { 
        break;
      }
      else {
        PrevL = CurL;
      }
    }
  }
  if (! PlotNm.Empty()) {
    printf("\nMLE completed with %d iterations(%s secs)\n", iter, int(TSecTm::GetCurTm().GetAbsSecs() - StartTm));
    TGnuPlot::PlotValV(IterLV, PlotNm + ".likelihood_Q");[]
  } else {
int main(int argc, char* argv[]) {
 Env = TEnv(argc, argv, TNotify::StdNotify);
 Env.PrepArgs(TStr::Fmt("Trust Rank. Build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm()));
 TExeTm ExeTm;
 Try
	const TStr Gnod = Env.GetIfArgPrefixStr("-g:", "Gnode.txt", "Good Nodes");
	const TStr Bnod = Env.GetIfArgPrefixStr("-b:", "Bnode.txt", "Bad Nodes" );
	const TStr Iput = Env.GetIfArgPrefixStr("-i:", "Input.txt", "Input File");
	const TStr Oput = Env.GetIfArgPrefixStr("-o:", "Output.txt", "Output File");
	const double C	  = 0.85;
	const int MaxIter = 50;
	const double Eps  = 1e-9;
	
	FILE* fpO = fopen(Oput.CStr(), "w");
	
	PNGraph Graph = TSnap::LoadEdgeList< PNGraph > (Iput);
	fprintf(fpO, "\nNodes: %d, Edges: %d\n\n", Graph->GetNodes(), Graph->GetEdges());
	const int NNodes = Graph->GetNodes();
	TIntFltH TRankH;
	TRankH.Gen(NNodes);
	int maxNId = 0, NId = 0, ret = 0;
	for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++)
		maxNId = max(maxNId, NI.GetId());
	
	TFltV initialTrustScore(maxNId + 1);
	for (int i = 0; i < initialTrustScore.Len(); i++)
		initialTrustScore[i] = 0.5;

	FILE* fpI = fopen(Gnod.CStr(), "r");
	while (true) {
		ret = fscanf(fpI, "%d", &NId);
		if (ret == EOF) break;
		if (Graph->IsNode(NId))
			initialTrustScore[NId] = 1.0;
	}
	fclose(fpI);
	
	fpI = fopen(Bnod.CStr(), "r");
	while (true) {
		ret = fscanf(fpI, "%d", &NId);
		if (ret == EOF) break;
		if (Graph->IsNode(NId))
			initialTrustScore[NId] = 0.0;
	}
	fclose(fpI);

	double Tot = 0.0;
	for(int i = 0; i < initialTrustScore.Len(); i++)
		Tot += initialTrustScore[i];
	for(int i = 0; i < initialTrustScore.Len(); i++)
		initialTrustScore[i] /= Tot;
	
	for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++)
		TRankH.AddDat( NI.GetId(), initialTrustScore[NI.GetId()] );
	
	TFltV TmpV(NNodes);
	for (int iter = 0; iter < MaxIter; iter++) {
		int j = 0;
		for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++, j++) {
			TmpV[j] = 0;
			for (int e = 0; e < NI.GetOutDeg(); e++) {
				const int OutNId = NI.GetOutNId(e);
				const int InDeg  = Graph->GetNI(InNId).GetInDeg();
				if (InDeg > 0) 
					TmpV[j] += (double) TRankH.GetDat(OutNId) / (double) InDeg; 
        	}
			TmpV[j] =  C * TmpV[j] + (1.0 - C) * initialTrustScore[NI.GetId()]; 
		}

		for (int i = 0; i < TRankH.Len(); i++) 
			TRankH[i] = TmpV[i];
	}
	
	fprintf(fpO, "Node ID\t\tTrustRank\n");
	for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++){
		int Id = NI.GetId();
		double tr = TRankH.GetDat(Id);
		fprintf(fpO, "%d\t\t\t%.5lf\n", Id, tr);
	}
	fclose(fpO);
	
 Catch
	printf("\nRun Time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
	
	return 0;
}
Beispiel #17
0
 void UpdateMaxQ() { MxQId=-1;
   for (int i = -1; NIdQH.FNextKeyId(i); ) {
     if (MxQId==-1 || NIdQH[MxQId]< NIdQH[i]) { MxQId=i; } } }
Beispiel #18
0
void GetBetweennessCentr(const PUNGraph& Graph, const TIntV& BtwNIdV, TIntFltH& NodeBtwH, const bool& DoNodeCent, TIntPrFltH& EdgeBtwH, const bool& DoEdgeCent) {
  if (DoNodeCent) { NodeBtwH.Clr(); }
  if (DoEdgeCent) { EdgeBtwH.Clr(); }
  const int nodes = Graph->GetNodes();
  TIntS S(nodes);
  TIntQ Q(nodes);
  TIntIntVH P(nodes); // one vector for every node
  TIntFltH delta(nodes);
  TIntH sigma(nodes), d(nodes);
  // init
  for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
    if (DoNodeCent) {
      NodeBtwH.AddDat(NI.GetId(), 0);
    }
    if (DoEdgeCent) {
      for (int e = 0; e < NI.GetOutDeg(); e++) {
        if (NI.GetId() < NI.GetOutNId(e)) {
          EdgeBtwH.AddDat(TIntPr(NI.GetId(), NI.GetOutNId(e)), 0);
        }
      }
    }
    sigma.AddDat(NI.GetId(), 0);
    d.AddDat(NI.GetId(), -1);
    P.AddDat(NI.GetId(), TIntV());
    delta.AddDat(NI.GetId(), 0);
  }
  // calc betweeness
  for (int k = 0; k < BtwNIdV.Len(); k++) {
    const TUNGraph::TNodeI NI = Graph->GetNI(BtwNIdV[k]);
    // reset
    for (int i = 0; i < sigma.Len(); i++) {
      sigma[i] = 0;  d[i] = -1;  delta[i] = 0;  P[i].Clr(false);
    }
    S.Clr(false);
    Q.Clr(false);
    sigma.AddDat(NI.GetId(), 1);
    d.AddDat(NI.GetId(), 0);
    Q.Push(NI.GetId());
    while (!Q.Empty()) {
      const int v = Q.Top();  Q.Pop();
      const TUNGraph::TNodeI NI2 = Graph->GetNI(v);
      S.Push(v);
      const int VDat = d.GetDat(v);
      for (int e = 0; e < NI2.GetOutDeg(); e++) {
        const int w = NI2.GetOutNId(e);
        if (d.GetDat(w) < 0) { // find w for the first time
          Q.Push(w);
          d.AddDat(w, VDat + 1);
        }
        //shortest path to w via v ?
        if (d.GetDat(w) == VDat + 1) {
          sigma.AddDat(w) += sigma.GetDat(v);
          P.GetDat(w).Add(v);
        }
      }
    }
    while (!S.Empty()) {
      const int w = S.Top();
      const double SigmaW = sigma.GetDat(w);
      const double DeltaW = delta.GetDat(w);
      const TIntV NIdV = P.GetDat(w);
      S.Pop();
      for (int i = 0; i < NIdV.Len(); i++) {
        const int nid = NIdV[i];
        const double c = (sigma.GetDat(nid)*1.0 / SigmaW) * (1 + DeltaW);
        delta.AddDat(nid) += c;
        if (DoEdgeCent) {
          EdgeBtwH.AddDat(TIntPr(TMath::Mn(nid, w), TMath::Mx(nid, w))) += c;
        }
      }
      if (DoNodeCent && w != NI.GetId()) {
        NodeBtwH.AddDat(w) += delta.GetDat(w) / 2.0;
      }
    }
  }
}
Beispiel #19
0
void TAGMFast::GradientForRow(const int UID, TIntFltH& GradU, const TIntSet& CIDSet) {
  GradU.Gen(CIDSet.Len());

  TFltV HOSumFV; //adjust for Fv of v hold out
  if (HOVIDSV[UID].Len() > 0) {
    HOSumFV.Gen(SumFV.Len());
    
    for (int e = 0; e < HOVIDSV[UID].Len(); e++) {
      for (int c = 0; c < SumFV.Len(); c++) {
        HOSumFV[c] += GetCom(HOVIDSV[UID][e], c);
      }
    }
  }
    
  TUNGraph::TNodeI NI = G->GetNI(UID);
  int Deg = NI.GetDeg();
  TFltV PredV(Deg), GradV(CIDSet.Len());
  TIntV CIDV(CIDSet.Len());
  if (DoParallel && Deg + CIDSet.Len() > 10) {
#pragma omp parallel for schedule(static, 1)
    for (int e = 0; e < Deg; e++) {
      if (NI.GetNbrNId(e) == UID) { continue; }
      if (HOVIDSV[UID].IsKey(NI.GetNbrNId(e))) { continue; }
      PredV[e] = Prediction(UID, NI.GetNbrNId(e));
    }
  
#pragma omp parallel for schedule(static, 1)
    for (int c = 0; c < CIDSet.Len(); c++) {
      int CID = CIDSet.GetKey(c);
      double Val = 0.0;
      for (int e = 0; e < Deg; e++) {
        int VID = NI.GetNbrNId(e);
        if (VID == UID) { continue; }
        if (HOVIDSV[UID].IsKey(VID)) { continue; }
        Val += PredV[e] * GetCom(VID, CID) / (1.0 - PredV[e]) + NegWgt * GetCom(VID, CID);
      }
      double HOSum = HOVIDSV[UID].Len() > 0?  HOSumFV[CID].Val: 0.0;//subtract Hold out pairs only if hold out pairs exist
      Val -= NegWgt * (SumFV[CID] - HOSum - GetCom(UID, CID));
      CIDV[c] = CID;
      GradV[c] = Val;
    }
  } 
  else {
    for (int e = 0; e < Deg; e++) {
      if (NI.GetNbrNId(e) == UID) { continue; }
      if (HOVIDSV[UID].IsKey(NI.GetNbrNId(e))) { continue; }
      PredV[e] = Prediction(UID, NI.GetNbrNId(e));
    }
  
    for (int c = 0; c < CIDSet.Len(); c++) {
      int CID = CIDSet.GetKey(c);
      double Val = 0.0;
      for (int e = 0; e < Deg; e++) {
        int VID = NI.GetNbrNId(e);
        if (VID == UID) { continue; }
        if (HOVIDSV[UID].IsKey(VID)) { continue; }
        Val += PredV[e] * GetCom(VID, CID) / (1.0 - PredV[e]) + NegWgt * GetCom(VID, CID);
      }
      double HOSum = HOVIDSV[UID].Len() > 0?  HOSumFV[CID].Val: 0.0;//subtract Hold out pairs only if hold out pairs exist
      Val -= NegWgt * (SumFV[CID] - HOSum - GetCom(UID, CID));
      CIDV[c] = CID;
      GradV[c] = Val;
    }
  }
  //add regularization
  if (RegCoef > 0.0) { //L1
    for (int c = 0; c < GradV.Len(); c++) {
      GradV[c] -= RegCoef; 
    }
  }
  if (RegCoef < 0.0) { //L2
    for (int c = 0; c < GradV.Len(); c++) {
      GradV[c] += 2 * RegCoef * GetCom(UID, CIDV[c]); 
    }
  }


  for (int c = 0; c < GradV.Len(); c++) {
    if (GetCom(UID, CIDV[c]) == 0.0 && GradV[c] < 0.0) { continue; }
    if (fabs(GradV[c]) < 0.0001) { continue; }
    GradU.AddDat(CIDV[c], GradV[c]);
  }
  for (int c = 0; c < GradU.Len(); c++) {
    if (GradU[c] >= 10) { GradU[c] = 10; }
    if (GradU[c] <= -10) { GradU[c] = -10; }
    IAssert(GradU[c] >= -10);
  }
}
/////////////////////////////////////////////////
// NIST-score
double TEvalScoreNist::Eval(const PTransCorpus& TransCorpus, const TIntV& _SentIdV) {
    // check if the corpus has translations
    IAssert(TransCorpus->IsTrans());

    // ngram counts (cliped and full)
    TIntH ClipCountNGramH, CountNGramH;
    // ngram info score
    TIntFltH NGramInfoH;
    // candidate and effective reference length
    double FullTransLen = 0.0, FullRefLen = 0.0;

    // iterate over sentences
    TIntV SentIdV = _SentIdV;
    if (SentIdV.Empty()) { TransCorpus->GetSentIdV(SentIdV); }
    const int Sents = SentIdV.Len();
    for (int SentIdN = 0; SentIdN < Sents; SentIdN++) {
        const int SentId = SentIdV[SentIdN];
        // tokenize translation
        TIntV TransWIdV; Parse(TransCorpus->GetTransStr(SentId), TransWIdV);
        TIntH TransNGramH; GetNGramH(TransWIdV, MxNGramLen, TransNGramH);
        TIntH FreeTransNGramH = TransNGramH; // number of non-matched ngrams
        // counters for getting the closest length of reference sentences
        const int TransLen = TransWIdV.Len(); int RefLenSum = 0;
        // go over reference translations and count ngram matches
        TStrV RefTransStrV = TransCorpus->GetRefTransStrV(SentId);
        // we assume that there is at least one reference translation
        IAssert(!RefTransStrV.Empty());
        for (int RefN = 0; RefN < RefTransStrV.Len(); RefN++) {
            // parse reference translation sentence
            TIntV RefWIdV; Parse(RefTransStrV[RefN], RefWIdV);
            TIntH RefNGramH; GetNGramH(RefWIdV, MxNGramLen, RefNGramH);
            // check for matches
            int TransNGramKeyId = TransNGramH.FFirstKeyId();
            while(TransNGramH.FNextKeyId(TransNGramKeyId)) {
                const int NGramId = TransNGramH.GetKey(TransNGramKeyId);
                const int FreeTransNGrams = FreeTransNGramH(NGramId);
                if (RefNGramH.IsKey(NGramId) && (FreeTransNGrams>0)) {
                    // ngram match and still some free ngrams left to clip
                    const int RefNGrams = RefNGramH(NGramId);
                    FreeTransNGramH(NGramId) = TInt::GetMx(0, FreeTransNGrams - RefNGrams);
                }
            }
            // check the length difference
            const int RefLen = RefWIdV.Len();
            RefLenSum += RefLen;
        }
        // count ngrams
        int TransNGramKeyId = TransNGramH.FFirstKeyId();
        while(TransNGramH.FNextKeyId(TransNGramKeyId)) {
            // get ngram
            const int NGramId = TransNGramH.GetKey(TransNGramKeyId);
            IAssert(NGramId != -1);
            // check if two hash tables are aligned (should be...)
            const int FreeNGramId = FreeTransNGramH.GetKey(TransNGramKeyId);
            IAssert(NGramId == FreeNGramId);
            // get ngram count and clip-count
            const int Count = TransNGramH[TransNGramKeyId];
            const int ClipCount = Count - FreeTransNGramH[TransNGramKeyId];
            // add ngram to the coprus ngram counts
            CountNGramH.AddDat(NGramId) += Count;
            ClipCountNGramH.AddDat(NGramId) += ClipCount;
        }
        // count length
        FullTransLen += double(TransLen);
        FullRefLen += double(RefLenSum) / double(RefTransStrV.Len());
    }

    // calculate ngram info scores
    int CountKeyId = CountNGramH.FFirstKeyId();
    while (CountNGramH.FNextKeyId(CountKeyId)) {
        // get the n-gram
        const int NGramId = CountNGramH.GetKey(CountKeyId);
        TIntV NGram = GetNGram(NGramId);
        // prepare counts
        if (NGram.Len() == 1) {
            // n-gram is a word
            const int WordCount = CountNGramH[CountKeyId];
            const double NGramInfoScore = TMath::Log2(FullTransLen / double(WordCount));
            NGramInfoH.AddDat(NGramId, NGramInfoScore);
        } else {
            // more then one word in the n-gram
            // get a n-gram with removed last element
            TIntV N1Gram = NGram; N1Gram.DelLast();
            const int N1GramId = NGramH.GetKeyId(N1Gram);
            // get the counts
            const int NGramCount = CountNGramH(NGramId);
            const int N1GramCount = CountNGramH(N1GramId);
            // get the score
            const double NGramInfoScore = TMath::Log2(double(N1GramCount) / double(NGramCount));
            NGramInfoH.AddDat(NGramId, NGramInfoScore);
        }
    }

    // calcualte ngram precisions
    TFltV ClipCountV(MxNGramLen); ClipCountV.PutAll(0);
    int ClipCountKeyId = ClipCountNGramH.FFirstKeyId();
    while (ClipCountNGramH.FNextKeyId(ClipCountKeyId)) {
        const int NGramId = ClipCountNGramH.GetKey(ClipCountKeyId);
        const int NGramLen = GetNGramLen(NGramId);
        const double NGramInfo = NGramInfoH(NGramId);
        IAssert(0 < NGramLen && NGramLen <= MxNGramLen);
        const int ClipCountNGram = ClipCountNGramH[ClipCountKeyId];
        ClipCountV[NGramLen-1] += double(ClipCountNGram) * NGramInfo;
    }
    TIntV CountV(MxNGramLen); CountV.PutAll(0);
    CountKeyId = CountNGramH.FFirstKeyId();
    while (CountNGramH.FNextKeyId(CountKeyId)) {
        const int NGramId = CountNGramH.GetKey(CountKeyId);
        const int NGramLen = GetNGramLen(NGramId);
        IAssert(0 < NGramLen && NGramLen <= MxNGramLen);
        CountV[NGramLen-1] += CountNGramH[CountKeyId];
    }
    TFltV PrecV(MxNGramLen, 0);
    for (int NGramLen = 0; NGramLen < MxNGramLen; NGramLen++) {
        const double ClipCount = ClipCountV[NGramLen];
        const int Count = CountV[NGramLen];
        const double Prec = (Count > 0) ? ClipCount / double(Count) : 0.0;
        PrecV.Add(Prec);
    }

    // calcualte brevity penalty
    const double LenFrac = double(FullTransLen)/double(FullRefLen);
    double BP = 0.0;
    if (LenFrac >= 1.0) { BP = 1.0; }
    else if (LenFrac <= 0.0) { BP = 0.0; }
    else {
        // calculate beta
        const double LenFracX = 1.5, BPX = 0.5;
        const double Beta = log(BPX) / TMath::Sqr(log(LenFracX));
        // calculate BP score
        BP = exp(Beta * TMath::Sqr(log(LenFrac)));
    }

    // calculate full NIST score
    double NistScore = 0.0; 
    for (int NGramLen = 0; NGramLen < MxNGramLen; NGramLen++) {
        NistScore += PrecV[NGramLen];
    }    
    NistScore *= BP;
    printf("NIST Score: %.5f\n", NistScore);
    
    // done!
    return NistScore;
}
Beispiel #21
0
int main(int argc, char* argv[])
{
  TEnv Env(argc, argv);
  TStr PrefixPath = Env.GetArgs() > 1 ? Env.GetArg(1) : TStr("");

  double ts1 = Tick();
  TTableContext Context;
  TVec<PTable> NodeTblV = TVec<PTable>();
  TVec<PTable> EdgeTblV = TVec<PTable>();
  Schema NodeSchema = Schema();
  Schema EdgeSchema = Schema();
  LoadFlickrTables(PrefixPath, Context, NodeTblV, NodeSchema, EdgeTblV, EdgeSchema);

  double ts2 = Tick();

  int ExpectedSz = 0;
  for (TVec<PTable>::TIter it = NodeTblV.BegI(); it < NodeTblV.EndI(); it++) {
    PTable Table = *it;
    ExpectedSz += Table->GetNumRows();
  }

  THash<TStr, TInt> Hash(ExpectedSz);
  TStrV OriNIdV(ExpectedSz);

  MergeNodeTables(NodeTblV, NodeSchema, Hash, OriNIdV);
  PTable EdgeTable = MergeEdgeTables(EdgeTblV, EdgeSchema, Hash, Context);

  double ts3 = Tick();
  TStrV V;
  TStrV VE;
  VE.Add(EdgeSchema.GetVal(2).GetVal1());
  PNEANet Graph = TSnap::ToNetwork<PNEANet>(EdgeTable, EdgeSchema.GetVal(0).GetVal1(), EdgeSchema.GetVal(1).GetVal1(),
						V, V, VE, aaLast);
  double ts4 = Tick();

  //int nExps = 1;
  int nExps = 40;
  TIntFltH PageRankResults;
  for (int i = 0; i < nExps; i++) {
    PageRankResults = TIntFltH(ExpectedSz);
#ifdef USE_OPENMP
    TSnap::GetWeightedPageRankMP2(Graph, PageRankResults, EdgeSchema.GetVal(2).GetVal1(), 0.849999999999998, 0.0001, 10);
#else
    TSnap::GetWeightedPageRank(Graph, PageRankResults, EdgeSchema.GetVal(2).GetVal1(), 0.849999999999998, 0.0001, 10);
#endif
  }
  double ts5 = Tick();

  PSOut ResultOut = TFOut::New(PrefixPath + TStr("page-rank-results.tsv"));
  for (TIntFltH::TIter it = PageRankResults.BegI(); it < PageRankResults.EndI(); it++) {
    ResultOut->PutStrFmtLn("%s\t%f9", OriNIdV[it.GetKey()].CStr(), it.GetDat().Val);
  }
  double ts6 = Tick();

  bool isPar = false;
#ifdef USE_OPENMP
  isPar = true;
#endif

//  PSOut FeaturesOut = TFOut::New(PrefixPath + "features.txt");
//  FeaturesOut->PutStrFmtLn("Photo %d", PPhotoTbl->GetNumRows().Val);
//  FeaturesOut->PutStrFmtLn("Users %d", PUserTbl->GetNumRows().Val);
//  FeaturesOut->PutStrFmtLn("Tags %d", PTagTbl->GetNumRows().Val);
//  FeaturesOut->PutStrFmtLn("Comments %d", PCommentTbl->GetNumRows().Val);
//  FeaturesOut->PutStrFmtLn("Locations %d", PLocationTbl->GetNumRows().Val);
//  FeaturesOut->PutStrFmtLn("Photo - Owner %d", PPhotoOwnerTbl->GetNumRows().Val);
//  FeaturesOut->PutStrFmtLn("Photo - Comment %d", PPhotoCommentTbl->GetNumRows().Val);
//  FeaturesOut->PutStrFmtLn("Photo - Location %d", PPhotoLocationTbl->GetNumRows().Val);
//  FeaturesOut->PutStrFmtLn("Comment - User %d", PCommentUserTbl->GetNumRows().Val);
//  FeaturesOut->PutStrFmtLn("Comment - User %d", PCommentUserTbl->GetNumRows().Val);
////  FeaturesOut->PutStrFmtLn("Photo - Tagger %d", PPhotoTaggerTbl->GetNumRows().Val);
//  FeaturesOut->PutStrFmtLn("Tagger - Tag %d", PTaggerTagTbl->GetNumRows().Val);
//  FeaturesOut->PutStrFmtLn("Total number of nodes = %d", Graph->GetNodes());
//  FeaturesOut->PutStrFmtLn("Total number of edges = %d", Graph->GetEdges());

  PSOut TimeOut = TFOut::New(PrefixPath + TStr("time.txt"), true);
  TimeOut->PutStrFmtLn("Experiment Weighted - %s - %s", PrefixPath.CStr(), (isPar ? "Parallel" : "Sequential"));
  TimeOut->PutStrFmtLn("Input Time = %f", GetCPUTimeUsage(ts1, ts2));
  TimeOut->PutStrFmtLn("Preprocessing Time = %f", GetCPUTimeUsage(ts2, ts3));
  TimeOut->PutStrFmtLn("Conversion Time = %f", GetCPUTimeUsage(ts3, ts4));
  TimeOut->PutStrFmtLn("Computing Time = %f", GetCPUTimeUsage(ts4, ts5)/nExps);
  TimeOut->PutStrFmtLn("Output Time = %f", GetCPUTimeUsage(ts5, ts6));

  return 0;
}
Beispiel #22
0
int TAGMFast::MLEGradAscent(const double& Thres, const int& MaxIter, const TStr PlotNm, const double StepAlpha, const double StepBeta) {
  time_t InitTime = time(NULL);
  TExeTm ExeTm, CheckTm;
  int iter = 0, PrevIter = 0;
  TIntFltPrV IterLV;
  TUNGraph::TNodeI UI;
  double PrevL = TFlt::Mn, CurL = 0.0;
  TIntV NIdxV(F.Len(), 0);
  for (int i = 0; i < F.Len(); i++) { NIdxV.Add(i); }
  IAssert(NIdxV.Len() == F.Len());
  TIntFltH GradV;
  while(iter < MaxIter) {
    NIdxV.Shuffle(Rnd);
    for (int ui = 0; ui < F.Len(); ui++, iter++) {
      int u = NIdxV[ui]; //
      //find set of candidate c (we only need to consider c to which a neighbor of u belongs to)
      UI = G->GetNI(u);
      TIntSet CIDSet(5 * UI.GetDeg());
      for (int e = 0; e < UI.GetDeg(); e++) {
        if (HOVIDSV[u].IsKey(UI.GetNbrNId(e))) { continue; }
        TIntFltH& NbhCIDH = F[UI.GetNbrNId(e)];
        for (TIntFltH::TIter CI = NbhCIDH.BegI(); CI < NbhCIDH.EndI(); CI++) {
          CIDSet.AddKey(CI.GetKey());
        }
      }
      for (TIntFltH::TIter CI = F[u].BegI(); CI < F[u].EndI(); CI++) { //remove the community membership which U does not share with its neighbors
        if (! CIDSet.IsKey(CI.GetKey())) {
          DelCom(u, CI.GetKey());
        }
      }
      if (CIDSet.Empty()) { continue; }
      GradientForRow(u, GradV, CIDSet);
      if (Norm2(GradV) < 1e-4) { continue; }
      double LearnRate = GetStepSizeByLineSearch(u, GradV, GradV, StepAlpha, StepBeta);
      if (LearnRate == 0.0) { continue; }
      for (int ci = 0; ci < GradV.Len(); ci++) {
        int CID = GradV.GetKey(ci);
        double Change = LearnRate * GradV.GetDat(CID);
        double NewFuc = GetCom(u, CID) + Change;
        if (NewFuc <= 0.0) {
          DelCom(u, CID);
        } else {
          AddCom(u, CID, NewFuc);
        }
      }
      if (! PlotNm.Empty() && (iter + 1) % G->GetNodes() == 0) {
        IterLV.Add(TIntFltPr(iter, Likelihood(false)));
      }
    }
    printf("\r%d iterations (%f) [%lu sec]", iter, CurL, time(NULL) - InitTime);
    fflush(stdout);
    if (iter - PrevIter >= 2 * G->GetNodes() && iter > 10000) {
      PrevIter = iter;
      CurL = Likelihood();
      if (PrevL > TFlt::Mn && ! PlotNm.Empty()) {
        printf("\r%d iterations, Likelihood: %f, Diff: %f", iter, CurL,  CurL - PrevL);
      }
      fflush(stdout);
      if (CurL - PrevL <= Thres * fabs(PrevL)) { break; }
      else { PrevL = CurL; }
    }
    
  }
  printf("\n");
  printf("MLE for Lambda completed with %d iterations(%s)\n", iter, ExeTm.GetTmStr());
  if (! PlotNm.Empty()) {
    TGnuPlot::PlotValV(IterLV, PlotNm + ".likelihood_Q");
  }
  return iter;
}
Beispiel #23
0
 void AddQ(const int& NId, const double& Q) { NIdQH.AddDat(NId, Q);
   if (MxQId==-1 || NIdQH[MxQId]<Q) { MxQId=NIdQH.GetKeyId(NId); } }