RealT estimateNDistinctCollisionsFromDSPoint(IntT nPoints, IntT dim, PPointT *dataSet, IntT queryIndex, BooleanT useUfunctions, IntT k, IntT LorM, RealT R){ RealT sumCollisions = 0; for(IntT i = 0; i < nPoints; i++){ if (queryIndex != i) { RealT dist = distance(dim, dataSet[queryIndex], dataSet[i]); if (!useUfunctions){ // Pi: if ( R < Pi_EPSILON ) sumCollisions += 1-POW(1-POW(computeFunctionP(PARAMETER_W_DEFAULT, dist), k), LorM); else sumCollisions += 1-POW(1-POW(computeFunctionP(PARAMETER_W_DEFAULT, dist / R), k), LorM); }else{ RealT mu; RealT x; // Pi: if ( R < Pi_EPSILON ) mu = 1 - POW(computeFunctionP(PARAMETER_W_DEFAULT, dist), k / 2); else mu = 1 - POW(computeFunctionP(PARAMETER_W_DEFAULT, dist / R), k / 2); x = POW(mu, LorM - 1); sumCollisions += 1 - mu * x - LorM * (1 - mu) * x; } } } return sumCollisions; }
RealT estimateNCollisionsFromDSPoint(IntT nPoints, IntT dim, PPointT *dataSet, IntT queryIndex, IntT k, IntT L, RealT R){ RealT sumCollisions = 0; for(IntT i = 0; i < nPoints; i++){ if (queryIndex != i) { RealT dist = distance(dim, dataSet[queryIndex], dataSet[i]); // Pi: if ( R < Pi_EPSILON ) sumCollisions += POW(computeFunctionP(PARAMETER_W_DEFAULT, dist), k); else sumCollisions += POW(computeFunctionP(PARAMETER_W_DEFAULT, dist / R), k); } } return L * sumCollisions; }
//estimateNDistinctCollisions(nPoints, dimension, dataSet, sampleQueries[i], TRUE, k, m, R); RealT estimateNDistinctCollisions(IntT nPoints, IntT dim, PPointT *dataSet, PPointT query, BooleanT useUfunctions, IntT k, IntT LorM, RealT R){ RealT sumCollisions = 0; for(IntT i = 0; i < nPoints; i++){ if (query != dataSet[i]) { RealT dist = distance(dim, query, dataSet[i]); if (!useUfunctions){ sumCollisions += 1-POW(1-POW(computeFunctionP(PARAMETER_W_DEFAULT, dist / R), k), LorM); //1-(1-p1^k)^L,两点冲突的概率 }else{ RealT mu = 1 - POW(computeFunctionP(PARAMETER_W_DEFAULT, dist / R), k / 2); RealT x = POW(mu, LorM - 1); sumCollisions += 1 - mu * x - LorM * (1 - mu) * x; //查询点与数据集点冲突的概率总和,为能查询到的近邻点的数目 } } } return sumCollisions; }
RealT estimateNCollisions(IntT nPoints, IntT dim, PPointT *dataSet, PPointT query, IntT k, IntT L, RealT R){ RealT sumCollisions = 0; for(IntT i = 0; i < nPoints; i++){ if (query != dataSet[i]) { RealT dist = distance(dim, query, dataSet[i]); //计算在桶中查询点与数据集中点的距离 sumCollisions += POW(computeFunctionP(PARAMETER_W_DEFAULT, dist / R), k); // dist很有可能就是表示r2,dist越大算出来的概率越小。 } } return L * sumCollisions; //求冲突的期望值 //sumCollosions是在所有桶中的冲突点的数目 }
RealT estimateNCollisions(IntT nPoints, IntT dim, PPointT *dataSet, PPointT query, IntT k, IntT L, RealT R){ RealT sumCollisions = 0; for(IntT i = 0; i < nPoints; i++){ if (query != dataSet[i]) { RealT dist = distance(dim, query, dataSet[i]); sumCollisions += POW(computeFunctionP(PARAMETER_W_DEFAULT, dist / R), k); } } return L * sumCollisions; }
// Computes the parameter <m> of the algorithm, given the parameter // <k> and the desired success probability <successProbability>. Only // meaningful when functions <g> are interdependent (pairs of // functions <u>, where the <m> functions <u> are each k/2-tuples of // independent LSH functions). //g函数是相关时,有用! 为了减少计算时间 IntT computeMForULSH(IntT k, RealT successProbability){ ASSERT((k & 1) == 0); // k should be even in order to use ULSH. RealT mu = 1 - POW(computeFunctionP(PARAMETER_W_DEFAULT, 1), k / 2); //1-p1^(k/2) ,为1 说明c=1 RealT P = successProbability; RealT d = (1-mu)/(1-P)*1/LOG(1/mu) * POW(mu, -1/(1-mu)); //(p1^k/2)/delta*log(1/(p1^k/2))*((1-p1^k/2)^(-1/(p1^(k/2)))) RealT y = LOG(d); IntT m = CEIL(1 - y/LOG(mu) - 1/(1-mu)); while (POW(mu, m-1) * (1 + m * (1-mu)) > 1 - P){ //13 m++; } return m; }
// Computes the parameter <L> of the algorithm, given the parameter // <k> and the desired success probability // <successProbability>. Functions <g> are considered all independent // (original scheme). IntT computeLfromKP(IntT k, RealT successProbability){ return CEIL(LOG(1 - successProbability) / LOG(1 - POW(computeFunctionP(PARAMETER_W_DEFAULT, 1), k))); //向上取整,LOG=ln ,公式6,返回L的值 }