// Generate a random real from normal distribution N(0,1). RealT genGaussianRandom(){ // Use Box-Muller transform to generate a point from normal // distribution. RealT x1, x2; do{ x1 = genUniformRandom(0.0, 1.0); } while (x1 == 0); // cannot take log of 0. x2 = genUniformRandom(0.0, 1.0); RealT z; z = SQRT(-2.0 * LOG(x1)) * COS(2.0 * M_PI * x2); return z; }
// Creates the LSH hash functions for the R-near neighbor structure // <nnStruct>. The functions fills in the corresponding field of // <nnStruct>. void initHashFunctions(PRNearNeighborStructT nnStruct){ ASSERT(nnStruct != NULL); LSHFunctionT **lshFunctions; // allocate memory for the functions FAILIF(NULL == (lshFunctions = (LSHFunctionT**)MALLOC(nnStruct->nHFTuples * sizeof(LSHFunctionT*)))); for(IntT i = 0; i < nnStruct->nHFTuples; i++){ FAILIF(NULL == (lshFunctions[i] = (LSHFunctionT*)MALLOC(nnStruct->hfTuplesLength * sizeof(LSHFunctionT)))); for(IntT j = 0; j < nnStruct->hfTuplesLength; j++){ FAILIF(NULL == (lshFunctions[i][j].a = (RealT*)MALLOC(nnStruct->dimension * sizeof(RealT)))); } } // initialize the LSH functions for(IntT i = 0; i < nnStruct->nHFTuples; i++){ for(IntT j = 0; j < nnStruct->hfTuplesLength; j++){ // vector a for(IntT d = 0; d < nnStruct->dimension; d++){ #ifdef USE_L1_DISTANCE lshFunctions[i][j].a[d] = genCauchyRandom(); #else lshFunctions[i][j].a[d] = genGaussianRandom(); #endif } // b lshFunctions[i][j].b = genUniformRandom(0, nnStruct->parameterW); } } nnStruct->lshFunctions = lshFunctions; }
// Writes a synthetic point set to stdout.
//
// Command-line arguments (all five are required):
//   args[1]  dimension    number of coordinates per point (parsed with atoi)
//   args[2]  M            random coordinates come from genUniformRandom(-M, M)
//   args[3]  nPoints      point count printed in the header (parsed with atoi)
//   args[4]  R            radius printed in the header
//   args[5]  probability  value printed in the header
//
// Output:
//   line 1:  "<nPoints> <dimension> <R> <probability>"
//   line 2:  one point whose <dimension> coordinates all equal
//            0.99 * R / sqrt(dimension), i.e. a point at Euclidean
//            distance 0.99 * R from the origin
//   then nPoints - 1 lines, each holding <dimension> values obtained
//   from genUniformRandom(-M, M).
//
// Calls usage(args[0]) and exits with status 1 when too few arguments
// are given; otherwise returns 0.
int main(int nargs, char **args){
  // args[5] is read below, so the program name plus five arguments
  // must be present. (Checking only for nargs < 5 would let args[5]
  // be the terminating NULL entry and pass it to atof.)
  if (nargs < 6){
    usage(args[0]);
    exit(1);
  }

  IntT dimension = atoi(args[1]);
  RealT M = atof(args[2]);
  IntT nPoints = atoi(args[3]);
  RealT R = atof(args[4]);
  RealT probability = atof(args[5]);

  // Header line.
  printf("%d %d ", nPoints, dimension);
  FPRINTF_REAL(stdout, R);
  printf(" ");
  FPRINTF_REAL(stdout, probability);
  printf("\n");

  // First point: every coordinate is 0.99 * R / sqrt(dimension), which
  // places it just inside radius R around the origin.
  for(IntT d = 0; d < dimension; d++){
    FPRINTF_REAL(stdout, 0.99 * R / SQRT(dimension));
    printf(" ");
  }
  printf("\n");

  // Remaining nPoints - 1 points: each coordinate drawn independently
  // via genUniformRandom(-M, M).
  for(IntT i = 0; i < nPoints - 1; i++){
    for(IntT d = 0; d < dimension; d++){
      FPRINTF_REAL(stdout, genUniformRandom(-M, M));
      printf(" ");
    }
    printf("\n");
  }

  return 0;
}
// Creates the LSH hash functions for the R-near neighbor structure // <nnStruct>. The functions fills in the corresponding field of // <nnStruct>. void initHashFunctions(PRNearNeighborStructT nnStruct) { /*按照 nHFTuples个 组,hfTuplesLength个最终的维度 产生高斯分布的随机值: a向量和 b,付给 nnStruct->lshFunctions【】【】 用于第一步的点积降维 */ //按照gi hi 两层函数族的格式,初始化nnStruct->lshFunctions所指向的二维指针 //nHFTuples组hash函数g,每个组有hfTuplesLength个hi,每个hi就是一个向量a和一个b //按照a。v+b 的格式,初始化结构体中的随机向量和空间 //nnStruct结构体中,初始化nnStruct->lshFunctions 所指向的二维随机矩阵 ASSERT(nnStruct != NULL);//编程技巧: asssert来判断条件执行: __line__ --file__显示位置 LSHFunctionT **lshFunctions;//LSHFunctionT结构体就两个元素:一个指针+一个double // allocate memory for the functions FAILIF(NULL == (lshFunctions = (LSHFunctionT**)MALLOC(nnStruct->nHFTuples * sizeof(LSHFunctionT*)))); for(IntT i = 0; i < nnStruct->nHFTuples; i++) { FAILIF(NULL == (lshFunctions[i] = (LSHFunctionT*)MALLOC(nnStruct->hfTuplesLength * sizeof(LSHFunctionT)))); for(IntT j = 0; j < nnStruct->hfTuplesLength; j++) { FAILIF(NULL == (lshFunctions[i][j].a = (RealT*)MALLOC(nnStruct->dimension * sizeof(RealT)))); } } // initialize the LSH functions //这里开始构造算法中的一组“位置敏感”的Hash函数。 for(IntT i = 0; i < nnStruct->nHFTuples; i++) { for(IntT j = 0; j < nnStruct->hfTuplesLength; j++) { // vector a for(IntT d = 0; d < nnStruct->dimension; d++) { #ifdef USE_L1_DISTANCE lshFunctions[i][j].a[d] = genCauchyRandom();//L1距离有柯西分布 #else lshFunctions[i][j].a[d] = genGaussianRandom(); //L2使用高斯分布 公式中a就是一组d维的正态分布随机数 #endif } // b lshFunctions[i][j].b = genUniformRandom(0, nnStruct->parameterW); } } nnStruct->lshFunctions = lshFunctions; }