Example #1
0
// Draws one sample from the normal distribution N(0,1) using the
// Box-Muller transform on two samples from genUniformRandom(0.0, 1.0).
RealT genGaussianRandom(){
  // The first sample is passed to LOG, so it must be nonzero:
  // keep redrawing until it is.
  RealT u = genUniformRandom(0.0, 1.0);
  while (u == 0){
    u = genUniformRandom(0.0, 1.0);
  }

  // The second sample only selects the angle, so any value is acceptable.
  RealT v = genUniformRandom(0.0, 1.0);

  // Box-Muller: radius SQRT(-2 ln u) times the cosine of the angle 2*pi*v.
  return SQRT(-2.0 * LOG(u)) * COS(2.0 * M_PI * v);
}
// Builds the LSH hash functions for the R-near neighbor structure
// <nnStruct> and stores the resulting table in nnStruct->lshFunctions.
//
// The table has nnStruct->nHFTuples rows of nnStruct->hfTuplesLength
// functions each. Every function consists of a vector <a> with
// nnStruct->dimension entries and a scalar <b>.
//
// <nnStruct> must not be NULL (checked with ASSERT). Every allocation
// is checked through FAILIF.
void initHashFunctions(PRNearNeighborStructT nnStruct){
  ASSERT(nnStruct != NULL);

  // Pass 1: allocate the whole table before any random number is drawn.
  LSHFunctionT **table = (LSHFunctionT**)MALLOC(nnStruct->nHFTuples * sizeof(LSHFunctionT*));
  FAILIF(table == NULL);
  for(IntT row = 0; row < nnStruct->nHFTuples; row++){
    table[row] = (LSHFunctionT*)MALLOC(nnStruct->hfTuplesLength * sizeof(LSHFunctionT));
    FAILIF(table[row] == NULL);
    for(IntT col = 0; col < nnStruct->hfTuplesLength; col++){
      table[row][col].a = (RealT*)MALLOC(nnStruct->dimension * sizeof(RealT));
      FAILIF(table[row][col].a == NULL);
    }
  }

  // Pass 2: fill in the random parameters, row by row.
  for(IntT row = 0; row < nnStruct->nHFTuples; row++){
    for(IntT col = 0; col < nnStruct->hfTuplesLength; col++){
      LSHFunctionT *fn = &table[row][col];

      // Vector a: one random draw per coordinate. The generator is
      // selected at compile time by USE_L1_DISTANCE.
      for(IntT k = 0; k < nnStruct->dimension; k++){
#ifdef USE_L1_DISTANCE
        fn->a[k] = genCauchyRandom();
#else
        fn->a[k] = genGaussianRandom();
#endif
      }

      // Scalar b: drawn with genUniformRandom(0, parameterW).
      fn->b = genUniformRandom(0, nnStruct->parameterW);
    }
  }

  // Publish the finished table on the structure.
  nnStruct->lshFunctions = table;
}
Example #3
0
// Writes a synthetic point set to stdout.
//
// Command-line arguments (all five are required):
//   args[1]  dimension    number of coordinates per point
//   args[2]  M            bound used for the random coordinates, which are
//                         produced by genUniformRandom(-M, M)
//   args[3]  nPoints      total number of points written
//   args[4]  R            radius, echoed in the header and used to place
//                         the first point
//   args[5]  probability  echoed in the header
//
// Output:
//   line 1:      "<nPoints> <dimension> <R> <probability>"
//   line 2:      one point whose every coordinate is 0.99 * R / SQRT(dimension)
//   following:   nPoints - 1 points, each with <dimension> coordinates
//                returned by genUniformRandom(-M, M)
//
// If too few arguments are given, usage(args[0]) is called and the program
// exits with status 1.
int main(int nargs, char **args){
  // args[1] through args[5] are read below, so the program name plus five
  // arguments are needed. Without this check args[5] would be the
  // terminating null pointer and atof() would be handed NULL.
  if (nargs < 6){
    usage(args[0]);
    exit(1);
  }

  IntT dimension = atoi(args[1]);
  RealT M = atof(args[2]);
  IntT nPoints = atoi(args[3]);
  // R is taken directly from the command line. An earlier version of this
  // program instead derived it from an expected number of near neighbors
  // (nExpNNs) and the volume of a d-dimensional ball.
  RealT R = atof(args[4]);
  RealT probability = atof(args[5]);

  // Header line.
  printf("%d %d ", nPoints, dimension);
  FPRINTF_REAL(stdout, R);
  printf(" ");
  FPRINTF_REAL(stdout, probability);
  printf("\n");

  // First point: all coordinates equal to 0.99 * R / SQRT(dimension).
  for(IntT d = 0; d < dimension; d++){
    FPRINTF_REAL(stdout, 0.99 * R / SQRT(dimension));
    printf(" ");
  }
  printf("\n");

  // Remaining nPoints - 1 points: one genUniformRandom(-M, M) draw per
  // coordinate.
  for(IntT i = 0; i < nPoints - 1; i++){
    for(IntT d = 0; d < dimension; d++){
      FPRINTF_REAL(stdout, genUniformRandom(-M, M));
      printf(" ");
    }
    printf("\n");
  }

  return 0;
}
// Creates the LSH hash functions for the R-near neighbor structure
// <nnStruct> and stores them in nnStruct->lshFunctions.
//
// The result is a two-level table: nnStruct->nHFTuples groups, each
// holding nnStruct->hfTuplesLength functions. Every function is a pair
// (a, b), where <a> is a vector of nnStruct->dimension random values and
// <b> is a random scalar.
// NOTE(review): the original author's notes describe these as the
// parameters of an "a . v + b" projection used as the first,
// dimension-reducing step; the code that applies them is not in this
// function -- confirm against the caller.
void initHashFunctions(PRNearNeighborStructT nnStruct)
{
    // ASSERT guards the precondition that <nnStruct> is not NULL.
    ASSERT(nnStruct != NULL);

    // Allocate the outer array (one slot per group), then each group, then
    // the vector <a> of every function. Each allocation is checked with
    // FAILIF.
    LSHFunctionT **groups = (LSHFunctionT**)MALLOC(nnStruct->nHFTuples * sizeof(LSHFunctionT*));
    FAILIF(NULL == groups);
    for(IntT g = 0; g < nnStruct->nHFTuples; g++) {
        LSHFunctionT *group = (LSHFunctionT*)MALLOC(nnStruct->hfTuplesLength * sizeof(LSHFunctionT));
        groups[g] = group;
        FAILIF(NULL == group);
        for(IntT h = 0; h < nnStruct->hfTuplesLength; h++) {
            group[h].a = (RealT*)MALLOC(nnStruct->dimension * sizeof(RealT));
            FAILIF(NULL == group[h].a);
        }
    }

    // Fill in the random parameters; this is where the hash functions
    // themselves are constructed.
    for(IntT g = 0; g < nnStruct->nHFTuples; g++) {
        LSHFunctionT *group = groups[g];
        for(IntT h = 0; h < nnStruct->hfTuplesLength; h++) {
            LSHFunctionT *fn = &group[h];

            // Vector a: one random value per coordinate.
            for(IntT d = 0; d < nnStruct->dimension; d++) {
#ifdef USE_L1_DISTANCE
                // L1 distance: entries come from genCauchyRandom().
                fn->a[d] = genCauchyRandom();
#else
                // Otherwise (L2 distance, per the original comment):
                // entries come from genGaussianRandom().
                fn->a[d] = genGaussianRandom();
#endif
            }

            // Scalar b: drawn with genUniformRandom(0, parameterW).
            fn->b = genUniformRandom(0, nnStruct->parameterW);
        }
    }

    nnStruct->lshFunctions = groups;
}