// Evaluates the <u> function lshFunctions[gNumber] -- a tuple of
// <hfTuplesLength> LSH functions -- at <point>. For every function i of
// the tuple, the dot product of <point> with the function's vector <a>
// is shifted by <b>, divided by <parameterW>, passed through
// FLOOR_INT32 and written to vectorValue[i]. The caller must supply a
// <vectorValue> with room for <hfTuplesLength> Uns32T-words.
inline void computeULSH(PRNearNeighborStructT nnStruct, IntT gNumber, RealT *point, Uns32T *vectorValue){
  CR_ASSERT(nnStruct != NULL);
  CR_ASSERT(point != NULL);
  CR_ASSERT(vectorValue != NULL);

  IntT fn = 0;
  while (fn < nnStruct->hfTuplesLength){
    // Accumulate the projection of <point> on the fn-th function's vector.
    RealT projection = 0;
    IntT coord = 0;
    while (coord < nnStruct->dimension){
      projection += point[coord] * nnStruct->lshFunctions[gNumber][fn].a[coord];
      coord++;
    }

    // Shift by b, scale by 1/W and quantize.
    vectorValue[fn] = (Uns32T)(FLOOR_INT32((projection + nnStruct->lshFunctions[gNumber][fn].b) / nnStruct->parameterW));
    fn++;
  }
}
Пример #2
0
// Select the server that should handle <aKey> for service <aService>.
//
// The key is hashed and looked up in mServerHash with std::lower_bound,
// wrapping to the first entry when the lookup runs off the end. When
// <aService> is non-zero the search then walks forward (again wrapping)
// until an entry reports that it services <aService>. The chosen server
// is asked to connect before it is returned.
//
// Returns the server on a successful connect; NULL when there are no
// servers, no entry services <aService>, or the connect result is
// anything other than CONNECT_SUCCESS.
MemCacheClient::Server *
MemCacheClient::FindServer(
    const string_t & aKey,
    unsigned         aService
    )
{
#ifdef CROSSBASE_API
    // private builds must always name a service
    if (0 == aService) {
        mTrace.Trace(CLERROR, "FindServer: no service requested, supplied cache server may not be appropriate!!!");
        CR_ASSERT(!"FindServer: no service requested, supplied cache server may not be appropriate!!!");
    }
#endif

    // nothing to choose from
    if (mServerHash.empty()) {
        return NULL;
    }

    typedef std::vector<ConsistentHash>::iterator HashIter;

    // first entry that does not order before the key's hash, wrapping around
    ConsistentHash keyHash(CreateKeyHash(aKey.data()), NULL, 0, 0);
    HashIter first = mServerHash.begin();
    HashIter last  = mServerHash.end();
    HashIter pos   = std::lower_bound(first, last, keyHash);
    if (pos == last) {
        pos = first;
    }

    // advance to an entry that handles the requested service; give up
    // after one full lap
    if (aService != 0) {
        HashIter origin = pos;
        while (!pos->services(aService)) {
            ++pos;
            if (pos == last) {
                pos = first;
            }
            if (pos == origin) {
                mTrace.Trace(CLDEBUG, "FindServer: no server for required service: %u", aService);
                return NULL;
            }
        }
    }

    // only hand back a server whose connect attempt succeeded; waiting,
    // failed and any other result all yield NULL
    Server * pServer = pos->mServer;
    if (pServer->Connect(mTimeoutMs, mRetryMs) == Server::CONNECT_SUCCESS) {
        return pServer;
    }
    return NULL;
}
Пример #3
0
unsigned long 
MemCacheClient::CreateKeyHash(
    const char * aKey
    )
{
    const size_t LONG_COUNT = SHA_DIGEST_LENGTH / sizeof(unsigned long);
    
    union {
        unsigned char as_char[SHA_DIGEST_LENGTH];
        unsigned long as_long[LONG_COUNT];
    } output;

    CR_ASSERT(sizeof(output.as_char) == SHA_DIGEST_LENGTH);
    CR_ASSERT(sizeof(output.as_long) == SHA_DIGEST_LENGTH);

    SHA1((const unsigned char *) aKey, (unsigned long) strlen(aKey), output.as_char);
    return output.as_long[LONG_COUNT-1];
}
// Compute the value of a hash function u=lshFunctions[gNumber] (a
// vector of <hfTuplesLength> LSH functions) in the point <point>. The
// result is stored in the vector <vectorValue>. <vectorValue> must be
// already allocated (and have space for <hfTuplesLength> Uns32T-words).
//
// This is an experimental variant: for each function the dot product
// a.v is multiplied by 97 before b is added and the sum is divided by
// parameterW; the quotient is converted to int (truncating toward zero)
// and negative results are shifted up by 1793.
inline void computeULSH(PRNearNeighborStructT nnStruct, IntT gNumber, RealT *point, Uns32T *vectorValue)
{
    CR_ASSERT(nnStruct != NULL);
    CR_ASSERT(point != NULL);
    CR_ASSERT(vectorValue != NULL);

    for(IntT i = 0; i < nnStruct->hfTuplesLength; i++) {
        // Dot product of <point> with the i-th vector of the gNumber-th
        // hash function (the "a.v" term).
        RealT value = 0;
        for(IntT d = 0; d < nnStruct->dimension; d++) {
            value += point[d] * nnStruct->lshFunctions[gNumber][i].a[d];
        }

        // Experimental scaling of the projection. The original note said
        // "scale by 10 and see", but the factor actually applied is 97.
        value = value * 97;

        // (a.v + b) / W
        double tempv = (value + nnStruct->lshFunctions[gNumber][i].b);
        double temp_w = tempv / nnStruct->parameterW;

        // Conversion to int truncates toward zero.
        int vi = (int)temp_w;
        if (vi < 0) {
            // NOTE(review): the reason for the 1793 offset is not
            // documented in this file.
            vi += 1793;
        }

        // The FLOOR_INT32-based value that used to be stored here first
        // was always overwritten by <vi>, so only <vi> is stored.
        vectorValue[i] = vi;
    }
}
// Returns the list of near neighbors of the point <query> (with a
// certain success probability). Near neighbor is defined as being a
// point within distance <parameterR>. Each near neighbor from the
// data set is returned with a certain probability, dependent on
// <parameterK>, <parameterL>, and <parameterT>. The returned points
// are kept in the array <result>. If <result> is NULL, it is allocated
// with RESULT_INIT_SIZE entries. Whenever the number of returned
// points reaches the size of <result>, <result> is doubled in size
// (and <resultSize> updated); a failed (re)allocation is reported
// through FAILIF. The return value is the number of points found.
//
// Points are only appended to <result> while nnStruct->reportingResult
// is set. nnStruct->markedPoints / markedPointsIndeces are used as
// scratch space to avoid reporting a point twice and are reset before
// returning.
Int32T getNearNeighborsFromPRNearNeighborStruct(PRNearNeighborStructT nnStruct, PPointT query, PPointT *(&result), Int32T &resultSize){
  ASSERT(nnStruct != NULL);
  ASSERT(query != NULL);
  ASSERT(nnStruct->reducedPoint != NULL);
  ASSERT(!nnStruct->useUfunctions || nnStruct->pointULSHVectors != NULL);

  PPointT point = query;

  if (result == NULL){
    resultSize = RESULT_INIT_SIZE;
    FAILIF(NULL == (result = (PPointT*)MALLOC(resultSize * sizeof(PPointT))));
  }

  // NOTE(review): presumably this fills nnStruct->precomputedHashesOfULSHs
  // for <point>, which is copied right below -- confirm against the
  // definition of preparePointAdding.
  preparePointAdding(nnStruct, nnStruct->hashedBuckets[0], point);

  // Local copy of the precomputed hashes of every <u> tuple.
  // (Variable-length array: relies on a compiler extension in C++.)
  Uns32T precomputedHashesOfULSHs[nnStruct->nHFTuples][N_PRECOMPUTED_HASHES_NEEDED];
  for(IntT i = 0; i < nnStruct->nHFTuples; i++){
    for(IntT j = 0; j < N_PRECOMPUTED_HASHES_NEEDED; j++){
      precomputedHashesOfULSHs[i][j] = nnStruct->precomputedHashesOfULSHs[i][j];
    }
  }
  TIMEV_START(timeTotalBuckets);

  // Fine-grained timing is switched off for the bucket loop when
  // <noExpensiveTiming> is set, and restored afterwards.
  BooleanT oldTimingOn = timingOn;
  if (noExpensiveTiming) {
    timingOn = FALSE;
  }

  // Initialize the counters for defining the pair of <u> functions used for <g> functions.
  IntT firstUComp = 0;
  IntT secondUComp = 1;

  Int32T nNeighbors = 0;// the number of near neighbors found so far.
  Int32T nMarkedPoints = 0;// the number of marked points
  for(IntT i = 0; i < nnStruct->parameterL; i++){
    TIMEV_START(timeGetBucket);
    GeneralizedPGBucket gbucket;
    if (!nnStruct->useUfunctions) {
      // Use usual <g> functions (truly independent; <g>s are precisely
      // <u>s).
      gbucket = getGBucket(nnStruct->hashedBuckets[i], 1, precomputedHashesOfULSHs[i], NULL);
    } else {
      // Use <u> functions (<g>s are pairs of <u> functions).
      gbucket = getGBucket(nnStruct->hashedBuckets[i], 2, precomputedHashesOfULSHs[firstUComp], precomputedHashesOfULSHs[secondUComp]);

      // compute what is the next pair of <u> functions.
      secondUComp++;
      if (secondUComp == nnStruct->nHFTuples) {
        firstUComp++;
        secondUComp = firstUComp + 1;
      }
    }
    TIMEV_END(timeGetBucket);

    PGBucketT bucket;

    TIMEV_START(timeCycleBucket);
    switch (nnStruct->hashedBuckets[i]->typeHT){
    case HT_LINKED_LIST:
      bucket = gbucket.llGBucket;
      if (bucket != NULL){
        // circle through the bucket and add to <result> the points that are near.
        PBucketEntryT bucketEntry = &(bucket->firstEntry);
        while (bucketEntry != NULL){
          Int32T candidatePIndex = bucketEntry->pointIndex;
          PPointT candidatePoint = nnStruct->points[candidatePIndex];
          if (isDistanceSqrLeq(nnStruct->dimension, point, candidatePoint, nnStruct->parameterR2) && nnStruct->reportingResult){
            if (nnStruct->markedPoints[candidatePIndex] == FALSE) {
              // a new R-NN point was found (not yet in <result>).
              if (nNeighbors >= resultSize){
                // run out of space => resize the <result> array.
                resultSize = 2 * resultSize;
                FAILIF(NULL == (result = (PPointT*)REALLOC(result, resultSize * sizeof(PPointT))));
              }
              result[nNeighbors] = candidatePoint;
              nNeighbors++;
              nnStruct->markedPointsIndeces[nMarkedPoints] = candidatePIndex;
              nnStruct->markedPoints[candidatePIndex] = TRUE; // do not include more points with the same index
              nMarkedPoints++;
            }
          }
          bucketEntry = bucketEntry->nextEntry;
        }
      }
      break;
    case HT_STATISTICS:
      ASSERT(FALSE); // HT_STATISTICS not supported anymore
      break;
    case HT_HYBRID_CHAINS:
      if (gbucket.hybridGBucket != NULL){
        PHybridChainEntryT hybridPoint = gbucket.hybridGBucket;
        Uns32T offset = 0;
        if (hybridPoint->point.bucketLength == 0){
          // there are overflow points in this bucket: assemble the
          // offset from the bucketLength fields of the following entries.
          offset = 0;
          for(IntT j = 0; j < N_FIELDS_PER_INDEX_OF_OVERFLOW; j++){
            offset += ((Uns32T)((hybridPoint + 1 + j)->point.bucketLength) << (j * N_BITS_FOR_BUCKET_LENGTH));
          }
        }
        Uns32T index = 0;
        BooleanT done = FALSE;
        while(!done){
          if (index == MAX_NONOVERFLOW_POINTS_PER_BUCKET){
            // jump over to the overflow part of the bucket.
            index = index + offset;
          }
          Int32T candidatePIndex = (hybridPoint + index)->point.pointIndex;
          CR_ASSERT(candidatePIndex >= 0 && candidatePIndex < nnStruct->nPoints);
          done = (hybridPoint + index)->point.isLastPoint == 1 ? TRUE : FALSE;
          index++;
          if (nnStruct->markedPoints[candidatePIndex] == FALSE){
            // mark the point first, so that its distance is computed at
            // most once per query.
            nnStruct->markedPointsIndeces[nMarkedPoints] = candidatePIndex;
            nnStruct->markedPoints[candidatePIndex] = TRUE; // do not include more points with the same index
            nMarkedPoints++;

            PPointT candidatePoint = nnStruct->points[candidatePIndex];
            if (isDistanceSqrLeq(nnStruct->dimension, point, candidatePoint, nnStruct->parameterR2) && nnStruct->reportingResult){
              // a new R-NN point was found (not yet in <result>).
              if (nNeighbors >= resultSize){
                // run out of space => resize the <result> array.
                resultSize = 2 * resultSize;
                FAILIF(NULL == (result = (PPointT*)REALLOC(result, resultSize * sizeof(PPointT))));
              }
              result[nNeighbors] = candidatePoint;
              nNeighbors++;
            }
          }else{
            // the point was already marked (& examined)
          }
        }
      }
      break;
    default:
      ASSERT(FALSE);
    }
    TIMEV_END(timeCycleBucket);

  }

  timingOn = oldTimingOn;
  TIMEV_END(timeTotalBuckets);

  // we need to clear the array nnStruct->markedPoints for the next query.
  for(Int32T i = 0; i < nMarkedPoints; i++){
    ASSERT(nnStruct->markedPoints[nnStruct->markedPointsIndeces[i]] == TRUE);
    nnStruct->markedPoints[nnStruct->markedPointsIndeces[i]] = FALSE;
  }
  DPRINTF("nMarkedPoints: %d\n", nMarkedPoints);

  return nNeighbors;
}
Пример #6
0
/*
  The main entry to LSH package. Depending on the command line
  parameters, the function computes the R-NN data structure optimal
  parameters and/or construct the R-NN data structure and runs the
  queries on the data structure.
 */
int main_T(int nargs, char **args)
{


	//先分析参数
	/* 官方lsh文件:10个参数
	1000 9 784 0.9  0.6  mnist1k.dts  mnist1k.q
	bin/LSHMain $nDataSet $nQuerySet $dimension $successProbability "$1" "$2" "$3" $m -c*/


	//算参数	bin/LSHMain 1000 9 784 0.9  "0.6" "mnist1k.dts" "mnist1k.q" 1002000000  -c


	//bin/LSHMain $nDataSet $nQuerySet $dimension $successProbability 1.0 "$1" "$2" $m -p "$3"

	//匹配	bin/LSHMain 1000 9 784 0.9 1.0  "mnist1k.dts" "mnist1k.q" 1002000000  -p  "outputparma.txt"
  if(nargs < 9)
  {
    usage(args[0]);
    exit(1);
  }

  //initializeLSHGlobal();

  // Parse part of the command-line parameters.
  nPoints = atoi(args[1]);
  IntT nQueries = atoi(args[2]);
  pointsDimension = atoi(args[3]);
  successProbability = atof(args[4]);
  char* endPtr[1];
  RealT thresholdR = strtod(args[5], endPtr);//点相邻的距离阈值
  //str-to -double  将字符串转换成浮点数的函数
  //endPtr 接收数字结尾后非字符串字母

  //这个r阈值是什么呢?
  if (thresholdR == 0 || endPtr[1] == args[5])
  {//如果阈值为0,或者第一个字符就不是数字, 
	  //表示是用文件保存的
	  //这大概是用于测试哪个阈值好的
    // The value for R is not specified, instead there is a file
    // specifying multiple R's.
    thresholdR = 0;

    // Read in the file
    FILE *radiiFile = fopen(args[5], "rt");
    FAILIF(radiiFile == NULL);
    fscanf(radiiFile, "%d\n", &nRadii);
    ASSERT(nRadii > 0);
    FAILIF(NULL == (listOfRadii = (RealT*)MALLOC(nRadii * sizeof(RealT))));
    FAILIF(NULL == (memRatiosForNNStructs = (RealT*)MALLOC(nRadii * sizeof(RealT))));
    for(IntT i = 0; i < nRadii; i++)
	{
      FSCANF_REAL(radiiFile, &listOfRadii[i]);
      ASSERT(listOfRadii[i] > 0);
      FSCANF_REAL(radiiFile, &memRatiosForNNStructs[i]);
      ASSERT(memRatiosForNNStructs[i] > 0);
    }
  }
  else
  {
    nRadii = 1;
    FAILIF(NULL == (listOfRadii = (RealT*)MALLOC(nRadii * sizeof(RealT))));
    FAILIF(NULL == (memRatiosForNNStructs = (RealT*)MALLOC(nRadii * sizeof(RealT))));
    listOfRadii[0] = thresholdR;
    memRatiosForNNStructs[0] = 1;
  }//对阈值R 和Radiii的处理



  DPRINTF("No. radii: %d\n", nRadii);
  //thresholdR = atof(args[5]);
  availableTotalMemory = atoll(args[8]);//$M表示的是内存空间大小

  if (nPoints > MAX_N_POINTS)
  {
    printf("Error: the structure supports at most %d points (%d were specified).\n", MAX_N_POINTS, nPoints);
    fprintf(ERROR_OUTPUT, "Error: the structure supports at most %d points (%d were specified).\n", MAX_N_POINTS, nPoints);
    exit(1);
  }

  readDataSetFromFile(args[6]);//点读到dataSetPoints

  //这个totalAllocatedMemory初始化为0,但是
  //#define MALLOC(amount) ((amount > 0) ? totalAllocatedMemory += amount, malloc(amount) : NULL)
  //这样,每次申请内存都会统计到了

  DPRINTF("Allocated memory (after reading data set): %lld\n", totalAllocatedMemory);

  Int32T nSampleQueries = N_SAMPLE_QUERY_POINTS;
  PPointT sampleQueries[N_SAMPLE_QUERY_POINTS];
  Int32T sampleQBoundaryIndeces[N_SAMPLE_QUERY_POINTS];
//  PPointT sampleQueries[nSampleQueries];
 // Int32T sampleQBoundaryIndeces[nSampleQueries];
 
  
  if ((nargs <= 9)   ||  (strcmp("-c", args[9]) == 0)    )
  {
    // In this cases, we need to generate a sample query set for
    // computing the optimal parameters.

    // Generate a sample query set.
    FILE *queryFile = fopen(args[7], "rt");
    if (strcmp(args[7], ".") == 0 || queryFile == NULL || nQueries <= 0)
	{//没有查询文件,就用所有点产生随机点
      // Choose several data set points for the sample query points.
		for(IntT i = 0; i < nSampleQueries; i++){
			sampleQueries[i] = dataSetPoints[genRandomInt(0, nPoints - 1)];

	  }
    }
	else
	{
		//从查询文件中选取随机的点,
      // Choose several actual query points for the sample query points.
		  nSampleQueries = MIN(nSampleQueries, nQueries);
		   Int32T sampleIndeces[N_SAMPLE_QUERY_POINTS];
		  //Int32T sampleIndeces[nSampleQueries];
		  for(IntT i = 0; i < nSampleQueries; i++)
		  {
			sampleIndeces[i] = genRandomInt(0, nQueries - 1);
		  }
		  qsort(sampleIndeces, nSampleQueries, sizeof(*sampleIndeces), compareInt32T);
		  //printIntVector("sampleIndeces: ", nSampleQueries, sampleIndeces);
		  Int32T j = 0;
		  for(Int32T i = 0; i < nQueries; i++)
		  {
			  if (i == sampleIndeces[j])
			  {
				  sampleQueries[j] = readPoint(queryFile);
				  j++;
				  while (i == sampleIndeces[j])
				  {
					  sampleQueries[j] = sampleQueries[j - 1];
					  j++;
				  }
			  }else
			  {
				  fscanf(queryFile, "%[^\n]", sBuffer);
				  fscanf(queryFile, "\n");
			  }
		 }
		  nSampleQueries = j;
		  fclose(queryFile);
    }

	//前面那么多,好像就是在申请内存,读文件,读入参数

    // Compute the array sampleQBoundaryIndeces that specifies how to
    // segregate the sample query points according to their distance
    // to NN.
	//采用遍历的方法,计算查询点的最近邻(并且距离小于listOfRadii【nRadii】)
    sortQueryPointsByRadii(pointsDimension,
			   nSampleQueries,
			   sampleQueries,
			   nPoints,
			   dataSetPoints,
			   nRadii,
			   listOfRadii,
			   sampleQBoundaryIndeces);
  }//if ((nargs < 9) || (strcmp("-c", args[9]) == 0))


  RNNParametersT *algParameters = NULL;
  PRNearNeighborStructT *nnStructs = NULL;
  if (nargs > 9) 
  {/* 官方lsh文件:10个参数
bin/LSHMain $nDataSet $nQuerySet $dimension $successProbability "$1" "$2" "$3" $m -c


*/
	     
    // Additional command-line parameter is specified.
    if (strcmp("-c", args[9]) == 0) 	//-c表示参数优化
	{
 // Only compute the R-NN DS parameters and output them to stdout.
      printf("%d\n", nRadii);
      transformMemRatios();
      for(IntT i = 0; i < nRadii; i++)
	  {
		// which sample queries to use
		Int32T segregatedQStart = (i == 0) ? 0 : sampleQBoundaryIndeces[i - 1];
		Int32T segregatedQNumber = nSampleQueries - segregatedQStart;
		if (segregatedQNumber == 0) 
		{
		  // XXX: not the right answer
		  segregatedQNumber = nSampleQueries;
		  segregatedQStart = 0;
		}
		ASSERT(segregatedQStart < nSampleQueries);
		ASSERT(segregatedQStart >= 0);
		ASSERT(segregatedQStart + segregatedQNumber <= nSampleQueries);
		ASSERT(segregatedQNumber >= 0);

		//从文件读取点,然后计算优化后的参数
		RNNParametersT optParameters = computeOptimalParameters(listOfRadii[i],
									successProbability,
									nPoints,
									pointsDimension,
									dataSetPoints,
									segregatedQNumber,
									sampleQueries + segregatedQStart,
/*对内存的约束,就体现在这里,
availableTotalMemory总共的内存(传入) - totalAllocatedMemory(使用mallloc分配的)*1=内存上限

然后(L * nPoints > memoryUpperBound / 12 来约束
*/
									(MemVarT)((availableTotalMemory - totalAllocatedMemory) * memRatiosForNNStructs[i]));
		printRNNParameters(stdout, optParameters);
      }
      exit(0);
    } 
	else if (strcmp("-p", args[9]) == 0) 
	{//-p表示从文件读入参数,然后建立结构体
      // Read the R-NN DS parameters from the given file and run the
      // queries on the constructed data structure.
      if (nargs < 10)
	  {
		  usage(args[0]);
		  exit(1);
      }
      FILE *pFile = fopen(args[10], "rt");
      FAILIFWR(pFile == NULL, "Could not open the params file.");
      fscanf(pFile, "%d\n", &nRadii);
      DPRINTF1("Using the following R-NN DS parameters:\n");
      DPRINTF("N radii = %d\n", nRadii);
      FAILIF(NULL == (nnStructs = (PRNearNeighborStructT*)MALLOC(nRadii * sizeof(PRNearNeighborStructT))));
      FAILIF(NULL == (algParameters = (RNNParametersT*)MALLOC(nRadii * sizeof(RNNParametersT))));
      for(IntT i = 0; i < nRadii; i++)
	  {//默认i=1
		  algParameters[i] = readRNNParameters(pFile);//从文件读参数

		  printRNNParameters(stderr, algParameters[i]);
		  nnStructs[i] = initLSH_WithDataSet(algParameters[i], nPoints, dataSetPoints);
		  //核心
		  //初始化整个数据结构 包括整体+l个hash表 +点映射到桶
	  }

      pointsDimension = algParameters[0].dimension;
      FREE(listOfRadii);
      FAILIF(NULL == (listOfRadii = (RealT*)MALLOC(nRadii * sizeof(RealT))));
      for(IntT i = 0; i < nRadii; i++)
	  {
		  listOfRadii[i] = algParameters[i].parameterR;
	  }
    } 
	else
	{
      // Wrong option.
      usage(args[0]);
      exit(1);
    }
  }//if (nargs > 9) 
  else 
  {
    FAILIF(NULL == (nnStructs = (PRNearNeighborStructT*)MALLOC(nRadii * sizeof(PRNearNeighborStructT))));
    // Determine the R-NN DS parameters, construct the DS and run the queries.
    transformMemRatios();
    for(IntT i = 0; i < nRadii; i++)
	{
      // XXX: segregate the sample queries...
		//建立查询结构,自动优化参数
      nnStructs[i] = initSelfTunedRNearNeighborWithDataSet(listOfRadii[i], 
							   successProbability, 
							   nPoints, 
							   pointsDimension, 
							   dataSetPoints, 
							   nSampleQueries, 
							   sampleQueries, 
							   (MemVarT)((availableTotalMemory - totalAllocatedMemory) * memRatiosForNNStructs[i]));
    }
  } // if (nargs <= 9) 



  //上面都是根据不同配置,对参数的优化,建立查询结构

  DPRINTF1("X\n");

  IntT resultSize = nPoints;
  PPointT *result = (PPointT*)MALLOC(resultSize * sizeof(*result));
  PPointT queryPoint;
  FAILIF(NULL == (queryPoint = (PPointT)MALLOC(sizeof(PointT))));
  FAILIF(NULL == (queryPoint->coordinates = (RealT*)MALLOC(pointsDimension * sizeof(RealT))));

  //读取查询点的文件
  FILE *queryFile = fopen(args[7], "rt");
  FAILIF(queryFile == NULL);
  TimeVarT meanQueryTime = 0;
  PPointAndRealTStructT *distToNN = NULL;
  for(IntT i = 0; i < nQueries; i++)
  {//对于每一个要查询的点

    RealT sqrLength = 0;
    // read in the query point.
    for(IntT d = 0; d < pointsDimension; d++)
	{

      FSCANF_REAL(queryFile, &(queryPoint->coordinates[d]));
      sqrLength += SQR(queryPoint->coordinates[d]);


	  /*//test
	  if (d >150 &&  d<160)
	  {
		  printf(" %lf ",queryPoint->coordinates[d]);
	  }
	  if ( d==160)
	  {
		  printf("原始的文件数据\n");
	  }
	  */
	  
    }
    queryPoint->sqrLength = sqrLength;
    //printRealVector("Query: ", pointsDimension, queryPoint->coordinates);


	
    // get the near neighbors.
    IntT nNNs = 0;
    for(IntT r = 0; r < nRadii; r++)
	{//查询n个近邻点,并计算距离

		//查询核心
      nNNs = getRNearNeighbors(nnStructs[r], queryPoint, result, resultSize);



      printf("Total time for R-NN query at radius %0.6lf (radius no. %d):\t%0.6lf\n", (double)(listOfRadii[r]), r, timeRNNQuery);
      meanQueryTime += timeRNNQuery;

      if (nNNs > 0)
	  {
		printf("Query point %d: found %d NNs at distance %0.6lf (%dth radius). First %d NNs are:\n", 
			i, nNNs, (double)(listOfRadii[r]), r, MIN(nNNs, MAX_REPORTED_POINTS));
	
		// compute the distances to the found NN, and sort according to the distance
		//计算近邻点和查询点的距离
		FAILIF(NULL == (distToNN = (PPointAndRealTStructT*)REALLOC(distToNN, nNNs * sizeof(*distToNN))));
		for(IntT p = 0; p < nNNs; p++)
		{
		  distToNN[p].ppoint = result[p];
		  distToNN[p].real = distance(pointsDimension, queryPoint, result[p]);
		}
		qsort(distToNN, nNNs, sizeof(*distToNN), comparePPointAndRealTStructT);

		// Print the points
		for(IntT j = 0; j < MIN(nNNs, MAX_REPORTED_POINTS); j++)
		{
		  ASSERT(distToNN[j].ppoint != NULL);
		  printf("%09d\tDistance:%0.6lf\n", distToNN[j].ppoint->index, distToNN[j].real);
		  CR_ASSERT(distToNN[j].real <= listOfRadii[r]);
		  //DPRINTF("Distance: %lf\n", distance(pointsDimension, queryPoint, result[j]));
		  //printRealVector("NN: ", pointsDimension, result[j]->coordinates);
		}
		break;
      }
    }
    if (nNNs == 0)
	{
      printf("Query point %d: no NNs found.\n", i);
    }
  }//  for(IntT i = 0; i < nQueries; i++)每个点查询

  //
  if (nQueries > 0)
  {
    meanQueryTime = meanQueryTime / nQueries;
    printf("Mean query time: %0.6lf\n", (double)meanQueryTime);
  }


  for(IntT i = 0; i < nRadii; i++)
  {
    freePRNearNeighborStruct(nnStructs[i]);
  }
  // XXX: should ideally free the other stuff as well.


  return 0;
}
Пример #7
0
/*
  The main entry to LSH package. Depending on the command line
  parameters, the function computes the R-NN data structure optimal
  parameters and/or construct the R-NN data structure and runs the
  queries on the data structure.

  Arguments (args[1..8] are required):
    args[1] number of data set points      args[2] number of queries
    args[3] dimension                      args[4] success probability
    args[5] radius R, or the name of a file listing several radii
    args[6] data set file                  args[7] query file
    args[8] available memory
    args[9] optional: "-c" (only compute and print the parameters) or
            "-p" (read the parameters from the file named by args[10]).
 */
int main(int nargs, char **args){
  if(nargs < 9){
    usage(args[0]);
    exit(1);
  }

  //initializeLSHGlobal();

  // Parse part of the command-line parameters.
  nPoints = atoi(args[1]);
  IntT nQueries = atoi(args[2]);
  pointsDimension = atoi(args[3]);
  successProbability = atof(args[4]);
  // strtod() stores in <endPtr> the address of the first character it
  // did not convert; endPtr == args[5] means no number was found.
  char *endPtr = NULL;
  RealT thresholdR = strtod(args[5], &endPtr);
  if (thresholdR == 0 || endPtr == args[5]){
    // The value for R is not specified, instead there is a file
    // specifying multiple R's.
    thresholdR = 0;

    // Read in the file
    FILE *radiiFile = fopen(args[5], "rt");
    FAILIF(radiiFile == NULL);
    FAILIF(fscanf(radiiFile, "%d\n", &nRadii) != 1);
    ASSERT(nRadii > 0);
    FAILIF(NULL == (listOfRadii = (RealT*)MALLOC(nRadii * sizeof(RealT))));
    FAILIF(NULL == (memRatiosForNNStructs = (RealT*)MALLOC(nRadii * sizeof(RealT))));
    for(IntT i = 0; i < nRadii; i++){
      FSCANF_REAL(radiiFile, &listOfRadii[i]);
      ASSERT(listOfRadii[i] > 0);
      FSCANF_REAL(radiiFile, &memRatiosForNNStructs[i]);
      ASSERT(memRatiosForNNStructs[i] > 0);
    }
    fclose(radiiFile);
  }else{
    nRadii = 1;
    FAILIF(NULL == (listOfRadii = (RealT*)MALLOC(nRadii * sizeof(RealT))));
    FAILIF(NULL == (memRatiosForNNStructs = (RealT*)MALLOC(nRadii * sizeof(RealT))));
    listOfRadii[0] = thresholdR;
    memRatiosForNNStructs[0] = 1;
  }
  DPRINTF("No. radii: %d\n", nRadii);
  //thresholdR = atof(args[5]);
  availableTotalMemory = atoll(args[8]);

  if (nPoints > MAX_N_POINTS) {
    printf("Error: the structure supports at most %d points (%d were specified).\n", MAX_N_POINTS, nPoints);
    fprintf(ERROR_OUTPUT, "Error: the structure supports at most %d points (%d were specified).\n", MAX_N_POINTS, nPoints);
    exit(1);
  }

  readDataSetFromFile(args[6]); // read points into data structure
  DPRINTF("Allocated memory (after reading data set): %lld\n", totalAllocatedMemory);

  Int32T nSampleQueries = N_SAMPLE_QUERY_POINTS;
  PPointT sampleQueries[nSampleQueries];
  Int32T sampleQBoundaryIndeces[nSampleQueries];
  // args[9] only exists when more than 9 arguments were passed, so it
  // must not be examined when nargs == 9.
  if ((nargs <= 9) || (strcmp("-c", args[9]) == 0)){
    // In this cases, we need to generate a sample query set for
    // computing the optimal parameters.

    // Generate a sample query set.
    FILE *queryFile = fopen(args[7], "rt");
    if (strcmp(args[7], ".") == 0 || queryFile == NULL || nQueries <= 0){
      // Choose several data set points for the sample query points.
      for(IntT i = 0; i < nSampleQueries; i++){
        sampleQueries[i] = dataSetPoints[genRandomInt(0, nPoints - 1)];
      }
      // The file may still have been opened; do not leak the handle.
      if (queryFile != NULL){
        fclose(queryFile);
      }
    }else{
      // Choose several actual query points for the sample query points.
      nSampleQueries = MIN(nSampleQueries, nQueries);
      Int32T sampleIndeces[nSampleQueries];
      for(IntT i = 0; i < nSampleQueries; i++){
        sampleIndeces[i] = genRandomInt(0, nQueries - 1);
      }
      qsort(sampleIndeces, nSampleQueries, sizeof(*sampleIndeces), compareInt32T);
      //printIntVector("sampleIndeces: ", nSampleQueries, sampleIndeces);

      // Walk the query file once; read the points whose (sorted) index
      // was sampled and skip the other lines. <j> is bounds-checked
      // before every read of sampleIndeces[j].
      Int32T j = 0;
      for(Int32T i = 0; i < nQueries; i++){
        if (j < nSampleQueries && i == sampleIndeces[j]){
          sampleQueries[j] = readPoint(queryFile);
          j++;
          // the same index may have been sampled several times.
          while (j < nSampleQueries && i == sampleIndeces[j]){
            sampleQueries[j] = sampleQueries[j - 1];
            j++;
          }
        }else{
          // NOTE(review): this read is not bounded by the size of
          // sBuffer, which is declared outside this function -- verify
          // it is large enough for the longest line.
          fscanf(queryFile, "%[^\n]", sBuffer);
          fscanf(queryFile, "\n");
        }
      }
      nSampleQueries = j;
      fclose(queryFile);
    }

    // Compute the array sampleQBoundaryIndeces that specifies how to
    // segregate the sample query points according to their distance
    // to NN.
    sortQueryPointsByRadii(pointsDimension,
                           nSampleQueries,
                           sampleQueries,
                           nPoints,
                           dataSetPoints,
                           nRadii,
                           listOfRadii,
                           sampleQBoundaryIndeces);
  }

  RNNParametersT *algParameters = NULL;
  PRNearNeighborStructT *nnStructs = NULL;
  if (nargs > 9) {
    // Additional command-line parameter is specified.
    if (strcmp("-c", args[9]) == 0) {
      // Only compute the R-NN DS parameters and output them to stdout.

      printf("%d\n", nRadii);
      transformMemRatios();
      for(IntT i = 0; i < nRadii; i++){
        // which sample queries to use
        Int32T segregatedQStart = (i == 0) ? 0 : sampleQBoundaryIndeces[i - 1];
        Int32T segregatedQNumber = nSampleQueries - segregatedQStart;
        if (segregatedQNumber == 0) {
          // XXX: not the right answer
          segregatedQNumber = nSampleQueries;
          segregatedQStart = 0;
        }
        ASSERT(segregatedQStart < nSampleQueries);
        ASSERT(segregatedQStart >= 0);
        ASSERT(segregatedQStart + segregatedQNumber <= nSampleQueries);
        ASSERT(segregatedQNumber >= 0);
        RNNParametersT optParameters = computeOptimalParameters(listOfRadii[i],
                                                                successProbability,
                                                                nPoints,
                                                                pointsDimension,
                                                                dataSetPoints,
                                                                segregatedQNumber,
                                                                sampleQueries + segregatedQStart,
                                                                (MemVarT)((availableTotalMemory - totalAllocatedMemory) * memRatiosForNNStructs[i]));
        printRNNParameters(stdout, optParameters);
      }
      exit(0);
    } else if (strcmp("-p", args[9]) == 0) {
      // Read the R-NN DS parameters from the given file and run the
      // queries on the constructed data structure.
      // args[10] is only a valid file name when at least 11 arguments
      // (including the program name) were passed.
      if (nargs < 11){
        usage(args[0]);
        exit(1);
      }
      FILE *pFile = fopen(args[10], "rt");
      FAILIFWR(pFile == NULL, "Could not open the params file.");
      FAILIF(fscanf(pFile, "%d\n", &nRadii) != 1);
      DPRINTF1("Using the following R-NN DS parameters:\n");
      DPRINTF("N radii = %d\n", nRadii);
      FAILIF(NULL == (nnStructs = (PRNearNeighborStructT*)MALLOC(nRadii * sizeof(PRNearNeighborStructT))));
      FAILIF(NULL == (algParameters = (RNNParametersT*)MALLOC(nRadii * sizeof(RNNParametersT))));
      for(IntT i = 0; i < nRadii; i++){
        algParameters[i] = readRNNParameters(pFile);
        printRNNParameters(stderr, algParameters[i]);
        nnStructs[i] = initLSH_WithDataSet(algParameters[i], nPoints, dataSetPoints);
      }
      fclose(pFile);

      pointsDimension = algParameters[0].dimension;
      FREE(listOfRadii);
      FAILIF(NULL == (listOfRadii = (RealT*)MALLOC(nRadii * sizeof(RealT))));
      for(IntT i = 0; i < nRadii; i++){
        listOfRadii[i] = algParameters[i].parameterR;
      }
    } else{
      // Wrong option.
      usage(args[0]);
      exit(1);
    }
  } else {
    FAILIF(NULL == (nnStructs = (PRNearNeighborStructT*)MALLOC(nRadii * sizeof(PRNearNeighborStructT))));
    // Determine the R-NN DS parameters, construct the DS and run the queries.
    transformMemRatios();
    for(IntT i = 0; i < nRadii; i++){
      // XXX: segregate the sample queries...
      nnStructs[i] = initSelfTunedRNearNeighborWithDataSet(listOfRadii[i],
                                                           successProbability,
                                                           nPoints,
                                                           pointsDimension,
                                                           dataSetPoints,
                                                           nSampleQueries,
                                                           sampleQueries,
                                                           (MemVarT)((availableTotalMemory - totalAllocatedMemory) * memRatiosForNNStructs[i]));
    }
  }

  DPRINTF1("X\n");

  IntT resultSize = nPoints;
  PPointT *result = (PPointT*)MALLOC(resultSize * sizeof(*result));
  PPointT queryPoint;
  FAILIF(NULL == (queryPoint = (PPointT)MALLOC(sizeof(PointT))));
  FAILIF(NULL == (queryPoint->coordinates = (RealT*)MALLOC(pointsDimension * sizeof(RealT))));

  FILE *queryFile = fopen(args[7], "rt");
  FAILIF(queryFile == NULL);
  TimeVarT meanQueryTime = 0;
  PPointAndRealTStructT *distToNN = NULL;
  for(IntT i = 0; i < nQueries; i++){

    RealT sqrLength = 0;
    // read in the query point.
    for(IntT d = 0; d < pointsDimension; d++){
      FSCANF_REAL(queryFile, &(queryPoint->coordinates[d]));
      sqrLength += SQR(queryPoint->coordinates[d]);
    }
    queryPoint->sqrLength = sqrLength;
    //printRealVector("Query: ", pointsDimension, queryPoint->coordinates);

    // get the near neighbors: try the radii in order and stop at the
    // first one that yields at least one neighbor.
    IntT nNNs = 0;
    for(IntT r = 0; r < nRadii; r++){
      nNNs = getRNearNeighbors(nnStructs[r], queryPoint, result, resultSize);
      printf("Total time for R-NN query at radius %0.6lf (radius no. %d):\t%0.6lf\n", (double)(listOfRadii[r]), r, timeRNNQuery);
      meanQueryTime += timeRNNQuery;

      if (nNNs > 0){
        printf("Query point %d: found %d NNs at distance %0.6lf (%dth radius). First %d NNs are:\n", i, nNNs, (double)(listOfRadii[r]), r, MIN(nNNs, MAX_REPORTED_POINTS));

        // compute the distances to the found NN, and sort according to the distance
        FAILIF(NULL == (distToNN = (PPointAndRealTStructT*)REALLOC(distToNN, nNNs * sizeof(*distToNN))));
        for(IntT p = 0; p < nNNs; p++){
          distToNN[p].ppoint = result[p];
          distToNN[p].real = distance(pointsDimension, queryPoint, result[p]);
        }
        qsort(distToNN, nNNs, sizeof(*distToNN), comparePPointAndRealTStructT);

        // Print the points
        for(IntT j = 0; j < MIN(nNNs, MAX_REPORTED_POINTS); j++){
          ASSERT(distToNN[j].ppoint != NULL);
          printf("%09d\tDistance:%0.6lf\n", distToNN[j].ppoint->index, distToNN[j].real);
          CR_ASSERT(distToNN[j].real <= listOfRadii[r]);
          //DPRINTF("Distance: %lf\n", distance(pointsDimension, queryPoint, result[j]));
          //printRealVector("NN: ", pointsDimension, result[j]->coordinates);
        }
        break;
      }
    }
    if (nNNs == 0){
      printf("Query point %d: no NNs found.\n", i);
    }
  }
  fclose(queryFile);

  if (nQueries > 0){
    meanQueryTime = meanQueryTime / nQueries;
    printf("Mean query time: %0.6lf\n", (double)meanQueryTime);
  }

  for(IntT i = 0; i < nRadii; i++){
    freePRNearNeighborStruct(nnStructs[i]);
  }
  // XXX: should ideally free the other stuff as well.


  return 0;
}
Пример #8
0
/*
  The main entry to LSH package. Depending on the command line
  parameters, the function computes the R-NN data structure optimal
  parameters and/or construct the R-NN data structure and runs the
  queries on the data structure.

  Positional arguments, as consumed below:
    args[1]   number of data set points
    args[2]   number of query points
    args[3]   dimension of the points
    args[4]   success probability
    args[5]   radius R, or the name of a file that lists several radii
              (a count followed by <radius, memory ratio> pairs)
    args[6]   data set file
    args[7]   query file ("." makes the sample queries come from the data set)
    args[8]   total available memory
    args[9]   optional: "-c" (only compute and print the parameters) or
              "-p" (read the parameters from the file given in args[10])
    args[10]  parameters file (required with "-p")

  Returns 0 on success; argument errors terminate the process via exit(1).
 */
int main(int nargs, char **args){
  if(nargs < 9){
    usage(args[0]);
    exit(1);
  }

  //initializeLSHGlobal();

  // Parse part of the command-line parameters.
  nPoints = atoi(args[1]);
  IntT nQueries = atoi(args[2]);
  pointsDimension = atoi(args[3]);
  successProbability = atof(args[4]);

  // strtod stores in <endPtr> the address of the first character it
  // could not convert; endPtr == args[5] means nothing was converted.
  char *endPtr = NULL;
  RealT thresholdR = strtod(args[5], &endPtr);
  if (thresholdR == 0 || endPtr == args[5]){
    // The value for R is not specified, instead there is a file
    // specifying multiple R's.
    thresholdR = 0;

    // Read in the file
    FILE *radiiFile = fopen(args[5], "rt");
    FAILIF(radiiFile == NULL);
    fscanf(radiiFile, "%d\n", &nRadii);
    ASSERT(nRadii > 0);
    FAILIF(NULL == (listOfRadii = (RealT*)MALLOC(nRadii * sizeof(RealT))));
    FAILIF(NULL == (memRatiosForNNStructs = (RealT*)MALLOC(nRadii * sizeof(RealT))));
    for(IntT i = 0; i < nRadii; i++){
      FSCANF_REAL(radiiFile, &listOfRadii[i]);
      ASSERT(listOfRadii[i] > 0);
      FSCANF_REAL(radiiFile, &memRatiosForNNStructs[i]);
      ASSERT(memRatiosForNNStructs[i] > 0);
    }
    fclose(radiiFile);
  }else{
    // A single radius was given directly on the command line.
    nRadii = 1;
    FAILIF(NULL == (listOfRadii = (RealT*)MALLOC(nRadii * sizeof(RealT))));
    FAILIF(NULL == (memRatiosForNNStructs = (RealT*)MALLOC(nRadii * sizeof(RealT))));
    listOfRadii[0] = thresholdR;
    memRatiosForNNStructs[0] = 1;
  }
  DPRINTF("No. radii: %d\n", nRadii);
  //thresholdR = atof(args[5]);
  availableTotalMemory = atoll(args[8]);

  if (nPoints > MAX_N_POINTS) {
    printf("Error: the structure supports at most %d points (%d were specified).\n", MAX_N_POINTS, nPoints);
    fprintf(ERROR_OUTPUT, "Error: the structure supports at most %d points (%d were specified).\n", MAX_N_POINTS, nPoints);
    exit(1);
  }

  readDataSetFromFile(args[6]);    // args[6] is the data set file name
  DPRINTF("Allocated memory (after reading data set): %lld\n", totalAllocatedMemory);

  Int32T nSampleQueries = N_SAMPLE_QUERY_POINTS;   // upper bound on the number of sample query points
  PPointT sampleQueries[nSampleQueries];           // the sample query points
  Int32T sampleQBoundaryIndeces[nSampleQueries];   // filled in by sortQueryPointsByRadii below
  // The sample queries are needed both for "-c" and when no option is
  // given (self-tuning branch below). args[9] only exists when
  // nargs > 9, so it must not be inspected otherwise.
  if ((nargs <= 9) || (strcmp("-c", args[9]) == 0)){
    // In this cases, we need to generate a sample query set for
    // computing the optimal parameters.

    // Generate a sample query set.
    FILE *queryFile = fopen(args[7], "rt");
    if (strcmp(args[7], ".") == 0 || queryFile == NULL || nQueries <= 0){
      // Choose several data set points for the sample query points.
      for(IntT i = 0; i < nSampleQueries; i++){
        sampleQueries[i] = dataSetPoints[genRandomInt(0, nPoints - 1)];
      }
      if (queryFile != NULL){
        fclose(queryFile);
      }
    }else{
      // Choose several actual query points for the sample query points.
      nSampleQueries = MIN(nSampleQueries, nQueries);
      Int32T sampleIndeces[nSampleQueries];
      // Draw random query indices (duplicates are possible) ...
      for(IntT i = 0; i < nSampleQueries; i++){
        sampleIndeces[i] = genRandomInt(0, nQueries - 1);
      }
      // ... and sort them so the query file can be consumed in one
      // sequential pass.
      qsort(sampleIndeces, nSampleQueries, sizeof(*sampleIndeces), compareInt32T);
      //printIntVector("sampleIndeces: ", nSampleQueries, sampleIndeces);
      Int32T j = 0;
      for(Int32T i = 0; i < nQueries; i++){
        // j is bounds-checked: once every sampled index has been
        // consumed there is nothing left to compare against.
        if (j < nSampleQueries && i == sampleIndeces[j]){
          // Query i was sampled: read it.
          sampleQueries[j] = readPoint(queryFile);
          j++;
          // The same index may have been drawn several times; reuse
          // the point that was just read for each duplicate.
          while (j < nSampleQueries && i == sampleIndeces[j]){
            sampleQueries[j] = sampleQueries[j - 1];
            j++;
          }
        }else{
          // Query i was not sampled: skip its line.
          fscanf(queryFile, "%[^\n]", sBuffer);
          fscanf(queryFile, "\n");
        }
      }
      nSampleQueries = j;
      fclose(queryFile);
    }

    // Compute the array sampleQBoundaryIndeces that specifies how to
    // segregate the sample query points according to their distance
    // to NN.
    sortQueryPointsByRadii(pointsDimension,
                           nSampleQueries,    // number of sample query points
                           sampleQueries,     // the sample query points
                           nPoints,           // number of data set points
                           dataSetPoints,     // the data set
                           nRadii,            // number of radii
                           listOfRadii,       // the radii
                           sampleQBoundaryIndeces);
  }

  RNNParametersT *algParameters = NULL;
  PRNearNeighborStructT *nnStructs = NULL;
  if (nargs > 9) {
    // Additional command-line parameter is specified.
    if (strcmp("-c", args[9]) == 0) {
      // Only compute the R-NN DS parameters and output them to stdout.

      printf("%d\n", nRadii);
      transformMemRatios();
      for(IntT i = 0; i < nRadii; i++){
        // which sample queries to use
        Int32T segregatedQStart = (i == 0) ? 0 : sampleQBoundaryIndeces[i - 1];
        Int32T segregatedQNumber = nSampleQueries - segregatedQStart;
        if (segregatedQNumber == 0) {
          // No sample query is left for this radius: fall back to all of them.
          // XXX: not the right answer
          segregatedQNumber = nSampleQueries;
          segregatedQStart = 0;
        }
        ASSERT(segregatedQStart < nSampleQueries);
        ASSERT(segregatedQStart >= 0);
        ASSERT(segregatedQStart + segregatedQNumber <= nSampleQueries);
        ASSERT(segregatedQNumber >= 0);
        // The last argument is this radius' share of the memory that is still available.
        RNNParametersT optParameters = computeOptimalParameters(listOfRadii[i],
                                                                successProbability,
                                                                nPoints,
                                                                pointsDimension,
                                                                dataSetPoints,
                                                                segregatedQNumber,
                                                                sampleQueries + segregatedQStart,
                                                                (MemVarT)((availableTotalMemory - totalAllocatedMemory) * memRatiosForNNStructs[i]));
        printRNNParameters(stdout, optParameters);
      }
      exit(0);
    } else if (strcmp("-p", args[9]) == 0) {
      // Read the R-NN DS parameters from the given file and run the
      // queries on the constructed data structure.
      // args[10] (the parameters file) requires at least 11 arguments.
      if (nargs < 11){
        usage(args[0]);
        exit(1);
      }
      FILE *pFile = fopen(args[10], "rt");
      FAILIFWR(pFile == NULL, "Could not open the params file.");
      // The file starts with the number of radii, followed by one
      // parameter record per radius (read by readRNNParameters below).
      fscanf(pFile, "%d\n", &nRadii);
      DPRINTF1("Using the following R-NN DS parameters:\n");
      DPRINTF("N radii = %d\n", nRadii);
      FAILIF(NULL == (nnStructs = (PRNearNeighborStructT*)MALLOC(nRadii * sizeof(PRNearNeighborStructT))));
      FAILIF(NULL == (algParameters = (RNNParametersT*)MALLOC(nRadii * sizeof(RNNParametersT))));
      for(IntT i = 0; i < nRadii; i++){
        algParameters[i] = readRNNParameters(pFile);
        //printRNNParameters(stderr, algParameters[i]);
        // Build the structure for this radius from the parameters just read.
        nnStructs[i] = initLSH_WithDataSet(algParameters[i], nPoints, dataSetPoints);
      }
      fclose(pFile);

      // The parameters file overrides the dimension and the radii given
      // on the command line.
      pointsDimension = algParameters[0].dimension;
      FREE(listOfRadii);
      FAILIF(NULL == (listOfRadii = (RealT*)MALLOC(nRadii * sizeof(RealT))));
      for(IntT i = 0; i < nRadii; i++){
        listOfRadii[i] = algParameters[i].parameterR;
      }
    } else{
      // Wrong option.
      usage(args[0]);
      exit(1);
    }
  } else {
    FAILIF(NULL == (nnStructs = (PRNearNeighborStructT*)MALLOC(nRadii * sizeof(PRNearNeighborStructT))));
    // Determine the R-NN DS parameters, construct the DS and run the queries.
    transformMemRatios();
    for(IntT i = 0; i < nRadii; i++){
      // XXX: segregate the sample queries...
      nnStructs[i] = initSelfTunedRNearNeighborWithDataSet(listOfRadii[i],
                                                           successProbability,
                                                           nPoints,
                                                           pointsDimension,
                                                           dataSetPoints,
                                                           nSampleQueries,
                                                           sampleQueries,
                                                           (MemVarT)((availableTotalMemory - totalAllocatedMemory) * memRatiosForNNStructs[i]));
    }
  }

  // DPRINTF1("X\n");
  printf("X\n");

  IntT resultSize = nPoints;
  PPointT *result = NULL;
  FAILIF(NULL == (result = (PPointT*)MALLOC(resultSize * sizeof(*result))));
  PPointT queryPoint;
  FAILIF(NULL == (queryPoint = (PPointT)MALLOC(sizeof(PointT))));
  FAILIF(NULL == (queryPoint->coordinates = (RealT*)MALLOC(pointsDimension * sizeof(RealT))));

  FILE *queryFile = fopen(args[7], "rt");
  FAILIF(queryFile == NULL);
  TimeVarT meanQueryTime = 0;
  PPointAndRealTStructT *distToNN = NULL;
  for(IntT i = 0; i < nQueries; i++){

    RealT sqrLength = 0;
    // read in the query point.
    for(IntT d = 0; d < pointsDimension; d++){
      FSCANF_REAL(queryFile, &(queryPoint->coordinates[d]));
      sqrLength += SQR(queryPoint->coordinates[d]);   // accumulate the squared norm
    }
    queryPoint->sqrLength = sqrLength;
    //printRealVector("Query: ", pointsDimension, queryPoint->coordinates);

    // get the near neighbors: try the radii in order and stop at the
    // first one that yields at least one neighbor.
    IntT nNNs = 0;
    for(IntT r = 0; r < nRadii; r++){
      nNNs = getRNearNeighbors(nnStructs[r], queryPoint, result, resultSize);
      printf("Total time for R-NN query at radius %0.6lf (radius no. %d):\t%0.6lf\n", (double)(listOfRadii[r]), r, timeRNNQuery);
      meanQueryTime += timeRNNQuery;

      if (nNNs > 0){
        printf("Query point %d: found %d NNs at distance %0.6lf (%dth radius). First %d NNs are:\n", i, nNNs, (double)(listOfRadii[r]), r, MIN(nNNs, MAX_REPORTED_POINTS));

        // compute the distances to the found NN, and sort according to the distance
        FAILIF(NULL == (distToNN = (PPointAndRealTStructT*)REALLOC(distToNN, nNNs * sizeof(*distToNN))));
        for(IntT p = 0; p < nNNs; p++){
          distToNN[p].ppoint = result[p];
          distToNN[p].real = distance(pointsDimension, queryPoint, result[p]);
        }
        qsort(distToNN, nNNs, sizeof(*distToNN), comparePPointAndRealTStructT);

        // Print the points
        for(IntT j = 0; j < MIN(nNNs, MAX_REPORTED_POINTS); j++){
          ASSERT(distToNN[j].ppoint != NULL);
          printf("%09d\tDistance:%0.6lf\n", distToNN[j].ppoint->index, distToNN[j].real);   // point index and its distance
          CR_ASSERT(distToNN[j].real <= listOfRadii[r]);
          //DPRINTF("Distance: %lf\n", distance(pointsDimension, queryPoint, result[j]));
          //printRealVector("NN: ", pointsDimension, result[j]->coordinates);
        }
        break;
      }
    }
    if (nNNs == 0){
      printf("Query point %d: no NNs found.\n", i);
    }
  }
  fclose(queryFile);
  if (nQueries > 0){
    meanQueryTime = meanQueryTime / nQueries;
    printf("Mean query time: %0.6lf\n", (double)meanQueryTime);
  }

  for(IntT i = 0; i < nRadii; i++){
    freePRNearNeighborStruct(nnStructs[i]);
  }
  // XXX: should ideally free the other stuff as well.


  return 0;
}
Пример #9
0
int 
MemCacheClient::Store(
    const char *    aType,
    MemRequest *    aItem, 
    int             aCount
    )
{
    if (aCount < 1) {
        mTrace.Trace(CLDEBUG, "Store: ignoring request for %d items", aCount);
        return 0;
    }

    // initialize and find all of the servers for these items
    int nItemCount = 0;
    for (int n = 0; n < aCount; ++n) {
        // ensure that the key doesn't have a space in it
        CR_ASSERT(NULL == strchr(aItem[n].mKey.data(), ' '));
        aItem[n].mServer = FindServer(aItem[n].mKey, aItem[n].mService);
        if (aItem[n].mServer) {
            ++nItemCount;
        }
        else {
            aItem[n].mResult = MCERR_NOSERVER;
        }
    }
    if (nItemCount == 0) {
        mTrace.Trace(CLDEBUG, "Store: ignoring request for all %d items (no servers available)", 
            aCount);
        return 0;
    }

    char szBuf[50];
    int nResponses = 0;
    string_t sRequest;
    for (int n = 0; n < aCount; ++n) {
        if (!aItem[n].mServer) continue;

        // <command name> <key> <flags> <exptime> <bytes> [noreply]\r\n
        sRequest  = aType;
        sRequest += ' ';
        sRequest += aItem[n].mKey;
        snprintf(szBuf, sizeof(szBuf), " %u %ld %u", 
            aItem[n].mFlags, (long) aItem[n].mExpiry, 
            (unsigned)aItem[n].mData.GetReadSize());
        sRequest += szBuf;
        if (*aType == 'c') { // cas
            snprintf(szBuf, sizeof(szBuf), " %" PRIu64, aItem[n].mCas);
            sRequest += szBuf;
        }
        if (aItem[n].mResult == MCERR_NOREPLY) {
            sRequest += " noreply";
        }
        sRequest += "\r\n";

        // send the request. any socket error causes the server connection 
        // to be dropped, so we return errors for all requests using that server.
        try {
            aItem[n].mServer->SendBytes(
                sRequest.data(), sRequest.length());
            aItem[n].mServer->SendBytes(
                aItem[n].mData.GetReadBuffer(), 
                aItem[n].mData.GetReadSize());
            aItem[n].mServer->SendBytes("\r\n", 2);

            // done with these read bytes
            aItem[n].mData.CommitReadBytes(
                aItem[n].mData.GetReadSize());

            // if no reply is required then move on to the next request
            if (aItem[n].mResult == MCERR_NOREPLY) {
                continue;
            }

            // handle this response
            HandleStoreResponse(aItem[n].mServer, aItem[n]);
            ++nResponses;
        }
        catch (const Socket::Exception & e) {
            mTrace.Trace(CLINFO, "Store: error '%s' at %s, marking requests as NOSERVER",
                e.mDetail, aItem[n].mServer->GetAddress());
            for (int i = aCount - 1; i >= n; --i) {
                if (aItem[n].mServer != aItem[i].mServer) continue;
                aItem[i].mServer = NULL;
                aItem[i].mResult = MCERR_NOSERVER;
            }
            continue;
        }
    }

    return nResponses;
}
Пример #10
0
int 
MemCacheClient::Combine(
    const char *    aType,
    MemRequest *    aItem, 
    int             aCount
    )
{
    // Issues a batched "get" or "delete" for aItem[0..aCount).
    //
    // The items are grouped by the server returned from FindServer; one
    // request buffer is sent per server, then the responses are read back
    // per server.
    //
    // aType   command name; only its first character is examined
    //         ('g' => get, 'd' => delete)
    // aItem   requests; mServer, mData and mResult are updated in place
    // aCount  number of entries in aItem
    //
    // Returns the sum of the values returned by HandleGetResponse /
    // HandleDelResponse, 0 when there is nothing to do (no items, or no
    // server for any item), or -1 when aCount exceeds MAX_REQUESTS.
    // Items whose server could not be found or raised a socket error have
    // mResult set to MCERR_NOSERVER.

    if (aCount < 1) {
        mTrace.Trace(CLDEBUG, "%s: ignoring request for %d items",
            aType, aCount);
        return 0;
    }
    CR_ASSERT(*aType == 'g' || *aType == 'd'); // get, gets, del

    MemRequest * rgpItem[MAX_REQUESTS] = { NULL };
    if (aCount > MAX_REQUESTS) {
        mTrace.Trace(CLDEBUG, "%s: ignoring request for all %d items (too many)", 
            aType, aCount);
        return -1; // invalid args
    }

    // initialize and find all of the servers for these items
    int nItemCount = 0;
    for (int n = 0; n < aCount; ++n) {
        // ensure that the key doesn't have a space in it
        CR_ASSERT(NULL == strchr(aItem[n].mKey.data(), ' '));
        aItem[n].mServer = FindServer(aItem[n].mKey, aItem[n].mService);
        aItem[n].mData.SetEmpty();
        if (aItem[n].mServer) {
            rgpItem[nItemCount++] = &aItem[n];
        }
        else {
            aItem[n].mResult = MCERR_NOSERVER;
        }
    }
    if (nItemCount == 0) {
        mTrace.Trace(CLDEBUG, "%s: ignoring request for all %d items (no servers available)", 
            aType, aCount);
        return 0;
    }

    // sort all requests into server order
    const static MemRequest::Sort sortOnServer = MemRequest::Sort();
    std::sort(&rgpItem[0], &rgpItem[nItemCount], sortOnServer);

    // send all requests
    char szBuf[50];
    int nItem = 0, nNext;
    string_t sRequest;
    while (nItem < nItemCount) {
        // Every server group starts from an empty buffer. The delete
        // branch below only appends, so without this reset each server
        // would also be sent the delete lines built for the servers
        // handled before it.
        sRequest.resize(0);

        for (nNext = nItem; nNext < nItemCount; ++nNext) {
            if (rgpItem[nItem]->mServer != rgpItem[nNext]->mServer) break;
            CR_ASSERT(*aType == 'g' || *aType == 'd');
            rgpItem[nNext]->mData.SetEmpty();

            // create get request for all keys on this server:
            // a single "get <key> <key> ...\r\n" line
            if (*aType == 'g') {
                if (nNext == nItem) sRequest = "get";
                else sRequest.resize(sRequest.length() - 2); // strip the previous "\r\n"
                sRequest += ' ';
                sRequest += rgpItem[nNext]->mKey;
                sRequest += "\r\n";
                rgpItem[nNext]->mResult = MCERR_NOTFOUND;
            }
            // create del request for all keys on this server:
            // one line per key
            else if (*aType == 'd') {
                // delete <key> [<time>] [noreply]\r\n
                sRequest += "delete ";
                sRequest += rgpItem[nNext]->mKey;
                sRequest += ' ';
                snprintf(szBuf, sizeof(szBuf), "%ld", (long) rgpItem[nNext]->mExpiry);
                sRequest += szBuf;
                if (rgpItem[nNext]->mResult == MCERR_NOREPLY) {
                    sRequest += " noreply";
                }
                sRequest += "\r\n";
                if (rgpItem[nNext]->mResult != MCERR_NOREPLY) {
                    rgpItem[nNext]->mResult = MCERR_NOTFOUND;
                }
            }
        }

        // send the request. any socket error causes the server connection 
        // to be dropped, so we return errors for all requests using that server.
        try {
            rgpItem[nItem]->mServer->SendBytes(
                sRequest.data(), sRequest.length());
        }
        catch (const Socket::Exception & e) {
            mTrace.Trace(CLINFO, "%s: request error '%s' at %s, marking requests as NOSERVER",
                aType, e.mDetail, rgpItem[nItem]->mServer->GetAddress());
            for (int n = nItem; n < nNext; ++n) {
                rgpItem[n]->mServer = NULL;
                rgpItem[n]->mResult = MCERR_NOSERVER;
            }
        }
        nItem = nNext;
    }

    // receive responses from all servers
    int nResponses = 0;
    for (nItem = 0; nItem < nItemCount; nItem = nNext) {
        // items whose send failed above have no server any more: skip them
        if (!rgpItem[nItem]->mServer) { nNext = nItem + 1; continue; }

        // find the end of this server
        for (nNext = nItem + 1; nNext < nItemCount; ++nNext) {
            if (rgpItem[nItem]->mServer != rgpItem[nNext]->mServer) break;
        }

        // receive the responses. any socket error causes the server connection 
        // to be dropped, so we return errors for all requests using that server.
        try {
            if (*aType == 'g') {
                nResponses += HandleGetResponse(
                    rgpItem[nItem]->mServer, 
                    &rgpItem[nItem], &rgpItem[nNext]);
            }
            else if (*aType == 'd') {
                nResponses += HandleDelResponse(
                    rgpItem[nItem]->mServer, 
                    &rgpItem[nItem], &rgpItem[nNext]);
            }
        }
        catch (const Socket::Exception & e) {
            mTrace.Trace(CLINFO, "%s: response error '%s' at %s, marking requests as NOSERVER",
                aType, e.mDetail, rgpItem[nItem]->mServer->GetAddress());
            rgpItem[nItem]->mServer->Disconnect();
            // walk backwards so that rgpItem[nItem] (whose mServer is the
            // comparison key) is cleared last
            for (int n = nNext - 1; n >= nItem; --n) {
                if (rgpItem[nItem]->mServer != rgpItem[n]->mServer) continue;
                rgpItem[n]->mServer = NULL;
                rgpItem[n]->mResult = MCERR_NOSERVER;
            }
        }
    }

    mTrace.Trace(CLDEBUG, "%s: received %d responses to %d requests",
        aType, nResponses, aCount);
    return nResponses;
}
// Returns the list of near neighbors of the point <query> (with a
// certain success probability). Near neighbor is defined as being a
// point within distance <parameterR>. Each near neighbor from the
// data set is returned with a certain probability,
// dependent on <parameterK>, <parameterL>, and <parameterT>. The
// returned points are kept in the array <result>. If result is not
// allocated, it will be allocated to at least some minimum size
// (RESULT_INIT_SIZE). If number of returned points is bigger than the
// size of <result>, then the <result> is resized (to up to twice the
// number of returned points). The return value is the number of
// points found.
//
// <result> and <resultSize> are taken by reference and are updated
// whenever the array is (re)allocated. The function uses
// nnStruct->markedPoints / markedPointsIndeces as scratch space and
// resets every mark it set before returning.
Int32T getNearNeighborsFromPRNearNeighborStruct(
    PRNearNeighborStructT nnStruct, PPointT query,
    PPointT *(&result), Int32T &resultSize)
{
    ASSERT(nnStruct != NULL);
    ASSERT(query != NULL);
    ASSERT(nnStruct->reducedPoint != NULL);
    ASSERT(!nnStruct->useUfunctions || nnStruct->pointULSHVectors != NULL);

    PPointT point = query;

    if (result == NULL)
    {
        resultSize = RESULT_INIT_SIZE;
        FAILIF(NULL == (result = (PPointT*)MALLOC(resultSize * sizeof(PPointT))));
    }

    // Presumably computes the hashes of <point> for every <u> function and
    // leaves them in nnStruct->precomputedHashesOfULSHs (which is what is
    // read right below) -- confirm against preparePointAdding.
    preparePointAdding(nnStruct, nnStruct->hashedBuckets[0], point);

    // Take a private copy of the precomputed hashes:
    // nHFTuples rows of N_PRECOMPUTED_HASHES_NEEDED words each.
    // The copy is released before returning.
    Uns32T **precomputedHashesOfULSHs = (Uns32T**)malloc(sizeof(Uns32T*) * (nnStruct->nHFTuples));
    FAILIF(NULL == precomputedHashesOfULSHs);
    for(IntT i = 0; i < nnStruct->nHFTuples; i++)
    {
        precomputedHashesOfULSHs[i] = (Uns32T*)malloc(sizeof(Uns32T) * (N_PRECOMPUTED_HASHES_NEEDED));
        FAILIF(NULL == precomputedHashesOfULSHs[i]);
        for(IntT j = 0; j < N_PRECOMPUTED_HASHES_NEEDED; j++)
        {
            precomputedHashesOfULSHs[i][j] = nnStruct->precomputedHashesOfULSHs[i][j];
        }
    }

    TIMEV_START(timeTotalBuckets);

    BooleanT oldTimingOn = timingOn;
    if (noExpensiveTiming)
    {
        timingOn = FALSE;
    }

    // Initialize the counters for defining the pair of <u> functions used for <g> functions.
    IntT firstUComp = 0;
    IntT secondUComp = 1;

    Int32T nNeighbors = 0;// the number of near neighbors found so far.
    Int32T nMarkedPoints = 0;// the number of marked points
    for(IntT i = 0; i < nnStruct->parameterL; i++)
    {
        // one iteration per hash table
        TIMEV_START(timeGetBucket);
        GeneralizedPGBucket gbucket;
        if (!nnStruct->useUfunctions)
        {
            // Use usual <g> functions (truly independent; <g>s are precisly
            // <u>s).
            gbucket = getGBucket(nnStruct->hashedBuckets[i], 1, precomputedHashesOfULSHs[i], NULL);
        }
        else
        {
            // Use <u> functions (<g>s are pairs of <u> functions).
            gbucket = getGBucket(nnStruct->hashedBuckets[i], 2, precomputedHashesOfULSHs[firstUComp], precomputedHashesOfULSHs[secondUComp]);

            // compute what is the next pair of <u> functions:
            // (0,1), (0,2), ..., (1,2), (1,3), ...
            secondUComp++;
            if (secondUComp == nnStruct->nHFTuples)
            {
                firstUComp++;
                secondUComp = firstUComp + 1;
            }
        }

        TIMEV_END(timeGetBucket);

        PGBucketT bucket;

        TIMEV_START(timeCycleBucket);
        // How the bucket is walked depends on the hash table layout.
        switch (nnStruct->hashedBuckets[i]->typeHT)
        {
        case HT_LINKED_LIST:
            bucket = gbucket.llGBucket;
            if (bucket != NULL)
            {
                // circle through the bucket and add to <result> the points that are near.
                PBucketEntryT bucketEntry = &(bucket->firstEntry);
                while (bucketEntry != NULL)
                {
                    Int32T candidatePIndex = bucketEntry->pointIndex;
                    PPointT candidatePoint = nnStruct->points[candidatePIndex];
                    if (isDistanceSqrLeq(nnStruct->dimension, point, candidatePoint, nnStruct->parameterR2)
                            && nnStruct->reportingResult)
                    {
                        if (nnStruct->markedPoints[candidatePIndex] == FALSE)
                        {
                            // a new R-NN point was found (not yet in <result>).
                            if (nNeighbors >= resultSize)
                            {
                                // run out of space => resize the <result> array.
                                resultSize = 2 * resultSize;
                                result = (PPointT*)REALLOC(result, resultSize * sizeof(PPointT));
                            }
                            result[nNeighbors] = candidatePoint;
                            nNeighbors++;
                            nnStruct->markedPointsIndeces[nMarkedPoints] = candidatePIndex;
                            nnStruct->markedPoints[candidatePIndex] = TRUE; // do not include more points with the same index
                            nMarkedPoints++;
                        }
                    }
                    bucketEntry = bucketEntry->nextEntry;
                }
            }
            break;

        case HT_STATISTICS:
            ASSERT(FALSE); // HT_STATISTICS not supported anymore
            break;

        case HT_HYBRID_CHAINS:
            if (gbucket.hybridGBucket != NULL)
            {
                // The bucket is an array of entries starting at hybridPoint.
                PHybridChainEntryT hybridPoint = gbucket.hybridGBucket;
                Uns32T offset = 0;
                if (hybridPoint->point.bucketLength == 0)
                {
                    // there are overflow points in this bucket.
                    // The offset to them is assembled from the bucketLength
                    // fields of the following N_FIELDS_PER_INDEX_OF_OVERFLOW entries.
                    offset = 0;
                    for(IntT j = 0; j < N_FIELDS_PER_INDEX_OF_OVERFLOW; j++)
                    {
                        offset += ((Uns32T)((hybridPoint + 1 + j)->point.bucketLength) << (j * N_BITS_FOR_BUCKET_LENGTH));
                    }
                }
                Uns32T index = 0;
                BooleanT done = FALSE;
                while(!done)
                {
                    if (index == MAX_NONOVERFLOW_POINTS_PER_BUCKET)
                    {
                        // jump to the overflow part of the bucket
                        //CR_ASSERT(hybridPoint->point.bucketLength == 0);
                        index = index + offset;
                    }
                    // The entries only store point indices; the points
                    // themselves live in nnStruct->points.
                    Int32T candidatePIndex = (hybridPoint + index)->point.pointIndex;

                    CR_ASSERT(candidatePIndex >= 0 && candidatePIndex < nnStruct->nPoints);
                    done = (hybridPoint + index)->point.isLastPoint == 1 ? TRUE : FALSE;
                    index++;

                    if (nnStruct->markedPoints[candidatePIndex] == FALSE)
                    {
                        // mark the point first, so that it is examined only
                        // once per query regardless of the distance test.
                        nnStruct->markedPointsIndeces[nMarkedPoints] = candidatePIndex;
                        nnStruct->markedPoints[candidatePIndex] = TRUE; // do not include more points with the same index
                        nMarkedPoints++;

                        PPointT candidatePoint = nnStruct->points[candidatePIndex];
                        if (isDistanceSqrLeq(nnStruct->dimension, point, candidatePoint, nnStruct->parameterR2)
                                && nnStruct->reportingResult)
                        {
                            // a new R-NN point was found (not yet in <result>).
                            if (nNeighbors >= resultSize)
                            {
                                // run out of space => resize the <result> array.
                                resultSize = 2 * resultSize;
                                result = (PPointT*)REALLOC(result, resultSize * sizeof(PPointT));
                            }
                            result[nNeighbors] = candidatePoint;
                            nNeighbors++;
                        }
                    }
                    else
                    {
                        // the point was already marked (& examined)
                    }
                }
            }
            break;

        default:
            ASSERT(FALSE);
        }

        TIMEV_END(timeCycleBucket);
    }

    timingOn = oldTimingOn;
    TIMEV_END(timeTotalBuckets);

    // Release the private copy of the precomputed hashes.
    for(IntT i = 0; i < nnStruct->nHFTuples; i++)
    {
        free(precomputedHashesOfULSHs[i]);
    }
    free(precomputedHashesOfULSHs);

    // we need to clear the array nnStruct->markedPoints for the next query.
    for(Int32T i = 0; i < nMarkedPoints; i++)
    {
        ASSERT(nnStruct->markedPoints[nnStruct->markedPointsIndeces[i]] == TRUE);
        nnStruct->markedPoints[nnStruct->markedPointsIndeces[i]] = FALSE;
    }
    DPRINTF("nMarkedPoints: %d\n", nMarkedPoints);

    return nNeighbors;
}