// Returns the list of near neighbors of the point <point> (with a
// certain success probability). Near neighbor is defined as being a
// point within distance <parameterR>. Each near neighbor from the
// data set is returned is returned with a certain probability,
// dependent on <parameterK>, <parameterL>, and <parameterT>. The
// returned points are kept in the array <result>. If result is not
// allocated, it will be allocated to at least some minimum size
// (RESULT_INIT_SIZE). If number of returned points is bigger than the
// size of <result>, then the <result> is resized (to up to twice the
// number of returned points). The return value is the number of
// points found.
Int32T getNearNeighborsFromPRNearNeighborStruct(PRNearNeighborStructT nnStruct, PPointT query, PPointT *(&result), Int32T &resultSize){
  ASSERT(nnStruct != NULL);
  ASSERT(query != NULL);
  ASSERT(nnStruct->reducedPoint != NULL);
  // When <u>-function composition is used, the per-point ULSH vectors must exist.
  ASSERT(!nnStruct->useUfunctions || nnStruct->pointULSHVectors != NULL);

  PPointT point = query;

  // Lazily allocate the caller's result array on first use.
  // NOTE(review): MALLOC/REALLOC/FAILIF are project macros — presumably they
  // abort or report on allocation failure; confirm in the project's headers.
  if (result == NULL){
    resultSize = RESULT_INIT_SIZE;
    FAILIF(NULL == (result = (PPointT*)MALLOC(resultSize * sizeof(PPointT))));
  }

  // Compute the query's LSH values for every hash-function tuple; results are
  // stored in nnStruct->precomputedHashesOfULSHs (see preparePointAdding).
  preparePointAdding(nnStruct, nnStruct->hashedBuckets[0], point);

  // Snapshot the precomputed hashes into a local (VLA) copy so that they are
  // stable while we probe the L tables.
  Uns32T precomputedHashesOfULSHs[nnStruct->nHFTuples][N_PRECOMPUTED_HASHES_NEEDED];
  for(IntT i = 0; i < nnStruct->nHFTuples; i++){
    for(IntT j = 0; j < N_PRECOMPUTED_HASHES_NEEDED; j++){
      precomputedHashesOfULSHs[i][j] = nnStruct->precomputedHashesOfULSHs[i][j];
    }
  }

  TIMEV_START(timeTotalBuckets);
  BooleanT oldTimingOn = timingOn;
  if (noExpensiveTiming) {
    timingOn = FALSE;
  }

  // Initialize the counters for defining the pair of <u> functions used for <g> functions.
  IntT firstUComp = 0;
  IntT secondUComp = 1;

  Int32T nNeighbors = 0;// the number of near neighbors found so far.
  Int32T nMarkedPoints = 0;// the number of marked points
  // Probe each of the L hash tables once.
  for(IntT i = 0; i < nnStruct->parameterL; i++){
    TIMEV_START(timeGetBucket);
    GeneralizedPGBucket gbucket;
    if (!nnStruct->useUfunctions) {
      // Use usual <g> functions (truly independent; <g>s are precisly
      // <u>s).
      gbucket = getGBucket(nnStruct->hashedBuckets[i], 1, precomputedHashesOfULSHs[i], NULL);
    } else {
      // Use <u> functions (<g>s are pairs of <u> functions).
      gbucket = getGBucket(nnStruct->hashedBuckets[i], 2, precomputedHashesOfULSHs[firstUComp], precomputedHashesOfULSHs[secondUComp]);

      // compute what is the next pair of <u> functions: enumerate all
      // unordered pairs (firstUComp, secondUComp) with first < second.
      secondUComp++;
      if (secondUComp == nnStruct->nHFTuples) {
	firstUComp++;
	secondUComp = firstUComp + 1;
      }
    }
    TIMEV_END(timeGetBucket);

    PGBucketT bucket;

    TIMEV_START(timeCycleBucket);
    // Dispatch on the physical layout of the hash table's buckets.
    switch (nnStruct->hashedBuckets[i]->typeHT){
    case HT_LINKED_LIST:
      bucket = gbucket.llGBucket;
      if (bucket != NULL){
	// circle through the bucket and add to <result> the points that are near.
	PBucketEntryT bucketEntry = &(bucket->firstEntry);
	//TIMEV_START(timeCycleProc);
	while (bucketEntry != NULL){
	  //TIMEV_END(timeCycleProc);
	  //ASSERT(bucketEntry->point != NULL);
	  //TIMEV_START(timeDistanceComputation);
	  Int32T candidatePIndex = bucketEntry->pointIndex;
	  PPointT candidatePoint = nnStruct->points[candidatePIndex];
	  if (isDistanceSqrLeq(nnStruct->dimension, point, candidatePoint, nnStruct->parameterR2) && nnStruct->reportingResult){
	    //TIMEV_END(timeDistanceComputation);
	    // markedPoints[] dedups candidates seen in earlier tables.
	    if (nnStruct->markedPoints[candidatePIndex] == FALSE) {
	      //TIMEV_START(timeResultStoring);
	      // a new R-NN point was found (not yet in <result>).
	      if (nNeighbors >= resultSize){
		// run out of space => resize the <result> array.
		resultSize = 2 * resultSize;
		result = (PPointT*)REALLOC(result, resultSize * sizeof(PPointT));
	      }
	      result[nNeighbors] = candidatePoint;
	      nNeighbors++;
	      // Remember which indices were marked so they can be cleared below.
	      nnStruct->markedPointsIndeces[nMarkedPoints] = candidatePIndex;
	      nnStruct->markedPoints[candidatePIndex] = TRUE; // do not include more points with the same index
	      nMarkedPoints++;
	      //TIMEV_END(timeResultStoring);
	    }
	  }else{
	    //TIMEV_END(timeDistanceComputation);
	  }
	  //TIMEV_START(timeCycleProc);
	  bucketEntry = bucketEntry->nextEntry;
	}
	//TIMEV_END(timeCycleProc);
      }
      break;
    case HT_STATISTICS:
      // Dead branch kept for exhaustiveness of the switch; the HT_STATISTICS
      // layout is no longer supported (its old traversal code was removed).
      ASSERT(FALSE); // HT_STATISTICS not supported anymore
      break;
    case HT_HYBRID_CHAINS:
      if (gbucket.hybridGBucket != NULL){
	// <hybridPoint> is the start of this bucket's entries inside one
	// contiguous hybrid-chain array.
	PHybridChainEntryT hybridPoint = gbucket.hybridGBucket;
	Uns32T offset = 0;
	if (hybridPoint->point.bucketLength == 0){
	  // there are overflow points in this bucket: decode the overflow
	  // offset stored across the next N_FIELDS_PER_INDEX_OF_OVERFLOW entries.
	  offset = 0;
	  for(IntT j = 0; j < N_FIELDS_PER_INDEX_OF_OVERFLOW; j++){
	    offset += ((Uns32T)((hybridPoint + 1 + j)->point.bucketLength) << (j * N_BITS_FOR_BUCKET_LENGTH));
	  }
	}
	Uns32T index = 0;
	BooleanT done = FALSE;
	while(!done){
	  if (index == MAX_NONOVERFLOW_POINTS_PER_BUCKET){
	    //CR_ASSERT(hybridPoint->point.bucketLength == 0);
	    // Jump to the overflow region of the bucket.
	    index = index + offset;
	  }
	  Int32T candidatePIndex = (hybridPoint + index)->point.pointIndex;
	  CR_ASSERT(candidatePIndex >= 0 && candidatePIndex < nnStruct->nPoints);
	  // isLastPoint terminates the bucket traversal after this entry.
	  done = (hybridPoint + index)->point.isLastPoint == 1 ? TRUE : FALSE;
	  index++;
	  if (nnStruct->markedPoints[candidatePIndex] == FALSE){
	    // mark the point first (dedup across tables), then test distance.
	    nnStruct->markedPointsIndeces[nMarkedPoints] = candidatePIndex;
	    nnStruct->markedPoints[candidatePIndex] = TRUE; // do not include more points with the same index
	    nMarkedPoints++;
	    PPointT candidatePoint = nnStruct->points[candidatePIndex];
	    if (isDistanceSqrLeq(nnStruct->dimension, point, candidatePoint, nnStruct->parameterR2) && nnStruct->reportingResult){
	      // a new R-NN point was found (not yet in <result>).
	      //TIMEV_START(timeResultStoring);
	      if (nNeighbors >= resultSize){
		// run out of space => resize the <result> array.
		resultSize = 2 * resultSize;
		result = (PPointT*)REALLOC(result, resultSize * sizeof(PPointT));
	      }
	      result[nNeighbors] = candidatePoint;
	      nNeighbors++;
	      //TIMEV_END(timeResultStoring);
	    }
	  }else{
	    // the point was already marked (& examined)
	  }
	}
      }
      break;
    default:
      ASSERT(FALSE);
    }
    TIMEV_END(timeCycleBucket);
  }

  timingOn = oldTimingOn;
  TIMEV_END(timeTotalBuckets);

  // we need to clear the array nnStruct->nearPoints for the next query.
  for(Int32T i = 0; i < nMarkedPoints; i++){
    ASSERT(nnStruct->markedPoints[nnStruct->markedPointsIndeces[i]] == TRUE);
    nnStruct->markedPoints[nnStruct->markedPointsIndeces[i]] = FALSE;
  }
  DPRINTF("nMarkedPoints: %d\n", nMarkedPoints);

  return nNeighbors;
}
// Returns the list of near neighbors of the point <point> (with a // certain success probability). Near neighbor is defined as being a // point within distance <parameterR>. Each near neighbor from the // data set is returned is returned with a certain probability, // dependent on <parameterK>, <parameterL>, and <parameterT>. The // returned points are kept in the array <result>. If result is not // allocated, it will be allocated to at least some minimum size // (RESULT_INIT_SIZE). If number of returned points is bigger than the // size of <result>, then the <result> is resized (to up to twice the // number of returned points). The return value is the number of // points found. Int32T getNearNeighborsFromPRNearNeighborStruct( PRNearNeighborStructT nnStruct, PPointT query, PPointT *(&result), Int32T &resultSize) { //通过查找索引,然后获得桶,提取n个最近邻点 //通过计算点的降维值,然后计算主副索引,最后由索引查找表 ASSERT(nnStruct != NULL); ASSERT(query != NULL); ASSERT(nnStruct->reducedPoint != NULL); ASSERT(!nnStruct->useUfunctions || nnStruct->pointULSHVectors != NULL); PPointT point = query; if (result == NULL) { resultSize = RESULT_INIT_SIZE; FAILIF(NULL == (result = (PPointT*)MALLOC(resultSize * sizeof(PPointT)))); } /* for (int tempd=150; tempd< 160;tempd++) { printf(" %lf ",query->coordinates[tempd]); } printf("查询的具体数据 10个 \n\n"); printf("查询数据 : %lf \n",query->coordinates[151]); // printf( "主hash的值: %u \n",nnStruct->hehasdBuckets[0]->mainHashA[5]); // printf( "辅助hash的值: %u \n",nnStruct->hashedBuckets[0]->controlHash1[5]); // printf( "a %u \n",nnStruct->lshFunctions[0][0].a[5]); // printf( "b %u \n",nnStruct->lshFunctions[0][0].b ); */ preparePointAdding(nnStruct, nnStruct->hashedBuckets[0], point); //根据传入的多维point。计算对应每个hash表的降维=》hash值,存入了nnStruct->precomputedHashesOfULSHs Uns32T **(precomputedHashesOfULSHs);//没释放 precomputedHashesOfULSHs= (Uns32T**)malloc(sizeof(Uns32T*)*(nnStruct->nHFTuples)); // Uns32T precomputedHashesOfULSHs[nnStruct->nHFTuples][N_PRECOMPUTED_HASHES_NEEDED]; for (IntT i=0; i< 
nnStruct->nHFTuples ; i++) { precomputedHashesOfULSHs[i]= (Uns32T*)malloc(sizeof(Uns32T)*(N_PRECOMPUTED_HASHES_NEEDED)); for (int temi=0; temi< N_PRECOMPUTED_HASHES_NEEDED ; temi++) { precomputedHashesOfULSHs[i][temi]=0; } } //初始化?? /* printf("\n输出:\n"); FILE *in = fopen("preconpute.txt", "a+") ; fprintf(in,"\n输出:\n"); fclose(in); */ for(IntT i = 0; i < nnStruct->nHFTuples; i++) { for(IntT j = 0; j < N_PRECOMPUTED_HASHES_NEEDED; j++) { precomputedHashesOfULSHs[i][j] = nnStruct->precomputedHashesOfULSHs[i][j]; /* printf(" %u", precomputedHashesOfULSHs[i][j]); FILE *in = fopen("preconpute.txt", "a+") ; fprintf(in," %u", precomputedHashesOfULSHs[i][j]); fclose(in); */ } /*printf(" \n"); FILE *in = fopen("preconpute.txt", "a+") ; fprintf(in," \n"); fclose(in); */ } TIMEV_START(timeTotalBuckets); BooleanT oldTimingOn = timingOn; if (noExpensiveTiming) { timingOn = FALSE; } // Initialize the counters for defining the pair of <u> functions used for <g> functions. IntT firstUComp = 0; IntT secondUComp = 1; Int32T nNeighbors = 0;// the number of near neighbors found so far. Int32T nMarkedPoints = 0;// the number of marked points for(IntT i = 0; i < nnStruct->parameterL; i++) { //L个表 TIMEV_START(timeGetBucket); GeneralizedPGBucket gbucket; if (!nnStruct->useUfunctions) { // Use usual <g> functions (truly independent; <g>s are precisly // <u>s). gbucket = getGBucket(nnStruct->hashedBuckets[i], 1, precomputedHashesOfULSHs[i], NULL); } else { // Use <u> functions (<g>s are pairs of <u> functions). gbucket = getGBucket(nnStruct->hashedBuckets[i], 2, precomputedHashesOfULSHs[firstUComp], precomputedHashesOfULSHs[secondUComp]); //通过两个向量,计算主副索引。然后遍历二级索引,提取对应的桶 // compute what is the next pair of <u> functions. //不是每个都 (first,second )(first,second )(first,second )的数组吗? 
secondUComp++; if (secondUComp == nnStruct->nHFTuples) { firstUComp++; secondUComp = firstUComp + 1; } } TIMEV_END(timeGetBucket); PGBucketT bucket; TIMEV_START(timeCycleBucket); switch (nnStruct->hashedBuckets[i]->typeHT) { //对不同类型的hash桶结构,使用不同方法获取二级桶的实体 case HT_LINKED_LIST: bucket = gbucket.llGBucket; if (bucket != NULL) { // circle through the bucket and add to <result> the points that are near. PBucketEntryT bucketEntry = &(bucket->firstEntry); //TIMEV_START(timeCycleProc); while (bucketEntry != NULL) { //TIMEV_END(timeCycleProc); //ASSERT(bucketEntry->point != NULL); //TIMEV_START(timeDistanceComputation); Int32T candidatePIndex = bucketEntry->pointIndex; PPointT candidatePoint = nnStruct->points[candidatePIndex]; if (isDistanceSqrLeq(nnStruct->dimension, point, candidatePoint, nnStruct->parameterR2) && nnStruct->reportingResult) { //TIMEV_END(timeDistanceComputation); if (nnStruct->markedPoints[candidatePIndex] == FALSE) { //TIMEV_START(timeResultStoring); // a new R-NN point was found (not yet in <result>). if (nNeighbors >= resultSize) { // run out of space => resize the <result> array. 
resultSize = 2 * resultSize; result = (PPointT*)REALLOC(result, resultSize * sizeof(PPointT)); } result[nNeighbors] = candidatePoint; nNeighbors++; nnStruct->markedPointsIndeces[nMarkedPoints] = candidatePIndex; nnStruct->markedPoints[candidatePIndex] = TRUE; // do not include more points with the same index nMarkedPoints++; //TIMEV_END(timeResultStoring); } } else { //TIMEV_END(timeDistanceComputation); } //TIMEV_START(timeCycleProc); bucketEntry = bucketEntry->nextEntry; }//while //TIMEV_END(timeCycleProc); } break; case HT_STATISTICS: ASSERT(FALSE); // HT_STATISTICS not supported anymore // if (gbucket.linkGBucket != NULL && gbucket.linkGBucket->indexStart != INDEX_START_EMPTY){ // Int32T position; // PointsListEntryT *pointsList = nnStruct->hashedBuckets[i]->bucketPoints.pointsList; // position = gbucket.linkGBucket->indexStart; // // circle through the bucket and add to <result> the points that are near. // while (position != INDEX_START_EMPTY){ // PPointT candidatePoint = pointsList[position].point; // if (isDistanceSqrLeq(nnStruct->dimension, point, candidatePoint, nnStruct->parameterR2) && nnStruct->reportingResult){ // if (nnStruct->nearPoints[candidatePoint->index] == FALSE) { // // a new R-NN point was found (not yet in <result>). // if (nNeighbors >= resultSize){ // // run out of space => resize the <result> array. 
// resultSize = 2 * resultSize; // result = (PPointT*)REALLOC(result, resultSize * sizeof(PPointT)); // } // result[nNeighbors] = candidatePoint; // nNeighbors++; // nnStruct->nearPoints[candidatePoint->index] = TRUE; // do not include more points with the same index // } // } // // Int32T oldP = position; // position = pointsList[position].nextPoint; // // ASSERT(position == INDEX_START_EMPTY || position == oldP + 1); // } // } break; case HT_HYBRID_CHAINS://默认的链条 if (gbucket.hybridGBucket != NULL) { //好像是在链表中找空间,同时要判断没有重复的 PHybridChainEntryT hybridPoint = gbucket.hybridGBucket;//获取 二级桶的数组指针,(实际桶就是一个数组) Uns32T offset = 0; if (hybridPoint->point.bucketLength == 0) { //长度为0,就是溢出了的桶, // there are overflow points in this bucket. offset = 0; for(IntT j = 0; j < N_FIELDS_PER_INDEX_OF_OVERFLOW; j++) { offset += ((Uns32T)((hybridPoint + 1 + j)->point.bucketLength) << (j * N_BITS_FOR_BUCKET_LENGTH)); } } Uns32T index = 0; BooleanT done = FALSE; while(!done) { if (index == MAX_NONOVERFLOW_POINTS_PER_BUCKET) { //CR_ASSERT(hybridPoint->point.bucketLength == 0); index = index + offset; } //hybridPoint 是个二级桶+实体组成的数组的首地址(其实就是个二级刻度) Int32T candidatePIndex = (hybridPoint + index)->point.pointIndex; //索引只是记录每个点的序号, 所有点都在nnStruct->points[candidatePIndex] 上保存具体值 CR_ASSERT(candidatePIndex >= 0 && candidatePIndex < nnStruct->nPoints); done = (hybridPoint + index)->point.isLastPoint == 1 ? TRUE : FALSE; //链表的遍历?好像是用数组来当链表用 index++; if (nnStruct->markedPoints[candidatePIndex] == FALSE) { //已经计算过的点都标记为true了 //nnStruct->markedPoints 是用来标记是否检测过得 // mark the point first. 
nnStruct->markedPointsIndeces[nMarkedPoints] = candidatePIndex; nnStruct->markedPoints[candidatePIndex] = TRUE; // do not include more points with the same index nMarkedPoints++; PPointT candidatePoint = nnStruct->points[candidatePIndex]; if (isDistanceSqrLeq(nnStruct->dimension, point, candidatePoint, nnStruct->parameterR2) && nnStruct->reportingResult) { //两点距离是否小于阈值 //if (nnStruct->markedPoints[candidatePIndex] == FALSE) { // a new R-NN point was found (not yet in <result>). //TIMEV_START(timeResultStoring); if (nNeighbors >= resultSize) { //近邻点太多,扩大空间 // run out of space => resize the <result> array. resultSize = 2 * resultSize; result = (PPointT*)REALLOC(result, resultSize * sizeof(PPointT)); } result[nNeighbors] = candidatePoint;//存入返回结果中 nNeighbors++; //TIMEV_END(timeResultStoring); //nnStruct->markedPointsIndeces[nMarkedPoints] = candidatePIndex; //nnStruct->markedPoints[candidatePIndex] = TRUE; // do not include more points with the same index //nMarkedPoints++; //} } }// if (nnStruct->markedPoints[candidatePIndex] == FALSE) else { // the point was already marked (& examined) } }// while(!done) }// if (gbucket.hybridGBucket != NULL) break; default: ASSERT(FALSE); }//swichcase TIMEV_END(timeCycleBucket); }//for timingOn = oldTimingOn; TIMEV_END(timeTotalBuckets); // we need to clear the array nnStruct->nearPoints for the next query. for(Int32T i = 0; i < nMarkedPoints; i++) { ASSERT(nnStruct->markedPoints[nnStruct->markedPointsIndeces[i]] == TRUE); nnStruct->markedPoints[nnStruct->markedPointsIndeces[i]] = FALSE; } DPRINTF("nMarkedPoints: %d\n", nMarkedPoints); return nNeighbors; }