C++ (Cpp) TIMEV_START Beispiele

Beispiel #1

0

Datei anzeigen

Datei: NearNeighbors.cpp Projekt: lessc0de/cylsh

/*
  Builds a new R-near neighbor data structure (PRNearNeighborStructT)
  for the data set <dataSet>, using the parameters that
  computeOptimalParameters() returns for <thresholdR>,
  <successProbability> and <memoryUpperBound>.

  <nPoints> is the number of points in <dataSet>; <dimension> is the
  dimension of the points.

  <sampleQueries> (<nSampleQueries> entries) is a set of sample query
  points: the R-NN parameters are optimized for queries drawn from it.
  It may be a sample of the actual query set or of the data set. When
  the number of collisions of a sample query point <q> with the data
  set is estimated, a data set point that has the same pointer as <q>
  (i.e., <q> is itself a data set point) is left out of the data set
  for that estimation.

  Returns the initialized structure.
*/
PRNearNeighborStructT initSelfTunedRNearNeighborWithDataSet(RealT thresholdR,
                                                            RealT successProbability,
                                                            Int32T nPoints,
                                                            IntT dimension,
                                                            PPointT *dataSet,
                                                            IntT nSampleQueries,
                                                            PPointT *sampleQueries,
                                                            MemVarT memoryUpperBound){
  initializeLSHGlobal();

  RNNParametersT optParameters = computeOptimalParameters(thresholdR, successProbability, nPoints, dimension, dataSet, nSampleQueries, sampleQueries, memoryUpperBound);

  // Report the chosen parameters; <m> is printed only when <u> functions are used.
  if (optParameters.useUfunctions) {
    DPRINTF("Used m = %d\n", optParameters.parameterM);
    DPRINTF("Used L = %d\n", optParameters.parameterL);
  } else {
    DPRINTF("Used L=%d\n", optParameters.parameterL);
  }

  const bool hybridChains = (optParameters.typeHT == HT_HYBRID_CHAINS);

  // Create the R-NN data structure (timed).
  PRNearNeighborStructT nnStruct = NULL;
  TimeVarT timeInit = 0;
  TIMEV_START(timeInit);
  if (hybridChains) {
    // The hybrid-chains variant is handed the data set at construction time.
    printRNNParameters(DEBUG_OUTPUT, optParameters);
    nnStruct = initLSH_WithDataSet(optParameters, nPoints, dataSet);
  } else {
    nnStruct = initLSH(optParameters, nPoints);
  }
  TIMEV_END(timeInit);

  DPRINTF("Time for initializing: %0.6lf\n", timeInit);
  DPRINTF("Allocated memory: %lld\n", totalAllocatedMemory);

  if (!hybridChains) {
    // For every other table type the points are inserted one by one (timed).
    TimeVarT timeAdding = 0;
    TIMEV_START(timeAdding);
    for (IntT pointIdx = 0; pointIdx < nPoints; pointIdx++) {
      addNewPointToPRNearNeighborStruct(nnStruct, dataSet[pointIdx]);
    }
    TIMEV_END(timeAdding);
    printf("Time for adding points: %0.6lf\n", timeAdding);
    DPRINTF("Allocated memory: %lld\n", totalAllocatedMemory);
  }

  // Dump the global construction counters.
  DPRINTF("Time for creating buckets: %0.6lf\n", timeBucketCreation);
  DPRINTF("Time for putting buckets into UH: %0.6lf\n", timeBucketIntoUH);
  DPRINTF("Time for computing GLSH: %0.6lf\n", timeComputeULSH);
  DPRINTF("NGBuckets: %d\n", nGBuckets);

  return nnStruct;
}

Beispiel #2

0

Datei anzeigen

Datei: NearNeighbors.cpp Projekt: lvpei/v9

// Runs one R-near-neighbor query for <queryPoint> on <nnStruct>.
//
// All global per-query timers and the distance-computation counter are
// zeroed first, then the query is delegated to
// getNearNeighborsFromPRNearNeighborStruct() while timeRNNQuery is being
// measured. The collected statistics are reported through DPRINTF.
//
// <result> and <resultSize> are passed by reference straight to the
// delegate; the return value is whatever the delegate returns.
Int32T getRNearNeighbors(PRNearNeighborStructT nnStruct, PPointT queryPoint, PPointT *(&result), Int32T &resultSize){
  DPRINTF("Estimated ULSH comp: %0.6lf\n", lshPrecomp * nnStruct->nHFTuples * nnStruct->hfTuplesLength);
  DPRINTF("Estimated UH overhead: %0.6lf\n", uhashOver * nnStruct->nHFTuples);

  // Zero the per-query counter and timers.
  nOfDistComps = 0;
  timeChainTraversal = 0;
  timeGBHash = 0;
  timePrecomputeHash = 0;
  timeCycleProc = 0;
  timeResultStoring = 0;
  timeDistanceComputation = 0;
  timeCycleBucket = 0;
  timeGetBucket = 0;
  timeComputeULSH = 0;
  timeTotalBuckets = 0;
  timeRNNQuery = 0;

  // The query itself, measured as a whole.
  TIMEV_START(timeRNNQuery);
  noExpensiveTiming = !DEBUG_PROFILE_TIMING;
  Int32T found = getNearNeighborsFromPRNearNeighborStruct(nnStruct, queryPoint, result, resultSize);
  TIMEV_END(timeRNNQuery);

  // Timing breakdown.
  DPRINTF("Time to compute LSH: %0.6lf\n", timeComputeULSH);
  DPRINTF("Time to get bucket: %0.6lf\n", timeGetBucket);
  DPRINTF("Time to cycle through buckets: %0.6lf\n", timeCycleBucket);
  DPRINTF("Time to for processing buckets (UH+examining points): %0.6lf\n", timeTotalBuckets);
  DPRINTF("Time for distance comps: %0.6lf\n", timeDistanceComputation);
  DPRINTF("Time to store result: %0.6lf\n", timeResultStoring);

  // Work counters.
  DPRINTF("Number of dist comps: %d\n", nOfDistComps);
  DPRINTF("Number buckets in chains: %d\n", nBucketsInChains);
  DPRINTF("Number buckets in chains / L: %0.3lf\n", (double)nBucketsInChains / nnStruct->nHFTuples);
  DPRINTF("Cumulative time for R-NN query: %0.6lf\n", timeRNNQuery);

  return found;
}

Beispiel #3

0

Datei anzeigen

Datei: SelfTuning.cpp Projekt: dodo1211/1015_2

// Computes how much time it takes to run timing functions (functions
// that compute timings) -- we need to substract this value when we
// compute the length of an actual interval of time.
//计算函数运行的时间，我们需要减去这个值，得到精确的处理时间
void tuneTimeFunctions(){
  timevSpeed = 0;
  // Compute the time needed for a calls to TIMEV_START and TIMEV_END
  IntT nIterations = 100000;
  TimeVarT timeVar = 0;
  for(IntT i = 0; i < nIterations; i++){
    TIMEV_START(timeVar);
    TIMEV_END(timeVar);
  }
  timevSpeed = timeVar / nIterations;
  DPRINTF("Tuning: timevSpeed = %0.9lf\n", timevSpeed);
}

Beispiel #4

0

Datei anzeigen

Datei: Knnexact_notime.cpp Projekt: xymajy/865datamining

// Exact K-NN driver.
//
// Command line (positional):
//   args[1] number of data points      args[2] number of queries
//   args[3] dimension                  args[4] value stored in <p>
//   args[5] K                          args[6] data set file
//   args[7] query file
//
// For every query the distance to each data point is computed, the
// candidates are pushed through updataQ() into a priority queue, and the
// queue is printed with display(). Only the scan over the data points is
// timed.
//
// Exits with status 1 on a short command line, an unreadable query file or
// an allocation failure; returns 0 otherwise.
int main(int nargs, char **args){
  // args[1]..args[7] are read below, so at least eight entries (program
  // name included) are required.
  if (nargs < 8) {
    usage(args[0]);
    exit(1);
  }

  nPoints = atoi(args[1]);
  nQueries = atoi(args[2]);
  dimension = atoi(args[3]);
  p = atof(args[4]);

  K = atoi(args[5]);

  readPoints(args[6]); // read all points

  FILE *queryFile = fopen(args[7], "rt");
  if (queryFile == NULL) {
    fprintf(stderr, "Cannot open query file: %s\n", args[7]);
    exit(1);
  }

  query = (RealT*)malloc(dimension * sizeof(RealT));
  if (query == NULL) {
    fprintf(stderr, "Cannot allocate memory for the query point\n");
    fclose(queryFile);
    exit(1);
  }

  printf("nPoints = %d\n", nPoints);
  for(int i = 0; i < nQueries; i++){
    // read in the query point.
    for(int d = 0; d < dimension; d++){
      FSCANF_REAL(queryFile, &(query[d]));
    }

    std::priority_queue<Node> myq;

    TimeVarT time = 0;
    RealT tempdis = 0;

    // Time only the neighbor search; printing is excluded from the timing.
    TIMEV_START(time);
    for(int j = 0; j < nPoints; j++){
      tempdis = dist(query, points[j]);
      updataQ(myq, tempdis, j);
    }
    TIMEV_END(time);

    printf("Total time for K-NN query \t%0.6lf\n",time);

    printf("Query point %d 's %d NNs are:\n", i, K);

    display(myq);
  }

  // Release what this function acquired.
  fclose(queryFile);
  free(query);
  query = NULL;

  return 0;
}

Beispiel #5

0

Datei anzeigen

Datei: LocalitySensitiveHashing.cpp Projekt: dodo1211/20120709

// Inserts <point> into the LSH data structure <nnStruct>: the point is
// appended to nnStruct->points and, for each i = 0..parameterL-1, an entry
// carrying its index is added to hash table i.
//
// Only valid for structures whose first hash table is of type
// HT_LINKED_LIST or HT_STATISTICS (asserted).
void addNewPointToPRNearNeighborStruct(PRNearNeighborStructT nnStruct, PPointT point){
  ASSERT(nnStruct != NULL);
  ASSERT(point != NULL);
  ASSERT(nnStruct->reducedPoint != NULL);
  ASSERT(!nnStruct->useUfunctions || nnStruct->pointULSHVectors != NULL);
  ASSERT(nnStruct->hashedBuckets[0]->typeHT == HT_LINKED_LIST || nnStruct->hashedBuckets[0]->typeHT == HT_STATISTICS);

  // Append the point; its index is (nPoints - 1) from here on.
  nnStruct->points[nnStruct->nPoints] = point;
  nnStruct->nPoints++;

  // Fills nnStruct->precomputedHashesOfULSHs for this point.
  preparePointAdding(nnStruct, nnStruct->hashedBuckets[0], point);

  // Indices of the pair of <u> functions that make up the current <g> function.
  IntT uFirst = 0;
  IntT uSecond = 1;

  TIMEV_START(timeBucketIntoUH);
  for(IntT table = 0; table < nnStruct->parameterL; table++){
    if (nnStruct->useUfunctions) {
      // <g> is a pair of <u> functions.
      addBucketEntry(nnStruct->hashedBuckets[table], 2, nnStruct->precomputedHashesOfULSHs[uFirst], nnStruct->precomputedHashesOfULSHs[uSecond], nnStruct->nPoints - 1);

      // Step to the next pair (uFirst < uSecond).
      uSecond++;
      if (uSecond == nnStruct->nHFTuples) {
        uFirst++;
        uSecond = uFirst + 1;
      }
    } else {
      // Plain, independent <g> functions: <g> number <table> is <u> number <table>.
      addBucketEntry(nnStruct->hashedBuckets[table], 1, nnStruct->precomputedHashesOfULSHs[table], NULL, nnStruct->nPoints - 1);
    }
  }
  TIMEV_END(timeBucketIntoUH);

  // Nothing more to do while the marking arrays still cover every point.
  if (nnStruct->nPoints <= nnStruct->sizeMarkedPoints) {
    return;
  }

  // Grow <markedPoints> and <markedPointsIndeces> to twice the point count;
  // all flags are reset to FALSE after the resize.
  nnStruct->sizeMarkedPoints = 2 * nnStruct->nPoints;
  FAILIF(NULL == (nnStruct->markedPoints = (BooleanT*)REALLOC(nnStruct->markedPoints, nnStruct->sizeMarkedPoints * sizeof(BooleanT))));
  for(IntT slot = 0; slot < nnStruct->sizeMarkedPoints; slot++){
    nnStruct->markedPoints[slot] = FALSE;
  }
  FAILIF(NULL == (nnStruct->markedPointsIndeces = (Int32T*)REALLOC(nnStruct->markedPointsIndeces, nnStruct->sizeMarkedPoints * sizeof(Int32T))));
}

Beispiel #6

0

Datei anzeigen

Datei: LocalitySensitiveHashing.cpp Projekt: landys/photo-demo

// Returns 1 iff |p1-p2|_2^2 <= threshold, 0 otherwise.
//
// The squared differences are accumulated coordinate by coordinate and the
// scan stops as soon as the running sum exceeds <threshold>. Every call
// increments <nOfDistComps> and is timed into <timeDistanceComputation>.
inline BooleanT isDistanceSqrLeq(IntT dimension, PPointT p1, PPointT p2, RealT threshold){
  nOfDistComps++;

  BooleanT withinThreshold = 1;
  RealT sumOfSquares = 0;

  TIMEV_START(timeDistanceComputation);
  for (IntT d = 0; d < dimension; d++){
    RealT delta = p1->coordinates[d] - p2->coordinates[d];
    sumOfSquares += SQR(delta);
    if (sumOfSquares > threshold){
      // Already too far; the remaining coordinates cannot reduce the sum.
      withinThreshold = 0;
      break;
    }
  }
  TIMEV_END(timeDistanceComputation);

  return withinThreshold;
}

Beispiel #7

0

Datei anzeigen

Datei: LocalitySensitiveHashing.cpp Projekt: xiawei0000/Logo_LSH

// Precomputes, for <point>, the hash data that is later used to locate its
// bucket in every hash table.
//
// Inputs:  <nnStruct> (its scratch buffers are overwritten), <uhash> (the
//          universal-hash structure handed to precomputeUHFsForULSH) and
//          the point itself.
// Effects: nnStruct->reducedPoint receives the coordinates divided by
//          parameterR; nnStruct->pointULSHVectors[i] is filled by
//          computeULSH for each of the nHFTuples tuples; and, when
//          USE_SAME_UHASH_FUNCTIONS is set,
//          nnStruct->precomputedHashesOfULSHs[i] is filled from
//          pointULSHVectors[i].
// The whole routine is timed into <timeComputeULSH>.
inline void preparePointAdding(PRNearNeighborStructT nnStruct, PUHashStructureT uhash, PPointT point)
{
    ASSERT(nnStruct != NULL);
    ASSERT(uhash != NULL);
    ASSERT(point != NULL);

    TIMEV_START(timeComputeULSH);

    // Scale every coordinate by 1/R.
    IntT coord = 0;
    while (coord < nnStruct->dimension) {
        nnStruct->reducedPoint[coord] = point->coordinates[coord] / nnStruct->parameterR;
        coord++;
    }

    // Evaluate all ULSH functions on the scaled point; tuple <t> writes
    // its output vector into pointULSHVectors[t].
    IntT tuple = 0;
    while (tuple < nnStruct->nHFTuples) {
        computeULSH(nnStruct, tuple, nnStruct->reducedPoint, nnStruct->pointULSHVectors[tuple]);
        tuple++;
    }

    // Turn each ULSH vector into the entries of <precomputedHashesOfULSHs>.
    if (USE_SAME_UHASH_FUNCTIONS) {
        tuple = 0;
        while (tuple < nnStruct->nHFTuples) {
            precomputeUHFsForULSH(uhash, nnStruct->pointULSHVectors[tuple], nnStruct->hfTuplesLength, nnStruct->precomputedHashesOfULSHs[tuple]);
            tuple++;
        }
    }

    TIMEV_END(timeComputeULSH);
}

Beispiel #8

0

Datei anzeigen

Datei: LocalitySensitiveHashing.cpp Projekt: xiawei0000/Logo_LSH

// Returns 1 iff the distance measure between <p1> and <p2> is <= threshold,
// 0 otherwise.
//
// By default the measure is the squared Euclidean distance |p1-p2|_2^2;
// when USE_L1_DISTANCE is defined the sum of absolute coordinate
// differences is accumulated instead. The scan stops as soon as the running
// sum exceeds <threshold>.
//
// Every call increments <nOfDistComps> and is timed into
// <timeDistanceComputation>.
inline BooleanT isDistanceSqrLeq(IntT dimension, PPointT p1, PPointT p2, RealT threshold)
{
    RealT result = 0;
    nOfDistComps++;

    TIMEV_START(timeDistanceComputation);
    for (IntT i = 0; i < dimension; i++) {
        RealT temp = p1->coordinates[i] - p2->coordinates[i];
#ifdef USE_L1_DISTANCE
        result += ABS(temp);
#else
        result += SQR(temp);
#endif
        if (result > threshold) {
            // Close the timing interval on the early exit too, so that every
            // TIMEV_START is matched by a TIMEV_END.
            TIMEV_END(timeDistanceComputation);
            return 0;
        }
    }
    TIMEV_END(timeDistanceComputation);

    // The loop finished without exceeding the threshold.
    return 1;
}

Beispiel #9

0

Datei anzeigen

Datei: LocalitySensitiveHashing.cpp Projekt: dodo1211/20120709

// Fills the per-point scratch data of <nnStruct> for <point>:
//   - reducedPoint: the coordinates divided by parameterR;
//   - pointULSHVectors[i]: output of computeULSH for tuple i;
//   - precomputedHashesOfULSHs[i]: output of precomputeUHFsForULSH on
//     pointULSHVectors[i], using <uhash> (only when
//     USE_SAME_UHASH_FUNCTIONS is set).
// The elapsed time is accumulated in <timeComputeULSH>.
inline void preparePointAdding(PRNearNeighborStructT nnStruct, PUHashStructureT uhash, PPointT point){
  ASSERT(nnStruct != NULL);
  ASSERT(uhash != NULL);
  ASSERT(point != NULL);

  TIMEV_START(timeComputeULSH);

  // Scale the point by 1/R.
  for(IntT coord = 0; coord < nnStruct->dimension; ++coord){
    nnStruct->reducedPoint[coord] = point->coordinates[coord] / nnStruct->parameterR;
  }

  // Evaluate every ULSH function on the scaled point.
  for(IntT tuple = 0; tuple < nnStruct->nHFTuples; ++tuple){
    computeULSH(nnStruct, tuple, nnStruct->reducedPoint, nnStruct->pointULSHVectors[tuple]);
  }

  // Derive the data stored in <precomputedHashesOfULSHs>.
  if (USE_SAME_UHASH_FUNCTIONS) {
    for(IntT tuple = 0; tuple < nnStruct->nHFTuples; ++tuple){
      precomputeUHFsForULSH(uhash, nnStruct->pointULSHVectors[tuple], nnStruct->hfTuplesLength, nnStruct->precomputedHashesOfULSHs[tuple]);
    }
  }

  TIMEV_END(timeComputeULSH);
}

Beispiel #10

0

Datei anzeigen

Datei: LocalitySensitiveHashing.cpp Projekt: dodo1211/20120709

// Returns the list of near neighbors of the point <query> (with a
// certain success probability). Near neighbor is defined as being a
// point within distance <parameterR>. Each near neighbor from the
// data set is returned with a certain probability, dependent on
// <parameterK>, <parameterL>, and <parameterT>.
//
// Parameters:
//   nnStruct   - the LSH structure to search; its scratch buffers
//                (reducedPoint, pointULSHVectors, precomputedHashesOfULSHs,
//                markedPoints, markedPointsIndeces) are used by the query.
//   query      - the query point.
//   result     - in/out array receiving the near neighbors. If NULL, it is
//                allocated with RESULT_INIT_SIZE entries. If it runs out of
//                space it is reallocated to twice its size.
//   resultSize - in/out capacity of <result> (in entries).
//
// No point is stored in <result> while nnStruct->reportingResult is false.
// Allocation failures are fatal (FAILIF).
//
// The return value is the number of points found.
Int32T getNearNeighborsFromPRNearNeighborStruct(PRNearNeighborStructT nnStruct, PPointT query, PPointT *(&result), Int32T &resultSize){
  ASSERT(nnStruct != NULL);
  ASSERT(query != NULL);
  ASSERT(nnStruct->reducedPoint != NULL);
  ASSERT(!nnStruct->useUfunctions || nnStruct->pointULSHVectors != NULL);

  PPointT point = query;

  if (result == NULL){
    resultSize = RESULT_INIT_SIZE;
    FAILIF(NULL == (result = (PPointT*)MALLOC(resultSize * sizeof(PPointT))));
  }

  // Fills nnStruct->precomputedHashesOfULSHs for the query point.
  preparePointAdding(nnStruct, nnStruct->hashedBuckets[0], point);

  // Work on a local copy of the precomputed hashes.
  Uns32T precomputedHashesOfULSHs[nnStruct->nHFTuples][N_PRECOMPUTED_HASHES_NEEDED];
  for(IntT i = 0; i < nnStruct->nHFTuples; i++){
    for(IntT j = 0; j < N_PRECOMPUTED_HASHES_NEEDED; j++){
      precomputedHashesOfULSHs[i][j] = nnStruct->precomputedHashesOfULSHs[i][j];
    }
  }
  TIMEV_START(timeTotalBuckets);

  // Optionally switch the fine-grained timers off for the bucket scan; the
  // previous setting is restored after the loop.
  BooleanT oldTimingOn = timingOn;
  if (noExpensiveTiming) {
    timingOn = FALSE;
  }

  // Initialize the counters for defining the pair of <u> functions used for <g> functions.
  IntT firstUComp = 0;
  IntT secondUComp = 1;

  Int32T nNeighbors = 0;// the number of near neighbors found so far.
  Int32T nMarkedPoints = 0;// the number of marked points
  for(IntT i = 0; i < nnStruct->parameterL; i++){
    TIMEV_START(timeGetBucket);
    GeneralizedPGBucket gbucket;
    if (!nnStruct->useUfunctions) {
      // Use usual <g> functions (truly independent; <g>s are precisely
      // <u>s).
      gbucket = getGBucket(nnStruct->hashedBuckets[i], 1, precomputedHashesOfULSHs[i], NULL);
    } else {
      // Use <u> functions (<g>s are pairs of <u> functions).
      gbucket = getGBucket(nnStruct->hashedBuckets[i], 2, precomputedHashesOfULSHs[firstUComp], precomputedHashesOfULSHs[secondUComp]);

      // compute what is the next pair of <u> functions.
      secondUComp++;
      if (secondUComp == nnStruct->nHFTuples) {
        firstUComp++;
        secondUComp = firstUComp + 1;
      }
    }
    TIMEV_END(timeGetBucket);

    PGBucketT bucket;

    TIMEV_START(timeCycleBucket);
    switch (nnStruct->hashedBuckets[i]->typeHT){
    case HT_LINKED_LIST:
      bucket = gbucket.llGBucket;
      if (bucket != NULL){
        // cycle through the bucket and add to <result> the points that are near.
        PBucketEntryT bucketEntry = &(bucket->firstEntry);
        while (bucketEntry != NULL){
          Int32T candidatePIndex = bucketEntry->pointIndex;
          PPointT candidatePoint = nnStruct->points[candidatePIndex];
          if (isDistanceSqrLeq(nnStruct->dimension, point, candidatePoint, nnStruct->parameterR2) && nnStruct->reportingResult){
            if (nnStruct->markedPoints[candidatePIndex] == FALSE) {
              // a new R-NN point was found (not yet in <result>).
              if (nNeighbors >= resultSize){
                // Out of space => grow <result>. A caller-supplied buffer may
                // come with a non-positive size, which doubling would never
                // enlarge; fall back to the initial size in that case.
                resultSize = (resultSize > 0) ? 2 * resultSize : RESULT_INIT_SIZE;
                FAILIF(NULL == (result = (PPointT*)REALLOC(result, resultSize * sizeof(PPointT))));
              }
              result[nNeighbors] = candidatePoint;
              nNeighbors++;
              nnStruct->markedPointsIndeces[nMarkedPoints] = candidatePIndex;
              nnStruct->markedPoints[candidatePIndex] = TRUE; // do not include more points with the same index
              nMarkedPoints++;
            }
          }
          bucketEntry = bucketEntry->nextEntry;
        }
      }
      break;
    case HT_STATISTICS:
      ASSERT(FALSE); // HT_STATISTICS not supported anymore
      break;
    case HT_HYBRID_CHAINS:
      if (gbucket.hybridGBucket != NULL){
        PHybridChainEntryT hybridPoint = gbucket.hybridGBucket;
        Uns32T offset = 0;
        if (hybridPoint->point.bucketLength == 0){
          // there are overflow points in this bucket: assemble the offset from
          // the <bucketLength> fields of the entries that follow the first one.
          offset = 0;
          for(IntT j = 0; j < N_FIELDS_PER_INDEX_OF_OVERFLOW; j++){
            offset += ((Uns32T)((hybridPoint + 1 + j)->point.bucketLength) << (j * N_BITS_FOR_BUCKET_LENGTH));
          }
        }
        Uns32T index = 0;
        BooleanT done = FALSE;
        while(!done){
          if (index == MAX_NONOVERFLOW_POINTS_PER_BUCKET){
            // Skip ahead by <offset> once the non-overflow part is exhausted.
            index = index + offset;
          }
          Int32T candidatePIndex = (hybridPoint + index)->point.pointIndex;
          CR_ASSERT(candidatePIndex >= 0 && candidatePIndex < nnStruct->nPoints);
          done = (hybridPoint + index)->point.isLastPoint == 1 ? TRUE : FALSE;
          index++;
          if (nnStruct->markedPoints[candidatePIndex] == FALSE){
            // mark the point first, so it is examined at most once per query.
            nnStruct->markedPointsIndeces[nMarkedPoints] = candidatePIndex;
            nnStruct->markedPoints[candidatePIndex] = TRUE; // do not include more points with the same index
            nMarkedPoints++;

            PPointT candidatePoint = nnStruct->points[candidatePIndex];
            if (isDistanceSqrLeq(nnStruct->dimension, point, candidatePoint, nnStruct->parameterR2) && nnStruct->reportingResult){
              // a new R-NN point was found (not yet in <result>).
              if (nNeighbors >= resultSize){
                // Out of space => grow <result> (see the linked-list case for
                // the non-positive size fallback).
                resultSize = (resultSize > 0) ? 2 * resultSize : RESULT_INIT_SIZE;
                FAILIF(NULL == (result = (PPointT*)REALLOC(result, resultSize * sizeof(PPointT))));
              }
              result[nNeighbors] = candidatePoint;
              nNeighbors++;
            }
          }
          // else: the point was already marked (& examined)
        }
      }
      break;
    default:
      ASSERT(FALSE);
    }
    TIMEV_END(timeCycleBucket);

  }

  timingOn = oldTimingOn;
  TIMEV_END(timeTotalBuckets);

  // we need to clear the array nnStruct->markedPoints for the next query.
  for(Int32T i = 0; i < nMarkedPoints; i++){
    ASSERT(nnStruct->markedPoints[nnStruct->markedPointsIndeces[i]] == TRUE);
    nnStruct->markedPoints[nnStruct->markedPointsIndeces[i]] = FALSE;
  }
  DPRINTF("nMarkedPoints: %d\n", nMarkedPoints);

  return nNeighbors;
}

Beispiel #11

0

Datei anzeigen

Datei: LocalitySensitiveHashing.cpp Projekt: xiawei0000/Logo_LSH

// Returns the list of near neighbors of the point <query> (with a
// certain success probability). Near neighbor is defined as being a
// point within distance <parameterR>. Each near neighbor from the
// data set is returned with a certain probability, dependent on
// <parameterK>, <parameterL>, and <parameterT>.
//
// Outline: the hash data of the query is precomputed, the matching bucket
// is fetched from each of the <parameterL> hash tables, and the points of
// these buckets that pass the distance test are collected.
//
// Parameters:
//   nnStruct   - the LSH structure to search; its scratch buffers
//                (reducedPoint, pointULSHVectors, precomputedHashesOfULSHs,
//                markedPoints, markedPointsIndeces) are used by the query.
//   query      - the query point.
//   result     - in/out array receiving the near neighbors. If NULL, it is
//                allocated with RESULT_INIT_SIZE entries. If it runs out of
//                space it is reallocated to twice its size.
//   resultSize - in/out capacity of <result> (in entries).
//
// No point is stored in <result> while nnStruct->reportingResult is false.
// Allocation failures are fatal (FAILIF).
//
// The return value is the number of points found.
Int32T getNearNeighborsFromPRNearNeighborStruct(
    PRNearNeighborStructT nnStruct, PPointT query,
    PPointT *(&result), Int32T &resultSize)
{
    ASSERT(nnStruct != NULL);
    ASSERT(query != NULL);
    ASSERT(nnStruct->reducedPoint != NULL);
    ASSERT(!nnStruct->useUfunctions || nnStruct->pointULSHVectors != NULL);

    PPointT point = query;

    if (result == NULL)
    {
        resultSize = RESULT_INIT_SIZE;
        FAILIF(NULL == (result = (PPointT*)MALLOC(resultSize * sizeof(PPointT))));
    }

    // Fills nnStruct->precomputedHashesOfULSHs for the query point.
    preparePointAdding(nnStruct, nnStruct->hashedBuckets[0], point);

    // Take a heap copy of the precomputed hashes (one row per tuple). The
    // copy is released before returning.
    Uns32T **precomputedHashesOfULSHs = (Uns32T**)malloc(sizeof(Uns32T*)*(nnStruct->nHFTuples));
    // malloc(0) may legitimately return NULL, so only a non-empty request counts as a failure.
    FAILIF(nnStruct->nHFTuples > 0 && NULL == precomputedHashesOfULSHs);
    for(IntT i = 0; i < nnStruct->nHFTuples; i++)
    {
        FAILIF(NULL == (precomputedHashesOfULSHs[i] = (Uns32T*)malloc(sizeof(Uns32T)*(N_PRECOMPUTED_HASHES_NEEDED))));
        for(IntT j = 0; j < N_PRECOMPUTED_HASHES_NEEDED; j++)
        {
            precomputedHashesOfULSHs[i][j] = nnStruct->precomputedHashesOfULSHs[i][j];
        }
    }

    TIMEV_START(timeTotalBuckets);

    // Optionally switch the fine-grained timers off for the bucket scan; the
    // previous setting is restored after the loop.
    BooleanT oldTimingOn = timingOn;
    if (noExpensiveTiming)
    {
        timingOn = FALSE;
    }

    // Initialize the counters for defining the pair of <u> functions used for <g> functions.
    IntT firstUComp = 0;
    IntT secondUComp = 1;

    Int32T nNeighbors = 0;// the number of near neighbors found so far.
    Int32T nMarkedPoints = 0;// the number of marked points
    for(IntT i = 0; i < nnStruct->parameterL; i++)
    {   // one iteration per hash table
        TIMEV_START(timeGetBucket);
        GeneralizedPGBucket gbucket;
        if (!nnStruct->useUfunctions)
        {
            // Use usual <g> functions (truly independent; <g>s are precisely
            // <u>s).
            gbucket = getGBucket(nnStruct->hashedBuckets[i], 1, precomputedHashesOfULSHs[i], NULL);
        }
        else
        {
            // Use <u> functions (<g>s are pairs of <u> functions).
            gbucket = getGBucket(nnStruct->hashedBuckets[i], 2, precomputedHashesOfULSHs[firstUComp], precomputedHashesOfULSHs[secondUComp]);

            // compute what is the next pair of <u> functions.
            secondUComp++;
            if (secondUComp == nnStruct->nHFTuples)
            {
                firstUComp++;
                secondUComp = firstUComp + 1;
            }
        }

        TIMEV_END(timeGetBucket);

        PGBucketT bucket;

        TIMEV_START(timeCycleBucket);
        // The way the bucket's points are walked depends on the table type.
        switch (nnStruct->hashedBuckets[i]->typeHT)
        {
        case HT_LINKED_LIST:
            bucket = gbucket.llGBucket;
            if (bucket != NULL)
            {
                // cycle through the bucket and add to <result> the points that are near.
                PBucketEntryT bucketEntry = &(bucket->firstEntry);
                while (bucketEntry != NULL)
                {
                    Int32T candidatePIndex = bucketEntry->pointIndex;
                    PPointT candidatePoint = nnStruct->points[candidatePIndex];
                    if (isDistanceSqrLeq(nnStruct->dimension, point, candidatePoint, nnStruct->parameterR2)
                            && nnStruct->reportingResult)
                    {
                        if (nnStruct->markedPoints[candidatePIndex] == FALSE)
                        {
                            // a new R-NN point was found (not yet in <result>).
                            if (nNeighbors >= resultSize)
                            {
                                // Out of space => grow <result>. A caller-supplied
                                // buffer may come with a non-positive size, which
                                // doubling would never enlarge; fall back to the
                                // initial size in that case.
                                resultSize = (resultSize > 0) ? 2 * resultSize : RESULT_INIT_SIZE;
                                FAILIF(NULL == (result = (PPointT*)REALLOC(result, resultSize * sizeof(PPointT))));
                            }
                            result[nNeighbors] = candidatePoint;
                            nNeighbors++;
                            nnStruct->markedPointsIndeces[nMarkedPoints] = candidatePIndex;
                            nnStruct->markedPoints[candidatePIndex] = TRUE; // do not include more points with the same index
                            nMarkedPoints++;
                        }
                    }
                    bucketEntry = bucketEntry->nextEntry;
                }
            }
            break;

        case HT_STATISTICS:
            ASSERT(FALSE); // HT_STATISTICS not supported anymore
            break;

        case HT_HYBRID_CHAINS:
            if (gbucket.hybridGBucket != NULL)
            {
                // The bucket is walked as an array of entries starting at <hybridPoint>.
                PHybridChainEntryT hybridPoint = gbucket.hybridGBucket;
                Uns32T offset = 0;
                if (hybridPoint->point.bucketLength == 0)
                {
                    // there are overflow points in this bucket: assemble the offset
                    // from the <bucketLength> fields of the entries that follow
                    // the first one.
                    offset = 0;
                    for(IntT j = 0; j < N_FIELDS_PER_INDEX_OF_OVERFLOW; j++)
                    {
                        offset += ((Uns32T)((hybridPoint + 1 + j)->point.bucketLength) << (j * N_BITS_FOR_BUCKET_LENGTH));
                    }
                }
                Uns32T index = 0;
                BooleanT done = FALSE;
                while(!done)
                {
                    if (index == MAX_NONOVERFLOW_POINTS_PER_BUCKET)
                    {
                        // Skip ahead by <offset> once the non-overflow part is exhausted.
                        index = index + offset;
                    }
                    // Entries store only the index of a point; the point itself
                    // is read from nnStruct->points.
                    Int32T candidatePIndex = (hybridPoint + index)->point.pointIndex;

                    CR_ASSERT(candidatePIndex >= 0 && candidatePIndex < nnStruct->nPoints);
                    done = (hybridPoint + index)->point.isLastPoint == 1 ? TRUE : FALSE;
                    index++;

                    if (nnStruct->markedPoints[candidatePIndex] == FALSE)
                    {
                        // mark the point first, so it is examined at most once per query.
                        nnStruct->markedPointsIndeces[nMarkedPoints] = candidatePIndex;
                        nnStruct->markedPoints[candidatePIndex] = TRUE; // do not include more points with the same index
                        nMarkedPoints++;

                        PPointT candidatePoint = nnStruct->points[candidatePIndex];
                        if (isDistanceSqrLeq(nnStruct->dimension, point, candidatePoint, nnStruct->parameterR2)
                                && nnStruct->reportingResult)
                        {
                            // a new R-NN point was found (not yet in <result>).
                            if (nNeighbors >= resultSize)
                            {
                                // Out of space => grow <result> (see the linked-list
                                // case for the non-positive size fallback).
                                resultSize = (resultSize > 0) ? 2 * resultSize : RESULT_INIT_SIZE;
                                FAILIF(NULL == (result = (PPointT*)REALLOC(result, resultSize * sizeof(PPointT))));
                            }
                            result[nNeighbors] = candidatePoint;
                            nNeighbors++;
                        }
                    }
                    // else: the point was already marked (& examined)
                }
            }
            break;

        default:
            ASSERT(FALSE);
        }

        TIMEV_END(timeCycleBucket);
    }

    timingOn = oldTimingOn;
    TIMEV_END(timeTotalBuckets);

    // we need to clear the array nnStruct->markedPoints for the next query.
    for(Int32T i = 0; i < nMarkedPoints; i++)
    {
        ASSERT(nnStruct->markedPoints[nnStruct->markedPointsIndeces[i]] == TRUE);
        nnStruct->markedPoints[nnStruct->markedPointsIndeces[i]] = FALSE;
    }
    DPRINTF("nMarkedPoints: %d\n", nMarkedPoints);

    // Release the per-query copy of the hashes.
    for(IntT i = 0; i < nnStruct->nHFTuples; i++)
    {
        free(precomputedHashesOfULSHs[i]);
    }
    free(precomputedHashesOfULSHs);

    return nNeighbors;
}