/*
 * Builds an R-near neighbor structure (PRNearNeighborStructT) whose parameters
 * are tuned automatically for the given data set.
 *
 *   thresholdR          the radius R of the R-NN problem.
 *   successProbability  the requested success probability.
 *   nPoints, dimension  number of points in <dataSet> and their dimension.
 *   dataSet             the points to index.
 *   nSampleQueries,
 *   sampleQueries       sample query points the parameters are optimized for;
 *                       they may be drawn from the real query set or from the
 *                       data set itself. When a sample query has the same
 *                       pointer as a data set point, that point is left out of
 *                       the data set while estimating the #collisions of the
 *                       query.
 *   memoryUpperBound    forwarded to computeOptimalParameters.
 *
 * Returns the structure produced by initLSH / initLSH_WithDataSet.
 */
PRNearNeighborStructT initSelfTunedRNearNeighborWithDataSet(RealT thresholdR,
                                                            RealT successProbability,
                                                            Int32T nPoints,
                                                            IntT dimension,
                                                            PPointT *dataSet,
                                                            IntT nSampleQueries,
                                                            PPointT *sampleQueries,
                                                            MemVarT memoryUpperBound){
  initializeLSHGlobal();

  PRNearNeighborStructT nnStruct = NULL;

  RNNParametersT optParameters = computeOptimalParameters(thresholdR,
                                                          successProbability,
                                                          nPoints,
                                                          dimension,
                                                          dataSet,
                                                          nSampleQueries,
                                                          sampleQueries,
                                                          memoryUpperBound);

  // Report the chosen parameters; <m> is only printed when <u> functions are used.
  if (optParameters.useUfunctions) {
    DPRINTF("Used m = %d\n", optParameters.parameterM);
    DPRINTF("Used L = %d\n", optParameters.parameterL);
  } else {
    DPRINTF("Used L=%d\n", optParameters.parameterL);
  }

  // Construct the R-NN data structure itself.
  TimeVarT timeInit = 0;
  TIMEV_START(timeInit);
  if (optParameters.typeHT == HT_HYBRID_CHAINS) {
    // The hybrid-chain variant receives the whole data set at construction time.
    printRNNParameters(DEBUG_OUTPUT, optParameters);
    nnStruct = initLSH_WithDataSet(optParameters, nPoints, dataSet);
  } else {
    nnStruct = initLSH(optParameters, nPoints);
  }
  TIMEV_END(timeInit);
  DPRINTF("Time for initializing: %0.6lf\n", timeInit);
  DPRINTF("Allocated memory: %lld\n", totalAllocatedMemory);

  TimeVarT timeAdding = 0;
  if (optParameters.typeHT != HT_HYBRID_CHAINS) {
    // For the other table types the points are inserted one at a time.
    TIMEV_START(timeAdding);
    for (IntT pointIdx = 0; pointIdx < nPoints; pointIdx++) {
      addNewPointToPRNearNeighborStruct(nnStruct, dataSet[pointIdx]);
    }
    TIMEV_END(timeAdding);
    printf("Time for adding points: %0.6lf\n", timeAdding);
    DPRINTF("Allocated memory: %lld\n", totalAllocatedMemory);
  }

  DPRINTF("Time for creating buckets: %0.6lf\n", timeBucketCreation);
  DPRINTF("Time for putting buckets into UH: %0.6lf\n", timeBucketIntoUH);
  DPRINTF("Time for computing GLSH: %0.6lf\n", timeComputeULSH);
  DPRINTF("NGBuckets: %d\n", nGBuckets);

  return nnStruct;
}
// Runs one R-NN query for <queryPoint> against <nnStruct> and reports timing
// statistics through DPRINTF. All per-query timers and the distance
// computation counter are reset before the query is issued. The neighbors are
// returned through <result>/<resultSize> by
// getNearNeighborsFromPRNearNeighborStruct; the return value is the number of
// neighbors it found.
Int32T getRNearNeighbors(PRNearNeighborStructT nnStruct, PPointT queryPoint, PPointT *(&result), Int32T &resultSize){
  DPRINTF("Estimated ULSH comp: %0.6lf\n", lshPrecomp * nnStruct->nHFTuples * nnStruct->hfTuplesLength);
  DPRINTF("Estimated UH overhead: %0.6lf\n", uhashOver * nnStruct->nHFTuples);

  // (A query-specific collision estimate based on estimateNCollisions /
  // estimateNDistinctCollisions used to be printed here; it is disabled.)

  // Start every per-query statistic from zero.
  nOfDistComps = 0;
  timeRNNQuery = 0;
  timeComputeULSH = 0;
  timeGetBucket = 0;
  timeCycleBucket = 0;
  timeTotalBuckets = 0;
  timeDistanceComputation = 0;
  timeResultStoring = 0;
  timeCycleProc = 0;
  timePrecomputeHash = 0;
  timeGBHash = 0;
  timeChainTraversal = 0;

  TIMEV_START(timeRNNQuery);
  noExpensiveTiming = !DEBUG_PROFILE_TIMING;
  Int32T nFound = getNearNeighborsFromPRNearNeighborStruct(nnStruct, queryPoint, result, resultSize);
  TIMEV_END(timeRNNQuery);

  DPRINTF("Time to compute LSH: %0.6lf\n", timeComputeULSH);
  DPRINTF("Time to get bucket: %0.6lf\n", timeGetBucket);
  DPRINTF("Time to cycle through buckets: %0.6lf\n", timeCycleBucket);
  DPRINTF("Time to for processing buckets (UH+examining points): %0.6lf\n", timeTotalBuckets);
  DPRINTF("Time for distance comps: %0.6lf\n", timeDistanceComputation);
  DPRINTF("Time to store result: %0.6lf\n", timeResultStoring);
  DPRINTF("Number of dist comps: %d\n", nOfDistComps);
  DPRINTF("Number buckets in chains: %d\n", nBucketsInChains);
  DPRINTF("Number buckets in chains / L: %0.3lf\n", (double)nBucketsInChains / nnStruct->nHFTuples);
  DPRINTF("Cumulative time for R-NN query: %0.6lf\n", timeRNNQuery);

  return nFound;
}
// Returns TRUE iff |p1-p2|_2^2 <= threshold inline BooleanT isDistanceSqrLeq(IntT dimension, PPointT p1, PPointT p2, RealT threshold){ RealT result = 0; nOfDistComps++; TIMEV_START(timeDistanceComputation); for (IntT i = 0; i < dimension; i++){ RealT temp = p1->coordinates[i] - p2->coordinates[i]; result += SQR(temp); if (result > threshold){ TIMEV_END(timeDistanceComputation); return 0; } } TIMEV_END(timeDistanceComputation); //return result <= threshold; return 1; }
// Computes how much time it takes to run timing functions (functions // that compute timings) -- we need to substract this value when we // compute the length of an actual interval of time. //计算函数运行的时间,我们需要减去这个值,得到精确的处理时间 void tuneTimeFunctions(){ timevSpeed = 0; // Compute the time needed for a calls to TIMEV_START and TIMEV_END IntT nIterations = 100000; TimeVarT timeVar = 0; for(IntT i = 0; i < nIterations; i++){ TIMEV_START(timeVar); TIMEV_END(timeVar); } timevSpeed = timeVar / nIterations; DPRINTF("Tuning: timevSpeed = %0.9lf\n", timevSpeed); }
int main(int nargs, char **args){ if (nargs < 7) { usage(args[0]); exit(1); } nPoints = atoi(args[1]); nQueries = atoi(args[2]); dimension = atoi(args[3]); p = atof(args[4]); K = atoi(args[5]); readPoints(args[6]); // read all points FILE *queryFile = fopen(args[7], "rt"); //fscanf(queryFile, "%d\n", &nQueries); query = (RealT*)malloc(dimension * sizeof(RealT)); printf("nPoints = %d\n", nPoints); //printf("nQueries = %d\n", nQueries); for(int i = 0; i < nQueries; i++){ // read in the query point. for(int d = 0; d < dimension; d++){ FSCANF_REAL(queryFile, &(query[d])); } //printRealVector1("Query: ", dimension, query); std::priority_queue<Node> myq; TimeVarT time = 0; RealT tempdis = 0; TIMEV_START(time); for(int j = 0; j < nPoints; j++){ tempdis = dist(query, points[j]); updataQ(myq, tempdis, j); //printf("Distance[dist] (%d): %lf\n", j, dist(query, points[j])); //printRealVector1("X: ", dimension, points[j]); } TIMEV_END(time); // time only finding the near neighbors, and exclude printing from timing. printf("Total time for K-NN query \t%0.6lf\n",time); printf("Query point %d 's %d NNs are:\n", i, K); display(myq); } }
// Adds a new point to the LSH data structure, that is for each // i=0..parameterL-1, the point is added to the bucket defined by // function g_i=lshFunctions[i]. void addNewPointToPRNearNeighborStruct(PRNearNeighborStructT nnStruct, PPointT point){ ASSERT(nnStruct != NULL); ASSERT(point != NULL); ASSERT(nnStruct->reducedPoint != NULL); ASSERT(!nnStruct->useUfunctions || nnStruct->pointULSHVectors != NULL); ASSERT(nnStruct->hashedBuckets[0]->typeHT == HT_LINKED_LIST || nnStruct->hashedBuckets[0]->typeHT == HT_STATISTICS); nnStruct->points[nnStruct->nPoints] = point; nnStruct->nPoints++; preparePointAdding(nnStruct, nnStruct->hashedBuckets[0], point); // Initialize the counters for defining the pair of <u> functions used for <g> functions. IntT firstUComp = 0; IntT secondUComp = 1; TIMEV_START(timeBucketIntoUH); for(IntT i = 0; i < nnStruct->parameterL; i++){ if (!nnStruct->useUfunctions) { // Use usual <g> functions (truly independent; <g>s are precisly // <u>s). addBucketEntry(nnStruct->hashedBuckets[i], 1, nnStruct->precomputedHashesOfULSHs[i], NULL, nnStruct->nPoints - 1); } else { // Use <u> functions (<g>s are pairs of <u> functions). addBucketEntry(nnStruct->hashedBuckets[i], 2, nnStruct->precomputedHashesOfULSHs[firstUComp], nnStruct->precomputedHashesOfULSHs[secondUComp], nnStruct->nPoints - 1); // compute what is the next pair of <u> functions. secondUComp++; if (secondUComp == nnStruct->nHFTuples) { firstUComp++; secondUComp = firstUComp + 1; } } //batchAddRequest(nnStruct, i, firstUComp, secondUComp, point); } TIMEV_END(timeBucketIntoUH); // Check whether the vectors <nearPoints> & <nearPointsIndeces> is still big enough. 
if (nnStruct->nPoints > nnStruct->sizeMarkedPoints) { nnStruct->sizeMarkedPoints = 2 * nnStruct->nPoints; FAILIF(NULL == (nnStruct->markedPoints = (BooleanT*)REALLOC(nnStruct->markedPoints, nnStruct->sizeMarkedPoints * sizeof(BooleanT)))); for(IntT i = 0; i < nnStruct->sizeMarkedPoints; i++){ nnStruct->markedPoints[i] = FALSE; } FAILIF(NULL == (nnStruct->markedPointsIndeces = (Int32T*)REALLOC(nnStruct->markedPointsIndeces, nnStruct->sizeMarkedPoints * sizeof(Int32T)))); } }
// Precomputes the hash data of <point> so that its bucket can later be located
// in each hash table.
//   Input:  nnStruct (holds the scratch vectors), uhash (the universal hash
//           structure passed to precomputeUHFsForULSH), and the point.
//   Effect: the point divided by parameterR is written to
//           nnStruct->reducedPoint, its ULSH vectors to
//           nnStruct->pointULSHVectors and, when USE_SAME_UHASH_FUNCTIONS is
//           set, the derived hash values to nnStruct->precomputedHashesOfULSHs.
inline void preparePointAdding(PRNearNeighborStructT nnStruct, PUHashStructureT uhash, PPointT point)
{
  ASSERT(nnStruct != NULL);
  ASSERT(uhash != NULL);
  ASSERT(point != NULL);

  TIMEV_START(timeComputeULSH);

  // Rescale every coordinate by 1/R.
  IntT coord = 0;
  while (coord < nnStruct->dimension) {
    nnStruct->reducedPoint[coord] = point->coordinates[coord] / nnStruct->parameterR;
    coord++;
  }

  // Compute all ULSH functions: one output vector per tuple
  // (nHFTuples tuples), stored in pointULSHVectors[tuple].
  IntT tuple = 0;
  while (tuple < nnStruct->nHFTuples) {
    computeULSH(nnStruct, tuple, nnStruct->reducedPoint, nnStruct->pointULSHVectors[tuple]);
    tuple++;
  }

  // Compute data for <precomputedHashesOfULSHs>: the hash values of each ULSH
  // vector are stored in precomputedHashesOfULSHs[tuple].
  if (USE_SAME_UHASH_FUNCTIONS) {
    tuple = 0;
    while (tuple < nnStruct->nHFTuples) {
      precomputeUHFsForULSH(uhash,
                            nnStruct->pointULSHVectors[tuple],
                            nnStruct->hfTuplesLength,
                            nnStruct->precomputedHashesOfULSHs[tuple]);
      tuple++;
    }
  }

  TIMEV_END(timeComputeULSH);
}
// Returns TRUE iff |p1-p2|_2^2 <= threshold inline BooleanT isDistanceSqrLeq(IntT dimension, PPointT p1, PPointT p2, RealT threshold) { //直接计算:|p1-p2|_2^2 <= threshold;两点距离是否小于阈值 RealT result = 0; nOfDistComps++; TIMEV_START(timeDistanceComputation); for (IntT i = 0; i < dimension; i++) { RealT temp = p1->coordinates[i] - p2->coordinates[i]; #ifdef USE_L1_DISTANCE result += ABS(temp); #else result += SQR(temp); #endif if (result > threshold) { // TIMEV_END(timeDistanceComputation); return 0; } } TIMEV_END(timeDistanceComputation); //return result <= threshold; return 1; }
// Prepares the hashing data for <point>: writes the point divided by
// parameterR into nnStruct->reducedPoint, evaluates every ULSH function on it
// (results in nnStruct->pointULSHVectors) and, if USE_SAME_UHASH_FUNCTIONS is
// set, fills nnStruct->precomputedHashesOfULSHs from those vectors using
// <uhash>. The whole routine is accounted for in <timeComputeULSH>.
inline void preparePointAdding(PRNearNeighborStructT nnStruct, PUHashStructureT uhash, PPointT point){
  ASSERT(nnStruct != NULL);
  ASSERT(uhash != NULL);
  ASSERT(point != NULL);

  TIMEV_START(timeComputeULSH);

  // Scale the point by 1/R.
  for (IntT axis = 0; axis < nnStruct->dimension; ++axis){
    nnStruct->reducedPoint[axis] = point->coordinates[axis] / nnStruct->parameterR;
  }

  // Compute all ULSH functions.
  for (IntT t = 0; t < nnStruct->nHFTuples; ++t){
    computeULSH(nnStruct, t, nnStruct->reducedPoint, nnStruct->pointULSHVectors[t]);
  }

  // Compute data for <precomputedHashesOfULSHs>.
  if (USE_SAME_UHASH_FUNCTIONS) {
    for (IntT t = 0; t < nnStruct->nHFTuples; ++t){
      precomputeUHFsForULSH(uhash,
                            nnStruct->pointULSHVectors[t],
                            nnStruct->hfTuplesLength,
                            nnStruct->precomputedHashesOfULSHs[t]);
    }
  }

  TIMEV_END(timeComputeULSH);
}
// Returns the list of near neighbors of the point <query> (with a
// certain success probability). Near neighbor is defined as being a
// point within distance <parameterR>. Each near neighbor from the
// data set is returned with a certain probability, dependent on
// <parameterK>, <parameterL>, and <parameterT>.
//
// The returned points are kept in the array <result>. If <result> is
// NULL, it is allocated with RESULT_INIT_SIZE entries. Whenever the
// array is full, its size (<resultSize>) is doubled. Allocation
// failures are fatal (FAILIF). The return value is the number of
// points found.
//
// nnStruct->markedPoints is used to avoid reporting the same point
// twice; all marks set during the query are cleared before returning.
Int32T getNearNeighborsFromPRNearNeighborStruct(PRNearNeighborStructT nnStruct, PPointT query, PPointT *(&result), Int32T &resultSize){
  ASSERT(nnStruct != NULL);
  ASSERT(query != NULL);
  ASSERT(nnStruct->reducedPoint != NULL);
  ASSERT(!nnStruct->useUfunctions || nnStruct->pointULSHVectors != NULL);

  PPointT point = query;

  if (result == NULL){
    resultSize = RESULT_INIT_SIZE;
    FAILIF(NULL == (result = (PPointT*)MALLOC(resultSize * sizeof(PPointT))));
  }

  // Fills nnStruct->precomputedHashesOfULSHs for the query point.
  preparePointAdding(nnStruct, nnStruct->hashedBuckets[0], point);

  // Work on a local copy of the precomputed hashes.
  Uns32T precomputedHashesOfULSHs[nnStruct->nHFTuples][N_PRECOMPUTED_HASHES_NEEDED];
  for(IntT i = 0; i < nnStruct->nHFTuples; i++){
    for(IntT j = 0; j < N_PRECOMPUTED_HASHES_NEEDED; j++){
      precomputedHashesOfULSHs[i][j] = nnStruct->precomputedHashesOfULSHs[i][j];
    }
  }

  TIMEV_START(timeTotalBuckets);

  // Fine-grained timing is switched off inside the loop when requested and
  // restored afterwards.
  BooleanT oldTimingOn = timingOn;
  if (noExpensiveTiming) {
    timingOn = FALSE;
  }

  // Initialize the counters for defining the pair of <u> functions used for <g> functions.
  IntT firstUComp = 0;
  IntT secondUComp = 1;

  Int32T nNeighbors = 0;// the number of near neighbors found so far.
  Int32T nMarkedPoints = 0;// the number of marked points
  for(IntT i = 0; i < nnStruct->parameterL; i++){
    TIMEV_START(timeGetBucket);
    GeneralizedPGBucket gbucket;
    if (!nnStruct->useUfunctions) {
      // Use usual <g> functions (truly independent; <g>s are precisely
      // <u>s).
      gbucket = getGBucket(nnStruct->hashedBuckets[i], 1, precomputedHashesOfULSHs[i], NULL);
    } else {
      // Use <u> functions (<g>s are pairs of <u> functions).
      gbucket = getGBucket(nnStruct->hashedBuckets[i], 2, precomputedHashesOfULSHs[firstUComp], precomputedHashesOfULSHs[secondUComp]);

      // compute what is the next pair of <u> functions.
      secondUComp++;
      if (secondUComp == nnStruct->nHFTuples) {
        firstUComp++;
        secondUComp = firstUComp + 1;
      }
    }
    TIMEV_END(timeGetBucket);

    PGBucketT bucket;

    TIMEV_START(timeCycleBucket);
    switch (nnStruct->hashedBuckets[i]->typeHT){
    case HT_LINKED_LIST:
      bucket = gbucket.llGBucket;
      if (bucket != NULL){
        // circle through the bucket and add to <result> the points that are near.
        PBucketEntryT bucketEntry = &(bucket->firstEntry);
        while (bucketEntry != NULL){
          Int32T candidatePIndex = bucketEntry->pointIndex;
          PPointT candidatePoint = nnStruct->points[candidatePIndex];
          if (isDistanceSqrLeq(nnStruct->dimension, point, candidatePoint, nnStruct->parameterR2) && nnStruct->reportingResult){
            if (nnStruct->markedPoints[candidatePIndex] == FALSE) {
              // a new R-NN point was found (not yet in <result>).
              if (nNeighbors >= resultSize){
                // run out of space => resize the <result> array.
                // A failed reallocation is fatal, like every other
                // allocation in this file.
                resultSize = 2 * resultSize;
                FAILIF(NULL == (result = (PPointT*)REALLOC(result, resultSize * sizeof(PPointT))));
              }
              result[nNeighbors] = candidatePoint;
              nNeighbors++;
              nnStruct->markedPointsIndeces[nMarkedPoints] = candidatePIndex;
              nnStruct->markedPoints[candidatePIndex] = TRUE; // do not include more points with the same index
              nMarkedPoints++;
            }
          }
          bucketEntry = bucketEntry->nextEntry;
        }
      }
      break;
    case HT_STATISTICS:
      ASSERT(FALSE); // HT_STATISTICS not supported anymore
      break;
    case HT_HYBRID_CHAINS:
      if (gbucket.hybridGBucket != NULL){
        PHybridChainEntryT hybridPoint = gbucket.hybridGBucket;
        Uns32T offset = 0;
        if (hybridPoint->point.bucketLength == 0){
          // there are overflow points in this bucket; the offset to them is
          // assembled from the bucketLength fields of the following entries.
          offset = 0;
          for(IntT j = 0; j < N_FIELDS_PER_INDEX_OF_OVERFLOW; j++){
            offset += ((Uns32T)((hybridPoint + 1 + j)->point.bucketLength) << (j * N_BITS_FOR_BUCKET_LENGTH));
          }
        }
        Uns32T index = 0;
        BooleanT done = FALSE;
        while(!done){
          if (index == MAX_NONOVERFLOW_POINTS_PER_BUCKET){
            // jump to the overflow part of the bucket.
            index = index + offset;
          }
          Int32T candidatePIndex = (hybridPoint + index)->point.pointIndex;
          CR_ASSERT(candidatePIndex >= 0 && candidatePIndex < nnStruct->nPoints);
          done = (hybridPoint + index)->point.isLastPoint == 1 ? TRUE : FALSE;
          index++;
          if (nnStruct->markedPoints[candidatePIndex] == FALSE){
            // mark the point first, so that it is examined only once per query.
            nnStruct->markedPointsIndeces[nMarkedPoints] = candidatePIndex;
            nnStruct->markedPoints[candidatePIndex] = TRUE; // do not include more points with the same index
            nMarkedPoints++;

            PPointT candidatePoint = nnStruct->points[candidatePIndex];
            if (isDistanceSqrLeq(nnStruct->dimension, point, candidatePoint, nnStruct->parameterR2) && nnStruct->reportingResult){
              // a new R-NN point was found (not yet in <result>).
              if (nNeighbors >= resultSize){
                // run out of space => resize the <result> array.
                resultSize = 2 * resultSize;
                FAILIF(NULL == (result = (PPointT*)REALLOC(result, resultSize * sizeof(PPointT))));
              }
              result[nNeighbors] = candidatePoint;
              nNeighbors++;
            }
          }else{
            // the point was already marked (& examined)
          }
        }
      }
      break;
    default:
      ASSERT(FALSE);
    }
    TIMEV_END(timeCycleBucket);
  }

  timingOn = oldTimingOn;
  TIMEV_END(timeTotalBuckets);

  // we need to clear the array nnStruct->markedPoints for the next query.
  for(Int32T i = 0; i < nMarkedPoints; i++){
    ASSERT(nnStruct->markedPoints[nnStruct->markedPointsIndeces[i]] == TRUE);
    nnStruct->markedPoints[nnStruct->markedPointsIndeces[i]] = FALSE;
  }
  DPRINTF("nMarkedPoints: %d\n", nMarkedPoints);

  return nNeighbors;
}
// Returns the list of near neighbors of the point <point> (with a // certain success probability). Near neighbor is defined as being a // point within distance <parameterR>. Each near neighbor from the // data set is returned is returned with a certain probability, // dependent on <parameterK>, <parameterL>, and <parameterT>. The // returned points are kept in the array <result>. If result is not // allocated, it will be allocated to at least some minimum size // (RESULT_INIT_SIZE). If number of returned points is bigger than the // size of <result>, then the <result> is resized (to up to twice the // number of returned points). The return value is the number of // points found. Int32T getNearNeighborsFromPRNearNeighborStruct( PRNearNeighborStructT nnStruct, PPointT query, PPointT *(&result), Int32T &resultSize) { //通过查找索引,然后获得桶,提取n个最近邻点 //通过计算点的降维值,然后计算主副索引,最后由索引查找表 ASSERT(nnStruct != NULL); ASSERT(query != NULL); ASSERT(nnStruct->reducedPoint != NULL); ASSERT(!nnStruct->useUfunctions || nnStruct->pointULSHVectors != NULL); PPointT point = query; if (result == NULL) { resultSize = RESULT_INIT_SIZE; FAILIF(NULL == (result = (PPointT*)MALLOC(resultSize * sizeof(PPointT)))); } /* for (int tempd=150; tempd< 160;tempd++) { printf(" %lf ",query->coordinates[tempd]); } printf("查询的具体数据 10个 \n\n"); printf("查询数据 : %lf \n",query->coordinates[151]); // printf( "主hash的值: %u \n",nnStruct->hehasdBuckets[0]->mainHashA[5]); // printf( "辅助hash的值: %u \n",nnStruct->hashedBuckets[0]->controlHash1[5]); // printf( "a %u \n",nnStruct->lshFunctions[0][0].a[5]); // printf( "b %u \n",nnStruct->lshFunctions[0][0].b ); */ preparePointAdding(nnStruct, nnStruct->hashedBuckets[0], point); //根据传入的多维point。计算对应每个hash表的降维=》hash值,存入了nnStruct->precomputedHashesOfULSHs Uns32T **(precomputedHashesOfULSHs);//没释放 precomputedHashesOfULSHs= (Uns32T**)malloc(sizeof(Uns32T*)*(nnStruct->nHFTuples)); // Uns32T precomputedHashesOfULSHs[nnStruct->nHFTuples][N_PRECOMPUTED_HASHES_NEEDED]; for (IntT i=0; i< 
nnStruct->nHFTuples ; i++) { precomputedHashesOfULSHs[i]= (Uns32T*)malloc(sizeof(Uns32T)*(N_PRECOMPUTED_HASHES_NEEDED)); for (int temi=0; temi< N_PRECOMPUTED_HASHES_NEEDED ; temi++) { precomputedHashesOfULSHs[i][temi]=0; } } //初始化?? /* printf("\n输出:\n"); FILE *in = fopen("preconpute.txt", "a+") ; fprintf(in,"\n输出:\n"); fclose(in); */ for(IntT i = 0; i < nnStruct->nHFTuples; i++) { for(IntT j = 0; j < N_PRECOMPUTED_HASHES_NEEDED; j++) { precomputedHashesOfULSHs[i][j] = nnStruct->precomputedHashesOfULSHs[i][j]; /* printf(" %u", precomputedHashesOfULSHs[i][j]); FILE *in = fopen("preconpute.txt", "a+") ; fprintf(in," %u", precomputedHashesOfULSHs[i][j]); fclose(in); */ } /*printf(" \n"); FILE *in = fopen("preconpute.txt", "a+") ; fprintf(in," \n"); fclose(in); */ } TIMEV_START(timeTotalBuckets); BooleanT oldTimingOn = timingOn; if (noExpensiveTiming) { timingOn = FALSE; } // Initialize the counters for defining the pair of <u> functions used for <g> functions. IntT firstUComp = 0; IntT secondUComp = 1; Int32T nNeighbors = 0;// the number of near neighbors found so far. Int32T nMarkedPoints = 0;// the number of marked points for(IntT i = 0; i < nnStruct->parameterL; i++) { //L个表 TIMEV_START(timeGetBucket); GeneralizedPGBucket gbucket; if (!nnStruct->useUfunctions) { // Use usual <g> functions (truly independent; <g>s are precisly // <u>s). gbucket = getGBucket(nnStruct->hashedBuckets[i], 1, precomputedHashesOfULSHs[i], NULL); } else { // Use <u> functions (<g>s are pairs of <u> functions). gbucket = getGBucket(nnStruct->hashedBuckets[i], 2, precomputedHashesOfULSHs[firstUComp], precomputedHashesOfULSHs[secondUComp]); //通过两个向量,计算主副索引。然后遍历二级索引,提取对应的桶 // compute what is the next pair of <u> functions. //不是每个都 (first,second )(first,second )(first,second )的数组吗? 
secondUComp++; if (secondUComp == nnStruct->nHFTuples) { firstUComp++; secondUComp = firstUComp + 1; } } TIMEV_END(timeGetBucket); PGBucketT bucket; TIMEV_START(timeCycleBucket); switch (nnStruct->hashedBuckets[i]->typeHT) { //对不同类型的hash桶结构,使用不同方法获取二级桶的实体 case HT_LINKED_LIST: bucket = gbucket.llGBucket; if (bucket != NULL) { // circle through the bucket and add to <result> the points that are near. PBucketEntryT bucketEntry = &(bucket->firstEntry); //TIMEV_START(timeCycleProc); while (bucketEntry != NULL) { //TIMEV_END(timeCycleProc); //ASSERT(bucketEntry->point != NULL); //TIMEV_START(timeDistanceComputation); Int32T candidatePIndex = bucketEntry->pointIndex; PPointT candidatePoint = nnStruct->points[candidatePIndex]; if (isDistanceSqrLeq(nnStruct->dimension, point, candidatePoint, nnStruct->parameterR2) && nnStruct->reportingResult) { //TIMEV_END(timeDistanceComputation); if (nnStruct->markedPoints[candidatePIndex] == FALSE) { //TIMEV_START(timeResultStoring); // a new R-NN point was found (not yet in <result>). if (nNeighbors >= resultSize) { // run out of space => resize the <result> array. 
resultSize = 2 * resultSize; result = (PPointT*)REALLOC(result, resultSize * sizeof(PPointT)); } result[nNeighbors] = candidatePoint; nNeighbors++; nnStruct->markedPointsIndeces[nMarkedPoints] = candidatePIndex; nnStruct->markedPoints[candidatePIndex] = TRUE; // do not include more points with the same index nMarkedPoints++; //TIMEV_END(timeResultStoring); } } else { //TIMEV_END(timeDistanceComputation); } //TIMEV_START(timeCycleProc); bucketEntry = bucketEntry->nextEntry; }//while //TIMEV_END(timeCycleProc); } break; case HT_STATISTICS: ASSERT(FALSE); // HT_STATISTICS not supported anymore // if (gbucket.linkGBucket != NULL && gbucket.linkGBucket->indexStart != INDEX_START_EMPTY){ // Int32T position; // PointsListEntryT *pointsList = nnStruct->hashedBuckets[i]->bucketPoints.pointsList; // position = gbucket.linkGBucket->indexStart; // // circle through the bucket and add to <result> the points that are near. // while (position != INDEX_START_EMPTY){ // PPointT candidatePoint = pointsList[position].point; // if (isDistanceSqrLeq(nnStruct->dimension, point, candidatePoint, nnStruct->parameterR2) && nnStruct->reportingResult){ // if (nnStruct->nearPoints[candidatePoint->index] == FALSE) { // // a new R-NN point was found (not yet in <result>). // if (nNeighbors >= resultSize){ // // run out of space => resize the <result> array. 
// resultSize = 2 * resultSize; // result = (PPointT*)REALLOC(result, resultSize * sizeof(PPointT)); // } // result[nNeighbors] = candidatePoint; // nNeighbors++; // nnStruct->nearPoints[candidatePoint->index] = TRUE; // do not include more points with the same index // } // } // // Int32T oldP = position; // position = pointsList[position].nextPoint; // // ASSERT(position == INDEX_START_EMPTY || position == oldP + 1); // } // } break; case HT_HYBRID_CHAINS://默认的链条 if (gbucket.hybridGBucket != NULL) { //好像是在链表中找空间,同时要判断没有重复的 PHybridChainEntryT hybridPoint = gbucket.hybridGBucket;//获取 二级桶的数组指针,(实际桶就是一个数组) Uns32T offset = 0; if (hybridPoint->point.bucketLength == 0) { //长度为0,就是溢出了的桶, // there are overflow points in this bucket. offset = 0; for(IntT j = 0; j < N_FIELDS_PER_INDEX_OF_OVERFLOW; j++) { offset += ((Uns32T)((hybridPoint + 1 + j)->point.bucketLength) << (j * N_BITS_FOR_BUCKET_LENGTH)); } } Uns32T index = 0; BooleanT done = FALSE; while(!done) { if (index == MAX_NONOVERFLOW_POINTS_PER_BUCKET) { //CR_ASSERT(hybridPoint->point.bucketLength == 0); index = index + offset; } //hybridPoint 是个二级桶+实体组成的数组的首地址(其实就是个二级刻度) Int32T candidatePIndex = (hybridPoint + index)->point.pointIndex; //索引只是记录每个点的序号, 所有点都在nnStruct->points[candidatePIndex] 上保存具体值 CR_ASSERT(candidatePIndex >= 0 && candidatePIndex < nnStruct->nPoints); done = (hybridPoint + index)->point.isLastPoint == 1 ? TRUE : FALSE; //链表的遍历?好像是用数组来当链表用 index++; if (nnStruct->markedPoints[candidatePIndex] == FALSE) { //已经计算过的点都标记为true了 //nnStruct->markedPoints 是用来标记是否检测过得 // mark the point first. 
nnStruct->markedPointsIndeces[nMarkedPoints] = candidatePIndex; nnStruct->markedPoints[candidatePIndex] = TRUE; // do not include more points with the same index nMarkedPoints++; PPointT candidatePoint = nnStruct->points[candidatePIndex]; if (isDistanceSqrLeq(nnStruct->dimension, point, candidatePoint, nnStruct->parameterR2) && nnStruct->reportingResult) { //两点距离是否小于阈值 //if (nnStruct->markedPoints[candidatePIndex] == FALSE) { // a new R-NN point was found (not yet in <result>). //TIMEV_START(timeResultStoring); if (nNeighbors >= resultSize) { //近邻点太多,扩大空间 // run out of space => resize the <result> array. resultSize = 2 * resultSize; result = (PPointT*)REALLOC(result, resultSize * sizeof(PPointT)); } result[nNeighbors] = candidatePoint;//存入返回结果中 nNeighbors++; //TIMEV_END(timeResultStoring); //nnStruct->markedPointsIndeces[nMarkedPoints] = candidatePIndex; //nnStruct->markedPoints[candidatePIndex] = TRUE; // do not include more points with the same index //nMarkedPoints++; //} } }// if (nnStruct->markedPoints[candidatePIndex] == FALSE) else { // the point was already marked (& examined) } }// while(!done) }// if (gbucket.hybridGBucket != NULL) break; default: ASSERT(FALSE); }//swichcase TIMEV_END(timeCycleBucket); }//for timingOn = oldTimingOn; TIMEV_END(timeTotalBuckets); // we need to clear the array nnStruct->nearPoints for the next query. for(Int32T i = 0; i < nMarkedPoints; i++) { ASSERT(nnStruct->markedPoints[nnStruct->markedPointsIndeces[i]] == TRUE); nnStruct->markedPoints[nnStruct->markedPointsIndeces[i]] = FALSE; } DPRINTF("nMarkedPoints: %d\n", nMarkedPoints); return nNeighbors; }