// Returns the list of near neighbors of the point <point> (with a
// certain success probability). Near neighbor is defined as being a
// point within distance <parameterR>. Each near neighbor from the
// data set is returned is returned with a certain probability,
// dependent on <parameterK>, <parameterL>, and <parameterT>. The
// returned points are kept in the array <result>. If result is not
// allocated, it will be allocated to at least some minimum size
// (RESULT_INIT_SIZE). If number of returned points is bigger than the
// size of <result>, then the <result> is resized (to up to twice the
// number of returned points). The return value is the number of
// points found.
Int32T getNearNeighborsFromPRNearNeighborStruct(PRNearNeighborStructT nnStruct, PPointT query, PPointT *(&result), Int32T &resultSize){
  ASSERT(nnStruct != NULL);
  ASSERT(query != NULL);
  ASSERT(nnStruct->reducedPoint != NULL);
  // When <u>-function composition is used, the per-point ULSH vectors must exist.
  ASSERT(!nnStruct->useUfunctions || nnStruct->pointULSHVectors != NULL);

  PPointT point = query;

  // Lazily allocate the caller's result array on first use.
  // NOTE(review): MALLOC/REALLOC/FAILIF are project macros — presumably they
  // abort or report on allocation failure; confirm in the project's headers.
  if (result == NULL){
    resultSize = RESULT_INIT_SIZE;
    FAILIF(NULL == (result = (PPointT*)MALLOC(resultSize * sizeof(PPointT))));
  }

  // Compute the query's LSH values for every hash-function tuple; results are
  // stored in nnStruct->precomputedHashesOfULSHs (see preparePointAdding).
  preparePointAdding(nnStruct, nnStruct->hashedBuckets[0], point);

  // Snapshot the precomputed hashes into a local (VLA) copy so that they are
  // stable while we probe the L tables.
  Uns32T precomputedHashesOfULSHs[nnStruct->nHFTuples][N_PRECOMPUTED_HASHES_NEEDED];
  for(IntT i = 0; i < nnStruct->nHFTuples; i++){
    for(IntT j = 0; j < N_PRECOMPUTED_HASHES_NEEDED; j++){
      precomputedHashesOfULSHs[i][j] = nnStruct->precomputedHashesOfULSHs[i][j];
    }
  }

  TIMEV_START(timeTotalBuckets);
  BooleanT oldTimingOn = timingOn;
  if (noExpensiveTiming) {
    timingOn = FALSE;
  }

  // Initialize the counters for defining the pair of <u> functions used for <g> functions.
  IntT firstUComp = 0;
  IntT secondUComp = 1;

  Int32T nNeighbors = 0;// the number of near neighbors found so far.
  Int32T nMarkedPoints = 0;// the number of marked points
  // Probe each of the L hash tables once.
  for(IntT i = 0; i < nnStruct->parameterL; i++){
    TIMEV_START(timeGetBucket);
    GeneralizedPGBucket gbucket;
    if (!nnStruct->useUfunctions) {
      // Use usual <g> functions (truly independent; <g>s are precisly
      // <u>s).
      gbucket = getGBucket(nnStruct->hashedBuckets[i], 1, precomputedHashesOfULSHs[i], NULL);
    } else {
      // Use <u> functions (<g>s are pairs of <u> functions).
      gbucket = getGBucket(nnStruct->hashedBuckets[i], 2, precomputedHashesOfULSHs[firstUComp], precomputedHashesOfULSHs[secondUComp]);

      // compute what is the next pair of <u> functions: enumerate all
      // unordered pairs (firstUComp, secondUComp) with first < second.
      secondUComp++;
      if (secondUComp == nnStruct->nHFTuples) {
	firstUComp++;
	secondUComp = firstUComp + 1;
      }
    }
    TIMEV_END(timeGetBucket);

    PGBucketT bucket;

    TIMEV_START(timeCycleBucket);
    // Dispatch on the physical layout of the hash table's buckets.
    switch (nnStruct->hashedBuckets[i]->typeHT){
    case HT_LINKED_LIST:
      bucket = gbucket.llGBucket;
      if (bucket != NULL){
	// circle through the bucket and add to <result> the points that are near.
	PBucketEntryT bucketEntry = &(bucket->firstEntry);
	//TIMEV_START(timeCycleProc);
	while (bucketEntry != NULL){
	  //TIMEV_END(timeCycleProc);
	  //ASSERT(bucketEntry->point != NULL);
	  //TIMEV_START(timeDistanceComputation);
	  Int32T candidatePIndex = bucketEntry->pointIndex;
	  PPointT candidatePoint = nnStruct->points[candidatePIndex];
	  if (isDistanceSqrLeq(nnStruct->dimension, point, candidatePoint, nnStruct->parameterR2) && nnStruct->reportingResult){
	    //TIMEV_END(timeDistanceComputation);
	    // markedPoints[] dedups candidates seen in earlier tables.
	    if (nnStruct->markedPoints[candidatePIndex] == FALSE) {
	      //TIMEV_START(timeResultStoring);
	      // a new R-NN point was found (not yet in <result>).
	      if (nNeighbors >= resultSize){
		// run out of space => resize the <result> array.
		resultSize = 2 * resultSize;
		result = (PPointT*)REALLOC(result, resultSize * sizeof(PPointT));
	      }
	      result[nNeighbors] = candidatePoint;
	      nNeighbors++;
	      // Remember which indices were marked so they can be cleared below.
	      nnStruct->markedPointsIndeces[nMarkedPoints] = candidatePIndex;
	      nnStruct->markedPoints[candidatePIndex] = TRUE; // do not include more points with the same index
	      nMarkedPoints++;
	      //TIMEV_END(timeResultStoring);
	    }
	  }else{
	    //TIMEV_END(timeDistanceComputation);
	  }
	  //TIMEV_START(timeCycleProc);
	  bucketEntry = bucketEntry->nextEntry;
	}
	//TIMEV_END(timeCycleProc);
      }
      break;
    case HT_STATISTICS:
      // Dead branch kept for exhaustiveness of the switch; the HT_STATISTICS
      // layout is no longer supported (its old traversal code was removed).
      ASSERT(FALSE); // HT_STATISTICS not supported anymore
      break;
    case HT_HYBRID_CHAINS:
      if (gbucket.hybridGBucket != NULL){
	// <hybridPoint> is the start of this bucket's entries inside one
	// contiguous hybrid-chain array.
	PHybridChainEntryT hybridPoint = gbucket.hybridGBucket;
	Uns32T offset = 0;
	if (hybridPoint->point.bucketLength == 0){
	  // there are overflow points in this bucket: decode the overflow
	  // offset stored across the next N_FIELDS_PER_INDEX_OF_OVERFLOW entries.
	  offset = 0;
	  for(IntT j = 0; j < N_FIELDS_PER_INDEX_OF_OVERFLOW; j++){
	    offset += ((Uns32T)((hybridPoint + 1 + j)->point.bucketLength) << (j * N_BITS_FOR_BUCKET_LENGTH));
	  }
	}
	Uns32T index = 0;
	BooleanT done = FALSE;
	while(!done){
	  if (index == MAX_NONOVERFLOW_POINTS_PER_BUCKET){
	    //CR_ASSERT(hybridPoint->point.bucketLength == 0);
	    // Jump to the overflow region of the bucket.
	    index = index + offset;
	  }
	  Int32T candidatePIndex = (hybridPoint + index)->point.pointIndex;
	  CR_ASSERT(candidatePIndex >= 0 && candidatePIndex < nnStruct->nPoints);
	  // isLastPoint terminates the bucket traversal after this entry.
	  done = (hybridPoint + index)->point.isLastPoint == 1 ? TRUE : FALSE;
	  index++;
	  if (nnStruct->markedPoints[candidatePIndex] == FALSE){
	    // mark the point first (dedup across tables), then test distance.
	    nnStruct->markedPointsIndeces[nMarkedPoints] = candidatePIndex;
	    nnStruct->markedPoints[candidatePIndex] = TRUE; // do not include more points with the same index
	    nMarkedPoints++;
	    PPointT candidatePoint = nnStruct->points[candidatePIndex];
	    if (isDistanceSqrLeq(nnStruct->dimension, point, candidatePoint, nnStruct->parameterR2) && nnStruct->reportingResult){
	      // a new R-NN point was found (not yet in <result>).
	      //TIMEV_START(timeResultStoring);
	      if (nNeighbors >= resultSize){
		// run out of space => resize the <result> array.
		resultSize = 2 * resultSize;
		result = (PPointT*)REALLOC(result, resultSize * sizeof(PPointT));
	      }
	      result[nNeighbors] = candidatePoint;
	      nNeighbors++;
	      //TIMEV_END(timeResultStoring);
	    }
	  }else{
	    // the point was already marked (& examined)
	  }
	}
      }
      break;
    default:
      ASSERT(FALSE);
    }
    TIMEV_END(timeCycleBucket);
  }

  timingOn = oldTimingOn;
  TIMEV_END(timeTotalBuckets);

  // we need to clear the array nnStruct->nearPoints for the next query.
  for(Int32T i = 0; i < nMarkedPoints; i++){
    ASSERT(nnStruct->markedPoints[nnStruct->markedPointsIndeces[i]] == TRUE);
    nnStruct->markedPoints[nnStruct->markedPointsIndeces[i]] = FALSE;
  }
  DPRINTF("nMarkedPoints: %d\n", nMarkedPoints);

  return nNeighbors;
}
// Returns the list of near neighbors of the point <point> (with a // certain success probability). Near neighbor is defined as being a // point within distance <parameterR>. Each near neighbor from the // data set is returned is returned with a certain probability, // dependent on <parameterK>, <parameterL>, and <parameterT>. The // returned points are kept in the array <result>. If result is not // allocated, it will be allocated to at least some minimum size // (RESULT_INIT_SIZE). If number of returned points is bigger than the // size of <result>, then the <result> is resized (to up to twice the // number of returned points). The return value is the number of // points found. Int32T getNearNeighborsFromPRNearNeighborStruct( PRNearNeighborStructT nnStruct, PPointT query, PPointT *(&result), Int32T &resultSize) { //通过查找索引,然后获得桶,提取n个最近邻点 //通过计算点的降维值,然后计算主副索引,最后由索引查找表 ASSERT(nnStruct != NULL); ASSERT(query != NULL); ASSERT(nnStruct->reducedPoint != NULL); ASSERT(!nnStruct->useUfunctions || nnStruct->pointULSHVectors != NULL); PPointT point = query; if (result == NULL) { resultSize = RESULT_INIT_SIZE; FAILIF(NULL == (result = (PPointT*)MALLOC(resultSize * sizeof(PPointT)))); } /* for (int tempd=150; tempd< 160;tempd++) { printf(" %lf ",query->coordinates[tempd]); } printf("查询的具体数据 10个 \n\n"); printf("查询数据 : %lf \n",query->coordinates[151]); // printf( "主hash的值: %u \n",nnStruct->hehasdBuckets[0]->mainHashA[5]); // printf( "辅助hash的值: %u \n",nnStruct->hashedBuckets[0]->controlHash1[5]); // printf( "a %u \n",nnStruct->lshFunctions[0][0].a[5]); // printf( "b %u \n",nnStruct->lshFunctions[0][0].b ); */ preparePointAdding(nnStruct, nnStruct->hashedBuckets[0], point); //根据传入的多维point。计算对应每个hash表的降维=》hash值,存入了nnStruct->precomputedHashesOfULSHs Uns32T **(precomputedHashesOfULSHs);//没释放 precomputedHashesOfULSHs= (Uns32T**)malloc(sizeof(Uns32T*)*(nnStruct->nHFTuples)); // Uns32T precomputedHashesOfULSHs[nnStruct->nHFTuples][N_PRECOMPUTED_HASHES_NEEDED]; for (IntT i=0; i< 
nnStruct->nHFTuples ; i++) { precomputedHashesOfULSHs[i]= (Uns32T*)malloc(sizeof(Uns32T)*(N_PRECOMPUTED_HASHES_NEEDED)); for (int temi=0; temi< N_PRECOMPUTED_HASHES_NEEDED ; temi++) { precomputedHashesOfULSHs[i][temi]=0; } } //初始化?? /* printf("\n输出:\n"); FILE *in = fopen("preconpute.txt", "a+") ; fprintf(in,"\n输出:\n"); fclose(in); */ for(IntT i = 0; i < nnStruct->nHFTuples; i++) { for(IntT j = 0; j < N_PRECOMPUTED_HASHES_NEEDED; j++) { precomputedHashesOfULSHs[i][j] = nnStruct->precomputedHashesOfULSHs[i][j]; /* printf(" %u", precomputedHashesOfULSHs[i][j]); FILE *in = fopen("preconpute.txt", "a+") ; fprintf(in," %u", precomputedHashesOfULSHs[i][j]); fclose(in); */ } /*printf(" \n"); FILE *in = fopen("preconpute.txt", "a+") ; fprintf(in," \n"); fclose(in); */ } TIMEV_START(timeTotalBuckets); BooleanT oldTimingOn = timingOn; if (noExpensiveTiming) { timingOn = FALSE; } // Initialize the counters for defining the pair of <u> functions used for <g> functions. IntT firstUComp = 0; IntT secondUComp = 1; Int32T nNeighbors = 0;// the number of near neighbors found so far. Int32T nMarkedPoints = 0;// the number of marked points for(IntT i = 0; i < nnStruct->parameterL; i++) { //L个表 TIMEV_START(timeGetBucket); GeneralizedPGBucket gbucket; if (!nnStruct->useUfunctions) { // Use usual <g> functions (truly independent; <g>s are precisly // <u>s). gbucket = getGBucket(nnStruct->hashedBuckets[i], 1, precomputedHashesOfULSHs[i], NULL); } else { // Use <u> functions (<g>s are pairs of <u> functions). gbucket = getGBucket(nnStruct->hashedBuckets[i], 2, precomputedHashesOfULSHs[firstUComp], precomputedHashesOfULSHs[secondUComp]); //通过两个向量,计算主副索引。然后遍历二级索引,提取对应的桶 // compute what is the next pair of <u> functions. //不是每个都 (first,second )(first,second )(first,second )的数组吗? 
secondUComp++; if (secondUComp == nnStruct->nHFTuples) { firstUComp++; secondUComp = firstUComp + 1; } } TIMEV_END(timeGetBucket); PGBucketT bucket; TIMEV_START(timeCycleBucket); switch (nnStruct->hashedBuckets[i]->typeHT) { //对不同类型的hash桶结构,使用不同方法获取二级桶的实体 case HT_LINKED_LIST: bucket = gbucket.llGBucket; if (bucket != NULL) { // circle through the bucket and add to <result> the points that are near. PBucketEntryT bucketEntry = &(bucket->firstEntry); //TIMEV_START(timeCycleProc); while (bucketEntry != NULL) { //TIMEV_END(timeCycleProc); //ASSERT(bucketEntry->point != NULL); //TIMEV_START(timeDistanceComputation); Int32T candidatePIndex = bucketEntry->pointIndex; PPointT candidatePoint = nnStruct->points[candidatePIndex]; if (isDistanceSqrLeq(nnStruct->dimension, point, candidatePoint, nnStruct->parameterR2) && nnStruct->reportingResult) { //TIMEV_END(timeDistanceComputation); if (nnStruct->markedPoints[candidatePIndex] == FALSE) { //TIMEV_START(timeResultStoring); // a new R-NN point was found (not yet in <result>). if (nNeighbors >= resultSize) { // run out of space => resize the <result> array. 
resultSize = 2 * resultSize; result = (PPointT*)REALLOC(result, resultSize * sizeof(PPointT)); } result[nNeighbors] = candidatePoint; nNeighbors++; nnStruct->markedPointsIndeces[nMarkedPoints] = candidatePIndex; nnStruct->markedPoints[candidatePIndex] = TRUE; // do not include more points with the same index nMarkedPoints++; //TIMEV_END(timeResultStoring); } } else { //TIMEV_END(timeDistanceComputation); } //TIMEV_START(timeCycleProc); bucketEntry = bucketEntry->nextEntry; }//while //TIMEV_END(timeCycleProc); } break; case HT_STATISTICS: ASSERT(FALSE); // HT_STATISTICS not supported anymore // if (gbucket.linkGBucket != NULL && gbucket.linkGBucket->indexStart != INDEX_START_EMPTY){ // Int32T position; // PointsListEntryT *pointsList = nnStruct->hashedBuckets[i]->bucketPoints.pointsList; // position = gbucket.linkGBucket->indexStart; // // circle through the bucket and add to <result> the points that are near. // while (position != INDEX_START_EMPTY){ // PPointT candidatePoint = pointsList[position].point; // if (isDistanceSqrLeq(nnStruct->dimension, point, candidatePoint, nnStruct->parameterR2) && nnStruct->reportingResult){ // if (nnStruct->nearPoints[candidatePoint->index] == FALSE) { // // a new R-NN point was found (not yet in <result>). // if (nNeighbors >= resultSize){ // // run out of space => resize the <result> array. 
// resultSize = 2 * resultSize; // result = (PPointT*)REALLOC(result, resultSize * sizeof(PPointT)); // } // result[nNeighbors] = candidatePoint; // nNeighbors++; // nnStruct->nearPoints[candidatePoint->index] = TRUE; // do not include more points with the same index // } // } // // Int32T oldP = position; // position = pointsList[position].nextPoint; // // ASSERT(position == INDEX_START_EMPTY || position == oldP + 1); // } // } break; case HT_HYBRID_CHAINS://默认的链条 if (gbucket.hybridGBucket != NULL) { //好像是在链表中找空间,同时要判断没有重复的 PHybridChainEntryT hybridPoint = gbucket.hybridGBucket;//获取 二级桶的数组指针,(实际桶就是一个数组) Uns32T offset = 0; if (hybridPoint->point.bucketLength == 0) { //长度为0,就是溢出了的桶, // there are overflow points in this bucket. offset = 0; for(IntT j = 0; j < N_FIELDS_PER_INDEX_OF_OVERFLOW; j++) { offset += ((Uns32T)((hybridPoint + 1 + j)->point.bucketLength) << (j * N_BITS_FOR_BUCKET_LENGTH)); } } Uns32T index = 0; BooleanT done = FALSE; while(!done) { if (index == MAX_NONOVERFLOW_POINTS_PER_BUCKET) { //CR_ASSERT(hybridPoint->point.bucketLength == 0); index = index + offset; } //hybridPoint 是个二级桶+实体组成的数组的首地址(其实就是个二级刻度) Int32T candidatePIndex = (hybridPoint + index)->point.pointIndex; //索引只是记录每个点的序号, 所有点都在nnStruct->points[candidatePIndex] 上保存具体值 CR_ASSERT(candidatePIndex >= 0 && candidatePIndex < nnStruct->nPoints); done = (hybridPoint + index)->point.isLastPoint == 1 ? TRUE : FALSE; //链表的遍历?好像是用数组来当链表用 index++; if (nnStruct->markedPoints[candidatePIndex] == FALSE) { //已经计算过的点都标记为true了 //nnStruct->markedPoints 是用来标记是否检测过得 // mark the point first. 
nnStruct->markedPointsIndeces[nMarkedPoints] = candidatePIndex; nnStruct->markedPoints[candidatePIndex] = TRUE; // do not include more points with the same index nMarkedPoints++; PPointT candidatePoint = nnStruct->points[candidatePIndex]; if (isDistanceSqrLeq(nnStruct->dimension, point, candidatePoint, nnStruct->parameterR2) && nnStruct->reportingResult) { //两点距离是否小于阈值 //if (nnStruct->markedPoints[candidatePIndex] == FALSE) { // a new R-NN point was found (not yet in <result>). //TIMEV_START(timeResultStoring); if (nNeighbors >= resultSize) { //近邻点太多,扩大空间 // run out of space => resize the <result> array. resultSize = 2 * resultSize; result = (PPointT*)REALLOC(result, resultSize * sizeof(PPointT)); } result[nNeighbors] = candidatePoint;//存入返回结果中 nNeighbors++; //TIMEV_END(timeResultStoring); //nnStruct->markedPointsIndeces[nMarkedPoints] = candidatePIndex; //nnStruct->markedPoints[candidatePIndex] = TRUE; // do not include more points with the same index //nMarkedPoints++; //} } }// if (nnStruct->markedPoints[candidatePIndex] == FALSE) else { // the point was already marked (& examined) } }// while(!done) }// if (gbucket.hybridGBucket != NULL) break; default: ASSERT(FALSE); }//swichcase TIMEV_END(timeCycleBucket); }//for timingOn = oldTimingOn; TIMEV_END(timeTotalBuckets); // we need to clear the array nnStruct->nearPoints for the next query. for(Int32T i = 0; i < nMarkedPoints; i++) { ASSERT(nnStruct->markedPoints[nnStruct->markedPointsIndeces[i]] == TRUE); nnStruct->markedPoints[nnStruct->markedPointsIndeces[i]] = FALSE; } DPRINTF("nMarkedPoints: %d\n", nMarkedPoints); return nNeighbors; }