// Construct PRNearNeighborStructT given the data set <dataSet> (all // the points <dataSet> will be contained in the resulting DS). // Currenly only type HT_HYBRID_CHAINS is supported for this // operation. PRNearNeighborStructT initLSH_WithDataSet(RNNParametersT algParameters, Int32T nPoints, PPointT *dataSet){ ASSERT(algParameters.typeHT == HT_HYBRID_CHAINS); ASSERT(dataSet != NULL); ASSERT(USE_SAME_UHASH_FUNCTIONS); PRNearNeighborStructT nnStruct = initializePRNearNeighborFields(algParameters, nPoints); // Set the fields <nPoints> and <points>. nnStruct->nPoints = nPoints; for(Int32T i = 0; i < nPoints; i++){ nnStruct->points[i] = dataSet[i]; } // initialize second level hashing (bucket hashing) FAILIF(NULL == (nnStruct->hashedBuckets = (PUHashStructureT*)MALLOC(nnStruct->parameterL * sizeof(PUHashStructureT)))); Uns32T *mainHashA = NULL, *controlHash1 = NULL; PUHashStructureT modelHT = newUHashStructure(HT_LINKED_LIST, nPoints, nnStruct->parameterK, FALSE, mainHashA, controlHash1, NULL); Uns32T **(precomputedHashesOfULSHs[nnStruct->nHFTuples]); for(IntT l = 0; l < nnStruct->nHFTuples; l++){ FAILIF(NULL == (precomputedHashesOfULSHs[l] = (Uns32T**)MALLOC(nPoints * sizeof(Uns32T*)))); for(IntT i = 0; i < nPoints; i++){ FAILIF(NULL == (precomputedHashesOfULSHs[l][i] = (Uns32T*)MALLOC(N_PRECOMPUTED_HASHES_NEEDED * sizeof(Uns32T)))); } } for(IntT i = 0; i < nPoints; i++){ preparePointAdding(nnStruct, modelHT, dataSet[i]); for(IntT l = 0; l < nnStruct->nHFTuples; l++){ for(IntT h = 0; h < N_PRECOMPUTED_HASHES_NEEDED; h++){ precomputedHashesOfULSHs[l][i][h] = nnStruct->precomputedHashesOfULSHs[l][h]; } } } //DPRINTF("Allocated memory(modelHT and precomputedHashesOfULSHs just a.): %lld\n", totalAllocatedMemory); // Initialize the counters for defining the pair of <u> functions used for <g> functions. IntT firstUComp = 0; IntT secondUComp = 1; for(IntT i = 0; i < nnStruct->parameterL; i++){ // build the model HT. for(IntT p = 0; p < nPoints; p++){ // Add point <dataSet[p]> to modelHT. if (!nnStruct->useUfunctions) { // Use usual <g> functions (truly independent; <g>s are precisly // <u>s). addBucketEntry(modelHT, 1, precomputedHashesOfULSHs[i][p], NULL, p); } else { // Use <u> functions (<g>s are pairs of <u> functions). addBucketEntry(modelHT, 2, precomputedHashesOfULSHs[firstUComp][p], precomputedHashesOfULSHs[secondUComp][p], p); } } //ASSERT(nAllocatedGBuckets <= nPoints); //ASSERT(nAllocatedBEntries <= nPoints); // compute what is the next pair of <u> functions. secondUComp++; if (secondUComp == nnStruct->nHFTuples) { firstUComp++; secondUComp = firstUComp + 1; } // copy the model HT into the actual (packed) HT. copy the uhash function too. nnStruct->hashedBuckets[i] = newUHashStructure(algParameters.typeHT, nPoints, nnStruct->parameterK, TRUE, mainHashA, controlHash1, modelHT); // clear the model HT for the next iteration. clearUHashStructure(modelHT); } freeUHashStructure(modelHT, FALSE); // do not free the uhash functions since they are used by nnStruct->hashedBuckets[i] // freeing precomputedHashesOfULSHs for(IntT l = 0; l < nnStruct->nHFTuples; l++){ for(IntT i = 0; i < nPoints; i++){ FREE(precomputedHashesOfULSHs[l][i]); } FREE(precomputedHashesOfULSHs[l]); } return nnStruct; }
// Construct PRNearNeighborStructT given the data set <dataSet> (all // the points <dataSet> will be contained in the resulting DS). // Currenly only type HT_HYBRID_CHAINS is supported for this // operation. PRNearNeighborStructT initLSH_WithDataSet(RNNParametersT algParameters, Int32T nPoints, PPointT *dataSet) { //初始化整个数据结构 包括整体+l个hash表 +点映射到桶 /*整体:先初始化整体结构体: PRNearNeighborStructT nnStruct 初始化随机向量gi hi,点存入 然后初始化PUHashStructureT modelHT:多个hansh表 最后,将多个 modelHT 链接到nnStruct */ ASSERT(algParameters.typeHT == HT_HYBRID_CHAINS); ASSERT(dataSet != NULL); ASSERT(USE_SAME_UHASH_FUNCTIONS); PRNearNeighborStructT nnStruct = initializePRNearNeighborFields(algParameters, nPoints); //就是申请结构体,初始化了gi hi函数的随机值 //按照gi hi 两层函数族的格式,初始化nnStruct->lshFunctions所指向的二维指针 // 产生高斯分布的随机值: a向量和 b,付给 nnStruct->lshFunctions【】【】 // Set the fields <nPoints> and <points>. nnStruct->nPoints = nPoints; for(Int32T i = 0; i < nPoints; i++) { nnStruct->points[i] = dataSet[i]; } // initialize second level hashing (bucket hashing) FAILIF(NULL == (nnStruct->hashedBuckets = (PUHashStructureT*)MALLOC(nnStruct->parameterL * sizeof(PUHashStructureT)))); Uns32T *mainHashA = NULL, *controlHash1 = NULL; PUHashStructureT modelHT = newUHashStructure(HT_LINKED_LIST, nPoints, nnStruct->parameterK, FALSE, mainHashA, controlHash1, NULL); //初始化hash的桶结构 //对主hash和辅助hash表的初始化:给一个随机值 /* 建立 uhash , 当typeHT= HT_LINKED_LIST时,直接初始化数组, 然后建立随机的主hash表 向量, 辅助hash表的向量: 就是 (u 。x)%(2^32 -5)/tablesize 中的u1向量 和 (u 。x)%(2^32 -5) 中的u2向量 最终返回:PUHashStructureT uhash */ Uns32T ***(precomputedHashesOfULSHs); precomputedHashesOfULSHs= (Uns32T***)malloc(sizeof(Uns32T**)*(nnStruct->nHFTuples)); //没有释放 // Uns32T **(precomputedHashesOfULSHs[ (nnStruct->nHFTuples) ]); windows下不允许动态值建立数组 for(IntT l = 0; l < nnStruct->nHFTuples; l++) { //每组hash函数 gi FAILIF(NULL == (precomputedHashesOfULSHs[l] = (Uns32T**)MALLOC(nPoints * sizeof(Uns32T*)))); for(IntT i = 0; i < nPoints; i++) { FAILIF(NULL == (precomputedHashesOfULSHs[l][i] = (Uns32T*)MALLOC(N_PRECOMPUTED_HASHES_NEEDED * sizeof(Uns32T)))); for (int temi=0; temi< N_PRECOMPUTED_HASHES_NEEDED ; temi++) { precomputedHashesOfULSHs[l][i][temi]=0; } } } for(IntT i = 0; i < nPoints; i++) { ASSERT(nnStruct != NULL); //根据传入的多维point。计算对应每个hash表的降维=》hash值,存入了nnStruct->precomputedHashesOfULSHs preparePointAdding(nnStruct, modelHT, dataSet[i]); /* if ( i ==0) { for (int tempd=150; tempd< 160;tempd++) { printf(" %lf ",dataSet[i]->coordinates[tempd]); } // printf("初始化数据 10个 \n\n"); // printf(" : %lf \n",dataSet[i][151]); // printf( "主hash的值: %u \n",modelHT->mainHashA[5] ); // printf( "辅助hash的值: %u \n",modelHT->controlHash1[5] ); // printf( "a %u \n",nnStruct->lshFunctions[0][0].a[5]); // printf( "b %u \n",nnStruct->lshFunctions[0][0].b ); } */ for(IntT l = 0; l < nnStruct->nHFTuples; l++) { for(IntT h = 0; h < N_PRECOMPUTED_HASHES_NEEDED; h++) { //precomputedHashesOfULSHs结构保存提前计算好的最终hash值 precomputedHashesOfULSHs[l][i][h] = nnStruct->precomputedHashesOfULSHs[l][h]; /* if ( i==0) { printf(" %u",precomputedHashesOfULSHs[l][i][h]); FILE *in; in = fopen("preconpute.txt", "a+") ; fprintf(in," %u",precomputedHashesOfULSHs[l][i][h]); fclose(in); } /**/ } /* if ( i==0) { FILE *in; in = fopen("preconpute.txt", "a+") ; fprintf(in," \n"); fclose(in); printf(" \n"); } */ } } //DPRINTF("Allocated memory(modelHT and precomputedHashesOfULSHs just a.): %lld\n", totalAllocatedMemory); // Initialize the counters for defining the pair of <u> functions used for <g> functions. IntT firstUComp = 0; IntT secondUComp = 1; for(IntT i = 0; i < nnStruct->parameterL; i++) { //l个表,每个表, // build the model HT. for(IntT p = 0; p < nPoints; p++) { //对于每个点,都hash到对应的桶里,然后建立hash结构体 // Add point <dataSet[p]> to modelHT. if (!nnStruct->useUfunctions) { // Use usual <g> functions (truly independent; <g>s are precisly // <u>s). addBucketEntry(modelHT, 1, precomputedHashesOfULSHs[i][p], NULL, p); //根据, precomputedHashesOfULSHs[i][p]来计算hash值,然后查找桶,并插入 } else { //nnStruct->useUfunctions 表示降维只取一般:8个,所以8维特征点值 和16维随机索引 分两次计算了 //所以用 firstUComp secondUComp表示两级索引 // Use <u> functions (<g>s are pairs of <u> functions). addBucketEntry(modelHT, 2, precomputedHashesOfULSHs[firstUComp][p], precomputedHashesOfULSHs[secondUComp][p], p); } } //ASSERT(nAllocatedGBuckets <= nPoints); //ASSERT(nAllocatedBEntries <= nPoints); // compute what is the next pair of <u> functions. secondUComp++; if (secondUComp == nnStruct->nHFTuples) { //分两次算:就有 263 个一级+263个二级索引 到了 263,就要变一下了 firstUComp++; secondUComp = firstUComp + 1; } // copy the model HT into the actual (packed) HT. copy the uhash function too. nnStruct->hashedBuckets[i] = newUHashStructure(algParameters.typeHT, nPoints, nnStruct->parameterK, TRUE, mainHashA, controlHash1, modelHT); //初始化hash的桶结构 //根据已有的modelHT 转化为空间紧凑的HT_HYBRID_CHAINS 表 // clear the model HT for the next iteration. clearUHashStructure(modelHT);//可能需要释放内存 } //释放 freeUHashStructure(modelHT, FALSE); // do not free the uhash functions since they are used by nnStruct->hashedBuckets[i] // freeing precomputedHashesOfULSHs for(IntT l = 0; l < nnStruct->nHFTuples; l++) { for(IntT i = 0; i < nPoints; i++) { FREE(precomputedHashesOfULSHs[l][i]); } FREE(precomputedHashesOfULSHs[l]); } // printf("\n\n返回前 :\n"); // printf( "a %u \n",nnStruct->lshFunctions[0][0].a[5]); // printf( "b %u \n\n",nnStruct->lshFunctions[0][0].b ); return nnStruct; }