// Compute the value of a hash function u=lshFunctions[gNumber] (a // vector of <hfTuplesLength> LSH functions) in the point <point>. The // result is stored in the vector <vectorValue>. <vectorValue> must be // already allocated (and have space for <hfTuplesLength> Uns32T-words). inline void computeULSH(PRNearNeighborStructT nnStruct, IntT gNumber, RealT *point, Uns32T *vectorValue){ CR_ASSERT(nnStruct != NULL); CR_ASSERT(point != NULL); CR_ASSERT(vectorValue != NULL); for(IntT i = 0; i < nnStruct->hfTuplesLength; i++){ RealT value = 0; for(IntT d = 0; d < nnStruct->dimension; d++){ value += point[d] * nnStruct->lshFunctions[gNumber][i].a[d]; } vectorValue[i] = (Uns32T)(FLOOR_INT32((value + nnStruct->lshFunctions[gNumber][i].b) / nnStruct->parameterW) /* - MIN_INT32T*/); } }
// Picks the server that should handle <aKey> and makes sure it is connected.
//
// The key is hashed with CreateKeyHash and looked up in mServerHash with
// std::lower_bound; a position past the end wraps to the first entry. When
// <aService> is non-zero the search then walks forward (wrapping around)
// until an entry reports that it handles the service.
//
// Returns the server only if Connect() reports CONNECT_SUCCESS. Returns NULL
// when mServerHash is empty, when no entry handles the requested service, or
// for any other connect result.
//
// NOTE(review): the key is passed as aKey.data() to a strlen-based hash; this
// relies on data() being NUL-terminated - confirm for string_t.
MemCacheClient::Server *
MemCacheClient::FindServer(
    const string_t & aKey,
    unsigned         aService
    )
{
#ifdef CROSSBASE_API
    // in our private usage of this, the service must never be 0
    if (0 == aService) {
        mTrace.Trace(CLERROR, "FindServer: no service requested, supplied cache server may not be appropriate!!!");
        CR_ASSERT(!"FindServer: no service requested, supplied cache server may not be appropriate!!!");
    }
#endif

    // nothing to choose from
    if (mServerHash.empty()) {
        return NULL;
    }

    typedef std::vector<ConsistentHash>::iterator HashIter;

    // first entry that does not compare less than the key's hash
    ConsistentHash keyHash(CreateKeyHash(aKey.data()), NULL, 0, 0);
    HashIter first = mServerHash.begin();
    HashIter last  = mServerHash.end();
    HashIter pos   = std::lower_bound(first, last, keyHash);
    if (pos == last) {
        pos = first;
    }

    // advance to an entry that handles the requested service
    if (aService != 0) {
        const HashIter origin = pos;
        for (;;) {
            if (pos->services(aService)) {
                break;
            }
            ++pos;
            if (pos == last) {
                pos = first;
            }
            if (pos == origin) {
                // went all the way around without a match
                mTrace.Trace(CLDEBUG, "FindServer: no server for required service: %u", aService);
                return NULL;
            }
        }
    }

    // only a successfully connected server is handed back
    Server * candidate = pos->mServer;
    if (candidate->Connect(mTimeoutMs, mRetryMs) == Server::CONNECT_SUCCESS) {
        return candidate;
    }
    return NULL;
}
unsigned long MemCacheClient::CreateKeyHash( const char * aKey ) { const size_t LONG_COUNT = SHA_DIGEST_LENGTH / sizeof(unsigned long); union { unsigned char as_char[SHA_DIGEST_LENGTH]; unsigned long as_long[LONG_COUNT]; } output; CR_ASSERT(sizeof(output.as_char) == SHA_DIGEST_LENGTH); CR_ASSERT(sizeof(output.as_long) == SHA_DIGEST_LENGTH); SHA1((const unsigned char *) aKey, (unsigned long) strlen(aKey), output.as_char); return output.as_long[LONG_COUNT-1]; }
// Compute the value of a hash function u=lshFunctions[gNumber] (a // vector of <hfTuplesLength> LSH functions) in the point <point>. The // result is stored in the vector <vectorValue>. <vectorValue> must be // already allocated (and have space for <hfTuplesLength> Uns32T-words). inline void computeULSH(PRNearNeighborStructT nnStruct, IntT gNumber, RealT *point, Uns32T *vectorValue) { //求出point向量和多个hansh映射后的值, 对于每个hash: a。v+b 除以 r //结果返回到vectorValue 向量上 CR_ASSERT(nnStruct != NULL); CR_ASSERT(point != NULL); CR_ASSERT(vectorValue != NULL); // FILE *file=fopen("vector.txt","a+"); // fprintf(file,"\n\n"); for(IntT i = 0; i < nnStruct->hfTuplesLength; i++) { RealT value = 0; for(IntT d = 0; d < nnStruct->dimension; d++) { value += point[d] * nnStruct->lshFunctions[gNumber][i].a[d]; //两个向量point[]。第gnumber的hash向量 点乘 ; 就是a。v } value=value*97;//放大10倍看看 double tempv=( (value + nnStruct->lshFunctions[gNumber][i].b) ); double temp_w=tempv/ nnStruct->parameterW ; int vi=temp_w; if ( vi < 0) { vi+=1793; } vectorValue[i] = (Uns32T)(FLOOR_INT32( (value + nnStruct->lshFunctions[gNumber][i].b) / nnStruct->parameterW )) ; vectorValue[i] =vi; // fprintf(file,"%lf %lf %d ||",value,temp_w ,vi ); // vectorValue[i] = (Uns32T)(FLOOR_INT32( (value + nnStruct->lshFunctions[gNumber][i].b) / nnStruct->parameterW) /* - MIN_INT32T*/); //a。v+b 除以 r } // fclose(file); }
// Returns the list of near neighbors of the point <query> (with a
// certain success probability). Near neighbor is defined as being a
// point within distance <parameterR>. Each near neighbor from the
// data set is returned with a certain probability, dependent on
// <parameterK>, <parameterL>, and <parameterT>.
//
// The returned points are kept in the array <result>. If <result> is
// NULL, it is allocated with RESULT_INIT_SIZE entries. Whenever the
// array is full, it is doubled in size; <resultSize> always holds the
// current capacity. A failed allocation or reallocation is reported
// through FAILIF (previously a failed REALLOC was left unchecked and
// the NULL result was dereferenced).
//
// The buckets of all <parameterL> hash tables are scanned. Points are
// de-duplicated through nnStruct->markedPoints, which is reset to FALSE
// for every touched index before returning. A point is added to
// <result> only if it is within the radius and
// nnStruct->reportingResult is set.
//
// The return value is the number of points stored in <result>.
Int32T getNearNeighborsFromPRNearNeighborStruct(PRNearNeighborStructT nnStruct, PPointT query, PPointT *(&result), Int32T &resultSize){
  ASSERT(nnStruct != NULL);
  ASSERT(query != NULL);
  ASSERT(nnStruct->reducedPoint != NULL);
  ASSERT(!nnStruct->useUfunctions || nnStruct->pointULSHVectors != NULL);

  PPointT point = query;

  if (result == NULL){
    resultSize = RESULT_INIT_SIZE;
    FAILIF(NULL == (result = (PPointT*)MALLOC(resultSize * sizeof(PPointT))));
  }

  preparePointAdding(nnStruct, nnStruct->hashedBuckets[0], point);

  // Take a local copy of the hashes that preparePointAdding left in nnStruct.
  Uns32T precomputedHashesOfULSHs[nnStruct->nHFTuples][N_PRECOMPUTED_HASHES_NEEDED];
  for(IntT i = 0; i < nnStruct->nHFTuples; i++){
    for(IntT j = 0; j < N_PRECOMPUTED_HASHES_NEEDED; j++){
      precomputedHashesOfULSHs[i][j] = nnStruct->precomputedHashesOfULSHs[i][j];
    }
  }

  TIMEV_START(timeTotalBuckets);

  BooleanT oldTimingOn = timingOn;
  if (noExpensiveTiming) {
    timingOn = FALSE;
  }

  // Initialize the counters for defining the pair of <u> functions used for <g> functions.
  IntT firstUComp = 0;
  IntT secondUComp = 1;

  Int32T nNeighbors = 0;    // the number of near neighbors found so far.
  Int32T nMarkedPoints = 0; // the number of marked points
  for(IntT i = 0; i < nnStruct->parameterL; i++){
    TIMEV_START(timeGetBucket);
    GeneralizedPGBucket gbucket;
    if (!nnStruct->useUfunctions) {
      // Use usual <g> functions (truly independent; <g>s are precisely
      // <u>s).
      gbucket = getGBucket(nnStruct->hashedBuckets[i], 1, precomputedHashesOfULSHs[i], NULL);
    } else {
      // Use <u> functions (<g>s are pairs of <u> functions).
      gbucket = getGBucket(nnStruct->hashedBuckets[i], 2, precomputedHashesOfULSHs[firstUComp], precomputedHashesOfULSHs[secondUComp]);

      // compute what is the next pair of <u> functions.
      secondUComp++;
      if (secondUComp == nnStruct->nHFTuples) {
        firstUComp++;
        secondUComp = firstUComp + 1;
      }
    }
    TIMEV_END(timeGetBucket);

    PGBucketT bucket;

    TIMEV_START(timeCycleBucket);
    switch (nnStruct->hashedBuckets[i]->typeHT){
    case HT_LINKED_LIST:
      bucket = gbucket.llGBucket;
      if (bucket != NULL){
        // circle through the bucket and add to <result> the points that are near.
        PBucketEntryT bucketEntry = &(bucket->firstEntry);
        while (bucketEntry != NULL){
          Int32T candidatePIndex = bucketEntry->pointIndex;
          PPointT candidatePoint = nnStruct->points[candidatePIndex];
          if (isDistanceSqrLeq(nnStruct->dimension, point, candidatePoint, nnStruct->parameterR2) && nnStruct->reportingResult){
            if (nnStruct->markedPoints[candidatePIndex] == FALSE) {
              // a new R-NN point was found (not yet in <result>).
              if (nNeighbors >= resultSize){
                // run out of space => resize the <result> array.
                resultSize = 2 * resultSize;
                FAILIF(NULL == (result = (PPointT*)REALLOC(result, resultSize * sizeof(PPointT))));
              }
              result[nNeighbors] = candidatePoint;
              nNeighbors++;
              nnStruct->markedPointsIndeces[nMarkedPoints] = candidatePIndex;
              nnStruct->markedPoints[candidatePIndex] = TRUE; // do not include more points with the same index
              nMarkedPoints++;
            }
          }
          bucketEntry = bucketEntry->nextEntry;
        }
      }
      break;
    case HT_STATISTICS:
      // HT_STATISTICS not supported anymore
      ASSERT(FALSE);
      break;
    case HT_HYBRID_CHAINS:
      if (gbucket.hybridGBucket != NULL){
        PHybridChainEntryT hybridPoint = gbucket.hybridGBucket;
        Uns32T offset = 0;
        if (hybridPoint->point.bucketLength == 0){
          // there are overflow points in this bucket: the offset is assembled
          // from the bucketLength fields of the following entries.
          offset = 0;
          for(IntT j = 0; j < N_FIELDS_PER_INDEX_OF_OVERFLOW; j++){
            offset += ((Uns32T)((hybridPoint + 1 + j)->point.bucketLength) << (j * N_BITS_FOR_BUCKET_LENGTH));
          }
        }
        Uns32T index = 0;
        BooleanT done = FALSE;
        while(!done){
          if (index == MAX_NONOVERFLOW_POINTS_PER_BUCKET){
            // jump to the overflow part of the bucket.
            index = index + offset;
          }
          Int32T candidatePIndex = (hybridPoint + index)->point.pointIndex;
          CR_ASSERT(candidatePIndex >= 0 && candidatePIndex < nnStruct->nPoints);
          done = (hybridPoint + index)->point.isLastPoint == 1 ? TRUE : FALSE;
          index++;
          if (nnStruct->markedPoints[candidatePIndex] == FALSE){
            // mark the point first, so that it is examined only once.
            nnStruct->markedPointsIndeces[nMarkedPoints] = candidatePIndex;
            nnStruct->markedPoints[candidatePIndex] = TRUE; // do not include more points with the same index
            nMarkedPoints++;

            PPointT candidatePoint = nnStruct->points[candidatePIndex];
            if (isDistanceSqrLeq(nnStruct->dimension, point, candidatePoint, nnStruct->parameterR2) && nnStruct->reportingResult){
              // a new R-NN point was found (not yet in <result>).
              if (nNeighbors >= resultSize){
                // run out of space => resize the <result> array.
                resultSize = 2 * resultSize;
                FAILIF(NULL == (result = (PPointT*)REALLOC(result, resultSize * sizeof(PPointT))));
              }
              result[nNeighbors] = candidatePoint;
              nNeighbors++;
            }
          }
          // otherwise the point was already marked (& examined)
        }
      }
      break;
    default:
      ASSERT(FALSE);
    }
    TIMEV_END(timeCycleBucket);
  }

  timingOn = oldTimingOn;
  TIMEV_END(timeTotalBuckets);

  // we need to clear the array nnStruct->markedPoints for the next query.
  for(Int32T i = 0; i < nMarkedPoints; i++){
    ASSERT(nnStruct->markedPoints[nnStruct->markedPointsIndeces[i]] == TRUE);
    nnStruct->markedPoints[nnStruct->markedPointsIndeces[i]] = FALSE;
  }
  DPRINTF("nMarkedPoints: %d\n", nMarkedPoints);

  return nNeighbors;
}
/* The main entry to LSH package. Depending on the command line parameters, the function computes the R-NN data structure optimal parameters and/or construct the R-NN data structure and runs the queries on the data structure. */ int main_T(int nargs, char **args) { //先分析参数 /* 官方lsh文件:10个参数 1000 9 784 0.9 0.6 mnist1k.dts mnist1k.q bin/LSHMain $nDataSet $nQuerySet $dimension $successProbability "$1" "$2" "$3" $m -c*/ //算参数 bin/LSHMain 1000 9 784 0.9 "0.6" "mnist1k.dts" "mnist1k.q" 1002000000 -c //bin/LSHMain $nDataSet $nQuerySet $dimension $successProbability 1.0 "$1" "$2" $m -p "$3" //匹配 bin/LSHMain 1000 9 784 0.9 1.0 "mnist1k.dts" "mnist1k.q" 1002000000 -p "outputparma.txt" if(nargs < 9) { usage(args[0]); exit(1); } //initializeLSHGlobal(); // Parse part of the command-line parameters. nPoints = atoi(args[1]); IntT nQueries = atoi(args[2]); pointsDimension = atoi(args[3]); successProbability = atof(args[4]); char* endPtr[1]; RealT thresholdR = strtod(args[5], endPtr);//点相邻的距离阈值 //str-to -double 将字符串转换成浮点数的函数 //endPtr 接收数字结尾后非字符串字母 //这个r阈值是什么呢? if (thresholdR == 0 || endPtr[1] == args[5]) {//如果阈值为0,或者第一个字符就不是数字, //表示是用文件保存的 //这大概是用于测试哪个阈值好的 // The value for R is not specified, instead there is a file // specifying multiple R's. 
thresholdR = 0; // Read in the file FILE *radiiFile = fopen(args[5], "rt"); FAILIF(radiiFile == NULL); fscanf(radiiFile, "%d\n", &nRadii); ASSERT(nRadii > 0); FAILIF(NULL == (listOfRadii = (RealT*)MALLOC(nRadii * sizeof(RealT)))); FAILIF(NULL == (memRatiosForNNStructs = (RealT*)MALLOC(nRadii * sizeof(RealT)))); for(IntT i = 0; i < nRadii; i++) { FSCANF_REAL(radiiFile, &listOfRadii[i]); ASSERT(listOfRadii[i] > 0); FSCANF_REAL(radiiFile, &memRatiosForNNStructs[i]); ASSERT(memRatiosForNNStructs[i] > 0); } } else { nRadii = 1; FAILIF(NULL == (listOfRadii = (RealT*)MALLOC(nRadii * sizeof(RealT)))); FAILIF(NULL == (memRatiosForNNStructs = (RealT*)MALLOC(nRadii * sizeof(RealT)))); listOfRadii[0] = thresholdR; memRatiosForNNStructs[0] = 1; }//对阈值R 和Radiii的处理 DPRINTF("No. radii: %d\n", nRadii); //thresholdR = atof(args[5]); availableTotalMemory = atoll(args[8]);//$M表示的是内存空间大小 if (nPoints > MAX_N_POINTS) { printf("Error: the structure supports at most %d points (%d were specified).\n", MAX_N_POINTS, nPoints); fprintf(ERROR_OUTPUT, "Error: the structure supports at most %d points (%d were specified).\n", MAX_N_POINTS, nPoints); exit(1); } readDataSetFromFile(args[6]);//点读到dataSetPoints //这个totalAllocatedMemory初始化为0,但是 //#define MALLOC(amount) ((amount > 0) ? totalAllocatedMemory += amount, malloc(amount) : NULL) //这样,每次申请内存都会统计到了 DPRINTF("Allocated memory (after reading data set): %lld\n", totalAllocatedMemory); Int32T nSampleQueries = N_SAMPLE_QUERY_POINTS; PPointT sampleQueries[N_SAMPLE_QUERY_POINTS]; Int32T sampleQBoundaryIndeces[N_SAMPLE_QUERY_POINTS]; // PPointT sampleQueries[nSampleQueries]; // Int32T sampleQBoundaryIndeces[nSampleQueries]; if ((nargs <= 9) || (strcmp("-c", args[9]) == 0) ) { // In this cases, we need to generate a sample query set for // computing the optimal parameters. // Generate a sample query set. 
FILE *queryFile = fopen(args[7], "rt"); if (strcmp(args[7], ".") == 0 || queryFile == NULL || nQueries <= 0) {//没有查询文件,就用所有点产生随机点 // Choose several data set points for the sample query points. for(IntT i = 0; i < nSampleQueries; i++){ sampleQueries[i] = dataSetPoints[genRandomInt(0, nPoints - 1)]; } } else { //从查询文件中选取随机的点, // Choose several actual query points for the sample query points. nSampleQueries = MIN(nSampleQueries, nQueries); Int32T sampleIndeces[N_SAMPLE_QUERY_POINTS]; //Int32T sampleIndeces[nSampleQueries]; for(IntT i = 0; i < nSampleQueries; i++) { sampleIndeces[i] = genRandomInt(0, nQueries - 1); } qsort(sampleIndeces, nSampleQueries, sizeof(*sampleIndeces), compareInt32T); //printIntVector("sampleIndeces: ", nSampleQueries, sampleIndeces); Int32T j = 0; for(Int32T i = 0; i < nQueries; i++) { if (i == sampleIndeces[j]) { sampleQueries[j] = readPoint(queryFile); j++; while (i == sampleIndeces[j]) { sampleQueries[j] = sampleQueries[j - 1]; j++; } }else { fscanf(queryFile, "%[^\n]", sBuffer); fscanf(queryFile, "\n"); } } nSampleQueries = j; fclose(queryFile); } //前面那么多,好像就是在申请内存,读文件,读入参数 // Compute the array sampleQBoundaryIndeces that specifies how to // segregate the sample query points according to their distance // to NN. //采用遍历的方法,计算查询点的最近邻(并且距离小于listOfRadii【nRadii】) sortQueryPointsByRadii(pointsDimension, nSampleQueries, sampleQueries, nPoints, dataSetPoints, nRadii, listOfRadii, sampleQBoundaryIndeces); }//if ((nargs < 9) || (strcmp("-c", args[9]) == 0)) RNNParametersT *algParameters = NULL; PRNearNeighborStructT *nnStructs = NULL; if (nargs > 9) {/* 官方lsh文件:10个参数 bin/LSHMain $nDataSet $nQuerySet $dimension $successProbability "$1" "$2" "$3" $m -c */ // Additional command-line parameter is specified. if (strcmp("-c", args[9]) == 0) //-c表示参数优化 { // Only compute the R-NN DS parameters and output them to stdout. 
printf("%d\n", nRadii); transformMemRatios(); for(IntT i = 0; i < nRadii; i++) { // which sample queries to use Int32T segregatedQStart = (i == 0) ? 0 : sampleQBoundaryIndeces[i - 1]; Int32T segregatedQNumber = nSampleQueries - segregatedQStart; if (segregatedQNumber == 0) { // XXX: not the right answer segregatedQNumber = nSampleQueries; segregatedQStart = 0; } ASSERT(segregatedQStart < nSampleQueries); ASSERT(segregatedQStart >= 0); ASSERT(segregatedQStart + segregatedQNumber <= nSampleQueries); ASSERT(segregatedQNumber >= 0); //从文件读取点,然后计算优化后的参数 RNNParametersT optParameters = computeOptimalParameters(listOfRadii[i], successProbability, nPoints, pointsDimension, dataSetPoints, segregatedQNumber, sampleQueries + segregatedQStart, /*对内存的约束,就体现在这里, availableTotalMemory总共的内存(传入) - totalAllocatedMemory(使用mallloc分配的)*1=内存上限 然后(L * nPoints > memoryUpperBound / 12 来约束 */ (MemVarT)((availableTotalMemory - totalAllocatedMemory) * memRatiosForNNStructs[i])); printRNNParameters(stdout, optParameters); } exit(0); } else if (strcmp("-p", args[9]) == 0) {//-p表示从文件读入参数,然后建立结构体 // Read the R-NN DS parameters from the given file and run the // queries on the constructed data structure. 
if (nargs < 10) { usage(args[0]); exit(1); } FILE *pFile = fopen(args[10], "rt"); FAILIFWR(pFile == NULL, "Could not open the params file."); fscanf(pFile, "%d\n", &nRadii); DPRINTF1("Using the following R-NN DS parameters:\n"); DPRINTF("N radii = %d\n", nRadii); FAILIF(NULL == (nnStructs = (PRNearNeighborStructT*)MALLOC(nRadii * sizeof(PRNearNeighborStructT)))); FAILIF(NULL == (algParameters = (RNNParametersT*)MALLOC(nRadii * sizeof(RNNParametersT)))); for(IntT i = 0; i < nRadii; i++) {//默认i=1 algParameters[i] = readRNNParameters(pFile);//从文件读参数 printRNNParameters(stderr, algParameters[i]); nnStructs[i] = initLSH_WithDataSet(algParameters[i], nPoints, dataSetPoints); //核心 //初始化整个数据结构 包括整体+l个hash表 +点映射到桶 } pointsDimension = algParameters[0].dimension; FREE(listOfRadii); FAILIF(NULL == (listOfRadii = (RealT*)MALLOC(nRadii * sizeof(RealT)))); for(IntT i = 0; i < nRadii; i++) { listOfRadii[i] = algParameters[i].parameterR; } } else { // Wrong option. usage(args[0]); exit(1); } }//if (nargs > 9) else { FAILIF(NULL == (nnStructs = (PRNearNeighborStructT*)MALLOC(nRadii * sizeof(PRNearNeighborStructT)))); // Determine the R-NN DS parameters, construct the DS and run the queries. transformMemRatios(); for(IntT i = 0; i < nRadii; i++) { // XXX: segregate the sample queries... 
//建立查询结构,自动优化参数 nnStructs[i] = initSelfTunedRNearNeighborWithDataSet(listOfRadii[i], successProbability, nPoints, pointsDimension, dataSetPoints, nSampleQueries, sampleQueries, (MemVarT)((availableTotalMemory - totalAllocatedMemory) * memRatiosForNNStructs[i])); } } // if (nargs <= 9) //上面都是根据不同配置,对参数的优化,建立查询结构 DPRINTF1("X\n"); IntT resultSize = nPoints; PPointT *result = (PPointT*)MALLOC(resultSize * sizeof(*result)); PPointT queryPoint; FAILIF(NULL == (queryPoint = (PPointT)MALLOC(sizeof(PointT)))); FAILIF(NULL == (queryPoint->coordinates = (RealT*)MALLOC(pointsDimension * sizeof(RealT)))); //读取查询点的文件 FILE *queryFile = fopen(args[7], "rt"); FAILIF(queryFile == NULL); TimeVarT meanQueryTime = 0; PPointAndRealTStructT *distToNN = NULL; for(IntT i = 0; i < nQueries; i++) {//对于每一个要查询的点 RealT sqrLength = 0; // read in the query point. for(IntT d = 0; d < pointsDimension; d++) { FSCANF_REAL(queryFile, &(queryPoint->coordinates[d])); sqrLength += SQR(queryPoint->coordinates[d]); /*//test if (d >150 && d<160) { printf(" %lf ",queryPoint->coordinates[d]); } if ( d==160) { printf("原始的文件数据\n"); } */ } queryPoint->sqrLength = sqrLength; //printRealVector("Query: ", pointsDimension, queryPoint->coordinates); // get the near neighbors. IntT nNNs = 0; for(IntT r = 0; r < nRadii; r++) {//查询n个近邻点,并计算距离 //查询核心 nNNs = getRNearNeighbors(nnStructs[r], queryPoint, result, resultSize); printf("Total time for R-NN query at radius %0.6lf (radius no. %d):\t%0.6lf\n", (double)(listOfRadii[r]), r, timeRNNQuery); meanQueryTime += timeRNNQuery; if (nNNs > 0) { printf("Query point %d: found %d NNs at distance %0.6lf (%dth radius). 
First %d NNs are:\n", i, nNNs, (double)(listOfRadii[r]), r, MIN(nNNs, MAX_REPORTED_POINTS)); // compute the distances to the found NN, and sort according to the distance //计算近邻点和查询点的距离 FAILIF(NULL == (distToNN = (PPointAndRealTStructT*)REALLOC(distToNN, nNNs * sizeof(*distToNN)))); for(IntT p = 0; p < nNNs; p++) { distToNN[p].ppoint = result[p]; distToNN[p].real = distance(pointsDimension, queryPoint, result[p]); } qsort(distToNN, nNNs, sizeof(*distToNN), comparePPointAndRealTStructT); // Print the points for(IntT j = 0; j < MIN(nNNs, MAX_REPORTED_POINTS); j++) { ASSERT(distToNN[j].ppoint != NULL); printf("%09d\tDistance:%0.6lf\n", distToNN[j].ppoint->index, distToNN[j].real); CR_ASSERT(distToNN[j].real <= listOfRadii[r]); //DPRINTF("Distance: %lf\n", distance(pointsDimension, queryPoint, result[j])); //printRealVector("NN: ", pointsDimension, result[j]->coordinates); } break; } } if (nNNs == 0) { printf("Query point %d: no NNs found.\n", i); } }// for(IntT i = 0; i < nQueries; i++)每个点查询 // if (nQueries > 0) { meanQueryTime = meanQueryTime / nQueries; printf("Mean query time: %0.6lf\n", (double)meanQueryTime); } for(IntT i = 0; i < nRadii; i++) { freePRNearNeighborStruct(nnStructs[i]); } // XXX: should ideally free the other stuff as well. return 0; }
/* The main entry to LSH package. Depending on the command line parameters, the function computes the R-NN data structure optimal parameters and/or construct the R-NN data structure and runs the queries on the data structure. */ int main(int nargs, char **args){ if(nargs < 9){ usage(args[0]); exit(1); } //initializeLSHGlobal(); // Parse part of the command-line parameters. nPoints = atoi(args[1]); IntT nQueries = atoi(args[2]); pointsDimension = atoi(args[3]); successProbability = atof(args[4]); char* endPtr[1]; RealT thresholdR = strtod(args[5], endPtr); if (thresholdR == 0 || endPtr[1] == args[5]){ // The value for R is not specified, instead there is a file // specifying multiple R's. thresholdR = 0; // Read in the file FILE *radiiFile = fopen(args[5], "rt"); FAILIF(radiiFile == NULL); fscanf(radiiFile, "%d\n", &nRadii); ASSERT(nRadii > 0); FAILIF(NULL == (listOfRadii = (RealT*)MALLOC(nRadii * sizeof(RealT)))); FAILIF(NULL == (memRatiosForNNStructs = (RealT*)MALLOC(nRadii * sizeof(RealT)))); for(IntT i = 0; i < nRadii; i++){ FSCANF_REAL(radiiFile, &listOfRadii[i]); ASSERT(listOfRadii[i] > 0); FSCANF_REAL(radiiFile, &memRatiosForNNStructs[i]); ASSERT(memRatiosForNNStructs[i] > 0); } }else{ nRadii = 1; FAILIF(NULL == (listOfRadii = (RealT*)MALLOC(nRadii * sizeof(RealT)))); FAILIF(NULL == (memRatiosForNNStructs = (RealT*)MALLOC(nRadii * sizeof(RealT)))); listOfRadii[0] = thresholdR; memRatiosForNNStructs[0] = 1; } DPRINTF("No. 
radii: %d\n", nRadii); //thresholdR = atof(args[5]); availableTotalMemory = atoll(args[8]); if (nPoints > MAX_N_POINTS) { // 104w points printf("Error: the structure supports at most %d points (%d were specified).\n", MAX_N_POINTS, nPoints); fprintf(ERROR_OUTPUT, "Error: the structure supports at most %d points (%d were specified).\n", MAX_N_POINTS, nPoints); exit(1); } readDataSetFromFile(args[6]); // read points into data structure DPRINTF("Allocated memory (after reading data set): %lld\n", totalAllocatedMemory); Int32T nSampleQueries = N_SAMPLE_QUERY_POINTS; PPointT sampleQueries[nSampleQueries]; Int32T sampleQBoundaryIndeces[nSampleQueries]; if ((nargs < 9) || (strcmp("-c", args[9]) == 0)){ // In this cases, we need to generate a sample query set for // computing the optimal parameters. // Generate a sample query set. FILE *queryFile = fopen(args[7], "rt"); if (strcmp(args[7], ".") == 0 || queryFile == NULL || nQueries <= 0){ // Choose several data set points for the sample query points. for(IntT i = 0; i < nSampleQueries; i++){ sampleQueries[i] = dataSetPoints[genRandomInt(0, nPoints - 1)]; } }else{ // Choose several actual query points for the sample query points. nSampleQueries = MIN(nSampleQueries, nQueries); Int32T sampleIndeces[nSampleQueries]; for(IntT i = 0; i < nSampleQueries; i++){ sampleIndeces[i] = genRandomInt(0, nQueries - 1); } qsort(sampleIndeces, nSampleQueries, sizeof(*sampleIndeces), compareInt32T); //printIntVector("sampleIndeces: ", nSampleQueries, sampleIndeces); Int32T j = 0; for(Int32T i = 0; i < nQueries; i++){ if (i == sampleIndeces[j]){ sampleQueries[j] = readPoint(queryFile); j++; while (i == sampleIndeces[j]){ sampleQueries[j] = sampleQueries[j - 1]; j++; } }else{ fscanf(queryFile, "%[^\n]", sBuffer); fscanf(queryFile, "\n"); } } nSampleQueries = j; fclose(queryFile); } // Compute the array sampleQBoundaryIndeces that specifies how to // segregate the sample query points according to their distance // to NN. 
sortQueryPointsByRadii(pointsDimension, nSampleQueries, sampleQueries, nPoints, dataSetPoints, nRadii, listOfRadii, sampleQBoundaryIndeces); } RNNParametersT *algParameters = NULL; PRNearNeighborStructT *nnStructs = NULL; if (nargs > 9) { // Additional command-line parameter is specified. if (strcmp("-c", args[9]) == 0) { // Only compute the R-NN DS parameters and output them to stdout. printf("%d\n", nRadii); transformMemRatios(); for(IntT i = 0; i < nRadii; i++){ // which sample queries to use Int32T segregatedQStart = (i == 0) ? 0 : sampleQBoundaryIndeces[i - 1]; Int32T segregatedQNumber = nSampleQueries - segregatedQStart; if (segregatedQNumber == 0) { // XXX: not the right answer segregatedQNumber = nSampleQueries; segregatedQStart = 0; } ASSERT(segregatedQStart < nSampleQueries); ASSERT(segregatedQStart >= 0); ASSERT(segregatedQStart + segregatedQNumber <= nSampleQueries); ASSERT(segregatedQNumber >= 0); RNNParametersT optParameters = computeOptimalParameters(listOfRadii[i], successProbability, nPoints, pointsDimension, dataSetPoints, segregatedQNumber, sampleQueries + segregatedQStart, (MemVarT)((availableTotalMemory - totalAllocatedMemory) * memRatiosForNNStructs[i])); printRNNParameters(stdout, optParameters); } exit(0); } else if (strcmp("-p", args[9]) == 0) { // Read the R-NN DS parameters from the given file and run the // queries on the constructed data structure. 
if (nargs < 10){ usage(args[0]); exit(1); } FILE *pFile = fopen(args[10], "rt"); FAILIFWR(pFile == NULL, "Could not open the params file."); fscanf(pFile, "%d\n", &nRadii); DPRINTF1("Using the following R-NN DS parameters:\n"); DPRINTF("N radii = %d\n", nRadii); FAILIF(NULL == (nnStructs = (PRNearNeighborStructT*)MALLOC(nRadii * sizeof(PRNearNeighborStructT)))); FAILIF(NULL == (algParameters = (RNNParametersT*)MALLOC(nRadii * sizeof(RNNParametersT)))); for(IntT i = 0; i < nRadii; i++){ algParameters[i] = readRNNParameters(pFile); printRNNParameters(stderr, algParameters[i]); nnStructs[i] = initLSH_WithDataSet(algParameters[i], nPoints, dataSetPoints); } pointsDimension = algParameters[0].dimension; FREE(listOfRadii); FAILIF(NULL == (listOfRadii = (RealT*)MALLOC(nRadii * sizeof(RealT)))); for(IntT i = 0; i < nRadii; i++){ listOfRadii[i] = algParameters[i].parameterR; } } else{ // Wrong option. usage(args[0]); exit(1); } } else { FAILIF(NULL == (nnStructs = (PRNearNeighborStructT*)MALLOC(nRadii * sizeof(PRNearNeighborStructT)))); // Determine the R-NN DS parameters, construct the DS and run the queries. transformMemRatios(); for(IntT i = 0; i < nRadii; i++){ // XXX: segregate the sample queries... nnStructs[i] = initSelfTunedRNearNeighborWithDataSet(listOfRadii[i], successProbability, nPoints, pointsDimension, dataSetPoints, nSampleQueries, sampleQueries, (MemVarT)((availableTotalMemory - totalAllocatedMemory) * memRatiosForNNStructs[i])); } } DPRINTF1("X\n"); IntT resultSize = nPoints; PPointT *result = (PPointT*)MALLOC(resultSize * sizeof(*result)); PPointT queryPoint; FAILIF(NULL == (queryPoint = (PPointT)MALLOC(sizeof(PointT)))); FAILIF(NULL == (queryPoint->coordinates = (RealT*)MALLOC(pointsDimension * sizeof(RealT)))); FILE *queryFile = fopen(args[7], "rt"); FAILIF(queryFile == NULL); TimeVarT meanQueryTime = 0; PPointAndRealTStructT *distToNN = NULL; for(IntT i = 0; i < nQueries; i++){ RealT sqrLength = 0; // read in the query point. 
for(IntT d = 0; d < pointsDimension; d++){ FSCANF_REAL(queryFile, &(queryPoint->coordinates[d])); sqrLength += SQR(queryPoint->coordinates[d]); } queryPoint->sqrLength = sqrLength; //printRealVector("Query: ", pointsDimension, queryPoint->coordinates); // get the near neighbors. IntT nNNs = 0; for(IntT r = 0; r < nRadii; r++){ nNNs = getRNearNeighbors(nnStructs[r], queryPoint, result, resultSize); printf("Total time for R-NN query at radius %0.6lf (radius no. %d):\t%0.6lf\n", (double)(listOfRadii[r]), r, timeRNNQuery); meanQueryTime += timeRNNQuery; if (nNNs > 0){ printf("Query point %d: found %d NNs at distance %0.6lf (%dth radius). First %d NNs are:\n", i, nNNs, (double)(listOfRadii[r]), r, MIN(nNNs, MAX_REPORTED_POINTS)); // compute the distances to the found NN, and sort according to the distance FAILIF(NULL == (distToNN = (PPointAndRealTStructT*)REALLOC(distToNN, nNNs * sizeof(*distToNN)))); for(IntT p = 0; p < nNNs; p++){ distToNN[p].ppoint = result[p]; distToNN[p].real = distance(pointsDimension, queryPoint, result[p]); } qsort(distToNN, nNNs, sizeof(*distToNN), comparePPointAndRealTStructT); // Print the points for(IntT j = 0; j < MIN(nNNs, MAX_REPORTED_POINTS); j++){ ASSERT(distToNN[j].ppoint != NULL); printf("%09d\tDistance:%0.6lf\n", distToNN[j].ppoint->index, distToNN[j].real); CR_ASSERT(distToNN[j].real <= listOfRadii[r]); //DPRINTF("Distance: %lf\n", distance(pointsDimension, queryPoint, result[j])); //printRealVector("NN: ", pointsDimension, result[j]->coordinates); } break; } } if (nNNs == 0){ printf("Query point %d: no NNs found.\n", i); } } if (nQueries > 0){ meanQueryTime = meanQueryTime / nQueries; printf("Mean query time: %0.6lf\n", (double)meanQueryTime); } for(IntT i = 0; i < nRadii; i++){ freePRNearNeighborStruct(nnStructs[i]); } // XXX: should ideally free the other stuff as well. return 0; }
/* The main entry to LSH package. Depending on the command line parameters, the function computes the R-NN data structure optimal parameters and/or construct the R-NN data structure and runs the queries on the data structure. */ int main(int nargs, char **args){ if(nargs < 9){ usage(args[0]); exit(1); } //initializeLSHGlobal(); // Parse part of the command-line parameters. nPoints = atoi(args[1]); IntT nQueries = atoi(args[2]); pointsDimension = atoi(args[3]); successProbability = atof(args[4]); char* endPtr[1]; RealT thresholdR = strtod(args[5], endPtr); //strtod将字符串转换成浮点数 //r=0.6 //strtod()会扫描参数nptr字符串,跳过前面的空格字符,直到遇上数字或正负符号才开始做转换 //,到出现非数字或字符串结束时('')才结束转换, 并将结果返回。 //若endptr不为NULL,则会将遇到不合条件而终止的nptr中的字符指针由endptr传回。 if (thresholdR == 0 || endPtr[1] == args[5]){ //确保阈值合法 // The value for R is not specified, instead there is a file // specifying multiple R's. thresholdR = 0; // Read in the file FILE *radiiFile = fopen(args[5], "rt"); FAILIF(radiiFile == NULL); fscanf(radiiFile, "%d\n", &nRadii); ASSERT(nRadii > 0); FAILIF(NULL == (listOfRadii = (RealT*)MALLOC(nRadii * sizeof(RealT)))); FAILIF(NULL == (memRatiosForNNStructs = (RealT*)MALLOC(nRadii * sizeof(RealT)))); for(IntT i = 0; i < nRadii; i++){ FSCANF_REAL(radiiFile, &listOfRadii[i]); ASSERT(listOfRadii[i] > 0); FSCANF_REAL(radiiFile, &memRatiosForNNStructs[i]); ASSERT(memRatiosForNNStructs[i] > 0); } }else{ nRadii = 1; //半径的个数为1个 FAILIF(NULL == (listOfRadii = (RealT*)MALLOC(nRadii * sizeof(RealT)))); FAILIF(NULL == (memRatiosForNNStructs = (RealT*)MALLOC(nRadii * sizeof(RealT)))); listOfRadii[0] = thresholdR; memRatiosForNNStructs[0] = 1; } DPRINTF("No. 
radii: %d\n", nRadii); //thresholdR = atof(args[5]); availableTotalMemory = atoll(args[8]); if (nPoints > MAX_N_POINTS) { printf("Error: the structure supports at most %d points (%d were specified).\n", MAX_N_POINTS, nPoints); fprintf(ERROR_OUTPUT, "Error: the structure supports at most %d points (%d were specified).\n", MAX_N_POINTS, nPoints); exit(1); } readDataSetFromFile(args[6]); //数据集的文件名 DPRINTF("Allocated memory (after reading data set): %lld\n", totalAllocatedMemory); Int32T nSampleQueries = N_SAMPLE_QUERY_POINTS; //样本查询点的个数,100 PPointT sampleQueries[nSampleQueries]; //对查询点编号 Int32T sampleQBoundaryIndeces[nSampleQueries]; //第一个大于半径的点的编号,如果有多个半径的话,就会记录更多 if ((nargs < 9) || (strcmp("-c", args[9]) == 0)){ //计算最优参数 // In this cases, we need to generate a sample query set for // computing the optimal parameters. // Generate a sample query set. FILE *queryFile = fopen(args[7], "rt"); //打开查询集,以只读文本方式打开 if (strcmp(args[7], ".") == 0 || queryFile == NULL || nQueries <= 0){ // Choose several data set points for the sample query points. //如果没有查询点就随机选择几个数据集点作为查询点 for(IntT i = 0; i < nSampleQueries; i++){ sampleQueries[i] = dataSetPoints[genRandomInt(0, nPoints - 1)]; } }else{ // Choose several actual query points for the sample query points. nSampleQueries = MIN(nSampleQueries, nQueries); //MIN(100,9) Int32T sampleIndeces[nSampleQueries]; //定义了一个查询点样本索引数组 for(IntT i = 0; i < nSampleQueries; i++){ ////为什么要对查询点索引进行随机变化? 
想把样本查询点控制在一定的范围内,如果查询点过多,则样本点最多取100个查询点。 sampleIndeces[i] = genRandomInt(0, nQueries - 1); //对查询点做了一下顺序的变化,对查询点的索引做随机处理。 } // 根据你给的比较条件进行快速排序,通过指针的移动实验排序,排序之后的结果仍然放在原数组中,必须自己写一个比较函数 //http://www.slyar.com/blog/stdlib-qsort.html qsort(数组起始地址,数组元素大小,每个元素的大小,函数指针指向比较函数) qsort(sampleIndeces, nSampleQueries, sizeof(*sampleIndeces), compareInt32T); //qsort,C语言标准库函数,对样本查询点的索引值进行排序 //printIntVector("sampleIndeces: ", nSampleQueries, sampleIndeces); Int32T j = 0; for(Int32T i = 0; i < nQueries; i++){ if (i == sampleIndeces[j]){ //如果样本查询点的索引值与实际查询点的索引值一致,读入点 sampleQueries[j] = readPoint(queryFile); j++; while (i == sampleIndeces[j]){ //如果样本查询点之后的索引值与实践查询点的索引值一致,则直接将此点的值赋给后面一点的值 sampleQueries[j] = sampleQueries[j - 1]; //覆盖之后索引点的值 j++; //取后面的点 } }else{ fscanf(queryFile, "%[^\n]", sBuffer); fscanf(queryFile, "\n"); } } nSampleQueries = j; fclose(queryFile); } // Compute the array sampleQBoundaryIndeces that specifies how to // segregate the sample query points according to their distance // to NN. //边界sampleQBoundaryIndeces只会存取一个点的索引,该点的大小为第一个大于半径点的值 sortQueryPointsByRadii(pointsDimension, nSampleQueries, //查询集的点的个数 sampleQueries, //查询点的集合,函数运行完成后,点的值将以距离数据集合的距离由小到大的顺序排序 nPoints, //数据集点的个数 dataSetPoints, //数据集集合 nRadii, //半径的个数 listOfRadii, //半径的值 sampleQBoundaryIndeces); } //之前的东西-c运行的,-p是不会运行的 RNNParametersT *algParameters = NULL; PRNearNeighborStructT *nnStructs = NULL; if (nargs > 9) { // Additional command-line parameter is specified. if (strcmp("-c", args[9]) == 0) { // Only compute the R-NN DS parameters and output them to stdout. // 如果是-c,就只计算数据集参数,然后输出 printf("%d\n", nRadii); //打印出半径的个数:1个。 将写入到参数文件中, transformMemRatios(); //memRatiosForNNstructs,转换内存使用率。假设每个结构为1,每个半径占用的总内存的比率,用于计算内存 for(IntT i = 0; i < nRadii; i++){ //看使用哪个样本查询点 // which sample queries to use Int32T segregatedQStart = (i == 0) ? 
0 : sampleQBoundaryIndeces[i - 1]; //起始点的位置 Int32T segregatedQNumber = nSampleQueries - segregatedQStart; //查询点的个数 if (segregatedQNumber == 0) { //如果计算所得点的个数为0,就查询所有的点,从0到最后 // XXX: not the right answer segregatedQNumber = nSampleQueries; segregatedQStart = 0; } ASSERT(segregatedQStart < nSampleQueries); ASSERT(segregatedQStart >= 0); ASSERT(segregatedQStart + segregatedQNumber <= nSampleQueries); ASSERT(segregatedQNumber >= 0); RNNParametersT optParameters = computeOptimalParameters(listOfRadii[i], //计算最优的运行时间, successProbability, nPoints, pointsDimension, dataSetPoints, segregatedQNumber, sampleQueries + segregatedQStart, (MemVarT)((availableTotalMemory - totalAllocatedMemory) * memRatiosForNNStructs[i])); //比率 ////memRatioForNNStructs[i]:近邻结构体每个半径所占用的内存比率,计算能用多少内存 printRNNParameters(stdout, optParameters); //将参数打印出来 } exit(0); } else if (strcmp("-p", args[9]) == 0) { // Read the R-NN DS parameters from the given file and run the // queries on the constructed data structure. if (nargs < 10){ usage(args[0]); exit(1); } FILE *pFile = fopen(args[10], "rt"); //读取参数文件,由lsh_computeParas产生 FAILIFWR(pFile == NULL, "Could not open the params file."); fscanf(pFile, "%d\n", &nRadii); //这里只取了参数文件中的半径,那参数文件中的其他数据怎样被取用的?? DPRINTF1("Using the following R-NN DS parameters:\n"); //使用R-NN DS(DateSet)参数 DPRINTF("N radii = %d\n", nRadii); //不知道将数据输出到哪里了?? 
// printf("Using the following R-NN DS parameters:\n"); // printf("N radii=%d\n",nRadii); FAILIF(NULL == (nnStructs = (PRNearNeighborStructT*)MALLOC(nRadii * sizeof(PRNearNeighborStructT)))); FAILIF(NULL == (algParameters = (RNNParametersT*)MALLOC(nRadii * sizeof(RNNParametersT)))); for(IntT i = 0; i < nRadii; i++){ algParameters[i] = readRNNParameters(pFile); //将参数信息,输出到屏幕上 // printRNNParameters(stderr, algParameters[i]);@727 //printRNNParameters(stdout,algParameters[i]); nnStructs[i] = initLSH_WithDataSet(algParameters[i], nPoints, dataSetPoints); //根据用户输入的参数,初始化结构 } pointsDimension = algParameters[0].dimension; FREE(listOfRadii); FAILIF(NULL == (listOfRadii = (RealT*)MALLOC(nRadii * sizeof(RealT)))); for(IntT i = 0; i < nRadii; i++){ listOfRadii[i] = algParameters[i].parameterR; } } else{ // Wrong option. usage(args[0]); exit(1); } } else { FAILIF(NULL == (nnStructs = (PRNearNeighborStructT*)MALLOC(nRadii * sizeof(PRNearNeighborStructT)))); // Determine the R-NN DS parameters, construct the DS and run the queries. transformMemRatios(); for(IntT i = 0; i < nRadii; i++){ // XXX: segregate the sample queries... nnStructs[i] = initSelfTunedRNearNeighborWithDataSet(listOfRadii[i], successProbability, nPoints, pointsDimension, dataSetPoints, nSampleQueries, sampleQueries, (MemVarT)((availableTotalMemory - totalAllocatedMemory) * memRatiosForNNStructs[i])); } } // DPRINTF1("X\n");@ printf("X\n"); IntT resultSize = nPoints; PPointT *result = (PPointT*)MALLOC(resultSize * sizeof(*result)); PPointT queryPoint; FAILIF(NULL == (queryPoint = (PPointT)MALLOC(sizeof(PointT)))); FAILIF(NULL == (queryPoint->coordinates = (RealT*)MALLOC(pointsDimension * sizeof(RealT)))); FILE *queryFile = fopen(args[7], "rt"); FAILIF(queryFile == NULL); TimeVarT meanQueryTime = 0; PPointAndRealTStructT *distToNN = NULL; for(IntT i = 0; i < nQueries; i++){ RealT sqrLength = 0; // read in the query point. 
for(IntT d = 0; d < pointsDimension; d++){ FSCANF_REAL(queryFile, &(queryPoint->coordinates[d])); sqrLength += SQR(queryPoint->coordinates[d]); //向量到原点的距离 } queryPoint->sqrLength = sqrLength; //printRealVector("Query: ", pointsDimension, queryPoint->coordinates); // get the near neighbors. IntT nNNs = 0; for(IntT r = 0; r < nRadii; r++){ nNNs = getRNearNeighbors(nnStructs[r], queryPoint, result, resultSize); printf("Total time for R-NN query at radius %0.6lf (radius no. %d):\t%0.6lf\n", (double)(listOfRadii[r]), r, timeRNNQuery); meanQueryTime += timeRNNQuery; if (nNNs > 0){ printf("Query point %d: found %d NNs at distance %0.6lf (%dth radius). First %d NNs are:\n", i, nNNs, (double)(listOfRadii[r]), r, MIN(nNNs, MAX_REPORTED_POINTS)); // compute the distances to the found NN, and sort according to the distance FAILIF(NULL == (distToNN = (PPointAndRealTStructT*)REALLOC(distToNN, nNNs * sizeof(*distToNN)))); for(IntT p = 0; p < nNNs; p++){ distToNN[p].ppoint = result[p]; distToNN[p].real = distance(pointsDimension, queryPoint, result[p]); } qsort(distToNN, nNNs, sizeof(*distToNN), comparePPointAndRealTStructT); //C语言标准的函数 // Print the points for(IntT j = 0; j < MIN(nNNs, MAX_REPORTED_POINTS); j++){ ASSERT(distToNN[j].ppoint != NULL); printf("%09d\tDistance:%0.6lf\n", distToNN[j].ppoint->index, distToNN[j].real); //打印点的坐标 CR_ASSERT(distToNN[j].real <= listOfRadii[r]); //DPRINTF("Distance: %lf\n", distance(pointsDimension, queryPoint, result[j])); //printRealVector("NN: ", pointsDimension, result[j]->coordinates); } break; } } if (nNNs == 0){ printf("Query point %d: no NNs found.\n", i); } } if (nQueries > 0){ meanQueryTime = meanQueryTime / nQueries; printf("Mean query time: %0.6lf\n", (double)meanQueryTime); } for(IntT i = 0; i < nRadii; i++){ freePRNearNeighborStruct(nnStructs[i]); } // XXX: should ideally free the other stuff as well. return 0; }
int MemCacheClient::Store( const char * aType, MemRequest * aItem, int aCount ) { if (aCount < 1) { mTrace.Trace(CLDEBUG, "Store: ignoring request for %d items", aCount); return 0; } // initialize and find all of the servers for these items int nItemCount = 0; for (int n = 0; n < aCount; ++n) { // ensure that the key doesn't have a space in it CR_ASSERT(NULL == strchr(aItem[n].mKey.data(), ' ')); aItem[n].mServer = FindServer(aItem[n].mKey, aItem[n].mService); if (aItem[n].mServer) { ++nItemCount; } else { aItem[n].mResult = MCERR_NOSERVER; } } if (nItemCount == 0) { mTrace.Trace(CLDEBUG, "Store: ignoring request for all %d items (no servers available)", aCount); return 0; } char szBuf[50]; int nResponses = 0; string_t sRequest; for (int n = 0; n < aCount; ++n) { if (!aItem[n].mServer) continue; // <command name> <key> <flags> <exptime> <bytes> [noreply]\r\n sRequest = aType; sRequest += ' '; sRequest += aItem[n].mKey; snprintf(szBuf, sizeof(szBuf), " %u %ld %u", aItem[n].mFlags, (long) aItem[n].mExpiry, (unsigned)aItem[n].mData.GetReadSize()); sRequest += szBuf; if (*aType == 'c') { // cas snprintf(szBuf, sizeof(szBuf), " %" PRIu64, aItem[n].mCas); sRequest += szBuf; } if (aItem[n].mResult == MCERR_NOREPLY) { sRequest += " noreply"; } sRequest += "\r\n"; // send the request. any socket error causes the server connection // to be dropped, so we return errors for all requests using that server. 
try { aItem[n].mServer->SendBytes( sRequest.data(), sRequest.length()); aItem[n].mServer->SendBytes( aItem[n].mData.GetReadBuffer(), aItem[n].mData.GetReadSize()); aItem[n].mServer->SendBytes("\r\n", 2); // done with these read bytes aItem[n].mData.CommitReadBytes( aItem[n].mData.GetReadSize()); // if no reply is required then move on to the next request if (aItem[n].mResult == MCERR_NOREPLY) { continue; } // handle this response HandleStoreResponse(aItem[n].mServer, aItem[n]); ++nResponses; } catch (const Socket::Exception & e) { mTrace.Trace(CLINFO, "Store: error '%s' at %s, marking requests as NOSERVER", e.mDetail, aItem[n].mServer->GetAddress()); for (int i = aCount - 1; i >= n; --i) { if (aItem[n].mServer != aItem[i].mServer) continue; aItem[i].mServer = NULL; aItem[i].mResult = MCERR_NOSERVER; } continue; } } return nResponses; }
int MemCacheClient::Combine( const char * aType, MemRequest * aItem, int aCount ) { if (aCount < 1) { mTrace.Trace(CLDEBUG, "%s: ignoring request for %d items", aType, aCount); return 0; } CR_ASSERT(*aType == 'g' || *aType == 'd'); // get, gets, del MemRequest * rgpItem[MAX_REQUESTS] = { NULL }; if (aCount > MAX_REQUESTS) { mTrace.Trace(CLDEBUG, "%s: ignoring request for all %d items (too many)", aType, aCount); return -1; // invalid args } // initialize and find all of the servers for these items int nItemCount = 0; for (int n = 0; n < aCount; ++n) { // ensure that the key doesn't have a space in it CR_ASSERT(NULL == strchr(aItem[n].mKey.data(), ' ')); aItem[n].mServer = FindServer(aItem[n].mKey, aItem[n].mService); aItem[n].mData.SetEmpty(); if (aItem[n].mServer) { rgpItem[nItemCount++] = &aItem[n]; } else { aItem[n].mResult = MCERR_NOSERVER; } } if (nItemCount == 0) { mTrace.Trace(CLDEBUG, "%s: ignoring request for all %d items (no servers available)", aType, aCount); return 0; } // sort all requests into server order const static MemRequest::Sort sortOnServer = MemRequest::Sort(); std::sort(&rgpItem[0], &rgpItem[nItemCount], sortOnServer); // send all requests char szBuf[50]; int nItem = 0, nNext; string_t sRequest, sTemp; while (nItem < nItemCount) { for (nNext = nItem; nNext < nItemCount; ++nNext) { if (rgpItem[nItem]->mServer != rgpItem[nNext]->mServer) break; CR_ASSERT(*aType == 'g' || *aType == 'd'); rgpItem[nNext]->mData.SetEmpty(); // create get request for all keys on this server if (*aType == 'g') { if (nNext == nItem) sRequest = "get"; else sRequest.resize(sRequest.length() - 2); sRequest += ' '; sRequest += rgpItem[nNext]->mKey; sRequest += "\r\n"; rgpItem[nNext]->mResult = MCERR_NOTFOUND; } // create del request for all keys on this server else if (*aType == 'd') { // delete <key> [<time>] [noreply]\r\n sRequest += "delete "; sRequest += rgpItem[nNext]->mKey; sRequest += ' '; snprintf(szBuf, sizeof(szBuf), "%ld", (long) rgpItem[nNext]->mExpiry); 
sRequest += szBuf; if (rgpItem[nNext]->mResult == MCERR_NOREPLY) { sRequest += " noreply"; } sRequest += "\r\n"; if (rgpItem[nNext]->mResult != MCERR_NOREPLY) { rgpItem[nNext]->mResult = MCERR_NOTFOUND; } } } // send the request. any socket error causes the server connection // to be dropped, so we return errors for all requests using that server. try { rgpItem[nItem]->mServer->SendBytes( sRequest.data(), sRequest.length()); } catch (const Socket::Exception & e) { mTrace.Trace(CLINFO, "%s: request error '%s' at %s, marking requests as NOSERVER", aType, e.mDetail, rgpItem[nItem]->mServer->GetAddress()); for (int n = nItem; n < nNext; ++n) { rgpItem[n]->mServer = NULL; rgpItem[n]->mResult = MCERR_NOSERVER; } } nItem = nNext; } // receive responses from all servers int nResponses = 0; for (nItem = 0; nItem < nItemCount; nItem = nNext) { // find the end of this server if (!rgpItem[nItem]->mServer) { nNext = nItem + 1; continue; } for (nNext = nItem + 1; nNext < nItemCount; ++nNext) { if (rgpItem[nItem]->mServer != rgpItem[nNext]->mServer) break; } // receive the responses. any socket error causes the server connection // to be dropped, so we return errors for all requests using that server. try { if (*aType == 'g') { nResponses += HandleGetResponse( rgpItem[nItem]->mServer, &rgpItem[nItem], &rgpItem[nNext]); } else if (*aType == 'd') { nResponses += HandleDelResponse( rgpItem[nItem]->mServer, &rgpItem[nItem], &rgpItem[nNext]); } } catch (const Socket::Exception & e) { mTrace.Trace(CLINFO, "%s: response error '%s' at %s, marking requests as NOSERVER", aType, e.mDetail, rgpItem[nItem]->mServer->GetAddress()); rgpItem[nItem]->mServer->Disconnect(); for (int n = nNext - 1; n >= nItem; --n) { if (rgpItem[nItem]->mServer != rgpItem[n]->mServer) continue; rgpItem[n]->mServer = NULL; rgpItem[n]->mResult = MCERR_NOSERVER; } } } mTrace.Trace(CLDEBUG, "%s: received %d responses to %d requests", aType, nResponses, aCount); return nResponses; }
// Returns the list of near neighbors of the point <point> (with a // certain success probability). Near neighbor is defined as being a // point within distance <parameterR>. Each near neighbor from the // data set is returned is returned with a certain probability, // dependent on <parameterK>, <parameterL>, and <parameterT>. The // returned points are kept in the array <result>. If result is not // allocated, it will be allocated to at least some minimum size // (RESULT_INIT_SIZE). If number of returned points is bigger than the // size of <result>, then the <result> is resized (to up to twice the // number of returned points). The return value is the number of // points found. Int32T getNearNeighborsFromPRNearNeighborStruct( PRNearNeighborStructT nnStruct, PPointT query, PPointT *(&result), Int32T &resultSize) { //通过查找索引,然后获得桶,提取n个最近邻点 //通过计算点的降维值,然后计算主副索引,最后由索引查找表 ASSERT(nnStruct != NULL); ASSERT(query != NULL); ASSERT(nnStruct->reducedPoint != NULL); ASSERT(!nnStruct->useUfunctions || nnStruct->pointULSHVectors != NULL); PPointT point = query; if (result == NULL) { resultSize = RESULT_INIT_SIZE; FAILIF(NULL == (result = (PPointT*)MALLOC(resultSize * sizeof(PPointT)))); } /* for (int tempd=150; tempd< 160;tempd++) { printf(" %lf ",query->coordinates[tempd]); } printf("查询的具体数据 10个 \n\n"); printf("查询数据 : %lf \n",query->coordinates[151]); // printf( "主hash的值: %u \n",nnStruct->hehasdBuckets[0]->mainHashA[5]); // printf( "辅助hash的值: %u \n",nnStruct->hashedBuckets[0]->controlHash1[5]); // printf( "a %u \n",nnStruct->lshFunctions[0][0].a[5]); // printf( "b %u \n",nnStruct->lshFunctions[0][0].b ); */ preparePointAdding(nnStruct, nnStruct->hashedBuckets[0], point); //根据传入的多维point。计算对应每个hash表的降维=》hash值,存入了nnStruct->precomputedHashesOfULSHs Uns32T **(precomputedHashesOfULSHs);//没释放 precomputedHashesOfULSHs= (Uns32T**)malloc(sizeof(Uns32T*)*(nnStruct->nHFTuples)); // Uns32T precomputedHashesOfULSHs[nnStruct->nHFTuples][N_PRECOMPUTED_HASHES_NEEDED]; for (IntT i=0; i< 
nnStruct->nHFTuples ; i++) { precomputedHashesOfULSHs[i]= (Uns32T*)malloc(sizeof(Uns32T)*(N_PRECOMPUTED_HASHES_NEEDED)); for (int temi=0; temi< N_PRECOMPUTED_HASHES_NEEDED ; temi++) { precomputedHashesOfULSHs[i][temi]=0; } } //初始化?? /* printf("\n输出:\n"); FILE *in = fopen("preconpute.txt", "a+") ; fprintf(in,"\n输出:\n"); fclose(in); */ for(IntT i = 0; i < nnStruct->nHFTuples; i++) { for(IntT j = 0; j < N_PRECOMPUTED_HASHES_NEEDED; j++) { precomputedHashesOfULSHs[i][j] = nnStruct->precomputedHashesOfULSHs[i][j]; /* printf(" %u", precomputedHashesOfULSHs[i][j]); FILE *in = fopen("preconpute.txt", "a+") ; fprintf(in," %u", precomputedHashesOfULSHs[i][j]); fclose(in); */ } /*printf(" \n"); FILE *in = fopen("preconpute.txt", "a+") ; fprintf(in," \n"); fclose(in); */ } TIMEV_START(timeTotalBuckets); BooleanT oldTimingOn = timingOn; if (noExpensiveTiming) { timingOn = FALSE; } // Initialize the counters for defining the pair of <u> functions used for <g> functions. IntT firstUComp = 0; IntT secondUComp = 1; Int32T nNeighbors = 0;// the number of near neighbors found so far. Int32T nMarkedPoints = 0;// the number of marked points for(IntT i = 0; i < nnStruct->parameterL; i++) { //L个表 TIMEV_START(timeGetBucket); GeneralizedPGBucket gbucket; if (!nnStruct->useUfunctions) { // Use usual <g> functions (truly independent; <g>s are precisly // <u>s). gbucket = getGBucket(nnStruct->hashedBuckets[i], 1, precomputedHashesOfULSHs[i], NULL); } else { // Use <u> functions (<g>s are pairs of <u> functions). gbucket = getGBucket(nnStruct->hashedBuckets[i], 2, precomputedHashesOfULSHs[firstUComp], precomputedHashesOfULSHs[secondUComp]); //通过两个向量,计算主副索引。然后遍历二级索引,提取对应的桶 // compute what is the next pair of <u> functions. //不是每个都 (first,second )(first,second )(first,second )的数组吗? 
secondUComp++; if (secondUComp == nnStruct->nHFTuples) { firstUComp++; secondUComp = firstUComp + 1; } } TIMEV_END(timeGetBucket); PGBucketT bucket; TIMEV_START(timeCycleBucket); switch (nnStruct->hashedBuckets[i]->typeHT) { //对不同类型的hash桶结构,使用不同方法获取二级桶的实体 case HT_LINKED_LIST: bucket = gbucket.llGBucket; if (bucket != NULL) { // circle through the bucket and add to <result> the points that are near. PBucketEntryT bucketEntry = &(bucket->firstEntry); //TIMEV_START(timeCycleProc); while (bucketEntry != NULL) { //TIMEV_END(timeCycleProc); //ASSERT(bucketEntry->point != NULL); //TIMEV_START(timeDistanceComputation); Int32T candidatePIndex = bucketEntry->pointIndex; PPointT candidatePoint = nnStruct->points[candidatePIndex]; if (isDistanceSqrLeq(nnStruct->dimension, point, candidatePoint, nnStruct->parameterR2) && nnStruct->reportingResult) { //TIMEV_END(timeDistanceComputation); if (nnStruct->markedPoints[candidatePIndex] == FALSE) { //TIMEV_START(timeResultStoring); // a new R-NN point was found (not yet in <result>). if (nNeighbors >= resultSize) { // run out of space => resize the <result> array. 
resultSize = 2 * resultSize; result = (PPointT*)REALLOC(result, resultSize * sizeof(PPointT)); } result[nNeighbors] = candidatePoint; nNeighbors++; nnStruct->markedPointsIndeces[nMarkedPoints] = candidatePIndex; nnStruct->markedPoints[candidatePIndex] = TRUE; // do not include more points with the same index nMarkedPoints++; //TIMEV_END(timeResultStoring); } } else { //TIMEV_END(timeDistanceComputation); } //TIMEV_START(timeCycleProc); bucketEntry = bucketEntry->nextEntry; }//while //TIMEV_END(timeCycleProc); } break; case HT_STATISTICS: ASSERT(FALSE); // HT_STATISTICS not supported anymore // if (gbucket.linkGBucket != NULL && gbucket.linkGBucket->indexStart != INDEX_START_EMPTY){ // Int32T position; // PointsListEntryT *pointsList = nnStruct->hashedBuckets[i]->bucketPoints.pointsList; // position = gbucket.linkGBucket->indexStart; // // circle through the bucket and add to <result> the points that are near. // while (position != INDEX_START_EMPTY){ // PPointT candidatePoint = pointsList[position].point; // if (isDistanceSqrLeq(nnStruct->dimension, point, candidatePoint, nnStruct->parameterR2) && nnStruct->reportingResult){ // if (nnStruct->nearPoints[candidatePoint->index] == FALSE) { // // a new R-NN point was found (not yet in <result>). // if (nNeighbors >= resultSize){ // // run out of space => resize the <result> array. 
// resultSize = 2 * resultSize; // result = (PPointT*)REALLOC(result, resultSize * sizeof(PPointT)); // } // result[nNeighbors] = candidatePoint; // nNeighbors++; // nnStruct->nearPoints[candidatePoint->index] = TRUE; // do not include more points with the same index // } // } // // Int32T oldP = position; // position = pointsList[position].nextPoint; // // ASSERT(position == INDEX_START_EMPTY || position == oldP + 1); // } // } break; case HT_HYBRID_CHAINS://默认的链条 if (gbucket.hybridGBucket != NULL) { //好像是在链表中找空间,同时要判断没有重复的 PHybridChainEntryT hybridPoint = gbucket.hybridGBucket;//获取 二级桶的数组指针,(实际桶就是一个数组) Uns32T offset = 0; if (hybridPoint->point.bucketLength == 0) { //长度为0,就是溢出了的桶, // there are overflow points in this bucket. offset = 0; for(IntT j = 0; j < N_FIELDS_PER_INDEX_OF_OVERFLOW; j++) { offset += ((Uns32T)((hybridPoint + 1 + j)->point.bucketLength) << (j * N_BITS_FOR_BUCKET_LENGTH)); } } Uns32T index = 0; BooleanT done = FALSE; while(!done) { if (index == MAX_NONOVERFLOW_POINTS_PER_BUCKET) { //CR_ASSERT(hybridPoint->point.bucketLength == 0); index = index + offset; } //hybridPoint 是个二级桶+实体组成的数组的首地址(其实就是个二级刻度) Int32T candidatePIndex = (hybridPoint + index)->point.pointIndex; //索引只是记录每个点的序号, 所有点都在nnStruct->points[candidatePIndex] 上保存具体值 CR_ASSERT(candidatePIndex >= 0 && candidatePIndex < nnStruct->nPoints); done = (hybridPoint + index)->point.isLastPoint == 1 ? TRUE : FALSE; //链表的遍历?好像是用数组来当链表用 index++; if (nnStruct->markedPoints[candidatePIndex] == FALSE) { //已经计算过的点都标记为true了 //nnStruct->markedPoints 是用来标记是否检测过得 // mark the point first. 
nnStruct->markedPointsIndeces[nMarkedPoints] = candidatePIndex; nnStruct->markedPoints[candidatePIndex] = TRUE; // do not include more points with the same index nMarkedPoints++; PPointT candidatePoint = nnStruct->points[candidatePIndex]; if (isDistanceSqrLeq(nnStruct->dimension, point, candidatePoint, nnStruct->parameterR2) && nnStruct->reportingResult) { //两点距离是否小于阈值 //if (nnStruct->markedPoints[candidatePIndex] == FALSE) { // a new R-NN point was found (not yet in <result>). //TIMEV_START(timeResultStoring); if (nNeighbors >= resultSize) { //近邻点太多,扩大空间 // run out of space => resize the <result> array. resultSize = 2 * resultSize; result = (PPointT*)REALLOC(result, resultSize * sizeof(PPointT)); } result[nNeighbors] = candidatePoint;//存入返回结果中 nNeighbors++; //TIMEV_END(timeResultStoring); //nnStruct->markedPointsIndeces[nMarkedPoints] = candidatePIndex; //nnStruct->markedPoints[candidatePIndex] = TRUE; // do not include more points with the same index //nMarkedPoints++; //} } }// if (nnStruct->markedPoints[candidatePIndex] == FALSE) else { // the point was already marked (& examined) } }// while(!done) }// if (gbucket.hybridGBucket != NULL) break; default: ASSERT(FALSE); }//swichcase TIMEV_END(timeCycleBucket); }//for timingOn = oldTimingOn; TIMEV_END(timeTotalBuckets); // we need to clear the array nnStruct->nearPoints for the next query. for(Int32T i = 0; i < nMarkedPoints; i++) { ASSERT(nnStruct->markedPoints[nnStruct->markedPointsIndeces[i]] == TRUE); nnStruct->markedPoints[nnStruct->markedPointsIndeces[i]] = FALSE; } DPRINTF("nMarkedPoints: %d\n", nMarkedPoints); return nNeighbors; }