sp_mat make_ball_basis(const Points & points, const Points & centers, uint R){
  /*
    Make a set of balls, one sparse basis column per center.
    Each ball has unique points via set subtraction
    (think "moon shape" if two balls overlap).
    Each ball should have exactly R points, but there may be
    fewer if we run out of points.

    points  : one point per row.
    centers : one ball center per row; processed in row order.
    R       : target number of points per ball (passed to find_radius).

    Returns an N x C sparse matrix (N = points.n_rows), where C is the
    number of centers actually used (C <= centers.n_rows): construction
    stops early once every point belongs to some ball, and the unused
    trailing columns are dropped.
  */
  uint N = points.n_rows;
  uint K = centers.n_rows;

  sp_mat basis = sp_mat(N,K);
  bvec mask = zeros<bvec>(N);   // 1 where a point already belongs to a ball
  uint n_used = 0;              // number of leading columns painted so far

  vec dist;
  uvec idx;
  double r;

  for(uint k = 0; k < K; k++){
    // presumably the per-row 2-norm distance to center k -- see lp_norm
    dist = lp_norm(points.each_row() - centers.row(k),2,1);
    r = find_radius(dist,mask,R);
    idx = find(dist < r);

    for(uint i = 0; i < idx.n_elem; i++){
      // Paint all elements of idx that aren't
      // already in another basis
      if(0 == mask(idx(i)))
        basis(idx(i),k) = 1.0 / static_cast<double>(idx.n_elem);
    }
    mask(idx).fill(1); // Add to mask

    // Column k is now in use. Record this *before* the early exit below:
    // resizing to k (rather than k + 1) after a break would discard the
    // ball that was just painted.
    n_used = k + 1;

    // NOTE(review): if bvec has a narrow element type, sum(mask) may
    // wrap for large N -- confirm against the bvec typedef.
    if(N == sum(mask)){
      break;
    }
  }

  basis.resize(N,n_used); // Avoid all-zero trailing columns
  basis = sp_normalise(basis,2,0); // Should be almost ortho
  return basis;
}
// -----------------------------------------------------------------------------
//  k-nn search for <query>.
//
//  Fills rslt[0 .. top_k-1] (each entry is first reset to id_ = -1,
//  dist_ = MAXREAL and then maintained through update_result()), resets and
//  updates the members <page_io_> and <dist_io_>, and returns their sum.
//
//  The search proceeds in rounds. Within a round, each of the <m_> hash
//  tables is scanned outward from the query's hash value (left or right,
//  whichever buffered page is closer) while the projected distance stays
//  below <bucket_width>. An object whose collision count exceeds <l_> is
//  read from <output_folder> and compared with the query once. A round ends
//  when every table is exhausted for the current width; then <radius> is
//  enlarged via update_radius() unless a terminating condition holds.
//
//  All scratch buffers are allocated and released inside this function, and
//  the global <g_memory> counter is adjusted symmetrically.
// -----------------------------------------------------------------------------
int QALSH::knn(                     // k-nn search
    float* query,                   // query point
    int top_k,                      // top-k value
    ResultItem* rslt,               // k-nn results
    char* output_folder)            // output folder
{
    // -------------------------------------------------------------------------
    //  Space allocation and initialization
    // -------------------------------------------------------------------------
                                    // init k-nn results
    for (int i = 0; i < top_k; i++) {
        rslt[i].id_ = -1;
        rslt[i].dist_ = MAXREAL;
    }
                                    // objects frequency (collision counts)
    int* frequency = new int[n_pts_];
    for (int i = 0; i < n_pts_; i++) {
        frequency[i] = 0;
    }
                                    // whether an object is checked
    bool* is_checked = new bool[n_pts_];
    for (int i = 0; i < n_pts_; i++) {
        is_checked[i] = false;
    }

    float* data = new float[dim_];  // one object data
    for (int i = 0; i < dim_; i++) {
        data[i] = 0.0f;
    }
    g_memory += ((SIZEBOOL + SIZEINT) * n_pts_ + SIZEFLOAT * dim_);

    bool* flag = new bool[m_];      // whether a hash table is finished
    for (int i = 0; i < m_; i++) {
        flag[i] = true;
    }

    float* q_val = new float[m_];   // hash value of query
    for (int i = 0; i < m_; i++) {
        q_val[i] = -1.0f;
    }
    g_memory += (SIZEFLOAT + SIZEBOOL) * m_;
                                    // left and right page buffer
    PageBuffer* lptr = new PageBuffer[m_];
    PageBuffer* rptr = new PageBuffer[m_];
    g_memory += (SIZECHAR * B_ * m_ * 2 + SIZEINT * m_ * 6);

    // size_ == -1 is the "no page buffered" marker tested in Step 2.1 below.
    for (int i = 0; i < m_; i++) {
        lptr[i].leaf_node_ = NULL;
        lptr[i].index_pos_ = -1;
        lptr[i].leaf_pos_ = -1;
        lptr[i].size_ = -1;

        rptr[i].leaf_node_ = NULL;
        rptr[i].index_pos_ = -1;
        rptr[i].leaf_pos_ = -1;
        rptr[i].size_ = -1;
    }

    // -------------------------------------------------------------------------
    //  Compute hash value <q_val> of query and init the page buffers
    //  <lptr> and <rptr>.
    // -------------------------------------------------------------------------
    page_io_ = 0;                   // num of page i/os
    dist_io_ = 0;                   // num of dist cmpt
    init_buffer(lptr, rptr, q_val, query);

    // -------------------------------------------------------------------------
    //  Determine the basic <radius> and <bucket_width>
    // -------------------------------------------------------------------------
    float radius = find_radius(lptr, rptr, q_val);
    float bucket_width = (w_ * radius / 2.0f);

    // -------------------------------------------------------------------------
    //  K-nn search
    // -------------------------------------------------------------------------
    bool again = true;              // stop flag
    int candidates = 99 + top_k;    // threshold of candidates (cap on dist_io_)
    int flag_num = 0;               // used for bucket bound
    int scanned_id = 0;             // num of scanned id (counted, never read here)
    int checked=0;                  // num of checked objects (counted, never read here)

    int id = -1;                    // current object id
    int count = -1;                 // count size in one page
    int start = -1;                 // start position
    int end = -1;                   // end position

    float left_dist = -1.0f;        // left dist with query
    float right_dist = -1.0f;       // right dist with query
    float knn_dist = MAXREAL;       // kth nn dist
                                    // result entry for update
    ResultItem* item = new ResultItem();
    g_memory += (long) sizeof(ResultItem);

    while (again) {
        // ---------------------------------------------------------------------
        //  Step 1: initialize the stop condition for current round
        // ---------------------------------------------------------------------
        flag_num = 0;
        for (int i = 0; i < m_; i++) {
            flag[i] = true;
        }

        // ---------------------------------------------------------------------
        //  Step 2: find frequent objects
        // ---------------------------------------------------------------------
        while (true) {
            for (int i = 0; i < m_; i++) {
                if (!flag[i]) continue;

                // -------------------------------------------------------------
                //  Step 2.1: compute <left_dist> and <right_dist>
                //  (-1 means "no page buffered on that side")
                // -------------------------------------------------------------
                left_dist = -1.0f;
                if (lptr[i].size_ != -1) {
                    left_dist = calc_proj_dist(&lptr[i], q_val[i]);
                }

                right_dist = -1.0f;
                if (rptr[i].size_ != -1) {
                    right_dist = calc_proj_dist(&rptr[i], q_val[i]);
                }

                // -------------------------------------------------------------
                //  Step 2.2: determine the closer direction (left or right)
                //  and do collision counting to find frequent objects.
                //
                //  For the frequent object, we calc the L2 distance with
                //  query, and update the k-nn result.
                //
                //  Ties (left_dist == right_dist) go to the left side.
                // -------------------------------------------------------------
                if (left_dist >= 0 && left_dist < bucket_width &&
                    ((right_dist >= 0 && left_dist <= right_dist) ||
                    right_dist < 0)) {

                    // Left page is scanned backwards: from leaf_pos_ down to
                    // (leaf_pos_ - size_), exclusive.
                    count = lptr[i].size_;
                    end = lptr[i].leaf_pos_;
                    start = end - count;
                    for (int j = end; j > start; j--) {
                        id = lptr[i].leaf_node_->get_entry_id(j);
                        frequency[id]++;
                        scanned_id++;
                        if (frequency[id] > l_ && !is_checked[id]) {
                            is_checked[id] = true;
                            read_data(id, dim_, B_, data, output_folder);

                            item->dist_ = calc_l2_dist(data, query, dim_);
                            item->id_ = id;
                            knn_dist = update_result(rslt, item, top_k);
                            checked++;

                            // -------------------------------------------------
                            //  Terminating condition 2
                            //  (adding m_ to flag_num forces both enclosing
                            //  "flag_num >= m_" exits below)
                            // -------------------------------------------------
                            dist_io_++;
                            if (dist_io_ >= candidates) {
                                again = false;
                                flag_num += m_;
                                break;
                            }
                        }
                    }
                    // Runs even when the scan above stopped early.
                    update_left_buffer(&lptr[i], &rptr[i]);
                }
                else if (right_dist >= 0 && right_dist < bucket_width &&
                    ((left_dist >= 0 && left_dist > right_dist) ||
                    left_dist < 0)) {

                    // Right page is scanned forwards: from leaf_pos_ up to
                    // (leaf_pos_ + size_), exclusive.
                    count = rptr[i].size_;
                    start = rptr[i].leaf_pos_;
                    end = start + count;
                    for (int j = start; j < end; j++) {
                        id = rptr[i].leaf_node_->get_entry_id(j);
                        frequency[id]++;
                        scanned_id++;
                        if (frequency[id] > l_ && !is_checked[id]) {
                            is_checked[id] = true;
                            read_data(id, dim_, B_, data, output_folder);

                            item->dist_ = calc_l2_dist(data, query, dim_);
                            item->id_ = id;
                            knn_dist = update_result(rslt, item, top_k);
                            checked++;

                            // -------------------------------------------------
                            //  Terminating condition 2
                            // -------------------------------------------------
                            dist_io_++;
                            if (dist_io_ >= candidates) {
                                again = false;
                                flag_num += m_;
                                break;
                            }
                        }
                    }
                    // Runs even when the scan above stopped early.
                    update_right_buffer(&lptr[i], &rptr[i]);
                }
                else {
                    // Neither side lies within the current bucket width:
                    // this table is finished for the round.
                    flag[i] = false;
                    flag_num++;
                }
                if (flag_num >= m_) break;
            }
            if (flag_num >= m_) break;
        }

        // ---------------------------------------------------------------------
        //  Terminating condition 1
        // ---------------------------------------------------------------------
        if (knn_dist < appr_ratio_ * radius && dist_io_ >= top_k) {
            again = false;
            break;
        }

        // ---------------------------------------------------------------------
        //  Step 3: auto-update <radius>
        //  (also reached once after terminating condition 2 set
        //  again = false, when condition 1 above did not hold)
        // ---------------------------------------------------------------------
        radius = update_radius(lptr, rptr, q_val, radius);
        bucket_width = radius * w_ / 2.0f;
    }

    // -------------------------------------------------------------------------
    //  Release space
    //
    //  The guards below use ||, so each group is released if any of its
    //  pointers is non-null; delete / delete[] on a null pointer is a no-op.
    // -------------------------------------------------------------------------
    if (data != NULL || frequency != NULL || is_checked != NULL) {
        delete[] data; data = NULL;
        delete[] frequency; frequency = NULL;
        delete[] is_checked; is_checked = NULL;
        g_memory -= ((SIZEBOOL + SIZEINT) * n_pts_ + SIZEFLOAT * dim_);
    }

    if (q_val != NULL || flag != NULL || item != NULL) {
        delete[] q_val; q_val = NULL;
        delete[] flag; flag = NULL;
        delete item; item = NULL;
        g_memory -= (SIZEFLOAT + SIZEBOOL) * m_;
        g_memory -= (long) sizeof(ResultItem);
    }

    for (int i = 0; i < m_; i++) {
        // ---------------------------------------------------------------------
        //  CANNOT remove the condition
        //      <lptr[i].leaf_node_ != rptr[i].leaf_node_>
        //  Because <lptr[i].leaf_node_> and <rptr[i].leaf_node_> may point
        //  to the same address, then we would delete it twice and receive
        //  the runtime error or segmentation fault.
        // ---------------------------------------------------------------------
        if (lptr[i].leaf_node_ && lptr[i].leaf_node_ != rptr[i].leaf_node_) {
            delete lptr[i].leaf_node_; lptr[i].leaf_node_ = NULL;
        }
        if (rptr[i].leaf_node_) {
            delete rptr[i].leaf_node_; rptr[i].leaf_node_ = NULL;
        }
    }
    delete[] lptr; lptr = NULL;
    delete[] rptr; rptr = NULL;
    g_memory -= (SIZECHAR * B_ * m_ * 2 + SIZEINT * m_ * 6);

    return (page_io_ + dist_io_);
}
double find_radius(const vec & dist, uint target){ uint N = dist.n_rows; bvec mask = zeros<bvec>(N); return find_radius(dist,mask,target); }
/*
 * getParticles: walk every cell of `mat`; each non-zero cell is handed to
 * floodfill() to build a particle, whose center and radius are then computed.
 *
 * A particle is discarded (freeParticle) when it fails the size filter
 * (or the radius filter, if USE_RADIUS_THRESH is defined) or when its
 * x/y position lies outside [5, width-6] x [5, height-6]. Every other
 * particle is appended to the returned list with pushParticle().
 *
 * Returns a newly calloc'd partlist. On allocation failure the function
 * prints an error to stderr and terminates the program with exit(1).
 */
partlist *getParticles(matrix *mat){
	int wpos, hpos;          /* index along width / along height */
	int next_id = 0;         /* counter handed to getid() */
	int width, height;
	partlist *accepted;

	accepted = calloc(1, sizeof(partlist));
	if(accepted == NULL){
		fprintf(stderr, "ident.c: getParticles(): Out of memory error.\n");
		exit(1);
	}

	width = mat->width;
	height = mat->height;

	for(wpos = 0; wpos < width; wpos++){
		for(hpos = 0; hpos < height; hpos++){
			particle *part;
			int rejected;

			if(mat->vals[wpos][hpos] == 0){
				continue;
			}

			part = floodfill(mat, getid(&next_id), wpos, hpos);
			find_center(part);
			find_radius(part);

			/* First filter: drop particles that are too large or too small. */
#ifndef USE_RADIUS_THRESH
			rejected = part->size > MAX_PART_SIZE
			        || part->size < MIN_PART_SIZE;
#endif
#ifdef USE_RADIUS_THRESH
			rejected = part->radius > RAD_MAX
			        || part->radius < RAD_MIN;
#endif

			/* Second filter: drop particles too close to the border. */
			if(!rejected){
				rejected = part->x > mat->width-6
				        || part->x < 5
				        || part->y > mat->height-6
				        || part->y < 5;
			}

			if(rejected){
				freeParticle(part);
			}else{
				pushParticle(accepted, part);
			}
		}
	}

	return accepted;
}