/* Re-rank the short-list of each query by recomputed distances.
 *
 *   n       number of queries
 *   nb      number of database vectors (unused here)
 *   d       vector dimension
 *   k       short-list length per query
 *   b       database vectors, accessed as b[id * d .. id * d + d - 1]
 *   v       query vectors, accessed as v[i * d .. i * d + d - 1]
 *   assign  n * k database ids; a negative id ends the list of a query.
 *           Reordered in place.
 *   dists   n * k output distances, written for the valid entries only.
 *
 * For every query the referenced database vectors are gathered into a
 * contiguous buffer, distances are recomputed with compute_distances_1()
 * and ids/distances are permuted according to fvec_sort_index().
 *
 * All offsets are computed in size_t so that large databases (id * d above
 * INT_MAX) do not overflow int arithmetic.
 */
void knn_reorder_shortlist(int n, int nb, int d, int k, const float *b, const float *v, int *assign, float *dists)
{
  float *subb = fvec_new((long) k * d);
  float *diststmp = fvec_new(k);
  int *perm = ivec_new(k);
  int *assigntmp = ivec_new(k);
  int i, j;

  (void) nb;

  for (i = 0; i < n; i++) {
    int *assigni = assign + (size_t) i * k;
    float *disti = dists + (size_t) i * k;
    int ki;

    /* gather the candidates of query i; stop at the first negative id */
    for (j = 0; j < k; j++) {
      if (assigni[j] < 0)
        break;
      memcpy(subb + (size_t) j * d, b + (size_t) assigni[j] * d, sizeof(*subb) * d);
    }
    ki = j;

    compute_distances_1(d, ki, v + (size_t) i * d, subb, diststmp);
    fvec_sort_index(diststmp, ki, perm);

    /* apply the permutation to both ids and distances */
    memcpy(assigntmp, assigni, sizeof(*assigni) * ki);
    for (j = 0; j < ki; j++) {
      disti[j] = diststmp[perm[j]];
      assigni[j] = assigntmp[perm[j]];
    }
  }

  free(assigntmp);
  free(diststmp);
  free(subb);
  free(perm);
}
/* Bag-of-features histograms for several subsets of the same point set.
 *
 * Every one of the n input vectors is first assigned to a centroid by nn().
 * Subset s covers the entries subset_indexes[subset_ends[s-1] .. subset_ends[s]-1]
 * (the first subset starts at 0); for each of them the k-bin histogram
 * desc[s * k .. s * k + k - 1] counts the assigned centroid of every member.
 */
void bof_compute_subsets(int k, int d, const float *centroids, int n, const float *v, int n_subset, const int *subset_indexes, const int *subset_ends, float *desc)
{
  int *nearest = ivec_new(n);
  int s;
  int first = 0;

  nn (n, k, d, centroids, v, nearest);
  fvec_0 (desc, k * n_subset);

  for (s = 0; s < n_subset; s++) {
    float *hist = desc + s * k;
    const int last = subset_ends[s];
    int pos = first;

    while (pos < last) {
      const int pt = subset_indexes[pos];
      hist[nearest[pt]] ++;
      pos++;
    }
    first = last;
  }

  free (nearest);
}
/* Reorder eigenvalues and the matching eigenvectors in place.
 *
 * The order is the permutation produced by fvec_sort_index() on eigval; when
 * criterion is non-zero that permutation is reversed. eigvec is treated as d
 * consecutive rows of d floats, row i belonging to eigval[i].
 */
void eigs_reorder (int d, float * eigval, float * eigvec, int criterion)
{
  int *order = ivec_new (d);
  float *val_sorted = fvec_new (d);
  float *vec_sorted = fvec_new (d * d);
  int lo, hi, r;

  fvec_sort_index (eigval, d, order);

  if (criterion) {
    /* flip the permutation end for end */
    for (lo = 0, hi = d - 1; lo < d / 2; lo++, hi--) {
      const int keep = order[lo];
      order[lo] = order[hi];
      order[hi] = keep;
    }
  }

  for (r = 0; r < d; r++) {
    const int src = order[r];
    val_sorted[r] = eigval[src];
    memcpy (vec_sorted + r * d, eigvec + src * d, sizeof (*vec_sorted) * d);
  }

  memcpy (eigval, val_sorted, d * sizeof (*eigval));
  memcpy (eigvec, vec_sorted, d * d * sizeof (*eigvec));

  free (val_sorted);
  free (vec_sorted);
  free (order);
}
/* VLAD descriptors for several subsets of the same point set.
 *
 * Each of the n vectors is assigned to a centroid by nn(). Subset s covers
 * subset_indexes[subset_ends[s-1] .. subset_ends[s]-1] (first subset starts
 * at 0) and fills the k * d block desc[s * k * d ...] with the sum, per
 * centroid, of (vector - centroid) over the members of the subset.
 */
void vlad_compute_subsets(int k, int d, const float *centroids, int n, const float *v, int n_subset, const int *subset_indexes, const int *subset_ends, float *desc)
{
  int *nearest = ivec_new(n);
  int s, pos, c;
  int first = 0;

  nn (n, k, d, centroids, v, nearest);
  fvec_0 (desc, k * d * n_subset);

  for (s = 0; s < n_subset; s++) {
    float *block = desc + s * k * d;
    const int last = subset_ends[s];

    for (pos = first; pos < last; pos++) {
      const int pt = subset_indexes[pos];
      const int cell = nearest[pt];

      for (c = 0; c < d; c++)
        block[cell * d + c] += v[pt * d + c] - centroids[cell * d + c];
    }
    first = last;
  }

  free(nearest);
}
/*
 * Descend the cluster tree stored in ahct, starting at element 0, and return
 * the index of the leaf cluster reached for vector v (dimension d).
 *
 * At every non-leaf cluster, knn_full() is asked for the single nearest of
 * the cluster's child centroids; the walk continues with the tree index
 * stored for that child in the cluster's children array.
 */
int ahc_quantize(DyArray *ahct, float *v, int d)
{
    __assertinfo(ahct == NULL || v == NULL || d <= 0, "IPP");

    int *best_child = ivec_new(1);
    float *best_dist = fvec_new(1);
    int node = 0;

    for (;;) {
        Cluster *cur = (Cluster*)DyArray_get(ahct, node, 1);
        int fanout;

        if (cur->type == ClusterType_Leaf) {
            break;
        }

        /* pick the closest centroid among the children of this cluster */
        fanout = cur->children.count;
        knn_full(2, 1, fanout, d, 1, cur->cents, v, NULL, best_child, best_dist);

        node = *(int*)DyArray_get(&cur->children, best_child[0], 1);
    }

    FREE(best_child);
    FREE(best_dist);

    return node;
}
/*
 * Check whether an index stored in `folder` looks complete.
 *
 * Returns true when the config file exists, two ints can be read from it,
 * and both the first cluster file ("<folder>/0<postfix>") and the last one
 * ("<folder>/<header[1]-1><postfix>") exist. Returns false otherwise,
 * including when a path does not fit in the local buffer.
 *
 * NOTE(review): the second header int is presumably the number of clusters;
 * confirm against the writer of the config file.
 *
 * The header is read into a stack array and the file is closed right after
 * reading, so no early return leaks memory or a FILE handle.
 */
bool ahc_check_index(const char *folder)
{
    __assertinfo(folder == NULL, "IPP");

    char file[255];
    int header[2] = {0, 0};
    FILE *fp;
    size_t got;
    int len;

    /* check out the config file */
    len = snprintf(file, sizeof(file), "%s/%s", folder, HCluster_ConfigFile);
    if (len < 0 || (size_t)len >= sizeof(file)) {
        return false;
    }
    if (!file_exists(file)) {
        return false;
    }
    fp = open_file(file, "rb");
    if (fp == NULL) {
        return false;
    }
    got = fread(header, sizeof(int), 2, fp);
    fclose(fp);
    if (got != 2) {
        return false;
    }

    /* check out the existence of cluster files [first, end] */
    len = snprintf(file, sizeof(file), "%s/0%s", folder, HCluster_Postfix);
    if (len < 0 || (size_t)len >= sizeof(file)) {
        return false;
    }
    if (!file_exists(file)) {
        return false;
    }

    len = snprintf(file, sizeof(file), "%s/%d%s", folder, header[1] - 1, HCluster_Postfix);
    if (len < 0 || (size_t)len >= sizeof(file)) {
        return false;
    }
    if (!file_exists(file)) {
        return false;
    }

    return true;
}
/* Convert a dense int vector to sparse (index, value) form.
 *
 * Allocates two arrays sized by ivec_nz(v, n), fills them with the positions
 * and values of the non-zero entries of v in increasing position order, and
 * hands them to the caller through idx_out / v_out. Returns the value of
 * ivec_nz(v, n).
 */
int ivec_to_spivec (int * v, int n, int ** idx_out, int ** v_out)
{
  const int count = ivec_nz (v, n);
  int *positions = ivec_new (count);
  int *values = ivec_new (count);
  int src;
  int w = 0;      /* write cursor in the sparse arrays */

  for (src = 0; src < n; src++) {
    if (v[src] == 0)
      continue;
    positions[w] = src;
    values[w] = v[src];
    w++;
  }

  *idx_out = positions;
  *v_out = values;
  return count;
}
/*
 * Initialise a cluster that will hold npts member ids.
 *
 * The id array is allocated with ivec_new(npts); data and centroids start
 * as NULL, the type is ClusterType_Inner, and the children array is set up
 * through DyArray_init() with int elements and a capacity argument of -1.
 */
void Cluster_init(Cluster *clu, int npts)
{
    ASSERTINFO(clu == NULL || npts <= 0, "IPP");

    clu->type = ClusterType_Inner;
    clu->npts = npts;
    clu->idx = ivec_new(npts);

    /* nothing attached yet */
    clu->data = NULL;
    clu->cents = NULL;

    DyArray_init(&clu->children, sizeof(int), -1);
}
/* Allocate an int vector of n entries (via ivec_new) and set every entry
 * to val. The loop index is a long, matching the type of n, so vectors
 * longer than INT_MAX do not overflow the counter.
 */
int *ivec_new_set (long n, int val)
{
  long i;
  int *ret = ivec_new (n);

  for (i = 0; i < n; i++)
    ret[i] = val;

  return ret;
}
/* Allocate an int vector of b - a entries (via ivec_new) holding the values
 * a, a+1, ..., b-1.
 *
 * The loop index is a long like the bounds; each value is narrowed to int
 * only when stored, so bounds outside the int range no longer truncate the
 * loop counter itself.
 */
int * ivec_new_range (long a, long b)
{
  long i;
  int *ret = ivec_new (b - a);

  for (i = a; i < b; i++)
    ret[i - a] = (int) i;

  return ret;
}
/* Build a new vector made of nrepeat copies of a[0..n-1]; copy number r
 * (starting at 0) has r * inc added to every entry.
 */
int *ivec_repeat_with_inc(const int *a,int n, int nrepeat, int inc)
{
  int *out = ivec_new(nrepeat*n);
  int rep = 0;

  while (rep < nrepeat) {
    int *chunk = out + rep*n;

    ivec_cpy(chunk, a, n);
    ivec_add_scalar(chunk, n, rep*inc);
    rep++;
  }

  return out;
}
/* Benchmark fvec_k_max_hoare against fvec_k_max_maxheap.
 *
 * Usage: prog n nrepeat
 *
 * A random vector of n floats is generated once; for k in 1, 2, 5, 10, 20,
 * 50, ... (k0 * {1,2,5} while k0 < n) both selection routines are run
 * nrepeat times and the average wall-clock time of each is printed.
 *
 * Arguments are validated explicitly instead of through assert(), which
 * disappears under NDEBUG, and the work buffers are released before exit.
 */
int main(int argc, char** argv)
{
  if (argc != 3) {
    fprintf(stderr, "usage: %s n nrepeat\n", argc > 0 ? argv[0] : "bench");
    return 1;
  }

  int n = atoi(argv[1]);
  int nrepeat = atoi(argv[2]);

  if (n <= 0 || nrepeat <= 0) {
    fprintf(stderr, "n and nrepeat must be positive integers\n");
    return 1;
  }

  float * v = fvec_new_rand(n);
  int * idx = ivec_new(n);
  int * idx2 = ivec_new(n);

  int k0, ki;
  int m[3] = {1, 2, 5};

  for(k0 = 1; k0 < n; k0 *= 10) {
    for(ki = 0; ki < 3; ki++) {
      int k = k0 * m[ki];
      printf("k = %d ", k);

      double st0 = 0, st1 = 0;
      int r;

      for(r = 0; r < nrepeat; r++) {
        double t0 = getmillisecs();
        fvec_k_max_hoare(v, n, idx, k);
        double t1 = getmillisecs();
        fvec_k_max_maxheap(v, n, idx2, k);
        double t2 = getmillisecs();

        st0 += t1 - t0;
        st1 += t2 - t1;
      }

      printf("qselect: %.4f ms, maxheap: %.4f ms\n", st0 / nrepeat, st1 / nrepeat);
    }
  }

  free(idx2);
  free(idx);
  free(v);

  return 0;
}
/* Allocate an int vector of d entries and fill it with d raw ints read
 * from f.
 *
 * Returns the new vector, or NULL after printing a perror() message when
 * fewer than d elements could be read. The message is printed before the
 * buffer is released, because free() is allowed to modify errno.
 */
int * ivec_new_fread_raw(FILE * f, long d)
{
  int * v = ivec_new(d);
  long ret = fread (v, sizeof (*v), d, f);

  if (ret != d) {
    perror ("# fvec_fread error 2");
    free(v);
    return NULL;
  }

  return v;
}
/* Extract the first nrow_out entries of each of the ncol columns of a.
 *
 * a is read as ncol consecutive columns of nrow ints; the result is a newly
 * allocated array of ncol consecutive columns of nrow_out ints.
 */
int *imat_get_submatrix (const int *a, int nrow, int nrow_out, int ncol)
{
  int *sub = ivec_new(nrow_out*(long)ncol);
  const size_t col_bytes = nrow_out*sizeof(*a);
  long col = 0;

  while (col < ncol) {
    memcpy(sub + col*nrow_out, a + col*nrow, col_bytes);
    col++;
  }

  return sub;
}
/* Bag-of-features histogram: assign each of the n vectors to a centroid
 * with nn(), clear the k bins of desc, then count one hit per vector in the
 * bin of its assigned centroid.
 */
void bof_compute (int k, int d, const float *centroids, int n, const float *v, int *desc)
{
  int *nearest = ivec_new(n);
  int pt = 0;

  nn (n, k, d, centroids, v, nearest);
  ivec_0(desc,k);

  while (pt < n) {
    desc[nearest[pt]]++;
    pt++;
  }

  free(nearest);
}
/* Bag-of-features histogram with multiple assignment: knn_thread() returns
 * ma centroid ids for each of the n vectors (using nt as its last argument),
 * and every returned id adds one hit to its bin in desc (k bins, cleared
 * first). The alpha parameter is not used by this implementation.
 */
void bof_compute_ma (int k, int d, const float *centroids, int n, const float *v, int *desc, int ma, float alpha, int nt)
{
  int *nearest = ivec_new(n*ma);
  int slot = 0;

  knn_thread (n, k, d, ma, centroids, v, nearest, nt);
  ivec_0(desc,k);

  while (slot < n*ma) {
    desc[nearest[slot]]++;
    slot++;
  }

  free(nearest);
}
/* Collect the positions of the non-zero entries of v[0..n-1].
 *
 * A new array sized by ivec_nz(v, n) is allocated, filled in increasing
 * position order and returned through nzpos_out. The function returns the
 * value of ivec_nz(v, n).
 */
int ivec_find (const int *v, int n, int ** nzpos_out)
{
  const int count = ivec_nz (v, n);
  int *found = ivec_new (count);
  int *w = found;       /* next free slot */
  int src;

  for (src = 0; src < n; src++) {
    if (v[src] == 0)
      continue;
    *w++ = src;
  }

  *nzpos_out = found;
  return count;
}
/* Read a file of sparse float vectors and return them as dense vectors.
 *
 * Record layout, as read by this function:
 *   int nz, nz ints (indices), int nz again, nz floats (values).
 * Each record is expanded to d floats with spfvec_to_fvec() and appended to
 * a growing array.
 *
 *   fname   file to read
 *   d       dimension of the dense vectors
 *   vf_out  receives the array of n * d floats (caller frees)
 *
 * Returns the number of vectors read, or -1 after printing a message on
 * stderr. On error *vf_out is left untouched.
 *
 * Compared with the previous version:
 *  - the file is closed on every error path (it used to leak);
 *  - nz is checked against [0, d] before reading into the d-sized buffers,
 *    so a corrupt file can no longer overflow them;
 *  - a realloc() failure is detected and no longer loses the old block;
 *  - the file is opened in binary mode.
 */
int fvecs_new_read_sparse (const char *fname, int d, float **vf_out)
{
  float *vf = NULL;
  long n = 0, na = 0;
  const char *err = "";
  float *vals = fvec_new (d);
  int *idx = ivec_new (d);
  FILE *f = fopen (fname, "rb");

  if (!f)
    goto fail;

  for (;;) {
    int nz, nz2;
    float *dense;

    if (fread (&nz, sizeof (int), 1, f) != 1) {
      if (feof (f))
        break;                  /* clean end of file */
      err = "err 1";
      goto fail;
    }

    /* idx and vals only have room for d entries */
    if (nz < 0 || nz > d) {
      err = "invalid number of non-zeros";
      goto fail;
    }

    if (fread (idx, sizeof (int), nz, f) != (size_t) nz) {
      err = "err 2";
      goto fail;
    }
    if (fread (&nz2, sizeof (int), 1, f) != 1) {
      err = "err 3";
      goto fail;
    }
    if (nz != nz2) {
      err = "err 4";
      goto fail;
    }
    if (fread (vals, sizeof (float), nz, f) != (size_t) nz) {
      err = "err 5";
      goto fail;
    }

    if (n >= na) {
      long new_na = (na + 1) * 3 / 2;
      size_t bytes = (size_t) new_na * sizeof (float) * (size_t) d;
      /* never ask for 0 bytes: realloc(p, 0) has no portable meaning */
      float *grown = realloc (vf, bytes ? bytes : 1);

      if (!grown) {
        err = "out of memory";
        goto fail;
      }
      vf = grown;
      na = new_na;
    }

    dense = spfvec_to_fvec (idx, vals, nz, d);
    memcpy (vf + n * d, dense, sizeof (float) * d);
    free (dense);
    n++;
  }

  free (vals);
  free (idx);
  fclose (f);

  *vf_out = vf;
  return n;

fail:
  /* report first: the cleanup calls below may change errno */
  fprintf (stderr, "fvecs_new_read_sparse %s: %s", fname, err);
  perror ("");
  free (vf);
  free (vals);
  free (idx);
  if (f)
    fclose (f);
  return -1;
}
/* Plain VLAD descriptor.
 *
 * Each of the n vectors (dimension d) is assigned to one of the k centroids
 * by nn(); desc (k * d floats, cleared first) accumulates, per centroid, the
 * sum of (vector - centroid) over the vectors assigned to it.
 */
void vlad_compute(int k, int d, const float *centroids, int n, const float *v, float *desc)
{
  int *nearest = ivec_new (n);
  int pt, c;

  nn (n, k, d, centroids, v, nearest);
  fvec_0 (desc, k * d);

  for (pt = 0; pt < n; pt++) {
    const int cell = nearest[pt];

    for (c = 0; c < d; c++)
      desc[cell*d+c] += v[pt*d+c] - centroids[cell*d+c];
  }

  free(nearest);
}
/* Return a newly allocated array holding 0..n-1 in which the first
 * min(k, n) positions have been drawn at random, without replacement, by a
 * partial shuffle driven by rand_r() and the given seed.
 *
 * The number of swaps is capped at n: with k > n the previous code reached
 * `rand_r() % 0` and indexed past the end of the array.
 */
int * ivec_new_random_idx_r (int n, int k, unsigned int seed)
{
  int *idx = ivec_new (n);
  const int nswap = k < n ? k : n;
  int i;

  for (i = 0; i < n; i++)
    idx[i] = i;

  for (i = 0; i < nswap; i++) {
    int j = i + rand_r (&seed) % (n - i);
    /* swap i and j */
    int p = idx[i];
    idx[i] = idx[j];
    idx[j] = p;
  }

  return idx;
}
/* Weighted VLAD descriptor.
 *
 * Same accumulation as the plain VLAD, except that the residual of vector i
 * with respect to its assigned centroid is multiplied by weights[i] before
 * being added to desc (k * d floats, cleared first).
 */
void vlad_compute_weighted(int k, int d, const float *centroids, int n, const float *v, const float *weights, float *desc)
{
  int *nearest=ivec_new(n);
  int pt, c;

  nn(n,k,d,centroids,v,nearest,NULL,NULL);
  fvec_0(desc,k*d);

  for (pt = 0; pt < n; pt++) {
    const int cell = nearest[pt];
    const float w = weights[pt];

    for (c = 0; c < d; c++)
      desc[cell*d+c] += (v[pt*d+c]-centroids[cell*d+c])*w;
  }

  free(nearest);
}
Cluster *ahc_load_a_cluster(const char *folder, int cid, int d, int bf){ __assertinfo(folder == NULL || cid < 0, "IPP"); Cluster *clu = (Cluster*)malloc(sizeof(Cluster)); char file[255]; FILE *fp = NULL; int _type, _count, _bfi; float *cent = fvec_new(d*bf); int *children = ivec_new(bf); sprintf(file, "%s/%d%s", folder, cid, HCluster_Postfix); fp = open_file(file, "rb"); fread(&_type, sizeof(int), 1, fp); if(_type != (int)ClusterType_Leaf){ fread(&_bfi, sizeof(int), 1, fp); fread(cent, sizeof(float), d*_bfi, fp); fread(children, sizeof(int), _bfi, fp); } fread(&_count, sizeof(int), 1, fp); Cluster_init(clu, _count); /* count */ fread(clu->idx, sizeof(int), _count, fp); /* idx */ fclose(fp); // fulfill type, centroids and children according to the type of the cluster clu->type = (ClusterType)_type; /* type */ if(ClusterType_Leaf == clu->type){ DyArray_init(&clu->children, sizeof(int), 0); /* bfi */ }else{ clu->cents = fvec_new(d * _bfi); memcpy(clu->cents, cent, sizeof(float) * d * _bfi); /* cent */ DyArray_init(&clu->children, sizeof(int), _bfi); /* bfi */ DyArray_add(&clu->children, (void*)children, _bfi); /* children */ } FREE(cent); FREE(children); return clu; }
/* VLAD-style descriptor with many variants selected by `flags`.
 *
 *   k, d       number of centroids and vector dimension
 *   centroids  k * d floats
 *   n, v       number of input vectors and the vectors (n * d floats)
 *   flags      variant selector, see below
 *   desc       output; k*d floats unless stated otherwise
 *
 * For every value of flags except 11 and 12, each vector is assigned to one
 * centroid by nn(); the branches then differ in what is accumulated:
 *
 *   6, 7   per centroid and dimension, quantiles of the sorted coordinate
 *          values minus the centroid, times the cell population
 *          (3 values per entry for flags==6, 1 for flags==7;
 *          desc holds k*d*n_quantile floats)
 *   5      sum of the raw vectors per centroid
 *   8, 9   sum of residuals divided by their norm (8) or by the square root
 *          of their norm (9); zero residuals are skipped
 *   10     sum of the raw vectors, then fvec_normalize(.., 2.0) per centroid
 *   13     sum of squared residuals
 *   14     square root of the sum of squared deviations of the residuals
 *          from their per-centroid mean
 *   15     desc holds 2*k*d floats: residual sums, then square root of the
 *          summed squared deviations from the per-centroid mean
 *   17     desc holds 2*k*d floats: positive residual parts, then the
 *          magnitudes of the non-positive parts
 *   other  plain residual sum, optionally post-processed for flags 1, 2, 16;
 *          flags 3 and 4 hit an assert
 *
 * For flags 11 and 12, knn() returns ma = 4 (resp. 2) centroids per vector
 * and the residual to each of them is accumulated.
 */
void vlad_compute(int k, int d, const float *centroids, int n, const float *v,int flags, float *desc) {
  int i,j,l,n_quantile,i0,i1,ai,a,ma,ni;
  int *perm ;
  float un , diff;
  float *tab,*u,*avg,*sum,*mom2,*dists;
  int *hist,*assign;
  if(flags<11 || flags>=13) {
    /* single assignment: one centroid per input vector */
    assign=ivec_new(n);
    nn(n,k,d,centroids,v,assign,NULL,NULL);
    if(flags==6 || flags==7) {
      n_quantile = flags==6 ? 3 : 1;
      fvec_0(desc,k*d*n_quantile);
      perm = ivec_new(n);
      tab = fvec_new(n);
      /* perm lists the vectors grouped by assigned centroid */
      ivec_sort_index(assign,n,perm);
      i0=0;
      for(i=0;i<k;i++) {
        /* [i0, i1) is the run of perm assigned to centroid i */
        i1=i0;
        while(i1<n && assign[perm[i1]]==i) {
          i1++;
        }
        if(i1==i0) continue;
        for(j=0;j<d;j++) {
          for(l=i0;l<i1;l++) {
            tab[l-i0]=v[perm[l]*d+j];
          }
          ni=i1-i0;
          fvec_sort(tab,ni);
          for(l=0;l<n_quantile;l++) {
            desc[(i*d+j)*n_quantile+l]=(tab[(l*ni+ni/2)/n_quantile]-centroids[i*d+j])*ni;
          }
        }
        i0=i1;
      }
      free(perm);
      free(tab);
    } else if(flags==5) {
      fvec_0(desc,k*d);
      for(i=0;i<n;i++) {
        for(j=0;j<d;j++) {
          desc[assign[i]*d+j]+=v[i*d+j];
        }
      }
    } else if(flags==8 || flags==9) {
      fvec_0(desc,k*d);
      u = fvec_new(d);
      for(i=0;i<n;i++) {
        /* u = residual of vector i, un = its Euclidean norm */
        fvec_cpy(u,v+i*d,d);
        fvec_sub(u,centroids+assign[i]*d,d);
        un=(float)sqrt(fvec_norm2sqr(u,d));
        if(un==0) continue;
        if(flags==8) {
          fvec_div_by(u,d,un);
        } else if(flags==9) {
          fvec_div_by(u,d,sqrt(un));
        }
        fvec_add(desc+assign[i]*d,u,d);
      }
      free(u);
    } else if(flags==10) {
      fvec_0(desc,k*d);
      for(i=0;i<n;i++) {
        for(j=0;j<d;j++) {
          desc[assign[i]*d+j]+=v[i*d+j];
        }
      }
      for(i=0;i<k;i++) {
        fvec_normalize(desc+i*d,d,2.0);
      }
    } else if(flags==13) {
      fvec_0(desc,k*d);
      for(i=0;i<n;i++) {
        for(j=0;j<d;j++) {
          desc[assign[i]*d+j]+=(float)sqr(v[i*d+j]-centroids[assign[i]*d+j]);
        }
      }
    } else if(flags==14) {
      /* first pass: mean residual per centroid */
      avg = fvec_new_0(k*d);
      for(i=0;i<n;i++) {
        for(j=0;j<d;j++) {
          avg[assign[i]*d+j]+=v[i*d+j]-centroids[assign[i]*d+j];
        }
      }
      hist=ivec_new_histogram(k,assign,n);
      for(i=0;i<k;i++) {
        if(hist[i]>0) {
          for(j=0;j<d;j++) {
            avg[i*d+j]/=hist[i];
          }
        }
      }
      free(hist);
      /* second pass: squared deviation from that mean */
      fvec_0(desc,k*d);
      for(i=0;i<n;i++) {
        for(j=0;j<d;j++) {
          desc[assign[i]*d+j]+=(float)(sqr(v[i*d+j]-centroids[assign[i]*d+j]-avg[assign[i]*d+j]));
        }
      }
      fvec_sqrt(desc,k*d);
      free(avg);
    } else if(flags==15) {
      fvec_0(desc,k*d*2);
      /* first half of desc: residual sums */
      sum = desc;
      for(i=0;i<n;i++) {
        for(j=0;j<d;j++) {
          sum[assign[i]*d+j]+=v[i*d+j]-centroids[assign[i]*d+j];
        }
      }
      hist = ivec_new_histogram(k,assign,n);
      /* second half of desc: deviations from the mean residual */
      mom2 = desc+k*d;
      for(i=0;i<n;i++) {
        ai=assign[i];
        for(j=0;j<d;j++) {
          mom2[ai*d+j]+=(float)(sqr(v[i*d+j]-centroids[ai*d+j]-sum[ai*d+j]/hist[ai]));
        }
      }
      fvec_sqrt(mom2,k*d);
      free(hist);
    } else if(flags==17) {
      fvec_0(desc,k*d*2);
      for(i=0;i<n;i++) {
        for(j=0;j<d;j++) {
          diff=v[i*d+j]-centroids[assign[i]*d+j];
          if(diff>0) {
            desc[assign[i]*d+j]+=diff;
          } else {
            desc[assign[i]*d+j+k*d]-=diff;
          }
        }
      }
    } else {
      /* default: plain residual sum, then optional post-processing */
      fvec_0(desc,k*d);
      for(i=0;i<n;i++) {
        for(j=0;j<d;j++) {
          desc[assign[i]*d+j]+=v[i*d+j]-centroids[assign[i]*d+j];
        }
      }
      if(flags==1) {
        hist=ivec_new_histogram(k,assign,n);
        /* printf("unbalance factor=%g\n",ivec_unbalanced_factor(hist,k)); */
        /* NOTE(review): no guard on hist[i]==0 here, unlike flags 14 and 16;
           an empty cell divides 0 by 0 */
        for(i=0;i<k;i++) {
          for(j=0;j<d;j++) {
            desc[i*d+j]/=hist[i];
          }
        }
        free(hist);
      }
      if(flags==2) {
        for(i=0;i<k;i++) {
          fvec_normalize(desc+i*d,d,2.0);
        }
      }
      if(flags==3 || flags==4) {
        assert(!"not implemented");
      }
      if(flags==16) {
        hist=ivec_new_histogram(k,assign,n);
        for(i=0;i<k;i++) {
          if(hist[i]>0) {
            /* NOTE(review): the result of fvec_norm() is discarded; this
               looks like it was meant to be fvec_normalize() - confirm */
            fvec_norm(desc+i*d,d,2);
            fvec_mul_by(desc+i*d,d,sqrt(hist[i]));
          }
        }
        free(hist);
      }
    }
    free(assign);
  } else if(flags==11 || flags==12) {
    /* multiple assignment: ma centroids per input vector */
    ma=flags==11 ? 4 : 2;
    assign=ivec_new(n*ma);
    dists=knn(n,k,d,ma,centroids,v,assign,NULL,NULL);
    fvec_0(desc,k*d);
    for(i=0;i<n;i++) {
      for(j=0;j<d;j++) {
        for(a=0;a<ma;a++) {
          desc[assign[ma*i+a]*d+j]+=v[i*d+j]-centroids[assign[ma*i+a]*d+j];
        }
      }
    }
    free(dists);
    free(assign);
  }
}
/*
 * Build the adaptive hierarchical clustering tree of data set `ds` in `ahct`.
 *
 *   ahct  dynamic array of Cluster receiving the tree; element 0 is the root
 *   bf    maximum branch factor
 *   rho   divisor used to derive the branch factor of a cluster from its
 *         size: bfi = min(bf, round(npts / rho))
 *   ds    data set (n points of dimension d)
 *
 * Clusters are processed in the order they are appended to ahct. A cluster
 * whose branch factor is below 2 becomes a leaf; otherwise its points are
 * split with kmeans() into bfi children, which are appended to ahct.
 *
 * Fix: the member points are now copied with sizeof(float) * d bytes. The
 * previous memcpy length was `d` bytes, so only a quarter of every vector
 * reached k-means and the rest of the buffer was never written.
 */
void ahc_clustering(DyArray *ahct, int bf, int rho, const fDataSet *ds)
{
    ASSERTINFO(ahct == NULL || bf <= 0 || rho <= 0 || ds == NULL, "IPP");

    int n = ds->n;
    int d = ds->d;
    Cluster _clu, clu, *pclu = NULL;
    int i;
    int iclu, bfi, ni, ori_id;          /* index, branch factor and size of the current cluster */
    int *nassign = ivec_new_set(bf, 0);
    int *assign = NULL;
    float *cent = fvec_new(d*bf);
    float *mem_points = NULL;
    DyArray *member = (DyArray*)malloc(sizeof(DyArray)*bf);

    /* initialize the first cluster (root) to add it to the ahc tree */
    Cluster_init(&clu, n);
    for (i = 0; i < n; i++) {
        clu.idx[i] = i;
    }
    clu.type = ClusterType_Root;
    DyArray_add(ahct, (void*)&clu, 1);

    /* begin the loop of adaptive hierarchical clustering */
    iclu = 0;
    while (iclu < ahct->count) {
        /* figure out the adaptive branch factor of the i-th cluster */
        pclu = (Cluster*)DyArray_get(ahct, iclu, 1);
        ni = pclu->npts;
        bfi = i_min(bf, (int)round(ni / (float)rho));

        if (bfi < 2) {
            /* this is a leaf cluster: mark it, nothing to divide */
            pclu->type = ClusterType_Leaf;
        } else {
            printf("----------------- cluster %d, bfi-%d:\n", iclu, bfi);

            /* this is an inner cluster: divide it. Work on a copy, because
             * adding children to ahct below may move its elements. */
            memcpy(&_clu, pclu, sizeof(Cluster));

            /* extract data points from the original dataset according to the idx */
            mem_points = fvec_new(ni * d);
            for (i = 0; i < ni; i++) {
                memcpy(mem_points + i*d, ds->data + _clu.idx[i]*d, sizeof(float)*d);
            }

            /* divide this cluster */
            assign = ivec_new(ni);
            if (iclu == 30) {
                /* debugging leftover, kept so the program output is unchanged */
                ivec_print(_clu.idx, _clu.npts);
            }
            (void)kmeans(d, ni, bfi, CLUSTERING_NITER, mem_points,
                         CLUSTERING_NTHREAD | KMEANS_QUIET | KMEANS_INIT_BERKELEY,
                         CLUSTERING_SEED, CLUSTERING_NREDO,
                         cent, NULL, assign, nassign);

            /* prepare space for members' ids */
            for (i = 0; i < bfi; i++) {
                DyArray_init(&member[i], sizeof(int), nassign[i]);
            }
            /* extract member points' ids for each children cluster */
            for (i = 0; i < ni; i++) {
                ori_id = _clu.idx[i];
                DyArray_add(&member[assign[i]], (void*)&ori_id, 1);
            }

            /* fulfill the type, centroids and the children of this cluster,
             * add them to the ahct */
            _clu.type = ClusterType_Inner;
            _clu.cents = fvec_new(d * bfi);
            memcpy(_clu.cents, cent, sizeof(float)*d*bfi);
            DyArray_init(&_clu.children, sizeof(int), bfi);
            for (i = 0; i < bfi; i++) {
                Cluster_init(&clu, nassign[i]);
                memcpy(clu.idx, (int*)member[i].elem, sizeof(int)*nassign[i]);

                DyArray_add(&_clu.children, (void*)&ahct->count, 1);    /* the i-th child's position */
                DyArray_add(ahct, (void*)&clu, 1);                      /* add the i-th child to the ahct */
            }

            /* the elements of ahct may have moved while it grew, so fetch
             * the address of the current cluster again before writing back */
            pclu = (Cluster*)DyArray_get(ahct, iclu, 1);
            memcpy(pclu, &_clu, sizeof(Cluster));

            /* report */
            ivec_print(nassign, bfi);
            ivec_print((int*)_clu.children.elem, _clu.children.count);

            /* unset or release */
            FREE(mem_points);
            FREE(assign);
            for (i = 0; i < bfi; i++) {
                DyArray_unset(&member[i]);
            }
        }

        /* move to next cluster */
        iclu++;
    }

    FREE(nassign);
    FREE(cent);
    FREE(member);
    pclu = NULL;
}
/* Return a newly allocated copy of the n ints starting at v. */
int * ivec_new_cpy (const int * v, long n)
{
  int *dup = ivec_new (n);
  const size_t nbytes = n * sizeof (*dup);

  memcpy (dup, v, nbytes);

  return dup;
}
void ANC::search(const fDataSet *baseset, const fDataSet *queryset, char *folder, int nk, DoubleIndex **knnset, Cost *cost, int lb_type) { char filename[256]; int nq = queryset->n, qi, i, set_i; int cid; float knn_R; float *set; int *set_id; int set_num; float *set_vector = NULL; float *query = fvec_new(d); DoubleIndex candidate; DoubleIndex *lb = (DoubleIndex*)malloc(sizeof(DoubleIndex)*ncenter); // lower bounds between query and all centers Cost costi; struct timeval tvb, tve, tvb_lb, tve_lb, tvb_io, tve_io; for(qi = 0; qi < nq; qi++) { /// initialize the cost recorder CostInit(&costi); gettimeofday(&tvb, NULL); /// the qi-th query memcpy(query, queryset->data+qi*d, sizeof(float)*d); knnset[qi] = (DoubleIndex*)malloc(sizeof(DoubleIndex)*nk); /// calculate and sort the lower bounds between query and all clusters to get the right order gettimeofday(&tvb_lb, NULL); if((int)Algorithm_Search_CrossLB == lb_type){ lowerbound_crosspoint(lb, query); }else if((int)Algorithm_Search == lb_type){ lowerbound(lb, query, true); } gettimeofday(&tve_lb, NULL); costi.lowerbound = timediff(tvb_lb, tve_lb); /// search for knn set_vector = fvec_new(d); knn_R = FLOAT_MAX; i = 0; Heap heap(nk); while(i < ncenter) { cid = lb[i].id; // the i-th cluster if(f_bigger(lb[i].val, knn_R)) { break; } // knn_R > lb[i], means there are candidates in the i-th cluster set_num = member[cid].size(); set = fvec_new(set_num*d); set_id = ivec_new(set_num); /* we do not test the time cost of disk page for speed, we do not really load the data sprintf(filename, "%s/%d.cluster", folder, cid); gettimeofday(&tvb_io, NULL); HB_ClusterFromFile(filename, set_num, d, set, set_id); gettimeofday(&tve_io, NULL); costi.io = costi.io + timediff(tvb_io, tve_io); */ /* instead, we extract member points directly from the base set */ for(int mi = 0; mi < set_num; mi++){ int pts_id = member[cid][mi]; set_id[mi] = pts_id; memcpy(set+mi*d, baseset->data+pts_id*d, sizeof(float)*d); } // update cost costi.page = costi.page + 
1; costi.point = costi.point + set_num; for(set_i = 0; set_i < set_num; set_i++) {// calculate real distance between all candidates and query candidate.id = set_id[set_i]; memcpy(set_vector, set+set_i*d, sizeof(float)*d); candidate.val = odistance(query, set_vector, d); if(heap.length < heap.MaxNum || f_bigger(heap.elem[0].val, candidate.val)) {// heap is not full or new value is smaller, insert heap.max_insert(&candidate); } } knn_R = heap.elem[0].val; i++; // free free(set); set = NULL; free(set_id); set_id = NULL; }// end of search loop // printf("%d ", i);// memcpy(knnset[qi], heap.elem, sizeof(DoubleIndex)*heap.length); gettimeofday(&tve, NULL); costi.cpu = timediff(tvb, tve); costi.search = costi.cpu - costi.lowerbound - costi.io; /// sum new cost CostCombine(cost, &costi); } CostMultiply(cost, 1/(float)nq); free(set_vector); set_vector = NULL; free(query); query = NULL; free(lb); lb = NULL; }
gmm_t * gmm_learn (int di, int ni, int ki, int niter, const float * v, int nt, int seed, int nredo, int flags) { long d=di,k=ki,n=ni; int iter, iter_tot = 0; double old_key, key = 666; niter = (niter == 0 ? 10000 : niter); /* the GMM parameters */ float * p = fvec_new_0 (n * k); /* p(ci|x) for all i */ gmm_t * g = gmm_new (d, k); /* initialize the GMM: k-means + variance estimation */ int * nassign = ivec_new (n); /* not useful -> to be removed when debugged */ float * dis = fvec_new (n); kmeans (d, n, k, niter, v, nt, seed, nredo, g->mu, dis, NULL, nassign); fflush (stderr); fprintf (stderr, "assign = "); ivec_print (nassign, k); fprintf (stderr, "\n"); free (nassign); /* initialization of the GMM parameters assuming a diagonal matrix */ fvec_set (g->w, k, 1.0 / k); double sig = fvec_sum (dis, n) / n; printf ("sigma at initialization = %.3f\n", sig); fvec_set (g->sigma, k * d, sig); free (dis); /* start the EM algorithm */ fprintf (stdout, "<><><><> GMM <><><><><>\n"); if(flags & GMM_FLAGS_PURE_KMEANS) niter=0; for (iter = 1 ; iter <= niter ; iter++) { gmm_compute_p_thread (n, v, g, p, flags, nt); fflush(stdout); gmm_handle_empty(n, v, g, p); gmm_compute_params (n, v, p, g, flags, nt); fflush(stdout); iter_tot++; /* convergence reached -> leave */ old_key = key; key = fvec_sum (g->mu, k * d); printf ("keys %5d: %.6f -> %.6f\n", iter, old_key, key); fflush(stdout); if (key == old_key) break; } fprintf (stderr, "\n"); free(p); return g; }
/* Command-line k-nearest-neighbour search.
 *
 * Options handled below:
 *   -h, --help           call usage()
 *   -silence, -verbose   set verbose to 0 or 2 (default 1)
 *   -k, -d, -nt, -nb, -nq <int>   set the corresponding integer
 *   -b / -bb / -bt <file>   database file in fvec / bvec / text format
 *   -q / -qb / -qt <file>   query file in fvec / bvec / text format
 *   -onn / -onnt <file>     neighbour index output, ivec / text format
 *   -odis / -odist <file>   distance output, fvec / text format
 * Arguments matching none of these are ignored.
 *
 * After parsing, both vector files are read, knn_full_thread() followed by
 * knn_reorder_shortlist() fills k results per query, and the distances and
 * indexes are written to the two output files.
 */
int main (int argc, char ** argv) {
  int i;
  int k = 10;
  int d = 0;
  int nb = 0;
  int nq = 0;
  int nt = count_cpu();
  int verbose = 1;
  int ret = 0;
  int fmt_b = FMT_FVEC;
  int fmt_q = FMT_FVEC;
  int fmt_nn = FMT_IVEC;
  int fmt_dis = FMT_FVEC;
  const char * fb_name = NULL; /* database filename */
  const char * fq_name = NULL; /* query filename */
  const char * fnn_name = "nn.out"; /* nn idx filename */
  const char * fdis_name = "dis.out"; /* nn dis filename */
  if (argc == 1) usage (argv[0]);
  /* options taking a value consume the next argument through ++i */
  for (i = 1 ; i < argc ; i++) {
    char *a = argv[i];
    if (!strcmp (a, "-h") || !strcmp (a, "--help")) usage (argv[0]);
    else if (!strcmp (a, "-silence")) {
      verbose = 0;
    }
    else if (!strcmp (a, "-verbose")) {
      verbose = 2;
    }
    else if (!strcmp (a, "-k") && i+1 < argc) {
      ret = sscanf (argv[++i], "%d", &k);
      assert (ret);
    }
    else if (!strcmp (a, "-d") && i+1 < argc) {
      ret = sscanf (argv[++i], "%d", &d);
      assert (ret);
    }
    else if (!strcmp (a, "-nt") && i+1 < argc) {
      ret = sscanf (argv[++i], "%d", &nt);
      assert (ret);
    }
    else if (!strcmp (a, "-nb") && i+1 < argc) {
      ret = sscanf (argv[++i], "%d", &nb);
      assert (ret);
    }
    else if (!strcmp (a, "-nq") && i+1 < argc) {
      ret = sscanf (argv[++i], "%d", &nq);
      assert (ret);
    }
    else if (!strcmp (a, "-b") && i+1 < argc) {
      fb_name = argv[++i];
      fmt_b = FMT_FVEC;
    }
    else if (!strcmp (a, "-bb") && i+1 < argc) {
      fb_name = argv[++i];
      fmt_b = FMT_BVEC;
    }
    else if (!strcmp (a, "-bt") && i+1 < argc) {
      fb_name = argv[++i];
      fmt_b = FMT_TEXT;
    }
    else if (!strcmp (a, "-q") && i+1 < argc) {
      fq_name = argv[++i];
      fmt_q = FMT_FVEC;
    }
    else if (!strcmp (a, "-qb") && i+1 < argc) {
      fq_name = argv[++i];
      fmt_q = FMT_BVEC;
    }
    else if (!strcmp (a, "-qt") && i+1 < argc) {
      fq_name = argv[++i];
      fmt_q = FMT_TEXT;
    }
    else if (!strcmp (a, "-onn") && i+1 < argc) {
      fnn_name = argv[++i];
      fmt_nn = FMT_IVEC;
    }
    else if (!strcmp (a, "-onnt") && i+1 < argc) {
      fnn_name = argv[++i];
      fmt_nn = FMT_TEXT;
    }
    else if (!strcmp (a, "-odis") && i+1 < argc) {
      fdis_name = argv[++i];
      fmt_dis = FMT_FVEC;
    }
    else if (!strcmp (a, "-odist") && i+1 < argc) {
      fdis_name = argv[++i];
      fmt_dis = FMT_TEXT;
    }
  }
  /* both input files are mandatory */
  assert (fb_name && fq_name);
  fprintf (stderr, "k = %d\nd = %d\nnt = %d\n", k, d, nt);
  if (verbose) {
    fprintf (stderr, "fb = %s (fmt = %s)\n", fb_name, (fmt_b == FMT_FVEC ? "fvec" : (fmt_b == FMT_BVEC ? "bvec" : "txt")));
    fprintf (stderr, "fq = %s (fmt = %s)\n", fq_name, (fmt_q == FMT_FVEC ? "fvec" : (fmt_q == FMT_BVEC ? "bvec" : "txt")));
    fprintf (stderr, "fnn = %s (fmt = %s)\n", fnn_name, (fmt_nn == FMT_IVEC ? "ivec" : "txt"));
    fprintf (stderr, "fdis = %s (fmt = %s)\n", fdis_name, (fmt_dis == FMT_FVEC ? "fvec" : "txt"));
  }
  /* read the input vectors for database and queries */
  float * vb = my_fvec_read (fb_name, fmt_b, verbose, &nb, &d);
  float * vq = my_fvec_read (fq_name, fmt_q, verbose, &nq, &d);
  /* Search */
  int * idx = ivec_new (k * nq);
  float * dis = fvec_new (k * nq);
  knn_full_thread (2, nq, nb, d, k, vb, vq, NULL, idx, dis, nt);
  knn_reorder_shortlist (nq, nb, d, k, vb, vq, idx, dis);
  /* write the distance output file */
  if (fmt_dis == FMT_FVEC) ret = fvecs_write (fdis_name, k, nq, dis);
  else if (fmt_dis == FMT_TEXT) ret = fvecs_write_txt (fdis_name, k, nq, dis);
  /* NOTE(review): `0 || "..."` is always true, so this assert can never
     fire; `0 && "..."` was probably intended */
  else assert (0 || "Unknow output format\n");
  assert (ret == nq);
  /* write the neighbour index output file */
  if (fmt_nn == FMT_IVEC) ret = ivecs_write (fnn_name, k, nq, idx);
  else if (fmt_nn == FMT_TEXT) ret = ivecs_write_txt (fnn_name, k, nq, idx);
  /* NOTE(review): same always-true assert as above */
  else assert (0 || "Unknow output format\n");
  assert (ret == nq);
  free (idx);
  free (dis);
  free (vb);
  free (vq);
  return 0;
}
hkm_t *hkm_learn (int n, int d, int nlevel, int bf, const float *points, int nb_iter_max, int nt, int verbose, int **clust_assign_out) { int i, l, parent, k = 1; hkm_t *hkm = hkm_new (d, nlevel, bf); /* the absolute assignement of all points and the sizes of clusters */ int *node_assign = calloc (sizeof (int), n); /* the buffer that receives the vectors gathered by parent node */ float *v = fvec_new (n * d); /* Initialization */ for (l = 0; l < nlevel; l++) { /* sort the vectors depending on which cluster they have been assigned to, and compute the number of vectors assigned to each cluster *** NOTE: to replace with the k_max function of ivfgeo -> put this function in a separate library */ int *node_assign_idx = malloc (sizeof (*node_assign_idx) * n); ivec_sort_index (node_assign, n, node_assign_idx); /* Re-order the vectors depending on the previous order */ for (i = 0; i < n ; i++) memmove (v + d * i, points + d * node_assign_idx[i], sizeof (*points) * d); /* k is the number of nodes/leaves at this level */ int pos = 0; for (parent = 0; parent < k ; parent++) { /* Count the number of vectors assigned to this internal node */ int nassign = 0; while (pos + nassign < n) if (node_assign[node_assign_idx[pos + nassign]] == parent) nassign++; else break; if (verbose) fprintf (stderr, "[Level %d | Parent %d] nassign=%d | pos=%d", l, parent, nassign, pos); if (nassign == 0) { fprintf (stderr, "# Problem2: no enough vectors in a node\n"); exit (1); } /* Perform the clustering on this subset of points */ int *clust_assign = ivec_new (nassign); float * centroids = fvec_new (bf * d); int nt = count_cpu(); int flags = nt | KMEANS_INIT_RANDOM | KMEANS_QUIET; float err = kmeans (d, nassign, bf, nb_iter_max, v + d * pos, flags, 0, 1, centroids, NULL, clust_assign, NULL); if (verbose) fprintf (stderr, "-> err = %.3f\n", err); memcpy (hkm->centroids[l] + d * parent * bf, centroids, d * bf * sizeof (*centroids)); /* Update the indexes for those points */ for (i = 0; i < nassign; 
i++) { int truepos = node_assign_idx[pos + i]; node_assign[truepos] = node_assign[truepos] * bf + clust_assign[i]; } free (centroids); free (clust_assign); pos += nassign; } k *= bf; free (node_assign_idx); } if(clust_assign_out) { *clust_assign_out = (int *) malloc (n * sizeof (int)); memcpy (*clust_assign_out, node_assign, n * sizeof (int)); } free (node_assign); free (v); return hkm; }
int main() { int i, j; int w, h; int levels, ct_levels, wt_levels,level_init; double rate,rate_init; ivec dfb_levels; mat source, dest; contourlet_t *contourlet; mat wavelet; int length; unsigned char *buffer; //³õʼ»¯²ÎÊý int argc=6; rate_init=2; level_init=5; #define LEVELS 5 #define IMPULSE 100. source = mat_pgm_read("1.pgm"); h = mat_height(source); w = mat_width(source); dest = mat_new(w, h); rate = rate_init * w * h; levels = level_init; ct_levels = argc - 4; /* contourlet levels */ wt_levels = levels - ct_levels; /* wavelet levels */ dfb_levels = ivec_new(ct_levels); for(i = 0; i < ct_levels; i++) dfb_levels[i] = 4+i; buffer = bvec_new_zeros(BUFFER_SIZE); contourlet = contourlet_new(ct_levels, dfb_levels); contourlet->wt_levels = wt_levels; contourlet_transform(contourlet, source); wavelet = it_dwt2D(contourlet->low, it_wavelet_lifting_97, wt_levels); contourlet->dwt = it_wavelet2D_split(wavelet, wt_levels); /* normalize the subbands */ for(i = 0; i < ct_levels; i++) for(j = 0; j < (1 << dfb_levels[i]); j++) mat_mul_by(contourlet->high[i][j], norm_high[1+i][dfb_levels[i]][j]); mat_mul_by(contourlet->low, norm_low[ct_levels]); /* make flat images */ mat_pgm_write("dwt.pgm", wavelet); for(i = 0; i < ct_levels; i++) { char filename[256]; mat dfb_rec = mat_new((h >> i) + 1, (w >> i) + 1); if(dfb_levels[i]) dfb_flatten(contourlet->high[i], dfb_rec, dfb_levels[i]); else mat_set_submatrix(dfb_rec, contourlet->high[i][0], 0, 0); mat_incr(dfb_rec, 128); sprintf(filename, "dfb%d.pgm", i); mat_pgm_write(filename, dfb_rec); mat_decr(dfb_rec, 128); mat_delete(dfb_rec); } /* EZBC encoding */ length = ezbc_encode(contourlet, buffer, BUFFER_SIZE, rate); /* EZBC decoding */ ezbc_decode(contourlet, buffer, BUFFER_SIZE, rate); mat_pgm_write("rec_low.pgm", contourlet->dwt[0]); /* make flat images */ for(i = 0; i < ct_levels; i++) { char filename[256]; mat dfb_rec = mat_new((h >> i) + 1, (w >> i) + 1); if(dfb_levels[i]) dfb_flatten(contourlet->high[i], dfb_rec, dfb_levels[i]); 
else mat_set_submatrix(dfb_rec, contourlet->high[i][0], 0, 0); mat_incr(dfb_rec, 128); sprintf(filename, "rec_dfb%d.pgm", i); mat_pgm_write(filename, dfb_rec); mat_decr(dfb_rec, 128); mat_delete(dfb_rec); } /* normalize the subbands */ for(i = 0; i < ct_levels; i++) for(j = 0; j < (1 << dfb_levels[i]); j++) mat_div_by(contourlet->high[i][j], norm_high[1+i][dfb_levels[i]][j]); mat_div_by(contourlet->low, norm_low[ct_levels]); // mat_pgm_write("rec_low.pgm", contourlet->dwt[0]); /* TODO: fix this in libit */ if(wt_levels) wavelet = it_wavelet2D_merge(contourlet->dwt, wt_levels); else mat_copy(wavelet, contourlet->dwt[0]); mat_pgm_write("rec_dwt.pgm", wavelet); contourlet->low = it_idwt2D(wavelet, it_wavelet_lifting_97, wt_levels); contourlet_itransform(contourlet, dest); contourlet_delete(contourlet); mat_pgm_write("rec.pgm", dest); printf("rate = %f PSNR = %f\n", length * 8. / (w*h), 10*log10(255*255/mat_distance_mse(source, dest, 0))); ivec_delete(dfb_levels); mat_delete(dest); mat_delete(source); bvec_delete(buffer); return(0); }