Esempio n. 1
0
/*
 * Re-rank an existing k-NN shortlist using freshly computed distances.
 *
 * n      : number of query vectors (rows of v, each of dimension d)
 * nb     : not used by this function
 * d      : vector dimension
 * k      : shortlist length per query
 * b      : base vectors, indexed by the values stored in assign
 * v      : query vectors (n*d floats)
 * assign : in/out, n*k candidate indices; for each query the list is
 *          considered to end at the first negative entry
 * dists  : out, n*k distances matching the reordered assign entries
 *
 * For each query the valid candidates are gathered into a contiguous
 * buffer, their distances are recomputed with compute_distances_1(), and
 * both lists are rewritten following the permutation returned by
 * fvec_sort_index() (presumably ascending distance -- confirm against that
 * helper).  Entries at and after the first negative index are untouched.
 */
void knn_reorder_shortlist(int n, int nb, int d, int k,
                           const float *b, const float *v,
                           int *assign,
                           float *dists) 
{
  float *subb=fvec_new(k*(long)d);
  float *diststmp=fvec_new(k);
  int *perm=ivec_new(k);
  int *assigntmp=ivec_new(k);  
  long i;
  int j;

  for(i=0;i<n;i++) {
    /* i is long so the row offsets below are computed in long */
    int *assigni=assign+i*k;
    float *disti=dists+i*k;
    int ki;

    /* gather the candidate vectors; widen before multiplying so that a
       large base index times d cannot overflow int */
    for(j=0;j<k;j++) {
      if(assigni[j]<0) break;
      memcpy(subb+j*(long)d,b+assigni[j]*(long)d,sizeof(*subb)*d);
    }
    ki=j;   /* number of valid candidates for this query */

    compute_distances_1(d,ki,v+i*d,subb,diststmp);
    
    fvec_sort_index(diststmp,ki,perm);

    /* keep a copy of the old order: assigni is overwritten in place */
    memcpy(assigntmp,assigni,sizeof(*assigni)*ki);
    
    for(j=0;j<ki;j++) {
      disti[j]=diststmp[perm[j]];
      assigni[j]=assigntmp[perm[j]];
    }
  }

  free(assigntmp);
  free(diststmp);
  free(subb);
  free(perm);
}
Esempio n. 2
0
File: vlad.c Progetto: Erotemic/yael
/*
 * Bag-of-features counts for several subsets of one vector set.
 *
 * All n vectors of v are assigned once with nn(); then, for each of the
 * n_subset subsets, the assignments of its members are counted into the
 * k-bin block desc[ss*k .. ss*k+k-1].
 *
 * Subset ss consists of subset_indexes[begin .. subset_ends[ss]-1], where
 * begin is subset_ends[ss-1] (0 for the first subset).
 */
void bof_compute_subsets(int k, int d, const float *centroids, 
                         int n, const float *v,
                         int n_subset,
                         const int *subset_indexes, 
                         const int *subset_ends,
                         float *desc) 
{
  int *nearest = ivec_new(n);
  int subset;
  int first = 0;

  nn (n, k, d, centroids, v, nearest);

  fvec_0 (desc, k * n_subset);

  for (subset = 0 ; subset < n_subset ; subset++) {
    const int last = subset_ends[subset];
    float *bins = desc + subset * k;
    int pos;

    for (pos = first ; pos < last ; pos++) {
      const int member = subset_indexes[pos];
      bins[nearest[member]] ++;
    }

    /* the next subset starts where this one stopped */
    first = last;
  }

  free (nearest);
}
Esempio n. 3
0
/*
 * Reorder d eigenvalues and their eigenvectors in place.
 *
 * eigval    : d values, permuted on output
 * eigvec    : d*d floats; the block of d floats starting at eigvec + i*d
 *             travels with eigval[i]
 * criterion : 0 keeps the order given by fvec_sort_index(); any other
 *             value reverses that order
 */
void eigs_reorder (int d, float * eigval, float * eigvec, int criterion)
{
  int * order = ivec_new (d);
  float * val_sorted = fvec_new (d);
  float * vec_sorted = fvec_new (d * d);
  int lo, hi, r;

  fvec_sort_index (eigval, d, order);

  /* optional reversal: swap from both ends towards the middle */
  if (criterion) {
    for (lo = 0, hi = d - 1 ; lo < hi ; lo++, hi--) {
      const int keep = order[lo];
      order[lo] = order[hi];
      order[hi] = keep;
    }
  }

  /* gather values and vectors into scratch buffers in the new order */
  for (r = 0 ; r < d ; r++) {
    const int src = order[r];
    val_sorted[r] = eigval[src];
    memcpy (vec_sorted + r * d, eigvec + src * d, sizeof (*vec_sorted) * d);
  }

  /* write the result back over the caller's arrays */
  memcpy (eigval, val_sorted, d * sizeof (*eigval));
  memcpy (eigvec, vec_sorted, d * d * sizeof (*eigvec));

  free (val_sorted);
  free (vec_sorted);
  free (order);
}
Esempio n. 4
0
File: vlad.c Progetto: Erotemic/yael
/*
 * VLAD accumulation for several subsets of one vector set.
 *
 * All n vectors are assigned to a centroid once with nn().  For subset ss
 * the residuals (vector minus its assigned centroid) of the subset's
 * members are summed into the k*d block starting at desc + ss*k*d.
 *
 * Subset ss consists of subset_indexes[begin .. subset_ends[ss]-1], where
 * begin is subset_ends[ss-1] (0 for the first subset).
 */
void vlad_compute_subsets(int k, int d, const float *centroids, 
                          int n, const float *v,
                          int n_subset,
                          const int *subset_indexes, 
                          const int *subset_ends,
                          float *desc) 
{
  int *nearest = ivec_new(n);
  int subset, dim;
  int first = 0;

  nn (n, k, d, centroids, v, nearest);

  fvec_0 (desc, k * d * n_subset);

  for (subset = 0 ; subset < n_subset ; subset++) {
    const int last = subset_ends[subset];
    float *block = desc + subset * k * d;
    int pos;

    for (pos = first ; pos < last ; pos++) {
      const int pt = subset_indexes[pos];
      const int cell = nearest[pt];

      for (dim = 0 ; dim < d ; dim++) 
        block[cell*d+dim] += v[pt*d+dim] - centroids[cell*d+dim];
    }

    first = last;
  }

  free(nearest);
}
Esempio n. 5
0
/*
 * Walk the cluster tree stored in ahct from element 0 down to a leaf and
 * return the index of that leaf within ahct.
 *
 * At every non-leaf cluster, knn_full() is asked for the single nearest
 * entry of the cluster's cents array to v; the matching entry of the
 * cluster's children array gives the next cluster to visit.
 * (The leading arguments 2 and 1 of knn_full presumably select the
 * distance type and one query vector -- confirm against its declaration.)
 */
int	ahc_quantize(DyArray *ahct, float *v, int d){
	__assertinfo(ahct == NULL || v == NULL || d <= 0, "IPP");
	int		*best = ivec_new(1);
	float	*best_dis = fvec_new(1);
	int		current = 0;
	Cluster	*node = (Cluster*)DyArray_get(ahct, current, 1);

	/* descend until a leaf is reached */
	while(node->type != ClusterType_Leaf){
		int	nchild = node->children.count;

		knn_full(2, 1, nchild, d, 1, node->cents, v, NULL, best, best_dis);

		/* children stores positions inside ahct */
		current = *(int*)DyArray_get(&node->children, best[0], 1);
		node = (Cluster*)DyArray_get(ahct, current, 1);
	}

	FREE(best);
	FREE(best_dis);
	return current;
}
Esempio n. 6
0
/*
 * Check whether `folder` looks like a complete cluster index.
 *
 * Returns true only when
 *   - the config file <folder>/<HCluster_ConfigFile> exists and its first
 *     two ints can be read, and
 *   - the cluster files "<folder>/0<HCluster_Postfix>" and
 *     "<folder>/<N-1><HCluster_Postfix>" both exist, where N is the second
 *     int of the config file.
 * Any failure (missing file, short read, path too long for the local
 * buffer) yields false.
 */
bool ahc_check_index(const char *folder){
	__assertinfo(folder == NULL, "IPP");
	char	file[255];
	FILE	*fp;
	int		header[2];		/* two ints read from the config file */
	size_t	nread;
	int		len;

	/* check out the config file */
	len = snprintf(file, sizeof(file), "%s/%s", folder, HCluster_ConfigFile);
	if(len < 0 || (size_t)len >= sizeof(file)){
		return false;
	}
	if(!file_exists(file)){
		return false;
	}
	fp = open_file(file, "rb");
	if(fp == NULL){
		return false;
	}
	nread = fread(header, sizeof(int), 2, fp);
	fclose(fp);				/* the stream is only needed for the header */
	if(nread != 2){
		return false;
	}

	/* check out the existence of cluster files [first, end] */
	len = snprintf(file, sizeof(file), "%s/0%s", folder, HCluster_Postfix);
	if(len < 0 || (size_t)len >= sizeof(file)){
		return false;
	}
	if(!file_exists(file)){
		return false;
	}

	len = snprintf(file, sizeof(file), "%s/%d%s", folder, header[1]-1, HCluster_Postfix);
	if(len < 0 || (size_t)len >= sizeof(file)){
		return false;
	}
	if(!file_exists(file)){
		return false;
	}

	return true;
}
Esempio n. 7
0
/*
 * Convert a dense int vector into sparse (index, value) form.
 *
 * v, n    : dense input vector and its length
 * idx_out : receives a newly allocated array with the positions of the
 *           non-zero entries, in increasing order
 * v_out   : receives a newly allocated array with the matching values
 *
 * Returns the number of non-zero entries as reported by ivec_nz().
 * Both output arrays come from ivec_new() and are owned by the caller.
 */
int ivec_to_spivec (int * v, int n, int ** idx_out, int ** v_out)
{
  const int count = ivec_nz (v, n);
  int * positions = ivec_new (count);
  int * values = ivec_new (count);
  int src, dst = 0;

  for (src = 0 ; src < n ; src++) {
    const int x = v[src];
    if (x == 0)
      continue;
    positions[dst] = src;
    values[dst] = x;
    dst++;
  }

  *idx_out = positions;
  *v_out = values;
  return count;
}
Esempio n. 8
0
/*
 * Put a Cluster into its initial state for npts member points:
 * an index array of npts ints is allocated, no point data and no
 * centroids are attached, and the type is ClusterType_Inner.
 * The children array is initialised with -1 as last argument; what that
 * value means is decided by DyArray_init and is not visible here.
 */
void Cluster_init(Cluster *clu, int npts){
	ASSERTINFO(clu == NULL || npts <= 0, "IPP");

	/* plain fields */
	clu->npts = npts;
	clu->type = ClusterType_Inner;
	clu->data = NULL;
	clu->cents = NULL;

	/* owned storage */
	clu->idx = ivec_new(npts);
	DyArray_init(&clu->children, sizeof(int), -1);
}
Esempio n. 9
0
/*
 * Allocate an int vector of n elements and set every element to val.
 * The caller owns the returned vector.
 */
int *ivec_new_set (long n, int val)
{
  long i;   /* same width as n: an int index could overflow before reaching n */
  int *ret = ivec_new(n);

  for (i = 0 ; i < n ; i++)
    ret[i] = val;

  return ret;
}
Esempio n. 10
0
/*
 * Allocate an int vector holding the consecutive values a, a+1, ..., b-1.
 * The vector has b - a elements and is owned by the caller.
 * Values are stored as int, so they are only meaningful when the range
 * fits in int.
 */
int * ivec_new_range (long a, long b)
{
  long i;   /* same width as the bounds so the loop cannot wrap */
  int *ret = ivec_new(b - a);

  for (i = a ; i < b ; i++)
    ret[i - a] = (int) i;

  return ret;
}
Esempio n. 11
0
/*
 * Build a vector made of nrepeat consecutive copies of a[0..n-1], where
 * copy number i has i*inc added to each element.
 * Returns a newly allocated vector of nrepeat*n ints owned by the caller.
 */
int *ivec_repeat_with_inc(const int *a,int n,
                          int nrepeat, int inc) {
  /* sizes and offsets are computed in long so that nrepeat*n cannot
     overflow int */
  int *ret=ivec_new(nrepeat*(long)n); 
  int i;
  for(i=0;i<nrepeat;i++) {
    int *chunk=ret+i*(long)n;
    ivec_cpy(chunk, a, n); 
    ivec_add_scalar(chunk, n, i*inc);
  }
  return ret;
}
Esempio n. 12
0
/*
 * Benchmark: compares fvec_k_max_hoare() and fvec_k_max_maxheap() on a
 * random vector.
 *
 * Usage: prog n nrepeat
 *   n       : length of the random vector (must be > 0)
 *   nrepeat : timed repetitions per value of k (must be > 0)
 *
 * k takes the values 1, 2, 5, 10, 20, 50, ... for as long as the power of
 * ten is below n.  For each k the mean time of both functions is printed.
 */
int main(int argc, char** argv) {
  /* validate explicitly: assert() disappears under NDEBUG */
  if(argc != 3) {
    fprintf(stderr, "usage: %s n nrepeat\n", argc > 0 ? argv[0] : "bench");
    return 1;
  }
  int n = atoi(argv[1]); 
  int nrepeat = atoi(argv[2]);
  if(n <= 0 || nrepeat <= 0) {
    fprintf(stderr, "n and nrepeat must be positive integers\n");
    return 1;
  }
  
  float * v = fvec_new_rand(n); 
  int * idx = ivec_new(n); 
  int * idx2 = ivec_new(n); 
  int k0, ki; 
  int m[3] = {1, 2, 5};
  for(k0 = 1; k0 < n; k0 *= 10) {
    for(ki = 0; ki < 3; ki++) {
      int k = k0 * m[ki]; 
      printf("k = %d ", k); 
      double st0 = 0, st1 = 0;   /* accumulated time per method */
      int r;
      for(r = 0; r < nrepeat; r++) {
        double t0 = getmillisecs(); 
        fvec_k_max_hoare(v, n, idx, k); 
        double t1 = getmillisecs(); 
        fvec_k_max_maxheap(v, n, idx2, k); 
        double t2 = getmillisecs(); 
        st0 += t1 - t0; 
        st1 += t2 - t1;
      }

      printf("qselect: %.4f ms, maxheap: %.4f ms\n", 
             st0 / nrepeat, st1 / nrepeat);
    }
  }

  free(idx2);
  free(idx);
  free(v);
  return 0; 
}
Esempio n. 13
0
/*
 * Read d raw ints from f into a newly allocated vector.
 *
 * Returns the vector (owned by the caller) when exactly d elements were
 * read.  On a short read, a message is printed with perror(), the vector
 * is released and NULL is returned.
 */
int * ivec_new_fread_raw(FILE * f, long d) 
{
  int * v = ivec_new(d);

  size_t nread = fread (v, sizeof (*v), d, f);
  if (nread != (size_t) d) {
    /* report before free(): free() is allowed to modify errno */
    perror ("# ivec_new_fread_raw error");
    free(v);
    return NULL;
  }
  return v;
}
Esempio n. 14
0
/*
 * Copy the first nrow_out entries of each of the ncol consecutive
 * nrow-long runs of a into a newly allocated, densely packed matrix.
 *
 * a        : source, ncol runs of nrow ints each
 * nrow     : stride between runs in the source
 * nrow_out : number of leading ints kept from each run
 * ncol     : number of runs
 *
 * Returns nrow_out*ncol ints owned by the caller.
 */
int *imat_get_submatrix (const int *a, int nrow, 
                         int nrow_out,
                         int ncol) {
  int *b=ivec_new(nrow_out*(long)ncol);
  const size_t run_bytes=nrow_out*sizeof(*a);
  long col=0;

  while(col<ncol) {
    memcpy(b+col*nrow_out,a+col*nrow,run_bytes);
    col++;
  }

  return b;
}
Esempio n. 15
0
File: vlad.c Progetto: Erotemic/yael
/*
 * Bag-of-features histogram.
 *
 * Each of the n vectors in v is assigned to one of the k centroids by
 * nn(); desc[c] receives the number of vectors assigned to centroid c.
 * desc must have room for k ints and is cleared first.
 */
void bof_compute (int k, int d, const float *centroids, 
		  int n, const float *v, int *desc)
{
  int *nearest = ivec_new(n);
  const int *cur, *end;

  nn (n, k, d, centroids, v, nearest);
  ivec_0(desc,k);

  /* one vote per input vector */
  for(cur=nearest, end=nearest+n; cur<end; cur++)
    desc[*cur]++;

  free(nearest);
}
Esempio n. 16
0
File: vlad.c Progetto: Erotemic/yael
/*
 * Bag-of-features histogram with multiple assignment.
 *
 * Each of the n vectors in v is assigned to ma centroids by knn_thread()
 * (run with nt as its last argument); every assignment adds one to the
 * matching bin of desc, which must hold k ints and is cleared first.
 *
 * alpha is accepted but not used by this function.
 *
 * NOTE(review): the value returned by knn_thread() is discarded; if that
 * function returns an allocated distance buffer it is leaked -- confirm
 * against its declaration.
 */
void bof_compute_ma (int k, int d, const float *centroids, 
		     int n, const float *v, int *desc, 
		     int ma, float alpha, int nt)
{
  int *nearest = ivec_new(n*ma);
  int slot = 0;

  knn_thread (n, k, d, ma, centroids, v, nearest, nt);
  ivec_0(desc,k);

  /* nearest holds n*ma centroid indices, one vote each */
  while(slot<n*ma) {
    desc[nearest[slot]]++;
    slot++;
  }

  free(nearest);
}
Esempio n. 17
0
/*
 * Collect the positions of the non-zero entries of v[0..n-1].
 *
 * nzpos_out receives a newly allocated array (owned by the caller) with
 * those positions in increasing order.  The return value is the count
 * reported by ivec_nz().
 */
int ivec_find (const int *v, int n, int ** nzpos_out)
{
  const int count = ivec_nz (v, n);
  int * found = ivec_new (count);
  int * out = found;
  int pos;

  for (pos = 0 ; pos < n ; pos++) {
    if (v[pos] == 0)
      continue;
    *out++ = pos;
  }

  *nzpos_out = found;
  return count;
}
Esempio n. 18
0
/*
 * Read a file of sparse float vectors and return them in dense form.
 *
 * Record layout as read by this function:
 *   int nz | nz ints (indices) | int nz again | nz floats (values)
 * Each record is expanded to d floats with spfvec_to_fvec().
 *
 * fname  : file to read (opened in binary mode)
 * d      : dimension of the dense vectors; also the capacity of the
 *          per-record buffers, so a record with nz outside [0, d] is
 *          rejected instead of overrunning them
 * vf_out : on success receives a malloc'ed array of n*d floats owned by
 *          the caller (NULL when the file holds no record)
 *
 * Returns the number of vectors read, or -1 after printing a message to
 * stderr.  On error *vf_out is left untouched and nothing is leaked.
 */
int fvecs_new_read_sparse (const char *fname, int d, float **vf_out) {
  float *vf=NULL;
  long n=0,na=0;
  const char *err=NULL;
  float *vals=fvec_new(d);
  int *idx=ivec_new(d);
  
  FILE *f = fopen (fname, "rb");
  if (!f) { err=""; goto fail; }
  
  while(!feof(f)) {
    int nz,nz2;
    float *dense;

    if(fread(&nz,sizeof(int),1,f)!=1) {
      if(feof(f)) break;            /* clean end of file */
      err="err 1"; goto fail;
    }
    /* nz comes from the file: bound it before using it as a read size */
    if(nz<0 || nz>d) { err="err 6"; goto fail; }

    if(fread(idx,sizeof(int),nz,f)!=(size_t)nz) { err="err 2"; goto fail; }
    if(fread(&nz2,sizeof(int),1,f)!=1) { err="err 3"; goto fail; }
    if(nz!=nz2) { err="err 4"; goto fail; }
    if(fread(vals,sizeof(float),nz,f)!=(size_t)nz) { err="err 5"; goto fail; }
    
    if(n>=na) {
      /* grow by 3/2; keep the old pointer valid if realloc fails */
      long na_new=(na+1)*3/2;
      float *grown=realloc(vf,na_new*sizeof(float)*d);
      if(!grown) { err="err 7"; goto fail; }
      vf=grown;
      na=na_new;
    }
    
    dense=spfvec_to_fvec (idx,vals,nz,d);
    memcpy(vf+n*d,dense,sizeof(float)*d);
    free(dense);
    
    n++;       
  }

  free(vals);
  free(idx);
  fclose(f);
  *vf_out=vf;
  return n;

fail:
  fprintf (stderr, "fvecs_new_read_sparse %s: %s", fname, err);
  perror ("");
  if(f) fclose(f);
  free(vf); free(vals); free(idx);
  return -1;
}
Esempio n. 19
0
File: vlad.c Progetto: Erotemic/yael
/*
 * VLAD descriptor: for every centroid, the sum of the residuals
 * (vector minus centroid) of the vectors assigned to it by nn().
 *
 * k, d, centroids : k centroids of dimension d
 * n, v            : n input vectors of dimension d
 * desc            : output of k*d floats, cleared before accumulation
 */
void vlad_compute(int k, int d, const float *centroids, 
                  int n, const float *v, float *desc) 
{
  int *nearest = ivec_new (n);
  int pt, dim;
 
  nn (n, k, d, centroids, v, nearest);

  fvec_0 (desc, k * d);
      
  for (pt = 0 ; pt < n ; pt++) {
    const int cell = nearest[pt];

    for (dim = 0 ; dim < d ; dim++) 
      desc[cell*d+dim] += v[pt*d+dim] - centroids[cell*d+dim];
  }      

  free(nearest);
}
Esempio n. 20
0
/*
 * Return a newly allocated vector holding the values 0..n-1 whose first
 * k entries have been drawn by a partial shuffle driven by rand_r(seed):
 * position i is swapped with a position picked in [i, n-1].
 *
 * The vector always has n entries and is owned by the caller.  k larger
 * than n is treated as n (there are no positions left to draw from
 * beyond that, and the draw would otherwise be a modulo by zero).
 */
int * ivec_new_random_idx_r (int n, int k, unsigned int seed)
{
  int *idx = ivec_new (n);
  int nswap = k < n ? k : n;
  int i;

  for (i = 0; i < n; i++)
    idx[i] = i;

  for (i = 0; i < nswap ; i++) {
    int j = i +  rand_r(&seed) % (n - i);
    /* swap i and j */
    int p = idx[i];
    idx[i] = idx[j];
    idx[j] = p;
  }

  return idx;
}
Esempio n. 21
0
/*
 * Weighted VLAD descriptor: for every centroid, the sum over the vectors
 * assigned to it by nn() of weights[i] * (vector i minus centroid).
 *
 * k, d, centroids : k centroids of dimension d
 * n, v, weights   : n input vectors of dimension d and one weight each
 * desc            : output of k*d floats, cleared before accumulation
 */
void vlad_compute_weighted(int k, int d, const float *centroids, 
                           int n, const float *v, const float *weights, 
                           float *desc) {
  int *nearest=ivec_new(n);
  int pt,dim;
 
  nn(n,k,d,centroids,v,nearest,NULL,NULL);

  fvec_0(desc,k*d);
      
  for(pt=0;pt<n;pt++) {
    const int cell=nearest[pt];
    const float w=weights[pt];

    for(dim=0;dim<d;dim++) 
      desc[cell*d+dim] += (v[pt*d+dim]-centroids[cell*d+dim])*w;
  }      

  free(nearest);
}
Esempio n. 22
0
/*
 * Load cluster number cid from "<folder>/<cid><HCluster_Postfix>".
 *
 * File layout as read here:
 *   int type
 *   if type is not ClusterType_Leaf:
 *       int bfi | d*bfi floats (centroids) | bfi ints (children)
 *   int count | count ints (member indices)
 *
 * d  : dimension of a centroid
 * bf : maximum number of children; the temporary buffers are sized for
 *      it, so a file announcing bfi outside [0, bf] is rejected
 *
 * Returns a malloc'ed Cluster owned by the caller, or NULL when the path
 * does not fit the local buffer, the file cannot be opened, the header is
 * truncated, or bfi / count are out of range.
 */
Cluster *ahc_load_a_cluster(const char *folder, int cid, int d, int bf){
	__assertinfo(folder == NULL || cid < 0, "IPP");
	Cluster	*clu = NULL;
	char	file[255];
	FILE	*fp = NULL;
	int		_type = 0, _count = 0, _bfi = 0;
	int		len;
	size_t	ncent;
	float	*cent = fvec_new(d*bf);
	int		*children = ivec_new(bf);

	len = snprintf(file, sizeof(file), "%s/%d%s", folder, cid, HCluster_Postfix);
	if(len < 0 || (size_t)len >= sizeof(file)){
		goto done;
	}
	fp = open_file(file, "rb");
	if(fp == NULL){
		goto done;
	}

	if(fread(&_type, sizeof(int), 1, fp) != 1){
		goto done;
	}
	if(_type != (int)ClusterType_Leaf){
		if(fread(&_bfi, sizeof(int), 1, fp) != 1){
			goto done;
		}
		/* _bfi comes from the file: cent/children only hold bf entries */
		if(_bfi < 0 || _bfi > bf){
			goto done;
		}
		ncent = (size_t)d * (size_t)_bfi;
		if(fread(cent, sizeof(float), ncent, fp) != ncent){
			goto done;
		}
		if(fread(children, sizeof(int), (size_t)_bfi, fp) != (size_t)_bfi){
			goto done;
		}
	}

	if(fread(&_count, sizeof(int), 1, fp) != 1 || _count < 0){
		goto done;
	}

	clu = (Cluster*)malloc(sizeof(Cluster));
	if(clu == NULL){
		goto done;
	}
	Cluster_init(clu, _count);								/* count */
	/* NOTE(review): a short read of the member indices is not detected */
	fread(clu->idx, sizeof(int), _count, fp);				/* idx */

	// fulfill type, centroids and children according to the type of the cluster
	clu->type = (ClusterType)_type;							/* type */
	if(ClusterType_Leaf == clu->type){
		DyArray_init(&clu->children, sizeof(int), 0);			/* bfi */
	}else{
		clu->cents = fvec_new(d * _bfi);
		memcpy(clu->cents, cent, sizeof(float) * d * _bfi);		/* cent */
		DyArray_init(&clu->children, sizeof(int), _bfi);			/* bfi */
		DyArray_add(&clu->children, (void*)children, _bfi);		/* children */
	}

done:
	if(fp != NULL){
		fclose(fp);
	}
	FREE(cent);
	FREE(children);
	return clu;
}
Esempio n. 23
0
File: vlad.c Progetto: atroudi/V3V_2
/*
 * Compute a VLAD-style descriptor; the aggregation variant is chosen by
 * flags.
 *
 * k, d, centroids : k centroids of dimension d (k*d floats)
 * n, v            : n input vectors of dimension d (n*d floats)
 * desc            : caller-allocated output.  It holds k*d floats, except
 *                   k*d*3 for flags==6 and k*d*2 for flags==15 and 17.
 *
 * For flags 11 and 12 each vector is assigned to 4 resp. 2 centroids with
 * knn(); for every other value a single assignment is obtained from nn().
 *
 * Variants (residual = vector minus its assigned centroid):
 *   6, 7   per centroid and dimension, 3 (flags 6) or 1 (flags 7)
 *          quantiles of the assigned vectors minus the centroid,
 *          multiplied by the number of assigned vectors
 *   5      sum of the assigned vectors
 *   8, 9   sum of residuals divided by their L2 norm (8) or by the square
 *          root of that norm (9); zero residuals are skipped
 *   10     sum of the assigned vectors, then fvec_normalize(.., 2.0) per
 *          centroid
 *   13     sum of squared residuals
 *   14     square root of the summed squared deviation of the residuals
 *          from their per-centroid mean
 *   15     first k*d: sum of residuals; second k*d: as for 14
 *   17     first k*d: sum of positive residual components; second k*d:
 *          sum of the magnitudes of the non-positive ones
 *   other  sum of residuals, followed by:
 *          1   division by the number of assigned vectors
 *          2   fvec_normalize(.., 2.0) per centroid
 *          3,4 not implemented (assert)
 *          16  fvec_norm() then scaling by sqrt(count), see note below
 */
void vlad_compute(int k, int d, const float *centroids, int n, const float *v,int flags, float *desc) 
{

	int i,j,l,n_quantile,i0,i1,ai,a,ma,ni;
	int *perm ;
	float un , diff;
	float *tab,*u,*avg,*sum,*mom2,*dists;
	int *hist,*assign;


	if(flags<11 || flags>=13) 
	{
		/* single assignment of every vector to a centroid */
		assign=ivec_new(n);

		nn(n,k,d,centroids,v,assign,NULL,NULL);    

		if(flags==6 || flags==7) 
		{
			n_quantile = flags==6 ? 3 : 1;
			fvec_0(desc,k*d*n_quantile);
			perm      = ivec_new(n);
			tab       = fvec_new(n);
			/* perm groups the vectors by assigned centroid */
			ivec_sort_index(assign,n,perm);
			i0=0;
			for(i=0;i<k;i++) 
			{
				/* [i0, i1) is the run of vectors assigned to centroid i */
				i1=i0;
				while(i1<n && assign[perm[i1]]==i) 
				{
					i1++;
				}

				if(i1==i0) continue;

				for(j=0;j<d;j++) 
				{        
					for(l=i0;l<i1;l++)
					{
						tab[l-i0]=v[perm[l]*d+j];
					}
					ni=i1-i0;
					fvec_sort(tab,ni);
					for(l=0;l<n_quantile;l++) 
					{
						desc[(i*d+j)*n_quantile+l]=(tab[(l*ni+ni/2)/n_quantile]-centroids[i*d+j])*ni;
					}
				}

				i0=i1;
			}
			free(perm);
			free(tab);
		} 
		else if(flags==5) 
		{
			fvec_0(desc,k*d);

			for(i=0;i<n;i++) 
			{
				for(j=0;j<d;j++) 
				{
					desc[assign[i]*d+j]+=v[i*d+j];
				}
			}

		} 
		else if(flags==8 || flags==9) 
		{
			fvec_0(desc,k*d);

			u   = fvec_new(d);

			for(i=0;i<n;i++) 
			{
				/* u = residual of vector i */
				fvec_cpy(u,v+i*d,d);
				fvec_sub(u,centroids+assign[i]*d,d);
				un=(float)sqrt(fvec_norm2sqr(u,d));

				if(un==0) continue;
				if(flags==8) 
				{        
					fvec_div_by(u,d,un);
				} else if(flags==9) 
				{
					fvec_div_by(u,d,sqrt(un));
				}

				fvec_add(desc+assign[i]*d,u,d);

			}
			free(u);
		} 
		else if(flags==10) 
		{
			fvec_0(desc,k*d);

			for(i=0;i<n;i++) 
			{
				for(j=0;j<d;j++) 
				{
					desc[assign[i]*d+j]+=v[i*d+j];
				}
			}

			for(i=0;i<k;i++) 
			{
				fvec_normalize(desc+i*d,d,2.0);  
			}

		} 
		else if(flags==13) 
		{

			fvec_0(desc,k*d);

			for(i=0;i<n;i++) 
			{
				for(j=0;j<d;j++) 
				{
					desc[assign[i]*d+j]+=(float)sqr(v[i*d+j]-centroids[assign[i]*d+j]);
				}
			}     

		} 
		else if(flags==14) 
		{
			/* first pass: mean residual per centroid */
			avg = fvec_new_0(k*d);

			for(i=0;i<n;i++) 
			{
				for(j=0;j<d;j++) 
				{
					avg[assign[i]*d+j]+=v[i*d+j]-centroids[assign[i]*d+j];
				}
			}

			hist=ivec_new_histogram(k,assign,n);

			for(i=0;i<k;i++) 
			{
				if(hist[i]>0) 
				{
					for(j=0;j<d;j++) 
					{
						avg[i*d+j]/=hist[i];
					}
				}
			}

			free(hist);

			/* second pass: squared deviation from that mean */
			fvec_0(desc,k*d);
			for(i=0;i<n;i++) 
			{
				for(j=0;j<d;j++) 
				{
					desc[assign[i]*d+j]+=(float)(sqr(v[i*d+j]-centroids[assign[i]*d+j]-avg[assign[i]*d+j]));
				}
			}

			fvec_sqrt(desc,k*d);

			free(avg);
		}  
		else if(flags==15) 
		{
			fvec_0(desc,k*d*2);
			sum = desc;           /* first half of desc */

			for(i=0;i<n;i++) 
			{
				for(j=0;j<d;j++) 
				{
					sum[assign[i]*d+j]+=v[i*d+j]-centroids[assign[i]*d+j];
				}
			}

			hist = ivec_new_histogram(k,assign,n);

			mom2 = desc+k*d;      /* second half of desc */

			for(i=0;i<n;i++) 
			{
				ai=assign[i];
				for(j=0;j<d;j++) 
				{
					mom2[ai*d+j]+=(float)(sqr(v[i*d+j]-centroids[ai*d+j]-sum[ai*d+j]/hist[ai]));
				}
			}
			fvec_sqrt(mom2,k*d);
			free(hist);


		} 
		else if(flags==17) 
		{
			fvec_0(desc,k*d*2);

			for(i=0;i<n;i++) 
			{
				for(j=0;j<d;j++) 
				{
					diff=v[i*d+j]-centroids[assign[i]*d+j];
					if(diff>0)
					{
						desc[assign[i]*d+j]+=diff;
					}
					else 
					{
						desc[assign[i]*d+j+k*d]-=diff;
					}
				}
			}

		} 
		else 
		{
			/* plain VLAD: sum of residuals, then optional post-processing */
			fvec_0(desc,k*d);

			for(i=0;i<n;i++) 
			{
				for(j=0;j<d;j++) 
				{
					desc[assign[i]*d+j]+=v[i*d+j]-centroids[assign[i]*d+j];
				}
			}


			if(flags==1) 
			{
				hist=ivec_new_histogram(k,assign,n);
				/* printf("unbalance factor=%g\n",ivec_unbalanced_factor(hist,k)); */

				for(i=0;i<k;i++) 
				{
					/* a centroid without any vector keeps its zero row
					   instead of being divided by zero */
					if(hist[i]>0) 
					{
						for(j=0;j<d;j++) 
						{
							desc[i*d+j]/=hist[i];    
						}
					}
				}
				free(hist);
			}

			if(flags==2) 
			{
				for(i=0;i<k;i++) 
				{
					fvec_normalize(desc+i*d,d,2.0);
				}
			}

			if(flags==3 || flags==4) 
			{
				assert(!"not implemented");
			}

			if(flags==16) 
			{
				hist=ivec_new_histogram(k,assign,n);
				for(i=0;i<k;i++) 
				{
					if(hist[i]>0) 
					{
						/* NOTE(review): the result of fvec_norm() is not used;
						   if it only returns the norm, fvec_normalize() may
						   have been intended -- confirm */
						fvec_norm(desc+i*d,d,2);
						fvec_mul_by(desc+i*d,d,sqrt(hist[i]));
					}
				}
				free(hist);
			}


		}
		free(assign);
	} 
	else if(flags==11 || flags==12) 
	{
		/* multiple assignment: ma nearest centroids per vector */
		ma=flags==11 ? 4 : 2;
		assign=ivec_new(n*ma);

		dists=knn(n,k,d,ma,centroids,v,assign,NULL,NULL);    

		fvec_0(desc,k*d);

		for(i=0;i<n;i++) 
		{
			for(j=0;j<d;j++) 
			{
				for(a=0;a<ma;a++) 
				{
					desc[assign[ma*i+a]*d+j]+=v[i*d+j]-centroids[assign[ma*i+a]*d+j];
				}
			}
		} 

		free(dists);

		free(assign);
	}

}
Esempio n. 24
0
/*
 * Build an adaptive hierarchical clustering tree over the dataset ds and
 * append its clusters to ahct.
 *
 * ahct : dynamic array of Cluster; the root is appended first, children
 *        are appended as their parent is split
 * bf   : maximum branch factor
 * rho  : divisor used to pick the branch factor of a cluster:
 *        bfi = min(bf, round(npts / rho))
 * ds   : dataset (n points of dimension d)
 *
 * Clusters are processed in the order they appear in ahct.  A cluster
 * whose bfi is below 2 is marked as a leaf; otherwise its points are
 * split into bfi children with kmeans(), the centroids are stored in the
 * cluster and the children's positions in ahct are recorded in its
 * children array.
 */
void ahc_clustering(DyArray *ahct, int bf, int rho, const fDataSet *ds){
	ASSERTINFO(ahct == NULL || bf <= 0 || rho <= 0 || ds == NULL, "IPP");

	int		n 	= ds->n;
	int		d 	= ds->d;
	Cluster	_clu, clu, *pclu = NULL;
	int		i;
	int		iclu, bfi, ni, ori_id;			// the pointer, branch factor and volume of the i-th cluster
	int 	*nassign = ivec_new_set(bf, 0);
	int 	*assign = NULL;
	float	*cent = fvec_new(d*bf);
	float	*mem_points = NULL;
	DyArray	*member = (DyArray*)malloc(sizeof(DyArray)*bf);

	/* initialize the first cluster (root) to add it to the ahc tree */
	Cluster_init(&clu, n);
	for(i = 0; i < n; i++){
		clu.idx[i] = i;
	}
	clu.type = ClusterType_Root;
	DyArray_add(ahct, (void*)&clu, 1);

	/* begin the loop of adaptive hierarchical clustering */
	iclu = 0;
	while(iclu < ahct->count){
		/* deal with the i-th cluster */
		// figure out the adaptive branch factor of the i-th cluster
		pclu = (Cluster*)DyArray_get(ahct, iclu, 1);
		ni = pclu->npts;
		bfi = i_min(bf, (int)round(ni / (float)rho));

		// deal with the cluster according to its size
		if(bfi < 2){
			/*
			 *	this is a leaf cluster
			 *	- mark it, release the children
			 *	* not necessary to store real data points
			 */
			pclu->type = ClusterType_Leaf;
		}else{
			printf("----------------- cluster %d, bfi-%d:\n", iclu, bfi);

			/*
			 * this is an inner cluster
			 * - divide it
			 */
			memcpy(&_clu, pclu, sizeof(Cluster));

			// extract data points from the original dataset according to the idx
			mem_points = fvec_new(ni * d);
			for(i = 0; i < ni; i++){
				/* copy d floats, i.e. sizeof(float)*d bytes, per point */
				memcpy(mem_points+i*d, ds->data+_clu.idx[i]*d, sizeof(float)*d);
			}

			// divide this cluster
			assign = ivec_new(ni);

			kmeans(	d, ni, bfi, CLUSTERING_NITER, mem_points,
					CLUSTERING_NTHREAD | KMEANS_QUIET | KMEANS_INIT_BERKELEY, CLUSTERING_SEED, CLUSTERING_NREDO,
					cent, NULL, assign, nassign);

			// prepare space for members' ids
			for(i = 0; i < bfi; i++){
				DyArray_init(&member[i], sizeof(int), nassign[i]);
			}
			// extract member points' ids for each children cluster
			for(i = 0; i < ni; i++){
				ori_id = _clu.idx[i];
				DyArray_add(&member[assign[i]], (void*)&ori_id, 1);
			}

			// fulfill the type, centroids and the children of this cluster, add them to the ahct
			_clu.type = ClusterType_Inner;
			_clu.cents = fvec_new(d * bfi);
			memcpy(_clu.cents, cent, sizeof(float)*d*bfi);

			DyArray_init(&_clu.children, sizeof(int), bfi);
			for(i = 0; i < bfi; i++){
				Cluster_init(&clu, nassign[i]);
				memcpy(clu.idx, (int*)member[i].elem, sizeof(int)*nassign[i]);

				DyArray_add(&_clu.children, (void*)&ahct->count, 1);	/* the i-th child's position */
				DyArray_add(ahct, (void*)&clu, 1);						/* add the i-th child to the ahct */
			}

			/* as per the elems of ahct may change when expanding the space
			 * we decide to get the brand new address of the element
			 */
			pclu = (Cluster*)DyArray_get(ahct, iclu, 1);
			memcpy(pclu, &_clu, sizeof(Cluster));


			/* report */
			ivec_print(nassign, bfi);
			ivec_print((int*)_clu.children.elem, _clu.children.count);

			/* unset or release */
			FREE(mem_points);
			FREE(assign);
			for(i = 0; i < bfi; i++){
				DyArray_unset(&member[i]);
			}
		}

		// move to next cluster
		iclu++;
	}

	FREE(nassign);
	FREE(cent);
	FREE(member);
	pclu = NULL;
}
Esempio n. 25
0
/*
 * Duplicate the first n ints of v into a newly allocated vector.
 * The copy is owned by the caller.
 */
int * ivec_new_cpy (const int * v, long n)
{
  int *dup = ivec_new(n);  
  const size_t nbytes = n * sizeof (*dup);

  memcpy (dup, v, nbytes);
  return dup;
}
Esempio n. 26
0
/// Search the nk nearest neighbours of every query in queryset.
///
/// For each query the lower bounds to all ncenter clusters are computed
/// (lowerbound_crosspoint() or lowerbound(), selected by lb_type) and the
/// clusters are visited in the order stored in lb.  Visiting stops at the
/// first cluster whose lower bound exceeds the current search radius
/// knn_R.  Member points are taken from baseset rather than read from
/// disk, so `folder` is not used and no I/O time is recorded.
///
/// knnset[qi] is malloc'ed here with room for nk entries and receives the
/// content of the heap for query qi; the caller owns it.
/// cost accumulates the per-query costs and is finally scaled by 1/nq.
void ANC::search(const fDataSet *baseset, const fDataSet *queryset, char *folder, int nk, DoubleIndex **knnset, Cost *cost, int lb_type)
{
	int nq = queryset->n,
		qi, i, set_i;
	int cid;
	float knn_R;
	float *set;
	int *set_id;
	int set_num;
	float *query = fvec_new(d);
	/* scratch vector reused by every query: allocated once, freed once */
	float *set_vector = fvec_new(d);
	DoubleIndex candidate;
	DoubleIndex *lb = (DoubleIndex*)malloc(sizeof(DoubleIndex)*ncenter);
																// lower bounds between query and all centers
	
    Cost costi;
	struct timeval tvb, tve, tvb_lb, tve_lb;

	(void)folder;	// cluster files are not read, see above

	for(qi = 0; qi < nq; qi++)
	{
		/// initialize the cost recorder
		CostInit(&costi);
		gettimeofday(&tvb, NULL);

		/// the qi-th query
		memcpy(query, queryset->data+qi*d, sizeof(float)*d);
		knnset[qi] = (DoubleIndex*)malloc(sizeof(DoubleIndex)*nk);
		/// calculate and sort the lower bounds between query and all clusters to get the right order
		gettimeofday(&tvb_lb, NULL);
		if((int)Algorithm_Search_CrossLB == lb_type){
			lowerbound_crosspoint(lb, query);
		}else if((int)Algorithm_Search == lb_type){
			lowerbound(lb, query, true);	
		}
		
		gettimeofday(&tve_lb, NULL);
		costi.lowerbound = timediff(tvb_lb, tve_lb);

		/// search for knn
		knn_R = FLOAT_MAX;
		i = 0;
		Heap heap(nk);
		while(i < ncenter)
		{
			cid = lb[i].id;
			// the i-th cluster
			if(f_bigger(lb[i].val, knn_R))
			{
				break;
			}
			// knn_R > lb[i], means there are candidates in the i-th cluster
			set_num = member[cid].size();
			set = fvec_new(set_num*d);
			set_id = ivec_new(set_num);

            /* the member points are extracted directly from the base set
               instead of being loaded from the cluster file */
            for(int mi = 0; mi < set_num; mi++){
                int pts_id = member[cid][mi];
                set_id[mi] = pts_id;
                memcpy(set+mi*d, baseset->data+pts_id*d, sizeof(float)*d);
            }

            // update cost
			costi.page = costi.page + 1;
			costi.point = costi.point + set_num;

			for(set_i = 0; set_i < set_num; set_i++)
			{// calculate real distance between all candidates and query
				candidate.id = set_id[set_i];
				memcpy(set_vector, set+set_i*d, sizeof(float)*d);
				candidate.val = odistance(query, set_vector, d);
				if(heap.length < heap.MaxNum || f_bigger(heap.elem[0].val, candidate.val))
				{// heap is not full or new value is smaller, insert
					heap.max_insert(&candidate);
				}
			}
			// NOTE(review): the radius is taken from the heap top even when
			// the heap holds fewer than nk entries -- confirm this is intended
			knn_R = heap.elem[0].val;
			i++;
			// free
			free(set); set = NULL;
			free(set_id); set_id = NULL;
		}// end of search loop
		memcpy(knnset[qi], heap.elem, sizeof(DoubleIndex)*heap.length);

		gettimeofday(&tve, NULL);
		costi.cpu = timediff(tvb, tve);
		costi.search = costi.cpu - costi.lowerbound - costi.io;

		/// sum new cost
		CostCombine(cost, &costi);
	}

	CostMultiply(cost, 1/(float)nq);

	free(set_vector); set_vector = NULL;
	free(query); query = NULL;
	free(lb); lb = NULL;
}
Esempio n. 27
0
/*
 * Learn a Gaussian mixture with ki components on ni vectors of dimension
 * di and return it (allocated by gmm_new()).
 *
 * niter : maximum number of EM iterations; 0 is replaced by 10000
 * v     : input vectors
 * nt, seed, nredo : forwarded to kmeans() (nt also to the threaded EM
 *         steps)
 * flags : forwarded to the EM helpers; GMM_FLAGS_PURE_KMEANS skips EM
 *
 * The means are initialised with kmeans(); every weight is set to 1/k and
 * every sigma entry to the mean of the distances returned by kmeans().
 * EM stops early when the sum of all mean coordinates is exactly the same
 * as after the previous iteration.
 *
 * NOTE(review): nassign is allocated with n entries while only k are
 * printed -- confirm the size kmeans() expects.
 */
gmm_t * gmm_learn (int di, int ni, int ki, int niter,
                   const float * v, int nt, int seed, int nredo,
                   int flags)
{
    const long d = di, k = ki, n = ni;
    double prev_sum, mu_sum = 666;
    int it;

    if (niter == 0)
        niter = 10000;

    /* the GMM parameters */
    float * p = fvec_new_0 (n * k);      /* p(ci|x) for all i */
    gmm_t * g = gmm_new (d, k);

    /* k-means initialisation of the means */
    int * nassign = ivec_new (n);
    float * dis = fvec_new (n);
    kmeans (d, n, k, niter, v, nt, seed, nredo, g->mu, dis, NULL, nassign);

    fflush (stderr);
    fprintf (stderr, "assign = ");
    ivec_print (nassign, k);
    fprintf (stderr, "\n");
    free (nassign);

    /* uniform weights, one shared variance estimate */
    fvec_set (g->w, k, 1.0 / k);
    double sig = fvec_sum (dis, n) / n;
    printf ("sigma at initialization = %.3f\n", sig);
    fvec_set (g->sigma, k * d, sig);
    free (dis);

    /* EM iterations */
    fprintf (stdout, "<><><><> GMM  <><><><><>\n");

    if (flags & GMM_FLAGS_PURE_KMEANS)
        niter = 0;

    it = 1;
    while (it <= niter) {

        gmm_compute_p_thread (n, v, g, p, flags, nt);
        fflush (stdout);

        gmm_handle_empty (n, v, g, p);

        gmm_compute_params (n, v, p, g, flags, nt);
        fflush (stdout);

        /* the sum of the means serves as convergence key */
        prev_sum = mu_sum;
        mu_sum = fvec_sum (g->mu, k * d);

        printf ("keys %5d: %.6f -> %.6f\n", it, prev_sum, mu_sum);
        fflush (stdout);

        if (mu_sum == prev_sum)
            break;

        it++;
    }
    fprintf (stderr, "\n");

    free (p);

    return g;
}
Esempio n. 28
0
/*
 * Command-line k-NN search tool.
 *
 * Reads a database (-b / -bb / -bt) and a query set (-q / -qb / -qt),
 * searches the k nearest database vectors of every query with
 * knn_full_thread(), re-ranks the result with knn_reorder_shortlist() and
 * writes the distances (-odis / -odist, default "dis.out") and the
 * neighbour indices (-onn / -onnt, default "nn.out").
 *
 * Other options: -k, -d, -nt, -nb, -nq (integers), -silence, -verbose,
 * -h / --help.
 */
int main (int argc, char ** argv)
{
  int i;
  int k = 10;
  int d = 0;
  int nb = 0;
  int nq = 0;
  int nt = count_cpu();
  int verbose = 1;
  int ret = 0;

  int fmt_b = FMT_FVEC;
  int fmt_q = FMT_FVEC;
  int fmt_nn = FMT_IVEC;
  int fmt_dis = FMT_FVEC;

  const char * fb_name = NULL;    /* database filename */
  const char * fq_name = NULL;    /* query filename */
  const char * fnn_name = "nn.out";   /* nn idx filename */
  const char * fdis_name = "dis.out";  /* nn dis filename */

  if (argc == 1)
    usage (argv[0]);

  for (i = 1 ; i < argc ; i++) {
    char *a = argv[i];

    if (!strcmp (a, "-h") || !strcmp (a, "--help"))
      usage (argv[0]);
    else if (!strcmp (a, "-silence")) {
      verbose = 0;
    }
    else if (!strcmp (a, "-verbose")) {
      verbose = 2;
    }
    /* integer options: sscanf returns EOF (non-zero) on empty input, so
       the result is compared with 1 rather than tested for truth */
    else if (!strcmp (a, "-k") && i+1 < argc) {
      ret = sscanf (argv[++i], "%d", &k);
      assert (ret == 1);
    }
    else if (!strcmp (a, "-d") && i+1 < argc) {
      ret = sscanf (argv[++i], "%d", &d);
      assert (ret == 1);
    }
    else if (!strcmp (a, "-nt") && i+1 < argc) {
      ret = sscanf (argv[++i], "%d", &nt);
      assert (ret == 1);
    }
    else if (!strcmp (a, "-nb") && i+1 < argc) {
      ret = sscanf (argv[++i], "%d", &nb);
      assert (ret == 1);
    }
    else if (!strcmp (a, "-nq") && i+1 < argc) {
      ret = sscanf (argv[++i], "%d", &nq);
      assert (ret == 1);
    }
    else if (!strcmp (a, "-b") && i+1 < argc) {
      fb_name = argv[++i];
      fmt_b = FMT_FVEC;
    }
    else if (!strcmp (a, "-bb") && i+1 < argc) {
      fb_name = argv[++i];
      fmt_b = FMT_BVEC;
    }
    else if (!strcmp (a, "-bt") && i+1 < argc) {
      fb_name = argv[++i];
      fmt_b = FMT_TEXT;
    }
    else if (!strcmp (a, "-q") && i+1 < argc) {
      fq_name = argv[++i];
      fmt_q = FMT_FVEC;
    }
    else if (!strcmp (a, "-qb") && i+1 < argc) {
      fq_name = argv[++i];
      fmt_q = FMT_BVEC;
    }
    else if (!strcmp (a, "-qt") && i+1 < argc) {
      fq_name = argv[++i];
      fmt_q = FMT_TEXT;
    }
    else if (!strcmp (a, "-onn") && i+1 < argc) {
      fnn_name = argv[++i];
      fmt_nn = FMT_IVEC;
    }
    else if (!strcmp (a, "-onnt") && i+1 < argc) {
      fnn_name = argv[++i];
      fmt_nn = FMT_TEXT;
    }
    else if (!strcmp (a, "-odis") && i+1 < argc) {
      fdis_name = argv[++i];
      fmt_dis = FMT_FVEC;
    }
    else if (!strcmp (a, "-odist") && i+1 < argc) {
      fdis_name = argv[++i];
      fmt_dis = FMT_TEXT;
    }
  }

  assert (fb_name && fq_name);

  fprintf (stderr, "k = %d\nd = %d\nnt = %d\n", k, d, nt);

  if (verbose) {
    fprintf (stderr, "fb = %s  (fmt = %s)\n", fb_name, 
	     (fmt_b == FMT_FVEC ? "fvec" : (fmt_b == FMT_BVEC ? "bvec" : "txt")));
    fprintf (stderr, "fq = %s  (fmt = %s)\n", fq_name, 
	     (fmt_q == FMT_FVEC ? "fvec" : (fmt_q == FMT_BVEC ? "bvec" : "txt")));
    fprintf (stderr, "fnn = %s  (fmt = %s)\n", fnn_name, 
	     (fmt_nn == FMT_IVEC ? "ivec" : "txt"));
    fprintf (stderr, "fdis = %s  (fmt = %s)\n", fdis_name, 
	     (fmt_dis == FMT_FVEC ? "fvec" : "txt"));
  }


  /* read the input vectors for database and queries */
  float * vb = my_fvec_read (fb_name, fmt_b, verbose, &nb, &d);
  float * vq = my_fvec_read (fq_name, fmt_q, verbose, &nq, &d);


  /* Search */
  int * idx = ivec_new (k * nq);
  float * dis = fvec_new (k * nq);

  knn_full_thread (2, nq, nb, d, k, vb, vq, NULL, idx, dis, nt);
  knn_reorder_shortlist (nq, nb, d, k, vb, vq, idx, dis);

  /* write the distance output file */
  if (fmt_dis == FMT_FVEC)
    ret = fvecs_write (fdis_name, k, nq, dis);
  else if (fmt_dis == FMT_TEXT)
    ret = fvecs_write_txt (fdis_name, k, nq, dis);
  else assert (0 && "Unknow output format\n");   /* && so that it can fire */
  assert (ret == nq);
  
  /* write the neighbour index output file */
  if (fmt_nn == FMT_IVEC)
    ret = ivecs_write (fnn_name, k, nq, idx);
  else if (fmt_nn == FMT_TEXT)
    ret = ivecs_write_txt (fnn_name, k, nq, idx);
  else assert (0 && "Unknow output format\n");
  assert (ret == nq);
  
  free (idx);
  free (dis);
  free (vb);
  free (vq);
  return 0;
}
Esempio n. 29
0
/*
 * Learn a hierarchical k-means structure with nlevel levels and branch
 * factor bf on n points of dimension d.
 *
 * points           : n*d floats
 * nb_iter_max      : forwarded to kmeans()
 * nt               : number of threads for kmeans(); a value <= 0 falls
 *                    back to count_cpu()
 * verbose          : non-zero prints progress on stderr
 * clust_assign_out : when not NULL, receives a malloc'ed array of n ints
 *                    (owned by the caller) with the final node of each
 *                    point
 *
 * At each level the points are grouped by the node they currently belong
 * to, every node is split into bf clusters with kmeans(), the centroids
 * are stored in hkm->centroids[level] and each point's node id becomes
 * old_id * bf + cluster.  The process exits with status 1 when a node
 * receives no point.
 */
hkm_t *hkm_learn (int n, int d, int nlevel, int bf,
		  const float *points, int nb_iter_max, int nt, int verbose, 
		  int **clust_assign_out)
{
  int i, l, parent, k = 1;
  hkm_t *hkm = hkm_new (d, nlevel, bf);

  /* honour the caller's thread count; it used to be hidden by a local
     variable of the same name */
  int nthreads = nt > 0 ? nt : count_cpu();

  /* the absolute assignement of all points and the sizes of clusters */
  int *node_assign = calloc (n, sizeof (int));

  /* the buffer that receives the vectors gathered by parent node */
  float *v = fvec_new ((long) n * d);

  /* Initialization */
  for (l = 0; l < nlevel; l++) {

    /* sort the vectors depending on which cluster they have been assigned to,
       and compute the number of vectors assigned to each cluster 
       *** NOTE: to replace with the k_max function of ivfgeo
       -> put this function in a separate library             */
    int *node_assign_idx = malloc (sizeof (*node_assign_idx) * n);
    ivec_sort_index (node_assign, n, node_assign_idx);

    /* Re-order the vectors depending on the previous order
       (offsets are computed in long to avoid int overflow) */
    for (i = 0; i < n ; i++)
      memmove (v + (long) d * i, points + (long) d * node_assign_idx[i], 
	       sizeof (*points) * d);

    /* k is the number of nodes/leaves at this level */
    int pos = 0;
    for (parent = 0; parent < k ; parent++) {
      /* Count the number of vectors assigned to this internal node */
      int nassign = 0;
      while (pos + nassign < n)
        if (node_assign[node_assign_idx[pos + nassign]] == parent)
          nassign++;
        else break;

      if (verbose) 
	fprintf (stderr, "[Level %d | Parent %d] nassign=%d | pos=%d", l, parent, nassign, pos); 

      if (nassign == 0) {
        fprintf (stderr, "# Problem2: no enough vectors in a node\n");
        exit (1);
      }

      /* Perform the clustering on this subset of points */
      int *clust_assign = ivec_new (nassign);
      float * centroids = fvec_new (bf * d);
      int flags = nthreads | KMEANS_INIT_RANDOM | KMEANS_QUIET;
      float err = kmeans (d, nassign, bf, nb_iter_max, v + (long) d * pos, flags,
			  0, 1, centroids, NULL, clust_assign, NULL);
      if (verbose)
	fprintf (stderr, "-> err = %.3f\n", err);
      memcpy (hkm->centroids[l] + (long) d * parent * bf, centroids,
              d * bf * sizeof (*centroids));

      /* Update the indexes for those points */
      for (i = 0; i < nassign; i++) {
        int truepos = node_assign_idx[pos + i];
        node_assign[truepos] = node_assign[truepos] * bf + clust_assign[i];
      }

      free (centroids);
      free (clust_assign);
      pos += nassign;
    }

    k *= bf;
    free (node_assign_idx);
  }

  if(clust_assign_out) {
    *clust_assign_out = (int *) malloc (n * sizeof (int));
    memcpy (*clust_assign_out, node_assign, n * sizeof (int));
  } 
  free (node_assign);
  free (v);
  return hkm;
}
Esempio n. 30
0
/*
 * Demo program: contourlet + wavelet transform of "1.pgm", EZBC
 * encoding/decoding at a fixed rate, reconstruction, and PSNR report.
 *
 * Files written: dwt.pgm, dfb<i>.pgm, rec_low.pgm, rec_dfb<i>.pgm,
 * rec_dwt.pgm and rec.pgm.  All parameters are hard-coded below.
 */
int main()
{
  int i, j;
  int w, h;
  int levels, ct_levels, wt_levels,level_init;
  double rate,rate_init;
  ivec dfb_levels;
  mat source, dest;
  contourlet_t *contourlet;
  mat wavelet;
  int length;
  unsigned char *buffer;

  // Initialize parameters (argc is a local stand-in, not a real argument count)
  int argc=6;
      rate_init=2;
	  level_init=5;


/* NOTE: LEVELS and IMPULSE are not referenced anywhere in this function */
#define LEVELS 5
#define IMPULSE 100.

  source = mat_pgm_read("1.pgm");
  h = mat_height(source);
  w = mat_width(source);
  dest = mat_new(w, h);
  /* rate is rate_init scaled by the number of pixels */
  rate = rate_init * w * h;
  levels = level_init;
  ct_levels = argc - 4;              /* contourlet levels */
  wt_levels = levels - ct_levels;    /* wavelet levels */
  dfb_levels = ivec_new(ct_levels);
  /* directional filter bank levels: 4, 5, ... */
  for(i = 0; i < ct_levels; i++)
    dfb_levels[i] = 4+i;


  buffer = bvec_new_zeros(BUFFER_SIZE);

  contourlet = contourlet_new(ct_levels, dfb_levels);
  contourlet->wt_levels = wt_levels;

  /* forward transforms: contourlet, then wavelet on the low band */
  contourlet_transform(contourlet, source);
  wavelet = it_dwt2D(contourlet->low, it_wavelet_lifting_97, wt_levels);
  contourlet->dwt = it_wavelet2D_split(wavelet, wt_levels);

  /* normalize the subbands */
  for(i = 0; i < ct_levels; i++)
    for(j = 0; j < (1 << dfb_levels[i]); j++)
      mat_mul_by(contourlet->high[i][j], norm_high[1+i][dfb_levels[i]][j]);
  mat_mul_by(contourlet->low, norm_low[ct_levels]);

  /* make flat images */
  mat_pgm_write("dwt.pgm", wavelet);
  for(i = 0; i < ct_levels; i++) {
    char filename[256];

    mat dfb_rec = mat_new((h >> i) + 1, (w >> i) + 1);
    if(dfb_levels[i])
      dfb_flatten(contourlet->high[i], dfb_rec, dfb_levels[i]);
    else
      mat_set_submatrix(dfb_rec, contourlet->high[i][0], 0, 0);
    /* shift by 128 for writing, then undo the shift */
    mat_incr(dfb_rec, 128);
    sprintf(filename, "dfb%d.pgm", i);
    mat_pgm_write(filename, dfb_rec);
    mat_decr(dfb_rec, 128);
    mat_delete(dfb_rec);
  }

  /* EZBC encoding */
  length = ezbc_encode(contourlet, buffer, BUFFER_SIZE, rate);

  /* EZBC decoding */
  ezbc_decode(contourlet, buffer, BUFFER_SIZE, rate);

  mat_pgm_write("rec_low.pgm", contourlet->dwt[0]);

  /* make flat images */
  for(i = 0; i < ct_levels; i++) {
    char filename[256];

    mat dfb_rec = mat_new((h >> i) + 1, (w >> i) + 1);
    if(dfb_levels[i])
      dfb_flatten(contourlet->high[i], dfb_rec, dfb_levels[i]);
    else
      mat_set_submatrix(dfb_rec, contourlet->high[i][0], 0, 0);
    mat_incr(dfb_rec, 128);
    sprintf(filename, "rec_dfb%d.pgm", i);
    mat_pgm_write(filename, dfb_rec);
    mat_decr(dfb_rec, 128);
    mat_delete(dfb_rec);
  }

  /* undo the subband normalization applied before encoding */
  for(i = 0; i < ct_levels; i++)
    for(j = 0; j < (1 << dfb_levels[i]); j++)
      mat_div_by(contourlet->high[i][j], norm_high[1+i][dfb_levels[i]][j]);
  mat_div_by(contourlet->low, norm_low[ct_levels]);


  //  mat_pgm_write("rec_low.pgm", contourlet->dwt[0]);

  /* TODO: fix this in libit */
  if(wt_levels)
    wavelet = it_wavelet2D_merge(contourlet->dwt, wt_levels);
  else
    mat_copy(wavelet, contourlet->dwt[0]);

  mat_pgm_write("rec_dwt.pgm", wavelet);

  /* inverse transforms: wavelet, then contourlet into dest */
  contourlet->low = it_idwt2D(wavelet, it_wavelet_lifting_97, wt_levels);

  contourlet_itransform(contourlet, dest);

  contourlet_delete(contourlet);

  mat_pgm_write("rec.pgm", dest);

  /* rate is length*8 divided by the pixel count; PSNR uses a peak of 255 */
  printf("rate = %f PSNR = %f\n", length * 8. / (w*h), 10*log10(255*255/mat_distance_mse(source, dest, 0)));

  ivec_delete(dfb_levels);
  mat_delete(dest);
  mat_delete(source);
  bvec_delete(buffer);

  return(0);
}