/**
 * Multi-threaded k-nearest-neighbour search using distance type 2
 * (labelled "euclidean distance" at other call sites in this file).
 *
 * Arguments are forwarded to knn_full_thread in the same positions the
 * other callers in this file use: npt query vectors (coords), nclust
 * database vectors (codebook), dimension d, k neighbours per query.
 *
 * @param vw        output buffer receiving the neighbour indices;
 *                  presumably k*npt ints -- confirm against knn_full_thread.
 * @param n_thread  thread count passed through to knn_full_thread.
 * @return buffer of k*npt floats obtained from fvec_new and filled by
 *         knn_full_thread; ownership passes to the caller (nn_thread
 *         releases the same kind of buffer with free()).
 */
float *knn_thread (int npt, int nclust, int d, int k,
                   const float *codebook, const float *coords,
                   int *vw, int n_thread)
{
  const int distance_type = 2;
  const int n_results = k * npt;

  float *dists = fvec_new (n_results);

  knn_full_thread (distance_type, npt, nclust, d, k,
                   codebook, coords, NULL,
                   vw, dists, n_thread);

  return dists;
}
/**
 * Multi-threaded 1-nearest-neighbour search using distance type 2
 * (labelled "euclidean distance" at other call sites in this file).
 *
 * Runs knn_full_thread with k = 1 for npt query vectors (coords) against
 * nclust database vectors (codebook) of dimension d.
 *
 * @param vw        output buffer receiving the nearest-neighbour index of
 *                  each query; presumably npt ints -- confirm against
 *                  knn_full_thread.
 * @param n_thread  thread count passed through to knn_full_thread.
 * @return the sum, over all npt queries, of the distance values written by
 *         knn_full_thread (computed with fvec_sum).
 */
double nn_thread (int npt, int nclust, int d,
                  const float *codebook, const float *coords,
                  int *vw, int n_thread)
{
  /* scratch buffer: one distance per query, released before returning */
  float *dists = fvec_new (npt);

  knn_full_thread (2, npt, nclust, d, 1,
                   codebook, coords, NULL,
                   vw, dists, n_thread);

  const double total = fvec_sum (dists, npt);
  free (dists);

  return total;
}
/**
 * Learn centroids with k-means on the learning set, then assign every base
 * vector to its nearest centroid.
 *
 * Writes the members `centroid` (freshly allocated, ncenter*d floats),
 * `assign` (one centroid index appended per base vector) and `member`
 * (result of list2inverted(assign, ncenter)).
 *
 * @param ds     base data set; ds->n vectors of dimension ds->d in ds->data.
 * @param lds    learning set handed to kmeans (lds->n vectors, lds->data).
 * @param niter  iteration count forwarded to kmeans.
 * @param nth    thread count forwarded to kmeans and knn_full_thread.
 * @param seed   random seed forwarded to kmeans.
 * @param nredo  forwarded to kmeans.
 *
 * NOTE(review): `assign` is appended to, not reset, and `centroid` is
 * overwritten without releasing a previous buffer -- calling this twice on
 * the same object looks unsupported; confirm with callers.
 */
void Clustering::generate_cluster(fDataSet *ds, fDataSet *lds, int niter, int nth, int seed, int nredo)
{
    int i;
    int d = ds->d;
    int n = ds->n;
    int n_l = lds->n;

    /// allocate storage space for necessary data
    float *tmp_dis = fvec_new_set(n, -1);
    centroid = fvec_new_set(ncenter*d, 0);
    int *tmp_assign = ivec_new_set(n, -1);

    /// learning for centroids
    printf("-------------- kmeans on learning set -------------\nn_l-%d, nt-%d\n", n_l, nth);
    float quantierror = kmeans(d, n_l, ncenter, niter, lds->data, nth, seed, nredo,
                               centroid, NULL, NULL, NULL);
    printf(">>> finished clustering learning, quantization error: %f\n", quantierror);

    /// find 1-nn among all centroids for each base vector: query=basedata, dataset=centroids
    knn_full_thread (
            2,                      // euclidean distance
            n, ncenter, d, 1,       // 1-nn
            centroid, ds->data,
            NULL, tmp_assign, tmp_dis,
            nth);

    /// extract the assign and the member belongness
    for(i = 0; i < n; i++){
        assign.push_back(tmp_assign[i]);
    }
    member = list2inverted(assign, ncenter);

    puts(">>> finished cluster assign and member points extraction");
    puts("member count for each cluster");
    for(i = 0; i < ncenter; i++){
        // size() is not an int; convert explicitly so the argument matches
        // the %d conversion (a mismatched printf argument is undefined behaviour).
        printf("%d - %d\n", i, static_cast<int>(member[i].size()));
    }

    /// disallocation
    FREE(tmp_dis);
    FREE(tmp_assign);
}
float ANC::neighbor_cluster_estimation(const fDataSet *ds, int nth) { /// check for necessary data: centroids, basedata ASSERTINFO(ds == NULL || centroid == NULL || ds->data == NULL, "IPP"); /// prepare for necessary variables neighbor.resize(ncenter); int i, iclu = -1, ineighbor = -1; int K = ncenter; int n = ds->n; int *tmp_assign = ivec_new_set(n * g, -1); float *tmp_dis = fvec_new_0(n * g); int *neighbor_flag = ivec_new_set(ncenter*ncenter, 0); /// find k-nn among all centroids for each base vector: query=basedata, dataset=centroids, k=2 for neighbor cluster knn_full_thread ( 2, // euclidean distance n, K, d, g, // g-nn centroid, ds->data, NULL, tmp_assign, tmp_dis, nth); // extract neighbor clusters for each cluster for(i = 0; i < n; i++) { iclu = tmp_assign[i*g]; // current cluster = current point's 1-NN for (int ig = 1; ig < g; ig++){ ineighbor = tmp_assign[i*g+ig]; // current neighbor cluster = current point's g-thNN if(0 == neighbor_flag[iclu*ncenter+ineighbor]){ neighbor[iclu].push_back(ineighbor); neighbor_flag[iclu*ncenter+ineighbor] = 1; } } } puts("end neighbor"); // check number of neighbor cluster for(i = 0; i < K; i++){ ASSERTINFO(neighbor[i].size() == 0, "warning: there is a cluster who has no neighbors"); } puts(">>> finished neighbor cluster registration"); ///### display neighbor cluster count puts(">>> neighbor cluster"); int sum_neighbor = 0; for(i = 0; i < K; i++){ // printf("\n%d - %d\t", i, neighbor[i].size()); sum_neighbor += neighbor[i].size(); } float avg_neighbor = sum_neighbor / (float)K; /* if(K <= 10){ for(i = 0; i < K; i++){ printf("\n%d - %d\t", i, neighbor[i].size()); for(ineighbor = 0; ineighbor < neighbor[i].size(); ineighbor++){ printf("%d ", neighbor[i][ineighbor]); } } }*/ /// disallocate space FREE(tmp_assign); FREE(tmp_dis); return avg_neighbor; }
int main (int argc, char ** argv) { int i; int k = 10; int d = 0; int nb = 0; int nq = 0; int nt = count_cpu(); int verbose = 1; int ret = 0; int fmt_b = FMT_FVEC; int fmt_q = FMT_FVEC; int fmt_nn = FMT_IVEC; int fmt_dis = FMT_FVEC; const char * fb_name = NULL; /* database filename */ const char * fq_name = NULL; /* query filename */ const char * fnn_name = "nn.out"; /* nn idx filename */ const char * fdis_name = "dis.out"; /* nn dis filename */ if (argc == 1) usage (argv[0]); for (i = 1 ; i < argc ; i++) { char *a = argv[i]; if (!strcmp (a, "-h") || !strcmp (a, "--help")) usage (argv[0]); else if (!strcmp (a, "-silence")) { verbose = 0; } else if (!strcmp (a, "-verbose")) { verbose = 2; } else if (!strcmp (a, "-k") && i+1 < argc) { ret = sscanf (argv[++i], "%d", &k); assert (ret); } else if (!strcmp (a, "-d") && i+1 < argc) { ret = sscanf (argv[++i], "%d", &d); assert (ret); } else if (!strcmp (a, "-nt") && i+1 < argc) { ret = sscanf (argv[++i], "%d", &nt); assert (ret); } else if (!strcmp (a, "-nb") && i+1 < argc) { ret = sscanf (argv[++i], "%d", &nb); assert (ret); } else if (!strcmp (a, "-nq") && i+1 < argc) { ret = sscanf (argv[++i], "%d", &nq); assert (ret); } else if (!strcmp (a, "-b") && i+1 < argc) { fb_name = argv[++i]; fmt_b = FMT_FVEC; } else if (!strcmp (a, "-bb") && i+1 < argc) { fb_name = argv[++i]; fmt_b = FMT_BVEC; } else if (!strcmp (a, "-bt") && i+1 < argc) { fb_name = argv[++i]; fmt_b = FMT_TEXT; } else if (!strcmp (a, "-q") && i+1 < argc) { fq_name = argv[++i]; fmt_q = FMT_FVEC; } else if (!strcmp (a, "-qb") && i+1 < argc) { fq_name = argv[++i]; fmt_q = FMT_BVEC; } else if (!strcmp (a, "-qt") && i+1 < argc) { fq_name = argv[++i]; fmt_q = FMT_TEXT; } else if (!strcmp (a, "-onn") && i+1 < argc) { fnn_name = argv[++i]; fmt_nn = FMT_IVEC; } else if (!strcmp (a, "-onnt") && i+1 < argc) { fnn_name = argv[++i]; fmt_nn = FMT_TEXT; } else if (!strcmp (a, "-odis") && i+1 < argc) { fdis_name = argv[++i]; fmt_dis = FMT_FVEC; } else if (!strcmp (a, 
"-odist") && i+1 < argc) { fdis_name = argv[++i]; fmt_dis = FMT_TEXT; } } assert (fb_name && fq_name); fprintf (stderr, "k = %d\nd = %d\nnt = %d\n", k, d, nt); if (verbose) { fprintf (stderr, "fb = %s (fmt = %s)\n", fb_name, (fmt_b == FMT_FVEC ? "fvec" : (fmt_b == FMT_BVEC ? "bvec" : "txt"))); fprintf (stderr, "fq = %s (fmt = %s)\n", fq_name, (fmt_q == FMT_FVEC ? "fvec" : (fmt_q == FMT_BVEC ? "bvec" : "txt"))); fprintf (stderr, "fnn = %s (fmt = %s)\n", fnn_name, (fmt_nn == FMT_IVEC ? "ivec" : "txt")); fprintf (stderr, "fdis = %s (fmt = %s)\n", fdis_name, (fmt_dis == FMT_FVEC ? "fvec" : "txt")); } /* read the input vectors for database and queries */ float * vb = my_fvec_read (fb_name, fmt_b, verbose, &nb, &d); float * vq = my_fvec_read (fq_name, fmt_q, verbose, &nq, &d); /* Search */ int * idx = ivec_new (k * nq); float * dis = fvec_new (k * nq); knn_full_thread (2, nq, nb, d, k, vb, vq, NULL, idx, dis, nt); knn_reorder_shortlist (nq, nb, d, k, vb, vq, idx, dis); /* write the distance output file */ if (fmt_dis == FMT_FVEC) ret = fvecs_write (fdis_name, k, nq, dis); else if (fmt_dis == FMT_TEXT) ret = fvecs_write_txt (fdis_name, k, nq, dis); else assert (0 || "Unknow output format\n"); assert (ret == nq); /* write the distance output file */ if (fmt_nn == FMT_IVEC) ret = ivecs_write (fnn_name, k, nq, idx); else if (fmt_nn == FMT_TEXT) ret = ivecs_write_txt (fnn_name, k, nq, idx); else assert (0 || "Unknow output format\n"); assert (ret == nq); free (idx); free (dis); free (vb); free (vq); return 0; }
void Clustering::neighbor_cluster_estimation(const fDataSet *ds, int nth) { /// check for necessary data: centroids, basedata ASSERTINFO(ds == NULL || centroid == NULL || ds->data == NULL, "IPP"); /// prepare for necessary variables neighbor.resize(ncenter); int i, iclu = -1, ineighbor = -1; int K = ncenter; int n = ds->n; int d = ds->d; int *tmp_assign = ivec_new_set(n * 2, -1); float *tmp_dis = fvec_new_0(n * 2); int *neighbor_flag = ivec_new_set(ncenter*ncenter, 0); /// find k-nn among all centroids for each base vector: query=basedata, dataset=centroids, k=2 for neighbor cluster knn_full_thread ( 2, // euclidean distance n, K, d, 2, // 2-nn centroid, ds->data, NULL, tmp_assign, tmp_dis, nth); // extract neighbor clusters for each cluster for(i = 0; i < n; i++) { iclu = tmp_assign[i*2]; // current cluster = current point's 1-NN ineighbor = tmp_assign[i*2+1]; // current neighbor cluster = current point's 2-NN if(0 == neighbor_flag[iclu*ncenter+ineighbor]){ neighbor[iclu].push_back(ineighbor); neighbor_flag[iclu*ncenter+ineighbor] = 1; } } puts(">>> finished neighbor cluster registration"); ///### display neighbor cluster count puts(">>> neighbor cluster"); int sum_neighbor = 0; for(i = 0; i < K; i++){ printf("\n%d - %d\t", i, neighbor[i].size()); sum_neighbor += neighbor[i].size(); } printf("\naveragely %lf neighbors\n", sum_neighbor / (float)K); /* if(K <= 500){ for(i = 0; i < K; i++){ printf("\n%d - %d\t", i, neighbor[i].size()); int ineighbor; for(ineighbor = 0; ineighbor < neighbor[i].size(); ineighbor++){ printf("%d ", neighbor[i][ineighbor]); } } } */ printf("\naveragely %lf neighbors\n", sum_neighbor / (float)K); /// disallocate space FREE(tmp_assign); FREE(tmp_dis); }
void mexFunction (int nlhs, mxArray *plhs[], int nrhs, const mxArray*prhs[]) { if (nrhs < 2 || nrhs > 5) mexErrMsgTxt ("Invalid number of input arguments"); if (nlhs != 2) mexErrMsgTxt ("2 output arguments required"); int d = mxGetM (prhs[0]); int n = mxGetN (prhs[0]); int nq = mxGetN (prhs[1]); int nt = 1; if (mxGetM (prhs[1]) != d) mexErrMsgTxt("Dimension of base and query vectors are not consistent"); if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS || mxGetClassID(prhs[1]) != mxSINGLE_CLASS ) mexErrMsgTxt ("need single precision array"); float *b = (float*) mxGetPr (prhs[0]); /* database vectors */ float *v = (float*) mxGetPr (prhs[1]); /* query vectors */ int k = 1; int distype = 2; if (nrhs >= 3) k = (int) mxGetScalar(prhs[2]); if (nrhs >= 4) distype = (int) mxGetScalar(prhs[3]); /* If practice, the following is not used (all threads by default) */ if (nrhs >= 5) nt = (int) mxGetScalar(prhs[4]); if (n < k) mexErrMsgTxt("fewer vectors than number to be returned"); /* ouptut: centroids, assignment, distances */ plhs[0] = mxCreateNumericMatrix (k, nq, mxINT32_CLASS, mxREAL); int *assign = (int*) mxGetPr (plhs[0]); plhs[1] = mxCreateNumericMatrix (k, nq, mxSINGLE_CLASS, mxREAL); float *dis = (float*) mxGetPr (plhs[1]); /* With Matlab, we have to avoid using threads for the L2 distance, because this one makes a call to MKL, which is no thread-safe */ if (distype == 2 || distype == 16) knn_full (distype, nq, n, d, k, b, v, NULL, assign, dis); else knn_full_thread (distype, nq, n, d, k, b, v, NULL, assign, dis, nt); /* post-processing: convert to matlab indices, and enforce full sort */ int i; for (i = 0 ; i < nq * k ; i++) assign[i]++; }