/*
 * Allocate a matrix with fmat_new(d, nev) and have fmat_svd_partial_full()
 * fill it from the d*n data in v, requesting nev components.
 *
 *   d, n      dimensions of the input data v
 *   nev       number of components requested; must not exceed d or n
 *   v         input data, passed through to fmat_svd_partial_full()
 *   singvals  forwarded as-is to fmat_svd_partial_full()
 *
 * Returns the newly allocated matrix, or NULL when nev is too large or the
 * SVD routine reports a negative status (the matrix is freed in that case).
 */
float *fmat_new_pca_part(int d,int n,int nev,
                         const float *v,float *singvals)
{
  /* Cannot ask for more components than either dimension provides. */
  if(nev>d || nev>n) {
    fprintf(stderr,"fmat_new_pca_part: asking for too many eigenvalues (%d) wrt %d*%d data\n",nev,n,d);
    return NULL;
  }

  float *pcamat=fmat_new(d,nev);
  int ret;

  if(n<d) {
    /* Fewer points than dimensions: swap the two sizes, set the flag
       argument to 1 and collect the result through the last output slot. */
    fprintf(stderr,"fmat_new_pca_part: warn fewer learning points (%d) than dimensions (%d): transposing\n",n,d);
    ret=fmat_svd_partial_full(n,d,nev,v,1,singvals,NULL,pcamat,count_cpu());
  } else {
    ret=fmat_svd_partial_full(d,n,nev,v,0,singvals,pcamat,NULL,count_cpu());
  }

  if(ret>=0)
    return pcamat;

  /* The decomposition failed: do not hand back a half-filled matrix. */
  free(pcamat);
  return NULL;
}
/*
 * Test entry point: parse the harness options, record the CPU count and the
 * allowed NUMA nodes in the file-level variables, then run
 * setup / testcpuset / cleanup and leave through tst_exit().
 */
int main(int argc, char *argv[])
{
	tst_parse_opts(argc, argv, NULL, NULL);

	ncpus = count_cpu();

	/* Nodes that provide both memory and CPUs. */
	if (get_allowed_nodes_arr(NH_MEMS | NH_CPUS, &nnodes, &nodes) < 0) {
		tst_brkm(TBROK | TERRNO, NULL, "get_allowed_nodes_arr");
	}

	/* A single node is not enough for this test. */
	if (nnodes <= 1) {
		tst_brkm(TCONF, NULL, "requires a NUMA system.");
	}

	setup();
	testcpuset();
	cleanup();

	tst_exit();
}
/*
 * Convenience wrapper around fmat_svd_partial_full(): forwards every
 * argument unchanged, passes 0 for the fifth (flag) argument and the value
 * of count_cpu() for the last one. Returns whatever the full routine returns.
 */
int fmat_svd_partial(int d,int n,int ns,const float *a,
                     float *singvals,float *u,float *v)
{
  int ncpu=count_cpu();

  return fmat_svd_partial_full(d,n,ns,a,0,singvals,u,v,ncpu);
}
hkm_t *hkm_learn (int n, int d, int nlevel, int bf, const float *points, int nb_iter_max, int nt, int verbose, int **clust_assign_out) { int i, l, parent, k = 1; hkm_t *hkm = hkm_new (d, nlevel, bf); /* the absolute assignement of all points and the sizes of clusters */ int *node_assign = calloc (sizeof (int), n); /* the buffer that receives the vectors gathered by parent node */ float *v = fvec_new (n * d); /* Initialization */ for (l = 0; l < nlevel; l++) { /* sort the vectors depending on which cluster they have been assigned to, and compute the number of vectors assigned to each cluster *** NOTE: to replace with the k_max function of ivfgeo -> put this function in a separate library */ int *node_assign_idx = malloc (sizeof (*node_assign_idx) * n); ivec_sort_index (node_assign, n, node_assign_idx); /* Re-order the vectors depending on the previous order */ for (i = 0; i < n ; i++) memmove (v + d * i, points + d * node_assign_idx[i], sizeof (*points) * d); /* k is the number of nodes/leaves at this level */ int pos = 0; for (parent = 0; parent < k ; parent++) { /* Count the number of vectors assigned to this internal node */ int nassign = 0; while (pos + nassign < n) if (node_assign[node_assign_idx[pos + nassign]] == parent) nassign++; else break; if (verbose) fprintf (stderr, "[Level %d | Parent %d] nassign=%d | pos=%d", l, parent, nassign, pos); if (nassign == 0) { fprintf (stderr, "# Problem2: no enough vectors in a node\n"); exit (1); } /* Perform the clustering on this subset of points */ int *clust_assign = ivec_new (nassign); float * centroids = fvec_new (bf * d); int nt = count_cpu(); int flags = nt | KMEANS_INIT_RANDOM | KMEANS_QUIET; float err = kmeans (d, nassign, bf, nb_iter_max, v + d * pos, flags, 0, 1, centroids, NULL, clust_assign, NULL); if (verbose) fprintf (stderr, "-> err = %.3f\n", err); memcpy (hkm->centroids[l] + d * parent * bf, centroids, d * bf * sizeof (*centroids)); /* Update the indexes for those points */ for (i = 0; i < nassign; 
i++) { int truepos = node_assign_idx[pos + i]; node_assign[truepos] = node_assign[truepos] * bf + clust_assign[i]; } free (centroids); free (clust_assign); pos += nassign; } k *= bf; free (node_assign_idx); } if(clust_assign_out) { *clust_assign_out = (int *) malloc (n * sizeof (int)); memcpy (*clust_assign_out, node_assign, n * sizeof (int)); } free (node_assign); free (v); return hkm; }
int main (int argc, char ** argv) { int i; int k = 10; int d = 0; int nb = 0; int nq = 0; int nt = count_cpu(); int verbose = 1; int ret = 0; int fmt_b = FMT_FVEC; int fmt_q = FMT_FVEC; int fmt_nn = FMT_IVEC; int fmt_dis = FMT_FVEC; const char * fb_name = NULL; /* database filename */ const char * fq_name = NULL; /* query filename */ const char * fnn_name = "nn.out"; /* nn idx filename */ const char * fdis_name = "dis.out"; /* nn dis filename */ if (argc == 1) usage (argv[0]); for (i = 1 ; i < argc ; i++) { char *a = argv[i]; if (!strcmp (a, "-h") || !strcmp (a, "--help")) usage (argv[0]); else if (!strcmp (a, "-silence")) { verbose = 0; } else if (!strcmp (a, "-verbose")) { verbose = 2; } else if (!strcmp (a, "-k") && i+1 < argc) { ret = sscanf (argv[++i], "%d", &k); assert (ret); } else if (!strcmp (a, "-d") && i+1 < argc) { ret = sscanf (argv[++i], "%d", &d); assert (ret); } else if (!strcmp (a, "-nt") && i+1 < argc) { ret = sscanf (argv[++i], "%d", &nt); assert (ret); } else if (!strcmp (a, "-nb") && i+1 < argc) { ret = sscanf (argv[++i], "%d", &nb); assert (ret); } else if (!strcmp (a, "-nq") && i+1 < argc) { ret = sscanf (argv[++i], "%d", &nq); assert (ret); } else if (!strcmp (a, "-b") && i+1 < argc) { fb_name = argv[++i]; fmt_b = FMT_FVEC; } else if (!strcmp (a, "-bb") && i+1 < argc) { fb_name = argv[++i]; fmt_b = FMT_BVEC; } else if (!strcmp (a, "-bt") && i+1 < argc) { fb_name = argv[++i]; fmt_b = FMT_TEXT; } else if (!strcmp (a, "-q") && i+1 < argc) { fq_name = argv[++i]; fmt_q = FMT_FVEC; } else if (!strcmp (a, "-qb") && i+1 < argc) { fq_name = argv[++i]; fmt_q = FMT_BVEC; } else if (!strcmp (a, "-qt") && i+1 < argc) { fq_name = argv[++i]; fmt_q = FMT_TEXT; } else if (!strcmp (a, "-onn") && i+1 < argc) { fnn_name = argv[++i]; fmt_nn = FMT_IVEC; } else if (!strcmp (a, "-onnt") && i+1 < argc) { fnn_name = argv[++i]; fmt_nn = FMT_TEXT; } else if (!strcmp (a, "-odis") && i+1 < argc) { fdis_name = argv[++i]; fmt_dis = FMT_FVEC; } else if (!strcmp (a, 
"-odist") && i+1 < argc) { fdis_name = argv[++i]; fmt_dis = FMT_TEXT; } } assert (fb_name && fq_name); fprintf (stderr, "k = %d\nd = %d\nnt = %d\n", k, d, nt); if (verbose) { fprintf (stderr, "fb = %s (fmt = %s)\n", fb_name, (fmt_b == FMT_FVEC ? "fvec" : (fmt_b == FMT_BVEC ? "bvec" : "txt"))); fprintf (stderr, "fq = %s (fmt = %s)\n", fq_name, (fmt_q == FMT_FVEC ? "fvec" : (fmt_q == FMT_BVEC ? "bvec" : "txt"))); fprintf (stderr, "fnn = %s (fmt = %s)\n", fnn_name, (fmt_nn == FMT_IVEC ? "ivec" : "txt")); fprintf (stderr, "fdis = %s (fmt = %s)\n", fdis_name, (fmt_dis == FMT_FVEC ? "fvec" : "txt")); } /* read the input vectors for database and queries */ float * vb = my_fvec_read (fb_name, fmt_b, verbose, &nb, &d); float * vq = my_fvec_read (fq_name, fmt_q, verbose, &nq, &d); /* Search */ int * idx = ivec_new (k * nq); float * dis = fvec_new (k * nq); knn_full_thread (2, nq, nb, d, k, vb, vq, NULL, idx, dis, nt); knn_reorder_shortlist (nq, nb, d, k, vb, vq, idx, dis); /* write the distance output file */ if (fmt_dis == FMT_FVEC) ret = fvecs_write (fdis_name, k, nq, dis); else if (fmt_dis == FMT_TEXT) ret = fvecs_write_txt (fdis_name, k, nq, dis); else assert (0 || "Unknow output format\n"); assert (ret == nq); /* write the distance output file */ if (fmt_nn == FMT_IVEC) ret = ivecs_write (fnn_name, k, nq, idx); else if (fmt_nn == FMT_TEXT) ret = ivecs_write_txt (fnn_name, k, nq, idx); else assert (0 || "Unknow output format\n"); assert (ret == nq); free (idx); free (dis); free (vb); free (vq); return 0; }