/**
 * Estimates an eye-centre position from a BGR image by repeatedly splitting
 * the pixels into two k-means clusters and keeping the darker one.
 *
 * Each round clusters the remaining pixels on (colour, weighted coordinate)
 * features, keeps the cluster whose centre has the smaller colour sum, and
 * increments a per-pixel counter (index_img) for every surviving pixel.
 * The counter image is then used twice:
 *   - squared, normalised and multiplied with the inverted grey image to find
 *     the initial horizontal centre (column with the highest symmetry score);
 *   - thresholded layer by layer to select the binary mask whose symmetry
 *     column is closest to that initial centre, from which the vertical
 *     coordinate (mass centre of the biggest blob) is taken.
 *
 * @param image            input image (converted with CV_BGR2GRAY, so BGR is assumed)
 * @param coordinateWeight factor applied to the pixel-coordinate features
 * @param kmeansIterations number of columns of the index table; kmeansIterations-1
 *                         clustering rounds are run at most
 * @param kmeansRepeats    used both as k-means attempt count and as iteration cap
 * @param blurSize         aperture for the median blur (also passed to erode)
 * @return Point(initial horizontal centre, y of the selected mask's mass centre)
 */
Point findEyeCenterByColorSegmentation(const Mat& image, float coordinateWeight, int kmeansIterations, int kmeansRepeats, int blurSize) {
    Mat img, gray_img;
    Mat colorpoints, kmeansPoints;

    img = equalizeImage(image);
    medianBlur(img, img, blurSize);
    cvtColor(image, gray_img, CV_BGR2GRAY);
    gray_img = imcomplement(gray_img);

    // One column vector per colour channel, one row per pixel.
    vector<Mat> layers(3);
    split(img, layers);
    for (size_t i = 0; i < layers.size(); i++) {
        layers[i] = layers[i].reshape(1, 1).t();
    }
    hconcat(layers, colorpoints);

    // add coordinates
    colorpoints.convertTo(colorpoints, CV_32FC1);
    Mat coordinates = matrixPointCoordinates(img.rows, img.cols, false) * coordinateWeight;
    hconcat(colorpoints, coordinates, kmeansPoints);

    // locIndex(i, it) holds the original linear pixel index of the i-th point
    // that survived round `it`; unused cells stay at -1.
    Mat locIndex(img.size().area(), kmeansIterations, CV_32FC1, Scalar::all(-1));
    linspace(0, img.size().area(), 1).copyTo(locIndex.col(0));
    Mat index_img(img.rows, img.cols, CV_32FC1, Scalar::all(0));
    Mat bestLabels, centers, colorsum, minColorPtIndex;

    for (int it = 1; it < kmeansIterations; it++) {
        if (kmeansPoints.rows < 2) {
            break;
        }
        kmeans(kmeansPoints, 2, bestLabels,
               TermCriteria(CV_TERMCRIT_EPS + CV_TERMCRIT_ITER, kmeansRepeats, 0.001),
               kmeansRepeats, KMEANS_PP_CENTERS, centers);

        // Keep the cluster whose centre has the smaller colour sum.
        reduce(centers.colRange(0, 3), colorsum, 1, CV_REDUCE_SUM);
        if (colorsum.at<float>(0) < colorsum.at<float>(1)) {
            findNonZero(bestLabels == 0, minColorPtIndex);
        } else {
            findNonZero(bestLabels == 1, minColorPtIndex);
        }
        if (minColorPtIndex.empty()) {
            // Nothing left in the selected cluster; reshape()/col() below would fail.
            break;
        }
        // findNonZero yields (x, y) pairs; the labels are a single column, so y is the row index.
        minColorPtIndex = minColorPtIndex.reshape(1).col(1);

        for (int i = 0; i < minColorPtIndex.rows; i++) {
            locIndex.at<float>(i, it) = locIndex.at<float>(minColorPtIndex.at<int>(i), it - 1);
        }

        Mat temp;
        for (int i = 0; i < minColorPtIndex.rows; i++) {
            temp.push_back(kmeansPoints.row(minColorPtIndex.at<int>(i)));
        }
        temp.copyTo(kmeansPoints);
        temp.release();

        for (int i = 0; i < minColorPtIndex.rows; i++) {
            int r, c;
            ind2sub(locIndex.at<float>(i, it), index_img.cols, index_img.rows, r, c);
            index_img.at<float>(r, c) += 1;
        }
    }
    // imagesc("layered",mat2gray(index_img));

    Mat layerweighted_img = index_img.mul(index_img);
    layerweighted_img = mat2gray(layerweighted_img);
    gray_img.convertTo(gray_img, CV_32FC1, 1 / 255.0);
    Mat composed = gray_img.mul(layerweighted_img);

    Mat score = calculateImageSymmetryScore(composed);
    Mat scoresum;
    reduce(score.rowRange(0, composed.cols / 6), scoresum, 0, CV_REDUCE_SUM, CV_32FC1);
    // plotVectors("live", scoresum.t());

    double minVal, maxVal;
    Point minLoc, maxLoc;
    minMaxLoc(scoresum, &minVal, &maxVal, &minLoc, &maxLoc);
    const int initialHC = maxLoc.x;

    int bestx = 0;
    Mat bestIndex_img = index_img >= 1;

    // Freeze the number of layers before the loop: the per-layer minMaxLoc
    // below must not overwrite the loop bound.
    minMaxLoc(index_img, &minVal, &maxVal, &minLoc, &maxLoc);
    const int layerCount = static_cast<int>(maxVal);

    for (int i = 1; i <= layerCount; i++) {
        Mat indexlayer_img = index_img >= i;
        medianBlur(indexlayer_img, indexlayer_img, 5);
        erode(indexlayer_img, indexlayer_img, blurSize);
        erode(indexlayer_img, indexlayer_img, blurSize);
        indexlayer_img = removeSmallBlobs(indexlayer_img);
        indexlayer_img = fillHoleInBinary(indexlayer_img);
        indexlayer_img = fillConvexHulls(indexlayer_img);

        Mat layerScore = calculateImageSymmetryScore(indexlayer_img);
        Mat layerScoreSum;
        reduce(layerScore.rowRange(0, indexlayer_img.cols / 6), layerScoreSum, 0, CV_REDUCE_SUM, CV_32FC1);

        double layerMin, layerMax;
        Point layerMinLoc, layerMaxLoc;
        minMaxLoc(layerScoreSum, &layerMin, &layerMax, &layerMinLoc, &layerMaxLoc);

        if (abs(layerMaxLoc.x - initialHC) < abs(bestx - initialHC)) {
            // Accept the layer only if the mask covers between 1/6 and 2/5 of the image.
            if (sum(indexlayer_img)[0] / 255 < indexlayer_img.size().area() / 5 * 2 &&
                sum(indexlayer_img)[0] / 255 > indexlayer_img.size().area() / 6) {
                bestx = layerMaxLoc.x;
                bestIndex_img = indexlayer_img.clone();
            }
        }
    }

    Point massCenter = findMassCenter_BinaryBiggestBlob(bestIndex_img);
    return Point(initialHC, massCenter.y);
}
int main(int nargs, char **args) { kmeans_config config; kmeans_result result; int i, j; int spread = 3; point *pts; point *init; int print_results = 0; unsigned long start; int nptsincluster = 10000; int k = 10; srand(time(NULL)); /* Constants */ config.k = k; config.num_objs = config.k * nptsincluster; config.max_iterations = 200; config.distance_method = pt_distance; config.centroid_method = pt_centroid; /* Inputs for K-means */ config.objs = calloc(config.num_objs, sizeof(Pointer)); config.centers = calloc(config.k, sizeof(Pointer)); config.clusters = calloc(config.num_objs, sizeof(int)); /* Storage for raw data */ pts = calloc(config.num_objs, sizeof(point)); init = calloc(config.k, sizeof(point)); /* Create test data! */ /* Populate with K gaussian clusters of data */ for (j = 0; j < config.k; j++) { for (i = 0; i < nptsincluster; i++) { double u1 = 1.0 * random() / RAND_MAX; double u2 = 1.0 * random() / RAND_MAX; double z1 = spread * j + sqrt(-2*log2(u1))*cos(2*M_PI*u2); double z2 = spread * j + sqrt(-2*log2(u1))*sin(2*M_PI*u2); int n = j*nptsincluster + i; /* Populate raw data */ pts[n].x = z1; pts[n].y = z2; /* Pointer to raw data */ config.objs[n] = &(pts[n]); } } /* Populate the initial means vector with random start points */ for (i = 0; i < config.k; i++) { int r = lround(config.num_objs * (1.0 * rand() / RAND_MAX)); /* Populate raw data */ init[i] = pts[r]; /* Pointers to raw data */ config.centers[i] = &(init[i]); if (print_results) printf("center[%d]\t%g\t%g\n", i, init[i].x, init[i].y); } /* run k-means! 
*/ start = time(NULL); result = kmeans(&config); printf("\n"); printf("Iteration count: %d\n", config.total_iterations); printf(" Time taken: %ld seconds\n", (time(NULL) - start)); printf(" Iterations/sec: %.3g\n", (1.0*config.total_iterations)/(time(NULL) - start)); printf("\n"); /* print results */ if (print_results) { for (i = 0; i < config.num_objs; i++) { point *pt = (point*)(config.objs[i]); if (config.objs[i]) printf("%g\t%g\t%d\n", pt->x, pt->y, config.clusters[i]); else printf("N\tN\t%d\n", config.clusters[i]); } } free(config.objs); free(config.clusters); free(config.centers); free(init); free(pts); }
gmm_t * gmm_learn (int di, int ni, int ki, int niter, const float * v, int nt, int seed, int nredo, int flags) { long d=di,k=ki,n=ni; int iter, iter_tot = 0; double old_key, key = 666; niter = (niter == 0 ? 10000 : niter); /* the GMM parameters */ float * p = fvec_new_0 (n * k); /* p(ci|x) for all i */ gmm_t * g = gmm_new (d, k); /* initialize the GMM: k-means + variance estimation */ int * nassign = ivec_new (n); /* not useful -> to be removed when debugged */ float * dis = fvec_new (n); kmeans (d, n, k, niter, v, nt, seed, nredo, g->mu, dis, NULL, nassign); fflush (stderr); fprintf (stderr, "assign = "); ivec_print (nassign, k); fprintf (stderr, "\n"); free (nassign); /* initialization of the GMM parameters assuming a diagonal matrix */ fvec_set (g->w, k, 1.0 / k); double sig = fvec_sum (dis, n) / n; printf ("sigma at initialization = %.3f\n", sig); fvec_set (g->sigma, k * d, sig); free (dis); /* start the EM algorithm */ fprintf (stdout, "<><><><> GMM <><><><><>\n"); if(flags & GMM_FLAGS_PURE_KMEANS) niter=0; for (iter = 1 ; iter <= niter ; iter++) { gmm_compute_p_thread (n, v, g, p, flags, nt); fflush(stdout); gmm_handle_empty(n, v, g, p); gmm_compute_params (n, v, p, g, flags, nt); fflush(stdout); iter_tot++; /* convergence reached -> leave */ old_key = key; key = fvec_sum (g->mu, k * d); printf ("keys %5d: %.6f -> %.6f\n", iter, old_key, key); fflush(stdout); if (key == old_key) break; } fprintf (stderr, "\n"); free(p); return g; }
/*
 * MATLAB entry point for k-means.
 *
 * Inputs : prhs[0]  d-by-n single-precision matrix, one vector per column
 *          prhs[1]  number of centroids k
 *          then name/value pairs: 'niter', 'redo', 'seed', 'verbose', 'init'
 *          ('init' 0 = random vectors, 1 = Berkeley initialisation).
 * Outputs: nlhs == 1  centroids
 *          nlhs == 2  centroids, assignment
 *          nlhs == 3  centroids, distances, assignment
 *          nlhs == 4  centroids, distances, assignment, per-cluster counts
 * Assignments are shifted to 1-based indices before returning.
 */
void mexFunction (int nlhs, mxArray *plhs[],
                  int nrhs, const mxArray*prhs[])
{
  if (nrhs < 2 || nrhs % 2 != 0)
    mexErrMsgTxt("even nb of input arguments required.");
  else if (nlhs > 4 || nlhs < 1)
    mexErrMsgTxt("1 to 4 output arguments are expected.");

  int flags = 0;
  int d = mxGetM (prhs[0]);
  int n = mxGetN (prhs[0]);
  long seed = 0L;

  if(mxGetClassID(prhs[0])!=mxSINGLE_CLASS)
    mexErrMsgTxt("need single precision array.");

  float *v = (float*) mxGetPr (prhs[0]);
  int k = (int) mxGetScalar (prhs[1]);

  int niter = 50, redo = 1, nt = 1, verbose = 1;
  int init_type = 0;   /* random selection by default */

  {
    int i;
    for(i = 2 ; i < nrhs ; i += 2) {
      char varname[256];
      if (mxGetClassID(prhs[i]) != mxCHAR_CLASS)
        mexErrMsgTxt ("variable name required");

      if (mxGetString (prhs[i], varname, 256) != 0)
        mexErrMsgTxt ("Could not convert string data");

      if (!strcmp(varname, "niter"))
        niter = (int) mxGetScalar (prhs[i+1]);

      else if (!strcmp(varname,"redo"))
        redo = (int) mxGetScalar (prhs[i+1]);

      else if (!strcmp(varname,"seed"))
        seed = (int) mxGetScalar (prhs[i+1]);

      else if (!strcmp(varname,"verbose"))
        verbose = (int) mxGetScalar (prhs[i+1]);

      else if (!strcmp(varname,"init")) {
        init_type = (int) mxGetScalar (prhs[i+1]);
        /* Report bad user input through MATLAB rather than assert(),
           which would abort the host process or be compiled out. */
        if (init_type != 0 && init_type != 1)
          mexErrMsgTxt ("init must be 0 (random) or 1 (Berkeley)");
      }

      else
        mexErrMsgTxt("unknown variable name");
    }
  }

  if (init_type == 1)        /* Berkeley */
    flags = flags | KMEANS_INIT_BERKELEY;
  else if (init_type == 0)   /* random vectors */
    flags = flags | KMEANS_INIT_RANDOM;

  /* the thread count travels in the flags word */
  flags |= nt;

  if (verbose > 0) {
    if (d >= 2 && n >= 2)
      printf("Input: %d vectors of dimension %d\nk=%d niter=%d "
             "redo=%d verbose=%d seed=%ld v1=[%g %g ...], v2=[%g %g... ]\n",
             n, d, k, niter, redo, verbose, seed, v[0], v[1], v[d], v[d+1]);
    else
      /* too few elements to show two coordinates of two vectors */
      printf("Input: %d vectors of dimension %d\nk=%d niter=%d "
             "redo=%d verbose=%d seed=%ld\n",
             n, d, k, niter, redo, verbose, seed);
  }
  else
    flags |= KMEANS_QUIET;

  if (k < 1) {
    mexErrMsgTxt("number of centroids must be at least 1");
  }

  if(n < k) {
    mexErrMsgTxt("fewer points than centroids");
  }

  /* ouptut: centroids, assignment, distances */

  plhs[0] = mxCreateNumericMatrix (d, k, mxSINGLE_CLASS, mxREAL);
  float *centroids = (float*) mxGetPr (plhs[0]);

  float * dis = NULL;
  int * assign = NULL;
  int * nassign = NULL;

  if (nlhs == 2) {
    plhs[1] = mxCreateNumericMatrix (n, 1, mxINT32_CLASS, mxREAL);
    assign = (int*) mxGetPr (plhs[1]);
  }
  else if (nlhs >= 3) {
    plhs[1] = mxCreateNumericMatrix (n, 1, mxSINGLE_CLASS, mxREAL);
    dis = (float*) mxGetPr (plhs[1]);
    plhs[2] = mxCreateNumericMatrix (n, 1, mxINT32_CLASS, mxREAL);
    assign = (int*) mxGetPr (plhs[2]);
  }
  if (nlhs >=4) {
    plhs[3] = mxCreateNumericMatrix (k, 1, mxINT32_CLASS, mxREAL);
    nassign = (int*) mxGetPr (plhs[3]);
  }

  /* [centroids_tmp, dis, assign] = yael_kmeans (vs, ks, 'niter', 100, 'verbose', 0); */
  kmeans (d, n, k, niter, v, flags, seed, redo,
          centroids, dis, assign, nassign);

  /* post-processing: Matlab starts from 1 */
  if (assign) {
    int i;
    for (i = 0 ; i < n ; i++)
      assign[i]++;
  }
}
/*
 * Adaptive hierarchical clustering.
 *
 * Starting from a root cluster that holds every point of ds, clusters are
 * processed in the order they were appended to ahct.  A cluster of ni points
 * gets the branch factor bfi = min(bf, round(ni / rho)):
 *   - bfi < 2 : the cluster is marked as a leaf;
 *   - else    : its points are split into bfi children with kmeans(); the
 *               children are appended to ahct and the cluster records the
 *               centroids and the children's positions in ahct.
 *
 *   ahct  output tree, stored as a dynamic array of Cluster
 *   bf    maximum branch factor
 *   rho   divisor used to derive the adaptive branch factor from the size
 *   ds    data set (n vectors of dimension d, stored contiguously in data)
 */
void ahc_clustering(DyArray *ahct, int bf, int rho, const fDataSet *ds){
    ASSERTINFO(ahct == NULL || bf <= 0 || rho <= 0 || ds == NULL, "IPP");

    int n = ds->n;
    int d = ds->d;
    Cluster _clu, clu, *pclu = NULL;
    int i;
    float qerror;
    int iclu, bfi, ni, ori_id;          // index, branch factor and volume of the i-th cluster
    int *nassign = ivec_new_set(bf, 0);
    int *assign = NULL;
    float *cent = fvec_new(d*bf);
    float *mem_points = NULL;
    DyArray *member = (DyArray*)malloc(sizeof(DyArray)*bf);

    /* initialize the first cluster (root) to add it to the ahc tree */
    Cluster_init(&clu, n);
    for(i = 0; i < n; i++){
        clu.idx[i] = i;
    }
    clu.type = ClusterType_Root;
    DyArray_add(ahct, (void*)&clu, 1);

    /* begin the loop of adaptive hierarchical clustering */
    iclu = 0;
    while(iclu < ahct->count){
        /* deal with the i-th cluster */

        // figure out the adaptive branch factor of the i-th cluster
        pclu = (Cluster*)DyArray_get(ahct, iclu, 1);
        ni = pclu->npts;
        bfi = i_min(bf, (int)round(ni / (float)rho));

        // deal with the cluster according to its size
        if(bfi < 2){
            /*
             * this is a leaf cluster
             * - mark it, release the children
             * * not necessary to store real data points
             */
            pclu->type = ClusterType_Leaf;
        }else{
            printf("----------------- cluster %d, bfi-%d:\n", iclu, bfi);
            /*
             * this is an inner cluster
             * - divide it
             */
            memcpy(&_clu, pclu, sizeof(Cluster));

            // extract data points from the original dataset according to the idx
            mem_points = fvec_new(ni * d);
            for(i = 0; i < ni; i++){
                /* one vector is d floats: the size must be given in bytes */
                memcpy(mem_points+i*d, ds->data+_clu.idx[i]*d, sizeof(float)*d);
            }

            // divide this cluster
            assign = ivec_new(ni);
            qerror = kmeans(    d, ni, bfi, CLUSTERING_NITER, mem_points,
                                CLUSTERING_NTHREAD | KMEANS_QUIET | KMEANS_INIT_BERKELEY, CLUSTERING_SEED, CLUSTERING_NREDO,
                                cent, NULL, assign, nassign);

            // prepare space for members' ids
            for(i = 0; i < bfi; i++){
                DyArray_init(&member[i], sizeof(int), nassign[i]);
            }

            // extract member points' ids for each children cluster
            for(i = 0; i < ni; i++){
                ori_id = _clu.idx[i];
                DyArray_add(&member[assign[i]], (void*)&ori_id, 1);
            }

            // fulfill the type, centroids and the children of this cluster, add them to the ahct
            _clu.type = ClusterType_Inner;
            _clu.cents = fvec_new(d * bfi);
            memcpy(_clu.cents, cent, sizeof(float)*d*bfi);
            DyArray_init(&_clu.children, sizeof(int), bfi);

            for(i = 0; i < bfi; i++){
                Cluster_init(&clu, nassign[i]);
                memcpy(clu.idx, (int*)member[i].elem, sizeof(int)*nassign[i]);

                DyArray_add(&_clu.children, (void*)&ahct->count, 1);        /* the i-th child's position */
                DyArray_add(ahct, (void*)&clu, 1);                          /* add the i-th child to the ahct */
            }

            /* as per the elems of ahct may change when expanding the space
             * we decide to get the brand new address of the element
             */
            pclu = (Cluster*)DyArray_get(ahct, iclu, 1);
            memcpy(pclu, &_clu, sizeof(Cluster));

            /* report */
            ivec_print(nassign, bfi);
            ivec_print((int*)_clu.children.elem, _clu.children.count);

            /* unset or release */
            FREE(mem_points);
            FREE(assign);
            for(i = 0; i < bfi; i++){
                DyArray_unset(&member[i]);
            }
        }

        // move to next cluster
        iclu++;
    }

    FREE(nassign);
    FREE(cent);
    FREE(member);
    pclu = NULL;
}
/*
 * Pixel-pipe entry point of the colour transfer module.
 *
 * Behaviour depends on data->flag:
 *   ACQUIRE  - on the preview pipe only: capture the histogram of the input,
 *              store its inverse in data->hist, cluster the input with
 *              kmeans() into data->mean / data->var, then set data->flag to
 *              ACQUIRED and the params flag to ACQUIRE2.  On every pipe the
 *              input is copied to the output unchanged.
 *   APPLY    - remap the first channel through the stored inverse histogram
 *              and rewrite channels 1 and 2 as a weighted blend over all
 *              clusters, using the statistics stored in data.
 *   other    - copy the input to the output unchanged.
 *
 * ivoid / ovoid are treated as float buffers with piece->colors values per
 * pixel; both are walked with the dimensions of roi_out.
 */
void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, void *ivoid, void *ovoid,
             const dt_iop_roi_t *roi_in, const dt_iop_roi_t *roi_out)
{
  // FIXME: this returns nan!!
  dt_iop_colortransfer_data_t *data = (dt_iop_colortransfer_data_t *)piece->data;
  float *in = (float *)ivoid;
  float *out = (float *)ovoid;
  const int ch = piece->colors;

  if(data->flag == ACQUIRE)
  {
    if(piece->pipe->type == DT_DEV_PIXELPIPE_PREVIEW)
    {
      // only get stuff from the preview pipe, rest stays untouched.
      int hist[HISTN];
      // get histogram of L
      capture_histogram(in, roi_in, hist);
      // invert histogram of L
      invert_histogram(hist, data->hist);

      // get n clusters
      kmeans(in, roi_in, data->n, data->mean, data->var);

      // notify gui that commit_params should let stuff flow back!
      data->flag = ACQUIRED;
      dt_iop_colortransfer_params_t *p = (dt_iop_colortransfer_params_t *)self->params;
      p->flag = ACQUIRE2;
    }
    // pass the image through untouched while acquiring
    memcpy(out, in, (size_t)sizeof(float) * ch * roi_out->width * roi_out->height);
  }
  else if(data->flag == APPLY)
  {
    // apply histogram of L and clustering of (a,b)
    int hist[HISTN];
    capture_histogram(in, roi_in, hist);
#ifdef _OPENMP
#pragma omp parallel for default(none) schedule(static) shared(roi_out, data, in, out, hist)
#endif
    for(int k = 0; k < roi_out->height; k++)
    {
      // j indexes the first value of the current pixel; row k starts at ch * width * k
      size_t j = (size_t)ch * roi_out->width * k;
      for(int i = 0; i < roi_out->width; i++)
      {
        // L: match histogram
        // in[j] is scaled from a 0..100 range to a bin index, looked up in the
        // input histogram, and the result indexes the stored inverse histogram
        out[j] = data->hist[hist[(int)CLAMP(HISTN * in[j] / 100.0, 0, HISTN - 1)]];
        out[j] = CLAMP(out[j], 0, 100);
        j += ch;
      }
    }

    // cluster input buffer
    float mean[data->n][2], var[data->n][2];
    kmeans(in, roi_in, data->n, mean, var);

    // get mapping from input clusters to target clusters
    int mapio[data->n];
    get_cluster_mapping(data->n, mean, data->mean, mapio);

    // for all pixels: find input cluster, transfer to mapped target cluster
#ifdef _OPENMP
#pragma omp parallel for default(none) schedule(static) shared(roi_out, data, mean, var, mapio, in, out)
#endif
    for(int k = 0; k < roi_out->height; k++)
    {
      // per-row scratch buffer for the cluster weights
      // NOTE(review): sized MAXN while the loops below run to data->n - confirm data->n <= MAXN
      float weight[MAXN];
      size_t j = (size_t)ch * roi_out->width * k;
      for(int i = 0; i < roi_out->width; i++)
      {
        const float L = in[j];
        const float Lab[3] = { L, in[j + 1], in[j + 2] };
// a, b: subtract mean, scale nvar/var, add nmean
#if 0 // single cluster, gives color banding
        const int ki = get_cluster(in + j, data->n, mean);
        out[j+1] = 100.0/out[j] * ((Lab[1] - mean[ki][0])*data->var[mapio[ki]][0]/var[ki][0] + data->mean[mapio[ki]][0]);
        out[j+2] = 100.0/out[j] * ((Lab[2] - mean[ki][1])*data->var[mapio[ki]][1]/var[ki][1] + data->mean[mapio[ki]][1]);
#else // fuzzy weighting
        get_clusters(in + j, data->n, mean, weight);
        out[j + 1] = out[j + 2] = 0.0f;
        for(int c = 0; c < data->n; c++)
        {
          // NOTE(review): var[c][*] is used as a divisor without a zero check;
          // possibly related to the NaN mentioned in the FIXME above - confirm
          out[j + 1] += weight[c] * ((Lab[1] - mean[c][0]) * data->var[mapio[c]][0] / var[c][0]
                                     + data->mean[mapio[c]][0]);
          out[j + 2] += weight[c] * ((Lab[2] - mean[c][1]) * data->var[mapio[c]][1] / var[c][1]
                                     + data->mean[mapio[c]][1]);
        }
#endif
        // the fourth value of the pixel is copied through
        // NOTE(review): this indexes j + 3, so it assumes ch >= 4 - confirm
        out[j + 3] = in[j + 3];
        j += ch;
      }
    }
  }
  else
  {
    // any other state: plain pass-through
    memcpy(out, in, (size_t)sizeof(float) * ch * roi_out->width * roi_out->height);
  }
}
int main(int argc, char *argv[]) { ArrayXXd data; // Creating dummy arrays for the covariates and the observations. // They're not used because we compute our Likelihood directly. ArrayXd covariates; ArrayXd observations; // ------------------------------------------------------------------- // ----- First step. Set up the models for the inference problem ----- // ------------------------------------------------------------------- // Set up a dummy model. This won't be used because we're computing // the Likelihood directly, but the Likelihood nevertheless expects a model in // its constructor. ZeroModel model(covariates); // ------------------------------------------------------- // ----- Second step. Set up all prior distributions ----- // ------------------------------------------------------- int Ndimensions = 3; // Number of free parameters (dimensions) of the problem vector<Prior*> ptrPriors(1); ArrayXd parametersMinima(Ndimensions); ArrayXd parametersMaxima(Ndimensions); parametersMinima.fill(-20); parametersMaxima.fill(20); UniformPrior uniformPrior(parametersMinima, parametersMaxima); ptrPriors[0] = &uniformPrior; // ----------------------------------------------------------------- // ----- Third step. Set up the likelihood function to be used ----- // ----------------------------------------------------------------- SingleNDGaussianLikelihood likelihood(observations, model, Ndimensions); // ------------------------------------------------------------------------------- // ----- Fourth step. Set up the K-means clusterer using an Euclidean metric ----- // ------------------------------------------------------------------------------- EuclideanMetric myMetric; int minNclusters = 1; int maxNclusters = 10; int Ntrials = 10; double relTolerance = 0.01; KmeansClusterer kmeans(myMetric, minNclusters, maxNclusters, Ntrials, relTolerance); // --------------------------------------------------------------------- // ----- Sixth step. 
Configure and start nested sampling inference ----- // --------------------------------------------------------------------- bool printOnTheScreen = true; // Print results on the screen int initialNobjects = 500; // Initial number of active points evolving within the nested sampling process. int minNobjects = 500; // Minimum number of active points allowed in the nesting process. int maxNdrawAttempts = 5000; // Maximum number of attempts when trying to draw a new sampling point. int NinitialIterationsWithoutClustering = 1000; // The first N iterations, we assume that there is only 1 cluster. int NiterationsWithSameClustering = 50; // Clustering is only happening every X iterations. double initialEnlargementFraction = 2.0; // Fraction by which each axis in an ellipsoid has to be enlarged. // It can be a number >= 0, where 0 means no enlargement. double shrinkingRate = 0.8; // Exponent for remaining prior mass in ellipsoid enlargement fraction. // It is a number between 0 and 1. The smaller the slower the shrinkage // of the ellipsoids. double terminationFactor = 0.01; // Termination factor for nesting loop. 
// Start the computation MultiEllipsoidSampler nestedSampler(printOnTheScreen, ptrPriors, likelihood, myMetric, kmeans, initialNobjects, minNobjects, initialEnlargementFraction, shrinkingRate); double tolerance = 1.e2; double exponent = 0.4; PowerlawReducer livePointsReducer(nestedSampler, tolerance, exponent, terminationFactor); //FerozReducer livePointsReducer(nestedSampler, tolerance); ostringstream numberString; numberString << Ndimensions; string outputPathPrefix = "demoSingle" + numberString.str() + "DGaussian_"; nestedSampler.run(livePointsReducer, NinitialIterationsWithoutClustering, NiterationsWithSameClustering, maxNdrawAttempts, terminationFactor, outputPathPrefix); nestedSampler.outputFile << "# List of configuring parameters used for the ellipsoidal sampler and X-means" << endl; nestedSampler.outputFile << "# Row #1: Minimum Nclusters" << endl; nestedSampler.outputFile << "# Row #2: Maximum Nclusters" << endl; nestedSampler.outputFile << "# Row #3: Initial Enlargement Fraction" << endl; nestedSampler.outputFile << "# Row #4: Shrinking Rate" << endl; nestedSampler.outputFile << minNclusters << endl; nestedSampler.outputFile << maxNclusters << endl; nestedSampler.outputFile << initialEnlargementFraction << endl; nestedSampler.outputFile << shrinkingRate << endl; nestedSampler.outputFile.close(); // ------------------------------------------------------- // ----- Last step. Save the results in output files ----- // ------------------------------------------------------- Results results(nestedSampler); results.writeParametersToFile("parameter"); results.writeLogLikelihoodToFile("logLikelihood.txt"); results.writeEvidenceInformationToFile("evidenceInformation.txt"); results.writePosteriorProbabilityToFile("posteriorDistribution.txt"); double credibleLevel = 68.3; bool writeMarginalDistributionToFile = true; results.writeParametersSummaryToFile("parameterSummary.txt", credibleLevel, writeMarginalDistributionToFile); // That's it! return EXIT_SUCCESS; }
int main(int argc, char** argv) { // Kmeans int class_n, data_n, iteration_n; float *centroids, *data; int* partitioned; FILE *io_file; struct timespec start, end, spent; // Check parameters if (argc < 4) { fprintf(stderr, "usage: %s <centroid file> <data file> <paritioned result> [<final centroids>] [<iteration number>]\n", argv[0]); exit(EXIT_FAILURE); } // Read initial centroid data io_file = fopen(argv[1], "rb"); if (io_file == NULL) { fprintf(stderr, "File open error %s\n", argv[1]); exit(EXIT_FAILURE); } class_n = read_data(io_file, ¢roids); fclose(io_file); // Read input data io_file = fopen(argv[2], "rb"); if (io_file == NULL) { fprintf(stderr, "File open error %s\n", argv[2]); exit(EXIT_FAILURE); } data_n = read_data(io_file, &data); fclose(io_file); iteration_n = argc > 5 ? atoi(argv[5]) : DEFAULT_ITERATION; partitioned = (int*)malloc(sizeof(int)*data_n); clock_gettime(CLOCK_MONOTONIC, &start); // Run Kmeans algorithm kmeans(iteration_n, class_n, data_n, (Point*)centroids, (Point*)data, partitioned); clock_gettime(CLOCK_MONOTONIC, &end); timespec_subtract(&spent, &end, &start); printf("Time spent: %ld.%09ld\n", spent.tv_sec, spent.tv_nsec); // Write classified result io_file = fopen(argv[3], "wb"); fwrite(&data_n, sizeof(data_n), 1, io_file); fwrite(partitioned, sizeof(int), data_n, io_file); fclose(io_file); // Write final centroid data if (argc > 4) { io_file = fopen(argv[4], "wb"); fwrite(&class_n, sizeof(class_n), 1, io_file); fwrite(centroids, sizeof(Point), class_n, io_file); fclose(io_file); } // Free allocated buffers free(centroids); free(data); free(partitioned); return 0; }
int * lwgeom_cluster_2d_kmeans(const LWGEOM **geoms, int ngeoms, int k) { int i; int num_centroids = 0; LWGEOM **centroids; POINT2D *centers_raw; const POINT2D *cp; POINT2D min = { DBL_MAX, DBL_MAX }; POINT2D max = { -DBL_MAX, -DBL_MAX }; double dx, dy; kmeans_config config; kmeans_result result; int *seen; int sidx = 0; assert(k>0); assert(ngeoms>0); assert(geoms); /* Initialize our static structs */ memset(&config, 0, sizeof(kmeans_config)); memset(&result, 0, sizeof(kmeans_result)); if (ngeoms<k) { lwerror("%s: number of geometries is less than the number of clusters requested", __func__); } /* We'll hold the temporary centroid objects here */ centroids = lwalloc(sizeof(LWGEOM*) * ngeoms); memset(centroids, 0, sizeof(LWGEOM*) * ngeoms); /* The vector of cluster means. We have to allocate a */ /* chunk of memory for these because we'll be mutating them */ /* in the kmeans algorithm */ centers_raw = lwalloc(sizeof(POINT2D) * k); memset(centers_raw, 0, sizeof(POINT2D) * k); /* K-means configuration setup */ config.objs = lwalloc(sizeof(Pointer) * ngeoms); config.num_objs = ngeoms; config.clusters = lwalloc(sizeof(int) * ngeoms); config.centers = lwalloc(sizeof(Pointer) * k); config.k = k; config.max_iterations = 0; config.distance_method = lwkmeans_pt_distance; config.centroid_method = lwkmeans_pt_centroid; /* Clean the memory */ memset(config.objs, 0, sizeof(Pointer) * ngeoms); memset(config.clusters, 0, sizeof(int) * ngeoms); memset(config.centers, 0, sizeof(Pointer) * k); /* Prepare the list of object pointers for K-means */ for (i = 0; i < ngeoms; i++) { const LWGEOM *geom = geoms[i]; LWPOINT *lwpoint; /* Null/empty geometries get a NULL pointer */ if ((!geom) || lwgeom_is_empty(geom)) { config.objs[i] = NULL; continue; } /* If the input is a point, use its coordinates */ /* If its not a point, convert it to one via centroid */ if (lwgeom_get_type(geom) != POINTTYPE) { LWGEOM *centroid = lwgeom_centroid(geom); if ((!centroid) || lwgeom_is_empty(centroid)) { 
config.objs[i] = NULL; continue; } centroids[num_centroids++] = centroid; lwpoint = lwgeom_as_lwpoint(centroid); } else { lwpoint = lwgeom_as_lwpoint(geom); } /* Store a pointer to the POINT2D we are interested in */ cp = getPoint2d_cp(lwpoint->point, 0); config.objs[i] = (Pointer)cp; /* Since we're already here, let's calculate the extrema of the set */ if (cp->x < min.x) min.x = cp->x; if (cp->y < min.y) min.y = cp->y; if (cp->x > max.x) max.x = cp->x; if (cp->y > max.y) max.y = cp->y; } /* * We map a uniform assignment of points in the area covered by the set * onto actual points in the set */ dx = (max.x - min.x)/k; dy = (max.y - min.y)/k; seen = lwalloc(sizeof(int)*config.k); memset(seen, 0, sizeof(int)*config.k); for (i = 0; i < k; i++) { int closest; POINT2D p; int j; /* Calculate a point in the range */ p.x = min.x + dx * (i + 0.5); p.y = min.y + dy * (i + 0.5); /* Find the data point closest to the calculated point */ closest = lwkmeans_pt_closest(config.objs, config.num_objs, &p); /* If something is terrible wrong w/ data, cannot find a closest */ if (closest < 0) lwerror("unable to calculate cluster seed points, too many NULLs or empties?"); /* Ensure we aren't already using that point as a seed */ j = 0; while(j < sidx) { if (seen[j] == closest) { closest = (closest + 1) % config.num_objs; j = 0; } else { j++; } } seen[sidx++] = closest; /* Copy the point coordinates into the initial centers array */ /* This is ugly, but the centers array is an array of */ /* pointers to points, not an array of points */ centers_raw[i] = *((POINT2D*)config.objs[closest]); config.centers[i] = &(centers_raw[i]); } result = kmeans(&config); /* Before error handling, might as well clean up all the inputs */ lwfree(config.objs); lwfree(config.centers); lwfree(centers_raw); lwfree(centroids); lwfree(seen); /* Good result */ if (result == KMEANS_OK) return config.clusters; /* Bad result, not going to need the answer */ lwfree(config.clusters); if (result == 
KMEANS_EXCEEDED_MAX_ITERATIONS) { lwerror("%s did not converge after %d iterations", __func__, config.max_iterations); return NULL; } /* Unknown error */ return NULL; }
/**
 * Cluster 3-D points into K groups with k-means (k-means++ seeding).
 *
 * @param P               one point per row; converted to CV_32F before use.
 *                        The outlier pass reads columns 0..2, so three
 *                        columns are assumed.
 * @param K               number of clusters
 * @param removeOutliers  when true, every point farther than a fixed
 *                        distance (0.1) from the centroid of its own cluster
 *                        gets the label -1
 * @return column of int labels, one per row of P
 */
Mat TableObjectDetector::clusterObjects(Mat P, int K, bool removeOutliers) {
    Mat L;
    int attempts = 5;
    P.convertTo(P, CV_32F);
    kmeans(P, K, L, TermCriteria(CV_TERMCRIT_ITER|CV_TERMCRIT_EPS, 10000, 0.0001), attempts, KMEANS_PP_CENTERS);

    // We remove outliers that are too far away from the object centroid.
    // We do this by just setting their cluster label to -1.
    // Only the absolute threshold below is applied; the per-cluster
    // standard-deviation criterion was already disabled, and the statistic
    // computed for it was read through an invalid at<Scalar>() access, so
    // that computation is gone.
    if (removeOutliers) {
        const float maxDist = 0.1;

        // Calculate centroids
        Mat clusterCentroids = Mat::zeros(K, 3, CV_32F);
        for (int k=0; k<K; k++) {
            Mat members(0, 3, CV_32F);
            for (int i=0; i<L.rows; i++) {
                if (L.at<int>(i)==k) {
                    members.push_back(P.row(i));
                }
            }
            if (members.empty()) {
                // No point carries this label; the row stays at zero and is never read
                continue;
            }
            // members and the destination row are both CV_32F, so reduce()
            // writes straight into clusterCentroids
            Mat centroidRow = clusterCentroids.row(k);
            reduce(members, centroidRow, 0, CV_REDUCE_AVG);
        }

        // Flag every point that lies too far from the centroid of its cluster
        for (int i=0; i<L.rows; i++) {
            const int k = L.at<int>(i);
            const Mat centroid = clusterCentroids.row(k);
            const Mat pt = P.row(i);
            const double dx = pt.at<float>(0) - centroid.at<float>(0);
            const double dy = pt.at<float>(1) - centroid.at<float>(1);
            const double dz = pt.at<float>(2) - centroid.at<float>(2);
            const float d = std::sqrt(dx*dx + dy*dy + dz*dz);
            if (d > maxDist) {
                L.at<int>(i) = -1;
            }
        }
    }

    return L;
}
/**
 * Run hierarchical k-means with a distance threshold of 0.15 meters.
 *
 * Starting from one cluster holding every point, the cluster on top of the
 * work stack is accepted when all of its points lie within the threshold of
 * its centroid; otherwise it is split in two with k-means and both halves
 * are pushed back.  Processing stops after 30 rounds, when max_clusters
 * clusters have been accepted, or when the stack is empty.
 *
 * @param P             one 3-D point per row (three columns)
 * @param max_clusters  maximum number of clusters to label
 * @return column of int labels in the row order of P.  Points whose cluster
 *         was never accepted keep the initial label 0.
 */
Mat TableObjectDetector::clusterObjectsHierarchical(cv::Mat P, int max_clusters) {
    Mat L = Mat::zeros (P.rows, 1, CV_32S);
    const double dthresh = 0.15;
    const int num_iter = 30;
    int currentClusterNum = 0;
    std::stack<Cluster*> c_stack;

    // Initialize with a single cluster containing all datapoints.
    // convertTo() fills the first three columns whatever the depth of P is;
    // copyTo() would have silently allocated a detached matrix unless P was
    // already CV_64F.
    Cluster* root = new Cluster();
    Mat rootData(P.rows, 4, CV_64F);
    Mat rootXyz = rootData.colRange(0, 3);
    P.convertTo(rootXyz, CV_64F);
    // ID each point so we return them in the same order
    for (int i=0; i<rootData.rows; i++) {
        rootData.at<double>(i, 3) = i;
    }
    root->setData(rootData);
    c_stack.push(root);

    // Run hierarchical k-means
    for (int t=0; t<num_iter; t++) {
        if (currentClusterNum == max_clusters || c_stack.empty()) {
            break;
        }

        Cluster* current = c_stack.top();
        c_stack.pop();
        Mat D = current->getData();

        if (!D.empty()) {
            // Calculate cluster centroid
            Mat Cmean;
            reduce(D, Cmean, 0, CV_REDUCE_AVG);

            // Calculate max distance
            double maxDist = 0;
            for (int i=0; i<D.rows; i++) {
                double dx = D.at<double>(i, 0) - Cmean.at<double>(0);
                double dy = D.at<double>(i, 1) - Cmean.at<double>(1);
                double dz = D.at<double>(i, 2) - Cmean.at<double>(2);
                double dist = sqrt(dx*dx + dy*dy + dz*dz);
                if (dist > maxDist) {
                    maxDist = dist;
                }
            }

            // Check to see if this cluster satisfies the conditions
            if (maxDist < dthresh) {
                for (int i=0; i<D.rows; i++) {
                    int idx = (int)D.at<double>(i, 3);
                    L.at<int>(idx) = currentClusterNum;
                }
                currentClusterNum++;
            } else {
                // Too spread out: split in two and revisit both halves
                Mat L_iter;
                int attempts = 5;
                Mat D32;
                D.colRange(0, 3).convertTo(D32, CV_32F);
                kmeans(D32, 2, L_iter, TermCriteria(CV_TERMCRIT_ITER|CV_TERMCRIT_EPS, 10000, 0.0001), attempts, KMEANS_PP_CENTERS);

                Mat D0 = Mat(0, 3, CV_64F);
                Mat D1 = Mat(0, 3, CV_64F);
                for (int i=0; i<L_iter.rows; i++) {
                    if (L_iter.at<int>(i) == 0) {
                        D0.push_back(D.row(i));
                    } else {
                        D1.push_back(D.row(i));
                    }
                }
                Cluster* C0 = new Cluster();
                C0->setData(D0);
                Cluster* C1 = new Cluster();
                C1->setData(D1);
                c_stack.push(C0);
                c_stack.push(C1);
            }
        }

        // The popped cluster is finished with; it was previously leaked
        delete current;
    }

    // Release the clusters that were never processed
    while (!c_stack.empty()) {
        delete c_stack.top();
        c_stack.pop();
    }

    return L;
}
int main(int argc, char* argv[]) { const char* program_name = "auto_heir_decomp_sparse"; bool optsOK = true; gmx::initForCommandLine(&argc,&argv); copyright(program_name); cout << " Reads the symmetric CSC format sparse matrix from" << endl; cout << " input-file, and heirarchically decomposes the " << endl; cout << " Laplacian matrix until relaxation time convergence" << endl; cout << " criteria are met as the following reference:" << endl; cout << " [1] B. Nadler and M. Galun, \"Fundamental Limitations" << endl; cout << " of Spectral Clustering,\" in Advances in Neural Information" << endl; cout << " Processing Systems 19, 2007, pp. 1017–1024." << endl; cout << " eigenvalues/vectors of the normalized laplacian" << endl; cout << endl; cout << " Use -h or --help to see the complete list of options." << endl; cout << endl; // Option vars... int k_a; double sigma; int nev = 2; double c1 = 1.2; double c2 = 2.0; double K; bool pSet = false; string ssm_filename; string output_filename; string ndx_filename; string residuals_filename; // Declare the supported options. 
po::options_description cmdline_options; po::options_description program_options("Program options"); program_options.add_options() ("help,h", "show this help message and exit") ("sigma,s", po::value<double>(&sigma)->default_value(1.0), "Input: Kernel sigma (double)") ("relaxation,r", po::value<double>(&c1)->default_value(1.2), "Input: Relaxation cutoff parameter, c1 (double)") ("partition,p", po::value<double>(&c2)->default_value(2.0), "Input: Partition cutoff parameter, c2 (double)") ("ssm-file,f", po::value<string>(&ssm_filename)->default_value("distances.ssm"), "Input: Symmetric sparse matrix file (string:filename)") ("output,o", po::value<string>(&output_filename)->default_value("clusters.dat"), "Output: Cluster assignment file (string:filename)") ("ndx,n", po::value<string>(&ndx_filename)->default_value("clusters.ndx"), "Output: Cluster assignment index file (string:filename)") ; cmdline_options.add(program_options); po::variables_map vm; po::store(po::parse_command_line(argc, argv, cmdline_options), vm); po::notify(vm); if (vm.count("help")) { cout << "usage: " << program_name << " [options]" << endl; cout << cmdline_options << endl; return 1; } if (!optsOK) { return -1; } cout << "Running with the following options:" << endl; cout << "sigma = " << sigma << endl; cout << "ssm-file = " << ssm_filename << endl; cout << "output = " << output_filename << endl; cout << "ndx = " << ndx_filename << endl; cout << endl; // Stacks vector<vector<int> > work; vector<vector<int> > completed; // Defining variables; double *Ax; // Array for residual calculation double residual = 0.0; double max_residual = 0.0; // SSM Matrix CSC_matrix A(ssm_filename); Ax = new double[A.n]; // File output streams ofstream output; ofstream ndx; // EPS double eps = getEPS(); // Open files output.open(output_filename.c_str()); ndx.open(ndx_filename.c_str()); // Get affinities affinity(A,sigma); // Setup work work.resize(1); work[0].resize(A.n); for (int x = 0; x < A.n; x++) work[0][x] = x; 
while (work.size()) { vector<int> current = work[work.size()-1]; work.pop_back(); CSC_matrix current_A; A.syslice(current,current_A); normalize(current_A); double* d = NULL; // values double* Z = NULL; // vectors double nevm = runARPACK(nev,current_A,d,Z); cout << "Number of converged eigenvalues/vectors found: " << nevm << endl; int *labels = new int[current_A.n]; kmeans(current_A.n,nev,nev,Z,labels); // kmeans(current_A.n,1,nev,&Z[A.n],labels); // Get slice indices vector<int> islice1 = select(current,0,labels); vector<int> islice2 = select(current,1,labels); if (islice1.size() == 0 || islice2.size() == 0) { cout << "Defunct partition..." << endl; } // Get slices CSC_matrix slice1; A.syslice(islice1,slice1); normalize(slice1); CSC_matrix slice2; A.syslice(islice2,slice2); normalize(slice2); double *slice1_d = NULL; double *slice1_Z = NULL; int nev1 = runARPACK(nev,slice1,slice1_d,slice1_Z); cout << "Number of converged eigenvalues/vectors found: " << nev1 << endl; double *slice2_d = NULL; double *slice2_Z = NULL; int nev2 = runARPACK(nev,slice2,slice2_d,slice2_Z); cout << "Number of converged eigenvalues/vectors found: " << nev2 << endl; double current_t,slice1_t,slice2_t,ratio; current_t = (1.0/(1.0-d[0])); slice1_t = (1.0/(1.0-slice1_d[0])); slice2_t = (1.0/(1.0-slice2_d[0])); ratio = slice1_t / slice2_t; if (ratio < 1.0) ratio = slice2_t / slice1_t; cout << "Main: " << current_t << endl; cout << "Slice1: " << slice1_t << endl; cout << "Slice2: " << slice2_t << endl; cout << "Relaxation: " << c1*(slice1_t + slice2_t) << endl; cout << "Partition: " << ratio << endl; if (nev1+nev2+nevm!=6) { cout << "No convergence. Skipping decomposition..." 
<< endl; completed.push_back(current); } else { if (current_t < c1*(slice1_t + slice2_t)) { completed.push_back(current); } else if (ratio > c2) { if (slice1_t > slice2_t) { work.push_back(islice1); completed.push_back(islice2); } else { work.push_back(islice2); completed.push_back(islice1); } } else { work.push_back(islice1); work.push_back(islice2); } } delete [] labels; delete [] slice1_d; delete [] slice1_Z; delete [] slice2_d; delete [] slice2_Z; delete [] d; delete [] Z; } int *clusters = new int[A.n]; cout << "Number of clusters: " << completed.size() << endl; cout << endl; for (int x = 0; x < completed.size(); x++) { int idx = 0; ndx << "[cluster_" << x+1 << "]" << endl; for (int y = 0; y < completed[x].size(); y++) { ndx << completed[x][y]+1 << " "; clusters[completed[x][y]] = x+1; if (++idx > 19) { ndx << endl; idx = 0; } } ndx << endl; ndx << endl; } for (int x = 0; x < A.n; x++) output << clusters[x] << endl; ndx.close(); output.close(); delete [] clusters; delete [] Ax; return 0; } // main.
int CalcCodebooks(const char *desc_path_char, const char *desc_type_char, const char *cb_path_char) { cv::Mat desc_set, labels_set, centers; int desc_dim; fs::path cb_path, desc_path; fs::directory_iterator eod; boost::smatch what; boost::regex filter(".*dat"); DescType cb_desc_type = DESCRIPTORS_NUM; bool valid_desc_type = false; RandGen rand_gen; MatWriter mw; // decode codebook type desc_path = fs::path(desc_path_char)/fs::path(desc_type_char); CHECK(fs::exists(desc_path)); for (int desc_type = 0; desc_type < DESCRIPTORS_NUM; desc_type++) { if (std::string(desc_type_char) == ActionClassifier::desc_names_[desc_type]) { cb_desc_type = static_cast<DescType>(desc_type); valid_desc_type = true; break; } } CHECK(valid_desc_type); cb_path = fs::path(cb_path_char); // if doesn't exist create a folder for codebooks try { if (fs::is_directory(cb_path) == false) fs::create_directories(cb_path); } catch(boost::filesystem::filesystem_error e) { LOGF() << "Failed to create codebooks folder: " << cb_path << std::endl; return -1; } cb_path = cb_path/fs::path("cb_" + std::string(ActionClassifier::desc_names_[cb_desc_type]) + ".dat"); if (fs::exists(cb_path)) { LOGI() << std::string(desc_type_char) << "code-book already exists." 
<< std::endl; return 0; } // iterate through different actions at different rotations until sufficient number of descriptors is collected LOGI() << "Collecting descriptors for " << desc_type_char << " codebook" << std::endl; while (desc_set.rows < CB_MAX_DESCRIPTOR_NUM) { for(fs::directory_iterator dir_iter(desc_path) ; (dir_iter != eod) && (desc_set.rows < CB_MAX_DESCRIPTOR_NUM); ++dir_iter) { if ((fs::is_regular_file(*dir_iter)) && (boost::regex_match(dir_iter->path().string(), what, filter))) { MatReader mr; cv::Mat frame_desc; int desc_cnt = 0; if (rand_gen.RandInt(100) > CB_ANGLE_REJECTION_RATE) continue; LOGI() << "Extracting descriptors from " << fs::basename(dir_iter->path()) << std::endl; if (mr.Init(dir_iter->path().string()) != 0) { LOGW() << "Failed to open " << fs::basename(dir_iter->path()) << std::endl; continue; } while ((mr.Read(&frame_desc) == 0) && (desc_set.rows < CB_MAX_DESCRIPTOR_NUM)){ for (int desc_idx = 0; desc_idx < frame_desc.rows; desc_idx++) { if (rand_gen.RandInt(100) > CB_DESCRIPTOR_REJECTION_RATE) continue; desc_set.push_back(frame_desc.row(desc_idx)); desc_cnt ++; if (desc_set.rows == CB_MAX_DESCRIPTOR_NUM) { break; } } } LOGI() << desc_cnt << " descriptors added, total descriptors num: " << desc_set.rows << std::endl; } } } // calculate codebook words desc_dim = desc_set.cols; LOGI() << "kmeans clustering, max iterations: " << CB_KMEAS_MAX_ITER << std::endl; kmeans(desc_set, BOW_SIZE, labels_set, cv::TermCriteria(CV_TERMCRIT_ITER|CV_TERMCRIT_EPS, CB_KMEAS_MAX_ITER, 0.01*desc_dim), 5, cv::KMEANS_PP_CENTERS, centers); LOGI() << "kmeans complete" << std::endl; CHECK(mw.Init(cb_path.string()) == 0); mw.Write(centers); return 0; }
int main(int argc, char **argv) { extern char *optarg; extern int optind, optopt; int c, ncluster = 4, rank, nworker, nline, totalLine, ndim, i = 0, j=0; int type; char *inFile, *outFile; float thres = 0.01; float **data; // input data in this process float **centroid; // all cluster centroids int *label; // for each data point, find its new class label double stime, etime, // whole system time stimeCluster, etimeCluster; // maximum cluster time among all processes double elapse, elapseWhole, elapseCluster, elapseClusterWhole; if(argc != 15) { printUsage(argv[0]); exit(EXIT_FAILURE); } while ((c = getopt(argc, argv, "p:i:o:k:t:l:d:")) != EOF) { switch (c) { case 'p': type = atoi(optarg); break; case 'i': inFile = optarg; break; case 'o': outFile = optarg; break; case 'k': ncluster = atoi(optarg); break; case 't': thres = atof(optarg); break; case 'l': nline = atof(optarg); break; case 'd': ndim = atof(optarg); break; default: printUsage(argv[0]); exit(EXIT_FAILURE); } } if (optind > argc) { printUsage(argv[0]); exit(EXIT_FAILURE); } stime = MPI_Wtime(); MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &nworker); printf("rank:%d init done\n", rank); // read input data (each process has a portion of all data) // IMP: nline changed from total lines for all input to subtotal lines for a specific process totalLine = nline; data = kmeans_read(inFile, &nline, ndim, MPI_COMM_WORLD); printf("rank:%d read data done. 
nline:%d\n", rank, nline); // initialize cluster centers centroid = (float **) malloc(ncluster * sizeof(float *)); // pointer to each line centroid[0] = (float *) malloc(ncluster * ndim * sizeof(float)); for(i = 1; i < ncluster; i++) centroid[i] = centroid[i-1] + ndim; //float centroid[ncluster][ndim]; if(rank == 0) { for(i = 0; i < ncluster; i++) for(j = 0; j < ndim; j++) centroid[i][j] = data[i][j]; } // memcpy(centroid, data, ncluster * ndim * sizeof(float)); printf("rank:%d init cluster center done\n", rank); // broadcast the centroid to all other processes MPI_Bcast(centroid[0], ncluster*ndim, MPI_FLOAT, 0, MPI_COMM_WORLD); // do kmeans calculation stimeCluster = MPI_Wtime(); label = (int *) malloc(nline * sizeof(int)); kmeans(type, data, ncluster, ndim, nline, thres, label, centroid, MPI_COMM_WORLD); printf("rank:%d kmeans done\n", rank); etimeCluster = MPI_Wtime(); kmeans_write(outFile, nline, totalLine, ncluster, ndim , centroid, label, 0, MPI_COMM_WORLD); free(label); free(centroid[0]); free(centroid); etime = MPI_Wtime(); elapse = etime - stime; elapseCluster = etimeCluster-stimeCluster; // get the maximum time among processes MPI_Reduce(&elapse, &elapseWhole, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); MPI_Reduce(&elapseCluster, &elapseClusterWhole, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); // performance report printf("Done! Rank: %d\tTime: %f\n", rank, etime-stime); if(rank == 0) printf("System time: %f secs\tClustering time: %fsecs\n", elapseWhole, elapseCluster); MPI_Finalize(); return EXIT_SUCCESS; }
int main (){ char file[50];//name of file int nclus;//number of clusters //float *Cone, *Ctwo, *temp, *temp2; /*-------------------------------------------------------------------------- -------------get input parameters --------------------------------------------------------------------------*/ //get name of file to open printf("what is the name of your file?"); if (scanf("%s", &file[0]) != 1){ //read in value and make sure it is valid name printf("error: filemane invalid!\n"); exit(EXIT_FAILURE); } printf("filename is %s\n", file); //get number of clusters printf("how many clusters do you want to find?"); if (scanf("%d", &nclus) != 1){ //read in value and make sure it is valid number printf("error: number invalid!\n"); exit(EXIT_FAILURE); } printf("number of k clusters is %d\n", nclus); /*-------------------------------------------------------------------------- -------------open and read input file --------------------------------------------------------------------------*/ //open input file FILE *fp; if ((fp = fopen(file,"r")) == NULL){ printf("Error:cannot read the file %s\n", file); }else{ printf("success opening file %s\n", file); } //read number of items and attributes int items, attributes; fscanf(fp,"%d",&items); fscanf(fp,"%d",&attributes); printf ("items %d attributes %d\n", items, attributes); getchar(); //Create nodes Node *head = (Node *) malloc (sizeof(Node)); Node *node = (Node *) malloc (sizeof(Node)); float *temp = (float *)malloc(items*attributes*sizeof(float)); //read file printf("reading file\n"); int i, j; //counters for (i=0; i < items; i++){//for each item printf("reading item %d\n",i); for (j=0; j < attributes; j++){ fscanf(fp,"%f",&temp[(attributes)*i+j]); //printf("read attribute %d, %f\n",j,temp[(attributes)*i+j]); } node = addData(nclus+1,&temp[(attributes)*i]); append(head,node); } /*-------------------------------------------------------------------------- -------------get initial means 
--------------------------------------------------------------------------*/ float *means = (float *)malloc(nclus*attributes*sizeof(float)); //float *means_b = (float *)malloc(nclus*attributes*sizeof(float)); int index; node = head; //printf("node...%f %f\n", node->att[0], node->att[1]); for (i=0;i<nclus;i++){ node = node->next; //printf("node...%f %f\n", node->att[0], node->att[1]); printf("\n\ninitial center for cluster %d:\n",i); for (j=0;j<attributes;j++){ index = (attributes)*(i)+j; means[index] = node->att[j]; printf("attribute %d: %f\n", j, node->att[j]); } } printf("press enter\n"); getchar(); /*-------------------------------------------------------------------------- -------------run k-means algorithm --------------------------------------------------------------------------*/ //declare variables for k-means algo float *att_sum = (float *)malloc(nclus*attributes*sizeof(float)); //sum of each column for each cluster float *sqerr_sum = (float *)malloc(nclus*sizeof(float)); //keep track of rmse float *rmse = (float *)malloc(nclus*sizeof(float)); //keep track of rmse float *rmsediff = (float *)malloc(nclus*sizeof(float)); //differences in rmse int *cnt = (int *)malloc(nclus*sizeof(int)); //number of elements in each cluster float *last_rmse = (float *)malloc(nclus*sizeof(float)); int test = 0; int count = 0; do{//repeat until test = 1 (RMSE converges) test = kmeans(head,&means[0],nclus,&last_rmse[0], attributes, &att_sum[0], &sqerr_sum[0], &rmse[0], &rmsediff[0], &cnt[0]); printf("press enter for next iteration\n"); getchar(); //printf("test %d\n", test); count ++; if (count > 100) test = 1; }while(test == 0); /*-------------------------------------------------------------------------- -------------print results in output file --------------------------------------------------------------------------*/ FILE *out; if ((out = fopen("output_gbm.csv","w")) == NULL){ printf("Error:cannot read the file output.txt\n"); }else{ printf("success opening output file 
output.txt for output\n"); } node = head; while(node->next != NULL){ node = node->next; for (j=0;j<attributes;j++) { fprintf(out, "%f,", node->att[j]); } fprintf(out,"%d\n",node->kclus); } fclose(out); fclose (fp) ; /*-------------------------------------------------------------------------- -------------run k-nearest neighbor algorithm --------------------------------------------------------------------------*/ int response; printf("if you want to run k-nearest neighbor type the number 1 and press enter, otherwise press any other key: "); if (scanf("%d", &response) != 1){ //read in value and make sure it is valid number printf("error: number invalid!\n"); exit(EXIT_FAILURE); } printf("response is %d\n", response); //run k-nn algorithm if (response == 1){ knn(head, nclus); //knn call function } free(last_rmse); free(means); free(att_sum); free(sqerr_sum); free(rmse); free(rmsediff); free(cnt); return 1; }
/*----< kmeans_clustering() >---------------------------------------------*/
/*
 * Iterate kmeans() until the reported change drops to `threshold` or below,
 * or until 500 iterations have been made.
 *
 *   feature    in: flattened [npoints][nfeatures] array
 *   nfeatures  number of attributes per point
 *   npoints    number of data points
 *   nclusters  requested cluster count (clamped to npoints)
 *   threshold  convergence threshold on the value returned by kmeans()
 *
 * Returns the row-pointer table of the cluster centers (clusters[0] is the
 * contiguous [nclusters][nfeatures] block), or NULL when there is nothing to
 * cluster or memory runs out.
 * NOTE(review): the declared return type is float* although the value is the
 * float** table; the value is unchanged and only the cast is now explicit --
 * confirm what callers expect.
 */
float* kmeans_clustering(float *feature,    /* in: [npoints][nfeatures] */
                         int nfeatures,
                         int npoints,
                         int nclusters,
                         float threshold)
{
    int i, j;                 /* counters */
    int loop = 0;
    int *new_centers_len;     /* [nclusters]: no. of points in each cluster */
    float delta;              /* if the point moved */
    float **clusters;         /* out: [nclusters][nfeatures] */
    float **new_centers;      /* [nclusters][nfeatures] */
    int *membership;          /* which cluster a data point belongs to */
    int *membership_new;      /* newly assignment membership */
    int c = 0;

    /* nclusters should never be > npoints
       that would guarantee a cluster without points */
    if (nclusters > npoints)
        nclusters = npoints;
    if (nclusters <= 0 || nfeatures <= 0)
        return NULL;

    /* allocate memory for membership and membership_new */
    membership = (int*) malloc(npoints * sizeof(int));
    membership_new = (int*) malloc(npoints * sizeof(int));

    /* allocate space for and initialize returning variable clusters[] */
    clusters = (float**) malloc(nclusters * sizeof(float*));
    if (membership == NULL || membership_new == NULL || clusters == NULL ||
        (clusters[0] = (float*) malloc(nclusters * nfeatures * sizeof(float))) == NULL) {
        free(clusters);
        free(membership);
        free(membership_new);
        return NULL;
    }
    for (i=1; i<nclusters; i++)
        clusters[i] = clusters[i-1] + nfeatures;

    /* Seed cluster i with point i. Selection is deterministic (the random
       pick is disabled), rows are addressed with the nfeatures stride of the
       [npoints][nfeatures] layout, and no point is used twice. */
    for (i=0; i<nclusters; i++) {
        for (j=0; j<nfeatures; j++)
            clusters[i][j] = feature[i*nfeatures + j];
    }

#pragma acc data create(membership[0:npoints], membership_new[0:npoints])
    {
        /* initialize the membership and membership_new to -1 for all */
#pragma acc kernels present(membership)
        for (i=0; i < npoints; i++)
            membership[i] = membership_new[i] = -1;

        /* allocate space for and initialize new_centers_len and new_centers */
        new_centers_len = (int*) calloc(nclusters, sizeof(int));
        new_centers = (float**) malloc(nclusters * sizeof(float*));
        new_centers[0] = (float*) calloc(nclusters * nfeatures, sizeof(float));
        for (i=1; i<nclusters; i++)
            new_centers[i] = new_centers[i-1] + nfeatures;

        /* iterate until convergence */
        do {
            delta = (float) kmeans(feature,         /* in: [npoints][nfeatures] */
                                   nfeatures,       /* number of attributes for each point */
                                   npoints,         /* number of data points */
                                   nclusters,       /* number of clusters */
                                   membership,      /* which cluster the point belongs to */
                                   membership_new,  /* newly assignment membership */
                                   clusters,        /* out: [nclusters][nfeatures] */
                                   new_centers_len, /* out: number of points in each cluster */
                                   new_centers      /* sum of points in each cluster */
                                  );

            /* replace old cluster centers with new_centers */
            /* CPU side of reduction */
            for (i=0; i<nclusters; i++) {
                for (j=0; j<nfeatures; j++) {
                    if (new_centers_len[i] > 0)
                        clusters[i][j] = new_centers[i][j] / new_centers_len[i]; /* take average i.e. sum/n */
                    new_centers[i][j] = 0.0; /* set back to 0 */
                }
                new_centers_len[i] = 0; /* set back to 0 */
            }
            c++;
        } while ((delta > threshold) && (loop++ < 500)); /* makes sure loop terminates */
        printf("iterated %d times\n", c);
    } /* end acc data */

    free(new_centers[0]);
    free(new_centers);
    free(new_centers_len);
    free(membership);
    free(membership_new);

    return (float*) clusters;
}
//从所有的feature 文件中读出所有的feature 。 void fImgSvm::createFeatureDict() { static char * dirname = "/home/fuxiang/code-fuxiang90/cpp/opencv/opencv-start/feature"; DIR *dp; struct dirent *entry; struct stat statbuf; chdir("/home/fuxiang/code-fuxiang90/cpp/opencv/opencv-start"); if( (dp = opendir( "/home/fuxiang/code-fuxiang90/cpp/opencv/opencv-start/feature" ) ) == NULL ) { fprintf(stderr, "cannot open directory: %s\n", dirname); //return ; } //double darray[SIFTN] ; vector < vector<double > > featurevec; int featuresum = 0; chdir(dirname); while( (entry = readdir(dp)) != NULL) { if( S_ISDIR(statbuf.st_mode) ) { continue; } else { //strcpy(filename[filenum++], entry->d_name); string filename = entry->d_name; //filename = "" if(filename == "." || filename == "..") continue; int pos = filename.find(".feature") ; if(pos == -1 ) continue; mfimgfeature.GetSiftFeatureFile(filename ,featurevec); } } int nfeature = featurevec.size(); SGMatrix<float64_t> data(SIFTN, nfeature) ; for (int i = 0; i < nfeature; ++i) { for (int j = 0; j < SIFTN; j++) { data(j ,i) = featurevec[i][j]; } } CDenseFeatures<float64_t>* centers; kmeans(data,centers,nfeature); // int cnClusterNumber = mwordnum; // CvMat *pszLabels = cvCreateMat(nfeature, 1, CV_32SC1); // CvMat szSamples, *pszClusterCenters ; // pszClusterCenters = cvCreateMat(cnClusterNumber, SIFTN, CV_32FC1); // cvInitMatHeader ( &szSamples,nfeature,SIFTN, CV_32FC1, pszDiscriptor); // cvKMeans2(&szSamples, cnClusterNumber, pszLabels,cvTermCriteria( CV_TERMCRIT_EPS+CV_TERMCRIT_ITER, 10, 1.0 ),1, (CvRNG *)0, 0, pszClusterCenters); //cvKMeans2(&szSamples, cnClusterNumber, pszLabels, pszClusterCenters); ofstream fout("dict"); for(int i = 0 ; i < mwordnum ; i ++) { vector<double > d ; for(int j = 0 ; j < SIFTN ; j ++) { fout << data(j,i) << " " ; d.push_back(data(j,i) ); } dictmap.insert( map<int ,vector<double> >::value_type(i ,d) ); fout << endl; } fout.close(); //SG_UNREF(data); //SG_UNREF(centers); }
void dirichlet_fit_main(struct data_t *data, int rseed) { const int N = data->N, S = data->S, K = data->K; int i, j, k; gsl_rng *ptGSLRNG; gsl_rng_env_setup(); gsl_set_error_handler_off(); ptGSLRNG = gsl_rng_alloc(gsl_rng_default); gsl_set_error_handler_off(); gsl_rng_set(ptGSLRNG, rseed); /* allocate matrices */ double **aadZ, **aadLambda, **aadErr, *adW; adW = (double *) calloc(K, sizeof(double)); aadZ = (double **) calloc(K, sizeof(double *)); aadLambda = (double **) calloc(K, sizeof(double *)); aadErr = (double **) calloc(K, sizeof(double*)); aadZ[0] = (double *) calloc(K * N, sizeof(double)); aadLambda[0] = (double *) calloc(K * S, sizeof(double)); aadErr[0] = (double *) calloc(K * S, sizeof(double)); for (k = 1; k < K; k++) { aadZ[k] = aadZ[0] + k * N; aadLambda[k] = aadLambda[0] + k * S; aadErr[k] = aadErr[0] + k * S; } /* soft k means initialiser */ kmeans(data, ptGSLRNG, adW, aadZ, aadLambda); for (k = 0; k < K; k++) { adW[k] = 0.0; for (i = 0; i < N; i++) adW[k] += aadZ[k][i]; } if (data->verbose) Rprintf(" Expectation Maximization setup\n"); for (k = 0; k < K; k++) { for (j = 0; j < S; j++) { const double x = aadLambda[k][j]; aadLambda[k][j] = (x > 0.0) ? 
log(x) : -10; } optimise_lambda_k(aadLambda[k], data, aadZ[k]); } /* simple EM algorithm */ int iter = 0; double dNLL = 0.0, dNew, dChange = BIG_DBL; if (data->verbose) Rprintf(" Expectation Maximization\n"); while (dChange > 1.0e-6 && iter < 100) { calc_z(aadZ, data, adW, aadLambda); /* latent var expectation */ for (k = 0; k < K; k++) /* mixture components, given pi */ optimise_lambda_k(aadLambda[k], data, aadZ[k]); for (k = 0; k < K; k++) { /* current likelihood & weights */ adW[k] = 0.0; for(i = 0; i < N; i++) adW[k] += aadZ[k][i]; } dNew = neg_log_likelihood(adW, aadLambda, data); dChange = fabs(dNLL - dNew); dNLL = dNew; iter++; R_CheckUserInterrupt(); if (data->verbose && (iter % 10) == 0) Rprintf(" iteration %d change %f\n", iter, dChange); } /* hessian */ if (data->verbose) Rprintf(" Hessian\n"); gsl_matrix *ptHessian = gsl_matrix_alloc(S, S), *ptInverseHessian = gsl_matrix_alloc(S, S); gsl_permutation *p = gsl_permutation_alloc(S); double dLogDet = 0., dTemp; int signum, status; for (k = 0; k < K; k++) { data->adPi = aadZ[k]; if (k > 0) dLogDet += 2.0 * log(N) - log(adW[k]); hessian(ptHessian, aadLambda[k], data); status = gsl_linalg_LU_decomp(ptHessian, p, &signum); gsl_linalg_LU_invert(ptHessian, p, ptInverseHessian); for (j = 0; j < S; j++) { aadErr[k][j] = gsl_matrix_get(ptInverseHessian, j, j); dTemp = gsl_matrix_get(ptHessian, j, j); dLogDet += log(fabs(dTemp)); } } gsl_matrix_free(ptHessian); gsl_matrix_free(ptInverseHessian); gsl_permutation_free(p); /* results */ double dP = K * S + K - 1; data->NLE = dNLL; data->LogDet = dLogDet; data->fit_laplace = dNLL + 0.5 * dLogDet - 0.5 * dP * log(2. * M_PI); data->fit_bic = dNLL + 0.5 * log(N) * dP; data->fit_aic = dNLL + dP; group_output(data, aadZ); mixture_output(data, adW, aadLambda, aadErr); free(aadErr[0]); free(aadErr); free(aadLambda[0]); free(aadLambda); free(aadZ[0]); free(aadZ); free(adW); }
// Cluster the given descriptors with kmeans(), using this trainer's
// clusterCount, termcrit, attempts and flags, and return the resulting
// cluster centers as the vocabulary.
Mat BOWKMeansTrainer::cluster( const Mat& _descriptors ) const
{
    // kmeans() needs somewhere to put the per-sample labels; they are
    // discarded afterwards.
    Mat sampleLabels;
    Mat centers;

    kmeans( _descriptors, clusterCount, sampleLabels,
            termcrit, attempts, flags, centers );

    return centers;
}
void ClusterContacts(vector<dContactGeom>& contacts,int maxClusters,Real clusterNormalScale) { if((int)contacts.size() <= maxClusters) return; vector<Vector> pts(contacts.size()); for(size_t i=0;i<pts.size();i++) { pts[i].resize(7); pts[i][0] = contacts[i].pos[0]; pts[i][1] = contacts[i].pos[1]; pts[i][2] = contacts[i].pos[2]; pts[i][3] = contacts[i].normal[0]*clusterNormalScale; pts[i][4] = contacts[i].normal[1]*clusterNormalScale; pts[i][5] = contacts[i].normal[2]*clusterNormalScale; pts[i][6] = contacts[i].depth; } Statistics::KMeans kmeans(pts,maxClusters); //randomized //kmeans.RandomInitialCenters(); //deterministic for(size_t i=0;i<kmeans.centers.size();i++) kmeans.centers[i] = kmeans.data[(i*pts.size())/kmeans.centers.size()]; int iters=20; kmeans.Iterate(iters); contacts.resize(kmeans.centers.size()); vector<int> degenerate; for(size_t i=0;i<contacts.size();i++) { contacts[i].pos[0] = kmeans.centers[i][0]; contacts[i].pos[1] = kmeans.centers[i][1]; contacts[i].pos[2] = kmeans.centers[i][2]; contacts[i].normal[0] = kmeans.centers[i][3]/clusterNormalScale; contacts[i].normal[1] = kmeans.centers[i][4]/clusterNormalScale; contacts[i].normal[2] = kmeans.centers[i][5]/clusterNormalScale; Real len = Vector3(contacts[i].normal[0],contacts[i].normal[1],contacts[i].normal[2]).length(); if(FuzzyZero(len) || !IsFinite(len)) { printf("ODESimulator: Warning, clustered normal became zero/infinite\n"); //pick any in the cluster int found = -1; for(size_t k=0;k<kmeans.labels.size();k++) { if(kmeans.labels[k] == (int)i) { found = (int)k; break; } } if(found < 0) { //strange -- degenerate cluster? 
degenerate.push_back(i); continue; } contacts[i].pos[0] = pts[found][0]; contacts[i].pos[1] = pts[found][1]; contacts[i].pos[2] = pts[found][2]; contacts[i].normal[0] = pts[found][3]; contacts[i].normal[1] = pts[found][4]; contacts[i].normal[2] = pts[found][5]; Real len = Vector3(contacts[i].normal[0],contacts[i].normal[1],contacts[i].normal[2]).length(); contacts[i].normal[0] /= len; contacts[i].normal[1] /= len; contacts[i].normal[2] /= len; contacts[i].depth = pts[found][6]; continue; } contacts[i].normal[0] /= len; contacts[i].normal[1] /= len; contacts[i].normal[2] /= len; //cout<<"Clustered contact "<<contacts[i].pos[0]<<" "<<contacts[i].pos[1]<<" "<<contacts[i].pos[2]<<endl; //cout<<"Clustered normal "<<contacts[i].normal[0]<<" "<<contacts[i].normal[1]<<" "<<contacts[i].normal[2]<<endl; contacts[i].depth = kmeans.centers[i][6]; } reverse(degenerate.begin(),degenerate.end()); for(size_t i=0;i<degenerate.size();i++) { contacts.erase(contacts.begin()+degenerate[i]); } }
/* Program entry point: the kmeans(TIME) call is bracketed by the simulation's
 * init and wrap-up routines. Always returns 0. */
int main()
{
    s4_init_simulation();

    kmeans(TIME);

    s4_wrapup_simulation();
    return 0;
}
std::vector<cv::Rect> SoftPPWordSplitter::split(const CCGroup &grp) { cv::Rect bb = grp.get_rect(); // generate the projection profile sums cv::Mat sums(1, bb.width, CV_32FC1, cv::Scalar(0)); ProjectionProfileComputer pp_computer(cv::Size(bb.width, 1), bb.x); for (int i = 0; i < grp.ccs.size(); i++) { sums = pp_computer.compute(grp.ccs[i].pixels, sums); } int threshold = pp_computer.compute_threshold(sums); if (_verbose) { std::cout << "Projection Profile Threshold: " << threshold << std::endl; } cv::Mat gaps = sums < threshold; // now shrink each bounding rect on the border with the gaps matrix std::vector<cv::Rect> original_rects(grp.ccs.size()); std::transform( grp.ccs.begin(), grp.ccs.end(), original_rects.begin(), [](const CC &cc) -> cv::Rect { return cc.rect; }); std::sort( original_rects.begin(), original_rects.end(), [](const cv::Rect &a, const cv::Rect &b) -> bool { return a.x < b.x; }); RectShrinker shrinker(0.10, bb.x); std::vector<cv::Rect> shrinked_rects(shrinker.shrink(original_rects, gaps)); //cv::Mat img(grp.get_image()); //cv::imshow("RECTS-wo-rects", img); //cv::waitKey(0); //for (cv::Rect r : shrinked_rects) { // cv::rectangle(img, r.tl(), r.br(), cv::Scalar(128)); //} //cv::imshow("RECTS", img); //cv::waitKey(0); std::vector<bool> collide(bb.width, false); for (int i = 0; i < shrinked_rects.size(); i++) { for (int j = shrinked_rects[i].x; j < shrinked_rects[i].x + shrinked_rects[i].width; j++) { collide[j-bb.x] = true; } } //std::vector<bool> collide(bb.width, false); //for (int i = 0; i < ccs.size(); i++) { // for (int j = ccs[i].rect.x; j < ccs[i].rect.x + ccs[i].rect.width; j++) { // collide[j-bb.x] = true; // } //} std::vector<float> heights(grp.ccs.size(), 0.0); std::transform( grp.ccs.begin(), grp.ccs.end(), heights.begin(), [] (const CC &c) -> float { return c.rect.height; }); float mean_height = cv::sum(heights)[0] / heights.size(); // Now find the rects from this binary mask. 
// This merges overlapping/touching CCs into a single component std::vector<cv::Rect> rects; cv::Rect last_rect(bb.x, bb.y, 1, bb.height); for (int i = 0; i < collide.size(); i++) { if (collide[i]) { last_rect.width += 1; } else { if (last_rect.width > 0) { rects.push_back(last_rect); } last_rect = cv::Rect(bb.x + i, bb.y, 0, bb.height); } } if (last_rect.width > 0) { rects.push_back(last_rect); } if (_verbose) std::cout << "#Rects: " << rects.size() << std::endl; if (rects.size() <= 2) { std::vector<cv::Rect> result; result.push_back(bb); return result; } // find the dists std::vector<float> dists; for (int i = 1; i < rects.size(); i++) { dists.push_back(rects[i].tl().x - rects[i-1].br().x); } // kmeans cv::Mat dist_mat(dists.size(), 1, CV_32FC1); for (size_t i = 0; i < dists.size(); i++) { dist_mat.at<float>(i,0) = dists[i]; } cv::Mat centers; cv::Mat labels;//(dists.size(),1, CV_32SC1, cv::Scalar(0)); /* float min = *std::min_element(dists.begin(), dists.end()); float max = *std::max_element(dists.begin(), dists.end()); for (size_t i = 0; i < dists.size(); i++) { labels.at<int>(i,0) = std::abs(dists[i] - min) < std::abs(dists[i] - max) ? 
0 : 1; } */ if (_verbose) std::cout << dist_mat << std::endl; kmeans(dist_mat, 2, labels, cv::TermCriteria(CV_TERMCRIT_EPS+CV_TERMCRIT_ITER, 100, .01), 5, cv::KMEANS_PP_CENTERS, centers); if (_verbose) std::cout << centers << std::endl; std::vector<float> cpy(dists); std::sort(cpy.begin(), cpy.end()); float median = cpy[cpy.size() / 2]; if (cpy.size() % 2 == 0) { median = cpy[cpy.size() / 2] + cpy[cpy.size() / 2 - 1]; median = median / 2.0f; } float medval = median; float height = std::abs(centers.at<float>(0,0) - centers.at<float>(1,0)) / mean_height; median = std::abs(centers.at<float>(0,0) - centers.at<float>(1,0)) / (median + 1e-10); if (_verbose) { std::cout << dists.size() << " " << medval << " " << median << " " << height << std::endl; } // liblinear: 92% ACC: (10-F) // ./train -v 10 -B 1 -w1 2 -c 100 dists_cleaned.dat // do we have a single cluster?! //if (dists.size() > 3 && median * 0.84320891 + height * 0.3127415 < 1.23270849 || // dists.size() <= 3 && height < 0.43413942) { if (median * 0.33974138 + height * 0.47850904 < 0.56307525) { std::vector<cv::Rect> result; result.push_back(bb); return result; } // get the index of the smallest center int small_center = centers.at<float>(0,0) < centers.at<float>(1,0) ? 0 : 1; // count the distance to cluster assignments int cnt[2] = {0,0}; for (int i = 0; i < labels.rows; i++) { cnt[labels.at<int>(i,0)]++; } // we have more word gaps than letter gaps -> don't split! if (cnt[small_center] < cnt[1-small_center]) { std::vector<cv::Rect> result; result.push_back(bb); return result; } // start from left to right and iteratively merge rects if the // distance between them is clustered into the smaller center last_rect = rects[0]; std::vector<cv::Rect> word_candidates; for (int i = 1; i < rects.size(); i++) { if (_allow_single_letters) { if (labels.at<int>(i-1,0) == small_center) { // extend the last rect last_rect = last_rect | rects[i]; } else { // do not extend it! 
word_candidates.push_back(last_rect); last_rect = rects[i]; } } else { if (labels.at<int>(i-1,0) == small_center) { // extend the last rect last_rect = last_rect | rects[i]; } else if (i < labels.rows && labels.at<int>(i,0) == small_center) { // do not extend it! word_candidates.push_back(last_rect); last_rect = rects[i]; } else { last_rect = last_rect | rects[i]; } } } word_candidates.push_back(last_rect); // for each rect, find the original connected component rects std::vector<cv::Rect> words; for (cv::Rect candidate : word_candidates) { std::vector<cv::Rect> word; for (size_t i = 0; i < grp.ccs.size(); i++) { cv::Rect intersect(grp.ccs[i].rect & candidate); if (float (intersect.width * intersect.height) / float (grp.ccs[i].rect.width * grp.ccs[i].rect.height) >= 0.8f) { cv::Rect r = grp.ccs[i].rect; // set the text height correctly r.y = bb.y; r.height = bb.height; word.push_back(r); } } if (_verbose) { std::cout << "Accumulated: " << word.size() << " rects!" << std::endl; } if (word.empty()) continue; assert(!word.empty()); cv::Rect r = word[0]; for (size_t i = 1; i < word.size(); i++) { r = r | word[i]; } words.push_back(r); } return words; }