/*
 * Deserialize a GMM from the binary stream f.
 *
 * Layout consumed, in order:
 *   - one int: dimension d
 *   - one int: number of components k
 *   - g->k     elements of g->w
 *   - g->k*g->d elements of g->mu
 *   - g->k*g->d elements of g->sigma
 *
 * The model is allocated with gmm_new(d, k) and returned to the caller.
 * Every read goes through the READANDCHECK macro, which is defined outside
 * this block; NOTE(review): it presumably reads from the variable `f` and
 * reports/aborts on a short read -- confirm against the macro definition.
 */
gmm_t * gmm_read(FILE *f)
{
  int dim;
  int ncomp;

  /* header: dimension first, then component count */
  READANDCHECK(&dim, 1);
  READANDCHECK(&ncomp, 1);

  gmm_t *g = gmm_new(dim, ncomp);

  /* payload sizes are taken from the freshly created model, not the header */
  READANDCHECK(g->w, g->k);
  READANDCHECK(g->mu, g->k*g->d);
  READANDCHECK(g->sigma, g->k*g->d);

  return g;
}
int main() { const char *data_filenm = "sample_data.txt"; const int gmm_num_components = 3; // Load data from file FILE *fp = fopen(data_filenm, "r"); if (fp == NULL) { printf("ERROR: File 'sample_data.txt' not found.\nRun scripts/generate_data.py to generate sample data.\n"); exit(1); } int N = 0, D = 0; size_t bytes_read, len = 0; char *line = NULL; while ((bytes_read = getline(&line, &len, fp)) != -1) { if (bytes_read > 0) N++; } rewind(fp); len = 0; getline(&line, &len, fp); char *token = strtok(line, " \n"); while (token != NULL) { D++; token = strtok(NULL, " \n"); } double *X = malloc(N*D*sizeof(double)); rewind(fp); line = NULL; len = 0; for (int t=0; t<N; t++) { getline(&line, &len, fp); token = strtok(line, " \n"); X[D*t+0] = atof(token); for (int i=1; i<D; i++) { token = strtok(NULL, " \n"); X[D*t+i] = atof(token); } } fclose(fp); // Train the SGMM GMM *gmm = gmm_new(gmm_num_components, D, "diagonal"); gmm_set_convergence_tol(gmm, 1e-6); gmm_set_regularization_value(gmm, 1e-6); gmm_set_initialization_method(gmm, "random"); struct timeval st, en; gettimeofday(&st, NULL); gmm_fit(gmm, X, N); gettimeofday(&en, NULL); printf("Time elapsed = %lf s\n", (en.tv_sec-st.tv_sec) + (1e-6)*(en.tv_usec-st.tv_usec)); gmm_print_params(gmm); double llh = gmm_score(gmm, X, N); printf("Score (LLH) = %lf\n", llh); gmm_free(gmm); // Free data free(X); return 0; }
gmm_t * gmm_learn (int di, int ni, int ki, int niter, const float * v, int nt, int seed, int nredo, int flags) { long d=di,k=ki,n=ni; int iter, iter_tot = 0; double old_key, key = 666; niter = (niter == 0 ? 10000 : niter); /* the GMM parameters */ float * p = fvec_new_0 (n * k); /* p(ci|x) for all i */ gmm_t * g = gmm_new (d, k); /* initialize the GMM: k-means + variance estimation */ int * nassign = ivec_new (n); /* not useful -> to be removed when debugged */ float * dis = fvec_new (n); kmeans (d, n, k, niter, v, nt, seed, nredo, g->mu, dis, NULL, nassign); fflush (stderr); fprintf (stderr, "assign = "); ivec_print (nassign, k); fprintf (stderr, "\n"); free (nassign); /* initialization of the GMM parameters assuming a diagonal matrix */ fvec_set (g->w, k, 1.0 / k); double sig = fvec_sum (dis, n) / n; printf ("sigma at initialization = %.3f\n", sig); fvec_set (g->sigma, k * d, sig); free (dis); /* start the EM algorithm */ fprintf (stdout, "<><><><> GMM <><><><><>\n"); if(flags & GMM_FLAGS_PURE_KMEANS) niter=0; for (iter = 1 ; iter <= niter ; iter++) { gmm_compute_p_thread (n, v, g, p, flags, nt); fflush(stdout); gmm_handle_empty(n, v, g, p); gmm_compute_params (n, v, p, g, flags, nt); fflush(stdout); iter_tot++; /* convergence reached -> leave */ old_key = key; key = fvec_sum (g->mu, k * d); printf ("keys %5d: %.6f -> %.6f\n", iter, old_key, key); fflush(stdout); if (key == old_key) break; } fprintf (stderr, "\n"); free(p); return g; }