Пример #1
0
/*
 * Load a GMM from the stream f and return it as a freshly created gmm_t.
 *
 * The fields are filled in this order: the two ints d and k, then g->w
 * (g->k items), g->mu (g->k * g->d items) and g->sigma (g->k * g->d items).
 * The structure itself comes from gmm_new(d, k).
 *
 * NOTE(review): READANDCHECK is defined outside this function. It presumably
 * reads the given number of items from f into the destination and deals with
 * short reads itself -- confirm against the macro definition. If it returns
 * to the caller on failure, g would be leaked on the later reads.
 * NOTE(review): d and k come straight from the stream and are handed to
 * gmm_new without any range check.
 */
gmm_t * gmm_read(FILE *f) {
    int d,k;

    /* header: the two sizes that are passed to gmm_new */
    READANDCHECK(&d,1);
    READANDCHECK(&k,1);

    gmm_t *g=gmm_new(d,k);

    /* payload, read directly into the arrays owned by g */
    READANDCHECK(g->w,g->k);
    READANDCHECK(g->mu,g->k*g->d);
    READANDCHECK(g->sigma,g->k*g->d);

    return g;
}
Пример #2
0
/*
 * Read a whitespace-separated table of numbers from fp into a newly
 * allocated row-major array of doubles.
 *
 * Blank lines are skipped. The number of columns is taken from the first
 * non-blank line; every later row must supply at least that many values
 * (extra trailing values on a row are ignored).
 *
 * On success returns the array (the caller frees it) and stores the row
 * and column counts in *n_rows and *n_cols. On any failure prints a
 * message to stderr and returns NULL; the outputs are left untouched.
 */
static double *load_data(FILE *fp, int *n_rows, int *n_cols)
{
	static const char delims[] = " \t\r\n";
	char *line = NULL;	/* getline() buffer, reused for every line */
	size_t cap = 0;
	double *X = NULL;
	int N = 0, D = 0, row = 0;

	/* Pass 1: count the data rows; the first one fixes the column count. */
	while (getline(&line, &cap, fp) != -1)
	{
		if (line[strspn(line, delims)] == '\0')
			continue;	/* nothing but separators: not a data row */
		if (N == 0)
		{
			/* strtok() clobbers the buffer, which is fine: the next
			 * getline() call overwrites it anyway. */
			char *token = strtok(line, delims);
			while (token != NULL)
			{
				D++;
				token = strtok(NULL, delims);
			}
		}
		N++;
	}
	if (N == 0)
	{
		fprintf(stderr, "ERROR: data file contains no values.\n");
		goto fail;
	}

	/* Do the size arithmetic in size_t so that N*D cannot overflow int. */
	X = malloc((size_t)N * (size_t)D * sizeof *X);
	if (X == NULL)
	{
		fprintf(stderr, "ERROR: out of memory for a %d x %d data matrix.\n", N, D);
		goto fail;
	}

	/* Pass 2: parse the values. */
	rewind(fp);
	while (row < N && getline(&line, &cap, fp) != -1)
	{
		char *token = strtok(line, delims);
		if (token == NULL)
			continue;	/* blank line, skipped in pass 1 as well */
		for (int i = 0; i < D; i++)
		{
			char *end = NULL;
			if (token == NULL)
			{
				fprintf(stderr, "ERROR: data row %d has fewer than %d values.\n", row + 1, D);
				goto fail;
			}
			X[(size_t)D * (size_t)row + (size_t)i] = strtod(token, &end);
			if (end == token || *end != '\0')
			{
				fprintf(stderr, "ERROR: data row %d: '%s' is not a number.\n", row + 1, token);
				goto fail;
			}
			token = strtok(NULL, delims);
		}
		row++;
	}
	if (row != N)
	{
		fprintf(stderr, "ERROR: could not re-read all %d data rows.\n", N);
		goto fail;
	}

	free(line);
	*n_rows = N;
	*n_cols = D;
	return X;

fail:
	free(line);
	free(X);
	return NULL;
}

/*
 * Demo driver: loads the samples from "sample_data.txt", fits a
 * 3-component diagonal GMM to them, then prints the elapsed fitting time,
 * the model parameters and the score of the training data.
 *
 * Returns 0 on success and 1 when the data cannot be loaded or the model
 * cannot be created.
 */
int main(void)
{
	const char *data_filenm = "sample_data.txt";
	const int gmm_num_components = 3;

	// Load data from file
	FILE *fp = fopen(data_filenm, "r");
	if (fp == NULL)
	{
		printf("ERROR: File 'sample_data.txt' not found.\nRun scripts/generate_data.py to generate sample data.\n");
		exit(1);
	}
	int N = 0, D = 0;
	double *X = load_data(fp, &N, &D);
	fclose(fp);
	if (X == NULL)
		return 1;	/* load_data() has already reported the reason */

	// Train the GMM
	GMM *gmm = gmm_new(gmm_num_components, D, "diagonal");
	if (gmm == NULL)
	{
		/* NOTE(review): assumes gmm_new signals failure with NULL -- confirm. */
		fprintf(stderr, "ERROR: could not create the GMM.\n");
		free(X);
		return 1;
	}
	gmm_set_convergence_tol(gmm, 1e-6);
	gmm_set_regularization_value(gmm, 1e-6);
	gmm_set_initialization_method(gmm, "random");
	struct timeval st, en;
	gettimeofday(&st, NULL);
	gmm_fit(gmm, X, N);
	gettimeofday(&en, NULL);
	printf("Time elapsed = %lf s\n",
	       (double)(en.tv_sec - st.tv_sec) + 1e-6 * (double)(en.tv_usec - st.tv_usec));
	gmm_print_params(gmm);
	double llh = gmm_score(gmm, X, N);
	printf("Score (LLH) = %lf\n", llh);
	gmm_free(gmm);

	// Free data
	free(X);

	return 0;
}
Пример #3
0
/*
 * Learn a GMM from the n = ni vectors of dimension d = di stored in v,
 * using k = ki components.
 *
 * The means are initialized by kmeans(); the weights are set uniformly to
 * 1/k and every sigma entry to the mean of the distances reported by
 * kmeans(). Unless GMM_FLAGS_PURE_KMEANS is set in flags, EM iterations
 * then run until the sum of all mean coordinates stops changing or niter
 * iterations have been done.
 *
 *   niter               iteration budget handed to kmeans() and used as the
 *                       cap on EM iterations; 0 selects 10000
 *   nt, seed, nredo     forwarded to kmeans(); nt is also forwarded to the
 *                       EM steps
 *   flags               forwarded to the EM steps
 *
 * Progress is printed to stdout and stderr. Returns the gmm_t obtained
 * from gmm_new().
 */
gmm_t * gmm_learn (int di, int ni, int ki, int niter,
                   const float * v, int nt, int seed, int nredo,
                   int flags)
{
    /* widen the sizes so that products such as npoints * ncenters are
       computed in long */
    const long dim = di;
    const long ncenters = ki;
    const long npoints = ni;

    if (niter == 0)
        niter = 10000;

    /* p(ci|x) for all i */
    float * posteriors = fvec_new_0 (npoints * ncenters);
    gmm_t * g = gmm_new (dim, ncenters);

    /* initialization, step 1: k-means provides the means */
    int * nassign = ivec_new (npoints);  /* debugging aid only */
    float * dis = fvec_new (npoints);
    kmeans (dim, npoints, ncenters, niter, v, nt, seed, nredo,
            g->mu, dis, NULL, nassign);

    fflush (stderr);
    fprintf (stderr, "assign = ");
    ivec_print (nassign, ncenters);
    fprintf (stderr, "\n");
    free (nassign);

    /* initialization, step 2: uniform weights and one shared variance
       (diagonal model) */
    fvec_set (g->w, ncenters, 1.0 / ncenters);
    double sig = fvec_sum (dis, npoints) / npoints;
    printf ("sigma at initialization = %.3f\n", sig);
    fvec_set (g->sigma, ncenters * dim, sig);
    free (dis);

    /* EM */
    printf ("<><><><> GMM  <><><><><>\n");

    /* pure k-means requested: skip the EM iterations entirely */
    const int em_iters = (flags & GMM_FLAGS_PURE_KMEANS) ? 0 : niter;

    double key = 666;

    for (int iter = 1 ; iter <= em_iters ; iter++) {

        gmm_compute_p_thread (npoints, v, g, posteriors, flags, nt);
        fflush (stdout);

        gmm_handle_empty (npoints, v, g, posteriors);

        gmm_compute_params (npoints, v, posteriors, g, flags, nt);
        fflush (stdout);

        /* the sum of all mean coordinates serves as the convergence key:
           an exactly unchanged sum ends the iterations */
        const double prev_key = key;
        key = fvec_sum (g->mu, ncenters * dim);

        printf ("keys %5d: %.6f -> %.6f\n", iter, prev_key, key);
        fflush (stdout);

        if (key == prev_key)
            break;
    }
    fprintf (stderr, "\n");

    free (posteriors);

    return g;
}