/* Load the samples from a plain text file with the specified format.
 * Reads the (possibly gzip-compressed) file in BUFFER_SIZE chunks and hands
 * each chunk to the worker pool (thread_loader) for asynchronous parsing,
 * double-buffering: the next chunk is read from disk while the previous one
 * is being parsed. After all chunks are parsed, normalizes the accumulated
 * per-dimension sums into mean and variance, flooring the variance.
 * Returns a freshly allocated data set (caller owns it).
 * Exits the process on I/O, allocation or empty-file errors. */
data *feas_load(char *filename,workers *pool){
	data *feas=(data*)calloc(1,sizeof(data));
	loader *t=(loader*)calloc(1,sizeof(loader));
	if(!feas||!t)fprintf(stderr,"Error: Out of memory loading %s file.\n",filename),exit(1);
	t->s=t->d=t->c=t->point=t->next=0,t->header=2,t->dec=t->sign=1; /* Initial parser state. */
	t->feas=feas,t->buff=NULL;
	number i,r;
	gzFile f=gzopen(filename,"rb"); /* Read the file using zlib library. */
	if(!f)fprintf(stderr,"Error: Not %s feature file found.\n",filename),exit(1);
	while(!gzeof(f)){
		char *buff=(char*)calloc(BUFFER_SIZE,sizeof(char));
		if(!buff)fprintf(stderr,"Error: Out of memory loading %s file.\n",filename),exit(1);
		r=gzread(f,buff,BUFFER_SIZE); /* Read the buffer and do asynchronous load. */
		if(r<0)fprintf(stderr,"Error: Can not read %s feature file.\n",filename),exit(1); /* gzread returns -1 on error. */
		if(t->buff!=NULL){ /* Wait until the previous chunk is fully parsed before releasing it. */
			workers_waitall(pool);
			free(t->buff);
		}
		t->buff=buff,t->r=r;
		workers_addtask(pool,thread_loader,(void*)t);
	}
	workers_waitall(pool);
	gzclose(f);
	free(t->buff);
	free(t);
	if(feas->samples==0) /* Avoid NaN statistics from a zero divisor below. */
		fprintf(stderr,"Error: No samples found on %s feature file.\n",filename),exit(1);
	for(i=0;i<feas->dimension;i++){ /* Compute the mean and variance of the data. */
		feas->mean[i]/=feas->samples;
		feas->variance[i]=(feas->variance[i]/feas->samples)-(feas->mean[i]*feas->mean[i]);
		if(feas->variance[i]<0.000001)feas->variance[i]=0.000001; /* Variance floor to avoid degenerate covariances. */
	}
	return feas;
}
/* Detailed Gaussian Mixture classifier using a Viterbi aproximation.
 * Scores all samples of feas against gmix (with gworld available to the
 * workers, presumably as a normalization model -- confirm in
 * thread_classifier) in parallel, and writes a JSON report to filename
 * containing per-sample results, per-mixture occupation counts and the
 * global score. Returns the total score averaged over the samples.
 * Exits the process if the output file can not be opened. */
decimal gmm_classify(char *filename,data *feas,gmm *gmix,gmm *gworld,workers *pool){
	number i,n=workers_number(pool),inc=feas->samples/n; /* inc = samples per worker slice. */
	decimal result=0;
	workers_mutex *mutex=workers_mutex_create(); /* Serializes the workers' writes into the shared output file. */
	classifier *t=(classifier*)calloc(n,sizeof(classifier));
	number *flag=(number*)calloc(1,sizeof(number)); /* Shared counter handed to all workers -- presumably used for comma placement in the JSON list; verify against thread_classifier. */
	FILE *f=fopen(filename,"w");
	if(!f)fprintf(stderr,"Error: Can not write to %s file.\n",filename),exit(1);
	fprintf(f,"{\n\t\"samples\": %i,\n\t\"classes\": %i,",feas->samples,gmix->num);
	fprintf(f,"\n\t\"samples_results\": [ "); /* Workers append the per-sample entries after this point. */
	for(i=0;i<gmix->num;i++)
		gmix->mix[i]._cfreq=0; /* Reset the occupation counters the workers accumulate. */
	for(i=0;i<n;i++){ /* Set and launch the parallel classify. */
		t[i].feas=feas,t[i].gmix=gmix,t[i].gworld=gworld,t[i].ini=i*inc,t[i].mutex=mutex;
		t[i].end=(i==n-1)?(feas->samples):((i+1)*inc),t[i].f=f,t[i].flag=flag; /* Last worker also takes the division remainder. */
		workers_addtask(pool,thread_classifier,(void*)&t[i]);
	}
	workers_waitall(pool); /* Wait to the end of the parallel classify. */
	for(i=0;i<n;i++)
		result+=t[i].result; /* Sum the partial scores produced by each worker. */
	workers_mutex_delete(mutex);
	fprintf(f,"\n\t],\n\t\"mixture_occupation\": [ %i",gmix->mix[0]._cfreq);
	for(i=1;i<gmix->num;i++)
		fprintf(f,", %i",gmix->mix[i]._cfreq);
	fprintf(f," ],\n\t\"global_score\": %.10f\n}",result);
	fclose(f);
	free(t);
	free(flag);
	return result/feas->samples; /* Average score per sample. */
}
/* Perform one iteration of the EM algorithm with the data and the mixture indicated.
 * The E step accumulates the expected counts over all samples in parallel
 * (thread_trainer); the M step then re-estimates each component's prior,
 * mean and diagonal covariance from those counts, flooring covariances
 * with gmix->mcov. Returns the log-likelihood averaged over the samples. */
decimal gmm_EMtrain(data *feas,gmm *gmix,workers *pool){
	number c,w,d,chunk,nworkers=workers_number(pool);
	workers_mutex *mutex=workers_mutex_create();
	trainer *tasks=(trainer*)calloc(nworkers,sizeof(trainer));
	/* Calculate expected value and accumulate the counts (E Step). */
	gmm_init_classifier(gmix);
	for(c=0;c<gmix->num;c++)
		gmix->mix[c]._cfreq=0;
	chunk=feas->samples/nworkers;
	for(w=0;w<nworkers;w++){ /* Hand one contiguous slice of the samples to each worker. */
		tasks[w].feas=feas;
		tasks[w].gmix=gmix;
		tasks[w].mutex=mutex;
		tasks[w].ini=w*chunk;
		tasks[w].end=(w==nworkers-1)?(feas->samples):((w+1)*chunk); /* Last slice absorbs the remainder. */
		workers_addtask(pool,thread_trainer,(void*)&tasks[w]);
	}
	workers_waitall(pool);
	workers_mutex_delete(mutex);
	/* Estimate the new parameters of the Gaussian Mixture (M Step). */
	for(c=0;c<gmix->num;c++){
		decimal occupation=gmix->mix[c]._z;
		gmix->mix[c].prior=log(occupation/feas->samples);
		for(d=0;d<gmix->dimension;d++){
			decimal mu=gmix->mix[c]._mean[d]/occupation;
			gmix->mix[c].mean[d]=mu;
			gmix->mix[c].dcov[d]=(gmix->mix[c]._dcov[d]/occupation)-(mu*mu);
			if(gmix->mix[c].dcov[d]<gmix->mcov[d]) /* Smoothing covariances. */
				gmix->mix[c].dcov[d]=gmix->mcov[d];
		}
	}
	free(tasks);
	return gmix->llh/feas->samples;
}
/* Efficient Gaussian Mixture classifier using a Viterbi aproximation.
 * Scores all samples of feas against gmix (with gworld available to the
 * workers) in parallel via thread_simple_classifier, then sums the partial
 * results. Returns half the total score averaged over the samples. */
decimal gmm_simple_classify(data *feas,gmm *gmix,gmm *gworld,workers *pool){
	number w,nworkers=workers_number(pool),chunk=feas->samples/nworkers;
	decimal score=0;
	classifier *tasks=(classifier*)calloc(nworkers,sizeof(classifier));
	for(w=0;w<nworkers;w++){ /* Hand one contiguous slice of the samples to each worker. */
		tasks[w].feas=feas;
		tasks[w].gmix=gmix;
		tasks[w].gworld=gworld;
		tasks[w].ini=w*chunk;
		tasks[w].end=(w==nworkers-1)?(feas->samples):((w+1)*chunk); /* Last slice absorbs the remainder. */
		workers_addtask(pool,thread_simple_classifier,(void*)&tasks[w]);
	}
	workers_waitall(pool); /* Block until every slice has been scored. */
	for(w=0;w<nworkers;w++)
		score+=tasks[w].result;
	free(tasks);
	return (score*0.5)/feas->samples;
}