/*
 * Trains a Gaussian mixture model on input_data with the EM algorithm.
 *
 * input_data            event-major samples: element (e, d) is read from
 *                       input_data[e * num_dimensions + d].
 * component_memberships passed through to the E-step / M-step helpers
 *                       (layout is defined by those helpers).
 * loglikelihoods        passed through to estep1.
 * num_components        number of mixture components (M).
 * num_dimensions        number of dimensions per event (D).
 * num_events            number of events in input_data.
 * min_iters, max_iters  the loop always runs min_iters iterations, then keeps
 *                       going while fewer than max_iters have run and the last
 *                       iteration improved the likelihood by more than epsilon.
 * cvtype                covariance type, forwarded to estep1 and mstep_covar.
 * ret_likelihood        receives the likelihood reported by the final estep2.
 * ret_mean              receives the means buffer (M*D floats).
 * ret_covars            receives the covariance buffer (M*D*D floats).
 *
 * Ownership: the buffers stored in *ret_mean and *ret_covars are allocated
 * with malloc here and are NOT freed by this function; the caller must free
 * them. All other working buffers are released before returning.
 *
 * On allocation failure nothing is trained: a message is written to stderr,
 * *ret_mean and *ret_covars are set to NULL and *ret_likelihood is left
 * unmodified.
 */
void em_train(float *input_data, float *component_memberships, float *loglikelihoods,int num_components, int num_dimensions,int num_events,int min_iters,int max_iters, char* cvtype, float *ret_likelihood, float **ret_mean, float **ret_covars) {
    float* N =  (float*)malloc(sizeof(float) * num_components);         // expected # of pixels in component: [M]
    float* pi = (float*)malloc(sizeof(float) * num_components);         // probability of component in GMM: [M]
    float* CP = (float*)malloc(sizeof(float) * num_components);         // cluster probability [M]
    float* constant = (float*)malloc(sizeof(float) * num_components);   // normalizing constant [M]
    float* avgvar = (float*)malloc(sizeof(float) * num_components);     // average variance [M]
    float* means = (float*)malloc(sizeof(float) * num_components * num_dimensions);                   // spectral mean for the component: [M*D]
    float* R = (float*)malloc(sizeof(float) * num_components * num_dimensions * num_dimensions);      // covariance matrix: [M*D*D]
    float* Rinv = (float*)malloc(sizeof(float) * num_components * num_dimensions * num_dimensions);   // same size as R: [M*D*D]
    // Dimension-major copy of the input: [D*num_events]
    float* data_by_dimension = (float*)malloc(sizeof(float) * num_events * num_dimensions);

    if(!N || !pi || !CP || !constant || !avgvar || !means || !R || !Rinv || !data_by_dimension) {
        fprintf(stderr, "em_train: could not allocate working memory\n");
        // free(NULL) is a no-op, so every pointer can be released unconditionally
        free(N);
        free(pi);
        free(CP);
        free(constant);
        free(avgvar);
        free(means);
        free(R);
        free(Rinv);
        free(data_by_dimension);
        *ret_mean = NULL;
        *ret_covars = NULL;
        return;
    }

    components_t components;
    components.N = N;
    components.pi = pi;
    components.CP = CP;
    components.constant = constant;
    components.avgvar = avgvar;
    components.means = means;
    components.R = R;
    components.Rinv = Rinv;

    // Transpose event-major input into dimension-major order for the E/M-step helpers
    for(int e = 0; e < num_events; e++) {
        for(int d = 0; d < num_dimensions; d++) {
            data_by_dimension[d * num_events + e] = input_data[e * num_dimensions + d];
        }
    }

    seed_components(input_data,&components,num_dimensions,num_components,num_events);

    // Computes the R matrix inverses, and the gaussian constant
    constants (&components,num_components,num_dimensions);
    // Compute average variance based on the data
    compute_average_variance(input_data,&components,num_dimensions,num_components,num_events);

    // Convergence threshold, scaled by the per-component parameter count and log of the data size
    float epsilon = (1 + num_dimensions + 0.5 * (num_dimensions + 1) * num_dimensions) *log((float)num_events * num_dimensions) * 0.0001;

    printf ("%f\n",epsilon);

    float likelihood = -100000;
    float old_likelihood;
    // Start above epsilon so the convergence test cannot stop the loop before the first iteration
    float change = epsilon*2;
    int iters = 0;

    // This is the iterative loop for the EM algorithm.
    // It regroups the events, re-estimates parameters and re-computes constants.
    // Note: the test uses the signed change, so a drop in likelihood also ends the
    // loop once min_iters has been reached.
    while(iters < min_iters || (iters < max_iters && change > epsilon)) {
        old_likelihood = likelihood;

        estep1(data_by_dimension,&components, component_memberships,num_dimensions,num_components,num_events,loglikelihoods,cvtype);
        estep2(data_by_dimension,&components,component_memberships,num_dimensions,num_components,num_events, &likelihood);

        // This kernel computes a new N, pi isn't updated until compute_constants though
        mstep_n(data_by_dimension,&components,component_memberships,num_dimensions,num_components,num_events);
        mstep_mean(data_by_dimension,&components,component_memberships, num_dimensions, num_components,num_events);
        mstep_covar(data_by_dimension,&components,component_memberships,num_dimensions,num_components,num_events,cvtype);

        // Inverts the R matrices, computes the constant, normalizes cluster probabilities
        constants(&components,num_components,num_dimensions);

        change = likelihood - old_likelihood;
        iters++;
    }

    // One more E-step so memberships and likelihood reflect the final parameters
    estep1(data_by_dimension,&components,component_memberships,num_dimensions,num_components,num_events,loglikelihoods,cvtype);
    estep2(data_by_dimension,&components,component_memberships,num_dimensions,num_components,num_events,&likelihood);

    *ret_likelihood = likelihood;
    *ret_mean = components.means;
    *ret_covars = components.R;

    // Release everything that is not handed back to the caller
    free(data_by_dimension);
    free(components.N);
    free(components.pi);
    free(components.CP);
    free(components.constant);
    free(components.avgvar);
    free(components.Rinv);
}
// Example no. 2
// 0
////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
/*
 * Writes the per-kernel profiling table (total seconds, iteration count and
 * seconds per iteration) to `out`. The totals are clock() tick counts.
 */
static void write_profiling_table(FILE* out,
                                  clock_t seed_total,
                                  clock_t regroup_total, int regroup_iterations,
                                  clock_t params_total, int params_iterations,
                                  clock_t constants_total, int constants_iterations) {
    fprintf(out,"Program Component\tTotal\tIters\tTime Per Iteration\n");
    fprintf(out,"        Seed Kernel:\t%7.4f\t%d\t%7.4f\n",seed_total/(double)CLOCKS_PER_SEC,1, (double) seed_total / (double) CLOCKS_PER_SEC);
    fprintf(out,"      E-step Kernel:\t%7.4f\t%d\t%7.4f\n",regroup_total/(double)CLOCKS_PER_SEC,regroup_iterations, (double) regroup_total / (double) CLOCKS_PER_SEC / (double) regroup_iterations);
    fprintf(out,"      M-step Kernel:\t%7.4f\t%d\t%7.4f\n",params_total/(double)CLOCKS_PER_SEC,params_iterations, (double) params_total / (double) CLOCKS_PER_SEC / (double) params_iterations);
    fprintf(out,"   Constants Kernel:\t%7.4f\t%d\t%7.4f\n",constants_total/(double)CLOCKS_PER_SEC,constants_iterations, (double) constants_total / (double) CLOCKS_PER_SEC / (double) constants_iterations);
}

/*
 * Program entry point: reads the event data named by argv[2], fits a Gaussian
 * mixture with EM, and writes "<argv[3]>.summary" (cluster statistics and
 * profiling table) and "<argv[3]>.results" (per-event data and memberships).
 *
 * Returns 0 on success. Returns 1 when the arguments are rejected, the input
 * cannot be parsed, memory cannot be allocated, or an output file cannot be
 * opened (in the last case console output is still produced).
 */
int
main( int argc, char** argv) {
    int num_clusters;
    int status = 0;

    // For profiling
    clock_t seed_start, seed_end, seed_total = 0;
    clock_t regroup_start, regroup_end, regroup_total = 0;
    int regroup_iterations = 0;
    clock_t params_start, params_end, params_total = 0;
    int params_iterations = 0;
    clock_t constants_start, constants_end, constants_total = 0;
    int constants_iterations = 0;
    clock_t total_timer = clock();
    double total_time = 0;
    clock_t io_timer;
    double io_time = 0;
    clock_t cpu_timer;
    double cpu_time = 0;

    io_timer = clock();
    // Validate the command-line arguments, parse # of clusters, etc
    if(validateArguments(argc,argv,&num_clusters)) {
        return 1; // Bad args
    }

    int num_dimensions;
    int num_events;

    // Read FCS data
    PRINT("Parsing input file...");
    // This stores the data in a 1-D array with consecutive values being the dimensions from a single event
    // (num_events by num_dimensions matrix)
    float* fcs_data_by_event = readData(argv[2],&num_dimensions,&num_events);

    if(!fcs_data_by_event) {
        printf("Error parsing input file. This could be due to an empty file ");
        printf("or an inconsistent number of dimensions. Aborting.\n");
        return 1;
    }

    // Transpose the event data (allows coalesced access pattern in E-step kernel)
    // This has consecutive values being from the same dimension of the data
    // (num_dimensions by num_events matrix)
    float* fcs_data_by_dimension  = (float*) malloc(sizeof(float)*num_events*num_dimensions);
    if(!fcs_data_by_dimension) {
        printf("ERROR: Could not allocate memory for transposed event data.\n");
        free(fcs_data_by_event);
        return 1;
    }

    for(int e=0; e<num_events; e++) {
        for(int d=0; d<num_dimensions; d++) {
            fcs_data_by_dimension[d*num_events+e] = fcs_data_by_event[e*num_dimensions+d];
        }
    }

    io_time += (double)(clock() - io_timer);

    PRINT("Number of events: %d\n",num_events);
    PRINT("Number of dimensions: %d\n",num_dimensions);
    PRINT("Number of target clusters: %d\n\n",num_clusters);

    cpu_timer = clock();

    // Setup the cluster data structures on host
    clusters_t clusters;
    clusters.N = (float*) malloc(sizeof(float)*num_clusters);
    clusters.pi = (float*) malloc(sizeof(float)*num_clusters);
    clusters.constant = (float*) malloc(sizeof(float)*num_clusters);
    clusters.avgvar = (float*) malloc(sizeof(float)*num_clusters);
    clusters.means = (float*) malloc(sizeof(float)*num_dimensions*num_clusters);
    clusters.R = (float*) malloc(sizeof(float)*num_dimensions*num_dimensions*num_clusters);
    clusters.Rinv = (float*) malloc(sizeof(float)*num_dimensions*num_dimensions*num_clusters);
    clusters.memberships = (float*) malloc(sizeof(float)*num_events*num_clusters);
    // Every buffer is checked, including the small per-cluster arrays
    if(!clusters.N || !clusters.pi || !clusters.constant || !clusters.avgvar ||
       !clusters.means || !clusters.R || !clusters.Rinv || !clusters.memberships) {
        printf("ERROR: Could not allocate memory for clusters.\n");
        // free(NULL) is a no-op, so release unconditionally
        free(clusters.N);
        free(clusters.pi);
        free(clusters.constant);
        free(clusters.avgvar);
        free(clusters.means);
        free(clusters.R);
        free(clusters.Rinv);
        free(clusters.memberships);
        free(fcs_data_by_dimension);
        free(fcs_data_by_event);
        return 1;
    }
    DEBUG("Finished allocating memory on host for clusters.\n");

    float rissanen;

    //////////////// Initialization done, starting kernels ////////////////
    DEBUG("Invoking seed_clusters kernel.\n");
    fflush(stdout);

    // seed_clusters sets initial pi values,
    // finds the means / covariances and copies it to all the clusters
    // TODO: Does it make any sense to use multiple blocks for this?
    seed_start = clock();
    seed_clusters(fcs_data_by_event, &clusters, num_dimensions, num_clusters, num_events);

    DEBUG("Invoking constants kernel.\n");
    // Computes the R matrix inverses, and the gaussian constant
    constants(&clusters,num_clusters,num_dimensions);
    constants_iterations++;
    seed_end = clock();
    seed_total = seed_end - seed_start;

    // Fixed convergence threshold. (A data-dependent value was previously computed
    // here and immediately overwritten; that dead computation has been dropped.)
    float epsilon = 1e-6;
    float likelihood, old_likelihood;
    int iters;

    PRINT("Gaussian.cu: epsilon = %f\n",epsilon);

    /*************** EM ALGORITHM *****************************/

    // do initial regrouping
    // Regrouping means calculate a cluster membership probability
    // for each event and each cluster. Each event is independent,
    // so the events are distributed to different blocks
    // (and hence different multiprocessors)
    DEBUG("Invoking regroup (E-step) kernel with %d blocks.\n",NUM_BLOCKS);
    regroup_start = clock();
    estep1(fcs_data_by_dimension,&clusters,num_dimensions,num_clusters,num_events,&likelihood);
    estep2(fcs_data_by_dimension,&clusters,num_dimensions,num_clusters,num_events,&likelihood);
    regroup_end = clock();
    regroup_total += regroup_end - regroup_start;
    regroup_iterations++;
    DEBUG("Regroup Kernel Iteration Time: %f\n\n",((double)(regroup_end-regroup_start))/CLOCKS_PER_SEC);

    DEBUG("Likelihood: %e\n",likelihood);

    // Start above epsilon so the convergence test alone cannot skip the first iteration
    float change = epsilon*2;

    PRINT("Performing EM algorithm on %d clusters.\n",num_clusters);
    iters = 0;
    // This is the iterative loop for the EM algorithm.
    // It re-estimates parameters, re-computes constants, and then regroups the events
    // These steps keep repeating until the change in likelihood is less than some epsilon
    while(iters < MIN_ITERS || (fabs(change) > epsilon && iters < MAX_ITERS)) {
        old_likelihood = likelihood;

        DEBUG("Invoking reestimate_parameters (M-step) kernel.\n");
        params_start = clock();
        // This kernel computes a new N, pi isn't updated until compute_constants though
        mstep_n(fcs_data_by_dimension,&clusters,num_dimensions,num_clusters,num_events);
        mstep_mean(fcs_data_by_dimension,&clusters,num_dimensions,num_clusters,num_events);
        mstep_covar(fcs_data_by_dimension,&clusters,num_dimensions,num_clusters,num_events);
        params_end = clock();
        params_total += params_end - params_start;
        params_iterations++;
        DEBUG("Model M-Step Iteration Time: %f\n\n",((double)(params_end-params_start))/CLOCKS_PER_SEC);

        DEBUG("Invoking constants kernel.\n");
        // Inverts the R matrices, computes the constant, normalizes cluster probabilities
        constants_start = clock();
        constants(&clusters,num_clusters,num_dimensions);
        constants_end = clock();
        constants_total += constants_end - constants_start;
        constants_iterations++;
        DEBUG("Constants Kernel Iteration Time: %f\n\n",((double)(constants_end-constants_start))/CLOCKS_PER_SEC);

        DEBUG("Invoking regroup (E-step) kernel with %d blocks.\n",NUM_BLOCKS);
        regroup_start = clock();
        // Compute new cluster membership probabilities for all the events
        estep1(fcs_data_by_dimension,&clusters,num_dimensions,num_clusters,num_events,&likelihood);
        estep2(fcs_data_by_dimension,&clusters,num_dimensions,num_clusters,num_events,&likelihood);
        regroup_end = clock();
        regroup_total += regroup_end - regroup_start;
        regroup_iterations++;
        DEBUG("E-step Iteration Time: %f\n\n",((double)(regroup_end-regroup_start))/CLOCKS_PER_SEC);

        change = likelihood - old_likelihood;
        DEBUG("likelihood = %f\n",likelihood);
        DEBUG("Change in likelihood: %f\n",change);

        iters++;
    }

    // Calculate Rissanen Score
    rissanen = -likelihood + 0.5*(num_clusters*(1+num_dimensions+0.5*(num_dimensions+1)*num_dimensions)-1)*logf((float)num_events*num_dimensions);
    PRINT("\nFinal rissanen Score was: %f, with %d clusters.\n",rissanen,num_clusters);

    // Build "<argv[3]>.results" and "<argv[3]>.summary"; buffers are sized exactly
    // (prefix + suffix + terminating NUL), so strcpy/strcat cannot overflow them.
    const char* result_suffix = ".results";
    const char* summary_suffix = ".summary";
    size_t filenamesize1 = strlen(argv[3]) + strlen(result_suffix) + 1;
    size_t filenamesize2 = strlen(argv[3]) + strlen(summary_suffix) + 1;
    char* result_filename = (char*) malloc(filenamesize1);
    char* summary_filename = (char*) malloc(filenamesize2);
    if(!result_filename || !summary_filename) {
        printf("ERROR: Could not allocate memory for output filenames.\n");
        free(result_filename);
        free(summary_filename);
        free(fcs_data_by_event);
        free(fcs_data_by_dimension);
        free(clusters.N);
        free(clusters.pi);
        free(clusters.constant);
        free(clusters.avgvar);
        free(clusters.means);
        free(clusters.R);
        free(clusters.Rinv);
        free(clusters.memberships);
        return 1;
    }
    strcpy(result_filename,argv[3]);
    strcpy(summary_filename,argv[3]);
    strcat(result_filename,result_suffix);
    strcat(summary_filename,summary_suffix);

    PRINT("Summary filename: %s\n",summary_filename);
    PRINT("Results filename: %s\n",result_filename);
    cpu_time += (double)(clock() - cpu_timer);

    io_timer = clock();
    // Open up the output file for cluster summary
    FILE* outf = fopen(summary_filename,"w");
    if(!outf) {
        // Report the file that actually failed, keep going with console output only
        printf("ERROR: Unable to open file '%s' for writing.\n",summary_filename);
        status = 1;
    }

    // Print the clusters to the console and output file
    for(int c=0; c<num_clusters; c++) {
        if(ENABLE_PRINT) {
            // Output the final cluster stats to the console
            PRINT("Cluster #%d\n",c);
            printCluster(clusters,c,num_dimensions);
            PRINT("\n\n");
        }

        if(ENABLE_OUTPUT && outf) {
            // Output the final cluster stats to the output file
            fprintf(outf,"Cluster #%d\n",c);
            writeCluster(outf,clusters,c,num_dimensions);
            fprintf(outf,"\n\n");
        }
    }

    // Print profiling information
    write_profiling_table(stdout, seed_total,
                          regroup_total, regroup_iterations,
                          params_total, params_iterations,
                          constants_total, constants_iterations);

    // Write profiling info to summary file
    if(outf) {
        write_profiling_table(outf, seed_total,
                              regroup_total, regroup_iterations,
                              params_total, params_iterations,
                              constants_total, constants_iterations);
        fclose(outf);
    }

    // Open another output file for the event level clustering results
    FILE* fresults = fopen(result_filename,"w");
    if(!fresults) {
        printf("ERROR: Unable to open file '%s' for writing.\n",result_filename);
        status = 1;
    } else {
        if(ENABLE_OUTPUT) {
            // One line per event: comma-separated coordinates, a tab,
            // then comma-separated membership values for each cluster
            for(int i=0; i<num_events; i++) {
                for(int d=0; d<num_dimensions-1; d++) {
                    fprintf(fresults,"%f,",fcs_data_by_event[i*num_dimensions+d]);
                }
                fprintf(fresults,"%f",fcs_data_by_event[i*num_dimensions+num_dimensions-1]);
                fprintf(fresults,"\t");
                for(int c=0; c<num_clusters-1; c++) {
                    fprintf(fresults,"%f,",clusters.memberships[c*num_events+i]);
                }
                fprintf(fresults,"%f",clusters.memberships[(num_clusters-1)*num_events+i]);
                fprintf(fresults,"\n");
            }
        }
        fclose(fresults);
    }
    io_time += (double)(clock() - io_timer);
    printf("\n");
    printf( "I/O time: %f (ms)\n", 1000.0*io_time/CLOCKS_PER_SEC);
    printf( "CPU processing time: %f (ms)\n", 1000.0*cpu_time/CLOCKS_PER_SEC);
    total_time += (double)(clock() - total_timer);
    printf( "Total time: %f (ms)\n", 1000.0*total_time/CLOCKS_PER_SEC);

    // cleanup host memory
    free(result_filename);
    free(summary_filename);
    free(fcs_data_by_event);
    free(fcs_data_by_dimension);
    free(clusters.N);
    free(clusters.pi);
    free(clusters.constant);
    free(clusters.avgvar);
    free(clusters.means);
    free(clusters.R);
    free(clusters.Rinv);
    free(clusters.memberships);

    return status;
}