void activateHiddenUnits(int visible[], int stochastic, int hidden[])
{
	accept_roi_begin();
	// Calculate activation energy for hidden units
	APPROX double hiddenEnergies[NUM_HIDDEN];
	int h;
	for (h = 0; h < NUM_HIDDEN; h++)
	{
		// Get the sum of energies
		APPROX double sum = 0;
		int v;
		for (v = 0; v < NUM_VISIBLE + 1; v++) // remove the +1 if you want to skip the bias
		{
			if (visible[v] != -1)
				sum += (double) visible[v] * edges[v][h];
		}
		hiddenEnergies[h] = sum;
	}

	// Activate hidden units
	for (h = 0; h < NUM_HIDDEN; h++)
	{
		double prob = 1.0 / ENDORSE(1.0 + exp(-hiddenEnergies[h]));
		if (stochastic)
		{
			if (ENDORSE(RAND) < prob)
				hidden[h] = 1;
			else
				hidden[h] = 0;
		}
		else
		{
			if (prob > 0.5)
				hidden[h] = 1;
			else
				hidden[h] = 0;
		}
	}

	hidden[NUM_HIDDEN] = 1; // turn on bias
	accept_roi_end();
}
/*
 * Driver for WAMI kernel 1 (debayer).
 *
 * Usage: <program> <directory-containing-input-files>
 *
 * Reads the Bayer input image, runs wami_debayer() inside the ACCEPT region
 * of interest and reports the elapsed time. With ENABLE_CORRECTNESS_CHECKING
 * the result is compared against the golden image and the mean normalised
 * per-channel error is written to "err.txt". With WRITE_OUTPUT_TO_DISK the
 * debayered image is written out as well.
 *
 * Returns 0 on success; exits with EXIT_FAILURE on a usage error or when
 * "err.txt" cannot be created.
 */
int main(int argc, char **argv)
{
    u16 (*bayer)[WAMI_DEBAYER_IMG_NUM_COLS] = NULL;
    rgb_pixel (*debayer)[WAMI_DEBAYER_IMG_NUM_COLS-2*PAD] = NULL;
    char *input_directory = NULL;
#ifdef ENABLE_CORRECTNESS_CHECKING
    rgb_pixel (*gold_debayer)[WAMI_DEBAYER_IMG_NUM_COLS-2*PAD] = NULL;
#endif

    const size_t num_bayer_pixels = WAMI_DEBAYER_IMG_NUM_ROWS *
        WAMI_DEBAYER_IMG_NUM_COLS;
    /* The output loses PAD pixels on every side of the input. */
    const size_t num_debayer_pixels = (WAMI_DEBAYER_IMG_NUM_ROWS-2*PAD) *
        (WAMI_DEBAYER_IMG_NUM_COLS-2*PAD);

    if (argc != 2)
    {
        fprintf(stderr, "%s <directory-containing-input-files>\n", argv[0]);
        exit(EXIT_FAILURE);
    }

    input_directory = argv[1];

    bayer = XMALLOC(sizeof(u16) * num_bayer_pixels);
    debayer = XMALLOC(sizeof(rgb_pixel) * num_debayer_pixels);
#ifdef ENABLE_CORRECTNESS_CHECKING
    gold_debayer = XMALLOC(sizeof(rgb_pixel) * num_debayer_pixels);
#endif

    read_image_file(
        (char *) bayer,
        input_filename,
        input_directory,
        sizeof(u16) * num_bayer_pixels);

    /* Clear the whole output buffer: its elements are rgb_pixel, not u16. */
    memset(debayer, 0, sizeof(rgb_pixel) * num_debayer_pixels);

    printf("WAMI kernel 1 parameters:\n\n");
    printf("Input image width = %u pixels\n", WAMI_DEBAYER_IMG_NUM_COLS);
    printf("Input image height = %u pixels\n", WAMI_DEBAYER_IMG_NUM_ROWS);
    printf("Output image width = %u pixels\n", WAMI_DEBAYER_IMG_NUM_COLS-2*PAD);
    printf("Output image height = %u pixels\n", WAMI_DEBAYER_IMG_NUM_ROWS-2*PAD);

    printf("\nStarting WAMI kernel 1 (debayer).\n");
    tic();
    accept_roi_begin();
    wami_debayer(
        debayer,
        bayer);
    accept_roi_end();
    PRINT_STAT_DOUBLE("CPU time using func toc - ", toc());

#ifdef ENABLE_CORRECTNESS_CHECKING
    read_image_file(
        (char *) gold_debayer,
        golden_output_filename,
        input_directory,
        sizeof(rgb_pixel) * num_debayer_pixels);

    /*
     * Error metric: for every output pixel, average the absolute r/g/b
     * differences against the golden image (each normalised by 65535), then
     * average that over all output pixels. This replaces the original
     * exact-match check, which stopped at the first mismatching channel.
     */
    {
        const int out_rows = WAMI_DEBAYER_IMG_NUM_ROWS - 2*PAD;
        const int out_cols = WAMI_DEBAYER_IMG_NUM_COLS - 2*PAD;
        const double num_out_pixels = (double) out_rows * (double) out_cols;
        int r, c;
        double err = 0.0; /* accumulated below, so it must start at zero */

        for (r = 0; r < out_rows; ++r)
        {
            /* Columns are bounded by the column count, not the row count. */
            for (c = 0; c < out_cols; ++c)
            {
                double pixel_error = 0.0;
                pixel_error += ENDORSE(((double) abs(debayer[r][c].r - gold_debayer[r][c].r)) / ((double) 65535));
                pixel_error += ENDORSE(((double) abs(debayer[r][c].g - gold_debayer[r][c].g)) / ((double) 65535));
                pixel_error += ENDORSE(((double) abs(debayer[r][c].b - gold_debayer[r][c].b)) / ((double) 65535));

                err += (pixel_error / ((double) 3)) / num_out_pixels;
            }
        }

        /* Checked explicitly so the test survives builds with NDEBUG. */
        FILE *fp = fopen("err.txt", "wb");
        if (fp == NULL)
        {
            perror("fopen of err.txt failed");
            exit(EXIT_FAILURE);
        }
        fprintf(fp, "%.2f\n", err);
        fclose(fp);
    }
#endif

#ifdef WRITE_OUTPUT_TO_DISK
    printf("Writing output to %s/%s.\n", output_directory, output_filename);
    {
        const u16 output_channels = 3;
        write_image_file(
            (char *) debayer,
            output_filename,
            output_directory,
            WAMI_DEBAYER_IMG_NUM_COLS - 2*PAD,
            WAMI_DEBAYER_IMG_NUM_ROWS - 2*PAD,
            output_channels);
    }
#endif

    FREE_AND_NULL(bayer);
    FREE_AND_NULL(debayer);
#ifdef ENABLE_CORRECTNESS_CHECKING
    FREE_AND_NULL(gold_debayer);
#endif

    return 0;
}
Beispiel #3
0
/*
 * FFT benchmark driver (precise path only).
 *
 * Fills both input buffers x_1 and x_2 with the ramp 0..K-1 (imaginary part
 * zero), prints a banner and runs radix2DitCooleyTukeyFft() on x_1 inside the
 * ACCEPT region of interest.
 *
 * andreolb: the following stages of the original benchmark are disabled in
 * this version and their code has been removed from this function:
 *   - performance counters and cycle/instruction measurements,
 *   - NPU initialisation and the NPU-offloaded (approximate) FFT run,
 *   - the RMSE/NRMSE comparison of precise vs. approximate results,
 *   - the result report and the DUMP_DATA output.
 *
 * Always returns 0. When POWER_MODE == 1 the FFT is repeated forever and the
 * function never returns.
 */
int main(int argc, char* argv[]) {
    /* Command-line arguments are not used by this benchmark. */
    (void) argc;
    (void) argv;

    // fft variables
    int i;
    int K = MAX_K;

    ///////////////////////////////
    // 1 - Initialization
    ///////////////////////////////

    // Initialize input data
    for (i = 0; i < K; ++i) {
        x_1[i].real = i;
        x_1[i].imag = 0;
        x_2[i].real = i;
        x_2[i].imag = 0;
    }

    // K is an int, so it is printed with %d
#if BUFFER_SIZE == 1
    printf("\n\nRunning fft benchmark on %d inputs (single invocation mode)\n", K);
#else
    printf("\n\nRunning fft benchmark on %d inputs\n", K);
#endif

    ///////////////////////////////
    // 2 - Precise execution
    ///////////////////////////////

    accept_roi_begin();
#if POWER_MODE == 1
    while (1) {
#endif //POWER_MODE

        radix2DitCooleyTukeyFft(K, indices_1, x_1, f_precise);

#if POWER_MODE == 1
    }
#endif //POWER_MODE
    accept_roi_end();

    return 0;
}
Beispiel #4
0
/*
 * BBF object-detection driver.
 *
 * Usage: <program> <image-or-list-file> <cascade> [<working-directory>]
 *
 * argv[1] is first tried as an image. If it loads, objects are detected in it,
 * printed to stdout, and the four corner coordinates of every detection are
 * written to "my_output.txt". Otherwise argv[1] is read as a text file with
 * one image path per line (after an optional chdir to argv[3]); every image is
 * processed and printed, and the detections of the LAST image are written to
 * "my_output.txt" as x, y, width, height, confidence, one value per line.
 *
 * Returns 0 on success and EXIT_FAILURE when the output file cannot be
 * opened, written or closed.
 */
int main(int argc, char** argv)
{
	FILE* file;
	char *output_file = "my_output.txt";
	int i, ret_val;
	ccv_dense_matrix_t* image = 0;
	/* Starts out null so the list-file path can tell "no detection ran". */
	ccv_array_t* seq = 0;

	accept_roi_begin();
	assert(argc >= 3);
	ccv_enable_default_cache();
	ccv_bbf_classifier_cascade_t* cascade = ccv_bbf_read_classifier_cascade(argv[2]);
	ccv_read(argv[1], &image, CCV_IO_GRAY | CCV_IO_ANY_FILE);
	if (image != 0)
	{
		unsigned int elapsed_time = get_current_time();
		seq = ccv_bbf_detect_objects(image, &cascade, 1, ccv_bbf_default_params);
		elapsed_time = get_current_time() - elapsed_time;
		for (i = 0; ENDORSE(i < seq->rnum); i++)
		{
			ccv_comp_t* comp = (ccv_comp_t*)ENDORSE(ccv_array_get(seq, i));
			printf("%d %d %d %d %f\n", comp->rect.x, comp->rect.y, comp->rect.width, comp->rect.height, comp->classification.confidence);
		}
		/* elapsed_time is unsigned, hence %u */
		printf("total : %d in time %ums\n", seq->rnum, elapsed_time);
		ccv_bbf_classifier_cascade_free(cascade);
		ccv_disable_cache();
		accept_roi_end();

		file = fopen(output_file, "w");
		if (file == NULL) {
			perror("fopen for write failed");
			return EXIT_FAILURE;
		}
		// latest changes
		struct coordinates {
			APPROX int x;
			APPROX int y;
		};
		for (i = 0; ENDORSE(i < seq->rnum); i++) {
			ccv_comp_t* comp = (ccv_comp_t*) ENDORSE(ccv_array_get(seq, i));
			struct coordinates upperleft, upperright, lowerleft, lowerright;
			/* Expand (x, y, width, height) into the four rectangle corners. */
			upperleft.x = comp->rect.x;
			upperleft.y = comp->rect.y;
			upperright.x = comp->rect.x + comp->rect.width;
			upperright.y = upperleft.y;
			lowerleft.x = upperleft.x;
			lowerleft.y = upperleft.y + comp->rect.height;
			lowerright.x = upperright.x;
			lowerright.y = lowerleft.y;
			/* Corners are emitted clockwise starting at the upper left. */
			ret_val = fprintf(file, "%d %d\n%d %d\n%d %d\n%d %d\n", upperleft.x, upperleft.y, upperright.x, upperright.y,
				lowerright.x, lowerright.y, lowerleft.x, lowerleft.y);
			// latest changes
			if (ret_val < 0) {
				perror("fprintf of coordinates failed");
				fclose(file);
				return EXIT_FAILURE;
			}
		}
		ret_val = fclose(file);
		if (ret_val != 0) {
			perror("fclose failed");
			return EXIT_FAILURE;
		}
		ccv_array_free(seq);
		ccv_matrix_free(image);
	} else {
		FILE* r = fopen(argv[1], "rt");
		/* Best effort: a failed chdir is reported but does not stop the run. */
		if (argc == 4 && chdir(argv[3]) != 0)
			perror("chdir failed");
		if(r)
		{
			/* getline allocates and grows the buffer itself. */
			size_t len = 0;
			char* line = NULL;
			ssize_t read;
			while((read = getline(&line, &len, r)) != -1)
			{
				/* Strip trailing whitespace (including the newline). */
				while(read > 1 && isspace((unsigned char) line[read - 1]))
					read--;
				line[read] = 0;
				/* Release the previous image's results before replacing them. */
				if (seq != 0)
				{
					ccv_array_free(seq);
					seq = 0;
				}
				if (image != 0)
				{
					ccv_matrix_free(image);
					image = 0;
				}
				ccv_read(line, &image, CCV_IO_GRAY | CCV_IO_ANY_FILE);
				assert(image != 0);
				seq = ccv_bbf_detect_objects(image, &cascade, 1, ccv_bbf_default_params);
				for (i = 0; ENDORSE(i < seq->rnum); i++)
				{
					ccv_comp_t* comp = (ccv_comp_t*) ENDORSE(ccv_array_get(seq, i));
					printf("%s %d %d %d %d %f\n", line, comp->rect.x, comp->rect.y, comp->rect.width, comp->rect.height, comp->classification.confidence);
				}
			}
			free(line);
			fclose(r);
		}
		ccv_bbf_classifier_cascade_free(cascade);
		ccv_disable_cache();
		accept_roi_end();
		file = fopen(output_file, "w");
		if (file == NULL) {
			perror("fopen for write failed");
			return EXIT_FAILURE;
		}
		/* seq stays null when the list could not be opened or was empty;
		 * the output file is then created but left empty. */
		if (seq != 0) {
			for (i = 0; ENDORSE(i < seq->rnum); i++) {
				ccv_comp_t* comp = (ccv_comp_t*) ENDORSE(ccv_array_get(seq, i));
				ret_val = fprintf(file, "%d\n%d\n%d\n%d\n%f\n", comp->rect.x, comp->rect.y, comp->rect.width,
					comp->rect.height, comp->classification.confidence);
				if (ret_val < 0) {
					perror("fprintf of coordinates and confidence failed");
					fclose(file);
					return EXIT_FAILURE;
				}
			}
		}
		ret_val = fclose(file);
		if (ret_val != 0) {
			perror("fclose failed");
			return EXIT_FAILURE;
		}
		if (seq != 0)
			ccv_array_free(seq);
		if (image != 0)
			ccv_matrix_free(image);
	}
	return 0;
}
void activateVisibleUnits(int hidden[], int stochastic, int visible[])
{
	accept_roi_begin();
	// Calculate activation energy for visible units
	APPROX double visibleEnergies[NUM_VISIBLE];
	int v;
	for (v = 0; v < NUM_VISIBLE; v++)
	{
		// Get the sum of energies
		APPROX double sum = 0;
		int h;
		for (h = 0; h < NUM_HIDDEN + 1; h++) // remove the +1 if you want to skip the bias
			sum += (double) hidden[h] * edges[v][h];
		visibleEnergies[v] = sum;
	}

	// Activate visible units, handles K visible units at a time
	for (v = 0; v < NUM_VISIBLE; v += K)
	{
		APPROX double exps[K]; // this is the numerator
		APPROX double sumOfExps = 0.0; // this is the denominator

		int j;
		for (j = 0; j < K; j++)
		{
			exps[j] = exp(visibleEnergies[v + j]);
			sumOfExps += exps[j];
		}

		// Getting the probabilities

		APPROX double probs[K];

		for (j = 0; j < K; j++)
			probs[j] = exps[j] / sumOfExps;

		// Activate units

		if (stochastic) // used for training
		{
			for (j = 0; j < K; j++)
			{
				if (ENDORSE(RAND) < ENDORSE(probs[j]))
					visible[v + j] = 1;
				else
					visible[v + j] = 0;
			}
		}
		else // used for prediction: uses expectation
		{

			APPROX double expectation = 0.0;
			for (j = 0; j < K; j++)
				expectation += j * probs[j]; // we will predict rating between 0 to K-1, not between 1 to K

			long prediction = round((ENDORSE(expectation)));

			for (j = 0; j < K; j++)
			{
				if (j == prediction)
					visible[v + j] = 1;
				else
					visible[v + j] = 0;
			}
		}
	}

	visible[NUM_VISIBLE] = 1; // turn on bias
	accept_roi_end();
}
Beispiel #6
0
/*
 * 2D convolution benchmark driver.
 *
 * Loads one N x M frame from FILENAME, replicates it BATCH_SIZE times,
 * convolves every copy with a 9x9 filter inside the ACCEPT region of
 * interest, and writes each result to "2dconv_output.<i>.mat". Timings and
 * parameters are reported through the PRINT_STAT_* macros.
 *
 * Returns 0 on success; exits with status -1 when a buffer cannot be
 * allocated. The command-line arguments are not used.
 *
 * NOTE(review): the "// ACCEPT_FORBID" markers on the loops look like
 * line-based directives for the ACCEPT tool -- keep them on the loop lines.
 */
int main (int argc, char * argv[])
{
  APPROX int * frame;
  APPROX int * output;
  int i;

  int nFilterRowsFD = 9; 
  int nFilterColsFD = 9;
	  
  /* 9x9 filter taps; the integer weights below add up to 1024. */
  APPROX fltPixel_t FD[] =  {
			 1,   3,   4,   5,   6,   5,  4,    3,  1,
			 3,   9,  12,  15,  18,  15,  12,   9,  3,
			 4,  12,  16,  20,  24,  20,  16,  12,  4,
			 5,  15,  20,  25,  30,  25,  20,  15,  5,
			 6,  18,  24,  30,  36,  30,  24,  18,  6,
			 5,  15,  20,  25,  30,  25,  20,  15,  5,
			 4,  12,  16,  20,  24,  20,  16,  12,  4,
			 3,   9,  12,  15,  18,  15,  12,   9,  3,
			 1,   3,   4,   5,   6,   5,   4,   3,  1
  };

  /* Scale the taps by 1/1024 so that they sum to one. */
  for (i = 0; i < nFilterRowsFD * nFilterColsFD; i++) // ACCEPT_FORBID
  {
    FD[i] /= (1024.0);
  }

  srand (time (NULL));

  STATS_INIT ();
  PRINT_STAT_STRING ("kernel", "2d_convolution");
  PRINT_STAT_INT ("rows", N);
  PRINT_STAT_INT ("columns", M);
  PRINT_STAT_INT ("num_frames", BATCH_SIZE);

  /* Buffers hold BATCH_SIZE frames of M * N pixels each.
   * NOTE(review): sized with algPixel_t but accessed through int pointers --
   * assumes algPixel_t is int; confirm. */
  frame = calloc (M * N * BATCH_SIZE, sizeof(algPixel_t));
  output = calloc (M * N * BATCH_SIZE, sizeof(algPixel_t));

  if (!frame || !output) {
    fprintf(stderr, "ERROR: Allocation failed.\n");
    exit(-1);
  }

  /* load image */
  tic ();
  read_array_from_octave (ENDORSE(frame), N, M, FILENAME);
  PRINT_STAT_DOUBLE ("time_load_image", toc ());

  /* Make BATCH_SIZE-1 copies */
  tic ();
  for (i = 1; i < BATCH_SIZE; i++) // ACCEPT_FORBID
  {
    memcpy (&frame[i * M * N], frame, M * N * sizeof(algPixel_t));
  }
  PRINT_STAT_DOUBLE ("time_copy", toc ());

  /* Perform the 2D convolution */
  tic ();
  accept_roi_begin();
  for (i = 0; i < BATCH_SIZE; i++) // ACCEPT_FORBID
  {
    conv2d (&frame[i * M * N], &output[i * M * N], N, M, FD, 1.0, nFilterRowsFD, nFilterColsFD);
  }
  accept_roi_end();
  PRINT_STAT_DOUBLE ("time_2d_convolution", toc ());

  /* Write the results out to disk */
  for (i = 0; i < BATCH_SIZE; i++) // ACCEPT_FORBID
  {
    char buffer [30];
    /* Bounded formatting: the name can never run past the buffer. */
    snprintf (buffer, sizeof buffer, "2dconv_output.%d.mat", i);
    write_array_to_octave (ENDORSE(&output[i * M * N]), N, M, buffer, "output_" SIZE);
  }
  PRINT_STAT_STRING ("output_file", "2dconv_output." SIZE ".#.mat");

  STATS_END ();

  free (output);
  free (frame);
  return 0;
}
/*
 * Driver for WAMI kernel 3 (Gaussian Mixture Model / change detection).
 *
 * Usage: <program> <directory-containing-input-files>
 *
 * Loads the model state (mu, sigma, weights) and the input frames, runs
 * wami_gmm() on every frame inside the ACCEPT region of interest and reports
 * the elapsed time. With ENABLE_CORRECTNESS_CHECKING the eroded foreground
 * masks are compared with the eroded golden masks and the mean per-frame
 * misclassification ratio is written to "err.txt". With WRITE_OUTPUT_TO_DISK
 * the raw foreground masks are written to output_filename.
 *
 * Returns 0 on success; exits with EXIT_FAILURE on a usage error or when an
 * output file cannot be created or written.
 */
int main(int argc, char **argv)
{
    float (*mu)[WAMI_GMM_IMG_NUM_COLS][WAMI_GMM_NUM_MODELS] = NULL;
    float (*sigma)[WAMI_GMM_IMG_NUM_COLS][WAMI_GMM_NUM_MODELS] = NULL;
    float (*weights)[WAMI_GMM_IMG_NUM_COLS][WAMI_GMM_NUM_MODELS] = NULL;
    u8 (*foreground)[WAMI_GMM_IMG_NUM_ROWS][WAMI_GMM_IMG_NUM_COLS] = NULL;
#ifdef ENABLE_CORRECTNESS_CHECKING
    u8 (*golden_foreground)[WAMI_GMM_IMG_NUM_ROWS][WAMI_GMM_IMG_NUM_COLS] = NULL;
    u8 (*golden_eroded)[WAMI_GMM_IMG_NUM_COLS] = NULL;
    u8 (*eroded)[WAMI_GMM_IMG_NUM_COLS] = NULL;
#endif
    u8 (*morph)[WAMI_GMM_IMG_NUM_COLS] = NULL;
    u16 (*frames)[WAMI_GMM_IMG_NUM_ROWS][WAMI_GMM_IMG_NUM_COLS] = NULL;
    int i;

    char *input_directory = NULL;

    const size_t num_pixels = WAMI_GMM_IMG_NUM_ROWS * WAMI_GMM_IMG_NUM_COLS;

    if (argc != 2)
    {
        fprintf(stderr, "%s <directory-containing-input-files>\n", argv[0]);
        exit(EXIT_FAILURE);
    }

    input_directory = argv[1];

    mu = XMALLOC(sizeof(float) * num_pixels * WAMI_GMM_NUM_MODELS);
    sigma = XMALLOC(sizeof(float) * num_pixels * WAMI_GMM_NUM_MODELS);
    weights = XMALLOC(sizeof(float) * num_pixels * WAMI_GMM_NUM_MODELS);
    foreground = XMALLOC(sizeof(u8) * num_pixels * WAMI_GMM_NUM_FRAMES);
#ifdef ENABLE_CORRECTNESS_CHECKING
    golden_foreground = XMALLOC(sizeof(u8) * num_pixels * WAMI_GMM_NUM_FRAMES);
    eroded = XMALLOC(sizeof(u8) * num_pixels);
    golden_eroded = XMALLOC(sizeof(u8) * num_pixels);
#endif
    morph = XMALLOC(sizeof(u8) * num_pixels);
    frames = XMALLOC(sizeof(u16) * num_pixels * WAMI_GMM_NUM_FRAMES);

    memset(mu, 0, sizeof(float) * num_pixels * WAMI_GMM_NUM_MODELS);
    memset(sigma, 0, sizeof(float) * num_pixels * WAMI_GMM_NUM_MODELS);
    memset(weights, 0, sizeof(float) * num_pixels * WAMI_GMM_NUM_MODELS);
    memset(foreground, 0, sizeof(u8) * num_pixels * WAMI_GMM_NUM_FRAMES);
    memset(morph, 0, sizeof(u8) * num_pixels);
    memset(frames, 0, sizeof(u16) * num_pixels * WAMI_GMM_NUM_FRAMES);

    read_gmm_input_data(
        mu, sigma, weights, frames, input_filename, input_directory);

#ifdef ENABLE_CORRECTNESS_CHECKING
    read_data_file(
        (char *) golden_foreground,
        golden_output_filename,
        input_directory,
        sizeof(u8) * num_pixels * WAMI_GMM_NUM_FRAMES);
#endif

    /* Report the dimensions this kernel actually works on (GMM, not debayer). */
    printf("WAMI kernel 3 parameters:\n\n");
    printf("Image width = %d pixels\n", WAMI_GMM_IMG_NUM_COLS);
    printf("Image height = %d pixels\n", WAMI_GMM_IMG_NUM_ROWS);
    printf("Number of input frames = %d\n", WAMI_GMM_NUM_FRAMES);

    printf("\nStarting WAMI kernel 3 (Gaussian Mixture Model / Change Detection).\n\n");
    tic();
    accept_roi_begin();
    for (i = 0; i < WAMI_GMM_NUM_FRAMES; ++i)
    {
        /* Pass frame i of the 3-D buffers as a 2-D rows x cols image. */
        wami_gmm(
            (u8 (*)[WAMI_GMM_IMG_NUM_COLS]) &foreground[i][0][0],
            mu,
            sigma,
            weights,
            (u16 (*)[WAMI_GMM_IMG_NUM_COLS]) &frames[i][0][0]);
    }
    accept_roi_end();
    PRINT_STAT_DOUBLE("CPU time using func toc - ", toc());
    printf ("\n");
#ifdef ENABLE_CORRECTNESS_CHECKING
    {
        int j, k, validation_warning = 0;
        double err = 0.0; /* accumulated below, so it must start at zero */

        for (i = 0; i < WAMI_GMM_NUM_FRAMES; ++i)
        {
            int num_misclassified = 0, num_foreground = 0;
            double misclassification_ratio = 0.0;
            double misclassification_rate = 0;

            wami_morpho_erode(
                eroded, (u8 (*)[WAMI_GMM_IMG_NUM_COLS]) &foreground[i][0][0]);
            wami_morpho_erode(
                golden_eroded, (u8 (*)[WAMI_GMM_IMG_NUM_COLS]) &golden_foreground[i][0][0]);

            printf("\nValidating frame %d output...\n", i);

            for (j = 0; j < WAMI_GMM_IMG_NUM_ROWS; ++j)
            {
                for (k = 0; k < WAMI_GMM_IMG_NUM_COLS; ++k)
                {
                    if (eroded[j][k] != golden_eroded[j][k])
                    {
                        ++num_misclassified;
                    }
                    if (golden_eroded[j][k] != 0)
                    {
                        ++num_foreground;
                    }
                }
            }

            /*
             * Ratio of misclassified pixels to golden foreground pixels.
             * With no golden foreground the ratio is undefined, so treat the
             * frame as fully wrong if anything was misclassified and as
             * perfect otherwise, instead of dividing by zero.
             */
            if (num_foreground != 0)
            {
                misclassification_ratio =
                    ((double) num_misclassified) / ((double) num_foreground);
            }
            else if (num_misclassified != 0)
            {
                misclassification_ratio = 1.0;
            }
            misclassification_rate = 100.0 * misclassification_ratio;
            err += misclassification_ratio / ((double) WAMI_GMM_NUM_FRAMES);

            printf("\tMisclassified pixels: %d\n", num_misclassified);
            printf("\tGolden foreground pixels (after erosion): %d\n", num_foreground);
            printf("\tMisclassification rate relative to foreground: %f%%\n",
                misclassification_rate);
            if (misclassification_rate > 0.1)
            {
                validation_warning = 1;
            }
        }

        /* Checked explicitly so the test survives builds with NDEBUG. */
        FILE *fp = fopen("err.txt", "wb");
        if (fp == NULL)
        {
            perror("fopen of err.txt failed");
            exit(EXIT_FAILURE);
        }
        fprintf(fp, "%.2f\n", err);
        fclose(fp);

        if (validation_warning)
        {
            printf("\nValidation warning: Misclassification rate appears high; check images.\n\n");
        }
        else
        {
            printf("\nValidation checks passed.\n\n");
        }
    }
#endif
    
#ifdef WRITE_OUTPUT_TO_DISK
    printf("Writing output to %s.\n", output_filename);
    {
        const size_t num_output_bytes = num_pixels * WAMI_GMM_NUM_FRAMES;
        FILE *fp = fopen(output_filename, "wb");
        if (fp == NULL)
        {
            perror("fopen of output file failed");
            exit(EXIT_FAILURE);
        }
        /* The write must not live inside assert(): NDEBUG would remove it. */
        if (fwrite(foreground, sizeof(u8), num_output_bytes, fp) != num_output_bytes)
        {
            fprintf(stderr, "ERROR: short write to %s.\n", output_filename);
            fclose(fp);
            exit(EXIT_FAILURE);
        }
        fclose(fp);
    }
#endif

    FREE_AND_NULL(mu);
    FREE_AND_NULL(sigma);
    FREE_AND_NULL(weights);
    FREE_AND_NULL(foreground);
#ifdef ENABLE_CORRECTNESS_CHECKING
    FREE_AND_NULL(golden_foreground);
    FREE_AND_NULL(eroded);
    FREE_AND_NULL(golden_eroded);
#endif
    FREE_AND_NULL(morph);
    FREE_AND_NULL(frames);

    return 0;
}