Exemple #1
0
int main(void)
{
   RCC_Configuration();
   GPIO_Configuration();

   RCC_ClocksTypeDef RCC_Clocks;
   RCC_GetClocksFreq(&RCC_Clocks);
   SysTick_Config(RCC_Clocks.HCLK_Frequency / 1000 - 1);

   setup_adc();
   tim1_init();
   usart_init();

   PWM_U = 0;
   PWM_V = 0;
   PWM_W = 0;

   while(1){
      if(uartsend == 1){
         amp = AMP(amp_raw);
         volt = VOLT(volt_raw);
         if(temp_raw < ARES && temp_raw > 0){
            temp = TEMP(temp_raw);
         }
         from_hv.dc_volt = TOFIXED(volt);
         from_hv.dc_cur =  TOFIXED(amp);
         from_hv.hv_temp = TOFIXED(temp);
#ifdef TROLLER
         from_hv.dc_cur =  TOFIXED(0);
         from_hv.hv_temp = TOFIXED(0);
         from_hv.a = TOFIXED(AMP(ADCConvertedValue[1]));
         from_hv.b = TOFIXED(AMP(ADCConvertedValue[2]));
         from_hv.c = TOFIXED(AMP(ADCConvertedValue[3]));
#endif
         uartsend = 0;
         while (USART_GetFlagStatus(USART2, USART_FLAG_TXE) == RESET);
         USART_SendData(USART2, 0x154);
         for(int j = 0;j<sizeof(from_hv_t);j++){
            while (USART_GetFlagStatus(USART2, USART_FLAG_TXE) == RESET);
            USART_SendData(USART2, ((uint8_t*)&from_hv)[j]);
         }
      }
      //GPIOA->BSRR = (GPIOA->ODR ^ GPIO_Pin_2) | (GPIO_Pin_2 << 16);//toggle red led
   }
}
Exemple #2
0
int
main(int argc, char **argv)
{
	int i, mark, pos, delay;
	char dir;

	hmax = 50;
	hist = malloc(hmax * sizeof(struct config));
	memset(&hist[0], 0, sizeof (struct config));

	for (i = 1; i <= bsize; i++)
		sumsize += AMP(i);

	if (argc == 1)
		delay = 28;
	else
		delay = atoi(argv[1]);
	DPRINTF(("delay=%d\n", delay));

#if 0
	scanf("%d\n", &mark);
	if (mark != bsize) exit(1);
#endif

	for (mark = X; scanf("%c%d\n", &dir, &pos) == 2; mark = OTHER(mark)) {
		domove(dir, pos-1, mark);
	}

	signal(SIGALRM, timeout);
	alarm(delay);

	computemove(&bestdir, &bestpos, mark);

	DPRINTF(("done\n"));
	timeout();
	exit(0);
}
int main(int argc, char *argv[])
{
    int i, j, m, n;
	double			tempo_kernel;
    double			tempo_total;
    char			*output_filename;
	float			*image_amplitudes;
	float           (*x)[2];  
	float           (*X)[2]; 
	
	pgm_t			ipgm, opgm;   
	image_file_t	*image_filename;
    timer_reset();
    timer_start();

    if (argc < 2)
    {
        printf("**Erro: parametros de entrada invalidos");
        exit(EXIT_FAILURE);
    }

    image_filename = (image_file_t *) malloc(sizeof(image_file_t));
    split_image_filename(image_filename, argv[1]);
    output_filename = (char *) malloc(40*sizeof(char));
    sprintf(output_filename, "%d.%d.%s.%s.%s", image_filename->res, image_filename->num, ENV_TYPE, APP_TYPE, EXTENSAO);

    if( ler_pgm(&ipgm, argv[1]) == -1)
        exit(EXIT_FAILURE);

	n = ipgm.width;
	m = (int)(log((double)n)/log(2.0));
	x = malloc(2 * n * n * sizeof(float));
	X = malloc(2 * n * n * sizeof(float));
		
	opgm.width = n;
    opgm.height = n;
    
	for (i = 0; i < n; i++) {
        for (j = 0; j < n; j++) {
            x[i*n + j][0] = (float) ipgm.buf[i*n + j];
			x[i*n + j][1] = (float) 0;
        }
    }

	/* Check that n = 2^m for some integer m >= 1. */
	if (n >= 2) {
		i = n;
		while(i==2*(i/2)) i = i/2;  /* While i is even, factor out a 2. */
	}  /* For n >=2, we now have N = 2^n iff i = 1. */
	if (n < 2 || i != 1) {
		  printf(" %d deve ser um inteiro tal que n = 2^m , para m >= 1", n);
		  exit(EXIT_FAILURE);
	}
	
	timer_stop();
    tempo_total = get_elapsed_time();

    //====== Performance Test - start =======================================
    timer_reset();
    timer_start();
	j = 0;
	j = n*n;
	
    //fft direta
	fft(j, x, X);
	// filtro passa baixa
	lowpass_filter(X, n);
	//fft inversa
	for(i=0; i<j; i++) x[i][0] = x[i][1] = 0;
	ifft(j, x, X);
   
	timer_stop();
    tempo_kernel = get_elapsed_time();

    tempo_total += tempo_kernel;
    //====== Performance Test - end ============================================

    save_log_cpu(image_filename, tempo_kernel, tempo_total, LOG_NAME);
	
	image_amplitudes = (float*)malloc(n*n*sizeof(float));
	 for (i=0; i < n; i++) {
        for (j=0; j < n; j++) {
            image_amplitudes[i*n + j] = (float) (AMP(x[i*n + j][0], x[i*n + j][1]));
        }
    }
	
	normalizar_pgm(&opgm, image_amplitudes);
    escrever_pgm(&opgm, output_filename);
    free(x);
	free(X);
	destruir_pgm(&ipgm);
    destruir_pgm(&opgm);
    free(image_filename);
    free(output_filename);
    free(image_amplitudes); 
   _CrtDumpMemoryLeaks();
    return 0;
}
Exemple #4
0
int main(int argc, char *argv[])
{
	//fprintf(stderr, "[%s:%d:%s()] FFT!\n", __FILE__, __LINE__, __func__);
	LOG("FFT Start\n");
	cl_mem xmobj = NULL;
	cl_mem rmobj = NULL;
	cl_mem wmobj = NULL;
	cl_kernel sfac = NULL;
	cl_kernel trns = NULL;
	cl_kernel hpfl = NULL;

	cl_uint ret_num_platforms;
	cl_uint ret_num_devices;

	cl_int ret;

	cl_float2 *xm;
	cl_float2 *rm;
	cl_float2 *wm;

	pgm_t ipgm;
	pgm_t opgm;

	FILE *fp;
	const char fileName[] = "./fft.cl";
	size_t source_size;
	char *source_str;
	cl_int i, j;
	cl_int n;
	cl_int m;

	size_t gws[2];
	size_t lws[2];

	fp = fopen(fileName, "r");
	if(!fp)
	{
		fprintf(stderr, "[%s:%d:%s()] ERROR, Failed to load kernel source.\n", __FILE__, __LINE__, __func__);
		return 1;
	}

	source_str = (char *)malloc(MAX_SOURCE_SIZE);
	source_size = fread(source_str, 1, MAX_SOURCE_SIZE, fp);
	fclose(fp);

	readPGM(&ipgm, "./lena.pgm");

	n = ipgm.width;
	m = (cl_int)(log((double)n)/log(2.0));

	LOG("n = %d, m = %d.\n", m, n);

	xm = (cl_float2*)malloc(n*n*sizeof(cl_float2));
	rm = (cl_float2*)malloc(n*n*sizeof(cl_float2));
	wm = (cl_float2*)malloc(n/2 *sizeof(cl_float2));

	for( i = 0; i < n; i++)
	{
		for(j = 0; j < n; j++)
		{
			((float*)xm)[2*(n*j + i) + 0] = (float)ipgm.buf[n*j + i];
			((float*)xm)[2*(n*j + i) + 1] = (float)0;
		}
	}

	CL_CHECK(ret = clGetPlatformIDs(MAX_PLATFORM_IDS, platform_ids, &ret_num_platforms));
	platform_id = platform_ids[0];
	CL_CHECK(ret = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, &ret_num_devices));

	LOG("platform_id = %p, device_id = %p\n", platform_id, device_id);


	context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret);
	CL_CHECK(ret);

	queue = clCreateCommandQueue(context, device_id, 0, &ret);

	xmobj = clCreateBuffer(context, CL_MEM_READ_WRITE, n*n*sizeof(cl_float2), NULL, &ret);
	CL_CHECK(ret);
	rmobj = clCreateBuffer(context, CL_MEM_READ_WRITE, n*n*sizeof(cl_float2), NULL, &ret);
	CL_CHECK(ret);
	wmobj = clCreateBuffer(context, CL_MEM_READ_WRITE, n*n*sizeof(cl_float2), NULL, &ret);
	CL_CHECK(ret);

	CL_CHECK(ret = clEnqueueWriteBuffer(queue, xmobj, CL_TRUE, 0, n*n*sizeof(cl_float2), xm, 0, NULL, NULL));

	program = clCreateProgramWithSource(context, 1, (const char **)&source_str, (const size_t *)&source_size, &ret);
	CL_CHECK(ret);

	CL_CHECK(ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL));

	sfac = clCreateKernel(program, "spinFact", &ret);
	CL_CHECK(ret);
	trns = clCreateKernel(program, "transpose", &ret);
	CL_CHECK(ret);
	hpfl = clCreateKernel(program, "highPassFilter", &ret);
	CL_CHECK(ret);

	CL_CHECK(ret = clSetKernelArg(sfac, 0, sizeof(cl_mem), (void *)&wmobj));
	CL_CHECK(ret = clSetKernelArg(sfac, 1, sizeof(cl_int), (void *)&n));
	setWorkSize(gws, lws, n/2, 1);
	CL_CHECK(ret = clEnqueueNDRangeKernel(queue, sfac, 1, NULL, gws, lws, 0, NULL, NULL));

	fftCore(rmobj, xmobj, wmobj, m, forward);

	CL_CHECK(ret = clSetKernelArg(trns, 0, sizeof(cl_mem), (void *)&xmobj));
	CL_CHECK(ret = clSetKernelArg(trns, 1, sizeof(cl_mem), (void *)&rmobj));
	CL_CHECK(ret = clSetKernelArg(trns, 2, sizeof(cl_int), (void *)&n));
	setWorkSize(gws, lws, n, n);
	CL_CHECK(ret = clEnqueueNDRangeKernel(queue, trns, 2, NULL, gws, lws, 0, NULL, NULL));

	fftCore(rmobj, xmobj, wmobj, m, forward);

#if 1 //FILTER
	cl_int radius = n>>4;
	CL_CHECK(ret = clSetKernelArg(hpfl, 0, sizeof(cl_mem), (void *)&rmobj));
	CL_CHECK(ret = clSetKernelArg(hpfl, 1, sizeof(cl_int), (void *)&n));
	CL_CHECK(ret = clSetKernelArg(hpfl, 2, sizeof(cl_int), (void *)&radius));
	setWorkSize(gws, lws, n, n);
	CL_CHECK(ret = clEnqueueNDRangeKernel(queue, hpfl, 2, NULL, gws, lws, 0, NULL, NULL));
#endif

#if 1 /* Inverse FFT */
	fftCore(xmobj, rmobj, wmobj, m, inverse);

	CL_CHECK(ret = clSetKernelArg(trns, 0, sizeof(cl_mem), (void *)&rmobj));
	CL_CHECK(ret = clSetKernelArg(trns, 1, sizeof(cl_mem), (void *)&xmobj));
	CL_CHECK(ret = clSetKernelArg(trns, 2, sizeof(cl_int), (void *)&n));
	setWorkSize(gws, lws, n, n);
	CL_CHECK(ret = clEnqueueNDRangeKernel(queue, trns, 2, NULL, gws, lws, 0, NULL, NULL));

	fftCore(xmobj, rmobj, wmobj, m, inverse);
#endif

	CL_CHECK(ret = clEnqueueReadBuffer(queue, xmobj, CL_TRUE, 0, n*n*sizeof(cl_float2), xm, 0, NULL, NULL));

	float *ampd;
	ampd = (float*)malloc(n*n*sizeof(float));
	for(i = 0; i < n; i++)
	{
		for(j = 0; j < n; j++)
		{
			ampd[n*i + j] = AMP( ((float*)xm)[2*(n*i + j)], ((float*)xm)[2*(n*i + j) + 1] );
//			fprintf(stderr, "%d ", (int)ampd[n*i + j]);
		}
//		fprintf(stderr, "\n");
	}

	opgm.width = n;
	opgm.height = n;
	normalizeF2PGM(&opgm, ampd);
	free(ampd);

	writePGM(&opgm, "output.pgm");

	/* Termination */
	CL_CHECK(ret = clFlush(queue));
	CL_CHECK(ret = clFinish(queue));
	CL_CHECK(ret = clReleaseKernel(hpfl));
	CL_CHECK(ret = clReleaseKernel(trns));
	CL_CHECK(ret = clReleaseKernel(sfac));
	CL_CHECK(ret = clReleaseProgram(program));
	CL_CHECK(ret = clReleaseMemObject(xmobj));
	CL_CHECK(ret = clReleaseMemObject(rmobj));
	CL_CHECK(ret = clReleaseMemObject(wmobj));
	CL_CHECK(ret = clReleaseCommandQueue(queue));
	CL_CHECK(ret = clReleaseContext(context));

	destroyPGM(&ipgm);
	destroyPGM(&opgm);

	free(source_str);
	free(wm);
	free(rm);
	free(xm);

	return 0;
}
static int mrvl_usb_phy_28nm_init(u32 base)
{
	struct usb_file *file = (struct usb_file *)(uintptr_t)base;
	u32 tmp32;
	int ret;

	/*
	 * pll control 0:
	 * 0xd420_7000[6:0] = 0xd, b'000_1101	  --->  REFDIV
	 * 0xd420_7000[24:16] = 0xf0, b'1111_0000 --->  FBDIV
	 * 0xd420_7000[11:8] = 0x3, b'0011        --->  ICP
	 * 0xd420_7000[29:28] = 0x1,  b'01	  --->  SEL_LPFR
	 */
	tmp32 = readl(&file->pll_reg0);
	tmp32 &= ~(REFDIV_MASK | FB_DIV_MASK | ICP_MASK | SEL_LPFR_MASK);
	tmp32 |= REFDIV(0xd) | FB_DIV(0xf0) | ICP(0x3) | SEL_LPFR(0x1);
	writel(tmp32, &file->pll_reg0); /*
	 * pll control 1:
	 * 0xd420_7004[1:0] = 0x3, b'11	   ---> [PU_PLL_BY_REG:PU_PLL]
	 */
	tmp32 = readl(&file->pll_reg1);
	tmp32 &= ~(PLL_PU_MASK | PU_BY_MASK);
	tmp32 |= PLL_PU(0x1) | PU_BY(0x1);
	writel(tmp32, &file->pll_reg1);
	/*
	 * tx reg 0:
	 * 0xd420_700c[22:20] = 0x3, b'11  ---> AMP
	 */
	tmp32 = readl(&file->tx_reg0);
	tmp32 &= ~(AMP_MASK);
	tmp32 |= AMP(0x3);
	writel(tmp32, &file->tx_reg0);
	/*
	 * rx reg 0:
	 * 0xd420_7018[3:0] = 0xa, b'1010 ---> SQ_THRESH
	 */
	tmp32 = readl(&file->rx_reg0);
	tmp32 &= ~(SQ_THRESH_MASK);
	tmp32 |= SQ_THRESH(0xa);
	writel(tmp32, &file->rx_reg0);
	/*
	 * dig reg 0:
	 * 0xd420_701c[31] = 0, b'0 ---> BITSTAFFING_ERROR
	 * 0xd420_701c[30] = 0, b'0 ---> LOSS_OF_SYNC_ERROR
	 * 0xd420_701c[18:16] = 0x7, b'111 ---> SQ_FILT
	 * 0xd420_701c[14:12] = 0x4, b'100 ---> SQ_BLK
	 * 0xd420_701c[1:0] = 0x2, b'10 ---> SYNC_NUM
	 */
	tmp32 = readl(&file->dig_reg0);
	tmp32 &= ~(BITSTAFFING_ERR_MASK | SYNC_ERR_MASK | SQ_FILT_MASK | SQ_BLK_MASK | SYNC_NUM_MASK);
	tmp32 |= (SQ_FILT(0x0) | SQ_BLK(0x0) | SYNC_NUM(0x1));
	writel(tmp32, &file->dig_reg0);

	/*
	 * otg reg:
	 * 0xd420_7034[5:4] = 0x1,  b'01   ---> [OTG_CONTROL_BY_PIN:PU_OTG]
	 */
	tmp32 = readl(&file->otg_reg);
	tmp32 &= ~(OTG_CTRL_BY_MASK | PU_OTG_MASK);
	tmp32 |= OTG_CTRL_BY(0x0) | PU_OTG(0x1);
	writel(tmp32, &file->otg_reg);
	/*
	 * tx reg 0:
	 * 0xd420_700c[25:24] = 0x3, b'11  --->  [PU_ANA:PU_BY_REG]
	 */
	tmp32 = readl(&file->tx_reg0);
	tmp32 &= ~(ANA_PU_MASK | TX_PU_BY_MASK);
	tmp32 |= ANA_PU(0x1) | TX_PU_BY(0x1);
	writel(tmp32, &file->tx_reg0);

	udelay(400);
	ret = wait_for_phy_ready(file);
	if (ret < 0) {
		printf("initialize usb phy failed, dump usb registers:\n");
		dump_phy_regs(base);
	}

	printf("usb phy inited 0x%x!\n", readl(&file->usb_ctrl0));
	return 0;
}
int asf_igram_coh(int lookLine, int lookSample, int stepLine, int stepSample,
		  char *masterFile, char *slaveFile, char *outBase,
		  float *average)
{
  char ampFile[255], phaseFile[255]; //, igramFile[512];
  char cohFile[512], ml_ampFile[255], ml_phaseFile[255]; //, ml_igramFile[512];
  FILE *fpMaster, *fpSlave, *fpAmp, *fpPhase, *fpCoh, *fpAmp_ml, *fpPhase_ml;
  int line, sample_count, line_count, count;
  float	bin_high, bin_low, max=0.0, sum_a, sum_b, ampScale;
  double hist_sum=0.0, percent, percent_sum;
  long long hist_val[HIST_SIZE], hist_cnt=0;
  meta_parameters *inMeta,*outMeta, *ml_outMeta;
  complexFloat *master, *slave, *sum_igram, *sum_ml_igram;
  float *amp, *phase, *sum_cpx_a, *sum_cpx_b, *coh, *pCoh;
  float *ml_amp, *ml_phase;

  // FIXME: Processing flow with two-banded interferogram needed - backed out
  //        for now
  create_name(ampFile, outBase,"_igram_amp.img");
  create_name(phaseFile, outBase,"_igram_phase.img");
  create_name(ml_ampFile, outBase,"_igram_ml_amp.img");
  create_name(ml_phaseFile, outBase,"_igram_ml_phase.img");
  //create_name(igramFile, outBase,"_igram.img");
  //create_name(ml_igramFile, outBase, "_igram_ml.img");
  //sprintf(cohFile, "coherence.img");
  create_name(cohFile, outBase, "_coh.img");

  // Read input meta file
  inMeta = meta_read(masterFile);
  line_count = inMeta->general->line_count; 
  sample_count = inMeta->general->sample_count;
  ampScale = 1.0/(stepLine*stepSample);

  // Generate metadata for single-look images 
  outMeta = meta_read(masterFile);
  outMeta->general->data_type = REAL32;

  // Write metadata for interferometric amplitude
  outMeta->general->image_data_type = AMPLITUDE_IMAGE;
  meta_write(outMeta, ampFile);

  // Write metadata for interferometric phase
  outMeta->general->image_data_type = PHASE_IMAGE;
  meta_write(outMeta, phaseFile);

  /*
  // Write metadata for interferogram
  outMeta->general->image_data_type = INTERFEROGRAM;
  outMeta->general->band_count = 2;
  strcpy(outMeta->general->bands, "IGRAM-AMP,IGRAM-PHASE");
  meta_write(outMeta, igramFile);
  */	
     
  // Generate metadata for multilooked images
  ml_outMeta = meta_read(masterFile);
  ml_outMeta->general->data_type = REAL32;
  ml_outMeta->general->line_count = line_count/stepLine;
  ml_outMeta->general->sample_count = sample_count/stepSample;
  ml_outMeta->general->x_pixel_size *= stepSample;
  ml_outMeta->general->y_pixel_size *= stepLine;
  ml_outMeta->sar->multilook = 1;
  ml_outMeta->sar->line_increment   *= stepLine;
  ml_outMeta->sar->sample_increment *= stepSample;
  // FIXME: This is the wrong increment but create_dem_grid does not know any
  //        better at the moment.
  //ml_outMeta->sar->line_increment = 1;
  //ml_outMeta->sar->sample_increment = 1;

  // Write metadata for multilooked interferometric amplitude
  ml_outMeta->general->image_data_type = AMPLITUDE_IMAGE;
  meta_write(ml_outMeta, ml_ampFile);

  // Write metadata for multilooked interferometric phase
  ml_outMeta->general->image_data_type = PHASE_IMAGE;
  meta_write(ml_outMeta, ml_phaseFile);

  // Write metadata for coherence image
  ml_outMeta->general->image_data_type = COHERENCE_IMAGE;
  meta_write(ml_outMeta, cohFile);

  /*
  // Write metadata for multilooked interferogram
  ml_outMeta->general->image_data_type = INTERFEROGRAM;
  strcpy(ml_outMeta->general->bands, "IGRAM-AMP,IGRAM-PHASE");
  ml_outMeta->general->band_count = 2;
  meta_write(ml_outMeta, ml_igramFile);
  */

  // Allocate memory
  master = (complexFloat *) MALLOC(sizeof(complexFloat)*sample_count*lookLine);
  slave = (complexFloat *) MALLOC(sizeof(complexFloat)*sample_count*lookLine);
  amp = (float *) MALLOC(sizeof(float)*sample_count*lookLine);
  phase = (float *) MALLOC(sizeof(float)*sample_count*lookLine);
  ml_amp = (float *) MALLOC(sizeof(float)*sample_count/stepSample);
  ml_phase = (float *) MALLOC(sizeof(float)*sample_count/stepSample);
  coh = (float *) MALLOC(sizeof(float)*sample_count/stepSample);
  sum_cpx_a = (float *) MALLOC(sizeof(float)*sample_count);
  sum_cpx_b = (float *) MALLOC(sizeof(float)*sample_count);
  sum_igram = (complexFloat *) MALLOC(sizeof(complexFloat)*sample_count);
  sum_ml_igram = (complexFloat *) MALLOC(sizeof(complexFloat)*sample_count);

  // Open files
  fpMaster = FOPEN(masterFile,"rb");
  fpSlave = FOPEN(slaveFile,"rb");
  fpAmp = FOPEN(ampFile,"wb");
  fpPhase = FOPEN(phaseFile,"wb");
  fpAmp_ml = FOPEN(ml_ampFile,"wb");
  fpPhase_ml = FOPEN(ml_phaseFile,"wb");
  //FILE *fpIgram = FOPEN(igramFile, "wb");
  //FILE *fpIgram_ml = FOPEN(ml_igramFile, "wb");
  fpCoh = FOPEN(cohFile,"wb");

  // Initialize histogram
  for (count=0; count<HIST_SIZE; count++) hist_val[count] = 0;

  asfPrintStatus("   Calculating interferogram and coherence ...\n\n");

  for (line=0; line<line_count; line+=stepLine)
  {
    register int offset, row, column, limitLine;
    double igram_real, igram_imag;
    int inCol;
    limitLine=MIN(lookLine, line_count-line);

    printf("Percent completed %3.0f\r",(float)line/line_count*100.0);

    pCoh = coh;

    // Read in the next lines of data
    get_complexFloat_lines(fpMaster, inMeta, line, limitLine, master);
    get_complexFloat_lines(fpSlave, inMeta, line, limitLine, slave);

    // Add the remaining rows into sum vectors
    offset = sample_count;
    for (column=0; column<sample_count; column++)
    {
      offset = column;
      sum_cpx_a[column] = 0.0;
      sum_cpx_b[column] = 0.0;
      sum_igram[column].real = 0.0;
      sum_igram[column].imag = 0.0;
      sum_ml_igram[column].real = 0.0;
      sum_ml_igram[column].imag = 0.0;
      igram_real = 0.0;
      igram_imag = 0.0;

      for (row=0; row<limitLine; row++)
      {
	// Complex multiplication for interferogram generation
	igram_real = master[offset].real*slave[offset].real + 
	  master[offset].imag*slave[offset].imag;
        igram_imag = master[offset].imag*slave[offset].real - 
	  master[offset].real*slave[offset].imag;
        amp[offset] = sqrt(igram_real*igram_real + igram_imag*igram_imag);
        if (FLOAT_EQUIVALENT(igram_real, 0.0) || 
	    FLOAT_EQUIVALENT(igram_imag, 0.0))
	  phase[offset]=0.0;
        else
	  phase[offset] = atan2(igram_imag, igram_real);

       	sum_cpx_a[column] += AMP(master[offset])*AMP(master[offset]);
      	sum_cpx_b[column] += AMP(slave[offset])*AMP(slave[offset]);
       	sum_igram[column].real += igram_real;
	sum_igram[column].imag += igram_imag;
	if (line % stepLine == 0 && row < stepLine) {
	  sum_ml_igram[column].real += igram_real;
	  sum_ml_igram[column].imag += igram_imag;
	}

	offset += sample_count;
      }

      ml_amp[column] = 
	sqrt(sum_ml_igram[column].real*sum_ml_igram[column].real + 
	     sum_ml_igram[column].imag*sum_ml_igram[column].imag)*ampScale;
      if (FLOAT_EQUIVALENT(sum_ml_igram[column].real, 0.0) || 
	  FLOAT_EQUIVALENT(sum_ml_igram[column].imag, 0.0))
	ml_phase[column] = 0.0;
      else
	ml_phase[column] = atan2(sum_ml_igram[column].imag, 
				 sum_ml_igram[column].real);
    }

    // Write single-look and multilooked amplitude and phase
    put_float_lines(fpAmp, outMeta, line, stepLine, amp);
    put_float_lines(fpPhase, outMeta, line, stepLine, phase);
    put_float_line(fpAmp_ml, ml_outMeta, line/stepLine, ml_amp);
    put_float_line(fpPhase_ml, ml_outMeta, line/stepLine, ml_phase);
    //put_band_float_lines(fpIgram, outMeta, 0, line, stepLine, amp);
    //put_band_float_lines(fpIgram, outMeta, 1, line, stepLine, phase);
    //put_band_float_line(fpIgram_ml, ml_outMeta, 0, line/stepLine, ml_amp);
    //put_band_float_line(fpIgram_ml, ml_outMeta, 1, line/stepLine, ml_phase);

    // Calculate the coherence by adding from sum vectors
    for (inCol=0; inCol<sample_count; inCol+=stepSample)
    {
      register int limitSample = MIN(lookSample,sample_count-inCol);
      sum_a = 0.0;
      sum_b = 0.0;
      igram_real = 0.0;
      igram_imag = 0.0;

      // Step over multilook area and sum output columns
      for (column=0; column<limitSample; column++)
      {
	igram_real += sum_igram[inCol+column].real;
	igram_imag += sum_igram[inCol+column].imag;				
	sum_a += sum_cpx_a[inCol+column];
	sum_b += sum_cpx_b[inCol+column];
      }

      if (FLOAT_EQUIVALENT((sum_a*sum_b), 0.0))
	*pCoh = 0.0;
      else 
      {
	*pCoh = (float) sqrt(igram_real*igram_real + igram_imag*igram_imag) /
	  sqrt(sum_a * sum_b);
	if (*pCoh>1.0001)
	{ 
	  printf("   coh = %f -- setting to 1.0\n",*pCoh);
	  printf("   You shouldn't have seen this!\n");
	  printf("   Exiting.\n");
          exit(EXIT_FAILURE);
	  *pCoh=1.0;
	}
      }
     pCoh++;
    } 

    // Write out values for coherence
    put_float_line(fpCoh, ml_outMeta, line/stepLine, coh);

    // Keep filling coherence histogram
    for (count=0; count<sample_count/stepSample; count++)
    {
      register int tmp;
      tmp = (int) (coh[count]*HIST_SIZE); /* Figure out which bin this value is in */
      /* This shouldn't happen */
      if(tmp >= HIST_SIZE)
	tmp = HIST_SIZE-1;
      if(tmp < 0)
	tmp = 0;
      
      hist_val[tmp]++;        // Increment that bin for the histogram
      hist_sum += coh[count];   // Add up the values for the sum
      hist_cnt++;             // Keep track of the total number of values
      if (coh[count]>max) 
	max = coh[count];  // Calculate maximum coherence
    }
  } // End for line

  printf("Percent completed %3.0f\n",(float)line/line_count*100.0);

  // Sum and print the statistics
  percent_sum = 0.0;
  printf("   Coherence  :  Occurrences  :  Percent\n");
  printf("   ---------------------------------------\n");
  for (count=0; count<HIST_SIZE; count++) {
    bin_low  = (float)(count)/(float)HIST_SIZE;
    bin_high = (float)(count+1)/(float)HIST_SIZE;
    percent  = (double)hist_val[count]/(double)hist_cnt;
    percent_sum += (float)100*percent;
    printf("   %.2f -> %.2f :   %.8lld       %2.3f \n",
		   bin_low,bin_high, (long long) hist_val[count],100*percent);
  }
  *average = (float)hist_sum/(float)hist_cnt;
  printf("   ---------------------------------------\n");
  printf("   Maximum Coherence: %.3f\n", max);
  printf("   Average Coherence: %.3f  (%.1f / %lld) %f\n", 
		 *average,hist_sum, hist_cnt, percent_sum);

  // Free and exit
  FREE(master); 
  FREE(slave);
  FREE(amp); 
  FREE(phase);
  FREE(ml_amp); 
  FREE(ml_phase); 
  FREE(coh); 
  FCLOSE(fpMaster); 
  FCLOSE(fpSlave);
  FCLOSE(fpAmp); 
  FCLOSE(fpPhase);
  FCLOSE(fpAmp_ml); 
  FCLOSE(fpPhase_ml);
  //FCLOSE(fpIgram);
  //FCLOSE(fpIgram_ml);
  FCLOSE(fpCoh); 
  meta_free(inMeta);
  meta_free(outMeta);
  meta_free(ml_outMeta);
  return(0);
}
int main(int argc, char *argv[])
{
    //FILE *fp;

    cl_platform_id      platform_id[2];
    cl_uint             ret_num_devices;
    cl_uint             ret_num_platforms;
    cl_int              ret_code;

    cl_mem              image_in_mem = NULL;
    cl_mem              image_out_mem = NULL;
    cl_mem              twiddle_factors_mem = NULL;
    cl_float2           *image_in_host;
    cl_float2           *twiddle_factors_host;

    cl_kernel           kernel_twiddle_factors;
    cl_kernel           kernel_matriz_transpose;
    cl_kernel           kernel_lowpass_filter;

    pgm_t ipgm;
    pgm_t opgm;

    image_file_t        *image_filename;
    char                *output_filename;
    FILE                *fp;
    const char          *kernel_filename = C_NOME_ARQ_KERNEL;
    size_t              source_size;
    char                *source_str;
    cl_int              i, j,n ,m;
    cl_int              raio = 0;
    size_t              global_wg[2];
    size_t              local_wg[2];
    float               *image_amplitudes;
    size_t              log_size;
    char                *log_file;

    cl_event            kernels_events_out_fft[4];

    cl_ulong            kernel_runtime              = (cl_ulong) 0;
    cl_ulong            kernel_start_time           = (cl_ulong) 0;
    cl_ulong            kernel_end_time             = (cl_ulong) 0;

    cl_event            write_host_dev_event;
    cl_ulong            write_host_dev_start_time   = (cl_ulong) 0;
    cl_ulong            write_host_dev_end_time     = (cl_ulong) 0;
    cl_ulong            write_host_dev_run_time     = (cl_ulong) 0;

    cl_event            read_dev_host_event;
    cl_ulong            read_dev_host_start_time    = (cl_ulong) 0;
    cl_ulong            read_dev_host_end_time      = (cl_ulong) 0;
    cl_ulong            read_dev_host_run_time      = (cl_ulong) 0;

    unsigned __int64    image_tam;
    unsigned __int64    MEGA_BYTES   =  1048576; // 1024*1024
    double              image_tam_MB;
    double              tempo_total;

    struct event_in_fft_t *fft_events;


   //=== Timer count start ==============================================================================
    timer_reset();
    timer_start();
    //===================================================================================================

    if (argc < 2) {
        printf("**Erro: O arquivo de entrada eh necessario.\n");
        exit(EXIT_FAILURE);
    }

    image_filename = (image_file_t *) malloc(sizeof(image_file_t));
    split_image_filename(image_filename, argv[1]);
    output_filename = (char *) malloc(40*sizeof(char));
    sprintf(output_filename, "%d.%d.%s.%s.%s", image_filename->res, image_filename->num, ENV_TYPE, APP_TYPE, EXTENSAO);

    fp = fopen(kernel_filename, "r");
    if (!fp) {
        fprintf(stderr, "Failed to load kernel.\n");
        exit(EXIT_FAILURE);
    }

    source_str = (char *)malloc(MAX_SOURCE_SIZE);
    source_size = fread(source_str, 1, MAX_SOURCE_SIZE, fp);
    fclose( fp );

    //===================================================================================================
     /* Abrindo imagem do arquivo para objeto de memoria local*/
    if( ler_pgm(&ipgm, argv[1]) == -1)
        exit(EXIT_FAILURE);

    n = ipgm.width;
    raio = n/8;
    m = (cl_int)(log((double)n)/log(2.0));

    image_in_host = (cl_float2 *)malloc((n*n)*sizeof(cl_float2));
    twiddle_factors_host = (cl_float2 *)malloc(n / 2 * sizeof(cl_float2));

    for (i = 0; i < n; i++) {
        for (j = 0; j < n; j++) {
            image_in_host[n*i + j].s[0] = (float)ipgm.buf[n*i + j];
            image_in_host[n*i + j].s[1] = (float)0;
        }
    }

    fft_events = (struct event_in_fft_t *)malloc(MAX_CALL_FFT*sizeof(struct event_in_fft_t));

    kernel_butter_events = (cl_event *)malloc(MAX_CALL_FFT*m*sizeof(cl_event));

    //===================================================================================================
    CL_CHECK(clGetPlatformIDs(MAX_PLATFORM_ID, platform_id, &ret_num_platforms));

    if (ret_num_platforms == 0 ) {
        fprintf(stderr,"[Erro] Não existem plataformas OpenCL\n");
        exit(2);
    }

    //===================================================================================================

    CL_CHECK(clGetDeviceIDs( platform_id[0], CL_DEVICE_TYPE_GPU, 1, &device_id, &ret_num_devices));
    //print_platform_info(&platform_id[1]);

    //===================================================================================================
    context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret_code);
    //===================================================================================================

    cmd_queue = clCreateCommandQueue(context, device_id, CL_QUEUE_PROFILING_ENABLE, &ret_code);
    //===================================================================================================

    image_in_mem = clCreateBuffer(context, CL_MEM_READ_WRITE, n*n*sizeof(cl_float2), NULL, &ret_code);
    image_out_mem = clCreateBuffer(context, CL_MEM_READ_WRITE, n*n*sizeof(cl_float2), NULL, &ret_code);
    twiddle_factors_mem = clCreateBuffer(context, CL_MEM_READ_WRITE, (n/2)*sizeof(cl_float2), NULL, &ret_code);
    //===================================================================================================

    /* Transfer data to memory buffer */
    CL_CHECK(clEnqueueWriteBuffer(cmd_queue, image_in_mem, CL_TRUE, 0, n*n*sizeof(cl_float2), image_in_host, 0, NULL, &write_host_dev_event));

    image_tam = n*n*sizeof(cl_float2);

    //===================================================================================================
    program = clCreateProgramWithSource(context, 1, (const char **)&source_str, (const size_t *)&source_size, &ret_code);
    //===================================================================================================
    ret_code = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
    //===================================================================================================
    if (ret_code != CL_SUCCESS) {
    // Determine the size of the log
    clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
    //===================================================================================================

    // Allocate memory for the log
    log_file = (char *) malloc(log_size);

    // Get the log
    clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, log_size, log_file, NULL);
    printf("%s\n", log_file);
    system("pause");
    exit(0);
}
    kernel_twiddle_factors = clCreateKernel(program, "twiddle_factors", &ret_code);
    kernel_matriz_transpose = clCreateKernel(program, "matrix_trasponse", &ret_code);
    kernel_lowpass_filter  = clCreateKernel(program, "lowpass_filter", &ret_code);

    /* Processa os fatores Wn*/
    //===================================================================================================
    CL_CHECK(clSetKernelArg(kernel_twiddle_factors, 0, sizeof(cl_mem), (void *)&twiddle_factors_mem));
    CL_CHECK(clSetKernelArg(kernel_twiddle_factors, 1, sizeof(cl_int), (void *)&n));
    config_workgroup_size(global_wg, local_wg, n/2, 1);
    CL_CHECK(clEnqueueNDRangeKernel(cmd_queue, kernel_twiddle_factors, 1, NULL, global_wg, local_wg, 0, NULL, &kernels_events_out_fft[0]));

    //===================================================================================================
    /* Executa a FFT em N/2 */
    fft_main(image_out_mem, image_in_mem, twiddle_factors_mem, m, direta, &fft_events[0]);

    //===================================================================================================
    /* Realiza a transposta da Matriz (imagem) */
    CL_CHECK(clSetKernelArg(kernel_matriz_transpose, 0, sizeof(cl_mem), (void *)&image_in_mem));
    CL_CHECK(clSetKernelArg(kernel_matriz_transpose, 1, sizeof(cl_mem), (void *)&image_out_mem));
    CL_CHECK(clSetKernelArg(kernel_matriz_transpose, 2, sizeof(cl_int), (void *)&n));
    config_workgroup_size(global_wg, local_wg, n, n);
    CL_CHECK(clEnqueueNDRangeKernel(cmd_queue, kernel_matriz_transpose, 2, NULL, global_wg, local_wg, 0, NULL, &kernels_events_out_fft[1]));

    //===================================================================================================
    /* Executa a FFT N/2 */
    fft_main(image_out_mem, image_in_mem, twiddle_factors_mem, m, direta, &fft_events[1]);

    //===================================================================================================
    /* Processa o filtro passa baixa */
    CL_CHECK(clSetKernelArg(kernel_lowpass_filter, 0, sizeof(cl_mem), (void *)&image_out_mem));
    CL_CHECK(clSetKernelArg(kernel_lowpass_filter, 1, sizeof(cl_int), (void *)&n));
    CL_CHECK(clSetKernelArg(kernel_lowpass_filter, 2, sizeof(cl_int), (void *)&raio));
    config_workgroup_size(global_wg, local_wg, n, n);
    CL_CHECK(clEnqueueNDRangeKernel(cmd_queue, kernel_lowpass_filter, 2, NULL, global_wg, local_wg, 0, NULL, &kernels_events_out_fft[2]));

    //===================================================================================================
    /* Obtem a FFT inversa*/
    fft_main(image_in_mem, image_out_mem, twiddle_factors_mem, m, inversa, &fft_events[2]);
    //===================================================================================================

    /* Realiza a transposta da Matriz (imagem) */
    CL_CHECK(clSetKernelArg(kernel_matriz_transpose, 0, sizeof(cl_mem), (void *)&image_out_mem));
    CL_CHECK(clSetKernelArg(kernel_matriz_transpose, 1, sizeof(cl_mem), (void *)&image_in_mem));
    CL_CHECK(clSetKernelArg(kernel_matriz_transpose, 2, sizeof(cl_int), (void *)&n));
    config_workgroup_size(global_wg, local_wg, n, n);
    CL_CHECK(clEnqueueNDRangeKernel(cmd_queue, kernel_matriz_transpose, 2, NULL, global_wg, local_wg, 0, NULL, &kernels_events_out_fft[3]));

    //===================================================================================================
    fft_main(image_in_mem, image_out_mem, twiddle_factors_mem, m, inversa, &fft_events[3]);
    //===================================================================================================

    CL_CHECK(clEnqueueReadBuffer(cmd_queue, image_in_mem, CL_TRUE, 0, n*n*sizeof(cl_float2), image_in_host, 0, NULL, &read_dev_host_event));
    //===================================================================================================

    //== Total time elapsed ============================================================================
    timer_stop();
    tempo_total = get_elapsed_time();
    //==================================================================================================

    //====== Get time of Profile Info ==================================================================
    // Write data time
    CL_CHECK(clGetEventProfilingInfo(write_host_dev_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &write_host_dev_start_time, NULL));
    CL_CHECK(clGetEventProfilingInfo(write_host_dev_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &write_host_dev_end_time, NULL));
    // Read data time
    CL_CHECK(clGetEventProfilingInfo(read_dev_host_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &read_dev_host_start_time, NULL));
    CL_CHECK(clGetEventProfilingInfo(read_dev_host_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &read_dev_host_end_time, NULL));

    for (i = 0; i < MAX_CALL_FFT; i++) {

        kernel_start_time = (cl_long) 0;
        kernel_end_time = (cl_long) 0;
        CL_CHECK(clGetEventProfilingInfo(kernels_events_out_fft[i], CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &kernel_start_time, NULL));
        CL_CHECK(clGetEventProfilingInfo(kernels_events_out_fft[i], CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &kernel_end_time, NULL));
        kernel_runtime += (kernel_end_time - kernel_start_time);

        kernel_start_time = (cl_long) 0;
        kernel_end_time = (cl_long) 0;
        CL_CHECK(clGetEventProfilingInfo(fft_events[i].kernel_bitsrev, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &kernel_start_time, NULL));
        CL_CHECK(clGetEventProfilingInfo(fft_events[i].kernel_bitsrev, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &kernel_end_time, NULL));
        kernel_runtime += (kernel_end_time - kernel_start_time);

        kernel_start_time = (cl_long) 0;
        kernel_end_time = (cl_long) 0;

        if (fft_events[i].kernel_normalize != NULL) {
            CL_CHECK(clGetEventProfilingInfo(fft_events[i].kernel_normalize, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &kernel_start_time, NULL));
            CL_CHECK(clGetEventProfilingInfo(fft_events[i].kernel_normalize, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &kernel_end_time, NULL));
            kernel_runtime += (kernel_end_time - kernel_start_time);
        }
   }

    for (j=0; j < MAX_CALL_FFT*m; j++){
        kernel_start_time = (cl_long) 0;
        kernel_end_time = (cl_long) 0;

        CL_CHECK(clGetEventProfilingInfo(kernel_butter_events[j], CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &kernel_start_time, NULL));
        CL_CHECK(clGetEventProfilingInfo(kernel_butter_events[j], CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &kernel_end_time, NULL));
        kernel_runtime += (kernel_end_time - kernel_start_time);
    }

    write_host_dev_run_time = write_host_dev_end_time - write_host_dev_start_time;
    read_dev_host_run_time =  read_dev_host_end_time -  read_dev_host_start_time;

/* save_log_debug(write_host_dev_run_time,fp);
    save_log_debug(read_dev_host_run_time,fp);
    close_log_debug(fp); */

    image_tam_MB = (double) (((double) image_tam)/(double) MEGA_BYTES);

    //==================================================================================================
    save_log_gpu(image_filename, kernel_runtime, (double) (image_tam_MB/( (double) read_dev_host_run_time/(double) NANOSECONDS)),
    (double) (image_tam_MB/ ((double) write_host_dev_run_time/ (double) NANOSECONDS)), tempo_total, LOG_NAME);

    //===================================================================================================
    image_amplitudes = (float*)malloc(n*n*sizeof(float));
    for (i=0; i < n; i++) {
        for (j=0; j < n; j++) {
            image_amplitudes[n*j + i] = (float) (AMP(((float*)image_in_host)[(2*n*j)+2*i], ((float*)image_in_host)[(2*n*j)+2*i+1]));
        }
    }

    //clFlush(cmd_queue);
    //clFinish(cmd_queue);
    opgm.width = n;
    opgm.height = n;

    normalizar_pgm(&opgm, image_amplitudes);
    escrever_pgm(&opgm, output_filename);

    //===================================================================================================
	clFinish(cmd_queue);
    clReleaseKernel(kernel_twiddle_factors);
    clReleaseKernel(kernel_matriz_transpose);
    clReleaseKernel(kernel_lowpass_filter);
    clReleaseProgram(program);
    clReleaseMemObject(image_in_mem);
    clReleaseMemObject(image_out_mem);
    clReleaseMemObject(twiddle_factors_mem);
    clReleaseCommandQueue(cmd_queue);
    clReleaseContext(context);
	clReleaseEvent(read_dev_host_event);
	clReleaseEvent(write_host_dev_event);
	clReleaseEvent(kernels_events_out_fft[0]);
	clReleaseEvent(kernels_events_out_fft[1]);
	clReleaseEvent(kernels_events_out_fft[2]);
	clReleaseEvent(kernels_events_out_fft[3]);
    destruir_pgm(&ipgm);
    destruir_pgm(&opgm);
    free(image_amplitudes);
    free(source_str);
    free(image_in_host);
    free(image_filename);
    free(twiddle_factors_host);
    free(output_filename);
    free(fft_events);
    free(kernel_butter_events);

    //_CrtDumpMemoryLeaks();

    return 0;
}