int main(void) { RCC_Configuration(); GPIO_Configuration(); RCC_ClocksTypeDef RCC_Clocks; RCC_GetClocksFreq(&RCC_Clocks); SysTick_Config(RCC_Clocks.HCLK_Frequency / 1000 - 1); setup_adc(); tim1_init(); usart_init(); PWM_U = 0; PWM_V = 0; PWM_W = 0; while(1){ if(uartsend == 1){ amp = AMP(amp_raw); volt = VOLT(volt_raw); if(temp_raw < ARES && temp_raw > 0){ temp = TEMP(temp_raw); } from_hv.dc_volt = TOFIXED(volt); from_hv.dc_cur = TOFIXED(amp); from_hv.hv_temp = TOFIXED(temp); #ifdef TROLLER from_hv.dc_cur = TOFIXED(0); from_hv.hv_temp = TOFIXED(0); from_hv.a = TOFIXED(AMP(ADCConvertedValue[1])); from_hv.b = TOFIXED(AMP(ADCConvertedValue[2])); from_hv.c = TOFIXED(AMP(ADCConvertedValue[3])); #endif uartsend = 0; while (USART_GetFlagStatus(USART2, USART_FLAG_TXE) == RESET); USART_SendData(USART2, 0x154); for(int j = 0;j<sizeof(from_hv_t);j++){ while (USART_GetFlagStatus(USART2, USART_FLAG_TXE) == RESET); USART_SendData(USART2, ((uint8_t*)&from_hv)[j]); } } //GPIOA->BSRR = (GPIOA->ODR ^ GPIO_Pin_2) | (GPIO_Pin_2 << 16);//toggle red led } }
int main(int argc, char **argv) { int i, mark, pos, delay; char dir; hmax = 50; hist = malloc(hmax * sizeof(struct config)); memset(&hist[0], 0, sizeof (struct config)); for (i = 1; i <= bsize; i++) sumsize += AMP(i); if (argc == 1) delay = 28; else delay = atoi(argv[1]); DPRINTF(("delay=%d\n", delay)); #if 0 scanf("%d\n", &mark); if (mark != bsize) exit(1); #endif for (mark = X; scanf("%c%d\n", &dir, &pos) == 2; mark = OTHER(mark)) { domove(dir, pos-1, mark); } signal(SIGALRM, timeout); alarm(delay); computemove(&bestdir, &bestpos, mark); DPRINTF(("done\n")); timeout(); exit(0); }
int main(int argc, char *argv[]) { int i, j, m, n; double tempo_kernel; double tempo_total; char *output_filename; float *image_amplitudes; float (*x)[2]; float (*X)[2]; pgm_t ipgm, opgm; image_file_t *image_filename; timer_reset(); timer_start(); if (argc < 2) { printf("**Erro: parametros de entrada invalidos"); exit(EXIT_FAILURE); } image_filename = (image_file_t *) malloc(sizeof(image_file_t)); split_image_filename(image_filename, argv[1]); output_filename = (char *) malloc(40*sizeof(char)); sprintf(output_filename, "%d.%d.%s.%s.%s", image_filename->res, image_filename->num, ENV_TYPE, APP_TYPE, EXTENSAO); if( ler_pgm(&ipgm, argv[1]) == -1) exit(EXIT_FAILURE); n = ipgm.width; m = (int)(log((double)n)/log(2.0)); x = malloc(2 * n * n * sizeof(float)); X = malloc(2 * n * n * sizeof(float)); opgm.width = n; opgm.height = n; for (i = 0; i < n; i++) { for (j = 0; j < n; j++) { x[i*n + j][0] = (float) ipgm.buf[i*n + j]; x[i*n + j][1] = (float) 0; } } /* Check that n = 2^m for some integer m >= 1. */ if (n >= 2) { i = n; while(i==2*(i/2)) i = i/2; /* While i is even, factor out a 2. */ } /* For n >=2, we now have N = 2^n iff i = 1. */ if (n < 2 || i != 1) { printf(" %d deve ser um inteiro tal que n = 2^m , para m >= 1", n); exit(EXIT_FAILURE); } timer_stop(); tempo_total = get_elapsed_time(); //====== Performance Test - start ======================================= timer_reset(); timer_start(); j = 0; j = n*n; //fft direta fft(j, x, X); // filtro passa baixa lowpass_filter(X, n); //fft inversa for(i=0; i<j; i++) x[i][0] = x[i][1] = 0; ifft(j, x, X); timer_stop(); tempo_kernel = get_elapsed_time(); tempo_total += tempo_kernel; //====== Performance Test - end ============================================ save_log_cpu(image_filename, tempo_kernel, tempo_total, LOG_NAME); image_amplitudes = (float*)malloc(n*n*sizeof(float)); for (i=0; i < n; i++) { for (j=0; j < n; j++) { image_amplitudes[i*n + j] = (float) (AMP(x[i*n + j][0], x[i*n + j][1])); } } normalizar_pgm(&opgm, image_amplitudes); escrever_pgm(&opgm, output_filename); free(x); free(X); destruir_pgm(&ipgm); destruir_pgm(&opgm); free(image_filename); free(output_filename); free(image_amplitudes); _CrtDumpMemoryLeaks(); return 0; }
int main(int argc, char *argv[]) { //fprintf(stderr, "[%s:%d:%s()] FFT!\n", __FILE__, __LINE__, __func__); LOG("FFT Start\n"); cl_mem xmobj = NULL; cl_mem rmobj = NULL; cl_mem wmobj = NULL; cl_kernel sfac = NULL; cl_kernel trns = NULL; cl_kernel hpfl = NULL; cl_uint ret_num_platforms; cl_uint ret_num_devices; cl_int ret; cl_float2 *xm; cl_float2 *rm; cl_float2 *wm; pgm_t ipgm; pgm_t opgm; FILE *fp; const char fileName[] = "./fft.cl"; size_t source_size; char *source_str; cl_int i, j; cl_int n; cl_int m; size_t gws[2]; size_t lws[2]; fp = fopen(fileName, "r"); if(!fp) { fprintf(stderr, "[%s:%d:%s()] ERROR, Failed to load kernel source.\n", __FILE__, __LINE__, __func__); return 1; } source_str = (char *)malloc(MAX_SOURCE_SIZE); source_size = fread(source_str, 1, MAX_SOURCE_SIZE, fp); fclose(fp); readPGM(&ipgm, "./lena.pgm"); n = ipgm.width; m = (cl_int)(log((double)n)/log(2.0)); LOG("n = %d, m = %d.\n", m, n); xm = (cl_float2*)malloc(n*n*sizeof(cl_float2)); rm = (cl_float2*)malloc(n*n*sizeof(cl_float2)); wm = (cl_float2*)malloc(n/2 *sizeof(cl_float2)); for( i = 0; i < n; i++) { for(j = 0; j < n; j++) { ((float*)xm)[2*(n*j + i) + 0] = (float)ipgm.buf[n*j + i]; ((float*)xm)[2*(n*j + i) + 1] = (float)0; } } CL_CHECK(ret = clGetPlatformIDs(MAX_PLATFORM_IDS, platform_ids, &ret_num_platforms)); platform_id = platform_ids[0]; CL_CHECK(ret = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, &ret_num_devices)); LOG("platform_id = %p, device_id = %p\n", platform_id, device_id); context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret); CL_CHECK(ret); queue = clCreateCommandQueue(context, device_id, 0, &ret); xmobj = clCreateBuffer(context, CL_MEM_READ_WRITE, n*n*sizeof(cl_float2), NULL, &ret); CL_CHECK(ret); rmobj = clCreateBuffer(context, CL_MEM_READ_WRITE, n*n*sizeof(cl_float2), NULL, &ret); CL_CHECK(ret); wmobj = clCreateBuffer(context, CL_MEM_READ_WRITE, n*n*sizeof(cl_float2), NULL, &ret); CL_CHECK(ret); CL_CHECK(ret = clEnqueueWriteBuffer(queue, xmobj, CL_TRUE, 0, n*n*sizeof(cl_float2), xm, 0, NULL, NULL)); program = clCreateProgramWithSource(context, 1, (const char **)&source_str, (const size_t *)&source_size, &ret); CL_CHECK(ret); CL_CHECK(ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL)); sfac = clCreateKernel(program, "spinFact", &ret); CL_CHECK(ret); trns = clCreateKernel(program, "transpose", &ret); CL_CHECK(ret); hpfl = clCreateKernel(program, "highPassFilter", &ret); CL_CHECK(ret); CL_CHECK(ret = clSetKernelArg(sfac, 0, sizeof(cl_mem), (void *)&wmobj)); CL_CHECK(ret = clSetKernelArg(sfac, 1, sizeof(cl_int), (void *)&n)); setWorkSize(gws, lws, n/2, 1); CL_CHECK(ret = clEnqueueNDRangeKernel(queue, sfac, 1, NULL, gws, lws, 0, NULL, NULL)); fftCore(rmobj, xmobj, wmobj, m, forward); CL_CHECK(ret = clSetKernelArg(trns, 0, sizeof(cl_mem), (void *)&xmobj)); CL_CHECK(ret = clSetKernelArg(trns, 1, sizeof(cl_mem), (void *)&rmobj)); CL_CHECK(ret = clSetKernelArg(trns, 2, sizeof(cl_int), (void *)&n)); setWorkSize(gws, lws, n, n); CL_CHECK(ret = clEnqueueNDRangeKernel(queue, trns, 2, NULL, gws, lws, 0, NULL, NULL)); fftCore(rmobj, xmobj, wmobj, m, forward); #if 1 //FILTER cl_int radius = n>>4; CL_CHECK(ret = clSetKernelArg(hpfl, 0, sizeof(cl_mem), (void *)&rmobj)); CL_CHECK(ret = clSetKernelArg(hpfl, 1, sizeof(cl_int), (void *)&n)); CL_CHECK(ret = clSetKernelArg(hpfl, 2, sizeof(cl_int), (void *)&radius)); setWorkSize(gws, lws, n, n); CL_CHECK(ret = clEnqueueNDRangeKernel(queue, hpfl, 2, NULL, gws, lws, 0, NULL, NULL)); #endif #if 1 /* Inverse FFT */ fftCore(xmobj, rmobj, wmobj, m, inverse); CL_CHECK(ret = clSetKernelArg(trns, 0, sizeof(cl_mem), (void *)&rmobj)); CL_CHECK(ret = clSetKernelArg(trns, 1, sizeof(cl_mem), (void *)&xmobj)); CL_CHECK(ret = clSetKernelArg(trns, 2, sizeof(cl_int), (void *)&n)); setWorkSize(gws, lws, n, n); CL_CHECK(ret = clEnqueueNDRangeKernel(queue, trns, 2, NULL, gws, lws, 0, NULL, NULL)); fftCore(xmobj, rmobj, wmobj, m, inverse); #endif CL_CHECK(ret = clEnqueueReadBuffer(queue, xmobj, CL_TRUE, 0, n*n*sizeof(cl_float2), xm, 0, NULL, NULL)); float *ampd; ampd = (float*)malloc(n*n*sizeof(float)); for(i = 0; i < n; i++) { for(j = 0; j < n; j++) { ampd[n*i + j] = AMP( ((float*)xm)[2*(n*i + j)], ((float*)xm)[2*(n*i + j) + 1] ); // fprintf(stderr, "%d ", (int)ampd[n*i + j]); } // fprintf(stderr, "\n"); } opgm.width = n; opgm.height = n; normalizeF2PGM(&opgm, ampd); free(ampd); writePGM(&opgm, "output.pgm"); /* Termination */ CL_CHECK(ret = clFlush(queue)); CL_CHECK(ret = clFinish(queue)); CL_CHECK(ret = clReleaseKernel(hpfl)); CL_CHECK(ret = clReleaseKernel(trns)); CL_CHECK(ret = clReleaseKernel(sfac)); CL_CHECK(ret = clReleaseProgram(program)); CL_CHECK(ret = clReleaseMemObject(xmobj)); CL_CHECK(ret = clReleaseMemObject(rmobj)); CL_CHECK(ret = clReleaseMemObject(wmobj)); CL_CHECK(ret = clReleaseCommandQueue(queue)); CL_CHECK(ret = clReleaseContext(context)); destroyPGM(&ipgm); destroyPGM(&opgm); free(source_str); free(wm); free(rm); free(xm); return 0; }
static int mrvl_usb_phy_28nm_init(u32 base) { struct usb_file *file = (struct usb_file *)(uintptr_t)base; u32 tmp32; int ret; /* * pll control 0: * 0xd420_7000[6:0] = 0xd, b'000_1101 ---> REFDIV * 0xd420_7000[24:16] = 0xf0, b'1111_0000 ---> FBDIV * 0xd420_7000[11:8] = 0x3, b'0011 ---> ICP * 0xd420_7000[29:28] = 0x1, b'01 ---> SEL_LPFR */ tmp32 = readl(&file->pll_reg0); tmp32 &= ~(REFDIV_MASK | FB_DIV_MASK | ICP_MASK | SEL_LPFR_MASK); tmp32 |= REFDIV(0xd) | FB_DIV(0xf0) | ICP(0x3) | SEL_LPFR(0x1); writel(tmp32, &file->pll_reg0); /* * pll control 1: * 0xd420_7004[1:0] = 0x3, b'11 ---> [PU_PLL_BY_REG:PU_PLL] */ tmp32 = readl(&file->pll_reg1); tmp32 &= ~(PLL_PU_MASK | PU_BY_MASK); tmp32 |= PLL_PU(0x1) | PU_BY(0x1); writel(tmp32, &file->pll_reg1); /* * tx reg 0: * 0xd420_700c[22:20] = 0x3, b'11 ---> AMP */ tmp32 = readl(&file->tx_reg0); tmp32 &= ~(AMP_MASK); tmp32 |= AMP(0x3); writel(tmp32, &file->tx_reg0); /* * rx reg 0: * 0xd420_7018[3:0] = 0xa, b'1010 ---> SQ_THRESH */ tmp32 = readl(&file->rx_reg0); tmp32 &= ~(SQ_THRESH_MASK); tmp32 |= SQ_THRESH(0xa); writel(tmp32, &file->rx_reg0); /* * dig reg 0: * 0xd420_701c[31] = 0, b'0 ---> BITSTAFFING_ERROR * 0xd420_701c[30] = 0, b'0 ---> LOSS_OF_SYNC_ERROR * 0xd420_701c[18:16] = 0x7, b'111 ---> SQ_FILT * 0xd420_701c[14:12] = 0x4, b'100 ---> SQ_BLK * 0xd420_701c[1:0] = 0x2, b'10 ---> SYNC_NUM */ tmp32 = readl(&file->dig_reg0); tmp32 &= ~(BITSTAFFING_ERR_MASK | SYNC_ERR_MASK | SQ_FILT_MASK | SQ_BLK_MASK | SYNC_NUM_MASK); tmp32 |= (SQ_FILT(0x0) | SQ_BLK(0x0) | SYNC_NUM(0x1)); writel(tmp32, &file->dig_reg0); /* * otg reg: * 0xd420_7034[5:4] = 0x1, b'01 ---> [OTG_CONTROL_BY_PIN:PU_OTG] */ tmp32 = readl(&file->otg_reg); tmp32 &= ~(OTG_CTRL_BY_MASK | PU_OTG_MASK); tmp32 |= OTG_CTRL_BY(0x0) | PU_OTG(0x1); writel(tmp32, &file->otg_reg); /* * tx reg 0: * 0xd420_700c[25:24] = 0x3, b'11 ---> [PU_ANA:PU_BY_REG] */ tmp32 = readl(&file->tx_reg0); tmp32 &= ~(ANA_PU_MASK | TX_PU_BY_MASK); tmp32 |= ANA_PU(0x1) | TX_PU_BY(0x1); writel(tmp32, &file->tx_reg0); udelay(400); ret = wait_for_phy_ready(file); if (ret < 0) { printf("initialize usb phy failed, dump usb registers:\n"); dump_phy_regs(base); } printf("usb phy inited 0x%x!\n", readl(&file->usb_ctrl0)); return 0; }
int asf_igram_coh(int lookLine, int lookSample, int stepLine, int stepSample, char *masterFile, char *slaveFile, char *outBase, float *average) { char ampFile[255], phaseFile[255]; //, igramFile[512]; char cohFile[512], ml_ampFile[255], ml_phaseFile[255]; //, ml_igramFile[512]; FILE *fpMaster, *fpSlave, *fpAmp, *fpPhase, *fpCoh, *fpAmp_ml, *fpPhase_ml; int line, sample_count, line_count, count; float bin_high, bin_low, max=0.0, sum_a, sum_b, ampScale; double hist_sum=0.0, percent, percent_sum; long long hist_val[HIST_SIZE], hist_cnt=0; meta_parameters *inMeta,*outMeta, *ml_outMeta; complexFloat *master, *slave, *sum_igram, *sum_ml_igram; float *amp, *phase, *sum_cpx_a, *sum_cpx_b, *coh, *pCoh; float *ml_amp, *ml_phase; // FIXME: Processing flow with two-banded interferogram needed - backed out // for now create_name(ampFile, outBase,"_igram_amp.img"); create_name(phaseFile, outBase,"_igram_phase.img"); create_name(ml_ampFile, outBase,"_igram_ml_amp.img"); create_name(ml_phaseFile, outBase,"_igram_ml_phase.img"); //create_name(igramFile, outBase,"_igram.img"); //create_name(ml_igramFile, outBase, "_igram_ml.img"); //sprintf(cohFile, "coherence.img"); create_name(cohFile, outBase, "_coh.img"); // Read input meta file inMeta = meta_read(masterFile); line_count = inMeta->general->line_count; sample_count = inMeta->general->sample_count; ampScale = 1.0/(stepLine*stepSample); // Generate metadata for single-look images outMeta = meta_read(masterFile); outMeta->general->data_type = REAL32; // Write metadata for interferometric amplitude outMeta->general->image_data_type = AMPLITUDE_IMAGE; meta_write(outMeta, ampFile); // Write metadata for interferometric phase outMeta->general->image_data_type = PHASE_IMAGE; meta_write(outMeta, phaseFile); /* // Write metadata for interferogram outMeta->general->image_data_type = INTERFEROGRAM; outMeta->general->band_count = 2; strcpy(outMeta->general->bands, "IGRAM-AMP,IGRAM-PHASE"); meta_write(outMeta, igramFile); */ // Generate metadata for multilooked images ml_outMeta = meta_read(masterFile); ml_outMeta->general->data_type = REAL32; ml_outMeta->general->line_count = line_count/stepLine; ml_outMeta->general->sample_count = sample_count/stepSample; ml_outMeta->general->x_pixel_size *= stepSample; ml_outMeta->general->y_pixel_size *= stepLine; ml_outMeta->sar->multilook = 1; ml_outMeta->sar->line_increment *= stepLine; ml_outMeta->sar->sample_increment *= stepSample; // FIXME: This is the wrong increment but create_dem_grid does not know any // better at the moment. //ml_outMeta->sar->line_increment = 1; //ml_outMeta->sar->sample_increment = 1; // Write metadata for multilooked interferometric amplitude ml_outMeta->general->image_data_type = AMPLITUDE_IMAGE; meta_write(ml_outMeta, ml_ampFile); // Write metadata for multilooked interferometric phase ml_outMeta->general->image_data_type = PHASE_IMAGE; meta_write(ml_outMeta, ml_phaseFile); // Write metadata for coherence image ml_outMeta->general->image_data_type = COHERENCE_IMAGE; meta_write(ml_outMeta, cohFile); /* // Write metadata for multilooked interferogram ml_outMeta->general->image_data_type = INTERFEROGRAM; strcpy(ml_outMeta->general->bands, "IGRAM-AMP,IGRAM-PHASE"); ml_outMeta->general->band_count = 2; meta_write(ml_outMeta, ml_igramFile); */ // Allocate memory master = (complexFloat *) MALLOC(sizeof(complexFloat)*sample_count*lookLine); slave = (complexFloat *) MALLOC(sizeof(complexFloat)*sample_count*lookLine); amp = (float *) MALLOC(sizeof(float)*sample_count*lookLine); phase = (float *) MALLOC(sizeof(float)*sample_count*lookLine); ml_amp = (float *) MALLOC(sizeof(float)*sample_count/stepSample); ml_phase = (float *) MALLOC(sizeof(float)*sample_count/stepSample); coh = (float *) MALLOC(sizeof(float)*sample_count/stepSample); sum_cpx_a = (float *) MALLOC(sizeof(float)*sample_count); sum_cpx_b = (float *) MALLOC(sizeof(float)*sample_count); sum_igram = (complexFloat *) MALLOC(sizeof(complexFloat)*sample_count); sum_ml_igram = (complexFloat *) MALLOC(sizeof(complexFloat)*sample_count); // Open files fpMaster = FOPEN(masterFile,"rb"); fpSlave = FOPEN(slaveFile,"rb"); fpAmp = FOPEN(ampFile,"wb"); fpPhase = FOPEN(phaseFile,"wb"); fpAmp_ml = FOPEN(ml_ampFile,"wb"); fpPhase_ml = FOPEN(ml_phaseFile,"wb"); //FILE *fpIgram = FOPEN(igramFile, "wb"); //FILE *fpIgram_ml = FOPEN(ml_igramFile, "wb"); fpCoh = FOPEN(cohFile,"wb"); // Initialize histogram for (count=0; count<HIST_SIZE; count++) hist_val[count] = 0; asfPrintStatus(" Calculating interferogram and coherence ...\n\n"); for (line=0; line<line_count; line+=stepLine) { register int offset, row, column, limitLine; double igram_real, igram_imag; int inCol; limitLine=MIN(lookLine, line_count-line); printf("Percent completed %3.0f\r",(float)line/line_count*100.0); pCoh = coh; // Read in the next lines of data get_complexFloat_lines(fpMaster, inMeta, line, limitLine, master); get_complexFloat_lines(fpSlave, inMeta, line, limitLine, slave); // Add the remaining rows into sum vectors offset = sample_count; for (column=0; column<sample_count; column++) { offset = column; sum_cpx_a[column] = 0.0; sum_cpx_b[column] = 0.0; sum_igram[column].real = 0.0; sum_igram[column].imag = 0.0; sum_ml_igram[column].real = 0.0; sum_ml_igram[column].imag = 0.0; igram_real = 0.0; igram_imag = 0.0; for (row=0; row<limitLine; row++) { // Complex multiplication for interferogram generation igram_real = master[offset].real*slave[offset].real + master[offset].imag*slave[offset].imag; igram_imag = master[offset].imag*slave[offset].real - master[offset].real*slave[offset].imag; amp[offset] = sqrt(igram_real*igram_real + igram_imag*igram_imag); if (FLOAT_EQUIVALENT(igram_real, 0.0) || FLOAT_EQUIVALENT(igram_imag, 0.0)) phase[offset]=0.0; else phase[offset] = atan2(igram_imag, igram_real); sum_cpx_a[column] += AMP(master[offset])*AMP(master[offset]); sum_cpx_b[column] += AMP(slave[offset])*AMP(slave[offset]); sum_igram[column].real += igram_real; sum_igram[column].imag += igram_imag; if (line % stepLine == 0 && row < stepLine) { sum_ml_igram[column].real += igram_real; sum_ml_igram[column].imag += igram_imag; } offset += sample_count; } ml_amp[column] = sqrt(sum_ml_igram[column].real*sum_ml_igram[column].real + sum_ml_igram[column].imag*sum_ml_igram[column].imag)*ampScale; if (FLOAT_EQUIVALENT(sum_ml_igram[column].real, 0.0) || FLOAT_EQUIVALENT(sum_ml_igram[column].imag, 0.0)) ml_phase[column] = 0.0; else ml_phase[column] = atan2(sum_ml_igram[column].imag, sum_ml_igram[column].real); } // Write single-look and multilooked amplitude and phase put_float_lines(fpAmp, outMeta, line, stepLine, amp); put_float_lines(fpPhase, outMeta, line, stepLine, phase); put_float_line(fpAmp_ml, ml_outMeta, line/stepLine, ml_amp); put_float_line(fpPhase_ml, ml_outMeta, line/stepLine, ml_phase); //put_band_float_lines(fpIgram, outMeta, 0, line, stepLine, amp); //put_band_float_lines(fpIgram, outMeta, 1, line, stepLine, phase); //put_band_float_line(fpIgram_ml, ml_outMeta, 0, line/stepLine, ml_amp); //put_band_float_line(fpIgram_ml, ml_outMeta, 1, line/stepLine, ml_phase); // Calculate the coherence by adding from sum vectors for (inCol=0; inCol<sample_count; inCol+=stepSample) { register int limitSample = MIN(lookSample,sample_count-inCol); sum_a = 0.0; sum_b = 0.0; igram_real = 0.0; igram_imag = 0.0; // Step over multilook area and sum output columns for (column=0; column<limitSample; column++) { igram_real += sum_igram[inCol+column].real; igram_imag += sum_igram[inCol+column].imag; sum_a += sum_cpx_a[inCol+column]; sum_b += sum_cpx_b[inCol+column]; } if (FLOAT_EQUIVALENT((sum_a*sum_b), 0.0)) *pCoh = 0.0; else { *pCoh = (float) sqrt(igram_real*igram_real + igram_imag*igram_imag) / sqrt(sum_a * sum_b); if (*pCoh>1.0001) { printf(" coh = %f -- setting to 1.0\n",*pCoh); printf(" You shouldn't have seen this!\n"); printf(" Exiting.\n"); exit(EXIT_FAILURE); *pCoh=1.0; } } pCoh++; } // Write out values for coherence put_float_line(fpCoh, ml_outMeta, line/stepLine, coh); // Keep filling coherence histogram for (count=0; count<sample_count/stepSample; count++) { register int tmp; tmp = (int) (coh[count]*HIST_SIZE); /* Figure out which bin this value is in */ /* This shouldn't happen */ if(tmp >= HIST_SIZE) tmp = HIST_SIZE-1; if(tmp < 0) tmp = 0; hist_val[tmp]++; // Increment that bin for the histogram hist_sum += coh[count]; // Add up the values for the sum hist_cnt++; // Keep track of the total number of values if (coh[count]>max) max = coh[count]; // Calculate maximum coherence } } // End for line printf("Percent completed %3.0f\n",(float)line/line_count*100.0); // Sum and print the statistics percent_sum = 0.0; printf(" Coherence : Occurrences : Percent\n"); printf(" ---------------------------------------\n"); for (count=0; count<HIST_SIZE; count++) { bin_low = (float)(count)/(float)HIST_SIZE; bin_high = (float)(count+1)/(float)HIST_SIZE; percent = (double)hist_val[count]/(double)hist_cnt; percent_sum += (float)100*percent; printf(" %.2f -> %.2f : %.8lld %2.3f \n", bin_low,bin_high, (long long) hist_val[count],100*percent); } *average = (float)hist_sum/(float)hist_cnt; printf(" ---------------------------------------\n"); printf(" Maximum Coherence: %.3f\n", max); printf(" Average Coherence: %.3f (%.1f / %lld) %f\n", *average,hist_sum, hist_cnt, percent_sum); // Free and exit FREE(master); FREE(slave); FREE(amp); FREE(phase); FREE(ml_amp); FREE(ml_phase); FREE(coh); FCLOSE(fpMaster); FCLOSE(fpSlave); FCLOSE(fpAmp); FCLOSE(fpPhase); FCLOSE(fpAmp_ml); FCLOSE(fpPhase_ml); //FCLOSE(fpIgram); //FCLOSE(fpIgram_ml); FCLOSE(fpCoh); meta_free(inMeta); meta_free(outMeta); meta_free(ml_outMeta); return(0); }
int main(int argc, char *argv[]) { //FILE *fp; cl_platform_id platform_id[2]; cl_uint ret_num_devices; cl_uint ret_num_platforms; cl_int ret_code; cl_mem image_in_mem = NULL; cl_mem image_out_mem = NULL; cl_mem twiddle_factors_mem = NULL; cl_float2 *image_in_host; cl_float2 *twiddle_factors_host; cl_kernel kernel_twiddle_factors; cl_kernel kernel_matriz_transpose; cl_kernel kernel_lowpass_filter; pgm_t ipgm; pgm_t opgm; image_file_t *image_filename; char *output_filename; FILE *fp; const char *kernel_filename = C_NOME_ARQ_KERNEL; size_t source_size; char *source_str; cl_int i, j,n ,m; cl_int raio = 0; size_t global_wg[2]; size_t local_wg[2]; float *image_amplitudes; size_t log_size; char *log_file; cl_event kernels_events_out_fft[4]; cl_ulong kernel_runtime = (cl_ulong) 0; cl_ulong kernel_start_time = (cl_ulong) 0; cl_ulong kernel_end_time = (cl_ulong) 0; cl_event write_host_dev_event; cl_ulong write_host_dev_start_time = (cl_ulong) 0; cl_ulong write_host_dev_end_time = (cl_ulong) 0; cl_ulong write_host_dev_run_time = (cl_ulong) 0; cl_event read_dev_host_event; cl_ulong read_dev_host_start_time = (cl_ulong) 0; cl_ulong read_dev_host_end_time = (cl_ulong) 0; cl_ulong read_dev_host_run_time = (cl_ulong) 0; unsigned __int64 image_tam; unsigned __int64 MEGA_BYTES = 1048576; // 1024*1024 double image_tam_MB; double tempo_total; struct event_in_fft_t *fft_events; //=== Timer count start ============================================================================== timer_reset(); timer_start(); //=================================================================================================== if (argc < 2) { printf("**Erro: O arquivo de entrada eh necessario.\n"); exit(EXIT_FAILURE); } image_filename = (image_file_t *) malloc(sizeof(image_file_t)); split_image_filename(image_filename, argv[1]); output_filename = (char *) malloc(40*sizeof(char)); sprintf(output_filename, "%d.%d.%s.%s.%s", image_filename->res, image_filename->num, ENV_TYPE, APP_TYPE, EXTENSAO); fp = fopen(kernel_filename, "r"); if (!fp) { fprintf(stderr, "Failed to load kernel.\n"); exit(EXIT_FAILURE); } source_str = (char *)malloc(MAX_SOURCE_SIZE); source_size = fread(source_str, 1, MAX_SOURCE_SIZE, fp); fclose( fp ); //=================================================================================================== /* Abrindo imagem do arquivo para objeto de memoria local*/ if( ler_pgm(&ipgm, argv[1]) == -1) exit(EXIT_FAILURE); n = ipgm.width; raio = n/8; m = (cl_int)(log((double)n)/log(2.0)); image_in_host = (cl_float2 *)malloc((n*n)*sizeof(cl_float2)); twiddle_factors_host = (cl_float2 *)malloc(n / 2 * sizeof(cl_float2)); for (i = 0; i < n; i++) { for (j = 0; j < n; j++) { image_in_host[n*i + j].s[0] = (float)ipgm.buf[n*i + j]; image_in_host[n*i + j].s[1] = (float)0; } } fft_events = (struct event_in_fft_t *)malloc(MAX_CALL_FFT*sizeof(struct event_in_fft_t)); kernel_butter_events = (cl_event *)malloc(MAX_CALL_FFT*m*sizeof(cl_event)); //=================================================================================================== CL_CHECK(clGetPlatformIDs(MAX_PLATFORM_ID, platform_id, &ret_num_platforms)); if (ret_num_platforms == 0 ) { fprintf(stderr,"[Erro] Não existem plataformas OpenCL\n"); exit(2); } //=================================================================================================== CL_CHECK(clGetDeviceIDs( platform_id[0], CL_DEVICE_TYPE_GPU, 1, &device_id, &ret_num_devices)); //print_platform_info(&platform_id[1]); //=================================================================================================== context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret_code); //=================================================================================================== cmd_queue = clCreateCommandQueue(context, device_id, CL_QUEUE_PROFILING_ENABLE, &ret_code); //=================================================================================================== image_in_mem = clCreateBuffer(context, CL_MEM_READ_WRITE, n*n*sizeof(cl_float2), NULL, &ret_code); image_out_mem = clCreateBuffer(context, CL_MEM_READ_WRITE, n*n*sizeof(cl_float2), NULL, &ret_code); twiddle_factors_mem = clCreateBuffer(context, CL_MEM_READ_WRITE, (n/2)*sizeof(cl_float2), NULL, &ret_code); //=================================================================================================== /* Transfer data to memory buffer */ CL_CHECK(clEnqueueWriteBuffer(cmd_queue, image_in_mem, CL_TRUE, 0, n*n*sizeof(cl_float2), image_in_host, 0, NULL, &write_host_dev_event)); image_tam = n*n*sizeof(cl_float2); //=================================================================================================== program = clCreateProgramWithSource(context, 1, (const char **)&source_str, (const size_t *)&source_size, &ret_code); //=================================================================================================== ret_code = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL); //=================================================================================================== if (ret_code != CL_SUCCESS) { // Determine the size of the log clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size); //=================================================================================================== // Allocate memory for the log log_file = (char *) malloc(log_size); // Get the log clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, log_size, log_file, NULL); printf("%s\n", log_file); system("pause"); exit(0); } kernel_twiddle_factors = clCreateKernel(program, "twiddle_factors", &ret_code); kernel_matriz_transpose = clCreateKernel(program, "matrix_trasponse", &ret_code); kernel_lowpass_filter = clCreateKernel(program, "lowpass_filter", &ret_code); /* Processa os fatores Wn*/ //=================================================================================================== CL_CHECK(clSetKernelArg(kernel_twiddle_factors, 0, sizeof(cl_mem), (void *)&twiddle_factors_mem)); CL_CHECK(clSetKernelArg(kernel_twiddle_factors, 1, sizeof(cl_int), (void *)&n)); config_workgroup_size(global_wg, local_wg, n/2, 1); CL_CHECK(clEnqueueNDRangeKernel(cmd_queue, kernel_twiddle_factors, 1, NULL, global_wg, local_wg, 0, NULL, &kernels_events_out_fft[0])); //=================================================================================================== /* Executa a FFT em N/2 */ fft_main(image_out_mem, image_in_mem, twiddle_factors_mem, m, direta, &fft_events[0]); //=================================================================================================== /* Realiza a transposta da Matriz (imagem) */ CL_CHECK(clSetKernelArg(kernel_matriz_transpose, 0, sizeof(cl_mem), (void *)&image_in_mem)); CL_CHECK(clSetKernelArg(kernel_matriz_transpose, 1, sizeof(cl_mem), (void *)&image_out_mem)); CL_CHECK(clSetKernelArg(kernel_matriz_transpose, 2, sizeof(cl_int), (void *)&n)); config_workgroup_size(global_wg, local_wg, n, n); CL_CHECK(clEnqueueNDRangeKernel(cmd_queue, kernel_matriz_transpose, 2, NULL, global_wg, local_wg, 0, NULL, &kernels_events_out_fft[1])); //=================================================================================================== /* Executa a FFT N/2 */ fft_main(image_out_mem, image_in_mem, twiddle_factors_mem, m, direta, &fft_events[1]); //=================================================================================================== /* Processa o filtro passa baixa */ CL_CHECK(clSetKernelArg(kernel_lowpass_filter, 0, sizeof(cl_mem), (void *)&image_out_mem)); CL_CHECK(clSetKernelArg(kernel_lowpass_filter, 1, sizeof(cl_int), (void *)&n)); CL_CHECK(clSetKernelArg(kernel_lowpass_filter, 2, sizeof(cl_int), (void *)&raio)); config_workgroup_size(global_wg, local_wg, n, n); CL_CHECK(clEnqueueNDRangeKernel(cmd_queue, kernel_lowpass_filter, 2, NULL, global_wg, local_wg, 0, NULL, &kernels_events_out_fft[2])); //=================================================================================================== /* Obtem a FFT inversa*/ fft_main(image_in_mem, image_out_mem, twiddle_factors_mem, m, inversa, &fft_events[2]); //=================================================================================================== /* Realiza a transposta da Matriz (imagem) */ CL_CHECK(clSetKernelArg(kernel_matriz_transpose, 0, sizeof(cl_mem), (void *)&image_out_mem)); CL_CHECK(clSetKernelArg(kernel_matriz_transpose, 1, sizeof(cl_mem), (void *)&image_in_mem)); CL_CHECK(clSetKernelArg(kernel_matriz_transpose, 2, sizeof(cl_int), (void *)&n)); config_workgroup_size(global_wg, local_wg, n, n); CL_CHECK(clEnqueueNDRangeKernel(cmd_queue, kernel_matriz_transpose, 2, NULL, global_wg, local_wg, 0, NULL, &kernels_events_out_fft[3])); //=================================================================================================== fft_main(image_in_mem, image_out_mem, twiddle_factors_mem, m, inversa, &fft_events[3]); //=================================================================================================== CL_CHECK(clEnqueueReadBuffer(cmd_queue, image_in_mem, CL_TRUE, 0, n*n*sizeof(cl_float2), image_in_host, 0, NULL, &read_dev_host_event)); //=================================================================================================== //== Total time elapsed ============================================================================ timer_stop(); tempo_total = get_elapsed_time(); //================================================================================================== //====== Get time of Profile Info ================================================================== // Write data time CL_CHECK(clGetEventProfilingInfo(write_host_dev_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &write_host_dev_start_time, NULL)); CL_CHECK(clGetEventProfilingInfo(write_host_dev_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &write_host_dev_end_time, NULL)); // Read data time CL_CHECK(clGetEventProfilingInfo(read_dev_host_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &read_dev_host_start_time, NULL)); CL_CHECK(clGetEventProfilingInfo(read_dev_host_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &read_dev_host_end_time, NULL)); for (i = 0; i < MAX_CALL_FFT; i++) { kernel_start_time = (cl_long) 0; kernel_end_time = (cl_long) 0; CL_CHECK(clGetEventProfilingInfo(kernels_events_out_fft[i], CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &kernel_start_time, NULL)); CL_CHECK(clGetEventProfilingInfo(kernels_events_out_fft[i], CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &kernel_end_time, NULL)); kernel_runtime += (kernel_end_time - kernel_start_time); kernel_start_time = (cl_long) 0; kernel_end_time = (cl_long) 0; CL_CHECK(clGetEventProfilingInfo(fft_events[i].kernel_bitsrev, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &kernel_start_time, NULL)); CL_CHECK(clGetEventProfilingInfo(fft_events[i].kernel_bitsrev, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &kernel_end_time, NULL)); kernel_runtime += (kernel_end_time - kernel_start_time); kernel_start_time = (cl_long) 0; kernel_end_time = (cl_long) 0; if (fft_events[i].kernel_normalize != NULL) { CL_CHECK(clGetEventProfilingInfo(fft_events[i].kernel_normalize, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &kernel_start_time, NULL)); CL_CHECK(clGetEventProfilingInfo(fft_events[i].kernel_normalize, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &kernel_end_time, NULL)); kernel_runtime += (kernel_end_time - kernel_start_time); } } for (j=0; j < MAX_CALL_FFT*m; j++){ kernel_start_time = (cl_long) 0; kernel_end_time = (cl_long) 0; CL_CHECK(clGetEventProfilingInfo(kernel_butter_events[j], CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &kernel_start_time, NULL)); CL_CHECK(clGetEventProfilingInfo(kernel_butter_events[j], CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &kernel_end_time, NULL)); kernel_runtime += (kernel_end_time - kernel_start_time); } write_host_dev_run_time = write_host_dev_end_time - write_host_dev_start_time; read_dev_host_run_time = read_dev_host_end_time - read_dev_host_start_time; /* save_log_debug(write_host_dev_run_time,fp); save_log_debug(read_dev_host_run_time,fp); close_log_debug(fp); */ image_tam_MB = (double) (((double) image_tam)/(double) MEGA_BYTES); //================================================================================================== save_log_gpu(image_filename, kernel_runtime, (double) (image_tam_MB/( (double) read_dev_host_run_time/(double) NANOSECONDS)), (double) (image_tam_MB/ ((double) write_host_dev_run_time/ (double) NANOSECONDS)), tempo_total, LOG_NAME); //=================================================================================================== image_amplitudes = (float*)malloc(n*n*sizeof(float)); for (i=0; i < n; i++) { for (j=0; j < n; j++) { image_amplitudes[n*j + i] = (float) (AMP(((float*)image_in_host)[(2*n*j)+2*i], ((float*)image_in_host)[(2*n*j)+2*i+1])); } } //clFlush(cmd_queue); //clFinish(cmd_queue); opgm.width = n; opgm.height = n; normalizar_pgm(&opgm, image_amplitudes); escrever_pgm(&opgm, output_filename); //=================================================================================================== clFinish(cmd_queue); clReleaseKernel(kernel_twiddle_factors); clReleaseKernel(kernel_matriz_transpose); clReleaseKernel(kernel_lowpass_filter); clReleaseProgram(program); clReleaseMemObject(image_in_mem); clReleaseMemObject(image_out_mem); clReleaseMemObject(twiddle_factors_mem); clReleaseCommandQueue(cmd_queue); clReleaseContext(context); clReleaseEvent(read_dev_host_event); clReleaseEvent(write_host_dev_event); clReleaseEvent(kernels_events_out_fft[0]); clReleaseEvent(kernels_events_out_fft[1]); clReleaseEvent(kernels_events_out_fft[2]); clReleaseEvent(kernels_events_out_fft[3]); destruir_pgm(&ipgm); destruir_pgm(&opgm); free(image_amplitudes); free(source_str); free(image_in_host); free(image_filename); free(twiddle_factors_host); free(output_filename); free(fft_events); free(kernel_butter_events); //_CrtDumpMemoryLeaks(); return 0; }