// Main programm int main (int argc, char *argv[]) { Search_settings sett; Command_line_opts opts; Search_range s_range; Aux_arrays aux_arr; double *F; // F-statistic array int i, j, r, c, a, b, g; int d, o, m, k; int bins = 2, ROW, dim = 4; // neighbourhood of point will be divide into defined number of bins double pc[4]; // % define neighbourhood around each parameter for initial grid double pc2[4]; // % define neighbourhood around each parameter for direct maximum search (MADS & Simplex) double tol = 1e-10; // double delta = 1e-5; // initial step in MADS function // double *results; // Vector with results from Fstatnet function // double *maximum; // True maximum of Fstat // double results_max[11]; double s1, s2, s3, s4; double sgnlo[4]; double **arr; // arr[ROW][COL], arrg[ROW][COL]; double nSource[3]; double sinalt, cosalt, sindelt, cosdelt; double F_min; char path[512]; double x, y; ROW = pow((bins+1),4); #ifdef YEPPP yepLibrary_Init(); Yep64f *results_max = (Yep64f*)malloc(sizeof(Yep64f)*11); Yep64f *results_first = (Yep64f*)malloc(sizeof(Yep64f)*11); Yep64f *results = (Yep64f*)malloc(sizeof(Yep64f)*11); Yep64f *maximum = (Yep64f*)malloc(sizeof(Yep64f)*11); // Yep64f *sgnlo = (Yep64f*)malloc(sizeof(Yep64f)*4); // Yep64f *nSource = (Yep64f*)malloc(sizeof(Yep64f)*3); Yep64f *mean = (Yep64f*)malloc(sizeof(Yep64f)*4); enum YepStatus status; #endif pc[0] = 0.015; pc[1] = 0.015; pc[2] = 0.015; pc[3] = 0.015; for (i = 0; i < 4; i++){ pc2[i] = 2*pc[i]/bins; } // Time tests double tdiff; clock_t tstart, tend; // Command line options handle_opts(&sett, &opts, argc, argv); // Output data handling /* struct stat buffer; if (stat(opts.prefix, &buffer) == -1) { if (errno == ENOENT) { // Output directory apparently does not exist, try to create one if(mkdir(opts.prefix, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH) == -1) { perror (opts.prefix); return 1; } } else { // can't access output directory perror (opts.prefix); return 1; } } */ sprintf(path, 
"%s/candidates.coi", opts.dtaprefix); //Glue function if(strlen(opts.glue)) { glue(&opts); sprintf(opts.dtaprefix, "./data_total"); sprintf(opts.dtaprefix, "%s/followup_total_data", opts.prefix); opts.ident = 000; } FILE *coi; int z; if ((coi = fopen(path, "r")) != NULL) { // while(!feof(coi)) { /* if(!fread(&w, sizeof(unsigned short int), 1, coi)) { break; } fread(&mean, sizeof(float), 5, coi); fread(&fra, sizeof(unsigned short int), w, coi); fread(&ops, sizeof(int), w, coi); if((fread(&mean, sizeof(float), 4, coi)) == 4){ */ while(fscanf(coi, "%le %le %le %le", &mean[0], &mean[1], &mean[2], &mean[3]) == 4){ //Time test // tstart = clock(); arr = matrix(ROW, 4); //Function neighbourhood - generating grid around point arr = neigh(mean, pc, bins); // Output data handling /* struct stat buffer; if (stat(opts.prefix, &buffer) == -1) { if (errno == ENOENT) { // Output directory apparently does not exist, try to create one if(mkdir(opts.prefix, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH) == -1) { perror (opts.prefix); return 1; } } else { // can't access output directory perror (opts.prefix); return 1; } } */ // Grid data if(strlen(opts.addsig)) { read_grid(&sett, &opts); } // Search settings search_settings(&sett); // Detector network settings detectors_settings(&sett, &opts); // Array initialization init_arrays(&sett, &opts, &aux_arr, &F); // Amplitude modulation functions for each detector for(i=0; i<sett.nifo; i++) rogcvir(&ifo[i]); // Adding signal from file if(strlen(opts.addsig)) { add_signal(&sett, &opts, &aux_arr, &s_range); } // Setting number of using threads (not required) omp_set_num_threads(1); results_max[5] = 0.; // ifo zostaje shared // ifo....shft i ifo....xdatm{a,b] prerobić na lokalne tablice w fstatnet // w regionie parallel wprowadzić tablice private aa i bb ; alokować i przekazywać je jako argumenty do fstatnet i amoeba // Main loop - over all parameters + parallelisation #pragma omp parallel default(shared) private(d, i, sgnlo, sinalt, 
cosalt, sindelt, cosdelt, nSource, results, maximum) { double **sigaa, **sigbb; // aa[nifo][N] sigaa = matrix(sett.nifo, sett.N); sigbb = matrix(sett.nifo, sett.N); #pragma omp for for (d = 0; d < ROW; ++d){ for (i = 0; i < 4; i++){ sgnlo[i] = arr[d][i]; // sgnlo[i] = mean[i]; } sinalt = sin(sgnlo[3]); cosalt = cos(sgnlo[3]); sindelt = sin(sgnlo[2]); cosdelt = cos(sgnlo[2]); nSource[0] = cosalt*cosdelt; nSource[1] = sinalt*cosdelt; nSource[2] = sindelt; for (i = 0; i < sett.nifo; ++i){ modvir(sinalt, cosalt, sindelt, cosdelt, sett.N, &ifo[i], &aux_arr, sigaa[i], sigbb[i]); } // F-statistic in given point results = Fstatnet(&sett, sgnlo, nSource, sigaa, sigbb); //printf("Fstatnet: %le %le %le %le %le %le\n", results[6], results[7], results[8], results[9], results[5], results[4]); #pragma omp critical if(results[5] < results_max[5]){ for (i = 0; i < 11; i++){ results_max[i] = results[i]; } } // Maximum search using simplex algorithm if(opts.simplex_flag){ // puts("Simplex"); maximum = amoeba(&sett, &aux_arr, sgnlo, nSource, results, dim, tol, pc2, sigaa, sigbb); printf("Amoeba: %le %le %le %le %le %le\n", maximum[6], maximum[7], maximum[8], maximum[9], maximum[5], maximum[4]); // Maximum value in points searching #pragma omp critical if(maximum[5] < results_max[5]){ for (i = 0; i < 11; i++){ results_max[i] = maximum[i]; } } } //simplex } // d - main outside loop free_matrix(sigaa, sett.nifo, sett.N); free_matrix(sigbb, sett.nifo, sett.N); } //pragma for(g = 0; g < 11; g++) results_first[g] = results_max[g]; // Maximum search using MADS algorithm if(opts.mads_flag) { // puts("MADS"); maximum = MADS(&sett, &aux_arr, results_max, mean, tol, pc2, bins); } //Time test // tend = clock(); // tdiff = (tend - tstart)/(double)CLOCKS_PER_SEC; printf("%le %le %le %le %le %le\n", results_max[6], results_max[7], results_max[8], results_max[9], results_max[5], results_max[4]); } // while fread coi // } } //if coi else { perror (path); return 1; } // Output information /* 
puts("**********************************************************************"); printf("*** Maximum value of F-statistic for grid is : (-)%.8le ***\n", -results_first[5]); printf("Sgnlo: %.8le %.8le %.8le %.8le\n", results_first[6], results_first[7], results_first[8], results_first[9]); printf("Amplitudes: %.8le %.8le %.8le %.8le\n", results_first[0], results_first[1], results_first[2], results_first[3]); printf("Signal-to-noise ratio: %.8le\n", results_first[4]); printf("Signal-to-noise ratio from estimated amplitudes (for h0 = 1): %.8le\n", results_first[10]); puts("**********************************************************************"); if((opts.mads_flag)||(opts.simplex_flag)){ printf("*** True maximum is : (-)%.8le ***\n", -maximum[5]); printf("Sgnlo for true maximum: %.8le %.8le %.8le %.8le\n", maximum[6], maximum[7], maximum[8], maximum[9]); printf("Amplitudes for true maximum: %.8le %.8le %.8le %.8le\n", maximum[0], maximum[1], maximum[2], maximum[3]); printf("Signal-to-noise ratio for true maximum: %.8le\n", maximum[4]); printf("Signal-to-noise ratio from estimated amplitudes (for h0 = 1) for true maximum: %.8le\n", maximum[10]); puts("**********************************************************************"); }*/ // Cleanup & memory free free(results_max); free(results_first); free(results); free(maximum); free(mean); free_matrix(arr, ROW, 4); cleanup_followup(&sett, &opts, &s_range, &aux_arr, F); return 0; }
/*
  3-D convolution of an unsigned-char volume.

  imagesize  volume extent; only .s0, .s1 and .s2 are read
  input      source volume, indexed as x + s0*y + s0*s1*z
  output     destination volume, same indexing as input
  filterG    filter coefficients in global memory; kernsize^3 of them are
             copied to local memory before use

  Each work-item computes CONV_UNROLL consecutive outputs along dimension 0,
  starting at x = get_global_id(0)*CONV_UNROLL. Work-items whose filter
  window would reach past the volume return without writing anything.

  kernf, kernsize, nkernels, CONV_UNROLL, mmad, convert_kernuc and kernstore
  are not defined in this block; they are presumably supplied as build-time
  definitions -- confirm against the host code.
*/
kernel void convolute(int4 imagesize, global unsigned char *input,
                      global unsigned char *output, global kernf *filterG) {
  int4 gid = (int4)(get_global_id(0)*CONV_UNROLL, get_global_id(1), get_global_id(2), 0);

  /* Offset of the first voxel this work-item produces */
  int imoffset = gid.s0 + imagesize.s0 * gid.s1 + imagesize.s0 * imagesize.s1 * gid.s2;

  int j;
  int dy, dz;

  /*
    MAD(ko,pos) accumulates one multiply-add into unrolled output 'ko', using
      the current 'raw' value as the input sample,
      'pos' as the X-offset of that sample from the first output, and
      'pos-ko' as the filter tap that pairs with it for output 'ko'.
    Every condition depends only on constants, so the compiler can remove
    the branches that do not apply.
  */
#define MAD(ko,pos) {if(CONV_UNROLL>ko) { \
      if(pos-ko >= 0 && pos-ko < kernsize) { \
        val[ko] = mmad(val[ko],(kernf)(raw),filter[(pos-ko)+offset]); \
      }}}

  /*
    MADS(pos) loads the input sample at X-offset 'pos' once and feeds it to
    every unrolled output. Output 'ko' needs samples ko .. ko+kernsize-1, so
    samples must be loaded for pos < kernsize+CONV_UNROLL-1. The previous
    guard, pos < kernsize, dropped the trailing taps of outputs 1 and above
    whenever CONV_UNROLL > 1.
  */
#define MADS(pos) {if(pos<kernsize+CONV_UNROLL-1) { \
      raw=input[imoffset2+pos]; \
      MAD(0,pos); MAD(1,pos); MAD(2,pos); MAD(3,pos); MAD(4,pos); MAD(5,pos); MAD(6,pos); MAD(7,pos); \
      MAD(8,pos); MAD(9,pos); MAD(10,pos); MAD(11,pos); MAD(12,pos); MAD(13,pos); MAD(14,pos); MAD(15,pos); \
      MAD(16,pos); MAD(17,pos); MAD(18,pos); MAD(19,pos); MAD(20,pos); MAD(21,pos); MAD(22,pos); MAD(23,pos); \
      MAD(24,pos); MAD(25,pos); MAD(26,pos); MAD(27,pos); MAD(28,pos); MAD(29,pos); MAD(30,pos); MAD(31,pos); \
      MAD(32,pos); MAD(33,pos); MAD(34,pos); MAD(35,pos); MAD(36,pos); MAD(37,pos); MAD(38,pos); MAD(39,pos); \
    }}

  /* One accumulator per unrolled output */
  kernf val[CONV_UNROLL];
  for(j=0;j<CONV_UNROLL;j++)
    val[j]=(kernf)(0.0);

  local kernf filter[kernsize*kernsize*kernsize];

  /* Copy global filter to local memory. This runs before the bounds test
     below, so every work-item in the group reaches the copy and the wait. */
  event_t event = async_work_group_copy(filter,filterG,kernsize*kernsize*kernsize,0);
  wait_group_events(1, &event);

  /* Skip work-items whose window would reach outside the volume */
  if(gid.s0 + kernsize + CONV_UNROLL > imagesize.s0 ||
     gid.s1 + kernsize > imagesize.s1 ||
     gid.s2 + kernsize > imagesize.s2) return;

  for(dz=0;dz<kernsize;dz++)
    for(dy=0;dy<kernsize;dy++) {
      /* Start of the filter row for (dy,dz).
         NOTE(review): the stride is scaled by nkernels while the local
         filter holds kernsize^3 entries -- confirm nkernels is 1 here. */
      int offset = dy*kernsize*nkernels + dz*kernsize*kernsize*nkernels;
      /* Start of the matching input row */
      int imoffset2 = imoffset+dy*imagesize.s0 + dz*imagesize.s0*imagesize.s1;
      unsigned char raw;

      /* Covers X-offsets 0..41, which bounds kernsize + CONV_UNROLL - 1;
         MAD() handles at most 40 unrolled outputs. */
      MADS(0); MADS(1); MADS(2); MADS(3); MADS(4); MADS(5);
      MADS(6); MADS(7); MADS(8); MADS(9); MADS(10); MADS(11);
      MADS(12); MADS(13); MADS(14); MADS(15); MADS(16); MADS(17);
      MADS(18); MADS(19); MADS(20); MADS(21); MADS(22); MADS(23);
      MADS(24); MADS(25); MADS(26); MADS(27); MADS(28); MADS(29);
      MADS(30); MADS(31); MADS(32); MADS(33); MADS(34); MADS(35);
      MADS(36); MADS(37); MADS(38); MADS(39); MADS(40); MADS(41);
    }

  /* Convert each accumulator and store it at consecutive output offsets */
  for(j=0;j<CONV_UNROLL;j++) {
    kernstore( convert_kernuc(val[j]), imoffset+j, output);
  }
}