void assign_trees_in_forest_to_same_file(const int64_t ntrees, struct locations *locations, struct locations *new_locations, const int nfiles, const int BOX_DIVISIONS) { sort_locations_on_fid_file_offset(ntrees, locations); sort_locations_on_fid_file_offset(ntrees, new_locations); int64_t *histogram_fileids = my_calloc(sizeof(*histogram_fileids), nfiles); /* the fun begins here -> in case, assign all trees from a forest into the same file */ int64_t start_forestid = locations[0].forestid; int64_t min_fileid = locations[0].fileid; int64_t max_fileid = locations[0].fileid; int64_t start_index_forest = 0; int64_t end_index_forest = 1; int64_t num_trees_moved = 0; fprintf(stderr, ANSI_COLOR_MAGENTA"Assigning all trees in a forest into the same file...."ANSI_COLOR_RESET"\n"); /* setup the progressbar */ int interrupted=0; init_my_progressbar(ntrees, &interrupted); for(int64_t i=1;i<ntrees;i++) { my_progressbar(i, &interrupted); if(locations[i].forestid == start_forestid) { if(locations[i].fileid < min_fileid) { min_fileid = locations[i].fileid; } if(locations[i].fileid > min_fileid) { max_fileid = locations[i].fileid; } end_index_forest++; continue; } else { if(min_fileid == max_fileid) { for(int64_t j=start_index_forest;j<end_index_forest;j++) { new_locations[j].fileid = min_fileid; } } else { /* fprintf(stderr,"For forest id = %"PRId64" trees are stored in separate files (min, max) = (%"PRId64", %"PRId64")\n", */ /* start_forestid, min_fileid, max_fileid); */ /* interrupted=1; */ /* create a histogram of the fileids */ memset(histogram_fileids, 0, sizeof(*histogram_fileids) * nfiles); for(int64_t j=start_index_forest;j<end_index_forest;j++) { histogram_fileids[locations[j].fileid]++; } int64_t max_common_value = 0; int max_common_fileid = 0; for(int j=0;j<nfiles;j++) { if(histogram_fileids[j] > max_common_value) { max_common_value = histogram_fileids[j]; max_common_fileid = j; } } const int ii = max_common_fileid/(BOX_DIVISIONS*BOX_DIVISIONS); const int jj = (max_common_fileid%((int64_t)(BOX_DIVISIONS*BOX_DIVISIONS)))/BOX_DIVISIONS; const int kk = max_common_fileid%((int64_t)BOX_DIVISIONS); for(int64_t j=start_index_forest;j<end_index_forest;j++) { new_locations[j].fileid = max_common_fileid; my_snprintf(new_locations[j].filename, LOCATIONS_FILENAME_SIZE, "tree_%d_%d_%d.dat", ii, jj, kk); if(new_locations[j].fileid != locations[j].fileid) { num_trees_moved++; /* fprintf(stderr,"Moved tree = %10"PRId64" from fileid=%3"PRId64" to fileid=%3"PRId64"\n",locations[j].tree_root, locations[j].fileid, new_locations[j].fileid); */ /* interrupted=1; */ } } } start_forestid = locations[i].forestid; start_index_forest = i; end_index_forest = i+1; min_fileid = locations[i].fileid; max_fileid = locations[i].fileid; } } finish_myprogressbar(&interrupted); free(histogram_fileids); if(num_trees_moved > 0) { fprintf(stderr,"Number of trees moved into different files = %"PRId64"\n",num_trees_moved); } fprintf(stderr, ANSI_COLOR_GREEN"Assigning all trees in a forest into the same file.......done"ANSI_COLOR_RESET"\n\n"); }
int main(int argc, char **argv) { char *input_dir, *output_dir; if(argc != 3) { usage(argc, argv); return EXIT_FAILURE; } else { input_dir = argv[1]; output_dir = argv[2]; } if(strcmp(input_dir, output_dir) == 0) { fprintf(stderr,"ERROR: Input and output directories are the same..exiting\n"); return EXIT_FAILURE; } struct timeval tstart, tend; gettimeofday(&tstart, NULL); char locations_filename[MAXLEN], forests_filename[MAXLEN]; int64_t *forests=NULL, *tree_roots=NULL; my_snprintf(locations_filename, MAXLEN, "%s/locations.dat", input_dir); my_snprintf(forests_filename, MAXLEN, "%s/forests.list", input_dir); fprintf(stderr, ANSI_COLOR_MAGENTA"Reading forests...."ANSI_COLOR_RESET"\n"); const int64_t ntrees = read_forests(forests_filename, &forests, &tree_roots); fprintf(stderr, ANSI_COLOR_GREEN"Reading forests......done"ANSI_COLOR_RESET"\n\n"); /* fprintf(stderr, "Number of trees = %"PRId64"\n\n",ntrees); */ struct locations *locations = my_malloc(sizeof(*locations), ntrees); int nfiles = 0, BOX_DIVISIONS=0; fprintf(stderr, ANSI_COLOR_MAGENTA"Reading locations...."ANSI_COLOR_RESET"\n"); const int64_t ntrees_loc = read_locations(locations_filename, ntrees, locations, &nfiles, &BOX_DIVISIONS); fprintf(stderr, ANSI_COLOR_GREEN"Reading locations......done"ANSI_COLOR_RESET"\n\n"); XASSERT(ntrees == ntrees_loc, "ntrees=%"PRId64" should be equal to ntrees_loc=%"PRId64"\n", ntrees, ntrees_loc); /* the following function will sort locations and forests based on tree root id*/ assign_forest_ids(ntrees, locations, forests, tree_roots); /* Forests are now contained inside locations -> free the pointers */ free(forests);free(tree_roots); FILE **tree_outputs = my_malloc(sizeof(FILE *), nfiles); FILE **tree_inputs = my_malloc(sizeof(FILE *), nfiles); int *tree_inputs_fd = my_malloc(sizeof(*tree_inputs_fd), nfiles); int *tree_outputs_fd = my_malloc(sizeof(*tree_outputs_fd), nfiles); XASSERT(sizeof(off_t) == 8, "File offset bits must be 64\n" "Please ensure "ANSI_COLOR_RED"#define _FILE_OFFSET_BITS 64"ANSI_COLOR_RESET" is present\n"); off_t *tree_outputs_fd_offset = my_malloc(sizeof(*tree_outputs_fd_offset), nfiles); int64_t *tree_counts = my_calloc(sizeof(*tree_counts), nfiles); int64_t *inp_file_sizes = my_calloc(sizeof(*inp_file_sizes), nfiles); char buffer[MAXLEN]; for (int i=0; i<BOX_DIVISIONS; i++) { for (int j=0; j<BOX_DIVISIONS; j++) { for(int k=0; k<BOX_DIVISIONS; k++) { my_snprintf(buffer,MAXLEN,"%s/tree_%d_%d_%d.dat", input_dir, i, j, k); int id = id = i*BOX_DIVISIONS*BOX_DIVISIONS + j*BOX_DIVISIONS + k; tree_inputs[id] = my_fopen(buffer, "r"); XASSERT(setvbuf(tree_inputs[id], NULL, _IONBF, 0) == 0, "Could not set unbuffered fgets"); my_fseek(tree_inputs[id],0L, SEEK_END); inp_file_sizes[id] = ftello(tree_inputs[id]); rewind(tree_inputs[id]); tree_inputs_fd[id] = fileno(tree_inputs[id]); my_snprintf(buffer,MAXLEN,"%s/tree_%d_%d_%d.dat", output_dir, i, j, k); unlink(buffer); tree_outputs[id] = my_fopen(buffer, "w"); /* setbuf(tree_outputs[id], _IOFBF); */ tree_outputs_fd[id] = fileno(tree_outputs[id]); } } } /* the following function will sort locations based on 1) filename 2) offsets */ sort_locations_file_offset(ntrees, locations); /* holder to check later that bytes have been assigned */ for(int64_t i=0;i<ntrees;i++) { locations[i].bytes = -1;/* Make sure bytes is a signed type! */ } /* Create a copy of current locations */ struct locations *new_locations = my_malloc(sizeof(*new_locations), ntrees); assert(sizeof(*new_locations) == sizeof(*locations) && "locations struct is varying in size! The sky is falling!!"); memcpy(new_locations, locations, sizeof(*locations) * ntrees); /* figure out the byte size for each tree */ int64_t start = locations[0].offset; int64_t start_fileid = locations[0].fileid; /* tree_roots are 64 bit integers -> max digits in decimal = log10(2^64) < 20. Add 1 char for +-, in case consistent tree changes. and then strlen('#tree ') and the previous \n. I need to read up to previous newline. */ const int64_t guess_max_linesize = 20 + 1 + 6 + 1; fprintf(stderr, ANSI_COLOR_MAGENTA"Calculating the number of bytes for each tree...."ANSI_COLOR_RESET"\n"); /* setup the progressbar */ int interrupted=0; init_my_progressbar(ntrees, &interrupted); for(int64_t i=1;i<=ntrees-1;i++) { my_progressbar(i, &interrupted); const int64_t fileid = locations[i].fileid; /* Are we starting on a new file ?*/ if(start_fileid != fileid) { /* fill out the bytes for the last tree in the previous file */ const int64_t num_bytes = compute_numbytes_with_off(inp_file_sizes[start_fileid], start); locations[i-1].bytes = num_bytes; new_locations[i-1].bytes = num_bytes; /* now we reset the start fields */ start = locations[i].offset; start_fileid = locations[i].fileid; continue; } const int64_t current_offset_guess = locations[i].offset - guess_max_linesize; my_fseek(tree_inputs[fileid], current_offset_guess, SEEK_SET); while(1) { const int a = fgetc(tree_inputs[fileid]); if(a == EOF) { fprintf(stderr,"Encountered EOF while looking for end of current tree\n"); exit(EXIT_FAILURE); } const unsigned char c = (unsigned char) a; if(c == '\n') { const int64_t num_bytes = compute_numbytes(tree_inputs[start_fileid], start); locations[i-1].bytes = num_bytes; new_locations[i-1].bytes = num_bytes; /* fprintf(stderr,"%"PRId64"\n",num_bytes); */ start = locations[i].offset; break; } } } /* fill out the bytes for the last tree */ { start = locations[ntrees-1].offset; const int64_t fileid = locations[ntrees-1].fileid; my_fseek(tree_inputs[fileid], 0L, SEEK_END); const int64_t num_bytes = compute_numbytes(tree_inputs[fileid], start); locations[ntrees-1].bytes = num_bytes; new_locations[ntrees-1].bytes = num_bytes; } finish_myprogressbar(&interrupted); fprintf(stderr, ANSI_COLOR_GREEN"Calculating the number of bytes for each tree.....done"ANSI_COLOR_RESET"\n\n"); for(int64_t i=ntrees-1;i>=0;i--) { XASSERT(locations[i].bytes > 0, "locations[%"PRId64"].bytes = %"PRId64" should be positive\n", i,locations[i].bytes); XASSERT(new_locations[i].bytes == locations[i].bytes, "locations[%"PRId64"].bytes = %"PRId64" should be equal new_locations->bytes = %"PRId64"\n", i,locations[i].bytes,new_locations[i].bytes); XASSERT(strncmp(new_locations[i].filename, locations[i].filename, LOCATIONS_FILENAME_SIZE) == 0, "new_locations[%"PRId64"].filename = %s should equal locations filename = %s\n", i, new_locations[i].filename, locations[i].filename); assert(new_locations[i].forestid == locations[i].forestid); assert(new_locations[i].tree_root == locations[i].tree_root); assert(new_locations[i].fileid == locations[i].fileid); assert(new_locations[i].offset == locations[i].offset); assert(new_locations[i].bytes == locations[i].bytes); /* fprintf(stderr,"locations[%"PRId64"].bytes = %"PRId64"\n", */ /* i,locations[i].bytes); */ } /* Check that the preceeding bytes computation is correct */ { int64_t *total_tree_bytes = my_calloc(sizeof(*total_tree_bytes), nfiles); for(int64_t i=0;i<ntrees;i++) { /* add the number of bytes for tree in each file */ total_tree_bytes[locations[i].fileid] += locations[i].bytes; } for(int i=0;i<nfiles;i++) { XASSERT(total_tree_bytes[i] < inp_file_sizes[i], "Bytes in tree = %"PRId64" must be smaller than file size = %"PRId64"\n", total_tree_bytes[i], inp_file_sizes[i]); } free(total_tree_bytes); } /* Now assign all trees in the same forest to the same file The new fileids goes into new_locations (which is otherwise a copy of locations) */ assign_trees_in_forest_to_same_file(ntrees, locations, new_locations, nfiles, BOX_DIVISIONS); /* Now write out both the old and the new struct locations */ my_snprintf(buffer, MAXLEN, "%s/forests_and_locations_old.list",output_dir); write_forests_and_locations(buffer, ntrees, locations); /* write new the forests file */ my_snprintf(buffer,MAXLEN,"%s/forests.list", output_dir); unlink(buffer); FILE *fp_forests = my_fopen(buffer,"w"); fprintf(fp_forests, "#TreeRootID ForestID\n"); for(int64_t i=0;i<ntrees;i++) { fprintf(fp_forests, "%"PRId64" %"PRId64"\n", locations[i].tree_root, locations[i].forestid); } fclose(fp_forests); /* open the locations file*/ my_snprintf(buffer,MAXLEN,"%s/locations.dat", output_dir); unlink(buffer); FILE *fp_locations = my_fopen(buffer,"w"); fprintf(fp_locations, "#TreeRootID FileID Offset Filename\n"); /* copy the headers between the tree_* files */ /* break when the number of trees is encountered -- should be the first one line that doesn't have a '#' character at front */ int64_t *tree_header_offsets = my_malloc(sizeof(*tree_header_offsets), nfiles); for(int i=0;i<nfiles;i++) { /* All of the file pointers have been moved around to figure out the bytes -> reposition them at the beginning of the tree_*.dat file */ rewind(tree_inputs[i]); while(fgets(buffer, MAXLEN, tree_inputs[i]) != NULL) { if(buffer[0] != '#') { tree_header_offsets[i] = ftello(tree_outputs[i]); /* write a place holder for the number of trees in the file. There are 18 X's in the following line, DO NOT CHANGE. */ fprintf(tree_outputs[i], "XXXXXXXXXXXXXXXXXX\n"); //For the number of trees break; } else { fprintf(tree_outputs[i], "%s", buffer); } } tree_outputs_fd_offset[i] = ftello(tree_outputs[i]); } /* Figure out the offsets and write out a binary file containing the new locations info */ for(int64_t i=0;i<ntrees;i++) { const int tree_bytes_line_size = my_snprintf(buffer, MAXLEN, "#tree %"PRId64"\n", locations[i].tree_root); const int64_t bytes_to_write = locations[i].bytes; const int64_t out_fileid = new_locations[i].fileid; XASSERT(out_fileid < nfiles, "Output fileid = %"PRId64" must be smaller than total number of files = %d\n" , out_fileid, nfiles); /* XASSERT(new_locations[i].bytes == bytes_to_write, */ /* "new locations bytes = %"PRId64"should be identical to old locations bytes = %"PRId64"\n", */ /* new_locations[i].bytes,bytes_to_write); */ new_locations[i].offset = tree_outputs_fd_offset[out_fileid] + tree_bytes_line_size; tree_outputs_fd_offset[out_fileid] += (bytes_to_write + tree_bytes_line_size); } /* Valgrind complains there is use of uninitialized bytes -> so ditching this binary file output for now */ /* /\* Output the binary locations struct so I can skip over recalculating the bytes *\/ */ /* { */ /* my_snprintf(buffer, MAXLEN, "%s/new_locations.binary",output_dir); */ /* FILE *fp = my_fopen(buffer, "w"); */ /* /\* fprintf(stderr,"ntrees = %"PRId64"\n",ntrees); *\/ */ /* my_fwrite(&ntrees, sizeof(int64_t), 1, fp); */ /* const size_t size_of_struct = sizeof(struct locations); */ /* /\* fprintf(stderr,"struct size = %zu\n", size_of_struct); *\/ */ /* my_fwrite(&size_of_struct, sizeof(size_t), 1, fp); */ /* /\* my_fwrite(new_locations, size_of_struct, ntrees, fp); *\/ */ /* fclose(fp); */ /* } */ /* Write out the combined forests and locations file */ my_snprintf(buffer, MAXLEN, "%s/forests_and_locations_new.list",output_dir); write_forests_and_locations(buffer, ntrees, new_locations); fprintf(stderr, ANSI_COLOR_MAGENTA"Writing out trees in contiguous order...."ANSI_COLOR_RESET"\n"); interrupted=0; init_my_progressbar(ntrees, &interrupted); /* Now copy each one of the trees */ for(int64_t i=0;i<ntrees;i++) { my_progressbar(i, &interrupted); const int64_t fileid = locations[i].fileid; XASSERT(locations[i].tree_root == new_locations[i].tree_root, "locations->tree_root = %"PRId64" must equal new_locations->tree_root = %"PRId64"\n", locations[i].tree_root, new_locations[i].tree_root); XASSERT(locations[i].forestid == new_locations[i].forestid, "locations->forestid = %"PRId64" must equal new_locations->forestid = %"PRId64"\n", locations[i].forestid, new_locations[i].forestid); /* Make sure all output is done using new_locations[i].fileid */ const int64_t out_fileid = new_locations[i].fileid; FILE *out_fp = tree_outputs[out_fileid]; /* const int tree_bytes_line_size = fprintf(out_fp, "#tree %"PRId64"\n", locations[i].tree_root); */ fprintf(out_fp, "#tree %"PRId64"\n", locations[i].tree_root); fflush(out_fp); const int64_t offset = locations[i].offset; const int64_t bytes_to_write = locations[i].bytes; if(bytes_to_write == 0) { fprintf(stderr, "Strange! bytes for tree data = %zu should not be 0\n", bytes_to_write); continue; } #ifdef USE_FGETS //USE_FGETS -> stdio.h family #warning using fgets (slowest) FILE *in_fp = tree_inputs[fileid]; my_fseek(in_fp, (long) offset, SEEK_SET); const long actual_offset = ftello(out_fp); XASSERT(actual_offset == new_locations[i].offset, "actual offset = %ld should equal calculated offset = %"PRId64"\n", actual_offset, new_locations[i].offset); /* new_locations[i].offset = ftello(out_fp); */ const int64_t bytes_written = copy_bytes_between_two_files(bytes_to_write, in_fp, out_fp); #else //use pread/write #warning using pread int in_fd = tree_inputs_fd[fileid]; int out_fd = tree_outputs_fd[out_fileid]; off_t in_offset = offset; const int64_t bytes_written = copy_bytes_with_pread(bytes_to_write, in_fd, out_fd, in_offset); /* I have already figured out the offsets */ /* new_locations[i].offset = tree_outputs_fd_offset[out_fileid] + tree_bytes_line_size; */ #endif//USE_FGETS -> stdio.h family XASSERT(bytes_written == bytes_to_write, "bytes_to_write = %zu does not equal bytes_written = %zu\n", bytes_to_write, bytes_written); /* Update the number of trees in that file */ tree_counts[out_fileid]++; /* write the locations info*/ const int ii = out_fileid/(BOX_DIVISIONS*BOX_DIVISIONS); const int jj = (out_fileid%((int64_t)(BOX_DIVISIONS*BOX_DIVISIONS)))/BOX_DIVISIONS; const int kk = out_fileid%((int64_t)BOX_DIVISIONS); fprintf(fp_locations, "%"PRId64" %"PRId64" %"PRId64" tree_%d_%d_%d.dat\n", new_locations[i].tree_root, out_fileid, new_locations[i].offset, ii, jj, kk); /* This line is only required if offsets have not been computed earlier */ /* tree_outputs_fd_offset[out_fileid] += (bytes_written + tree_bytes_line_size) ; */ } /* fill in the number of trees written per file. the number in the format *MUST EXACTLY* match the number of XXX's in the previous place-holder. */ for(int i=0;i<nfiles;i++) { FILE *out_fp = tree_outputs[i]; fseek(out_fp, tree_header_offsets[i], SEEK_SET); fprintf(out_fp, "%-18"PRId64"\n", tree_counts[i]); } finish_myprogressbar(&interrupted); fprintf(stderr, ANSI_COLOR_GREEN "Writing out trees in contiguous order.....done"ANSI_COLOR_RESET"\n\n"); /* close open file pointers + free memory for file pointers */ fclose(fp_locations); for(int i=0;i<nfiles;i++) { fclose(tree_inputs[i]); fclose(tree_outputs[i]); } free(tree_inputs);free(tree_outputs); free(tree_inputs_fd);free(tree_outputs_fd); /* free other heap allocations */ free(tree_header_offsets); free(tree_outputs_fd_offset); free(tree_counts); free(inp_file_sizes); free(locations); free(new_locations); gettimeofday(&tend, NULL); fprintf(stderr,"Wrote out %"PRId64" trees in contiguous order. Time taken = %0.2g seconds\n", ntrees, ADD_DIFF_TIME(tstart, tend)); return EXIT_SUCCESS; }
int main(int argc, char *argv[]) { //////////////////////////***Definitions***//////////////////////////////////////////////////////////////////// int nthreads=16,chunk=CHUNKSIZE; /*Input args/files */ FILE *fp1, /*Spectroscopic Galaxy File */ *fp2; /*Imaging Galaxy File */ char *Gxy_Spectro, *Gxy_Imaging; int N_Bins; /*Number of log bins */ double Start_Bin, /*Location of the edge of the smallest bin */ Max_Separation, /*Maximum rp Separation */ log_Bin_Size, /*Rp Bin Size in log*/ Minimum_Redshift=1000.0, /*Used to calculated maximum serapartion to filter pairs.*/ Maximum_Redshift=0; int Normalization_Choice; /*Which normalization should be used for the imaging catalogue 1= Di 2=Ri */ /* Spectroscopic Galaxy/Randoms Information */ int Spectro_Size=1E5; /*This is the assumed length of the galaxy file */ double *RA_s, /* Given */ *Dec_s, /* Given */ *Redshift_s, /*Given */ *Weight_s, /*The Fiber Collision or Completeness Weight of The Galaxy/Randoms */ *Distance_s; double *X_s,*Y_s,*Z_s; /*The cartesian elements to calculate cos_Theta*/ double area_tot=4*PI; // fprintf(stderr,"ASSUMMING SPHERE GEOMETRY FOR NORMALIZATION CHOICE!!!!!!!!!!!!!\n"); /* Imaging Galaxy/Randoms Information */ int Imaging_Size=4E5; /*This is the assumed length of the imaging file */ double *RA_i, /* Given */ *Dec_i; /* Given */ double *X_i,*Y_i,*Z_i; /* Wp calculation information */ double *DD, /*This is not an int because the counts will be weights. It is the shape Nbins X NJackknife */ Maximum_Dec_Separation, /*Filter by this dec difference */ Distance_to_Near_Z=1646., /*distance to inner redshift bin */ Distance_to_Far_Z=0., /*distance to inner redshift bin */ cos_Theta, rp; int bin; /*Random Counters and Such */ int i=0,j=0,k=0; int Ngal_s=0; /*Number of Galaxies/Randoms in the Spectro Sample */ int Ngal_i=0; /*Number of Galaxies/Randoms in the Imagin Sample */ /* void gridlink1D(int np,double rmin,double rmax,double rcell,double *z,int *ngrid,int **gridinit,int **gridlist); */ void gridlink1D_with_struct(int np,double dmin,double dmax,double rcell,double *x1,double *y1,double *z1,double *dec,int *ngrid,cellarray **lattice); struct timeval t0,t1; int nitems,nread; char buffer[MAXBUFSIZE]; /*Read in Args */ Gxy_Spectro=argv[1]; Gxy_Imaging=argv[2]; sscanf(argv[3],"%lf",&Start_Bin); sscanf(argv[4],"%lf",&Max_Separation); sscanf(argv[5],"%d",&N_Bins); sscanf(argv[6],"%d",&Normalization_Choice); if(argc > 6) sscanf(argv[7],"%lf",&area_tot) ; log_Bin_Size=(log10(Max_Separation)-log10(Start_Bin))/(N_Bins); //log_Bin_Size=(log10(Max_Separation)-log10(Start_Bin))/(N_Bins-1.); fprintf(stderr,"BOSS Wp > Log Bin size = %lf \n",log_Bin_Size); //////////////////////////////*Allocate the Arrays that are going to be used *////////////////////////////////////////////// /* #ifdef USE_BINLOOKUP */ /* int *binlookup=NULL; */ /* const int NBINLOOKUP=5e4; */ /* binlookup = my_calloc(sizeof(*binlookup),NBINLOOKUP+2); */ /* #ifdef AVOID_SQRT */ /* setup_squared_bin_lookup(sdss_data_file,&rmin,&rmax,&nbin,NBINLOOKUP,&rupp,binlookup); */ /* binfac=NBINLOOKUP/(rmax*rmax); */ /* #else */ /* setup_bin_lookup(sdss_data_file,&rmin,&rmax,&nbin,NBINLOOKUP,&rupp,binlookup); */ /* binfac=NBINLOOKUP/rmax; */ /* #endif */ /* #endif */ /*Spectro Arrays*/ //Variables in the file RA_s = my_calloc(sizeof(*RA_s),Spectro_Size); Dec_s = my_calloc(sizeof(*Dec_s),Spectro_Size); Redshift_s = my_calloc(sizeof(*Redshift_s),Spectro_Size); Weight_s = my_calloc(sizeof(*Weight_s),Spectro_Size); /////////////////////////////* [ READ IN THE GALAXY FILES AND CONVERT REDSHIFTS TO MPC ] *//////////////////////////////////// /*Read in Spectro Sample*/ gettimeofday(&t0,NULL); fp1 = my_fopen(Gxy_Spectro,"r") ; i=0; int flag=0,trash_d; nitems=5; /* while(fscanf(fp1,"%lf %lf %lf %lf %d",&RA_s[i],&Dec_s[i],&Redshift_s[i],&Weight_s[i],&Sector_s[i])!=EOF) { */ while(fgets(buffer,MAXBUFSIZE,fp1)!=NULL) { nread=sscanf(buffer,"%lf %lf %lf %lf %d",&RA_s[i],&Dec_s[i],&Redshift_s[i],&Weight_s[i],&trash_d); if (nread == nitems) { if(Redshift_s[i] > 10.0) { Redshift_s[i]/=SPEED_OF_LIGHT; flag=1; } if(Redshift_s[i] < 0) { fprintf(stderr,"BOSS Wp > Warning! Redshift = %lf, NR = %d. Setting to nearly 0.\n",Redshift_s[i],i); Redshift_s[i]=0.00001; } i++; if(i==Spectro_Size) { fprintf(stderr,"Increasing memory allocation for the spectroscopic sample\n"); Spectro_Size *= MEMORY_INCREASE_FAC; RA_s = my_realloc(RA_s,sizeof(*RA_s),Spectro_Size,"RA_s"); Dec_s = my_realloc(Dec_s,sizeof(*Dec_s),Spectro_Size,"Dec_s"); Redshift_s = my_realloc(Redshift_s,sizeof(*Redshift_s),Spectro_Size,"Redshift_s"); Weight_s = my_realloc(Weight_s,sizeof(*Weight_s),Spectro_Size,"Weight_s"); } } else { fprintf(stderr,"WARNING: In spectroscopic sample line %d did not contain %d elements...skipping line\n",i,nitems); } } Ngal_s=i; fclose(fp1); gettimeofday(&t1,NULL); if(flag!=0) fprintf(stderr,"BOSS Wp > Warning! You gave me cz instead of redshift!\n"); //Derived variables Distance_s = my_calloc(sizeof(*Distance_s),Ngal_s); X_s = my_calloc(sizeof(*X_s),Ngal_s); Y_s = my_calloc(sizeof(*Y_s),Ngal_s); Z_s = my_calloc(sizeof(*Z_s),Ngal_s); if(Ngal_s >= Spectro_Size) { fprintf(stderr,"BOSS Wp > Something Terrible Has Happened: SPECTROSCOPIC FILE TOO LONG!!!"); return EXIT_FAILURE; } fprintf(stderr,"BOSS Wp > There are %d Galaxies in the Spectro Sample. Time taken = %6.2lf sec\n",Ngal_s,ADD_DIFF_TIME(t0,t1)); /*Convert Redshift to Comoving Distance in MPC */ /* Here I am using Simpsons' Numerical Integration Rule To * convert the redshift of the galaxy into Megaparsecs. * The details of the integrals I am using is obviously * in Hogg's Distance Measures in Cosmology and you can * wikipedia Simpsons' Rule. I am assuming WMAP7 Cosmology * throughout. You can adjust all those parameters in the header. * I'm including an extra parameter (the equation of state of dark energy) * because I felt like it. */ double mean_distance=0; /*GSL Numerical Integration Crap */ gsl_integration_workspace * w = gsl_integration_workspace_alloc (1000); double result, error,redshift_gsl; gsl_function F; F.function = &f; F.params = &redshift_gsl; for(i=0;i<Ngal_s;i++) { gsl_integration_qags (&F, 0, Redshift_s[i], 0, 1e-7, 1000, w, &result, &error); Distance_s[i]=result; if(Redshift_s[i] < Minimum_Redshift) { Distance_to_Near_Z=Distance_s[i]; Minimum_Redshift=Redshift_s[i]; } if(Redshift_s[i] > Maximum_Redshift){ Distance_to_Far_Z=Distance_s[i]; Maximum_Redshift=Redshift_s[i]; } mean_distance+=Distance_s[i]; } gsl_integration_workspace_free(w); fprintf(stderr,"BOSS Wp > Mean Distance = %lf\n",mean_distance/Ngal_s); fprintf(stderr,"BOSS Wp > The Distance to the closest redshift is %lf\n",Distance_to_Near_Z); fprintf(stderr,"BOSS Wp > The Distance to the furthest redshift %lf is %lf\n",Maximum_Redshift,Distance_to_Far_Z); double dist_range=(Distance_to_Far_Z - Distance_to_Near_Z); double Volume1=4./3.*PI*pow(Distance_to_Far_Z,3); double Volume2=4./3.*PI*pow(Distance_to_Near_Z,3); double percentage_area=area_tot/(4.*PI); double Volume=(Volume1-Volume2)*percentage_area; fprintf(stderr,"BOSS Wp > Spherical Volume =%lf\n",Volume); fprintf(stderr,"BOSS Wp > Number Density of Spectro Gal =%17.16f\n",Ngal_s/Volume); // fprintf(stderr,"The Maximum Separation you decided is %lf\n",Max_Separation); Maximum_Dec_Separation=asin(Max_Separation/(2*Distance_to_Near_Z))*2.*RAD_TO_DEG*1.00002; //The maximum separation that can happen and let's multiply it by 20% more fprintf(stderr,"BOSS Wp > Maximum Dec Separation is %lf\n",Maximum_Dec_Separation); /*Read in Imaging File*/ /*Imaging Arrays */ RA_i = my_calloc(sizeof(*RA_i),Imaging_Size); Dec_i = my_calloc(sizeof(*Dec_i),Imaging_Size); nitems=3; gettimeofday(&t0,NULL); fp2=my_fopen(Gxy_Imaging,"r") ; i=0; while(fgets(buffer,MAXBUFSIZE,fp2)!=NULL) { nread = sscanf(buffer,"%lf %lf %d",&RA_i[i],&Dec_i[i],&trash_d); if(nread == nitems) { i++; if(i==Imaging_Size) { fprintf(stderr,"Increasing memory allocation for the imaging sample\n"); Imaging_Size *= MEMORY_INCREASE_FAC; RA_i = my_realloc(RA_i,sizeof(*RA_i),Imaging_Size,"RA_i"); Dec_i = my_realloc(Dec_i,sizeof(*Dec_i),Imaging_Size,"Dec_i"); } } else { fprintf(stderr,"WARNING: line %d did not contain %d elements - skipping\n",i,nitems); } } fclose(fp2); gettimeofday(&t1,NULL); Ngal_i=i; if(Ngal_i >= Imaging_Size) { fprintf(stderr,"BOSS Wp > Something Terrible Has Happened: IMAGING FILE TOO LONG!!!\n"); return EXIT_FAILURE; } X_i = my_calloc(sizeof(*X_i),Ngal_i); Y_i = my_calloc(sizeof(*Y_i),Ngal_i); Z_i = my_calloc(sizeof(*Z_i),Ngal_i); fprintf(stderr,"BOSS Wp > There are %d Galaxies in the Imaging Sample. Time taken = %6.2lf sec\n",Ngal_i,ADD_DIFF_TIME(t0,t1)); for(i=0;i<Ngal_s;i++) { X_s[i]=sin((90-Dec_s[i]) * DEG_TO_RAD)*cos(RA_s[i] * DEG_TO_RAD) ; Y_s[i]=sin((90-Dec_s[i]) * DEG_TO_RAD)*sin(RA_s[i] * DEG_TO_RAD) ; Z_s[i]=cos((90-Dec_s[i]) * DEG_TO_RAD) ; } for(i=0;i<Ngal_i;i++){ X_i[i]=sin((90-Dec_i[i]) * DEG_TO_RAD)*cos(RA_i[i] * DEG_TO_RAD) ; Y_i[i]=sin((90-Dec_i[i]) * DEG_TO_RAD)*sin(RA_i[i] * DEG_TO_RAD) ; Z_i[i]=cos((90-Dec_i[i]) * DEG_TO_RAD) ; } /* *This is where the jackknife call is going to go. *It's going to take the map file,the number of jackknife samples and the observed sectors in the same order as the observed galaxies. *It will return the vector of jackknife ID's in the same order the sector list was given to it. *The jackknife ID corresponds to the *one* jackknife sample that galaxy doesn't belong in. */ double number_density_of_imaging=Ngal_i/area_tot; double distance_squared=0.0,Normalization=0.0; if(Normalization_Choice==1) { for(i=0;i<Ngal_s;i++) { Normalization+=Weight_s[i]; } } else { for(i=0;i<Ngal_s;i++){ distance_squared+=1./SQR(Distance_s[i]); Normalization+=number_density_of_imaging*Weight_s[i]*1./SQR(Distance_s[i]); } // Normalization=number_density_of_imaging*1.204988; fprintf(stderr,"Distance Squared = %lf,Normalization =%lf\n",distance_squared,Normalization); } //gridlink the spectroscopic sample /*---Gridlink-variables----------------*/ int ngrid;/* *gridinit1D,*gridlist1D ; */ double dmin=-90,dmax=90.0;//min/max dec double inv_dmax_diff = 1.0/(dmax-dmin); cellarray *lattice; ngrid=0 ; /* gridlink1D(Ngal_i,dmin,dmax,Max_Separation,Dec_i,&ngrid,&gridinit1D,&gridlist1D) ; */ gridlink1D_with_struct(Ngal_i,dmin,dmax,Maximum_Dec_Separation,X_i,Y_i,Z_i,Dec_i,&ngrid,&lattice); fprintf(stderr,"gridlink1D done. ngrid= %d\n",ngrid) ; ////////////////////////////////////****Calculation of Wp****///////////////////////////////////////////////////////////////////////// // double rp_sqr=0.0; double max_sep_sqr = Max_Separation*Max_Separation; double start_bin_sqr = Start_Bin*Start_Bin; double inv_start_bin_sqr = 1.0/start_bin_sqr; double inv_log_bin_size = 1.0/log_Bin_Size; /* int icen,icell; */ /* double *x1,*y1,*z1,*dec; */ /* int *imaging; */ cellarray *cellstruct __attribute__((aligned(ALIGNMENT))); int xx=0; for(i=0;i<ngrid;i++) xx+= lattice[i].nelements; if(xx!=Ngal_i) { fprintf(stderr,"ERROR: xx=%d is not equal to Ngal_i=%d\n",xx,Ngal_i); exit(EXIT_FAILURE); } /*Wp Measurement Arrays */ DD = my_calloc(sizeof(*DD),N_Bins); double DD_threads[N_Bins][nthreads]; for(i=0;i<N_Bins;i++) { for(j=0;j<nthreads;j++) { DD_threads[i][j]=0.0; } } /* int ispectro=0,ii=0,p; */ gettimeofday(&t0,NULL); omp_set_num_threads(nthreads); int counter=0; int interrupted=0; init_my_progressbar(Ngal_s,&interrupted); /* #pragma omp parallel shared(Dec_s,Weight_s,X_s,Y_s,Z_s,chunk) private(cos_Theta,ispectro,icen,icell,rp_sqr,bin,x1,y1,z1,imaging,cellstruct) */ #pragma omp parallel default(none) shared(interrupted,stderr,counter,Ngal_s,Dec_s,Weight_s,X_s,Y_s,Z_s,chunk,ngrid,dmin,inv_dmax_diff,Maximum_Dec_Separation,Distance_s,inv_start_bin_sqr,max_sep_sqr,inv_log_bin_size,start_bin_sqr,DD_threads,lattice) { int tid = omp_get_thread_num(); #pragma omp for schedule(dynamic,chunk) for(int ispectro=0;ispectro<Ngal_s;ispectro++) { #pragma omp atomic counter++; if(tid==0){ my_progressbar(counter,&interrupted); } int icen = (int)(ngrid*(Dec_s[ispectro]-dmin)*inv_dmax_diff); if(icen<0) icen++; if(icen>=ngrid) icen = icen--; assert(icen >=0 && icen < ngrid && "icen needs to be in [0, ngrid)"); for(int ii=-BIN_REFINE_FACTOR;ii<=BIN_REFINE_FACTOR;ii++) { int icell = icen + ii ; /* for(icell=0;icell<ngrid;icell++) { */ // This makes no difference in the output - so the logic is correct if(icell>=0 && icell<ngrid) { /*---Loop-over-particles-in-each-cell-----------------*/ cellarray *cellstruct=&(lattice[icell]); double *x1 = cellstruct->x; double *y1 = cellstruct->y; double *z1 = cellstruct->z; double *dec = cellstruct->dec; int *imaging = cellstruct->index; for(int p=0;p<cellstruct->nelements;p++) { if(fabs(Dec_s[ispectro]-dec[p]) <= Maximum_Dec_Separation) { double cos_Theta=X_s[ispectro] * x1[p] + Y_s[ispectro] * y1[p] + Z_s[ispectro] * z1[p]; /* rp_sqr=4.0*Distance_s[ispectro]*Distance_s[ispectro]*(1.0 - cos_Theta)*0.5; /\* sin(arccos x) = sqrt(1-x^2) *\/ */ double rp_sqr=2.0*Distance_s[ispectro]*Distance_s[ispectro]*(1.0 - cos_Theta); /* sin(arccos x) = sqrt(1-x^2) */ if(rp_sqr < max_sep_sqr && rp_sqr >= start_bin_sqr) { int bin=(int)floor((0.5*log10(rp_sqr*inv_start_bin_sqr))*inv_log_bin_size); // bin=(int)floor((0.5*log10(rp_sqr*inv_start_bin_sqr))*inv_log_bin_size)-1; /* bin=(int)floor((log10(sqrt(rp_sqr)/Start_Bin))/log_Bin_Size); */ DD_threads[bin][tid]+=Weight_s[ispectro]; //Put the Count in the Keeping Track Bin// } } } } } } } finish_myprogressbar(&interrupted); for(i=0;i<N_Bins;i++) { for(j=0;j<nthreads;j++){ DD[i]+=DD_threads[i][j]; } } gettimeofday(&t1,NULL); fprintf(stderr,"Double loop time in main -> %6.2lf sec \n",ADD_DIFF_TIME(t0,t1)); /* #ifndef USE_AVX */ /* for(p=0;p<cellstruct->nelements;p++) { */ /* if(fabs(Dec_s[ispectro]-dec[p]) <= Maximum_Dec_Separation) { */ /* cos_Theta=X_s[ispectro] * x1[p] + Y_s[ispectro] * y1[p] + Z_s[ispectro] * z1[p]; */ /* rp_sqr=4.0*Distance_s[ispectro]*Distance_s[ispectro]*(1.0 - cos_Theta)*0.5; /\* sin(arccos x) = sqrt(1-x^2) *\/ */ /* if(rp_sqr < max_sep_sqr && rp_sqr >= start_bin_sqr) { */ /* bin=(int)floor((0.5*log10(rp_sqr*inv_start_bin_sqr))*inv_log_bin_size); */ /* /\* bin=(int)floor((log10(sqrt(rp_sqr)/Start_Bin))/log_Bin_Size); *\/ */ /* DD[bin][0]+=Weight_s[ispectro]; //Put the Count in the Keeping Track Bin// */ /* DD[bin][Jackknife_s[ispectro]+1]+=Weight_s[ispectro]; */ /* if(Jackknife_i[imaging[p]]!=Jackknife_s[ispectro]){ */ /* DD[bin][Jackknife_i[imaging[p]]+1]+=Weight_s[ispectro]; */ /* } */ /* } */ /* } */ /* } */ /* #else */ /* double dec_separation[NVECD]; */ /* double rp_sqr_array[NVECD],cos_theta_array[NVECD]; */ /* for(p=0;(p+NVECD)<cellstruct->nelements;p+=NVECD) { */ /* #pragma vector always */ /* for(int j=0;j<NVECD;j++) { */ /* dec_separation[j] = fabs(Dec_s[ispectro]-dec[p]); */ /* cos_theta_array[j] = X_s[ispectro] * x1[p+j] + Y_s[ispectro] * y1[p+j] + Z_s[ispectro] * z1[p+j]; */ /* rp_sqr_array[j] = 4.0*Distance_s[ispectro]*Distance_s[ispectro]*(1.0 - cos_theta_array[j])*0.5; /\* sin(arccos x) = sqrt(1-x^2) *\/ */ /* } */ /* #pragma novector */ /* for(int j=0;j<NVECD;j++) { */ /* rp_sqr = rp_sqr_array[j]; */ /* if(dec_separation[j] <= Maximum_Dec_Separation) { */ /* if(rp_sqr < max_sep_sqr && rp_sqr >= start_bin_sqr) { */ /* bin=(int)floor((0.5*log10(rp_sqr*inv_start_bin_sqr))*inv_log_bin_size); */ /* DD[bin][0]+=Weight_s[ispectro]; //Put the Count in the Keeping Track Bin// */ /* DD[bin][Jackknife_s[ispectro]+1]+=Weight_s[ispectro]; */ /* if(Jackknife_i[imaging[p+j]]!=Jackknife_s[ispectro]){ */ /* DD[bin][Jackknife_i[imaging[p+j]]+1]+=Weight_s[ispectro]; */ /* } */ /* } */ /* } */ /* } */ /* } */ /* //Now serially process the rest */ /* p = p > cellstruct->nelements ? p-NVECD:p; */ /* for(;p<cellstruct->nelements;p++){ /* if(fabs(Dec_s[ispectro]-dec[p]) <= Maximum_Dec_Separation) { */ /* cos_Theta=X_s[ispectro] * x1[p] + Y_s[ispectro] * y1[p] + Z_s[ispectro] * z1[p]; */ /* rp_sqr=4.0*Distance_s[ispectro]*Distance_s[ispectro]*(1.0 - cos_Theta)*0.5; /\* sin(arccos x) = sqrt(1-x^2) *\/ */ /* if(rp_sqr < max_sep_sqr && rp_sqr >= start_bin_sqr) { */ /* bin=(int)floor((0.5*log10(rp_sqr*inv_start_bin_sqr))*inv_log_bin_size); */ /* DD[bin][0]+=Weight_s[ispectro]; //Put the Count in the Keeping Track Bin// */ /* DD[bin][Jackknife_s[ispectro]+1]+=Weight_s[ispectro]; */ /* if(Jackknife_i[imaging[p]]!=Jackknife_s[ispectro]){ */ /* DD[bin][Jackknife_i[imaging[p]]+1]+=Weight_s[ispectro]; */ /* } */ /* } */ /* } */ /* } */ /* #endif */ /* for(int ispectro=0;ispectro<Ngal_s;ispectro++){ */ /* for(int imaging=0;imaging<Ngal_i;imaging++){ */ /* if(fabs(Dec_s[ispectro]-Dec_i[imaging]) <= Maximum_Dec_Separation){ */ /* cos_Theta=X_s[ispectro] * X_i[imaging] + Y_s[ispectro] * Y_i[imaging] + Z_s[ispectro] * Z_i[imaging]; */ /* //rp=2.0*Distance_s[ispectro]*SQRT((1.0 - cos_Theta)/2.); /\* sin(arccos x) = sqrt(1-x^2) *\/ */ /* rp_sqr=4.0*Distance_s[ispectro]*Distance_s[ispectro]*(1.0 - cos_Theta)*0.5; /\* sin(arccos x) = sqrt(1-x^2) *\/ */ /* //fprintf(stderr,"distance = %lf,cos_Theta=%lf,rp = %lf\n",Distance_s[ispectro],cos_Theta,rp); */ /* /\* if(rp < Max_Separation && rp>=Start_Bin){ *\/ */ /* if(rp_sqr < max_sep_sqr && rp_sqr >= start_bin_sqr) { */ /* /\* bin=(int)floor((log10(rp/Start_Bin))/log_Bin_Size); *\/ */ /* bin=(int)floor((0.5*log10(rp_sqr*inv_start_bin_sqr))*inv_log_bin_size); */ /* DD[bin][0]+=Weight_s[ispectro]; //Put the Count in the Keeping Track Bin// */ /* DD[bin][Jackknife_s[ispectro]+1]+=Weight_s[ispectro]; */ /* if(Jackknife_i[imaging]!=Jackknife_s[ispectro]){ */ /* // fprintf(fp3,"%d %lf %d %d %d %d \n",bin, rp,Jackknife_s[ispectro],Sector_s[ispectro],Jackknife_i[imaging],Sector_i[imaging]); */ /* DD[bin][Jackknife_i[imaging]+1]+=Weight_s[ispectro]; */ /* } */ /* } */ /* } */ /* } */ /* } */ for(i=0;i<N_Bins;i++) { // fprintf(stderr,"%lf %e %e %e ",pow(10,(log_Bin_Size*(i)+log10(Start_Bin))),DD[i][0]/(Normalization),Mean[i],Error[i]); fprintf(stdout,"%lf %e %lf\n",pow(10,(log_Bin_Size*(i)+log10(Start_Bin))),DD[i]/(Normalization),DD[i]); } /* Free ALL the arrays */ free(RA_i); free(Dec_i); free(X_s); free(Y_s); free(Z_s); free(X_i); free(Y_i); free(Z_i); free(RA_s); free(Dec_s); free(Redshift_s); free(Distance_s); free(Weight_s); free(DD); for(i=0;i<ngrid;i++) { free(lattice[i].x); free(lattice[i].y); free(lattice[i].z); free(lattice[i].dec); free(lattice[i].index); } free(lattice); return 0; }