// Sliding-window correlation between a data trace and a linearly fitted
// template.
//
// For every window start n_data in [0, data.size()-templ.size()) the template
// is first fitted to the data window by least squares (optimal scale and
// offset). The correlation coefficient between the window and that fitted
// template is then stored in the result at index n_data.
//
// data    - trace to scan; must be non-empty and at least as long as templ.
// templ   - template waveform; must be non-empty.
// progDlg - progress sink; Update() receives a percentage, a caption and a
//           pointer to a flag through which the computation can be skipped.
//
// Returns a vector with data.size()-templ.size() coefficients, or an empty
// vector if the skip flag was set during a progress update.
// Throws std::runtime_error if the template is longer than the data or if
// either input is empty.
//
// A constant template (zero variance), a constant data window or a template
// of length 1 makes one of the divisions below a division by zero, so the
// corresponding coefficients are not finite.
Vector_double stf::linCorr(const Vector_double& data, const Vector_double& templ, stfio::ProgressInfo& progDlg)
{
    bool skipped = false;

    // the template has to be smaller than the data waveform:
    if (data.size() < templ.size()) {
        throw std::runtime_error("Template larger than data in stf::linCorr");
    }
    if (data.size() == 0 || templ.size() == 0) {
        throw std::runtime_error("Array of size 0 in stf::linCorr");
    }

    const std::size_t n_templ_pts = templ.size();
    const std::size_t n_windows = data.size() - n_templ_pts;
    Vector_double Corr(n_windows);

    // Sums over the first window (n_data == 0). The template sums never
    // change; the data sum is updated incrementally inside the main loop.
    double sum_templ_data = 0.0, sum_templ = 0.0, sum_templ_sqr = 0.0, sum_data = 0.0;
    for (std::size_t k = 0; k < n_templ_pts; ++k) {
        sum_templ_data += templ[k] * data[k];
        sum_data += data[k];
        sum_templ += templ[k];
        sum_templ_sqr += templ[k] * templ[k];
    }

    // Denominator of the least-squares scale; depends on the template only,
    // so it is computed once instead of once per window.
    const double templ_ss = sum_templ_sqr - sum_templ * sum_templ / n_templ_pts;

    // First sample of the previous window; it drops out of the running sum
    // when the window advances by one.
    double y_old = 0.0;

    int progCounter = 0;
    // Number of windows per percent of progress. This has to be a
    // floating-point division: an integer division yields 0 for fewer than
    // 100 windows and turns the throttle below into a division by zero.
    const double progFraction = n_windows / 100.0;

    for (std::size_t n_data = 0; n_data < n_windows; ++n_data) {
        if (n_data / progFraction > progCounter) {
            progDlg.Update( static_cast<int>(static_cast<double>(n_data) / static_cast<double>(n_windows) * 100.0),
                            "Calculating correlation coefficient", &skipped );
            if (skipped) {
                Corr.resize(0);
                return Corr;
            }
            progCounter++;
        }

        if (n_data != 0) {
            // The product has to be computed in full length:
            sum_templ_data = 0.0;
            for (std::size_t k = 0; k < n_templ_pts; ++k) {
                sum_templ_data += templ[k] * data[n_data + k];
            }
            // Slide the data sum: add the sample entering the window and
            // subtract the one that left it.
            const double y_new = data[n_data + n_templ_pts - 1];
            sum_data += y_new - y_old;
        }
        // The first value of this window (it will have to be subtracted
        // during the next iteration):
        y_old = data[n_data];

        // Least-squares fit of scale*templ+offset to the current window.
        const double scale = (sum_templ_data - sum_templ * sum_data / n_templ_pts) / templ_ss;
        const double offset = (sum_data - scale * sum_templ) / n_templ_pts;

        // Now that the optimal template has been found, compute the
        // correlation between data and optimal template. The deviations are
        // taken explicitly from the means (instead of re-using sums of
        // squares) to avoid numerical instability.
        const double mean_data = sum_data / n_templ_pts;
        const double sum_optTempl = sum_templ * scale + offset * n_templ_pts;
        const double mean_optTempl = sum_optTempl / n_templ_pts;

        // One pass accumulates both sums of squared deviations and the
        // cross product.
        double sd_data = 0.0;
        double sd_templ = 0.0;
        double r = 0.0;
        for (std::size_t k = 0; k < n_templ_pts; ++k) {
            const double dev_data = data[n_data + k] - mean_data;
            const double dev_templ = templ[k] * scale + offset - mean_optTempl;
            sd_data += SQR(dev_data);
            sd_templ += SQR(dev_templ);
            r += dev_data * dev_templ;
        }
        sd_data = sqrt(sd_data / n_templ_pts);
        sd_templ = sqrt(sd_templ / n_templ_pts);

        // NOTE(review): the SDs above are normalised by N while the cross
        // product is normalised by N-1, so a perfect match yields N/(N-1)
        // rather than exactly 1. Kept as is so that existing results and
        // thresholds do not shift - confirm whether this is intended.
        r /= ((n_templ_pts - 1) * sd_data * sd_templ);
        Corr[n_data] = r;
    }
    return Corr;
}
int main (int ac, char* av[]) { // *************************************************************************** // *************************************************************************** // initialization ************************************************************ // *************************************************************************** // *************************************************************************** // initialization of eigen OMP paralization Eigen::initParallel(); // set numer of threads used by eigen // first line sets number of threads directly // second line lets OMP decide on the number of threads, // e. g. via OMP_NUM_THREADS //Eigen::setNbThreads(4); Eigen::setNbThreads(0); //check the number of threads used const int nthreads = Eigen::nbThreads(); std::cout << "contraction code for stochastic dilution" << std::endl; std::cout << "using " << nthreads << " threads for eigen\n" << std::endl; // reading in global parameters from input file GlobalData* global_data = GlobalData::Instance(); global_data->read_parameters(ac, av); // reading in of data ReadWrite* rewr = new ReadWrite; // everything for operator handling BasicOperator* basic = new BasicOperator(); // global variables from input file needed in main function const int Lt = global_data->get_Lt(); const int end_config = global_data->get_end_config(); const int delta_config = global_data->get_delta_config(); const int start_config = global_data->get_start_config(); const int number_of_eigen_vec = global_data->get_number_of_eigen_vec(); const int number_of_max_mom = global_data->get_number_of_max_mom(); const int max_mom_squared = number_of_max_mom * number_of_max_mom; const int number_of_momenta = global_data->get_number_of_momenta(); const std::vector<int> mom_squared = global_data->get_momentum_squared(); const std::vector<quark> quarks = global_data->get_quarks(); const int number_of_rnd_vec = quarks[0].number_of_rnd_vec; const int dirac_min = global_data->get_dirac_min(); const 
int dirac_max = global_data->get_dirac_max(); const int number_of_dirac = dirac_max - dirac_min + 1; const int displ_min = global_data->get_displ_min(); const int displ_max = global_data->get_displ_max(); const int number_of_displ = displ_max - displ_min + 1; const int p_min = 0; //number_of_momenta/2; const int p_max = number_of_momenta; // TODO decide on path std::string outpath = global_data->get_output_path() + "/"; // other variables clock_t time; const std::complex<double> I(0.0, 1.0); char outfile[400]; FILE *fp = NULL; // *************************************************************************** // *************************************************************************** // memory allocation ********************************************************* // *************************************************************************** // *************************************************************************** // abbreviations for clearer memory allocation. Wont be used in loops and // when building the contractions // CJ: but it is a little bit ugly... 
const size_t nmom = number_of_momenta; const size_t nrnd = number_of_rnd_vec; const size_t ndir = number_of_dirac; const size_t ndis = number_of_displ; // memory for the correlation function array_cd_d7 C2_mes(boost::extents[nmom][nmom][ndir][ndir][ndis][ndis][Lt]); //TODO: dont need the memory for p_u^2 > p_d^2 array_cd_d10 Corr(boost::extents[nmom][nmom][ndir][ndir][ndis][ndis][Lt][Lt][nrnd][nrnd]); int norm = 0; for(int rnd1 = 0; rnd1 < number_of_rnd_vec; ++rnd1){ for(int rnd3 = rnd1 + 1; rnd3 < number_of_rnd_vec; ++rnd3){ for(int rnd2 = 0; rnd2 < number_of_rnd_vec; ++rnd2){ if((rnd2 != rnd1) && (rnd2 != rnd3)){ for(int rnd4 = rnd2 + 1; rnd4 < number_of_rnd_vec; ++rnd4){ if((rnd4 != rnd1) && (rnd4 != rnd3)){ norm++; //std::cout << "\n\nnorm: " << norm << rnd1 << rnd3 << rnd2 << rnd4 << std::endl; } } } } } } std::cout << "\n\tNumber of contraction combinations: " << norm << std::endl; // const double norm1 = Lt * norm; // Memory for propagation matrices (is that a word?) from t_source to t_sink // (op_1) and vice versa (op_2) // additional t_source to t_sink (op_3) and t_sink to t_source (op_4) for // 4-point functions // 1, 3 -> u-quarks; 2, 4 -> d-quarks; 5, 6 -> u quarks for neutral particle array_Xcd_d2_eigen op_1(boost::extents[nrnd][nrnd]); array_Xcd_d2_eigen op_3(boost::extents[nrnd][nrnd]); vec_Xcd_eigen op_2(number_of_rnd_vec); vec_Xcd_eigen op_4(number_of_rnd_vec); vec_Xcd_eigen op_5(number_of_rnd_vec); vec_Xcd_eigen op_6(number_of_rnd_vec); for(int rnd_i = 0; rnd_i < number_of_rnd_vec; ++rnd_i){ for(int rnd_j = 0; rnd_j < number_of_rnd_vec; ++rnd_j){ op_1[rnd_i][rnd_j] = Eigen::MatrixXcd(4 * number_of_eigen_vec, 4 * quarks[0].number_of_dilution_E); } op_2[rnd_i] = Eigen::MatrixXcd(4 * quarks[0].number_of_dilution_E, 4 * number_of_eigen_vec); } // *************************************************************************** // *************************************************************************** // Loop over all configurations 
********************************************** // *************************************************************************** // *************************************************************************** for(int config_i = start_config; config_i <= end_config; config_i += delta_config){ std::cout << "\nprocessing configuration: " << config_i << "\n\n"; rewr->read_perambulators_from_file(config_i); rewr->read_rnd_vectors_from_file(config_i); // rewr->read_eigenvectors_from_file(config_i); rewr->read_lime_gauge_field_doubleprec_timeslices(config_i); rewr->build_source_matrix(config_i); // ************************************************************************* // TWO PT CONTRACTION 1 **************************************************** // ************************************************************************* // setting the correlation function to zero std::cout << "\n\tcomputing the connected contribution of pi_+/-:\n"; time = clock(); // setting the correlation function to zero for(int p1 = 0; p1 < number_of_momenta; ++p1) for(int p2 = 0; p2 < number_of_momenta; ++p2) for(int dirac1 = 0; dirac1 < number_of_dirac; ++dirac1) for(int dirac2 = 0; dirac2 < number_of_dirac; ++dirac2) for(int displ1 = 0; displ1 < number_of_displ; ++displ1) for(int displ2 = 0; displ2 < number_of_displ; ++displ2) for(int t1 = 0; t1 < Lt; ++t1) for(int t1 = 0; t1 < Lt; ++t1) C2_mes[p1][p2][dirac1][dirac2][displ1][displ2][t1] = std::complex<double>(0.0, 0.0); for(int p1 = 0; p1 < number_of_momenta; ++p1) for(int p2 = 0; p2 < number_of_momenta; ++p2) for(int dirac1 = 0; dirac1 < number_of_dirac; ++dirac1) for(int dirac2 = 0; dirac2 < number_of_dirac; ++dirac2) for(int displ1 = 0; displ1 < number_of_displ; ++displ1) for(int displ2 = 0; displ2 < number_of_displ; ++displ2) for(int t1 = 0; t1 < Lt; ++t1) for(int t2 = 0; t2 < Lt; ++t2) for(int rnd1 = 0; rnd1 < number_of_rnd_vec; rnd1++) for(int rnd2 = 0; rnd2 < number_of_rnd_vec; rnd2++) 
Corr[p1][p2][dirac1][dirac2][displ1][displ2][t1][t2][rnd1][rnd2] = std::complex<double>(0.0, 0.0); #if 1 // PI^+/- // initializing of Corr: calculate all two-operator traces of the form tr(u \Gamma \bar{d}) // build all combinations of momenta, dirac_structures and displacements as specified in // infile for(int displ_u = 0; displ_u < number_of_displ; displ_u++){ for(int displ_d = 0; displ_d < number_of_displ; displ_d++){ for(int t_source = 0; t_source < Lt; ++t_source){ for(int t_sink = 0; t_sink < Lt; ++t_sink){ for(int p = p_min; p < p_max; ++p) { // initialize contraction[rnd_i] = perambulator * basicoperator // = D_u^-1 // choose 'i' for interlace or 'b' for block time dilution scheme // TODO: get that from input file // choose 'c' for charged or 'u' for uncharged particles basic->init_operator_u(0, t_source, t_sink, rewr, 'b', p, displ_min + displ_u); basic->init_operator_d(0, t_source, t_sink, rewr, 'b', p, displ_min + displ_d); } for(int dirac_u = 0; dirac_u < number_of_dirac; ++dirac_u){ for(int p_u = p_min; p_u < p_max; ++p_u) { // code for pi+- // "multiply contraction[rnd_i] with gamma structure" // contraction[rnd_i] are the columns of D_u^-1 which get // reordered by gamma multiplication. No actual multiplication // is carried out basic->get_operator_charged(op_1, 0, t_sink, rewr, dirac_min + dirac_u, p_u); for(int dirac_d = 0; dirac_d < number_of_dirac; ++dirac_d){ for(int p_d = p_min; p_d < p_max; ++p_d) { if(mom_squared[p_u] <= mom_squared[p_d]){ // same as get_operator but with gamma_5 trick. 
D_u^-1 is // daggered and multipied with gamma_5 from left and right // the momentum is changed to reflect the switched sign in // the momentum exponential for pi_+- basic->get_operator_g5(op_2, 0, dirac_min + dirac_d, p_d); for(int rnd1 = 0; rnd1 < number_of_rnd_vec; ++rnd1){ for(int rnd2 = rnd1 + 1; rnd2 < number_of_rnd_vec; ++rnd2){ // build all 2pt traces leading to C2_mes // Corr = tr(D_d^-1(t_sink) Gamma // D_u^-1(t_source) Gamma) Corr[p_u][p_d][dirac_u][dirac_d][displ_u][displ_d] [t_source][t_sink][rnd1][rnd2] = (op_2[rnd2] * op_1[rnd1][rnd2]).trace(); // std::cout << "p" << p_u << p_d << "dirac" << dirac_u << dirac_d << "\nCorr " // << Corr[p_u][p_d][dirac_u][dirac_d][displ_u][displ_d] // [t_source][t_sink][rnd1][rnd2] << std::endl; } } } } } } } } } } } // build 2pt-function C2_mes for pi^+ from Corr. Equivalent two just summing // up traces with same time difference between source and sink (all to all) // for every dirac structure, momentum, displacement // build 2pt-function C2_mes for pi^+ from Corr. Equivalent two just summing // up traces with same time difference between source and sink (all to all) // for every dirac structure, momentum, displacement for(int t_source = 0; t_source < Lt; ++t_source){ for(int t_sink = 0; t_sink < Lt; ++t_sink){ for(int p_u = p_min; p_u < p_max; ++p_u) { for(int p_d = p_min; p_d < p_max; ++p_d) { if(mom_squared[p_u] <= mom_squared[p_d]){ for(int dirac_u = 0; dirac_u < number_of_dirac; ++dirac_u){ for(int dirac_d = 0; dirac_d < number_of_dirac; ++dirac_d){ for(int displ_u = 0; displ_u < number_of_displ; displ_u++){ for(int displ_d = 0; displ_d < number_of_displ; displ_d++){ for(int rnd1 = 0; rnd1 < number_of_rnd_vec; ++rnd1){ for(int rnd2 = rnd1 + 1; rnd2 < number_of_rnd_vec; ++rnd2){ // building Correlation function // C2 = tr(D_d^-1 Gamma D_u^-1 Gamma) // TODO: find signflip of imaginary part // TODO: is C2_mes[dirac][p] better? 
C2_mes[p_u][p_d][dirac_u][dirac_d][displ_u][displ_d] [abs((t_sink - t_source - Lt) % Lt)] += Corr[p_u][number_of_momenta - p_d - 1] [dirac_u][dirac_d][displ_u][displ_d] [t_source][t_sink][rnd1][rnd2]; } } } } } } } } } } } // normalization of correlation function double norm3 = Lt * number_of_rnd_vec * (number_of_rnd_vec - 1) * 0.5; for(int p_u = p_min; p_u < p_max; ++p_u) { for(int p_d = p_min; p_d < p_max; ++p_d) { if(mom_squared[p_u] <= mom_squared[p_d]){ for(int dirac_u = 0; dirac_u < number_of_dirac; ++dirac_u){ for(int dirac_d = 0; dirac_d < number_of_dirac; ++dirac_d){ for(int displ_u = 0; displ_u < number_of_displ; ++displ_u){ for(int displ_d = 0; displ_d < number_of_displ; ++displ_d){ for(int t = 0; t < Lt; ++t){ C2_mes[p_u][p_d][dirac_u][dirac_d][displ_u][displ_d][t] /= norm3; } } } } } } } } #endif // output to binary file // to build a GEVP, the correlators are written into a seperate folder // for every dirac structure, momentum, displacement (entry of the GEVP // matrix). In the folders a file is created for every configuration which // contains all momentum combinations with same momentum squared // The folders are created when running create_runs.sh. 
If they dont exist, // a segmentation fault will occur // TODO: implement check for existence of folders for(int dirac_u = 0; dirac_u < number_of_dirac; ++dirac_u){ for(int dirac_d = 0; dirac_d < number_of_dirac; ++dirac_d){ for(int p = 0; p <= max_mom_squared; p++){ for(int displ_u = 0; displ_u < number_of_displ; ++displ_u){ for(int displ_d = 0; displ_d < number_of_displ; ++displ_d){ sprintf(outfile, "%s/dirac_%02d_%02d_p_%01d_%01d_displ_%01d_%01d_unsuppressed/" "C2_pi+-_conf%04d.dat", outpath.c_str(), dirac_min + dirac_u, dirac_min + dirac_d, p, p, displ_min, displ_max, config_i); if((fp = fopen(outfile, "wb")) == NULL) std::cout << "fail to open outputfile" << std::endl; for(int p_u = p_min; p_u < p_max; ++p_u){ if(rewr.mom_squared[p_u] == p){ fwrite((double*) &(C2_mes[p_u][p_u][dirac_u][dirac_d] [displ_u][displ_d][0]), sizeof(double), 2 * Lt, fp); } } fclose(fp); } } } } } for(int dirac_u = 0; dirac_u < number_of_dirac; ++dirac_u){ for(int dirac_d = 0; dirac_d < number_of_dirac; ++dirac_d){ for(int p1 = 0; p1 <= max_mom_squared; p1++){ for(int p2 = p1; p2 <= max_mom_squared; p2++){ for(int displ_u = 0; displ_u < number_of_displ; ++displ_u){ for(int displ_d = 0; displ_d < number_of_displ; ++displ_d){ printf("Writing to file: "); // sprintf(outfile, // "%s/dirac_%02d_%02d_p_%01d_%01d_displ_%01d_%01d/" // "C2_pi+-_conf%04d.dat", // outpath.c_str(), dirac_min + dirac_u, dirac_min + dirac_d, // p1, p2, displ_min + displ_u, displ_min + displ_d, config_i); sprintf(outfile, "%s/C2_pi+-_conf%04d.dat", outpath.c_str(), config_i); printf("%s\n", outfile); if((fp = fopen(outfile, "wb")) == NULL) std::cout << "fail to open outputfile" << std::endl; for(int p_u = p_min; p_u < p_max; ++p_u){ if(mom_squared[p_u] == p1){ for(int p_d = p_min; p_d < p_max; ++p_d){ if(mom_squared[p_d] == p2){ fwrite((double*) &(C2_mes[p_u][p_d][dirac_u][dirac_d][displ_u][displ_d][0]), sizeof(double), 2 * Lt, fp); } } } } fclose(fp); } } } } } } #if 0 // (old?) 
output routine and output to terminal sprintf(outfile, "%s/dirac_%02d_%02d_p_0_%01d_displ_%01d_%01d/C2_pi+-_conf%04d.dat", outpath.c_str(), dirac_min, dirac_max, 0, displ_min, displ_max, config_i); if((fp = fopen(outfile, "wb")) == NULL) std::cout << "fail to open outputfile" << std::endl; for(int dirac = dirac_min; dirac < dirac_max + 1; ++dirac) fwrite((double*) C2_mes[number_of_momenta/2] [number_of_momenta/2][dirac], sizeof(double), 2 * Lt, fp); fclose(fp); for(int rnd_i = 0; rnd_i < number_of_rnd_vec; ++rnd_i) { sprintf(outfile, "%s/dirac_%02d_%02d_p_0_%01d_displ_%01d_%01d/C2_dis_u_rnd%02d_conf%04d.dat", outpath.c_str(), dirac_min, dirac_max, number_of_max_mom, displ_min, displ_max, rnd_i, config_i); if((fp = fopen(outfile, "wb")) == NULL) std::cout << "fail to open outputfile" << std::endl; for(int dirac = dirac_min; dirac < dirac_max + 1; ++dirac) for(int p = 0; p < number_of_momenta; ++p) fwrite((double*) C2_dis[p][dirac][rnd_i], sizeof(double), 2 * Lt, fp); fclose(fp); } // output to terminal // printf("\n"); // for(int dirac = dirac_min; dirac < dirac_max + 1; ++dirac){ // printf("\tdirac = %02d\n", dirac); // for(int p = 0; p <= max_mom_squared; p++){ // printf("\tmomentum_u = %02d\n", p); // printf("\tmomentum_d = %02d\n", p); // for(int p_u = p_min; p_u < p_max; ++p_u){ // if((mom_squared[p_u] == p)){ // //printf( // // "\t t\tRe(C2_con)\tIm(C2_con)\n\t----------------------------------\n"); //// for(int t1 = 0; t1 < Lt; ++t1){ //// printf("\t%02d\t%.5e\t%.5e\n", t1, real(C2_mes[p_u][p_u][dirac][t1]), //// imag(C2_mes[p_u][p_u][dirac][t1])); //// } // printf("\n"); // printf("p_u = %02d\n", p_u); // } // } // } // // for(int p = 1; p <= max_mom_squared; p++){ // printf("\tmomentum_u = %02d\n", 0); // printf("\tmomentum_d = %02d\n", p); // for(int p_u = p_min; p_u < p_max; ++p_u){ // if((mom_squared[p_u] == p)){ // //printf( // // "\t t\tRe(C2_con)\tIm(C2_con)\n\t----------------------------------\n"); //// for(int t1 = 0; t1 < Lt; ++t1){ //// 
printf("\t%02d\t%.5e\t%.5e\n", t1, real(C2_mes[p_u][p_u][dirac][t1]), //// imag(C2_mes[p_u][p_u][dirac][t1])); //// } // printf("\n"); // printf("p_u = %02d\n", p_u); // } // } // } // // } #endif // (old?) output routine and output to terminal time = clock() - time; std::cout << "\t\tSUCCESS - " << std::fixed << std::setprecision(1) << ((float) time)/CLOCKS_PER_SEC << " seconds" << std::endl; // ************************************************************************* // FOUR PT CONTRACTION 1 *************************************************** // ************************************************************************* #if 0 //4-point contraction 1 // setting the correlation function to zero std::cout << "\n\tcomputing the connected contribution of C4_1:\n"; time = clock(); // displacement not supported for 4pt functions atm displ_min = 0; displ_max = 0; std::cout << "\n\tcomputing the connected contribution of C4_1:\n"; time = clock(); // setting the correlation function to zero for(int p_u = 0; p_u < number_of_momenta; ++p_u) for(int p_d = 0; p_d < number_of_momenta; ++p_d) for(int dirac_u = 0; dirac_u < number_of_dirac; ++dirac_u) for(int dirac_d = 0; dirac_d < number_of_dirac; ++dirac_d) for(int t1 = 0; t1 < Lt; ++t1) C4_mes[p_u][p_d][dirac_u][dirac_d][t1] = std::complex<double>(0.0, 0.0); for(int t_source = 0; t_source < Lt; ++t_source){ for(int t_sink = 0; t_sink < Lt; ++t_sink){ int t_source_1 = (t_source + 1) % Lt; int t_sink_1 = (t_sink + 1) % Lt; for(int dirac_u = 0; dirac_u < number_of_dirac; ++dirac_u){ for(int dirac_d = 0; dirac_d < number_of_dirac; ++dirac_d){ for(int p_u = p_min; p_u < p_max; ++p_u) { for(int p_d = p_min; p_d < p_max; ++p_d) { if(mom_squared[p_u] <= mom_squared[p_d]){ // complete diagramm // every quark line must have its own random vec for(int rnd1 = 0; rnd1 < number_of_rnd_vec; ++rnd1){ for(int rnd3 = rnd1 + 1; rnd3 < number_of_rnd_vec; ++rnd3){ for(int rnd2 = 0; rnd2 < number_of_rnd_vec; ++rnd2){ if((rnd2 != rnd1) && (rnd2 != 
rnd3)){ for(int rnd4 = rnd2 + 1; rnd4 < number_of_rnd_vec; ++rnd4){ if((rnd4 != rnd1) && (rnd4 != rnd3)){ C4_mes[p_u][p_d][dirac_u][dirac_d] [abs((t_sink - t_source - Lt) % Lt)] += (Corr[p_u] [number_of_momenta - p_d - 1] [dirac_u][dirac_d][0][0] [t_source_1][t_sink_1][rnd1][rnd3]) * (Corr[number_of_momenta - p_u - 1] [p_d][dirac_u][dirac_d][0][0] [t_source][t_sink][rnd2][rnd4]); } } } } } } } } } } } } } // Normalization of 4pt-function. Accounts for all rnd-number combinations for(int t = 0; t < Lt; ++t){ for(int dirac_u = 0; dirac_u < number_of_dirac; ++dirac_u){ for(int dirac_d = 0; dirac_d < number_of_dirac; ++dirac_d){ for(int p_u = p_min; p_u < p_max; ++p_u) { for(int p_d = p_min; p_d < p_max; ++p_d) { if(mom_squared[p_u] <= mom_squared[p_d]){ C4_mes[p_u][p_d][dirac_u][dirac_d][t] /= norm1; } } } } } } // output to binary file // see output to binary file for C2. // write into folders with suffix "_unsuppressed". These only include // correlators of the diagonal matrix elements of the GEVP for which // the three-momentum remains unchanged for both quarks. Because the // quarks have to be back-to-back, for the offdiagonal elements this // cannot occur. 
The suppression can be interpreted as Zweig-suppressed // gluon exchange for(int dirac_u = 0; dirac_u < number_of_dirac; ++dirac_u){ for(int dirac_d = 0; dirac_d < number_of_dirac; ++dirac_d){ for(int p = 0; p <= max_mom_squared; p++){ sprintf(outfile, "%s/dirac_%02d_%02d_p_%01d_%01d_displ_%01d_%01d_unsuppressed/" "C4_1_conf%04d.dat", outpath.c_str(), dirac_min + dirac_u, dirac_min + dirac_d, p, p, displ_min, displ_max, config_i); if((fp = fopen(outfile, "wb")) == NULL) std::cout << "fail to open outputfile" << std::endl; for(int p_u = p_min; p_u < p_max; ++p_u){ if(mom_squared[p_u] == p){ fwrite((double*) &(C4_mes[p_u][p_u][dirac_u][dirac_d][0]), sizeof(double), 2 * Lt, fp); } } fclose(fp); } } } // to build a GEVP, the correlators are written into a seperate folder // for every dirac structure, momentum, (entry of the GEVP matrix). // displacement is not supported at the moment for(int dirac_u = 0; dirac_u < number_of_dirac; ++dirac_u){ for(int dirac_d = 0; dirac_d < number_of_dirac; ++dirac_d){ for(int p1 = 0; p1 <= max_mom_squared; p1++){ for(int p2 = p1; p2 <= max_mom_squared; p2++){ sprintf(outfile, "%s/dirac_%02d_%02d_p_%01d_%01d_displ_%01d_%01d/" "C4_1_conf%04d.dat", outpath.c_str(), dirac_min + dirac_u, dirac_min + dirac_d, p1, p2, displ_min, displ_max, config_i); if((fp = fopen(outfile, "wb")) == NULL) std::cout << "fail to open outputfile" << std::endl; for(int p_u = p_min; p_u < p_max; ++p_u){ if(mom_squared[p_u] == p1){ for(int p_d = p_min; p_d < p_max; ++p_d){ if(mom_squared[p_d] == p2){ fwrite((double*) &(C4_mes[p_u][p_d][dirac_u][dirac_d][0]), sizeof(double), 2 * Lt, fp); } } } } fclose(fp); } } } } // output to terminal // printf("\n"); // for(int dirac = dirac_min; dirac < dirac_max + 1; ++dirac){ // printf("\tdirac = %02d\n", dirac); // for(int offset = 0; offset <= max_mom_squared; offset++){ // for(int p = 0; p <= max_mom_squared; p++){ // if((p + offset) <= max_mom_squared){ // for(int p_u = p_min; p_u < p_max; ++p_u){ // 
if((rewr.mom_squared[p_u] == p) && ((p + offset) <= max_mom_squared)){ // for(int p_d = p_min; p_d < p_max; ++p_d){ // if(rewr.mom_squared[p_d] == (p + offset)){ // //printf( // // "\t t\tRe(C4_1_con)\tIm(C4_1_con)\n\t----------------------------------\n"); //// for(int t1 = 0; t1 < Lt; ++t1){ //// printf("\t%02d\t%.5e\t%.5e\n", t1, real(C4_mes[p_u][p_d][dirac][dirac][t1]), //// imag(C4_mes[p_u][p_d][dirac][dirac][t1])); //// } // printf("\n"); // printf("p_u = %02d\tp_d = %02d\n", p_u, p_d); // } // } // } // } // } // printf("\n"); // } // } // printf("\n"); // } time = clock() - time; printf("\t\tSUCCESS - %.1f seconds\n", ((float) time)/CLOCKS_PER_SEC); #endif // 4-point contraction 1 // ************************************************************************* // FOUR PT CONTRACTION 2 *************************************************** // ************************************************************************* #if 0 // 4-point contraction 2 // setting the correlation function to zero std::cout << "\n\tcomputing the connected contribution of C4_2:\n"; time = clock(); for(int p_u = 0; p_u < number_of_momenta; ++p_u) for(int p_d = 0; p_d < number_of_momenta; ++p_d) for(int dirac_u = 0; dirac_u < number_of_dirac; ++dirac_u) for(int dirac_d = 0; dirac_d < number_of_dirac; ++dirac_d) for(int t1 = 0; t1 < Lt; ++t1) C4_mes[p_u][p_d][dirac_u][dirac_d][t1] = std::complex<double>(0.0, 0.0); for(int t_source = 0; t_source < Lt; ++t_source){ for(int t_sink = 0; t_sink < Lt - 1; ++t_sink){ int t_source_1 = (t_source + 1) % Lt; int t_sink_1 = (t_sink + 1) % Lt; for(int dirac_u = 0; dirac_u < number_of_dirac; ++dirac_u){ for(int dirac_d = 0; dirac_d < number_of_dirac; ++dirac_d){ for(int p_u = p_min; p_u < p_max; ++p_u) { for(int p_d = p_min; p_d < p_max; ++p_d) { if(mom_squared[p_u] <= mom_squared[p_d]){ // complete diagramm // every quark line must have its own random vec for(int rnd1 = 0; rnd1 < number_of_rnd_vec; ++rnd1){ for(int rnd3 = rnd1 + 1; rnd3 < number_of_rnd_vec; 
++rnd3){ for(int rnd2 = 0; rnd2 < number_of_rnd_vec; ++rnd2){ if((rnd2 != rnd1) && (rnd2 != rnd3)){ for(int rnd4 = rnd2 + 1; rnd4 < number_of_rnd_vec; ++rnd4){ if((rnd4 != rnd1) && (rnd4 != rnd3)){ C4_mes[p_u][p_d][dirac_u][dirac_d] [abs((t_sink - t_source - Lt) % Lt)] += (Corr[p_u][number_of_momenta - p_d - 1] [dirac_u][dirac_d][0][0][t_source_1][t_sink] [rnd1][rnd3]) * (Corr[number_of_momenta - p_u - 1][p_d] [dirac_u][dirac_d][0][0][t_source][t_sink_1] [rnd2][rnd4]); } } } } } } } } } } } } } // Normalization of 4pt-function. Accounts for all rnd-number combinations for(int t = 0; t < Lt; ++t){ for(int dirac_u = 0; dirac_u < number_of_dirac; ++dirac_u){ for(int dirac_d = 0; dirac_d < number_of_dirac; ++dirac_d){ for(int p_u = p_min; p_u < p_max; ++p_u) { for(int p_d = p_min; p_d < p_max; ++p_d) { if(mom_squared[p_u] <= mom_squared[p_d]){ C4_mes[p_u][p_d][dirac_u][dirac_d][t] /= norm1; } } } } } } // output to binary file // see output to binary file for C2. // write into folders with suffix "_unsuppressed". These only include // correlators of the diagonal matrix elements of the GEVP for which // the three-momentum remains unchanged for both quarks. Because the // quarks have to be back-to-back, for the offdiagonal elements this // cannot occur. 
The suppression can be interpreted as Zweig-suppressed // gluon exchange for(int dirac_u = 0; dirac_u < number_of_dirac; ++dirac_u){ for(int dirac_d = 0; dirac_d < number_of_dirac; ++dirac_d){ for(int p = 0; p <= max_mom_squared; p++){ sprintf(outfile, "%s/dirac_%02d_%02d_p_%01d_%01d_displ_%01d_%01d_unsuppressed/" "C4_2_conf%04d.dat", outpath.c_str(), dirac_min + dirac_u, dirac_min + dirac_d, p, p, displ_min, displ_max, config_i); if((fp = fopen(outfile, "wb")) == NULL) std::cout << "fail to open outputfile" << std::endl; for(int p_u = p_min; p_u < p_max; ++p_u){ if(mom_squared[p_u] == p){ fwrite((double*) &(C4_mes[p_u][p_u][dirac_u][dirac_d][0]), sizeof(double), 2 * Lt, fp); } } fclose(fp); } } } // to build a GEVP, the correlators are written into a seperate folder // for every dirac structure, momentum, (entry of the GEVP matrix). // displacement is not supported at the moment for(int dirac_u = 0; dirac_u < number_of_dirac; ++dirac_u){ for(int dirac_d = 0; dirac_d < number_of_dirac; ++dirac_d){ for(int p1 = 0; p1 <= max_mom_squared; p1++){ for(int p2 = p1; p2 <= max_mom_squared; p2++){ sprintf(outfile, "%s/dirac_%02d_%02d_p_%01d_%01d_displ_%01d_%01d/" "C4_2_conf%04d.dat", outpath.c_str(), dirac_min + dirac_u, dirac_min + dirac_d, p1, p2, displ_min, displ_max, config_i); if((fp = fopen(outfile, "wb")) == NULL) std::cout << "fail to open outputfile" << std::endl; for(int p_u = p_min; p_u < p_max; ++p_u){ if(mom_squared[p_u] == p1){ for(int p_d = p_min; p_d < p_max; ++p_d){ if(mom_squared[p_d] == p2){ fwrite((double*) &(C4_mes[p_u][p_d][dirac_u][dirac_d][0]), sizeof(double), 2 * Lt, fp); } } } } fclose(fp); } } } } // output to terminal // printf("\n"); // for(int dirac = dirac_min; dirac < dirac_max + 1; ++dirac){ // printf("\tdirac = %02d\n", dirac); // for(int offset = 0; offset <= max_mom_squared; offset++){ // for(int p = 0; p <= max_mom_squared; p++){ // if((p + offset) <= max_mom_squared){ // for(int p_u = p_min; p_u < p_max; ++p_u){ // 
if((rewr.mom_squared[p_u] == p) && ((p + offset) <= max_mom_squared)){ // for(int p_d = p_min; p_d < p_max; ++p_d){ // if(rewr.mom_squared[p_d] == (p + offset)){ // //printf( // // "\t t\tRe(C4_2_con)\tIm(C4_2_con)\n\t----------------------------------\n"); //// for(int t1 = 0; t1 < Lt; ++t1){ //// printf("\t%02d\t%.5e\t%.5e\n", t1, real(C4_mes[p][p][dirac][dirac][t1]), //// imag(C4_mes[p][p][dirac][dirac][t1])); //// } // printf("\n"); // printf("p_u = %02d\tp_d = %02d\n", p_u, p_d); // } // } // } // } // } // printf("\n"); // } // } // printf("\n"); // } time = clock() - time; printf("\t\tSUCCESS - %.1f seconds\n", ((float) time)/CLOCKS_PER_SEC); #endif // 4-point contraction 2 // ************************************************************************* // FOUR PT CONTRACTION 3 *************************************************** // ************************************************************************* // TODO: check dirac indices. maybe dirac(t_source) and dirac(t_sink) have // to be equal or there may be four different structures rather than u- and // d-quark always having the same dirac structure // doesn't matter as long as all used dirac structures are equal #if 0 // 4-point contraction 3 std::cout << "\n\tcomputing the connected contribution of C4_3:\n"; time = clock(); // setting the correlation function to zero for(int p_u = 0; p_u < number_of_momenta; ++p_u) for(int p_d = 0; p_d < number_of_momenta; ++p_d) for(int dirac_u = 0; dirac_u < number_of_dirac; ++dirac_u) for(int dirac_d = 0; dirac_d < number_of_dirac; ++dirac_d) for(int t1 = 0; t1 < Lt; ++t1) C4_mes[p_u][p_d][dirac_u][dirac_d][t1] = std::complex<double>(0.0, 0.0); for(int t_source = 0; t_source < Lt; ++t_source){ for(int t_sink = 0; t_sink < Lt; ++t_sink){ int t_source_1 = (t_source + 1) % Lt; int t_sink_1 = (t_sink + 1) % Lt; // initialize basic->contraction[] // p_u = number_of_momenta/2 and the break; statement arrange // for one-to-all calculation in momentum space. 
(only one source // momentum is used. the first five are {(0,0,0), (0,0,1), // (0,1,-1), (1,-1,-1), (0,0,2)} for(int dirac_u = 0; dirac_u < number_of_dirac; ++dirac_u){ for(int p = 0; p <= max_mom_squared; p++){ for(int p_u = number_of_momenta/2; p_u < p_max; ++p_u){ if(mom_squared[p_u] == p){ basic->init_operator_u(0, t_source, t_sink, rewr, 'b', p_u, 0); basic->init_operator_u(1, t_source_1, t_sink_1, rewr, 'b', number_of_momenta - p_u - 1, 0); break; } } } } // initialize basic->contraction_dagger[] // build all momenta for sinks for(int dirac_d = 0; dirac_d < number_of_dirac; ++dirac_d){ for(int p_d = p_min; p_d < p_max; ++p_d){ basic->init_operator_d(0, t_source_1, t_sink, rewr, 'b', p_d, 0); basic->init_operator_d(1, t_source, t_sink_1, rewr, 'b', number_of_momenta - p_d - 1, 0); } } // build 4pt-function C4_mes for pi^+pi^+ Equivalent two just summing // up the four-trace with same time difference between source and sink // (all to all) for every dirac structure, momentum // displacement not supported at the moment // to build the trace with four matrices, build combinations // X = D_d^-1(t_sink | t_source + 1) // Gamma D_u^-1(t_source + 1 | t_sink + 1) Gamma // Y = D_d^-1(t_sink + 1| t_source) // Gamma D_u^-1(t_source| t_sink) Gamma // these have dimension // (4 * quarks[0].number_of_dilution_E) x (4 * // quarks[0].number_of_dilution_E) // thus the multiplication in this order is fastest for(int dirac_u = 0; dirac_u < number_of_dirac; ++dirac_u){ for(int p = 0; p <= max_mom_squared; p++){ for(int p_u = number_of_momenta / 2; p_u < p_max; ++p_u) { if(mom_squared[p_u] == p){ for(int dirac_d = 0; dirac_d < number_of_dirac; ++dirac_d){ for(int p_d = p_min; p_d < p_max; ++p_d) { if(mom_squared[p_u] <= mom_squared[p_d]){ // initialisation of X. 
rnd loops and if-statements rule // forbidden randomvector combinations (to improve // statistical error never use the same randomvector // for different indices basic->get_operator_g5(op_2, 0, dirac_min + dirac_d, number_of_momenta - p_d - 1); basic->get_operator_charged(op_3, 1, t_sink_1, &rewr, dirac_min + dirac_u, number_of_momenta - p_u - 1); // second u quark: t_source_1 -> t_sink_1 for(int rnd3 = 1; rnd3 < number_of_rnd_vec; ++rnd3){ for(int rnd2 = 0; rnd2 < number_of_rnd_vec; ++rnd2){ if(rnd2 != rnd3){ // first d quark: t_sink_1 -> t_source for(int rnd4 = rnd2 + 1; rnd4 < number_of_rnd_vec; ++rnd4){ if(rnd4 != rnd3){ X[rnd3][rnd2][rnd4] = op_2[rnd3] * op_3[rnd2][rnd4] ; } } } } } // initialisation of Y. see initialisation of X basic->get_operator_g5(op_4, 1, dirac_min + dirac_d, p_d); basic->get_operator_charged(op_1, 0, t_sink, rewr, dirac_min + dirac_u, p_u); // first u quark: t_source -> t_sink for(int rnd1 = 0; rnd1 < number_of_rnd_vec; ++rnd1){ for(int rnd3 = rnd1 + 1; rnd3 < number_of_rnd_vec; ++rnd3){ // second d quark: t_sink -> t_source_1 for(int rnd4 = 1; rnd4 < number_of_rnd_vec; ++rnd4){ if((rnd4 != rnd1) && (rnd4 != rnd3)){ Y[rnd4][rnd1][rnd3] = op_4[rnd4] * op_1[rnd1][rnd3]; } } } } // complete diagramm. combine X and Y to four-trace // C4_mes = tr(D_u^-1(t_source| t_sink) Gamma // D_d^-1(t_sink | t_source + 1) Gamma // D_u^-1(t_source + 1 | t_sink + 1) Gamma // D_d^-1(t_sink + 1| t_source) Gamma) // every quark line must have its own random vec for(int rnd1 = 0; rnd1 < number_of_rnd_vec; ++rnd1){ for(int rnd3 = rnd1 + 1; rnd3 < number_of_rnd_vec; ++rnd3){ for(int rnd2 = 0; rnd2 < number_of_rnd_vec; ++rnd2){ if((rnd2 != rnd1) && (rnd2 != rnd3)){ for(int rnd4 = rnd2 + 1; rnd4 < number_of_rnd_vec; ++rnd4){ if((rnd4 != rnd1) && (rnd4 != rnd3)){ C4_mes[p_u][p_d][dirac_u][dirac_d] [abs((t_sink - t_source - Lt) % Lt)] += ((X[rnd3][rnd2][rnd4] * Y[rnd4][rnd1][rnd3]).trace()); } } } } } } } } } break; } } } } } } // Normalization of 4pt-function. 
Accounts for all rnd-number combinations for(int p1 = 0; p1 < number_of_momenta; ++p1) for(int p2 = 0; p2 < number_of_momenta; ++p2) for(int dirac_u = 0; dirac_u < number_of_dirac; ++dirac_u) for(int dirac_d = 0; dirac_d < number_of_dirac; ++dirac_d) for(int t = 0; t < Lt; ++t) C4_mes[p1][p2][dirac_u][dirac_d][t] /= norm1; // output to binary file // see output to binary file for C2. // for the C4_3 diagram the four propagators are connected in the same // trace. Thus there are no gluon lines which could be cut to create a // disconnected diagrams and thus no Zweig suppression. // To build a GEVP, the correlators are written into a seperate folder // for every dirac structure, momentum, (entry of the GEVP matrix). // displacement is not supported at the moment for(int dirac_u = 0; dirac_u < number_of_dirac; ++dirac_u){ for(int dirac_d = 0; dirac_d < number_of_dirac; ++dirac_d){ for(int p1 = 0; p1 <= max_mom_squared; p1++){ for(int p2 = p1; p2 <= max_mom_squared; p2++){ sprintf(outfile, "%s/dirac_%02d_%02d_p_%01d_%01d_displ_%01d_%01d/" "C4_3_conf%04d.dat", outpath.c_str(), dirac_min + dirac_u, dirac_min + dirac_d, p1, p2, displ_min, displ_max, config_i); if((fp = fopen(outfile, "wb")) == NULL) std::cout << "fail to open outputfile" << std::endl; for(int p_u = number_of_momenta / 2; p_u < p_max; ++p_u){ if(mom_squared[p_u] == p1){ for(int p_d = p_min; p_d < p_max; ++p_d){ if(mom_squared[p_d] == p2){ fwrite((double*) &(C4_mes[p_u][p_d][dirac_u][dirac_d][0]), sizeof(double), 2 * Lt, fp); } } break; } } fclose(fp); } } } } #if 0 sprintf(outfile, "%s/dirac_%02d_%02d_p_0_%01d_displ_%01d_%01d/C4_3_conf%04d.dat", outpath.c_str(), dirac_min, dirac_max, 0, displ_min, displ_max, config_i); if((fp = fopen(outfile, "wb")) == NULL) std::cout << "fail to open outputfile" << std::endl; for(int dirac = dirac_min; dirac < dirac_max + 1; ++dirac) fwrite((double*) C4_mes[number_of_momenta/2] [number_of_momenta/2][dirac][dirac], sizeof(double), 2 * Lt, fp); fclose(fp); #endif // output 
to terminal // printf("\n"); // for(int dirac = dirac_min; dirac < dirac_max + 1; ++dirac){ // printf("\tdirac = %02d\n", dirac); // for(int p = p_min; p < p_max; ++p) { // printf("\tmomentum = %02d\n", p); // //printf( // // "\t t\tRe(C4_3_con)\tIm(C4_3_con)\n\t----------------------------------\n"); // for(int t1 = 0; t1 < Lt; ++t1){ // printf("\t%02d\t%.5e\t%.5e\n", t1, real(C4_mes[p][p][dirac][dirac][t1]), // imag(C4_mes[p][p][dirac][dirac][t1])); // } // printf("\n"); // } // printf("\n"); // } time = clock() - time; printf("\t\tSUCCESS - %.1f seconds\n", ((float) time)/CLOCKS_PER_SEC); #endif // 4-point contraction 3 // ************************************************************************* // FOUR PT CONTRACTION 4 *************************************************** // ************************************************************************* // identical to FOUR PT CONTRACTION 3 } // loop over configs ends here // TODO: freeing all memory!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! delete rewr; delete basic; }
int main (int argc, char *argv[]) { if ((argc < 3) || (argc > 4)) usage (argv[0]); std::string mapfile = argv[1]; std::string quad_list_prefix = argv[2]; Healpix_Map<double> map; read_Healpix_map_from_fits (mapfile, map); bool have_mask = false; Healpix_Map<double> mask; if (argc == 4) { read_Healpix_map_from_fits (argv[3], mask); have_mask = true; } // Figure out how many bins there are by trying to open files. std::vector<std::string> quad_list_files = Npoint_Functions::get_range_file_list(quad_list_prefix, 0, 180); std::vector<double> bin_list(quad_list_files.size()); std::vector<double> Corr(quad_list_files.size()); #pragma omp parallel shared(Corr, bin_list, quad_list_files) { Npoint_Functions::Quadrilateral_List_File<int> qlf; #pragma omp for schedule(dynamic,2) for (size_t k=0; k < quad_list_files.size(); ++k) { if (! qlf.initialize (quad_list_files[k])) { std::cerr << "Error initializing quadrilateral list from " << quad_list_files[k] << std::endl; std::exit(1); } if (static_cast<size_t>(map.Nside()) != qlf.Nside()) { std::cerr << "Map has Nside = " << map.Nside() << " but quad list has Nside = " << qlf.Nside() << "\nGiving up!\n"; std::exit(1); } if (map.Scheme() != qlf.Scheme()) map.swap_scheme(); if (have_mask) { if (static_cast<size_t>(mask.Nside()) != qlf.Nside()) { std::cerr << "Mask and quadrilateral lists do not have" << " the same Nside: " << mask.Nside() << " != " << qlf.Nside() << std::endl; std::exit(1); } if (mask.Scheme() != qlf.Scheme()) mask.swap_scheme(); } #pragma omp critical { std::cerr #ifdef OMP << omp_get_thread_num() << " " #endif << k << std::endl; } bin_list[k] = qlf.bin_value(); if (have_mask) { Corr[k] = calculate_masked_fourpoint_function (map, mask, qlf); } else { Corr[k] = calculate_fourpoint_function (map, qlf); } } } for (size_t k=0; k < bin_list.size(); ++k) { // Same format as spice std::cout << bin_list[k]*M_PI/180 << " " << cos(bin_list[k]*M_PI/180) << " " << Corr[k] << std::endl; } return 0; }
/** * @brief Apply the whole algorithm of K-SVD * * @param img_noisy : pointer to an allocated array containing * the original noisy image; * @param img_denoised : pointer to an allocated array which * will contain the final denoised image; * @param patches : matrix containing all patches including in * img_noisy; * @param dictionary : initial random dictionary, which will be * updated in each iteration of the algo; * @param sigma : noise value; * @param N1 : size of patches (N1 x N1); * @param N2 : number of atoms in the dictionary; * @param N_iter : number of iteration; * @param gamma : value used in the correction matrix in the * case of color image; * @param C : coefficient used for the stopping criteria of * the ORMP; * @param width : width of both images; * @param height : height of both images; * @param chnls : number of channels of both images; * @param doReconstruction : if true, do the reconstruction of * the final denoised image from patches * (only in the case of the acceleration * trick). * * @return none. **/ void ksvd_process(matD_t &patches, matD_t &dictionary, matD_t &gamma, const unsigned N1, // size of features (i.e. 324) const unsigned N2, // size of the dictionary (i.e. 1000) const unsigned N_iter, // i.e. 40 const double C) { //! Declarations const unsigned N1_2 = N1; const double corr = 0; //(sqrtl(1.0l + gamma) - 1.0l) / ((double) N1_2); const unsigned chnls = 1; const double eps = ((double) (N1_2)) * C * C; const unsigned h_p = patches[0].size(); const unsigned w_p = patches.size(); //! Mat & Vec initializations matD_t dict_ormp (N2 , vecD_t(h_p, 0.0l)); matD_t patches_ormp(w_p, vecD_t(h_p, 0.0l)); matD_t tmp (h_p, vecD_t(N2, 0.0l)); vecD_t normCol (N2); matD_t Corr (h_p, vecD_t(h_p, 0.0l)); vecD_t U (h_p); vecD_t V; matD_t E (w_p, vecD_t(h_p)); //! 
Vector for ORMP matD_t ormp_val (w_p, vecD_t ()); matU_t ormp_ind (w_p, vecU_t ()); matD_t res_ormp (N2, vecD_t (w_p)); matU_t omega_table (N2, vecU_t ()); vecU_t omega_size_table(N2, 0); matD_t alpha (N2, vecD_t ()); // this is a function parameter //! To avoid reallocation of memory for (unsigned k = 0; k < w_p; k++) { ormp_val[k].reserve(N2); ormp_ind[k].reserve(N2); } for (matU_t::iterator it = omega_table.begin(); it < omega_table.end(); it++) it->reserve(w_p); V.reserve(w_p); //! Correcting matrix for (unsigned i = 0; i < h_p; i++) Corr[i][i] = 1.0l; for (unsigned c = 0; c < 1; c++) { matD_t::iterator it_Corr = Corr.begin() + N1_2 * c; for (unsigned i = 0; i < N1_2; i++, it_Corr++) { iterD_t it = it_Corr->begin() + N1_2 * c; for (unsigned j = 0; j < N1_2; j++, it++) (*it) += corr; } } #pragma omp parallel for for (int j = 0; j < w_p; j++) { for (unsigned c = 0; c < chnls; c++) { iterD_t it_ormp = patches_ormp[j].begin() + c * N1_2; iterD_t it = patches[j].begin() + c * N1_2; for (unsigned i = 0; i < N1_2; i++, it++, it_ormp++) { double val = 0.0l; iterD_t it_tmp = patches[j].begin() + c * N1_2; for (unsigned k = 0; k < N1_2; k++, it_tmp++) val += corr * (*it_tmp); (*it_ormp) = val + (*it); } } } //! 
Big loop for (unsigned iter = 0; iter < N_iter; iter++) { std::cout << "Step " << iter + 1 << ":" << std::endl; std::cout << " - Sparse coding" << std::endl; for (unsigned i = 0; i < h_p; i++) { iterD_t it_tmp = tmp[i].begin(); for (unsigned j = 0; j < N2; j++, it_tmp++) { double val = 0.0l; iterD_t it_corr_i = Corr[i].begin(); iterD_t it_dict_j = dictionary[j].begin(); for (unsigned k = 0; k < h_p; k++, it_corr_i++, it_dict_j++) val += (*it_corr_i) * (*it_dict_j); (*it_tmp) = val * val; } } iterD_t it_normCol = normCol.begin(); for (unsigned j = 0; j < N2; j++, it_normCol++) { double val = 0.0l; for (unsigned i = 0; i < h_p; i++) val += tmp[i][j]; (*it_normCol) = 1.0l / sqrtl(val); } for (unsigned i = 0; i < h_p; i++) { iterD_t it_normCol_j = normCol.begin(); for (unsigned j = 0; j < N2; j++, it_normCol_j++) { double val = 0.0l; iterD_t it_corr_i = Corr[i].begin(); iterD_t it_dict_j = dictionary[j].begin(); for (unsigned k = 0; k < h_p; k++, it_corr_i++, it_dict_j++) val += (*it_corr_i) * (*it_dict_j); dict_ormp[j][i] = val * (*it_normCol_j); } } //! ORMP process std::cout << " - ORMP process" << std::endl; ormp_process(patches_ormp, dict_ormp, ormp_ind, ormp_val, N2, eps); for (unsigned i = 0; i < w_p; i++) { iterU_t it_ind = ormp_ind[i].begin(); iterD_t it_val = ormp_val[i].begin(); const unsigned size = ormp_val[i].size(); for (unsigned j = 0; j < size; j++, it_ind++, it_val++) (*it_val) *= normCol[*it_ind]; } //! Residus for (unsigned i = 0; i < N2; i++) { omega_size_table[i] = 0; omega_table[i].clear(); alpha[i].clear(); for (iterD_t it = res_ormp[i].begin(); it < res_ormp[i].end(); it++) *it = 0.0l; } for (unsigned i = 0; i < w_p; i++) { iterU_t it_ind = ormp_ind[i].begin(); iterD_t it_val = ormp_val[i].begin(); for (unsigned j = 0; j < ormp_val[i].size(); j++, it_ind++, it_val++) { omega_table[*it_ind].push_back(i); omega_size_table[*it_ind]++; alpha[*it_ind].push_back(*it_val); res_ormp[*it_ind][i] = *it_val; } } //! 
Dictionary update std::cout << " - Dictionary update" << std::endl; for (unsigned l = 0; l < N2; l++) { //! Initializations const unsigned omega_size = omega_size_table[l]; iterD_t it_dict_l = dictionary[l].begin(); iterD_t it_alpha_l = alpha[l].begin(); iterU_t it_omega_l = omega_table[l].begin(); U.assign(U.size(), 0.0l); if (omega_size > 0) { iterD_t it_a = it_alpha_l; iterU_t it_o = it_omega_l; for (unsigned j = 0; j < omega_size; j++, it_a++, it_o++) { iterD_t it_d = it_dict_l; iterD_t it_e = E[j].begin(); iterD_t it_p = patches[*it_o].begin(); for (unsigned i = 0; i < h_p; i++, it_d++, it_e++, it_p++) (*it_e) = (*it_p) + (*it_d) * (*it_a); } matD_t::iterator it_res = res_ormp.begin(); for (unsigned k = 0; k < N2; k++, it_res++) { iterU_t it_o = it_omega_l; iterD_t it_dict_k = dictionary[k].begin(); for (unsigned j = 0; j < omega_size; j++, it_o++) { const double val = (*it_res)[*it_o]; if (fabs(val) > 0.0l) { iterD_t it_d = it_dict_k; iterD_t it_e = E[j].begin(); for (unsigned i = 0; i < h_p; i++, it_d++, it_e++) (*it_e) -= (*it_d) * val; } } } //! SVD truncated V.resize(omega_size); double S = svd_trunc(E, U, V); dictionary[l] = U; it_a = it_alpha_l; iterD_t it_v = V.begin(); it_o = it_omega_l; for (unsigned j = 0; j < omega_size; j++, it_a++, it_v++, it_o++) res_ormp[l][*it_o] = (*it_a) = (*it_v) * S; } } std::cout << " - done." << std::endl; } // USE omega_table, omega_size_table, and alpha information // above to build the gamma matrix // the size of the gamma matrix should be (sizeofdict)x(numofpatches) for(unsigned i = 0; i < N2; i++) { for(unsigned j = 0; j < omega_size_table[i]; j++) { unsigned pI = omega_table[i].at(j); float alphaV = alpha[i].at(j); gamma[pI].at(i) = alphaV; } } }
int main (int argc, char *argv[]) { if ((argc < 5) || (argc > 6)) usage (argv[0]); std::string quad_list_prefix = argv[1]; std::string alm_dir = argv[2]; size_t Nstart, Nend; if (! Npoint_Functions::from_string (argv[3], Nstart)) { std::cerr << "Could not parse Nstart\n"; usage (argv[0]); } if (! Npoint_Functions::from_string (argv[4], Nend)) { std::cerr << "Could not parse Nend\n"; usage (argv[0]); } bool have_mask = false; Healpix_Map<double> mask; if (argc == 6) { read_Healpix_map_from_fits (argv[5], mask); have_mask = true; } // Figure out how many bins there are by trying to open files. std::vector<std::string> quad_list_files = Npoint_Functions::get_range_file_list(quad_list_prefix, 0, 400); if (quad_list_files.size() == 0) { std::cerr << "No quad list files found!\n"; usage (argv[0]); } int Lmax; std::vector<Healpix_Map<double> > maps (Nend-Nstart); // Make maps { Npoint_Functions::Quadrilateral_List_File<int> qlf; qlf.initialize (quad_list_files[0]); if (have_mask) { if (static_cast<size_t>(mask.Nside()) != qlf.Nside()) { std::cerr << "Mask and quadrilateral lists do not have" << " the same Nside: " << mask.Nside() << " != " << qlf.Nside() << std::endl; std::exit(1); } if (mask.Scheme() != qlf.Scheme()) mask.swap_scheme(); } Lmax = std::min(200UL, 4*qlf.Nside()+1); //#pragma omp parallel shared(qlf, maps) { Alm<xcomplex<double> > alm (Lmax, Lmax); //#pragma omp for schedule(static) for (size_t k=0; k < maps.size(); ++k) { read_Alm_from_fits (dirtree::filename(alm_dir, "alm_T_", ".fits", k+Nstart), alm, Lmax, Lmax); maps[k].SetNside (qlf.Nside(), RING); alm2map (alm, maps[k]); if (maps[k].Scheme() != qlf.Scheme()) maps[k].swap_scheme(); } } } std::vector<double> bin_list(quad_list_files.size()); /* We will generate this by bin for each map so make the bin number the * first index. 
*/ std::vector<std::vector<double> > Corr(quad_list_files.size()); #pragma omp parallel shared(Corr, bin_list, quad_list_files, maps, mask) { Npoint_Functions::Quadrilateral_List_File<int> qlf; #pragma omp for schedule(dynamic,2) for (size_t k=0; k < quad_list_files.size(); ++k) { if (! qlf.initialize (quad_list_files[k])) { std::cerr << "Error initializing quadrilateral list from " << quad_list_files[k] << std::endl; std::exit(1); } bin_list[k] = qlf.bin_value(); if (have_mask) { Npoint_Functions::calculate_masked_fourpoint_function_list (maps, mask, qlf, Corr[k]); } else { Npoint_Functions::calculate_fourpoint_function_list (maps, qlf, Corr[k]); } } } std::cout << "# LCDM four point function from " << quad_list_prefix << std::endl; std::cout << "# First line is bin values, rest are the four point function.\n"; for (size_t k=0; k < bin_list.size(); ++k) { std::cout << bin_list[k] << " "; } std::cout << std::endl; for (size_t j=0; j < maps.size(); ++j) { for (size_t k=0; k < bin_list.size(); ++k) { std::cout << Corr[k][j] << " "; } std::cout << std::endl; } return 0; }