int GAStatistics::write(STD_OSTREAM & os) const { os << curgen << "\t# current generation\n"; os << convergence() << "\t# current convergence\n"; os << numsel << "\t# number of selections since initialization\n"; os << numcro << "\t# number of crossovers since initialization\n"; os << nummut << "\t# number of mutations since initialization\n"; os << numrep << "\t# number of replacements since initialization\n"; os << numeval << "\t# number of genome evaluations since initialization\n"; os << numpeval << "\t# number of population evaluations since initialization\n"; os << maxever << "\t# maximum score since initialization\n"; os << minever << "\t# minimum score since initialization\n"; os << on << "\t# average of all scores ('on-line' performance)\n"; os << offmax << "\t# average of maximum scores ('off-line' performance)\n"; os << offmin << "\t# average of minimum scores ('off-line' performance)\n"; os << "\n"; os << aveInit << "\t# mean score in initial population\n"; os << maxInit << "\t# maximum score in initial population\n"; os << minInit << "\t# minimum score in initial population\n"; os << devInit << "\t# standard deviation of initial population\n"; os <<divInit<<"\t# diversity of initial population (0=identical,-1=unset)\n"; os << "\n"; os << aveCur << "\t# mean score in current population\n"; os << maxCur << "\t# maximum score in current population\n"; os << minCur << "\t# minimum score in current population\n"; os << devCur << "\t# standard deviation of current population\n"; os << divCur<<"\t# diversity of current population (0=identical,-1=unset)\n"; os << "\n"; os << Nconv << "\t# how far back to look for convergence\n"; os << scoreFreq << "\t# how often to record scores\n"; os << Nscrs << "\t# how often to write scores to file\n"; os << scorefile << "\t# name of file to which scores are written\n"; return 0; }
/* * Entry point, main method */ int main (int argc, char * argv[]) { // Check input arguments if (argc != 4) { printf("Incorrect number of arguments. Example:\n\n"); printf("$ ./disposable 0.1 0.1 4 <inputfile.txt\n\n"); printf("Exiting.\n"); return(1); } int i, j; // Loop variables double min, max; // Minimum and maximum DSVs unused = scanf("%d %d %d", &numBoxes, &numRows, &numCols); numThreads = atoi(argv[3]); affect = atof(argv[1]); epsilon = atof(argv[2]); // Allocate memory for Domain Specific Value arrays dsv = (double *) malloc(numBoxes * sizeof(double)); newDsv = (double *) malloc(numBoxes * sizeof(double)); // Read input file readInput(); // Take beginning time time_t time1 = time(NULL); clock_t tick = clock(); // Converge int counter = convergence (&max, &min); // Take end time and calculate difference time_t time2 = time(NULL); tick = clock() - tick; double timeDiff = difftime(time2, time1); // Print output printf("\n*********************************************************\n"); printf("dissipation converged in %d iterations,\n", counter); printf("\twith max DSV = %f and min DSV = %f\n", max, min); printf("\taffect rate = %f;\t\tepsilon = %f\n\n", affect, epsilon); printf("elapsed convergence loop time\t(clock): %d\n", (int) tick); printf("elapsed convergence loop time\t (time): %.0f\n", timeDiff); printf("**********************************************************\n\n\n"); // for (i = 0; i < numBoxes; i++) { // printf("dsv[%d] = %f\n", i, dsv[i]); // } free(grid); free(dsv); free(newDsv); return(0); pthread_exit(NULL); }
int main() { int dim = 50; char m[dim][dim]; for(int i=0; i < dim; i++) { for(int j=0; j < dim; j++) { m[i][j]=-1; } } for(int i=0; i <1000; i++){ printf("should be 1: %f\n", cos(i/1000.)*cos(i/1000.) + sin(i/1000.)*sin(i/1000.)); } convergence(dim,dim,1000,0.,0.,2*M_PI/(__float128)dim,2*M_PI/(__float128)dim,1,m); }
void print() const { LOG(INFO) << "============================================================\n"; LOG(INFO) << "Error Information\n"; LOG(INFO) << " exact : " << getSolution() << "\n"; LOG(INFO) << " params : "; boost::for_each( parameters, [](symbol const& s ) {LOG(INFO) << s.get_name() << " ";}); LOG(INFO) << "\n"; if ( !M_rhs.empty() ) { LOG(INFO) << " rhs : " << getRhs() << "\n"; } LOG(INFO) << " convergence : " << convergence() << "\n"; LOG(INFO) << " convergence steps : " << numberOfConvergenceSteps() << "\n"; }
void WordSegmentation::engine() { wordMessage * tempWord = this->currentImage->wordHead; while(tempWord != NULL) { convergence(); calcAspectRatio(); verticalDivisibleCheck(tempWord); horizontalDivisibleCheck(tempWord); if(tempWord->verticalDivisible == DIVISIBLE) verticalDivision(tempWord); if(tempWord->horizontalDivisible == DIVISIBLE) horizontalDivision(tempWord); if(tempWord->verticalDivisible == UNDIVISIBLE && tempWord ->horizontalDivisible == UNDIVISIBLE) tempWord = tempWord->next; } }
void RIMLS::Fit() { double f = 0; Vec3d grad_f(0, 0, 0); do { int i = 0; do { double sumW, sumF; sumW = sumF = 0; Vec3d sumGW, sumGF, sumN; sumGW = sumGF = sumN = Vec3d(0.0, 0.0, 0.0); for (DataPoint& p : m_neighbors) { Vec3d px = m_x - p.pos(); double fx = px.dot(p.normal()); double alpha = 1.0; if (i > 0) { alpha = exp(-pow((fx - f) / m_sigmaR, 2)) * exp(-pow((p.normal() - grad_f).norm() / m_sigmaN, 2)); } double phi = exp(-pow(px.norm() / m_sigmaT, 2)); double w = alpha*phi; Vec3d grad_w = -2.0*alpha*phi*px / pow(m_sigmaT, 2); sumW += w; sumGW += grad_w; sumF += w*fx; sumGF += grad_w*fx; sumN += w*p.normal(); } f = sumF / sumW; grad_f = (sumGF - f*sumGW + sumN) / sumW; } while (++i<m_iter && !convergence()); m_x -= f*grad_f; } while ((f*grad_f).norm() > m_threshold); }
int jacobi() { int i,j,k; float sum=0; while (convergence(n)) { for (i=0; i<n; i++) { sum = 0; for (j=0; j<n; j++) { if(i != j) { sum = sum + a [i][j] * x[j]; } // end if statement } for (k=0; k<n; k++) { x[i] = (b[i]-sum)/a[i][i]; } } // end outside for loop } // end while loop printf("%f\n",x[i]); return k; }
void CG3::operator()(cudaColorSpinorField &x, cudaColorSpinorField &b) { // Check to see that we're not trying to invert on a zero-field source const double b2 = norm2(b); if(b2 == 0){ profile.TPSTOP(QUDA_PROFILE_INIT); printfQuda("Warning: inverting on zero-field source\n"); x=b; param.true_res = 0.0; param.true_res_hq = 0.0; return; } ColorSpinorParam csParam(x); csParam.create = QUDA_ZERO_FIELD_CREATE; cudaColorSpinorField x_prev(b, csParam); cudaColorSpinorField r_prev(b, csParam); cudaColorSpinorField temp(b, csParam); cudaColorSpinorField r(b); cudaColorSpinorField w(b); mat(r, x, temp); // r = Mx double r2 = xmyNormCuda(b,r); // r = b - Mx PrintStats("CG3", 0, r2, b2, 0.0); double stop = stopping(param.tol, b2, param.residual_type); if(convergence(r2, 0.0, stop, 0.0)) return; // First iteration mat(w, r, temp); double rAr = reDotProductCuda(r,w); double rho = 1.0; double gamma_prev = 0.0; double gamma = r2/rAr; cudaColorSpinorField x_new(x); cudaColorSpinorField r_new(r); axpyCuda(gamma, r, x_new); // x_new += gamma*r axpyCuda(-gamma, w, r_new); // r_new -= gamma*w // end of first iteration // axpbyCuda(a,b,x,y) => y = a*x + b*y int k = 1; // First iteration performed above double r2_prev; while(!convergence(r2, 0.0, stop, 0.0) && k<param.maxiter){ x_prev = x; x = x_new; r_prev = r; r = r_new; mat(w, r, temp); rAr = reDotProductCuda(r,w); r2_prev = r2; r2 = norm2(r); // Need to rearrange this! PrintStats("CG3", k, r2, b2, 0.0); gamma_prev = gamma; gamma = r2/rAr; rho = 1.0/(1. - (gamma/gamma_prev)*(r2/r2_prev)*(1.0/rho)); x_new = x; axCuda(rho,x_new); axpyCuda(rho*gamma,r,x_new); axpyCuda((1. - rho),x_prev,x_new); r_new = r; axCuda(rho,r_new); axpyCuda(-rho*gamma,w,r_new); axpyCuda((1.-rho),r_prev,r_new); double rr_old = reDotProductCuda(r_new,r); printfQuda("rr_old = %1.14lf\n", rr_old); k++; } if(k == param.maxiter) warningQuda("Exceeded maximum iterations %d", param.maxiter); // compute the true residual mat(r, x, temp); param.true_res = sqrt(xmyNormCuda(b, r)/b2); PrintSummary("CG3", k, r2, b2); return; }
int main(int argc, char **argv) { /* the times recorded are: start of programm, start of chain generation, end of chain generation, end of programm */ double time[4]; time[0] = time_of_day(); //----------------------------------------------------------------------------------------------------------------------------------------------- //----------------------------------------------------------------------------------------------------------------------------------------------- // open all neccessary files // general output file, contains basically everything which was written to the terminal FILE * output_file; output_file = fopen("output_file.dat", "w+"); FILE * conv_obs_file; conv_obs_file = fopen("convergence_obs.dat", "w+"); // pair correlation function can be weighted with potentials FILE * pair_correl_file; pair_correl_file = fopen("pair_correlation_function.dat", "w+"); //------------------------------------------------------------------------------ // all files connected to intramolecular interactions FILE * intra_pot_file; intra_pot_file = fopen("intramolecular_obs.dat", "w+"); FILE * intra_boltzman_factors_hist; intra_boltzman_factors_hist = fopen("intramolecular_factors_hist.dat", "w+"); FILE * intra_interactions_file; intra_interactions_file = fopen("intramolecular_interactions_hist.dat", "w+"); // FILE * intra_interactions_testfile; // intra_interactions_testfile = fopen("intramolecular_interactions_testhist.dat", "w+"); FILE * convergence_intraweights; convergence_intraweights = fopen("intramolecular_convergence_Z.dat", "w+"); fprintf(convergence_intraweights, "### convergence_point -- sum-of-boltzman-factors \n"); FILE * conv_intraobs_file; conv_intraobs_file = fopen("intramolecular_convergence_obs.dat", "w+"); //----------------------------------------------------------------------------------------------------------------------------------------------- //----------------------------------------------------------------------------------------------------------------------------------------------- // input and first output // hello message of the programm, always displayed! log_out(output_file, "\n-----------------------------------------------------------------------------------\n"); log_out(output_file, "Roulattice version 1.1, Copyright (C) 2015 Johannes Dietschreit\n"); log_out(output_file, "This program comes with ABSOLUTELY NO WARRANTY; for version details type '-info'.\n"); log_out(output_file, "This is free software, and you are welcome to redistribute it\n"); log_out(output_file, "under certain conditions; type '-license' for details.\n"); log_out(output_file, "\tQuestions and bug reports to: [email protected]\n"); log_out(output_file, "\tPlease include in published work based on Roulattice:\n"); log_out(output_file, "\t\tDietschreit, J. C. B.; Diestler, D. J.; Knapp, E. W.,\n"); log_out(output_file, "\t\tModels for Self-Avoiding Polymer Chains on the Tetrahedral Lattice.\n"); log_out(output_file, "\t\tMacromol. Theory Simul. 2014, 23, 452-463\n"); log_out(output_file, "-----------------------------------------------------------------------------------\n"); /* get the arguments from the comand line */ getArgs(output_file, argc, argv); //------------------------------------------------------------------------------------ // for reading DCD-files // this has to be early in the code, because it sets the variables ARG_numberofbeads and ARG_numberofframes! // variables concerning reading dcd molfile_timestep_t timestep; void *v; dcdhandle *dcd; int natoms; float sizeMB =0.0, totalMB = 0.0; // reading the dcd-file and setting global variables accordingly if (ARG_typeofrun==0) { natoms = 0; v = open_dcd_read(dcdFileName, "dcd", &natoms); if (!v) { fprintf(stderr, "ERROR: open_dcd_read failed for file %s\n", dcdFileName); return EXIT_FAILURE; } dcd = (dcdhandle *)v; sizeMB = ((natoms * 3.0) * dcd->nsets * 4.0) / (1024.0 * 1024.0); totalMB += sizeMB; log_out(output_file, "Read DCD: %d atoms, %d frames, size: %6.1fMB\n", natoms, dcd->nsets, sizeMB); timestep.coords = (float *)malloc(3*sizeof(float)*natoms); ARG_numberofbeads=dcd->natoms; ARG_numberofframes=dcd->nsets; } //------------------------------------------------------------------------------------ // print all the options to the screen so one can check whether the right thing gets computed print_set_options(output_file, ARG_typeofrun, ARG_flength, ARG_fflength, ARG_blength, ARG_numberofbeads, ARG_numberofframes, ARG_randomseed, ARG_bondlength, ARG_torsion, ARG_intra_potential, ARG_intra_parameter1, ARG_intra_parameter2); //----------------------------------------------------------------------------------------------------------------------------------------------- //----------------------------------------------------------------------------------------------------------------------------------------------- // physical constants pi = acos(-1.0); //----------------------------------------------------------------------------------------------------------------------------------------------- //----------------------------------------------------------------------------------------------------------------------------------------------- /* Initialize the most important variables */ // stuff with bond lengths const double inv_sqrt3 = 1.0/sqrt(3.0); const double bondlength = ARG_bondlength; double recast_factor; if (ARG_typeofrun<40){// this recasts walk on diamond lattice recast_factor = inv_sqrt3*bondlength; } else {// this is for runs on simple cubic lattice recast_factor = bondlength; } // variables with atom numbers etc const int last_atom = ARG_numberofbeads -1; const unsigned int number_of_torsions = ARG_numberofbeads -3; // number of frag, fragfags, endfrags, fragbricks, endbricks unsigned int numof_frags_bricks[5] = {0}; // fractions of the number of frames const unsigned long permill_frames = ARG_numberofframes / 1000; // used for convergence const unsigned long percent_frames = ARG_numberofframes / 100; // used for ramining time const unsigned long tenth_frames = ARG_numberofframes / 10; // used for error estimation int cent; int tenth; int counter; // counter which can be used at any parts of the main programm, should only be used locally in a loop // basic moves on the tetrahedral lattice, back and forth const int move[2][4][3] = { { {-1, -1, -1}, {1, 1, -1}, {1, -1, 1}, {-1, 1, 1} }, { {1, 1, 1}, {-1, -1, 1}, {-1, 1, -1}, {1, -1, -1} } }; // moves possible in SAW (no walking back) const int sawmoves[4][3] = { {1, 2, 3}, {0, 2, 3}, {0, 1, 3}, {0, 1, 2} }; //----------------------------------------------------------------------- // building bricks are used to put parts together which are pre-checked int ***building_bricks=NULL; int numberofbricks; if (ARG_typeofrun==13 || ARG_typeofrun==14 || ARG_typeofrun==23 || ARG_typeofrun==24 || ARG_typeofrun==33 || ARG_typeofrun==34){ // thise generates the bricks building_bricks = make_bricks_saw(building_bricks, move, sawmoves, ARG_blength, &numberofbricks, ARG_strictness); if (NULL==building_bricks[0][0]){ return EXIT_FAILURE; } log_out(output_file, "%d bricks were generated, with a length of %d \n", numberofbricks, ARG_blength); } //---------------------------------------------------------------------------------------------------------------------------------------------------------- //---------------------------------------------------------------------------------------------------------------------------------------------------------- /* Initialize beads vector and set all values to zero */ int **int_polymer; int_polymer = calloc(ARG_numberofbeads, sizeof(int *)); double **double_polymer; double_polymer = calloc(ARG_numberofbeads, sizeof(double *)); for (int dim1=0; dim1<ARG_numberofbeads;dim1++){ int_polymer[dim1] = calloc(3, sizeof(int *)); double_polymer[dim1] = calloc(3, sizeof(double *)); } //---------------------------------------------------------------------------------------------------------------------------------------------------------- //---------------------------------------------------------------------------------------------------------------------------------------------------------- // observables OBSERVABLE normal_obs; normal_obs.maxee = 0.0; // maximal stretch of polymer OBSERVABLE intra_obs[100]; //------------------------------------------------------- // initialization of all the OBSERVABLE variables //------------------------------------------------------- normal_obs.err_ee2 = (double *) calloc(11, sizeof(double)); if(NULL == normal_obs.err_ee2) { fprintf(stderr, "Allocation of ee2 variable failed! \n"); return EXIT_FAILURE; } normal_obs.err_rgyr = (double *) calloc(11, sizeof(double)); if(NULL == normal_obs.err_rgyr) { fprintf(stderr, "Allocation of rgyr variable failed! \n"); return EXIT_FAILURE; } if (ARG_intra_potential>0) { for (int dim1=0; dim1<100; dim1++) { intra_obs[dim1].err_ee2 =(double *) calloc(11, sizeof(double)); intra_obs[dim1].err_rgyr =(double *) calloc(11, sizeof(double)); if(NULL == intra_obs[dim1].err_ee2 || NULL == intra_obs[dim1].err_rgyr) { fprintf(stderr, "Allocation of ee2 or rgyr variable (intramolecular) failed! \n"); return EXIT_FAILURE; } } } // initializes the observables for torsional analysis (optional) if (ARG_torsion==1) { normal_obs.err_pt = (double *) calloc(11, sizeof(double)); if(NULL == normal_obs.err_pt) { fprintf(stderr, "Allocation of torsion variable failed! \n"); return EXIT_FAILURE; } if (ARG_intra_potential>0) { for (int dim1=0; dim1<100; dim1++) { intra_obs[dim1].pt = 0.0; intra_obs[dim1].err_pt = (double *) calloc(11, sizeof(double)); if(NULL == intra_obs[dim1].err_pt) { fprintf(stderr, "Allocation of torsion variable (intramolecular) failed! \n"); return EXIT_FAILURE; } } } } // initializes the observables for loss of solven accessible surface area analysis (optional) if (ARG_sasa==1){ normal_obs.err_dsasa = (double *) calloc(11, sizeof(double)); if(NULL == normal_obs.err_dsasa) { fprintf(stderr, "Allocation of D-SASA variable failed! \n"); return EXIT_FAILURE; } if (ARG_intra_potential>0) { for (int dim1=0; dim1<100; dim1++) { intra_obs[dim1].dsasa = 0.0; intra_obs[dim1].err_dsasa = (double *) calloc(11, sizeof(double)); if(NULL == intra_obs[dim1].err_dsasa) { fprintf(stderr, "Allocation of D-SASA variable (intramolecular) failed! \n"); return EXIT_FAILURE; } } } } // this is needed for the pair correlation function double *pair_correlation_obs; if (ARG_pair_correlation==1) { pair_correlation_obs = (double*) calloc(2*ARG_numberofbeads, sizeof(double)); if(NULL == pair_correlation_obs) { fprintf(stderr, "Allocation of pair_correlation_obs failed! \n"); return EXIT_FAILURE; } normal_obs.pair_corr = (double *) calloc(2*ARG_numberofbeads, sizeof(double)); if(NULL == normal_obs.pair_corr) { fprintf(stderr, "Allocation of pair_corr failed! \n"); return EXIT_FAILURE; } if (ARG_intra_potential>0) { for (int dim1=0; dim1<100; dim1++) { intra_obs[dim1].pair_corr = (double *) calloc(2*ARG_numberofbeads, sizeof(double)); if(NULL == intra_obs[dim1].pair_corr) { fprintf(stderr, "Allocation of pair_corr (intramolecular) failed! \n"); return EXIT_FAILURE; } } } } //------------------------- // this will provide a measure for entropy loss calculation unsigned long *attempts_successes; double *log_attempts; log_attempts = (double*) calloc(11, sizeof(double)); // set the number of entries in this list, it depends on the typeofrun, but not the saw-type // last entry is the recast number of attempts which provides a measure for the entropy loss //------------------------------------------------------------------------------------------- //------------------------------------------------------------------------------------------- // calculate variables which depend on the typeofrun switch (ARG_typeofrun) { // normal SAWX case 10: case 20: case 30: attempts_successes = calloc(2, sizeof(unsigned long)); break; // fSAWX case 11: case 21: case 31: attempts_successes = calloc(5, sizeof(unsigned long)); numof_frags_bricks[0] = (ARG_numberofbeads-1)/ARG_flength; break; // bSAWX case 13: case 23: case 33: attempts_successes = calloc(3, sizeof(unsigned long)); numof_frags_bricks[3] = (ARG_numberofbeads-1)/ARG_blength; break; // fb_SAWX case 14: case 24: case 34: attempts_successes = calloc(5, sizeof(unsigned long)); numof_frags_bricks[0] = (ARG_numberofbeads-1)/ARG_flength; numof_frags_bricks[3] = ARG_flength/ARG_blength; numof_frags_bricks[4] = (ARG_numberofbeads-1-ARG_flength*numof_frags_bricks[3])/ARG_blength; break; default: break; } //---------------------------------------------------------------------------------------------------------------------------------------------------------- //---------------------------------------------------------------------------------------------------------------------------------------------------------- // intra molecular interactions // boltzman-factors, the intramolecular energy, the enrgy time the boltzmanfactor (entropy) double *intra_boltzman_factors; double *intra_highest_boltzmanfactor; double *intra_energy; double **intra_sum_of_boltzfactors; double **intra_sum_of_enrgyboltz; int *intra_interactions_hist; // double *intra_interactions_testhist; double intra_max_factor = 0.0; double intra_min_factor = 0.0; // binning the energies of the intra-factors double intra_binmin; double intra_binmax; double intra_binwidth; int **intra_energybin; // these will be the parameter of the intra molecular force double *intra_parameter1; double *intra_parameter2; if (ARG_intra_potential > 0){ switch (ARG_intra_potential) { // 1-10 potential well + torsional potential, given energy values case 1: intra_boltzman_factors = (double*) calloc(1, sizeof(double)); intra_highest_boltzmanfactor = (double*) calloc(1, sizeof(double)); intra_energy = (double*) calloc(1, sizeof(double)); intra_sum_of_boltzfactors = (double**) calloc(1, sizeof(double)); intra_sum_of_enrgyboltz = (double**) calloc(1, sizeof(double*)); intra_sum_of_boltzfactors[0] = (double*) calloc(10, sizeof(double)); intra_sum_of_enrgyboltz[0] = (double*) calloc(10, sizeof(double)); intra_parameter1 = (double*) malloc(1 * sizeof(double)); intra_parameter2 = (double*) malloc(1 * sizeof(double)); intra_interactions_hist = calloc(ARG_numberofbeads, sizeof(int)); // intra_interactions_testhist = calloc(100, sizeof(double)); intra_parameter1[0] = ARG_intra_parameter1[0]; // nearest neighbor potential intra_parameter2[0] = ARG_intra_parameter2[0]; // torsion potential intra_binmin = -100.0; intra_binmax = 100.0; intra_binwidth = 1.0; intra_energybin = (int**) calloc(1, sizeof(int *)); intra_energybin[0] = (int*) calloc(((intra_binmax-intra_binmin)/intra_binwidth), sizeof(int)); break; // 1-10 potential well + torsional potential, given energy value range! 10x10 case 2: intra_boltzman_factors = (double*) calloc(100, sizeof(double)); intra_highest_boltzmanfactor = (double*) calloc(100, sizeof(double)); intra_energy = (double*) calloc(100, sizeof(double)); intra_sum_of_boltzfactors = (double**) calloc(100, sizeof(double)); intra_sum_of_enrgyboltz = (double**) calloc(100, sizeof(double*)); for (int dim1=0; dim1<100; ++dim1) { intra_sum_of_boltzfactors[dim1] = (double*) calloc(10, sizeof(double)); intra_sum_of_enrgyboltz[dim1] = (double*) calloc(10, sizeof(double)); } intra_parameter1 = (double*) malloc(10 * sizeof(double)); intra_parameter2 = (double*) malloc(10 * sizeof(double)); intra_interactions_hist = calloc(ARG_numberofbeads, sizeof(int)); // intra_interactions_testhist = calloc(100, sizeof(double)); for (int dim1=0; dim1<10; dim1++) { intra_parameter1[dim1] = ARG_intra_parameter1[0]+ ARG_intra_parameter1[1]*(double)dim1; // nearest neighbor potential intra_parameter2[dim1] = ARG_intra_parameter2[0]+ ARG_intra_parameter2[1]*(double)dim1; // torsion potential } intra_binmin = -100.0; intra_binmax = 100.0; intra_binwidth = 1.0; intra_energybin = (int**) calloc(100, sizeof(int *)); for (int dim1=0; dim1<100; ++dim1){ intra_energybin[dim1] = (int*) calloc(((intra_binmax-intra_binmin)/intra_binwidth), sizeof(int)); } break; default: fprintf(stderr, "This intramolecular potential doesn't exist! \n"); break; } } //---------------------------------------------------------------------------------------------------------------------------------------------------------- //---------------------------------------------------------------------------------------------------------------------------------------------------------- /* start of chain generation*/ time[1] = time_of_day(); for (unsigned long frame=0; frame<ARG_numberofframes; frame++){ tenth = frame/tenth_frames; switch(ARG_typeofrun){ case 0: dcd_to_polymer(double_polymer, v, natoms, ×tep, dcd, frame, ARG_numberofbeads); break; case 1: tetra_rw(int_polymer, move, ARG_numberofbeads); break; case 2: tetra_fww(int_polymer, move, sawmoves, ARG_numberofbeads); break; case 10: tetra_saw1(int_polymer, move, sawmoves, ARG_numberofbeads, attempts_successes); break; case 11: tetra_fsaw1(int_polymer, move, sawmoves, ARG_numberofbeads, ARG_flength, attempts_successes); break; case 13: tetra_bsaw1(int_polymer, move, sawmoves, ARG_numberofbeads, ARG_blength, numberofbricks, building_bricks, attempts_successes); break; // case 14: // here is something awfully wrong, can't find the mistake at the moment! // tetra_fb_saw1(int_polymer, move, sawmoves, ARG_numberofbeads, ARG_blength, numberofbricks, building_bricks, ARG_flength, attempts_successes); // break; case 20: tetra_saw2(int_polymer, move, sawmoves, ARG_numberofbeads, attempts_successes); break; case 21: tetra_fsaw2(int_polymer, move, sawmoves, ARG_numberofbeads, ARG_flength, attempts_successes); break; case 23: tetra_bsaw2(int_polymer, move, sawmoves, ARG_numberofbeads, ARG_blength, numberofbricks, building_bricks, attempts_successes); break; case 24: tetra_fb_saw2(int_polymer, move, sawmoves, ARG_numberofbeads, ARG_blength, numberofbricks, building_bricks, ARG_flength, attempts_successes); break; case 30: tetra_saw3(int_polymer, move, sawmoves, ARG_numberofbeads, attempts_successes); break; case 31: tetra_fsaw3(int_polymer, move, sawmoves, ARG_numberofbeads, ARG_flength, attempts_successes); break; case 33: tetra_bsaw3(int_polymer, move, sawmoves, ARG_numberofbeads, ARG_blength, numberofbricks, building_bricks, attempts_successes); break; case 34: tetra_fb_saw3(int_polymer, move, sawmoves, ARG_numberofbeads, ARG_blength, numberofbricks, building_bricks, ARG_flength, attempts_successes); break; default: log_out(output_file, "ERROR: ARG_typeofrun = %d isn't recognized by the main part of the programm!\n", ARG_typeofrun); usage_error(); } // end of chain generaiton // copies the lattice polymer into an array with doubles so that the chosen bond length can be used. if (ARG_typeofrun>0) { recast(double_polymer, int_polymer, ARG_numberofbeads, &recast_factor); } //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- // get most important observables // end-to-end distance^2 normal_obs.ee2 = double_distance2(double_polymer[last_atom], double_polymer[0]); normal_obs.err_ee2[tenth] += normal_obs.ee2; // radius of gyration normal_obs.rgyr = radius_of_gyration(double_polymer, ARG_numberofbeads); normal_obs.err_rgyr[tenth] += normal_obs.rgyr; // pT if (ARG_torsion==1){ normal_obs.pt = get_nT(double_polymer, number_of_torsions); normal_obs.err_pt[tenth] += normal_obs.pt; } if (ARG_sasa==1){ //observables[4] = get_asa(double_polymer, ARG_numberofbeads, (sqrt(16.0/3.0)*bondlength/2.0)); normal_obs.dsasa = delta_asa(double_polymer, ARG_numberofbeads, bondlength); normal_obs.err_dsasa[tenth] += normal_obs.dsasa; } if (ARG_pair_correlation==1) { if (false==pair_correlation_fct(pair_correlation_obs, double_polymer, ARG_numberofbeads)){ return EXIT_FAILURE; } for (int pairs=0; pairs<(2*ARG_numberofbeads); pairs++) { normal_obs.pair_corr[pairs] += pair_correlation_obs[pairs]; } } //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- // calculate boltzman factors if intramolecular forces are switched on switch (ARG_intra_potential) { // torsion + square well potential case 1: intrapot_torsion_well(intra_boltzman_factors, intra_energy, intra_interactions_hist, intra_highest_boltzmanfactor, intra_parameter1, intra_parameter2, normal_obs.pt, number_of_torsions, int_polymer, ARG_numberofbeads); intra_sum_of_enrgyboltz[0][tenth] += (intra_boltzman_factors[0]*intra_energy[0]); intra_sum_of_boltzfactors[0][tenth] += intra_boltzman_factors[0]; intra_obs[0].err_ee2[tenth] += normal_obs.ee2 * intra_boltzman_factors[0]; intra_obs[0].err_rgyr[tenth] += normal_obs.rgyr * intra_boltzman_factors[0]; intra_obs[0].err_pt[tenth] += normal_obs.pt * intra_boltzman_factors[0]; intra_binenergy(intra_energy[0], intra_binmin, intra_binwidth, intra_energybin[0]); // pair correlation function if (ARG_pair_correlation==1){ for (int pairs=0; pairs<(2*ARG_numberofbeads); pairs++) { intra_obs[0].pair_corr[pairs] += (pair_correlation_obs[pairs]*intra_boltzman_factors[0]); } } break; // torsion + square well potential, range of energy values case 2: intrapot_torsion_well_scan(intra_boltzman_factors, intra_energy, intra_interactions_hist, intra_highest_boltzmanfactor, intra_parameter1, intra_parameter2, normal_obs.pt, number_of_torsions, int_polymer, ARG_numberofbeads); //intrapot_torsion_well_test(intra_boltzman_factors, intra_energy, intra_interactions_hist, intra_interactions_testhist,intra_highest_boltzmanfactor, intra_parameter1, intra_parameter2, normal_obs.pt, number_of_torsions, int_polymer, ARG_numberofbeads); for (int dim1=0; dim1<100; ++dim1) { intra_sum_of_enrgyboltz[dim1][tenth] += (intra_boltzman_factors[dim1]*intra_energy[dim1]); intra_sum_of_boltzfactors[dim1][tenth] += intra_boltzman_factors[dim1]; intra_obs[dim1].err_ee2[tenth] += normal_obs.ee2 * intra_boltzman_factors[dim1]; intra_obs[dim1].err_rgyr[tenth] += normal_obs.rgyr * intra_boltzman_factors[dim1]; intra_obs[dim1].err_pt[tenth] += normal_obs.pt * intra_boltzman_factors[dim1]; intra_binenergy(intra_energy[dim1], intra_binmin, intra_binwidth, intra_energybin[dim1]); // pair correlation function if (ARG_pair_correlation==1){ for (int pairs=0; pairs<(2*ARG_numberofbeads); pairs++) { intra_obs[dim1].pair_corr[pairs] += (pair_correlation_obs[pairs]*intra_boltzman_factors[dim1]); } } } break; default: break; }// boltzman factors and energies have been determined // end of anything related to intramolecular potentials //-------------------------------------------------------------------------------------------- //-------------------------------------------------------------------------------------------- // everything has been calculated, now is the opportunity to look at convergence if ((frame+1)%permill_frames==0) { // convergence of variables with equal weights convergence(&normal_obs, (double)number_of_torsions, (frame+1), conv_obs_file); switch (ARG_intra_potential) { case 1: weighted_convergence(intra_obs, 1,intra_sum_of_boltzfactors, (double)number_of_torsions, (frame+1), conv_intraobs_file); weights_growth(intra_sum_of_boltzfactors, 1, (frame+1), convergence_intraweights); break; // convergence of weighted ensemble case 2: weighted_convergence(intra_obs, 100, intra_sum_of_boltzfactors, (double)number_of_torsions, (frame+1), conv_intraobs_file); weights_growth(intra_sum_of_boltzfactors, 100, (frame+1), convergence_intraweights); break; default: break; } if ((frame+1)%percent_frames==0) { cent = (frame+1)/percent_frames; log_out(output_file, "Finished %i%%\t...remaining time: %f seconds \n", (cent), ((time_of_day()-time[1])*(100-cent)/(cent))); if ((frame+1)%tenth_frames==0) { // every bin after the first will also include the attempts in the previous bin! log_attempts[tenth] = recalc_attempts(attempts_successes, numof_frags_bricks, numberofbricks, ARG_typeofrun); } } } } // end of loop over number of frames //---------------------------------------------------------------------------------------------------------------------------------------------------------- //---------------------------------------------------------------------------------------------------------------------------------------------------------- /* Post-Process */ time[2] = time_of_day(); // Maybe there should be a function, which returns means and errors; it calls these subroutines .... log_attempts[10] = recalc_attempts(attempts_successes, numof_frags_bricks, numberofbricks, ARG_typeofrun); for (int dim1=9; dim1>0; --dim1) { log_attempts[dim1] = log( exp(log_attempts[dim1]) - exp(log_attempts[dim1-1]) ); } // get means and errors normal_obs.ee2 = sqrt( average(normal_obs.err_ee2, ARG_numberofframes) ); normal_obs.err_ee2[10] = error_sq_ten(normal_obs.ee2, normal_obs.err_ee2, ARG_numberofframes); normal_obs.rgyr = sqrt( average(normal_obs.err_rgyr, ARG_numberofframes) ); normal_obs.err_rgyr[10] = error_sq_ten(normal_obs.rgyr, normal_obs.err_rgyr, ARG_numberofframes); normal_obs.S = (log((double)ARG_numberofframes) - log_attempts[10]); if (ARG_torsion==1) { normal_obs.pt = average(normal_obs.err_pt, ARG_numberofframes); normal_obs.err_pt[10] = error_ten(normal_obs.pt, normal_obs.err_pt, ARG_numberofframes); } if (ARG_sasa==1) { normal_obs.dsasa = average(normal_obs.err_dsasa, ARG_numberofframes); normal_obs.err_dsasa[10] = error_ten(normal_obs.dsasa, normal_obs.err_dsasa, ARG_numberofframes); } //---------------------------------------------------------------------------------------------------------------------------------------------------------- //---------------------------------------------------------------------------------------------------------------------------------------------------------- /* Output */ time[3] = time_of_day(); // print the output to screen and to the output_file log_out(output_file, "\n-----------------------------------------------------------------------------------\n"); log_out(output_file, "FINAL OUTPUT\n"); log_out(output_file, "\nEntropic Considerations:\n"); log_out(output_file, "Attempts to Compute the Ensemble: %e \n", exp(log_attempts[10])); log_out(output_file, "\tDelta S / k_B (FWW -> SAWn): %f \n", normal_obs.S); log_out(output_file, "\nChosen Observables:\n"); log_out(output_file, "Flory Radius: %f +- %f \n", normal_obs.ee2, normal_obs.err_ee2[10]); log_out(output_file, "Radius of Gyration: %f +- %f \n", normal_obs.rgyr, normal_obs.err_rgyr[10]); if (ARG_torsion==1) { log_out(output_file, "Probability of trans = %f +- %f\n", (normal_obs.pt/(double)number_of_torsions), (normal_obs.err_pt[10]/(double)number_of_torsions)); } if (ARG_sasa==1) { log_out(output_file, "Delta SASA = %f +- %f\n", normal_obs.dsasa, normal_obs.err_dsasa[10]); } if (ARG_pair_correlation==1) { log_out(output_file, "The pair-correlation function was written to 'pair_correlation_function.dat'.\n"); } log_out(output_file, "\nThis ouput is also written to 'output_file.dat'.\n"); log_out(output_file, "The convergence was written to 'convergence_obs.dat'.\n"); if (ARG_intra_potential>0) { log_out(output_file, "\nThe Boltzmann-weighted observables can be found in 'intramolecular_obs.dat'.\n"); log_out(output_file, "The Boltzmann-weighted convergence was written to 'intramolecular_convergence_obs.dat'.\n"); log_out(output_file, "A histogram of the Boltzmann-factors was written to 'intramolecular_factors_hist.dat'.\n"); log_out(output_file, "The sum of the Boltzmann-factors was written to 'intramolecular_convergence_Z.dat'.\n"); log_out(output_file, "A histogram of the intramolecular contacts was written to 'intramolecular_interactions_hist.dat'.\n"); } //---------------------------------------------------------------------------------------------------------------------------------------------------------- //---------------------------------------------------------------------------------------------------------------------------------------------------------- /* Output to file */ // pair correlation function if (ARG_pair_correlation==1){ if (ARG_intra_potential==0) { fprintf(pair_correl_file,"# r_0-r_1 pair_correlation \n"); for (int dim1=0; dim1<(2*ARG_numberofbeads); dim1++){ fprintf(pair_correl_file, "%i %e \n", dim1, (normal_obs.pair_corr[dim1]/(double)ARG_numberofframes)); } } else if (ARG_intra_potential==2){ fprintf(pair_correl_file,"# r_0-r_1 pair_correlation(unweigthed) pair_correlation(weigthed)\n"); for (int dim1=0; dim1<(2*ARG_numberofbeads); dim1++){ fprintf(pair_correl_file, "%i %e %e \n", dim1, (normal_obs.pair_corr[dim1]/(double)ARG_numberofframes), (intra_obs[46].pair_corr[dim1]/average(intra_sum_of_boltzfactors[46], 1))); } } } switch (ARG_intra_potential) { case 1: // prints the weighted observables to file with error estimate fprintf(intra_pot_file, "### 1-9 well, torsion eps, Rf, Rf_err, Rg, Rg_err, pT, pT_err, DS, DS_err, <exp(-E/kT)>/exp(-Emax/kT) \n" ); intra_obs[0].ee2 = sqrt(weighted_average(intra_obs[0].err_ee2, intra_sum_of_boltzfactors[0])); intra_obs[0].err_ee2[10] = weighted_error_sq_ten(intra_obs[0].ee2, intra_obs[0].err_ee2, intra_sum_of_boltzfactors[0]); intra_obs[0].rgyr = sqrt(weighted_average(intra_obs[0].err_rgyr, intra_sum_of_boltzfactors[0])); intra_obs[0].err_rgyr[10] = weighted_error_sq_ten(intra_obs[0].rgyr, intra_obs[0].err_rgyr, intra_sum_of_boltzfactors[0]); intra_obs[0].pt = weighted_average(intra_obs[0].err_pt, intra_sum_of_boltzfactors[0]); intra_obs[0].err_pt[10] = weighted_error_ten(intra_obs[0].pt, intra_obs[0].err_pt, intra_sum_of_boltzfactors[0]); intra_obs[0].S = intra_entropy(intra_sum_of_boltzfactors[0], intra_sum_of_enrgyboltz[0], log_attempts[10]); intra_obs[0].err_S = intra_entropy_error_ten(intra_obs[0].S, intra_sum_of_boltzfactors[0], intra_sum_of_enrgyboltz[0], log_attempts); fprintf(intra_pot_file, "%f %f %e %e %e %e %e %e %e %e %e \n", intra_parameter1[0], intra_parameter2[0], intra_obs[0].ee2, intra_obs[0].err_ee2[10], intra_obs[0].rgyr, intra_obs[0].err_rgyr[10], (intra_obs[0].pt/(double)number_of_torsions), (intra_obs[0].err_pt[10]/(double)number_of_torsions), intra_obs[0].S, intra_obs[0].err_S, intra_loss_of_conf(intra_sum_of_boltzfactors[0], intra_highest_boltzmanfactor[0], ARG_numberofframes)); // histogram over 1-9 interacitons fprintf(intra_interactions_file, "# number-of-nn-interactions, occurence \n"); for (int dim1=0; dim1<ARG_numberofbeads; ++dim1) { fprintf(intra_interactions_file, "%i %i \n", dim1, intra_interactions_hist[dim1]); } // test histogram // fprintf(intra_interactions_testfile, "# sperating-bonds 0_contacts 1_contacts 2_contacts\n"); // for (int dim1=0; dim1<98; dim1+=3) { // fprintf(intra_interactions_testfile, "%i %e %e %e \n", (dim1/3+4), intra_interactions_testhist[dim1]/average(intra_sum_of_boltzfactors[46], 1), intra_interactions_testhist[dim1+1]/average(intra_sum_of_boltzfactors[46], 1), intra_interactions_testhist[dim1+2]/average(intra_sum_of_boltzfactors[46], 1)); // } // histogram of intra energies fprintf(intra_boltzman_factors_hist, "# energy, bincount-for-these-parameters"); for (int dim1=0; dim1<((intra_binmax-intra_binmin)/intra_binwidth); ++dim1) { fprintf(intra_boltzman_factors_hist, "%f %i \n", (intra_binmin+(double)dim1*intra_binwidth), intra_energybin[0][dim1]); } break; case 2: // prints the weighted observables to file with error estimate fprintf(intra_pot_file, "### 1-9 well, torsion eps, Rf, Rf_err, Rg, Rg_err, pT, pT_err, DS, DS_err, <exp(-E/kT)>/exp(-Emax/kT) \n" ); for (int dim1=0; dim1<10; dim1++) { for (int dim2=0; dim2<10; dim2++) { counter = dim1*10 + dim2; intra_obs[counter].ee2 = sqrt(weighted_average(intra_obs[counter].err_ee2, intra_sum_of_boltzfactors[counter])); intra_obs[counter].err_ee2[10] = weighted_error_sq_ten(intra_obs[counter].ee2, intra_obs[counter].err_ee2, intra_sum_of_boltzfactors[counter]); intra_obs[counter].rgyr = sqrt(weighted_average(intra_obs[counter].err_rgyr, intra_sum_of_boltzfactors[counter])); intra_obs[counter].err_rgyr[10] = weighted_error_sq_ten(intra_obs[counter].rgyr, intra_obs[counter].err_rgyr, intra_sum_of_boltzfactors[counter]); intra_obs[counter].pt = weighted_average(intra_obs[counter].err_pt, intra_sum_of_boltzfactors[counter]); intra_obs[counter].err_pt[10] = weighted_error_ten(intra_obs[counter].pt, intra_obs[counter].err_pt, intra_sum_of_boltzfactors[counter]); intra_obs[counter].S = intra_entropy(intra_sum_of_boltzfactors[counter], intra_sum_of_enrgyboltz[counter], log_attempts[10]); intra_obs[counter].err_S = intra_entropy_error_ten(intra_obs[counter].S, intra_sum_of_boltzfactors[counter], intra_sum_of_enrgyboltz[counter], log_attempts); fprintf(intra_pot_file, "%f %f %e %e %e %e %e %e %e %e %e \n", intra_parameter1[dim1], intra_parameter2[dim2], intra_obs[counter].ee2, intra_obs[counter].err_ee2[10], intra_obs[counter].rgyr, intra_obs[counter].err_rgyr[10], (intra_obs[counter].pt/(double)number_of_torsions), (intra_obs[counter].err_pt[10]/(double)number_of_torsions), intra_obs[counter].S, intra_obs[counter].err_S, intra_loss_of_conf(intra_sum_of_boltzfactors[counter], intra_highest_boltzmanfactor[counter], ARG_numberofframes)); } } // histogram over 1-9 interacitons fprintf(intra_interactions_file, "# number-of-nn-interactions, occurence \n"); for (int dim1=0; dim1<ARG_numberofbeads; ++dim1) { fprintf(intra_interactions_file, "%i %i \n", dim1, intra_interactions_hist[dim1]); } // test histogram // fprintf(intra_interactions_testfile, "# sperating-bonds 0_contacts 1_contacts 2_contacts\n"); // for (int dim1=0; dim1<98; dim1+=3) { // fprintf(intra_interactions_testfile, "%i %e %e %e \n", (dim1/3+4), intra_interactions_testhist[dim1]/average(intra_sum_of_boltzfactors[46], 1), intra_interactions_testhist[dim1+1]/average(intra_sum_of_boltzfactors[46], 1), intra_interactions_testhist[dim1+2]/average(intra_sum_of_boltzfactors[46], 1)); // } // histogram of intra energies fprintf(intra_boltzman_factors_hist, "# energy, bincount-for-these-parameters"); for (int dim1=0; dim1<((intra_binmax-intra_binmin)/intra_binwidth); ++dim1) { fprintf(intra_boltzman_factors_hist, "%f ", (intra_binmin+(double)dim1*intra_binwidth)); for (int dim2=0; dim2<100; ++dim2) { fprintf(intra_boltzman_factors_hist, "%i ", intra_energybin[dim2][dim1]); } fprintf(intra_boltzman_factors_hist, "\n"); } break; default: break; } log_out(output_file, "\nComputation of Chains:\t%f seconds \nTotal Runtime:\t\t%f seconds \n\n", (time[2]-time[1]), (time[3]-time[0])); log_out(output_file, "End of Programm!\n-----------------------------------------------------------------------------------\n"); // make sure everything gets printed fflush(stdout); //---------------------------------------------------------------------------------------------------------------------------------------------------------- //---------------------------------------------------------------------------------------------------------------------------------------------------------- // close every remaining file and free remaining pointers! // cleaning up ;-) //close all files if (ARG_typeofrun==0){// dcd-file close_file_read(v); } fclose(conv_obs_file); fclose(pair_correl_file); fclose(intra_pot_file); fclose(intra_boltzman_factors_hist); fclose(convergence_intraweights); fclose(intra_interactions_file); fclose(conv_intraobs_file); // very last file to close fclose(output_file); //----------------------------- // free pointers // free general variables free(normal_obs.err_ee2); free(normal_obs.err_rgyr); // torsion angles if (ARG_torsion==1) { free(normal_obs.err_pt); if (ARG_intra_potential>0) { for (int dim1=0; dim1<100; dim1++) { free(intra_obs[dim1].err_pt); } } } // D-SASA if (ARG_sasa==1) { free(normal_obs.err_dsasa); if (ARG_intra_potential>0) { for (int dim1=0; dim1<100; dim1++) { free(intra_obs[dim1].err_dsasa); } } } // pair correlation function if (ARG_pair_correlation==1) { free(pair_correlation_obs); free(normal_obs.pair_corr); if (ARG_intra_potential>0) { for (int dim1=0; dim1<100; dim1++) { free(intra_obs[dim1].pair_corr); } } } free(attempts_successes); // free arrays which held polymer coordinates for (int dim1=0; dim1<ARG_numberofbeads;dim1++){ free(int_polymer[dim1]); free(double_polymer[dim1]); } free(int_polymer); free(double_polymer); // free building blocks if (ARG_typeofrun==13 || ARG_typeofrun==14 || ARG_typeofrun==23 || ARG_typeofrun==24 || ARG_typeofrun==33 || ARG_typeofrun==34){ for (int dim1=0; dim1<numberofbricks; ++dim1) { for (int dim2=0; dim2<ARG_blength; ++dim2) { free(building_bricks[dim1][dim2]); } free(building_bricks[dim1]); } free(building_bricks); } // free potential variables if (ARG_intra_potential > 0){ switch (ARG_intra_potential) { case 1: free(intra_sum_of_boltzfactors[0]); break; case 2: for (int dim1=0; dim1<100; dim1++){ free(intra_sum_of_boltzfactors[dim1]); } break; default: break; } free(intra_energy); free(intra_boltzman_factors); free(intra_sum_of_boltzfactors); free(intra_sum_of_enrgyboltz); free(intra_interactions_hist); // free(intra_interactions_testhist); free(intra_parameter1); free(intra_parameter2); } // end of programm return EXIT_SUCCESS; }
void CG::operator()(cudaColorSpinorField &x, cudaColorSpinorField &b) { profile.Start(QUDA_PROFILE_INIT); // Check to see that we're not trying to invert on a zero-field source const double b2 = norm2(b); if(b2 == 0){ profile.Stop(QUDA_PROFILE_INIT); printfQuda("Warning: inverting on zero-field source\n"); x=b; param.true_res = 0.0; param.true_res_hq = 0.0; return; } cudaColorSpinorField r(b); ColorSpinorParam csParam(x); csParam.create = QUDA_ZERO_FIELD_CREATE; cudaColorSpinorField y(b, csParam); mat(r, x, y); // zeroCuda(y); double r2 = xmyNormCuda(b, r); csParam.setPrecision(param.precision_sloppy); cudaColorSpinorField Ap(x, csParam); cudaColorSpinorField tmp(x, csParam); cudaColorSpinorField *tmp2_p = &tmp; // tmp only needed for multi-gpu Wilson-like kernels if (mat.Type() != typeid(DiracStaggeredPC).name() && mat.Type() != typeid(DiracStaggered).name()) { tmp2_p = new cudaColorSpinorField(x, csParam); } cudaColorSpinorField &tmp2 = *tmp2_p; cudaColorSpinorField *x_sloppy, *r_sloppy; if (param.precision_sloppy == x.Precision()) { csParam.create = QUDA_REFERENCE_FIELD_CREATE; x_sloppy = &x; r_sloppy = &r; } else { csParam.create = QUDA_COPY_FIELD_CREATE; x_sloppy = new cudaColorSpinorField(x, csParam); r_sloppy = new cudaColorSpinorField(r, csParam); } cudaColorSpinorField &xSloppy = *x_sloppy; cudaColorSpinorField &rSloppy = *r_sloppy; cudaColorSpinorField p(rSloppy); if(&x != &xSloppy){ copyCuda(y,x); zeroCuda(xSloppy); }else{ zeroCuda(y); } const bool use_heavy_quark_res = (param.residual_type & QUDA_HEAVY_QUARK_RESIDUAL) ? true : false; profile.Stop(QUDA_PROFILE_INIT); profile.Start(QUDA_PROFILE_PREAMBLE); double r2_old; double stop = b2*param.tol*param.tol; // stopping condition of solver double heavy_quark_res = 0.0; // heavy quark residual if(use_heavy_quark_res) heavy_quark_res = sqrt(HeavyQuarkResidualNormCuda(x,r).z); int heavy_quark_check = 10; // how often to check the heavy quark residual double alpha=0.0, beta=0.0; double pAp; int rUpdate = 0; double rNorm = sqrt(r2); double r0Norm = rNorm; double maxrx = rNorm; double maxrr = rNorm; double delta = param.delta; // this parameter determines how many consective reliable update // reisudal increases we tolerate before terminating the solver, // i.e., how long do we want to keep trying to converge int maxResIncrease = 0; // 0 means we have no tolerance profile.Stop(QUDA_PROFILE_PREAMBLE); profile.Start(QUDA_PROFILE_COMPUTE); blas_flops = 0; int k=0; PrintStats("CG", k, r2, b2, heavy_quark_res); int steps_since_reliable = 1; while ( !convergence(r2, heavy_quark_res, stop, param.tol_hq) && k < param.maxiter) { matSloppy(Ap, p, tmp, tmp2); // tmp as tmp double sigma; bool breakdown = false; if (param.pipeline) { double3 triplet = tripleCGReductionCuda(rSloppy, Ap, p); r2 = triplet.x; double Ap2 = triplet.y; pAp = triplet.z; r2_old = r2; alpha = r2 / pAp; sigma = alpha*(alpha * Ap2 - pAp); if (sigma < 0.0 || steps_since_reliable==0) { // sigma condition has broken down r2 = axpyNormCuda(-alpha, Ap, rSloppy); sigma = r2; breakdown = true; } r2 = sigma; } else { r2_old = r2; pAp = reDotProductCuda(p, Ap); alpha = r2 / pAp; // here we are deploying the alternative beta computation Complex cg_norm = axpyCGNormCuda(-alpha, Ap, rSloppy); r2 = real(cg_norm); // (r_new, r_new) sigma = imag(cg_norm) >= 0.0 ? imag(cg_norm) : r2; // use r2 if (r_k+1, r_k+1-r_k) breaks } // reliable update conditions rNorm = sqrt(r2); if (rNorm > maxrx) maxrx = rNorm; if (rNorm > maxrr) maxrr = rNorm; int updateX = (rNorm < delta*r0Norm && r0Norm <= maxrx) ? 1 : 0; int updateR = ((rNorm < delta*maxrr && r0Norm <= maxrr) || updateX) ? 1 : 0; // force a reliable update if we are within target tolerance (only if doing reliable updates) if ( convergence(r2, heavy_quark_res, stop, param.tol_hq) && delta >= param.tol) updateX = 1; if ( !(updateR || updateX)) { //beta = r2 / r2_old; beta = sigma / r2_old; // use the alternative beta computation if (param.pipeline && !breakdown) tripleCGUpdateCuda(alpha, beta, Ap, rSloppy, xSloppy, p); else axpyZpbxCuda(alpha, p, xSloppy, rSloppy, beta); if (use_heavy_quark_res && k%heavy_quark_check==0) { copyCuda(tmp,y); heavy_quark_res = sqrt(xpyHeavyQuarkResidualNormCuda(xSloppy, tmp, rSloppy).z); } steps_since_reliable++; } else { axpyCuda(alpha, p, xSloppy); if (x.Precision() != xSloppy.Precision()) copyCuda(x, xSloppy); xpyCuda(x, y); // swap these around? mat(r, y, x); // here we can use x as tmp r2 = xmyNormCuda(b, r); if (x.Precision() != rSloppy.Precision()) copyCuda(rSloppy, r); zeroCuda(xSloppy); // break-out check if we have reached the limit of the precision static int resIncrease = 0; if (sqrt(r2) > r0Norm && updateX) { // reuse r0Norm for this warningQuda("CG: new reliable residual norm %e is greater than previous reliable residual norm %e", sqrt(r2), r0Norm); k++; rUpdate++; if (++resIncrease > maxResIncrease) break; } else { resIncrease = 0; } rNorm = sqrt(r2); maxrr = rNorm; maxrx = rNorm; r0Norm = rNorm; rUpdate++; // explicitly restore the orthogonality of the gradient vector double rp = reDotProductCuda(rSloppy, p) / (r2); axpyCuda(-rp, rSloppy, p); beta = r2 / r2_old; xpayCuda(rSloppy, beta, p); if(use_heavy_quark_res) heavy_quark_res = sqrt(HeavyQuarkResidualNormCuda(y,r).z); steps_since_reliable = 0; } breakdown = false; k++; PrintStats("CG", k, r2, b2, heavy_quark_res); } if (x.Precision() != xSloppy.Precision()) copyCuda(x, xSloppy); xpyCuda(y, x); profile.Stop(QUDA_PROFILE_COMPUTE); profile.Start(QUDA_PROFILE_EPILOGUE); param.secs = profile.Last(QUDA_PROFILE_COMPUTE); double gflops = (quda::blas_flops + mat.flops() + matSloppy.flops())*1e-9; reduceDouble(gflops); param.gflops = gflops; param.iter += k; if (k==param.maxiter) warningQuda("Exceeded maximum iterations %d", param.maxiter); if (getVerbosity() >= QUDA_VERBOSE) printfQuda("CG: Reliable updates = %d\n", rUpdate); // compute the true residuals mat(r, x, y); param.true_res = sqrt(xmyNormCuda(b, r) / b2); #if (__COMPUTE_CAPABILITY__ >= 200) param.true_res_hq = sqrt(HeavyQuarkResidualNormCuda(x,r).z); #else param.true_res_hq = 0.0; #endif PrintSummary("CG", k, r2, b2); // reset the flops counters quda::blas_flops = 0; mat.flops(); matSloppy.flops(); profile.Stop(QUDA_PROFILE_EPILOGUE); profile.Start(QUDA_PROFILE_FREE); if (&tmp2 != &tmp) delete tmp2_p; if (param.precision_sloppy != x.Precision()) { delete r_sloppy; delete x_sloppy; } profile.Stop(QUDA_PROFILE_FREE); return; }
void PreconCG::operator()(cudaColorSpinorField &x, cudaColorSpinorField &b) { profile.Start(QUDA_PROFILE_INIT); // Check to see that we're not trying to invert on a zero-field source const double b2 = norm2(b); if(b2 == 0){ profile.Stop(QUDA_PROFILE_INIT); printfQuda("Warning: inverting on zero-field source\n"); x=b; param.true_res = 0.0; param.true_res_hq = 0.0; } int k=0; int rUpdate=0; cudaColorSpinorField* minvrPre; cudaColorSpinorField* rPre; cudaColorSpinorField* minvr; cudaColorSpinorField* minvrSloppy; cudaColorSpinorField* p; ColorSpinorParam csParam(b); cudaColorSpinorField r(b); if(K) minvr = new cudaColorSpinorField(b); csParam.create = QUDA_ZERO_FIELD_CREATE; cudaColorSpinorField y(b,csParam); mat(r, x, y); // => r = A*x; double r2 = xmyNormCuda(b,r); csParam.setPrecision(param.precision_sloppy); cudaColorSpinorField tmpSloppy(x,csParam); cudaColorSpinorField Ap(x,csParam); cudaColorSpinorField *r_sloppy; if(param.precision_sloppy == x.Precision()) { r_sloppy = &r; minvrSloppy = minvr; }else{ csParam.create = QUDA_COPY_FIELD_CREATE; r_sloppy = new cudaColorSpinorField(r,csParam); if(K) minvrSloppy = new cudaColorSpinorField(*minvr,csParam); } cudaColorSpinorField *x_sloppy; if(param.precision_sloppy == x.Precision() || !param.use_sloppy_partial_accumulator) { csParam.create = QUDA_REFERENCE_FIELD_CREATE; x_sloppy = &x; }else{ csParam.create = QUDA_COPY_FIELD_CREATE; x_sloppy = new cudaColorSpinorField(x,csParam); } cudaColorSpinorField &xSloppy = *x_sloppy; cudaColorSpinorField &rSloppy = *r_sloppy; if(&x != &xSloppy){ copyCuda(y, x); // copy x to y zeroCuda(xSloppy); }else{ zeroCuda(y); // no reliable updates // NB: check this } const bool use_heavy_quark_res = (param.residual_type & QUDA_HEAVY_QUARK_RESIDUAL) ? true : false; if(K){ csParam.create = QUDA_COPY_FIELD_CREATE; csParam.setPrecision(param.precision_precondition); rPre = new cudaColorSpinorField(rSloppy,csParam); // Create minvrPre minvrPre = new cudaColorSpinorField(*rPre); globalReduce = false; (*K)(*minvrPre, *rPre); globalReduce = true; *minvrSloppy = *minvrPre; p = new cudaColorSpinorField(*minvrSloppy); }else{ p = new cudaColorSpinorField(rSloppy); } profile.Stop(QUDA_PROFILE_INIT); profile.Start(QUDA_PROFILE_PREAMBLE); double stop = stopping(param.tol, b2, param.residual_type); // stopping condition of solver double heavy_quark_res = 0.0; // heavy quark residual if(use_heavy_quark_res) heavy_quark_res = sqrt(HeavyQuarkResidualNormCuda(x,r).z); int heavy_quark_check = 10; // how often to check the heavy quark residual double alpha = 0.0, beta=0.0; double pAp; double rMinvr = 0; double rMinvr_old = 0.0; double r_new_Minvr_old = 0.0; double r2_old = 0; r2 = norm2(r); double rNorm = sqrt(r2); double r0Norm = rNorm; double maxrx = rNorm; double maxrr = rNorm; double delta = param.delta; if(K) rMinvr = reDotProductCuda(rSloppy,*minvrSloppy); profile.Stop(QUDA_PROFILE_PREAMBLE); profile.Start(QUDA_PROFILE_COMPUTE); quda::blas_flops = 0; int steps_since_reliable = 1; const int maxResIncrease = 0; while(!convergence(r2, heavy_quark_res, stop, param.tol_hq) && k < param.maxiter){ matSloppy(Ap, *p, tmpSloppy); double sigma; bool breakdown = false; pAp = reDotProductCuda(*p,Ap); alpha = (K) ? rMinvr/pAp : r2/pAp; Complex cg_norm = axpyCGNormCuda(-alpha, Ap, rSloppy); // r --> r - alpha*A*p r2_old = r2; r2 = real(cg_norm); sigma = imag(cg_norm) >= 0.0 ? imag(cg_norm) : r2; // use r2 if (r_k+1, r_k-1 - r_k) breaks if(K) rMinvr_old = rMinvr; rNorm = sqrt(r2); if(rNorm > maxrx) maxrx = rNorm; if(rNorm > maxrr) maxrr = rNorm; int updateX = (rNorm < delta*r0Norm && r0Norm <= maxrx) ? 1 : 0; int updateR = ((rNorm < delta*maxrr && r0Norm <= maxrr) || updateX) ? 1 : 0; // force a reliable update if we are within target tolerance (only if doing reliable updates) if( convergence(r2, heavy_quark_res, stop, param.tol_hq) && delta >= param.tol) updateX = 1; if( !(updateR || updateX) ){ if(K){ r_new_Minvr_old = reDotProductCuda(rSloppy,*minvrSloppy); *rPre = rSloppy; globalReduce = false; (*K)(*minvrPre, *rPre); globalReduce = true; *minvrSloppy = *minvrPre; rMinvr = reDotProductCuda(rSloppy,*minvrSloppy); beta = (rMinvr - r_new_Minvr_old)/rMinvr_old; axpyZpbxCuda(alpha, *p, xSloppy, *minvrSloppy, beta); }else{ beta = sigma/r2_old; // use the alternative beta computation axpyZpbxCuda(alpha, *p, xSloppy, rSloppy, beta); } } else { // reliable update axpyCuda(alpha, *p, xSloppy); // xSloppy += alpha*p copyCuda(x, xSloppy); xpyCuda(x, y); // y += x // Now compute r mat(r, y, x); // x is just a temporary here r2 = xmyNormCuda(b, r); copyCuda(rSloppy, r); // copy r to rSloppy zeroCuda(xSloppy); // break-out check if we have reached the limit of the precision static int resIncrease = 0; if(sqrt(r2) > r0Norm && updateX) { // reuse r0Norm for this warningQuda("PCG: new reliable residual norm %e is greater than previous reliable residual norm %e", sqrt(r2), r0Norm); k++; rUpdate++; if(++resIncrease > maxResIncrease) break; }else{ resIncrease = 0; } rNorm = sqrt(r2); maxrr = rNorm; maxrx = rNorm; r0Norm = rNorm; ++rUpdate; if(K){ *rPre = rSloppy; globalReduce = false; (*K)(*minvrPre, *rPre); globalReduce = true; *minvrSloppy = *minvrPre; rMinvr = reDotProductCuda(rSloppy,*minvrSloppy); beta = rMinvr/rMinvr_old; xpayCuda(*minvrSloppy, beta, *p); // p = minvrSloppy + beta*p }else{ // standard CG - no preconditioning // explicitly restore the orthogonality of the gradient vector double rp = reDotProductCuda(rSloppy, *p)/(r2); axpyCuda(-rp, rSloppy, *p); beta = r2/r2_old; xpayCuda(rSloppy, beta, *p); steps_since_reliable = 0; } } breakdown = false; ++k; PrintStats("PCG", k, r2, b2, heavy_quark_res); } profile.Stop(QUDA_PROFILE_COMPUTE); profile.Start(QUDA_PROFILE_EPILOGUE); if(x.Precision() != param.precision_sloppy) copyCuda(x, xSloppy); xpyCuda(y, x); // x += y param.secs = profile.Last(QUDA_PROFILE_COMPUTE); double gflops = (quda::blas_flops + mat.flops() + matSloppy.flops() + matPrecon.flops())*1e-9; reduceDouble(gflops); param.gflops = gflops; param.iter += k; if (k==param.maxiter) warningQuda("Exceeded maximum iterations %d", param.maxiter); if (getVerbosity() >= QUDA_VERBOSE) printfQuda("CG: Reliable updates = %d\n", rUpdate); // compute the true residual mat(r, x, y); double true_res = xmyNormCuda(b, r); param.true_res = sqrt(true_res / b2); // reset the flops counters quda::blas_flops = 0; mat.flops(); matSloppy.flops(); matPrecon.flops(); profile.Stop(QUDA_PROFILE_EPILOGUE); profile.Start(QUDA_PROFILE_FREE); if(K){ // These are only needed if preconditioning is used delete minvrPre; delete rPre; delete minvr; if(x.Precision() != param.precision_sloppy) delete minvrSloppy; } delete p; if(x.Precision() != param.precision_sloppy){ delete x_sloppy; delete r_sloppy; } profile.Stop(QUDA_PROFILE_FREE); return; }