/**
 * Runs the two-scale iteration until the accuracy goal is met or the
 * iteration budget is used up.
 *
 * After check_setup() the routine returns immediately when there are no
 * models or when get_max_iterations() yields 0.  Otherwise it calls
 * initial_guess() and then repeats
 * update_running_precision() / run_up() / run_down() until
 * accuracy_goal_reached() reports true or the iteration limit is hit.
 * apply_lowest_constraint() is invoked once after the loop, whether or not
 * the accuracy goal was reached.
 *
 * The member `iteration` holds the number of completed loop passes.
 *
 * @throws NoConvergenceError (constructed with the iteration limit) if the
 *         accuracy goal was not reached within the limit.
 */
void RGFlow<Two_scale>::solve()
{
   check_setup();

   const unsigned int iteration_limit = get_max_iterations();
   if (models.empty() || iteration_limit == 0)
      return;

   initial_guess();

   bool converged = false;
   for (iteration = 0; iteration < iteration_limit && !converged; ++iteration) {
      update_running_precision();
      run_up();
      run_down();
      converged = accuracy_goal_reached();
   }

   // applied even when the loop ended without convergence
   apply_lowest_constraint();

   if (!converged)
      throw NoConvergenceError(iteration_limit);

   VERBOSE_MSG("convergence reached after " << iteration << " iterations");
}
/// main for SCF int main (int argc, char **argv) { // init MPI int myrank; int nprocs; int provided; #if defined (USE_ELEMENTAL) ElInitialize( &argc, &argv ); ElMPICommRank( MPI_COMM_WORLD, &myrank ); ElMPICommSize( MPI_COMM_WORLD, &nprocs ); MPI_Query_thread(&provided); #else MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); MPI_Comm_rank(MPI_COMM_WORLD, &myrank); MPI_Comm_size(MPI_COMM_WORLD, &nprocs); #endif if (myrank == 0) { printf("MPI thread support: %s\n", MPI_THREAD_STRING(provided)); } #if 0 char hostname[1024]; gethostname (hostname, 1024); printf ("Rank %d of %d running on node %s\n", myrank, nprocs, hostname); #endif // create basis set BasisSet_t basis; CInt_createBasisSet(&basis); // input parameters and load basis set int nprow_fock; int npcol_fock; int nblks_fock; int nprow_purif; int nshells; int natoms; int nfunctions; int niters; if (myrank == 0) { if (argc != 8) { usage(argv[0]); MPI_Finalize(); exit(0); } // init parameters nprow_fock = atoi(argv[3]); npcol_fock = atoi(argv[4]); nprow_purif = atoi(argv[5]); nblks_fock = atoi(argv[6]); niters = atoi(argv[7]); assert(nprow_fock * npcol_fock == nprocs); assert(nprow_purif * nprow_purif * nprow_purif <= nprocs); assert(niters > 0); CInt_loadBasisSet(basis, argv[1], argv[2]); nshells = CInt_getNumShells(basis); natoms = CInt_getNumAtoms(basis); nfunctions = CInt_getNumFuncs(basis); assert(nprow_fock <= nshells && npcol_fock <= nshells); assert(nprow_purif <= nfunctions && nprow_purif <= nfunctions); printf("Job information:\n"); char *fname; fname = basename(argv[2]); printf(" molecule: %s\n", fname); fname = basename(argv[1]); printf(" basisset: %s\n", fname); printf(" charge = %d\n", CInt_getTotalCharge(basis)); printf(" #atoms = %d\n", natoms); printf(" #shells = %d\n", nshells); printf(" #functions = %d\n", nfunctions); printf(" fock build uses %d (%dx%d) nodes\n", nprow_fock * npcol_fock, nprow_fock, npcol_fock); printf(" purification uses %d (%dx%dx%d) nodes\n", nprow_purif * 
nprow_purif * nprow_purif, nprow_purif, nprow_purif, nprow_purif); printf(" #tasks = %d (%dx%d)\n", nblks_fock * nblks_fock * nprow_fock * nprow_fock, nblks_fock * nprow_fock, nblks_fock * nprow_fock); int nthreads = omp_get_max_threads(); printf(" #nthreads_cpu = %d\n", nthreads); } int btmp[8]; btmp[0] = nprow_fock; btmp[1] = npcol_fock; btmp[2] = nprow_purif; btmp[3] = nblks_fock; btmp[4] = niters; btmp[5] = natoms; btmp[6] = nshells; btmp[7] = nfunctions; MPI_Bcast(btmp, 8, MPI_INT, 0, MPI_COMM_WORLD); nprow_fock = btmp[0]; npcol_fock = btmp[1]; nprow_purif = btmp[2]; nblks_fock = btmp[3]; niters = btmp[4]; natoms = btmp[5]; nshells = btmp[6]; nfunctions = btmp[7]; // broadcast basis set void *bsbuf; int bsbufsize; if (myrank == 0) { CInt_packBasisSet(basis, &bsbuf, &bsbufsize); MPI_Bcast(&bsbufsize, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(bsbuf, bsbufsize, MPI_CHAR, 0, MPI_COMM_WORLD); } else { MPI_Bcast(&bsbufsize, 1, MPI_INT, 0, MPI_COMM_WORLD); bsbuf = (void *)malloc(bsbufsize); assert(bsbuf != NULL); MPI_Bcast(bsbuf, bsbufsize, MPI_CHAR, 0, MPI_COMM_WORLD); CInt_unpackBasisSet(basis, bsbuf); free(bsbuf); } // init PFock if (myrank == 0) { printf("Initializing pfock ...\n"); } PFock_t pfock; PFock_create(basis, nprow_fock, npcol_fock, nblks_fock, 1e-11, MAX_NUM_D, IS_SYMM, &pfock); if (myrank == 0) { double mem_cpu; PFock_getMemorySize(pfock, &mem_cpu); printf(" CPU uses %.3f MB\n", mem_cpu / 1024.0 / 1024.0); printf(" Done\n"); } // init purif purif_t *purif = create_purif(basis, nprow_purif, nprow_purif, nprow_purif); init_oedmat(basis, pfock, purif, nprow_fock, npcol_fock); // compute SCF if (myrank == 0) { printf("Computing SCF ...\n"); } int rowstart = purif->srow_purif; int rowend = purif->nrows_purif + rowstart - 1; int colstart = purif->scol_purif; int colend = purif->ncols_purif + colstart - 1; double energy0 = -1.0; double totaltime = 0.0; double purif_flops = 2.0 * nfunctions * nfunctions * nfunctions; double diis_flops; // set initial guess if 
(myrank == 0) { printf(" initialing D ...\n"); } PFock_setNumDenMat(NUM_D, pfock); initial_guess(pfock, basis, purif->runpurif, rowstart, rowend, colstart, colend, purif->D_block, purif->ldx); MPI_Barrier(MPI_COMM_WORLD); // compute nuc energy double ene_nuc = CInt_getNucEnergy(basis); if (myrank == 0) { printf(" nuc energy = %.10f\n", ene_nuc); } MPI_Barrier(MPI_COMM_WORLD); // main loop double t1, t2, t3, t4; for (int iter = 0; iter < niters; iter++) { if (myrank == 0) { printf(" iter %d\n", iter); } t3 = MPI_Wtime(); // fock matrix construction t1 = MPI_Wtime(); fock_build(pfock, basis, purif->runpurif, rowstart, rowend, colstart, colend, purif->ldx, purif->D_block, purif->F_block); if (myrank == 0) { printf("After fock build \n"); } // compute energy double energy = compute_energy(purif, purif->F_block, purif->D_block); t2 = MPI_Wtime(); if (myrank == 0) { printf(" fock build takes %.3f secs\n", t2 - t1); if (iter > 0) { printf(" energy %.10f (%.10f), %le\n", energy + ene_nuc, energy, fabs (energy - energy0)); } else { printf(" energy %.10f (%.10f)\n", energy + ene_nuc, energy); } } if (iter > 0 && fabs (energy - energy0) < 1e-11) { niters = iter + 1; break; } energy0 = energy; // compute DIIS t1 = MPI_Wtime(); compute_diis(pfock, purif, purif->D_block, purif->F_block, iter); t2 = MPI_Wtime(); if (myrank == 0) { if (iter > 1) { diis_flops = purif_flops * 6.0; } else { diis_flops = purif_flops * 2.0; } printf(" diis takes %.3f secs, %.3lf Gflops\n", t2 - t1, diis_flops / (t2 - t1) / 1e9); } #ifdef __SCF_OUT__ if (myrank == 0) { double outbuf[nfunctions]; char fname[1024]; sprintf(fname, "XFX_%d_%d.dat", nfunctions, iter); FILE *fp = fopen(fname, "w+"); assert(fp != NULL); for (int i = 0; i < nfunctions; i++) { PFock_getMat(pfock, PFOCK_MAT_TYPE_F, USE_D_ID, i, i, USE_D_ID, nfunctions - 1, outbuf, nfunctions); for (int j = 0; j < nfunctions; j++) { fprintf(fp, "%.10e\n", outbuf[j]); } } fclose(fp); } #endif // purification MPI_Barrier(MPI_COMM_WORLD); t1 = 
MPI_Wtime(); int it = compute_purification(purif, purif->F_block, purif->D_block); t2 = MPI_Wtime(); MPI_Barrier(MPI_COMM_WORLD); if (myrank == 0) { printf(" purification takes %.3f secs," " %d iterations, %.3f Gflops\n", t2 - t1, it, (it * 2.0 + 4.0) * purif_flops / (t2 - t1) / 1e9); } /* #if defined(USE_ELEMENTAL) ElGlobalArraysPrint_d( eldga, pfock->ga_D[USE_D_ID] ); #else GA_Print (pfock->ga_D[USE_D_ID]); #endif */ t4 = MPI_Wtime (); totaltime += t4 - t3; #ifdef __SCF_TIMING__ PFock_getStatistics(pfock); double purif_timedgemm; double purif_timepdgemm; double purif_timepass; double purif_timetr; MPI_Reduce(&purif->timedgemm, &purif_timedgemm, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(&purif->timepdgemm, &purif_timepdgemm, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(&purif->timepass, &purif_timepass, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(&purif->timetr, &purif_timetr, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); if (myrank == 0) { printf(" Purification Statistics:\n"); printf(" average totaltime = %.3f\n" " average timetr = %.3f\n" " average timedgemm = %.3f, %.3f Gflops\n" " average timepdgemm = %.3f, %.3f Gflops\n", purif_timepass / purif->np_purif, purif_timetr / purif->np_purif, purif_timedgemm / purif->np_purif, (it * 2.0 + 4.0) * purif_flops / (purif_timedgemm / purif->np_purif) / 1e9, purif_timepdgemm / purif->np_purif, (it * 2.0 + 4.0) * purif_flops / (purif_timepdgemm / purif->np_purif) / 1e9); } #endif } /* for (iter = 0; iter < NITERATIONS; iter++) */ if (myrank == 0) { printf(" totally takes %.3f secs: %.3f secs/iters\n", totaltime, totaltime / niters); printf(" Done\n"); } destroy_purif(purif); PFock_destroy(pfock); CInt_destroyBasisSet(basis); MPI_Finalize(); return 0; }
int main(int argc, char **argv) { USING_NAMESPACE_ACADO const double KKT_tol = 1e-6; //READ THE DEMO LENGTHS & nBF FROM THE COMMAND LINE std::deque<std::string> args(argv + 1, argv + argc + !argc); const int nBF=atoi(args[0].c_str()); args.pop_front(); const int nD=(int)args.size(); int nS=0; for(int i=0; i<nD;i++) nS+=atoi(args[i].c_str()); //READ DATA std::string path=DATA_PATH; Matrix D = readFromFile((path+"demos.dat").c_str()); //d(:,0)=time, d(:,1)=x, d(:,2)=dx, d(:,3)=ddx; Vector pI = readFromFile((path+"initial_guess.dat").c_str()); Matrix A = readFromFile((path+"inequality_constraint_matrix.dat").c_str()); Vector b = readFromFile((path+"inequality_constraint_vector.dat").c_str()); Matrix S = readFromFile((path+"scale_matrix.dat").c_str()); //RELEVANT INDEX SETS std::vector<std::vector<int> > d_ind=getDemoInd(args); std::vector<int> a_ind=getAInd(nBF,nD); std::vector<int> b_ind=getBInd(nBF,nD); std::vector<std::vector<int> > w_ind=getWInd(nBF,nD); std::vector<int> r_ind=getRInd(nBF,nD); std::vector<int> c_ind=getCInd(nBF,nD); //PARAMETER & OBJECTIVE FUNCTION Parameter p(2*nD+nBF*(2+nD)+1,1); Matrix BM(nS,2*nD+nBF*(2+nD)+1); BM.setZero(); Expression B(BM); double t,x,dx; for (int d=0; d<nD; d++) for(int s=0;s<(int)d_ind[d].size();s++) { t=D(d_ind[d][s],0); x=D(d_ind[d][s],1); dx=D(d_ind[d][s],2); B(d_ind[d][s],a_ind[d])=x; B(d_ind[d][s],b_ind[d])=dx; for(int n=0;n<nBF;n++){ B(d_ind[d][s],w_ind[d][n])=(-0.5*(t-p(c_ind[n])*S(c_ind[n],c_ind[n])).getPowInt(2)/(p(r_ind[n])*p(r_ind[n])*S(r_ind[n],r_ind[n])*S(r_ind[n],r_ind[n]))).getExp(); // std::cout<<d<<std::endl; //std::cout<< S(r_ind[d],r_ind[d])<<std::endl; } } Expression f; f<<B*S*p-D.getCol(3); Expression ez(nS); for (int i=0; i<nS; i++) ez(i)=p(2*nD+nBF*(2+nD)); Vector e(nS); e.setAll(1.0); Vector null(nS); null.setAll(0.0); NLP nlp; nlp.minimize(p(2*nD+nBF*(2+nD))); nlp.subjectTo(f - ez <= null); nlp.subjectTo(f + ez >= null); //nlp.subjectTo(A*S*p <= b); //ALGORITHM ParameterEstimationAlgorithm 
algorithm(nlp); VariablesGrid initial_guess(2*nD+nBF*(2+nD)+1,0.0,0.0,1 ); initial_guess.setVector( 0,S.getInverse()*pI ); algorithm.initializeParameters(initial_guess); // OPTIONS algorithm.set( KKT_TOLERANCE, KKT_tol); algorithm.set( ABSOLUTE_TOLERANCE, 1e-4); algorithm.set( PRINTLEVEL,HIGH); algorithm.set( MAX_NUM_ITERATIONS, 2000 ); algorithm.set (PRINT_COPYRIGHT, NO); // algorithm.set (PRINT_SCP_METHOD_PROFILE, YES); algorithm.set( HESSIAN_APPROXIMATION, EXACT_HESSIAN ); algorithm.set(GLOBALIZATION_STRATEGY, GS_LINESEARCH ); algorithm.set(LINESEARCH_TOLERANCE, 1e-2 ); algorithm.set(INFEASIBLE_QP_HANDLING,IQH_RELAX_L2); algorithm.set(FEASIBILITY_CHECK,BT_TRUE); // LOGGING LogRecord logRecord( LOG_AT_EACH_ITERATION,(path+"log.dat").c_str(),PS_PLAIN); logRecord << LOG_OBJECTIVE_VALUE; algorithm << logRecord; //SOLVING double clock1 = clock(); algorithm.solve(); double clock2 = clock(); Vector solution; algorithm.getParameters(solution); // solution.print("optimal solution \n"); solution.printToFile((path+"solution.dat").c_str(),"",PS_PLAIN); printf("\n computation time (ACADO) = %.16e \n", (clock2-clock1)/CLOCKS_PER_SEC); return 0; }