/*
 * Time cint2e_sph (driven by a CINTOpt optimizer) over every shell quartet
 * (i, j, k, l) with j <= i and l <= k <= i, printing progress and the total
 * elapsed time to stdout.
 *
 * atm/natm, bas/nbas and env are forwarded unchanged to the CINT* routines;
 * nbas is also the number of shells the loops run over.  The integrals
 * themselves are discarded: each quartet is computed into a scratch buffer
 * that is freed immediately.
 *
 * If an index table cannot be allocated the function reports on stderr and
 * returns without running.  If a scratch buffer cannot be allocated the
 * affected quartet is skipped and a warning is printed after the loop.
 */
void run_all(int *atm, int natm, int *bas, int nbas, double *env)
{
        int i, j, ij;

        /* Lookup tables: packed pair index ij -> (i, j) with j <= i. */
        int *ishls = malloc(sizeof(int)*nbas*nbas);
        int *jshls = malloc(sizeof(int)*nbas*nbas);
        /* malloc(0) may legitimately return NULL, so only test when nbas > 0. */
        if (nbas > 0 && (ishls == NULL || jshls == NULL)) {
                fprintf(stderr, "run_all: cannot allocate shell index tables\n");
                free(ishls);
                free(jshls);
                return;
        }
        for (i = 0, ij = 0; i < nbas; i++) {
                for (j = 0; j <= i; j++, ij++) {
                        ishls[ij] = i;
                        jshls[ij] = j;
                }
        }

        int ncgto = CINTtot_cgto_spheric(bas, nbas);
        printf("\tshells = %d, total cGTO = %d, total pGTO = %d\n",
               nbas, ncgto, CINTtot_pgto_spheric(bas, nbas));

        double tot = (double)ncgto*ncgto*ncgto*ncgto/8;
        double time0 = omp_get_wtime();
        printf("\tcint2e_sph with optimizer: total num ERI = %.2e\n", tot);

        CINTOpt *opt = NULL;
        cint2e_sph_optimizer(&opt, atm, natm, bas, nbas, env);

        /*
         * Exact number of quartets visited below:
         *   sum_{i=0}^{nbas-1} (i+1) * (i+1)(i+2)/2 = n(n+1)(n+2)(3n+1)/24.
         * Unlike the n^2(n+1)(n+2)/8 estimate it is never 0 for nbas >= 1
         * (no division by zero) and the percentage cannot exceed 100.
         */
        long long total = (long long)nbas*(nbas+1)*(nbas+2)*(3LL*nbas+1)/24;
        long long count = 0;    /* quartets finished so far, all threads */
        int pct = 0;            /* last percentage printed */
        int oom = 0;            /* set when a scratch buffer allocation failed */

#pragma omp parallel for default(none) schedule(dynamic, 2) \
        shared(atm, natm, bas, nbas, env, ishls, jshls, opt, time0, pct, count, total, oom, stdout)
        for (ij = 0; ij < nbas*(nbas+1)/2; ij++) {
                int ish = ishls[ij];
                int jsh = jshls[ij];
                int di = CINTcgto_spheric(ish, bas);
                int dj = CINTcgto_spheric(jsh, bas);
                // when ksh==ish, there exists k<i, so it's possible kl>ij
                int kl_max = (ish+1)*(ish+2)/2;
                int kl;

                for (kl = 0; kl < kl_max; kl++) {
                        int ksh = ishls[kl];
                        int lsh = jshls[kl];
                        int dk = CINTcgto_spheric(ksh, bas);
                        int dl = CINTcgto_spheric(lsh, bas);
                        int shls[4] = {ish, jsh, ksh, lsh};
                        double *buf = malloc(sizeof(double) * di*dj*dk*dl);

                        if (buf == NULL) {
                                /* Cannot leave the parallel loop early;
                                 * record the failure and skip the quartet. */
#pragma omp atomic write
                                oom = 1;
                                continue;
                        }
                        cint2e_sph(buf, shls, atm, natm, bas, nbas, env, opt);
                        free(buf);
                }

                /* count and pct are shared: update and report them under a
                 * lock so concurrent threads do not race on them. */
#pragma omp critical
                {
                        count += kl_max;
                        if (100LL*count/total > pct) {
                                pct++;
                                printf("\t%d%%, CPU time = %8.2f\r",
                                       pct, omp_get_wtime()-time0);
                                fflush(stdout);
                        }
                }
        }

        double tt = omp_get_wtime() - time0;
        printf("\t100%%, CPU time = %8.2f, %8.4f Mflops\n", tt, tot/1e6/tt);
        if (oom) {
                fprintf(stderr, "run_all: out of memory, some integrals were skipped\n");
        }

        CINTdel_optimizer(&opt);
        free(ishls);
        free(jshls);
}
/*
 * By-reference entry point for CINTtot_cgto_spheric: reads the shell count
 * through the nbas pointer and returns the total that CINTtot_cgto_spheric
 * reports for (bas, *nbas).
 * NOTE(review): the trailing underscore and pointer arguments look like a
 * Fortran binding -- confirm against the callers.
 */
FINT cinttot_cgto_spheric_(const FINT *bas, const FINT *nbas)
{
        const FINT shell_count = *nbas;

        return CINTtot_cgto_spheric(bas, shell_count);
}