/* vv[nao,nao] = ao1[i,nao] * ao2[i,nao] */
/*
 * Split the grid into blocks of at most blksize points, let dot_ao_ao
 * accumulate each block into a per-thread nao*nao buffer, then sum the
 * per-thread buffers into vv.
 *
 * vv        output, nao*nao doubles; zeroed here before accumulation.
 * ao1, ao2  inputs; the block starting at grid point ip is read from
 *           offset ip*nao, i.e. nao consecutive values per grid point.
 * nao       dimension of vv and per-grid-point stride of ao1/ao2.
 * ngrids    total number of grid points.
 * blksize   maximum number of grid points handed to dot_ao_ao at once.
 * non0table advanced by nbas entries per block and forwarded to
 *           dot_ao_ao (presumably a per-block, per-shell screening
 *           table -- confirm against dot_ao_ao).
 * atm, natm, bas, nbas, env
 *           forwarded unchanged to dot_ao_ao.
 *
 * If ngrids or blksize is not positive, vv is left zeroed.
 * Aborts the process if a per-thread work buffer cannot be allocated.
 */
void VXCdot_ao_ao(double *vv, double *ao1, double *ao2,
                  int nao, int ngrids, int blksize, char *non0table,
                  int *atm, int natm, int *bas, int nbas, double *env)
{
        /* Element count in size_t so large nao cannot overflow int. */
        const size_t nao2 = (size_t)nao * nao;

        memset(vv, 0, sizeof(double) * nao2);

        /* Nothing to accumulate; also avoids dividing by blksize == 0. */
        if (ngrids <= 0 || blksize <= 0) {
                return;
        }

        /* ceil(ngrids / blksize) without the ngrids+blksize overflow risk */
        const int nblk = (ngrids - 1) / blksize + 1;

        /*
         * No default(none): nblk is const, and since OpenMP 4.0 const
         * variables are no longer predetermined shared, so they would have
         * to be listed explicitly.  Per-thread state is declared inside the
         * region instead, which makes it private automatically.
         */
#pragma omp parallel
{
        int ib;
        size_t i;
        double *v_priv = calloc(nao2, sizeof(double));

        /* calloc(0, ...) may legitimately return NULL, hence nao2 > 0. */
        if (v_priv == NULL && nao2 > 0) {
                abort();
        }

#pragma omp for nowait schedule(static)
        for (ib = 0; ib < nblk; ib++) {
                const int ip = ib * blksize;
                /* ip*nao can exceed INT_MAX for large grids; use size_t. */
                const size_t off = (size_t)ip * nao;

                dot_ao_ao(v_priv, ao1 + off, ao2 + off,
                          nao, MIN(ngrids - ip, blksize),
                          non0table + (size_t)ib * nbas,
                          atm, natm, bas, nbas, env);
        }

        /* Serialize the reduction of the private buffers into vv. */
#pragma omp critical
        {
                for (i = 0; i < nao2; i++) {
                        vv[i] += v_priv[i];
                }
        }
        free(v_priv);
}
}
/* vv[nao,nao] = ao1[i,nao] * ao2[i,nao] */
/*
 * Split the grid into blocks of at most BLKSIZE points, let dot_ao_ao
 * accumulate each block into a per-thread buffer, sum the per-thread
 * buffers into vv, and finally call NPdsymm_triu when hermi != 0.
 *
 * vv         output, nao*nao doubles; zeroed here before accumulation.
 * ao1, ao2   inputs; the block starting at grid point ip is passed as
 *            ao1+ip / ao2+ip together with ngrids (presumably ngrids is
 *            the leading stride inside dot_ao_ao -- confirm there).
 * nao        dimension of vv.
 * ngrids     total number of grid points.
 * nbas       number of non0table entries consumed per grid block.
 * hermi      forwarded to dot_ao_ao; when non-zero, NPdsymm_triu(nao, vv,
 *            hermi) is applied to the result.
 * non0table  advanced by nbas entries per block and forwarded to
 *            dot_ao_ao.
 * shls_slice, ao_loc
 *            forwarded unchanged to dot_ao_ao.
 *
 * Aborts the process if a per-thread work buffer cannot be allocated.
 */
void VXCdot_ao_ao(double *vv, double *ao1, double *ao2,
                  int nao, int ngrids, int nbas, int hermi,
                  unsigned char *non0table, int *shls_slice, int *ao_loc)
{
        const int nblk = (ngrids + BLKSIZE - 1) / BLKSIZE;
        /* Element count in size_t so large nao cannot overflow int. */
        const size_t nao2 = (size_t)nao * nao;

        memset(vv, 0, sizeof(double) * nao2);

#pragma omp parallel
{
        int ip, ib;
        size_t i;
        /* The 2 extra elements are kept from the original allocation
         * (presumably slack for dot_ao_ao -- confirm). */
        double *v_priv = calloc(nao2 + 2, sizeof(double));

        /* Fail loudly instead of dereferencing NULL in dot_ao_ao. */
        if (v_priv == NULL) {
                abort();
        }

#pragma omp for nowait schedule(static)
        for (ib = 0; ib < nblk; ib++) {
                ip = ib * BLKSIZE;
                dot_ao_ao(v_priv, ao1 + ip, ao2 + ip,
                          nao, ngrids, MIN(ngrids - ip, BLKSIZE), hermi,
                          non0table + (size_t)ib * nbas,
                          shls_slice, ao_loc);
        }

        /* Serialize the reduction of the private buffers into vv. */
#pragma omp critical
        {
                for (i = 0; i < nao2; i++) {
                        vv[i] += v_priv[i];
                }
        }
        free(v_priv);
}

        if (hermi != 0) {
                NPdsymm_triu(nao, vv, hermi);
        }
}