/*
 * vv[nao,nao] = conj(ao1[i,nao]) * ao2[i,nao]
 *
 * Accumulate the nao x nao complex product of two AO arrays over all grid
 * points, processing the grid in chunks of BLKSIZE points.
 *
 *   vv          output, nao*nao elements; zeroed here before accumulation
 *   ao1, ao2    input AO values; block ib starts at offset ib*BLKSIZE and
 *               ngrids is forwarded to dot_ao_ao alongside the block length
 *               (presumably the grid index is the fastest-varying one and
 *               ngrids acts as the leading dimension -- confirm against
 *               dot_ao_ao)
 *   nao         number of AO functions (dimension of vv)
 *   ngrids      total number of grid points
 *   nbas        number of non0table entries per grid block
 *   hermi       forwarded to dot_ao_ao; when non-zero, NPzhermi_triu is
 *               applied to vv at the end
 *   non0table   per-block screening table, nbas entries per block
 *   shls_slice, ao_loc
 *               forwarded unchanged to dot_ao_ao
 */
void VXCzdot_ao_ao(double complex *vv, double complex *ao1, double complex *ao2,
                   int nao, int ngrids, int nbas, int hermi,
                   unsigned char *non0table, int *shls_slice, int *ao_loc)
{
        const int nblk = (ngrids+BLKSIZE-1) / BLKSIZE;
        memset(vv, 0, sizeof(double complex) * nao * nao);

        /*
         * nblk is const-qualified.  Since OpenMP 4.0 (GCC >= 9) such variables
         * are no longer predetermined shared, so default(none) requires them
         * to be listed explicitly.  firstprivate is accepted by both older and
         * newer compilers, whereas shared(nblk) is rejected by older GCC.
         */
#pragma omp parallel default(none) \
        shared(vv, ao1, ao2, nao, ngrids, nbas, hermi, \
               non0table, shls_slice, ao_loc) \
        firstprivate(nblk)
{
        /* Element count in size_t so that nao*nao cannot overflow int. */
        const size_t nelem = (size_t)nao * nao;
        size_t k;
        int ip, ib;
        /*
         * Zero-initialised per-thread accumulator (two spare elements are
         * kept, as in the original allocation).
         * NOTE(review): the allocation result is not checked; this function
         * has no way to report an error to its caller.
         */
        double complex *v_priv = calloc(nelem + 2, sizeof(double complex));
#pragma omp for nowait schedule(static)
        for (ib = 0; ib < nblk; ib++) {
                ip = ib * BLKSIZE;
                /* The last block may hold fewer than BLKSIZE grid points. */
                dot_ao_ao(v_priv, ao1+ip, ao2+ip,
                          nao, ngrids, MIN(ngrids-ip, BLKSIZE), hermi,
                          non0table+ib*nbas, shls_slice, ao_loc);
        }
        /* Fold the thread-local result into vv, one thread at a time.  The
         * complex conjugate of the private buffer is what gets accumulated. */
#pragma omp critical
        {
                for (k = 0; k < nelem; k++) {
                        vv[k] += conj(v_priv[k]);
                }
        }
        free(v_priv);
}
        if (hermi != 0) {
                NPzhermi_triu(nao, vv, hermi);
        }
}
/*
 * Expand a packed lower triangle into a square row-major matrix.
 *
 *   n      matrix dimension
 *   tril   packed input: rows of the lower triangle stored back to back
 *          (row i contributes i+1 elements, n*(n+1)/2 in total)
 *   mat    output n x n matrix; only elements with column <= row are
 *          written by this routine
 *   hermi  when non-zero, mat is handed to NPzhermi_triu(n, mat, hermi)
 *          afterwards (presumably to fill the upper triangle -- confirm
 *          against NPzhermi_triu)
 */
void NPzunpack_tril(int n, double complex *tril, double complex *mat, int hermi)
{
        const size_t dim = n;
        const double complex *src = tril;
        size_t row, col;

        for (row = 0; row < dim; row++) {
                double complex *dst = mat + row * dim;
                /* Copy the row's leading row+1 entries from the packed stream. */
                for (col = 0; col <= row; col++) {
                        dst[col] = *src++;
                }
        }

        if (hermi != 0) {
                NPzhermi_triu(n, mat, hermi);
        }
}