void FCI3pdm_kern_sf(double *rdm1, double *rdm2, double *rdm3, double *bra, double *ket, int bcount, int stra_id, int strb_id, int norb, int na, int nb, int nlinka, int nlinkb, _LinkT *clink_indexa, _LinkT *clink_indexb) { const int nnorb = norb * norb; const int n4 = nnorb * nnorb; const int n3 = nnorb * norb; int i, j, k, l, ij; size_t n; double *tbra; double *t1bra = malloc(sizeof(double) * nnorb * bcount); double *t1ket = malloc(sizeof(double) * nnorb * bcount); double *t2bra = malloc(sizeof(double) * n4 * bcount); double *pbra, *pt2; // t2[:,i,j,k,l] = E^i_j E^k_l|ket> FCI_t1ci_sf(bra, t1bra, bcount, stra_id, strb_id, norb, na, nb, nlinka, nlinkb, clink_indexa, clink_indexb); FCI_t2ci_sf(bra, t2bra, bcount, stra_id, strb_id, norb, na, nb, nlinka, nlinkb, clink_indexa, clink_indexb); FCI_t1ci_sf(ket, t1ket, bcount, stra_id, strb_id, norb, na, nb, nlinka, nlinkb, clink_indexa, clink_indexb); #pragma omp parallel default(none) \ shared(rdm3, t1ket, t2bra, norb, bcount), \ private(ij, i, j, k, l, n, tbra, pbra, pt2) { tbra = malloc(sizeof(double) * nnorb * bcount); #pragma omp for schedule(dynamic, 4) for (ij = 0; ij < nnorb; ij++) { // loop ij for (<ket| E^j_i E^l_k) for (n = 0; n < bcount; n++) { pbra = tbra + n * nnorb; pt2 = t2bra + n * n4 + ij; for (k = 0; k < norb; k++) { for (l = 0; l < norb; l++) { pbra[k*norb+l] = pt2[l*n3+k*nnorb]; } } } i = ij / norb; j = ij - i * norb; tril2pdm_particle_symm(rdm3+(j*norb+i)*n4, tbra, t1ket, bcount, j+1, norb); } free(tbra); } make_rdm12_sf(rdm1, rdm2, bra, ket, t1bra, t1ket, bcount, stra_id, strb_id, norb, na, nb); free(t1bra); free(t1ket); free(t2bra); }
/* * use symmetry ci0[a,b] == ci0[b,a], t2[a,b,...] == t2[b,a,...] */ void FCI3pdm_kern_spin0(double *rdm1, double *rdm2, double *rdm3, double *bra, double *ket, int bcount, int stra_id, int strb_id, int norb, int na, int nb, int nlinka, int nlinkb, _LinkT *clink_indexa, _LinkT *clink_indexb) { int fill1; if (strb_id+bcount <= stra_id) { fill1 = bcount; } else if (stra_id >= strb_id) { fill1 = stra_id - strb_id + 1; } else { return; } const int nnorb = norb * norb; const int n4 = nnorb * nnorb; const int n3 = nnorb * norb; int i, j, k, l, ij; size_t n; double factor; double *tbra; double *t1bra = malloc(sizeof(double) * nnorb * fill1); double *t1ket = malloc(sizeof(double) * nnorb * fill1); double *t2bra = malloc(sizeof(double) * n4 * fill1); double *pbra, *pt2; // t2[:,i,j,k,l] = E^i_j E^k_l|ket> FCI_t2ci_sf(bra, t2bra, fill1, stra_id, strb_id, norb, na, nb, nlinka, nlinkb, clink_indexa, clink_indexb); FCI_t1ci_sf(bra, t1bra, fill1, stra_id, strb_id, norb, na, nb, nlinka, nlinkb, clink_indexa, clink_indexb); FCI_t1ci_sf(ket, t1ket, fill1, stra_id, strb_id, norb, na, nb, nlinka, nlinkb, clink_indexa, clink_indexb); #pragma omp parallel default(none) \ shared(rdm3, t1ket, t2bra, norb, stra_id, strb_id, fill1), \ private(ij, i, j, k, l, n, tbra, pbra, pt2, factor) { tbra = malloc(sizeof(double) * nnorb * fill1); #pragma omp for schedule(dynamic, 4) for (ij = 0; ij < nnorb; ij++) { // loop ij for (<ket| E^j_i E^l_k) i = ij / norb; j = ij - i * norb; for (n = 0; n < fill1; n++) { if (n+strb_id == stra_id) { factor = 1; } else { factor = 2; } for (k = 0; k <= j; k++) { pbra = tbra + n * nnorb + k*norb; pt2 = t2bra + n * n4 + k*nnorb + ij; for (l = 0; l < norb; l++) { pbra[l] = pt2[l*n3] * factor; } } } tril2pdm_particle_symm(rdm3+(j*norb+i)*n4, tbra, t1ket, fill1, j+1, norb); } free(tbra); } make_rdm12_spin0(rdm1, rdm2, bra, ket, t1bra, t1ket, fill1, stra_id, strb_id, norb, na, nb); free(t1bra); free(t1ket); free(t2bra); }
void FCI4pdm_kern_sf(double *rdm1, double *rdm2, double *rdm3, double *rdm4, double *bra, double *ket, int bcount, int stra_id, int strb_id, int norb, int na, int nb, int nlinka, int nlinkb, _LinkT *clink_indexa, _LinkT *clink_indexb) { const int nnorb = norb * norb; const int n4 = nnorb * nnorb; const int n3 = nnorb * norb; const size_t n6 = nnorb * nnorb * nnorb; int i, j, k, l, ij; size_t n; double *tbra; double *t1bra = malloc(sizeof(double) * nnorb * bcount * 2); double *t2bra = malloc(sizeof(double) * n4 * bcount * 2); double *t1ket = t1bra + nnorb * bcount; double *t2ket = t1bra + n4 * bcount; double *pbra, *pt2; // t2[:,i,j,k,l] = E^i_j E^k_l|ket> FCI_t1ci_sf(bra, t1bra, bcount, stra_id, strb_id, norb, na, nb, nlinka, nlinkb, clink_indexa, clink_indexb); FCI_t2ci_sf(bra, t2bra, bcount, stra_id, strb_id, norb, na, nb, nlinka, nlinkb, clink_indexa, clink_indexb); if (bra == ket) { t1ket = t1bra; t2ket = t2bra; } else { FCI_t1ci_sf(ket, t1ket, bcount, stra_id, strb_id, norb, na, nb, nlinka, nlinkb, clink_indexa, clink_indexb); FCI_t2ci_sf(ket, t2ket, bcount, stra_id, strb_id, norb, na, nb, nlinka, nlinkb, clink_indexa, clink_indexb); } #pragma omp parallel default(none) \ shared(rdm3, rdm4, t1ket, t2bra, t2ket, norb, bcount), \ private(ij, i, j, k, l, n, tbra, pbra, pt2) { tbra = malloc(sizeof(double) * nnorb * bcount); #pragma omp for schedule(static, 1) nowait for (ij = 0; ij < nnorb; ij++) { // loop ij for (<ket| E^j_i E^l_k) for (n = 0; n < bcount; n++) { for (k = 0; k < norb; k++) { pbra = tbra + n * nnorb + k*norb; pt2 = t2bra + n * n4 + k*nnorb + ij; for (l = 0; l < norb; l++) { pbra[l] = pt2[l*n3]; } } } i = ij / norb; j = ij - i * norb; // contract <bra-of-Eij| with |E^k_l E^m_n ket> tril3pdm_particle_symm(rdm4+(j*norb+i)*n6, tbra, t2ket, bcount, j+1, norb); // rdm3 tril2pdm_particle_symm(rdm3+(j*norb+i)*n4, tbra, t1ket, bcount, j+1, norb); } free(tbra); } make_rdm12_sf(rdm1, rdm2, bra, ket, t1bra, t1ket, bcount, stra_id, strb_id, norb, na, nb); free(t1bra); free(t2bra); }
void NEVPTkern_sf(void (*contract_kernel)(), double *rdm2, double *rdm3, double *eri, double *ci0, int bcount, int stra_id, int strb_id, int norb, int na, int nb, int nlinka, int nlinkb, _LinkT *clink_indexa, _LinkT *clink_indexb) { const int nnorb = norb * norb; const int n4 = nnorb * nnorb; const int n3 = nnorb * norb; int i, j, k, l, ij; size_t n; double *t1ket = malloc(sizeof(double) * nnorb * bcount); double *t2ket = malloc(sizeof(double) * n4 * bcount); double *gt2 = malloc(sizeof(double) * nnorb * bcount); double *tbra, *pbra, *pt2; // t2[:,i,j,k,l] = E^i_j E^k_l|ket> FCI_t1ci_sf(ci0, t1ket, bcount, stra_id, strb_id, norb, na, nb, nlinka, nlinkb, clink_indexa, clink_indexb); FCI_t2ci_sf(ci0, t2ket, bcount, stra_id, strb_id, norb, na, nb, nlinka, nlinkb, clink_indexa, clink_indexb); (*contract_kernel)(gt2, eri, t2ket, bcount, norb, na, nb); #pragma omp parallel default(none) \ shared(rdm2, rdm3, t1ket, t2ket, gt2, norb, bcount), \ private(ij, i, j, k, l, n, tbra, pbra, pt2) { tbra = malloc(sizeof(double) * nnorb * bcount); #pragma omp for schedule(dynamic, 4) for (ij = 0; ij < nnorb; ij++) { // loop ij for (<ket| E^j_i E^l_k) i = ij / norb; j = ij - i * norb; for (n = 0; n < bcount; n++) { for (k = 0; k <= j; k++) { pbra = tbra + n * nnorb + k*norb; pt2 = t2ket + n * n4 + k*nnorb + ij; for (l = 0; l < norb; l++) { pbra[l] = pt2[l*n3]; } } } tril2pdm_particle_symm(rdm3+(j*norb+i)*n4, tbra, gt2, bcount, j+1, norb); } free(tbra); } // reordering of rdm2 is needed: rdm2.transpose(1,0,2,3) const char TRANS_N = 'N'; const char TRANS_T = 'T'; const double D1 = 1; dgemm_(&TRANS_N, &TRANS_T, &nnorb, &nnorb, &bcount, &D1, gt2, &nnorb, t1ket, &nnorb, &D1, rdm2, &nnorb); free(gt2); free(t1ket); free(t2ket); }