void FCI3pdm_kern_sf(double *rdm1, double *rdm2, double *rdm3, double *bra, double *ket, int bcount, int stra_id, int strb_id, int norb, int na, int nb, int nlinka, int nlinkb, _LinkT *clink_indexa, _LinkT *clink_indexb) { const int nnorb = norb * norb; const int n4 = nnorb * nnorb; const int n3 = nnorb * norb; int i, j, k, l, ij; size_t n; double *tbra; double *t1bra = malloc(sizeof(double) * nnorb * bcount); double *t1ket = malloc(sizeof(double) * nnorb * bcount); double *t2bra = malloc(sizeof(double) * n4 * bcount); double *pbra, *pt2; // t2[:,i,j,k,l] = E^i_j E^k_l|ket> FCI_t1ci_sf(bra, t1bra, bcount, stra_id, strb_id, norb, na, nb, nlinka, nlinkb, clink_indexa, clink_indexb); FCI_t2ci_sf(bra, t2bra, bcount, stra_id, strb_id, norb, na, nb, nlinka, nlinkb, clink_indexa, clink_indexb); FCI_t1ci_sf(ket, t1ket, bcount, stra_id, strb_id, norb, na, nb, nlinka, nlinkb, clink_indexa, clink_indexb); #pragma omp parallel default(none) \ shared(rdm3, t1ket, t2bra, norb, bcount), \ private(ij, i, j, k, l, n, tbra, pbra, pt2) { tbra = malloc(sizeof(double) * nnorb * bcount); #pragma omp for schedule(dynamic, 4) for (ij = 0; ij < nnorb; ij++) { // loop ij for (<ket| E^j_i E^l_k) for (n = 0; n < bcount; n++) { pbra = tbra + n * nnorb; pt2 = t2bra + n * n4 + ij; for (k = 0; k < norb; k++) { for (l = 0; l < norb; l++) { pbra[k*norb+l] = pt2[l*n3+k*nnorb]; } } } i = ij / norb; j = ij - i * norb; tril2pdm_particle_symm(rdm3+(j*norb+i)*n4, tbra, t1ket, bcount, j+1, norb); } free(tbra); } make_rdm12_sf(rdm1, rdm2, bra, ket, t1bra, t1ket, bcount, stra_id, strb_id, norb, na, nb); free(t1bra); free(t1ket); free(t2bra); }
static void ctr_rhf2e_kern(double *eri, double *ci0, double *ci1, double *ci1buf, double *t1buf, int bcount_for_spread_a, int ncol_ci1buf, int bcount, int stra_id, int strb_id, int norb, int na, int nb, int nlinka, int nlinkb, _LinkT *clink_indexa, _LinkT *clink_indexb) { const char TRANS_N = 'N'; const double D0 = 0; const double D1 = 1; const int nnorb = norb * norb; double *t1 = t1buf; double *vt1 = t1buf + nnorb*bcount; double csum; csum = FCI_t1ci_sf(ci0, t1, bcount, stra_id, strb_id, norb, na, nb, nlinka, nlinkb, clink_indexa, clink_indexb); if (csum > CSUMTHR) { dgemm_(&TRANS_N, &TRANS_N, &nnorb, &bcount, &nnorb, &D1, eri, &nnorb, t1, &nnorb, &D0, vt1, &nnorb); spread_b_t1(ci1, vt1, bcount, stra_id, strb_id, norb, nb, nlinkb, clink_indexb); spread_a_t1(ci1buf, vt1, bcount_for_spread_a, stra_id, 0, norb, ncol_ci1buf, nlinka, clink_indexa); } }
/* * *********************************************** * transition density matrix, spin free */ void FCItdm12kern_sf(double *tdm1, double *tdm2, double *bra, double *ket, int bcount, int stra_id, int strb_id, int norb, int na, int nb, int nlinka, int nlinkb, _LinkT *clink_indexa, _LinkT *clink_indexb, int symm) { const int INC1 = 1; const char TRANS_N = 'N'; const char TRANS_T = 'T'; const double D1 = 1; const int nnorb = norb * norb; double csum; double *buf0 = malloc(sizeof(double) * nnorb*bcount); double *buf1 = malloc(sizeof(double) * nnorb*bcount); csum = FCI_t1ci_sf(bra, buf1, bcount, stra_id, strb_id, norb, na, nb, nlinka, nlinkb, clink_indexa, clink_indexb); if (csum < CSUMTHR) { goto _normal_end; } csum = FCI_t1ci_sf(ket, buf0, bcount, stra_id, strb_id, norb, na, nb, nlinka, nlinkb, clink_indexa, clink_indexb); if (csum < CSUMTHR) { goto _normal_end; } dgemv_(&TRANS_N, &nnorb, &bcount, &D1, buf0, &nnorb, bra+stra_id*nb+strb_id, &INC1, &D1, tdm1, &INC1); switch (symm) { case PARTICLESYM: tril_particle_symm(tdm2, buf1, buf0, bcount, norb, D1, D1); break; default: dgemm_(&TRANS_N, &TRANS_T, &nnorb, &nnorb, &bcount, &D1, buf0, &nnorb, buf1, &nnorb, &D1, tdm2, &nnorb); } _normal_end: free(buf0); free(buf1); }
/* * t2[:,i,j,k,l] = E^i_j E^k_l|ci0> */ static void rdm4_a_t2(double *ci0, double *t2, int bcount, int stra_id, int strb_id, int norb, int na, int nb, int nlinka, int nlinkb, _LinkT *clink_indexa, _LinkT *clink_indexb) { const int nnorb = norb * norb; const int n4 = nnorb * nnorb; int i, j, k, l, a, sign, str1; double *pt1, *pt2; _LinkT *tab = clink_indexa + stra_id * nlinka; #pragma omp parallel default(none) \ shared(ci0, t2, bcount, strb_id, norb, na, nb, nlinka, nlinkb, \ clink_indexa, clink_indexb, tab), \ private(i, j, k, l, a, str1, sign, pt1, pt2) { double *t1 = malloc(sizeof(double) * bcount * nnorb); #pragma omp for schedule(static, 40) for (j = 0; j < nlinka; j++) { a = EXTRACT_CRE (tab[j]); i = EXTRACT_DES (tab[j]); str1 = EXTRACT_ADDR(tab[j]); sign = EXTRACT_SIGN(tab[j]); // form t1 which has alpha^+ alpha |t1> => target stra_id (through str1) FCI_t1ci_sf(ci0, t1, bcount, str1, strb_id, norb, na, nb, nlinka, nlinkb, clink_indexa, clink_indexb); for (k = 0; k < bcount; k++) { pt1 = t1 + k * nnorb; pt2 = t2 + k * n4 + (i*norb+a)*nnorb; if (sign > 0) { for (l = 0; l < nnorb; l++) { pt2[l] += pt1[l]; } } else { for (l = 0; l < nnorb; l++) { pt2[l] -= pt1[l]; } } } } free(t1); } }
/* * t2[:,i,j,k,l] = E^i_j E^k_l|ci0> */ static void rdm4_0b_t2(double *ci0, double *t2, int bcount, int stra_id, int strb_id, int norb, int na, int nb, int nlinka, int nlinkb, _LinkT *clink_indexa, _LinkT *clink_indexb) { const int nnorb = norb * norb; const int n4 = nnorb * nnorb; int i, j, k, l, a, sign, str1; double *t1 = malloc(sizeof(double) * nb * nnorb); double *pt1, *pt2; _LinkT *tab; // form t1 which has beta^+ beta |t1> => target stra_id FCI_t1ci_sf(ci0, t1, nb, stra_id, 0, norb, na, nb, nlinka, nlinkb, clink_indexa, clink_indexb); #pragma omp parallel default(none) \ shared(t1, t2, bcount, strb_id, norb, nlinkb, clink_indexb), \ private(i, j, k, l, a, str1, sign, pt1, pt2, tab) { #pragma omp for schedule(static, 1) nowait for (k = 0; k < bcount; k++) { memset(t2+k*n4, 0, sizeof(double)*n4); tab = clink_indexb + (strb_id+k) * nlinkb; for (j = 0; j < nlinkb; j++) { a = EXTRACT_CRE (tab[j]); i = EXTRACT_DES (tab[j]); str1 = EXTRACT_ADDR(tab[j]); sign = EXTRACT_SIGN(tab[j]); pt1 = t1 + str1 * nnorb; pt2 = t2 + k * n4 + (i*norb+a)*nnorb; if (sign > 0) { for (l = 0; l < nnorb; l++) { pt2[l] += pt1[l]; } } else { for (l = 0; l < nnorb; l++) { pt2[l] -= pt1[l]; } } } } } free(t1); }
void FCIrdm12kern_sf(double *rdm1, double *rdm2, double *bra, double *ket, int bcount, int stra_id, int strb_id, int norb, int na, int nb, int nlinka, int nlinkb, _LinkT *clink_indexa, _LinkT *clink_indexb, int symm) { const int INC1 = 1; const char UP = 'U'; const char TRANS_N = 'N'; const char TRANS_T = 'T'; const double D1 = 1; const int nnorb = norb * norb; double csum; double *buf = malloc(sizeof(double) * nnorb * bcount); csum = FCI_t1ci_sf(ket, buf, bcount, stra_id, strb_id, norb, na, nb, nlinka, nlinkb, clink_indexa, clink_indexb); if (csum > CSUMTHR) { dgemv_(&TRANS_N, &nnorb, &bcount, &D1, buf, &nnorb, ket+stra_id*nb+strb_id, &INC1, &D1, rdm1, &INC1); switch (symm) { case BRAKETSYM: dsyrk_(&UP, &TRANS_N, &nnorb, &bcount, &D1, buf, &nnorb, &D1, rdm2, &nnorb); break; case PARTICLESYM: tril_particle_symm(rdm2, buf, buf, bcount, norb, 1, 1); break; default: dgemm_(&TRANS_N, &TRANS_T, &nnorb, &nnorb, &bcount, &D1, buf, &nnorb, buf, &nnorb, &D1, rdm2, &nnorb); } } free(buf); }
/* * use symmetry ci0[a,b] == ci0[b,a], t2[a,b,...] == t2[b,a,...] */ void FCI3pdm_kern_spin0(double *rdm1, double *rdm2, double *rdm3, double *bra, double *ket, int bcount, int stra_id, int strb_id, int norb, int na, int nb, int nlinka, int nlinkb, _LinkT *clink_indexa, _LinkT *clink_indexb) { int fill1; if (strb_id+bcount <= stra_id) { fill1 = bcount; } else if (stra_id >= strb_id) { fill1 = stra_id - strb_id + 1; } else { return; } const int nnorb = norb * norb; const int n4 = nnorb * nnorb; const int n3 = nnorb * norb; int i, j, k, l, ij; size_t n; double factor; double *tbra; double *t1bra = malloc(sizeof(double) * nnorb * fill1); double *t1ket = malloc(sizeof(double) * nnorb * fill1); double *t2bra = malloc(sizeof(double) * n4 * fill1); double *pbra, *pt2; // t2[:,i,j,k,l] = E^i_j E^k_l|ket> FCI_t2ci_sf(bra, t2bra, fill1, stra_id, strb_id, norb, na, nb, nlinka, nlinkb, clink_indexa, clink_indexb); FCI_t1ci_sf(bra, t1bra, fill1, stra_id, strb_id, norb, na, nb, nlinka, nlinkb, clink_indexa, clink_indexb); FCI_t1ci_sf(ket, t1ket, fill1, stra_id, strb_id, norb, na, nb, nlinka, nlinkb, clink_indexa, clink_indexb); #pragma omp parallel default(none) \ shared(rdm3, t1ket, t2bra, norb, stra_id, strb_id, fill1), \ private(ij, i, j, k, l, n, tbra, pbra, pt2, factor) { tbra = malloc(sizeof(double) * nnorb * fill1); #pragma omp for schedule(dynamic, 4) for (ij = 0; ij < nnorb; ij++) { // loop ij for (<ket| E^j_i E^l_k) i = ij / norb; j = ij - i * norb; for (n = 0; n < fill1; n++) { if (n+strb_id == stra_id) { factor = 1; } else { factor = 2; } for (k = 0; k <= j; k++) { pbra = tbra + n * nnorb + k*norb; pt2 = t2bra + n * n4 + k*nnorb + ij; for (l = 0; l < norb; l++) { pbra[l] = pt2[l*n3] * factor; } } } tril2pdm_particle_symm(rdm3+(j*norb+i)*n4, tbra, t1ket, fill1, j+1, norb); } free(tbra); } make_rdm12_spin0(rdm1, rdm2, bra, ket, t1bra, t1ket, fill1, stra_id, strb_id, norb, na, nb); free(t1bra); free(t1ket); free(t2bra); }
void FCI4pdm_kern_sf(double *rdm1, double *rdm2, double *rdm3, double *rdm4, double *bra, double *ket, int bcount, int stra_id, int strb_id, int norb, int na, int nb, int nlinka, int nlinkb, _LinkT *clink_indexa, _LinkT *clink_indexb) { const int nnorb = norb * norb; const int n4 = nnorb * nnorb; const int n3 = nnorb * norb; const size_t n6 = nnorb * nnorb * nnorb; int i, j, k, l, ij; size_t n; double *tbra; double *t1bra = malloc(sizeof(double) * nnorb * bcount * 2); double *t2bra = malloc(sizeof(double) * n4 * bcount * 2); double *t1ket = t1bra + nnorb * bcount; double *t2ket = t1bra + n4 * bcount; double *pbra, *pt2; // t2[:,i,j,k,l] = E^i_j E^k_l|ket> FCI_t1ci_sf(bra, t1bra, bcount, stra_id, strb_id, norb, na, nb, nlinka, nlinkb, clink_indexa, clink_indexb); FCI_t2ci_sf(bra, t2bra, bcount, stra_id, strb_id, norb, na, nb, nlinka, nlinkb, clink_indexa, clink_indexb); if (bra == ket) { t1ket = t1bra; t2ket = t2bra; } else { FCI_t1ci_sf(ket, t1ket, bcount, stra_id, strb_id, norb, na, nb, nlinka, nlinkb, clink_indexa, clink_indexb); FCI_t2ci_sf(ket, t2ket, bcount, stra_id, strb_id, norb, na, nb, nlinka, nlinkb, clink_indexa, clink_indexb); } #pragma omp parallel default(none) \ shared(rdm3, rdm4, t1ket, t2bra, t2ket, norb, bcount), \ private(ij, i, j, k, l, n, tbra, pbra, pt2) { tbra = malloc(sizeof(double) * nnorb * bcount); #pragma omp for schedule(static, 1) nowait for (ij = 0; ij < nnorb; ij++) { // loop ij for (<ket| E^j_i E^l_k) for (n = 0; n < bcount; n++) { for (k = 0; k < norb; k++) { pbra = tbra + n * nnorb + k*norb; pt2 = t2bra + n * n4 + k*nnorb + ij; for (l = 0; l < norb; l++) { pbra[l] = pt2[l*n3]; } } } i = ij / norb; j = ij - i * norb; // contract <bra-of-Eij| with |E^k_l E^m_n ket> tril3pdm_particle_symm(rdm4+(j*norb+i)*n6, tbra, t2ket, bcount, j+1, norb); // rdm3 tril2pdm_particle_symm(rdm3+(j*norb+i)*n4, tbra, t1ket, bcount, j+1, norb); } free(tbra); } make_rdm12_sf(rdm1, rdm2, bra, ket, t1bra, t1ket, bcount, stra_id, strb_id, norb, na, nb); free(t1bra); free(t2bra); }
void NEVPTkern_sf(void (*contract_kernel)(), double *rdm2, double *rdm3, double *eri, double *ci0, int bcount, int stra_id, int strb_id, int norb, int na, int nb, int nlinka, int nlinkb, _LinkT *clink_indexa, _LinkT *clink_indexb) { const int nnorb = norb * norb; const int n4 = nnorb * nnorb; const int n3 = nnorb * norb; int i, j, k, l, ij; size_t n; double *t1ket = malloc(sizeof(double) * nnorb * bcount); double *t2ket = malloc(sizeof(double) * n4 * bcount); double *gt2 = malloc(sizeof(double) * nnorb * bcount); double *tbra, *pbra, *pt2; // t2[:,i,j,k,l] = E^i_j E^k_l|ket> FCI_t1ci_sf(ci0, t1ket, bcount, stra_id, strb_id, norb, na, nb, nlinka, nlinkb, clink_indexa, clink_indexb); FCI_t2ci_sf(ci0, t2ket, bcount, stra_id, strb_id, norb, na, nb, nlinka, nlinkb, clink_indexa, clink_indexb); (*contract_kernel)(gt2, eri, t2ket, bcount, norb, na, nb); #pragma omp parallel default(none) \ shared(rdm2, rdm3, t1ket, t2ket, gt2, norb, bcount), \ private(ij, i, j, k, l, n, tbra, pbra, pt2) { tbra = malloc(sizeof(double) * nnorb * bcount); #pragma omp for schedule(dynamic, 4) for (ij = 0; ij < nnorb; ij++) { // loop ij for (<ket| E^j_i E^l_k) i = ij / norb; j = ij - i * norb; for (n = 0; n < bcount; n++) { for (k = 0; k <= j; k++) { pbra = tbra + n * nnorb + k*norb; pt2 = t2ket + n * n4 + k*nnorb + ij; for (l = 0; l < norb; l++) { pbra[l] = pt2[l*n3]; } } } tril2pdm_particle_symm(rdm3+(j*norb+i)*n4, tbra, gt2, bcount, j+1, norb); } free(tbra); } // reordering of rdm2 is needed: rdm2.transpose(1,0,2,3) const char TRANS_N = 'N'; const char TRANS_T = 'T'; const double D1 = 1; dgemm_(&TRANS_N, &TRANS_T, &nnorb, &nnorb, &bcount, &D1, gt2, &nnorb, t1ket, &nnorb, &D1, rdm2, &nnorb); free(gt2); free(t1ket); free(t2ket); }