/* * This function returns incomplete rdm3, in which, particle * permutation symmetry is assumed. * kernel can be FCI3pdm_kern_ms0, FCI3pdm_kern_spin0 */ void FCIrdm3_drv(void (*kernel)(), double *rdm1, double *rdm2, double *rdm3, double *bra, double *ket, int norb, int na, int nb, int nlinka, int nlinkb, int *link_indexa, int *link_indexb) { const size_t nnorb = norb * norb; const size_t n4 = nnorb * nnorb; int ib, strk, bcount; _LinkT *clinka = malloc(sizeof(_LinkT) * nlinka * na); _LinkT *clinkb = malloc(sizeof(_LinkT) * nlinkb * nb); FCIcompress_link(clinka, link_indexa, norb, na, nlinka); FCIcompress_link(clinkb, link_indexb, norb, nb, nlinkb); memset(rdm1, 0, sizeof(double) * nnorb); memset(rdm2, 0, sizeof(double) * n4); memset(rdm3, 0, sizeof(double) * n4 * nnorb); for (strk = 0; strk < na; strk++) { for (ib = 0; ib < nb; ib += BUFBASE) { bcount = MIN(BUFBASE, nb-ib); (*kernel)(rdm1, rdm2, rdm3, bra, ket, bcount, strk, ib, norb, na, nb, nlinka, nlinkb, clinka, clinkb); } } free(clinka); free(clinkb); }
void FCItrans_rdm1b(double *rdm1, double *bra, double *ket, int norb, int na, int nb, int nlinka, int nlinkb, int *link_indexa, int *link_indexb) { int i, a, j, k, str0, str1, sign; double *pket, *pbra; double tmp; _LinkT *tab; _LinkT *clink = malloc(sizeof(_LinkT) * nlinkb * nb); FCIcompress_link(clink, link_indexb, norb, nb, nlinkb); memset(rdm1, 0, sizeof(double) * norb*norb); for (str0 = 0; str0 < na; str0++) { pbra = bra + str0 * nb; pket = ket + str0 * nb; for (k = 0; k < nb; k++) { tab = clink + k * nlinkb; tmp = pket[k]; for (j = 0; j < nlinkb; j++) { a = EXTRACT_CRE (tab[j]); i = EXTRACT_DES (tab[j]); str1 = EXTRACT_ADDR(tab[j]); sign = EXTRACT_SIGN(tab[j]); if (sign == 0) { break; } else { rdm1[a*norb+i] += sign*pbra[str1]*tmp; } } } } free(clink); }
void FCIcontract_b_1e_nosym(double *h1e, double *ci0, double *ci1, int norb, int nstra, int nstrb, int nlinka, int nlinkb, int *link_indexa, int *link_indexb) { int j, k, i, a, sign; size_t str0, str1; double *pci1; double tmp; _LinkT *tab; _LinkT *clink = malloc(sizeof(_LinkT) * nlinkb * nstrb); FCIcompress_link(clink, link_indexb, norb, nstrb, nlinkb); for (str0 = 0; str0 < nstra; str0++) { pci1 = ci1 + str0 * nstrb; for (k = 0; k < nstrb; k++) { tab = clink + k * nlinkb; tmp = ci0[str0*nstrb+k]; for (j = 0; j < nlinkb; j++) { a = EXTRACT_CRE (tab[j]); i = EXTRACT_DES (tab[j]); str1 = EXTRACT_ADDR(tab[j]); sign = EXTRACT_SIGN(tab[j]); pci1[str1] += sign * tmp * h1e[a*norb+i]; } } } free(clink); }
void FCIcontract_a_1e_nosym(double *h1e, double *ci0, double *ci1, int norb, int nstra, int nstrb, int nlinka, int nlinkb, int *link_indexa, int *link_indexb) { int j, k, i, a, sign; size_t str0, str1; double *pci0, *pci1; double tmp; _LinkT *tab; _LinkT *clink = malloc(sizeof(_LinkT) * nlinka * nstra); FCIcompress_link(clink, link_indexa, norb, nstra, nlinka); for (str0 = 0; str0 < nstra; str0++) { tab = clink + str0 * nlinka; for (j = 0; j < nlinka; j++) { a = EXTRACT_CRE (tab[j]); // propagate from t1 to bra, through a^+ i i = EXTRACT_DES (tab[j]); str1 = EXTRACT_ADDR(tab[j]); sign = EXTRACT_SIGN(tab[j]); pci0 = ci0 + str0 * nstrb; pci1 = ci1 + str1 * nstrb; tmp = sign * h1e[a*norb+i]; for (k = 0; k < nstrb; k++) { pci1[k] += tmp * pci0[k]; } } } free(clink); }
void NEVPTcontract(void (*kernel)(), double *rdm2, double *rdm3, double *eri, double *ci0, int norb, int na, int nb, int nlinka, int nlinkb, int *link_indexa, int *link_indexb) { const size_t nnorb = norb * norb; const size_t n4 = nnorb * nnorb; int i, j, k, ib, strk, bcount; double *pdm2 = malloc(sizeof(double) * n4); double *cp1, *cp0; _LinkT *clinka = malloc(sizeof(_LinkT) * nlinka * na); _LinkT *clinkb = malloc(sizeof(_LinkT) * nlinkb * nb); FCIcompress_link(clinka, link_indexa, norb, na, nlinka); FCIcompress_link(clinkb, link_indexb, norb, nb, nlinkb); memset(pdm2, 0, sizeof(double) * n4); memset(rdm3, 0, sizeof(double) * n4 * nnorb); for (strk = 0; strk < na; strk++) { for (ib = 0; ib < nb; ib += BUFBASE) { bcount = MIN(BUFBASE, nb-ib); NEVPTkern_sf(kernel, pdm2, rdm3, eri, ci0, bcount, strk, ib, norb, na, nb, nlinka, nlinkb, clinka, clinkb); } } free(clinka); free(clinkb); for (i = 0; i < norb; i++) { for (j = 0; j < norb; j++) { cp1 = rdm2 + (i*norb+j) * nnorb; cp0 = pdm2 + (j*norb+i) * nnorb; for (k = 0; k < nnorb; k++) { cp1[k] = cp0[k]; } } } free(pdm2); }
void FCIcontract_2es1(double *eri, double *ci0, double *ci1, int norb, int na, int nb, int nlinka, int nlinkb, int *link_indexa, int *link_indexb) { _LinkT *clinka = malloc(sizeof(_LinkT) * nlinka * na); _LinkT *clinkb = malloc(sizeof(_LinkT) * nlinkb * nb); FCIcompress_link(clinka, link_indexa, norb, na, nlinka); FCIcompress_link(clinkb, link_indexb, norb, nb, nlinkb); memset(ci1, 0, sizeof(double)*na*nb); #pragma omp parallel default(none) \ shared(eri, ci0, ci1, norb, na, nb, nlinka, nlinkb, \ clinka, clinkb) { int strk, ib, blen; double *t1buf = malloc(sizeof(double) * STRB_BLKSIZE*norb*norb*2); double *ci1buf = malloc(sizeof(double) * na*STRB_BLKSIZE); for (ib = 0; ib < nb; ib += STRB_BLKSIZE) { blen = MIN(STRB_BLKSIZE, nb-ib); memset(ci1buf, 0, sizeof(double) * na*blen); #pragma omp for schedule(static) for (strk = 0; strk < na; strk++) { ctr_rhf2e_kern(eri, ci0, ci1, ci1buf, t1buf, blen, blen, blen, strk, ib, norb, na, nb, nlinka, nlinkb, clinka, clinkb); } #pragma omp critical axpy2d(ci1+ib, ci1buf, na, nb, blen); } free(ci1buf); free(t1buf); } free(clinka); free(clinkb); }
/* * make_rdm1 assumed the hermitian of density matrix */ void FCImake_rdm1a(double *rdm1, double *cibra, double *ciket, int norb, int na, int nb, int nlinka, int nlinkb, int *link_indexa, int *link_indexb) { int i, a, j, k, str0, str1, sign; double *pci0, *pci1; double *ci0 = ciket; _LinkT *tab; _LinkT *clink = malloc(sizeof(_LinkT) * nlinka * na); FCIcompress_link(clink, link_indexa, norb, na, nlinka); memset(rdm1, 0, sizeof(double) * norb*norb); for (str0 = 0; str0 < na; str0++) { tab = clink + str0 * nlinka; pci0 = ci0 + str0 * nb; for (j = 0; j < nlinka; j++) { a = EXTRACT_CRE (tab[j]); i = EXTRACT_DES (tab[j]); str1 = EXTRACT_ADDR(tab[j]); sign = EXTRACT_SIGN(tab[j]); pci1 = ci0 + str1 * nb; if (a >= i) { if (sign == 0) { break; } else if (sign > 0) { for (k = 0; k < nb; k++) { rdm1[a*norb+i] += pci0[k]*pci1[k]; } } else { for (k = 0; k < nb; k++) { rdm1[a*norb+i] -= pci0[k]*pci1[k]; } } } } } for (j = 0; j < norb; j++) { for (k = 0; k < j; k++) { rdm1[k*norb+j] = rdm1[j*norb+k]; } } free(clink); }
/* * Note! The returned rdm2 from FCI*kern* function corresponds to * [(p^+ q on <bra|) r^+ s] = [p q^+ r^+ s] * in FCIrdm12kern_sf, FCIrdm12kern_spin0, FCIrdm12kern_a, ... * t1 is calculated as |K> = i^+ j|0>. by doing dot(t1.T,t1) to get "rdm2", * The ket part (k^+ l|0>) will generate the correct order for the last * two indices kl of rdm2(i,j,k,l), But the bra part (i^+ j|0>)^dagger * will generate an order of (i,j), which is identical to call a bra of * (<0|i j^+). The so-obtained rdm2(i,j,k,l) corresponds to the * operator sequence i j^+ k^+ l. * * symm = 1: symmetrizes the 1pdm, and 2pdm. This is true only if bra == ket, * and the operators on bra are equivalent to those on ket, like * FCIrdm12kern_a, FCIrdm12kern_b, FCIrdm12kern_sf, FCIrdm12kern_spin0 * sym = 2: consider the particle permutation symmetry: * E^j_l E^i_k = E^i_k E^j_l - \delta_{il}E^j_k + \dleta_{jk}E^i_l */ void FCIrdm12_drv(void (*dm12kernel)(), double *rdm1, double *rdm2, double *bra, double *ket, int norb, int na, int nb, int nlinka, int nlinkb, int *link_indexa, int *link_indexb, int symm) { const int nnorb = norb * norb; int strk, i, j, k, l, ib, blen; double *pdm1, *pdm2; memset(rdm1, 0, sizeof(double) * nnorb); memset(rdm2, 0, sizeof(double) * nnorb*nnorb); _LinkT *clinka = malloc(sizeof(_LinkT) * nlinka * na); _LinkT *clinkb = malloc(sizeof(_LinkT) * nlinkb * nb); FCIcompress_link(clinka, link_indexa, norb, na, nlinka); FCIcompress_link(clinkb, link_indexb, norb, nb, nlinkb); #pragma omp parallel default(none) \ shared(dm12kernel, bra, ket, norb, na, nb, nlinka, \ nlinkb, clinka, clinkb, rdm1, rdm2, symm), \ private(strk, i, ib, blen, pdm1, pdm2) { pdm1 = calloc(nnorb, sizeof(double)); pdm2 = calloc(nnorb*nnorb, sizeof(double)); #pragma omp for schedule(dynamic, 40) for (strk = 0; strk < na; strk++) { for (ib = 0; ib < nb; ib += BUFBASE) { blen = MIN(BUFBASE, nb-ib); (*dm12kernel)(pdm1, pdm2, bra, ket, blen, strk, ib, norb, na, nb, nlinka, nlinkb, clinka, clinkb, symm); } } #pragma omp critical { for (i = 0; i < nnorb; i++) { rdm1[i] += pdm1[i]; } for (i = 0; i < nnorb*nnorb; i++) { rdm2[i] += pdm2[i]; } } free(pdm1); free(pdm2); } free(clinka); free(clinkb); switch (symm) { case BRAKETSYM: for (i = 0; i < norb; i++) { for (j = 0; j < i; j++) { rdm1[j*norb+i] = rdm1[i*norb+j]; } } for (i = 0; i < nnorb; i++) { for (j = 0; j < i; j++) { rdm2[j*nnorb+i] = rdm2[i*nnorb+j]; } } _transpose_jikl(rdm2, norb); break; case PARTICLESYM: // right 2pdm order is required here, which transposes the cre/des on bra for (i = 0; i < norb; i++) { for (j = 0; j < i; j++) { pdm1 = rdm2 + (i*nnorb+j)*norb; pdm2 = rdm2 + (j*nnorb+i)*norb; for (k = 0; k < norb; k++) { for (l = 0; l < norb; l++) { pdm2[l*nnorb+k] = pdm1[k*nnorb+l]; } } // E^j_lE^i_k = E^i_kE^j_l + \delta_{il}E^j_k - \dleta_{jk}E^i_l for (k = 0; k < norb; k++) { pdm2[i*nnorb+k] += rdm1[j*norb+k]; pdm2[k*nnorb+j] -= rdm1[i*norb+k]; } } } break; default: _transpose_jikl(rdm2, norb); } }