static void ctr_rhf2e_kern(double *eri, double *ci0, double *ci1, double *ci1buf, double *t1buf, int bcount_for_spread_a, int ncol_ci1buf, int bcount, int stra_id, int strb_id, int norb, int na, int nb, int nlinka, int nlinkb, _LinkT *clink_indexa, _LinkT *clink_indexb) { const char TRANS_N = 'N'; const double D0 = 0; const double D1 = 1; const int nnorb = norb * norb; double *t1 = t1buf; double *vt1 = t1buf + nnorb*bcount; double csum; csum = FCI_t1ci_sf(ci0, t1, bcount, stra_id, strb_id, norb, na, nb, nlinka, nlinkb, clink_indexa, clink_indexb); if (csum > CSUMTHR) { dgemm_(&TRANS_N, &TRANS_N, &nnorb, &bcount, &nnorb, &D1, eri, &nnorb, t1, &nnorb, &D0, vt1, &nnorb); spread_b_t1(ci1, vt1, bcount, stra_id, strb_id, norb, nb, nlinkb, clink_indexb); spread_a_t1(ci1buf, vt1, bcount_for_spread_a, stra_id, 0, norb, ncol_ci1buf, nlinka, clink_indexa); } }
void FCIcontract_2e_spin1_symm(double *eri, double *ci0, double *ci1, int norb, int na, int nb, int nlinka, int nlinkb, int *link_indexa, int *link_indexb, int *dimirrep, int totirrep) { const int nnorb = norb * (norb+1)/2; const int blklenb = strb_buflen(nb, nnorb); int ic, strk1, strk0, strk, ib, blen; int bufbas = MIN(BUFBASE, nb); double *buf = (double *)malloc(sizeof(double) * bufbas*nnorb*blklenb); double *pbuf; _LinkT *clinka = malloc(sizeof(_LinkT) * nlinka * na); _LinkT *clinkb = malloc(sizeof(_LinkT) * nlinkb * nb); compress_link(clinka, link_indexa, na, nlinka); compress_link(clinkb, link_indexb, nb, nlinkb); memset(ci1, 0, sizeof(double)*na*nb); for (strk0 = 0; strk0 < na; strk0 += bufbas) { strk1 = MIN(na-strk0, bufbas); for (ib = 0; ib < nb; ib += blklenb) { blen = MIN(blklenb, nb-ib); #pragma omp parallel default(none) \ shared(eri, ci0, ci1, norb, na, nb, nlinka, nlinkb, \ clinka, clinkb, dimirrep, totirrep, \ buf, strk0, strk1, ib, blen), \ private(strk, ic, pbuf) #pragma omp for schedule(static) for (ic = 0; ic < strk1; ic++) { strk = strk0 + ic; pbuf = buf + ic * blen * nnorb; ctr_rhf2esym_kern(eri, ci0, ci1, pbuf, blen, strk, ib, norb, na, nb, nlinka, nlinkb, clinka, clinkb, dimirrep, totirrep); } // spread alpha-strings in serial mode for (ic = 0; ic < strk1; ic++) { strk = strk0 + ic; pbuf = buf + ic * blen * nnorb; spread_a_t1(ci1, pbuf, blen, strk, ib, norb, nb, nlinka, clinka); } } } free(clinka); free(clinkb); free(buf); }
void FCIcontract_2e_spin0_symm(double *eri, double *ci0, double *ci1, int norb, int na, int nlink, int *link_index, int *dimirrep, int totirrep) { const int nnorb = norb * (norb+1)/2; const int blklenb = strb_buflen(na, nnorb); int strk0, strk1, strk, ib, blen; int bufbas = MIN(BUFBASE, na); double *buf = malloc(sizeof(double)*bufbas*blklenb*nnorb); double *pbuf; _LinkT *clink = malloc(sizeof(_LinkT) * nlink * na); compress_link(clink, link_index, na, nlink); memset(ci1, 0, sizeof(double)*na*na); for (strk0 = 0, strk1 = na; strk0 < na; strk0 = strk1) { strk1 = _square_pace(strk0, bufbas, 1); strk1 = MIN(strk1, na); for (ib = 0; ib < strk1; ib += blklenb) { blen = MIN(blklenb, strk1-ib); #pragma omp parallel default(none) \ shared(eri, ci0, ci1, norb, na, nlink, clink, \ dimirrep, totirrep, strk0, strk1, bufbas, buf, ib, blen), \ private(strk, pbuf) #pragma omp for schedule(guided, 1) for (strk = MAX(strk0, ib); strk < strk1; strk++) { pbuf = buf + (strk-strk0)*blen*nnorb; ctr_rhf2esym_kern(eri, ci0, ci1, pbuf, MIN(blklenb, strk+1-ib), strk, ib, norb, na, na, nlink, nlink, clink, clink, dimirrep, totirrep); } /* Note: the bcount diffs in ctr_rhf2e_kern and spread_a_t1. * ctr_rhf2e_kern needs strk+1 beta-strings, spread_a_t1 takes strk * beta-strings */ for (strk = MAX(strk0, ib); strk < strk1; strk++) { pbuf = buf + (strk-strk0)*blen*nnorb; spread_a_t1(ci1, pbuf, MIN(blklenb,strk-ib), strk, ib, norb, na, nlink, clink); } } } free(clink); free(buf); }