/* * cvLapackBandSetup does the setup operations for the band linear solver. * It makes a decision whether or not to call the Jacobian evaluation * routine based on various state variables, and if not it uses the * saved copy. In any case, it constructs the Newton matrix M = I - gamma*J, * updates counters, and calls the band LU factorization routine. */ static int cvLapackBandSetup(CVodeMem cv_mem, int convfail, N_Vector yP, N_Vector fctP, booleantype *jcurPtr, N_Vector tmp1, N_Vector tmp2, N_Vector tmp3) { CVDlsMem cvdls_mem; realtype dgamma, fact; booleantype jbad, jok; int ier, retval, one = 1; cvdls_mem = (CVDlsMem) lmem; /* Use nst, gamma/gammap, and convfail to set J eval. flag jok */ dgamma = ABS((gamma/gammap) - ONE); jbad = (nst == 0) || (nst > nstlj + CVD_MSBJ) || ((convfail == CV_FAIL_BAD_J) && (dgamma < CVD_DGMAX)) || (convfail == CV_FAIL_OTHER); jok = !jbad; if (jok) { /* If jok = TRUE, use saved copy of J */ *jcurPtr = FALSE; dcopy_f77(&(savedJ->ldata), savedJ->data, &one, M->data, &one); } else { /* If jok = FALSE, call jac routine for new J value */ nje++; nstlj = nst; *jcurPtr = TRUE; SetToZero(M); retval = bjac(n, mu, ml, tn, yP, fctP, M, J_data, tmp1, tmp2, tmp3); if (retval == 0) { dcopy_f77(&(M->ldata), M->data, &one, savedJ->data, &one); } else if (retval < 0) { cvProcessError(cv_mem, CVDLS_JACFUNC_UNRECVR, "CVSLAPACK", "cvLapackBandSetup", MSGD_JACFUNC_FAILED); last_flag = CVDLS_JACFUNC_UNRECVR; return(-1); } else if (retval > 0) { last_flag = CVDLS_JACFUNC_RECVR; return(1); } } /* Scale J by - gamma */ fact = -gamma; dscal_f77(&(M->ldata), &fact, M->data, &one); /* Add identity to get M = I - gamma*J*/ AddIdentity(M); /* Do LU factorization of M */ dgbtrf_f77(&n, &n, &ml, &mu, M->data, &(M->ldim), pivots, &ier); /* Return 0 if the LU was complete; otherwise return 1 */ last_flag = ier; if (ier > 0) return(1); return(0); }
/*--------------------------------------------------------------- arkLapackBandSetup does the setup operations for the band linear solver. It makes a decision whether or not to call the Jacobian evaluation routine based on various state variables, and if not it uses the saved copy. In any case, it constructs the Newton matrix A = M - gamma*J, updates counters, and calls the band LU factorization routine. ---------------------------------------------------------------*/ static int arkLapackBandSetup(ARKodeMem ark_mem, int convfail, N_Vector yP, N_Vector fctP, booleantype *jcurPtr, N_Vector tmp1, N_Vector tmp2, N_Vector tmp3) { ARKDlsMem arkdls_mem; ARKDlsMassMem arkdls_mass_mem; realtype dgamma, fact, *Acol_j, *Mcol_j; booleantype jbad, jok; int ier, retval, one = 1; int intn, iml, imu, lenmat, ldmat, i, j, colSize; arkdls_mem = (ARKDlsMem) ark_mem->ark_lmem; intn = (int) arkdls_mem->d_n; iml = (int) arkdls_mem->d_ml; imu = (int) arkdls_mem->d_mu; lenmat = arkdls_mem->d_M->ldata; ldmat = arkdls_mem->d_M->ldim; /* Use nst, gamma/gammap, and convfail to set J eval. flag jok */ dgamma = SUNRabs((ark_mem->ark_gamma/ark_mem->ark_gammap) - ONE); jbad = (ark_mem->ark_nst == 0) || (ark_mem->ark_nst > arkdls_mem->d_nstlj + ARKD_MSBJ) || ((convfail == ARK_FAIL_BAD_J) && (dgamma < ARKD_DGMAX)) || (convfail == ARK_FAIL_OTHER); jok = !jbad; /* If jok = TRUE, use saved copy of J */ if (jok) { *jcurPtr = FALSE; dcopy_f77(&lenmat, arkdls_mem->d_savedJ->data, &one, arkdls_mem->d_M->data, &one); /* If jok = FALSE, call jac routine for new J value */ } else { arkdls_mem->d_nje++; arkdls_mem->d_nstlj = ark_mem->ark_nst; *jcurPtr = TRUE; SetToZero(arkdls_mem->d_M); retval = arkdls_mem->d_bjac(arkdls_mem->d_n, arkdls_mem->d_mu, arkdls_mem->d_ml, ark_mem->ark_tn, yP, fctP, arkdls_mem->d_M, arkdls_mem->d_J_data, tmp1, tmp2, tmp3); if (retval == 0) { dcopy_f77(&lenmat, arkdls_mem->d_M->data, &one, arkdls_mem->d_savedJ->data, &one); } else if (retval < 0) { arkProcessError(ark_mem, ARKDLS_JACFUNC_UNRECVR, "ARKLAPACK", "arkLapackBandSetup", MSGD_JACFUNC_FAILED); arkdls_mem->d_last_flag = ARKDLS_JACFUNC_UNRECVR; return(-1); } else if (retval > 0) { arkdls_mem->d_last_flag = ARKDLS_JACFUNC_RECVR; return(1); } } /* Scale J by -gamma */ fact = -ark_mem->ark_gamma; dscal_f77(&lenmat, &fact, arkdls_mem->d_M->data, &one); /* Add mass matrix to get A = M-gamma*J*/ if (ark_mem->ark_mass_matrix) { /* Compute mass matrix */ arkdls_mass_mem = (ARKDlsMassMem) ark_mem->ark_mass_mem; SetToZero(arkdls_mass_mem->d_M); retval = arkdls_mass_mem->d_bmass(arkdls_mass_mem->d_n, arkdls_mass_mem->d_mu, arkdls_mass_mem->d_ml, ark_mem->ark_tn, arkdls_mass_mem->d_M, arkdls_mass_mem->d_M_data, tmp1, tmp2, tmp3); arkdls_mass_mem->d_nme++; if (retval < 0) { arkProcessError(ark_mem, ARKDLS_MASSFUNC_UNRECVR, "ARKLAPACK", "arkLapackBandSetup", MSGD_MASSFUNC_FAILED); arkdls_mem->d_last_flag = ARKDLS_MASSFUNC_UNRECVR; return(-1); } if (retval > 0) { arkdls_mem->d_last_flag = ARKDLS_MASSFUNC_RECVR; return(1); } /* Add to A -- CURRENTLY ASSUMES THAT BOTH MATRICES HAVE THE SAME BAND STRUCTURE AND COLUMN SIZE */ colSize = arkdls_mem->d_M->mu + arkdls_mem->d_M->ml + 1; for (j=0; j<arkdls_mem->d_M->M; j++) { Acol_j = arkdls_mem->d_M->cols[j] + arkdls_mem->d_M->s_mu - arkdls_mem->d_M->mu; Mcol_j = arkdls_mass_mem->d_M->cols[j] + arkdls_mass_mem->d_M->s_mu - arkdls_mass_mem->d_M->mu; for (i=0; i<colSize; i++) Acol_j[i] += Mcol_j[i]; } } else { AddIdentity(arkdls_mem->d_M); } /* Do LU factorization of M */ dgbtrf_f77(&intn, &intn, &iml, &imu, arkdls_mem->d_M->data, &ldmat, arkdls_mem->d_pivots, &ier); /* Return 0 if the LU was complete; otherwise return 1 */ arkdls_mem->d_last_flag = (long int) ier; if (ier > 0) return(1); return(0); }
/* * cpLapackDenseProjSolve handles the solve operation for the dense linear * linear solver. */ static int cpLapackDenseProjSolve(CPodeMem cp_mem, N_Vector b, N_Vector x, N_Vector y, N_Vector cy, N_Vector c_tmp1, N_Vector s_tmp1) { CPDlsProjMem cpdlsP_mem; realtype *bd, *xd; realtype *ewt_data, *d_data, *da_data, tmp; int nd, i, j, k, pk, ier, one = 1; realtype coef_1 = ONE, coef_0 = ZERO, coef_m1 = -ONE; cpdlsP_mem = (CPDlsProjMem) lmemP; ewt_data = N_VGetArrayPointer(ewt); bd = N_VGetArrayPointer(b); xd = N_VGetArrayPointer(x); d_data = N_VGetArrayPointer(s_tmp1); nd = ny - nc; /* Solve the linear system, depending on ftype */ switch (ftype) { case CPDIRECT_LU: /* Solve L*U1*alpha = bd * (a) solve L*beta = bd using fwd. subst. * (b) solve U1*alpha = beta using bckwd. subst * where L^T and U1^T are stored in G[0...nc-1][0...nc-1]. * beta and then alpha overwrite bd. */ dtrsv_f77("U", "T", "N", &nc, G->data, &ny, bd, &one, 1, 1, 1); dtrsv_f77("L", "T", "U", &nc, G->data, &ny, bd, &one, 1, 1, 1); /* Compute S^T * (D1 * alpha) * alpha is stored in bd. * S^T is stored in g_mat[nc...ny-1][0...nc-1]. * Store result in x2 = x[nc...ny-1]. */ if (pnorm == CP_PROJ_ERRNORM) { /* Load squared error weights into d */ for (k=0; k<ny; k++) d_data[k] = ewt_data[k] * ewt_data[k]; /* Permute elements of d, based on pivotsP. * Note that pivot information from dgetrf is 1-based. * Swap d[k] and d[pivotsP[k]]. */ for (k=0; k<nc; k++) { pk = pivotsP[k]; if (pk != k) { tmp = d_data[k]; d_data[k] = d_data[pk]; d_data[pk] = tmp; } } /* Compute D1*alpha and store it into da_data */ da_data = N_VGetArrayPointer(c_tmp1); for(k=0; k<nc; k++) da_data[k] = d_data[k] * bd[k]; /* Compute S^T * D1 * alpha = S^T * da */ dgemv_f77("N", &ny, &nc, &coef_1, (G->data + nc), &ny, da_data, &one, &coef_0, (xd + nc), &one, 1); } else { /* Compute S^T * alpha */ dgemv_f77("N", &nd, &nc, &coef_1, (G->data + nc), &ny, bd, &one, &coef_0, (xd + nc), &one, 1); } /* Solve K*x2 = S^T*D1*alpha, using the Cholesky decomposition available in K. * S^T*D1*alpha is stored in x2 = x[nc...ny-1]. */ dpotrs_f77("L", &nd, &one, K->data, &nd, (xd + nc), &nd, &ier, 1); if (ier != 0) return(ier); /* Compute x1 = alpha - S*x2 * alpha is stored in bd. * x2 is stored in x[nc...ny-1]. * S^T is stored in g_mat[nc...ny-1][0...nc-1]. * Store result in x1 = x[0...nc-1]. */ dcopy_f77(&nc, bd, &one, xd, &one); dgemv_f77("T", &nd, &nc, &coef_m1, (G->data + nc), &ny, (xd + nc), &one, &coef_1, xd, &one, 1); /* Compute P^T * x, where P is encoded into pivotsP. * Note that pivot information from dgetrf is 1-based. * Store result in x. */ for (k=nc-1; k>=0; k--) { pk = pivotsP[k]-1; if(pk != k) { tmp = xd[k]; xd[k] = xd[pk]; xd[pk] = tmp; } } break; case CPDIRECT_QR: /* * Solve R^T*alpha = bd using fwd. subst. (row version) * The upper triangular matrix R is stored in g_mat[0...nc-1][0...nc-1] * alpha overwrites bd. */ dtrsv_f77("U", "T", "N", &nc, G->data, &ny, bd, &one, 1, 1, 1); /* If projecting in WRMS norm, solve K*beta = alpha */ if (pnorm == CP_PROJ_ERRNORM) { dpotrs_f77("L", &nc, &one, K->data, &nc, bd, &nc, &ier, 1); if (ier != 0) return(ier); } /* * Compute x = Q1*alpha * * Since we cannot really use the "thin" QR decomposition, we * first need to initialize xd = [alpha; 0]. */ for (k=0; k<nc; k++) xd[k] = bd[k]; for (k=nc; k<ny; k++) xd[k] = ZERO; dormqr_f77("L", "N", &ny, &one, &nc, G->data, &ny, beta, xd, &ny, wrk, &len_wrk, &ier, 1, 1); if (ier != 0) return(ier); /* If projecting in WRMS norm, scale x by D^(-1) */ if (pnorm == CP_PROJ_ERRNORM) { for (i=0; i<ny; i++) xd[i] /= ewt_data[i]*ewt_data[i]; } break; case CPDIRECT_QRP: /* Compute P^T * b, where P is encoded into pivotsP. * If pivotsP[j] = k, then, for the factorization, the j-th column of G^T*P * was the k-th column of G^T. * Therefore, to compute P^T*b, we must move the k-th element of b to the * j-th position, for j=1,2,... This is a forward permutation. * Note that pivot information from dgeqp3 is 1-based. * Store result in b. */ for (i=1; i<=nc; i++) pivotsP[i-1] = -pivotsP[i-1]; for (i=1; i<=nc; i++) { if (pivotsP[i-1] > 0) continue; j = i; pivotsP[j-1] = -pivotsP[j-1]; pk = pivotsP[j-1]; while (pivotsP[pk-1] < 0) { tmp = bd[j-1]; bd[j-1] = bd[pk-1]; bd[pk-1] = tmp; pivotsP[pk-1] = -pivotsP[pk-1]; j = pk; pk = pivotsP[pk-1]; } } /* * Solve R11^T * alpha = P^T * bd using fwd. subst. (row version) * The upper triangular matrix R is stored in g_mat[0...nr-1][0...nr-1] * P^T * bd is available in bd. * We only consider the first nr components in P^T*bd. * alpha overwrites bd. */ dtrsv_f77("U", "T", "N", &nr, G->data, &ny, bd, &one, 1, 1, 1); /* If projecting in WRMS norm, solve K*beta = alpha */ if (pnorm == CP_PROJ_ERRNORM) { dpotrs_f77("L", &nc, &one, K->data, &nc, bd, &nc, &ier, 1); if (ier != 0) return(ier); } /* * Compute x = Q1*alpha * * Since we cannot really use the "thin" QR decomposition, we * first need to initialize xd = [alpha; 0]. */ for (k=0; k<nr; k++) xd[k] = bd[k]; for (k=nr; k<ny; k++) xd[k] = ZERO; dormqr_f77("L", "N", &ny, &one, &nc, G->data, &ny, beta, xd, &ny, wrk, &len_wrk, &ier, 1, 1); if (ier != 0) return(ier); /* If projecting in WRMS norm, scale x by D^(-1) */ if (pnorm == CP_PROJ_ERRNORM) { for (i=0; i<ny; i++) xd[i] /= ewt_data[i]*ewt_data[i]; } break; case CPDIRECT_SC: /* * Solve K*xi = bd, using the Cholesky decomposition available in K. * xi overwrites bd. */ dpotrs_f77("L", &nc, &one, K->data, &nc, bd, &nc, &ier, 1); if (ier != 0) return(ier); /* Compute x = G^T * xi * G^T is stored in g_mat[0...ny-1][0...nc-1] * xi is available in bd. */ dgemv_f77("N", &ny, &nc, &coef_1, G->data, &ny, bd, &one, &coef_0, xd, &one, 1); /* If projecting in WRMS norm, scale x by D^(-1) */ if (pnorm == CP_PROJ_ERRNORM) { for (i=0; i<ny; i++) xd[i] /= ewt_data[i]*ewt_data[i]; } break; } return(0); }
/* * cpLapackBandSetup does the setup operations for the band linear solver. * It makes a decision whether or not to call the Jacobian evaluation * routine based on various state variables, and if not it uses the * saved copy (for explicit ODE only). In any case, it constructs * the Newton matrix M = I - gamma*J or M = F_y' - gamma*F_y, updates * counters, and calls the band LU factorization routine. */ static int cpLapackBandSetup(CPodeMem cp_mem, int convfail, N_Vector yP, N_Vector ypP, N_Vector fctP, booleantype *jcurPtr, N_Vector tmp1, N_Vector tmp2, N_Vector tmp3) { CPDlsMem cpdls_mem; realtype dgamma, fact; booleantype jbad, jok; int ier, retval, one = 1; cpdls_mem = (CPDlsMem) lmem; switch (ode_type) { case CP_EXPL: /* Use nst, gamma/gammap, and convfail to set J eval. flag jok */ dgamma = ABS((gamma/gammap) - ONE); jbad = (nst == 0) || (nst > nstlj + CPD_MSBJ) || ((convfail == CP_FAIL_BAD_J) && (dgamma < CPD_DGMAX)) || (convfail == CP_FAIL_OTHER); jok = !jbad; if (jok) { /* If jok = TRUE, use saved copy of J */ *jcurPtr = FALSE; dcopy_f77(&(savedJ->ldata), savedJ->data, &one, M->data, &one); } else { /* If jok = FALSE, call jac routine for new J value */ nje++; nstlj = nst; *jcurPtr = TRUE; retval = bjacE(n, mu, ml, tn, yP, fctP, M, J_data, tmp1, tmp2, tmp3); if (retval == 0) { dcopy_f77(&(M->ldata), M->data, &one, savedJ->data, &one); } else if (retval < 0) { cpProcessError(cp_mem, CPDIRECT_JACFUNC_UNRECVR, "CPLAPACK", "cpLapackBandSetup", MSGD_JACFUNC_FAILED); last_flag = CPDIRECT_JACFUNC_UNRECVR; return(-1); } else if (retval > 0) { last_flag = CPDIRECT_JACFUNC_RECVR; return(1); } } /* Scale J by - gamma */ fact = -gamma; dscal_f77(&(M->ldata), &fact, M->data, &one); /* Add identity to get M = I - gamma*J*/ LapackBandAddI(M); break; case CP_IMPL: /* Call Jacobian function */ nje++; retval = bjacI(n, mu, ml, tn, gamma, yP, ypP, fctP, M, J_data, tmp1, tmp2, tmp3); if (retval == 0) { break; } else if (retval < 0) { cpProcessError(cp_mem, CPDIRECT_JACFUNC_UNRECVR, "CPLAPACK", "cpLapackBandSetup", MSGD_JACFUNC_FAILED); last_flag = CPDIRECT_JACFUNC_UNRECVR; return(-1); } else if (retval > 0) { last_flag = CPDIRECT_JACFUNC_RECVR; return(+1); } break; } /* Do LU factorization of M */ dgbtrf_f77(&n, &n, &ml, &mu, M->data, &(M->ldim), pivots, &ier); /* Return 0 if the LU was complete; otherwise return 1 */ last_flag = ier; if (ier > 0) return(1); return(0); }
/* * cpLapackDenseProjSetup does the setup operations for the dense * linear solver. * It calls the Jacobian evaluation routine and, depending on ftype, * it performs various factorizations. */ static int cpLapackDenseProjSetup(CPodeMem cp_mem, N_Vector y, N_Vector cy, N_Vector c_tmp1, N_Vector c_tmp2, N_Vector s_tmp1) { int ier; CPDlsProjMem cpdlsP_mem; realtype *col_i, rim1, ri; int i, j, nd, one = 1; int retval; realtype coef_1 = ONE, coef_0 = ZERO; cpdlsP_mem = (CPDlsProjMem) lmemP; nd = ny-nc; /* Call Jacobian function (G will contain the Jacobian transposed) */ retval = djacP(nc, ny, tn, y, cy, G, JP_data, c_tmp1, c_tmp2); njeP++; if (retval < 0) { cpProcessError(cp_mem, CPDIRECT_JACFUNC_UNRECVR, "CPLAPACK", "cpLapackDenseProjSetup", MSGD_JACFUNC_FAILED); return(-1); } else if (retval > 0) { return(1); } /* Save Jacobian before factorization for possible use by lmultP */ dcopy_f77(&(G->ldata), G->data, &one, savedG->data, &one); /* Factorize G, depending on ftype */ switch (ftype) { case CPDIRECT_LU: /* * LU factorization of G^T with partial pivoting * P*G^T = | U1^T | * L^T * | U2^T | * After factorization, P is encoded in pivotsP and * G^T is overwritten with U1 (nc by nc unit upper triangular), * U2 ( nc by ny-nc rectangular), and L (nc by nc lower triangular). * Return ier if factorization failed. */ dgetrf_f77(&ny, &nc, G->data, &ny, pivotsP, &ier); if (ier != 0) return(ier); /* * Build S = U1^{-1} * U2 (in place, S overwrites U2) * For each row j of G, j = nc,...,ny-1, perform * a backward substitution (row version). * After this step, G^T contains U1, S, and L. */ for (j=nc; j<ny; j++) dtrsv_f77("L", "T", "U", &nc, G->data, &ny, (G->data + j), &ny, 1, 1, 1); /* * Build K = D1 + S^T * D2 * S * S^T is stored in g_mat[nc...ny-1][0...nc] * Compute and store only the lower triangular part of K. */ if (pnorm == CP_PROJ_L2NORM) { dsyrk_f77("L", "N", &nd, &nc, &coef_1, (G->data + nc), &ny, &coef_0, K->data, &nd, 1, 1); LapackDenseAddI(K); } else { cplLUcomputeKD(cp_mem, s_tmp1); } /* * Perform Cholesky decomposition of K: K = C*C^T * After factorization, the lower triangular part of K contains C. * Return ier if factorization failed. */ dpotrf_f77("L", &nd, K->data, &nd, &ier, 1); if (ier != 0) return(ier); break; case CPDIRECT_QR: /* * QR factorization of G^T: G^T = Q*R * After factorization, the upper triangular part of G^T * contains the matrix R. The lower trapezoidal part of * G^T, together with the array beta, encodes the orthonormal * columns of Q as elementary reflectors. */ dgeqrf_f77(&ny, &nc, G->data, &ny, beta, wrk, &len_wrk, &ier); if (ier != 0) return(ier); /* If projecting in WRMS norm */ if (pnorm == CP_PROJ_ERRNORM) { /* Build K = Q^T * D^(-1) * Q */ cplQRcomputeKD(cp_mem, s_tmp1); /* Perform Cholesky decomposition of K */ dpotrf_f77("L", &nc, K->data, &nc, &ier, 1); if (ier != 0) return(ier); } break; case CPDIRECT_QRP: /* * QR with pivoting factorization of G^T: G^T * P = Q * R. * After factorization, the upper triangular part of G^T * contains the matrix R. The lower trapezoidal part of * G^T, together with the array beta, encodes the orthonormal * columns of Q as elementary reflectors. * The pivots are stored in 'pivotsP'. */ for (i=0; i<nc; i++) pivotsP[i] = 0; dgeqp3_f77(&ny, &nc, G->data, &ny, pivotsP, beta, wrk, &len_wrk, &ier); if (ier != 0) return(ier); /* * Determine the number of independent constraints. * After the QR factorization, the diagonal elements of R should * be in decreasing order of their absolute values. */ rim1 = ABS(G->data[0]); for (i=1, nr=1; i<nc; i++, nr++) { col_i = G->cols[i]; ri = ABS(col_i[i]); if (ri < 100*uround) break; if (ri/rim1 < RPowerR(uround, THREE/FOUR)) break; } /* If projecting in WRMS norm */ if (pnorm == CP_PROJ_ERRNORM) { /* Build K = Q^T * D^(-1) * Q */ cplQRcomputeKD(cp_mem, s_tmp1); /* Perform Cholesky decomposition of K */ dpotrf_f77("L", &nc, K->data, &nc, &ier, 1); if (ier != 0) return(ier); } break; case CPDIRECT_SC: /* * Build K = G*D^(-1)*G^T * G^T is stored in g_mat[0...ny-1][0...nc] * Compute and store only the lower triangular part of K. */ if (pnorm == CP_PROJ_L2NORM) { dsyrk_f77("L", "T", &nc, &ny, &coef_1, G->data, &ny, &coef_0, K->data, &nc, 1, 1); } else { cplSCcomputeKD(cp_mem, s_tmp1); } /* * Perform Cholesky decomposition of K: K = C*C^T * After factorization, the lower triangular part of K contains C. * Return 1 if factorization failed. */ dpotrf_f77("L", &nc, K->data, &nc, &ier, 1); if (ier != 0) return(ier); break; } return(0); }