static PetscErrorCode PCBDDCMatTransposeMatSolve_SeqDense(Mat A,Mat B,Mat X) { Mat_SeqDense *mat = (Mat_SeqDense*)A->data; PetscErrorCode ierr; const PetscScalar *b; PetscScalar *x; PetscInt n; PetscBLASInt nrhs,info,m; PetscBool flg; PetscFunctionBegin; ierr = PetscBLASIntCast(A->rmap->n,&m);CHKERRQ(ierr); ierr = PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr); if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATDENSE matrix"); ierr = PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr); if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix"); ierr = MatGetSize(B,NULL,&n);CHKERRQ(ierr); ierr = PetscBLASIntCast(n,&nrhs);CHKERRQ(ierr); ierr = MatDenseGetArrayRead(B,&b);CHKERRQ(ierr); ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr); ierr = PetscMemcpy(x,b,m*nrhs*sizeof(PetscScalar));CHKERRQ(ierr); ierr = MatDenseRestoreArrayRead(B,&b);CHKERRQ(ierr); if (A->factortype == MAT_FACTOR_LU) { #if defined(PETSC_MISSING_LAPACK_GETRS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRS - Lapack routine is unavailable."); #else PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("T",&m,&nrhs,mat->v,&mat->lda,mat->pivots,x,&m,&info)); if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"GETRS - Bad solve"); #endif } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only LU factor supported"); ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr); ierr = PetscLogFlops(nrhs*(2.0*m*m - m));CHKERRQ(ierr); PetscFunctionReturn(0); }
void DenseMatrix<T>::_lu_back_substitute_lapack (const DenseVector<T>& b, DenseVector<T>& x) { // The calling sequence for getrs is: // dgetrs(TRANS, N, NRHS, A, LDA, IPIV, B, LDB, INFO) // trans (input) char* // 'n' for no tranpose, 't' for transpose char TRANS[] = "t"; // N (input) int* // The order of the matrix A. N >= 0. int N = this->m(); // NRHS (input) int* // The number of right hand sides, i.e., the number of columns // of the matrix B. NRHS >= 0. int NRHS = 1; // A (input) DOUBLE PRECISION array, dimension (LDA,N) // The factors L and U from the factorization A = P*L*U // as computed by dgetrf. // Here, we pass &(_val[0]) // LDA (input) int* // The leading dimension of the array A. LDA >= max(1,N). int LDA = N; // ipiv (input) int array, dimension (N) // The pivot indices from DGETRF; for 1<=i<=N, row i of the // matrix was interchanged with row IPIV(i). // Here, we pass &(_pivots[0]) which was computed in _lu_decompose_lapack // B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) // On entry, the right hand side matrix B. // On exit, the solution matrix X. // Here, we pass a copy of the rhs vector's data array in x, so that the // passed right-hand side b is unmodified. I don't see a way around this // copy if we want to maintain an unmodified rhs in LibMesh. x = b; std::vector<T>& x_vec = x.get_values(); // We can avoid the copy if we don't care about overwriting the RHS: just // pass b to the Lapack routine and then swap with x before exiting // std::vector<T>& x_vec = b.get_values(); // LDB (input) int* // The leading dimension of the array B. LDB >= max(1,N). int LDB = N; // INFO (output) int* // = 0: successful exit // < 0: if INFO = -i, the i-th argument had an illegal value int INFO = 0; // Finally, ready to call the Lapack getrs function LAPACKgetrs_(TRANS, &N, &NRHS, &(_val[0]), &LDA, &(_pivots[0]), &(x_vec[0]), &LDB, &INFO); // Check return value for errors if (INFO != 0) { libMesh::out << "INFO=" << INFO << ", Error during Lapack LU solve!" << std::endl; libmesh_error(); } // Don't do this if you already made a copy of b above // Swap b and x. The solution will then be in x, and whatever was originally // in x, maybe garbage, maybe nothing, will be in b. // FIXME: Rewrite the LU and Cholesky solves to just take one input, and overwrite // the input. This *should* make user code simpler, as they don't have to create // an extra vector just to pass it in to the solve function! // b.swap(x); }
PetscErrorCode DSTranslateHarmonic_NHEP(DS ds,PetscScalar tau,PetscReal beta,PetscBool recover,PetscScalar *gin,PetscReal *gamma) { #if defined(PETSC_MISSING_LAPACK_GETRF) || defined(PETSC_MISSING_LAPACK_GETRS) PetscFunctionBegin; SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRF/GETRS - Lapack routines are unavailable"); #else PetscErrorCode ierr; PetscInt i,j; PetscBLASInt *ipiv,info,n,ld,one=1,ncol; PetscScalar *A,*B,*Q,*g=gin,*ghat; PetscScalar done=1.0,dmone=-1.0,dzero=0.0; PetscReal gnorm; PetscFunctionBegin; ierr = PetscBLASIntCast(ds->n,&n);CHKERRQ(ierr); ierr = PetscBLASIntCast(ds->ld,&ld);CHKERRQ(ierr); A = ds->mat[DS_MAT_A]; if (!recover) { ierr = DSAllocateWork_Private(ds,0,0,ld);CHKERRQ(ierr); ipiv = ds->iwork; if (!g) { ierr = DSAllocateWork_Private(ds,ld,0,0);CHKERRQ(ierr); g = ds->work; } /* use workspace matrix W to factor A-tau*eye(n) */ ierr = DSAllocateMat_Private(ds,DS_MAT_W);CHKERRQ(ierr); B = ds->mat[DS_MAT_W]; ierr = PetscMemcpy(B,A,sizeof(PetscScalar)*ld*ld);CHKERRQ(ierr); /* Vector g initialy stores b = beta*e_n^T */ ierr = PetscMemzero(g,n*sizeof(PetscScalar));CHKERRQ(ierr); g[n-1] = beta; /* g = (A-tau*eye(n))'\b */ for (i=0;i<n;i++) B[i+i*ld] -= tau; PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n,&n,B,&ld,ipiv,&info)); if (info<0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Bad argument to LU factorization"); if (info>0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Bad LU factorization"); ierr = PetscLogFlops(2.0*n*n*n/3.0);CHKERRQ(ierr); PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("C",&n,&one,B,&ld,ipiv,g,&ld,&info)); if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"GETRS - Bad solve"); ierr = PetscLogFlops(2.0*n*n-n);CHKERRQ(ierr); /* A = A + g*b' */ for (i=0;i<n;i++) A[i+(n-1)*ld] += g[i]*beta; } else { /* recover */ PetscValidPointer(g,6); ierr = DSAllocateWork_Private(ds,ld,0,0);CHKERRQ(ierr); ghat = ds->work; Q = ds->mat[DS_MAT_Q]; /* g^ = -Q(:,idx)'*g */ ierr = PetscBLASIntCast(ds->l+ds->k,&ncol);CHKERRQ(ierr); PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n,&ncol,&dmone,Q,&ld,g,&one,&dzero,ghat,&one)); /* A = A + g^*b' */ for (i=0;i<ds->l+ds->k;i++) for (j=ds->l;j<ds->l+ds->k;j++) A[i+j*ld] += ghat[i]*Q[n-1+j*ld]*beta; /* g~ = (I-Q(:,idx)*Q(:,idx)')*g = g+Q(:,idx)*g^ */ PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&ncol,&done,Q,&ld,ghat,&one,&done,g,&one)); } /* Compute gamma factor */ if (gamma) { gnorm = 0.0; for (i=0;i<n;i++) gnorm = gnorm + PetscRealPart(g[i]*PetscConj(g[i])); *gamma = PetscSqrtReal(1.0+gnorm); } PetscFunctionReturn(0); #endif }