void DenseGenMatrix::matMult(double alpha, DenseGenMatrix& A, int transA, DenseGenMatrix& B, int transB, double beta) { assert(0); char fortranTransA = (transA==0?'N':'T'); char fortranTransB = (transB==0?'N':'T'); DenseGenMatrix& C = *this; int m,n,k,tmp; if(transA) { // A^T op(B) A.getSize(k,m); } else { // A op(B) A.getSize(m,k); } if(transB) { //op(A) B^T B.getSize(n,tmp); } else { // op(A) B B.getSize(tmp, n); } assert(m == C.mStorage->m); assert(n == C.mStorage->n); assert(k == tmp); double ** CC = C.mStorage->M; double ** AA = A.mStorage->M; double ** BB = B.mStorage->M; dgemm_(&fortranTransA, &fortranTransB, &m,&n,&k, &alpha, &AA[0][0], &m, &BB[0][0], &k, &beta, &CC[0][0], &m); // if( n != 0 && m != 0 ) { // dgemv_( &fortranTrans, &n, &m, &alpha, &C[0][0], &n, // &x[0], &incx, &beta, &y[0], &incy ); }
void DenseSymMatrix::atRankkUpdate( double alpha, double beta, DenseGenMatrix& U, int trans) { int n, k; int ldu, lda; //----------------------------------------------- // setup if the U is stored in column-major form // (FORTRAN Style) // char UPLO = 'U'; char TRANS = trans==0?'N':'T'; // U.getSize(n,k); ldu=n; // if(trans) k=n; // n = mStorage->n; // lda=n; //---------------------------------------------- // U and 'this' are stored in row-major form -> a little change in passing params to FORTRAN is needed char UPLO = 'U'; //update LOWER triagular part for symmetric matrix 'this' //trans=1 -> this += U'*U -> tell BLAS to do U*U' //trans=0 -> this += U*U' -> tell BLAS to do U'*U char TRANS = trans==0?'T':'N'; int m; U.getSize(m,k); ldu=k; // change leading dim so that U in row-major in col-major if(trans) k=m; n = mStorage->n; lda=n; #ifdef DEBUG //TRANS = 'N', k specifies the number of columns of the matrix U //we pass U' instead of U, so k should be the number of rows of U int r,c; U.getSize(rll,cll); if(TRANS=='N') assert(k==r); else if(TRANS=='T') assert(k==c); else assert(false); #endif dsyrk_(&UPLO, &TRANS, &n, &k, &beta, &U.getStorageRef().M[0][0], &ldu, &alpha, &mStorage->M[0][0], &lda); }
void sLinsys::addColsToDenseSchurCompl(sData *prob, DenseGenMatrix& out, int startcol, int endcol) { assert(gOuterSolve<3 ); SparseGenMatrix& A = prob->getLocalA(); SparseGenMatrix& C = prob->getLocalC(); SparseGenMatrix& R = prob->getLocalCrossHessian(); int ncols = endcol-startcol; int N, nxP, ncols_t, N_out; A.getSize(N, nxP); assert(N==locmy); out.getSize(ncols_t, N_out); assert(N_out == nxP); assert(endcol <= nxP && ncols_t >= ncols); if(nxP==-1) C.getSize(N,nxP); //if(nxP==-1) nxP = NP; N = locnx+locmy+locmz; DenseGenMatrix cols(ncols,N); bool allzero = true; memset(cols[0],0,N*ncols*sizeof(double)); R.getStorageRef().fromGetColBlock(startcol, &cols[0][0], N, endcol-startcol, allzero); A.getStorageRef().fromGetColBlock(startcol, &cols[0][locnx], N, endcol-startcol, allzero); C.getStorageRef().fromGetColBlock(startcol, &cols[0][locnx+locmy], N, endcol-startcol, allzero); //int mype; MPI_Comm_rank(MPI_COMM_WORLD, &mype); //printf("solving with multiple RHS %d \n", mype); solver->solve(cols); //printf("done solving %d \n", mype); const int blocksize = 20; for (int it=0; it < ncols; it += blocksize) { int end = MIN(it+blocksize,ncols); int numcols = end-it; assert(false); //add Rt*x -- and test the code // SC-=At*y A.getStorageRef().transMultMat( 1.0, out[it], numcols, N_out, -1.0, &cols[it][locnx], N); // SC-=Ct*z C.getStorageRef().transMultMat( 1.0, out[it], numcols, N_out, -1.0, &cols[it][locnx+locmy], N); } }