void GradDpElement :: computeStiffnessMatrix_ku(FloatMatrix &answer, MatResponseMode rMode, TimeStep *tStep) { double dV; NLStructuralElement *elem = this->giveNLStructuralElement(); FloatArray Nk; FloatMatrix B, DkuB, Dku; StructuralCrossSection *cs = elem->giveStructuralCrossSection(); answer.clear(); int nlGeo = elem->giveGeometryMode(); for ( auto &gp: *elem->giveIntegrationRule(0) ) { GradDpMaterialExtensionInterface *dpmat = dynamic_cast< GradDpMaterialExtensionInterface * >( cs->giveMaterialInterface(GradDpMaterialExtensionInterfaceType, gp) ); if ( !dpmat ) { OOFEM_ERROR("Material doesn't implement the required DpGrad interface!"); } elem->computeBmatrixAt(gp, B); if ( nlGeo == 1 ) { if ( elem->domain->giveEngngModel()->giveFormulation() == AL ) { elem->computeBmatrixAt(gp, B); } else { elem->computeBHmatrixAt(gp, B); } } dpmat->givePDGradMatrix_ku(Dku, rMode, gp, tStep); this->computeNkappaMatrixAt(gp, Nk); dV = elem->computeVolumeAround(gp); DkuB.beProductOf(Dku, B); answer.plusProductUnsym(Nk, DkuB, -dV); if ( dpmat->giveAveragingType() == 2 ) { double dl1, dl2, dl3; FloatArray Gk; FloatMatrix D, DB, LDB; FloatMatrix Bk, BktM22, BktM22Gk, BktM12, BktM12Gk, M22(2, 2), M12(2, 2); FloatMatrix dL1(1, 3), dL2(1, 3), result1, result2, dLdS, n(2, 2); this->computeBkappaMatrixAt(gp, Bk); dpmat->givePDGradMatrix_uu(D, rMode, gp, tStep); dpmat->givePDGradMatrix_LD(dLdS, rMode, gp, tStep); this->computeNonlocalGradient(Gk, gp, tStep); dl1 = dLdS.at(3, 3); dl2 = dLdS.at(4, 4); dl3 = dLdS.at(5, 5); n.at(1, 1) = dLdS.at(1, 1); n.at(1, 2) = dLdS.at(1, 2); n.at(2, 1) = dLdS.at(2, 1); n.at(2, 2) = dLdS.at(2, 2); // first term Bk^T M22 G L1 D B // M22 = n2 \otimes n2 M22.at(1, 1) = n.at(1, 2) * n.at(1, 2); M22.at(1, 2) = n.at(1, 2) * n.at(2, 2); M22.at(2, 1) = n.at(2, 2) * n.at(1, 2); M22.at(2, 2) = n.at(2, 2) * n.at(2, 2); // dL1 dL1.at(1, 1) = dl1 * n.at(1, 1) * n.at(1, 1) + dl2 *n.at(1, 2) * n.at(1, 2); dL1.at(1, 2) = dl1 * n.at(2, 1) * n.at(2, 1) + dl2 *n.at(2, 2) * n.at(2, 2); dL1.at(1, 3) = dl1 * n.at(1, 1) * n.at(2, 1) + dl2 *n.at(1, 2) * n.at(2, 2); DB.beProductOf(D, B); LDB.beProductOf(dL1, DB); BktM22.beTProductOf(Bk, M22); ///@todo This can't possibly work if this is uncommented (!) / Mikael //BktM22Gk.beProductOf(BktM22,Gk); result1.beProductOf(BktM22Gk, LDB); answer.add(dV, result1); // This would be slightly shorter and faster; //GkLDB.beProductOf(Gk, LDB); //MGkLDB.beProductOf(M22, GkLDB); //answer.plusProductUnsym(Bk, MGkLDB, dV); // M12 + M21 = n1 \otimes n2 + n2 \otimes n1 M12.at(1, 1) = n.at(1, 1) * n.at(1, 2) + n.at(1, 2) * n.at(1, 1); M12.at(1, 2) = n.at(1, 1) * n.at(2, 2) + n.at(1, 2) * n.at(2, 1); M12.at(2, 1) = n.at(2, 1) * n.at(1, 2) + n.at(2, 2) * n.at(1, 1); M12.at(2, 2) = n.at(2, 1) * n.at(2, 2) + n.at(2, 2) * n.at(2, 1); //dL2 dL2.at(1, 1) = dl3 * ( n.at(1, 1) * n.at(1, 2) + n.at(1, 1) * n.at(1, 2) ); dL2.at(1, 2) = dl3 * ( n.at(2, 1) * n.at(2, 2) + n.at(2, 1) * n.at(2, 2) ); dL2.at(1, 3) = dl3 * ( n.at(1, 2) * n.at(2, 1) + n.at(1, 1) * n.at(2, 2) ); LDB.beProductOf(dL2, DB); BktM12.beTProductOf(Bk, M12); ///@todo This can't possibly work if this is uncommented (!) / Mikael //BktM12Gk.beProductOf(BktM12,Gk); result2.beProductOf(BktM12Gk, LDB); answer.add(dV, result2); // This would be slightly shorter and faster; //GkLDB.beProductOf(Gk, LDB); //MGkLDB.beProductOf(M12, GkLDB); //answer.plusProductUnsym(Bk, MGkLDB, dV); } } }
extern "C" magma_int_t magma_zgessm_gpu( char storev, magma_int_t m, magma_int_t n, magma_int_t k, magma_int_t ib, magma_int_t *ipiv, magmaDoubleComplex *dL1, magma_int_t lddl1, magmaDoubleComplex *dL, magma_int_t lddl, magmaDoubleComplex *dA, magma_int_t ldda, magma_int_t *info) { /* -- MAGMA (version 1.4.1) -- Univ. of Tennessee, Knoxville Univ. of California, Berkeley Univ. of Colorado, Denver December 2013 Purpose ======= ZGESSM applies the factors L computed by ZGETRF_INCPIV to a complex M-by-N tile A. Arguments ========= M (input) INTEGER The number of rows of the matrix A. M >= 0. N (input) INTEGER The number of columns of the matrix A. N >= 0. K (input) INTEGER The number of columns of the matrix L. K >= 0. IB (input) INTEGER The inner-blocking size. IB >= 0. IPIV (input) INTEGER array on the cpu. The pivot indices array of size K as returned by ZGETRF_INCPIV. dL1 (input) DOUBLE COMPLEX array, dimension(LDDL1, N) The IB-by-K matrix in which is stored L^(-1) as returned by GETRF_INCPIV LDDL1 (input) INTEGER The leading dimension of the array L1. LDDL1 >= max(1,2*IB). dL (input) DOUBLE COMPLEX array, dimension(LDDL, N) The M-by-K lower triangular tile on the gpu. LDDL (input) INTEGER The leading dimension of the array L. LDDL >= max(1,M). dA (input/output) DOUBLE COMPLEX array, dimension (LDDA, N) On entry, the M-by-N tile A on the gpu. On exit, updated by the application of L on the gpu. ===================================================================== */ #define AT(i,j) (dAT + (i)*ldda + (j) ) #define L(i,j) (dL + (i) + (j)*lddl ) #define dL1(j) (dL1 + (j)*lddl1) magmaDoubleComplex c_one = MAGMA_Z_ONE; magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE; int i, s, sb; magmaDoubleComplex *dAT; /* Check arguments */ *info = 0; if (m < 0) *info = -1; else if (n < 0) *info = -2; else if (ldda < max(1,m)) *info = -4; if (*info != 0) { magma_xerbla( __func__, -(*info) ); return *info; } /* Quick return if possible */ if (m == 0 || n == 0) return *info; if ( (storev == 'C') || (storev == 'c') ) { magmablas_zgetmo_in( dA, dAT, ldda, m, n ); } else { dAT = dA; } s = k / ib; for(i = 0; i < k; i += ib) { sb = min(ib, k-i); magmablas_zlaswp( n, dAT, ldda, i+1, i+sb, ipiv, 1 ); #ifndef WITHOUTTRTRI magma_ztrmm( MagmaRight, MagmaLower, MagmaTrans, MagmaUnit, n, sb, c_one, dL1(i), lddl1, AT(i, 0), ldda); #else magma_ztrsm( MagmaRight, MagmaLower, MagmaTrans, MagmaUnit, n, sb, c_one, L( i, i), lddl, AT(i, 0), ldda); #endif if ( (i+sb) < m) { magma_zgemm( MagmaNoTrans, MagmaTrans, n, m-(i+sb), sb, c_neg_one, AT(i, 0), ldda, L( i+sb, i), lddl, c_one, AT(i+sb, 0), ldda ); } } if ( (storev == 'C') || (storev == 'c') ) { magmablas_zgetmo_in( dA, dAT, ldda, m, n ); } return *info; /* End of MAGMA_ZGETRF_GPU */ }
void GradDpElement :: computeStiffnessMatrix(FloatMatrix &answer, MatResponseMode rMode, TimeStep *tStep) { //set displacement and nonlocal location array this->setDisplacementLocationArray(); this->setNonlocalLocationArray(); NLStructuralElement *elem = this->giveNLStructuralElement(); StructuralCrossSection *cs = elem->giveStructuralCrossSection(); FloatMatrix B, D, DB; FloatMatrix DkuB, Dku; FloatArray Nk; FloatMatrix SNk, gPSigma; FloatMatrix lStiff; FloatMatrix Bk, LBk; FloatMatrix answer_uu, answer_ku, answer_uk, answer_kk; int nlGeo = elem->giveGeometryMode(); bool matStiffSymmFlag = elem->giveCrossSection()->isCharacteristicMtrxSymmetric(rMode); for ( auto &gp : *elem->giveIntegrationRule(0) ) { GradDpMaterialExtensionInterface *dpmat = dynamic_cast< GradDpMaterialExtensionInterface * >( cs->giveMaterialInterface(GradDpMaterialExtensionInterfaceType, gp) ); if ( !dpmat ) { OOFEM_ERROR("Material doesn't implement the required DpGrad interface!"); } double dV = elem->computeVolumeAround(gp); if ( nlGeo == 0 ) { elem->computeBmatrixAt(gp, B); } else if ( nlGeo == 1 ) { if ( elem->domain->giveEngngModel()->giveFormulation() == AL ) { elem->computeBmatrixAt(gp, B); } else { elem->computeBHmatrixAt(gp, B); } } this->computeNkappaMatrixAt(gp, Nk); this->computeBkappaMatrixAt(gp, Bk); dpmat->givePDGradMatrix_uu(D, rMode, gp, tStep); dpmat->givePDGradMatrix_ku(Dku, rMode, gp, tStep); dpmat->givePDGradMatrix_uk(gPSigma, rMode, gp, tStep); dpmat->givePDGradMatrix_kk(lStiff, rMode, gp, tStep); /////////////////////////////////////////////////////////////////// uu: DB.beProductOf(D, B); if ( matStiffSymmFlag ) { answer_uu.plusProductSymmUpper(B, DB, dV); } else { answer_uu.plusProductUnsym(B, DB, dV); } //////////////////////////////////////////////////////////////////////// ku: DkuB.beProductOf(Dku, B); answer_ku.plusProductUnsym(Nk, DkuB, -dV); if ( dpmat->giveAveragingType() == 2 ) { double dl1, dl2, dl3; FloatMatrix LDB; FloatMatrix GkLDB, MGkLDB; FloatMatrix M22, M12; FloatMatrix dL1(1, 3), dL2(1, 3), dLdS; FloatArray Gk, n1, n2; dpmat->givePDGradMatrix_LD(dLdS, rMode, gp, tStep); this->computeNonlocalGradient(Gk, gp, tStep); dl1 = dLdS.at(3, 3); dl2 = dLdS.at(4, 4); dl3 = dLdS.at(5, 5); n1 = {dLdS.at(1, 1), dLdS.at(2, 1)}; n2 = {dLdS.at(1, 2), dLdS.at(2, 2)}; // first term Bk^T M22 G L1 D B // M22 = n2 \otimes n2 M22.plusDyadUnsym(n2, n2, 1.); // dL1 dL1.at(1, 1) = dl1 * n1.at(1) * n1.at(1) + dl2 * n2.at(1) * n2.at(1); dL1.at(1, 2) = dl1 * n1.at(2) * n1.at(2) + dl2 * n2.at(2) * n2.at(2); dL1.at(1, 3) = dl1 * n1.at(1) * n1.at(2) + dl2 * n2.at(1) * n2.at(2); LDB.beProductOf(dL1, DB); GkLDB.beProductOf(Gk, LDB); MGkLDB.beProductOf(M22, GkLDB); answer.plusProductUnsym(Bk, MGkLDB, dV); // M12 + M21 = n1 \otimes n2 + n2 \otimes n1 M12.plusDyadUnsym(n1, n2, 1.); M12.plusDyadUnsym(n2, n1, 1.); //dL2 dL2.at(1, 1) = dl3 * ( n1.at(1) * n2.at(1) + n1.at(1) * n2.at(1) ); dL2.at(1, 2) = dl3 * ( n1.at(2) * n2.at(2) + n1.at(2) * n2.at(2) ); dL2.at(1, 3) = dl3 * ( n1.at(2) * n2.at(1) + n1.at(1) * n2.at(2) ); // Bk * ((M12 * L2 + M22 * L1) * DB) LDB.beProductOf(dL2, DB); GkLDB.beProductOf(Gk, LDB); MGkLDB.beProductOf(M12, GkLDB); answer.plusProductUnsym(Bk, MGkLDB, dV); } //////////////////////////////////////////////////////////////////////// uk: SNk.beProductOf(gPSigma, Nk); answer_uk.plusProductUnsym(B, SNk, -dV); // uk /////////////////////////////////////////////////////////////////////// kk: answer_kk.plusProductUnsym(Nk, Nk, dV); if ( dpmat->giveAveragingType() == 0 || dpmat->giveAveragingType() == 1 ) { double l = lStiff.at(1, 1); answer_kk.plusProductUnsym(Bk, Bk, l * l * dV); } else if ( dpmat->giveAveragingType() == 2 ) { LBk.beProductOf(lStiff, Bk); answer_kk.plusProductUnsym(Bk, LBk, dV); } } if ( elem->domain->giveEngngModel()->giveFormulation() == AL ) { FloatMatrix initialStressMatrix; elem->computeInitialStressMatrix(initialStressMatrix, tStep); answer_uu.add(initialStressMatrix); } if ( matStiffSymmFlag ) { answer_uu.symmetrized(); } answer.resize(totalSize, totalSize); answer.zero(); answer.assemble(answer_uu, locU); answer.assemble(answer_uk, locU, locK); answer.assemble(answer_ku, locK, locU); answer.assemble(answer_kk,locK); }
/** Purpose ------- ZGESSM applies the factors L computed by ZGETRF_INCPIV to a complex M-by-N tile A. Arguments --------- @param[in] m INTEGER The number of rows of the matrix A. M >= 0. @param[in] n INTEGER The number of columns of the matrix A. N >= 0. @param[in] k INTEGER The number of columns of the matrix L. K >= 0. @param[in] ib INTEGER The inner-blocking size. IB >= 0. @param[in] ipiv INTEGER array on the cpu. The pivot indices array of size K as returned by ZGETRF_INCPIV. @param[in] dL1 DOUBLE COMPLEX array, dimension(LDDL1, N) The IB-by-K matrix in which is stored L^(-1) as returned by GETRF_INCPIV @param[in] lddl1 INTEGER The leading dimension of the array L1. LDDL1 >= max(1,2*IB). @param[in] dL DOUBLE COMPLEX array, dimension(LDDL, N) The M-by-K lower triangular tile on the gpu. @param[in] lddl INTEGER The leading dimension of the array L. LDDL >= max(1,M). @param[in,out] dA DOUBLE COMPLEX array, dimension (LDDA, N) On entry, the M-by-N tile A on the gpu. On exit, updated by the application of L on the gpu. @param[in] ldda INTEGER The leading dimension of the array A. LDDA >= max(1,M). @ingroup magma_zgesv_tile ********************************************************************/ extern "C" magma_int_t magma_zgessm_gpu( magma_order_t order, magma_int_t m, magma_int_t n, magma_int_t k, magma_int_t ib, magma_int_t *ipiv, magmaDoubleComplex *dL1, magma_int_t lddl1, magmaDoubleComplex *dL, magma_int_t lddl, magmaDoubleComplex *dA, magma_int_t ldda, magma_int_t *info) { #define AT(i,j) (dAT + (i)*ldda + (j) ) #define L(i,j) (dL + (i) + (j)*lddl ) #define dL1(j) (dL1 + (j)*lddl1) magmaDoubleComplex c_one = MAGMA_Z_ONE; magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE; int i, s, sb; magmaDoubleComplex *dAT; /* Check arguments */ *info = 0; if (m < 0) *info = -1; else if (n < 0) *info = -2; else if (ldda < max(1,m)) *info = -4; if (*info != 0) { magma_xerbla( __func__, -(*info) ); return *info; } /* Quick return if possible */ if (m == 0 || n == 0) return *info; if ( order == MagmaColMajor ) { magmablas_zgetmo_in( dA, dAT, ldda, m, n ); } else { dAT = dA; } s = k / ib; for (i = 0; i < k; i += ib) { sb = min(ib, k-i); magmablas_zlaswp( n, dAT, ldda, i+1, i+sb, ipiv, 1 ); #ifndef WITHOUTTRTRI magma_ztrmm( MagmaRight, MagmaLower, MagmaTrans, MagmaUnit, n, sb, c_one, dL1(i), lddl1, AT(i, 0), ldda); #else magma_ztrsm( MagmaRight, MagmaLower, MagmaTrans, MagmaUnit, n, sb, c_one, L( i, i), lddl, AT(i, 0), ldda); #endif if ( (i+sb) < m) { magma_zgemm( MagmaNoTrans, MagmaTrans, n, m-(i+sb), sb, c_neg_one, AT(i, 0), ldda, L( i+sb, i), lddl, c_one, AT(i+sb, 0), ldda ); } } if ( order == MagmaColMajor ) { magmablas_zgetmo_in( dA, dAT, ldda, m, n ); } return *info; } /* magma_zgessm_gpu */