void CIntel::onEnemyCreated(int enemy) { const UnitDef* ud = ai->cbc->GetUnitDef(enemy); if (ud) { LOG_II("CIntel::onEnemyCreated Unit(" << enemy << ")") //assert(ai->cbc->GetUnitTeam(enemy) != ai->team); enemies.addUnit(UT(ud->id), enemy); } }
/// Lua binding for the native GetTextSize overload.
/// Stack in : [1] usertype, [2] string.
/// Stack out: width, height (two numbers).
static int GetTextSize (lua_State * L)
{
	int width;
	int height;

	GetTextSize(UT(L, 1), S(L, 2), width, height);

	lua_pushnumber(L, width);
	lua_pushnumber(L, height);

	return 2;	// number of values handed back to Lua
}
/// Lua binding for the native GetPictureTexels overload.
/// Stack in : [1] usertype.
/// Stack out: four numbers, pushed in the order S0, T0, S1, T1.
static int GetPictureTexels (lua_State * L)
{
	float fS0;
	float fT0;
	float fS1;
	float fT1;

	GetPictureTexels(UT(L, 1), fS0, fT0, fS1, fT1);

	lua_pushnumber(L, fS0);
	lua_pushnumber(L, fT0);
	lua_pushnumber(L, fS1);
	lua_pushnumber(L, fT1);

	return 4;	// number of values handed back to Lua
}
// log_marg_A0k: takes R objects (SEXP) and returns a freshly allocated REAL
// vector `lpa0yao` of length m, where m is read from the "m" element of WpostR.
//
//   * For each i in [0, m-2]: Tinv is loaded from TinvR[[i]] and bfree from
//     bfR[[i]]; getvlog() is evaluated on element i+1 of each of the N2 W
//     objects held in the Wlist built from WpostR; the N2 values are combined
//     as  maxvlog - log(N2) + log(sum(exp(vlog - maxvlog)))  (a max-shifted
//     log-sum-exp, i.e. the log of the mean of exp(vlog)) and stored in lpa0[i].
//   * For the last entry: Tinv/bfree come from element m-1, A0 is redrawn with
//     drawA0cpp(), and lpa0[m-1] is a single getvlog() evaluation on W.getWelt(m).
//
// Each VECTOR_ELT is PROTECTed only while it is being converted; lpa0yao stays
// protected until just before the return.
//
// NOTE(review): W was clear()ed inside the loop and is then passed to
//   drawA0cpp(); presumably drawA0cpp() fills it before W.getWelt(m) — confirm.
// NOTE(review): `db` is set in the loop but only referenced by disabled
//   Rprintf calls afterwards; N2 == 0 is not guarded before vlog.Maximum().
// NOTE(review): the leading `//extern "C"` looks like a disabled linkage
//   specifier — confirm how this symbol is exported to R.
//extern "C" SEXP log_marg_A0k(SEXP WpostR, SEXP A0R, SEXP N2R, SEXP consttermR, SEXP bfR, SEXP UTR, SEXP TinvR, SEXP dfR, SEXP n0R) { int i, j, *dTi, db, m, N2, df, len; N2=INTEGER(N2R)[0]; df=INTEGER(dfR)[0]; m=INTEGER(coerceVector(listElt(WpostR,"m"),INTSXP))[0]; double *pbfi, *lpa0, *cterm, lN2, tol, maxvlog, lqlog; lN2=log((double)N2); tol=1E-12; cterm=REAL(consttermR); //Rprintf("m: %d\nN2: %d\ndf: %d\n",m,N2,df); // Initialize Tinv/b.free/vlog variables SEXP Ti, bfi; Matrix Tinv; ColumnVector bfree, vlog(N2), qlog; // Initialize Wlist/W/Wmat objects and populate Wlist from WpostR Wlist Wall(WpostR,N2); Wobj W; Matrix Wmat; // Initialize SEXP/ptr to store/access log marginal A0k values SEXP lpa0yao; PROTECT(lpa0yao=allocVector(REALSXP,m)); lpa0=REAL(lpa0yao); for(i=0;i<m-1;i++){ PROTECT(Ti=VECTOR_ELT(TinvR,i)); dTi=getdims(Ti); Tinv=R2Cmat(Ti,dTi[0],dTi[1]); UNPROTECT(1); //Rprintf("Tinv[[%d]](%dx%d) initialized\n",i,dTi[0],dTi[1]); PROTECT(bfi=VECTOR_ELT(bfR,i)); db=length(bfi); bfree.ReSize(db); pbfi=REAL(bfi); bfree<<pbfi; UNPROTECT(1); //Rprintf("bfree[[%d]](%d) initialized\n",i,db); for(j=1;j<=N2;j++){ Wall.getWobj(W,j); Wmat=W.getWelt(i+1); W.clear(); vlog(j)=getvlog(Wmat,Tinv,bfree,cterm[i],df,tol); //Rprintf("vlog(%d): %f\n",j,vlog(j)); } // Modified harmonic mean of the max maxvlog=vlog.Maximum(); qlog=vlog-maxvlog; len=qlog.Storage(); lqlog=0; for(j=1;j<=len;j++) lqlog+=exp(qlog(j)); lqlog=log(lqlog); // log(sum(exp(qlog))) lpa0[i]=maxvlog-lN2+lqlog; //Rprintf("lpa0[%d] = %f\n", i, lpa0[i]); } // Computations for last column PROTECT(Ti=VECTOR_ELT(TinvR,m-1)); dTi=getdims(Ti); Tinv=R2Cmat(Ti,dTi[0],dTi[1]); UNPROTECT(1); //Rprintf("Tinv[[%d]](%dx%d) initialized\n",i,dTi[0],dTi[1]); PROTECT(bfi=VECTOR_ELT(bfR,m-1)); pbfi=REAL(bfi); bfree.ReSize(length(bfi)); bfree<<pbfi; UNPROTECT(1); //Rprintf("bfree[[%d]](%d) initialized\n",i,db); UTobj UT(UTR); Matrix A0=R2Cmat(A0R,m,m); A0=drawA0cpp(A0,UT,df,INTEGER(n0R),W); Wmat=W.getWelt(m); 
lpa0[m-1]=getvlog(Wmat,Tinv,bfree,cterm[m-1],df,tol); //Rprintf("lpa0[%d] = %f\n",m-1,lpa0[m-1]); // Return R object lpa0yao UNPROTECT(1); return lpa0yao; }
int main(int argc, char *argv[]) { t_tout* tout; tout = (t_tout*)malloc(sizeof(t_tout)); gtk_init(&argc,&argv); interface_make(tout); // background : lie les boutons à 'tout' UT(SET_PLAYERS); gtk_main(); return EXIT_SUCCESS; }
/**
 * One-time economy setup; later calls are no-ops once `initialized` is set.
 *
 * Records the commander unit type, decides whether the map counts as a wind
 * map (mean of min/max wind >= 10) and whether tidal generators are worth
 * building (tidal strength > 5).
 *
 * @param unit  expected to be the commander unit
 */
void CEconomy::init(CUnit &unit) {
	if (!initialized) {
		// NOTE: "unit" is expected to be a commander unit
		const UnitDef *commanderDef = ai->cb->GetUnitDef(unit.key);
		utCommander = UT(commanderDef->id);

		const float meanWind = (ai->cb->GetMaxWind() + ai->cb->GetMinWind()) / 2.0f;
		windmap = meanWind >= 10.0f;

		worthBuildingTidal = ai->cb->GetTidalStrength() > 5.0f;

		initialized = true;
	}
}
/// Lua binding for the native LoadTextImage overload.
/// Stack in : [1] usertype, [2] string, [3..5] colour components r, g, b.
/// Stack out: a "TextImage" usertype when the native call returns non-zero,
///            nothing otherwise.
static int LoadTextImage (lua_State * L)
{
	SDL_Color color;

	color.r = U8(L, 3);
	color.g = U8(L, 4);
	color.b = U8(L, 5);

	TextImage_h textImage;

	if (LoadTextImage(UT(L, 1), S(L, 2), color, textImage) == 0)
	{
		return 0;	// load failed: nothing is returned to Lua
	}

	PushUserType(L, textImage, "TextImage");

	return 1;
}
/*
** Small driver for ft_math_tan: prints one "tan(x) = [y]" line per sample.
** The locals `ut`, `utf` and `i` keep their names because the UT() macro may
** refer to them.
*/
int	main(void)
{
	float	(*ut)(float);
	float	utf[UT_NBUTEST];
	int		i;

	/* samples are 0.0, 1.0, ..., 7.0 */
	i = 0;
	while (i < 8)
	{
		utf[i] = (float)i;
		i++;
	}
	ut = ft_math_tan;
	i = 0;
	while (i < UT_NBUTEST)
	{
		printf("tan(%f) = [%f]\n", UT(i));
		i++;
	}
	return (0);
}
/*
** Small driver for ft_math_rsqrt_nosmart2: prints one "rsqrt(x) = [y]" line
** per sample, including zero, negative and large inputs.
** The locals `ut`, `utf` and `i` keep their names because the UT() macro may
** refer to them.
*/
int	main(void)
{
	float	(*ut)(float);
	float	utf[UT_NBUTEST];
	int		i;

	ut = ft_math_rsqrt_nosmart2;
	utf[0] = 0.0;
	utf[1] = 25.0;
	utf[2] = 2.0;
	utf[3] = MATH_PI;
	utf[4] = 1.0;
	utf[5] = -25.0;
	utf[6] = -1.0;
	utf[7] = 1000000.5;
	i = 0;
	while (i < UT_NBUTEST)
	{
		printf("rsqrt(%f) = [%f]\n", UT(i));
		i++;
	}
	return (0);
}
void CIntel::update(int frame) { resetCounters(); if (enemyvector == ZeroVector) updateEnemyVector(); int numUnits = ai->cbc->GetEnemyUnits(&ai->unitIDs[0], MAX_UNITS); for (int i = 0; i < numUnits; i++) { const int uid = ai->unitIDs[i]; const UnitDef* ud = ai->cbc->GetUnitDef(uid); if (ud == NULL) continue; unitCategory c = UT(ud->id)->cats; if ((c&ATTACKER).any() && (c&MOBILE).any()) updateCounters(c); } updateRoulette(); }
/*
    Wait for I/O on a single descriptor.

    fd           Socket descriptor to wait on.
    desiredMask  Events of interest: MPR_READABLE and/or MPR_WRITABLE.
    timeout      Timeout in milliseconds. Negative or over-large values are clamped to MAXINT.

    Returns desiredMask if the event was signalled before the timeout expired. Returns zero on timeout
    or if the event could not be created or associated with the socket.

    NOTE(review): the event is created with a fixed name, so concurrent callers may end up sharing one
    event object; the socket's event association is also not cancelled before the handle is closed. Both
    behaviours are kept as they were -- confirm whether they are intended.
 */
PUBLIC int mprWaitForSingleIO(int fd, int desiredMask, MprTicks timeout)
{
    HANDLE      h;
    int         winMask, result;

    if (timeout < 0 || timeout > MAXINT) {
        timeout = MAXINT;
    }
    winMask = 0;
    if (desiredMask & MPR_READABLE) {
        winMask |= FD_CLOSE | FD_READ;
    }
    if (desiredMask & MPR_WRITABLE) {
        winMask |= FD_WRITE;
    }
    h = CreateEvent(NULL, FALSE, FALSE, UT("mprWaitForSingleIO"));
    if (h == NULL) {
        /* No event to wait on: report "no events" rather than waiting on an invalid handle */
        return 0;
    }
    result = 0;
    /* Only wait if the socket was really armed, otherwise the wait could never be satisfied by I/O */
    if (WSAEventSelect(fd, h, winMask) != SOCKET_ERROR) {
        if (WaitForSingleObject(h, (DWORD) timeout) == WAIT_OBJECT_0) {
            result = desiredMask;
        }
    }
    CloseHandle(h);
    return result;
}
/// Lua binding for the native DrawTextImage overload.
/// Stack in : [1] usertype, [2..5] four numeric arguments forwarded unchanged.
/// Stack out: nothing.
static int DrawTextImage (lua_State * L)
{
	DrawTextImage(
		UT(L, 1),
		F(L, 2), F(L, 3),
		F(L, 4), F(L, 5));

	return 0;	// no values handed back to Lua
}
/**
 * Rebuilds the threat maps from the currently known enemy units.
 *
 * Outline:
 *  - Returns immediately while fewer than MULTIPLEXER frames have passed since
 *    lastUpdateFrame; otherwise calls reset() and fetches the enemy unit ids.
 *  - Skips units that have no UnitDef, are not ATTACKERs, are paralyzed or
 *    still being built, are AIR without ASSAULT, or whose maxWeaponRange
 *    exceeds MAX_WEAPON_RANGE_FOR_TM.
 *  - Chooses the affected layers per unit: TMT_AIR (ANTIAIR and y >= 0),
 *    TMT_SURFACE (SEA or y >= 0, and either not ANTIAIR or able to shoot
 *    ground), TMT_UNDERWATER (TORPEDO on a map that is not water-free).
 *  - Adds the unit's power (divided by 20 for COMMANDERs) to every in-bounds
 *    cell within (maxWeaponRange + 100) / PATH2REAL cells of the unit and
 *    keeps maxPower[layer] at the largest cell value seen.
 *  - Unless built for Spring 0.81.2: when the debug drawer is enabled, each
 *    map is temporarily normalised by its maxPower to update the overlay
 *    texture and then scaled back.
 *  - Finally visualises drawMap (if not TMT_NONE) and stores `frame` in
 *    lastUpdateFrame.
 *
 * NOTE(review): the overlay normalisation divides by maxPower[layer]; this is
 *   only safe if reset() leaves maxPower non-zero — confirm.
 *
 * @param frame  current frame number
 */
void CThreatMap::update(int frame) { static const unitCategory catsCanShootGround = ASSAULT|SNIPER|ARTILLERY|SCOUTER/*|PARALYZER*/; if ((frame - lastUpdateFrame) < MULTIPLEXER) return; const bool isWaterMap = !ai->gamemap->IsWaterFreeMap(); std::list<ThreatMapType> activeTypes; std::list<ThreatMapType>::const_iterator itMapType; reset(); int numUnits = ai->cbc->GetEnemyUnits(&ai->unitIDs[0], MAX_UNITS_AI); /* Add enemy threats */ for (int i = 0; i < numUnits; i++) { const int uid = ai->unitIDs[i]; const UnitDef* ud = ai->cbc->GetUnitDef(uid); if (ud == NULL) continue; const UnitType* ut = UT(ud->id); const unitCategory ecats = ut->cats; if ((ecats&ATTACKER).none() || ai->cbc->IsUnitParalyzed(uid) || ai->cbc->UnitBeingBuilt(uid)) continue; // ignore unamred, paralyzed & being built units if ((ecats&AIR).any() && (ecats&ASSAULT).none()) continue; // ignore air fighters & bombers // FIXME: using maxWeaponRange below (twice) is WRONG; we need // to calculate different max. ranges per each threatmap layer // FIXME: think smth cleverer if (ud->maxWeaponRange > MAX_WEAPON_RANGE_FOR_TM) continue; // ignore units with extra large range const float3 upos = ai->cbc->GetUnitPos(uid); activeTypes.clear(); if ((ecats&ANTIAIR).any() && upos.y >= 0.0f) { activeTypes.push_back(TMT_AIR); } if (((ecats&SEA).any() || upos.y >= 0.0f) && ((ecats&ANTIAIR).none() || (catsCanShootGround&ecats).any())) { activeTypes.push_back(TMT_SURFACE); } if (isWaterMap && (ecats&TORPEDO).any()) { activeTypes.push_back(TMT_UNDERWATER); } if (activeTypes.empty()) continue; const float uRealX = upos.x / PATH2REAL; const float uRealZ = upos.z / PATH2REAL; const float range = (ud->maxWeaponRange + 100.0f) / PATH2REAL; float powerT = ai->cbc->GetUnitPower(uid); const float power = (ecats&COMMANDER).any() ? 
powerT/20.0f : powerT; float3 pos(0.0f, 0.0f, 0.0f); const int R = (int) ceil(range); for (int z = -R; z <= R; z++) { for (int x = -R; x <= R; x++) { pos.x = x; pos.z = z; if (pos.Length2D() <= range) { pos.x += uRealX; pos.z += uRealZ; const int mx = int(round(pos.x)); const int mz = int(round(pos.z)); if (isInBounds(mx, mz)) { for (itMapType = activeTypes.begin(); itMapType != activeTypes.end(); ++itMapType) { int id = ID(mx, mz); maps[*itMapType][id] += power; maxPower[*itMapType] = std::max(maps[*itMapType][id], maxPower[*itMapType]); } } } } } /* for (itMapType = activeTypes.begin(); itMapType != activeTypes.end(); ++itMapType) { maxPower[*itMapType] = std::max<float>(power, maxPower[*itMapType]); } */ } #if !defined(BUILDING_AI_FOR_SPRING_0_81_2) if (ai->cb->IsDebugDrawerEnabled()) { std::map<ThreatMapType, int>::iterator i; for (i = handles.begin(); i != handles.end(); ++i) { float power = maxPower[i->first]; // normalize the data... for (int j = 0, N = X*Z; j < N; j++) maps[i->first][j] /= power; // update texturemap ai->cb->DebugDrawerUpdateOverlayTexture(i->second, maps[i->first], 0, 0, X, Z); // restore the original data... for (int j = 0, N = X*Z; j < N; j++) maps[i->first][j] *= power; } } #endif if (drawMap != TMT_NONE) visualizeMap(drawMap); lastUpdateFrame = frame; }
/// Fetches the usertype at Lua stack index 1 and returns it as a directory
/// entry pointer.
static struct dirent * E (lua_State * L)
{
	return static_cast<struct dirent *>(
		UT(L, 1));
}
/// Fetches the usertype at Lua stack index 1 and returns it as a directory
/// stream pointer.
static DIR * D (lua_State * L)
{
	return static_cast<DIR *>(
		UT(L, 1));
}
/**
    Purpose
    -------
    ZTSTRF computes an LU factorization of a complex matrix formed by
    an upper triangular NB-by-N tile U on top of an M-by-N tile A,
    using partial pivoting with row interchanges. The panel work is
    delegated to CORE_ztstrf on the CPU; the trailing updates are done
    on the GPU.

    This is the right-looking Level 2.5 BLAS version of the algorithm.

    NOTE(review): this paragraph previously described ZSSSSM (tiles
    L1/L2/A1/A2 that are not parameters of this routine); the text
    above follows the PLASMA description of ZTSTRF -- confirm.

    NOTE(review): the argument checks below set INFO to -6, -8 and -10
    for lddu, ldda and lddl, which does not match their positions in
    the current argument list; `maxm` is computed but not used; `iinfo`
    is never assigned after its initialisation, so the
    `*info = iinfo + ii` adjustment cannot trigger.

    Arguments
    ---------
    @param[in]
    order   magma_order_t
            Storage order of dU and dA on the GPU: MagmaRowMajor or
            MagmaColMajor. With MagmaColMajor the hybrid path goes
            through magmablas_zgetmo_in / magmablas_zgetmo_out around
            the factorization; otherwise dU and dA are used directly.

    @param[in]
    m       INTEGER
            The number of rows of the matrix A.  M >= 0.

    @param[in]
    n       INTEGER
            The number of columns of the matrix A.  N >= 0.

    @param[in]
    ib      INTEGER
            The inner-blocking size.  IB >= 0.

    @param[in]
    NB      INTEGER
            The blocking size.  NB >= 0.

    @param[in,out]
    hU      COMPLEX_16 array, dimension(LDHU, N), on cpu.
            On entry, the NB-by-N upper triangular tile hU.
            On exit, the content is incomplete. Shouldn't be used.

    @param[in]
    ldhu    INTEGER
            The leading dimension of the array hU.  LDHU >= max(1,NB).

    @param[in,out]
    dU      COMPLEX_16 array, dimension(LDDU, N), on gpu.
            On entry, the NB-by-N upper triangular tile dU identical to hU.
            On exit, the new factor U from the factorization.

    @param[in]
    lddu    INTEGER
            The leading dimension of the array dU.  LDDU >= max(1,NB).

    @param[in,out]
    hA      COMPLEX_16 array, dimension(LDHA, N), on cpu.
            On entry, only the M-by-IB first panel needs to be identical to dA(1..M, 1..IB).
            On exit, the content is incomplete. Shouldn't be used.

    @param[in]
    ldha    INTEGER
            The leading dimension of the array hA.  LDHA >= max(1,M).

    @param[in,out]
    dA      COMPLEX_16 array, dimension(LDDA, N), on gpu.
            On entry, the M-by-N tile to be factored.
            On exit, the factor L from the factorization

    @param[in]
    ldda    INTEGER
            The leading dimension of the array dA.  LDDA >= max(1,M).

    @param[out]
    hL      COMPLEX_16 array, dimension(LDHL, K), on cpu.
            On exit, contains in the upper part the IB-by-K lower triangular tile,
            and in the lower part IB-by-K the inverse of the top part.

    @param[in]
    ldhl    INTEGER
            The leading dimension of the array hL.  LDHL >= max(1,2*IB).

    @param[out]
    dL      COMPLEX_16 array, dimension(LDDL, K), on gpu.
            On exit, contains in the upper part the IB-by-K lower triangular tile,
            and in the lower part IB-by-K the inverse of the top part.

    @param[in]
    lddl    INTEGER
            The leading dimension of the array dL.  LDDL >= max(1,2*IB).

    @param[out]
    hWORK   COMPLEX_16 array, dimension(LDHWORK, 2*IB), on cpu.
            Workspace.

    @param[in]
    ldhwork INTEGER
            The leading dimension of the array hWORK.  LDHWORK >= max(NB, 1).

    @param[out]
    dWORK   COMPLEX_16 array, dimension(LDDWORK, 2*IB), on gpu.
            Workspace.

    @param[in]
    lddwork INTEGER
            The leading dimension of the array dWORK.  LDDWORK >= max(NB, 1).

    @param[out]
    ipiv    INTEGER array on the cpu.
            The pivot indices array of size K as returned by ZTSTRF

    @param[out]
    info    INTEGER
      -     PLASMA_SUCCESS successful exit
      -     < 0 if INFO = -k, the k-th argument had an illegal value
      -     > 0 if INFO = k, U(k,k) is exactly zero. The factorization
                has been completed, but the factor U is exactly
                singular, and division by zero will occur if it is used
                to solve a system of equations.

    @ingroup magma_zgesv_tile
    ********************************************************************/
extern "C" magma_int_t magma_ztstrf_gpu( magma_order_t order, magma_int_t m, magma_int_t n, magma_int_t ib, magma_int_t nb, magmaDoubleComplex *hU, magma_int_t ldhu, magmaDoubleComplex_ptr dU, magma_int_t lddu, magmaDoubleComplex *hA, magma_int_t ldha, magmaDoubleComplex_ptr dA, magma_int_t ldda, magmaDoubleComplex *hL, magma_int_t ldhl, magmaDoubleComplex_ptr dL, magma_int_t lddl, magma_int_t *ipiv, magmaDoubleComplex *hwork, magma_int_t ldhwork, magmaDoubleComplex_ptr dwork, magma_int_t lddwork, magma_int_t *info) { #define UT(i,j) (dUT + (i)*ib*lddu + (j)*ib ) #define AT(i,j) (dAT + (i)*ib*ldda + (j)*ib ) #define L(i) (dL + (i)*ib*lddl ) #define L2(i) (dL2 + (i)*ib*lddl ) #define hU(i,j) (hU + (j)*ib*ldhu + (i)*ib ) #define hA(i,j) (hA + (j)*ib*ldha + (i)*ib ) #define hL(i) (hL + (i)*ib*ldhl ) #define hL2(i) (hL2 + (i)*ib*ldhl ) magmaDoubleComplex c_one = MAGMA_Z_ONE; magmaDoubleComplex c_neg_one = 
MAGMA_Z_NEG_ONE; int iinfo = 0; int maxm, mindim; int i, j, im, s, ip, ii, sb, p = 1; magmaDoubleComplex_ptr dAT, dUT; magmaDoubleComplex_ptr dAp, dUp; #ifndef WITHOUTTRTRI magmaDoubleComplex_ptr dL2 = dL + ib; magmaDoubleComplex *hL2 = hL + ib; p = 2; #endif /* Check input arguments */ *info = 0; if (m < 0) { *info = -1; } else if (n < 0) { *info = -2; } else if (ib < 0) { *info = -3; } else if ((lddu < max(1,m)) && (m > 0)) { *info = -6; } else if ((ldda < max(1,m)) && (m > 0)) { *info = -8; } else if ((lddl < max(1,ib)) && (ib > 0)) { *info = -10; } if (*info != 0) { magma_xerbla( __func__, -(*info) ); return *info; } /* quick return */ if ((m == 0) || (n == 0) || (ib == 0)) return *info; ip = 0; /* Function Body */ mindim = min(m, n); s = mindim / ib; if ( ib >= mindim ) { /* Use CPU code. */ CORE_ztstrf(m, n, ib, nb, (PLASMA_Complex64_t*)hU, ldhu, (PLASMA_Complex64_t*)hA, ldha, (PLASMA_Complex64_t*)hL, ldhl, ipiv, (PLASMA_Complex64_t*)hwork, ldhwork, info); #ifndef WITHOUTTRTRI CORE_zlacpy( PlasmaUpperLower, mindim, mindim, (PLASMA_Complex64_t*)hL, ldhl, (PLASMA_Complex64_t*)hL2, ldhl ); CORE_ztrtri( PlasmaLower, PlasmaUnit, mindim, (PLASMA_Complex64_t*)hL2, ldhl, info ); if (*info != 0 ) { fprintf(stderr, "ERROR, trtri returned with info = %d\n", *info); } #endif if ( order == MagmaRowMajor ) { magma_zsetmatrix( m, n, hU, ldhu, dwork, lddwork ); magmablas_ztranspose( m, n, dwork, lddwork, dU, lddu ); magma_zsetmatrix( m, n, hA, ldha, dwork, lddwork ); magmablas_ztranspose( m, n, dwork, lddwork, dA, ldda ); } else { magma_zsetmatrix( m, n, hU, ldhu, dU, lddu ); magma_zsetmatrix( m, n, hA, ldha, dA, ldda ); } magma_zsetmatrix( p*ib, n, hL, ldhl, dL, lddl ); } else { /* Use hybrid blocked code. 
*/ maxm = magma_roundup( m, 32 ); if ( order == MagmaColMajor ) { magmablas_zgetmo_in( dU, dUT, lddu, m, n ); magmablas_zgetmo_in( dA, dAT, ldda, m, n ); } else { dUT = dU; dAT = dA; } dAp = dwork; dUp = dAp + ib*lddwork; ip = 0; for( i=0; i < s; i++ ) { ii = i * ib; sb = min(mindim-ii, ib); if ( i > 0 ) { // download i-th panel magmablas_ztranspose( sb, ii, UT(0,i), lddu, dUp, lddu ); magmablas_ztranspose( sb, m, AT(0,i), ldda, dAp, ldda ); magma_zgetmatrix( ii, sb, dUp, lddu, hU(0, i), ldhu ); magma_zgetmatrix( m, sb, dAp, ldda, hA(0, i), ldha ); // make sure that gpu queue is empty //magma_device_sync(); #ifndef WITHOUTTRTRI magma_ztrmm( MagmaRight, MagmaLower, MagmaTrans, MagmaUnit, n-(ii+sb), ib, c_one, L2(i-1), lddl, UT(i-1, i+1), lddu); #else magma_ztrsm( MagmaRight, MagmaLower, MagmaTrans, MagmaUnit, n-(ii+sb), ib, c_one, L(i-1), lddl, UT(i-1, i+1), lddu); #endif magma_zgemm( MagmaNoTrans, MagmaNoTrans, n-(ii+sb), m, ib, c_neg_one, UT(i-1, i+1), lddu, AT(0, i-1), ldda, c_one, AT(0, i+1), ldda ); } // do the cpu part CORE_ztstrf(m, sb, ib, nb, (PLASMA_Complex64_t*)hU(i, i), ldhu, (PLASMA_Complex64_t*)hA(0, i), ldha, (PLASMA_Complex64_t*)hL(i), ldhl, ipiv+ii, (PLASMA_Complex64_t*)hwork, ldhwork, info); if ( (*info == 0) && (iinfo > 0) ) *info = iinfo + ii; // Need to swap betw U and A #ifndef NOSWAPBLK magmablas_zswapblk( MagmaRowMajor, n-(ii+sb), UT(i, i+1), lddu, AT(0, i+1), ldda, 1, sb, ipiv+ii, 1, nb ); for (j=0; j < ib; j++) { im = ipiv[ip]-1; if ( im == j ) { ipiv[ip] += ii; } ip++; } #else for (j=0; j < ib; j++) { im = ipiv[ip]-1; if ( im != (j) ) { im = im - nb; assert( (im >= 0) && (im < m) ); magmablas_zswap( n-(ii+sb), UT(i, i+1)+j*lddu, 1, AT(0, i+1)+im*ldda, 1 ); } else { ipiv[ip] += ii; } ip++; } #endif #ifndef WITHOUTTRTRI CORE_zlacpy( PlasmaUpperLower, sb, sb, (PLASMA_Complex64_t*)hL(i), ldhl, (PLASMA_Complex64_t*)hL2(i), ldhl ); CORE_ztrtri( PlasmaLower, PlasmaUnit, sb, (PLASMA_Complex64_t*)hL2(i), ldhl, info ); if (*info != 0 ) { 
fprintf(stderr, "ERROR, trtri returned with info = %d\n", *info); } #endif // upload i-th panel magma_zsetmatrix( sb, sb, hU(i, i), ldhu, dUp, lddu ); magma_zsetmatrix( m, sb, hA(0, i), ldha, dAp, ldda ); magma_zsetmatrix( p*ib, sb, hL(i), ldhl, L(i), lddl ); magmablas_ztranspose( sb, sb, dUp, lddu, UT(i,i), lddu ); magmablas_ztranspose( m, sb, dAp, ldda, AT(0,i), ldda ); // make sure that gpu queue is empty //magma_device_sync(); // do the small non-parallel computations if ( s > (i+1) ) { #ifndef WITHOUTTRTRI magma_ztrmm( MagmaRight, MagmaLower, MagmaTrans, MagmaUnit, sb, sb, c_one, L2(i), lddl, UT(i, i+1), lddu); #else magma_ztrsm( MagmaRight, MagmaLower, MagmaTrans, MagmaUnit, sb, sb, c_one, L(i), lddl, UT(i, i+1), lddu); #endif magma_zgemm( MagmaNoTrans, MagmaNoTrans, sb, m, sb, c_neg_one, UT(i, i+1), lddu, AT(0, i ), ldda, c_one, AT(0, i+1), ldda ); } else { #ifndef WITHOUTTRTRI magma_ztrmm( MagmaRight, MagmaLower, MagmaTrans, MagmaUnit, n-mindim, sb, c_one, L2(i), lddl, UT(i, i+1), lddu); #else magma_ztrsm( MagmaRight, MagmaLower, MagmaTrans, MagmaUnit, n-mindim, sb, c_one, L(i), lddl, UT(i, i+1), lddu); #endif magma_zgemm( MagmaNoTrans, MagmaNoTrans, n-mindim, m, sb, c_neg_one, UT(i, i+1), lddu, AT(0, i ), ldda, c_one, AT(0, i+1), ldda ); } } if ( order == MagmaColMajor ) { magmablas_zgetmo_out( dU, dUT, lddu, m, n ); magmablas_zgetmo_out( dA, dAT, ldda, m, n ); } } return *info; }
/// @brief Int return; usertype argument int I_Ut (lua_State * L, int (*func)(void *)) { func(UT(L, 1)); return 0; }
// Matrix product C = A * B.
//
// Calls Matpack.Error ("non conformant arguments") when the column index range
// of A ([Clo, Chi]) differs from the row index range of B ([Rlo, Rhi]). The
// result C is indexed by A's row range and B's column range.
//
// With _MATPACK_USE_BLAS_ defined:
//   - if B or A passes the LT()/UT() triangle test, the other operand is copied
//     into C and multiplied in place by BLAS dtrmm (the triangular operand
//     must be square, see checksquare());
//   - otherwise BLAS dgemm is used.
//   The operands are handed to BLAS in swapped roles (B first in dgemm, 'U'
//   for a lower triangle and vice versa) — presumably because the storage is
//   row-major while BLAS expects column-major; confirm.
// Without BLAS: a plain triple loop over the raw row pointers that skips the
//   inner loop for zero entries of A.
//
// Returns C.Value().
Matrix operator * (const Matrix& A, const Matrix& B) { if (A.Clo() != B.Rlo() || A.Chi() != B.Rhi()) Matpack.Error("Matrix operator * (const Matrix&, const Matrix&): " "non conformant arguments\n"); // allocate return matrix Matrix C(A.Rlo(),A.Rhi(),B.Clo(),B.Chi()); //------------------------------------------------------------------------// // the BLAS version //------------------------------------------------------------------------// #if defined ( _MATPACK_USE_BLAS_ ) if ( LT(B) ) { // full matrix * lower triangle #ifdef DEBUG cout << "GM*LT\n"; #endif checksquare(B); // copy A to C to protect from overwriting copyvec(C.Store(),A.Store(),A.Elements()); charT side('L'), uplo('U'), transc('N'), diag('N'); intT m(C.Cols()), n(C.Rows()), ldb(B.Cols()), ldc(C.Cols()); doubleT alpha(1.0); F77NAME(dtrmm)(&side,&uplo,&transc,&diag,&m,&n, &alpha,B.Store(),&ldb, C.Store(),&ldc); } else if ( UT(B) ) { // full matrix * upper triangle #ifdef DEBUG cout << "GM*UT\n"; #endif checksquare(B); // copy A to C to protect from overwriting copyvec(C.Store(),A.Store(),A.Elements()); charT side('L'), uplo('L'), transc('N'), diag('N'); intT m(C.Cols()), n(C.Rows()), ldb(B.Cols()), ldc(C.Cols()); doubleT alpha(1.0); F77NAME(dtrmm)(&side,&uplo,&transc,&diag,&m,&n, &alpha,B.Store(),&ldb, C.Store(),&ldc); } else if ( LT(A) ) { // lower triangle * full matrix #ifdef DEBUG cout << "LT*GM\n"; #endif checksquare(A); // copy B to C to protect from overwriting copyvec(C.Store(),B.Store(),B.Elements()); charT side('R'), uplo('U'), transc('N'), diag('N'); intT m(C.Cols()), n(C.Rows()), ldb(A.Cols()), ldc(C.Cols()); doubleT alpha(1.0); F77NAME(dtrmm)(&side,&uplo,&transc,&diag,&m,&n, &alpha,A.Store(),&ldb, C.Store(),&ldc); } else if ( UT(A) ) { // upper triangle * full matrix #ifdef DEBUG cout << "UT*GM\n"; #endif checksquare(A); // copy B to C to protect from overwriting copyvec(C.Store(),B.Store(),B.Elements()); charT side('R'), uplo('L'), transc('N'), diag('N'); intT m(C.Cols()), n(C.Rows()), 
ldb(A.Cols()), ldc(C.Cols()); doubleT alpha(1.0); F77NAME(dtrmm)(&side,&uplo,&transc,&diag,&m,&n, &alpha,A.Store(),&ldb, C.Store(),&ldc); } else /* GM(A) and GM(B) */ { // GM*GM: full matrix * full matrix #ifdef DEBUG cout << "GM*GM\n"; #endif charT t('N'); intT m(B.Cols()), n(A.Rows()), k(B.Rows()), lda(A.Cols()), ldb(B.Cols()), ldc(C.Cols()); doubleT alpha(1.0), beta(0.0); F77NAME(dgemm)(&t,&t, &m,&n,&k, &alpha,B.Store(),&ldb, A.Store(),&lda, &beta,C.Store(),&ldc); } //------------------------------------------------------------------------// // the non-BLAS version //------------------------------------------------------------------------// #else int cl = A.cl, ch = A.ch, arl = A.rl, arh = A.rh, bcl = B.cl, bch = B.ch; // avoid call to index operator that optimizes very badly double **a = A.M, **b = B.M, **c = C.M; for (int i = arl; i <= arh; i++) { for (int j = bcl; j <= bch; j++) c[i][j] = 0.0; for (int l = cl; l <= ch; l++) { if ( a[i][l] != 0.0 ) { double temp = a[i][l]; for (int j = bcl; j <= bch; j++) c[i][j] += temp * b[l][j]; } } } #endif return C.Value(); }
/// Lua binding for the native DrawClippedTextImage overload.
/// Stack in : [1] usertype, [2..7] six numeric arguments forwarded unchanged.
/// Stack out: nothing.
static int DrawClippedTextImage (lua_State * L)
{
	DrawClippedTextImage(
		UT(L, 1),
		F(L, 2), F(L, 3),
		F(L, 4), F(L, 5),
		F(L, 6), F(L, 7));

	return 0;	// no values handed back to Lua
}
/**
 * Factorises a 3x3 deformation gradient into two orthogonal-like factors and
 * three diagonal values, handling (near-)singular inputs explicitly.
 *
 * Steps, as implemented below:
 *  1. triMat receives the six upper-triangle entries of gradient * gradient^T
 *     (gradient read as a row-major 3x3 array); eigenSym3x3() yields its
 *     eigenvalues and fills UT.
 *  2. If det(UT) < 0 the first row of UT is negated. Eigenvalues are replaced
 *     by their square roots (non-positive ones become 0).
 *  3. VT = UT * F. `condition` is a 3-bit mask with bit k set when
 *     eigenVals[k] < epsilon (1e-8):
 *       - rows of VT with a usable eigenvalue are divided by it;
 *       - one degenerate row is rebuilt as the cross product of the other two;
 *       - two degenerate rows are rebuilt with coordinateSystem() from the
 *         remaining row;
 *       - all three degenerate: VT becomes the identity;
 *       - nothing degenerate but det(VT) < 0: the row with the smallest
 *         eigenvalue and that eigenvalue are both negated.
 *  4. Outputs are written.
 *
 * @param gradient       input, 9 scalars
 * @param diags          output, 3 scalars: max(eigenVals[i], trashold)
 * @param leftOrthoMat   output, 9 scalars: transpose of UT
 * @param rightOrthoMat  output, 9 scalars: VT copied verbatim
 *
 * NOTE(review): `trashold` (sic) is not declared in this function; presumably
 *   a member holding the lower bound for the diagonal values — confirm.
 * NOTE(review): the memcpy calls assume Vector3 and Tensor2 rows store three
 *   contiguous Scalar values — confirm.
 */
void InvertibleHyperelasticMaterial::modifiedDeformGradient(const Scalar *gradient, Scalar *diags, Scalar *leftOrthoMat, Scalar *rightOrthoMat) const { Scalar triMat[6], eigenVals[3]; Tensor2<Scalar> UT, VT; constexpr Scalar epsilon = Scalar(1e-8); constexpr int diagIndices[3] = { 0, 3, 5 }; for (int i = 0; i < 3; i++) for (int j = i; j < 3; j++) triMat[diagIndices[i] + j - i] = gradient[j * 3 + 0] * gradient[i * 3 + 0] + gradient[j * 3 + 1] * gradient[i * 3 + 1] + gradient[j * 3 + 2] * gradient[i * 3 + 2]; eigenSym3x3(triMat, eigenVals, &UT(0, 0)); if (UT.Determinant() < 0.0) { for (int i = 0; i < 3; i++) UT(0, i) = -UT(0, i); } for (int i = 0; i < 3; i++) eigenVals[i] = eigenVals[i] > Scalar(0) ? sqrt(eigenVals[i]) : Scalar(0); Tensor2<Scalar> F(gradient); VT = UT * F; int condition = (eigenVals[0] < epsilon) + ((eigenVals[1] < epsilon) << 1) + ((eigenVals[2] < epsilon) << 2); switch (condition){ case 0: { for (int i = 0; i < 3; i++) { Scalar inverse = Scalar(1.0) / eigenVals[i]; for (int j = 0; j < 3; j++) VT(i, j) *= inverse; } if (VT.Determinant() < 0) { int smallestIndex = eigenVals[0] < eigenVals[1] ? (eigenVals[0] < eigenVals[2] ? 0 : 2) : (eigenVals[1] < eigenVals[2] ? 
1 : 2); for (int i = 0; i < 3; i++) VT(smallestIndex, i) = -VT(smallestIndex, i); eigenVals[smallestIndex] = -eigenVals[smallestIndex]; } break; } case 1: { Scalar inverse1 = Scalar(1.0) / eigenVals[1]; Scalar inverse2 = Scalar(1.0) / eigenVals[2]; for (int j = 0; j < 3; j++) { VT(1, j) *= inverse1; VT(2, j) *= inverse2; } Vector3 another = Vector3(VT(1, 0), VT(1, 1), VT(1, 2)) % Vector3(VT(2, 0), VT(2, 1), VT(2, 2)); memcpy(&VT(0, 0), &another[0], sizeof(Scalar) * 3); break; } case 2: { Scalar inverse0 = Scalar(1.0) / eigenVals[0]; Scalar inverse2 = Scalar(1.0) / eigenVals[2]; for (int j = 0; j < 3; j++) { VT(0, j) *= inverse0; VT(2, j) *= inverse2; } Vector3 another = Vector3(VT(2, 0), VT(2, 1), VT(2, 2)) % Vector3(VT(0, 0), VT(0, 1), VT(0, 2)); memcpy(&VT(1, 0), &another[0], sizeof(Scalar) * 3); break; } case 3: { Scalar inverse = Scalar(1.0) / eigenVals[2]; for (int i = 0; i < 3; i++) VT(2, i) *= inverse; Vector3 v1, v2; coordinateSystem(Vector3(VT(2, 0), VT(2, 1), VT(2, 2)), v1, v2); memcpy(&VT(0, 0), &v1[0], sizeof(Scalar) * 3); memcpy(&VT(1, 0), &v2[0], sizeof(Scalar) * 3); break; } case 4: { Scalar inverse0 = Scalar(1.0) / eigenVals[0]; Scalar inverse1 = Scalar(1.0) / eigenVals[1]; for (int j = 0; j < 3; j++) { VT(0, j) *= inverse0; VT(1, j) *= inverse1; } Vector3 another = Vector3(VT(0, 0), VT(0, 1), VT(0, 2)) % Vector3(VT(1, 0), VT(1, 1), VT(1, 2)); memcpy(&VT(2, 0), &another[0], sizeof(Scalar) * 3); break; } case 5: { Scalar inverse = Scalar(1.0) / eigenVals[1]; for (int i = 0; i < 3; i++) VT(1, i) *= inverse; Vector3 v1, v2; coordinateSystem(Vector3(VT(1, 0), VT(1, 1), VT(1, 2)), v1, v2); memcpy(&VT(2, 0), &v1[0], sizeof(Scalar) * 3); memcpy(&VT(0, 0), &v2[0], sizeof(Scalar) * 3); break; } case 6: { Scalar inverse = Scalar(1.0) / eigenVals[0]; for (int i = 0; i < 3; i++) VT(0, i) *= inverse; Vector3 v1, v2; coordinateSystem(Vector3(VT(0, 0), VT(0, 1), VT(0, 2)), v1, v2); memcpy(&VT(1, 0), &v1[0], sizeof(Scalar) * 3); memcpy(&VT(2, 0), &v2[0], 
sizeof(Scalar) * 3); break; } case 7: { memset(&VT(0, 0), 0, sizeof(Scalar) * 9); VT(0, 0) = VT(1, 1) = VT(2, 2) = 1.0; break; } default: Severe("Unexpected condition in InvertibleHyperelasticMaterial::modifiedDeformGradient"); break; } for (int i = 0; i < 3; i++) diags[i] = std::max(eigenVals[i], trashold); for (int i = 0; i < 3; i++) for (int j = 0; j < 3; j++) leftOrthoMat[i * 3 + j] = UT(j, i); memcpy(rightOrthoMat, &VT(0, 0), sizeof(Scalar) * 9); }
/*
    magma_ctstrf_gpu: hybrid CPU/GPU tile LU factorization (single-precision
    complex). Panels are factored on the CPU with CORE_ctstrf; the trailing
    updates (ctrmm/ctrsm + cgemm) run on the GPU.

    STOREV  (input) CHARACTER
            Storage of dU and dA on the GPU. 'R'/'r': in the CPU-only path the
            uploaded tiles are transposed into dU/dA. 'C'/'c': the hybrid path
            goes through magmablas_cgetmo_in / magmablas_cgetmo_out around the
            factorization; otherwise dU and dA are used directly.
            The remaining arguments are described inside the function body.

    NOTE(review): the argument checks set INFO to -6, -8 and -10 for lddu,
    ldda and lddl, which does not match their positions in the current
    argument list; `maxm` is computed but not used; `iinfo` is never assigned
    after its initialisation, so the `*info = iinfo + ii` adjustment cannot
    trigger.
    NOTE(review): the Purpose paragraph inside the body previously described
    CSSSSM; it now follows the PLASMA description of CTSTRF -- confirm.
*/
extern "C" magma_int_t magma_ctstrf_gpu( char storev, magma_int_t m, magma_int_t n, magma_int_t ib, magma_int_t nb, magmaFloatComplex *hU, magma_int_t ldhu, magmaFloatComplex *dU, magma_int_t lddu, magmaFloatComplex *hA, magma_int_t ldha, magmaFloatComplex *dA, magma_int_t ldda, magmaFloatComplex *hL, magma_int_t ldhl, magmaFloatComplex *dL, magma_int_t lddl, magma_int_t *ipiv, magmaFloatComplex *hwork, magma_int_t ldhwork, magmaFloatComplex *dwork, magma_int_t lddwork, magma_int_t *info) { /* -- MAGMA (version 1.4.0) -- Univ. of Tennessee, Knoxville Univ. of California, Berkeley Univ. of Colorado, Denver August 2013 Purpose ======= CTSTRF computes an LU factorization of a complex matrix formed by an upper triangular NB-by-N tile U on top of an M-by-N tile A, using partial pivoting with row interchanges. This is the right-looking Level 2.5 BLAS version of the algorithm. Arguments ========= M (input) INTEGER The number of rows of the matrix A. M >= 0. N (input) INTEGER The number of columns of the matrix A. N >= 0. IB (input) INTEGER The inner-blocking size. IB >= 0. NB (input) INTEGER The blocking size. NB >= 0. hU (input,output) COMPLEX array, dimension(LDHU, N), on cpu. On entry, the NB-by-N upper triangular tile hU. On exit, the content is incomplete. Shouldn't be used. LDHU (input) INTEGER The leading dimension of the array hU. LDHU >= max(1,NB). dU (input,output) COMPLEX array, dimension(LDDU, N), on gpu. On entry, the NB-by-N upper triangular tile dU identical to hU. On exit, the new factor U from the factorization. LDDU (input) INTEGER The leading dimension of the array dU. LDDU >= max(1,NB). hA (input,output) COMPLEX array, dimension(LDHA, N), on cpu. On entry, only the M-by-IB first panel needs to be identical to dA(1..M, 1..IB). On exit, the content is incomplete. Shouldn't be used. LDHA (input) INTEGER The leading dimension of the array hA. LDHA >= max(1,M). 
dA (input,output) COMPLEX array, dimension(LDDA, N) , on gpu. On entry, the M-by-N tile to be factored. On exit, the factor L from the factorization LDDA (input) INTEGER The leading dimension of the array dA. LDDA >= max(1,M). hL (output) COMPLEX array, dimension(LDHL, K), on cpu. On exit, contains in the upper part the IB-by-K lower triangular tile, and in the lower part IB-by-K the inverse of the top part. LDHL (input) INTEGER The leading dimension of the array hL. LDHL >= max(1,2*IB). dL (output) COMPLEX array, dimension(LDDL, K), on gpu. On exit, contains in the upper part the IB-by-K lower triangular tile, and in the lower part IB-by-K the inverse of the top part. LDDL (input) INTEGER The leading dimension of the array dL. LDDL >= max(1,2*IB). hWORK (output) COMPLEX array, dimension(LDHWORK, 2*IB), on cpu. Workspace. LDHWORK (input) INTEGER The leading dimension of the array hWORK. LDHWORK >= max(NB, 1). dWORK (output) COMPLEX array, dimension(LDDWORK, 2*IB), on gpu. Workspace. LDDWORK (input) INTEGER The leading dimension of the array dWORK. LDDWORK >= max(NB, 1). IPIV (output) INTEGER array on the cpu. The pivot indices array of size K as returned by CTSTRF INFO (output) INTEGER - PLASMA_SUCCESS successful exit - < 0 if INFO = -k, the k-th argument had an illegal value - > 0 if INFO = k, U(k,k) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations. 
===================================================================== */ #define UT(i,j) (dUT + (i)*ib*lddu + (j)*ib ) #define AT(i,j) (dAT + (i)*ib*ldda + (j)*ib ) #define L(i) (dL + (i)*ib*lddl ) #define L2(i) (dL2 + (i)*ib*lddl ) #define hU(i,j) (hU + (j)*ib*ldhu + (i)*ib ) #define hA(i,j) (hA + (j)*ib*ldha + (i)*ib ) #define hL(i) (hL + (i)*ib*ldhl ) #define hL2(i) (hL2 + (i)*ib*ldhl ) magmaFloatComplex c_one = MAGMA_C_ONE; magmaFloatComplex c_neg_one = MAGMA_C_NEG_ONE; int iinfo = 0; int maxm, mindim; int i, j, im, s, ip, ii, sb, p = 1; magmaFloatComplex *dAT, *dUT; magmaFloatComplex *dAp, *dUp; #ifndef WITHOUTTRTRI magmaFloatComplex *dL2 = dL + ib; magmaFloatComplex *hL2 = hL + ib; p = 2; #endif /* Check input arguments */ *info = 0; if (m < 0) { *info = -1; } else if (n < 0) { *info = -2; } else if (ib < 0) { *info = -3; } else if ((lddu < max(1,m)) && (m > 0)) { *info = -6; } else if ((ldda < max(1,m)) && (m > 0)) { *info = -8; } else if ((lddl < max(1,ib)) && (ib > 0)) { *info = -10; } if (*info != 0) { magma_xerbla( __func__, -(*info) ); return *info; } /* quick return */ if ((m == 0) || (n == 0) || (ib == 0)) return *info; ip = 0; /* Function Body */ mindim = min(m, n); s = mindim / ib; if ( ib >= mindim ) { /* Use CPU code. 
*/ CORE_ctstrf(m, n, ib, nb, (PLASMA_Complex32_t*)hU, ldhu, (PLASMA_Complex32_t*)hA, ldha, (PLASMA_Complex32_t*)hL, ldhl, ipiv, (PLASMA_Complex32_t*)hwork, ldhwork, info); #ifndef WITHOUTTRTRI CORE_clacpy( PlasmaUpperLower, mindim, mindim, (PLASMA_Complex32_t*)hL, ldhl, (PLASMA_Complex32_t*)hL2, ldhl ); CORE_ctrtri( PlasmaLower, PlasmaUnit, mindim, (PLASMA_Complex32_t*)hL2, ldhl, info ); if (*info != 0 ) { fprintf(stderr, "ERROR, trtri returned with info = %d\n", *info); } #endif if ( (storev == 'R') || (storev == 'r') ) { magma_csetmatrix( m, n, hU, ldhu, dwork, lddwork ); magmablas_ctranspose( dU, lddu, dwork, lddwork, m, n ); magma_csetmatrix( m, n, hA, ldha, dwork, lddwork ); magmablas_ctranspose( dA, ldda, dwork, lddwork, m, n ); } else { magma_csetmatrix( m, n, hU, ldhu, dU, lddu ); magma_csetmatrix( m, n, hA, ldha, dA, ldda ); } magma_csetmatrix( p*ib, n, hL, ldhl, dL, lddl ); } else { /* Use hybrid blocked code. */ maxm = ((m + 31)/32)*32; if ( (storev == 'C') || (storev == 'c') ) { magmablas_cgetmo_in( dU, dUT, lddu, m, n ); magmablas_cgetmo_in( dA, dAT, ldda, m, n ); } else { dUT = dU; dAT = dA; } dAp = dwork; dUp = dAp + ib*lddwork; ip = 0; for( i=0; i<s; i++ ) { ii = i * ib; sb = min(mindim-ii, ib); if ( i>0 ){ // download i-th panel magmablas_ctranspose( dUp, lddu, UT(0, i), lddu, sb, ii ); magmablas_ctranspose( dAp, ldda, AT(0, i), ldda, sb, m ); magma_cgetmatrix( ii, sb, dUp, lddu, hU(0, i), ldhu ); magma_cgetmatrix( m, sb, dAp, ldda, hA(0, i), ldha ); // make sure that gpu queue is empty //magma_device_sync(); #ifndef WITHOUTTRTRI magma_ctrmm( MagmaRight, MagmaLower, MagmaTrans, MagmaUnit, n-(ii+sb), ib, c_one, L2(i-1), lddl, UT(i-1, i+1), lddu); #else magma_ctrsm( MagmaRight, MagmaLower, MagmaTrans, MagmaUnit, n-(ii+sb), ib, c_one, L(i-1), lddl, UT(i-1, i+1), lddu); #endif magma_cgemm( MagmaNoTrans, MagmaNoTrans, n-(ii+sb), m, ib, c_neg_one, UT(i-1, i+1), lddu, AT(0, i-1), ldda, c_one, AT(0, i+1), ldda ); } // do the cpu part CORE_ctstrf(m, sb, ib, 
nb, (PLASMA_Complex32_t*)hU(i, i), ldhu, (PLASMA_Complex32_t*)hA(0, i), ldha, (PLASMA_Complex32_t*)hL(i), ldhl, ipiv+ii, (PLASMA_Complex32_t*)hwork, ldhwork, info); if ( (*info == 0) && (iinfo > 0) ) *info = iinfo + ii; // Need to swap betw U and A #ifndef NOSWAPBLK magmablas_cswapblk( 'R', n-(ii+sb), UT(i, i+1), lddu, AT(0, i+1), ldda, 1, sb, ipiv+ii, 1, nb ); for(j=0; j<ib; j++) { im = ipiv[ip]-1; if ( im == j ) { ipiv[ip] += ii; } ip++; } #else for(j=0; j<ib; j++) { im = ipiv[ip]-1; if ( im != (j) ) { im = im - nb; assert( (im>=0) && (im<m) ); magmablas_cswap( n-(ii+sb), UT(i, i+1)+j*lddu, 1, AT(0, i+1)+im*ldda, 1 ); } else { ipiv[ip] += ii; } ip++; } #endif #ifndef WITHOUTTRTRI CORE_clacpy( PlasmaUpperLower, sb, sb, (PLASMA_Complex32_t*)hL(i), ldhl, (PLASMA_Complex32_t*)hL2(i), ldhl ); CORE_ctrtri( PlasmaLower, PlasmaUnit, sb, (PLASMA_Complex32_t*)hL2(i), ldhl, info ); if (*info != 0 ) { fprintf(stderr, "ERROR, trtri returned with info = %d\n", *info); } #endif // upload i-th panel magma_csetmatrix( sb, sb, hU(i, i), ldhu, dUp, lddu ); magma_csetmatrix( m, sb, hA(0, i), ldha, dAp, ldda ); magma_csetmatrix( p*ib, sb, hL(i), ldhl, L(i), lddl ); magmablas_ctranspose( UT(i, i), lddu, dUp, lddu, sb, sb); magmablas_ctranspose( AT(0, i), ldda, dAp, ldda, m, sb); // make sure that gpu queue is empty //magma_device_sync(); // do the small non-parallel computations if ( s > (i+1) ) { #ifndef WITHOUTTRTRI magma_ctrmm( MagmaRight, MagmaLower, MagmaTrans, MagmaUnit, sb, sb, c_one, L2(i), lddl, UT(i, i+1), lddu); #else magma_ctrsm( MagmaRight, MagmaLower, MagmaTrans, MagmaUnit, sb, sb, c_one, L(i), lddl, UT(i, i+1), lddu); #endif magma_cgemm( MagmaNoTrans, MagmaNoTrans, sb, m, sb, c_neg_one, UT(i, i+1), lddu, AT(0, i ), ldda, c_one, AT(0, i+1), ldda ); } else { #ifndef WITHOUTTRTRI magma_ctrmm( MagmaRight, MagmaLower, MagmaTrans, MagmaUnit, n-mindim, sb, c_one, L2(i), lddl, UT(i, i+1), lddu); #else magma_ctrsm( MagmaRight, MagmaLower, MagmaTrans, MagmaUnit, n-mindim, sb, 
c_one, L(i), lddl, UT(i, i+1), lddu); #endif magma_cgemm( MagmaNoTrans, MagmaNoTrans, n-mindim, m, sb, c_neg_one, UT(i, i+1), lddu, AT(0, i ), ldda, c_one, AT(0, i+1), ldda ); } } if ( (storev == 'C') || (storev == 'c') ) { magmablas_cgetmo_out( dU, dUT, lddu, m, n ); magmablas_cgetmo_out( dA, dAT, ldda, m, n ); } } return *info; }
/// Lua binding for the native DrawPicture overload.
/// Stack in : [1] usertype, [2..5] four numeric arguments forwarded unchanged.
/// Stack out: nothing.
static int DrawPicture (lua_State * L)
{
	DrawPicture(
		UT(L, 1),
		F(L, 2), F(L, 3),
		F(L, 4), F(L, 5));

	return 0;	// no values handed back to Lua
}
/// Lua binding for the native SetPictureTexels overload.
/// Stack in : [1] usertype, [2..5] four numeric arguments forwarded unchanged.
/// Stack out: nothing.
static int SetPictureTexels (lua_State * L)
{
	SetPictureTexels(
		UT(L, 1),
		F(L, 2), F(L, 3),
		F(L, 4), F(L, 5));

	return 0;	// no values handed back to Lua
}