Exemplo n.º 1
0
void CIntel::onEnemyCreated(int enemy) {
	const UnitDef* ud = ai->cbc->GetUnitDef(enemy);
	if (ud) {
		LOG_II("CIntel::onEnemyCreated Unit(" << enemy << ")")
		//assert(ai->cbc->GetUnitTeam(enemy) != ai->team);
		enemies.addUnit(UT(ud->id), enemy);
	}
}
Exemplo n.º 2
0
static int GetTextSize (lua_State * L)
{
	int width, height;	GetTextSize(UT(L, 1), S(L, 2), width, height);

	lua_pushnumber(L, width);
	lua_pushnumber(L, height);

	return 2;
}
Exemplo n.º 3
0
static int GetPictureTexels (lua_State * L)
{
	float fS0, fT0, fS1, fT1;	GetPictureTexels(UT(L, 1), fS0, fT0, fS1, fT1);

	lua_pushnumber(L, fS0);
	lua_pushnumber(L, fT0);
	lua_pushnumber(L, fS1);
	lua_pushnumber(L, fT1);

	return 4;
}
Exemplo n.º 4
0
//extern "C"
SEXP log_marg_A0k(SEXP WpostR, SEXP A0R, SEXP N2R, SEXP consttermR,
			     SEXP bfR, SEXP UTR, SEXP TinvR, SEXP dfR, SEXP n0R)
{
  int i, j, *dTi, db, m, N2, df, len;
  N2=INTEGER(N2R)[0]; df=INTEGER(dfR)[0]; m=INTEGER(coerceVector(listElt(WpostR,"m"),INTSXP))[0];

  double *pbfi, *lpa0, *cterm, lN2, tol, maxvlog, lqlog;
  lN2=log((double)N2); tol=1E-12; cterm=REAL(consttermR); //Rprintf("m: %d\nN2: %d\ndf: %d\n",m,N2,df);

  // Initialize Tinv/b.free/vlog variables
  SEXP Ti, bfi; Matrix Tinv; ColumnVector bfree, vlog(N2), qlog;

  // Initialize Wlist/W/Wmat objects and populate Wlist from WpostR
  Wlist Wall(WpostR,N2); Wobj W; Matrix Wmat;

  // Initialize SEXP/ptr to store/access log marginal A0k values
  SEXP lpa0yao; PROTECT(lpa0yao=allocVector(REALSXP,m)); lpa0=REAL(lpa0yao);
  for(i=0;i<m-1;i++){
    PROTECT(Ti=VECTOR_ELT(TinvR,i));
    dTi=getdims(Ti); Tinv=R2Cmat(Ti,dTi[0],dTi[1]);
    UNPROTECT(1); //Rprintf("Tinv[[%d]](%dx%d) initialized\n",i,dTi[0],dTi[1]);

    PROTECT(bfi=VECTOR_ELT(bfR,i));
    db=length(bfi); bfree.ReSize(db); pbfi=REAL(bfi); bfree<<pbfi;
    UNPROTECT(1); //Rprintf("bfree[[%d]](%d) initialized\n",i,db);

    for(j=1;j<=N2;j++){
      Wall.getWobj(W,j); Wmat=W.getWelt(i+1); W.clear();
      vlog(j)=getvlog(Wmat,Tinv,bfree,cterm[i],df,tol); //Rprintf("vlog(%d): %f\n",j,vlog(j));
    }

    // Modified harmonic mean of the max
    maxvlog=vlog.Maximum(); qlog=vlog-maxvlog; len=qlog.Storage();
    lqlog=0; for(j=1;j<=len;j++) lqlog+=exp(qlog(j)); lqlog=log(lqlog); // log(sum(exp(qlog)))
    lpa0[i]=maxvlog-lN2+lqlog; //Rprintf("lpa0[%d] = %f\n", i, lpa0[i]);
  }

  // Computations for last column
  PROTECT(Ti=VECTOR_ELT(TinvR,m-1));
  dTi=getdims(Ti); Tinv=R2Cmat(Ti,dTi[0],dTi[1]);
  UNPROTECT(1); //Rprintf("Tinv[[%d]](%dx%d) initialized\n",i,dTi[0],dTi[1]);

  PROTECT(bfi=VECTOR_ELT(bfR,m-1));
  pbfi=REAL(bfi); bfree.ReSize(length(bfi)); bfree<<pbfi;
  UNPROTECT(1); //Rprintf("bfree[[%d]](%d) initialized\n",i,db);

  UTobj UT(UTR); Matrix A0=R2Cmat(A0R,m,m);
  A0=drawA0cpp(A0,UT,df,INTEGER(n0R),W); Wmat=W.getWelt(m);
  lpa0[m-1]=getvlog(Wmat,Tinv,bfree,cterm[m-1],df,tol); //Rprintf("lpa0[%d] = %f\n",m-1,lpa0[m-1]);

  // Return R object lpa0yao
  UNPROTECT(1); return lpa0yao;
}
int main(int argc, char *argv[])
{
	t_tout* tout;
	tout = (t_tout*)malloc(sizeof(t_tout));

	gtk_init(&argc,&argv);

	interface_make(tout); // background : lie les boutons à 'tout'

	UT(SET_PLAYERS);

	gtk_main();
	return EXIT_SUCCESS;
}
Exemplo n.º 6
0
void CEconomy::init(CUnit &unit) {
	if (initialized) return;
	// NOTE: expecting "unit" is a commander unit
	const UnitDef *ud = ai->cb->GetUnitDef(unit.key);
	
	utCommander = UT(ud->id);
	windmap = ((ai->cb->GetMaxWind() + ai->cb->GetMinWind()) / 2.0f) >= 10.0f;
	//float avgWind   = (ai->cb->GetMinWind() + ai->cb->GetMaxWind()) / 2.0f;
	//float windProf  = avgWind / utWind->cost;
	//float solarProf = utSolar->energyMake / utSolar->cost;
	worthBuildingTidal = ai->cb->GetTidalStrength() > 5.0f;
	
	initialized = true;
}
Exemplo n.º 7
0
static int LoadTextImage (lua_State * L)
{
	SDL_Color color;

	color.r = U8(L, 3);
	color.g = U8(L, 4);
	color.b = U8(L, 5);

	TextImage_h textImage;

	if (LoadTextImage(UT(L, 1), S(L, 2), color, textImage) != 0)
	{
		PushUserType(L, textImage, "TextImage");

		return 1;
	}

	return 0;
}
Exemplo n.º 8
0
int main(void)
{
	float	(*ut)(float);
	float	utf[UT_NBUTEST];
	int		i;

	utf[0] = 0.0;
	utf[1] = 1.0;
	utf[2] = 2.0;
	utf[3] = 3.0;
	utf[4] = 4.0;
	utf[5] = 5.0;
	utf[6] = 6.0;
	utf[7] = 7.0;
	ut = ft_math_tan;
	i = -1;
	while (++i < UT_NBUTEST)
		printf("tan(%f) = [%f]\n", UT(i));
	return (0);
}
Exemplo n.º 9
0
int main(void)
{
	float	(*ut)(float);
	float	utf[UT_NBUTEST];
	int		i;

	utf[0] = 0.0;
	utf[1] = 25.0;
	utf[2] = 2.0;
	utf[3] = MATH_PI;
	utf[4] = 1.0;
	utf[5] = -25.0;
	utf[6] = -1.0;
	utf[7] = 1000000.5;
	ut = ft_math_rsqrt_nosmart2;
	i = -1;
	while (++i < UT_NBUTEST)
		printf("rsqrt(%f) = [%f]\n", UT(i));
	return (0);
}
Exemplo n.º 10
0
void CIntel::update(int frame) {
	resetCounters();

	if (enemyvector == ZeroVector)
		updateEnemyVector();

	int numUnits = ai->cbc->GetEnemyUnits(&ai->unitIDs[0], MAX_UNITS);
	
	for (int i = 0; i < numUnits; i++) {
		const int uid = ai->unitIDs[i];
		const UnitDef* ud = ai->cbc->GetUnitDef(uid);
		
		if (ud == NULL)
			continue;

		unitCategory c = UT(ud->id)->cats;

		if ((c&ATTACKER).any() && (c&MOBILE).any())
			updateCounters(c);
	}

	updateRoulette();
}
Exemplo n.º 11
0
/*
    Wait for I/O on a single descriptor. Return the number of I/O events found. Mask is the events of interest.
    Timeout is in milliseconds.
 */
PUBLIC int mprWaitForSingleIO(int fd, int desiredMask, MprTicks timeout)
{
    HANDLE      h;
    int         winMask;

    if (timeout < 0 || timeout > MAXINT) {
        timeout = MAXINT;
    }
    winMask = 0;
    if (desiredMask & MPR_READABLE) {
        winMask |= FD_CLOSE | FD_READ;
    }
    if (desiredMask & MPR_WRITABLE) {
        winMask |= FD_WRITE;
    }
    h = CreateEvent(NULL, FALSE, FALSE, UT("mprWaitForSingleIO"));
    WSAEventSelect(fd, h, winMask);
    if (WaitForSingleObject(h, (DWORD) timeout) == WAIT_OBJECT_0) {
        CloseHandle(h);
        return desiredMask;
    }
    CloseHandle(h);
    return 0;
}
Exemplo n.º 12
0
static int DrawTextImage (lua_State * L)
{
	DrawTextImage(UT(L, 1), F(L, 2), F(L, 3), F(L, 4), F(L, 5));

	return 0;
}
Exemplo n.º 13
0
void CThreatMap::update(int frame) {
	static const unitCategory catsCanShootGround = ASSAULT|SNIPER|ARTILLERY|SCOUTER/*|PARALYZER*/;

	if ((frame - lastUpdateFrame) < MULTIPLEXER)
		return;

	const bool isWaterMap = !ai->gamemap->IsWaterFreeMap();
	std::list<ThreatMapType> activeTypes;
	std::list<ThreatMapType>::const_iterator itMapType;

	reset();

	int numUnits = ai->cbc->GetEnemyUnits(&ai->unitIDs[0], MAX_UNITS_AI);

	/* Add enemy threats */
	for (int i = 0; i < numUnits; i++) {
		const int uid = ai->unitIDs[i];
		const UnitDef* ud = ai->cbc->GetUnitDef(uid);

		if (ud == NULL)
			continue;

		const UnitType* ut = UT(ud->id);
		const unitCategory ecats = ut->cats;

		if ((ecats&ATTACKER).none() || ai->cbc->IsUnitParalyzed(uid)
		|| ai->cbc->UnitBeingBuilt(uid))
			continue; // ignore unamred, paralyzed & being built units

		if ((ecats&AIR).any() && (ecats&ASSAULT).none())
			continue; // ignore air fighters & bombers

		// FIXME: using maxWeaponRange below (twice) is WRONG; we need
		// to calculate different max. ranges per each threatmap layer

		// FIXME: think smth cleverer
		if (ud->maxWeaponRange > MAX_WEAPON_RANGE_FOR_TM)
			continue; // ignore units with extra large range

		const float3 upos = ai->cbc->GetUnitPos(uid);

		activeTypes.clear();

		if ((ecats&ANTIAIR).any() && upos.y >= 0.0f) {
			activeTypes.push_back(TMT_AIR);
		}

		if (((ecats&SEA).any() || upos.y >= 0.0f)
		&&  ((ecats&ANTIAIR).none() || (catsCanShootGround&ecats).any())) {
			activeTypes.push_back(TMT_SURFACE);
		}

		if (isWaterMap && (ecats&TORPEDO).any()) {
			activeTypes.push_back(TMT_UNDERWATER);
		}

		if (activeTypes.empty())
			continue;

		const float uRealX = upos.x / PATH2REAL;
		const float uRealZ = upos.z / PATH2REAL;
		const float  range = (ud->maxWeaponRange + 100.0f) / PATH2REAL;
		float       powerT = ai->cbc->GetUnitPower(uid);
		const float  power = (ecats&COMMANDER).any() ? powerT/20.0f : powerT;
		float3 pos(0.0f, 0.0f, 0.0f);

		const int R = (int) ceil(range);
		for (int z = -R; z <= R; z++) {
			for (int x = -R; x <= R; x++) {
				pos.x = x;
				pos.z = z;
				if (pos.Length2D() <= range) {
					pos.x += uRealX;
					pos.z += uRealZ;
					const int mx = int(round(pos.x));
					const int mz = int(round(pos.z));
					if (isInBounds(mx, mz)) {
						for (itMapType = activeTypes.begin(); itMapType != activeTypes.end(); ++itMapType) {
							int id = ID(mx, mz);
							maps[*itMapType][id] += power;
							maxPower[*itMapType] = std::max(maps[*itMapType][id], maxPower[*itMapType]);
						}
					}
				}
			}
		}

		/*
		for (itMapType = activeTypes.begin(); itMapType != activeTypes.end(); ++itMapType) {
			maxPower[*itMapType] = std::max<float>(power, maxPower[*itMapType]);
		}
		*/
	}

#if !defined(BUILDING_AI_FOR_SPRING_0_81_2)
	if (ai->cb->IsDebugDrawerEnabled()) {
		std::map<ThreatMapType, int>::iterator i;
		for (i = handles.begin(); i != handles.end(); ++i) {
			float power = maxPower[i->first];
			// normalize the data...
			for (int j = 0, N = X*Z; j < N; j++)
				maps[i->first][j] /= power;
			// update texturemap
			ai->cb->DebugDrawerUpdateOverlayTexture(i->second, maps[i->first], 0, 0, X, Z);
			// restore the original data...
			for (int j = 0, N = X*Z; j < N; j++)
				maps[i->first][j] *= power;
		}
	}
#endif

	if (drawMap != TMT_NONE)
		visualizeMap(drawMap);

	lastUpdateFrame = frame;
}
Exemplo n.º 14
0
static struct dirent * E (lua_State * L)
{
	return static_cast<struct dirent*>(UT(L, 1));
}
Exemplo n.º 15
0
static DIR * D (lua_State * L)
{
	return static_cast<DIR*>(UT(L, 1));
}
Exemplo n.º 16
0
/**
    Purpose
    -------
    ZSSSSM applies the LU factorization update from a complex
    matrix formed by a lower triangular IB-by-K tile L1 on top of a
    M2-by-K tile L2 to a second complex matrix formed by a M1-by-N1
    tile A1 on top of a M2-by-N2 tile A2 (N1 == N2).

    This is the right-looking Level 2.5 BLAS version of the algorithm.

    Arguments
    ---------
    @param[in]
    m       INTEGER
            The number of rows of the matrix A.  M >= 0.

    @param[in]
    n       INTEGER
            The number of columns of the matrix A.  N >= 0.

    @param[in]
    ib      INTEGER
            The inner-blocking size.  IB >= 0.

    @param[in]
    NB      INTEGER
            The blocking size.  NB >= 0.

    @param[in,out]
    hU      COMPLEX_16 array, dimension(LDHU, N), on cpu.
            On entry, the NB-by-N upper triangular tile hU.
            On exit, the content is incomplete. Shouldn't be used.

    @param[in]
    ldhu    INTEGER
            The leading dimension of the array hU.  LDHU >= max(1,NB).

    @param[in,out]
    dU      COMPLEX_16 array, dimension(LDDU, N), on gpu.
            On entry, the NB-by-N upper triangular tile dU identical to hU.
            On exit, the new factor U from the factorization.

    @param[in]
    lddu    INTEGER
            The leading dimension of the array dU.  LDDU >= max(1,NB).

    @param[in,out]
    hA      COMPLEX_16 array, dimension(LDHA, N), on cpu.
            On entry, only the M-by-IB first panel needs to be identical to dA(1..M, 1..IB).
            On exit, the content is incomplete. Shouldn't be used.

    @param[in]
    ldha    INTEGER
            The leading dimension of the array hA.  LDHA >= max(1,M).

    @param[in,out]
    dA      COMPLEX_16 array, dimension(LDDA, N), on gpu.
            On entry, the M-by-N tile to be factored.
            On exit, the factor L from the factorization

    @param[in]
    ldda    INTEGER
            The leading dimension of the array dA.  LDDA >= max(1,M).

    @param[out]
    hL      COMPLEX_16 array, dimension(LDHL, K), on vpu.
            On exit, contains in the upper part the IB-by-K lower triangular tile,
            and in the lower part IB-by-K the inverse of the top part.

    @param[in]
    ldhl    INTEGER
            The leading dimension of the array hL.  LDHL >= max(1,2*IB).

    @param[out]
    dL      COMPLEX_16 array, dimension(LDDL, K), on gpu.
            On exit, contains in the upper part the IB-by-K lower triangular tile,
            and in the lower part IB-by-K the inverse of the top part.

    @param[in]
    lddl    INTEGER
            The leading dimension of the array dL.  LDDL >= max(1,2*IB).

    @param[out]
    hWORK   COMPLEX_16 array, dimension(LDHWORK, 2*IB), on cpu.
            Workspace.

    @param[in]
    ldhwork INTEGER
            The leading dimension of the array hWORK.  LDHWORK >= max(NB, 1).

    @param[out]
    dWORK   COMPLEX_16 array, dimension(LDDWORK, 2*IB), on gpu.
            Workspace.

    @param[in]
    lddwork INTEGER
            The leading dimension of the array dWORK.  LDDWORK >= max(NB, 1).

    @param[out]
    ipiv    INTEGER array on the cpu.
            The pivot indices array of size K as returned by ZTSTRF

    @param[out]
    info    INTEGER
            - PLASMA_SUCCESS successful exit
            - < 0 if INFO = -k, the k-th argument had an illegal value
            - > 0 if INFO = k, U(k,k) is exactly zero. The factorization
                has been completed, but the factor U is exactly
                singular, and division by zero will occur if it is used
                to solve a system of equations.

    @ingroup magma_zgesv_tile
    ********************************************************************/
extern "C" magma_int_t
magma_ztstrf_gpu(
    magma_order_t order, magma_int_t m, magma_int_t n,
    magma_int_t ib, magma_int_t nb,
    magmaDoubleComplex    *hU, magma_int_t ldhu,
    magmaDoubleComplex_ptr dU, magma_int_t lddu,
    magmaDoubleComplex    *hA, magma_int_t ldha,
    magmaDoubleComplex_ptr dA, magma_int_t ldda,
    magmaDoubleComplex    *hL, magma_int_t ldhl,
    magmaDoubleComplex_ptr dL, magma_int_t lddl,
    magma_int_t *ipiv,
    magmaDoubleComplex    *hwork, magma_int_t ldhwork,
    magmaDoubleComplex_ptr dwork, magma_int_t lddwork,
    magma_int_t *info)
{
#define UT(i,j) (dUT + (i)*ib*lddu + (j)*ib )
#define AT(i,j) (dAT + (i)*ib*ldda + (j)*ib )
#define L(i)    (dL  + (i)*ib*lddl          )
#define L2(i)   (dL2 + (i)*ib*lddl          )
#define hU(i,j) (hU  + (j)*ib*ldhu + (i)*ib )
#define hA(i,j) (hA  + (j)*ib*ldha + (i)*ib )
#define hL(i)   (hL  + (i)*ib*ldhl          )
#define hL2(i)  (hL2 + (i)*ib*ldhl          )

    magmaDoubleComplex c_one     = MAGMA_Z_ONE;
    magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE;

    int iinfo = 0;
    int maxm, mindim;
    int i, j, im, s, ip, ii, sb, p = 1;
    magmaDoubleComplex_ptr dAT, dUT;
    magmaDoubleComplex_ptr dAp, dUp;
#ifndef WITHOUTTRTRI
    magmaDoubleComplex_ptr dL2 = dL + ib;
    magmaDoubleComplex *hL2 = hL + ib;
    p = 2;
#endif

    /* Check input arguments */
    *info = 0;
    if (m < 0) {
        *info = -1;
    }
    else if (n < 0) {
        *info = -2;
    }
    else if (ib < 0) {
        *info = -3;
    }
    else if ((lddu < max(1,m)) && (m > 0)) {
        *info = -6;
    }
    else if ((ldda < max(1,m)) && (m > 0)) {
        *info = -8;
    }
    else if ((lddl < max(1,ib)) && (ib > 0)) {
        *info = -10;
    }

    if (*info != 0) {
        magma_xerbla( __func__, -(*info) );
        return *info;
    }

    /* quick return */
    if ((m == 0) || (n == 0) || (ib == 0))
        return *info;

    ip = 0;

    /* Function Body */
    mindim = min(m, n);
    s      = mindim / ib;

    if ( ib >= mindim ) {
        /* Use CPU code. */
        CORE_ztstrf(m, n, ib, nb,
                    (PLASMA_Complex64_t*)hU, ldhu,
                    (PLASMA_Complex64_t*)hA, ldha,
                    (PLASMA_Complex64_t*)hL, ldhl,
                    ipiv,
                    (PLASMA_Complex64_t*)hwork, ldhwork,
                    info);

#ifndef WITHOUTTRTRI
        CORE_zlacpy( PlasmaUpperLower, mindim, mindim,
                     (PLASMA_Complex64_t*)hL, ldhl,
                     (PLASMA_Complex64_t*)hL2, ldhl );
        CORE_ztrtri( PlasmaLower, PlasmaUnit, mindim,
                     (PLASMA_Complex64_t*)hL2, ldhl, info );
        if (*info != 0 ) {
            fprintf(stderr, "ERROR, trtri returned with info = %d\n", *info);
        }
#endif

        if ( order == MagmaRowMajor ) {
            magma_zsetmatrix( m, n, hU, ldhu, dwork, lddwork );
            magmablas_ztranspose( m, n, dwork, lddwork, dU, lddu );

            magma_zsetmatrix( m, n, hA, ldha, dwork, lddwork );
            magmablas_ztranspose( m, n, dwork, lddwork, dA, ldda );
        } else {
            magma_zsetmatrix( m, n, hU, ldhu, dU, lddu );
            magma_zsetmatrix( m, n, hA, ldha, dA, ldda );
        }
        magma_zsetmatrix( p*ib, n, hL, ldhl, dL, lddl );
    }
    else {
        /* Use hybrid blocked code. */
        maxm = magma_roundup( m, 32 );

        if ( order == MagmaColMajor ) {
            magmablas_zgetmo_in( dU, dUT, lddu, m,  n );
            magmablas_zgetmo_in( dA, dAT, ldda, m,  n );
        } else {
            dUT = dU; dAT = dA;
        }
        dAp = dwork;
        dUp = dAp + ib*lddwork;

        ip = 0;
        for( i=0; i < s; i++ ) {
            ii = i * ib;
            sb = min(mindim-ii, ib);
            
            if ( i > 0 ) {
                // download i-th panel
                magmablas_ztranspose( sb, ii, UT(0,i), lddu, dUp, lddu );
                magmablas_ztranspose( sb, m,  AT(0,i), ldda, dAp, ldda );
                
                magma_zgetmatrix( ii, sb, dUp, lddu, hU(0, i), ldhu );
                magma_zgetmatrix( m,  sb, dAp, ldda, hA(0, i), ldha );
                
                // make sure that gpu queue is empty
                //magma_device_sync();
                
#ifndef WITHOUTTRTRI
                magma_ztrmm( MagmaRight, MagmaLower, MagmaTrans, MagmaUnit,
                             n-(ii+sb), ib,
                             c_one, L2(i-1),      lddl,
                                    UT(i-1, i+1), lddu);
#else
                magma_ztrsm( MagmaRight, MagmaLower, MagmaTrans, MagmaUnit,
                             n-(ii+sb), ib,
                             c_one, L(i-1),       lddl,
                                    UT(i-1, i+1), lddu);
#endif
                magma_zgemm( MagmaNoTrans, MagmaNoTrans,
                             n-(ii+sb), m, ib,
                             c_neg_one, UT(i-1, i+1), lddu,
                                        AT(0,   i-1), ldda,
                             c_one,     AT(0,   i+1), ldda );
            }

            // do the cpu part
            CORE_ztstrf(m, sb, ib, nb,
                        (PLASMA_Complex64_t*)hU(i, i), ldhu,
                        (PLASMA_Complex64_t*)hA(0, i), ldha,
                        (PLASMA_Complex64_t*)hL(i),    ldhl,
                        ipiv+ii,
                        (PLASMA_Complex64_t*)hwork, ldhwork,
                        info);

            if ( (*info == 0) && (iinfo > 0) )
                *info = iinfo + ii;
            
            // Need to swap betw U and A
#ifndef NOSWAPBLK
            magmablas_zswapblk( MagmaRowMajor, n-(ii+sb),
                                UT(i, i+1), lddu,
                                AT(0, i+1), ldda,
                                1, sb, ipiv+ii, 1, nb );

            for (j=0; j < ib; j++) {
                im = ipiv[ip]-1;
                if ( im == j ) {
                    ipiv[ip] += ii;
                }
                ip++;
            }
#else
            for (j=0; j < ib; j++) {
                im = ipiv[ip]-1;
                if ( im != (j) ) {
                    im = im - nb;
                    assert( (im >= 0) && (im < m) );
                    magmablas_zswap( n-(ii+sb), UT(i, i+1)+j*lddu, 1, AT(0, i+1)+im*ldda, 1 );
                } else {
                    ipiv[ip] += ii;
                }
                ip++;
            }
#endif

#ifndef WITHOUTTRTRI
            CORE_zlacpy( PlasmaUpperLower, sb, sb,
                         (PLASMA_Complex64_t*)hL(i), ldhl,
                         (PLASMA_Complex64_t*)hL2(i), ldhl );
            CORE_ztrtri( PlasmaLower, PlasmaUnit, sb,
                         (PLASMA_Complex64_t*)hL2(i), ldhl, info );
            if (*info != 0 ) {
                fprintf(stderr, "ERROR, trtri returned with info = %d\n", *info);
            }
#endif
            // upload i-th panel
            magma_zsetmatrix( sb, sb, hU(i, i), ldhu, dUp, lddu );
            magma_zsetmatrix( m, sb, hA(0, i), ldha, dAp, ldda );
            magma_zsetmatrix( p*ib, sb, hL(i), ldhl, L(i), lddl );
            magmablas_ztranspose( sb, sb, dUp, lddu, UT(i,i), lddu );
            magmablas_ztranspose( m,  sb, dAp, ldda, AT(0,i), ldda );
            
            // make sure that gpu queue is empty
            //magma_device_sync();
            
            // do the small non-parallel computations
            if ( s > (i+1) ) {
#ifndef WITHOUTTRTRI
                magma_ztrmm( MagmaRight, MagmaLower, MagmaTrans, MagmaUnit,
                             sb, sb,
                             c_one, L2(i),      lddl,
                                    UT(i, i+1), lddu);
#else
                magma_ztrsm( MagmaRight, MagmaLower, MagmaTrans, MagmaUnit,
                             sb, sb,
                             c_one, L(i),      lddl,
                                    UT(i, i+1), lddu);
#endif
                magma_zgemm( MagmaNoTrans, MagmaNoTrans,
                             sb, m, sb,
                             c_neg_one, UT(i, i+1), lddu,
                                        AT(0, i  ), ldda,
                             c_one,     AT(0, i+1), ldda );
            }
            else {
#ifndef WITHOUTTRTRI
                magma_ztrmm( MagmaRight, MagmaLower, MagmaTrans, MagmaUnit,
                             n-mindim, sb,
                             c_one, L2(i),      lddl,
                                    UT(i, i+1), lddu);
#else
                magma_ztrsm( MagmaRight, MagmaLower, MagmaTrans, MagmaUnit,
                             n-mindim, sb,
                             c_one, L(i),      lddl,
                                    UT(i, i+1), lddu);
#endif
                magma_zgemm( MagmaNoTrans, MagmaNoTrans,
                             n-mindim, m, sb,
                             c_neg_one, UT(i, i+1), lddu,
                                        AT(0, i  ), ldda,
                             c_one,     AT(0, i+1), ldda );
            }
        }

        if ( order == MagmaColMajor ) {
            magmablas_zgetmo_out( dU, dUT, lddu, m,  n );
            magmablas_zgetmo_out( dA, dAT, ldda, m,  n );
        }
    }
    return *info;
}
Exemplo n.º 17
0
/// @brief Int return; usertype argument
int I_Ut (lua_State * L, int (*func)(void *))
{
	func(UT(L, 1));

	return 0;
}
Exemplo n.º 18
0
Matrix operator * (const Matrix& A, const Matrix& B)
{
    if (A.Clo() != B.Rlo() || A.Chi() != B.Rhi()) 
      Matpack.Error("Matrix operator * (const Matrix&, const Matrix&): "
                    "non conformant arguments\n");

    // allocate return matrix
    Matrix C(A.Rlo(),A.Rhi(),B.Clo(),B.Chi());
    
    //------------------------------------------------------------------------//
    // the BLAS version
    //------------------------------------------------------------------------//

#if defined ( _MATPACK_USE_BLAS_ )

    if ( LT(B) ) {                   // full matrix * lower triangle
#ifdef DEBUG
        cout << "GM*LT\n";
#endif
        checksquare(B);

        // copy A to C to protect from overwriting
        copyvec(C.Store(),A.Store(),A.Elements());

        charT   side('L'), uplo('U'), transc('N'), diag('N');
        intT    m(C.Cols()), n(C.Rows()),
                ldb(B.Cols()), ldc(C.Cols());
        doubleT alpha(1.0);
        
        F77NAME(dtrmm)(&side,&uplo,&transc,&diag,&m,&n,
                       &alpha,B.Store(),&ldb, C.Store(),&ldc);


    } else if ( UT(B) ) {             // full matrix * upper triangle
#ifdef DEBUG
        cout << "GM*UT\n";
#endif
        checksquare(B);

        // copy A to C to protect from overwriting
        copyvec(C.Store(),A.Store(),A.Elements());

        charT   side('L'), uplo('L'), transc('N'), diag('N');
        intT    m(C.Cols()), n(C.Rows()),
                ldb(B.Cols()), ldc(C.Cols());
        doubleT alpha(1.0);
        
        F77NAME(dtrmm)(&side,&uplo,&transc,&diag,&m,&n,
                       &alpha,B.Store(),&ldb, C.Store(),&ldc);


    } else if ( LT(A) ) {            // lower triangle * full matrix
#ifdef DEBUG
        cout << "LT*GM\n";
#endif

        checksquare(A);

        // copy B to C to protect from overwriting
        copyvec(C.Store(),B.Store(),B.Elements());

        charT   side('R'), uplo('U'), transc('N'), diag('N');
        intT    m(C.Cols()), n(C.Rows()),
                ldb(A.Cols()), ldc(C.Cols());
        doubleT alpha(1.0);
        
        F77NAME(dtrmm)(&side,&uplo,&transc,&diag,&m,&n,
                       &alpha,A.Store(),&ldb, C.Store(),&ldc);



    } else if ( UT(A) ) {            // upper triangle * full matrix
#ifdef DEBUG
        cout << "UT*GM\n";
#endif
        checksquare(A);

        // copy A to C to protect from overwriting
        copyvec(C.Store(),B.Store(),B.Elements());

        charT   side('R'), uplo('L'), transc('N'), diag('N');
        intT    m(C.Cols()), n(C.Rows()),
                ldb(A.Cols()), ldc(C.Cols());
        doubleT alpha(1.0);
        
        F77NAME(dtrmm)(&side,&uplo,&transc,&diag,&m,&n,
                       &alpha,A.Store(),&ldb, C.Store(),&ldc);

    } else /* GM(A) and GM(B) */ {   // GM*GM: full matrix * full matrix
#ifdef DEBUG
        cout << "GM*GM\n";
#endif

        charT   t('N');
        intT    m(B.Cols()), n(A.Rows()), k(B.Rows()),
                lda(A.Cols()), ldb(B.Cols()), ldc(C.Cols());
        doubleT alpha(1.0), beta(0.0);
        
        F77NAME(dgemm)(&t,&t, &m,&n,&k,
                       &alpha,B.Store(),&ldb, A.Store(),&lda, 
                       &beta,C.Store(),&ldc);
    }

    //------------------------------------------------------------------------//
    // the non-BLAS version
    //------------------------------------------------------------------------//

#else
    int  cl = A.cl,   ch = A.ch,
        arl = A.rl,  arh = A.rh,
        bcl = B.cl,  bch = B.ch;

    // avoid call to index operator that optimizes very badely
    double **a = A.M, **b = B.M, **c = C.M;
    for (int i = arl; i <= arh; i++)  {
        for (int j = bcl; j <= bch; j++) c[i][j] = 0.0;
        for (int l = cl; l <= ch; l++) {
            if ( a[i][l] != 0.0 ) {
                double temp = a[i][l];
                for (int j = bcl; j <= bch; j++)
                    c[i][j] += temp * b[l][j];
            }
        }
    }

#endif

    return C.Value();
}
Exemplo n.º 19
0
static int DrawClippedTextImage (lua_State * L)
{
	DrawClippedTextImage(UT(L, 1), F(L, 2), F(L, 3), F(L, 4), F(L, 5), F(L, 6), F(L, 7));

	return 0;
}
	void InvertibleHyperelasticMaterial::modifiedDeformGradient(const Scalar *gradient, Scalar *diags, Scalar *leftOrthoMat, Scalar *rightOrthoMat) const {
		Scalar triMat[6], eigenVals[3];
		Tensor2<Scalar> UT, VT;
		constexpr Scalar epsilon = Scalar(1e-8);
		constexpr int diagIndices[3] = { 0, 3, 5 };

		for (int i = 0; i < 3; i++)
			for (int j = i; j < 3; j++)
				triMat[diagIndices[i] + j - i] = gradient[j * 3 + 0] * gradient[i * 3 + 0] +
				gradient[j * 3 + 1] * gradient[i * 3 + 1] + gradient[j * 3 + 2] * gradient[i * 3 + 2];

		eigenSym3x3(triMat, eigenVals, &UT(0, 0));
		
		if (UT.Determinant() < 0.0) {
			for (int i = 0; i < 3; i++)
				UT(0, i) = -UT(0, i);
		}

		for (int i = 0; i < 3; i++)
			eigenVals[i] = eigenVals[i] > Scalar(0) ? sqrt(eigenVals[i]) : Scalar(0);

		Tensor2<Scalar> F(gradient);
		VT = UT * F;
		int condition = (eigenVals[0] < epsilon) + ((eigenVals[1] < epsilon) << 1) + ((eigenVals[2] < epsilon) << 2);
		switch (condition){
		case 0:
		{
			for (int i = 0; i < 3; i++) {
				Scalar inverse = Scalar(1.0) / eigenVals[i];
				for (int j = 0; j < 3; j++)
					VT(i, j) *= inverse;
			}
			if (VT.Determinant() < 0) {
				int smallestIndex = eigenVals[0] < eigenVals[1] ? (eigenVals[0] < eigenVals[2] ? 0 : 2) : (eigenVals[1] < eigenVals[2] ? 1 : 2);
				for (int i = 0; i < 3; i++)
					VT(smallestIndex, i) = -VT(smallestIndex, i);
				eigenVals[smallestIndex] = -eigenVals[smallestIndex];
			}
			break;
		}
		case 1:
		{
			Scalar inverse1 = Scalar(1.0) / eigenVals[1];
			Scalar inverse2 = Scalar(1.0) / eigenVals[2];
			for (int j = 0; j < 3; j++) {
				VT(1, j) *= inverse1;
				VT(2, j) *= inverse2;
			}
			Vector3 another = Vector3(VT(1, 0), VT(1, 1), VT(1, 2)) %
				Vector3(VT(2, 0), VT(2, 1), VT(2, 2));
			memcpy(&VT(0, 0), &another[0], sizeof(Scalar) * 3);
			break;
		}
		case 2:
		{
			Scalar inverse0 = Scalar(1.0) / eigenVals[0];
			Scalar inverse2 = Scalar(1.0) / eigenVals[2];
			for (int j = 0; j < 3; j++) {
				VT(0, j) *= inverse0;
				VT(2, j) *= inverse2;
			}
			Vector3 another = Vector3(VT(2, 0), VT(2, 1), VT(2, 2)) %
				Vector3(VT(0, 0), VT(0, 1), VT(0, 2));
			memcpy(&VT(1, 0), &another[0], sizeof(Scalar) * 3);
			break;
		}
		case 3:
		{
			Scalar inverse = Scalar(1.0) / eigenVals[2];
			for (int i = 0; i < 3; i++)
				VT(2, i) *= inverse;
			Vector3 v1, v2;
			coordinateSystem(Vector3(VT(2, 0), VT(2, 1), VT(2, 2)), v1, v2);
			memcpy(&VT(0, 0), &v1[0], sizeof(Scalar) * 3);
			memcpy(&VT(1, 0), &v2[0], sizeof(Scalar) * 3);
			break;
		}
		case 4:
		{
			Scalar inverse0 = Scalar(1.0) / eigenVals[0];
			Scalar inverse1 = Scalar(1.0) / eigenVals[1];
			for (int j = 0; j < 3; j++) {
				VT(0, j) *= inverse0;
				VT(1, j) *= inverse1;
			}
			Vector3 another = Vector3(VT(0, 0), VT(0, 1), VT(0, 2)) %
				Vector3(VT(1, 0), VT(1, 1), VT(1, 2));
			memcpy(&VT(2, 0), &another[0], sizeof(Scalar) * 3);
			break;
		}
		case 5:
		{
			Scalar inverse = Scalar(1.0) / eigenVals[1];
			for (int i = 0; i < 3; i++)
				VT(1, i) *= inverse;
			Vector3 v1, v2;
			coordinateSystem(Vector3(VT(1, 0), VT(1, 1), VT(1, 2)), v1, v2);
			memcpy(&VT(2, 0), &v1[0], sizeof(Scalar) * 3);
			memcpy(&VT(0, 0), &v2[0], sizeof(Scalar) * 3);
			break;
		}
		case 6:
		{
			Scalar inverse = Scalar(1.0) / eigenVals[0];
			for (int i = 0; i < 3; i++)
				VT(0, i) *= inverse;
			Vector3 v1, v2;
			coordinateSystem(Vector3(VT(0, 0), VT(0, 1), VT(0, 2)), v1, v2);
			memcpy(&VT(1, 0), &v1[0], sizeof(Scalar) * 3);
			memcpy(&VT(2, 0), &v2[0], sizeof(Scalar) * 3);
			break;
		}
		case 7:
		{
			memset(&VT(0, 0), 0, sizeof(Scalar) * 9);
			VT(0, 0) = VT(1, 1) = VT(2, 2) = 1.0;
			break;
		}
		default:
			Severe("Unexpected condition in InvertibleHyperelasticMaterial::modifiedDeformGradient");
			break;
		}

		for (int i = 0; i < 3; i++)
			diags[i] = std::max(eigenVals[i], trashold);
		for (int i = 0; i < 3; i++)
			for (int j = 0; j < 3; j++)
				leftOrthoMat[i * 3 + j] = UT(j, i);
		memcpy(rightOrthoMat, &VT(0, 0), sizeof(Scalar) * 9);
	}
Exemplo n.º 21
0
extern "C" magma_int_t
magma_ctstrf_gpu( char storev, magma_int_t m, magma_int_t n, magma_int_t ib, magma_int_t nb,
                  magmaFloatComplex *hU, magma_int_t ldhu, magmaFloatComplex *dU, magma_int_t lddu,
                  magmaFloatComplex *hA, magma_int_t ldha, magmaFloatComplex *dA, magma_int_t ldda,
                  magmaFloatComplex *hL, magma_int_t ldhl, magmaFloatComplex *dL, magma_int_t lddl,
                  magma_int_t *ipiv,
                  magmaFloatComplex *hwork, magma_int_t ldhwork, magmaFloatComplex *dwork, magma_int_t lddwork,
                  magma_int_t *info)
{
/*  -- MAGMA (version 1.4.0) --
       Univ. of Tennessee, Knoxville
       Univ. of California, Berkeley
       Univ. of Colorado, Denver
       August 2013

    Purpose
    =======
    CSSSSM applies the LU factorization update from a complex
    matrix formed by a lower triangular IB-by-K tile L1 on top of a
    M2-by-K tile L2 to a second complex matrix formed by a M1-by-N1
    tile A1 on top of a M2-by-N2 tile A2 (N1 == N2).

    This is the right-looking Level 2.5 BLAS version of the algorithm.

    Arguments
    =========
    M       (input) INTEGER
            The number of rows of the matrix A.  M >= 0.

    N       (input) INTEGER
            The number of columns of the matrix A.  N >= 0.

    IB      (input) INTEGER
            The inner-blocking size.  IB >= 0.

    NB      (input) INTEGER
            The blocking size.  NB >= 0.

    hU      (input,output) COMPLEX array, dimension(LDHU, N), on cpu.
            On entry, the NB-by-N upper triangular tile hU.
            On exit, the content is incomplete. Shouldn't be used.

    LDHU    (input) INTEGER
            The leading dimension of the array hU.  LDHU >= max(1,NB).

    dU      (input,output) COMPLEX array, dimension(LDDU, N), on gpu.
            On entry, the NB-by-N upper triangular tile dU identical to hU.
            On exit, the new factor U from the factorization.

    LDDU    (input) INTEGER
            The leading dimension of the array dU.  LDDU >= max(1,NB).

    hA      (input,output) COMPLEX array, dimension(LDHA, N), on cpu.
            On entry, only the M-by-IB first panel needs to be identical to dA(1..M, 1..IB).
            On exit, the content is incomplete. Shouldn't be used.

    LDHA    (input) INTEGER
            The leading dimension of the array hA.  LDHA >= max(1,M).

    dA      (input,output) COMPLEX array, dimension(LDDA, N) , on gpu.
            On entry, the M-by-N tile to be factored.
            On exit, the factor L from the factorization

    LDDA    (input) INTEGER
            The leading dimension of the array dA.  LDDA >= max(1,M).

    hL      (output) COMPLEX array, dimension(LDHL, K), on vpu.
            On exit, contains in the upper part the IB-by-K lower triangular tile,
            and in the lower part IB-by-K the inverse of the top part.

    LDHL    (input) INTEGER
            The leading dimension of the array hL.  LDHL >= max(1,2*IB).

    dL      (output) COMPLEX array, dimension(LDDL, K), on gpu.
            On exit, contains in the upper part the IB-by-K lower triangular tile,
            and in the lower part IB-by-K the inverse of the top part.

    LDDL    (input) INTEGER
            The leading dimension of the array dL.  LDDL >= max(1,2*IB).

    hWORK   (output) COMPLEX array, dimension(LDHWORK, 2*IB), on cpu.
            Workspace.

    LDHWORK (input) INTEGER
            The leading dimension of the array hWORK.  LDHWORK >= max(NB, 1).

    dWORK   (output) COMPLEX array, dimension(LDDWORK, 2*IB), on gpu.
            Workspace.

    LDDWORK (input) INTEGER
            The leading dimension of the array dWORK.  LDDWORK >= max(NB, 1).

    IPIV    (output) INTEGER array on the cpu.
            The pivot indices array of size K as returned by CTSTRF

    INFO    (output) INTEGER
            - PLASMA_SUCCESS successful exit
            - < 0 if INFO = -k, the k-th argument had an illegal value
            - > 0 if INFO = k, U(k,k) is exactly zero. The factorization
                has been completed, but the factor U is exactly
                singular, and division by zero will occur if it is used
                to solve a system of equations.

    =====================================================================    */

#define UT(i,j) (dUT + (i)*ib*lddu + (j)*ib )
#define AT(i,j) (dAT + (i)*ib*ldda + (j)*ib )
#define L(i)    (dL  + (i)*ib*lddl          )
#define L2(i)   (dL2 + (i)*ib*lddl          )
#define hU(i,j) (hU  + (j)*ib*ldhu + (i)*ib )
#define hA(i,j) (hA  + (j)*ib*ldha + (i)*ib )
#define hL(i)   (hL  + (i)*ib*ldhl          )
#define hL2(i)  (hL2 + (i)*ib*ldhl          )

    magmaFloatComplex c_one     = MAGMA_C_ONE;
    magmaFloatComplex c_neg_one = MAGMA_C_NEG_ONE;

    int iinfo = 0;
    int maxm, mindim;
    int i, j, im, s, ip, ii, sb, p = 1;
    magmaFloatComplex *dAT, *dUT;
    magmaFloatComplex *dAp, *dUp;
#ifndef WITHOUTTRTRI
    magmaFloatComplex *dL2 = dL + ib;
    magmaFloatComplex *hL2 = hL + ib;
    p = 2;
#endif

    /* Check input arguments */
    *info = 0;
    if (m < 0) {
        *info = -1;
    }
    else if (n < 0) {
        *info = -2;
    }
    else if (ib < 0) {
        *info = -3;
    }
    else if ((lddu < max(1,m)) && (m > 0)) {
        *info = -6;
    }
    else if ((ldda < max(1,m)) && (m > 0)) {
        *info = -8;
    }
    else if ((lddl < max(1,ib)) && (ib > 0)) {
        *info = -10;
    }

    if (*info != 0) {
        magma_xerbla( __func__, -(*info) );
        return *info;
    }

    /* quick return */
    if ((m == 0) || (n == 0) || (ib == 0))
        return *info;

    ip = 0;

    /* Function Body */
    mindim = min(m, n);
    s      = mindim / ib;

    if ( ib >= mindim ) {
        /* Use CPU code. */
        CORE_ctstrf(m, n, ib, nb,
                    (PLASMA_Complex32_t*)hU, ldhu,
                    (PLASMA_Complex32_t*)hA, ldha,
                    (PLASMA_Complex32_t*)hL, ldhl,
                    ipiv,
                    (PLASMA_Complex32_t*)hwork, ldhwork,
                    info);

#ifndef WITHOUTTRTRI
        CORE_clacpy( PlasmaUpperLower, mindim, mindim,
                     (PLASMA_Complex32_t*)hL, ldhl,
                     (PLASMA_Complex32_t*)hL2, ldhl );
        CORE_ctrtri( PlasmaLower, PlasmaUnit, mindim,
                     (PLASMA_Complex32_t*)hL2, ldhl, info );
        if (*info != 0 ) {
            fprintf(stderr, "ERROR, trtri returned with info = %d\n", *info);
        }
#endif

        if ( (storev == 'R') || (storev == 'r') ) {
            magma_csetmatrix( m, n, hU, ldhu, dwork, lddwork );
            magmablas_ctranspose( dU, lddu, dwork, lddwork, m, n );

            magma_csetmatrix( m, n, hA, ldha, dwork, lddwork );
            magmablas_ctranspose( dA, ldda, dwork, lddwork, m, n );
        } else {
            magma_csetmatrix( m, n, hU, ldhu, dU, lddu );
            magma_csetmatrix( m, n, hA, ldha, dA, ldda );
        }
        magma_csetmatrix( p*ib, n, hL, ldhl, dL, lddl );
            
    }
    else {
        /* Use hybrid blocked code. */
        maxm = ((m + 31)/32)*32;

        if ( (storev == 'C') || (storev == 'c') ) {
            magmablas_cgetmo_in( dU, dUT, lddu, m,  n );
            magmablas_cgetmo_in( dA, dAT, ldda, m,  n );
        } else {
            dUT = dU; dAT = dA;
        }
        dAp = dwork;
        dUp = dAp + ib*lddwork;

        ip = 0;
        for( i=0; i<s; i++ )
        {
            ii = i * ib;
            sb = min(mindim-ii, ib);
            
            if ( i>0 ){
                // download i-th panel
                magmablas_ctranspose( dUp, lddu, UT(0, i), lddu, sb, ii );
                magmablas_ctranspose( dAp, ldda, AT(0, i), ldda, sb, m  );
                
                magma_cgetmatrix( ii, sb, dUp, lddu, hU(0, i), ldhu );
                magma_cgetmatrix( m, sb, dAp, ldda, hA(0, i), ldha );
                
                // make sure that gpu queue is empty
                //magma_device_sync();
                
#ifndef WITHOUTTRTRI
                magma_ctrmm( MagmaRight, MagmaLower, MagmaTrans, MagmaUnit,
                             n-(ii+sb), ib,
                             c_one, L2(i-1),      lddl,
                                    UT(i-1, i+1), lddu);
#else
                magma_ctrsm( MagmaRight, MagmaLower, MagmaTrans, MagmaUnit,
                             n-(ii+sb), ib,
                             c_one, L(i-1),       lddl,
                                    UT(i-1, i+1), lddu);
#endif
                magma_cgemm( MagmaNoTrans, MagmaNoTrans,
                             n-(ii+sb), m, ib,
                             c_neg_one, UT(i-1, i+1), lddu,
                                        AT(0,   i-1), ldda,
                             c_one,     AT(0,   i+1), ldda );
            }

            // do the cpu part
            CORE_ctstrf(m, sb, ib, nb,
                        (PLASMA_Complex32_t*)hU(i, i), ldhu,
                        (PLASMA_Complex32_t*)hA(0, i), ldha,
                        (PLASMA_Complex32_t*)hL(i),    ldhl,
                        ipiv+ii,
                        (PLASMA_Complex32_t*)hwork, ldhwork,
                        info);

            if ( (*info == 0) && (iinfo > 0) )
                *info = iinfo + ii;
            
            // Need to swap betw U and A
#ifndef NOSWAPBLK
            magmablas_cswapblk( 'R', n-(ii+sb),
                                UT(i, i+1), lddu,
                                AT(0, i+1), ldda,
                                1, sb, ipiv+ii, 1, nb );

            for(j=0; j<ib; j++) {
                im = ipiv[ip]-1;
                if ( im == j ) {
                    ipiv[ip] += ii;
                }
                ip++;
            }
#else
            for(j=0; j<ib; j++) {
                im = ipiv[ip]-1;
                if ( im != (j) ) {
                    im = im - nb;
                    assert( (im>=0) && (im<m) );
                    magmablas_cswap( n-(ii+sb), UT(i, i+1)+j*lddu, 1, AT(0, i+1)+im*ldda, 1 );
                } else {
                    ipiv[ip] += ii;
                }
                ip++;
            }
#endif

#ifndef WITHOUTTRTRI
            CORE_clacpy( PlasmaUpperLower, sb, sb,
                         (PLASMA_Complex32_t*)hL(i), ldhl,
                         (PLASMA_Complex32_t*)hL2(i), ldhl );
            CORE_ctrtri( PlasmaLower, PlasmaUnit, sb,
                         (PLASMA_Complex32_t*)hL2(i), ldhl, info );
            if (*info != 0 ) {
                fprintf(stderr, "ERROR, trtri returned with info = %d\n", *info);
            }
#endif
            // upload i-th panel
            magma_csetmatrix( sb, sb, hU(i, i), ldhu, dUp, lddu );
            magma_csetmatrix( m, sb, hA(0, i), ldha, dAp, ldda );
            magma_csetmatrix( p*ib, sb, hL(i), ldhl, L(i), lddl );
            magmablas_ctranspose( UT(i, i), lddu, dUp, lddu, sb, sb);
            magmablas_ctranspose( AT(0, i), ldda, dAp, ldda, m,  sb);
            
            // make sure that gpu queue is empty
            //magma_device_sync();
            
            // do the small non-parallel computations
            if ( s > (i+1) ) {
#ifndef WITHOUTTRTRI
                magma_ctrmm( MagmaRight, MagmaLower, MagmaTrans, MagmaUnit,
                             sb, sb,
                             c_one, L2(i),      lddl,
                                    UT(i, i+1), lddu);
#else
                magma_ctrsm( MagmaRight, MagmaLower, MagmaTrans, MagmaUnit,
                             sb, sb,
                             c_one, L(i),      lddl,
                                    UT(i, i+1), lddu);
#endif
                magma_cgemm( MagmaNoTrans, MagmaNoTrans,
                             sb, m, sb,
                             c_neg_one, UT(i, i+1), lddu,
                                        AT(0, i  ), ldda,
                             c_one,     AT(0, i+1), ldda );
            }
            else {
#ifndef WITHOUTTRTRI
                magma_ctrmm( MagmaRight, MagmaLower, MagmaTrans, MagmaUnit,
                             n-mindim, sb,
                             c_one, L2(i),      lddl,
                                    UT(i, i+1), lddu);
#else
                magma_ctrsm( MagmaRight, MagmaLower, MagmaTrans, MagmaUnit,
                             n-mindim, sb,
                             c_one, L(i),      lddl,
                                    UT(i, i+1), lddu);
#endif
                magma_cgemm( MagmaNoTrans, MagmaNoTrans,
                             n-mindim, m, sb,
                             c_neg_one, UT(i, i+1), lddu,
                                        AT(0, i  ), ldda,
                             c_one,     AT(0, i+1), ldda );
            }
        }

        if ( (storev == 'C') || (storev == 'c') ) {
            magmablas_cgetmo_out( dU, dUT, lddu, m,  n );
            magmablas_cgetmo_out( dA, dAT, ldda, m,  n );
        }
    }
    return *info;
}
Exemplo n.º 22
0
static int DrawPicture (lua_State * L)
{
	DrawPicture(UT(L, 1), F(L, 2), F(L, 3), F(L, 4), F(L, 5));

	return 0;
}
Exemplo n.º 23
0
static int SetPictureTexels (lua_State * L)
{
	SetPictureTexels(UT(L, 1), F(L, 2), F(L, 3), F(L, 4), F(L, 5));

	return 0;
}