Exemple #1
0
// Fills A with an n x n complex matrix built from Gauss quadrature nodes:
// entry (i,j) is phi*exp(-i*omega*(x_i-x_j)^2) with phi = Sqrt(i*omega/pi),
// and the result is scaled on both sides by the square roots of the
// quadrature weights.
void FoxLi( Matrix<Complex<Real>>& A, Int n, Real omega )
{
    DEBUG_CSE
    typedef Complex<Real> Cplx;
    const Real pi = 4*Atan( Real(1) );
    const Cplx phi = Sqrt( Cplx(0,omega/pi) );

    // Compute Gauss quadrature points and weights from the eigenpairs of a
    // symmetric tridiagonal matrix with zero diagonal.
    Matrix<Real> diag, offDiag;
    Zeros( diag, n, 1 );
    offDiag.Resize( n-1, 1 );
    for( Int k=0; k<n-1; ++k )
    {
        const Real betaInv = 2*Sqrt(1-Pow(k+Real(1),-2)/4);
        offDiag(k) = 1/betaInv;
    }
    Matrix<Real> nodes, eigVecs;
    HermitianTridiagEig( diag, offDiag, nodes, eigVecs, UNSORTED );

    // The square roots of the weights are sqrt(2) times the magnitudes of the
    // first row of the eigenvector matrix.
    auto firstRow = eigVecs( IR(0), ALL );
    Matrix<Real> sqrtWeights( firstRow ), sqrtWeightsTrans;
    const Real sqrtTwo = Sqrt(Real(2));
    for( Int k=0; k<n; ++k )
        sqrtWeights(0,k) = sqrtTwo*Abs(sqrtWeights(0,k));
    herm_eig::Sort( nodes, sqrtWeights, ASCENDING );
    Transpose( sqrtWeights, sqrtWeightsTrans );

    // Form the integral operator
    A.Resize( n, n );
    for( Int col=0; col<n; ++col )
    {
        for( Int row=0; row<n; ++row )
        {
            const Real theta = -omega*Pow(nodes(row)-nodes(col),2);
            A(row,col) = phi*Cplx(Cos(theta),Sin(theta));
        }
    }

    // Apply the weighting on both sides
    DiagonalScale( LEFT, NORMAL, sqrtWeightsTrans, A );
    DiagonalScale( RIGHT, NORMAL, sqrtWeightsTrans, A );
}
// Rotation about an arbitrary axis (rotation only).
// Writes into m the product Rt * Rx(fi) * R, where R is a basis-change matrix
// whose first row is the normalized axis.
void ArbRotate(Point3D *axis, GLfloat fi, GLfloat *m)
{
	Point3D bx, by, bz, a; // a is unused
	GLfloat R[16], Rt[16], Raxel[16], RtRx[16];

	// Axis (almost) parallel to the Z axis: x and y are both within a small
	// threshold of zero, so the cross product with Z below would degenerate.
	// Rotate about Z directly, with the sign taken from axis->z.
	if (axis->x < 0.0000001 && axis->x > -0.0000001 &&
	    axis->y < 0.0000001 && axis->y > -0.0000001)
	{
		if (axis->z > 0)
			Rz(fi, m);
		else
			Rz(-fi, m);
		return;
	}

	// Build a basis around the axis: x' = |axis|, y' = z^ x x', z' = x' x y'.
	bx = *axis;
	Normalize(&bx);
	SetVector(0,0,1, &bz); // temporary z
	CrossProduct(&bz, &bx, &by);
	Normalize(&by);
	CrossProduct(&bx, &by, &bz);

	// Basis vectors go into the rows of R; last row and column are those of
	// the identity.
	R[0] = bx.x;  R[4] = bx.y;  R[8]  = bx.z;  R[12] = 0.0;
	R[1] = by.x;  R[5] = by.y;  R[9]  = by.z;  R[13] = 0.0;
	R[2] = bz.x;  R[6] = bz.y;  R[10] = bz.z;  R[14] = 0.0;
	R[3] = 0.0;   R[7] = 0.0;   R[11] = 0.0;   R[15] = 1.0;

	// Transpose = inverse, since the matrix is orthonormal
	// (original author's note: the error is not in Transpose).
	Transpose(&R, &Rt);

	Rx(fi, &Raxel); // rotate around the x axis

	// m := Rt * Rx * R
	Mult(&Rt, &Raxel, &RtRx);
	Mult(&RtRx, &R, m);
}
Exemple #3
0
// Panel-by-panel update of C: for each block of columns C1 of C and the
// matching block of rows B1 of B, accumulates alpha A B1^{T/H} into C1.
// B1 is conjugate-transposed when orientB == ADJOINT, transposed otherwise.
void SUMMA_NTA
( Orientation orientB,
  T alpha,
  const AbstractDistMatrix<T>& APre,
  const AbstractDistMatrix<T>& BPre,
        AbstractDistMatrix<T>& CPre )
{
    EL_DEBUG_CSE
    const Int numColsC = CPre.Width();
    const Int blockSize = Blocksize();
    const Grid& grid = APre.Grid();
    const bool conjugate = ( orientB == ADJOINT );

    // Obtain [MC,MR] versions of the operands
    DistMatrixReadProxy<T,T,MC,MR> AProx( APre );
    DistMatrixReadProxy<T,T,MC,MR> BProx( BPre );
    DistMatrixReadWriteProxy<T,T,MC,MR> CProx( CPre );
    auto& A = AProx.GetLocked();
    auto& B = BProx.GetLocked();
    auto& C = CProx.Get();

    // Temporary distributions, both aligned with A
    DistMatrix<T,MR,STAR> B1Trans_MR_STAR(grid);
    DistMatrix<T,MC,STAR> D1_MC_STAR(grid);
    B1Trans_MR_STAR.AlignWith( A );
    D1_MC_STAR.AlignWith( A );

    Int colBeg = 0;
    while( colBeg < numColsC )
    {
        // The last panel may be narrower than the blocksize
        const Int panelWidth = Min(blockSize,numColsC-colBeg);
        const IR panel( colBeg, colBeg+panelWidth );
        auto B1 = B( panel, ALL   );
        auto C1 = C( ALL,   panel );

        // D1[MC,*] := alpha A[MC,MR] (B1^[T/H])[MR,*]
        Transpose( B1, B1Trans_MR_STAR, conjugate );
        LocalGemm( NORMAL, NORMAL, alpha, A, B1Trans_MR_STAR, D1_MC_STAR );

        // C1[MC,MR] += scattered result of D1[MC,*] summed over grid rows
        AxpyContract( T(1), D1_MC_STAR, C1 );

        colBeg += blockSize;
    }
}
Exemple #4
0
// AnimatedTransform Method Definitions
void AnimatedTransform::Decompose(const Matrix4x4 &m, Vector *T,
                                  Quaternion *Rquat, Matrix4x4 *S) {
    // Extract translation _T_ from transformation matrix
    T->x = m.m[0][3];
    T->y = m.m[1][3];
    T->z = m.m[2][3];

    // Compute new transformation matrix _M_ without translation
    Matrix4x4 M = m;
    for (int i = 0; i < 3; ++i)
        M.m[i][3] = M.m[3][i] = 0.f;
    M.m[3][3] = 1.f;

    // Extract rotation _R_ from transformation matrix
    float norm;
    int count = 0;
    Matrix4x4 R = M;
    do {
        // Compute next matrix _Rnext_ in series
        Matrix4x4 Rnext;
        Matrix4x4 Rit = Inverse(Transpose(R));
        for (int i = 0; i < 4; ++i)
            for (int j = 0; j < 4; ++j)
                Rnext.m[i][j] = 0.5f * (R.m[i][j] + Rit.m[i][j]);

        // Compute norm of difference between _R_ and _Rnext_
        norm = 0.f;
        for (int i = 0; i < 3; ++i) {
            float n = fabsf(R.m[i][0] - Rnext.m[i][0]) +
                      fabsf(R.m[i][1] - Rnext.m[i][1]) +
                      fabsf(R.m[i][2] - Rnext.m[i][2]);
            norm = max(norm, n);
        }
        R = Rnext;
    } while (++count < 100 && norm > .0001f);
    // XXX TODO FIXME deal with flip...
    *Rquat = Quaternion(R);

    // Compute scale _S_ using rotation and original matrix
    *S = Matrix4x4::Mul(Inverse(R), M);
}
Exemple #5
0
int CalcSphereCenter (const Point<3> ** pts, Point<3> & c)
{
  Vec3d row1 (*pts[0], *pts[1]);
  Vec3d row2 (*pts[0], *pts[2]);
  Vec3d row3 (*pts[0], *pts[3]);

  Vec3d rhs(0.5 * (row1*row1),
	    0.5 * (row2*row2),
	    0.5 * (row3*row3));
  Transpose (row1, row2, row3);
  
  Vec3d sol;
  if (SolveLinearSystem (row1, row2, row3, rhs, sol))
    {
      (*testout) << "CalcSphereCenter: degenerated" << endl;
      return 1;
    }

  c = *pts[0] + sol;
  return 0;
}
Exemple #6
0
// Returns the inverse of this 3x3 matrix, or the all-zero matrix when the
// determinant is exactly zero.
//
// The inverse is assembled from cross products of the rows of the transpose
// (i.e. the columns of this matrix), each scaled by 1/det.
Matrix Matrix::Inverse() const
{
	// Evaluate the determinant once; it is needed both for the singularity
	// test and for the scale factor.
	const Float det = Determinant();
	if (det == 0)
	{
		return Matrix(
				Vector3D(0, 0, 0),
				Vector3D(0, 0, 0),
				Vector3D(0, 0, 0));
	}

	Matrix transposed = Transpose();
	const Float invDet = 1.0/det;

	return Matrix(
			transposed.Rows[1].CrossProduct(transposed.Rows[2]) * invDet,
			transposed.Rows[2].CrossProduct(transposed.Rows[0]) * invDet,
			transposed.Rows[0].CrossProduct(transposed.Rows[1]) * invDet);
}
Exemple #7
0
// Computes the inverse and the determinant of theMatrix from its
// eigen-decomposition, obtained with LAPACK dspev on packed storage.
//
// Only the upper triangle of theMatrix (entries [i][j] with j >= i) is read,
// so the matrix is assumed to be symmetric.
//
//   theMatrix    : input matrix; GetNCols() gives the order
//   theInvMatrix : receives V * Diag(1/w) * Transpose(V), with w the
//                  eigenvalues and V the eigenvectors returned by dspev
//   theDet       : receives the product of the eigenvalues
//
// Throws cOTError when dspev reports a non-zero info code.
void LapackInvAndDet(cDMatrix& theMatrix, cDMatrix& theInvMatrix, double& theDet)
{
        // Owns one heap array of doubles and releases it with delete[] on every
        // exit path, including the throw below.
        struct cScopedDoubles
        {
                double* mData ;
                explicit cScopedDoubles(uint theSize) : mData(new double[theSize]) {}
                ~cScopedDoubles() { delete[] mData ; }
        private:
                // not copyable: two owners would free the same array twice
                cScopedDoubles(const cScopedDoubles&) ;
                cScopedDoubles& operator=(const cScopedDoubles&) ;
        } ;

        const uint myNCol = theMatrix.GetNCols() ;

        cScopedDoubles myAP(myNCol*(myNCol + 1)/2) ;   // packed upper triangle
        cScopedDoubles myW(myNCol) ;                   // eigenvalues
        cScopedDoubles myZ(myNCol*myNCol) ;            // eigenvectors, column-major
        cScopedDoubles myWork(myNCol * 3) ;            // dspev workspace

        int myInfo = 0,
            myN = (int)(myNCol),
            myldz = (int)(myNCol) ;

        // Column-wise packing of the upper triangle: A(i,j) -> AP[i + j*(j+1)/2]
        for (int i = 0 ; i < myN ; i++)
                for (int j = i ; j < myldz ; j++)
                        myAP.mData[i+(j+1)*j/2]  = theMatrix[i][j] ;

        F77_NAME(dspev)("V", "U", &myN, myAP.mData, myW.mData, myZ.mData, &myldz, myWork.mData, &myInfo) ;

        if (myInfo != 0)
                throw cOTError("Non inversible matrix") ;

        theDet = 1.0L ;
        cDVector myInvEigenValue = cDVector(myNCol) ;
        cDMatrix myEigenVector(myNCol, myNCol) ;
        for (uint i = 0 ; i < myNCol ; i++)
        {       theDet *= myW.mData[i] ;
                myInvEigenValue[i] = 1.0 /myW.mData[i] ;
                for (int j = 0 ; j < myN ; j++)
                        myEigenVector[i][j] = myZ.mData[i + j*myN] ;
        }

        // inverse = V * Diag(1/w) * V^T
        theInvMatrix =  myEigenVector ;
        cDMatrix myAuxMat1 = Diag(myInvEigenValue), myAuxMat2 = Transpose(myEigenVector) ;
        cDMatrix myAuxMat = myAuxMat1 * myAuxMat2 ;
        theInvMatrix = theInvMatrix * myAuxMat ;
}
// Recomputes the cached quantities derived from K_, R_, t_ and GP_.
void Camera::Precompute()
{
    // KRt = K * R^T
    KRt_ = K_ * Transpose(R_);

    // KRtT = KRt * t
    KRtT_ = KRt_ * t_;

    // VPN: third column of R
    VPN_ = R_.getColumn(2);

    // VPd = -(t . VPN)
    VPd_ = 0.0;
    for(int k = 0; k < t_.getSize(); k++)
    {
        VPd_ -= t_(k) * VPN_(k);
    }

    // GPD is the fourth component of GP, GPN its first three components
    GPD_ = GP_(3);
    GPN_.setSize(3);
    for(int k = 0; k < GPN_.getSize(); k++)
    {
        GPN_(k) = GP_(k);
    }
}
Exemple #9
0
// Accumulates alpha A^{T/H} B into C by sweeping over blocks of rows of A and
// B along the summation dimension (the height of B); orientA is forwarded to
// the local multiply.
void SUMMA_TNC
( Orientation orientA,
  T alpha,
  const AbstractDistMatrix<T>& APre,
  const AbstractDistMatrix<T>& BPre,
        AbstractDistMatrix<T>& CPre )
{
    DEBUG_CSE
    const Int innerDim = BPre.Height();
    const Int blockSize = Blocksize();
    const Grid& grid = APre.Grid();

    // Obtain [MC,MR] versions of the operands
    DistMatrixReadProxy<T,T,MC,MR> AProx( APre );
    DistMatrixReadProxy<T,T,MC,MR> BProx( BPre );
    DistMatrixReadWriteProxy<T,T,MC,MR> CProx( CPre );
    auto& A = AProx.GetLocked();
    auto& B = BProx.GetLocked();
    auto& C = CProx.Get();

    // Temporary distributions, both aligned with C
    DistMatrix<T,STAR,MC> A1_STAR_MC(grid);
    DistMatrix<T,MR,STAR> B1Trans_MR_STAR(grid);
    A1_STAR_MC.AlignWith( C );
    B1Trans_MR_STAR.AlignWith( C );

    Int rowBeg = 0;
    while( rowBeg < innerDim )
    {
        // The last panel may be shorter than the blocksize
        const Int panelHeight = Min(blockSize,innerDim-rowBeg);
        const IR panel( rowBeg, rowBeg+panelHeight );
        auto A1 = A( panel, ALL );
        auto B1 = B( panel, ALL );

        // C[MC,MR] += alpha (A1[*,MC])^T B1[*,MR]
        //           = alpha (A1^T)[MC,*] B1[*,MR]
        A1_STAR_MC = A1;
        Transpose( B1, B1Trans_MR_STAR );
        LocalGemm
        ( orientA, TRANSPOSE, alpha, A1_STAR_MC, B1Trans_MR_STAR, T(1), C );

        rowBeg += blockSize;
    }
}
// Re-expresses the camera's plane GP = (n0, n1, n2, d) using R^T:
// the first three components become R^T * n, and the fourth becomes
// d + (R^T n) . (R^T t). Returns the resulting 4-vector.
Vector<double> AncillaryMethods::PlaneToCam(const Camera& camera)
{
    Vector<double> plane = camera.get_GP();
    Matrix<double> rotTransposed = Transpose(camera.get_R());

    // rotate the plane's first three components
    Vector<double> normal(plane(0), plane(1), plane(2));
    normal = rotTransposed * normal;

    // rotate the camera translation the same way
    Vector<double> shift = rotTransposed * camera.get_t();

    double offset = plane(3) + normal(0)*shift(0) + normal(1)*shift(1) + normal(2)*shift(2);

    Vector<double> gp_in_camera(4);
    for (int k = 0; k < 3; k++)
    {
        gp_in_camera(k) = normal(k);
    }
    gp_in_camera(3) = offset;

    return gp_in_camera;
}
int main()
{
  double Av[9] = {2.0, 1.0, 1.0,
                  1.0, 2.0, 1.0,
                  1.0, 1.0, 2.0, };

  Matrix<double> A(3, 3, Av);
  Matrix<double> B = 0.5 * Transpose(A) * A;
  printf("Matrix:\n");
  B.Print();
  char outputFilename[96] = "BMatrix";
  B.Save(outputFilename);

  Matrix<double> Q(3,3); // will hold eigenvectors
  Matrix<double> Lambda(3,1); // will hold eigenvalues
  B.SymmetricEigenDecomposition(Q, Lambda);

  printf("Eigenvalues:\n");
  Lambda.Print();

  return 0;
}
Exemple #12
0
void AugmentedKKT
( const Matrix<Real>& A,
  const Matrix<Real>& x,
  const Matrix<Real>& z,
        Matrix<Real>& J,
  bool onlyLower )
{
    EL_DEBUG_CSE
    const Int m = A.Height();
    const Int n = A.Width();

    Zeros( J, m+n, m+n );
    const IR xInd(0,n), yInd(n,n+m);
    auto Jxx = J(xInd,xInd); auto Jxy = J(xInd,yInd);
    auto Jyx = J(yInd,xInd); auto Jyy = J(yInd,yInd);
    Matrix<Real> d( z );
    DiagonalSolve( LEFT, NORMAL, x, d );
    Diagonal( Jxx, d );
    Jyx = A;
    if( !onlyLower )
        Transpose( A, Jxy );
}
Exemple #13
0
 PhaseEnumerationCache
 ( const Matrix<Field>& B,
   const Matrix<Base<Field>>& d,
   const Matrix<Field>& N,
   const Matrix<Base<Field>>& normUpperBounds,
         Int batchSize=256,
         Int blocksize=32,
         bool useTranspose=true )
 : B_(B),
   d_(d),
   N_(N),
   normUpperBounds_(normUpperBounds),
   foundVector_(false),
   numQueued_(0),
   insertionBound_(normUpperBounds.Height()),
   blocksize_(blocksize),
   useTranspose_(useTranspose)
 { 
     Zeros( Y_, N.Height(), batchSize );   
     if( useTranspose )
         Transpose( N, NTrans_ );
 }
Exemple #14
0
// Forms the prior cross-product matrices from prior_Y and prior_X, computes
// the log prior constant (zero for a flat prior), and finally scales all of
// them by lambda_bar.
void SBVAR_symmetric::SetupSBVAR_symmetric(void)
{
  prior_YY=TransposeMultiply(prior_Y,prior_Y);
  prior_XX=TransposeMultiply(prior_X,prior_X);
  prior_XY=TransposeMultiply(prior_X,prior_Y);

  if (!flat_prior)
    {
      // Block matrix  [  Y'Y   -(X'Y)' ]
      //               [ -X'Y     X'X   ]
      TDenseMatrix S(n_vars+n_predetermined,n_vars+n_predetermined);
      S.Insert(0,0,prior_YY);
      S.Insert(n_vars,0,-prior_XY);
      S.Insert(0,n_vars,-Transpose(prior_XY));
      S.Insert(n_vars,n_vars,prior_XX);

      // 0.918938533204673 = 0.5*ln(2*pi)
      log_prior_constant=n_vars*(-0.918938533204673*(n_vars+n_predetermined) + 0.5*LogAbsDeterminant(S));
    }
  else
    {
      log_prior_constant=0.0;
    }

  // scale everything by lambda_bar
  prior_YY*=lambda_bar;
  prior_XX*=lambda_bar;
  prior_XY*=lambda_bar;
  log_prior_constant*=lambda_bar;
}
Exemple #15
0
/* Dispatches a parsed call to the handler matching the code returned by
 * GetFunction(fc->function). Unrecognised codes fall back to the speed test,
 * then to an existing variable or matrix with that name; otherwise a
 * "not implemented" message is printed and fni is incremented. fni is reset
 * to 0 after every call whose code is neither NOF nor VAR. */
void CallFunction(FunctionCall fc)
{
	int code = GetFunction(fc->function);

	switch (code)
	{
		case VAR: NewVariable(fc);     break;
		case NMX: NewMatrix(fc);       break;
		case ADD: Addition(fc);        break;
		case SUB: Substraction(fc);    break;
		case MUL: Multiplication(fc);  break;
		case MSC: Scalar_Mult(fc);     break;
		case EXP: Exponentiation(fc);  break;
		case TRA: Transpose(fc);       break;
		case DET: Determinant(fc);     break;
		case DLU: Decomposition(fc);   break;
		case SOL: Solve(fc);           break;
		case INV: Inversion(fc);       break;
		case RNK: Rank(fc);            break;
		case DSP: Display(fc);         break;
		case NOF: /* not a known function: same handling as default */
		default:
			if (GetFunction(fc->name)==SPT)
			{
				SpeedTest(fc);
			}
			else if (IndexVariable(fc->function)!=-1)
			{
				NewVariable(fc);
			}
			else if (IndexMatrix(fc->function)!=-1)
			{
				NewMatrix(fc);
			}
			else
			{
				printf("\t%s : Function Not Implemented\n", fc->function);
				fni++;
			}
			break;
	}

	if (!(code==NOF || code==VAR))
	{
		fni = 0;
	}
}
Exemple #16
0
    //-----------------------------------------------------------------------------
    //  Update
    //  Rebuilds the view matrix from m_vLook, m_vUp and m_vPosition (also
    //  re-normalizing m_vLook and recomputing m_vRight), then draws the camera
    //  position and look direction as on-screen text.
    //  TODO: Pre- and Post- updates?
    //-----------------------------------------------------------------------------
    void CView::Update( void )
    {
        RVector4 x, y, z;

        // Camera basis: z along the look direction, x = up x z, y = z x x
        z = m_vLook = Normalize( m_vLook );
        x = m_vRight = Normalize( CrossProduct( m_vUp, z ) );
        y = CrossProduct( z, x );
        
        // Basis vectors go in as rows and are then transposed into columns
        m_mView.r0 = x;
        m_mView.r1 = y;
        m_mView.r2 = z;
        m_mView.r3 = RVector4Zero();
        m_mView = Transpose( m_mView );
        
        // Last row: the negated position projected onto each basis vector
        m_mView.r3 = RVector4( -DotProduct( x, m_vPosition), -DotProduct( y, m_vPosition), -DotProduct( z, m_vPosition), 1.0f );

        
        // Disabled alternative that derives the basis from m_Transform:
        //z = Normalize( RQuatGetZAxis(m_Transform.orientation) );
        //x = Normalize( CrossProduct( RVector3(0.0f,1.0f,0.0f), z ) );
        //y = CrossProduct( z, x );
        //
        //m_mView.r0 = Homogonize( x );
        //m_mView.r1 = Homogonize( y );
        //m_mView.r2 = Homogonize( z );
        //m_mView.r3 = RVector4Zero();
        //m_mView = Transpose( m_mView );
        //
        //m_mView.r3 = RVector4( -DotProduct( x, m_Transform.position), -DotProduct( y, m_Transform.position), -DotProduct( z, m_Transform.position), 1.0f );
    
        char szCameraData[256] = { 0 };

        // snprintf bounds each write to the buffer size; "%f" prints every
        // integer digit, so very large components could otherwise overrun it.
        snprintf( szCameraData, sizeof( szCameraData ), "Pos:  %f, %f, %f", m_vPosition.x, m_vPosition.y, m_vPosition.z );
        Engine::GetRenderer()->DrawString( 200, 16, szCameraData );
        snprintf( szCameraData, sizeof( szCameraData ), "Look: %f, %f, %f", m_vLook.x, m_vLook.y, m_vLook.z );
        Engine::GetRenderer()->DrawString( 200, 32, szCameraData );
    }
Exemple #17
0
// Returns the inverse of A. A square matrix is inverted directly (with a
// dedicated routine for the 3 by 3 case); a non-square matrix gets the
// pseudo inverse (At * A)^-1 * At, where At is the transpose of A.
// Matrices with fewer than two rows or columns are rejected through
// ThrowMatrixMath.

dng_matrix Invert (const dng_matrix &A)
	{
	
	if (!(A.Rows () >= 2 && A.Cols () >= 2))
		{
		
		ThrowMatrixMath ();
		
		}
	
	if (A.Rows () != A.Cols ())
		{
		
		// Compute the pseudo inverse.
		
		dng_matrix At = Transpose (A);
		
		return Invert (At * A) * At;
		
		}
	
	if (A.Rows () == 3)
		{
		
		return Invert3by3 (A);
		
		}
	
	return InvertNbyN (A);
	
	}
Exemple #18
0
/*
   See Waggoner and Zha, "A Gibbs sampler for structural vector autoregressions", 
   JEDC 2003, for discription of notations.  We take the square root of a 
   symmetric and positive definite X to be any matrix Y such that Y*Y'=X.  Note 
   that this is not the usual definition because we do not require Y to be 
   symmetric and positive definite.
*/
void SBVAR_symmetric_linear::SetSimulationInfo(void)
{
  if (NumberObservations() == 0)
    throw dw_exception("SetSimulationInfo(): cannot simulate if no observations");

  TDenseMatrix all_YY, all_XY, all_XX;
  if (flat_prior)
    {
      all_YY=YY;
      all_XY=XY;
      all_XX=XX;
    }
  else
    {
      TDenseMatrix all_Y, all_X;
      all_Y=VCat(sqrt(lambda)*Data(),sqrt(lambda_bar)*prior_Y);
      all_X=VCat(sqrt(lambda)*PredeterminedData(),sqrt(lambda_bar)*prior_X);
      all_YY=Transpose(all_Y)*all_Y;
      all_XY=Transpose(all_X)*all_Y;
      all_XX=Transpose(all_X)*all_X;
    }

  Simulate_SqrtH.resize(n_vars);
  Simulate_P.resize(n_vars);
  Simulate_SqrtS.resize(n_vars);
  Simulate_USqrtS.resize(n_vars);

  for (int i=n_vars-1; i >= 0; i--)
    {
      TDenseMatrix invH=Transpose(V[i])*(all_XX*V[i]);
      Simulate_SqrtH[i]=Inverse(Cholesky(invH,CHOLESKY_UPPER_TRIANGULAR),SOLVE_UPPER_TRIANGULAR);
      Simulate_P[i]=Simulate_SqrtH[i]*(Transpose(Simulate_SqrtH[i])*(Transpose(V[i])*(all_XY*U[i])));
      Simulate_SqrtS[i]=sqrt(lambda_T)*Inverse(Cholesky(Transpose(U[i])*(all_YY*U[i]) - Transpose(Simulate_P[i])*(invH*Simulate_P[i]),CHOLESKY_UPPER_TRIANGULAR),SOLVE_UPPER_TRIANGULAR);
      Simulate_USqrtS[i]=U[i]*Simulate_SqrtS[i];
    }

  simulation_info_set=true;
}
Exemple #19
0
/*
=================
R_SubdividePatchToGrid

Builds a grid mesh from a width x height array of patch control points.

The points are copied into a working grid, which is then refined in two
passes (one per direction, with the grid transposed in between): every
group of three consecutive columns is tested for how far the curve midpoint
lies from the straight line between the outer two points, and two extra
columns are inserted where that distance exceeds r_subdivisions->value.
Columns whose points are (nearly) on the line are marked and removed
afterwards.  The per-column values stored in errorTable are handed to
R_CreateSurfaceGridMesh together with the final grid.
=================
*/
srfGridMesh_t *R_SubdividePatchToGrid(int width, int height,
                                      drawVert_t points[MAX_PATCH_SIZE * MAX_PATCH_SIZE])
{
	int                   i, j, k, l;
	drawVert_t            prev, next, mid;
	float                 len, maxLen;
	int                   dir;
	int                   t;
	MAC_STATIC drawVert_t ctrl[MAX_GRID_SIZE][MAX_GRID_SIZE];
	float                 errorTable[2][MAX_GRID_SIZE];

	// copy the row-major input (index j * width + i) into ctrl[row][column]
	for (i = 0 ; i < width ; i++)
	{
		for (j = 0 ; j < height ; j++)
		{
			ctrl[j][i] = points[j * width + i];
		}
	}

	// two passes: the grid is transposed at the end of each pass, so the
	// "horizontal" logic below handles the other direction on the second pass
	for (dir = 0 ; dir < 2 ; dir++)
	{
		for (j = 0 ; j < MAX_GRID_SIZE ; j++)
		{
			errorTable[dir][j] = 0;
		}

		// horizontal subdivisions
		for (j = 0 ; j + 2 < width ; j += 2)
		{
			// check subdivided midpoints against control points

			// FIXME: also check midpoints of adjacent patches against the control points
			// this would basically stitch all patches in the same LOD group together.

			maxLen = 0;
			for (i = 0 ; i < height ; i++)
			{
				vec3_t midxyz;
				// NOTE: this vec3_t shadows the outer int 'dir' inside this row
				// loop only; the errorTable[dir] accesses below use the int
				vec3_t dir;
				vec3_t projected;
				float  d;

				// calculate the point on the curve
				for (l = 0 ; l < 3 ; l++)
				{
					midxyz[l] = (ctrl[i][j].xyz[l] + ctrl[i][j + 1].xyz[l] * 2
					             + ctrl[i][j + 2].xyz[l]) * 0.25f;
				}

				// see how far off the line it is
				// using dist-from-line will not account for internal
				// texture warping, but it gives a lot less polygons than
				// dist-from-midpoint
				VectorSubtract(midxyz, ctrl[i][j].xyz, midxyz);
				VectorSubtract(ctrl[i][j + 2].xyz, ctrl[i][j].xyz, dir);
				VectorNormalize(dir);

				// remove the component along the line; what remains is the
				// offset of the midpoint from the line
				d = DotProduct(midxyz, dir);
				VectorScale(dir, d, projected);
				VectorSubtract(midxyz, projected, midxyz);
				len = VectorLengthSquared(midxyz);              // we will do the sqrt later

				if (len > maxLen)
				{
					maxLen = len;
				}
			}

			maxLen = sqrt(maxLen);
			// if all the points are on the lines, remove the entire columns
			// (999 is the marker tested by the culling loops further down)
			if (maxLen < 0.1f)
			{
				errorTable[dir][j + 1] = 999;
				continue;
			}

			// see if we want to insert subdivided columns
			if (width + 2 > MAX_GRID_SIZE)
			{
				errorTable[dir][j + 1] = 1.0f / maxLen;
				continue;   // can't subdivide any more
			}

			if (maxLen <= r_subdivisions->value)
			{
				errorTable[dir][j + 1] = 1.0f / maxLen;
				continue;   // didn't need subdivision
			}

			errorTable[dir][j + 2] = 1.0f / maxLen;

			// insert two columns and replace the peak
			width += 2;
			for (i = 0 ; i < height ; i++)
			{
				LerpDrawVert(&ctrl[i][j], &ctrl[i][j + 1], &prev);
				LerpDrawVert(&ctrl[i][j + 1], &ctrl[i][j + 2], &next);
				LerpDrawVert(&prev, &next, &mid);

				// shift the columns to the right of the triple by two to make room
				for (k = width - 1 ; k > j + 3 ; k--)
				{
					ctrl[i][k] = ctrl[i][k - 2];
				}
				ctrl[i][j + 1] = prev;
				ctrl[i][j + 2] = mid;
				ctrl[i][j + 3] = next;
			}

			// back up and recheck this set again, it may need more subdivision
			j -= 2;

		}

		// swap the roles of rows and columns for the next pass
		Transpose(width, height, ctrl);
		t      = width;
		width  = height;
		height = t;
	}

	// put all the aproximating points on the curve
	PutPointsOnCurve(ctrl, width, height);

	// cull out any rows or columns that are colinear
	// (columns marked 999 during the first pass)
	for (i = 1 ; i < width - 1 ; i++)
	{
		if (errorTable[0][i] != 999)
		{
			continue;
		}
		for (j = i + 1 ; j < width ; j++)
		{
			for (k = 0 ; k < height ; k++)
			{
				ctrl[k][j - 1] = ctrl[k][j];
			}
			errorTable[0][j - 1] = errorTable[0][j];
		}
		width--;
	}

	// same for rows marked 999 during the second pass
	for (i = 1 ; i < height - 1 ; i++)
	{
		if (errorTable[1][i] != 999)
		{
			continue;
		}
		for (j = i + 1 ; j < height ; j++)
		{
			for (k = 0 ; k < width ; k++)
			{
				ctrl[j - 1][k] = ctrl[j][k];
			}
			errorTable[1][j - 1] = errorTable[1][j];
		}
		height--;
	}

	// flip for longest tristrips as an optimization
	// the results should be visually identical with or
	// without this step
	if (height > width)
	{
		Transpose(width, height, ctrl);
		InvertErrorTable(errorTable, width, height);
		t      = width;
		width  = height;
		height = t;
		InvertCtrl(width, height, ctrl);
	}

	// calculate normals
	MakeMeshNormals(width, height, ctrl);

	return R_CreateSurfaceGridMesh(width, height, ctrl, errorTable);
}
Exemple #20
0
// Distributed triangular matrix-matrix multiply with the triangular matrix L
// applied from the left in (conjugate-)transposed form:
// X := alpha L^{T/H} X, as far as the steps below show.
//
// X is first scaled by alpha. L and X are then traversed top to bottom in
// blocks; for each block row X1 the diagonal block L11 is applied with a
// local Trmm, and the contribution of the rows below (L21 against X2) is
// accumulated into X1.
//
//   orientation : TRANSPOSE or ADJOINT; NORMAL is rejected in debug builds
//   diag        : forwarded to LocalTrmm for the diagonal blocks
//   alpha       : scalar applied to X up front
//   L           : square matrix whose height matches that of X (checked in
//                 debug builds)
//   X           : overwritten with the result
inline void
TrmmLLTCOld
( Orientation orientation, 
  UnitOrNonUnit diag,
  T alpha, 
  const DistMatrix<T>& L,
        DistMatrix<T>& X )
{
#ifndef RELEASE
    PushCallStack("internal::TrmmLLTCOld");
    if( L.Grid() != X.Grid() )
        throw std::logic_error
        ("L and X must be distributed over the same grid");
    if( orientation == NORMAL )
        throw std::logic_error("TrmmLLT expects a (Conjugate)Transpose option");
    if( L.Height() != L.Width() || L.Height() != X.Height() )
    {
        std::ostringstream msg;
        msg << "Nonconformal TrmmLLTC: \n"
            << "  L ~ " << L.Height() << " x " << L.Width() << "\n"
            << "  X ~ " << X.Height() << " x " << X.Width() << "\n";
        throw std::logic_error( msg.str().c_str() );
    }
#endif
    const Grid& g = L.Grid();

    // Matrix views
    DistMatrix<T> 
        LTL(g), LTR(g),  L00(g), L01(g), L02(g),
        LBL(g), LBR(g),  L10(g), L11(g), L12(g),
                         L20(g), L21(g), L22(g);
    DistMatrix<T> XT(g),  X0(g),
                  XB(g),  X1(g),
                          X2(g);

    // Temporary distributions
    DistMatrix<T,STAR,STAR> L11_STAR_STAR(g);
    DistMatrix<T,MC,  STAR> L21_MC_STAR(g);
    DistMatrix<T,STAR,VR  > X1_STAR_VR(g);
    DistMatrix<T,MR,  STAR> D1AdjOrTrans_MR_STAR(g);
    DistMatrix<T,MR,  MC  > D1AdjOrTrans_MR_MC(g);
    DistMatrix<T,MC,  MR  > D1(g);

    // Start the algorithm
    Scale( alpha, X );
    LockedPartitionDownDiagonal
    ( L, LTL, LTR,
         LBL, LBR, 0 );
    PartitionDown
    ( X, XT,
         XB, 0 );
    // Each iteration handles one block row X1 until XB is exhausted
    while( XB.Height() > 0 )
    {
        LockedRepartitionDownDiagonal
        ( LTL, /**/ LTR,  L00, /**/ L01, L02,
         /*************/ /******************/
               /**/       L10, /**/ L11, L12,
          LBL, /**/ LBR,  L20, /**/ L21, L22 );

        RepartitionDown
        ( XT,  X0,
         /**/ /**/
               X1,
          XB,  X2 ); 

        // Align the temporaries with the current blocks; D1AdjOrTrans holds
        // the update in transposed shape (X1.Width() x X1.Height())
        L21_MC_STAR.AlignWith( X2 );
        D1AdjOrTrans_MR_STAR.AlignWith( X1 );
        D1AdjOrTrans_MR_MC.AlignWith( X1 );
        D1.AlignWith( X1 );
        Zeros( X1.Width(), X1.Height(), D1AdjOrTrans_MR_STAR );
        Zeros( X1.Height(), X1.Width(), D1 );
        //--------------------------------------------------------------------//
        // X1 := L11^{T/H} X1, computed locally on redistributed copies
        X1_STAR_VR = X1;
        L11_STAR_STAR = L11;
        LocalTrmm
        ( LEFT, LOWER, orientation, diag, T(1), L11_STAR_STAR, X1_STAR_VR );
        X1 = X1_STAR_VR;
 
        // D1^{T/H} := X2^{T/H} L21, summed across processes; it is then
        // transposed (or adjointed) locally into D1 and added: X1 += D1
        L21_MC_STAR = L21;
        LocalGemm
        ( orientation, NORMAL, 
          T(1), X2, L21_MC_STAR, T(0), D1AdjOrTrans_MR_STAR );
        D1AdjOrTrans_MR_MC.SumScatterFrom( D1AdjOrTrans_MR_STAR );
        if( orientation == TRANSPOSE )
            Transpose( D1AdjOrTrans_MR_MC.LocalMatrix(), D1.LocalMatrix() );
        else
            Adjoint( D1AdjOrTrans_MR_MC.LocalMatrix(), D1.LocalMatrix() );
        Axpy( T(1), D1, X1 );
        //--------------------------------------------------------------------//
        // Release the alignments so they can be set again for the next block
        D1.FreeAlignments();
        D1AdjOrTrans_MR_MC.FreeAlignments();
        D1AdjOrTrans_MR_STAR.FreeAlignments();
        L21_MC_STAR.FreeAlignments();

        SlideLockedPartitionDownDiagonal
        ( LTL, /**/ LTR,  L00, L01, /**/ L02,
               /**/       L10, L11, /**/ L12,
         /*************/ /******************/
          LBL, /**/ LBR,  L20, L21, /**/ L22 );

        SlidePartitionDown
        ( XT,  X0,
               X1,
         /**/ /**/
          XB,  X2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Exemple #21
0
// Timing driver for the SML matrix classes.
//
// Each case sets testname, prepares its operands, and wraps the operation to
// be timed between START_MEASUREMENTS and END_MEASUREMENTS. Those macros are
// defined outside this function; presumably they use the locals i, tick,
// ticks, base and testname declared below -- confirm before renaming or
// removing any of them.
int main(int argc, char* argv[])
{
	// Various examples.
	int i;
	long tick, base = 0;
	long ticks[SAMPLES];
	char* testname;

	// We are looking for the best value among SAMPLES to
	// eliminate cache delays and effects of cpuid variable timing.

	// Empty measurement: gives the overhead of the timing itself
	testname = "base";
	START_MEASUREMENTS;
	END_MEASUREMENTS;
	base = Duration(ticks);     // time required to count processor clocks

	// ================
	// SMLXMatrix Tests
	// ================

	SMLXMatrix m1(3, 3);        // declare initial size
	SMLXMatrix m2(3, 3);
	SMLXMatrix m3;              // size will be set by '='
	SMLXSpatialVector v1(11, 22, 33);
	SMLXSpatialVector v2;

	testname = "3x3 * 3x1";
	m1.Set(2.0);
	m2.Set(3.0);
	m1[1][2] = m1[2][1] = 3;
	
	START_MEASUREMENTS;
	v2 = m1 * v1;
	END_MEASUREMENTS;
	// m1.Output("m1");
	// v1.Output("v1");
	// v2.Output("m1 * v1");

	testname = "3x3 * 3x3";
	START_MEASUREMENTS;
	m3 = m1 * m2;
	END_MEASUREMENTS;
	// m1.Output("m1");
	// m2.Output("m2");
	// m3.Output("m1 * m2");

	testname = "6x6 * 6x6";
	m1.Resize(6, 6);
	m2.Resize(6, 6);
	m1.Set(1);
	m2.Set(2);
	m1[0][5] = 10;

	START_MEASUREMENTS;
	m3 = m1 * m2;
	END_MEASUREMENTS;
	// m1.Output("m1");
	// m2.Output("m2");
	// m3.Output("m1 * m2");

	testname = "6x6 * Transpose(6x6)";
	START_MEASUREMENTS;
	m3 = m1 * Transpose(m2);
	END_MEASUREMENTS;

	testname = "6x6 + 6x6";
	START_MEASUREMENTS;
	m3 = m1 + m3;
	END_MEASUREMENTS;

	testname = "6x6 * 6x6 - Transpose(6x6) * 6x6 - 6x6";
	START_MEASUREMENTS;
	m3 = m1 * m2 - Transpose(m3) * m1 - m2;
	END_MEASUREMENTS;

	// Assuming non-zero diagonal...
	testname = "Invert Without Pivoting(6x6)";
	m1.Identity();
	m1[0][0] = 10;
	m1[3][4] = 2;
	m3 = m1;

	START_MEASUREMENTS;
	m1.Invert();
	END_MEASUREMENTS;
	// m3 = m3 - m1; // discard even number of inversions
	// m3.Output("m3");

	// General case.
	testname = "Invert With Pivoting(6x6)";
	START_MEASUREMENTS;
	m1.GenericInvert();
	END_MEASUREMENTS;

	// =================
	// SMLMatrix3f tests
	// =================

	// TransformPoint and Multiply are inlined for SMLMatrix3f,
	// so timing is not entirely correct 
	// (some subexpressions are optimized out of loop)...

	testname = "TransformPoint 3x3";
	m1.Resize(3, 3);
	// NOTE(review): the cast reinterprets the SMLXMatrix object as an
	// SMLMatrix3f; whether the two layouts are compatible cannot be seen here
	SMLMatrix3f m33_1 = (const SMLMatrix3f&) m1;
	SMLMatrix3f m33_2(m33_1);
	SMLMatrix3f m33_3;
	SMLVec3f v3_1(11, 22, 33);
	SMLVec3f v3_2;
	m33_2.Set(1, 2, 3.0);

	START_MEASUREMENTS;
	m33_1.TransformPoint(v3_1, v3_2);
	END_MEASUREMENTS;
	// m33_1.Output("m1");
	// report("m1 * v1 = {%f, %f, %f}", v3_2.x, v3_2.y, v3_2.z);

	testname = "Multiply 3x3";
	START_MEASUREMENTS;
	m33_3.Multiply(m33_1, m33_2);
	END_MEASUREMENTS;

	// =================
	// SMLMatrix4f tests
	// =================

	testname = "Transform 4x4";
	m1.Resize(4, 4);
	// NOTE(review): same reinterpreting cast as for the 3x3 case above
	SMLMatrix4f m44_1 = (const SMLMatrix4f&) m1;
	SMLMatrix4f m44_2(m44_1);
	SMLMatrix4f m44_3;
	SMLVec4f v4_1(11, 22, 33, 44);
	SMLVec4f v4_2;
	m44_2.Set(1, 2, 3.0);

	START_MEASUREMENTS;
	m44_1.Transform(v4_1, v4_2);
	END_MEASUREMENTS;
	// m44_1.Output("m1");
	// report("m1 * v1 = {%f, %f, %f, %f}", v4_2.x, v4_2.y, v4_2.z, v4_2.w);

	testname = "TransformPoint 4x4";
	START_MEASUREMENTS;
	m44_1.TransformPoint(v3_1, v3_2);
	END_MEASUREMENTS;

	// NOTE(review): this case is labelled "TransformVector" but times
	// TransformPoint again, exactly like the previous case -- possibly a
	// copy-paste slip; confirm which method was meant
	testname = "TransformVector 4x4";
	START_MEASUREMENTS;
	m44_1.TransformPoint(v3_1, v3_2);
	END_MEASUREMENTS;

	testname = "Multiply 4x4";
	START_MEASUREMENTS;
	m44_3.Multiply(m44_1, m44_2);
	END_MEASUREMENTS;

	return 0;
}
Exemple #22
0
// Distributed symmetric matrix-vector update on DistMatrix operands.
//
// y is first scaled by beta; a vector z is then accumulated from alpha, A and
// x by the internal::LocalSymv{Col,Row}Accumulate{L,U} helpers and added to y
// (presumably y := alpha A x + beta y as in BLAS symv -- confirm against the
// helpers, which are not defined here).
//
//   uplo      - LOWER selects the ...AccumulateL helper, any other value the
//               ...AccumulateU helper.
//   alpha     - forwarded to the local accumulation helper.
//   A         - square distributed matrix.
//   x         - input vector, stored either as one column or as one row.
//   beta      - scale applied to y before the update is added.
//   y         - in/out vector, stored either as one column or as one row.
//   conjugate - forwarded unchanged to the local accumulation helper
//               (presumably selects the Hermitian variant -- confirm).
//
// Unless RELEASE is defined, std::logic_error is thrown when the operands are
// on different grids, A is not square, x or y is not a vector, or the vector
// lengths do not match A's height.
inline void
Symv
( UpperOrLower uplo,
  T alpha, const DistMatrix<T>& A,
           const DistMatrix<T>& x,
  T beta,        DistMatrix<T>& y,
  bool conjugate=false )
{
#ifndef RELEASE
    CallStackEntry entry("Symv");
    // Argument validation (debug builds only)
    if( A.Grid() != x.Grid() || x.Grid() != y.Grid() )
        throw std::logic_error
        ("{A,x,y} must be distributed over the same grid");
    if( A.Height() != A.Width() )
        throw std::logic_error("A must be square");
    if( ( x.Width() != 1 && x.Height() != 1 ) ||
        ( y.Width() != 1 && y.Height() != 1 ) )
        throw std::logic_error("x and y are assumed to be vectors");
    // A vector's length is whichever dimension is not 1
    const int xLength = ( x.Width()==1 ? x.Height() : x.Width() );
    const int yLength = ( y.Width()==1 ? y.Height() : y.Width() );
    if( A.Height() != xLength || A.Height() != yLength )
    {
        std::ostringstream msg;
        msg << "Nonconformal Symv: \n"
            << "  A ~ " << A.Height() << " x " << A.Width() << "\n"
            << "  x ~ " << x.Height() << " x " << x.Width() << "\n"
            << "  y ~ " << y.Height() << " x " << y.Width() << "\n";
        throw std::logic_error( msg.str() );
    }
#endif
    const Grid& g = A.Grid();

    // The four branches below differ only in the storage orientation of x
    // and y; each one picks temporaries whose distributions match.
    if( x.Width() == 1 && y.Width() == 1 )
    {
        // Case 1: x and y are both stored as columns

        // Temporary distributions
        DistMatrix<T,MC,STAR> x_MC_STAR(g), z_MC_STAR(g);
        DistMatrix<T,MR,STAR> x_MR_STAR(g), z_MR_STAR(g);
        DistMatrix<T,MR,MC  > z_MR_MC(g);
        DistMatrix<T> z(g);

        // Begin the algorithm
        Scale( beta, y );
        x_MC_STAR.AlignWith( A );
        x_MR_STAR.AlignWith( A );
        z_MC_STAR.AlignWith( A );
        z_MR_STAR.AlignWith( A );
        z.AlignWith( y );
        // Both partial-result vectors start at zero
        Zeros( z_MC_STAR, y.Height(), 1 );
        Zeros( z_MR_STAR, y.Height(), 1 );
        //--------------------------------------------------------------------//
        // Redistribute x into both layouts the local helper consumes
        x_MC_STAR = x;
        x_MR_STAR = x_MC_STAR;
        if( uplo == LOWER )
        {
            internal::LocalSymvColAccumulateL
            ( alpha, A, x_MC_STAR, x_MR_STAR, z_MC_STAR, z_MR_STAR, conjugate );
        }
        else
        {
            internal::LocalSymvColAccumulateU
            ( alpha, A, x_MC_STAR, x_MR_STAR, z_MC_STAR, z_MR_STAR, conjugate );
        }

        // Combine the two partial results into z, then add z to y
        z_MR_MC.SumScatterFrom( z_MR_STAR );
        z = z_MR_MC;
        z.SumScatterUpdate( T(1), z_MC_STAR );
        Axpy( T(1), z, y );
        //--------------------------------------------------------------------//
        x_MC_STAR.FreeAlignments();
        x_MR_STAR.FreeAlignments();
        z_MC_STAR.FreeAlignments();
        z_MR_STAR.FreeAlignments();
        z.FreeAlignments();
    }
    else if( x.Width() == 1 )
    {
        // Case 2: x is stored as a column, y as a row

        // Temporary distributions
        DistMatrix<T,MC,STAR> x_MC_STAR(g), z_MC_STAR(g);
        DistMatrix<T,MR,STAR> x_MR_STAR(g), z_MR_STAR(g);
        DistMatrix<T,MR,MC  > z_MR_MC(g);
        DistMatrix<T> z(g), zTrans(g);

        // Begin the algorithm
        Scale( beta, y );
        x_MC_STAR.AlignWith( A );
        x_MR_STAR.AlignWith( A );
        z_MC_STAR.AlignWith( A );
        z_MR_STAR.AlignWith( A );
        z.AlignWith( y );
        z_MR_MC.AlignWith( y );
        // y is a row here, so its length is y.Width()
        Zeros( z_MC_STAR, y.Width(), 1 );
        Zeros( z_MR_STAR, y.Width(), 1 );
        //--------------------------------------------------------------------//
        x_MC_STAR = x;
        x_MR_STAR = x_MC_STAR;
        if( uplo == LOWER )
        {
            internal::LocalSymvColAccumulateL
            ( alpha, A, x_MC_STAR, x_MR_STAR, z_MC_STAR, z_MR_STAR, conjugate );
        }
        else
        {
            internal::LocalSymvColAccumulateU
            ( alpha, A, x_MC_STAR, x_MR_STAR, z_MC_STAR, z_MR_STAR, conjugate );
        }

        // Combine the partial results as a column, then transpose the
        // result before adding it to the row vector y
        z.SumScatterFrom( z_MC_STAR );
        z_MR_MC = z;
        z_MR_MC.SumScatterUpdate( T(1), z_MR_STAR );
        Transpose( z_MR_MC, zTrans );
        Axpy( T(1), zTrans, y );
        //--------------------------------------------------------------------//
        x_MC_STAR.FreeAlignments();
        x_MR_STAR.FreeAlignments();
        z_MC_STAR.FreeAlignments();
        z_MR_STAR.FreeAlignments();
        z.FreeAlignments();
        z_MR_MC.FreeAlignments();
    }
    else if( y.Width() == 1 )
    {
        // Case 3: x is stored as a row, y as a column

        // Temporary distributions
        DistMatrix<T,STAR,MC> x_STAR_MC(g), z_STAR_MC(g);
        DistMatrix<T,STAR,MR> x_STAR_MR(g), z_STAR_MR(g);
        DistMatrix<T,MR,  MC> z_MR_MC(g);
        DistMatrix<T> z(g), zTrans(g);

        // Begin the algorithm
        Scale( beta, y );
        x_STAR_MC.AlignWith( A );
        x_STAR_MR.AlignWith( A );
        z_STAR_MC.AlignWith( A );
        z_STAR_MR.AlignWith( A );
        z.AlignWith( y );
        z_MR_MC.AlignWith( y );
        // Partial results are rows whose length is that of the column y
        Zeros( z_STAR_MC, 1, y.Height() );
        Zeros( z_STAR_MR, 1, y.Height() );
        //--------------------------------------------------------------------//
        x_STAR_MR = x;
        x_STAR_MC = x_STAR_MR;
        if( uplo == LOWER )
        {
            internal::LocalSymvRowAccumulateL
            ( alpha, A, x_STAR_MC, x_STAR_MR, z_STAR_MC, z_STAR_MR, conjugate );
        }
        else
        {
            internal::LocalSymvRowAccumulateU
            ( alpha, A, x_STAR_MC, x_STAR_MR, z_STAR_MC, z_STAR_MR, conjugate );
        }

        // Combine the partial results as a row, then transpose the result
        // before adding it to the column vector y
        z.SumScatterFrom( z_STAR_MR );
        z_MR_MC = z;
        z_MR_MC.SumScatterUpdate( T(1), z_STAR_MC );
        Transpose( z_MR_MC, zTrans );
        Axpy( T(1), zTrans, y );
        //--------------------------------------------------------------------//
        x_STAR_MC.FreeAlignments();
        x_STAR_MR.FreeAlignments();
        z_STAR_MC.FreeAlignments();
        z_STAR_MR.FreeAlignments();
        z.FreeAlignments();
        z_MR_MC.FreeAlignments();
    }
    else
    {
        // Case 4: x and y are both stored as rows

        // Temporary distributions
        DistMatrix<T,STAR,MC> x_STAR_MC(g), z_STAR_MC(g);
        DistMatrix<T,STAR,MR> x_STAR_MR(g), z_STAR_MR(g);
        DistMatrix<T,MR,  MC> z_MR_MC(g);
        DistMatrix<T> z(g);

        // Begin the algorithm
        Scale( beta, y );
        x_STAR_MC.AlignWith( A );
        x_STAR_MR.AlignWith( A );
        z_STAR_MC.AlignWith( A );
        z_STAR_MR.AlignWith( A );
        z.AlignWith( y );
        z_MR_MC.AlignWith( y );
        Zeros( z_STAR_MC, 1, y.Width() );
        Zeros( z_STAR_MR, 1, y.Width() );
        //--------------------------------------------------------------------//
        x_STAR_MR = x;
        x_STAR_MC = x_STAR_MR;
        if( uplo == LOWER )
        {
            internal::LocalSymvRowAccumulateL
            ( alpha, A, x_STAR_MC, x_STAR_MR, z_STAR_MC, z_STAR_MR, conjugate );
        }
        else
        {
            internal::LocalSymvRowAccumulateU
            ( alpha, A, x_STAR_MC, x_STAR_MR, z_STAR_MC, z_STAR_MR, conjugate );
        }

        // Combine the two partial results into z, then add z to y
        z_MR_MC.SumScatterFrom( z_STAR_MC );
        z = z_MR_MC;
        z.SumScatterUpdate( T(1), z_STAR_MR );
        Axpy( T(1), z, y );
        //--------------------------------------------------------------------//
        x_STAR_MC.FreeAlignments();
        x_STAR_MR.FreeAlignments();
        z_STAR_MC.FreeAlignments();
        z_STAR_MR.FreeAlignments();
        z.FreeAlignments();
        z_MR_MC.FreeAlignments();
    }
}
Exemple #23
0
// Blocked distributed triangular matrix-matrix multiply with a lower
// triangular L applied from the left and not transposed: X is scaled by
// alpha and then overwritten block row by block row, walking from the bottom
// of X to the top (presumably yielding X := alpha L X -- confirm against the
// local kernels, which are not defined here).
//
//   diag  - forwarded to LocalTrmm for the diagonal block.
//   alpha - scale applied to X before the sweep.
//   L     - square matrix whose width equals X's height.
//   X     - in/out matrix.
//
// Unless RELEASE is defined, std::logic_error is thrown when L and X are on
// different grids or their dimensions do not conform.
inline void
TrmmLLNCOld
( UnitOrNonUnit diag,
  T alpha, const DistMatrix<T>& L,
                 DistMatrix<T>& X )
{
#ifndef RELEASE
    CallStackEntry entry("internal::TrmmLLNCOld");
    // Argument validation (debug builds only)
    if( L.Grid() != X.Grid() )
        throw std::logic_error
        ("L and X must be distributed over the same grid");
    if( L.Height() != L.Width() || L.Width() != X.Height() )
    {
        std::ostringstream msg;
        msg << "Nonconformal TrmmLLNC: \n"
            << "  L ~ " << L.Height() << " x " << L.Width() << "\n"
            << "  X ~ " << X.Height() << " x " << X.Width() << "\n";
        throw std::logic_error( msg.str().c_str() );
    }
#endif
    const Grid& g = L.Grid();

    // Matrix views
    DistMatrix<T> 
        LTL(g), LTR(g),  L00(g), L01(g), L02(g),
        LBL(g), LBR(g),  L10(g), L11(g), L12(g),
                         L20(g), L21(g), L22(g);
    DistMatrix<T> XT(g),  X0(g),
                  XB(g),  X1(g),
                          X2(g);

    // Temporary distributions
    DistMatrix<T,STAR,MC  > L10_STAR_MC(g);
    DistMatrix<T,STAR,STAR> L11_STAR_STAR(g);
    DistMatrix<T,STAR,VR  > X1_STAR_VR(g);
    DistMatrix<T,MR,  STAR> D1Trans_MR_STAR(g);
    DistMatrix<T,MR,  MC  > D1Trans_MR_MC(g);
    DistMatrix<T,MC,  MR  > D1(g);

    // Start the algorithm
    Scale( alpha, X );
    LockedPartitionUpDiagonal
    ( L, LTL, LTR,
         LBL, LBR, 0 );
    PartitionUp
    ( X, XT,
         XB, 0 );
    // Each pass handles the block row X1 exposed just below XT
    while( XT.Height() > 0 )
    {
        LockedRepartitionUpDiagonal
        ( LTL, /**/ LTR,  L00, L01, /**/ L02,
               /**/       L10, L11, /**/ L12,
         /*************/ /******************/
          LBL, /**/ LBR,  L20, L21, /**/ L22 );

        RepartitionUp
        ( XT,  X0,
               X1,
         /**/ /**/
          XB,  X2 );

        L10_STAR_MC.AlignWith( X0 );
        D1Trans_MR_STAR.AlignWith( X1 );
        D1Trans_MR_MC.AlignWith( X1 );
        D1.AlignWith( X1 );
        //--------------------------------------------------------------------//
        // Diagonal block: apply L11 to X1 through redistributed copies
        L11_STAR_STAR = L11;
        X1_STAR_VR = X1;
        LocalTrmm( LEFT, LOWER, NORMAL, diag, T(1), L11_STAR_STAR, X1_STAR_VR );
        X1 = X1_STAR_VR;

        // Off-diagonal block: form the product of X0^T and L10^T locally,
        // reduce it, transpose it into D1 and add D1 to X1
        L10_STAR_MC = L10;
        LocalGemm
        ( TRANSPOSE, TRANSPOSE, T(1), X0, L10_STAR_MC, D1Trans_MR_STAR );
        D1Trans_MR_MC.SumScatterFrom( D1Trans_MR_STAR );
        Zeros( D1, X1.Height(), X1.Width() );
        Transpose( D1Trans_MR_MC.Matrix(), D1.Matrix() );
        Axpy( T(1), D1, X1 );
        //--------------------------------------------------------------------//
        D1.FreeAlignments();
        D1Trans_MR_MC.FreeAlignments();
        D1Trans_MR_STAR.FreeAlignments();
        L10_STAR_MC.FreeAlignments();

        SlideLockedPartitionUpDiagonal
        ( LTL, /**/ LTR,  L00, /**/ L01, L02,
         /*************/ /******************/
               /**/       L10, /**/ L11, L12, 
          LBL, /**/ LBR,  L20, /**/ L21, L22 );

        SlidePartitionUp
        ( XT,  X0,
         /**/ /**/
               X1,
          XB,  X2 );
    }
}
Exemple #24
0
/*
==================
==================
*/
void Vertex_Lighting(

	const __int32 n_triangles,
	const vertex_light_manager_& vertex_light_manager,
	const float4_ positions[4][3],
	float4_ colour[4][3]

) {

	static const float r_screen_scale_x = 1.0f / screen_scale_x;
	static const float r_screen_scale_y = 1.0f / screen_scale_y;
	const __m128 attenuation_factor = set_all(800.0f);
	const __m128 specular_scale = set_all(100.0f);
	const __m128 diffuse_scale = set_all(20.0f);

	const __m128 zero = set_all(0.0f);
	const __m128 one = set_all(1.0f);

	__m128 r_screen_scale[2];
	r_screen_scale[X] = set_all(r_screen_scale_x);
	r_screen_scale[Y] = set_all(r_screen_scale_y);
	__m128 screen_shift[2];
	screen_shift[X] = set_all(screen_shift_x);
	screen_shift[Y] = set_all(screen_shift_y);

	__m128 clip_space_position[3][4];
	__m128 vertex_colour[3][4];

	for (__int32 i_vertex = 0; i_vertex < 3; i_vertex++) {

		__m128 vertex_position[4];
		for (__int32 i_triangle = 0; i_triangle < n_triangles; i_triangle++) {
			vertex_position[i_triangle] = load_u(positions[i_triangle][i_vertex].f);
			vertex_colour[i_vertex][i_triangle] = load_u(colour[i_triangle][i_vertex].f);
		}
		Transpose(vertex_position);
		Transpose(vertex_colour[i_vertex]);

		__m128 depth = reciprocal(vertex_position[Z]);
		clip_space_position[i_vertex][X] = ((vertex_position[X] - screen_shift[X]) * r_screen_scale[X]) * depth;
		clip_space_position[i_vertex][Y] = ((vertex_position[Y] - screen_shift[Y]) * r_screen_scale[Y]) * depth;
		clip_space_position[i_vertex][Z] = depth;
	}

	__m128 a[3];
	a[X] = clip_space_position[1][X] - clip_space_position[0][X];
	a[Y] = clip_space_position[1][Y] - clip_space_position[0][Y];
	a[Z] = clip_space_position[1][Z] - clip_space_position[0][Z];

	__m128 b[3];
	b[X] = clip_space_position[2][X] - clip_space_position[0][X];
	b[Y] = clip_space_position[2][Y] - clip_space_position[0][Y];
	b[Z] = clip_space_position[2][Z] - clip_space_position[0][Z];


	__m128 normal[4];
	normal[X] = (a[Y] * b[Z]) - (a[Z] * b[Y]);
	normal[Y] = (a[Z] * b[X]) - (a[X] * b[Z]);
	normal[Z] = (a[X] * b[Y]) - (a[Y] * b[X]);

	__m128 mag = (normal[X] * normal[X]) + (normal[Y] * normal[Y]) + (normal[Z] * normal[Z]);
	mag = _mm_rsqrt_ps(mag);
	normal[X] *= mag;
	normal[Y] *= mag;
	normal[Z] *= mag;

	for (__int32 i_light = 0; i_light < 1; i_light++) {


		for (__int32 i_vertex = 0; i_vertex < 3; i_vertex++) {


			__m128 light_position[3];
			__m128 light_colour[3];
			const float intensity = vertex_light_manager.light_sources[i_light].intensity;
			for (__int32 i_axis = X; i_axis < W; i_axis++) {

				light_position[i_axis] = set_all(vertex_light_manager.light_sources[i_light].position.f[i_axis]);
				light_colour[i_axis] = set_all(vertex_light_manager.light_sources[i_light].colour.f[i_axis] * intensity);
			}

			const __m128 extent = set_all(40.0f);
			__m128i is_valid = set_all(-1);
			is_valid &= (clip_space_position[i_vertex][X] - light_position[X]) < extent;
			is_valid &= (clip_space_position[i_vertex][Y] - light_position[Y]) < extent;
			is_valid &= (clip_space_position[i_vertex][Z] - light_position[Z]) < extent;

			light_position[X] = set_all(0.0f);
			light_position[Y] = set_all(0.0f);
			light_position[Z] = set_all(0.0f);

			light_colour[X] = set_all(100.0f);
			light_colour[Y] = set_all(100.0f);
			light_colour[Z] = set_all(100.0f);

			__m128 light_ray[3];
			light_ray[X] = clip_space_position[i_vertex][X] - light_position[X];
			light_ray[Y] = clip_space_position[i_vertex][Y] - light_position[Y];
			light_ray[Z] = clip_space_position[i_vertex][Z] - light_position[Z];

			__m128 mag = (light_ray[X] * light_ray[X]) + (light_ray[Y] * light_ray[Y]) + (light_ray[Z] * light_ray[Z]);
			mag = _mm_rsqrt_ps(mag);
			light_ray[X] *= mag;
			light_ray[Y] *= mag;
			light_ray[Z] *= mag;

			__m128 dot = (normal[X] * light_ray[X]) + (normal[Y] * light_ray[Y]) + (normal[Z] * light_ray[Z]);
			dot &= dot > zero;
			dot = (dot * dot) * mag;

			__m128 distance = set_zero();
			for (__int32 i_axis = X; i_axis < W; i_axis++) {
				__m128 d = light_position[i_axis] - clip_space_position[i_vertex][i_axis];
				distance += (d * d);
			}
			__m128 scalar = reciprocal(distance) * attenuation_factor;
			scalar = max_vec(scalar, zero);
			scalar = min_vec(scalar, one);

			for (__int32 i_channel = R; i_channel < A; i_channel++) {
				vertex_colour[i_vertex][i_channel] += dot * specular_scale * light_colour[i_channel];
				vertex_colour[i_vertex][i_channel] += mag * diffuse_scale * light_colour[i_channel];
			}
		}
	}
	for (__int32 i_vertex = 0; i_vertex < 3; i_vertex++) {
		Transpose(vertex_colour[i_vertex]);
		for (__int32 i_triangle = 0; i_triangle < n_triangles; i_triangle++) {
			store_u(vertex_colour[i_vertex][i_triangle], colour[i_triangle][i_vertex].f);
		}
	}


}
Exemple #25
0
/*
==================
==================
*/
void Vertex_Lighting_REM(

	const __int32 n_triangles,
	const vertex_light_manager_& vertex_light_manager,
	const float4_ positions[4][3],
	float4_ colour[4][3]

) {

	//const __int32 VERTEX_COLOUR = FIRST_ATTRIBUTE + 0;

	static const float r_screen_scale_x = 1.0f / screen_scale_x;
	static const float r_screen_scale_y = 1.0f / screen_scale_y;
	//const __m128 attenuation_factor = set_all(200.0f);
	//const __m128 attenuation_factor = set_all(800.0f);
	//const __m128 specular_scale = set_all(100.0f);
	//const __m128 diffuse_scale = set_all(20.0f);

	__m128 r_screen_scale[2];
	r_screen_scale[X] = set_all(r_screen_scale_x);
	r_screen_scale[Y] = set_all(r_screen_scale_y);
	__m128 screen_shift[2];
	screen_shift[X] = set_all(screen_shift_x);
	screen_shift[Y] = set_all(screen_shift_y);

	__m128 clip_space_position[3][4];
	//__m128 vertex_colour[3][4];

	float4_ new_position[4][3];
	for (__int32 i_vertex = 0; i_vertex < 3; i_vertex++) {

		__m128 vertex_position[4];
		for (__int32 i_triangle = 0; i_triangle < n_triangles; i_triangle++) {
			vertex_position[i_triangle] = load_u(positions[i_triangle][i_vertex].f);
			//vertex_colour[i_vertex][i_triangle] = load_u(colour[i_triangle][i_vertex].f);
		}
		Transpose(vertex_position);
		//Transpose(vertex_colour[i_vertex]);

		__m128 depth = reciprocal(vertex_position[Z]);
		clip_space_position[i_vertex][X] = ((vertex_position[X] - screen_shift[X]) * r_screen_scale[X]) * depth;
		clip_space_position[i_vertex][Y] = ((vertex_position[Y] - screen_shift[Y]) * r_screen_scale[Y]) * depth;
		clip_space_position[i_vertex][Z] = depth;


	}

	__m128 a[3];
	a[X] = clip_space_position[1][X] - clip_space_position[0][X];
	a[Y] = clip_space_position[1][Y] - clip_space_position[0][Y];
	a[Z] = clip_space_position[1][Z] - clip_space_position[0][Z];

	__m128 b[3];
	b[X] = clip_space_position[2][X] - clip_space_position[0][X];
	b[Y] = clip_space_position[2][Y] - clip_space_position[0][Y];
	b[Z] = clip_space_position[2][Z] - clip_space_position[0][Z];


	__m128 normal[4];
	normal[X] = (a[Y] * b[Z]) - (a[Z] * b[Y]);
	normal[Y] = (a[Z] * b[X]) - (a[X] * b[Z]);
	normal[Z] = (a[X] * b[Y]) - (a[Y] * b[X]);

	__m128 mag = (normal[X] * normal[X]) + (normal[Y] * normal[Y]) + (normal[Z] * normal[Z]);
	mag = _mm_rsqrt_ps(mag);
	normal[X] *= mag;
	normal[Y] *= mag;
	normal[Z] *= mag;

	float normal_4[3][4];
	store_u(normal[X], normal_4[X]);
	store_u(normal[Y], normal_4[Y]);
	store_u(normal[Z], normal_4[Z]);

	float centre_4[3][4];
	float extent_4[3][4];
	const __m128 half = set_all(0.5f);
	for (__int32 i_axis = X; i_axis < W; i_axis++) {

		__m128 max;
		__m128 min;
		max = min = clip_space_position[0][i_axis];
		max = max_vec(max_vec(max, clip_space_position[1][i_axis]), clip_space_position[2][i_axis]);
		min = min_vec(min_vec(min, clip_space_position[1][i_axis]), clip_space_position[2][i_axis]);
		store_u((max + min) * half, centre_4[i_axis]);
		store_u((max - min) * half, extent_4[i_axis]);
	}

	for (__int32 i_vertex = 0; i_vertex < 3; i_vertex++) {

		Transpose(clip_space_position[i_vertex]);
		for (__int32 i_triangle = 0; i_triangle < n_triangles; i_triangle++) {
			store_u(clip_space_position[i_vertex][i_triangle], new_position[i_triangle][i_vertex].f);
		}
	}

	const __m128 zero = set_all(0.0f);
	const __m128 one = set_all(1.0f);

	enum {
		MAX_LIGHTS_PER_VERTEX = 128,
	};

	for (__int32 i_triangle = 0; i_triangle < n_triangles; i_triangle++) {

		__m128 centre[3];
		__m128 extent[3];
		for (__int32 i_axis = X; i_axis < W; i_axis++) {
			centre[i_axis] = set_all(centre_4[i_axis][i_triangle]);
			extent[i_axis] = set_all(extent_4[i_axis][i_triangle]);
		}

		float z_min = centre_4[Z][i_triangle] - extent_4[Z][i_triangle];
		float z_max = centre_4[Z][i_triangle] + extent_4[Z][i_triangle];
		__int32 bin_min = __int32(z_min / vertex_light_manager.bin_interval);
		__int32 bin_max = __int32(z_max / vertex_light_manager.bin_interval);
		bin_min = min(bin_min, vertex_light_manager_::NUM_BINS - 1);
		bin_max = min(bin_max, vertex_light_manager_::NUM_BINS - 1);
		bin_min = max(bin_min, 0);
		bin_max = max(bin_max, 0);

		//bin_max = bin_max >= 10 ? 0 : bin_max;
		//printf_s(" %i , %i \n", bin_min, bin_max);

		__int32 i_lights[MAX_LIGHTS_PER_VERTEX];
		__int32 n_lights = 0;
		{
			for (__int32 i_bin = bin_min; i_bin <= bin_max; i_bin++) {

				const vertex_light_manager_::bin_& bin = vertex_light_manager.bin[i_bin];

				for (__int32 i_light_4 = 0; i_light_4 < bin.n_lights; i_light_4 += 4) {

					const __int32 n = min(bin.n_lights - i_light_4, 4);

					__m128 light_position[4];
					for (__int32 i_light = 0; i_light < n; i_light++) {
						__int32 index = vertex_light_manager.i_light[bin.i_start + i_light_4 + i_light];
						light_position[i_light] = load_u(vertex_light_manager.light_sources[index].position.f);
					}
					Transpose(light_position);

					const __m128 light_extent = set_all(100.0f);
					__m128i is_valid = set_all(-1);
					is_valid &= abs(centre[X] - light_position[X]) < (extent[X] + light_extent);
					is_valid &= abs(centre[Y] - light_position[Y]) < (extent[Y] + light_extent);
					is_valid &= abs(centre[Z] - light_position[Z]) < (extent[Z] + light_extent);

					unsigned __int32 result_mask = store_mask(is_valid);

					for (__int32 i_light = 0; i_light < n; i_light++) {

						__int32 index = vertex_light_manager.i_light[bin.i_start + i_light_4 + i_light];
						i_lights[n_lights] = index;
						n_lights += (result_mask >> i_light) & 0x1;
					}

					if (n_lights > MAX_LIGHTS_PER_VERTEX) {

						n_lights = MAX_LIGHTS_PER_VERTEX;
						break;
					}
				}
			}
		}

		for (__int32 i_vertex = 0; i_vertex < 3; i_vertex++) {

			__m128 vertex_position[3];
			vertex_position[X] = set_all(new_position[i_triangle][i_vertex].x);
			vertex_position[Y] = set_all(new_position[i_triangle][i_vertex].y);
			vertex_position[Z] = set_all(new_position[i_triangle][i_vertex].z);

			__m128 vertex_colour[4];
			vertex_colour[R] = set_all(0.0f);
			vertex_colour[G] = set_all(0.0f);
			vertex_colour[B] = set_all(0.0f);

			__m128 normal[3];
			normal[X] = set_all(normal_4[X][i_triangle]);
			normal[Y] = set_all(normal_4[Y][i_triangle]);
			normal[Z] = set_all(normal_4[Z][i_triangle]);

			for (__int32 i_light_4 = 0; i_light_4 < n_lights; i_light_4 += 4) {

				const __int32 n = min(n_lights - i_light_4, 4);

				__m128 light_position[4];
				__m128 light_colour[4];
				unsigned __int32 mask = 0x0;
				float intensity_4[4];
				for (__int32 i_light = 0; i_light < n; i_light++) {

					mask |= 0x1 << i_light;
					const __int32 index = i_lights[i_light_4 + i_light];
					intensity_4[i_light] = vertex_light_manager.light_sources[index].intensity;
					light_position[i_light] = load_u(vertex_light_manager.light_sources[index].position.f);
					light_colour[i_light] = load_u(vertex_light_manager.light_sources[index].colour.f);
				}
				Transpose(light_position);
				Transpose(light_colour);
				__m128 light_intensity = load_u(intensity_4);

				__m128 light_ray[3];
				light_ray[X] = vertex_position[X] - light_position[X];
				light_ray[Y] = vertex_position[Y] - light_position[Y];
				light_ray[Z] = vertex_position[Z] - light_position[Z];

				__m128 mag = (light_ray[X] * light_ray[X]) + (light_ray[Y] * light_ray[Y]) + (light_ray[Z] * light_ray[Z]);
				__m128 r_mag = _mm_rsqrt_ps(mag);
				light_ray[X] *= r_mag;
				light_ray[Y] *= r_mag;
				light_ray[Z] *= r_mag;

				__m128 dot = (normal[X] * light_ray[X]) + (normal[Y] * light_ray[Y]) + (normal[Z] * light_ray[Z]);
				dot &= dot > zero;

				__m128 r_distance = reciprocal(one + mag);
				__m128 spec = (dot * dot) * r_distance;

				static const __m128 specular_coefficient = set_all(2000.0f);
				static const __m128 diffuse_coefficient = set_all(200.0f);

				//printf_s(" %f ", dot);
				__m128i loop_mask = load_mask[mask];

				for (__int32 i_channel = R; i_channel < A; i_channel++) {
					__m128 final = spec * specular_coefficient * light_colour[i_channel] * light_intensity;
					final += r_distance * diffuse_coefficient * light_colour[i_channel] * light_intensity;
					vertex_colour[i_channel] += final & loop_mask;
				}
			}

			Transpose(vertex_colour);
			vertex_colour[0] += vertex_colour[1] + vertex_colour[2] + vertex_colour[3];
			float4_ temp;
			store_u(vertex_colour[0], temp.f);
			colour[i_triangle][i_vertex].x += temp.x;
			colour[i_triangle][i_vertex].y += temp.y;
			colour[i_triangle][i_vertex].z += temp.z;
		}
	}
Exemple #26
0
/*
 * R_SubdividePatchToGrid
 *
 * Turns a width x height patch of control points (row-major in points)
 * into a grid mesh. Columns are subdivided until the curve midpoint lies
 * within r_subdivisions->value of the chord or the grid is full, the grid
 * is transposed and the same pass is run on the other direction, then
 * colinear rows/columns are dropped. The result of
 * R_CreateSurfaceGridMesh is returned.
 */
srfGridMesh_t *
R_SubdividePatchToGrid(int width, int height,
		       Drawvert points[MAX_PATCH_SIZE*MAX_PATCH_SIZE])
{
	int row, col, src, axis;
	drawVert_t_cleared(prev);
	drawVert_t_cleared(next);
	drawVert_t_cleared(mid);
	float distSqr, worst;
	int pass;
	int tmp;
	Drawvert	ctrl[MAX_GRID_SIZE][MAX_GRID_SIZE];
	float		errorTable[2][MAX_GRID_SIZE];

	/* unpack the point list into the working grid */
	for(row = 0; row < height; row++)
		for(col = 0; col < width; col++)
			ctrl[row][col] = points[row*width+col];

	/* pass 0 works on columns; the grid is then transposed so that
	 * pass 1 applies the very same code to the rows */
	for(pass = 0; pass < 2; pass++){

		for(col = 0; col < MAX_GRID_SIZE; col++)
			errorTable[pass][col] = 0;

		/* horizontal subdivisions: col always sits on the first point
		 * of a (start, peak, end) triple */
		col = 0;
		while(col + 2 < width){
			/* check subdivided midpoints against control points */

			/* FIXME: also check midpoints of adjacent patches against the control points
			 * this would basically stitch all patches in the same LOD group together. */

			worst = 0;
			for(row = 0; row < height; row++){
				Drawvert	*lo = &ctrl[row][col];
				Drawvert	*peak = &ctrl[row][col+1];
				Drawvert	*hi = &ctrl[row][col+2];
				Vec3	curvePt;
				Vec3	offLine;
				Vec3	chord;
				Vec3	onLine;
				float	along;

				/* point on the curve at the middle of the triple */
				for(axis = 0; axis < 3; axis++)
					curvePt[axis] = (lo->xyz[axis] + peak->xyz[axis] * 2
							 + hi->xyz[axis]) * 0.25f;

				/* distance of that point from the start-end line;
				 * dist-from-line ignores internal texture warping
				 * but yields far fewer polygons than
				 * dist-from-midpoint */
				subv3(curvePt, lo->xyz, curvePt);
				subv3(hi->xyz, lo->xyz, chord);
				normv3(chord);

				along = dotv3(curvePt, chord);
				scalev3(chord, along, onLine);
				subv3(curvePt, onLine, offLine);
				distSqr = lensqrv3(offLine);	/* sqrt is taken once, below */
				if(distSqr > worst)
					worst = distSqr;
			}

			worst = sqrt(worst);

			if(worst < 0.1f){
				/* every point is on its line: flag the column for removal */
				errorTable[pass][col+1] = 999;
				col += 2;
			}else if(width + 2 > MAX_GRID_SIZE
				 || worst <= r_subdivisions->value){
				/* either no room left or no subdivision needed */
				errorTable[pass][col+1] = 1.0f/worst;
				col += 2;
			}else{
				errorTable[pass][col+2] = 1.0f/worst;

				/* insert two columns and replace the peak */
				width += 2;
				for(row = 0; row < height; row++){
					LerpDrawVert(&ctrl[row][col], &ctrl[row][col+1], &prev);
					LerpDrawVert(&ctrl[row][col+1], &ctrl[row][col+2], &next);
					LerpDrawVert(&prev, &next, &mid);

					for(src = width - 1; src > col + 3; src--)
						ctrl[row][src] = ctrl[row][src-2];
					ctrl[row][col + 1]	= prev;
					ctrl[row][col + 2]	= mid;
					ctrl[row][col + 3]	= next;
				}

				/* col is left alone: the same triple is checked
				 * again since it may need more subdivision */
			}
		}

		Transpose(width, height, ctrl);
		tmp = width;
		width	= height;
		height	= tmp;
	}


	/* put all the aproximating points on the curve */
	PutPointsOnCurve(ctrl, width, height);

	/* cull out any columns that are colinear */
	for(col = 1; col < width-1; col++){
		if(errorTable[0][col] == 999){
			for(src = col+1; src < width; src++){
				for(row = 0; row < height; row++)
					ctrl[row][src-1] = ctrl[row][src];
				errorTable[0][src-1] = errorTable[0][src];
			}
			width--;
		}
	}

	/* cull out any rows that are colinear */
	for(row = 1; row < height-1; row++){
		if(errorTable[1][row] == 999){
			for(src = row+1; src < height; src++){
				for(col = 0; col < width; col++)
					ctrl[src-1][col] = ctrl[src][col];
				errorTable[1][src-1] = errorTable[1][src];
			}
			height--;
		}
	}

	/* flip for longest tristrips as an optimization
	 * the results should be visually identical with or
	 * without this step */
	if(height > width){
		Transpose(width, height, ctrl);
		InvertErrorTable(errorTable, width, height);
		tmp = width;
		width	= height;
		height	= tmp;
		InvertCtrl(width, height, ctrl);
	}

	/* calculate normals */
	MakeMeshNormals(width, height, ctrl);

	return R_CreateSurfaceGridMesh(width, height, ctrl, errorTable);
}
Exemple #27
0
// Fills APre with the n x n Fox-Li matrix for parameter omega.
//
// Quadrature nodes and square-root weights are taken from the
// eigen-decomposition of a symmetric tridiagonal matrix with zero diagonal;
// entry (i,j) of the operator is phi * exp(-i*omega*(x_i - x_j)^2) with
// phi = sqrt(i*omega/pi), and the result is scaled from both sides by the
// square-root weights.
void FoxLi( ElementalMatrix<Complex<Real>>& APre, Int n, Real omega )
{
    DEBUG_CSE
    using C = Complex<Real>;

    const Real pi = 4*Atan( Real(1) );
    const C phi = Sqrt( C(0,omega/pi) );

    // Work on an [MC,MR] view of the caller's matrix
    DistMatrixWriteProxy<C,C,MC,MR> AProx( APre );
    auto& A = AProx.Get();
    const Grid& g = A.Grid();

    // Tridiagonal matrix: zero diagonal, 1/betaInv on the off-diagonal
    DistMatrix<Real,VR,STAR> diagonal(g), offDiagonal(g);
    Zeros( diagonal, n, 1 );
    offDiagonal.Resize( n-1, 1 );
    auto& offDiagonalLoc = offDiagonal.Matrix();
    const Int offDiagonalLocalHeight = offDiagonal.LocalHeight();
    for( Int iLoc=0; iLoc<offDiagonalLocalHeight; ++iLoc )
    {
        const Int i = offDiagonal.GlobalRow(iLoc);
        const Real betaInv = 2*Sqrt(1-Pow(i+Real(1),-2)/4);
        offDiagonalLoc(iLoc) = 1/betaInv;
    }

    // Gauss quadrature: eigenvalues are the points, the first row of the
    // eigenvector matrix gives the weights
    DistMatrix<Real,VR,STAR> nodes(g);
    DistMatrix<Real,STAR,VR> eigVecs(g);
    HermitianTridiagEig( diagonal, offDiagonal, nodes, eigVecs, UNSORTED );
    auto firstRow = eigVecs( IR(0), ALL );
    DistMatrix<Real,STAR,VR> sqrtWeights( firstRow );
    auto& sqrtWeightsLoc = sqrtWeights.Matrix();
    const Real sqrtTwo = Sqrt(Real(2));
    const Int sqrtWeightsLocalWidth = sqrtWeights.LocalWidth();
    for( Int jLoc=0; jLoc<sqrtWeightsLocalWidth; ++jLoc )
        sqrtWeightsLoc(0,jLoc) = sqrtTwo*Abs(sqrtWeightsLoc(0,jLoc));
    herm_eig::Sort( nodes, sqrtWeights, ASCENDING );

    // Form the integral operator; the nodes are redistributed so that every
    // process holds the ones matching its local rows and columns of A
    A.Resize( n, n );
    DistMatrix<Real,MC,STAR> nodes_MC( g );
    DistMatrix<Real,MR,STAR> nodes_MR( g );
    nodes_MC.AlignWith( A );
    nodes_MR.AlignWith( A );
    nodes_MC = nodes;
    nodes_MR = nodes;
    auto& ALoc = A.Matrix();
    auto& rowNodes = nodes_MC.Matrix();
    auto& colNodes = nodes_MR.Matrix();
    const Int localHeight = A.LocalHeight();
    const Int localWidth = A.LocalWidth();
    for( Int jLoc=0; jLoc<localWidth; ++jLoc )
    {
        const Real colNode = colNodes(jLoc);
        for( Int iLoc=0; iLoc<localHeight; ++iLoc )
        {
            const Real diff = rowNodes(iLoc)-colNode;
            const Real theta = -omega*Pow(diff,2);
            ALoc(iLoc,jLoc) = phi*C(Cos(theta),Sin(theta));
        }
    }

    // Apply the weighting on both sides
    DistMatrix<Real,VR,STAR> sqrtWeightsTrans(g);
    Transpose( sqrtWeights, sqrtWeightsTrans );
    DiagonalScale( LEFT, NORMAL, sqrtWeightsTrans, A );
    DiagonalScale( RIGHT, NORMAL, sqrtWeightsTrans, A );
}
Exemple #28
0
void DrawBl()
{
	Shader* s = &g_shader[g_curS];

	//return;
	for(int i=0; i<BUILDINGS; i++)
	{
		Building* b = &g_building[i];

		if(!b->on)
			continue;

		const BuildingT* t = &g_bltype[b->type];
		//const BuildingT* t = &g_bltype[BUILDING_APARTMENT];
		Model* m = &g_model[ t->model ];

		Vec3f vmin(b->drawpos.x - t->widthx*TILE_SIZE/2, b->drawpos.y, b->drawpos.z - t->widthz*TILE_SIZE/2);
		Vec3f vmax(b->drawpos.x + t->widthx*TILE_SIZE/2, b->drawpos.y + (t->widthx+t->widthz)*TILE_SIZE/2, b->drawpos.z + t->widthz*TILE_SIZE/2);

		if(!g_frustum.boxin2(vmin.x, vmin.y, vmin.z, vmax.x, vmax.y, vmax.z))
			continue;

		if(!b->finished)
			m = &g_model[ t->cmodel ];

		/*
		m->draw(0, b->drawpos, 0);
		*/

		float pitch = 0;
		float yaw = 0;
		Matrix modelmat;
		float radians[] = {(float)DEGTORAD(pitch), (float)DEGTORAD(yaw), 0};
		modelmat.translation((const float*)&b->drawpos);
		Matrix rotation;
		rotation.rotrad(radians);
		modelmat.postmult(rotation);
		glUniformMatrix4fv(s->m_slot[SSLOT_MODELMAT], 1, 0, modelmat.m_matrix);

		Matrix modelview;
#ifdef SPECBUMPSHADOW
   	 modelview.set(g_camview.m_matrix);
#endif
    	modelview.postmult(modelmat);
		glUniformMatrix4fv(s->m_slot[SSLOT_MODELVIEW], 1, 0, modelview.m_matrix);

		Matrix mvp;
#if 0
		mvp.set(modelview.m_matrix);
		mvp.postmult(g_camproj);
#elif 0
		mvp.set(g_camproj.m_matrix);
		mvp.postmult(modelview);
#else
		mvp.set(g_camproj.m_matrix);
		mvp.postmult(g_camview);
		mvp.postmult(modelmat);
#endif
		glUniformMatrix4fv(s->m_slot[SSLOT_MVP], 1, 0, mvp.m_matrix);

		Matrix modelviewinv;
		Transpose(modelview, modelview);
		Inverse2(modelview, modelviewinv);
		//Transpose(modelviewinv, modelviewinv);
		glUniformMatrix4fv(s->m_slot[SSLOT_NORMALMAT], 1, 0, modelviewinv.m_matrix);

		VertexArray* va = &b->drawva;

		m->usetex();

		glVertexAttribPointer(s->m_slot[SSLOT_POSITION], 3, GL_FLOAT, GL_FALSE, 0, va->vertices);
		glVertexAttribPointer(s->m_slot[SSLOT_TEXCOORD0], 2, GL_FLOAT, GL_FALSE, 0, va->texcoords);

		if(s->m_slot[SSLOT_NORMAL] != -1)
			glVertexAttribPointer(s->m_slot[SSLOT_NORMAL], 3, GL_FLOAT, GL_FALSE, 0, va->normals);

		glDrawArrays(GL_TRIANGLES, 0, va->numverts);
	}
}
Exemple #29
0
Fichier : SSE.hpp Projet : Eynx/R3D
    // Matrix inverse ---------------------------------------------------------------------
    // Computes the inverse of a 4x4 matrix with SSE intrinsics.
    //
    // Outline of what the code below does:
    //   1. Transposes the input.
    //   2. Builds three vectors (d0, d1, d2) whose lanes are differences of
    //      two element products taken from pairs of transposed rows.
    //   3. Combines those with permuted rows into c0/c2/c4/c6 (three product
    //      terms per lane, with alternating sign on the third term).
    //   4. Takes Dot(c0, first transposed row) as the determinant and scales
    //      c0/c2/c4/c6 by its reciprocal to form the result rows.
    //
    // matrix:  the matrix to invert.
    // returns: a Float4x4 whose x/y/z/w are c0/c2/c4/c6 times 1/determinant.
    //
    // NOTE(review): the reciprocal is computed unconditionally; there is no
    // guard for a zero determinant (singular input) -- callers must ensure the
    // matrix is invertible.
    // NOTE(review): judging by the lane comments in the body, Shuffle<a,b,c,d>(p, q)
    // appears to yield {p[a], p[b], q[c], q[d]} and Permute<a,b,c,d>(p) appears to
    // yield {p[a], p[b], p[c], p[d]} -- confirm against their definitions.
    // NOTE(review): the structure resembles DirectXMath's XMMatrixInverse SSE
    // path -- confirm provenance before modifying any lane index.
    static Float4x4 VFunction Inverse(const Float4x4& matrix)
    {
        Float4x4 mTransposed = Transpose(matrix);

        // First product of each two-term difference that makes up d0, d1, d2.
        Vector v00 = Permute<0, 0, 1, 1>(mTransposed.z);
        Vector v10 = Permute<2, 3, 2, 3>(mTransposed.w);
        Vector v01 = Permute<0, 0, 1, 1>(mTransposed.x);
        Vector v11 = Permute<2, 3, 2, 3>(mTransposed.y);
        Vector v02 = Shuffle<0, 2, 0, 2>(mTransposed.z, mTransposed.x);
        Vector v12 = Shuffle<1, 3, 1, 3>(mTransposed.w, mTransposed.y);

        Vector d0 = _mm_mul_ps(v00, v10);
        Vector d1 = _mm_mul_ps(v01, v11);
        Vector d2 = _mm_mul_ps(v02, v12);

        // Second product of each difference (same rows, swapped lane selections).
        v00 = Permute<2, 3, 2, 3>(mTransposed.z);
        v10 = Permute<0, 0, 1, 1>(mTransposed.w);
        v01 = Permute<2, 3, 2, 3>(mTransposed.x);
        v11 = Permute<0, 0, 1, 1>(mTransposed.y);
        v02 = Shuffle<1, 3, 1, 3>(mTransposed.z, mTransposed.x);
        v12 = Shuffle<0, 2, 0, 2>(mTransposed.w, mTransposed.y);

        v00 = _mm_mul_ps(v00, v10);
        v01 = _mm_mul_ps(v01, v11);
        v02 = _mm_mul_ps(v02, v12);
        // d0, d1, d2 now hold (first product - second product) in every lane.
        d0 = _mm_sub_ps(d0, v00);
        d1 = _mm_sub_ps(d1, v01);
        d2 = _mm_sub_ps(d2, v02);

        // First term of c0/c2/c4/c6: permuted row elements times selected d-lanes.
        // v11 = d0.y, d0.w, d2.y, d2.y
        v11 = Shuffle<1, 3, 1, 1>(d0, d2);
        v00 = Permute<1, 2, 0, 1>(mTransposed.y);
        v10 = Shuffle<2, 0, 3, 0>(v11, d0);
        v01 = Permute<2, 0, 1, 0>(mTransposed.x);
        v11 = Shuffle<1, 2, 1, 2>(v11, d0);
        // v13 = d1.y, d1.w, d2.w, d2.w
        Vector v13 = Shuffle<1, 3, 3, 3>(d1, d2);
        v02 = Permute<1, 2, 0, 1>(mTransposed.w);
        v12 = Shuffle<2, 0, 3, 0>(v13, d1);
        Vector v03 = Permute<2, 0, 1, 0>(mTransposed.z);
        v13 = Shuffle<1, 2, 1, 2>(v13, d1);

        Vector c0 = _mm_mul_ps(v00, v10);
        Vector c2 = _mm_mul_ps(v01, v11);
        Vector c4 = _mm_mul_ps(v02, v12);
        Vector c6 = _mm_mul_ps(v03, v13);

        // Second term: built the same way and subtracted from the first.
        // v11 = d0.x, d0.y, d2.x, d2.x
        v11 = Shuffle<0, 1, 0, 0>(d0, d2);
        v00 = Permute<2, 3, 1, 2>(mTransposed.y);
        v10 = Shuffle<3, 0, 1, 2>(d0, v11);
        v01 = Permute<3, 2, 3, 1>(mTransposed.x);
        v11 = Shuffle<2, 1, 2, 0>(d0, v11);
        // v13 = d1.x, d1.y, d2.z, d2.z
        v13 = Shuffle<0, 1, 2, 2>(d1, d2);
        v02 = Permute<2, 3, 1, 2>(mTransposed.w);
        v12 = Shuffle<3, 0, 1, 2>(d1, v13);
        v03 = Permute<3, 2, 3, 1>(mTransposed.z);
        v13 = Shuffle<2, 1, 2, 0>(d1, v13);

        v00 = _mm_mul_ps(v00, v10);
        v01 = _mm_mul_ps(v01, v11);
        v02 = _mm_mul_ps(v02, v12);
        v03 = _mm_mul_ps(v03, v13);
        c0 = _mm_sub_ps(c0, v00);
        c2 = _mm_sub_ps(c2, v01);
        c4 = _mm_sub_ps(c4, v02);
        c6 = _mm_sub_ps(c6, v03);

        // Third term: applied below with both signs (add and subtract).
        v00 = Permute<3, 0, 3, 0>(mTransposed.y);
        // v10 = d0.z, d0.z, d2.x, d2.y
        v10 = Shuffle<2, 2, 0, 1>(d0, d2);
        v10 = Permute<0, 3, 2, 0>(v10);
        v01 = Permute<1, 3, 0, 2>(mTransposed.x);
        // v11 = d0.x, d0.w, d2.x, d2.y
        v11 = Shuffle<0, 3, 0, 1>(d0, d2);
        v11 = Permute<3, 0, 1, 2>(v11);
        v02 = Permute<3, 0, 3, 0>(mTransposed.w);
        // v12 = d1.z, d1.z, d2.z, d2.w
        v12 = Shuffle<2, 2, 2, 3>(d1, d2);
        v12 = Permute<0, 3, 2, 0>(v12);
        v03 = Permute<1, 3, 0, 2>(mTransposed.z);
        // v13 = d1.x, d1.w, d2.z, d2.w
        v13 = Shuffle<0, 3, 2, 3>(d1, d2);
        v13 = Permute<3, 0, 1, 2>(v13);

        v00 = _mm_mul_ps(v00, v10);
        v01 = _mm_mul_ps(v01, v11);
        v02 = _mm_mul_ps(v02, v12);
        v03 = _mm_mul_ps(v03, v13);
        // Each pair (c0,c1), (c2,c3), (c4,c5), (c6,c7) holds the same partial
        // result with the third term added in one and subtracted in the other;
        // note the add/sub roles alternate from pair to pair.
        Vector c1 = _mm_sub_ps(c0, v00);
        c0 = _mm_add_ps(c0, v00);
        Vector c3 = _mm_add_ps(c2, v01);
        c2 = _mm_sub_ps(c2, v01);
        Vector c5 = _mm_sub_ps(c4, v02);
        c4 = _mm_add_ps(c4, v02);
        Vector c7 = _mm_add_ps(c6, v03);
        c6 = _mm_sub_ps(c6, v03);

        // Merge each pair into one vector: take lanes 0 and 2 from the first and
        // lanes 1 and 3 from the second, then reorder so they alternate
        // (assuming the Shuffle/Permute lane semantics noted in the header).
        c0 = Shuffle<0, 2, 1, 3>(c0, c1);
        c2 = Shuffle<0, 2, 1, 3>(c2, c3);
        c4 = Shuffle<0, 2, 1, 3>(c4, c5);
        c6 = Shuffle<0, 2, 1, 3>(c6, c7);
        c0 = Permute<0, 2, 1, 3>(c0);
        c2 = Permute<0, 2, 1, 3>(c2);
        c4 = Permute<0, 2, 1, 3>(c4);
        c6 = Permute<0, 2, 1, 3>(c6);
        // Get the determinant
        // (presumably Dot broadcasts the scalar result to all lanes, since it is
        // used below as a per-lane scale factor -- confirm)
        Vector vTemp = Dot((Float4)c0, mTransposed.x);
        // Disabled optional determinant output; there is no pDeterminant
        // parameter in this signature.
        //if(pDeterminant != nullptr)
        //    *pDeterminant = vTemp;
        // Reciprocal of the determinant; not guarded against zero.
        vTemp = _mm_div_ps(Constant::One, vTemp);
        Float4x4 mResult;
        mResult.x = _mm_mul_ps(c0, vTemp);
        mResult.y = _mm_mul_ps(c2, vTemp);
        mResult.z = _mm_mul_ps(c4, vTemp);
        mResult.w = _mm_mul_ps(c6, vTemp);
        return mResult;
    }
Exemple #30
0
Fichier : SSE.hpp Projet : Eynx/R3D
 // -- //
 // Returns the transpose of Multiply(matrixA, matrixB).
 static Float4x4 VFunction MultiplyTranspose(const Float4x4& matrixA, const Float4x4& matrixB)
 {
     const Float4x4 product = Multiply(matrixA, matrixB);
     return Transpose(product);
 }