Esempio n. 1
0
static int drawScene() {
	static int rotation = 0;
	glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
	glBindTexture(GL_TEXTURE_2D, g_CubeTexture);

	switch(g_PixelFormat) {
	case YV16: {
		glActiveTexture(GL_TEXTURE0);
		glBindTexture(GL_TEXTURE_2D, g_CubeTexture);
		glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, g_VideoWidth, g_VideoHeight, GL_LUMINANCE, GL_UNSIGNED_BYTE, mipi->lastVideoBuffer);

		glActiveTexture(GL_TEXTURE1);
		glBindTexture(GL_TEXTURE_2D, g_UTexture);
		glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, g_VideoWidth/2, g_VideoHeight, GL_LUMINANCE, GL_UNSIGNED_BYTE, mipi->lastVideoBuffer+(g_VideoWidth*g_VideoHeight));

		glActiveTexture(GL_TEXTURE2);
		glBindTexture(GL_TEXTURE_2D, g_VTexture);
		glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, g_VideoWidth/2, g_VideoHeight, GL_LUMINANCE, GL_UNSIGNED_BYTE, (mipi->lastVideoBuffer+(g_VideoWidth*g_VideoHeight) + ((g_VideoWidth/2)*g_VideoHeight)));

		glActiveTexture(GL_TEXTURE0);
		break;
	}
	case NV12:{
		glActiveTexture(GL_TEXTURE0);
		glBindTexture(GL_TEXTURE_2D, g_CubeTexture);
		glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, g_VideoWidth, g_VideoHeight, GL_LUMINANCE, GL_UNSIGNED_BYTE, mipi->lastVideoBuffer);

		glActiveTexture(GL_TEXTURE1);
		glBindTexture(GL_TEXTURE_2D, g_UVTexture);
		glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, g_VideoWidth/2, g_VideoHeight/2,GL_LUMINANCE_ALPHA, GL_UNSIGNED_BYTE, (mipi->lastVideoBuffer+(g_VideoWidth*g_VideoHeight)));

		glActiveTexture(GL_TEXTURE0);
		break;
	}
	case RGBP:
		glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, g_VideoWidth, g_VideoHeight, GL_RGB, GL_UNSIGNED_SHORT_5_6_5, mipi->lastVideoBuffer);
		break;
	default:
		glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, g_VideoWidth, g_VideoHeight, GL_RGBA, GL_UNSIGNED_BYTE, mipi->lastVideoBuffer);
		break;
	}

	GLfloat mat[16], rot[16], scale[16], final[16];
	makeIdentity(rot);
	makeIdentity(mat);
	makeIdentity(scale);
	glBindTexture(GL_TEXTURE_2D, g_CubeTexture);
	glUseProgram(shaderProgram);
	if (g_Rotation) {
		rotation += 2;
		makeYRotMatrix(rotation, mat);
		matrixMult(final, g_Draw1ViewPort, mat);
		glUniformMatrix4fv(g_u_matrix_p3, 1, GL_FALSE, final);
	} else {
Esempio n. 2
0
Matrix4& Matrix4::makeScale(GLfloat x, GLfloat y, GLfloat z) {
    makeIdentity();
    m[0][0] = x;
    m[1][1] = y;
    m[2][2] = z;
    return *this;
}
Esempio n. 3
0
 //! Set matrix to be a pure scaling matrix. (no translation or rotation components)
 void setScale(const vec3<Type>& a_scale)
 {
     makeIdentity();
     m_data[0][0] = a_scale[0];
     m_data[1][1] = a_scale[1];
     m_data[2][2] = a_scale[2];
 }
Esempio n. 4
0
 //! Make this matrix into a pure translation matrix. (no scale or rotation components)
 void setTranslate(const vec3<Type>& t)
 {
     makeIdentity();
     m_data[3][0] = t[0];
     m_data[3][1] = t[1];
     m_data[3][2] = t[2];
 }
Esempio n. 5
0
    float4x4 &float4x4::makePerspectiveLH(float fFov, float fAspect, 
        float fNear, float fFar)
    {
		float yScale = 1.0f / tan(fFov/2);
		float xScale = yScale / fAspect;

        makeIdentity();
		m[0][0] = xScale;
		m[1][1] = yScale;
		m[2][2] = fFar/(fFar-fNear);
		m[3][2] = -fNear * fFar/(fFar-fNear);
		m[2][3] = 1;
		m[3][3] = 0;
        return *this;

        //float xmin, xmax, ymin, ymax;       // Dimensions of near clipping plane

        //// Do the Math for the near clipping plane
        //ymax = fNear * float(tan( fFov * 3.14159265358979323846 / 360.0 ));
        //ymin = -ymax;
        //xmin = ymin * fAspect;
        //xmax = -xmin;

        //makeIdentity();
        //// Construct the projection matrix
        //ma[0] = (2.0f * fNear)/(xmax - xmin);
        //ma[5] = (2.0f * fNear)/(ymax - ymin);
        //ma[8] = (xmax + xmin) / (xmax - xmin);
        //ma[9] = (ymax + ymin) / (ymax - ymin);
        //ma[10] = -((fFar + fNear)/(fFar - fNear));
        //ma[11] = -1.0f;
        //ma[14] = -((2.0f * fFar * fNear)/(fFar - fNear));
        //ma[15] = 0.0f;
        //return *this;
    }
Esempio n. 6
0
Matrix4& Matrix4::makeScale(const Vector3& v) {
    makeIdentity();
    m[0][0] = v.x;
    m[1][1] = v.y;
    m[2][2] = v.z;
    return *this;
}
Esempio n. 7
0
Matrix4& Matrix4::makeTranslate(const Vector3& v) {
    makeIdentity();
    m[0][3] = v.x;
    m[1][3] = v.y;
    m[2][3] = v.z;
    return *this;
}
Esempio n. 8
0
Matrix4& Matrix4::makeTranslate(GLfloat x, GLfloat y, GLfloat z) {
    makeIdentity();
    m[0][3] = x;
    m[1][3] = y;
    m[2][3] = z;
    return *this;
}
Esempio n. 9
0
void Transform::makeRotz(real angle)
{
    real cosa = cos(angle);
    real sina = sin(angle);
    makeIdentity();
    m[0][0] = cosa; m[0][1] = -sina;
    m[1][0] = sina; m[1][1] =  cosa;
}
Esempio n. 10
0
Matrix4& Matrix4::makeRotateX(GLfloat deg) {
    deg = deg / 180.0 * M_PI;
    makeIdentity();
    m[1][1] = cos(deg);
    m[1][2] = -sin(deg);
    m[2][1] = sin(deg);
    m[2][2] = cos(deg);
    return *this;
}
Esempio n. 11
0
Matrix4& Matrix4::makeRotateY(GLfloat deg) {
    deg = deg / 180.0 * M_PI;  // convert from degrees to radians
    makeIdentity();
    m[0][0] = cos(deg);
    m[0][2] = sin(deg);
    m[2][0] = -sin(deg);
    m[2][2] = cos(deg);
    return *this;
}
Matrix3x3 Matrix3x3::makeRotation(f32 radian) {
  Matrix3x3 matrix=makeIdentity();
  f32 c=std::cosf(radian);
  f32 s=std::sinf(radian);
  matrix.m[0][0]=c;
  matrix.m[1][0]=-s;
  matrix.m[0][1]=s;
  matrix.m[1][1]=c;
  return matrix;   
}
void TransformationMatrix::recompose(const DecomposedType& decomp)
{
    makeIdentity();
    
    // first apply perspective
    m_matrix[0][3] = (float) decomp.perspectiveX;
    m_matrix[1][3] = (float) decomp.perspectiveY;
    m_matrix[2][3] = (float) decomp.perspectiveZ;
    m_matrix[3][3] = (float) decomp.perspectiveW;
    
    // now translate
    translate3d((float) decomp.translateX, (float) decomp.translateY, (float) decomp.translateZ);
    
    // apply rotation
    double xx = decomp.quaternionX * decomp.quaternionX;
    double xy = decomp.quaternionX * decomp.quaternionY;
    double xz = decomp.quaternionX * decomp.quaternionZ;
    double xw = decomp.quaternionX * decomp.quaternionW;
    double yy = decomp.quaternionY * decomp.quaternionY;
    double yz = decomp.quaternionY * decomp.quaternionZ;
    double yw = decomp.quaternionY * decomp.quaternionW;
    double zz = decomp.quaternionZ * decomp.quaternionZ;
    double zw = decomp.quaternionZ * decomp.quaternionW;
    
    // Construct a composite rotation matrix from the quaternion values
    TransformationMatrix rotationMatrix(1 - 2 * (yy + zz), 2 * (xy - zw), 2 * (xz + yw), 0, 
                           2 * (xy + zw), 1 - 2 * (xx + zz), 2 * (yz - xw), 0,
                           2 * (xz - yw), 2 * (yz + xw), 1 - 2 * (xx + yy), 0,
                           0, 0, 0, 1);
    
    multLeft(rotationMatrix);
    
    // now apply skew
    if (decomp.skewYZ) {
        TransformationMatrix tmp;
        tmp.setM32((float) decomp.skewYZ);
        multLeft(tmp);
    }
    
    if (decomp.skewXZ) {
        TransformationMatrix tmp;
        tmp.setM31((float) decomp.skewXZ);
        multLeft(tmp);
    }
    
    if (decomp.skewXY) {
        TransformationMatrix tmp;
        tmp.setM21((float) decomp.skewXY);
        multLeft(tmp);
    }
    
    // finally, apply scale
    scale3d((float) decomp.scaleX, (float) decomp.scaleY, (float) decomp.scaleZ);
}
Esempio n. 14
0
void initMatrices() {

	theta = 0.0f;
	scaleAmount = 1.0f;

	// Allocate memory for the matrices and initialize them to the Identity matrix
	rotXMatrix = new GLfloat[16];	makeIdentity(rotXMatrix);
	rotYMatrix = new GLfloat[16];	makeIdentity(rotYMatrix);
	rotZMatrix = new GLfloat[16];	makeIdentity(rotZMatrix);
	transMatrix = new GLfloat[16];	makeIdentity(transMatrix);
	scaleMatrix = new GLfloat[16];	makeIdentity(scaleMatrix);
	tempMatrix1 = new GLfloat[16];	makeIdentity(tempMatrix1);
	M = new GLfloat[16];			makeIdentity(M);
	V = new GLfloat[16];			makeIdentity(V);
	P = new GLfloat[16];			makeIdentity(P);

	// Set up the (P)erspective matrix only once! Arguments are 1) the resulting matrix, 2) FoV, 3) aspect ratio, 4) near plane 5) far plane
	makePerspectiveMatrix(P, 60.0f, 1.0f, 1.0f, 1000.0f);
}
Esempio n. 15
0
Matrix4& Matrix4::makeRotate(GLfloat deg, Vector3 a) {
    makeIdentity();
    a.normalize();
    deg = deg / 180.0 * M_PI;
    
    m[0][0] = 1 + (a.x * a.x - 1) * (1 - cos(deg));
    m[0][1] = -a.z * sin(deg) + a.x * a.y * (1 - cos(deg));
    m[0][2] = a.y * sin(deg) + a.x * a.z * (1 - cos(deg));
    
    m[1][0] = a.z * sin(deg) +  a.x * a.y * (1 - cos(deg));
    m[1][1] = 1 + (a.y * a.y - 1) * (1 - cos(deg));
    m[1][2] = -a.x * sin(deg) + a.y * a.z * (1 - cos(deg));
    
    m[2][0] = -a.y * sin(deg) + a.z * a.x * (1 - cos(deg));
    m[2][1] = a.x * sin(deg) + a.z * a.y * (1 - cos(deg));
    m[2][2] = 1 + (a.z * a.z - 1) * (1 - cos(deg));
    
    return *this;
}
Esempio n. 16
0
void
SbMatrix::setTransform(const SbVec3f &translation,
		 const SbRotation &rotation,
		 const SbVec3f &scaleFactor,
		 const SbRotation &scaleOrientation,
		 const SbVec3f &center)
{
#define TRANSLATE(vec)		m.setTranslate(vec), multLeft(m)
#define ROTATE(rot)		rot.getValue(m), multLeft(m)
    SbMatrix m;

    makeIdentity();
    
    if (translation != SbVec3f(0,0,0))
	TRANSLATE(translation);

    if (center != SbVec3f(0,0,0))
	TRANSLATE(center);

    if (rotation != SbRotation(0,0,0,1))
	ROTATE(rotation);

    if (scaleFactor != SbVec3f(1,1,1)) {
	SbRotation so = scaleOrientation;
	if (so != SbRotation(0,0,0,1))
	    ROTATE(so);
	
	m.setScale(scaleFactor);
	multLeft(m);

	if (so != SbRotation(0,0,0,1)) {
	    so.invert();
	    ROTATE(so);
	}
    }

    if (center != SbVec3f(0,0,0))
	TRANSLATE(-center);

#undef TRANSLATE
#undef ROTATE
}
Esempio n. 17
0
ToyRotationMatrix::ToyRotationMatrix(float degreeX /*=0*/, float degreeY /*=0*/, float degreeZ /*=0*/):ToyMatrix<float>(),
                                                                                                       DegreeX(degreeX),
                                                                                                       DegreeY(degreeY),
                                                                                                       DegreeZ(degreeZ) {
  makeIdentity();
  // Efficiency: Try to initialize in one go if possible
  // Out of convenience the default C'tor will init with
  // zero, then the identity matrix is written (only
  // five entries) and then rotation is constructed and
  // multiplied.
  if (DegreeX) {
    rotateX(DegreeX);
  }
  if (DegreeY) {
    rotateY(DegreeY);
  }
  if (DegreeZ) {
    rotateZ(DegreeZ);
  }
}
Esempio n. 18
0
	Quaternion& Quaternion::rotationFromTo(const Vector3& from, const Vector3& to)
	{
		// Based on Stan Melax's article in Game Programming Gems
		// Copy, since cannot modify local
		Vector3 v0 = from;
		Vector3 v1 = to;
		v0.normalize();
		v1.normalize();

		const FLOAT32 d = v0.dotProduct(v1);
		if (d >= 1.0f) // If dot == 1, vectors are the same
		{
			return makeIdentity();
		}
		else if (d <= -1.0f) // exactly opposite
		{
			Vector3 axis(1.0f, 0.f, 0.f);
			axis = axis.crossProduct(v0);
			if (axis.length() == 0)
			{
				axis.set(0.f, 1.f, 0.f);
				axis = axis.crossProduct(v0);
			}
			// same as fromAngleAxis(PI, axis).normalize();
			set(axis.x, axis.y, axis.z, 0);
			normalise();
			return *this;
		}

		const FLOAT32 s = sqrtf((1 + d) * 2); // optimize inv_sqrt
		const FLOAT32 invs = 1.f / s;
		const Vector3 c = v0.crossProduct(v1)*invs;
		set(c.x, c.y, c.z, s * 0.5f);
		normalise();
		return *this;
	}
Matrix3x3 Matrix3x3::makeTranslation(Vec2 const& t) {
  Matrix3x3 matrix=makeIdentity();
  matrix.m[2][0]=t.x;
  matrix.m[2][1]=t.y;
  return matrix;
}
Esempio n. 20
0
void Transform::makeTranslation(real tx, real ty, real tz) {
    makeIdentity();
    setRightSide(tx,ty,tz);
}
Esempio n. 21
0
 //! The default constructor. The matrix will be identity.
 matrix4()
 {
     makeIdentity();
 }
Esempio n. 22
0
int main(int argc, char **argv) {
    int info, i, j, pcol, Adim;
    double *D;
    int *DESCD;
    CSRdouble BT_i, B_j, Xsparse, Zsparse, Btsparse;

    /*BT_i.allocate(0,0,0);
    B_j.allocate(0,0,0);
    Xsparse.allocate(0,0,0);
    Zsparse.allocate(0,0,0);
    Btsparse.allocate(0,0,0);*/

    //Initialise MPI and some MPI-variables
    info = MPI_Init ( &argc, &argv );
    if ( info != 0 ) {
        printf ( "Error in MPI initialisation: %d\n",info );
        return info;
    }

    position= ( int* ) calloc ( 2,sizeof ( int ) );
    if ( position==NULL ) {
        printf ( "unable to allocate memory for processor position coordinate\n" );
        return EXIT_FAILURE;
    }

    dims= ( int* ) calloc ( 2,sizeof ( int ) );
    if ( dims==NULL ) {
        printf ( "unable to allocate memory for grid dimensions coordinate\n" );
        return EXIT_FAILURE;
    }

    //BLACS is the interface used by PBLAS and ScaLAPACK on top of MPI

    blacs_pinfo_ ( &iam,&size ); 				//determine the number of processes involved
    info=MPI_Dims_create ( size, 2, dims );			//determine the best 2D cartesian grid with the number of processes
    if ( info != 0 ) {
        printf ( "Error in MPI creation of dimensions: %d\n",info );
        return info;
    }

    //Until now the code can only work with square process grids
    //So we try to get the biggest square grid possible with the number of processes involved
    if (*dims != *(dims+1)) {
        while (*dims * *dims > size)
            *dims -=1;
        *(dims+1)= *dims;
        if (iam==0)
            printf("WARNING: %d processor(s) unused due to reformatting to a square process grid\n", size - (*dims * *dims));
        size = *dims * *dims;
        //cout << "New size of process grid: " << size << endl;
    }

    blacs_get_ ( &i_negone,&i_zero,&ICTXT2D );

    //Initialisation of the BLACS process grid, which is referenced as ICTXT2D
    blacs_gridinit_ ( &ICTXT2D,"R",dims, dims+1 );

    if (iam < size) {

        //The rank (iam) of the process is mapped to a 2D grid: position= (process row, process column)
        blacs_pcoord_ ( &ICTXT2D,&iam,position, position+1 );
        if ( *position ==-1 ) {
            printf ( "Error in proces grid\n" );
            return -1;
        }

        //Filenames, dimensions of all matrices and other important variables are read in as global variables (see src/readinput.cpp)
        info=read_input ( *++argv );
        if ( info!=0 ) {
            printf ( "Something went wrong when reading input file for processor %d\n",iam );
            return -1;
        }

        //blacs_barrier is used to stop any process of going beyond this point before all processes have made it up to this point.
        blacs_barrier_ ( &ICTXT2D,"ALL" );
        if ( * ( position+1 ) ==0 && *position==0 )
            printf ( "Reading of input-file succesful\n" );

        if ( * ( position+1 ) ==0 && *position==0 ) {
            printf("\nA linear mixed model with %d observations, %d genotypes, %d random effects and %d fixed effects\n", n,k,m,l);
            printf("was analyzed using %d (%d x %d) processors\n",size,*dims,*(dims+1));
        }

        //Dimension of A (sparse matrix) is the number of fixed effects(m) + the sparse random effects (l)
        Adim=m+l;

        //Dimension of D (dense matrix) is the number of dense effects (k)
        Ddim=k;

        pcol= * ( position+1 );

        //Define number of blocks needed to store a complete column/row of D
        Dblocks= Ddim%blocksize==0 ? Ddim/blocksize : Ddim/blocksize +1;

        //Define the number of rowblocks needed by the current process to store its part of the dense matrix D
        Drows= ( Dblocks - *position ) % *dims == 0 ? ( Dblocks- *position ) / *dims : ( Dblocks- *position ) / *dims +1;
        Drows= Drows<1? 1 : Drows;

        //Define the number of columnblocks needed by the current process to store its part of the dense matrix D
        Dcols= ( Dblocks - pcol ) % * ( dims+1 ) == 0 ? ( Dblocks- pcol ) / * ( dims+1 ) : ( Dblocks- pcol ) / * ( dims+1 ) +1;
        Dcols=Dcols<1? 1 : Dcols;

        //Define the local leading dimension of D (keeping in mind that matrices are always stored column-wise)
        lld_D=Drows*blocksize;

        //Initialise the descriptor of the dense distributed matrix
        DESCD= ( int* ) malloc ( DLEN_ * sizeof ( int ) );
        if ( DESCD==NULL ) {
            printf ( "unable to allocate memory for descriptor for C\n" );
            return -1;
        }

        //D with dimensions (Ddim,Ddim) is distributed over all processes in ICTXT2D, with the first element in process (0,0)
        //D is distributed into blocks of size (blocksize,blocksize), having a local leading dimension lld_D in this specific process
        descinit_ ( DESCD, &Ddim, &Ddim, &blocksize, &blocksize, &i_zero, &i_zero, &ICTXT2D, &lld_D, &info );
        if ( info!=0 ) {
            printf ( "Descriptor of matrix C returns info: %d\n",info );
            return info;
        }

        //Allocate the space necessary to store the part of D that is held into memory of this process.
        D = ( double* ) calloc ( Drows * blocksize * Dcols * blocksize,sizeof ( double ) );
        if ( D==NULL ) {
            printf ( "unable to allocate memory for Matrix D  (required: %ld bytes)\n", Drows * blocksize * Dcols * blocksize * sizeof ( double ) );
            return EXIT_FAILURE;
        }

        blacs_barrier_ ( &ICTXT2D,"ALL" );
        if (iam==0)
            printf ( "Start set up of B & D\n" );

        blacs_barrier_ ( &ICTXT2D,"ALL" );
        //set_up_BD is declared in readdist.cpp and constructs the parts of matrices B & D in each processor
        //which are necessary to create the distributed Schur complement of D
        info = set_up_BD ( DESCD, D, BT_i, B_j, Btsparse );

        //printdense(Drows*blocksize, Dcols * blocksize,D,"matrix_D.txt");

        blacs_barrier_ ( &ICTXT2D,"ALL" );
        if (iam==0)
            printf ( "Matrices B & D set up\n" );

        if(printD_bool) {

            int array_of_gsizes[2], array_of_distribs[2], array_of_dargs[2], array_of_psize[2] ;
            int buffersize;
            MPI_Datatype file_type;
            MPI_File fh;
            MPI_Status status;
            array_of_gsizes[0]=Dblocks * blocksize;
            array_of_gsizes[1]=Dblocks * blocksize;
            array_of_distribs[0]=MPI_DISTRIBUTE_CYCLIC;
            array_of_distribs[1]=MPI_DISTRIBUTE_CYCLIC;
            array_of_dargs[0]=blocksize;
            array_of_dargs[1]=blocksize;
            array_of_psize[0]=*dims;
            array_of_psize[1]=*(dims + 1);

            MPI_Type_create_darray(size,iam,2,array_of_gsizes, array_of_distribs,
                                   array_of_dargs, array_of_psize, MPI_ORDER_FORTRAN,
                                   MPI_DOUBLE, &file_type);
            MPI_Type_commit(&file_type);
            info = MPI_File_open(MPI_COMM_WORLD, filenameD,
                                 MPI_MODE_CREATE | MPI_MODE_WRONLY,
                                 MPI_INFO_NULL, &fh);
            /*if ( ( Drows-1 ) % *(dims+1) == *position && ( Dcols-1 ) % *(dims) == pcol && Ddim%blocksize !=0 )
                buffersize=((Drows-1) * blocksize + Ddim % blocksize) * ((Dcols-1) * blocksize + Ddim % blocksize);
            else if ( ( Drows-1 ) % *(dims+1) == *position && Ddim%blocksize !=0 )
                buffersize=((Drows-1) * blocksize + Ddim % blocksize) * Dcols * blocksize;
            else if ( ( Dcols-1 ) % *(dims) == *position && Ddim%blocksize !=0 )
                buffersize=((Dcols-1) * blocksize + Ddim % blocksize) * Drows * blocksize;
            else*/
            buffersize= Dcols * Drows * blocksize * blocksize;

            MPI_File_set_view(fh, 0, MPI_DOUBLE, file_type, "native", MPI_INFO_NULL);
            info =MPI_File_write_all(fh, D,buffersize, MPI_DOUBLE,
                                     &status);
	    MPI_File_close(&fh);
            if(iam==0) {
                printf("Matrix D (dimension %d) is printed in file %s\n", Dblocks*blocksize,filenameD);
            }
            if(filenameD != NULL)
                free(filenameD);
            filenameD=NULL;
            //delete[] array_of_gsizes, delete[] array_of_distribs, delete[] array_of_dargs, delete[] array_of_psize;
        }



        //Now every matrix has to set up the sparse matrix A, consisting of X'X, X'Z, Z'X and Z'Z + lambda*I
        Xsparse.loadFromFile ( filenameX );
        Zsparse.loadFromFile ( filenameZ );

        if(filenameX != NULL)
            free(filenameX);
        filenameX=NULL;
        if(filenameZ != NULL)
            free(filenameZ);
        filenameZ=NULL;

        smat_t *X_smat, *Z_smat;

        X_smat= (smat_t *) calloc(1,sizeof(smat_t));
        Z_smat= (smat_t *) calloc(1,sizeof(smat_t));

        X_smat = smat_new_from ( Xsparse.nrows,Xsparse.ncols,Xsparse.pRows,Xsparse.pCols,Xsparse.pData,0,0 );
        Z_smat = smat_new_from ( Zsparse.nrows,Zsparse.ncols,Zsparse.pRows,Zsparse.pCols,Zsparse.pData,0,0 );

        smat_t *Xt_smat, *Zt_smat;
        Xt_smat= (smat_t *) calloc(1,sizeof(smat_t));
        Zt_smat= (smat_t *) calloc(1,sizeof(smat_t));
        Xt_smat = smat_copy_trans ( X_smat );
        Zt_smat = smat_copy_trans ( Z_smat );

        CSRdouble Asparse;
        smat_t *XtX_smat, *XtZ_smat, *ZtZ_smat, *lambda_smat, *ZtZlambda_smat;

        XtX_smat= (smat_t *) calloc(1,sizeof(smat_t));
        XtZ_smat= (smat_t *) calloc(1,sizeof(smat_t));
        ZtZ_smat= (smat_t *) calloc(1,sizeof(smat_t));


        XtX_smat = smat_matmul ( Xt_smat, X_smat );
        XtZ_smat = smat_matmul ( Xt_smat, Z_smat );
        ZtZ_smat = smat_matmul ( Zt_smat,Z_smat );

        Xsparse.clear();
        Zsparse.clear();
        smat_free(Xt_smat);
        smat_free(Zt_smat);
        /*smat_free(X_smat);
        smat_free(Z_smat);*/

        CSRdouble Imat;

        makeIdentity ( l, Imat );

        lambda_smat= (smat_t *) calloc(1,sizeof(smat_t));

        lambda_smat = smat_new_from ( Imat.nrows,Imat.ncols,Imat.pRows,Imat.pCols,Imat.pData,0,0 );

        smat_scale_diag ( lambda_smat, -lambda );

        ZtZlambda_smat= (smat_t *) calloc(1,sizeof(smat_t));

        ZtZlambda_smat = smat_add ( lambda_smat, ZtZ_smat );

        smat_free(ZtZ_smat);
        //smat_free(lambda_smat);


        smat_to_symmetric_structure ( XtX_smat );
        smat_to_symmetric_structure ( ZtZlambda_smat );

        CSRdouble XtX_sparse, XtZ_sparse, ZtZ_sparse;

        XtX_sparse.make2 ( XtX_smat->m,XtX_smat->n,XtX_smat->nnz,XtX_smat->ia,XtX_smat->ja,XtX_smat->a );
        XtZ_sparse.make2 ( XtZ_smat->m,XtZ_smat->n,XtZ_smat->nnz,XtZ_smat->ia,XtZ_smat->ja,XtZ_smat->a );
        ZtZ_sparse.make2 ( ZtZlambda_smat->m,ZtZlambda_smat->n,ZtZlambda_smat->nnz,ZtZlambda_smat->ia,ZtZlambda_smat->ja,ZtZlambda_smat->a );

        /*smat_free(XtX_smat);
        smat_free(XtZ_smat);
        smat_free(ZtZlambda_smat);*/
        Imat.clear();

        if (iam==0) {
            cout << "***                                           [  t     t  ] *** " << endl;
            cout << "***                                           [ X X   X Z ] *** " << endl;
            cout << "***                                           [           ] *** " << endl;
            cout << "*** G e n e r a t i n g    m a t r i x    A = [           ] *** " << endl;
            cout << "***                                           [  t     t  ] *** " << endl;
            cout << "***                                           [ Z X   Z Z ] *** " << endl;
        }

        //Sparse matrix A only contains the upper triangular part of A
        create2x2SymBlockMatrix ( XtX_sparse, XtZ_sparse, ZtZ_sparse, Asparse );
        //Asparse.writeToFile("A_sparse.csr");

        smat_free(XtX_smat);
        smat_free(XtZ_smat);
        smat_free(ZtZlambda_smat);
        XtX_sparse.clear();
        XtZ_sparse.clear();
        ZtZ_sparse.clear();

        blacs_barrier_ ( &ICTXT2D,"ALL" );

        if(printsparseC_bool) {
            CSRdouble Dmat, Dblock, Csparse;
            Dblock.nrows=Dblocks * blocksize;
            Dblock.ncols=Dblocks * blocksize;
            Dblock.allocate(Dblocks * blocksize, Dblocks * blocksize, 0);
            Dmat.allocate(0,0,0);
            for (i=0; i<Drows; ++i) {
                for(j=0; j<Dcols; ++j) {
                    dense2CSR_sub(D + i * blocksize + j * lld_D * blocksize,blocksize,blocksize,lld_D,Dblock,( * ( dims) * i + *position ) *blocksize,
                                  ( * ( dims+1 ) * j + pcol ) *blocksize);
                    if ( Dblock.nonzeros>0 ) {
                        if ( Dmat.nonzeros==0 ) {
                            Dmat.make2 ( Dblock.nrows,Dblock.ncols,Dblock.nonzeros,Dblock.pRows,Dblock.pCols,Dblock.pData );
                        }
                        else {
                            Dmat.addBCSR ( Dblock );
                        }
                    }

                    Dblock.clear();
                }
            }
            blacs_barrier_(&ICTXT2D,"A");
            if ( iam!=0 ) {
                //Each process other than root sends its Dmat to the root process.
                MPI_Send ( & ( Dmat.nonzeros ),1, MPI_INT,0,iam,MPI_COMM_WORLD );
                MPI_Send ( & ( Dmat.pRows[0] ),Dmat.nrows + 1, MPI_INT,0,iam+size,MPI_COMM_WORLD );
                MPI_Send ( & ( Dmat.pCols[0] ),Dmat.nonzeros, MPI_INT,0,iam+2*size,MPI_COMM_WORLD );
                MPI_Send ( & ( Dmat.pData[0] ),Dmat.nonzeros, MPI_DOUBLE,0,iam+3*size,MPI_COMM_WORLD );
                Dmat.clear();
            }
            else {
                for ( i=1; i<size; ++i ) {
                    // The root process receives parts of Dmat sequentially from all processes and directly adds them together.
                    int nonzeroes, count;
                    MPI_Recv ( &nonzeroes,1,MPI_INT,i,i,MPI_COMM_WORLD,&status );
                    /*MPI_Get_count(&status, MPI_INT, &count);
                    printf("Process 0 received %d elements of process %d\n",count,i);*/
                    if(nonzeroes>0) {
                        printf("Nonzeroes : %d\n ",nonzeroes);
                        Dblock.allocate ( Dblocks * blocksize,Dblocks * blocksize,nonzeroes );
                        MPI_Recv ( & ( Dblock.pRows[0] ), Dblocks * blocksize + 1, MPI_INT,i,i+size,MPI_COMM_WORLD,&status );
                        /*MPI_Get_count(&status, MPI_INT, &count);
                        printf("Process 0 received %d elements of process %d\n",count,i);*/
                        MPI_Recv ( & ( Dblock.pCols[0] ),nonzeroes, MPI_INT,i,i+2*size,MPI_COMM_WORLD,&status );
                        /*MPI_Get_count(&status, MPI_INT, &count);
                        printf("Process 0 received %d elements of process %d\n",count,i);*/
                        MPI_Recv ( & ( Dblock.pData[0] ),nonzeroes, MPI_DOUBLE,i,i+3*size,MPI_COMM_WORLD,&status );
                        /*MPI_Get_count(&status, MPI_DOUBLE, &count);
                        printf("Process 0 received %d elements of process %d\n",count,i);*/
                        Dmat.addBCSR ( Dblock );
                    }
                }
                //Dmat.writeToFile("D_sparse.csr");
                Dmat.reduceSymmetric();
                Btsparse.transposeIt(1);
                create2x2SymBlockMatrix(Asparse,Btsparse, Dmat, Csparse);
                Btsparse.clear();
                Dmat.clear();
                Csparse.writeToFile(filenameC);
                Csparse.clear();
                if(filenameC != NULL)
                    free(filenameC);
                filenameC=NULL;
            }
        }
        Btsparse.clear();
        blacs_barrier_(&ICTXT2D,"A");

        //AB_sol will contain the solution of A*X=B, distributed across the process rows. Processes in the same process row possess the same part of AB_sol
        double * AB_sol;
        int * DESCAB_sol;
        DESCAB_sol= ( int* ) malloc ( DLEN_ * sizeof ( int ) );
        if ( DESCAB_sol==NULL ) {
            printf ( "unable to allocate memory for descriptor for AB_sol\n" );
            return -1;
        }
        //AB_sol (Adim, Ddim) is distributed across all processes in ICTXT2D starting from process (0,0) into blocks of size (Adim, blocksize)
        descinit_ ( DESCAB_sol, &Adim, &Ddim, &Adim, &blocksize, &i_zero, &i_zero, &ICTXT2D, &Adim, &info );
        if ( info!=0 ) {
            printf ( "Descriptor of matrix C returns info: %d\n",info );
            return info;
        }

        AB_sol=(double *) calloc(Adim * Dcols*blocksize,sizeof(double));

        // Each process calculates the Schur complement of the part of D at its disposal. (see src/schur.cpp)
        // The solution of A * Y = B_j is stored in AB_sol (= A^-1 * B_j)
        blacs_barrier_(&ICTXT2D,"A");
        make_Sij_parallel_denseB ( Asparse, BT_i, B_j, D, lld_D, AB_sol );
        BT_i.clear();
        B_j.clear();

        //From here on the Schur complement S of D is stored in D

        blacs_barrier_ ( &ICTXT2D,"ALL" );

        //The Schur complement is factorised (by ScaLAPACK)
        pdpotrf_ ( "U",&k,D,&i_one,&i_one,DESCD,&info );
        if ( info != 0 ) {
            printf ( "Cholesky decomposition of D was unsuccessful, error returned: %d\n",info );
            return -1;
        }

        //From here on the factorization of the Schur complement S is stored in D

        blacs_barrier_ ( &ICTXT2D,"ALL" );

        //The Schur complement is inverted (by ScaLAPACK)
        pdpotri_ ( "U",&k,D,&i_one,&i_one,DESCD,&info );
        if ( info != 0 ) {
            printf ( "Inverse of D was unsuccessful, error returned: %d\n",info );
            return -1;
        }

        //From here on the inverse of the Schur complement S is stored in D

        blacs_barrier_(&ICTXT2D,"A");

        double* InvD_T_Block = ( double* ) calloc ( Dblocks * blocksize + Adim ,sizeof ( double ) );

        //Diagonal elements of the (1,1) block of C^-1 are still distributed and here they are gathered in InvD_T_Block in the root process.
        if(*position == pcol) {
            for (i=0; i<Ddim; ++i) {
                if (pcol == (i/blocksize) % *dims) {
                    int Dpos = i%blocksize + ((i/blocksize) / *dims) * blocksize ;
                    *(InvD_T_Block + Adim +i) = *( D + Dpos + lld_D * Dpos);
                }
            }
            for ( i=0,j=0; i<Dblocks; ++i,++j ) {
                if ( j==*dims )
                    j=0;
                if ( *position==j ) {
                    dgesd2d_ ( &ICTXT2D,&blocksize,&i_one,InvD_T_Block + Adim + i * blocksize,&blocksize,&i_zero,&i_zero );
                }
                if ( *position==0 ) {
                    dgerv2d_ ( &ICTXT2D,&blocksize,&i_one,InvD_T_Block + Adim + blocksize*i,&blocksize,&j,&j );
                }
            }
        }

        blacs_barrier_(&ICTXT2D,"A");

        //Only the root process performs a selected inversion of A.
        if (iam==0) {

            int pardiso_message_level = 1;

            int pardiso_mtype=-2;

            ParDiSO pardiso ( pardiso_mtype, pardiso_message_level );
            int number_of_processors = 1;
            char* var = getenv("OMP_NUM_THREADS");
            if(var != NULL) {
                sscanf( var, "%d", &number_of_processors );
            }
            else {
                printf("Set environment OMP_NUM_THREADS to 1");
                exit(1);
            }

            pardiso.iparm[2]  = 2;
            pardiso.iparm[3]  = number_of_processors;
            pardiso.iparm[8]  = 0;
            pardiso.iparm[11] = 1;
            pardiso.iparm[13]  = 0;
            pardiso.iparm[28]  = 0;

            //This function calculates the factorisation of A once again so this might be optimized.
            pardiso.findInverseOfA ( Asparse );

            printf("Processor %d inverted matrix A\n",iam);
        }
        blacs_barrier_(&ICTXT2D,"A");

        // To minimize memory usage, and because only the diagonal elements of the inverse are needed, Y' * S is calculated row by rowblocks
        // the diagonal element is calculates as the dot product of this row and the corresponding column of Y. (Y is solution of AY=B)
        double* YSrow= ( double* ) calloc ( Dcols * blocksize,sizeof ( double ) );
        int * DESCYSROW;
        DESCYSROW= ( int* ) malloc ( DLEN_ * sizeof ( int ) );
        if ( DESCYSROW==NULL ) {
            printf ( "unable to allocate memory for descriptor for AB_sol\n" );
            return -1;
        }
        //YSrow (1,Ddim) is distributed across processes of ICTXT2D starting from process (0,0) into blocks of size (1,blocksize)
        descinit_ ( DESCYSROW, &i_one, &Ddim, &i_one,&blocksize, &i_zero, &i_zero, &ICTXT2D, &i_one, &info );
        if ( info!=0 ) {
            printf ( "Descriptor of matrix C returns info: %d\n",info );
            return info;
        }

        blacs_barrier_(&ICTXT2D,"A");

        //Calculating diagonal elements 1 by 1 of the (0,0)-block of C^-1.
        for (i=1; i<=Adim; ++i) {
            pdsymm_ ("R","U",&i_one,&Ddim,&d_one,D,&i_one,&i_one,DESCD,AB_sol,&i,&i_one,DESCAB_sol,&d_zero,YSrow,&i_one,&i_one,DESCYSROW);
            pddot_(&Ddim,InvD_T_Block+i-1,AB_sol,&i,&i_one,DESCAB_sol,&Adim,YSrow,&i_one,&i_one,DESCYSROW,&i_one);
            /*if(*position==1 && pcol==1)
            printf("Dot product in process (1,1) is: %g\n", *(InvD_T_Block+i-1));
            if(*position==0 && pcol==1)
            printf("Dot product in process (0,1) is: %g\n",*(InvD_T_Block+i-1));*/
        }
        blacs_barrier_(&ICTXT2D,"A");
        if(YSrow != NULL)
            free(YSrow);
        YSrow = NULL;
        if(DESCYSROW != NULL)
            free(DESCYSROW);
        DESCYSROW = NULL;
        if(AB_sol != NULL)
            free(AB_sol);
        AB_sol = NULL;
        if(DESCAB_sol != NULL)
            free(DESCAB_sol);
        DESCAB_sol = NULL;
        if(D != NULL)
            free(D);
        D = NULL;
        if(DESCD != NULL)
            free(DESCD);
        DESCD = NULL;

        //Only in the root process we add the diagonal elements of A^-1
        if (iam ==0) {
            for(i=0; i<Adim; ++i) {
                j=Asparse.pRows[i];
                *(InvD_T_Block+i) += Asparse.pData[j];
            }
            Asparse.clear();
            printdense ( Adim+k,1,InvD_T_Block,"diag_inverse_C_parallel.txt" );
        }
        if(InvD_T_Block != NULL)
            free(InvD_T_Block);
        InvD_T_Block = NULL;
	blacs_gridexit_(&ICTXT2D);
    }
    //cout << iam << " reached end before MPI_Barrier" << endl;
    MPI_Barrier(MPI_COMM_WORLD);
    //MPI_Finalize();

    return 0;
}
Matrix3x3 Matrix3x3::makeYShear(f32 factor) {
  Matrix3x3 matrix=makeIdentity();
  matrix.m[0][1]=factor;
  return matrix;    
}
Esempio n. 24
0
Mat4::Mat4(float value) : values{} 
{
	makeIdentity(value);
}
Esempio n. 25
0
/*
   ** inverse = invert(src)
 */
int
invertMatrix(const GLfloat src[16], GLfloat inverse[16])
{
  int i, j, k, swap;
  double t;
  GLfloat temp[4][4];

  for (i = 0; i < 4; i++) {
    for (j = 0; j < 4; j++) {
      temp[i][j] = src[i * 4 + j];
    }
  }
  makeIdentity(inverse);

  for (i = 0; i < 4; i++) {
    /* 
       ** Look for largest element in column */
    swap = i;
    for (j = i + 1; j < 4; j++) {
      if (fabs(temp[j][i]) > fabs(temp[i][i])) {
        swap = j;
      }
    }

    if (swap != i) {
      /* 
         ** Swap rows. */
      for (k = 0; k < 4; k++) {
        t = temp[i][k];
        temp[i][k] = temp[swap][k];
        temp[swap][k] = t;

        t = inverse[i * 4 + k];
        inverse[i * 4 + k] = inverse[swap * 4 + k];
        inverse[swap * 4 + k] = t;
      }
    }
    if (temp[i][i] == 0) {
      /* 
         ** No non-zero pivot.  The matrix is singular, which
         shouldn't ** happen.  This means the user gave us a
         bad matrix. */
      return 0;
    }
    t = temp[i][i];
    for (k = 0; k < 4; k++) {
      temp[i][k] /= t;
      inverse[i * 4 + k] /= t;
    }
    for (j = 0; j < 4; j++) {
      if (j != i) {
        t = temp[j][i];
        for (k = 0; k < 4; k++) {
          temp[j][k] -= temp[i][k] * t;
          inverse[j * 4 + k] -= inverse[i * 4 + k] * t;
        }
      }
    }
  }
  return 1;
}
Matrix3x3 Matrix3x3::makeScale(Vec2 const& s) {
  Matrix3x3 matrix=makeIdentity();
  matrix.m[0][0]=s.x;
  matrix.m[1][1]=s.y;
  return matrix;  
}