static int drawScene() { static int rotation = 0; glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); glBindTexture(GL_TEXTURE_2D, g_CubeTexture); switch(g_PixelFormat) { case YV16: { glActiveTexture(GL_TEXTURE0); glBindTexture(GL_TEXTURE_2D, g_CubeTexture); glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, g_VideoWidth, g_VideoHeight, GL_LUMINANCE, GL_UNSIGNED_BYTE, mipi->lastVideoBuffer); glActiveTexture(GL_TEXTURE1); glBindTexture(GL_TEXTURE_2D, g_UTexture); glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, g_VideoWidth/2, g_VideoHeight, GL_LUMINANCE, GL_UNSIGNED_BYTE, mipi->lastVideoBuffer+(g_VideoWidth*g_VideoHeight)); glActiveTexture(GL_TEXTURE2); glBindTexture(GL_TEXTURE_2D, g_VTexture); glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, g_VideoWidth/2, g_VideoHeight, GL_LUMINANCE, GL_UNSIGNED_BYTE, (mipi->lastVideoBuffer+(g_VideoWidth*g_VideoHeight) + ((g_VideoWidth/2)*g_VideoHeight))); glActiveTexture(GL_TEXTURE0); break; } case NV12:{ glActiveTexture(GL_TEXTURE0); glBindTexture(GL_TEXTURE_2D, g_CubeTexture); glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, g_VideoWidth, g_VideoHeight, GL_LUMINANCE, GL_UNSIGNED_BYTE, mipi->lastVideoBuffer); glActiveTexture(GL_TEXTURE1); glBindTexture(GL_TEXTURE_2D, g_UVTexture); glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, g_VideoWidth/2, g_VideoHeight/2,GL_LUMINANCE_ALPHA, GL_UNSIGNED_BYTE, (mipi->lastVideoBuffer+(g_VideoWidth*g_VideoHeight))); glActiveTexture(GL_TEXTURE0); break; } case RGBP: glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, g_VideoWidth, g_VideoHeight, GL_RGB, GL_UNSIGNED_SHORT_5_6_5, mipi->lastVideoBuffer); break; default: glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, g_VideoWidth, g_VideoHeight, GL_RGBA, GL_UNSIGNED_BYTE, mipi->lastVideoBuffer); break; } GLfloat mat[16], rot[16], scale[16], final[16]; makeIdentity(rot); makeIdentity(mat); makeIdentity(scale); glBindTexture(GL_TEXTURE_2D, g_CubeTexture); glUseProgram(shaderProgram); if (g_Rotation) { rotation += 2; makeYRotMatrix(rotation, mat); matrixMult(final, g_Draw1ViewPort, mat); 
glUniformMatrix4fv(g_u_matrix_p3, 1, GL_FALSE, final); } else {
/// Turns this matrix into a scaling matrix.
///
/// The matrix is reset through makeIdentity() and the first three diagonal
/// entries are then overwritten with the per-axis factors.
///
/// @param x  value written to m[0][0]
/// @param y  value written to m[1][1]
/// @param z  value written to m[2][2]
/// @return   reference to this matrix, so calls can be chained
Matrix4& Matrix4::makeScale(GLfloat x, GLfloat y, GLfloat z)
{
    const GLfloat factors[3] = { x, y, z };

    makeIdentity();
    for (int axis = 0; axis < 3; ++axis)
    {
        m[axis][axis] = factors[axis];
    }
    return *this;
}
//! Set matrix to be a pure scaling matrix. (no translation or rotation components) void setScale(const vec3<Type>& a_scale) { makeIdentity(); m_data[0][0] = a_scale[0]; m_data[1][1] = a_scale[1]; m_data[2][2] = a_scale[2]; }
//! Make this matrix into a pure translation matrix. (no scale or rotation components) void setTranslate(const vec3<Type>& t) { makeIdentity(); m_data[3][0] = t[0]; m_data[3][1] = t[1]; m_data[3][2] = t[2]; }
// Fills this matrix with a perspective projection built from a field of view.
//
//   fFov    - field-of-view angle; it is halved and passed to tan(), so it is
//             expected in radians. It drives the Y scale (m[1][1]).
//   fAspect - aspect ratio; the X scale is the Y scale divided by it.
//   fNear   - near plane distance.
//   fFar    - far plane distance.
//
// The matrix is reset with makeIdentity() first. m[2][3] is set to 1 and
// m[3][3] to 0, so the resulting w takes the incoming z.
// NOTE(review): fNear == fFar or fAspect == 0 lead to a division by zero;
// callers are presumably expected to pass sane values.
//
// Returns a reference to this matrix.
float4x4 &float4x4::makePerspectiveLH(float fFov, float fAspect, float fNear, float fFar)
{
    const float yScale = 1.0f / tan(fFov / 2);
    const float xScale = yScale / fAspect;
    const float depthRange = fFar - fNear;

    makeIdentity();

    // Focal scales on the diagonal.
    m[0][0] = xScale;
    m[1][1] = yScale;

    // Depth mapping terms.
    m[2][2] = fFar / depthRange;
    m[3][2] = -fNear * fFar / depthRange;

    // Perspective divide: w <- z.
    m[2][3] = 1;
    m[3][3] = 0;

    return *this;
}
/// Turns this matrix into a scaling matrix taken from a vector.
///
/// After a reset through makeIdentity(), v.x, v.y and v.z are written to the
/// diagonal entries m[0][0], m[1][1] and m[2][2] respectively.
///
/// @param v  per-axis scale factors
/// @return   reference to this matrix, so calls can be chained
Matrix4& Matrix4::makeScale(const Vector3& v)
{
    makeIdentity();

    m[0][0] = v.x;   // X factor
    m[1][1] = v.y;   // Y factor
    m[2][2] = v.z;   // Z factor

    return *this;
}
/// Turns this matrix into a translation matrix taken from a vector.
///
/// After a reset through makeIdentity(), v.x, v.y and v.z are written to
/// m[0][3], m[1][3] and m[2][3], i.e. the first three rows of index-3 column.
///
/// @param v  translation offsets
/// @return   reference to this matrix, so calls can be chained
Matrix4& Matrix4::makeTranslate(const Vector3& v)
{
    makeIdentity();

    m[0][3] = v.x;   // offset along X
    m[1][3] = v.y;   // offset along Y
    m[2][3] = v.z;   // offset along Z

    return *this;
}
/// Turns this matrix into a translation matrix.
///
/// After a reset through makeIdentity(), the three offsets are written to
/// m[0][3], m[1][3] and m[2][3].
///
/// @param x  value written to m[0][3]
/// @param y  value written to m[1][3]
/// @param z  value written to m[2][3]
/// @return   reference to this matrix, so calls can be chained
Matrix4& Matrix4::makeTranslate(GLfloat x, GLfloat y, GLfloat z)
{
    const GLfloat offsets[3] = { x, y, z };

    makeIdentity();
    for (int row = 0; row < 3; ++row)
    {
        m[row][3] = offsets[row];
    }
    return *this;
}
// Makes this transform a rotation about the Z axis.
//
// The angle is handed straight to cos()/sin(), so it is expected in radians.
// After makeIdentity(), the upper-left 2x2 block becomes
//     [ cos  -sin ]
//     [ sin   cos ]
void Transform::makeRotz(real angle)
{
    const real c = cos(angle);
    const real s = sin(angle);
    const real block[2][2] = {
        { c, -s },
        { s,  c }
    };

    makeIdentity();
    for (int row = 0; row < 2; ++row)
    {
        for (int col = 0; col < 2; ++col)
        {
            m[row][col] = block[row][col];
        }
    }
}
/// Turns this matrix into a rotation about the X axis.
///
/// @param deg  rotation angle in degrees (converted to radians internally)
/// @return     reference to this matrix, so calls can be chained
Matrix4& Matrix4::makeRotateX(GLfloat deg)
{
    // Degrees -> radians, narrowed back to GLfloat before the trig calls.
    const GLfloat rad = deg / 180.0 * M_PI;
    const double cosine = cos(rad);
    const double sine = sin(rad);

    makeIdentity();

    // Only the Y/Z block differs from the identity.
    m[1][1] = cosine;
    m[1][2] = -sine;
    m[2][1] = sine;
    m[2][2] = cosine;

    return *this;
}
/// Turns this matrix into a rotation about the Y axis.
///
/// @param deg  rotation angle in degrees (converted to radians internally)
/// @return     reference to this matrix, so calls can be chained
Matrix4& Matrix4::makeRotateY(GLfloat deg)
{
    // Degrees -> radians, narrowed back to GLfloat before the trig calls.
    const GLfloat rad = deg / 180.0 * M_PI;
    const double cosine = cos(rad);
    const double sine = sin(rad);

    makeIdentity();

    // Only the X/Z block differs from the identity.
    m[0][0] = cosine;
    m[0][2] = sine;
    m[2][0] = -sine;
    m[2][2] = cosine;

    return *this;
}
// Builds a 2D rotation matrix.
//
// Starts from makeIdentity() and overwrites the upper-left 2x2 block:
//     m[0][0] =  cos    m[0][1] = sin
//     m[1][0] = -sin    m[1][1] = cos
//
// radian - rotation angle in radians (passed directly to cos/sin).
// Returns the new matrix by value.
//
// std::cos / std::sin are used instead of std::cosf / std::sinf: the
// f-suffixed names are not declared in namespace std by every standard
// library, while the overloaded forms are always available from <cmath>.
// NOTE(review): this assumes f32 is a typedef for float, so the float
// overloads are selected - confirm.
Matrix3x3 Matrix3x3::makeRotation(f32 radian)
{
    Matrix3x3 matrix = makeIdentity();

    const f32 c = std::cos(radian);
    const f32 s = std::sin(radian);

    matrix.m[0][0] = c;
    matrix.m[1][0] = -s;
    matrix.m[0][1] = s;
    matrix.m[1][1] = c;

    return matrix;
}
void TransformationMatrix::recompose(const DecomposedType& decomp) { makeIdentity(); // first apply perspective m_matrix[0][3] = (float) decomp.perspectiveX; m_matrix[1][3] = (float) decomp.perspectiveY; m_matrix[2][3] = (float) decomp.perspectiveZ; m_matrix[3][3] = (float) decomp.perspectiveW; // now translate translate3d((float) decomp.translateX, (float) decomp.translateY, (float) decomp.translateZ); // apply rotation double xx = decomp.quaternionX * decomp.quaternionX; double xy = decomp.quaternionX * decomp.quaternionY; double xz = decomp.quaternionX * decomp.quaternionZ; double xw = decomp.quaternionX * decomp.quaternionW; double yy = decomp.quaternionY * decomp.quaternionY; double yz = decomp.quaternionY * decomp.quaternionZ; double yw = decomp.quaternionY * decomp.quaternionW; double zz = decomp.quaternionZ * decomp.quaternionZ; double zw = decomp.quaternionZ * decomp.quaternionW; // Construct a composite rotation matrix from the quaternion values TransformationMatrix rotationMatrix(1 - 2 * (yy + zz), 2 * (xy - zw), 2 * (xz + yw), 0, 2 * (xy + zw), 1 - 2 * (xx + zz), 2 * (yz - xw), 0, 2 * (xz - yw), 2 * (yz + xw), 1 - 2 * (xx + yy), 0, 0, 0, 0, 1); multLeft(rotationMatrix); // now apply skew if (decomp.skewYZ) { TransformationMatrix tmp; tmp.setM32((float) decomp.skewYZ); multLeft(tmp); } if (decomp.skewXZ) { TransformationMatrix tmp; tmp.setM31((float) decomp.skewXZ); multLeft(tmp); } if (decomp.skewXY) { TransformationMatrix tmp; tmp.setM21((float) decomp.skewXY); multLeft(tmp); } // finally, apply scale scale3d((float) decomp.scaleX, (float) decomp.scaleY, (float) decomp.scaleZ); }
void initMatrices() { theta = 0.0f; scaleAmount = 1.0f; // Allocate memory for the matrices and initialize them to the Identity matrix rotXMatrix = new GLfloat[16]; makeIdentity(rotXMatrix); rotYMatrix = new GLfloat[16]; makeIdentity(rotYMatrix); rotZMatrix = new GLfloat[16]; makeIdentity(rotZMatrix); transMatrix = new GLfloat[16]; makeIdentity(transMatrix); scaleMatrix = new GLfloat[16]; makeIdentity(scaleMatrix); tempMatrix1 = new GLfloat[16]; makeIdentity(tempMatrix1); M = new GLfloat[16]; makeIdentity(M); V = new GLfloat[16]; makeIdentity(V); P = new GLfloat[16]; makeIdentity(P); // Set up the (P)erspective matrix only once! Arguments are 1) the resulting matrix, 2) FoV, 3) aspect ratio, 4) near plane 5) far plane makePerspectiveMatrix(P, 60.0f, 1.0f, 1.0f, 1000.0f); }
/// Turns this matrix into a rotation about an arbitrary axis.
///
/// The matrix is reset through makeIdentity(), the axis copy is normalized
/// and the angle converted from degrees to radians; the upper-left 3x3 block
/// is then filled from the sine, the cosine and the axis components.
///
/// @param deg  rotation angle in degrees
/// @param a    rotation axis (taken by value; normalized locally)
/// @return     reference to this matrix, so calls can be chained
Matrix4& Matrix4::makeRotate(GLfloat deg, Vector3 a)
{
    makeIdentity();
    a.normalize();
    deg = deg / 180.0 * M_PI;   // degrees -> radians

    // Evaluate the trig terms once; every entry below reuses them.
    const auto s = sin(deg);
    const auto c = cos(deg);
    const auto t = 1 - c;

    // Row 0
    m[0][0] = 1 + (a.x * a.x - 1) * t;
    m[0][1] = -a.z * s + a.x * a.y * t;
    m[0][2] = a.y * s + a.x * a.z * t;

    // Row 1
    m[1][0] = a.z * s + a.x * a.y * t;
    m[1][1] = 1 + (a.y * a.y - 1) * t;
    m[1][2] = -a.x * s + a.y * a.z * t;

    // Row 2
    m[2][0] = -a.y * s + a.z * a.x * t;
    m[2][1] = a.x * s + a.z * a.y * t;
    m[2][2] = 1 + (a.z * a.z - 1) * t;

    return *this;
}
void SbMatrix::setTransform(const SbVec3f &translation, const SbRotation &rotation, const SbVec3f &scaleFactor, const SbRotation &scaleOrientation, const SbVec3f ¢er) { #define TRANSLATE(vec) m.setTranslate(vec), multLeft(m) #define ROTATE(rot) rot.getValue(m), multLeft(m) SbMatrix m; makeIdentity(); if (translation != SbVec3f(0,0,0)) TRANSLATE(translation); if (center != SbVec3f(0,0,0)) TRANSLATE(center); if (rotation != SbRotation(0,0,0,1)) ROTATE(rotation); if (scaleFactor != SbVec3f(1,1,1)) { SbRotation so = scaleOrientation; if (so != SbRotation(0,0,0,1)) ROTATE(so); m.setScale(scaleFactor); multLeft(m); if (so != SbRotation(0,0,0,1)) { so.invert(); ROTATE(so); } } if (center != SbVec3f(0,0,0)) TRANSLATE(-center); #undef TRANSLATE #undef ROTATE }
// Builds a rotation matrix from three per-axis angles.
//
// The angles are stored in DegreeX/DegreeY/DegreeZ, the matrix is set to
// identity, and rotateX / rotateY / rotateZ are then applied, in that order,
// for every angle that is non-zero.
//
// Efficiency note: the base-class default constructor runs first, then the
// identity is written, and only then are the rotations constructed and
// multiplied in. Initialising in one go would be cheaper if it becomes
// possible.
ToyRotationMatrix::ToyRotationMatrix(float degreeX /*=0*/, float degreeY /*=0*/, float degreeZ /*=0*/)
    : ToyMatrix<float>(),
      DegreeX(degreeX),
      DegreeY(degreeY),
      DegreeZ(degreeZ)
{
    makeIdentity();

    if (DegreeX != 0)
    {
        rotateX(DegreeX);
    }
    if (DegreeY != 0)
    {
        rotateY(DegreeY);
    }
    if (DegreeZ != 0)
    {
        rotateZ(DegreeZ);
    }
}
// Sets this quaternion to the rotation that takes direction `from` onto
// direction `to` (based on Stan Melax's article in Game Programming Gems).
//
// Both inputs are copied and normalized locally; the arguments themselves
// are left untouched. Three cases are distinguished by the dot product:
//   * >= 1  : the directions coincide, the result is makeIdentity();
//   * <= -1 : the directions are opposite, a half-turn about an axis
//             perpendicular to `from` is used;
//   * else  : the general half-angle construction.
//
// Returns a reference to this quaternion.
Quaternion& Quaternion::rotationFromTo(const Vector3& from, const Vector3& to)
{
    // Work on normalized copies, the parameters are const.
    Vector3 src = from;
    Vector3 dst = to;
    src.normalize();
    dst.normalize();

    const FLOAT32 cosAngle = src.dotProduct(dst);

    // Same direction: nothing to rotate.
    if (cosAngle >= 1.0f)
    {
        return makeIdentity();
    }

    // Exactly opposite: pick any axis perpendicular to the source.
    if (cosAngle <= -1.0f)
    {
        Vector3 axis(1.0f, 0.f, 0.f);
        axis = axis.crossProduct(src);
        if (axis.length() == 0)
        {
            // The source lies along X, fall back to the Y axis.
            axis.set(0.f, 1.f, 0.f);
            axis = axis.crossProduct(src);
        }

        // Same as fromAngleAxis(PI, axis).normalize();
        set(axis.x, axis.y, axis.z, 0);
        normalise();
        return *this;
    }

    // General case.
    const FLOAT32 s = sqrtf((1 + cosAngle) * 2);
    const FLOAT32 invs = 1.f / s;   // one division, reused for the cross product
    const Vector3 c = src.crossProduct(dst)*invs;

    set(c.x, c.y, c.z, s * 0.5f);
    normalise();
    return *this;
}
// Builds a 2D translation matrix.
//
// Starts from makeIdentity() and stores the offset in row 2:
// m[2][0] = t.x, m[2][1] = t.y.
// Returns the new matrix by value.
Matrix3x3 Matrix3x3::makeTranslation(Vec2 const& t)
{
    Matrix3x3 result = makeIdentity();

    result.m[2][0] = t.x;
    result.m[2][1] = t.y;

    return result;
}
// Makes this transform a translation: the transform is reset with
// makeIdentity() and the three offsets are then handed to setRightSide().
void Transform::makeTranslation(real tx, real ty, real tz)
{
    makeIdentity();
    setRightSide(tx, ty, tz);
}
//! The default constructor. The matrix will be identity. matrix4() { makeIdentity(); }
int main(int argc, char **argv) { int info, i, j, pcol, Adim; double *D; int *DESCD; CSRdouble BT_i, B_j, Xsparse, Zsparse, Btsparse; /*BT_i.allocate(0,0,0); B_j.allocate(0,0,0); Xsparse.allocate(0,0,0); Zsparse.allocate(0,0,0); Btsparse.allocate(0,0,0);*/ //Initialise MPI and some MPI-variables info = MPI_Init ( &argc, &argv ); if ( info != 0 ) { printf ( "Error in MPI initialisation: %d\n",info ); return info; } position= ( int* ) calloc ( 2,sizeof ( int ) ); if ( position==NULL ) { printf ( "unable to allocate memory for processor position coordinate\n" ); return EXIT_FAILURE; } dims= ( int* ) calloc ( 2,sizeof ( int ) ); if ( dims==NULL ) { printf ( "unable to allocate memory for grid dimensions coordinate\n" ); return EXIT_FAILURE; } //BLACS is the interface used by PBLAS and ScaLAPACK on top of MPI blacs_pinfo_ ( &iam,&size ); //determine the number of processes involved info=MPI_Dims_create ( size, 2, dims ); //determine the best 2D cartesian grid with the number of processes if ( info != 0 ) { printf ( "Error in MPI creation of dimensions: %d\n",info ); return info; } //Until now the code can only work with square process grids //So we try to get the biggest square grid possible with the number of processes involved if (*dims != *(dims+1)) { while (*dims * *dims > size) *dims -=1; *(dims+1)= *dims; if (iam==0) printf("WARNING: %d processor(s) unused due to reformatting to a square process grid\n", size - (*dims * *dims)); size = *dims * *dims; //cout << "New size of process grid: " << size << endl; } blacs_get_ ( &i_negone,&i_zero,&ICTXT2D ); //Initialisation of the BLACS process grid, which is referenced as ICTXT2D blacs_gridinit_ ( &ICTXT2D,"R",dims, dims+1 ); if (iam < size) { //The rank (iam) of the process is mapped to a 2D grid: position= (process row, process column) blacs_pcoord_ ( &ICTXT2D,&iam,position, position+1 ); if ( *position ==-1 ) { printf ( "Error in proces grid\n" ); return -1; } //Filenames, dimensions of all matrices and other 
important variables are read in as global variables (see src/readinput.cpp) info=read_input ( *++argv ); if ( info!=0 ) { printf ( "Something went wrong when reading input file for processor %d\n",iam ); return -1; } //blacs_barrier is used to stop any process of going beyond this point before all processes have made it up to this point. blacs_barrier_ ( &ICTXT2D,"ALL" ); if ( * ( position+1 ) ==0 && *position==0 ) printf ( "Reading of input-file succesful\n" ); if ( * ( position+1 ) ==0 && *position==0 ) { printf("\nA linear mixed model with %d observations, %d genotypes, %d random effects and %d fixed effects\n", n,k,m,l); printf("was analyzed using %d (%d x %d) processors\n",size,*dims,*(dims+1)); } //Dimension of A (sparse matrix) is the number of fixed effects(m) + the sparse random effects (l) Adim=m+l; //Dimension of D (dense matrix) is the number of dense effects (k) Ddim=k; pcol= * ( position+1 ); //Define number of blocks needed to store a complete column/row of D Dblocks= Ddim%blocksize==0 ? Ddim/blocksize : Ddim/blocksize +1; //Define the number of rowblocks needed by the current process to store its part of the dense matrix D Drows= ( Dblocks - *position ) % *dims == 0 ? ( Dblocks- *position ) / *dims : ( Dblocks- *position ) / *dims +1; Drows= Drows<1? 1 : Drows; //Define the number of columnblocks needed by the current process to store its part of the dense matrix D Dcols= ( Dblocks - pcol ) % * ( dims+1 ) == 0 ? ( Dblocks- pcol ) / * ( dims+1 ) : ( Dblocks- pcol ) / * ( dims+1 ) +1; Dcols=Dcols<1? 
1 : Dcols; //Define the local leading dimension of D (keeping in mind that matrices are always stored column-wise) lld_D=Drows*blocksize; //Initialise the descriptor of the dense distributed matrix DESCD= ( int* ) malloc ( DLEN_ * sizeof ( int ) ); if ( DESCD==NULL ) { printf ( "unable to allocate memory for descriptor for C\n" ); return -1; } //D with dimensions (Ddim,Ddim) is distributed over all processes in ICTXT2D, with the first element in process (0,0) //D is distributed into blocks of size (blocksize,blocksize), having a local leading dimension lld_D in this specific process descinit_ ( DESCD, &Ddim, &Ddim, &blocksize, &blocksize, &i_zero, &i_zero, &ICTXT2D, &lld_D, &info ); if ( info!=0 ) { printf ( "Descriptor of matrix C returns info: %d\n",info ); return info; } //Allocate the space necessary to store the part of D that is held into memory of this process. D = ( double* ) calloc ( Drows * blocksize * Dcols * blocksize,sizeof ( double ) ); if ( D==NULL ) { printf ( "unable to allocate memory for Matrix D (required: %ld bytes)\n", Drows * blocksize * Dcols * blocksize * sizeof ( double ) ); return EXIT_FAILURE; } blacs_barrier_ ( &ICTXT2D,"ALL" ); if (iam==0) printf ( "Start set up of B & D\n" ); blacs_barrier_ ( &ICTXT2D,"ALL" ); //set_up_BD is declared in readdist.cpp and constructs the parts of matrices B & D in each processor //which are necessary to create the distributed Schur complement of D info = set_up_BD ( DESCD, D, BT_i, B_j, Btsparse ); //printdense(Drows*blocksize, Dcols * blocksize,D,"matrix_D.txt"); blacs_barrier_ ( &ICTXT2D,"ALL" ); if (iam==0) printf ( "Matrices B & D set up\n" ); if(printD_bool) { int array_of_gsizes[2], array_of_distribs[2], array_of_dargs[2], array_of_psize[2] ; int buffersize; MPI_Datatype file_type; MPI_File fh; MPI_Status status; array_of_gsizes[0]=Dblocks * blocksize; array_of_gsizes[1]=Dblocks * blocksize; array_of_distribs[0]=MPI_DISTRIBUTE_CYCLIC; array_of_distribs[1]=MPI_DISTRIBUTE_CYCLIC; 
array_of_dargs[0]=blocksize; array_of_dargs[1]=blocksize; array_of_psize[0]=*dims; array_of_psize[1]=*(dims + 1); MPI_Type_create_darray(size,iam,2,array_of_gsizes, array_of_distribs, array_of_dargs, array_of_psize, MPI_ORDER_FORTRAN, MPI_DOUBLE, &file_type); MPI_Type_commit(&file_type); info = MPI_File_open(MPI_COMM_WORLD, filenameD, MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh); /*if ( ( Drows-1 ) % *(dims+1) == *position && ( Dcols-1 ) % *(dims) == pcol && Ddim%blocksize !=0 ) buffersize=((Drows-1) * blocksize + Ddim % blocksize) * ((Dcols-1) * blocksize + Ddim % blocksize); else if ( ( Drows-1 ) % *(dims+1) == *position && Ddim%blocksize !=0 ) buffersize=((Drows-1) * blocksize + Ddim % blocksize) * Dcols * blocksize; else if ( ( Dcols-1 ) % *(dims) == *position && Ddim%blocksize !=0 ) buffersize=((Dcols-1) * blocksize + Ddim % blocksize) * Drows * blocksize; else*/ buffersize= Dcols * Drows * blocksize * blocksize; MPI_File_set_view(fh, 0, MPI_DOUBLE, file_type, "native", MPI_INFO_NULL); info =MPI_File_write_all(fh, D,buffersize, MPI_DOUBLE, &status); MPI_File_close(&fh); if(iam==0) { printf("Matrix D (dimension %d) is printed in file %s\n", Dblocks*blocksize,filenameD); } if(filenameD != NULL) free(filenameD); filenameD=NULL; //delete[] array_of_gsizes, delete[] array_of_distribs, delete[] array_of_dargs, delete[] array_of_psize; } //Now every matrix has to set up the sparse matrix A, consisting of X'X, X'Z, Z'X and Z'Z + lambda*I Xsparse.loadFromFile ( filenameX ); Zsparse.loadFromFile ( filenameZ ); if(filenameX != NULL) free(filenameX); filenameX=NULL; if(filenameZ != NULL) free(filenameZ); filenameZ=NULL; smat_t *X_smat, *Z_smat; X_smat= (smat_t *) calloc(1,sizeof(smat_t)); Z_smat= (smat_t *) calloc(1,sizeof(smat_t)); X_smat = smat_new_from ( Xsparse.nrows,Xsparse.ncols,Xsparse.pRows,Xsparse.pCols,Xsparse.pData,0,0 ); Z_smat = smat_new_from ( Zsparse.nrows,Zsparse.ncols,Zsparse.pRows,Zsparse.pCols,Zsparse.pData,0,0 ); smat_t *Xt_smat, *Zt_smat; 
Xt_smat= (smat_t *) calloc(1,sizeof(smat_t)); Zt_smat= (smat_t *) calloc(1,sizeof(smat_t)); Xt_smat = smat_copy_trans ( X_smat ); Zt_smat = smat_copy_trans ( Z_smat ); CSRdouble Asparse; smat_t *XtX_smat, *XtZ_smat, *ZtZ_smat, *lambda_smat, *ZtZlambda_smat; XtX_smat= (smat_t *) calloc(1,sizeof(smat_t)); XtZ_smat= (smat_t *) calloc(1,sizeof(smat_t)); ZtZ_smat= (smat_t *) calloc(1,sizeof(smat_t)); XtX_smat = smat_matmul ( Xt_smat, X_smat ); XtZ_smat = smat_matmul ( Xt_smat, Z_smat ); ZtZ_smat = smat_matmul ( Zt_smat,Z_smat ); Xsparse.clear(); Zsparse.clear(); smat_free(Xt_smat); smat_free(Zt_smat); /*smat_free(X_smat); smat_free(Z_smat);*/ CSRdouble Imat; makeIdentity ( l, Imat ); lambda_smat= (smat_t *) calloc(1,sizeof(smat_t)); lambda_smat = smat_new_from ( Imat.nrows,Imat.ncols,Imat.pRows,Imat.pCols,Imat.pData,0,0 ); smat_scale_diag ( lambda_smat, -lambda ); ZtZlambda_smat= (smat_t *) calloc(1,sizeof(smat_t)); ZtZlambda_smat = smat_add ( lambda_smat, ZtZ_smat ); smat_free(ZtZ_smat); //smat_free(lambda_smat); smat_to_symmetric_structure ( XtX_smat ); smat_to_symmetric_structure ( ZtZlambda_smat ); CSRdouble XtX_sparse, XtZ_sparse, ZtZ_sparse; XtX_sparse.make2 ( XtX_smat->m,XtX_smat->n,XtX_smat->nnz,XtX_smat->ia,XtX_smat->ja,XtX_smat->a ); XtZ_sparse.make2 ( XtZ_smat->m,XtZ_smat->n,XtZ_smat->nnz,XtZ_smat->ia,XtZ_smat->ja,XtZ_smat->a ); ZtZ_sparse.make2 ( ZtZlambda_smat->m,ZtZlambda_smat->n,ZtZlambda_smat->nnz,ZtZlambda_smat->ia,ZtZlambda_smat->ja,ZtZlambda_smat->a ); /*smat_free(XtX_smat); smat_free(XtZ_smat); smat_free(ZtZlambda_smat);*/ Imat.clear(); if (iam==0) { cout << "*** [ t t ] *** " << endl; cout << "*** [ X X X Z ] *** " << endl; cout << "*** [ ] *** " << endl; cout << "*** G e n e r a t i n g m a t r i x A = [ ] *** " << endl; cout << "*** [ t t ] *** " << endl; cout << "*** [ Z X Z Z ] *** " << endl; } //Sparse matrix A only contains the upper triangular part of A create2x2SymBlockMatrix ( XtX_sparse, XtZ_sparse, ZtZ_sparse, Asparse ); 
//Asparse.writeToFile("A_sparse.csr"); smat_free(XtX_smat); smat_free(XtZ_smat); smat_free(ZtZlambda_smat); XtX_sparse.clear(); XtZ_sparse.clear(); ZtZ_sparse.clear(); blacs_barrier_ ( &ICTXT2D,"ALL" ); if(printsparseC_bool) { CSRdouble Dmat, Dblock, Csparse; Dblock.nrows=Dblocks * blocksize; Dblock.ncols=Dblocks * blocksize; Dblock.allocate(Dblocks * blocksize, Dblocks * blocksize, 0); Dmat.allocate(0,0,0); for (i=0; i<Drows; ++i) { for(j=0; j<Dcols; ++j) { dense2CSR_sub(D + i * blocksize + j * lld_D * blocksize,blocksize,blocksize,lld_D,Dblock,( * ( dims) * i + *position ) *blocksize, ( * ( dims+1 ) * j + pcol ) *blocksize); if ( Dblock.nonzeros>0 ) { if ( Dmat.nonzeros==0 ) { Dmat.make2 ( Dblock.nrows,Dblock.ncols,Dblock.nonzeros,Dblock.pRows,Dblock.pCols,Dblock.pData ); } else { Dmat.addBCSR ( Dblock ); } } Dblock.clear(); } } blacs_barrier_(&ICTXT2D,"A"); if ( iam!=0 ) { //Each process other than root sends its Dmat to the root process. MPI_Send ( & ( Dmat.nonzeros ),1, MPI_INT,0,iam,MPI_COMM_WORLD ); MPI_Send ( & ( Dmat.pRows[0] ),Dmat.nrows + 1, MPI_INT,0,iam+size,MPI_COMM_WORLD ); MPI_Send ( & ( Dmat.pCols[0] ),Dmat.nonzeros, MPI_INT,0,iam+2*size,MPI_COMM_WORLD ); MPI_Send ( & ( Dmat.pData[0] ),Dmat.nonzeros, MPI_DOUBLE,0,iam+3*size,MPI_COMM_WORLD ); Dmat.clear(); } else { for ( i=1; i<size; ++i ) { // The root process receives parts of Dmat sequentially from all processes and directly adds them together. 
int nonzeroes, count; MPI_Recv ( &nonzeroes,1,MPI_INT,i,i,MPI_COMM_WORLD,&status ); /*MPI_Get_count(&status, MPI_INT, &count); printf("Process 0 received %d elements of process %d\n",count,i);*/ if(nonzeroes>0) { printf("Nonzeroes : %d\n ",nonzeroes); Dblock.allocate ( Dblocks * blocksize,Dblocks * blocksize,nonzeroes ); MPI_Recv ( & ( Dblock.pRows[0] ), Dblocks * blocksize + 1, MPI_INT,i,i+size,MPI_COMM_WORLD,&status ); /*MPI_Get_count(&status, MPI_INT, &count); printf("Process 0 received %d elements of process %d\n",count,i);*/ MPI_Recv ( & ( Dblock.pCols[0] ),nonzeroes, MPI_INT,i,i+2*size,MPI_COMM_WORLD,&status ); /*MPI_Get_count(&status, MPI_INT, &count); printf("Process 0 received %d elements of process %d\n",count,i);*/ MPI_Recv ( & ( Dblock.pData[0] ),nonzeroes, MPI_DOUBLE,i,i+3*size,MPI_COMM_WORLD,&status ); /*MPI_Get_count(&status, MPI_DOUBLE, &count); printf("Process 0 received %d elements of process %d\n",count,i);*/ Dmat.addBCSR ( Dblock ); } } //Dmat.writeToFile("D_sparse.csr"); Dmat.reduceSymmetric(); Btsparse.transposeIt(1); create2x2SymBlockMatrix(Asparse,Btsparse, Dmat, Csparse); Btsparse.clear(); Dmat.clear(); Csparse.writeToFile(filenameC); Csparse.clear(); if(filenameC != NULL) free(filenameC); filenameC=NULL; } } Btsparse.clear(); blacs_barrier_(&ICTXT2D,"A"); //AB_sol will contain the solution of A*X=B, distributed across the process rows. 
Processes in the same process row possess the same part of AB_sol double * AB_sol; int * DESCAB_sol; DESCAB_sol= ( int* ) malloc ( DLEN_ * sizeof ( int ) ); if ( DESCAB_sol==NULL ) { printf ( "unable to allocate memory for descriptor for AB_sol\n" ); return -1; } //AB_sol (Adim, Ddim) is distributed across all processes in ICTXT2D starting from process (0,0) into blocks of size (Adim, blocksize) descinit_ ( DESCAB_sol, &Adim, &Ddim, &Adim, &blocksize, &i_zero, &i_zero, &ICTXT2D, &Adim, &info ); if ( info!=0 ) { printf ( "Descriptor of matrix C returns info: %d\n",info ); return info; } AB_sol=(double *) calloc(Adim * Dcols*blocksize,sizeof(double)); // Each process calculates the Schur complement of the part of D at its disposal. (see src/schur.cpp) // The solution of A * Y = B_j is stored in AB_sol (= A^-1 * B_j) blacs_barrier_(&ICTXT2D,"A"); make_Sij_parallel_denseB ( Asparse, BT_i, B_j, D, lld_D, AB_sol ); BT_i.clear(); B_j.clear(); //From here on the Schur complement S of D is stored in D blacs_barrier_ ( &ICTXT2D,"ALL" ); //The Schur complement is factorised (by ScaLAPACK) pdpotrf_ ( "U",&k,D,&i_one,&i_one,DESCD,&info ); if ( info != 0 ) { printf ( "Cholesky decomposition of D was unsuccessful, error returned: %d\n",info ); return -1; } //From here on the factorization of the Schur complement S is stored in D blacs_barrier_ ( &ICTXT2D,"ALL" ); //The Schur complement is inverted (by ScaLAPACK) pdpotri_ ( "U",&k,D,&i_one,&i_one,DESCD,&info ); if ( info != 0 ) { printf ( "Inverse of D was unsuccessful, error returned: %d\n",info ); return -1; } //From here on the inverse of the Schur complement S is stored in D blacs_barrier_(&ICTXT2D,"A"); double* InvD_T_Block = ( double* ) calloc ( Dblocks * blocksize + Adim ,sizeof ( double ) ); //Diagonal elements of the (1,1) block of C^-1 are still distributed and here they are gathered in InvD_T_Block in the root process. 
if(*position == pcol) { for (i=0; i<Ddim; ++i) { if (pcol == (i/blocksize) % *dims) { int Dpos = i%blocksize + ((i/blocksize) / *dims) * blocksize ; *(InvD_T_Block + Adim +i) = *( D + Dpos + lld_D * Dpos); } } for ( i=0,j=0; i<Dblocks; ++i,++j ) { if ( j==*dims ) j=0; if ( *position==j ) { dgesd2d_ ( &ICTXT2D,&blocksize,&i_one,InvD_T_Block + Adim + i * blocksize,&blocksize,&i_zero,&i_zero ); } if ( *position==0 ) { dgerv2d_ ( &ICTXT2D,&blocksize,&i_one,InvD_T_Block + Adim + blocksize*i,&blocksize,&j,&j ); } } } blacs_barrier_(&ICTXT2D,"A"); //Only the root process performs a selected inversion of A. if (iam==0) { int pardiso_message_level = 1; int pardiso_mtype=-2; ParDiSO pardiso ( pardiso_mtype, pardiso_message_level ); int number_of_processors = 1; char* var = getenv("OMP_NUM_THREADS"); if(var != NULL) { sscanf( var, "%d", &number_of_processors ); } else { printf("Set environment OMP_NUM_THREADS to 1"); exit(1); } pardiso.iparm[2] = 2; pardiso.iparm[3] = number_of_processors; pardiso.iparm[8] = 0; pardiso.iparm[11] = 1; pardiso.iparm[13] = 0; pardiso.iparm[28] = 0; //This function calculates the factorisation of A once again so this might be optimized. pardiso.findInverseOfA ( Asparse ); printf("Processor %d inverted matrix A\n",iam); } blacs_barrier_(&ICTXT2D,"A"); // To minimize memory usage, and because only the diagonal elements of the inverse are needed, Y' * S is calculated row by rowblocks // the diagonal element is calculates as the dot product of this row and the corresponding column of Y. 
(Y is solution of AY=B) double* YSrow= ( double* ) calloc ( Dcols * blocksize,sizeof ( double ) ); int * DESCYSROW; DESCYSROW= ( int* ) malloc ( DLEN_ * sizeof ( int ) ); if ( DESCYSROW==NULL ) { printf ( "unable to allocate memory for descriptor for AB_sol\n" ); return -1; } //YSrow (1,Ddim) is distributed across processes of ICTXT2D starting from process (0,0) into blocks of size (1,blocksize) descinit_ ( DESCYSROW, &i_one, &Ddim, &i_one,&blocksize, &i_zero, &i_zero, &ICTXT2D, &i_one, &info ); if ( info!=0 ) { printf ( "Descriptor of matrix C returns info: %d\n",info ); return info; } blacs_barrier_(&ICTXT2D,"A"); //Calculating diagonal elements 1 by 1 of the (0,0)-block of C^-1. for (i=1; i<=Adim; ++i) { pdsymm_ ("R","U",&i_one,&Ddim,&d_one,D,&i_one,&i_one,DESCD,AB_sol,&i,&i_one,DESCAB_sol,&d_zero,YSrow,&i_one,&i_one,DESCYSROW); pddot_(&Ddim,InvD_T_Block+i-1,AB_sol,&i,&i_one,DESCAB_sol,&Adim,YSrow,&i_one,&i_one,DESCYSROW,&i_one); /*if(*position==1 && pcol==1) printf("Dot product in process (1,1) is: %g\n", *(InvD_T_Block+i-1)); if(*position==0 && pcol==1) printf("Dot product in process (0,1) is: %g\n",*(InvD_T_Block+i-1));*/ } blacs_barrier_(&ICTXT2D,"A"); if(YSrow != NULL) free(YSrow); YSrow = NULL; if(DESCYSROW != NULL) free(DESCYSROW); DESCYSROW = NULL; if(AB_sol != NULL) free(AB_sol); AB_sol = NULL; if(DESCAB_sol != NULL) free(DESCAB_sol); DESCAB_sol = NULL; if(D != NULL) free(D); D = NULL; if(DESCD != NULL) free(DESCD); DESCD = NULL; //Only in the root process we add the diagonal elements of A^-1 if (iam ==0) { for(i=0; i<Adim; ++i) { j=Asparse.pRows[i]; *(InvD_T_Block+i) += Asparse.pData[j]; } Asparse.clear(); printdense ( Adim+k,1,InvD_T_Block,"diag_inverse_C_parallel.txt" ); } if(InvD_T_Block != NULL) free(InvD_T_Block); InvD_T_Block = NULL; blacs_gridexit_(&ICTXT2D); } //cout << iam << " reached end before MPI_Barrier" << endl; MPI_Barrier(MPI_COMM_WORLD); //MPI_Finalize(); return 0; }
// Builds a shear matrix: starts from makeIdentity() and stores the shear
// factor in m[0][1]. Returns the new matrix by value.
Matrix3x3 Matrix3x3::makeYShear(f32 factor)
{
    Matrix3x3 result = makeIdentity();

    result.m[0][1] = factor;

    return result;
}
// Constructs the matrix from a single value: the storage is value-initialised
// first and the value is then forwarded to makeIdentity().
Mat4::Mat4(float value)
    : values{}
{
    makeIdentity(value);
}
/*
** inverse = invert(src)
**
** Inverts a 4x4 matrix stored as 16 consecutive values by Gauss-Jordan
** elimination with partial pivoting.
**
** src     - matrix to invert; it is copied to a local buffer first and
**           never modified.
** inverse - receives the result. It starts as the identity (through
**           makeIdentity()) and undergoes the same row operations as the
**           working copy.
**
** Returns 1 on success, 0 when no non-zero pivot can be found (singular
** matrix). On failure `inverse` holds a partially reduced matrix and must
** not be used.
*/
int invertMatrix(const GLfloat src[16], GLfloat inverse[16])
{
    int i, j, k, swap;
    double t;
    GLfloat temp[4][4];

    /* Work on a copy so that src stays untouched. */
    for (i = 0; i < 4; i++) {
        for (j = 0; j < 4; j++) {
            temp[i][j] = src[i * 4 + j];
        }
    }
    makeIdentity(inverse);

    for (i = 0; i < 4; i++) {
        /*
        ** Look for the largest element in column i, at or below the
        ** diagonal. Each candidate is compared with the best row found so
        ** far, so the row with the largest magnitude really is selected.
        */
        swap = i;
        for (j = i + 1; j < 4; j++) {
            if (fabs(temp[j][i]) > fabs(temp[swap][i])) {
                swap = j;
            }
        }

        if (swap != i) {
            /*
            ** Swap rows in both the working copy and the result.
            */
            for (k = 0; k < 4; k++) {
                t = temp[i][k];
                temp[i][k] = temp[swap][k];
                temp[swap][k] = t;

                t = inverse[i * 4 + k];
                inverse[i * 4 + k] = inverse[swap * 4 + k];
                inverse[swap * 4 + k] = t;
            }
        }

        if (temp[i][i] == 0) {
            /*
            ** No non-zero pivot. The matrix is singular, which shouldn't
            ** happen. This means the user gave us a bad matrix.
            */
            return 0;
        }

        /* Normalise the pivot row. */
        t = temp[i][i];
        for (k = 0; k < 4; k++) {
            temp[i][k] /= t;
            inverse[i * 4 + k] /= t;
        }

        /* Eliminate column i from every other row. */
        for (j = 0; j < 4; j++) {
            if (j != i) {
                t = temp[j][i];
                for (k = 0; k < 4; k++) {
                    temp[j][k] -= temp[i][k] * t;
                    inverse[j * 4 + k] -= inverse[i * 4 + k] * t;
                }
            }
        }
    }
    return 1;
}
// Builds a 2D scaling matrix: starts from makeIdentity() and stores s.x in
// m[0][0] and s.y in m[1][1]. Returns the new matrix by value.
Matrix3x3 Matrix3x3::makeScale(Vec2 const& s)
{
    Matrix3x3 result = makeIdentity();

    result.m[0][0] = s.x;
    result.m[1][1] = s.y;

    return result;
}