void sLinsys::addTermToDenseSchurCompl(sData *prob, DenseSymMatrix& SC) { SparseGenMatrix& A = prob->getLocalA(); SparseGenMatrix& C = prob->getLocalC(); SparseGenMatrix& R = prob->getLocalCrossHessian(); int N, nxP, NP; A.getSize(N, nxP); assert(N==locmy); NP = SC.size(); assert(NP>=nxP); if(nxP==-1) C.getSize(N,nxP); if(nxP==-1) nxP = NP; N = locnx+locmy+locmz; SimpleVector col(N); for(int it=0; it<nxP; it++) { double* pcol = &col[0]; for(int it1=0; it1<locnx; it1++) pcol[it1]=0.0; R.fromGetDense(0, it, &col[0], 1, locnx, 1); A.fromGetDense(0, it, &col[locnx], 1, locmy, 1); C.fromGetDense(0, it, &col[locnx+locmy], 1, locmz, 1); solver->solve(col); //here we have colGi = inv(H_i)* it-th col of Gi^t //now do colSC = Gi * inv(H_i)* it-th col of Gi^t // SC+=R*x R.transMult( 1.0, &SC[it][0], 1, -1.0, &col[0], 1); // SC+=At*y A.transMult( 1.0, &SC[it][0], 1, -1.0, &col[locnx], 1); // SC+=Ct*z C.transMult( 1.0, &SC[it][0], 1, -1.0, &col[locnx+locmy], 1); } }
void sLinsysRoot::dumpMatrix(int scen, int proc, const char* nameToken, DenseSymMatrix& M) { int n = M.size(); char szNumber[30]; string strBuffer=""; //assert(false); int iter = g_iterNumber; if(iter!=1 && iter!=5 && iter!=15 && iter!=25 && iter!=35 && iter!=45) return; char szFilename[256]; if(scen==-1) sprintf(szFilename, "%s_%d__%d.mat", nameToken, n, iter); else sprintf(szFilename, "%s_%03d_%d__%d.mat", nameToken, scen+1, n, iter); FILE* file = fopen(szFilename, "w"); assert(file); for(int j=0; j<n; j++) { for(int i=0; i<n; i++) { sprintf(szNumber, "%22.16f ", M[i][j]); strBuffer += szNumber; } strBuffer += "\n"; if(strBuffer.length()>1250000) { fwrite(strBuffer.c_str(), 1, strBuffer.length(), file); strBuffer = ""; } } if(strBuffer.length()>0) { fwrite(strBuffer.c_str(), 1, strBuffer.length(), file); } fclose(file); }
void sLinsys::addTermToDenseSchurCompl(sData *prob, DenseSymMatrix& SC) { SparseGenMatrix& A = prob->getLocalA(); SparseGenMatrix& C = prob->getLocalC(); SparseGenMatrix& R = prob->getLocalCrossHessian(); int N, nxP, NP,mR,nR; int locns = locmz; int mle = prob->getmle(); int mli = prob->getmli(); SparseGenMatrix& E = prob->getLocalE(); SparseGenMatrix& F = prob->getLocalF(); SparseGenMatrix ET; SparseGenMatrix FT; ET.transCopyof(E); FT.transCopyof(F); int nx0, my0, mz0; stochNode->get_FistStageSize(nx0, my0,mz0); A.getSize(N, nxP); assert(N==locmy); NP = SC.size(); assert(NP>=nxP); if(nxP==-1) C.getSize(N,nxP); if(nxP==-1) nxP = NP; if(gOuterSolve>=3 ) N = locnx+locns+locmy+locmz; else N = locnx+locmy+locmz; int blocksize = 64; DenseGenMatrix cols(blocksize,N); bool ispardiso=false; PardisoSolver* pardisoSlv=NULL; // pardisoSlv = dynamic_cast<PardisoSolver*>(solver); int* colSparsity=NULL; if(pardisoSlv) { ispardiso=true; colSparsity=new int[N]; //blocksize=32; } for (int it=0; it < nxP; it += blocksize) { int start=it; int end = MIN(it+blocksize,nxP); int numcols = end-start; cols.getStorageRef().m = numcols; // avoid extra solves bool allzero = true; memset(&cols[0][0],0,N*blocksize*sizeof(double)); if(ispardiso) { for(int i=0; i<N; i++) colSparsity[i]=0; if(gOuterSolve>=3 ) { R.getStorageRef().fromGetColBlock(start, &cols[0][0], N, numcols, colSparsity, allzero); A.getStorageRef().fromGetColBlock(start, &cols[0][locnx+locns], N, numcols, colSparsity, allzero); C.getStorageRef().fromGetColBlock(start, &cols[0][locnx+locns+locmy], N, numcols, colSparsity, allzero); } else{ R.getStorageRef().fromGetColBlock(start, &cols[0][0], N, numcols, colSparsity, allzero); A.getStorageRef().fromGetColBlock(start, &cols[0][locnx], N, numcols, colSparsity, allzero); C.getStorageRef().fromGetColBlock(start, &cols[0][locnx+locmy], N, numcols, colSparsity, allzero); } } else { if(gOuterSolve>=3 ) { R.getStorageRef().fromGetColBlock(start, &cols[0][0], N, numcols, allzero); A.getStorageRef().fromGetColBlock(start, &cols[0][locnx+locns], N, numcols, allzero); C.getStorageRef().fromGetColBlock(start, &cols[0][locnx+locns+locmy], N, numcols, allzero); } else{ R.getStorageRef().fromGetColBlock(start, &cols[0][0], N, numcols, allzero); A.getStorageRef().fromGetColBlock(start, &cols[0][locnx], N, numcols, allzero); C.getStorageRef().fromGetColBlock(start, &cols[0][locnx+locmy], N, numcols, allzero); } } if(!allzero) { if(ispardiso) pardisoSlv->solve(cols,colSparsity); else solver->solve(cols); if(gOuterSolve>=3 ) { R.getStorageRef().transMultMat( 1.0, &(SC[0][start]), numcols, NP, -1.0, &cols[0][0], N); A.getStorageRef().transMultMat( 1.0, &(SC[0][start]), numcols, NP, -1.0, &cols[0][locnx+locns], N); C.getStorageRef().transMultMat( 1.0, &(SC[0][start]), numcols, NP, -1.0, &cols[0][locnx+locns+locmy], N); if(mle>0) ET.getStorageRef().transMultMat( 1.0, &(SC.getStorageRef().M[nx0+mz0+my0-mle][start]), numcols, NP, -1.0, &cols[0][0], N); if(mli>0) FT.getStorageRef().transMultMat( 1.0, &(SC.getStorageRef().M[nx0+mz0+my0+mz0-mli][start]), numcols, NP, -1.0, &cols[0][0], N); } else{ R.getStorageRef().transMultMat( 1.0, &(SC[0][start]), numcols, NP, -1.0, &cols[0][0], N); A.getStorageRef().transMultMat( 1.0, &(SC[0][start]), numcols, NP, -1.0, &cols[0][locnx], N); C.getStorageRef().transMultMat( 1.0, &(SC[0][start]), numcols, NP, -1.0, &cols[0][locnx+locmy], N); } } //end !allzero } for (int it=0; it < mle; it += blocksize) { int start=it; int end = MIN(it+blocksize,mle); int numcols = end-start; cols.getStorageRef().m = numcols; // avoid extra solves bool allzero = true; memset(&cols[0][0],0,N*blocksize*sizeof(double)); if(ispardiso) { for(int i=0; i<N; i++) colSparsity[i]=0; ET.getStorageRef().fromGetColBlock(start, &cols[0][0], N, numcols, colSparsity, allzero); } else { ET.getStorageRef().fromGetColBlock(start, &cols[0][0], N, numcols, allzero); } if(!allzero) { if(ispardiso) pardisoSlv->solve(cols,colSparsity); else solver->solve(cols); if(gOuterSolve>=3 ) { R.getStorageRef().transMultMat( 1.0, &(SC[0][nx0+mz0+my0-mle+start]), numcols, NP, -1.0, &cols[0][0], N); A.getStorageRef().transMultMat( 1.0, &(SC[0][nx0+mz0+my0-mle+start]), numcols, NP, -1.0, &cols[0][locnx+locns], N); C.getStorageRef().transMultMat( 1.0, &(SC[0][nx0+mz0+my0-mle+start]), numcols, NP, -1.0, &cols[0][locnx+locns+locmy], N); if(mle>0) ET.getStorageRef().transMultMat( 1.0, &(SC[nx0+mz0+my0-mle][nx0+mz0+my0-mle+start]), numcols, NP, -1.0, &cols[0][0], N); if(mli>0) FT.getStorageRef().transMultMat( 1.0, &(SC[nx0+mz0+my0+mz0-mli][nx0+mz0+my0-mle+start]), numcols, NP, -1.0, &cols[0][0], N); /* std::cout<<"cols: "<<std::endl; for(int i=0; i<numcols;i++) for(int j=0; j<N;j++) std::cout<<"col "<<i<<"row "<<j<<"elt "<<cols[i][j]<<std::endl; std::cout<<"SC: "<<std::endl; for(int i=0; i<NP;i++) for(int j=0; j<NP;j++) std::cout<<"row "<<i<<"col "<<j<<"elt "<<SC.getStorageRef().M[i][j]<<std::endl; */ } else{ assert(false && "not implemented"); } } //end !allzero } for (int it=0; it < mli; it += blocksize) { int start=it; int end = MIN(it+blocksize,mle); int numcols = end-start; cols.getStorageRef().m = numcols; // avoid extra solves bool allzero = true; memset(&cols[0][0],0,N*blocksize*sizeof(double)); if(ispardiso) { for(int i=0; i<N; i++) colSparsity[i]=0; FT.getStorageRef().fromGetColBlock(start, &cols[0][0], N, numcols, colSparsity, allzero); } else { FT.getStorageRef().fromGetColBlock(start, &cols[0][0], N, numcols, allzero); } if(!allzero) { if(ispardiso) pardisoSlv->solve(cols,colSparsity); else solver->solve(cols); if(gOuterSolve>=3 ) { R.getStorageRef().transMultMat( 1.0, &(SC[0][nx0+mz0+my0+mz0-mli+start]), numcols, NP, -1.0, &cols[0][0], N); A.getStorageRef().transMultMat( 1.0, &(SC[0][nx0+mz0+my0+mz0-mli+start]), numcols, NP, -1.0, &cols[0][locnx+locns], N); C.getStorageRef().transMultMat( 1.0, &(SC[0][nx0+mz0+my0+mz0-mli+start]), numcols, NP, -1.0, &cols[0][locnx+locns+locmy], N); if(mle>0) ET.getStorageRef().transMultMat( 1.0, &(SC[nx0+mz0+my0-mle][nx0+mz0+my0+mz0-mli+start]), numcols, NP, -1.0, &cols[0][0], N); if(mli>0) FT.getStorageRef().transMultMat( 1.0, &(SC[nx0+mz0+my0+mz0-mli][nx0+mz0+my0+mz0-mli+start]), numcols, NP, -1.0, &cols[0][0], N); } else{ assert(false && "not implemented"); } } //end !allzero } if(ispardiso) delete[] colSparsity; }