void sLinsys::addTermToDenseSchurCompl(sData *prob, DenseSymMatrix& SC) { SparseGenMatrix& A = prob->getLocalA(); SparseGenMatrix& C = prob->getLocalC(); SparseGenMatrix& R = prob->getLocalCrossHessian(); int N, nxP, NP,mR,nR; int locns = locmz; int mle = prob->getmle(); int mli = prob->getmli(); SparseGenMatrix& E = prob->getLocalE(); SparseGenMatrix& F = prob->getLocalF(); SparseGenMatrix ET; SparseGenMatrix FT; ET.transCopyof(E); FT.transCopyof(F); int nx0, my0, mz0; stochNode->get_FistStageSize(nx0, my0,mz0); A.getSize(N, nxP); assert(N==locmy); NP = SC.size(); assert(NP>=nxP); if(nxP==-1) C.getSize(N,nxP); if(nxP==-1) nxP = NP; if(gOuterSolve>=3 ) N = locnx+locns+locmy+locmz; else N = locnx+locmy+locmz; int blocksize = 64; DenseGenMatrix cols(blocksize,N); bool ispardiso=false; PardisoSolver* pardisoSlv=NULL; // pardisoSlv = dynamic_cast<PardisoSolver*>(solver); int* colSparsity=NULL; if(pardisoSlv) { ispardiso=true; colSparsity=new int[N]; //blocksize=32; } for (int it=0; it < nxP; it += blocksize) { int start=it; int end = MIN(it+blocksize,nxP); int numcols = end-start; cols.getStorageRef().m = numcols; // avoid extra solves bool allzero = true; memset(&cols[0][0],0,N*blocksize*sizeof(double)); if(ispardiso) { for(int i=0; i<N; i++) colSparsity[i]=0; if(gOuterSolve>=3 ) { R.getStorageRef().fromGetColBlock(start, &cols[0][0], N, numcols, colSparsity, allzero); A.getStorageRef().fromGetColBlock(start, &cols[0][locnx+locns], N, numcols, colSparsity, allzero); C.getStorageRef().fromGetColBlock(start, &cols[0][locnx+locns+locmy], N, numcols, colSparsity, allzero); } else{ R.getStorageRef().fromGetColBlock(start, &cols[0][0], N, numcols, colSparsity, allzero); A.getStorageRef().fromGetColBlock(start, &cols[0][locnx], N, numcols, colSparsity, allzero); C.getStorageRef().fromGetColBlock(start, &cols[0][locnx+locmy], N, numcols, colSparsity, allzero); } } else { if(gOuterSolve>=3 ) { R.getStorageRef().fromGetColBlock(start, &cols[0][0], N, numcols, allzero); A.getStorageRef().fromGetColBlock(start, &cols[0][locnx+locns], N, numcols, allzero); C.getStorageRef().fromGetColBlock(start, &cols[0][locnx+locns+locmy], N, numcols, allzero); } else{ R.getStorageRef().fromGetColBlock(start, &cols[0][0], N, numcols, allzero); A.getStorageRef().fromGetColBlock(start, &cols[0][locnx], N, numcols, allzero); C.getStorageRef().fromGetColBlock(start, &cols[0][locnx+locmy], N, numcols, allzero); } } if(!allzero) { if(ispardiso) pardisoSlv->solve(cols,colSparsity); else solver->solve(cols); if(gOuterSolve>=3 ) { R.getStorageRef().transMultMat( 1.0, &(SC[0][start]), numcols, NP, -1.0, &cols[0][0], N); A.getStorageRef().transMultMat( 1.0, &(SC[0][start]), numcols, NP, -1.0, &cols[0][locnx+locns], N); C.getStorageRef().transMultMat( 1.0, &(SC[0][start]), numcols, NP, -1.0, &cols[0][locnx+locns+locmy], N); if(mle>0) ET.getStorageRef().transMultMat( 1.0, &(SC.getStorageRef().M[nx0+mz0+my0-mle][start]), numcols, NP, -1.0, &cols[0][0], N); if(mli>0) FT.getStorageRef().transMultMat( 1.0, &(SC.getStorageRef().M[nx0+mz0+my0+mz0-mli][start]), numcols, NP, -1.0, &cols[0][0], N); } else{ R.getStorageRef().transMultMat( 1.0, &(SC[0][start]), numcols, NP, -1.0, &cols[0][0], N); A.getStorageRef().transMultMat( 1.0, &(SC[0][start]), numcols, NP, -1.0, &cols[0][locnx], N); C.getStorageRef().transMultMat( 1.0, &(SC[0][start]), numcols, NP, -1.0, &cols[0][locnx+locmy], N); } } //end !allzero } for (int it=0; it < mle; it += blocksize) { int start=it; int end = MIN(it+blocksize,mle); int numcols = end-start; cols.getStorageRef().m = numcols; // avoid extra solves bool allzero = true; memset(&cols[0][0],0,N*blocksize*sizeof(double)); if(ispardiso) { for(int i=0; i<N; i++) colSparsity[i]=0; ET.getStorageRef().fromGetColBlock(start, &cols[0][0], N, numcols, colSparsity, allzero); } else { ET.getStorageRef().fromGetColBlock(start, &cols[0][0], N, numcols, allzero); } if(!allzero) { if(ispardiso) pardisoSlv->solve(cols,colSparsity); else solver->solve(cols); if(gOuterSolve>=3 ) { R.getStorageRef().transMultMat( 1.0, &(SC[0][nx0+mz0+my0-mle+start]), numcols, NP, -1.0, &cols[0][0], N); A.getStorageRef().transMultMat( 1.0, &(SC[0][nx0+mz0+my0-mle+start]), numcols, NP, -1.0, &cols[0][locnx+locns], N); C.getStorageRef().transMultMat( 1.0, &(SC[0][nx0+mz0+my0-mle+start]), numcols, NP, -1.0, &cols[0][locnx+locns+locmy], N); if(mle>0) ET.getStorageRef().transMultMat( 1.0, &(SC[nx0+mz0+my0-mle][nx0+mz0+my0-mle+start]), numcols, NP, -1.0, &cols[0][0], N); if(mli>0) FT.getStorageRef().transMultMat( 1.0, &(SC[nx0+mz0+my0+mz0-mli][nx0+mz0+my0-mle+start]), numcols, NP, -1.0, &cols[0][0], N); /* std::cout<<"cols: "<<std::endl; for(int i=0; i<numcols;i++) for(int j=0; j<N;j++) std::cout<<"col "<<i<<"row "<<j<<"elt "<<cols[i][j]<<std::endl; std::cout<<"SC: "<<std::endl; for(int i=0; i<NP;i++) for(int j=0; j<NP;j++) std::cout<<"row "<<i<<"col "<<j<<"elt "<<SC.getStorageRef().M[i][j]<<std::endl; */ } else{ assert(false && "not implemented"); } } //end !allzero } for (int it=0; it < mli; it += blocksize) { int start=it; int end = MIN(it+blocksize,mle); int numcols = end-start; cols.getStorageRef().m = numcols; // avoid extra solves bool allzero = true; memset(&cols[0][0],0,N*blocksize*sizeof(double)); if(ispardiso) { for(int i=0; i<N; i++) colSparsity[i]=0; FT.getStorageRef().fromGetColBlock(start, &cols[0][0], N, numcols, colSparsity, allzero); } else { FT.getStorageRef().fromGetColBlock(start, &cols[0][0], N, numcols, allzero); } if(!allzero) { if(ispardiso) pardisoSlv->solve(cols,colSparsity); else solver->solve(cols); if(gOuterSolve>=3 ) { R.getStorageRef().transMultMat( 1.0, &(SC[0][nx0+mz0+my0+mz0-mli+start]), numcols, NP, -1.0, &cols[0][0], N); A.getStorageRef().transMultMat( 1.0, &(SC[0][nx0+mz0+my0+mz0-mli+start]), numcols, NP, -1.0, &cols[0][locnx+locns], N); C.getStorageRef().transMultMat( 1.0, &(SC[0][nx0+mz0+my0+mz0-mli+start]), numcols, NP, -1.0, &cols[0][locnx+locns+locmy], N); if(mle>0) ET.getStorageRef().transMultMat( 1.0, &(SC[nx0+mz0+my0-mle][nx0+mz0+my0+mz0-mli+start]), numcols, NP, -1.0, &cols[0][0], N); if(mli>0) FT.getStorageRef().transMultMat( 1.0, &(SC[nx0+mz0+my0+mz0-mli][nx0+mz0+my0+mz0-mli+start]), numcols, NP, -1.0, &cols[0][0], N); } else{ assert(false && "not implemented"); } } //end !allzero } if(ispardiso) delete[] colSparsity; }
// this function is called only once and creates the augmented system void PardisoSchurSolver::firstSolveCall(SparseGenMatrix& R, SparseGenMatrix& A, SparseGenMatrix& C) { int nR,nA,nC; nnz=0; R.getSize(nR,nSC); nnz += R.numberOfNonZeros(); A.getSize(nA,nSC); nnz += A.numberOfNonZeros(); C.getSize(nC,nSC); nnz += C.numberOfNonZeros(); int Msize=Msys->size(); //cout << "nR=" << nR << " nA=" << nA << " nC=" << nC << " nSC=" << nSC << " sizeKi=" << (nR+nA+nC)<< endl // << "nnzR=" << R.numberOfNonZeros() // << " nnzA=" << A.numberOfNonZeros() // << " nnzC=" << C.numberOfNonZeros() << endl; n = nR+nA+nC+nSC; assert( Msize == nR+nA+nC ); nnz += Msys->numberOfNonZeros(); nnz += nSC; //space for the 0 diagonal of 2x2 block // the lower triangular part of the augmented system in row-major SparseSymMatrix augSys( n, nnz); // //put (1,1) block in the augmented system // memcpy(augSys.getStorageRef().krowM, Msys->getStorageRef().krowM, sizeof(int)*Msize); memcpy(augSys.getStorageRef().jcolM, Msys->getStorageRef().jcolM, sizeof(int)*Msys->numberOfNonZeros()); memcpy(augSys.getStorageRef().M, Msys->getStorageRef().M, sizeof(double)*Msys->numberOfNonZeros()); // //put C block in the augmented system as C^T in the lower triangular part // if(C.numberOfNonZeros()>0 && nC>0) { int nnzIt=Msys->numberOfNonZeros(); SparseGenMatrix Ct(nSC,nC,C.numberOfNonZeros()); int* krowCt=Ct.getStorageRef().krowM; int* jcolCt=Ct.getStorageRef().jcolM; double *MCt=Ct.getStorageRef().M; C.getStorageRef().transpose(krowCt, jcolCt, MCt); int colShift=nR+nA; int* krowAug= augSys.getStorageRef().krowM; int* jcolAug = augSys.getStorageRef().jcolM; double* MAug = augSys.getStorageRef().M; int row=Msize; for(; row<n; row++) { krowAug[row]=nnzIt; for(int c=krowCt[row-Msize]; c< krowCt[row-Msize+1]; c++) { int j=jcolCt[c]; jcolAug[nnzIt]=j+colShift; MAug[nnzIt] =MCt[c]; nnzIt++; } //add the zero from the diagonal jcolAug[nnzIt]=row; MAug[nnzIt]=0.0; nnzIt++; } krowAug[row]=nnzIt; } // -- to do //put R block in the augmented system as R^T in the lower triangular part assert(R.numberOfNonZeros()==0); // if(A.numberOfNonZeros()>0 && nA>0) { int nnzIt=Msys->numberOfNonZeros(); SparseGenMatrix At(nSC,nA,A.numberOfNonZeros()); int* krowAt=At.getStorageRef().krowM; int* jcolAt=At.getStorageRef().jcolM; double *MAt=At.getStorageRef().M; A.getStorageRef().transpose(krowAt, jcolAt, MAt); int colShift=nR; int* krowAug= augSys.getStorageRef().krowM; int* jcolAug = augSys.getStorageRef().jcolM; double* MAug = augSys.getStorageRef().M; int row=Msize; for(; row<n; row++) { krowAug[row]=nnzIt; for(int c=krowAt[row-Msize]; c< krowAt[row-Msize+1]; c++) { int j=jcolAt[c]; jcolAug[nnzIt]=j+colShift; MAug[nnzIt] =MAt[c]; nnzIt++; } //add the zero from the diagonal jcolAug[nnzIt]=row; MAug[nnzIt]=0.0; nnzIt++; } krowAug[row]=nnzIt; } nnz=augSys.numberOfNonZeros(); // we need to transpose to get the augmented system in the row-major upper triangular format of PARDISO rowptrAug = new int[n+1]; colidxAug = new int[nnz]; eltsAug = new double[nnz]; augSys.getStorageRef().transpose(rowptrAug,colidxAug,eltsAug); //save the indeces for diagonal entries for a streamlined later update int* krowMsys = Msys->getStorageRef().krowM; int* jcolMsys = Msys->getStorageRef().jcolM; for(int r=0; r<Msize; r++) { // Msys - find the index in jcol for the diagonal (r,r) int idxDiagMsys=-1; for(int idx=krowMsys[r]; idx<krowMsys[r+1]; idx++) if(jcolMsys[idx]==r) { idxDiagMsys=idx; break; } assert(idxDiagMsys>=0); // aug - find the index in jcol for the diagonal (r,r) int idxDiagAug=-1; for(int idx=rowptrAug[r]; idx<rowptrAug[r+1]; idx++) if(colidxAug[idx]==r) { idxDiagAug=idx; break; } assert(idxDiagAug>=0); diagMap.insert( pair<int,int>(idxDiagMsys,idxDiagAug) ); } //convert to Fortran indexing for(int it=0; it<n+1; it++) rowptrAug[it]++; for(int it=0; it<nnz; it++) colidxAug[it]++; //allocate temp vector(s) nvec=new double[n]; /* // // symbolic analysis // int mtype=-2, error; int phase=11; //analysis int maxfct=1, mnum=1, nrhs=1; iparm[2]=num_threads; iparm[7]=8; //# iterative refinements //iparm[1] = 2; // 2 is for metis, 0 for min degree //iparm[ 9] =10; // pivot perturbation 10^{-xxx} iparm[10] = 1; // scaling for IPM KKT; used with IPARM(13)=1 or 2 iparm[12] = 2; // improved accuracy for IPM KKT; used with IPARM(11)=1; // if needed, use 2 for advanced matchings and higer accuracy. iparm[23] = 1; //Parallel Numerical Factorization (0=used in the last years, 1=two-level scheduling) int msglvl=0; // with statistical information iparm[32] = 1; // compute determinant iparm[37] = Msys->size(); //compute Schur-complement pardiso (pt , &maxfct , &mnum, &mtype, &phase, &n, eltsAug, rowptrAug, colidxAug, NULL, &nrhs, iparm , &msglvl, NULL, NULL, &error, dparm ); if ( error != 0) { printf ("PardisoSolver - ERROR during symbolic factorization: %d\n", error ); assert(false); } */ }