/** Computes *this = alpha*A + beta*(*this) on the stored (lower) triangle.
 *
 *  When beta==0 the current values are overwritten without being read,
 *  so the matrix may be uninitialized in that case (checked by the first
 *  assertion).  A no-op when alpha==0.
 */
void DenseSymMatrix::AddMatrix(Number alpha, const DenseSymMatrix& A, Number beta)
{
  DBG_ASSERT(beta==0. || initialized_);
  DBG_ASSERT(Dim()==A.Dim());

  // Nothing to add.
  if (alpha==0.)
    return;

  const Number* src = A.Values();
  const Index n = Dim();

  if (beta==0.) {
    // Overwrite: must not read values_, which may be uninitialized here.
    for (Index col=0; col<n; col++) {
      for (Index row=col; row<n; row++) {
        values_[row+col*n] = alpha*src[row+col*n];
      }
    }
  }
  else if (beta==1.) {
    // Pure accumulation.
    for (Index col=0; col<n; col++) {
      for (Index row=col; row<n; row++) {
        values_[row+col*n] += alpha*src[row+col*n];
      }
    }
  }
  else {
    // General scaled update.
    for (Index col=0; col<n; col++) {
      for (Index row=col; row<n; row++) {
        values_[row+col*n] = alpha*src[row+col*n] + beta*values_[row+col*n];
      }
    }
  }

  ObjectChanged();
  initialized_ = true;
}
bool DenseGenMatrix::ComputeEigenVectors(const DenseSymMatrix& M, DenseVector& Evalues) { Index dim = M.Dim(); DBG_ASSERT(Evalues.Dim()==dim); DBG_ASSERT(NRows()==dim); DBG_ASSERT(NCols()==dim); // First we copy the content of the matrix into Q const Number* Mvalues = M.Values(); for (Index j=0; j<dim; j++) { for (Index i=j; i<dim; i++) { values_[i+j*dim] = Mvalues[i+j*dim]; } } bool compute_eigenvectors = true; Number* Evals = Evalues.Values(); Index info; IpLapackDsyev(compute_eigenvectors, dim, values_, dim, Evals, info); initialized_ = (info==0); ObjectChanged(); return (info==0); }
bool DenseGenMatrix::ComputeCholeskyFactor(const DenseSymMatrix& M) { Index dim = M.Dim(); DBG_ASSERT(dim==NCols()); DBG_ASSERT(dim==NRows()); ObjectChanged(); // First we copy the content of the symmetric matrix into J const Number* Mvalues = M.Values(); for (Index j=0; j<dim; j++) { for (Index i=j; i<dim; i++) { values_[i+j*dim] = Mvalues[i+j*dim]; } } // Now call the lapack subroutine to perform the factorization Index info; IpLapackDpotrf(dim, values_, dim, info); DBG_ASSERT(info>=0); if (info!=0) { initialized_ = false; return false; } // We set all strictly upper values to zero // ToDo: This might not be necessary?!? for (Index j=1; j<dim; j++) { for (Index i=0; i<j; i++) { values_[i+j*dim] = 0.; } } factorization_ = CHOL; initialized_ = true; return true; }
/** Accumulates this scenario's contribution to the dense first-stage
 *  Schur complement SC, one column at a time.
 *
 *  For each first-stage column, builds the stacked right-hand side
 *  [R; A; C] column, solves with the local KKT system, and multiplies
 *  the solution back by R^T, A^T and C^T into the corresponding row
 *  block of SC.
 */
void sLinsys::addTermToDenseSchurCompl(sData *prob, DenseSymMatrix& SC)
{
  SparseGenMatrix& A = prob->getLocalA();
  SparseGenMatrix& C = prob->getLocalC();
  SparseGenMatrix& R = prob->getLocalCrossHessian();

  int N, nxP, NP;
  A.getSize(N, nxP);
  assert(N==locmy);
  NP = SC.size();
  assert(NP>=nxP);

  // If A carried no column count, fall back to C, then to the Schur
  // complement dimension itself.
  if (nxP==-1) C.getSize(N, nxP);
  if (nxP==-1) nxP = NP;

  N = locnx+locmy+locmz;
  SimpleVector col(N);

  for (int it=0; it<nxP; it++) {
    // Clear the leading locnx entries before gathering the column
    // (the remaining entries are fully overwritten below).
    double* v = &col[0];
    for (int k=0; k<locnx; k++)
      v[k] = 0.0;

    R.fromGetDense(0, it, &col[0],           1, locnx, 1);
    A.fromGetDense(0, it, &col[locnx],       1, locmy, 1);
    C.fromGetDense(0, it, &col[locnx+locmy], 1, locmz, 1);

    solver->solve(col);
    //here we have colGi = inv(H_i)* it-th col of Gi^t
    //now do colSC = Gi * inv(H_i)* it-th col of Gi^t

    // SC += R*x
    R.transMult(1.0, &SC[it][0], 1, -1.0, &col[0],           1);
    // SC += At*y
    A.transMult(1.0, &SC[it][0], 1, -1.0, &col[locnx],       1);
    // SC += Ct*z
    C.transMult(1.0, &SC[it][0], 1, -1.0, &col[locnx+locmy], 1);
  }
}
void sLinsysRoot::dumpMatrix(int scen, int proc, const char* nameToken, DenseSymMatrix& M) { int n = M.size(); char szNumber[30]; string strBuffer=""; //assert(false); int iter = g_iterNumber; if(iter!=1 && iter!=5 && iter!=15 && iter!=25 && iter!=35 && iter!=45) return; char szFilename[256]; if(scen==-1) sprintf(szFilename, "%s_%d__%d.mat", nameToken, n, iter); else sprintf(szFilename, "%s_%03d_%d__%d.mat", nameToken, scen+1, n, iter); FILE* file = fopen(szFilename, "w"); assert(file); for(int j=0; j<n; j++) { for(int i=0; i<n; i++) { sprintf(szNumber, "%22.16f ", M[i][j]); strBuffer += szNumber; } strBuffer += "\n"; if(strBuffer.length()>1250000) { fwrite(strBuffer.c_str(), 1, strBuffer.length(), file); strBuffer = ""; } } if(strBuffer.length()>0) { fwrite(strBuffer.c_str(), 1, strBuffer.length(), file); } fclose(file); }
void sLinsys::addTermToDenseSchurCompl(sData *prob, DenseSymMatrix& SC) { SparseGenMatrix& A = prob->getLocalA(); SparseGenMatrix& C = prob->getLocalC(); SparseGenMatrix& R = prob->getLocalCrossHessian(); int N, nxP, NP,mR,nR; int locns = locmz; int mle = prob->getmle(); int mli = prob->getmli(); SparseGenMatrix& E = prob->getLocalE(); SparseGenMatrix& F = prob->getLocalF(); SparseGenMatrix ET; SparseGenMatrix FT; ET.transCopyof(E); FT.transCopyof(F); int nx0, my0, mz0; stochNode->get_FistStageSize(nx0, my0,mz0); A.getSize(N, nxP); assert(N==locmy); NP = SC.size(); assert(NP>=nxP); if(nxP==-1) C.getSize(N,nxP); if(nxP==-1) nxP = NP; if(gOuterSolve>=3 ) N = locnx+locns+locmy+locmz; else N = locnx+locmy+locmz; int blocksize = 64; DenseGenMatrix cols(blocksize,N); bool ispardiso=false; PardisoSolver* pardisoSlv=NULL; // pardisoSlv = dynamic_cast<PardisoSolver*>(solver); int* colSparsity=NULL; if(pardisoSlv) { ispardiso=true; colSparsity=new int[N]; //blocksize=32; } for (int it=0; it < nxP; it += blocksize) { int start=it; int end = MIN(it+blocksize,nxP); int numcols = end-start; cols.getStorageRef().m = numcols; // avoid extra solves bool allzero = true; memset(&cols[0][0],0,N*blocksize*sizeof(double)); if(ispardiso) { for(int i=0; i<N; i++) colSparsity[i]=0; if(gOuterSolve>=3 ) { R.getStorageRef().fromGetColBlock(start, &cols[0][0], N, numcols, colSparsity, allzero); A.getStorageRef().fromGetColBlock(start, &cols[0][locnx+locns], N, numcols, colSparsity, allzero); C.getStorageRef().fromGetColBlock(start, &cols[0][locnx+locns+locmy], N, numcols, colSparsity, allzero); } else{ R.getStorageRef().fromGetColBlock(start, &cols[0][0], N, numcols, colSparsity, allzero); A.getStorageRef().fromGetColBlock(start, &cols[0][locnx], N, numcols, colSparsity, allzero); C.getStorageRef().fromGetColBlock(start, &cols[0][locnx+locmy], N, numcols, colSparsity, allzero); } } else { if(gOuterSolve>=3 ) { R.getStorageRef().fromGetColBlock(start, &cols[0][0], N, numcols, allzero); 
A.getStorageRef().fromGetColBlock(start, &cols[0][locnx+locns], N, numcols, allzero); C.getStorageRef().fromGetColBlock(start, &cols[0][locnx+locns+locmy], N, numcols, allzero); } else{ R.getStorageRef().fromGetColBlock(start, &cols[0][0], N, numcols, allzero); A.getStorageRef().fromGetColBlock(start, &cols[0][locnx], N, numcols, allzero); C.getStorageRef().fromGetColBlock(start, &cols[0][locnx+locmy], N, numcols, allzero); } } if(!allzero) { if(ispardiso) pardisoSlv->solve(cols,colSparsity); else solver->solve(cols); if(gOuterSolve>=3 ) { R.getStorageRef().transMultMat( 1.0, &(SC[0][start]), numcols, NP, -1.0, &cols[0][0], N); A.getStorageRef().transMultMat( 1.0, &(SC[0][start]), numcols, NP, -1.0, &cols[0][locnx+locns], N); C.getStorageRef().transMultMat( 1.0, &(SC[0][start]), numcols, NP, -1.0, &cols[0][locnx+locns+locmy], N); if(mle>0) ET.getStorageRef().transMultMat( 1.0, &(SC.getStorageRef().M[nx0+mz0+my0-mle][start]), numcols, NP, -1.0, &cols[0][0], N); if(mli>0) FT.getStorageRef().transMultMat( 1.0, &(SC.getStorageRef().M[nx0+mz0+my0+mz0-mli][start]), numcols, NP, -1.0, &cols[0][0], N); } else{ R.getStorageRef().transMultMat( 1.0, &(SC[0][start]), numcols, NP, -1.0, &cols[0][0], N); A.getStorageRef().transMultMat( 1.0, &(SC[0][start]), numcols, NP, -1.0, &cols[0][locnx], N); C.getStorageRef().transMultMat( 1.0, &(SC[0][start]), numcols, NP, -1.0, &cols[0][locnx+locmy], N); } } //end !allzero } for (int it=0; it < mle; it += blocksize) { int start=it; int end = MIN(it+blocksize,mle); int numcols = end-start; cols.getStorageRef().m = numcols; // avoid extra solves bool allzero = true; memset(&cols[0][0],0,N*blocksize*sizeof(double)); if(ispardiso) { for(int i=0; i<N; i++) colSparsity[i]=0; ET.getStorageRef().fromGetColBlock(start, &cols[0][0], N, numcols, colSparsity, allzero); } else { ET.getStorageRef().fromGetColBlock(start, &cols[0][0], N, numcols, allzero); } if(!allzero) { if(ispardiso) pardisoSlv->solve(cols,colSparsity); else solver->solve(cols); 
if(gOuterSolve>=3 ) { R.getStorageRef().transMultMat( 1.0, &(SC[0][nx0+mz0+my0-mle+start]), numcols, NP, -1.0, &cols[0][0], N); A.getStorageRef().transMultMat( 1.0, &(SC[0][nx0+mz0+my0-mle+start]), numcols, NP, -1.0, &cols[0][locnx+locns], N); C.getStorageRef().transMultMat( 1.0, &(SC[0][nx0+mz0+my0-mle+start]), numcols, NP, -1.0, &cols[0][locnx+locns+locmy], N); if(mle>0) ET.getStorageRef().transMultMat( 1.0, &(SC[nx0+mz0+my0-mle][nx0+mz0+my0-mle+start]), numcols, NP, -1.0, &cols[0][0], N); if(mli>0) FT.getStorageRef().transMultMat( 1.0, &(SC[nx0+mz0+my0+mz0-mli][nx0+mz0+my0-mle+start]), numcols, NP, -1.0, &cols[0][0], N); /* std::cout<<"cols: "<<std::endl; for(int i=0; i<numcols;i++) for(int j=0; j<N;j++) std::cout<<"col "<<i<<"row "<<j<<"elt "<<cols[i][j]<<std::endl; std::cout<<"SC: "<<std::endl; for(int i=0; i<NP;i++) for(int j=0; j<NP;j++) std::cout<<"row "<<i<<"col "<<j<<"elt "<<SC.getStorageRef().M[i][j]<<std::endl; */ } else{ assert(false && "not implemented"); } } //end !allzero } for (int it=0; it < mli; it += blocksize) { int start=it; int end = MIN(it+blocksize,mle); int numcols = end-start; cols.getStorageRef().m = numcols; // avoid extra solves bool allzero = true; memset(&cols[0][0],0,N*blocksize*sizeof(double)); if(ispardiso) { for(int i=0; i<N; i++) colSparsity[i]=0; FT.getStorageRef().fromGetColBlock(start, &cols[0][0], N, numcols, colSparsity, allzero); } else { FT.getStorageRef().fromGetColBlock(start, &cols[0][0], N, numcols, allzero); } if(!allzero) { if(ispardiso) pardisoSlv->solve(cols,colSparsity); else solver->solve(cols); if(gOuterSolve>=3 ) { R.getStorageRef().transMultMat( 1.0, &(SC[0][nx0+mz0+my0+mz0-mli+start]), numcols, NP, -1.0, &cols[0][0], N); A.getStorageRef().transMultMat( 1.0, &(SC[0][nx0+mz0+my0+mz0-mli+start]), numcols, NP, -1.0, &cols[0][locnx+locns], N); C.getStorageRef().transMultMat( 1.0, &(SC[0][nx0+mz0+my0+mz0-mli+start]), numcols, NP, -1.0, &cols[0][locnx+locns+locmy], N); if(mle>0) ET.getStorageRef().transMultMat( 
1.0, &(SC[nx0+mz0+my0-mle][nx0+mz0+my0+mz0-mli+start]), numcols, NP, -1.0, &cols[0][0], N); if(mli>0) FT.getStorageRef().transMultMat( 1.0, &(SC[nx0+mz0+my0+mz0-mli][nx0+mz0+my0+mz0-mli+start]), numcols, NP, -1.0, &cols[0][0], N); } else{ assert(false && "not implemented"); } } //end !allzero } if(ispardiso) delete[] colSparsity; }
// Factorization phase for the root node with a reduced preconditioner.
// Each process accumulates Schur-complement contributions from its
// scenarios into kktd; after the first finished scenario the partial sums
// are reduced to the preconditioner rank (which rescales, updates and
// factorizes its approximation), and at the end all workers reduce their
// full sums to the special worker.  The exact order of the MPI_Reduce
// calls below is the inter-rank protocol — do not reorder.
void QpGenStochLinsysRootAugRedPrecond::factor2(QpGenStochData *prob, Variables *vars)
{
  assert( children.size() == prob->children.size() );

  // Receive buffer for the reductions; allocated only on the ranks that
  // actually receive (preconditioner and special worker).
  double* buffer=NULL;
  StochTreePrecond* stochNodePrcnd = dynamic_cast<StochTreePrecond*>(stochNode);
  //!!
  DenseSymMatrix * kktd = (DenseSymMatrix*) kkt;
  myAtPutZeros(kktd, 0, 0, locnx+locmy, locnx+locmy);
  //~~

  // First tell children to factorize.
  for(int it=0; it<children.size(); it++) {
    children[it]->factor2(prob->children[it], vars);
  }

  if(me==ePrecond || me==eSpecialWorker)
    buffer = new double[locnx*(locnx+locmy)];

  DenseGenMatrix* U = NULL;
  DenseGenMatrix* V = NULL;
  //if(me==ePrecond) assert(children.size()==1);
  int commWrkrs = stochNode->commWrkrs; // NOTE(review): not used below — confirm it can be removed

  ////////////////////////////////////////////////////////
  // DIRECT workers -> all processes in fact
  ////////////////////////////////////////////////////////
  int childrenDone=0;
  for(int it=0; it<children.size(); it++) {
    // Skip scenarios not assigned to this process.
    if(children[it]->mpiComm == MPI_COMM_NULL) continue;

    children[it]->stochNode->resMon.recFactTmChildren_start();
    //-----------------------------------------------------------
    children[it]->allocU(&U, locnx);
    children[it]->allocV(&V, locnx);
    children[it]->computeU_V(prob->children[it], U, V);
    //-----------------------------------------------------------
    children[it]->stochNode->resMon.recSchurMultChildren_start();
    //-----------------------------------------------------------
    // Accumulate this child's Schur contribution: kktd += (-1)*U*V.
    kktd->matMult(-1.0, *U, 1, *V, 0, 1.0);
    //-----------------------------------------------------------
    children[it]->stochNode->resMon.recSchurMultChildren_stop();
    children[it]->stochNode->resMon.recFactTmChildren_stop();
    childrenDone++;

    ///////////////////////////////////////////////////////////////
    // Stop and engage in communication with preconditioner if
    // enough scenarios were done
    ///////////////////////////////////////////////////////////////
    if(childrenDone==1) {
      int rankPrecond = stochNode->rankPrcnd;
      int rankZeroW = stochNodePrcnd->rankZeroW;
      int commP2ZeroW = stochNodePrcnd->commP2ZeroW;
      if(me!=ePrecond) {
        stochNode->resMon.recFactTmLocal_start();
        ///////////////////////////////////////
        // WORKERS -> send to precond
        ///////////////////////////////////////
        // Non-root ranks pass NULL as recvbuf; only the root of the
        // reduction receives the sum.
        MPI_Reduce(&(kktd->mStorage->M[0][0]), NULL, locnx*(locnx+locmy),
                   MPI_DOUBLE, MPI_SUM, rankPrecond, mpiComm);
        //null out Schur complement so the existing info will no more be added
        myAtPutZeros(kktd, 0, 0, locnx, locnx);
        stochNode->resMon.recFactTmLocal_stop();
        if(me==eSpecialWorker) {
          // Special worker receives the same one-scenario partial sum the
          // preconditioner got (sent back by the precond branch below).
          MPI_Reduce(&(kktd->mStorage->M[0][0]), buffer, locnx*(locnx+locmy),
                     MPI_DOUBLE, MPI_SUM, rankZeroW, commP2ZeroW);
          memcpy(&kktd->mStorage->M[0][0], buffer,
                 locnx*(locnx+locmy)*sizeof(double));
        }
      } else {
        ////////////////////////////////////////////
        //PRECONDITIONER -> receive from workers
        ////////////////////////////////////////////
        stochNode->resMon.recFactTmLocal_start();
        stochNode->resMon.recSchurMultLocal_start();
        // NOTE(review): U/V are freed here but not reset to NULL; this is
        // only safe if the preconditioner rank reaches this branch after
        // its last assigned child (the commented-out assert above suggests
        // children.size()==1 on that rank) — confirm.
        if(U) delete U;
        if(V) delete V;
        //deleteUtV(); reuse this
        stochNode->resMon.recSchurMultLocal_stop();

        MPI_Reduce(&(kktd->mStorage->M[0][0]), buffer, locnx*(locnx+locmy),
                   MPI_DOUBLE, MPI_SUM, rankPrecond, mpiComm);
        memcpy(&kktd->mStorage->M[0][0], buffer,
               locnx*(locnx+locmy)*sizeof(double));
        delete[] buffer;

        //send the information back to specialWorker
        MPI_Reduce(&(kktd->mStorage->M[0][0]), NULL, locnx*(locnx+locmy),
                   MPI_DOUBLE, MPI_SUM, rankZeroW, commP2ZeroW);

        stochNode->resMon.recSchurMultLocal_start();
        //////////////////////////////////////////////
        // factorize partial schur complement
        //////////////////////////////////////////////
        // update the upper block of the kkt with the UtV block
        int noProcs; MPI_Comm_size(mpiComm, &noProcs);
        // Rescale the sampled partial sum so it approximates the full
        // Schur complement built from all scenarios.
        double alpha = 1.0*children.size()/(noProcs*childrenDone);
        kktd->scalarMult(alpha);
        updateKKT(prob,vars);
        //addUtVToKKT(alpha, *UtV, *kktd, locnx);
        //factorize
        double st=MPI_Wtime();
        solver->matrixChanged();
        printf("fact took %g\n", MPI_Wtime()-st);
        stochNode->resMon.recFactTmLocal_stop();
        stochNode->resMon.recSchurMultLocal_stop();
      }
    }
  }

  if(me!=ePrecond) {
    //printf("Worker finished updates rank=%d\n", stochNode->rankMe);
    stochNode->resMon.recSchurMultLocal_start();
    if(U) delete U;
    if(V) delete V;
    //deleteUtV(); reuse this
    stochNode->resMon.recSchurMultLocal_stop();
  }

  /////////////////////////////////////////////////////////
  // Everybody sum the partial Schur complements to
  // special worker who will have the complete matrix
  /////////////////////////////////////////////////////////
  if(iAmDistrib) {
    int rankZeroW = stochNode->rankZeroW;
    MPI_Comm commWorkers = stochNodePrcnd->commWorkers;
    if(me==eSpecialWorker) {
      //buffer=new double[locnx*locnx];
      //if(buffer==NULL) printf("PANIC !!!! not enough memory in doing the reduce !!!!\n");
      MPI_Reduce(&(kktd->mStorage->M[0][0]), buffer, locnx*(locnx+locmy),
                 MPI_DOUBLE, MPI_SUM, rankZeroW, commWorkers);
      memcpy(&kktd->mStorage->M[0][0], buffer,
             locnx*(locnx+locmy)*sizeof(double));
      delete[] buffer;

      stochNode->resMon.recFactTmLocal_start();
      updateKKT(prob,vars);
      //addUtVToKKT(1.0, *UtV, *kktd, locnx);
      stochNode->resMon.recFactTmLocal_stop();
    } else {
      //printf("Nonzero worker %d -> reducing...\n", stochNode->rankMe);
      if(me!=ePrecond)
        MPI_Reduce(&(kktd->mStorage->M[0][0]), NULL, locnx*(locnx+locmy),
                   MPI_DOUBLE, MPI_SUM, rankZeroW, commWorkers);
      //printf("Nonzero worker %d -> finished reducing\n", stochNode->rankMe);
    }
  }
}
// Finalizes the dense first-stage KKT matrix before factorization:
// adds Q (off-diagonal entries), the barrier-modified diagonals xDiag and
// yDiag, the term -C'*diag(zDiag)^{-1}*C for inequalities, and the
// equality Jacobian block A (stored under the name getLocalB here).
void sLinsysRootAug::finalizeKKT(sData* prob, Variables* vars)
{
  assert(locmz==0||gOuterSolve<3);

  int j, p, pend;
  double val;

  stochNode->resMon.recFactTmLocal_start();
  stochNode->resMon.recSchurMultLocal_start();

  DenseSymMatrix * kktd = (DenseSymMatrix*) kkt;
  //alias for internal buffer of kkt
  double** dKkt = kktd->Mat();

  //////////////////////////////////////////////////////
  // compute Q+diag(xdiag) - C' * diag(zDiag) * C
  // and update the KKT
  //////////////////////////////////////////////////////

  /////////////////////////////////////////////////////////////
  // update the KKT with Q (DO NOT PUT DIAG)
  /////////////////////////////////////////////////////////////
  SparseSymMatrix& Q = prob->getLocalQ();
  int* krowQ=Q.krowM();
  int* jcolQ=Q.jcolM();
  double* dQ=Q.M();
  for(int i=0; i<locnx; i++) {
    pend = krowQ[i+1];
    for(p=krowQ[i]; p<pend; p++) {
      j = jcolQ[p];
      // Diagonal entries are skipped here: diag(Q) is already folded
      // into xDiag (see comment below).
      if(i==j) continue;
      val = dQ[p];
      // Q is stored as one triangle; mirror each off-diagonal entry so
      // the dense KKT stays symmetric.
      dKkt[i][j] += val;
      dKkt[j][i] += val;
    }
  }

  /////////////////////////////////////////////////////////////
  // update the KKT with the diagonals
  // xDiag is in fact diag(Q)+X^{-1}S
  /////////////////////////////////////////////////////////////
  //kktd->atPutDiagonal( 0, *xDiag );
  SimpleVector& sxDiag = dynamic_cast<SimpleVector&>(*xDiag);
  for(int i=0; i<locnx; i++) dKkt[i][i] += sxDiag[i];
  SimpleVector& syDiag = dynamic_cast<SimpleVector&>(*yDiag);
  for(int i=locnx; i<locnx+locmy; i++) dKkt[i][i] += syDiag[i-locnx];

  /////////////////////////////////////////////////////////////
  // update the KKT with - C' * diag(zDiag) *C
  /////////////////////////////////////////////////////////////
  if(locmz>0) {
    SparseGenMatrix& C = prob->getLocalD();
    C.matTransDinvMultMat(*zDiag, &CtDC);
    assert(CtDC->size() == locnx);

    //aliases for internal buffers of CtDC
    // NOTE(review): CtDC is stored through a generic matrix pointer; the
    // reinterpret_cast assumes matTransDinvMultMat produced a
    // SparseSymMatrix — confirm against its implementation.
    SparseSymMatrix* CtDCsp = reinterpret_cast<SparseSymMatrix*>(CtDC);
    int* krowCtDC=CtDCsp->krowM();
    int* jcolCtDC=CtDCsp->jcolM();
    double* dCtDC=CtDCsp->M();

    // Subtract the product into the (1,1) block; unlike the Q loop above,
    // entries are applied as stored (no mirroring) here.
    for(int i=0; i<locnx; i++) {
      pend = krowCtDC[i+1];
      for(p=krowCtDC[i]; p<pend; p++) {
        j = jcolCtDC[p];
        dKkt[i][j] -= dCtDC[p];
        //printf("%d %d %f\n", i,j,dCtDC[p]);
      }
    }
  } //~end if locmz>0

  /////////////////////////////////////////////////////////////
  // update the KKT with A (symmetric update forced)
  /////////////////////////////////////////////////////////////
  if(locmy>0){
    kktd->symAtPutSubmatrix( locnx, 0, prob->getLocalB(), 0, 0, locmy, locnx, 1 );
  }
  //prob->getLocalB().getStorageRef().dump("stage1eqmat2.dump");

  /////////////////////////////////////////////////////////////
  // update the KKT zeros for the lower right block
  /////////////////////////////////////////////////////////////
  //kktd->storage().atPutZeros(locnx, locnx, locmy+locmz, locmy+locmz);
  //myAtPutZeros(kktd, locnx, locnx, locmy, locmy);

  stochNode->resMon.recSchurMultLocal_stop();
  stochNode->resMon.recFactTmLocal_stop();
}