/**
 * Factorization phase for the root (first-stage) augmented-reduced KKT system
 * with a dedicated preconditioner process.
 *
 * Roles (value of `me`):
 *  - ePrecond       : the preconditioner process; after the FIRST scenario is
 *                     done it collects the partial Schur complement, rescales
 *                     it, and factorizes it (solver->matrixChanged()).
 *  - eSpecialWorker : the worker that ends up owning the COMPLETE Schur
 *                     complement after all partial contributions are reduced.
 *  - other workers  : compute per-scenario contributions U^T*V and participate
 *                     in the reductions.
 *
 * Protocol outline (order of the MPI_Reduce calls is load-bearing — every
 * process must post the matching collective on the same communicator in the
 * same order, or the run deadlocks):
 *  1. All children factorize recursively.
 *  2. Each local scenario contributes -U^T*V into the dense buffer `kktd`.
 *  3. After the first scenario (childrenDone==1): workers reduce their partial
 *     Schur complement to the preconditioner (root rankPrcnd on mpiComm); the
 *     preconditioner scales it by an extrapolation factor and factorizes it,
 *     then ships it back to the special worker via a reduce on commP2ZeroW.
 *  4. At the end, all workers reduce their partials to the special worker
 *     (root rankZeroW on commWorkers), which then assembles the full KKT.
 *
 * @param prob  stochastic QP data tree; must mirror this node's children.
 * @param vars  current iterate; forwarded to children and updateKKT.
 *
 * NOTE(review): `buffer` is freed on the ePrecond path and on the
 * eSpecialWorker path only inside `if(iAmDistrib)` — if iAmDistrib is false,
 * the special worker leaks it. TODO confirm iAmDistrib always holds here.
 * NOTE(review): `U`/`V` are freed with scalar `delete`; assumes DenseGenMatrix
 * has a (virtual) destructor releasing its storage — confirm allocU/allocV
 * allocate a single object, not an array.
 */
void QpGenStochLinsysRootAugRedPrecond::factor2(QpGenStochData *prob, Variables *vars)
{
  // The problem tree and the linear-system tree must be structurally identical.
  assert( children.size() == prob->children.size() );

  double* buffer=NULL;  // scratch receive buffer for the MPI_Reduce results
  StochTreePrecond* stochNodePrcnd = dynamic_cast<StochTreePrecond*>(stochNode);
  //!!
  // Dense view of the root KKT block; zero the (locnx+locmy)^2 leading block
  // so this factorization starts from a clean accumulator.
  DenseSymMatrix * kktd = (DenseSymMatrix*) kkt;
  myAtPutZeros(kktd, 0, 0, locnx+locmy, locnx+locmy);
  //~~

  // First tell children to factorize.
  for(int it=0; it<children.size(); it++) {
    children[it]->factor2(prob->children[it], vars);
  }

  // Only the two "collector" roles ever receive reduced data, so only they
  // need the scratch buffer (reduce receive buffers matter only at the root).
  if(me==ePrecond || me==eSpecialWorker)
    buffer = new double[locnx*(locnx+locmy)];

  DenseGenMatrix* U = NULL;
  DenseGenMatrix* V = NULL;

  //if(me==ePrecond) assert(children.size()==1);
  int commWrkrs = stochNode->commWrkrs;  // NOTE(review): unused below — dead local?
  ////////////////////////////////////////////////////////
  // DIRECT workers -> all processes in fact
  ////////////////////////////////////////////////////////
  int childrenDone=0;
  for(int it=0; it<children.size(); it++) {
    // Skip scenarios this process does not own.
    if(children[it]->mpiComm == MPI_COMM_NULL) continue;

    children[it]->stochNode->resMon.recFactTmChildren_start();
    //-----------------------------------------------------------
    // Build this scenario's U and V factors (locnx columns each).
    children[it]->allocU(&U, locnx);
    children[it]->allocV(&V, locnx);
    children[it]->computeU_V(prob->children[it], U, V);
    //-----------------------------------------------------------
    children[it]->stochNode->resMon.recSchurMultChildren_start();
    //-----------------------------------------------------------
    // Accumulate the Schur-complement contribution: kktd -= U^T * V.
    kktd->matMult(-1.0, *U, 1, *V, 0, 1.0);
    //-----------------------------------------------------------
    children[it]->stochNode->resMon.recSchurMultChildren_stop();
    children[it]->stochNode->resMon.recFactTmChildren_stop();
    childrenDone++;
    ///////////////////////////////////////////////////////////////
    // Stop and engage in communication with preconditioner if
    // enough scenarios were done
    ///////////////////////////////////////////////////////////////
    if(childrenDone==1) {
      int rankPrecond = stochNode->rankPrcnd;
      int rankZeroW   = stochNodePrcnd ->rankZeroW;
      int commP2ZeroW = stochNodePrcnd ->commP2ZeroW;
      if(me!=ePrecond) {
        stochNode->resMon.recFactTmLocal_start();
        ///////////////////////////////////////
        // WORKERS -> send to precond
        ///////////////////////////////////////
        // Send-only side of the reduce: recvbuf is NULL since this rank is
        // not the root (rankPrecond) of the reduction.
        MPI_Reduce(&(kktd->mStorage->M[0][0]), NULL, locnx*(locnx+locmy),
                   MPI_DOUBLE, MPI_SUM, rankPrecond, mpiComm);
        //null out Schur complement so the existing info will no more be added
        myAtPutZeros(kktd, 0, 0, locnx, locnx);
        stochNode->resMon.recFactTmLocal_stop();

        if(me==eSpecialWorker) {
          // Receive (as root on commP2ZeroW) the preconditioner's scaled
          // first-scenario Schur complement and install it into kktd.
          MPI_Reduce(&(kktd->mStorage->M[0][0]), buffer, locnx*(locnx+locmy),
                     MPI_DOUBLE, MPI_SUM, rankZeroW, commP2ZeroW);
          memcpy(&kktd->mStorage->M[0][0], buffer,
                 locnx*(locnx+locmy)*sizeof(double));
        }
      } else {
        ////////////////////////////////////////////
        //PRECONDITIONER -> receive from workers
        ////////////////////////////////////////////
        stochNode->resMon.recFactTmLocal_start();
        stochNode->resMon.recSchurMultLocal_start();
        // The preconditioner is done with its own U/V factors; free them
        // before the (potentially long) factorization below.
        if(U) delete U;
        if(V) delete V;
        //deleteUtV(); reuse this
        stochNode->resMon.recSchurMultLocal_stop();

        // Root side of the worker reduce: gather the summed first-scenario
        // partial Schur complement into `buffer`, then copy it into kktd.
        MPI_Reduce(&(kktd->mStorage->M[0][0]), buffer, locnx*(locnx+locmy),
                   MPI_DOUBLE, MPI_SUM, rankPrecond, mpiComm);
        memcpy(&kktd->mStorage->M[0][0], buffer,
               locnx*(locnx+locmy)*sizeof(double));
        delete[] buffer;

        //send the information back to specialWorker
        MPI_Reduce(&(kktd->mStorage->M[0][0]), NULL, locnx*(locnx+locmy),
                   MPI_DOUBLE, MPI_SUM, rankZeroW, commP2ZeroW);

        stochNode->resMon.recSchurMultLocal_start();
        //////////////////////////////////////////////
        // factorize partial schur complement
        //////////////////////////////////////////////
        // update the upper block of the kkt with the UtV block
        // Extrapolate the one-scenario partial sum to the full problem:
        // scale by (total scenarios)/(procs * scenarios-done-so-far).
        int noProcs; MPI_Comm_size(mpiComm, &noProcs);
        double alpha = 1.0*children.size()/(noProcs*childrenDone);
        kktd->scalarMult(alpha);
        updateKKT(prob,vars);
        //addUtVToKKT(alpha, *UtV, *kktd, locnx);

        //factorize
        double st=MPI_Wtime();
        solver->matrixChanged();
        printf("fact took %g\n", MPI_Wtime()-st);
        stochNode->resMon.recFactTmLocal_stop();
        stochNode->resMon.recSchurMultLocal_stop();
      }
    }
  }

  // Workers free the U/V factors of their last processed scenario.
  // NOTE(review): on the precond path U/V from scenarios after the first are
  // not freed here — confirm the precond owns at most one scenario.
  if(me!=ePrecond) {
    //printf("Worker finished updates rank=%d\n", stochNode->rankMe);
    stochNode->resMon.recSchurMultLocal_start();
    if(U) delete U;
    if(V) delete V;
    //deleteUtV(); reuse this
    stochNode->resMon.recSchurMultLocal_stop();
  }
  /////////////////////////////////////////////////////////
  // Everybody sum the partial Schur complements to
  // special worker who will have the complete matrix
  /////////////////////////////////////////////////////////
  if(iAmDistrib) {
    int rankZeroW = stochNode->rankZeroW;
    MPI_Comm commWorkers = stochNodePrcnd ->commWorkers;
    if(me==eSpecialWorker) {
      //buffer=new double[locnx*locnx];
      //if(buffer==NULL) printf("PANIC !!!! not enough memory in doing the reduce !!!!\n");

      // Root of the final reduction: sum everyone's partials into `buffer`,
      // then overwrite kktd with the complete Schur complement.
      MPI_Reduce(&(kktd->mStorage->M[0][0]), buffer, locnx*(locnx+locmy),
                 MPI_DOUBLE, MPI_SUM, rankZeroW, commWorkers);

      memcpy(&kktd->mStorage->M[0][0], buffer,
             locnx*(locnx+locmy)*sizeof(double));
      delete[] buffer;

      stochNode->resMon.recFactTmLocal_start();
      updateKKT(prob,vars);
      //addUtVToKKT(1.0, *UtV, *kktd, locnx);
      stochNode->resMon.recFactTmLocal_stop();
    } else {
      //printf("Nonzero worker %d -> reducing...\n", stochNode->rankMe);
      // Plain workers send their partials; the preconditioner is NOT in
      // commWorkers, hence the me!=ePrecond guard.
      // TODO(review): confirm ePrecond is excluded from commWorkers.
      if(me!=ePrecond)
        MPI_Reduce(&(kktd->mStorage->M[0][0]), NULL, locnx*(locnx+locmy),
                   MPI_DOUBLE, MPI_SUM, rankZeroW, commWorkers);
      //printf("Nonzero worker %d -> finished reducing\n", stochNode->rankMe);
    }
  }
}