/**
 * Assembles the root-level dense KKT/Schur-complement matrix from the
 * children's contributions and, on the preconditioner process, factorizes a
 * scaled partial version of it.
 *
 * Sequence performed (every process runs the same collective calls in the
 * same order, so the statement order below must not be changed):
 *  1. Zero the leading (locnx+locmy) x (locnx+locmy) block of the dense kkt.
 *  2. Call factor2() on every child.
 *  3. For every child whose communicator is not MPI_COMM_NULL, compute U and
 *     V and accumulate their product into kkt through matMult().
 *  4. Right after the first such child has been processed:
 *       - every process takes part in an MPI_Reduce (sum) of the first
 *         locnx*(locnx+locmy) doubles of the kkt storage onto rankPrecond
 *         over mpiComm; non-preconditioner processes then zero their
 *         locnx x locnx block so it is not summed a second time later;
 *       - the preconditioner and the special worker take part in a second
 *         MPI_Reduce onto rankZeroW over commP2ZeroW;
 *       - the preconditioner scales kkt by
 *         children.size()/(noProcs*childrenDone), calls updateKKT() and
 *         notifies the solver through matrixChanged().
 *  5. If iAmDistrib is set, all non-preconditioner processes reduce their
 *     kkt storage onto rankZeroW over commWorkers, and the special worker
 *     calls updateKKT() on the result.
 *
 * @param prob  problem data; prob->children must match this->children in size.
 * @param vars  current variables, forwarded to the children and to updateKKT().
 *
 * NOTE(review): the exact meaning of matMult's integer arguments (presumably
 * transpose flags, giving kkt += -U^T V) is not visible here -- confirm
 * against DenseSymMatrix.
 */
void QpGenStochLinsysRootAugRedPrecond::factor2(QpGenStochData *prob,
        Variables *vars)
{
    assert( children.size() == prob->children.size() );

    // Scratch receive buffer for the reductions; only the preconditioner and
    // the special worker ever act as a reduction root.
    double* buffer = NULL;
    StochTreePrecond* stochNodePrcnd = dynamic_cast<StochTreePrecond*>(stochNode);

    DenseSymMatrix * kktd = (DenseSymMatrix*) kkt;
    myAtPutZeros(kktd, 0, 0, locnx+locmy, locnx+locmy);

    // First tell children to factorize.
    for(size_t it=0; it<children.size(); it++) {
        children[it]->factor2(prob->children[it], vars);
    }

    if(me==ePrecond || me==eSpecialWorker)
        buffer = new double[locnx*(locnx+locmy)];

    // Work matrices handed to allocU/allocV; they must be either NULL or
    // point to a live object whenever those helpers are called.
    DenseGenMatrix* U = NULL;
    DenseGenMatrix* V = NULL;

    ////////////////////////////////////////////////////////
    // DIRECT workers -> all processes in fact
    ////////////////////////////////////////////////////////
    int childrenDone=0;
    for(size_t it=0; it<children.size(); it++) {

        // Children not owned by this process contribute nothing locally.
        if(children[it]->mpiComm == MPI_COMM_NULL)
            continue;

        children[it]->stochNode->resMon.recFactTmChildren_start();
        //-----------------------------------------------------------
        children[it]->allocU(&U, locnx);
        children[it]->allocV(&V, locnx);
        children[it]->computeU_V(prob->children[it], U, V);

        //-----------------------------------------------------------
        children[it]->stochNode->resMon.recSchurMultChildren_start();
        //-----------------------------------------------------------
        kktd->matMult(-1.0, *U, 1, *V, 0, 1.0);
        //-----------------------------------------------------------
        children[it]->stochNode->resMon.recSchurMultChildren_stop();
        children[it]->stochNode->resMon.recFactTmChildren_stop();
        childrenDone++;

        ///////////////////////////////////////////////////////////////
        // Stop and engage in communication with preconditioner if
        // enough scenarios were done
        ///////////////////////////////////////////////////////////////
        if(childrenDone==1) {
            // The tree node must really be a StochTreePrecond; otherwise the
            // dynamic_cast above yielded NULL and the reads below are invalid.
            assert( stochNodePrcnd != NULL );

            int rankPrecond = stochNode->rankPrcnd;
            int rankZeroW   = stochNodePrcnd->rankZeroW;
            // MPI_Comm is an opaque handle; it is not an int on every MPI
            // implementation, so keep it in its own type.
            MPI_Comm commP2ZeroW = stochNodePrcnd->commP2ZeroW;

            if(me!=ePrecond) {
                stochNode->resMon.recFactTmLocal_start();
                ///////////////////////////////////////
                // WORKERS  ->   send to precond
                ///////////////////////////////////////
                // Non-root call: the receive buffer is ignored, hence NULL.
                MPI_Reduce(&(kktd->mStorage->M[0][0]), NULL, locnx*(locnx+locmy),
                           MPI_DOUBLE, MPI_SUM, rankPrecond, mpiComm);

                // Null out the Schur complement so the part already sent to the
                // preconditioner is not added again in the later reductions.
                myAtPutZeros(kktd, 0, 0, locnx, locnx);
                stochNode->resMon.recFactTmLocal_stop();

                if(me==eSpecialWorker) {
                    // Root of the preconditioner -> special worker reduction.
                    MPI_Reduce(&(kktd->mStorage->M[0][0]), buffer, locnx*(locnx+locmy),
                               MPI_DOUBLE, MPI_SUM, rankZeroW, commP2ZeroW);

                    memcpy(&kktd->mStorage->M[0][0], buffer, locnx*(locnx+locmy)*sizeof(double));
                }

            } else {
                ////////////////////////////////////////////
                //PRECONDITIONER   ->  receive from workers
                ////////////////////////////////////////////
                stochNode->resMon.recFactTmLocal_start();
                stochNode->resMon.recSchurMultLocal_start();
                // Release the work matrices early, and reset the pointers so a
                // later allocU/allocV call never sees a dangling pointer.
                delete U;
                U = NULL;
                delete V;
                V = NULL;
                stochNode->resMon.recSchurMultLocal_stop();

                MPI_Reduce(&(kktd->mStorage->M[0][0]), buffer, locnx*(locnx+locmy),
                           MPI_DOUBLE, MPI_SUM, rankPrecond, mpiComm);

                memcpy(&kktd->mStorage->M[0][0], buffer, locnx*(locnx+locmy)*sizeof(double));
                // The preconditioner is never a reduction root again.
                delete[] buffer;
                buffer = NULL;

                // Send the information back to the special worker
                // (non-root call, receive buffer ignored).
                MPI_Reduce(&(kktd->mStorage->M[0][0]), NULL, locnx*(locnx+locmy),
                           MPI_DOUBLE, MPI_SUM, rankZeroW, commP2ZeroW);

                stochNode->resMon.recSchurMultLocal_start();
                //////////////////////////////////////////////
                // factorize partial schur complement
                //////////////////////////////////////////////

                // Scale the partial sum: children.size()/(noProcs*childrenDone).
                int noProcs;
                MPI_Comm_size(mpiComm, &noProcs);
                double alpha = 1.0*children.size()/(noProcs*childrenDone);

                kktd->scalarMult(alpha);
                updateKKT(prob,vars);

                // Factorize (triggered by notifying the solver of the change).
                double st=MPI_Wtime();
                solver->matrixChanged();
                printf("fact took %g\n", MPI_Wtime()-st);
                stochNode->resMon.recFactTmLocal_stop();
                stochNode->resMon.recSchurMultLocal_stop();
            }
        }
    }

    // Release the work matrices on every process. Only non-preconditioner
    // processes record this in the monitor; the preconditioner records its
    // own release inside the loop above.
    const bool timeCleanup = (me!=ePrecond);
    if(timeCleanup)
        stochNode->resMon.recSchurMultLocal_start();
    delete U;
    U = NULL;
    delete V;
    V = NULL;
    if(timeCleanup)
        stochNode->resMon.recSchurMultLocal_stop();

    /////////////////////////////////////////////////////////
    // Everybody sum the partial Schur complements to
    // special worker who will have the complete matrix
    /////////////////////////////////////////////////////////
    if(iAmDistrib) {
        assert( stochNodePrcnd != NULL );

        int rankZeroW = stochNode->rankZeroW;
        MPI_Comm commWorkers = stochNodePrcnd->commWorkers;
        if(me==eSpecialWorker) {

            MPI_Reduce(&(kktd->mStorage->M[0][0]), buffer, locnx*(locnx+locmy),
                       MPI_DOUBLE, MPI_SUM,
                       rankZeroW, commWorkers);

            memcpy(&kktd->mStorage->M[0][0], buffer, locnx*(locnx+locmy)*sizeof(double));
            delete[] buffer;
            buffer = NULL;

            stochNode->resMon.recFactTmLocal_start();
            updateKKT(prob,vars);
            stochNode->resMon.recFactTmLocal_stop();

        } else {
            // The preconditioner does not take part in this reduction.
            if(me!=ePrecond)
                MPI_Reduce(&(kktd->mStorage->M[0][0]), NULL, locnx*(locnx+locmy),
                           MPI_DOUBLE, MPI_SUM, rankZeroW, commWorkers);
        }
    }

    // Free the scratch buffer on the paths that did not release it above
    // (e.g. special worker when not distributed). Deleting NULL is a no-op.
    delete[] buffer;
}