void DenseSymMatrix::AddMatrix(Number alpha, const DenseSymMatrix& A,
                                 Number beta)
  {
    // Computes *this = alpha*A + beta*(*this).  Only the lower triangle
    // of the column-major dense storage is touched (the matrix is
    // symmetric; the strict upper triangle is never referenced).
    DBG_ASSERT(beta==0. || initialized_);
    DBG_ASSERT(Dim()==A.Dim());

    const Index dim = Dim();

    if (alpha==0.) {
      // Result reduces to beta*(*this).  The previous code returned here
      // unconditionally, which was only correct for beta==1: for any
      // other beta the existing content must still be scaled (or zeroed).
      if (beta==1.) {
        return;
      }
      if (beta==0.) {
        // Explicit zeroing (avoids 0*uninitialized, which could be NaN).
        for (Index j=0; j<dim; j++) {
          for (Index i=j; i<dim; i++) {
            values_[i+j*dim] = 0.;
          }
        }
      }
      else {
        for (Index j=0; j<dim; j++) {
          for (Index i=j; i<dim; i++) {
            values_[i+j*dim] *= beta;
          }
        }
      }
      ObjectChanged();
      initialized_ = true;
      return;
    }

    const Number* Avalues = A.Values();
    if (beta==0.) {
      // Plain scaled copy; previous (possibly uninitialized) content ignored.
      for (Index j=0; j<dim; j++) {
        for (Index i=j; i<dim; i++) {
          values_[i+j*dim] = alpha*Avalues[i+j*dim];
        }
      }
    }
    else if (beta==1.) {
      // Common case: accumulate without rescaling the existing content.
      for (Index j=0; j<dim; j++) {
        for (Index i=j; i<dim; i++) {
          values_[i+j*dim] += alpha*Avalues[i+j*dim];
        }
      }
    }
    else {
      // General affine combination.
      for (Index j=0; j<dim; j++) {
        for (Index i=j; i<dim; i++) {
          values_[i+j*dim] = alpha*Avalues[i+j*dim] + beta*values_[i+j*dim];
        }
      }
    }
    ObjectChanged();
    initialized_ = true;
  }
// ----- Example #2 -----
  bool DenseGenMatrix::ComputeEigenVectors(const DenseSymMatrix& M,
      DenseVector& Evalues)
  {
    Index dim = M.Dim();
    DBG_ASSERT(Evalues.Dim()==dim);
    DBG_ASSERT(NRows()==dim);
    DBG_ASSERT(NCols()==dim);

    // First we copy the content of the matrix into Q
    const Number* Mvalues = M.Values();
    for (Index j=0; j<dim; j++) {
      for (Index i=j; i<dim; i++) {
        values_[i+j*dim] = Mvalues[i+j*dim];
      }
    }

    bool compute_eigenvectors = true;
    Number* Evals = Evalues.Values();
    Index info;
    IpLapackDsyev(compute_eigenvectors, dim, values_,
                  dim, Evals, info);

    initialized_ = (info==0);
    ObjectChanged();
    return (info==0);
  }
// ----- Example #3 -----
  bool DenseGenMatrix::ComputeCholeskyFactor(const DenseSymMatrix& M)
  {
    Index dim = M.Dim();
    DBG_ASSERT(dim==NCols());
    DBG_ASSERT(dim==NRows());

    ObjectChanged();

    // First we copy the content of the symmetric matrix into J
    const Number* Mvalues = M.Values();
    for (Index j=0; j<dim; j++) {
      for (Index i=j; i<dim; i++) {
        values_[i+j*dim] = Mvalues[i+j*dim];
      }
    }

    // Now call the lapack subroutine to perform the factorization
    Index info;
    IpLapackDpotrf(dim, values_, dim, info);

    DBG_ASSERT(info>=0);
    if (info!=0) {
      initialized_ = false;
      return false;
    }

    // We set all strictly upper values to zero
    // ToDo: This might not be necessary?!?
    for (Index j=1; j<dim; j++) {
      for (Index i=0; i<j; i++) {
        values_[i+j*dim] = 0.;
      }
    }

    factorization_ = CHOL;
    initialized_ = true;
    return true;
  }
// ----- Example #4 -----
void sLinsys::addTermToDenseSchurCompl(sData *prob,
                                       DenseSymMatrix& SC)
{
  // Accumulates this scenario's contribution into the dense Schur
  // complement SC, one column at a time: each column of G_i^T (stacked
  // from R, A and C) is solved against the local KKT system H_i, and
  // the result is multiplied back with a -1 factor into SC.
  SparseGenMatrix& A = prob->getLocalA();
  SparseGenMatrix& C = prob->getLocalC();
  SparseGenMatrix& R = prob->getLocalCrossHessian();

  int N, nxP, NP;
  A.getSize(N, nxP); assert(N==locmy);
  NP = SC.size(); assert(NP>=nxP);

  // A may not know its column count; fall back to C, then to SC's size.
  if(nxP==-1) C.getSize(N,nxP);
  if(nxP==-1) nxP = NP;
  N = locnx+locmy+locmz;

  SimpleVector col(N);

  for(int colIdx=0; colIdx<nxP; colIdx++) {

    // Only the first locnx entries need explicit clearing; the A and C
    // segments below are fully overwritten by fromGetDense.
    double* colData = &col[0];
    for(int k=0; k<locnx; k++) colData[k]=0.0;

    // Assemble the colIdx-th column of G_i^T into col.
    R.fromGetDense(0, colIdx, &col[0],           1, locnx, 1);
    A.fromGetDense(0, colIdx, &col[locnx],       1, locmy, 1);
    C.fromGetDense(0, colIdx, &col[locnx+locmy], 1, locmz, 1);

    // col = inv(H_i) * (colIdx-th column of G_i^T)
    solver->solve(col);

    // Multiply back piece by piece with alpha=-1:
    //   SC[colIdx] += -R'*x  -A'*y  -C'*z
    R.transMult( 1.0, &SC[colIdx][0], 1, -1.0, &col[0],           1);
    A.transMult( 1.0, &SC[colIdx][0], 1, -1.0, &col[locnx],       1);
    C.transMult( 1.0, &SC[colIdx][0], 1, -1.0, &col[locnx+locmy], 1);
  }
}
// ----- Example #5 -----
void sLinsysRoot::dumpMatrix(int scen, int proc, const char* nameToken, DenseSymMatrix& M) 
{
  // Dumps the dense n-by-n matrix M to a text file, one row of
  // "%22.16f"-formatted entries per line.  Runs only at a fixed set of
  // iterations to limit I/O.  scen == -1 means the matrix is not
  // scenario-specific; 'proc' is currently unused.
  int n = M.size();
  char szNumber[32];
  string strBuffer="";

  int iter = g_iterNumber;

  // Dump only at selected interior-point iterations.
  if(iter!=1 && iter!=5 && iter!=15 && iter!=25 && iter!=35 && iter!=45) return;

  // snprintf (instead of sprintf) guards against a long nameToken
  // overflowing the fixed-size filename buffer.
  char szFilename[256];
  if(scen==-1)
    snprintf(szFilename, sizeof(szFilename), "%s_%d__%d.mat", nameToken, n, iter);
  else 
    snprintf(szFilename, sizeof(szFilename), "%s_%03d_%d__%d.mat", nameToken, scen+1, n, iter);
  FILE* file = fopen(szFilename, "w");
  assert(file);
  if(!file) return;  // stay safe when asserts are compiled out

  for(int j=0; j<n; j++) {
    for(int i=0; i<n; i++) {
      snprintf(szNumber, sizeof(szNumber), "%22.16f ", M[i][j]);
      strBuffer += szNumber;
    }
    strBuffer += "\n";

    // Flush periodically so the in-memory buffer stays bounded.
    if(strBuffer.length()>1250000) {
      fwrite(strBuffer.c_str(), 1, strBuffer.length(), file);
      strBuffer = "";
    }
  }

  if(strBuffer.length()>0) {
    fwrite(strBuffer.c_str(), 1, strBuffer.length(), file);
  }

  fclose(file);
}
// ----- Example #6 -----
void sLinsys::addTermToDenseSchurCompl(sData *prob,
                                       DenseSymMatrix& SC)
{
  // Blocked variant: adds this scenario's term to the dense Schur
  // complement SC in blocks of up to 'blocksize' columns.  For each
  // block of columns of G^T (stacked from R, A, C), solve against the
  // local KKT system and multiply back with a -1 factor into SC.
  // Linking rows coming from E (mle equality rows) and F (mli
  // inequality rows) are handled by two analogous loops below.
  SparseGenMatrix& A = prob->getLocalA();
  SparseGenMatrix& C = prob->getLocalC();
  SparseGenMatrix& R = prob->getLocalCrossHessian();

  int N, nxP, NP;
  int locns = locmz;

  int mle = prob->getmle();
  int mli = prob->getmli();
  SparseGenMatrix& E = prob->getLocalE();
  SparseGenMatrix& F = prob->getLocalF();

  // Transposed copies so column blocks of E^T/F^T can be extracted.
  SparseGenMatrix ET;
  SparseGenMatrix FT;
  ET.transCopyof(E);
  FT.transCopyof(F);

  int nx0, my0, mz0;
  stochNode->get_FistStageSize(nx0, my0, mz0);

  A.getSize(N, nxP); assert(N==locmy);
  NP = SC.size(); assert(NP>=nxP);

  // A may not know its column count; fall back to C, then to SC's size.
  if(nxP==-1) C.getSize(N,nxP);
  if(nxP==-1) nxP = NP;
  // With gOuterSolve>=3 the slack rows (locns) are part of the system.
  if(gOuterSolve>=3)
    N = locnx+locns+locmy+locmz;
  else
    N = locnx+locmy+locmz;

  int blocksize = 64;
  DenseGenMatrix cols(blocksize,N);

  // Pardiso can exploit the sparsity pattern of the right-hand sides.
  // The dynamic_cast below is currently disabled, so the sparse path
  // is never taken.
  bool ispardiso=false;
  PardisoSolver* pardisoSlv=NULL;
//  pardisoSlv = dynamic_cast<PardisoSolver*>(solver);
  int* colSparsity=NULL;
  if(pardisoSlv) {
    ispardiso=true;
    colSparsity=new int[N];
  }

  // ---- main block: columns of G^T coming from R, A, C ----
  for (int it=0; it < nxP; it += blocksize) {
    int start=it;
    int end = MIN(it+blocksize,nxP);
    int numcols = end-start;
    cols.getStorageRef().m = numcols; // avoid extra solves on a partial block

    bool allzero = true;
    memset(&cols[0][0],0,N*blocksize*sizeof(double));

    if(ispardiso) {
      for(int i=0; i<N; i++) colSparsity[i]=0;
      if(gOuterSolve>=3) {
        R.getStorageRef().fromGetColBlock(start, &cols[0][0], N, numcols, colSparsity, allzero);
        A.getStorageRef().fromGetColBlock(start, &cols[0][locnx+locns], N, numcols, colSparsity, allzero);
        C.getStorageRef().fromGetColBlock(start, &cols[0][locnx+locns+locmy], N, numcols, colSparsity, allzero);
      }
      else {
        R.getStorageRef().fromGetColBlock(start, &cols[0][0], N, numcols, colSparsity, allzero);
        A.getStorageRef().fromGetColBlock(start, &cols[0][locnx], N, numcols, colSparsity, allzero);
        C.getStorageRef().fromGetColBlock(start, &cols[0][locnx+locmy], N, numcols, colSparsity, allzero);
      }
    } else {
      if(gOuterSolve>=3) {
        R.getStorageRef().fromGetColBlock(start, &cols[0][0], N, numcols, allzero);
        A.getStorageRef().fromGetColBlock(start, &cols[0][locnx+locns], N, numcols, allzero);
        C.getStorageRef().fromGetColBlock(start, &cols[0][locnx+locns+locmy], N, numcols, allzero);
      }
      else {
        R.getStorageRef().fromGetColBlock(start, &cols[0][0], N, numcols, allzero);
        A.getStorageRef().fromGetColBlock(start, &cols[0][locnx], N, numcols, allzero);
        C.getStorageRef().fromGetColBlock(start, &cols[0][locnx+locmy], N, numcols, allzero);
      }
    }

    if(!allzero) {

      if(ispardiso)
        pardisoSlv->solve(cols,colSparsity);
      else
        solver->solve(cols);

      if(gOuterSolve>=3) {
        R.getStorageRef().transMultMat( 1.0, &(SC[0][start]), numcols, NP,
                                       -1.0, &cols[0][0], N);
        A.getStorageRef().transMultMat( 1.0, &(SC[0][start]), numcols, NP,
                                       -1.0, &cols[0][locnx+locns], N);
        C.getStorageRef().transMultMat( 1.0, &(SC[0][start]), numcols, NP,
                                       -1.0, &cols[0][locnx+locns+locmy], N);
        if(mle>0)
          ET.getStorageRef().transMultMat( 1.0, &(SC.getStorageRef().M[nx0+mz0+my0-mle][start]), numcols, NP,
                                          -1.0, &cols[0][0], N);
        if(mli>0)
          FT.getStorageRef().transMultMat( 1.0, &(SC.getStorageRef().M[nx0+mz0+my0+mz0-mli][start]), numcols, NP,
                                          -1.0, &cols[0][0], N);
      }
      else {
        R.getStorageRef().transMultMat( 1.0, &(SC[0][start]), numcols, NP,
                                       -1.0, &cols[0][0], N);
        A.getStorageRef().transMultMat( 1.0, &(SC[0][start]), numcols, NP,
                                       -1.0, &cols[0][locnx], N);
        C.getStorageRef().transMultMat( 1.0, &(SC[0][start]), numcols, NP,
                                       -1.0, &cols[0][locnx+locmy], N);
      }
    } //end !allzero
  }

  // ---- linking-equality block: columns of E^T ----
  for (int it=0; it < mle; it += blocksize)
  {
    int start=it;
    int end = MIN(it+blocksize,mle);
    int numcols = end-start;
    cols.getStorageRef().m = numcols; // avoid extra solves
    bool allzero = true;
    memset(&cols[0][0],0,N*blocksize*sizeof(double));

    if(ispardiso)
    {
      for(int i=0; i<N; i++) colSparsity[i]=0;
      ET.getStorageRef().fromGetColBlock(start, &cols[0][0], N, numcols, colSparsity, allzero);
    }
    else
    {
      ET.getStorageRef().fromGetColBlock(start, &cols[0][0], N, numcols, allzero);
    }
    if(!allzero) {
      if(ispardiso)
        pardisoSlv->solve(cols,colSparsity);
      else
        solver->solve(cols);
      if(gOuterSolve>=3) {
        R.getStorageRef().transMultMat( 1.0, &(SC[0][nx0+mz0+my0-mle+start]), numcols, NP,
                                       -1.0, &cols[0][0], N);
        A.getStorageRef().transMultMat( 1.0, &(SC[0][nx0+mz0+my0-mle+start]), numcols, NP,
                                       -1.0, &cols[0][locnx+locns], N);
        C.getStorageRef().transMultMat( 1.0, &(SC[0][nx0+mz0+my0-mle+start]), numcols, NP,
                                       -1.0, &cols[0][locnx+locns+locmy], N);
        if(mle>0)
          ET.getStorageRef().transMultMat( 1.0, &(SC[nx0+mz0+my0-mle][nx0+mz0+my0-mle+start]), numcols, NP,
                                          -1.0, &cols[0][0], N);
        if(mli>0)
          FT.getStorageRef().transMultMat( 1.0, &(SC[nx0+mz0+my0+mz0-mli][nx0+mz0+my0-mle+start]), numcols, NP,
                                          -1.0, &cols[0][0], N);
      }
      else {
        assert(false && "not implemented");
      }
    } //end !allzero
  }

  // ---- linking-inequality block: columns of F^T ----
  for (int it=0; it < mli; it += blocksize)
  {
    int start=it;
    // BUGFIX: the upper clamp must use this loop's bound mli; it
    // previously read MIN(it+blocksize,mle), which mis-sizes the final
    // block (or yields numcols<=0) whenever mle!=mli.
    int end = MIN(it+blocksize,mli);
    int numcols = end-start;
    cols.getStorageRef().m = numcols; // avoid extra solves
    bool allzero = true;
    memset(&cols[0][0],0,N*blocksize*sizeof(double));

    if(ispardiso)
    {
      for(int i=0; i<N; i++) colSparsity[i]=0;
      FT.getStorageRef().fromGetColBlock(start, &cols[0][0], N, numcols, colSparsity, allzero);
    }
    else
    {
      FT.getStorageRef().fromGetColBlock(start, &cols[0][0], N, numcols, allzero);
    }
    if(!allzero) {
      if(ispardiso)
        pardisoSlv->solve(cols,colSparsity);
      else
        solver->solve(cols);
      if(gOuterSolve>=3) {
        R.getStorageRef().transMultMat( 1.0, &(SC[0][nx0+mz0+my0+mz0-mli+start]), numcols, NP,
                                       -1.0, &cols[0][0], N);
        A.getStorageRef().transMultMat( 1.0, &(SC[0][nx0+mz0+my0+mz0-mli+start]), numcols, NP,
                                       -1.0, &cols[0][locnx+locns], N);
        C.getStorageRef().transMultMat( 1.0, &(SC[0][nx0+mz0+my0+mz0-mli+start]), numcols, NP,
                                       -1.0, &cols[0][locnx+locns+locmy], N);
        if(mle>0)
          ET.getStorageRef().transMultMat( 1.0, &(SC[nx0+mz0+my0-mle][nx0+mz0+my0+mz0-mli+start]), numcols, NP,
                                          -1.0, &cols[0][0], N);
        if(mli>0)
          FT.getStorageRef().transMultMat( 1.0, &(SC[nx0+mz0+my0+mz0-mli][nx0+mz0+my0+mz0-mli+start]), numcols, NP,
                                          -1.0, &cols[0][0], N);
      }
      else {
        assert(false && "not implemented");
      }
    } //end !allzero
  }

  if(ispardiso) delete[] colSparsity;
}
void QpGenStochLinsysRootAugRedPrecond::factor2(QpGenStochData *prob,
        Variables *vars)
{
    // Factorization phase for the root augmented/reduced system with a
    // dedicated preconditioner rank: children factorize their local KKT
    // systems, their Schur contributions are accumulated into the dense
    // kkt ('kktd') and reduced over MPI towards the preconditioner and
    // the "special worker".  NOTE(review): the exact role split between
    // ePrecond / eSpecialWorker is inferred from the MPI calls below —
    // confirm against the class documentation.
    assert( children.size() == prob->children.size() );
    double* buffer=NULL;
    StochTreePrecond* stochNodePrcnd = dynamic_cast<StochTreePrecond*>(stochNode);
    //!!
    // Zero the (locnx+locmy)x(locnx+locmy) upper-left block of the dense
    // KKT before accumulating the children's contributions.
    DenseSymMatrix * kktd = (DenseSymMatrix*) kkt;
    myAtPutZeros(kktd, 0, 0, locnx+locmy, locnx+locmy);
    //~~

    // First tell children to factorize.
    for(int it=0; it<children.size(); it++) {
        children[it]->factor2(prob->children[it], vars);
    }

    // Only ranks that receive reduced data need a receive buffer.
    if(me==ePrecond || me==eSpecialWorker)
        buffer = new double[locnx*(locnx+locmy)];

    DenseGenMatrix* U = NULL;
    DenseGenMatrix* V = NULL;


    //if(me==ePrecond) assert(children.size()==1);
    int commWrkrs = stochNode->commWrkrs;  // NOTE(review): appears unused below
    ////////////////////////////////////////////////////////
    // DIRECT workers -> all processes in fact
    ////////////////////////////////////////////////////////
    int childrenDone=0;
    for(int it=0; it<children.size(); it++) {

        // Skip children this rank does not own.
        if(children[it]->mpiComm == MPI_COMM_NULL)
            continue;


        children[it]->stochNode->resMon.recFactTmChildren_start();
        //-----------------------------------------------------------
        // Form the child's Schur-complement term and fold it into the
        // dense KKT: kktd += (-1) * U * V.
        children[it]->allocU(&U, locnx);
        children[it]->allocV(&V, locnx);
        children[it]->computeU_V(prob->children[it], U, V);

        //-----------------------------------------------------------
        children[it]->stochNode->resMon.recSchurMultChildren_start();
        //-----------------------------------------------------------
        kktd->matMult(-1.0, *U, 1, *V, 0, 1.0);
        //-----------------------------------------------------------
        children[it]->stochNode->resMon.recSchurMultChildren_stop();
        children[it]->stochNode->resMon.recFactTmChildren_stop();
        childrenDone++;
        ///////////////////////////////////////////////////////////////
        // Stop and engage in communication with preconditioner if
        // enough scenarios were done
        ///////////////////////////////////////////////////////////////
        if(childrenDone==1) {
            int rankPrecond = stochNode->rankPrcnd;
            int rankZeroW   = stochNodePrcnd ->rankZeroW;
            int commP2ZeroW = stochNodePrcnd ->commP2ZeroW;

            if(me!=ePrecond) {
                stochNode->resMon.recFactTmLocal_start();
                ///////////////////////////////////////
                // WORKERS  ->   send to precond
                ///////////////////////////////////////
                // Send-side of the reduction: recvbuf is NULL on non-root ranks.
                MPI_Reduce(&(kktd->mStorage->M[0][0]), NULL, locnx*(locnx+locmy),
                           MPI_DOUBLE, MPI_SUM, rankPrecond, mpiComm);

                //null out Schur complement so the existing info will no more be added
                myAtPutZeros(kktd, 0, 0, locnx, locnx);
                stochNode->resMon.recFactTmLocal_stop();

                if(me==eSpecialWorker) {
                    // Special worker is the root of a second reduction and
                    // keeps the summed contribution in its own kktd.
                    MPI_Reduce(&(kktd->mStorage->M[0][0]), buffer, locnx*(locnx+locmy),
                               MPI_DOUBLE, MPI_SUM, rankZeroW, commP2ZeroW);

                    memcpy(&kktd->mStorage->M[0][0], buffer, locnx*(locnx+locmy)*sizeof(double));
                }

            } else {
                ////////////////////////////////////////////
                //PRECONDITIONER   ->  receive from workers
                ////////////////////////////////////////////
                stochNode->resMon.recFactTmLocal_start();
                stochNode->resMon.recSchurMultLocal_start();
                // NOTE(review): U/V are freed here, but later loop iterations
                // may reallocate them via allocU/allocV, and the cleanup near
                // the end of this function runs only for me!=ePrecond —
                // possible leak if the precond rank owns several children;
                // confirm allocU/allocV semantics.
                if(U) delete U;
                if(V) delete V;
                //deleteUtV(); reuse this
                stochNode->resMon.recSchurMultLocal_stop();

                // Root side of the workers' reduction above.
                MPI_Reduce(&(kktd->mStorage->M[0][0]), buffer, locnx*(locnx+locmy),
                           MPI_DOUBLE, MPI_SUM, rankPrecond, mpiComm);

                memcpy(&kktd->mStorage->M[0][0], buffer, locnx*(locnx+locmy)*sizeof(double));
                delete[] buffer;


                //send the information back to specialWorker
                MPI_Reduce(&(kktd->mStorage->M[0][0]), NULL, locnx*(locnx+locmy),
                           MPI_DOUBLE, MPI_SUM, rankZeroW, commP2ZeroW);


                stochNode->resMon.recSchurMultLocal_start();
                //////////////////////////////////////////////
                // factorize partial schur complement
                //////////////////////////////////////////////

                // update the upper block of the kkt with the UtV block
                int noProcs;
                MPI_Comm_size(mpiComm, &noProcs);
                // Scale the partial sum so it approximates the full sum over
                // all scenarios (children.size() total, childrenDone local).
                double alpha = 1.0*children.size()/(noProcs*childrenDone);

                kktd->scalarMult(alpha);
                updateKKT(prob,vars);
                //addUtVToKKT(alpha, *UtV, *kktd, locnx);

                //factorize
                double st=MPI_Wtime();
                solver->matrixChanged();
                printf("fact took %g\n", MPI_Wtime()-st);
                stochNode->resMon.recFactTmLocal_stop();
                stochNode->resMon.recSchurMultLocal_stop();
            }
        }
    }


    if(me!=ePrecond) {
        //printf("Worker finished updates rank=%d\n", stochNode->rankMe);
        stochNode->resMon.recSchurMultLocal_start();
        if(U) delete U;
        if(V) delete V;
        //deleteUtV(); reuse this
        stochNode->resMon.recSchurMultLocal_stop();
    }

    /////////////////////////////////////////////////////////
    // Everybody sum the partial Schur complements to
    // special worker who will have the complete matrix
    /////////////////////////////////////////////////////////
    if(iAmDistrib) {
        int rankZeroW = stochNode->rankZeroW;
        MPI_Comm commWorkers = stochNodePrcnd ->commWorkers;
        if(me==eSpecialWorker) {

            //buffer=new double[locnx*locnx];
            //if(buffer==NULL) printf("PANIC !!!! not enough memory in doing the reduce !!!!\n");

            MPI_Reduce(&(kktd->mStorage->M[0][0]), buffer, locnx*(locnx+locmy),
                       MPI_DOUBLE, MPI_SUM,
                       rankZeroW, commWorkers);

            memcpy(&kktd->mStorage->M[0][0], buffer, locnx*(locnx+locmy)*sizeof(double));
            delete[] buffer;

            stochNode->resMon.recFactTmLocal_start();

            updateKKT(prob,vars);
            //addUtVToKKT(1.0, *UtV, *kktd, locnx);
            stochNode->resMon.recFactTmLocal_stop();

        } else {
            //printf("Nonzero worker %d -> reducing...\n", stochNode->rankMe);
            if(me!=ePrecond)
                MPI_Reduce(&(kktd->mStorage->M[0][0]), NULL, locnx*(locnx+locmy),
                           MPI_DOUBLE, MPI_SUM, rankZeroW, commWorkers);

            //printf("Nonzero worker %d -> finished reducing\n", stochNode->rankMe);
        }
    }
}
// ----- Example #8 -----
void sLinsysRootAug::finalizeKKT(sData* prob, Variables* vars)
{
  // Finishes assembling the dense root KKT matrix:
  //  * (1,1) block: Q (off-diagonal part) + diag(xDiag) - C'*diag(zDiag)*C
  //  * (2,2) block: diag(yDiag)
  //  * off-diagonal block: the equality Jacobian A (symmetric update).
  assert(locmz==0||gOuterSolve<3);

  stochNode->resMon.recFactTmLocal_start();
  stochNode->resMon.recSchurMultLocal_start();

  DenseSymMatrix* kktd = (DenseSymMatrix*) kkt;
  double** dKkt = kktd->Mat();   // direct access to kkt's dense buffer

  // --- add the off-diagonal entries of Q (diagonal is folded into xDiag)
  SparseSymMatrix& Q = prob->getLocalQ();
  int* krowQ=Q.krowM(); int* jcolQ=Q.jcolM(); double* dQ=Q.M();
  for(int row=0; row<locnx; row++) {
    for(int p=krowQ[row]; p<krowQ[row+1]; p++) {
      int col = jcolQ[p];
      if(row==col) continue;   // skip diagonal: already inside xDiag
      double val = dQ[p];
      dKkt[row][col] += val;
      dKkt[col][row] += val;   // mirror the stored triangle
    }
  }

  // --- diagonal updates; xDiag is in fact diag(Q)+X^{-1}S
  SimpleVector& sxDiag = dynamic_cast<SimpleVector&>(*xDiag);
  for(int k=0; k<locnx; k++) dKkt[k][k] += sxDiag[k];
  SimpleVector& syDiag = dynamic_cast<SimpleVector&>(*yDiag);
  for(int k=locnx; k<locnx+locmy; k++) dKkt[k][k] += syDiag[k-locnx];


  // --- update the KKT with  - C' * diag(zDiag) * C
  if(locmz>0) {
    SparseGenMatrix& C = prob->getLocalD();
    C.matTransDinvMultMat(*zDiag, &CtDC);
    assert(CtDC->size() == locnx);

    // Aliases for the internal CSR buffers of CtDC.
    SparseSymMatrix* CtDCsp = reinterpret_cast<SparseSymMatrix*>(CtDC);
    int* krowCtDC=CtDCsp->krowM(); int* jcolCtDC=CtDCsp->jcolM(); double* dCtDC=CtDCsp->M();

    for(int row=0; row<locnx; row++) {
      for(int p=krowCtDC[row]; p<krowCtDC[row+1]; p++) {
        dKkt[row][jcolCtDC[p]] -= dCtDC[p];
      }
    }
  } //~end if locmz>0

  // --- update the KKT with A (symmetric update forced)
  if(locmy>0){
    kktd->symAtPutSubmatrix( locnx, 0, prob->getLocalB(), 0, 0, locmy, locnx, 1 );
  }

  stochNode->resMon.recSchurMultLocal_stop();
  stochNode->resMon.recFactTmLocal_stop();
}