void sLinsys::addTermToDenseSchurCompl(sData *prob, 
				       DenseSymMatrix& SC) 
{
  SparseGenMatrix& A = prob->getLocalA();
  SparseGenMatrix& C = prob->getLocalC();
  SparseGenMatrix& R = prob->getLocalCrossHessian();
  

  int N, nxP, NP,mR,nR;
  int locns = locmz;
  
  int mle = prob->getmle();
  int mli = prob->getmli();
  SparseGenMatrix& E = prob->getLocalE();
  SparseGenMatrix& F = prob->getLocalF();

  SparseGenMatrix ET;
  SparseGenMatrix FT;
  ET.transCopyof(E);
  FT.transCopyof(F);

  int nx0, my0, mz0;
  stochNode->get_FistStageSize(nx0, my0,mz0);
   
  A.getSize(N, nxP); assert(N==locmy);
  NP = SC.size(); assert(NP>=nxP);

  if(nxP==-1) C.getSize(N,nxP);
  if(nxP==-1) nxP = NP;
  if(gOuterSolve>=3 ) 
	N = locnx+locns+locmy+locmz;
  else
    N = locnx+locmy+locmz;


  int blocksize = 64;
  DenseGenMatrix cols(blocksize,N);

  bool ispardiso=false;
  PardisoSolver* pardisoSlv=NULL;
//  pardisoSlv = dynamic_cast<PardisoSolver*>(solver);
  int* colSparsity=NULL;
  if(pardisoSlv) {
    ispardiso=true;
    colSparsity=new int[N];
    //blocksize=32;
  }
  
  for (int it=0; it < nxP; it += blocksize) {
    int start=it;
    int end = MIN(it+blocksize,nxP);
    int numcols = end-start;
    cols.getStorageRef().m = numcols; // avoid extra solves

    bool allzero = true;
    memset(&cols[0][0],0,N*blocksize*sizeof(double));

    if(ispardiso) {
      for(int i=0; i<N; i++) colSparsity[i]=0;
	  if(gOuterSolve>=3 ) {
		R.getStorageRef().fromGetColBlock(start, &cols[0][0], N, numcols, colSparsity, allzero);
		A.getStorageRef().fromGetColBlock(start, &cols[0][locnx+locns], N, numcols, colSparsity, allzero);
		C.getStorageRef().fromGetColBlock(start, &cols[0][locnx+locns+locmy], N, numcols, colSparsity, allzero);
	  }
	  else{
		R.getStorageRef().fromGetColBlock(start, &cols[0][0], N, numcols, colSparsity, allzero);
	    A.getStorageRef().fromGetColBlock(start, &cols[0][locnx], N, numcols, colSparsity, allzero);
		C.getStorageRef().fromGetColBlock(start, &cols[0][locnx+locmy], N, numcols, colSparsity, allzero);
	  }
    } else {
	  if(gOuterSolve>=3 ) {
		R.getStorageRef().fromGetColBlock(start, &cols[0][0], N, numcols, allzero);
		A.getStorageRef().fromGetColBlock(start, &cols[0][locnx+locns], N, numcols, allzero);
		C.getStorageRef().fromGetColBlock(start, &cols[0][locnx+locns+locmy], N, numcols, allzero);
	  }
	  else{
		R.getStorageRef().fromGetColBlock(start, &cols[0][0], N, numcols, allzero);
		A.getStorageRef().fromGetColBlock(start, &cols[0][locnx], N, numcols, allzero);
		C.getStorageRef().fromGetColBlock(start, &cols[0][locnx+locmy], N, numcols, allzero);
	  }    
    }

    if(!allzero) {
      
      if(ispardiso)
		pardisoSlv->solve(cols,colSparsity);
      else 
		solver->solve(cols);

      if(gOuterSolve>=3 ) {
        R.getStorageRef().transMultMat( 1.0, &(SC[0][start]), numcols, NP,  
       				      -1.0, &cols[0][0], N);
        A.getStorageRef().transMultMat( 1.0, &(SC[0][start]), numcols, NP,  
       				      -1.0, &cols[0][locnx+locns], N);	
        C.getStorageRef().transMultMat( 1.0, &(SC[0][start]), numcols, NP,
       				      -1.0, &cols[0][locnx+locns+locmy], N);
	if(mle>0)
          ET.getStorageRef().transMultMat( 1.0,  &(SC.getStorageRef().M[nx0+mz0+my0-mle][start]), numcols, NP, -1.0, &cols[0][0], N);
	if(mli>0)
	    FT.getStorageRef().transMultMat( 1.0, &(SC.getStorageRef().M[nx0+mz0+my0+mz0-mli][start]), numcols, NP,
                                   -1.0, &cols[0][0], N);
	  }
	  else{
	    R.getStorageRef().transMultMat( 1.0, &(SC[0][start]), numcols, NP,  
       				      -1.0, &cols[0][0], N);
	    A.getStorageRef().transMultMat( 1.0, &(SC[0][start]), numcols, NP,  
       				      -1.0, &cols[0][locnx], N);
	    C.getStorageRef().transMultMat( 1.0, &(SC[0][start]), numcols, NP,
       				      -1.0, &cols[0][locnx+locmy], N);
	  }
    } //end !allzero
  }

  for (int it=0; it < mle; it += blocksize)
  {
    int start=it;
    int end = MIN(it+blocksize,mle);
    int numcols = end-start;
    cols.getStorageRef().m = numcols; // avoid extra solves                                                                                                                          
    bool allzero = true;
    memset(&cols[0][0],0,N*blocksize*sizeof(double));

    if(ispardiso) 
    {
        for(int i=0; i<N; i++) colSparsity[i]=0;
	ET.getStorageRef().fromGetColBlock(start, &cols[0][0], N, numcols, colSparsity, allzero);
    }
    else 
    {
	ET.getStorageRef().fromGetColBlock(start, &cols[0][0], N, numcols, allzero);
    }
    if(!allzero) {
      if(ispardiso)
	pardisoSlv->solve(cols,colSparsity);
      else
	solver->solve(cols);
      if(gOuterSolve>=3 ) {
	R.getStorageRef().transMultMat( 1.0, &(SC[0][nx0+mz0+my0-mle+start]), numcols, NP,
					-1.0, &cols[0][0], N);
	A.getStorageRef().transMultMat( 1.0, &(SC[0][nx0+mz0+my0-mle+start]), numcols, NP,
					-1.0, &cols[0][locnx+locns], N);
	C.getStorageRef().transMultMat( 1.0, &(SC[0][nx0+mz0+my0-mle+start]), numcols, NP,
					-1.0, &cols[0][locnx+locns+locmy], N);
	if(mle>0)
	  ET.getStorageRef().transMultMat( 1.0,  &(SC[nx0+mz0+my0-mle][nx0+mz0+my0-mle+start]), numcols, NP, -1.0, &cols[0][0], N);
	if(mli>0)
	  FT.getStorageRef().transMultMat( 1.0,  &(SC[nx0+mz0+my0+mz0-mli][nx0+mz0+my0-mle+start]), numcols, NP, -1.0, &cols[0][0], N);

	/*
	std::cout<<"cols:  "<<std::endl;
        for(int i=0; i<numcols;i++)
          for(int j=0; j<N;j++)
	    std::cout<<"col "<<i<<"row "<<j<<"elt "<<cols[i][j]<<std::endl;

	std::cout<<"SC:  "<<std::endl;
        for(int i=0; i<NP;i++)
          for(int j=0; j<NP;j++)
	    std::cout<<"row "<<i<<"col "<<j<<"elt "<<SC.getStorageRef().M[i][j]<<std::endl;
	*/
      }
      else{
	assert(false && "not implemented");
      }
    } //end !allzero 
  }

  for (int it=0; it < mli; it += blocksize)
  {
      int start=it;
      int end = MIN(it+blocksize,mle);
      int numcols = end-start;
      cols.getStorageRef().m = numcols; // avoid extra solves
      bool allzero = true;
      memset(&cols[0][0],0,N*blocksize*sizeof(double));

      if(ispardiso)
      {
	  for(int i=0; i<N; i++) colSparsity[i]=0;
	  FT.getStorageRef().fromGetColBlock(start, &cols[0][0], N, numcols, colSparsity, allzero);
      }
      else
      {
	  FT.getStorageRef().fromGetColBlock(start, &cols[0][0], N, numcols, allzero);
      }
      if(!allzero) {
	if(ispardiso)
	  pardisoSlv->solve(cols,colSparsity);
	else
	  solver->solve(cols);
	if(gOuterSolve>=3 ) {
	  R.getStorageRef().transMultMat( 1.0, &(SC[0][nx0+mz0+my0+mz0-mli+start]), numcols, NP,
					  -1.0, &cols[0][0], N);
	  A.getStorageRef().transMultMat( 1.0, &(SC[0][nx0+mz0+my0+mz0-mli+start]), numcols, NP,
					  -1.0, &cols[0][locnx+locns], N);
	  C.getStorageRef().transMultMat( 1.0, &(SC[0][nx0+mz0+my0+mz0-mli+start]), numcols, NP,
					  -1.0, &cols[0][locnx+locns+locmy], N);
	  if(mle>0)
	    ET.getStorageRef().transMultMat( 1.0,  &(SC[nx0+mz0+my0-mle][nx0+mz0+my0+mz0-mli+start]), numcols, NP, -1.0, &cols[0][0], N);
	  if(mli>0)
	    FT.getStorageRef().transMultMat( 1.0,  &(SC[nx0+mz0+my0+mz0-mli][nx0+mz0+my0+mz0-mli+start]), numcols, NP, -1.0, &cols[0][0], N);
	}
	else{
	  assert(false && "not implemented");
	}
      } //end !allzero
    }

  if(ispardiso) delete[] colSparsity;
}
示例#2
0
// this function is called only once and creates the augmented system
void PardisoSchurSolver::firstSolveCall(SparseGenMatrix& R,
                                        SparseGenMatrix& A,
                                        SparseGenMatrix& C)
{
    int nR,nA,nC;
    nnz=0;
    R.getSize(nR,nSC);
    nnz += R.numberOfNonZeros();
    A.getSize(nA,nSC);
    nnz += A.numberOfNonZeros();
    C.getSize(nC,nSC);
    nnz += C.numberOfNonZeros();
    int Msize=Msys->size();

    //cout << "nR=" << nR << " nA=" << nA << " nC=" << nC << " nSC=" << nSC << " sizeKi=" << (nR+nA+nC)<< endl
    //     << "nnzR=" << R.numberOfNonZeros()
    //     << " nnzA=" << A.numberOfNonZeros()
    //     << " nnzC=" << C.numberOfNonZeros() << endl;

    n = nR+nA+nC+nSC;
    assert( Msize == nR+nA+nC );
    nnz += Msys->numberOfNonZeros();
    nnz += nSC; //space for the 0 diagonal of 2x2 block

    // the lower triangular part of the augmented system in row-major
    SparseSymMatrix augSys( n, nnz);

    //
    //put (1,1) block in the augmented system
    //
    memcpy(augSys.getStorageRef().krowM, Msys->getStorageRef().krowM, sizeof(int)*Msize);
    memcpy(augSys.getStorageRef().jcolM, Msys->getStorageRef().jcolM, sizeof(int)*Msys->numberOfNonZeros());
    memcpy(augSys.getStorageRef().M,     Msys->getStorageRef().M,     sizeof(double)*Msys->numberOfNonZeros());

    //
    //put C block in the augmented system as C^T in the lower triangular part
    //
    if(C.numberOfNonZeros()>0 && nC>0) {
        int nnzIt=Msys->numberOfNonZeros();

        SparseGenMatrix Ct(nSC,nC,C.numberOfNonZeros());
        int* krowCt=Ct.getStorageRef().krowM;
        int* jcolCt=Ct.getStorageRef().jcolM;
        double *MCt=Ct.getStorageRef().M;

        C.getStorageRef().transpose(krowCt, jcolCt, MCt);

        int colShift=nR+nA;

        int* krowAug= augSys.getStorageRef().krowM;
        int* jcolAug = augSys.getStorageRef().jcolM;
        double* MAug = augSys.getStorageRef().M;

        int row=Msize;
        for(; row<n; row++) {
            krowAug[row]=nnzIt;

            for(int c=krowCt[row-Msize]; c< krowCt[row-Msize+1]; c++) {

                int j=jcolCt[c];

                jcolAug[nnzIt]=j+colShift;
                MAug[nnzIt]   =MCt[c];
                nnzIt++;
            }
            //add the zero from the diagonal
            jcolAug[nnzIt]=row;
            MAug[nnzIt]=0.0;
            nnzIt++;

        }
        krowAug[row]=nnzIt;
    }

    // -- to do
    //put R block in the augmented system as R^T in the lower triangular part
    assert(R.numberOfNonZeros()==0);
    //
    if(A.numberOfNonZeros()>0 && nA>0) {
        int nnzIt=Msys->numberOfNonZeros();

        SparseGenMatrix At(nSC,nA,A.numberOfNonZeros());
        int* krowAt=At.getStorageRef().krowM;
        int* jcolAt=At.getStorageRef().jcolM;
        double *MAt=At.getStorageRef().M;

        A.getStorageRef().transpose(krowAt, jcolAt, MAt);

        int colShift=nR;

        int* krowAug= augSys.getStorageRef().krowM;
        int* jcolAug = augSys.getStorageRef().jcolM;
        double* MAug = augSys.getStorageRef().M;

        int row=Msize;
        for(; row<n; row++) {
            krowAug[row]=nnzIt;

            for(int c=krowAt[row-Msize]; c< krowAt[row-Msize+1]; c++) {

                int j=jcolAt[c];

                jcolAug[nnzIt]=j+colShift;
                MAug[nnzIt]   =MAt[c];
                nnzIt++;
            }
            //add the zero from the diagonal
            jcolAug[nnzIt]=row;
            MAug[nnzIt]=0.0;
            nnzIt++;
        }
        krowAug[row]=nnzIt;
    }

    nnz=augSys.numberOfNonZeros();
    // we need to transpose to get the augmented system in the row-major upper triangular format of  PARDISO
    rowptrAug = new int[n+1];
    colidxAug = new int[nnz];
    eltsAug   = new double[nnz];

    augSys.getStorageRef().transpose(rowptrAug,colidxAug,eltsAug);


    //save the indeces for diagonal entries for a streamlined later update
    int* krowMsys = Msys->getStorageRef().krowM;
    int* jcolMsys = Msys->getStorageRef().jcolM;
    for(int r=0; r<Msize; r++) {
        // Msys - find the index in jcol for the diagonal (r,r)
        int idxDiagMsys=-1;
        for(int idx=krowMsys[r]; idx<krowMsys[r+1]; idx++)
            if(jcolMsys[idx]==r) {
                idxDiagMsys=idx;
                break;
            }
        assert(idxDiagMsys>=0);

        // aug  - find the index in jcol for the diagonal (r,r)
        int idxDiagAug=-1;
        for(int idx=rowptrAug[r]; idx<rowptrAug[r+1]; idx++)
            if(colidxAug[idx]==r) {
                idxDiagAug=idx;
                break;
            }
        assert(idxDiagAug>=0);

        diagMap.insert( pair<int,int>(idxDiagMsys,idxDiagAug) );
    }

    //convert to Fortran indexing
    for(int it=0; it<n+1; it++)   rowptrAug[it]++;
    for(int it=0; it<nnz; it++)   colidxAug[it]++;

    //allocate temp vector(s)
    nvec=new double[n];

    /*  //
    // symbolic analysis
    //
    int mtype=-2, error;
    int phase=11; //analysis
    int maxfct=1, mnum=1, nrhs=1;
    iparm[2]=num_threads;
    iparm[7]=8;     //# iterative refinements
    //iparm[1] = 2; // 2 is for metis, 0 for min degree
    //iparm[ 9] =10; // pivot perturbation 10^{-xxx}
    iparm[10] = 1; // scaling for IPM KKT; used with IPARM(13)=1 or 2
    iparm[12] = 2; // improved accuracy for IPM KKT; used with IPARM(11)=1;
                   // if needed, use 2 for advanced matchings and higer accuracy.
    iparm[23] = 1; //Parallel Numerical Factorization (0=used in the last years, 1=two-level scheduling)

    int msglvl=0;  // with statistical information
    iparm[32] = 1; // compute determinant
    iparm[37] = Msys->size(); //compute Schur-complement

    pardiso (pt , &maxfct , &mnum, &mtype, &phase,
       &n, eltsAug, rowptrAug, colidxAug,
       NULL, &nrhs,
       iparm , &msglvl, NULL, NULL, &error, dparm );

    if ( error != 0) {
      printf ("PardisoSolver - ERROR during symbolic factorization: %d\n", error );
      assert(false);
    }
    */
}