Exemplo n.º 1
0
PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat R,PetscReal fill,Mat *C)
{
  PetscErrorCode      ierr;
  Mat                 P;
  PetscInt            *rti,*rtj;
  Mat_RARt            *rart;
  PetscContainer      container;
  MatTransposeColoring matcoloring;
  ISColoring           iscoloring;
  Mat                  Rt_dense,RARt_dense;
  PetscLogDouble       GColor=0.0,MCCreate=0.0,MDenCreate=0.0,t0,tf,etime=0.0;
  Mat_SeqAIJ           *c;

  PetscFunctionBegin;
  ierr = PetscGetTime(&t0);CHKERRQ(ierr);
  /* create symbolic P=Rt */
  ierr = MatGetSymbolicTranspose_SeqAIJ(R,&rti,&rtj);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,R->cmap->n,R->rmap->n,rti,rtj,PETSC_NULL,&P);CHKERRQ(ierr);

  /* get symbolic C=Pt*A*P */
  ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr);
  (*C)->rmap->bs = R->rmap->bs;
  (*C)->cmap->bs = R->rmap->bs;

  /* create a supporting struct */
  ierr = PetscNew(Mat_RARt,&rart);CHKERRQ(ierr);

  /* attach the supporting struct to C */
  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,rart);CHKERRQ(ierr);
  ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_Mat_RARt);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)(*C),"Mat_RARt",(PetscObject)container);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
  ierr = PetscGetTime(&tf);CHKERRQ(ierr);
  etime += tf - t0;

  /* Create MatTransposeColoring from symbolic C=R*A*R^T */
  c=(Mat_SeqAIJ*)(*C)->data;
  ierr = PetscGetTime(&t0);CHKERRQ(ierr);
  ierr = MatGetColoring(*C,MATCOLORINGLF,&iscoloring);CHKERRQ(ierr);
  ierr = PetscGetTime(&tf);CHKERRQ(ierr);
  GColor += tf - t0;

  ierr = PetscGetTime(&t0);CHKERRQ(ierr);
  ierr = MatTransposeColoringCreate(*C,iscoloring,&matcoloring);CHKERRQ(ierr);
  rart->matcoloring = matcoloring;
  ierr = ISColoringDestroy(&iscoloring);CHKERRQ(ierr);
  ierr = PetscGetTime(&tf);CHKERRQ(ierr);
  MCCreate += tf - t0;

  ierr = PetscGetTime(&t0);CHKERRQ(ierr);
  /* Create Rt_dense */
  ierr = MatCreate(PETSC_COMM_SELF,&Rt_dense);CHKERRQ(ierr);
  ierr = MatSetSizes(Rt_dense,A->cmap->n,matcoloring->ncolors,A->cmap->n,matcoloring->ncolors);CHKERRQ(ierr);
  ierr = MatSetType(Rt_dense,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(Rt_dense,PETSC_NULL);CHKERRQ(ierr);
  Rt_dense->assembled = PETSC_TRUE;
  rart->Rt            = Rt_dense;

  /* Create RARt_dense = R*A*Rt_dense */
  ierr = MatCreate(PETSC_COMM_SELF,&RARt_dense);CHKERRQ(ierr);
  ierr = MatSetSizes(RARt_dense,(*C)->rmap->n,matcoloring->ncolors,(*C)->rmap->n,matcoloring->ncolors);CHKERRQ(ierr);
  ierr = MatSetType(RARt_dense,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(RARt_dense,PETSC_NULL);CHKERRQ(ierr);
  rart->RARt = RARt_dense;

  /* Allocate work array to store columns of A*R^T used in MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense() */
  ierr = PetscMalloc(A->rmap->n*4*sizeof(PetscScalar),&rart->work);CHKERRQ(ierr);

  ierr = PetscGetTime(&tf);CHKERRQ(ierr);
  MDenCreate += tf - t0;

  rart->destroy = (*C)->ops->destroy;
  (*C)->ops->destroy = MatDestroy_SeqAIJ_RARt;

  /* clean up */
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(R,&rti,&rtj);CHKERRQ(ierr);
  ierr = MatDestroy(&P);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  {
  PetscReal density= (PetscReal)(c->nz)/(RARt_dense->rmap->n*RARt_dense->cmap->n);
  ierr = PetscInfo6(*C,"RARt_den %D %D; Rt_den %D %D, (RARt->nz %D)/(m*ncolors)=%g\n",RARt_dense->rmap->n,RARt_dense->cmap->n,Rt_dense->rmap->n,Rt_dense->cmap->n,c->nz,density);CHKERRQ(ierr);
  ierr = PetscInfo5(*C,"Sym = GetColor %g + MColorCreate %g + MDenCreate %g + other %g = %g\n",GColor,MCCreate,MDenCreate,etime,GColor+MCCreate+MDenCreate+etime);CHKERRQ(ierr);
  }
#endif
  PetscFunctionReturn(0);
}
Exemplo n.º 2
0
PetscErrorCode  PetscViewerFileSetName_ASCII(PetscViewer viewer,const char name[])
{
  PetscErrorCode    ierr;
  size_t            len;
  char              fname[PETSC_MAX_PATH_LEN],*gz;
  PetscViewer_ASCII *vascii = (PetscViewer_ASCII*)viewer->data;
  PetscBool         isstderr,isstdout;
  PetscMPIInt       rank;

  PetscFunctionBegin;
  ierr = PetscViewerFileClose_ASCII(viewer);CHKERRQ(ierr);
  if (!name) PetscFunctionReturn(0);
  ierr = PetscStrallocpy(name,&vascii->filename);CHKERRQ(ierr);

  /* Is this file to be compressed */
  vascii->storecompressed = PETSC_FALSE;

  ierr = PetscStrstr(vascii->filename,".gz",&gz);CHKERRQ(ierr);
  if (gz) {
    ierr = PetscStrlen(gz,&len);CHKERRQ(ierr);
    if (len == 3) {
      *gz = 0;
      vascii->storecompressed = PETSC_TRUE;
    }
  }
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)viewer),&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscStrcmp(name,"stderr",&isstderr);CHKERRQ(ierr);
    ierr = PetscStrcmp(name,"stdout",&isstdout);CHKERRQ(ierr);
    /* empty filename means stdout */
    if (name[0] == 0)  isstdout = PETSC_TRUE;
    if (isstderr)      vascii->fd = PETSC_STDERR;
    else if (isstdout) vascii->fd = PETSC_STDOUT;
    else {


      ierr = PetscFixFilename(name,fname);CHKERRQ(ierr);
      switch (vascii->mode) {
      case FILE_MODE_READ:
        vascii->fd = fopen(fname,"r");
        break;
      case FILE_MODE_WRITE:
        vascii->fd = fopen(fname,"w");
        break;
      case FILE_MODE_APPEND:
        vascii->fd = fopen(fname,"a");
        break;
      case FILE_MODE_UPDATE:
        vascii->fd = fopen(fname,"r+");
        if (!vascii->fd) vascii->fd = fopen(fname,"w+");
        break;
      case FILE_MODE_APPEND_UPDATE:
        /* I really want a file which is opened at the end for updating,
           not a+, which opens at the beginning, but makes writes at the end.
        */
        vascii->fd = fopen(fname,"r+");
        if (!vascii->fd) vascii->fd = fopen(fname,"w+");
        else {
          ierr     = fseek(vascii->fd, 0, SEEK_END);CHKERRQ(ierr);
        }
        break;
      default:
        SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG, "Invalid file mode %d", vascii->mode);
      }
      if (!vascii->fd) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Cannot open PetscViewer file: %s",fname);
    }
  }
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)viewer,"File: %s",name);
#endif
  PetscFunctionReturn(0);
}
Exemplo n.º 3
0
PetscErrorCode PetscFESetUp_Composite(PetscFE fem)
{
  PetscFE_Composite *cmp = (PetscFE_Composite *) fem->data;
  DM                 K;
  PetscReal         *subpoint;
  PetscBLASInt      *pivots;
  PetscBLASInt       n, info;
  PetscScalar       *work, *invVscalar;
  PetscInt           dim, pdim, spdim, j, s;
  PetscErrorCode     ierr;

  PetscFunctionBegin;
  /* Get affine mapping from reference cell to each subcell */
  ierr = PetscDualSpaceGetDM(fem->dualSpace, &K);CHKERRQ(ierr);
  ierr = DMGetDimension(K, &dim);CHKERRQ(ierr);
  ierr = DMPlexGetCellRefiner_Internal(K, &cmp->cellRefiner);CHKERRQ(ierr);
  ierr = CellRefinerGetAffineTransforms_Internal(cmp->cellRefiner, &cmp->numSubelements, &cmp->v0, &cmp->jac, &cmp->invjac);CHKERRQ(ierr);
  /* Determine dof embedding into subelements */
  ierr = PetscDualSpaceGetDimension(fem->dualSpace, &pdim);CHKERRQ(ierr);
  ierr = PetscSpaceGetDimension(fem->basisSpace, &spdim);CHKERRQ(ierr);
  ierr = PetscMalloc1(cmp->numSubelements*spdim,&cmp->embedding);CHKERRQ(ierr);
  ierr = DMGetWorkArray(K, dim, MPIU_REAL, &subpoint);CHKERRQ(ierr);
  for (s = 0; s < cmp->numSubelements; ++s) {
    PetscInt sd = 0;

    for (j = 0; j < pdim; ++j) {
      PetscBool       inside;
      PetscQuadrature f;
      PetscInt        d, e;

      ierr = PetscDualSpaceGetFunctional(fem->dualSpace, j, &f);CHKERRQ(ierr);
      /* Apply transform to first point, and check that point is inside subcell */
      for (d = 0; d < dim; ++d) {
        subpoint[d] = -1.0;
        for (e = 0; e < dim; ++e) subpoint[d] += cmp->invjac[(s*dim + d)*dim+e]*(f->points[e] - cmp->v0[s*dim+e]);
      }
      ierr = CellRefinerInCellTest_Internal(cmp->cellRefiner, subpoint, &inside);CHKERRQ(ierr);
      if (inside) {cmp->embedding[s*spdim+sd++] = j;}
    }
    if (sd != spdim) SETERRQ3(PetscObjectComm((PetscObject) fem), PETSC_ERR_PLIB, "Subelement %d has %d dual basis vectors != %d", s, sd, spdim);
  }
  ierr = DMRestoreWorkArray(K, dim, MPIU_REAL, &subpoint);CHKERRQ(ierr);
  /* Construct the change of basis from prime basis to nodal basis for each subelement */
  ierr = PetscMalloc1(cmp->numSubelements*spdim*spdim,&fem->invV);CHKERRQ(ierr);
  ierr = PetscMalloc2(spdim,&pivots,spdim,&work);CHKERRQ(ierr);
#if defined(PETSC_USE_COMPLEX)
  ierr = PetscMalloc1(cmp->numSubelements*spdim*spdim,&invVscalar);CHKERRQ(ierr);
#else
  invVscalar = fem->invV;
#endif
  for (s = 0; s < cmp->numSubelements; ++s) {
    for (j = 0; j < spdim; ++j) {
      PetscReal       *Bf;
      PetscQuadrature  f;
      const PetscReal *points, *weights;
      PetscInt         Nc, Nq, q, k;

      ierr = PetscDualSpaceGetFunctional(fem->dualSpace, cmp->embedding[s*spdim+j], &f);CHKERRQ(ierr);
      ierr = PetscQuadratureGetData(f, NULL, &Nc, &Nq, &points, &weights);CHKERRQ(ierr);
      ierr = PetscMalloc1(f->numPoints*spdim*Nc,&Bf);CHKERRQ(ierr);
      ierr = PetscSpaceEvaluate(fem->basisSpace, Nq, points, Bf, NULL, NULL);CHKERRQ(ierr);
      for (k = 0; k < spdim; ++k) {
        /* n_j \cdot \phi_k */
        invVscalar[(s*spdim + j)*spdim+k] = 0.0;
        for (q = 0; q < Nq; ++q) {
          invVscalar[(s*spdim + j)*spdim+k] += Bf[q*spdim+k]*weights[q];
        }
      }
      ierr = PetscFree(Bf);CHKERRQ(ierr);
    }
    n = spdim;
    PetscStackCallBLAS("LAPACKgetrf", LAPACKgetrf_(&n, &n, &invVscalar[s*spdim*spdim], &n, pivots, &info));
    PetscStackCallBLAS("LAPACKgetri", LAPACKgetri_(&n, &invVscalar[s*spdim*spdim], &n, pivots, work, &n, &info));
  }
#if defined(PETSC_USE_COMPLEX)
  for (s = 0; s <cmp->numSubelements*spdim*spdim; s++) fem->invV[s] = PetscRealPart(invVscalar[s]);
  ierr = PetscFree(invVscalar);CHKERRQ(ierr);
#endif
  ierr = PetscFree2(pivots,work);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
Exemplo n.º 4
0
PetscErrorCode MatGetSubMatrices_MPIDense_Local(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submats)
{
  Mat_MPIDense   *c = (Mat_MPIDense*)C->data;
  Mat            A  = c->A;
  Mat_SeqDense   *a = (Mat_SeqDense*)A->data,*mat;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag0,tag1,idex,end,i;
  PetscInt       N = C->cmap->N,rstart = C->rmap->rstart,count;
  const PetscInt **irow,**icol,*irow_i;
  PetscInt       *nrow,*ncol,*w1,*w3,*w4,*rtable,start;
  PetscInt       **sbuf1,m,j,k,l,ct1,**rbuf1,row,proc;
  PetscInt       nrqs,msz,**ptr,*ctr,*pa,*tmp,bsz,nrqr;
  PetscInt       is_no,jmax,**rmap,*rmap_i;
  PetscInt       ctr_j,*sbuf1_j,*rbuf1_i;
  MPI_Request    *s_waits1,*r_waits1,*s_waits2,*r_waits2;
  MPI_Status     *r_status1,*r_status2,*s_status1,*s_status2;
  MPI_Comm       comm;
  PetscScalar    **rbuf2,**sbuf2;
  PetscBool      sorted;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  tag0 = ((PetscObject)C)->tag;
  size = c->size;
  rank = c->rank;
  m    = C->rmap->N;

  /* Get some new tags to keep the communication clean */
  ierr = PetscObjectGetNewTag((PetscObject)C,&tag1);CHKERRQ(ierr);

  /* Check if the col indices are sorted */
  for (i=0; i<ismax; i++) {
    ierr = ISSorted(isrow[i],&sorted);CHKERRQ(ierr);
    if (!sorted) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"ISrow is not sorted");
    ierr = ISSorted(iscol[i],&sorted);CHKERRQ(ierr);
    if (!sorted) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"IScol is not sorted");
  }

  ierr = PetscMalloc5(ismax,const PetscInt*,&irow,ismax,const PetscInt*,&icol,ismax,PetscInt,&nrow,ismax,PetscInt,&ncol,m,PetscInt,&rtable);CHKERRQ(ierr);
  for (i=0; i<ismax; i++) {
    ierr = ISGetIndices(isrow[i],&irow[i]);CHKERRQ(ierr);
    ierr = ISGetIndices(iscol[i],&icol[i]);CHKERRQ(ierr);
    ierr = ISGetLocalSize(isrow[i],&nrow[i]);CHKERRQ(ierr);
    ierr = ISGetLocalSize(iscol[i],&ncol[i]);CHKERRQ(ierr);
  }

  /* Create hash table for the mapping :row -> proc*/
  for (i=0,j=0; i<size; i++) {
    jmax = C->rmap->range[i+1];
    for (; j<jmax; j++) rtable[j] = i;
  }

  /* evaluate communication - mesg to who,length of mesg, and buffer space
     required. Based on this, buffers are allocated, and data copied into them*/
  ierr = PetscMalloc3(2*size,PetscInt,&w1,size,PetscInt,&w3,size,PetscInt,&w4);CHKERRQ(ierr);
  ierr = PetscMemzero(w1,size*2*sizeof(PetscInt));CHKERRQ(ierr); /* initialize work vector*/
  ierr = PetscMemzero(w3,size*sizeof(PetscInt));CHKERRQ(ierr); /* initialize work vector*/
  for (i=0; i<ismax; i++) {
    ierr   = PetscMemzero(w4,size*sizeof(PetscInt));CHKERRQ(ierr); /* initialize work vector*/
    jmax   = nrow[i];
    irow_i = irow[i];
    for (j=0; j<jmax; j++) {
      row  = irow_i[j];
      proc = rtable[row];
      w4[proc]++;
    }
    for (j=0; j<size; j++) {
      if (w4[j]) { w1[2*j] += w4[j];  w3[j]++;}
    }
  }

  nrqs       = 0;              /* no of outgoing messages */
  msz        = 0;              /* total mesg length (for all procs) */
  w1[2*rank] = 0;              /* no mesg sent to self */
  w3[rank]   = 0;
  for (i=0; i<size; i++) {
    if (w1[2*i])  { w1[2*i+1] = 1; nrqs++;} /* there exists a message to proc i */
  }
  ierr = PetscMalloc((nrqs+1)*sizeof(PetscInt),&pa);CHKERRQ(ierr); /*(proc -array)*/
  for (i=0,j=0; i<size; i++) {
    if (w1[2*i]) { pa[j] = i; j++; }
  }

  /* Each message would have a header = 1 + 2*(no of IS) + data */
  for (i=0; i<nrqs; i++) {
    j        = pa[i];
    w1[2*j] += w1[2*j+1] + 2* w3[j];
    msz     += w1[2*j];
  }
  /* Do a global reduction to determine how many messages to expect*/
  ierr = PetscMaxSum(comm,w1,&bsz,&nrqr);CHKERRQ(ierr);

  /* Allocate memory for recv buffers . Make sure rbuf1[0] exists by adding 1 to the buffer length */
  ierr = PetscMalloc((nrqr+1)*sizeof(PetscInt*),&rbuf1);CHKERRQ(ierr);
  ierr = PetscMalloc(nrqr*bsz*sizeof(PetscInt),&rbuf1[0]);CHKERRQ(ierr);
  for (i=1; i<nrqr; ++i) rbuf1[i] = rbuf1[i-1] + bsz;

  /* Post the receives */
  ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Request),&r_waits1);CHKERRQ(ierr);
  for (i=0; i<nrqr; ++i) {
    ierr = MPI_Irecv(rbuf1[i],bsz,MPIU_INT,MPI_ANY_SOURCE,tag0,comm,r_waits1+i);CHKERRQ(ierr);
  }

  /* Allocate Memory for outgoing messages */
  ierr = PetscMalloc4(size,PetscInt*,&sbuf1,size,PetscInt*,&ptr,2*msz,PetscInt,&tmp,size,PetscInt,&ctr);CHKERRQ(ierr);
  ierr = PetscMemzero(sbuf1,size*sizeof(PetscInt*));CHKERRQ(ierr);
  ierr = PetscMemzero(ptr,size*sizeof(PetscInt*));CHKERRQ(ierr);
  {
    PetscInt *iptr = tmp,ict = 0;
    for (i=0; i<nrqs; i++) {
      j        = pa[i];
      iptr    += ict;
      sbuf1[j] = iptr;
      ict      = w1[2*j];
    }
  }

  /* Form the outgoing messages */
  /* Initialize the header space */
  for (i=0; i<nrqs; i++) {
    j           = pa[i];
    sbuf1[j][0] = 0;
    ierr        = PetscMemzero(sbuf1[j]+1,2*w3[j]*sizeof(PetscInt));CHKERRQ(ierr);
    ptr[j]      = sbuf1[j] + 2*w3[j] + 1;
  }

  /* Parse the isrow and copy data into outbuf */
  for (i=0; i<ismax; i++) {
    ierr   = PetscMemzero(ctr,size*sizeof(PetscInt));CHKERRQ(ierr);
    irow_i = irow[i];
    jmax   = nrow[i];
    for (j=0; j<jmax; j++) {  /* parse the indices of each IS */
      row  = irow_i[j];
      proc = rtable[row];
      if (proc != rank) { /* copy to the outgoing buf*/
        ctr[proc]++;
        *ptr[proc] = row;
        ptr[proc]++;
      }
    }
    /* Update the headers for the current IS */
    for (j=0; j<size; j++) { /* Can Optimise this loop too */
      if ((ctr_j = ctr[j])) {
        sbuf1_j        = sbuf1[j];
        k              = ++sbuf1_j[0];
        sbuf1_j[2*k]   = ctr_j;
        sbuf1_j[2*k-1] = i;
      }
    }
  }

  /*  Now  post the sends */
  ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Request),&s_waits1);CHKERRQ(ierr);
  for (i=0; i<nrqs; ++i) {
    j    = pa[i];
    ierr = MPI_Isend(sbuf1[j],w1[2*j],MPIU_INT,j,tag0,comm,s_waits1+i);CHKERRQ(ierr);
  }

  /* Post recieves to capture the row_data from other procs */
  ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits2);CHKERRQ(ierr);
  ierr = PetscMalloc((nrqs+1)*sizeof(PetscScalar*),&rbuf2);CHKERRQ(ierr);
  for (i=0; i<nrqs; i++) {
    j     = pa[i];
    count = (w1[2*j] - (2*sbuf1[j][0] + 1))*N;
    ierr  = PetscMalloc((count+1)*sizeof(PetscScalar),&rbuf2[i]);CHKERRQ(ierr);
    ierr  = MPI_Irecv(rbuf2[i],count,MPIU_SCALAR,j,tag1,comm,r_waits2+i);CHKERRQ(ierr);
  }

  /* Receive messages(row_nos) and then, pack and send off the rowvalues
     to the correct processors */

  ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits2);CHKERRQ(ierr);
  ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Status),&r_status1);CHKERRQ(ierr);
  ierr = PetscMalloc((nrqr+1)*sizeof(PetscScalar*),&sbuf2);CHKERRQ(ierr);

  {
    PetscScalar *sbuf2_i,*v_start;
    PetscInt    s_proc;
    for (i=0; i<nrqr; ++i) {
      ierr    = MPI_Waitany(nrqr,r_waits1,&idex,r_status1+i);CHKERRQ(ierr);
      s_proc  = r_status1[i].MPI_SOURCE;         /* send processor */
      rbuf1_i = rbuf1[idex];         /* Actual message from s_proc */
      /* no of rows = end - start; since start is array idex[], 0idex, whel end
         is length of the buffer - which is 1idex */
      start = 2*rbuf1_i[0] + 1;
      ierr  = MPI_Get_count(r_status1+i,MPIU_INT,&end);CHKERRQ(ierr);
      /* allocate memory sufficinet to hold all the row values */
      ierr    = PetscMalloc((end-start)*N*sizeof(PetscScalar),&sbuf2[idex]);CHKERRQ(ierr);
      sbuf2_i = sbuf2[idex];
      /* Now pack the data */
      for (j=start; j<end; j++) {
        row     = rbuf1_i[j] - rstart;
        v_start = a->v + row;
        for (k=0; k<N; k++) {
          sbuf2_i[0] = v_start[0];
          sbuf2_i++;
          v_start += C->rmap->n;
        }
      }
      /* Now send off the data */
      ierr = MPI_Isend(sbuf2[idex],(end-start)*N,MPIU_SCALAR,s_proc,tag1,comm,s_waits2+i);CHKERRQ(ierr);
    }
  }
  /* End Send-Recv of IS + row_numbers */
  ierr = PetscFree(r_status1);CHKERRQ(ierr);
  ierr = PetscFree(r_waits1);CHKERRQ(ierr);
  ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Status),&s_status1);CHKERRQ(ierr);
  if (nrqs) {ierr = MPI_Waitall(nrqs,s_waits1,s_status1);CHKERRQ(ierr);}
  ierr = PetscFree(s_status1);CHKERRQ(ierr);
  ierr = PetscFree(s_waits1);CHKERRQ(ierr);

  /* Create the submatrices */
  if (scall == MAT_REUSE_MATRIX) {
    for (i=0; i<ismax; i++) {
      mat = (Mat_SeqDense*)(submats[i]->data);
      if ((submats[i]->rmap->n != nrow[i]) || (submats[i]->cmap->n != ncol[i])) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong size");
      ierr = PetscMemzero(mat->v,submats[i]->rmap->n*submats[i]->cmap->n*sizeof(PetscScalar));CHKERRQ(ierr);

      submats[i]->factortype = C->factortype;
    }
  } else {
    for (i=0; i<ismax; i++) {
      ierr = MatCreate(PETSC_COMM_SELF,submats+i);CHKERRQ(ierr);
      ierr = MatSetSizes(submats[i],nrow[i],ncol[i],nrow[i],ncol[i]);CHKERRQ(ierr);
      ierr = MatSetType(submats[i],((PetscObject)A)->type_name);CHKERRQ(ierr);
      ierr = MatSeqDenseSetPreallocation(submats[i],NULL);CHKERRQ(ierr);
    }
  }

  /* Assemble the matrices */
  {
    PetscInt    col;
    PetscScalar *imat_v,*mat_v,*imat_vi,*mat_vi;

    for (i=0; i<ismax; i++) {
      mat    = (Mat_SeqDense*)submats[i]->data;
      mat_v  = a->v;
      imat_v = mat->v;
      irow_i = irow[i];
      m      = nrow[i];
      for (j=0; j<m; j++) {
        row  = irow_i[j];
        proc = rtable[row];
        if (proc == rank) {
          row     = row - rstart;
          mat_vi  = mat_v + row;
          imat_vi = imat_v + j;
          for (k=0; k<ncol[i]; k++) {
            col          = icol[i][k];
            imat_vi[k*m] = mat_vi[col*C->rmap->n];
          }
        }
      }
    }
  }

  /* Create row map-> This maps c->row to submat->row for each submat*/
  /* this is a very expensive operation wrt memory usage */
  ierr = PetscMalloc(ismax*sizeof(PetscInt*),&rmap);CHKERRQ(ierr);
  ierr = PetscMalloc(ismax*C->rmap->N*sizeof(PetscInt),&rmap[0]);CHKERRQ(ierr);
  ierr = PetscMemzero(rmap[0],ismax*C->rmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=1; i<ismax; i++) rmap[i] = rmap[i-1] + C->rmap->N;
  for (i=0; i<ismax; i++) {
    rmap_i = rmap[i];
    irow_i = irow[i];
    jmax   = nrow[i];
    for (j=0; j<jmax; j++) {
      rmap_i[irow_i[j]] = j;
    }
  }

  /* Now Receive the row_values and assemble the rest of the matrix */
  ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status2);CHKERRQ(ierr);
  {
    PetscInt    is_max,tmp1,col,*sbuf1_i,is_sz;
    PetscScalar *rbuf2_i,*imat_v,*imat_vi;

    for (tmp1=0; tmp1<nrqs; tmp1++) { /* For each message */
      ierr = MPI_Waitany(nrqs,r_waits2,&i,r_status2+tmp1);CHKERRQ(ierr);
      /* Now dig out the corresponding sbuf1, which contains the IS data_structure */
      sbuf1_i = sbuf1[pa[i]];
      is_max  = sbuf1_i[0];
      ct1     = 2*is_max+1;
      rbuf2_i = rbuf2[i];
      for (j=1; j<=is_max; j++) { /* For each IS belonging to the message */
        is_no  = sbuf1_i[2*j-1];
        is_sz  = sbuf1_i[2*j];
        mat    = (Mat_SeqDense*)submats[is_no]->data;
        imat_v = mat->v;
        rmap_i = rmap[is_no];
        m      = nrow[is_no];
        for (k=0; k<is_sz; k++,rbuf2_i+=N) {  /* For each row */
          row     = sbuf1_i[ct1]; ct1++;
          row     = rmap_i[row];
          imat_vi = imat_v + row;
          for (l=0; l<ncol[is_no]; l++) { /* For each col */
            col          = icol[is_no][l];
            imat_vi[l*m] = rbuf2_i[col];
          }
        }
      }
    }
  }
  /* End Send-Recv of row_values */
  ierr = PetscFree(r_status2);CHKERRQ(ierr);
  ierr = PetscFree(r_waits2);CHKERRQ(ierr);
  ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status2);CHKERRQ(ierr);
  if (nrqr) {ierr = MPI_Waitall(nrqr,s_waits2,s_status2);CHKERRQ(ierr);}
  ierr = PetscFree(s_status2);CHKERRQ(ierr);
  ierr = PetscFree(s_waits2);CHKERRQ(ierr);

  /* Restore the indices */
  for (i=0; i<ismax; i++) {
    ierr = ISRestoreIndices(isrow[i],irow+i);CHKERRQ(ierr);
    ierr = ISRestoreIndices(iscol[i],icol+i);CHKERRQ(ierr);
  }

  /* Destroy allocated memory */
  ierr = PetscFree5(irow,icol,nrow,ncol,rtable);CHKERRQ(ierr);
  ierr = PetscFree3(w1,w3,w4);CHKERRQ(ierr);
  ierr = PetscFree(pa);CHKERRQ(ierr);

  for (i=0; i<nrqs; ++i) {
    ierr = PetscFree(rbuf2[i]);CHKERRQ(ierr);
  }
  ierr = PetscFree(rbuf2);CHKERRQ(ierr);
  ierr = PetscFree4(sbuf1,ptr,tmp,ctr);CHKERRQ(ierr);
  ierr = PetscFree(rbuf1[0]);CHKERRQ(ierr);
  ierr = PetscFree(rbuf1);CHKERRQ(ierr);

  for (i=0; i<nrqr; ++i) {
    ierr = PetscFree(sbuf2[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree(sbuf2);CHKERRQ(ierr);
  ierr = PetscFree(rmap[0]);CHKERRQ(ierr);
  ierr = PetscFree(rmap);CHKERRQ(ierr);

  for (i=0; i<ismax; i++) {
    ierr = MatAssemblyBegin(submats[i],MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(submats[i],MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
Exemplo n.º 5
0
static PetscErrorCode  KSPSolve_CR(KSP ksp)
{
  PetscErrorCode ierr;
  PetscInt       i = 0;
  MatStructure   pflag;
  PetscReal      dp;
  PetscScalar    ai, bi;
  PetscScalar    apq,btop, bbot;
  Vec            X,B,R,RT,P,AP,ART,Q;
  Mat            Amat, Pmat;

  PetscFunctionBegin;
  X       = ksp->vec_sol;
  B       = ksp->vec_rhs;
  R       = ksp->work[0];
  RT      = ksp->work[1];
  P       = ksp->work[2];
  AP      = ksp->work[3];
  ART     = ksp->work[4];
  Q       = ksp->work[5];

  /* R is the true residual norm, RT is the preconditioned residual norm */
  ierr = PCGetOperators(ksp->pc,&Amat,&Pmat,&pflag);CHKERRQ(ierr);
  if (!ksp->guess_zero) {
    ierr = KSP_MatMult(ksp,Amat,X,R);CHKERRQ(ierr);     /*   R <- A*X           */
    ierr = VecAYPX(R,-1.0,B);CHKERRQ(ierr);            /*   R <- B-R == B-A*X  */
  } else { 
    ierr = VecCopy(B,R);CHKERRQ(ierr);                  /*   R <- B (X is 0)    */
  }
  ierr = KSP_PCApply(ksp,R,P);CHKERRQ(ierr);     /*   P   <- B*R         */
  ierr = KSP_MatMult(ksp,Amat,P,AP);CHKERRQ(ierr);      /*   AP  <- A*P         */
  ierr = VecCopy(P,RT);CHKERRQ(ierr);                   /*   RT  <- P           */
  ierr = VecCopy(AP,ART);CHKERRQ(ierr);                 /*   ART <- AP          */
  ierr = VecDotBegin(RT,ART,&btop);CHKERRQ(ierr);          /*   (RT,ART)           */
    
  if (ksp->normtype == KSP_NORM_PRECONDITIONED) {
    ierr = VecNormBegin(RT,NORM_2,&dp);CHKERRQ(ierr);        /*   dp <- RT'*RT       */
    ierr = VecDotEnd   (RT,ART,&btop) ;CHKERRQ(ierr);          /*   (RT,ART)           */
    ierr = VecNormEnd  (RT,NORM_2,&dp);CHKERRQ(ierr);        /*   dp <- RT'*RT       */
  } else if (ksp->normtype == KSP_NORM_UNPRECONDITIONED) {
    ierr = VecNormBegin(R,NORM_2,&dp);CHKERRQ(ierr);         /*   dp <- R'*R         */
    ierr = VecDotEnd   (RT,ART,&btop);CHKERRQ(ierr);          /*   (RT,ART)           */
    ierr = VecNormEnd  (R,NORM_2,&dp);CHKERRQ(ierr);        /*   dp <- RT'*RT       */
  } else if (ksp->normtype == KSP_NORM_NATURAL) {
    ierr = VecDotEnd   (RT,ART,&btop) ;CHKERRQ(ierr);          /*   (RT,ART)           */
    dp = sqrt(PetscAbsScalar(btop));                    /* dp = sqrt(R,AR)      */
  }
  if (PetscAbsScalar(btop) < 0.0) {
    ksp->reason = KSP_DIVERGED_INDEFINITE_MAT;
    ierr = PetscInfo(ksp,"diverging due to indefinite or negative definite matrix\n");CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  ksp->its = 0;
  KSPMonitor(ksp,0,dp);
  ierr = PetscObjectTakeAccess(ksp);CHKERRQ(ierr);
  ksp->rnorm              = dp;
  ierr = PetscObjectGrantAccess(ksp);CHKERRQ(ierr);
  KSPLogResidualHistory(ksp,dp);
  ierr = (*ksp->converged)(ksp,0,dp,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);
  if (ksp->reason) PetscFunctionReturn(0);

  i = 0;
  do {
    ierr   = KSP_PCApply(ksp,AP,Q);CHKERRQ(ierr);/*   Q <- B* AP          */

    ierr   = VecDot(AP,Q,&apq);CHKERRQ(ierr);  
    if (PetscAbsScalar(apq) <= 0.0) {
      ksp->reason = KSP_DIVERGED_INDEFINITE_PC;
      ierr = PetscInfo(ksp,"KSPSolve_CR:diverging due to indefinite or negative definite PC\n");CHKERRQ(ierr);
      break;
    }
    ai = btop/apq;                                      /* ai = (RT,ART)/(AP,Q)  */

    ierr   = VecAXPY(X,ai,P);CHKERRQ(ierr);            /*   X   <- X + ai*P     */
    ierr   = VecAXPY(RT,-ai,Q);CHKERRQ(ierr);           /*   RT  <- RT - ai*Q    */
    ierr   = KSP_MatMult(ksp,Amat,RT,ART);CHKERRQ(ierr);/*   ART <-   A*RT       */
    bbot = btop;
    ierr   = VecDotBegin(RT,ART,&btop);CHKERRQ(ierr);

    if (ksp->normtype == KSP_NORM_PRECONDITIONED) {
      ierr = VecNormBegin(RT,NORM_2,&dp);CHKERRQ(ierr);      /*   dp <- || RT ||      */
      ierr = VecDotEnd   (RT,ART,&btop) ;CHKERRQ(ierr);
      ierr = VecNormEnd  (RT,NORM_2,&dp);CHKERRQ(ierr);      /*   dp <- || RT ||      */
    } else if (ksp->normtype == KSP_NORM_NATURAL) {
      ierr = VecDotEnd(RT,ART,&btop);CHKERRQ(ierr);
      dp = sqrt(PetscAbsScalar(btop));                  /* dp = sqrt(R,AR)       */
    } else if (ksp->normtype == KSP_NORM_NO) {
      ierr = VecDotEnd(RT,ART,&btop);CHKERRQ(ierr);
      dp = 0.0; 
    } else if (ksp->normtype == KSP_NORM_UNPRECONDITIONED) {
      ierr = VecAXPY(R,ai,AP);CHKERRQ(ierr);           /*   R   <- R - ai*AP    */
      ierr = VecNormBegin(R,NORM_2,&dp);CHKERRQ(ierr);       /*   dp <- R'*R          */
      ierr = VecDotEnd   (RT,ART,&btop);CHKERRQ(ierr);
      ierr = VecNormEnd  (R,NORM_2,&dp);CHKERRQ(ierr);       /*   dp <- R'*R          */
    } else {
      SETERRQ1(PETSC_ERR_SUP,"KSPNormType of %d not supported",(int)ksp->normtype);
    }
    if (PetscAbsScalar(btop) < 0.0) {
      ksp->reason = KSP_DIVERGED_INDEFINITE_MAT;
      ierr = PetscInfo(ksp,"diverging due to indefinite or negative definite PC\n");CHKERRQ(ierr);
      break;
    }

    ierr = PetscObjectTakeAccess(ksp);CHKERRQ(ierr);
    ksp->its++;
    ksp->rnorm = dp;
    ierr = PetscObjectGrantAccess(ksp);CHKERRQ(ierr);

    KSPLogResidualHistory(ksp,dp);
    KSPMonitor(ksp,i+1,dp);
    ierr = (*ksp->converged)(ksp,i+1,dp,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);
    if (ksp->reason) break;

    bi = btop/bbot;
    ierr = VecAYPX(P,bi,RT);CHKERRQ(ierr);              /*   P <- RT + Bi P     */
    ierr = VecAYPX(AP,bi,ART);CHKERRQ(ierr);            /*   AP <- ART + Bi AP  */
    i++;
  } while (i<ksp->max_it);
  if (i >= ksp->max_it) {
    ksp->reason =  KSP_DIVERGED_ITS;
  }
  PetscFunctionReturn(0);
}
Exemplo n.º 6
0
Arquivo: stcg.c Projeto: Kun-Qu/petsc
PetscErrorCode KSPSolve_STCG(KSP ksp)
{
#ifdef PETSC_USE_COMPLEX
  SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_SUP, "STCG is not available for complex systems");
#else
  KSP_STCG       *cg = (KSP_STCG *)ksp->data;

  PetscErrorCode ierr;
  MatStructure   pflag;
  Mat            Qmat, Mmat;
  Vec            r, z, p, d;
  PC             pc;

  PetscReal      norm_r, norm_d, norm_dp1, norm_p, dMp;
  PetscReal      alpha, beta, kappa, rz, rzm1;
  PetscReal      rr, r2, step;

  PetscInt       max_cg_its;

  PetscBool      diagonalscale;

  PetscFunctionBegin;
  /***************************************************************************/
  /* Check the arguments and parameters.                                     */
  /***************************************************************************/

  ierr = PCGetDiagonalScale(ksp->pc, &diagonalscale);CHKERRQ(ierr);
  if (diagonalscale) SETERRQ1(((PetscObject)ksp)->comm,PETSC_ERR_SUP, "Krylov method %s does not support diagonal scaling", ((PetscObject)ksp)->type_name);
  if (cg->radius < 0.0) SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_ARG_OUTOFRANGE, "Input error: radius < 0");

  /***************************************************************************/
  /* Get the workspace vectors and initialize variables                      */
  /***************************************************************************/

  r2 = cg->radius * cg->radius;
  r  = ksp->work[0];
  z  = ksp->work[1];
  p  = ksp->work[2];
  d  = ksp->vec_sol;
  pc = ksp->pc;

  ierr = PCGetOperators(pc, &Qmat, &Mmat, &pflag);CHKERRQ(ierr);

  ierr = VecGetSize(d, &max_cg_its);CHKERRQ(ierr);
  max_cg_its = PetscMin(max_cg_its, ksp->max_it);
  ksp->its = 0;

  /***************************************************************************/
  /* Initialize objective function and direction.                            */
  /***************************************************************************/

  cg->o_fcn = 0.0;

  ierr = VecSet(d, 0.0);CHKERRQ(ierr);			/* d = 0             */
  cg->norm_d = 0.0;

  /***************************************************************************/
  /* Begin the conjugate gradient method.  Check the right-hand side for     */
  /* numerical problems.  The check for not-a-number and infinite values     */
  /* need be performed only once.                                            */
  /***************************************************************************/

  ierr = VecCopy(ksp->vec_rhs, r);CHKERRQ(ierr);	/* r = -grad         */
  ierr = VecDot(r, r, &rr);CHKERRQ(ierr);		/* rr = r^T r        */
  if (PetscIsInfOrNanScalar(rr)) {
    /*************************************************************************/
    /* The right-hand side contains not-a-number or an infinite value.       */
    /* The gradient step does not work; return a zero value for the step.    */
    /*************************************************************************/

    ksp->reason = KSP_DIVERGED_NAN;
    ierr = PetscInfo1(ksp, "KSPSolve_STCG: bad right-hand side: rr=%g\n", rr);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /***************************************************************************/
  /* Check the preconditioner for numerical problems and for positive        */
  /* definiteness.  The check for not-a-number and infinite values need be   */
  /* performed only once.                                                    */
  /***************************************************************************/

  ierr = KSP_PCApply(ksp, r, z);CHKERRQ(ierr);		/* z = inv(M) r      */
  ierr = VecDot(r, z, &rz);CHKERRQ(ierr);		/* rz = r^T inv(M) r */
  if (PetscIsInfOrNanScalar(rz)) {
    /*************************************************************************/
    /* The preconditioner contains not-a-number or an infinite value.        */
    /* Return the gradient direction intersected with the trust region.      */
    /*************************************************************************/

    ksp->reason = KSP_DIVERGED_NAN;
    ierr = PetscInfo1(ksp, "KSPSolve_STCG: bad preconditioner: rz=%g\n", rz);CHKERRQ(ierr);

    if (cg->radius != 0) {
      if (r2 >= rr) {
        alpha = 1.0;
        cg->norm_d = PetscSqrtReal(rr);
      }
      else {
        alpha = PetscSqrtReal(r2 / rr);
        cg->norm_d = cg->radius;
      }

      ierr = VecAXPY(d, alpha, r);CHKERRQ(ierr);	/* d = d + alpha r   */

      /***********************************************************************/
      /* Compute objective function.                                         */
      /***********************************************************************/

      ierr = KSP_MatMult(ksp, Qmat, d, z);CHKERRQ(ierr);
      ierr = VecAYPX(z, -0.5, ksp->vec_rhs);CHKERRQ(ierr);
      ierr = VecDot(d, z, &cg->o_fcn);CHKERRQ(ierr);
      cg->o_fcn = -cg->o_fcn;
      ++ksp->its;
    }
    PetscFunctionReturn(0);
  }

  if (rz < 0.0) {
    /*************************************************************************/
    /* The preconditioner is indefinite.  Because this is the first          */
    /* and we do not have a direction yet, we use the gradient step.  Note   */
    /* that we cannot use the preconditioned norm when computing the step    */
    /* because the matrix is indefinite.                                     */
    /*************************************************************************/

    ksp->reason = KSP_DIVERGED_INDEFINITE_PC;
    ierr = PetscInfo1(ksp, "KSPSolve_STCG: indefinite preconditioner: rz=%g\n", rz);CHKERRQ(ierr);

    if (cg->radius != 0.0) {
      if (r2 >= rr) {
        alpha = 1.0;
        cg->norm_d = PetscSqrtReal(rr);
      }
      else {
        alpha = PetscSqrtReal(r2 / rr);
        cg->norm_d = cg->radius;
      }

      ierr = VecAXPY(d, alpha, r);CHKERRQ(ierr);	/* d = d + alpha r   */

      /***********************************************************************/
      /* Compute objective function.                                         */
      /***********************************************************************/

      ierr = KSP_MatMult(ksp, Qmat, d, z);CHKERRQ(ierr);
      ierr = VecAYPX(z, -0.5, ksp->vec_rhs);CHKERRQ(ierr);
      ierr = VecDot(d, z, &cg->o_fcn);CHKERRQ(ierr);
      cg->o_fcn = -cg->o_fcn;
      ++ksp->its;
    }
    PetscFunctionReturn(0);
  }

  /***************************************************************************/
  /* As far as we know, the preconditioner is positive semidefinite.         */
  /* Compute and log the residual.  Check convergence because this           */
  /* initializes things, but do not terminate until at least one conjugate   */
  /* gradient iteration has been performed.                                  */
  /***************************************************************************/

  switch(ksp->normtype) {
  case KSP_NORM_PRECONDITIONED:
    ierr = VecNorm(z, NORM_2, &norm_r);CHKERRQ(ierr);	/* norm_r = |z|      */
    break;

  case KSP_NORM_UNPRECONDITIONED:
    norm_r = PetscSqrtReal(rr);					/* norm_r = |r|      */
    break;

  case KSP_NORM_NATURAL:
    norm_r = PetscSqrtReal(rz);					/* norm_r = |r|_M    */
    break;

  default:
    norm_r = 0.0;
    break;
  }

  KSPLogResidualHistory(ksp, norm_r);
  ierr = KSPMonitor(ksp, ksp->its, norm_r);CHKERRQ(ierr);
  ksp->rnorm = norm_r;

  ierr = (*ksp->converged)(ksp, ksp->its, norm_r, &ksp->reason, ksp->cnvP);CHKERRQ(ierr);

  /***************************************************************************/
  /* Compute the first direction and update the iteration.                   */
  /***************************************************************************/

  ierr = VecCopy(z, p);CHKERRQ(ierr);			/* p = z             */
  ierr = KSP_MatMult(ksp, Qmat, p, z);CHKERRQ(ierr);	/* z = Q * p         */
  ++ksp->its;

  /***************************************************************************/
  /* Check the matrix for numerical problems.                                */
  /***************************************************************************/

  ierr = VecDot(p, z, &kappa);CHKERRQ(ierr);		/* kappa = p^T Q p   */
  if (PetscIsInfOrNanScalar(kappa)) {
    /*************************************************************************/
    /* The matrix produced not-a-number or an infinite value.  In this case, */
    /* we must stop and use the gradient direction.  This condition need     */
    /* only be checked once.                                                 */
    /*************************************************************************/

    ksp->reason = KSP_DIVERGED_NAN;
    ierr = PetscInfo1(ksp, "KSPSolve_STCG: bad matrix: kappa=%g\n", kappa);CHKERRQ(ierr);

    if (cg->radius) {
      if (r2 >= rr) {
        alpha = 1.0;
        cg->norm_d = PetscSqrtReal(rr);
      }
      else {
        alpha = PetscSqrtReal(r2 / rr);
        cg->norm_d = cg->radius;
      }

      ierr = VecAXPY(d, alpha, r);CHKERRQ(ierr);	/* d = d + alpha r   */

      /***********************************************************************/
      /* Compute objective function.                                         */
      /***********************************************************************/

      ierr = KSP_MatMult(ksp, Qmat, d, z);CHKERRQ(ierr);
      ierr = VecAYPX(z, -0.5, ksp->vec_rhs);CHKERRQ(ierr);
      ierr = VecDot(d, z, &cg->o_fcn);CHKERRQ(ierr);
      cg->o_fcn = -cg->o_fcn;
      ++ksp->its;
    }
    PetscFunctionReturn(0);
  }

  /***************************************************************************/
  /* Initialize variables for calculating the norm of the direction.         */
  /***************************************************************************/

  dMp = 0.0;
  norm_d = 0.0;
  switch(cg->dtype) {
  case STCG_PRECONDITIONED_DIRECTION:
    norm_p = rz;
    break;

  default:
    ierr = VecDot(p, p, &norm_p);CHKERRQ(ierr);
    break;
  }

  /***************************************************************************/
  /* Check for negative curvature.                                           */
  /***************************************************************************/

  if (kappa <= 0.0) {
    /*************************************************************************/
    /* In this case, the matrix is indefinite and we have encountered a      */
    /* direction of negative curvature.  Because negative curvature occurs   */
    /* during the first step, we must follow a direction.                    */
    /*************************************************************************/

    ksp->reason = KSP_CONVERGED_CG_NEG_CURVE;
    ierr = PetscInfo1(ksp, "KSPSolve_STCG: negative curvature: kappa=%g\n", kappa);CHKERRQ(ierr);

    if (cg->radius != 0.0 && norm_p > 0.0) {
      /***********************************************************************/
      /* Follow direction of negative curvature to the boundary of the       */
      /* trust region.                                                       */
      /***********************************************************************/

      step = PetscSqrtReal(r2 / norm_p);
      cg->norm_d = cg->radius;

      ierr = VecAXPY(d, step, p);CHKERRQ(ierr);	/* d = d + step p    */

      /***********************************************************************/
      /* Update objective function.                                          */
      /***********************************************************************/

      cg->o_fcn += step * (0.5 * step * kappa - rz);
    }
    else if (cg->radius != 0.0) {
      /***********************************************************************/
      /* The norm of the preconditioned direction is zero; use the gradient  */
      /* step.                                                               */
      /***********************************************************************/

      if (r2 >= rr) {
        alpha = 1.0;
        cg->norm_d = PetscSqrtReal(rr);
      }
      else {
        alpha = PetscSqrtReal(r2 / rr);
        cg->norm_d = cg->radius;
      }

      ierr = VecAXPY(d, alpha, r);CHKERRQ(ierr);	/* d = d + alpha r   */

      /***********************************************************************/
      /* Compute objective function.                                         */
      /***********************************************************************/

      ierr = KSP_MatMult(ksp, Qmat, d, z);CHKERRQ(ierr);
      ierr = VecAYPX(z, -0.5, ksp->vec_rhs);CHKERRQ(ierr);
      ierr = VecDot(d, z, &cg->o_fcn);CHKERRQ(ierr);
      cg->o_fcn = -cg->o_fcn;
      ++ksp->its;
    }
    PetscFunctionReturn(0);
  }

  /***************************************************************************/
  /* Run the conjugate gradient method until either the problem is solved,   */
  /* we encounter the boundary of the trust region, or the conjugate         */
  /* gradient method breaks down.                                            */
  /***************************************************************************/

  while(1) {
    /*************************************************************************/
    /* Know that kappa is nonzero, because we have not broken down, so we    */
    /* can compute the steplength.                                           */
    /*************************************************************************/

    alpha = rz / kappa;

    /*************************************************************************/
    /* Compute the steplength and check for intersection with the trust      */
    /* region.                                                               */
    /*************************************************************************/

    norm_dp1 = norm_d + alpha*(2.0*dMp + alpha*norm_p);
    if (cg->radius != 0.0 && norm_dp1 >= r2) {
      /***********************************************************************/
      /* In this case, the matrix is positive definite as far as we know.    */
      /* However, the full step goes beyond the trust region.                */
      /***********************************************************************/

      ksp->reason = KSP_CONVERGED_CG_CONSTRAINED;
      ierr = PetscInfo1(ksp, "KSPSolve_STCG: constrained step: radius=%g\n", cg->radius);CHKERRQ(ierr);

      if (norm_p > 0.0) {
	/*********************************************************************/
	/* Follow the direction to the boundary of the trust region.         */
	/*********************************************************************/

        step = (PetscSqrtReal(dMp*dMp+norm_p*(r2-norm_d))-dMp)/norm_p;
        cg->norm_d = cg->radius;

        ierr = VecAXPY(d, step, p);CHKERRQ(ierr);	/* d = d + step p    */

        /*********************************************************************/
        /* Update objective function.                                        */
        /*********************************************************************/

        cg->o_fcn += step * (0.5 * step * kappa - rz);
      }
      else {
        /*********************************************************************/
        /* The norm of the direction is zero; there is nothing to follow.    */
        /*********************************************************************/
      }
      break;
    }

    /*************************************************************************/
    /* Now we can update the direction and residual.                         */
    /*************************************************************************/

    ierr = VecAXPY(d, alpha, p);CHKERRQ(ierr);		/* d = d + alpha p   */
    ierr = VecAXPY(r, -alpha, z);			/* r = r - alpha Q p */
    ierr = KSP_PCApply(ksp, r, z);CHKERRQ(ierr);	/* z = inv(M) r      */

    switch(cg->dtype) {
    case STCG_PRECONDITIONED_DIRECTION:
      norm_d = norm_dp1;
      break;

    default:
      ierr = VecDot(d, d, &norm_d);CHKERRQ(ierr);
      break;
    }
    cg->norm_d = PetscSqrtReal(norm_d);

    /*************************************************************************/
    /* Update objective function.                                            */
    /*************************************************************************/

    cg->o_fcn -= 0.5 * alpha * rz;

    /*************************************************************************/
    /* Check that the preconditioner appears positive semidefinite.          */
    /*************************************************************************/

    rzm1 = rz;
    ierr = VecDot(r, z, &rz);CHKERRQ(ierr);		/* rz = r^T z        */
    if (rz < 0.0) {
      /***********************************************************************/
      /* The preconditioner is indefinite.                                   */
      /***********************************************************************/

      ksp->reason = KSP_DIVERGED_INDEFINITE_PC;
      ierr = PetscInfo1(ksp, "KSPSolve_STCG: cg indefinite preconditioner: rz=%g\n", rz);CHKERRQ(ierr);
      break;
    }

    /*************************************************************************/
    /* As far as we know, the preconditioner is positive semidefinite.       */
    /* Compute the residual and check for convergence.                       */
    /*************************************************************************/

    switch(ksp->normtype) {
    case KSP_NORM_PRECONDITIONED:
      ierr = VecNorm(z, NORM_2, &norm_r);CHKERRQ(ierr);/* norm_r = |z|      */
      break;

    case KSP_NORM_UNPRECONDITIONED:
      ierr = VecNorm(r, NORM_2, &norm_r);CHKERRQ(ierr);/* norm_r = |r|      */
      break;

    case KSP_NORM_NATURAL:
      norm_r = PetscSqrtReal(rz);				/* norm_r = |r|_M    */
      break;

    default:
      norm_r = 0.0;
      break;
    }

    KSPLogResidualHistory(ksp, norm_r);
    ierr = KSPMonitor(ksp, ksp->its, norm_r);CHKERRQ(ierr);
    ksp->rnorm = norm_r;
  
    ierr = (*ksp->converged)(ksp, ksp->its, norm_r, &ksp->reason, ksp->cnvP);CHKERRQ(ierr);
    if (ksp->reason) {
      /***********************************************************************/
      /* The method has converged.                                           */
      /***********************************************************************/

      ierr = PetscInfo2(ksp, "KSPSolve_STCG: truncated step: rnorm=%g, radius=%g\n", norm_r, cg->radius);CHKERRQ(ierr);
      break;
    }

    /*************************************************************************/
    /* We have not converged yet.  Check for breakdown.                      */
    /*************************************************************************/

    beta = rz / rzm1;
    if (fabs(beta) <= 0.0) {
      /***********************************************************************/
      /* Conjugate gradients has broken down.                                */
      /***********************************************************************/

      ksp->reason = KSP_DIVERGED_BREAKDOWN;
      ierr = PetscInfo1(ksp, "KSPSolve_STCG: breakdown: beta=%g\n", beta);CHKERRQ(ierr);
      break;
    }

    /*************************************************************************/
    /* Check iteration limit.                                                */
    /*************************************************************************/

    if (ksp->its >= max_cg_its) {
      ksp->reason = KSP_DIVERGED_ITS;
      ierr = PetscInfo1(ksp, "KSPSolve_STCG: iterlim: its=%d\n", ksp->its);CHKERRQ(ierr);
      break;
    }

    /*************************************************************************/
    /* Update p and the norms.                                               */
    /*************************************************************************/

    ierr = VecAYPX(p, beta, z);CHKERRQ(ierr);          /* p = z + beta p    */

    switch(cg->dtype) {
    case STCG_PRECONDITIONED_DIRECTION:
      dMp = beta*(dMp + alpha*norm_p);
      norm_p = beta*(rzm1 + beta*norm_p);
      break;

    default:
      ierr = VecDot(d, p, &dMp);CHKERRQ(ierr);
      ierr = VecDot(p, p, &norm_p);CHKERRQ(ierr);
      break;
    }

    /*************************************************************************/
    /* Compute the new direction and update the iteration.                   */
    /*************************************************************************/

    ierr = KSP_MatMult(ksp, Qmat, p, z);CHKERRQ(ierr);	/* z = Q * p         */
    ierr = VecDot(p, z, &kappa);CHKERRQ(ierr);		/* kappa = p^T Q p   */
    ++ksp->its;

    /*************************************************************************/
    /* Check for negative curvature.                                         */
    /*************************************************************************/

    if (kappa <= 0.0) {
      /***********************************************************************/
      /* In this case, the matrix is indefinite and we have encountered      */
      /* a direction of negative curvature.  Follow the direction to the     */
      /* boundary of the trust region.                                       */
      /***********************************************************************/

      ksp->reason = KSP_CONVERGED_CG_NEG_CURVE;
      ierr = PetscInfo1(ksp, "KSPSolve_STCG: negative curvature: kappa=%g\n", kappa);CHKERRQ(ierr);

      if (cg->radius != 0.0 && norm_p > 0.0) {
	/*********************************************************************/
	/* Follow direction of negative curvature to boundary.               */
	/*********************************************************************/

        step = (PetscSqrtReal(dMp*dMp+norm_p*(r2-norm_d))-dMp)/norm_p;
        cg->norm_d = cg->radius;

        ierr = VecAXPY(d, step, p);CHKERRQ(ierr);	/* d = d + step p    */

	/*********************************************************************/
	/* Update objective function.                                        */
	/*********************************************************************/

        cg->o_fcn += step * (0.5 * step * kappa - rz);
      }
      else if (cg->radius != 0.0) {
	/*********************************************************************/
	/* The norm of the direction is zero; there is nothing to follow.    */
	/*********************************************************************/
      }
      break;
    }
  }

  PetscFunctionReturn(0);
#endif
}
Exemplo n.º 7
0
PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat mat)
{
  Mat_MPISBAIJ       *baij = (Mat_MPISBAIJ*)mat->data;
  Mat_SeqBAIJ        *B = (Mat_SeqBAIJ*)(baij->B->data);  
  PetscErrorCode     ierr;
  PetscInt           i,j,*aj = B->j,ec = 0,*garray;
  PetscInt           bs = mat->rmap->bs,*stmp;
  IS                 from,to;
  Vec                gvec;
#if defined (PETSC_USE_CTABLE)
  PetscTable         gid1_lid1;
  PetscTablePosition tpos;
  PetscInt           gid,lid;
#else
  PetscInt           Nbs = baij->Nbs,*indices;
#endif  

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  /* use a table - Mark Adams */
  PetscTableCreate(B->mbs,baij->Nbs+1,&gid1_lid1); 
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt data,gid1 = aj[B->i[i]+j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
      if (!data) {
        /* one based table */ 
        ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr); 
      }
    }
  } 
  /* form array of columns we need */
  ierr = PetscMalloc(ec*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 
  while (tpos) {  
    ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 
    gid--; lid--;
    garray[lid] = gid; 
  }
  ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
  ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 
  }
  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
      lid --;
      aj[B->i[i]+j] = lid;
    }
  }
  B->nbs     = ec;
  baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs;
  ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr);
  ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr);
#else
  /* For the first stab we make an array as long as the number of columns */
  /* mark those columns that are in baij->B */
  ierr = PetscMalloc(Nbs*sizeof(PetscInt),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++; 
      indices[aj[B->i[i] + j] ] = 1;
    }
  }

  /* form array of columns we need */
  ierr = PetscMalloc(ec*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ec = 0;
  for (i=0; i<Nbs; i++) {
    if (indices[i]) {
      garray[ec++] = i;
    }
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) {
    indices[garray[i]] = i;
  }

  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
    }
  }
  B->nbs       = ec;
  baij->B->cmap->n   = ec*mat->rmap->bs;
  ierr = PetscFree(indices);CHKERRQ(ierr);
#endif  
  
  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building scatter-gather */
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);   

  ierr = PetscMalloc(ec*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
  for (i=0; i<ec; i++) { stmp[i] = i; } 
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to);CHKERRQ(ierr);

  /* create temporary global vector to generate scatter context */
  ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,1,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr);
  ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(&gvec);CHKERRQ(ierr);

  ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
  baij->garray = garray;
  ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = ISDestroy(&from);CHKERRQ(ierr);
  ierr = ISDestroy(&to);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
Exemplo n.º 8
0
static PetscErrorCode ComputeMatrix(DomainData dd, Mat *A)
{
  PetscErrorCode         ierr;
  GLLData                gll;
  Mat                    local_mat  =0,temp_A=0;
  ISLocalToGlobalMapping matis_map  =0;
  IS                     dirichletIS=0;

  PetscFunctionBeginUser;
  /* Compute some stuff of Gauss-Legendre-Lobatto quadrature rule */
  ierr = GLLStuffs(dd,&gll);CHKERRQ(ierr);
  /* Compute matrix of subdomain Neumann problem */
  ierr = ComputeSubdomainMatrix(dd,gll,&local_mat);CHKERRQ(ierr);
  /* Compute global mapping of local dofs */
  ierr = ComputeMapping(dd,&matis_map);CHKERRQ(ierr);
  /* Create MATIS object needed by BDDC */
  ierr = MatCreateIS(dd.gcomm,1,PETSC_DECIDE,PETSC_DECIDE,dd.xm*dd.ym*dd.zm,dd.xm*dd.ym*dd.zm,matis_map,NULL,&temp_A);CHKERRQ(ierr);
  /* Set local subdomain matrices into MATIS object */
  ierr = MatScale(local_mat,dd.scalingfactor);CHKERRQ(ierr);
  ierr = MatISSetLocalMat(temp_A,local_mat);CHKERRQ(ierr);
  /* Call assembly functions */
  ierr = MatAssemblyBegin(temp_A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(temp_A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (dd.DBC_zerorows) {
    PetscInt dirsize;

    ierr = ComputeSpecialBoundaryIndices(dd,&dirichletIS,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatZeroRowsLocalIS(temp_A,dirichletIS,1.0,NULL,NULL);CHKERRQ(ierr);
    ierr = ISGetLocalSize(dirichletIS,&dirsize);CHKERRQ(ierr);
    /* giving hints to local and global matrices could be useful for the BDDC */
    if (!dirsize) {
      ierr = MatSetOption(local_mat,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
      ierr = MatSetOption(local_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr);
    } else {
      ierr = MatSetOption(local_mat,MAT_SYMMETRIC,PETSC_FALSE);CHKERRQ(ierr);
      ierr = MatSetOption(local_mat,MAT_SPD,PETSC_FALSE);CHKERRQ(ierr);
    }
    ierr = ISDestroy(&dirichletIS);CHKERRQ(ierr);
  } else { /* safe to set the options for the global matrices (they will be communicated to the matis local matrices) */
    ierr = MatSetOption(temp_A,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatSetOption(temp_A,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr);
  }
#if DEBUG
  {
    Vec       lvec,rvec;
    PetscReal norm;
    ierr = MatCreateVecs(temp_A,&lvec,&rvec);CHKERRQ(ierr);
    ierr = VecSet(lvec,1.0);CHKERRQ(ierr);
    ierr = MatMult(temp_A,lvec,rvec);CHKERRQ(ierr);
    ierr = VecNorm(rvec,NORM_INFINITY,&norm);CHKERRQ(ierr);
    printf("Test null space of global mat % 1.14e\n",norm);
    ierr = VecDestroy(&lvec);CHKERRQ(ierr);
    ierr = VecDestroy(&rvec);CHKERRQ(ierr);
  }
#endif
  /* free allocated workspace */
  ierr = PetscFree(gll.zGL);CHKERRQ(ierr);
  ierr = PetscFree(gll.rhoGL);CHKERRQ(ierr);
  ierr = PetscFree(gll.A[0]);CHKERRQ(ierr);
  ierr = PetscFree(gll.A);CHKERRQ(ierr);
  ierr = MatDestroy(&gll.elem_mat);CHKERRQ(ierr);
  ierr = MatDestroy(&local_mat);CHKERRQ(ierr);
  ierr = ISLocalToGlobalMappingDestroy(&matis_map);CHKERRQ(ierr);
  /* give back the pointer to te MATIS object */
  *A = temp_A;
  PetscFunctionReturn(0);
}
Exemplo n.º 9
0
static PetscErrorCode ComputeKSPBDDC(DomainData dd,Mat A,KSP *ksp)
{
  PetscErrorCode ierr;
  KSP            temp_ksp;
  PC             pc;
  IS             dirichletIS=0,neumannIS=0,*bddc_dofs_splitting;
  PetscInt       localsize,*xadj=NULL,*adjncy=NULL;
  MatNullSpace   near_null_space;

  PetscFunctionBeginUser;
  ierr = KSPCreate(dd.gcomm,&temp_ksp);CHKERRQ(ierr);
  ierr = KSPSetOperators(temp_ksp,A,A);CHKERRQ(ierr);
  ierr = KSPSetType(temp_ksp,KSPCG);CHKERRQ(ierr);
  ierr = KSPGetPC(temp_ksp,&pc);CHKERRQ(ierr);
  ierr = PCSetType(pc,PCBDDC);CHKERRQ(ierr);

  localsize = dd.xm_l*dd.ym_l*dd.zm_l;

  /* BDDC customization */

  /* jumping coefficients case */
  ierr = PCISSetSubdomainScalingFactor(pc,dd.scalingfactor);CHKERRQ(ierr);

  /* Dofs splitting
     Simple stride-1 IS
     It is not needed since, by default, PCBDDC assumes a stride-1 split */
  ierr = PetscMalloc1(1,&bddc_dofs_splitting);CHKERRQ(ierr);
#if 1
  ierr = ISCreateStride(PETSC_COMM_WORLD,localsize,0,1,&bddc_dofs_splitting[0]);CHKERRQ(ierr);
  ierr = PCBDDCSetDofsSplittingLocal(pc,1,bddc_dofs_splitting);CHKERRQ(ierr);
#else
  /* examples for global ordering */

  /* each process lists the nodes it owns */
  PetscInt sr,er;
  ierr = MatGetOwnershipRange(A,&sr,&er);CHKERRQ(ierr);
  ierr = ISCreateStride(PETSC_COMM_WORLD,er-sr,sr,1,&bddc_dofs_splitting[0]);CHKERRQ(ierr);
  ierr = PCBDDCSetDofsSplitting(pc,1,bddc_dofs_splitting);CHKERRQ(ierr);
  /* Split can be passed in a more general way since any process can list any node */
#endif
  ierr = ISDestroy(&bddc_dofs_splitting[0]);CHKERRQ(ierr);
  ierr = PetscFree(bddc_dofs_splitting);CHKERRQ(ierr);

  /* Primal constraints implemented by using a near null space attached to A -> now it passes in only the constants
    (which in practice is not needed since, by default, PCBDDC build the primal space using constants for quadrature formulas */
#if 0
  Vec vecs[2];
  PetscRandom rctx;
  ierr = MatCreateVecs(A,&vecs[0],&vecs[1]);CHKERRQ(ierr);
  ierr = PetscRandomCreate(dd.gcomm,&rctx);CHKERRQ(ierr);
  ierr = VecSetRandom(vecs[0],rctx);CHKERRQ(ierr);
  ierr = VecSetRandom(vecs[1],rctx);CHKERRQ(ierr);
  ierr = MatNullSpaceCreate(dd.gcomm,PETSC_TRUE,2,vecs,&near_null_space);CHKERRQ(ierr);
  ierr = VecDestroy(&vecs[0]);CHKERRQ(ierr);
  ierr = VecDestroy(&vecs[1]);CHKERRQ(ierr);
  ierr = PetscRandomDestroy(&rctx);CHKERRQ(ierr);
#else
  ierr = MatNullSpaceCreate(dd.gcomm,PETSC_TRUE,0,NULL,&near_null_space);CHKERRQ(ierr);
#endif
  ierr = MatSetNearNullSpace(A,near_null_space);CHKERRQ(ierr);
  ierr = MatNullSpaceDestroy(&near_null_space);CHKERRQ(ierr);

  /* CSR graph of subdomain dofs */
  ierr = BuildCSRGraph(dd,&xadj,&adjncy);CHKERRQ(ierr);
  ierr = PCBDDCSetLocalAdjacencyGraph(pc,localsize,xadj,adjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);

  /* Neumann/Dirichlet indices on the global boundary */
  if (dd.DBC_zerorows) {
    /* Only in case you eliminate some rows matrix with zerorows function, you need to set dirichlet indices into PCBDDC data */
    ierr = ComputeSpecialBoundaryIndices(dd,&dirichletIS,&neumannIS);CHKERRQ(ierr);
    ierr = PCBDDCSetNeumannBoundariesLocal(pc,neumannIS);CHKERRQ(ierr);
    ierr = PCBDDCSetDirichletBoundariesLocal(pc,dirichletIS);CHKERRQ(ierr);
  } else {
    if (dd.pure_neumann) {
      /* In such a case, all interface nodes lying on the global boundary are neumann nodes */
      ierr = ComputeSpecialBoundaryIndices(dd,NULL,&neumannIS);CHKERRQ(ierr);
      ierr = PCBDDCSetNeumannBoundariesLocal(pc,neumannIS);CHKERRQ(ierr);
    } else {
      /* It is wrong setting dirichlet indices without having zeroed the corresponding rows in the global matrix */
      /* But we can still compute them since nodes near the dirichlet boundaries does not need to be defined as neumann nodes */
      ierr = ComputeSpecialBoundaryIndices(dd,&dirichletIS,&neumannIS);CHKERRQ(ierr);
      ierr = PCBDDCSetNeumannBoundariesLocal(pc,neumannIS);CHKERRQ(ierr);
    }
  }

  /* Pass local null space information to local matrices (needed when using approximate local solvers) */
  if (dd.ipx || dd.pure_neumann) {
    MatNullSpace nsp;
    Mat          local_mat;

    ierr = MatISGetLocalMat(A,&local_mat);CHKERRQ(ierr);
    ierr = MatNullSpaceCreate(PETSC_COMM_SELF,PETSC_TRUE,0,NULL,&nsp);CHKERRQ(ierr);
    ierr = MatSetNullSpace(local_mat,nsp);CHKERRQ(ierr);
    ierr = MatNullSpaceDestroy(&nsp);CHKERRQ(ierr);
  }
  ierr = KSPSetComputeSingularValues(temp_ksp,PETSC_TRUE);CHKERRQ(ierr);
  ierr = KSPSetFromOptions(temp_ksp);CHKERRQ(ierr);
  ierr = KSPSetUp(temp_ksp);CHKERRQ(ierr);
  *ksp = temp_ksp;
  ierr = ISDestroy(&dirichletIS);CHKERRQ(ierr);
  ierr = ISDestroy(&neumannIS);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
Exemplo n.º 10
0
static PetscErrorCode GLLStuffs(DomainData dd, GLLData *glldata)
{
  PetscErrorCode ierr;
  PetscReal      *M,si;
  PetscScalar    x,z0,z1,z2,Lpj,Lpr,rhoGLj,rhoGLk;
  PetscBLASInt   pm1,lierr;
  PetscInt       i,j,n,k,s,r,q,ii,jj,p=dd.p;
  PetscInt       xloc,yloc,zloc,xyloc,xyzloc;

  PetscFunctionBeginUser;
  /* Gauss-Lobatto-Legendre nodes zGL on [-1,1] */
  ierr = PetscMalloc1(p+1,&glldata->zGL);CHKERRQ(ierr);
  ierr = PetscMemzero(glldata->zGL,(p+1)*sizeof(*glldata->zGL));CHKERRQ(ierr);

  glldata->zGL[0]=-1.0;
  glldata->zGL[p]= 1.0;
  if (p > 1) {
    if (p == 2) glldata->zGL[1]=0.0;
    else {
      ierr = PetscMalloc1(p-1,&M);CHKERRQ(ierr);
      for (i=0; i<p-1; i++) {
        si  = (PetscReal)(i+1.0);
        M[i]=0.5*PetscSqrtReal(si*(si+2.0)/((si+0.5)*(si+1.5)));
      }
      pm1  = p-1;
      ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
      PetscStackCallBLAS("LAPACKsteqr",LAPACKsteqr_("N",&pm1,&glldata->zGL[1],M,&x,&pm1,M,&lierr));
      if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in STERF Lapack routine %d",(int)lierr);
      ierr = PetscFPTrapPop();CHKERRQ(ierr);
      ierr = PetscFree(M);CHKERRQ(ierr);
    }
  }

  /* Weights for 1D quadrature */
  ierr = PetscMalloc1(p+1,&glldata->rhoGL);CHKERRQ(ierr);

  glldata->rhoGL[0]=2.0/(PetscScalar)(p*(p+1.0));
  glldata->rhoGL[p]=glldata->rhoGL[0];
  z2 = -1;                      /* Dummy value to avoid -Wmaybe-initialized */
  for (i=1; i<p; i++) {
    x  = glldata->zGL[i];
    z0 = 1.0;
    z1 = x;
    for (n=1; n<p; n++) {
      z2 = x*z1*(2.0*n+1.0)/(n+1.0)-z0*(PetscScalar)(n/(n+1.0));
      z0 = z1;
      z1 = z2;
    }
    glldata->rhoGL[i]=2.0/(p*(p+1.0)*z2*z2);
  }

  /* Auxiliary mat for laplacian */
  ierr = PetscMalloc1(p+1,&glldata->A);CHKERRQ(ierr);
  ierr = PetscMalloc1((p+1)*(p+1),&glldata->A[0]);CHKERRQ(ierr);
  for (i=1; i<p+1; i++) glldata->A[i]=glldata->A[i-1]+p+1;

  for (j=1; j<p; j++) {
    x =glldata->zGL[j];
    z0=1.0;
    z1=x;
    for (n=1; n<p; n++) {
      z2=x*z1*(2.0*n+1.0)/(n+1.0)-z0*(PetscScalar)(n/(n+1.0));
      z0=z1;
      z1=z2;
    }
    Lpj=z2;
    for (r=1; r<p; r++) {
      if (r == j) {
        glldata->A[j][j]=2.0/(3.0*(1.0-glldata->zGL[j]*glldata->zGL[j])*Lpj*Lpj);
      } else {
        x  = glldata->zGL[r];
        z0 = 1.0;
        z1 = x;
        for (n=1; n<p; n++) {
          z2=x*z1*(2.0*n+1.0)/(n+1.0)-z0*(PetscScalar)(n/(n+1.0));
          z0=z1;
          z1=z2;
        }
        Lpr             = z2;
        glldata->A[r][j]=4.0/(p*(p+1.0)*Lpj*Lpr*(glldata->zGL[j]-glldata->zGL[r])*(glldata->zGL[j]-glldata->zGL[r]));
      }
    }
  }
  for (j=1; j<p+1; j++) {
    x  = glldata->zGL[j];
    z0 = 1.0;
    z1 = x;
    for (n=1; n<p; n++) {
      z2=x*z1*(2.0*n+1.0)/(n+1.0)-z0*(PetscScalar)(n/(n+1.0));
      z0=z1;
      z1=z2;
    }
    Lpj             = z2;
    glldata->A[j][0]=4.0*PetscPowRealInt(-1.0,p)/(p*(p+1.0)*Lpj*(1.0+glldata->zGL[j])*(1.0+glldata->zGL[j]));
    glldata->A[0][j]=glldata->A[j][0];
  }
  for (j=0; j<p; j++) {
    x  = glldata->zGL[j];
    z0 = 1.0;
    z1 = x;
    for (n=1; n<p; n++) {
      z2=x*z1*(2.0*n+1.0)/(n+1.0)-z0*(PetscScalar)(n/(n+1.0));
      z0=z1;
      z1=z2;
    }
    Lpj=z2;

    glldata->A[p][j]=4.0/(p*(p+1.0)*Lpj*(1.0-glldata->zGL[j])*(1.0-glldata->zGL[j]));
    glldata->A[j][p]=glldata->A[p][j];
  }
  glldata->A[0][0]=0.5+(p*(p+1.0)-2.0)/6.0;
  glldata->A[p][p]=glldata->A[0][0];

  /* compute element matrix */
  xloc = p+1;
  yloc = p+1;
  zloc = p+1;
  if (dd.dim<2) yloc=1;
  if (dd.dim<3) zloc=1;
  xyloc  = xloc*yloc;
  xyzloc = xloc*yloc*zloc;

  ierr = MatCreate(PETSC_COMM_SELF,&glldata->elem_mat);CHKERRQ(ierr);
  ierr = MatSetSizes(glldata->elem_mat,xyzloc,xyzloc,xyzloc,xyzloc);CHKERRQ(ierr);
  ierr = MatSetType(glldata->elem_mat,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(glldata->elem_mat,xyzloc,NULL);CHKERRQ(ierr); /* overestimated */
  ierr = MatZeroEntries(glldata->elem_mat);CHKERRQ(ierr);
  ierr = MatSetOption(glldata->elem_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);

  for (k=0; k<zloc; k++) {
    if (dd.dim>2) rhoGLk=glldata->rhoGL[k];
    else rhoGLk=1.0;

    for (j=0; j<yloc; j++) {
      if (dd.dim>1) rhoGLj=glldata->rhoGL[j];
      else rhoGLj=1.0;

      for (i=0; i<xloc; i++) {
        ii = k*xyloc+j*xloc+i;
        s  = k;
        r  = j;
        for (q=0; q<xloc; q++) {
          jj   = s*xyloc+r*xloc+q;
          ierr = MatSetValue(glldata->elem_mat,jj,ii,glldata->A[i][q]*rhoGLj*rhoGLk,ADD_VALUES);CHKERRQ(ierr);
        }
        if (dd.dim>1) {
          s=k;
          q=i;
          for (r=0; r<yloc; r++) {
            jj   = s*xyloc+r*xloc+q;
            ierr = MatSetValue(glldata->elem_mat,jj,ii,glldata->A[j][r]*glldata->rhoGL[i]*rhoGLk,ADD_VALUES);CHKERRQ(ierr);
          }
        }
        if (dd.dim>2) {
          r=j;
          q=i;
          for (s=0; s<zloc; s++) {
            jj   = s*xyloc+r*xloc+q;
            ierr = MatSetValue(glldata->elem_mat,jj,ii,glldata->A[k][s]*rhoGLj*glldata->rhoGL[i],ADD_VALUES);CHKERRQ(ierr);
          }
        }
      }
    }
  }
  ierr = MatAssemblyBegin(glldata->elem_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd  (glldata->elem_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
#if DEBUG
  {
    Vec       lvec,rvec;
    PetscReal norm;
    ierr = MatCreateVecs(glldata->elem_mat,&lvec,&rvec);CHKERRQ(ierr);
    ierr = VecSet(lvec,1.0);CHKERRQ(ierr);
    ierr = MatMult(glldata->elem_mat,lvec,rvec);CHKERRQ(ierr);
    ierr = VecNorm(rvec,NORM_INFINITY,&norm);CHKERRQ(ierr);
    printf("Test null space of elem mat % 1.14e\n",norm);
    ierr = VecDestroy(&lvec);CHKERRQ(ierr);
    ierr = VecDestroy(&rvec);CHKERRQ(ierr);
  }
#endif
  PetscFunctionReturn(0);
}
Exemplo n.º 11
0
static PetscErrorCode DomainDecomposition(DomainData *dd)
{
  PetscMPIInt rank;
  PetscInt    i,j,k;

  PetscFunctionBeginUser;
  /* Subdomain index in cartesian coordinates */
  MPI_Comm_rank(dd->gcomm,&rank);
  dd->ipx = rank%dd->npx;
  if (dd->dim>1) dd->ipz = rank/(dd->npx*dd->npy);
  else dd->ipz = 0;

  dd->ipy = rank/dd->npx-dd->ipz*dd->npy;
  /* number of local elements */
  dd->nex_l = dd->nex/dd->npx;
  if (dd->ipx < dd->nex%dd->npx) dd->nex_l++;
  if (dd->dim>1) {
    dd->ney_l = dd->ney/dd->npy;
    if (dd->ipy < dd->ney%dd->npy) dd->ney_l++;
  } else {
    dd->ney_l = 0;
  }
  if (dd->dim>2) {
    dd->nez_l = dd->nez/dd->npz;
    if (dd->ipz < dd->nez%dd->npz) dd->nez_l++;
  } else {
    dd->nez_l = 0;
  }
  /* local and global number of dofs */
  dd->xm_l = dd->nex_l*dd->p+1;
  dd->xm   = dd->nex*dd->p+1;
  dd->ym_l = dd->ney_l*dd->p+1;
  dd->ym   = dd->ney*dd->p+1;
  dd->zm_l = dd->nez_l*dd->p+1;
  dd->zm   = dd->nez*dd->p+1;
  if (!dd->pure_neumann) {
    if (!dd->DBC_zerorows && !dd->ipx) dd->xm_l--;
    if (!dd->DBC_zerorows) dd->xm--;
  }

  /* starting global index for local dofs (simple lexicographic order) */
  dd->startx = 0;
  j          = dd->nex/dd->npx;
  for (i=0; i<dd->ipx; i++) {
    k = j;
    if (i<dd->nex%dd->npx) k++;
    dd->startx = dd->startx+k*dd->p;
  }
  if (!dd->pure_neumann && !dd->DBC_zerorows && dd->ipx) dd->startx--;

  dd->starty = 0;
  if (dd->dim > 1) {
    j = dd->ney/dd->npy;
    for (i=0; i<dd->ipy; i++) {
      k = j;
      if (i<dd->ney%dd->npy) k++;
      dd->starty = dd->starty+k*dd->p;
    }
  }
  dd->startz = 0;
  if (dd->dim > 2) {
    j = dd->nez/dd->npz;
    for (i=0; i<dd->ipz; i++) {
      k = j;
      if (i<dd->nez%dd->npz) k++;
      dd->startz = dd->startz+k*dd->p;
    }
  }
  PetscFunctionReturn(0);
}
Exemplo n.º 12
0
static PetscErrorCode ComputeSubdomainMatrix(DomainData dd, GLLData glldata, Mat *local_mat)
{
  PetscErrorCode ierr;
  PetscInt       localsize,zloc,yloc,xloc,auxnex,auxney,auxnez;
  PetscInt       ie,je,ke,i,j,k,ig,jg,kg,ii,ming;
  PetscInt       *indexg,*cols,*colsg;
  PetscScalar    *vals;
  Mat            temp_local_mat,elem_mat_DBC=0,*usedmat;
  IS             submatIS;

  PetscFunctionBeginUser;
  ierr = MatGetSize(glldata.elem_mat,&i,&j);CHKERRQ(ierr);
  ierr = PetscMalloc1(i,&indexg);CHKERRQ(ierr);
  ierr = PetscMalloc1(i,&colsg);CHKERRQ(ierr);
  /* get submatrix of elem_mat without dirichlet nodes */
  if (!dd.pure_neumann && !dd.DBC_zerorows && !dd.ipx) {
    xloc = dd.p+1;
    yloc = 1;
    zloc = 1;
    if (dd.dim>1) yloc = dd.p+1;
    if (dd.dim>2) zloc = dd.p+1;
    ii = 0;
    for (k=0;k<zloc;k++) {
      for (j=0;j<yloc;j++) {
        for (i=1;i<xloc;i++) {
          indexg[ii]=k*xloc*yloc+j*xloc+i;
          ii++;
        }
      }
    }
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ii,indexg,PETSC_COPY_VALUES,&submatIS);CHKERRQ(ierr);
    ierr = MatGetSubMatrix(glldata.elem_mat,submatIS,submatIS,MAT_INITIAL_MATRIX,&elem_mat_DBC);CHKERRQ(ierr);
    ierr = ISDestroy(&submatIS);CHKERRQ(ierr);
  }

  /* Assemble subdomain matrix */
  localsize = dd.xm_l*dd.ym_l*dd.zm_l;
  ierr      = MatCreate(PETSC_COMM_SELF,&temp_local_mat);CHKERRQ(ierr);
  ierr      = MatSetSizes(temp_local_mat,localsize,localsize,localsize,localsize);CHKERRQ(ierr);
  ierr      = MatSetOptionsPrefix(temp_local_mat,"subdomain_");CHKERRQ(ierr);
  /* set local matrices type: here we use SEQSBAIJ primarily for testing purpose */
  /* in order to avoid conversions inside the BDDC code, use SeqAIJ if possible */
  if (dd.DBC_zerorows && !dd.ipx) { /* in this case, we need to zero out some of the rows, so use seqaij */
    ierr      = MatSetType(temp_local_mat,MATSEQAIJ);CHKERRQ(ierr);
  } else {
    ierr      = MatSetType(temp_local_mat,MATSEQSBAIJ);CHKERRQ(ierr);
  }
  ierr = MatSetFromOptions(temp_local_mat);CHKERRQ(ierr);

  i = PetscPowRealInt(3.0*(dd.p+1.0),dd.dim);

  ierr = MatSeqAIJSetPreallocation(temp_local_mat,i,NULL);CHKERRQ(ierr);      /* very overestimated */
  ierr = MatSeqSBAIJSetPreallocation(temp_local_mat,1,i,NULL);CHKERRQ(ierr);      /* very overestimated */
  ierr = MatSetOption(temp_local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr);

  yloc = dd.p+1;
  zloc = dd.p+1;
  if (dd.dim < 3) zloc = 1;
  if (dd.dim < 2) yloc = 1;

  auxnez = dd.nez_l;
  auxney = dd.ney_l;
  auxnex = dd.nex_l;
  if (dd.dim < 3) auxnez = 1;
  if (dd.dim < 2) auxney = 1;

  for (ke=0; ke<auxnez; ke++) {
    for (je=0; je<auxney; je++) {
      for (ie=0; ie<auxnex; ie++) {
        /* customize element accounting for BC */
        xloc    = dd.p+1;
        ming    = 0;
        usedmat = &glldata.elem_mat;
        if (!dd.pure_neumann && !dd.DBC_zerorows && !dd.ipx) {
          if (ie == 0) {
            xloc    = dd.p;
            usedmat = &elem_mat_DBC;
          } else {
            ming    = -1;
            usedmat = &glldata.elem_mat;
          }
        }
        /* local to the element/global to the subdomain indexing */
        for (k=0; k<zloc; k++) {
          kg = ke*dd.p+k;
          for (j=0; j<yloc; j++) {
            jg = je*dd.p+j;
            for (i=0; i<xloc; i++) {
              ig         = ie*dd.p+i+ming;
              ii         = k*xloc*yloc+j*xloc+i;
              indexg[ii] = kg*dd.xm_l*dd.ym_l+jg*dd.xm_l+ig;
            }
          }
        }
        /* Set values */
        for (i=0; i<xloc*yloc*zloc; i++) {
          ierr = MatGetRow(*usedmat,i,&j,(const PetscInt**)&cols,(const PetscScalar**)&vals);CHKERRQ(ierr);
          for (k=0; k<j; k++) colsg[k] = indexg[cols[k]];
          ierr = MatSetValues(temp_local_mat,1,&indexg[i],j,colsg,vals,ADD_VALUES);CHKERRQ(ierr);
          ierr = MatRestoreRow(*usedmat,i,&j,(const PetscInt**)&cols,(const PetscScalar**)&vals);CHKERRQ(ierr);
        }
      }
    }
  }
  ierr = PetscFree(indexg);CHKERRQ(ierr);
  ierr = PetscFree(colsg);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(temp_local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd  (temp_local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
#if DEBUG
  {
    Vec       lvec,rvec;
    PetscReal norm;
    ierr = MatCreateVecs(temp_local_mat,&lvec,&rvec);CHKERRQ(ierr);
    ierr = VecSet(lvec,1.0);CHKERRQ(ierr);
    ierr = MatMult(temp_local_mat,lvec,rvec);CHKERRQ(ierr);
    ierr = VecNorm(rvec,NORM_INFINITY,&norm);CHKERRQ(ierr);
    printf("Test null space of local mat % 1.14e\n",norm);
    ierr = VecDestroy(&lvec);CHKERRQ(ierr);
    ierr = VecDestroy(&rvec);CHKERRQ(ierr);
  }
#endif
  *local_mat = temp_local_mat;
  ierr       = MatDestroy(&elem_mat_DBC);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
Exemplo n.º 13
0
static PetscErrorCode ComputeSpecialBoundaryIndices(DomainData dd,IS *dirichlet,IS *neumann)
{
  PetscErrorCode ierr;
  IS             temp_dirichlet=0,temp_neumann=0;
  PetscInt       localsize,i,j,k,*indices;
  PetscBool      *touched;

  PetscFunctionBeginUser;
  localsize = dd.xm_l*dd.ym_l*dd.zm_l;

  ierr = PetscMalloc1(localsize,&indices);CHKERRQ(ierr);
  ierr = PetscMalloc1(localsize,&touched);CHKERRQ(ierr);
  for (i=0; i<localsize; i++) touched[i] = PETSC_FALSE;

  if (dirichlet) {
    i = 0;
    /* west boundary */
    if (dd.ipx == 0) {
      for (k=0;k<dd.zm_l;k++) {
        for (j=0;j<dd.ym_l;j++) {
          indices[i]=k*dd.ym_l*dd.xm_l+j*dd.xm_l;
          touched[indices[i]]=PETSC_TRUE;
          i++;
        }
      }
    }
    ierr = ISCreateGeneral(dd.gcomm,i,indices,PETSC_COPY_VALUES,&temp_dirichlet);CHKERRQ(ierr);
  }
  if (neumann) {
    i = 0;
    /* west boundary */
    if (dd.ipx == 0) {
      for (k=0;k<dd.zm_l;k++) {
        for (j=0;j<dd.ym_l;j++) {
          indices[i]=k*dd.ym_l*dd.xm_l+j*dd.xm_l;
          if (!touched[indices[i]]) {
            touched[indices[i]]=PETSC_TRUE;
            i++;
          }
        }
      }
    }
    /* east boundary */
    if (dd.ipx == dd.npx-1) {
      for (k=0;k<dd.zm_l;k++) {
        for (j=0;j<dd.ym_l;j++) {
          indices[i]=k*dd.ym_l*dd.xm_l+j*dd.xm_l+dd.xm_l-1;
          if (!touched[indices[i]]) {
            touched[indices[i]]=PETSC_TRUE;
            i++;
          }
        }
      }
    }
    /* south boundary */
    if (dd.ipy == 0 && dd.dim > 1) {
      for (k=0;k<dd.zm_l;k++) {
        for (j=0;j<dd.xm_l;j++) {
          indices[i]=k*dd.ym_l*dd.xm_l+j;
          if (!touched[indices[i]]) {
            touched[indices[i]]=PETSC_TRUE;
            i++;
          }
        }
      }
    }
    /* north boundary */
    if (dd.ipy == dd.npy-1 && dd.dim > 1) {
      for (k=0;k<dd.zm_l;k++) {
        for (j=0;j<dd.xm_l;j++) {
          indices[i]=k*dd.ym_l*dd.xm_l+(dd.ym_l-1)*dd.xm_l+j;
          if (!touched[indices[i]]) {
            touched[indices[i]]=PETSC_TRUE;
            i++;
          }
        }
      }
    }
    /* bottom boundary */
    if (dd.ipz == 0 && dd.dim > 2) {
      for (k=0;k<dd.ym_l;k++) {
        for (j=0;j<dd.xm_l;j++) {
          indices[i]=k*dd.xm_l+j;
          if (!touched[indices[i]]) {
            touched[indices[i]]=PETSC_TRUE;
            i++;
          }
        }
      }
    }
    /* top boundary */
    if (dd.ipz == dd.npz-1 && dd.dim > 2) {
      for (k=0;k<dd.ym_l;k++) {
        for (j=0;j<dd.xm_l;j++) {
          indices[i]=(dd.zm_l-1)*dd.ym_l*dd.xm_l+k*dd.xm_l+j;
          if (!touched[indices[i]]) {
            touched[indices[i]]=PETSC_TRUE;
            i++;
          }
        }
      }
    }
    ierr = ISCreateGeneral(dd.gcomm,i,indices,PETSC_COPY_VALUES,&temp_neumann);CHKERRQ(ierr);
  }
  if (dirichlet) *dirichlet = temp_dirichlet;
  if (neumann) *neumann = temp_neumann;
  ierr = PetscFree(indices);CHKERRQ(ierr);
  ierr = PetscFree(touched);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
Exemplo n.º 14
0
PetscErrorCode MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense(Mat R,Mat A,Mat B,Mat RAB,PetscScalar *work)
{
  Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*r=(Mat_SeqAIJ*)R->data;
  PetscErrorCode ierr;
  PetscScalar    *b,r1,r2,r3,r4,*b1,*b2,*b3,*b4;
  MatScalar      *aa,*ra;
  PetscInt       cn=B->cmap->n,bm=B->rmap->n,col,i,j,n,*ai=a->i,*aj,am=A->rmap->n;
  PetscInt       am2=2*am,am3=3*am,bm4=4*bm;
  PetscScalar    *d,*c,*c2,*c3,*c4;
  PetscInt       *rj,rm=R->rmap->n,dm=RAB->rmap->n,dn=RAB->cmap->n;
  PetscInt       rm2=2*rm,rm3=3*rm,colrm;

  PetscFunctionBegin;
  if (!dm || !dn) PetscFunctionReturn(0);
  if (bm != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in A %D not equal rows in B %D\n",A->cmap->n,bm);
  if (am != R->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in R %D not equal rows in A %D\n",R->cmap->n,am);
  if (R->rmap->n != RAB->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows in RAB %D not equal rows in R %D\n",RAB->rmap->n,R->rmap->n);
  if (B->cmap->n != RAB->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in RAB %D not equal columns in B %D\n",RAB->cmap->n,B->cmap->n);

  ierr = MatDenseGetArray(B,&b);CHKERRQ(ierr);
  ierr = MatDenseGetArray(RAB,&d);CHKERRQ(ierr);
  b1 = b; b2 = b1 + bm; b3 = b2 + bm; b4 = b3 + bm;
  c = work; c2 = c + am; c3 = c2 + am; c4 = c3 + am;
  for (col=0; col<cn-4; col += 4){  /* over columns of C */
    for (i=0; i<am; i++) {        /* over rows of A in those columns */
      r1 = r2 = r3 = r4 = 0.0;
      n   = ai[i+1] - ai[i];
      aj  = a->j + ai[i];
      aa  = a->a + ai[i];
      for (j=0; j<n; j++) {
        r1 += (*aa)*b1[*aj];
        r2 += (*aa)*b2[*aj];
        r3 += (*aa)*b3[*aj];
        r4 += (*aa++)*b4[*aj++];
      }
      c[i]       = r1;
      c[am  + i] = r2;
      c[am2 + i] = r3;
      c[am3 + i] = r4;
    }
    b1 += bm4;
    b2 += bm4;
    b3 += bm4;
    b4 += bm4;

    /* RAB[:,col] = R*C[:,col] */
    colrm = col*rm;
    for (i=0; i<rm; i++) {        /* over rows of R in those columns */
      r1 = r2 = r3 = r4 = 0.0;
      n   = r->i[i+1] - r->i[i];
      rj  = r->j + r->i[i];
      ra  = r->a + r->i[i];
      for (j=0; j<n; j++) {
        r1 += (*ra)*c[*rj];
        r2 += (*ra)*c2[*rj];
        r3 += (*ra)*c3[*rj];
        r4 += (*ra++)*c4[*rj++];
      }
      d[colrm + i]       = r1;
      d[colrm + rm + i]  = r2;
      d[colrm + rm2 + i] = r3;
      d[colrm + rm3 + i] = r4;
    }
  }
  for (;col<cn; col++){     /* over extra columns of C */
    for (i=0; i<am; i++) {  /* over rows of A in those columns */
      r1 = 0.0;
      n   = a->i[i+1] - a->i[i];
      aj  = a->j + a->i[i];
      aa  = a->a + a->i[i];
      for (j=0; j<n; j++) {
        r1 += (*aa++)*b1[*aj++];
      }
      c[i]     = r1;
    }
    b1 += bm;

    for (i=0; i<rm; i++) {  /* over rows of R in those columns */
      r1 = 0.0;
      n   = r->i[i+1] - r->i[i];
      rj  = r->j + r->i[i];
      ra  = r->a + r->i[i];
      for (j=0; j<n; j++) {
        r1 += (*ra++)*c[*rj++];
      }
      d[col*rm + i]     = r1;
    }
  }
  ierr = PetscLogFlops(cn*2.0*(a->nz + r->nz));CHKERRQ(ierr);

  ierr = MatDenseRestoreArray(B,&b);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(RAB,&d);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(RAB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(RAB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
Exemplo n.º 15
0
PetscErrorCode VecMDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
{
  Vec_Seq           *xv = (Vec_Seq *)xin->data;
  PetscErrorCode    ierr;
  PetscInt          n = xin->map->n,i,j,nv_rem,j_rem;
  PetscScalar       sum0,sum1,sum2,sum3,x0,x1,x2,x3;
  const PetscScalar * PETSC_RESTRICT yy0,* PETSC_RESTRICT yy1,* PETSC_RESTRICT yy2,*PETSC_RESTRICT yy3,* PETSC_RESTRICT x;
  Vec               *yy;

  PetscFunctionBegin;
  sum0 = 0.;
  sum1 = 0.;
  sum2 = 0.;

  i      = nv;
  nv_rem = nv&0x3;
  yy     = (Vec *)yin;
  j      = n;
  x      = xv->array;

  switch (nv_rem) {
  case 3:
    ierr = VecGetArray(yy[0],(PetscScalar **)&yy0);CHKERRQ(ierr);
    ierr = VecGetArray(yy[1],(PetscScalar **)&yy1);CHKERRQ(ierr);
    ierr = VecGetArray(yy[2],(PetscScalar **)&yy2);CHKERRQ(ierr);
    switch (j_rem=j&0x3) {
    case 3: 
      x2 = x[2]; 
      sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]); 
      sum2 += x2*PetscConj(yy2[2]); 
    case 2: 
      x1 = x[1]; 
      sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]); 
      sum2 += x1*PetscConj(yy2[1]); 
    case 1: 
      x0 = x[0]; 
      sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]); 
      sum2 += x0*PetscConj(yy2[0]); 
    case 0: 
      x   += j_rem;
      yy0 += j_rem;
      yy1 += j_rem;
      yy2 += j_rem;
      j   -= j_rem;
      break;
    }
    while (j>0) {
      x0 = x[0];
      x1 = x[1];
      x2 = x[2];
      x3 = x[3];
      x += 4;
      
      sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
      sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
      sum2 += x0*PetscConj(yy2[0]) + x1*PetscConj(yy2[1]) + x2*PetscConj(yy2[2]) + x3*PetscConj(yy2[3]); yy2+=4;
      j -= 4;
    }
    z[0] = sum0;
    z[1] = sum1;
    z[2] = sum2;
    ierr = VecRestoreArray(yy[0],(PetscScalar **)&yy0);CHKERRQ(ierr);
    ierr = VecRestoreArray(yy[1],(PetscScalar **)&yy1);CHKERRQ(ierr);
    ierr = VecRestoreArray(yy[2],(PetscScalar **)&yy2);CHKERRQ(ierr);
    break;
  case 2:
    ierr = VecGetArray(yy[0],(PetscScalar **)&yy0);CHKERRQ(ierr);
    ierr = VecGetArray(yy[1],(PetscScalar **)&yy1);CHKERRQ(ierr);
    switch (j_rem=j&0x3) {
    case 3: 
      x2 = x[2]; 
      sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]); 
    case 2: 
      x1 = x[1]; 
      sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]); 
    case 1: 
      x0 = x[0]; 
      sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]); 
    case 0: 
      x   += j_rem;
      yy0 += j_rem;
      yy1 += j_rem;
      j   -= j_rem;
      break;
    }
    while (j>0) {
      x0 = x[0];
      x1 = x[1];
      x2 = x[2];
      x3 = x[3];
      x += 4;
      
      sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
      sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
      j -= 4;
    }
    z[0] = sum0;
    z[1] = sum1;
 
    ierr = VecRestoreArray(yy[0],(PetscScalar **)&yy0);CHKERRQ(ierr);
    ierr = VecRestoreArray(yy[1],(PetscScalar **)&yy1);CHKERRQ(ierr);
    break;
  case 1:
    ierr = VecGetArray(yy[0],(PetscScalar **)&yy0);CHKERRQ(ierr);
    switch (j_rem=j&0x3) {
    case 3: 
      x2 = x[2]; sum0 += x2*PetscConj(yy0[2]);
    case 2: 
      x1 = x[1]; sum0 += x1*PetscConj(yy0[1]);
    case 1: 
      x0 = x[0]; sum0 += x0*PetscConj(yy0[0]);
    case 0: 
      x   += j_rem;
      yy0 += j_rem;
      j   -= j_rem;
      break;
    }
    while (j>0) {
      sum0 += x[0]*PetscConj(yy0[0]) + x[1]*PetscConj(yy0[1])
            + x[2]*PetscConj(yy0[2]) + x[3]*PetscConj(yy0[3]); 
      yy0+=4;
      j -= 4; x+=4;
    }
    z[0] = sum0;

    ierr = VecRestoreArray(yy[0],(PetscScalar **)&yy0);CHKERRQ(ierr);
    break;
  case 0:
    break;
  }
  z  += nv_rem;
  i  -= nv_rem;
  yy += nv_rem;

  while (i >0) {
    sum0 = 0.;
    sum1 = 0.;
    sum2 = 0.;
    sum3 = 0.;
    ierr = VecGetArray(yy[0],(PetscScalar **)&yy0);CHKERRQ(ierr);
    ierr = VecGetArray(yy[1],(PetscScalar **)&yy1);CHKERRQ(ierr);
    ierr = VecGetArray(yy[2],(PetscScalar **)&yy2);CHKERRQ(ierr);
    ierr = VecGetArray(yy[3],(PetscScalar **)&yy3);CHKERRQ(ierr);

    j = n;
    x = xv->array;
    switch (j_rem=j&0x3) {
    case 3: 
      x2 = x[2]; 
      sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]); 
      sum2 += x2*PetscConj(yy2[2]); sum3 += x2*PetscConj(yy3[2]);
    case 2: 
      x1 = x[1]; 
      sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]); 
      sum2 += x1*PetscConj(yy2[1]); sum3 += x1*PetscConj(yy3[1]);
    case 1: 
      x0 = x[0]; 
      sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]); 
      sum2 += x0*PetscConj(yy2[0]); sum3 += x0*PetscConj(yy3[0]);
    case 0: 
      x   += j_rem;
      yy0 += j_rem;
      yy1 += j_rem;
      yy2 += j_rem;
      yy3 += j_rem;
      j   -= j_rem;
      break;
    }
    while (j>0) {
      x0 = x[0];
      x1 = x[1];
      x2 = x[2];
      x3 = x[3];
      x += 4;
      
      sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
      sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
      sum2 += x0*PetscConj(yy2[0]) + x1*PetscConj(yy2[1]) + x2*PetscConj(yy2[2]) + x3*PetscConj(yy2[3]); yy2+=4;
      sum3 += x0*PetscConj(yy3[0]) + x1*PetscConj(yy3[1]) + x2*PetscConj(yy3[2]) + x3*PetscConj(yy3[3]); yy3+=4;
      j -= 4;
    }
    z[0] = sum0;
    z[1] = sum1;
    z[2] = sum2;
    z[3] = sum3;
    z   += 4;
    i   -= 4;
    ierr = VecRestoreArray(yy[0],(PetscScalar **)&yy0);CHKERRQ(ierr);
    ierr = VecRestoreArray(yy[1],(PetscScalar **)&yy1);CHKERRQ(ierr);
    ierr = VecRestoreArray(yy[2],(PetscScalar **)&yy2);CHKERRQ(ierr);
    ierr = VecRestoreArray(yy[3],(PetscScalar **)&yy3);CHKERRQ(ierr);
    yy  += 4;
  }
  ierr = PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
Exemplo n.º 16
0
static PetscErrorCode InitializeDomainData(DomainData *dd)
{
  PetscErrorCode ierr;
  PetscMPIInt    sizes,rank;
  PetscInt       factor;

  PetscFunctionBeginUser;
  dd->gcomm = PETSC_COMM_WORLD;
  ierr      = MPI_Comm_size(dd->gcomm,&sizes);CHKERRQ(ierr);
  ierr      = MPI_Comm_rank(dd->gcomm,&rank);CHKERRQ(ierr);
  /* test data passed in */
  if (sizes<2) SETERRQ(dd->gcomm,PETSC_ERR_USER,"This is not a uniprocessor test");
  /* Get informations from command line */
  /* Processors/subdomains per dimension */
  /* Default is 1d problem */
  dd->npx = sizes;
  dd->npy = 0;
  dd->npz = 0;
  dd->dim = 1;
  ierr    = PetscOptionsGetInt(NULL,NULL,"-npx",&dd->npx,NULL);CHKERRQ(ierr);
  ierr    = PetscOptionsGetInt(NULL,NULL,"-npy",&dd->npy,NULL);CHKERRQ(ierr);
  ierr    = PetscOptionsGetInt(NULL,NULL,"-npz",&dd->npz,NULL);CHKERRQ(ierr);
  if (dd->npy) dd->dim++;
  if (dd->npz) dd->dim++;
  /* Number of elements per dimension */
  /* Default is one element per subdomain */
  dd->nex = dd->npx;
  dd->ney = dd->npy;
  dd->nez = dd->npz;
  ierr    = PetscOptionsGetInt(NULL,NULL,"-nex",&dd->nex,NULL);CHKERRQ(ierr);
  ierr    = PetscOptionsGetInt(NULL,NULL,"-ney",&dd->ney,NULL);CHKERRQ(ierr);
  ierr    = PetscOptionsGetInt(NULL,NULL,"-nez",&dd->nez,NULL);CHKERRQ(ierr);
  if (!dd->npy) {
    dd->ney=0;
    dd->nez=0;
  }
  if (!dd->npz) dd->nez=0;
  /* Spectral degree */
  dd->p = 3;
  ierr  = PetscOptionsGetInt(NULL,NULL,"-p",&dd->p,NULL);CHKERRQ(ierr);
  /* pure neumann problem? */
  dd->pure_neumann = PETSC_FALSE;
  ierr             = PetscOptionsGetBool(NULL,NULL,"-pureneumann",&dd->pure_neumann,NULL);CHKERRQ(ierr);

  /* How to enforce dirichlet boundary conditions (in case pureneumann has not been requested explicitly) */
  dd->DBC_zerorows = PETSC_FALSE;

  ierr = PetscOptionsGetBool(NULL,NULL,"-usezerorows",&dd->DBC_zerorows,NULL);CHKERRQ(ierr);
  if (dd->pure_neumann) dd->DBC_zerorows = PETSC_FALSE;
  dd->scalingfactor = 1.0;

  factor = 0.0;
  ierr   = PetscOptionsGetInt(NULL,NULL,"-jump",&factor,NULL);CHKERRQ(ierr);
  /* checkerboard pattern */
  dd->scalingfactor = PetscPowScalar(10.0,(PetscScalar)factor*PetscPowScalar(-1.0,(PetscScalar)rank));
  /* test data passed in */
  if (dd->dim==1) {
    if (sizes!=dd->npx) SETERRQ(dd->gcomm,PETSC_ERR_USER,"Number of mpi procs in 1D must be equal to npx");
    if (dd->nex<dd->npx) SETERRQ(dd->gcomm,PETSC_ERR_USER,"Number of elements per dim must be greater/equal than number of procs per dim");
  } else if (dd->dim==2) {
    if (sizes!=dd->npx*dd->npy) SETERRQ(dd->gcomm,PETSC_ERR_USER,"Number of mpi procs in 2D must be equal to npx*npy");
    if (dd->nex<dd->npx || dd->ney<dd->npy) SETERRQ(dd->gcomm,PETSC_ERR_USER,"Number of elements per dim must be greater/equal than number of procs per dim");
  } else {
    if (sizes!=dd->npx*dd->npy*dd->npz) SETERRQ(dd->gcomm,PETSC_ERR_USER,"Number of mpi procs in 3D must be equal to npx*npy*npz");
    if (dd->nex<dd->npx || dd->ney<dd->npy || dd->nez<dd->npz) SETERRQ(dd->gcomm,PETSC_ERR_USER,"Number of elements per dim must be greater/equal than number of procs per dim");
  }
  PetscFunctionReturn(0);
}
Exemplo n.º 17
0
PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering(Mat C,Mat A,const MatFactorInfo *info)
{
  Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ*)C->data;
  PetscErrorCode ierr;
  PetscInt       i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
  PetscInt       *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
  MatScalar      *ba = b->a,*aa,*ap,*dk,*uik;
  MatScalar      *u,*d,*w,*wp,u0,u1,u2,u3,u4,u5,u6,u7,u8,u9,u10,u11,u12;
  MatScalar      u13,u14,u15,u16,u17,u18,u19,u20,u21,u22,u23,u24,u25,u26,u27;
  MatScalar      u28,u29,u30,u31,u32,u33,u34,u35;
  MatScalar      d0,d1,d2,d3,d4,d5,d6,d7,d8,d9,d10,d11,d12;
  MatScalar      d13,d14,d15,d16,d17,d18,d19,d20,d21,d22,d23,d24,d25,d26,d27;
  MatScalar      d28,d29,d30,d31,d32,d33,d34,d35;
  MatScalar      m0,m1,m2,m3,m4,m5,m6,m7,m8,m9,m10,m11,m12;
  MatScalar      m13,m14,m15,m16,m17,m18,m19,m20,m21,m22,m23,m24,m25,m26,m27;
  MatScalar      m28,m29,m30,m31,m32,m33,m34,m35;
  PetscReal      shift = info->shiftamount;
  PetscBool      allowzeropivot,zeropivotdetected;

  PetscFunctionBegin;
  /* initialization */
  allowzeropivot = PetscNot(A->erroriffailure);
  ierr = PetscCalloc1(36*mbs,&w);CHKERRQ(ierr);
  ierr = PetscMalloc2(mbs,&il,mbs,&jl);CHKERRQ(ierr);
  for (i=0; i<mbs; i++) {
    jl[i] = mbs; il[0] = 0;
  }
  ierr = PetscMalloc2(36,&dk,36,&uik);CHKERRQ(ierr);
  ai   = a->i; aj = a->j; aa = a->a;

  /* for each row k */
  for (k = 0; k<mbs; k++) {

    /*initialize k-th row with elements nonzero in row k of A */
    jmin = ai[k]; jmax = ai[k+1];
    if (jmin < jmax) {
      ap = aa + jmin*36;
      for (j = jmin; j < jmax; j++) {
        vj = aj[j];         /* block col. index */
        wp = w + vj*36;
        for (i=0; i<36; i++) *wp++ = *ap++;
      }
    }

    /* modify k-th row by adding in those rows i with U(i,k) != 0 */
    ierr = PetscMemcpy(dk,w+k*36,36*sizeof(MatScalar));CHKERRQ(ierr);
    i    = jl[k]; /* first row to be added to k_th row  */

    while (i < mbs) {
      nexti = jl[i]; /* next row to be added to k_th row */

      /* compute multiplier */
      ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */

      /* uik = -inv(Di)*U_bar(i,k) */
      d = ba + i*36;
      u = ba + ili*36;

      u0  = u[0]; u1 = u[1]; u2 = u[2]; u3 = u[3]; u4 = u[4]; u5 = u[5]; u6 = u[6];
      u7  = u[7]; u8 = u[8]; u9 = u[9]; u10 = u[10]; u11 = u[11]; u12 = u[12]; u13 = u[13];
      u14 = u[14]; u15 = u[15]; u16 = u[16]; u17 = u[17]; u18 = u[18]; u19 = u[19]; u20 = u[20];
      u21 = u[21]; u22 = u[22]; u23 = u[23]; u24 = u[24]; u25 = u[25]; u26 = u[26]; u27 = u[27];
      u28 = u[28]; u29 = u[29]; u30 = u[30]; u31 = u[31]; u32 = u[32]; u33 = u[33]; u34 = u[34];
      u35 = u[35];

      d0  = d[0];  d1  = d[1];  d2  = d[2];  d3  = d[3];
      d4  = d[4];  d5  = d[5];  d6  = d[6];  d7  = d[7];
      d8  = d[8];  d9 = d[9];  d10 = d[10]; d11 = d[11];
      d12 = d[12]; d13 = d[13]; d14 = d[14]; d15 = d[15];
      d16 = d[16]; d17 = d[17]; d18 = d[18]; d19 = d[19];
      d20 = d[20]; d21 = d[21]; d22 = d[22]; d23 = d[23];
      d24 = d[24]; d25 = d[25]; d26 = d[26]; d27 = d[27];
      d28 = d[28]; d29 = d[29]; d30 = d[30]; d31 = d[31];
      d32 = d[32]; d33 = d[33]; d34 = d[34]; d35 = d[35];

      m0 = uik[0] = -(d0*u0 + d6*u1 + d12*u2 + d18*u3 + d24*u4 + d30*u5);
      m1 = uik[1] = -(d1*u0 + d7*u1 + d13*u2 + d19*u3 + d25*u4 + d31*u5);
      m2 = uik[2] = -(d2*u0 + d8*u1 + d14*u2 + d20*u3 + d26*u4 + d32*u5);
      m3 = uik[3] = -(d3*u0 + d9*u1 + d15*u2 + d21*u3 + d27*u4 + d33*u5);
      m4 = uik[4] = -(d4*u0+ d10*u1 + d16*u2 + d22*u3 + d28*u4 + d34*u5);
      m5 = uik[5] = -(d5*u0+ d11*u1 + d17*u2 + d23*u3 + d29*u4 + d35*u5);

      m6  = uik[6] = -(d0*u6 + d6*u7 + d12*u8 + d18*u9 + d24*u10 + d30*u11);
      m7  = uik[7] = -(d1*u6 + d7*u7 + d13*u8 + d19*u9 + d25*u10 + d31*u11);
      m8  = uik[8] = -(d2*u6 + d8*u7 + d14*u8 + d20*u9 + d26*u10 + d32*u11);
      m9  = uik[9] = -(d3*u6 + d9*u7 + d15*u8 + d21*u9 + d27*u10 + d33*u11);
      m10 = uik[10]= -(d4*u6+ d10*u7 + d16*u8 + d22*u9 + d28*u10 + d34*u11);
      m11 = uik[11]= -(d5*u6+ d11*u7 + d17*u8 + d23*u9 + d29*u10 + d35*u11);

      m12 = uik[12] = -(d0*u12 + d6*u13 + d12*u14 + d18*u15 + d24*u16 + d30*u17);
      m13 = uik[13] = -(d1*u12 + d7*u13 + d13*u14 + d19*u15 + d25*u16 + d31*u17);
      m14 = uik[14] = -(d2*u12 + d8*u13 + d14*u14 + d20*u15 + d26*u16 + d32*u17);
      m15 = uik[15] = -(d3*u12 + d9*u13 + d15*u14 + d21*u15 + d27*u16 + d33*u17);
      m16 = uik[16] = -(d4*u12+ d10*u13 + d16*u14 + d22*u15 + d28*u16 + d34*u17);
      m17 = uik[17] = -(d5*u12+ d11*u13 + d17*u14 + d23*u15 + d29*u16 + d35*u17);

      m18 = uik[18] = -(d0*u18 + d6*u19 + d12*u20 + d18*u21 + d24*u22 + d30*u23);
      m19 = uik[19] = -(d1*u18 + d7*u19 + d13*u20 + d19*u21 + d25*u22 + d31*u23);
      m20 = uik[20] = -(d2*u18 + d8*u19 + d14*u20 + d20*u21 + d26*u22 + d32*u23);
      m21 = uik[21] = -(d3*u18 + d9*u19 + d15*u20 + d21*u21 + d27*u22 + d33*u23);
      m22 = uik[22] = -(d4*u18+ d10*u19 + d16*u20 + d22*u21 + d28*u22 + d34*u23);
      m23 = uik[23] = -(d5*u18+ d11*u19 + d17*u20 + d23*u21 + d29*u22 + d35*u23);

      m24 = uik[24] = -(d0*u24 + d6*u25 + d12*u26 + d18*u27 + d24*u28 + d30*u29);
      m25 = uik[25] = -(d1*u24 + d7*u25 + d13*u26 + d19*u27 + d25*u28 + d31*u29);
      m26 = uik[26] = -(d2*u24 + d8*u25 + d14*u26 + d20*u27 + d26*u28 + d32*u29);
      m27 = uik[27] = -(d3*u24 + d9*u25 + d15*u26 + d21*u27 + d27*u28 + d33*u29);
      m28 = uik[28] = -(d4*u24+ d10*u25 + d16*u26 + d22*u27 + d28*u28 + d34*u29);
      m29 = uik[29] = -(d5*u24+ d11*u25 + d17*u26 + d23*u27 + d29*u28 + d35*u29);

      m30 = uik[30] = -(d0*u30 + d6*u31 + d12*u32 + d18*u33 + d24*u34 + d30*u35);
      m31 = uik[31] = -(d1*u30 + d7*u31 + d13*u32 + d19*u33 + d25*u34 + d31*u35);
      m32 = uik[32] = -(d2*u30 + d8*u31 + d14*u32 + d20*u33 + d26*u34 + d32*u35);
      m33 = uik[33] = -(d3*u30 + d9*u31 + d15*u32 + d21*u33 + d27*u34 + d33*u35);
      m34 = uik[34] = -(d4*u30+ d10*u31 + d16*u32 + d22*u33 + d28*u34 + d34*u35);
      m35 = uik[35] = -(d5*u30+ d11*u31 + d17*u32 + d23*u33 + d29*u34 + d35*u35);

      /* update D(k) += -U(i,k)^T * U_bar(i,k) */
      dk[0] +=  m0*u0 + m1*u1 + m2*u2 + m3*u3 + m4*u4 + m5*u5;
      dk[1] +=  m6*u0 + m7*u1 + m8*u2 + m9*u3+ m10*u4+ m11*u5;
      dk[2] += m12*u0+ m13*u1+ m14*u2+ m15*u3+ m16*u4+ m17*u5;
      dk[3] += m18*u0+ m19*u1+ m20*u2+ m21*u3+ m22*u4+ m23*u5;
      dk[4] += m24*u0+ m25*u1+ m26*u2+ m27*u3+ m28*u4+ m29*u5;
      dk[5] += m30*u0+ m31*u1+ m32*u2+ m33*u3+ m34*u4+ m35*u5;

      dk[6] +=  m0*u6 + m1*u7 + m2*u8 + m3*u9 + m4*u10 + m5*u11;
      dk[7] +=  m6*u6 + m7*u7 + m8*u8 + m9*u9+ m10*u10+ m11*u11;
      dk[8] += m12*u6+ m13*u7+ m14*u8+ m15*u9+ m16*u10+ m17*u11;
      dk[9] += m18*u6+ m19*u7+ m20*u8+ m21*u9+ m22*u10+ m23*u11;
      dk[10]+= m24*u6+ m25*u7+ m26*u8+ m27*u9+ m28*u10+ m29*u11;
      dk[11]+= m30*u6+ m31*u7+ m32*u8+ m33*u9+ m34*u10+ m35*u11;

      dk[12]+=  m0*u12 + m1*u13 + m2*u14 + m3*u15 + m4*u16 + m5*u17;
      dk[13]+=  m6*u12 + m7*u13 + m8*u14 + m9*u15+ m10*u16+ m11*u17;
      dk[14]+= m12*u12+ m13*u13+ m14*u14+ m15*u15+ m16*u16+ m17*u17;
      dk[15]+= m18*u12+ m19*u13+ m20*u14+ m21*u15+ m22*u16+ m23*u17;
      dk[16]+= m24*u12+ m25*u13+ m26*u14+ m27*u15+ m28*u16+ m29*u17;
      dk[17]+= m30*u12+ m31*u13+ m32*u14+ m33*u15+ m34*u16+ m35*u17;

      dk[18]+=  m0*u18 + m1*u19 + m2*u20 + m3*u21 + m4*u22 + m5*u23;
      dk[19]+=  m6*u18 + m7*u19 + m8*u20 + m9*u21+ m10*u22+ m11*u23;
      dk[20]+= m12*u18+ m13*u19+ m14*u20+ m15*u21+ m16*u22+ m17*u23;
      dk[21]+= m18*u18+ m19*u19+ m20*u20+ m21*u21+ m22*u22+ m23*u23;
      dk[22]+= m24*u18+ m25*u19+ m26*u20+ m27*u21+ m28*u22+ m29*u23;
      dk[23]+= m30*u18+ m31*u19+ m32*u20+ m33*u21+ m34*u22+ m35*u23;

      dk[24]+=  m0*u24 + m1*u25 + m2*u26 + m3*u27 + m4*u28 + m5*u29;
      dk[25]+=  m6*u24 + m7*u25 + m8*u26 + m9*u27+ m10*u28+ m11*u29;
      dk[26]+= m12*u24+ m13*u25+ m14*u26+ m15*u27+ m16*u28+ m17*u29;
      dk[27]+= m18*u24+ m19*u25+ m20*u26+ m21*u27+ m22*u28+ m23*u29;
      dk[28]+= m24*u24+ m25*u25+ m26*u26+ m27*u27+ m28*u28+ m29*u29;
      dk[29]+= m30*u24+ m31*u25+ m32*u26+ m33*u27+ m34*u28+ m35*u29;

      dk[30]+=  m0*u30 + m1*u31 + m2*u32 + m3*u33 + m4*u34 + m5*u35;
      dk[31]+=  m6*u30 + m7*u31 + m8*u32 + m9*u33+ m10*u34+ m11*u35;
      dk[32]+= m12*u30+ m13*u31+ m14*u32+ m15*u33+ m16*u34+ m17*u35;
      dk[33]+= m18*u30+ m19*u31+ m20*u32+ m21*u33+ m22*u34+ m23*u35;
      dk[34]+= m24*u30+ m25*u31+ m26*u32+ m27*u33+ m28*u34+ m29*u35;
      dk[35]+= m30*u30+ m31*u31+ m32*u32+ m33*u33+ m34*u34+ m35*u35;

      ierr = PetscLogFlops(216.0*4.0);CHKERRQ(ierr);

      /* update -U(i,k) */
      ierr = PetscMemcpy(ba+ili*36,uik,36*sizeof(MatScalar));CHKERRQ(ierr);

      /* add multiple of row i to k-th row ... */
      jmin = ili + 1; jmax = bi[i+1];
      if (jmin < jmax) {
        for (j=jmin; j<jmax; j++) {
          /* w += -U(i,k)^T * U_bar(i,j) */
          wp = w + bj[j]*36;
          u  = ba + j*36;

          u0  = u[0];  u1  = u[1];  u2  = u[2];  u3  = u[3];  u4  = u[4];  u5  = u[5];  u6  = u[6];
          u7  = u[7];  u8  = u[8];  u9  = u[9];  u10 = u[10]; u11 = u[11]; u12 = u[12]; u13 = u[13];
          u14 = u[14]; u15 = u[15]; u16 = u[16]; u17 = u[17]; u18 = u[18]; u19 = u[19]; u20 = u[20];
          u21 = u[21]; u22 = u[22]; u23 = u[23]; u24 = u[24]; u25 = u[25]; u26 = u[26]; u27 = u[27];
          u28 = u[28]; u29 = u[29]; u30 = u[30]; u31 = u[31]; u32 = u[32]; u33 = u[33]; u34 = u[34];
          u35 = u[35];

          wp[0] +=  m0*u0 + m1*u1 + m2*u2 + m3*u3 + m4*u4 + m5*u5;
          wp[1] +=  m6*u0 + m7*u1 + m8*u2 + m9*u3+ m10*u4+ m11*u5;
          wp[2] += m12*u0+ m13*u1+ m14*u2+ m15*u3+ m16*u4+ m17*u5;
          wp[3] += m18*u0+ m19*u1+ m20*u2+ m21*u3+ m22*u4+ m23*u5;
          wp[4] += m24*u0+ m25*u1+ m26*u2+ m27*u3+ m28*u4+ m29*u5;
          wp[5] += m30*u0+ m31*u1+ m32*u2+ m33*u3+ m34*u4+ m35*u5;

          wp[6] +=  m0*u6 + m1*u7 + m2*u8 + m3*u9 + m4*u10 + m5*u11;
          wp[7] +=  m6*u6 + m7*u7 + m8*u8 + m9*u9+ m10*u10+ m11*u11;
          wp[8] += m12*u6+ m13*u7+ m14*u8+ m15*u9+ m16*u10+ m17*u11;
          wp[9] += m18*u6+ m19*u7+ m20*u8+ m21*u9+ m22*u10+ m23*u11;
          wp[10]+= m24*u6+ m25*u7+ m26*u8+ m27*u9+ m28*u10+ m29*u11;
          wp[11]+= m30*u6+ m31*u7+ m32*u8+ m33*u9+ m34*u10+ m35*u11;

          wp[12]+=  m0*u12 + m1*u13 + m2*u14 + m3*u15 + m4*u16 + m5*u17;
          wp[13]+=  m6*u12 + m7*u13 + m8*u14 + m9*u15+ m10*u16+ m11*u17;
          wp[14]+= m12*u12+ m13*u13+ m14*u14+ m15*u15+ m16*u16+ m17*u17;
          wp[15]+= m18*u12+ m19*u13+ m20*u14+ m21*u15+ m22*u16+ m23*u17;
          wp[16]+= m24*u12+ m25*u13+ m26*u14+ m27*u15+ m28*u16+ m29*u17;
          wp[17]+= m30*u12+ m31*u13+ m32*u14+ m33*u15+ m34*u16+ m35*u17;

          wp[18]+=  m0*u18 + m1*u19 + m2*u20 + m3*u21 + m4*u22 + m5*u23;
          wp[19]+=  m6*u18 + m7*u19 + m8*u20 + m9*u21+ m10*u22+ m11*u23;
          wp[20]+= m12*u18+ m13*u19+ m14*u20+ m15*u21+ m16*u22+ m17*u23;
          wp[21]+= m18*u18+ m19*u19+ m20*u20+ m21*u21+ m22*u22+ m23*u23;
          wp[22]+= m24*u18+ m25*u19+ m26*u20+ m27*u21+ m28*u22+ m29*u23;
          wp[23]+= m30*u18+ m31*u19+ m32*u20+ m33*u21+ m34*u22+ m35*u23;

          wp[24]+=  m0*u24 + m1*u25 + m2*u26 + m3*u27 + m4*u28 + m5*u29;
          wp[25]+=  m6*u24 + m7*u25 + m8*u26 + m9*u27+ m10*u28+ m11*u29;
          wp[26]+= m12*u24+ m13*u25+ m14*u26+ m15*u27+ m16*u28+ m17*u29;
          wp[27]+= m18*u24+ m19*u25+ m20*u26+ m21*u27+ m22*u28+ m23*u29;
          wp[28]+= m24*u24+ m25*u25+ m26*u26+ m27*u27+ m28*u28+ m29*u29;
          wp[29]+= m30*u24+ m31*u25+ m32*u26+ m33*u27+ m34*u28+ m35*u29;

          wp[30]+=  m0*u30 + m1*u31 + m2*u32 + m3*u33 + m4*u34 + m5*u35;
          wp[31]+=  m6*u30 + m7*u31 + m8*u32 + m9*u33+ m10*u34+ m11*u35;
          wp[32]+= m12*u30+ m13*u31+ m14*u32+ m15*u33+ m16*u34+ m17*u35;
          wp[33]+= m18*u30+ m19*u31+ m20*u32+ m21*u33+ m22*u34+ m23*u35;
          wp[34]+= m24*u30+ m25*u31+ m26*u32+ m27*u33+ m28*u34+ m29*u35;
          wp[35]+= m30*u30+ m31*u31+ m32*u32+ m33*u33+ m34*u34+ m35*u35;
        }
        ierr = PetscLogFlops(2.0*216.0*(jmax-jmin));CHKERRQ(ierr);

        /* ... add i to row list for next nonzero entry */
        il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
        j     = bj[jmin];
        jl[i] = jl[j]; jl[j] = i; /* update jl */
      }
      i = nexti;
    }

    /* save nonzero entries in k-th row of U ... */

    /* invert diagonal block */
    d    = ba+k*36;
    ierr = PetscMemcpy(d,dk,36*sizeof(MatScalar));CHKERRQ(ierr);
    ierr = PetscKernel_A_gets_inverse_A_6(d,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
    if (zeropivotdetected) C->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;

    jmin = bi[k]; jmax = bi[k+1];
    if (jmin < jmax) {
      for (j=jmin; j<jmax; j++) {
        vj = bj[j];            /* block col. index of U */
        u  = ba + j*36;
        wp = w + vj*36;
        for (k1=0; k1<36; k1++) {
          *u++  = *wp;
          *wp++ = 0.0;
        }
      }

      /* ... add k to row list for first nonzero entry in k-th row */
      il[k] = jmin;
      i     = bj[jmin];
      jl[k] = jl[i]; jl[i] = k;
    }
  }

  ierr = PetscFree(w);CHKERRQ(ierr);
  ierr = PetscFree2(il,jl);CHKERRQ(ierr);
  ierr = PetscFree2(dk,uik);CHKERRQ(ierr);

  C->ops->solve          = MatSolve_SeqSBAIJ_6_NaturalOrdering_inplace;
  C->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering_inplace;
  C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_6_NaturalOrdering_inplace;
  C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering_inplace;
  C->assembled           = PETSC_TRUE;
  C->preallocated        = PETSC_TRUE;

  ierr = PetscLogFlops(1.3333*216*b->mbs);CHKERRQ(ierr); /* from inverting diagonal blocks */
  PetscFunctionReturn(0);
}
Exemplo n.º 18
0
static PetscErrorCode SNESLineSearchApply_CP(SNESLineSearch linesearch)
{
  PetscBool      changed_y, changed_w, domainerror;
  PetscErrorCode ierr;
  Vec             X, Y, F, W;
  SNES            snes;
  PetscReal       xnorm, ynorm, gnorm, steptol, atol, rtol, ltol, maxstep;

  PetscReal       lambda, lambda_old, lambda_update, delLambda;
  PetscScalar     fty, fty_init, fty_old, fty_mid1, fty_mid2, s;
  PetscInt        i, max_its;

  PetscViewer     monitor;

  PetscFunctionBegin;

  ierr = SNESLineSearchGetVecs(linesearch, &X, &F, &Y, &W, PETSC_NULL);CHKERRQ(ierr);
  ierr = SNESLineSearchGetNorms(linesearch, &xnorm, &gnorm, &ynorm);CHKERRQ(ierr);
  ierr = SNESLineSearchGetSNES(linesearch, &snes);CHKERRQ(ierr);
  ierr = SNESLineSearchGetLambda(linesearch, &lambda);CHKERRQ(ierr);
  ierr = SNESLineSearchGetTolerances(linesearch, &steptol, &maxstep, &rtol, &atol, &ltol, &max_its);CHKERRQ(ierr);
  ierr = SNESLineSearchSetSuccess(linesearch, PETSC_TRUE);CHKERRQ(ierr);
  ierr = SNESLineSearchGetMonitor(linesearch, &monitor);CHKERRQ(ierr);

  /* precheck */
  ierr = SNESLineSearchPreCheck(linesearch,X,Y,&changed_y);CHKERRQ(ierr);
  lambda_old = 0.0;
  ierr = VecDot(F, Y, &fty_old);CHKERRQ(ierr);
  fty_init = fty_old;

  for (i = 0; i < max_its; i++) {

    /* compute the norm at lambda */
    ierr = VecCopy(X, W);CHKERRQ(ierr);
    ierr = VecAXPY(W, -lambda, Y);CHKERRQ(ierr);
    if (linesearch->ops->viproject) {
      ierr = (*linesearch->ops->viproject)(snes, W);CHKERRQ(ierr);
    }
    ierr = SNESComputeFunction(snes, W, F);CHKERRQ(ierr);

    ierr = VecDot(F, Y, &fty);CHKERRQ(ierr);

    delLambda    = lambda - lambda_old;

    /* check for convergence */
    if (PetscAbsReal(delLambda) < steptol*lambda) break;
    if (PetscAbsScalar(fty) / PetscAbsScalar(fty_init) < rtol) break;
    if (PetscAbsScalar(fty) < atol && i > 0) break;
    if (monitor) {
      ierr = PetscViewerASCIIAddTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(monitor,"    Line search: lambdas = [%g, %g], ftys = [%g, %g]\n",(double)lambda, (double)lambda_old, (double)PetscRealPart(fty), (double)PetscRealPart(fty_old));CHKERRQ(ierr);
      ierr = PetscViewerASCIISubtractTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr);
    }

    /* compute the search direction */
    if (linesearch->order == SNES_LINESEARCH_ORDER_LINEAR) {
      s = (fty - fty_old) / delLambda;
    } else if (linesearch->order == SNES_LINESEARCH_ORDER_QUADRATIC) {
      ierr = VecCopy(X, W);CHKERRQ(ierr);
      ierr = VecAXPY(W, -0.5*(lambda + lambda_old), Y);CHKERRQ(ierr);
      if (linesearch->ops->viproject) {
        ierr = (*linesearch->ops->viproject)(snes, W);CHKERRQ(ierr);
      }
      ierr = SNESComputeFunction(snes, W, F);CHKERRQ(ierr);
      ierr = VecDot(F, Y, &fty_mid1);CHKERRQ(ierr);
      s = (3.*fty - 4.*fty_mid1 + fty_old) / delLambda;
    } else {
      ierr = VecCopy(X, W);CHKERRQ(ierr);
      ierr = VecAXPY(W, -0.5*(lambda + lambda_old), Y);CHKERRQ(ierr);
      if (linesearch->ops->viproject) {
        ierr = (*linesearch->ops->viproject)(snes, W);CHKERRQ(ierr);
      }
      ierr = SNESComputeFunction(snes, W, F);CHKERRQ(ierr);
      ierr = VecDot(F, Y, &fty_mid1);CHKERRQ(ierr);
      ierr = VecCopy(X, W);CHKERRQ(ierr);
      ierr = VecAXPY(W, -(lambda + 0.5*(lambda - lambda_old)), Y);CHKERRQ(ierr);
      if (linesearch->ops->viproject) {
        ierr = (*linesearch->ops->viproject)(snes, W);CHKERRQ(ierr);
      }
      ierr = SNESComputeFunction(snes, W, F);CHKERRQ(ierr);
      ierr = VecDot(F, Y, &fty_mid2);CHKERRQ(ierr);
      s = (2.*fty_mid2 + 3.*fty - 6.*fty_mid1 + fty_old) / (3.*delLambda);
    }
    /* if the solve is going in the wrong direction, fix it */
    if (PetscRealPart(s) > 0.) s = -s;
    lambda_update =  lambda - PetscRealPart(fty / s);

    /* switch directions if we stepped out of bounds */
    if (lambda_update < steptol) {
      lambda_update = lambda + PetscRealPart(fty / s);
    }

    if (PetscIsInfOrNanScalar(lambda_update)) break;
    if (lambda_update > maxstep) {
      break;
    }

    /* compute the new state of the line search */
    lambda_old = lambda;
    lambda = lambda_update;
    fty_old = fty;
  }
  /* construct the solution */
  ierr = VecCopy(X, W);CHKERRQ(ierr);
  ierr = VecAXPY(W, -lambda, Y);CHKERRQ(ierr);
  if (linesearch->ops->viproject) {
    ierr = (*linesearch->ops->viproject)(snes, W);CHKERRQ(ierr);
  }
  /* postcheck */
  ierr = SNESLineSearchPostCheck(linesearch,X,Y,W,&changed_y,&changed_w);CHKERRQ(ierr);
  if (changed_y) {
    ierr = VecAXPY(X, -lambda, Y);CHKERRQ(ierr);
    if (linesearch->ops->viproject) {
      ierr = (*linesearch->ops->viproject)(snes, X);CHKERRQ(ierr);
    }
  } else {
    ierr = VecCopy(W, X);CHKERRQ(ierr);
  }
  ierr = SNESComputeFunction(snes,X,F);CHKERRQ(ierr);
  ierr = SNESGetFunctionDomainError(snes, &domainerror);CHKERRQ(ierr);
  if (domainerror) {
    ierr = SNESLineSearchSetSuccess(linesearch, PETSC_FALSE);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  ierr = SNESLineSearchComputeNorms(linesearch);CHKERRQ(ierr);
  ierr = SNESLineSearchGetNorms(linesearch, &xnorm, &gnorm, &ynorm);CHKERRQ(ierr);

  ierr = SNESLineSearchSetLambda(linesearch, lambda);CHKERRQ(ierr);

  if (monitor) {
    ierr = PetscViewerASCIIAddTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(monitor,"    Line search terminated: lambda = %g, fnorms = %g\n", (double)lambda, (double)gnorm);CHKERRQ(ierr);
    ierr = PetscViewerASCIISubtractTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr);
  }
  if (lambda <= steptol) {
    ierr = SNESLineSearchSetSuccess(linesearch, PETSC_FALSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
Exemplo n.º 19
0
PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *sbaij = (Mat_MPISBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)(sbaij->B->data);  
  PetscErrorCode ierr;
  PetscInt       Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ec = 0,*garray,*sgarray;
  PetscInt       bs = mat->rmap->bs,*stmp,mbs=sbaij->mbs, vec_size,nt;
  IS             from,to;
  Vec            gvec;
  PetscMPIInt    rank=sbaij->rank,lsize,size=sbaij->size; 
  PetscInt       *owners=sbaij->rangebs,*sowners,*ec_owner,k; 
  PetscScalar    *ptr;

  PetscFunctionBegin;
  ierr = VecScatterDestroy(&sbaij->sMvctx);CHKERRQ(ierr);
  
  /* For the first stab we make an array as long as the number of columns */
  /* mark those columns that are in sbaij->B */
  ierr = PetscMalloc(Nbs*sizeof(PetscInt),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++; 
      indices[aj[B->i[i] + j] ] = 1;
    }
  }

  /* form arrays of columns we need */
  ierr = PetscMalloc(ec*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ierr = PetscMalloc2(2*ec,PetscInt,&sgarray,ec,PetscInt,&ec_owner);CHKERRQ(ierr);
  
  ec = 0;
  for (j=0; j<size; j++){
    for (i=owners[j]; i<owners[j+1]; i++){
      if (indices[i]) {
        garray[ec]   = i;
        ec_owner[ec] = j;
        ec++;
      }
    }
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) indices[garray[i]] = i;

  /* compact out the extra columns in B */
  for (i=0; i<mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
  }
  B->nbs      = ec;
  sbaij->B->cmap->n = sbaij->B->cmap->N = ec*mat->rmap->bs;
  ierr = PetscLayoutSetUp((sbaij->B->cmap));CHKERRQ(ierr);
  ierr = PetscFree(indices);CHKERRQ(ierr);

  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building scatter-gather */
  ierr = PetscMalloc(2*ec*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);   
  for (i=0; i<ec; i++) { stmp[i] = i; } 
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_COPY_VALUES,&to);CHKERRQ(ierr);

  /* generate the scatter context 
     -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but usefule for some applications */
  ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,1,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr);
  ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr); 
  ierr = VecDestroy(&gvec);CHKERRQ(ierr);

  sbaij->garray = garray;
  ierr = PetscLogObjectParent(mat,sbaij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,sbaij->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);

  ierr = ISDestroy(&from);CHKERRQ(ierr);
  ierr = ISDestroy(&to);CHKERRQ(ierr);

  /* create parallel vector that is used by SBAIJ matrix to scatter from/into */
  lsize = (mbs + ec)*bs;
  ierr = VecCreateMPI(((PetscObject)mat)->comm,lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr);
  ierr = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr);
  ierr = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr);

  sowners = sbaij->slvec0->map->range;
 
  /* x index in the IS sfrom */
  for (i=0; i<ec; i++) { 
    j = ec_owner[i];
    sgarray[i]  = garray[i] + (sowners[j]/bs - owners[j]);  
  } 
  /* b index in the IS sfrom */
  k = sowners[rank]/bs + mbs;
  for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j;
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,sgarray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);   
 
  /* x index in the IS sto */
  k = sowners[rank]/bs + mbs;
  for (i=0; i<ec; i++) stmp[i] = (k + i);  
  /* b index in the IS sto */
  for (i=ec; i<2*ec; i++) stmp[i] = sgarray[i-ec]; 

  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,PETSC_COPY_VALUES,&to);CHKERRQ(ierr); 

  ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr);  
 
  ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr);
  ierr = VecGetArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr); 
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr);
  ierr = VecRestoreArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);

  ierr = VecGetArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr);
  ierr = VecRestoreArray(sbaij->slvec0,&ptr);CHKERRQ(ierr); 

  ierr = PetscFree(stmp);CHKERRQ(ierr);
  ierr = MPI_Barrier(((PetscObject)mat)->comm);CHKERRQ(ierr);
  
  ierr = PetscLogObjectParent(mat,sbaij->sMvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,sbaij->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,sbaij->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,sbaij->slvec0b);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,sbaij->slvec1a);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,sbaij->slvec1b);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
  
  ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = ISDestroy(&from);CHKERRQ(ierr);  
  ierr = ISDestroy(&to);CHKERRQ(ierr);
  ierr = PetscFree2(sgarray,ec_owner);CHKERRQ(ierr); 
  PetscFunctionReturn(0);
}
Exemplo n.º 20
0
Arquivo: ex42.c Projeto: wgapl/petsc
PetscErrorCode RHSJacobian(TS ts, PetscReal t, Vec X, Mat J, Mat B, void *ptr)
{
  AppCtx            *user = (AppCtx*)ptr;
  PetscInt          nb_cells, i, idx;
  PetscReal         alpha, beta;
  PetscReal         mu_a, D_a;
  PetscReal         mu_h, D_h;
  PetscReal         a, h;
  const PetscScalar *x;
  PetscScalar       va[4], vh[4];
  PetscInt          ca[4], ch[4], rowa, rowh;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  nb_cells = user->nb_cells;
  alpha    = user->alpha;
  beta     = user->beta;
  mu_a     = user->mu_a;
  D_a      = user->D_a;
  mu_h     = user->mu_h;
  D_h      = user->D_h;

  ierr = VecGetArrayRead(X, &x);CHKERRQ(ierr);
  for(i = 0; i < nb_cells ; ++i) {
    rowa = 2*i;
    rowh = 2*i+1;
    a = x[2*i];
    h = x[2*i+1];
    ca[0] = ch[1] = 2*i;
    va[0] = 2*alpha*a / (1.+beta*h) - mu_a;
    vh[1] = 2*alpha*a;
    ca[1] = ch[0] = 2*i+1;
    va[1] = -beta*alpha*a*a / ((1.+beta*h)*(1.+beta*h));
    vh[0] = -mu_h;
    idx = 2;
    if(i > 0) {
      ca[idx] = 2*(i-1);
      ch[idx] = 2*(i-1)+1;
      va[idx] = D_a;
      vh[idx] = D_h;
      va[0] -= D_a;
      vh[0] -= D_h;
      idx++;
    }
    if(i < nb_cells-1) {
      ca[idx] = 2*(i+1);
      ch[idx] = 2*(i+1)+1;
      va[idx] = D_a;
      vh[idx] = D_h;
      va[0] -= D_a;
      vh[0] -= D_h;
      idx++;
    }
    ierr = MatSetValues(B, 1, &rowa, idx, ca, va, INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatSetValues(B, 1, &rowh, idx, ch, vh, INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = VecRestoreArrayRead(X, &x);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (J != B) {
    ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
Exemplo n.º 21
0
PetscErrorCode MatDisAssemble_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)A->data;
  Mat            B = baij->B,Bnew;
  Mat_SeqBAIJ    *Bbaij = (Mat_SeqBAIJ*)B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray;
  PetscInt       k,bs=A->rmap->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->rmap->n;
  MatScalar      *a = Bbaij->a;
  PetscScalar    *atmp;
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  PetscInt       l;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  ierr = PetscMalloc(A->rmap->bs*sizeof(PetscScalar),&atmp);
#endif
  /* free stuff related to matrix-vec multiply */
  ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 

  ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr); 
  ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr); 

  if (baij->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
    ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  }

  /* make sure that B is assembled so we can access its values */
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* invent new B and copy stuff over */
  ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr);
  for (i=0; i<mbs; i++) {
    nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
  }
  ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr);
  ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr);
  ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr);
  ((Mat_SeqSBAIJ*)Bnew->data)->nonew = Bbaij->nonew; /* Inherit insertion error options. */
  ierr = PetscFree(nz);CHKERRQ(ierr);
  
  ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
  for (i=0; i<mbs; i++) {
    rvals[0] = bs*i;
    for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
    for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
      col = garray[Bbaij->j[j]]*bs;
      for (k=0; k<bs; k++) {
#if defined(PETSC_USE_REAL_MAT_SINGLE)
        for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l];
#else
        atmp = a+j*bs2 + k*bs;
#endif
        ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
        col++;
      }
    }
  }
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  ierr = PetscFree(atmp);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  baij->garray = 0;
  ierr = PetscFree(rvals);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr);
  baij->B = Bnew;
  A->was_assembled = PETSC_FALSE;
  PetscFunctionReturn(0);
}
Exemplo n.º 22
0
PetscErrorCode  KSPSolve_GROPPCG(KSP ksp)
{
  PetscErrorCode ierr;
  PetscInt       i;
  PetscScalar    alpha,beta = 0.0,gamma,gammaNew,t;
  PetscReal      dp = 0.0;
  Vec            x,b,r,p,s,S,z,Z;
  Mat            Amat,Pmat;
  PetscBool      diagonalscale;

  PetscFunctionBegin;
  ierr = PCGetDiagonalScale(ksp->pc,&diagonalscale);CHKERRQ(ierr);
  if (diagonalscale) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"Krylov method %s does not support diagonal scaling",((PetscObject)ksp)->type_name);

  x = ksp->vec_sol;
  b = ksp->vec_rhs;
  r = ksp->work[0];
  p = ksp->work[1];
  s = ksp->work[2];
  S = ksp->work[3];
  z = ksp->work[4];
  Z = ksp->work[5];

  ierr = PCGetOperators(ksp->pc,&Amat,&Pmat);CHKERRQ(ierr);

  ksp->its = 0;
  if (!ksp->guess_zero) {
    ierr = KSP_MatMult(ksp,Amat,x,r);CHKERRQ(ierr);            /*     r <- b - Ax     */
    ierr = VecAYPX(r,-1.0,b);CHKERRQ(ierr);
  } else {
    ierr = VecCopy(b,r);CHKERRQ(ierr);                         /*     r <- b (x is 0) */
  }

  ierr = KSP_PCApply(ksp,r,z);CHKERRQ(ierr);                   /*     z <- Br   */
  ierr = VecCopy(z,p);CHKERRQ(ierr);                           /*     p <- z    */
  ierr = VecDotBegin(r,z,&gamma);CHKERRQ(ierr);                  /*     gamma <- z'*r       */
  ierr = PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)r));CHKERRQ(ierr);
  ierr = KSP_MatMult(ksp,Amat,p,s);CHKERRQ(ierr);              /*     s <- Ap   */
  ierr = VecDotEnd(r,z,&gamma);CHKERRQ(ierr);                  /*     gamma <- z'*r       */

  switch (ksp->normtype) {
  case KSP_NORM_PRECONDITIONED:
    /* This could be merged with the computation of gamma above */
    ierr = VecNorm(z,NORM_2,&dp);CHKERRQ(ierr);                /*     dp <- z'*z = e'*A'*B'*B*A'*e'     */
    break;
  case KSP_NORM_UNPRECONDITIONED:
    /* This could be merged with the computation of gamma above */
    ierr = VecNorm(r,NORM_2,&dp);CHKERRQ(ierr);                /*     dp <- r'*r = e'*A'*A*e            */
    break;
  case KSP_NORM_NATURAL:
    KSPCheckDot(ksp,gamma);
    dp = PetscSqrtReal(PetscAbsScalar(gamma));                  /*     dp <- r'*z = r'*B*r = e'*A'*B*A*e */
    break;
  case KSP_NORM_NONE:
    dp = 0.0;
    break;
  default: SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"%s",KSPNormTypes[ksp->normtype]);
  }
  ierr       = KSPLogResidualHistory(ksp,dp);CHKERRQ(ierr);
  ierr       = KSPMonitor(ksp,0,dp);CHKERRQ(ierr);
  ksp->rnorm = dp;
  ierr       = (*ksp->converged)(ksp,0,dp,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); /* test for convergence */
  if (ksp->reason) PetscFunctionReturn(0);

  i = 0;
  do {
    ksp->its = i+1;
    i++;

    ierr = VecDotBegin(p,s,&t);CHKERRQ(ierr);
    ierr = PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)p));CHKERRQ(ierr);

    ierr = KSP_PCApply(ksp,s,S);CHKERRQ(ierr);         /*   S <- Bs       */

    ierr = VecDotEnd(p,s,&t);CHKERRQ(ierr);

    alpha = gamma / t;
    ierr  = VecAXPY(x, alpha,p);CHKERRQ(ierr);   /*     x <- x + alpha * p   */
    ierr  = VecAXPY(r,-alpha,s);CHKERRQ(ierr);   /*     r <- r - alpha * s   */
    ierr  = VecAXPY(z,-alpha,S);CHKERRQ(ierr);   /*     z <- z - alpha * S   */

    if (ksp->normtype == KSP_NORM_UNPRECONDITIONED) {
      ierr = VecNormBegin(r,NORM_2,&dp);CHKERRQ(ierr);
    } else if (ksp->normtype == KSP_NORM_PRECONDITIONED) {
      ierr = VecNormBegin(z,NORM_2,&dp);CHKERRQ(ierr);
    }
    ierr = VecDotBegin(r,z,&gammaNew);CHKERRQ(ierr);
    ierr = PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)r));CHKERRQ(ierr);

    ierr = KSP_MatMult(ksp,Amat,z,Z);CHKERRQ(ierr);      /*   Z <- Az       */

    if (ksp->normtype == KSP_NORM_UNPRECONDITIONED) {
      ierr = VecNormEnd(r,NORM_2,&dp);CHKERRQ(ierr);
    } else if (ksp->normtype == KSP_NORM_PRECONDITIONED) {
      ierr = VecNormEnd(z,NORM_2,&dp);CHKERRQ(ierr);
    }
    ierr = VecDotEnd(r,z,&gammaNew);CHKERRQ(ierr);

    if (ksp->normtype == KSP_NORM_NATURAL) {
      KSPCheckDot(ksp,gammaNew);
      dp = PetscSqrtReal(PetscAbsScalar(gammaNew));                  /*     dp <- r'*z = r'*B*r = e'*A'*B*A*e */
    } else if (ksp->normtype == KSP_NORM_NONE) {
      dp = 0.0;
    }
    ksp->rnorm = dp;
    ierr = KSPLogResidualHistory(ksp,dp);CHKERRQ(ierr);
    ierr = KSPMonitor(ksp,i,dp);CHKERRQ(ierr);
    ierr = (*ksp->converged)(ksp,i,dp,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);
    if (ksp->reason) break;

    beta  = gammaNew / gamma;
    gamma = gammaNew;
    ierr  = VecAYPX(p,beta,z);CHKERRQ(ierr);   /*     p <- z + beta * p   */
    ierr  = VecAYPX(s,beta,Z);CHKERRQ(ierr);   /*     s <- Z + beta * s   */

  } while (i<ksp->max_it);

  if (i >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
  PetscFunctionReturn(0);
}
Exemplo n.º 23
0
static PetscErrorCode FormJacobian_All(SNES snes,Vec X,Mat J,Mat B,void *ctx)
{
  User           user = (User)ctx;
  DM             dau,dak;
  DMDALocalInfo  infou,infok;
  PetscScalar    *u,*k;
  PetscErrorCode ierr;
  Vec            Uloc,Kloc;

  PetscFunctionBeginUser;
  ierr = DMCompositeGetEntries(user->pack,&dau,&dak);CHKERRQ(ierr);
  ierr = DMDAGetLocalInfo(dau,&infou);CHKERRQ(ierr);
  ierr = DMDAGetLocalInfo(dak,&infok);CHKERRQ(ierr);
  ierr = DMCompositeGetLocalVectors(user->pack,&Uloc,&Kloc);CHKERRQ(ierr);
  switch (user->ptype) {
  case 0:
    ierr = DMGlobalToLocalBegin(dau,X,INSERT_VALUES,Uloc);CHKERRQ(ierr);
    ierr = DMGlobalToLocalEnd  (dau,X,INSERT_VALUES,Uloc);CHKERRQ(ierr);
    ierr = DMDAVecGetArray(dau,Uloc,&u);CHKERRQ(ierr);
    ierr = DMDAVecGetArray(dak,user->Kloc,&k);CHKERRQ(ierr);
    ierr = FormJacobianLocal_U(user,&infou,u,k,B);CHKERRQ(ierr);
    ierr = DMDAVecRestoreArray(dau,Uloc,&u);CHKERRQ(ierr);
    ierr = DMDAVecRestoreArray(dak,user->Kloc,&k);CHKERRQ(ierr);
    break;
  case 1:
    ierr = DMGlobalToLocalBegin(dak,X,INSERT_VALUES,Kloc);CHKERRQ(ierr);
    ierr = DMGlobalToLocalEnd  (dak,X,INSERT_VALUES,Kloc);CHKERRQ(ierr);
    ierr = DMDAVecGetArray(dau,user->Uloc,&u);CHKERRQ(ierr);
    ierr = DMDAVecGetArray(dak,Kloc,&k);CHKERRQ(ierr);
    ierr = FormJacobianLocal_K(user,&infok,u,k,B);CHKERRQ(ierr);
    ierr = DMDAVecRestoreArray(dau,user->Uloc,&u);CHKERRQ(ierr);
    ierr = DMDAVecRestoreArray(dak,Kloc,&k);CHKERRQ(ierr);
    break;
  case 2: {
    Mat Buu,Buk,Bku,Bkk;
    IS  *is;
    ierr = DMCompositeScatter(user->pack,X,Uloc,Kloc);CHKERRQ(ierr);
    ierr = DMDAVecGetArray(dau,Uloc,&u);CHKERRQ(ierr);
    ierr = DMDAVecGetArray(dak,Kloc,&k);CHKERRQ(ierr);
    ierr = DMCompositeGetLocalISs(user->pack,&is);CHKERRQ(ierr);
    ierr = MatGetLocalSubMatrix(B,is[0],is[0],&Buu);CHKERRQ(ierr);
    ierr = MatGetLocalSubMatrix(B,is[0],is[1],&Buk);CHKERRQ(ierr);
    ierr = MatGetLocalSubMatrix(B,is[1],is[0],&Bku);CHKERRQ(ierr);
    ierr = MatGetLocalSubMatrix(B,is[1],is[1],&Bkk);CHKERRQ(ierr);
    ierr = FormJacobianLocal_U(user,&infou,u,k,Buu);CHKERRQ(ierr);
    ierr = FormJacobianLocal_UK(user,&infou,&infok,u,k,Buk);CHKERRQ(ierr);
    ierr = FormJacobianLocal_KU(user,&infou,&infok,u,k,Bku);CHKERRQ(ierr);
    ierr = FormJacobianLocal_K(user,&infok,u,k,Bkk);CHKERRQ(ierr);
    ierr = MatRestoreLocalSubMatrix(B,is[0],is[0],&Buu);CHKERRQ(ierr);
    ierr = MatRestoreLocalSubMatrix(B,is[0],is[1],&Buk);CHKERRQ(ierr);
    ierr = MatRestoreLocalSubMatrix(B,is[1],is[0],&Bku);CHKERRQ(ierr);
    ierr = MatRestoreLocalSubMatrix(B,is[1],is[1],&Bkk);CHKERRQ(ierr);
    ierr = DMDAVecRestoreArray(dau,Uloc,&u);CHKERRQ(ierr);
    ierr = DMDAVecRestoreArray(dak,Kloc,&k);CHKERRQ(ierr);

    ierr = ISDestroy(&is[0]);CHKERRQ(ierr);
    ierr = ISDestroy(&is[1]);CHKERRQ(ierr);
    ierr = PetscFree(is);CHKERRQ(ierr);
  } break;
  }
  ierr = DMCompositeRestoreLocalVectors(user->pack,&Uloc,&Kloc);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd  (B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (J != B) {
    ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd  (J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
Exemplo n.º 24
0
/*@C
  AOCreateMapping - Creates a basic application mapping using two integer arrays.

  Input Parameters:
+ comm    - MPI communicator that is to share AO
. napp    - size of integer arrays
. myapp   - integer array that defines an ordering
- mypetsc - integer array that defines another ordering (may be NULL to indicate the identity ordering)

  Output Parameter:
. aoout   - the new application mapping

  Options Database Key:
. -ao_view : call AOView() at the conclusion of AOCreateMapping()

  Level: beginner

    Notes:
    the arrays myapp and mypetsc need NOT contain the all the integers 0 to napp-1, that is there CAN be "holes"  in the indices.
       Use AOCreateBasic() or AOCreateBasicIS() if they do not have holes for better performance.

.keywords: AO, create
.seealso: AOCreateBasic(), AOCreateBasic(), AOCreateMappingIS(), AODestroy()
@*/
PetscErrorCode  AOCreateMapping(MPI_Comm comm,PetscInt napp,const PetscInt myapp[],const PetscInt mypetsc[],AO *aoout)
{
  AO             ao;
  AO_Mapping     *aomap;
  PetscInt       *allpetsc,  *allapp;
  PetscInt       *petscPerm, *appPerm;
  PetscInt       *petsc;
  PetscMPIInt    size, rank,*lens, *disp,nnapp;
  PetscInt       N, start;
  PetscInt       i;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidPointer(aoout,5);
  *aoout = 0;
  ierr = AOInitializePackage();CHKERRQ(ierr);

  ierr     = PetscHeaderCreate(ao, AO_CLASSID, "AO", "Application Ordering", "AO", comm, AODestroy, AOView);CHKERRQ(ierr);
  ierr     = PetscNewLog(ao,&aomap);CHKERRQ(ierr);
  ierr     = PetscMemcpy(ao->ops, &AOps, sizeof(AOps));CHKERRQ(ierr);
  ao->data = (void*) aomap;

  /* transmit all lengths to all processors */
  ierr  = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
  ierr  = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
  ierr  = PetscMalloc2(size, &lens,size,&disp);CHKERRQ(ierr);
  nnapp = napp;
  ierr  = MPI_Allgather(&nnapp, 1, MPI_INT, lens, 1, MPI_INT, comm);CHKERRQ(ierr);
  N     = 0;
  for (i = 0; i < size; i++) {
    disp[i] = N;
    N      += lens[i];
  }
  aomap->N = N;
  ao->N    = N;
  ao->n    = N;

  /* If mypetsc is 0 then use "natural" numbering */
  if (!mypetsc) {
    start = disp[rank];
    ierr  = PetscMalloc1(napp+1, &petsc);CHKERRQ(ierr);
    for (i = 0; i < napp; i++) petsc[i] = start + i;
  } else {
    petsc = (PetscInt*)mypetsc;
  }

  /* get all indices on all processors */
  ierr = PetscMalloc4(N, &allapp,N,&appPerm,N,&allpetsc,N,&petscPerm);CHKERRQ(ierr);
  ierr = MPI_Allgatherv((void*)myapp, napp, MPIU_INT, allapp,   lens, disp, MPIU_INT, comm);CHKERRQ(ierr);
  ierr = MPI_Allgatherv((void*)petsc, napp, MPIU_INT, allpetsc, lens, disp, MPIU_INT, comm);CHKERRQ(ierr);
  ierr = PetscFree2(lens,disp);CHKERRQ(ierr);

  /* generate a list of application and PETSc node numbers */
  ierr = PetscMalloc4(N, &aomap->app,N,&aomap->appPerm,N,&aomap->petsc,N,&aomap->petscPerm);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)ao, 4*N * sizeof(PetscInt));CHKERRQ(ierr);
  for (i = 0; i < N; i++) {
    appPerm[i]   = i;
    petscPerm[i] = i;
  }
  ierr = PetscSortIntWithPermutation(N, allpetsc, petscPerm);CHKERRQ(ierr);
  ierr = PetscSortIntWithPermutation(N, allapp,   appPerm);CHKERRQ(ierr);
  /* Form sorted arrays of indices */
  for (i = 0; i < N; i++) {
    aomap->app[i]   = allapp[appPerm[i]];
    aomap->petsc[i] = allpetsc[petscPerm[i]];
  }
  /* Invert petscPerm[] into aomap->petscPerm[] */
  for (i = 0; i < N; i++) aomap->petscPerm[petscPerm[i]] = i;

  /* Form map between aomap->app[] and aomap->petsc[] */
  for (i = 0; i < N; i++) aomap->appPerm[i] = aomap->petscPerm[appPerm[i]];

  /* Invert appPerm[] into allapp[] */
  for (i = 0; i < N; i++) allapp[appPerm[i]] = i;

  /* Form map between aomap->petsc[] and aomap->app[] */
  for (i = 0; i < N; i++) aomap->petscPerm[i] = allapp[petscPerm[i]];

#if defined(PETSC_USE_DEBUG)
  /* Check that the permutations are complementary */
  for (i = 0; i < N; i++) {
    if (i != aomap->appPerm[aomap->petscPerm[i]]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Invalid ordering");
  }
#endif
  /* Cleanup */
  if (!mypetsc) {
    ierr = PetscFree(petsc);CHKERRQ(ierr);
  }
  ierr = PetscFree4(allapp,appPerm,allpetsc,petscPerm);CHKERRQ(ierr);

  ierr = AOViewFromOptions(ao,NULL,"-ao_view");CHKERRQ(ierr);

  *aoout = ao;
  PetscFunctionReturn(0);
}
Exemplo n.º 25
0
/*@C
    PetscViewerASCIIPrintf - Prints to a file, only from the first
    processor in the PetscViewer

    Not Collective, but only first processor in set has any effect

    Input Parameters:
+    viewer - optained with PetscViewerASCIIOpen()
-    format - the usual printf() format string

    Level: developer

    Fortran Note:
    The call sequence is PetscViewerASCIIPrintf(PetscViewer, character(*), int ierr) from Fortran.
    That is, you can only pass a single character string from Fortran.

  Concepts: PetscViewerASCII^printing
  Concepts: printing^to file
  Concepts: printf

.seealso: PetscPrintf(), PetscSynchronizedPrintf(), PetscViewerASCIIOpen(),
          PetscViewerASCIIPushTab(), PetscViewerASCIIPopTab(), PetscViewerASCIISynchronizedPrintf(),
          PetscViewerCreate(), PetscViewerDestroy(), PetscViewerSetType(), PetscViewerASCIIGetPointer(), PetscViewerASCIISynchronizedAllow()
@*/
PetscErrorCode  PetscViewerASCIIPrintf(PetscViewer viewer,const char format[],...)
{
  PetscViewer_ASCII *ascii = (PetscViewer_ASCII*)viewer->data;
  PetscMPIInt       rank;
  PetscInt          tab,intab = ascii->tab;
  PetscErrorCode    ierr;
  FILE              *fd = ascii->fd;
  PetscBool         iascii,issingleton = PETSC_FALSE;
  int               err;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,1);
  PetscValidCharPointer(format,2);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (!iascii) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not ASCII PetscViewer");
  if (ascii->bviewer) {
    viewer      = ascii->bviewer;
    ascii       = (PetscViewer_ASCII*)viewer->data;
    fd          = ascii->fd;
    issingleton = PETSC_TRUE;
  }

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)viewer),&rank);CHKERRQ(ierr);
  if (!rank) {
    va_list Argp;
    tab = intab;
    while (tab--) {
      ierr = PetscFPrintf(PETSC_COMM_SELF,fd,"  ");CHKERRQ(ierr);
    }

    va_start(Argp,format);
    ierr = (*PetscVFPrintf)(fd,format,Argp);CHKERRQ(ierr);
    err  = fflush(fd);
    if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
    if (petsc_history) {
      va_start(Argp,format);
      tab = intab;
      while (tab--) {
        ierr = PetscFPrintf(PETSC_COMM_SELF,petsc_history,"  ");CHKERRQ(ierr);
      }
      ierr = (*PetscVFPrintf)(petsc_history,format,Argp);CHKERRQ(ierr);
      err  = fflush(petsc_history);
      if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
    }
    va_end(Argp);
  } else if (issingleton) {
    char        *string;
    va_list     Argp;
    size_t      fullLength;
    PrintfQueue next;

    ierr = PetscNew(&next);CHKERRQ(ierr);
    if (petsc_printfqueue) {
      petsc_printfqueue->next = next;
      petsc_printfqueue       = next;
    } else {
      petsc_printfqueuebase = petsc_printfqueue = next;
    }
    petsc_printfqueuelength++;
    next->size = QUEUESTRINGSIZE;
    ierr       = PetscCalloc1(next->size, &next->string);CHKERRQ(ierr);
    string     = next->string;
    tab        = intab;
    tab       *= 2;
    while (tab--) {
      *string++ = ' ';
    }
    va_start(Argp,format);
    ierr = PetscVSNPrintf(string,next->size-2*ascii->tab,format,&fullLength,Argp);CHKERRQ(ierr);
    va_end(Argp);
  }
  PetscFunctionReturn(0);
}
Exemplo n.º 26
0
PetscErrorCode VecMDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
{
  Vec_Seq           *xv = (Vec_Seq *)xin->data;
  PetscErrorCode    ierr;
  PetscInt          i,nv_rem,n = xin->map->n;
  PetscScalar       sum0,sum1,sum2,sum3;
  const PetscScalar *yy0,*yy1,*yy2,*yy3,*x;
  Vec               *yy;

  PetscFunctionBegin;
  sum0 = 0.0;
  sum1 = 0.0;
  sum2 = 0.0;

  i      = nv;
  nv_rem = nv&0x3;
  yy     = (Vec*)yin;
  x      = xv->array;

  switch (nv_rem) {
  case 3:
    ierr = VecGetArray(yy[0],(PetscScalar**)&yy0);CHKERRQ(ierr);
    ierr = VecGetArray(yy[1],(PetscScalar**)&yy1);CHKERRQ(ierr);
    ierr = VecGetArray(yy[2],(PetscScalar**)&yy2);CHKERRQ(ierr);
    fortranmdot3_(x,yy0,yy1,yy2,&n,&sum0,&sum1,&sum2);
    ierr = VecRestoreArray(yy[0],(PetscScalar**)&yy0);CHKERRQ(ierr);
    ierr = VecRestoreArray(yy[1],(PetscScalar**)&yy1);CHKERRQ(ierr);
    ierr = VecRestoreArray(yy[2],(PetscScalar**)&yy2);CHKERRQ(ierr);
    z[0] = sum0;
    z[1] = sum1;
    z[2] = sum2;
    break;
  case 2:
    ierr = VecGetArray(yy[0],(PetscScalar**)&yy0);CHKERRQ(ierr);
    ierr = VecGetArray(yy[1],(PetscScalar**)&yy1);CHKERRQ(ierr);
    fortranmdot2_(x,yy0,yy1,&n,&sum0,&sum1);
    ierr = VecRestoreArray(yy[0],(PetscScalar**)&yy0);CHKERRQ(ierr);
    ierr = VecRestoreArray(yy[1],(PetscScalar**)&yy1);CHKERRQ(ierr);
    z[0] = sum0;
    z[1] = sum1;
    break;
  case 1:
    ierr = VecGetArray(yy[0],(PetscScalar**)&yy0);CHKERRQ(ierr);
    fortranmdot1_(x,yy0,&n,&sum0);
    ierr = VecRestoreArray(yy[0],(PetscScalar**)&yy0);CHKERRQ(ierr);
    z[0] = sum0;
    break;
  case 0:
    break;
  }
  z  += nv_rem;
  i  -= nv_rem;
  yy += nv_rem;

  while (i >0) {
    sum0 = 0.;
    sum1 = 0.;
    sum2 = 0.;
    sum3 = 0.;
    ierr = VecGetArray(yy[0],(PetscScalar**)&yy0);CHKERRQ(ierr);
    ierr = VecGetArray(yy[1],(PetscScalar**)&yy1);CHKERRQ(ierr);
    ierr = VecGetArray(yy[2],(PetscScalar**)&yy2);CHKERRQ(ierr);
    ierr = VecGetArray(yy[3],(PetscScalar**)&yy3);CHKERRQ(ierr);
    fortranmdot4_(x,yy0,yy1,yy2,yy3,&n,&sum0,&sum1,&sum2,&sum3);
    ierr = VecRestoreArray(yy[0],(PetscScalar**)&yy0);CHKERRQ(ierr);
    ierr = VecRestoreArray(yy[1],(PetscScalar**)&yy1);CHKERRQ(ierr);
    ierr = VecRestoreArray(yy[2],(PetscScalar**)&yy2);CHKERRQ(ierr);
    ierr = VecRestoreArray(yy[3],(PetscScalar**)&yy3);CHKERRQ(ierr);
    yy  += 4;
    z[0] = sum0;
    z[1] = sum1;
    z[2] = sum2;
    z[3] = sum3;
    z   += 4;
    i   -= 4;
  }
  ierr = PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
Exemplo n.º 27
0
/*@C
    PetscViewerASCIISynchronizedPrintf - Prints synchronized output to the specified file from
    several processors.  Output of the first processor is followed by that of the
    second, etc.

    Not Collective, must call collective PetscViewerFlush() to get the results out

    Input Parameters:
+   viewer - the ASCII PetscViewer
-   format - the usual printf() format string

    Level: intermediate

    Notes: You must have previously called PetscViewerASCIISynchronizeAllow() to allow this routine to be called.

    Fortran Note:
      Can only print a single character* string

.seealso: PetscSynchronizedPrintf(), PetscSynchronizedFlush(), PetscFPrintf(),
          PetscFOpen(), PetscViewerFlush(), PetscViewerASCIIGetPointer(), PetscViewerDestroy(), PetscViewerASCIIOpen(),
          PetscViewerASCIIPrintf(), PetscViewerASCIISynchronizedAllow()

@*/
PetscErrorCode  PetscViewerASCIISynchronizedPrintf(PetscViewer viewer,const char format[],...)
{
  PetscViewer_ASCII *vascii = (PetscViewer_ASCII*)viewer->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank,size;
  PetscInt          tab = vascii->tab;
  MPI_Comm          comm;
  FILE              *fp;
  PetscBool         iascii;
  int               err;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,1);
  PetscValidCharPointer(format,2);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (!iascii) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not ASCII PetscViewer");
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)viewer),&size);CHKERRQ(ierr);
  if (size > 1 && !vascii->allowsynchronized) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"First call PetscViewerASCIISynchronizedAllow() to allow this call");
  if (!viewer->ops->flush) PetscFunctionReturn(0); /* This viewer obtained via PetscViewerGetSubcomm_ASCII(), should not participate. */

  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  fp   = vascii->fd;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  /* First processor prints immediately to fp */
  if (!rank) {
    va_list Argp;

    while (tab--) {
      ierr = PetscFPrintf(PETSC_COMM_SELF,fp,"  ");CHKERRQ(ierr);
    }

    va_start(Argp,format);
    ierr = (*PetscVFPrintf)(fp,format,Argp);CHKERRQ(ierr);
    err  = fflush(fp);
    if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
    if (petsc_history) {
      va_start(Argp,format);
      ierr = (*PetscVFPrintf)(petsc_history,format,Argp);CHKERRQ(ierr);
      err  = fflush(petsc_history);
      if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file");
    }
    va_end(Argp);
  } else { /* other processors add to local queue */
    char        *string;
    va_list     Argp;
    size_t      fullLength;
    PrintfQueue next;

    ierr = PetscNew(&next);CHKERRQ(ierr);
    if (petsc_printfqueue) {
      petsc_printfqueue->next = next;
      petsc_printfqueue       = next;
    } else {
      petsc_printfqueuebase = petsc_printfqueue = next;
    }
    petsc_printfqueuelength++;
    next->size = QUEUESTRINGSIZE;
    ierr       = PetscMalloc1(next->size, &next->string);CHKERRQ(ierr);
    ierr       = PetscMemzero(next->string,next->size);CHKERRQ(ierr);
    string     = next->string;
    tab       *= 2;
    while (tab--) {
      *string++ = ' ';
    }
    va_start(Argp,format);
    ierr = PetscVSNPrintf(string,next->size-2*vascii->tab,format,&fullLength,Argp);CHKERRQ(ierr);
    va_end(Argp);
  }
  PetscFunctionReturn(0);
}
Exemplo n.º 28
0
PetscErrorCode VecMAXPY_Seq(Vec xin, PetscInt nv,const PetscScalar *alpha,Vec *y)
{
  Vec_Seq           *xdata = (Vec_Seq*)xin->data;
  PetscErrorCode    ierr;
  PetscInt          n = xin->map->n,j,j_rem;
  const PetscScalar *yy0,*yy1,*yy2,*yy3;
  PetscScalar       *xx,alpha0,alpha1,alpha2,alpha3;

#if defined(PETSC_HAVE_PRAGMA_DISJOINT)
#pragma disjoint(*xx,*yy0,*yy1,*yy2,*yy3,*alpha)
#endif

  PetscFunctionBegin;
  ierr = PetscLogFlops(nv*2.0*n);CHKERRQ(ierr);

  xx = xdata->array;  
  switch (j_rem=nv&0x3) {
  case 3: 
    ierr = VecGetArray(y[0],(PetscScalar**)&yy0);CHKERRQ(ierr);
    ierr = VecGetArray(y[1],(PetscScalar**)&yy1);CHKERRQ(ierr);
    ierr = VecGetArray(y[2],(PetscScalar**)&yy2);CHKERRQ(ierr);
    alpha0 = alpha[0]; 
    alpha1 = alpha[1]; 
    alpha2 = alpha[2]; 
    alpha += 3;
    PetscAXPY3(xx,alpha0,alpha1,alpha2,yy0,yy1,yy2,n);
    ierr = VecRestoreArray(y[0],(PetscScalar**)&yy0);CHKERRQ(ierr);
    ierr = VecRestoreArray(y[1],(PetscScalar**)&yy1);CHKERRQ(ierr);
    ierr = VecRestoreArray(y[2],(PetscScalar**)&yy2);CHKERRQ(ierr);
    y     += 3;
    break;
  case 2: 
    ierr = VecGetArray(y[0],(PetscScalar**)&yy0);CHKERRQ(ierr);
    ierr = VecGetArray(y[1],(PetscScalar**)&yy1);CHKERRQ(ierr);
    alpha0 = alpha[0]; 
    alpha1 = alpha[1]; 
    alpha +=2;
    PetscAXPY2(xx,alpha0,alpha1,yy0,yy1,n);
    ierr = VecRestoreArray(y[0],(PetscScalar**)&yy0);CHKERRQ(ierr);
    ierr = VecRestoreArray(y[1],(PetscScalar**)&yy1);CHKERRQ(ierr);
    y     +=2;
    break;
  case 1: 
    ierr = VecGetArray(y[0],(PetscScalar**)&yy0);CHKERRQ(ierr);
    alpha0 = *alpha++; 
    PetscAXPY(xx,alpha0,yy0,n);
    ierr = VecRestoreArray(y[0],(PetscScalar**)&yy0);CHKERRQ(ierr);
    y     +=1;
    break;
  }
  for (j=j_rem; j<nv; j+=4) {
    ierr = VecGetArray(y[0],(PetscScalar**)&yy0);CHKERRQ(ierr);
    ierr = VecGetArray(y[1],(PetscScalar**)&yy1);CHKERRQ(ierr);
    ierr = VecGetArray(y[2],(PetscScalar**)&yy2);CHKERRQ(ierr);
    ierr = VecGetArray(y[3],(PetscScalar**)&yy3);CHKERRQ(ierr);
    alpha0 = alpha[0];
    alpha1 = alpha[1];
    alpha2 = alpha[2];
    alpha3 = alpha[3];
    alpha  += 4;

    PetscAXPY4(xx,alpha0,alpha1,alpha2,alpha3,yy0,yy1,yy2,yy3,n);
    ierr = VecRestoreArray(y[0],(PetscScalar**)&yy0);CHKERRQ(ierr);
    ierr = VecRestoreArray(y[1],(PetscScalar**)&yy1);CHKERRQ(ierr);
    ierr = VecRestoreArray(y[2],(PetscScalar**)&yy2);CHKERRQ(ierr);
    ierr = VecRestoreArray(y[3],(PetscScalar**)&yy3);CHKERRQ(ierr);
    y      += 4;
  }
  PetscFunctionReturn(0);
} 
Exemplo n.º 29
0
PetscErrorCode PetscFEGetTabulation_Composite(PetscFE fem, PetscInt npoints, const PetscReal points[], PetscReal *B, PetscReal *D, PetscReal *H)
{
  PetscFE_Composite *cmp = (PetscFE_Composite *) fem->data;
  DM                 dm;
  PetscInt           pdim;  /* Dimension of FE space P */
  PetscInt           spdim; /* Dimension of subelement FE space P */
  PetscInt           dim;   /* Spatial dimension */
  PetscInt           comp;  /* Field components */
  PetscInt          *subpoints;
  PetscReal         *tmpB, *tmpD, *tmpH, *subpoint;
  PetscInt           p, s, d, e, j, k;
  PetscErrorCode     ierr;

  PetscFunctionBegin;
  ierr = PetscDualSpaceGetDM(fem->dualSpace, &dm);CHKERRQ(ierr);
  ierr = DMGetDimension(dm, &dim);CHKERRQ(ierr);
  ierr = PetscSpaceGetDimension(fem->basisSpace, &spdim);CHKERRQ(ierr);
  ierr = PetscDualSpaceGetDimension(fem->dualSpace, &pdim);CHKERRQ(ierr);
  ierr = PetscFEGetNumComponents(fem, &comp);CHKERRQ(ierr);
  /* Divide points into subelements */
  ierr = DMGetWorkArray(dm, npoints, MPIU_INT, &subpoints);CHKERRQ(ierr);
  ierr = DMGetWorkArray(dm, dim, MPIU_REAL, &subpoint);CHKERRQ(ierr);
  for (p = 0; p < npoints; ++p) {
    for (s = 0; s < cmp->numSubelements; ++s) {
      PetscBool inside;

      /* Apply transform, and check that point is inside cell */
      for (d = 0; d < dim; ++d) {
        subpoint[d] = -1.0;
        for (e = 0; e < dim; ++e) subpoint[d] += cmp->invjac[(s*dim + d)*dim+e]*(points[p*dim+e] - cmp->v0[s*dim+e]);
      }
      ierr = CellRefinerInCellTest_Internal(cmp->cellRefiner, subpoint, &inside);CHKERRQ(ierr);
      if (inside) {subpoints[p] = s; break;}
    }
    if (s >= cmp->numSubelements) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Point %d was not found in any subelement", p);
  }
  ierr = DMRestoreWorkArray(dm, dim, MPIU_REAL, &subpoint);CHKERRQ(ierr);
  /* Evaluate the prime basis functions at all points */
  if (B) {ierr = DMGetWorkArray(dm, npoints*spdim, MPIU_REAL, &tmpB);CHKERRQ(ierr);}
  if (D) {ierr = DMGetWorkArray(dm, npoints*spdim*dim, MPIU_REAL, &tmpD);CHKERRQ(ierr);}
  if (H) {ierr = DMGetWorkArray(dm, npoints*spdim*dim*dim, MPIU_REAL, &tmpH);CHKERRQ(ierr);}
  ierr = PetscSpaceEvaluate(fem->basisSpace, npoints, points, B ? tmpB : NULL, D ? tmpD : NULL, H ? tmpH : NULL);CHKERRQ(ierr);
  /* Translate to the nodal basis */
  if (B) {ierr = PetscMemzero(B, npoints*pdim*comp * sizeof(PetscReal));CHKERRQ(ierr);}
  if (D) {ierr = PetscMemzero(D, npoints*pdim*comp*dim * sizeof(PetscReal));CHKERRQ(ierr);}
  if (H) {ierr = PetscMemzero(H, npoints*pdim*comp*dim*dim * sizeof(PetscReal));CHKERRQ(ierr);}
  for (p = 0; p < npoints; ++p) {
    const PetscInt s = subpoints[p];

    if (B) {
      /* Multiply by V^{-1} (spdim x spdim) */
      for (j = 0; j < spdim; ++j) {
        const PetscInt i = (p*pdim + cmp->embedding[s*spdim+j])*comp;

        B[i] = 0.0;
        for (k = 0; k < spdim; ++k) {
          B[i] += fem->invV[(s*spdim + k)*spdim+j] * tmpB[p*spdim + k];
        }
      }
    }
    if (D) {
      /* Multiply by V^{-1} (spdim x spdim) */
      for (j = 0; j < spdim; ++j) {
        for (d = 0; d < dim; ++d) {
          const PetscInt i = ((p*pdim + cmp->embedding[s*spdim+j])*comp + 0)*dim + d;

          D[i] = 0.0;
          for (k = 0; k < spdim; ++k) {
            D[i] += fem->invV[(s*spdim + k)*spdim+j] * tmpD[(p*spdim + k)*dim + d];
          }
        }
      }
    }
    if (H) {
      /* Multiply by V^{-1} (pdim x pdim) */
      for (j = 0; j < spdim; ++j) {
        for (d = 0; d < dim*dim; ++d) {
          const PetscInt i = ((p*pdim + cmp->embedding[s*spdim+j])*comp + 0)*dim*dim + d;

          H[i] = 0.0;
          for (k = 0; k < spdim; ++k) {
            H[i] += fem->invV[(s*spdim + k)*spdim+j] * tmpH[(p*spdim + k)*dim*dim + d];
          }
        }
      }
    }
  }
  ierr = DMRestoreWorkArray(dm, npoints, MPIU_INT, &subpoints);CHKERRQ(ierr);
  if (B) {ierr = DMRestoreWorkArray(dm, npoints*spdim, MPIU_REAL, &tmpB);CHKERRQ(ierr);}
  if (D) {ierr = DMRestoreWorkArray(dm, npoints*spdim*dim, MPIU_REAL, &tmpD);CHKERRQ(ierr);}
  if (H) {ierr = DMRestoreWorkArray(dm, npoints*spdim*dim*dim, MPIU_REAL, &tmpH);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}
Exemplo n.º 30
0
PetscErrorCode MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat *C)
{
  /* Note: This code is virtually identical to that of MatApplyPtAP_SeqAIJ_Symbolic */
  /*        and MatMatMult_SeqAIJ_SeqAIJ_Symbolic.  Perhaps they could be merged nicely. */
  PetscErrorCode     ierr;
  PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
  Mat_SeqAIJ         *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
  PetscInt           *ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pti,*ptj,*ptjj;
  PetscInt           *ci,*cj,*paj,*padenserow,*pasparserow,*denserow,*sparserow;
  PetscInt           an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N;
  PetscInt           i,j,k,pnzi,arow,anzj,panzi,ptrow,ptnzj,cnzi;
  MatScalar          *ca;

  PetscFunctionBegin;
  /* some error checking which could be moved into interface layer */
  if (pn!=am) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",pn,am);
  if (am!=an) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %D != %D",am, an);

  /* Set up timers */
  ierr = PetscLogEventBegin(MAT_Applypapt_symbolic,A,P,0,0);CHKERRQ(ierr);

  /* Create ij structure of P^T */
  ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);

  /* Allocate ci array, arrays for fill computation and */
  /* free space for accumulating nonzero column info */
  ierr = PetscMalloc(((pm+1)*1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
  ci[0] = 0;

  ierr = PetscMalloc4(an,PetscInt,&padenserow,an,PetscInt,&pasparserow,pm,PetscInt,&denserow,pm,PetscInt,&sparserow);CHKERRQ(ierr);
  ierr = PetscMemzero(padenserow,an*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(pasparserow,an*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(denserow,pm*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(sparserow,pm*sizeof(PetscInt));CHKERRQ(ierr);

  /* Set initial free space to be nnz(A) scaled by aspect ratio of Pt. */
  /* This should be reasonable if sparsity of PAPt is similar to that of A. */
  ierr          = PetscFreeSpaceGet((ai[am]/pn)*pm,&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* Determine fill for each row of C: */
  for (i=0;i<pm;i++) {
    pnzi  = pi[i+1] - pi[i];
    panzi = 0;
    /* Get symbolic sparse row of PA: */
    for (j=0;j<pnzi;j++) {
      arow = *pj++;
      anzj = ai[arow+1] - ai[arow];
      ajj  = aj + ai[arow];
      for (k=0;k<anzj;k++) {
        if (!padenserow[ajj[k]]) {
          padenserow[ajj[k]]   = -1;
          pasparserow[panzi++] = ajj[k];
        }
      }
    }
    /* Using symbolic row of PA, determine symbolic row of C: */
    paj    = pasparserow;
    cnzi   = 0;
    for (j=0;j<panzi;j++) {
      ptrow = *paj++;
      ptnzj = pti[ptrow+1] - pti[ptrow];
      ptjj  = ptj + pti[ptrow];
      for (k=0;k<ptnzj;k++) {
        if (!denserow[ptjj[k]]) {
          denserow[ptjj[k]] = -1;
          sparserow[cnzi++] = ptjj[k];
        }
      }
    }

    /* sort sparse representation */
    ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);

    /* If free space is not available, make more free space */
    /* Double the amount of total space in the list */
    if (current_space->local_remaining<cnzi) {
      ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
    }

    /* Copy data into free space, and zero out dense row */
    ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
    current_space->array           += cnzi;
    current_space->local_used      += cnzi;
    current_space->local_remaining -= cnzi;

    for (j=0;j<panzi;j++) {
      padenserow[pasparserow[j]] = 0;
    }
    for (j=0;j<cnzi;j++) {
      denserow[sparserow[j]] = 0;
    }
    ci[i+1] = ci[i] + cnzi;
  }
  /* column indices are in the list of free space */
  /* Allocate space for cj, initialize cj, and */
  /* destroy list of free space and other temporary array(s) */
  ierr = PetscMalloc((ci[pm]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
  ierr = PetscFree4(padenserow,pasparserow,denserow,sparserow);CHKERRQ(ierr);

  /* Allocate space for ca */
  ierr = PetscMalloc((ci[pm]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,(ci[pm]+1)*sizeof(MatScalar));CHKERRQ(ierr);

  /* put together the new matrix */
  ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,pm,pm,ci,cj,ca,C);CHKERRQ(ierr);
  (*C)->rmap->bs = P->cmap->bs;
  (*C)->cmap->bs = P->cmap->bs;

  /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
  /* Since these are PETSc arrays, change flags to free them as necessary. */
  c = (Mat_SeqAIJ *)((*C)->data);
  c->free_a  = PETSC_TRUE;
  c->free_ij = PETSC_TRUE;
  c->nonew   = 0;

  /* Clean up. */
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);

  ierr = PetscLogEventEnd(MAT_Applypapt_symbolic,A,P,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}