PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat R,PetscReal fill,Mat *C) { PetscErrorCode ierr; Mat P; PetscInt *rti,*rtj; Mat_RARt *rart; PetscContainer container; MatTransposeColoring matcoloring; ISColoring iscoloring; Mat Rt_dense,RARt_dense; PetscLogDouble GColor=0.0,MCCreate=0.0,MDenCreate=0.0,t0,tf,etime=0.0; Mat_SeqAIJ *c; PetscFunctionBegin; ierr = PetscGetTime(&t0);CHKERRQ(ierr); /* create symbolic P=Rt */ ierr = MatGetSymbolicTranspose_SeqAIJ(R,&rti,&rtj);CHKERRQ(ierr); ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,R->cmap->n,R->rmap->n,rti,rtj,PETSC_NULL,&P);CHKERRQ(ierr); /* get symbolic C=Pt*A*P */ ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr); (*C)->rmap->bs = R->rmap->bs; (*C)->cmap->bs = R->rmap->bs; /* create a supporting struct */ ierr = PetscNew(Mat_RARt,&rart);CHKERRQ(ierr); /* attach the supporting struct to C */ ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); ierr = PetscContainerSetPointer(container,rart);CHKERRQ(ierr); ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_Mat_RARt);CHKERRQ(ierr); ierr = PetscObjectCompose((PetscObject)(*C),"Mat_RARt",(PetscObject)container);CHKERRQ(ierr); ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); ierr = PetscGetTime(&tf);CHKERRQ(ierr); etime += tf - t0; /* Create MatTransposeColoring from symbolic C=R*A*R^T */ c=(Mat_SeqAIJ*)(*C)->data; ierr = PetscGetTime(&t0);CHKERRQ(ierr); ierr = MatGetColoring(*C,MATCOLORINGLF,&iscoloring);CHKERRQ(ierr); ierr = PetscGetTime(&tf);CHKERRQ(ierr); GColor += tf - t0; ierr = PetscGetTime(&t0);CHKERRQ(ierr); ierr = MatTransposeColoringCreate(*C,iscoloring,&matcoloring);CHKERRQ(ierr); rart->matcoloring = matcoloring; ierr = ISColoringDestroy(&iscoloring);CHKERRQ(ierr); ierr = PetscGetTime(&tf);CHKERRQ(ierr); MCCreate += tf - t0; ierr = PetscGetTime(&t0);CHKERRQ(ierr); /* Create Rt_dense */ ierr = MatCreate(PETSC_COMM_SELF,&Rt_dense);CHKERRQ(ierr); ierr = MatSetSizes(Rt_dense,A->cmap->n,matcoloring->ncolors,A->cmap->n,matcoloring->ncolors);CHKERRQ(ierr); ierr = MatSetType(Rt_dense,MATSEQDENSE);CHKERRQ(ierr); ierr = MatSeqDenseSetPreallocation(Rt_dense,PETSC_NULL);CHKERRQ(ierr); Rt_dense->assembled = PETSC_TRUE; rart->Rt = Rt_dense; /* Create RARt_dense = R*A*Rt_dense */ ierr = MatCreate(PETSC_COMM_SELF,&RARt_dense);CHKERRQ(ierr); ierr = MatSetSizes(RARt_dense,(*C)->rmap->n,matcoloring->ncolors,(*C)->rmap->n,matcoloring->ncolors);CHKERRQ(ierr); ierr = MatSetType(RARt_dense,MATSEQDENSE);CHKERRQ(ierr); ierr = MatSeqDenseSetPreallocation(RARt_dense,PETSC_NULL);CHKERRQ(ierr); rart->RARt = RARt_dense; /* Allocate work array to store columns of A*R^T used in MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense() */ ierr = PetscMalloc(A->rmap->n*4*sizeof(PetscScalar),&rart->work);CHKERRQ(ierr); ierr = PetscGetTime(&tf);CHKERRQ(ierr); MDenCreate += tf - t0; rart->destroy = (*C)->ops->destroy; (*C)->ops->destroy = MatDestroy_SeqAIJ_RARt; /* clean up */ ierr = MatRestoreSymbolicTranspose_SeqAIJ(R,&rti,&rtj);CHKERRQ(ierr); ierr = MatDestroy(&P);CHKERRQ(ierr); #if defined(PETSC_USE_INFO) { PetscReal density= (PetscReal)(c->nz)/(RARt_dense->rmap->n*RARt_dense->cmap->n); ierr = PetscInfo6(*C,"RARt_den %D %D; Rt_den %D %D, (RARt->nz %D)/(m*ncolors)=%g\n",RARt_dense->rmap->n,RARt_dense->cmap->n,Rt_dense->rmap->n,Rt_dense->cmap->n,c->nz,density);CHKERRQ(ierr); ierr = PetscInfo5(*C,"Sym = GetColor %g + MColorCreate %g + MDenCreate %g + other %g = %g\n",GColor,MCCreate,MDenCreate,etime,GColor+MCCreate+MDenCreate+etime);CHKERRQ(ierr); } #endif PetscFunctionReturn(0); }
PetscErrorCode PetscViewerFileSetName_ASCII(PetscViewer viewer,const char name[]) { PetscErrorCode ierr; size_t len; char fname[PETSC_MAX_PATH_LEN],*gz; PetscViewer_ASCII *vascii = (PetscViewer_ASCII*)viewer->data; PetscBool isstderr,isstdout; PetscMPIInt rank; PetscFunctionBegin; ierr = PetscViewerFileClose_ASCII(viewer);CHKERRQ(ierr); if (!name) PetscFunctionReturn(0); ierr = PetscStrallocpy(name,&vascii->filename);CHKERRQ(ierr); /* Is this file to be compressed */ vascii->storecompressed = PETSC_FALSE; ierr = PetscStrstr(vascii->filename,".gz",&gz);CHKERRQ(ierr); if (gz) { ierr = PetscStrlen(gz,&len);CHKERRQ(ierr); if (len == 3) { *gz = 0; vascii->storecompressed = PETSC_TRUE; } } ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)viewer),&rank);CHKERRQ(ierr); if (!rank) { ierr = PetscStrcmp(name,"stderr",&isstderr);CHKERRQ(ierr); ierr = PetscStrcmp(name,"stdout",&isstdout);CHKERRQ(ierr); /* empty filename means stdout */ if (name[0] == 0) isstdout = PETSC_TRUE; if (isstderr) vascii->fd = PETSC_STDERR; else if (isstdout) vascii->fd = PETSC_STDOUT; else { ierr = PetscFixFilename(name,fname);CHKERRQ(ierr); switch (vascii->mode) { case FILE_MODE_READ: vascii->fd = fopen(fname,"r"); break; case FILE_MODE_WRITE: vascii->fd = fopen(fname,"w"); break; case FILE_MODE_APPEND: vascii->fd = fopen(fname,"a"); break; case FILE_MODE_UPDATE: vascii->fd = fopen(fname,"r+"); if (!vascii->fd) vascii->fd = fopen(fname,"w+"); break; case FILE_MODE_APPEND_UPDATE: /* I really want a file which is opened at the end for updating, not a+, which opens at the beginning, but makes writes at the end. */ vascii->fd = fopen(fname,"r+"); if (!vascii->fd) vascii->fd = fopen(fname,"w+"); else { ierr = fseek(vascii->fd, 0, SEEK_END);CHKERRQ(ierr); } break; default: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG, "Invalid file mode %d", vascii->mode); } if (!vascii->fd) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Cannot open PetscViewer file: %s",fname); } } #if defined(PETSC_USE_LOG) PetscLogObjectState((PetscObject)viewer,"File: %s",name); #endif PetscFunctionReturn(0); }
PetscErrorCode PetscFESetUp_Composite(PetscFE fem) { PetscFE_Composite *cmp = (PetscFE_Composite *) fem->data; DM K; PetscReal *subpoint; PetscBLASInt *pivots; PetscBLASInt n, info; PetscScalar *work, *invVscalar; PetscInt dim, pdim, spdim, j, s; PetscErrorCode ierr; PetscFunctionBegin; /* Get affine mapping from reference cell to each subcell */ ierr = PetscDualSpaceGetDM(fem->dualSpace, &K);CHKERRQ(ierr); ierr = DMGetDimension(K, &dim);CHKERRQ(ierr); ierr = DMPlexGetCellRefiner_Internal(K, &cmp->cellRefiner);CHKERRQ(ierr); ierr = CellRefinerGetAffineTransforms_Internal(cmp->cellRefiner, &cmp->numSubelements, &cmp->v0, &cmp->jac, &cmp->invjac);CHKERRQ(ierr); /* Determine dof embedding into subelements */ ierr = PetscDualSpaceGetDimension(fem->dualSpace, &pdim);CHKERRQ(ierr); ierr = PetscSpaceGetDimension(fem->basisSpace, &spdim);CHKERRQ(ierr); ierr = PetscMalloc1(cmp->numSubelements*spdim,&cmp->embedding);CHKERRQ(ierr); ierr = DMGetWorkArray(K, dim, MPIU_REAL, &subpoint);CHKERRQ(ierr); for (s = 0; s < cmp->numSubelements; ++s) { PetscInt sd = 0; for (j = 0; j < pdim; ++j) { PetscBool inside; PetscQuadrature f; PetscInt d, e; ierr = PetscDualSpaceGetFunctional(fem->dualSpace, j, &f);CHKERRQ(ierr); /* Apply transform to first point, and check that point is inside subcell */ for (d = 0; d < dim; ++d) { subpoint[d] = -1.0; for (e = 0; e < dim; ++e) subpoint[d] += cmp->invjac[(s*dim + d)*dim+e]*(f->points[e] - cmp->v0[s*dim+e]); } ierr = CellRefinerInCellTest_Internal(cmp->cellRefiner, subpoint, &inside);CHKERRQ(ierr); if (inside) {cmp->embedding[s*spdim+sd++] = j;} } if (sd != spdim) SETERRQ3(PetscObjectComm((PetscObject) fem), PETSC_ERR_PLIB, "Subelement %d has %d dual basis vectors != %d", s, sd, spdim); } ierr = DMRestoreWorkArray(K, dim, MPIU_REAL, &subpoint);CHKERRQ(ierr); /* Construct the change of basis from prime basis to nodal basis for each subelement */ ierr = PetscMalloc1(cmp->numSubelements*spdim*spdim,&fem->invV);CHKERRQ(ierr); ierr = PetscMalloc2(spdim,&pivots,spdim,&work);CHKERRQ(ierr); #if defined(PETSC_USE_COMPLEX) ierr = PetscMalloc1(cmp->numSubelements*spdim*spdim,&invVscalar);CHKERRQ(ierr); #else invVscalar = fem->invV; #endif for (s = 0; s < cmp->numSubelements; ++s) { for (j = 0; j < spdim; ++j) { PetscReal *Bf; PetscQuadrature f; const PetscReal *points, *weights; PetscInt Nc, Nq, q, k; ierr = PetscDualSpaceGetFunctional(fem->dualSpace, cmp->embedding[s*spdim+j], &f);CHKERRQ(ierr); ierr = PetscQuadratureGetData(f, NULL, &Nc, &Nq, &points, &weights);CHKERRQ(ierr); ierr = PetscMalloc1(f->numPoints*spdim*Nc,&Bf);CHKERRQ(ierr); ierr = PetscSpaceEvaluate(fem->basisSpace, Nq, points, Bf, NULL, NULL);CHKERRQ(ierr); for (k = 0; k < spdim; ++k) { /* n_j \cdot \phi_k */ invVscalar[(s*spdim + j)*spdim+k] = 0.0; for (q = 0; q < Nq; ++q) { invVscalar[(s*spdim + j)*spdim+k] += Bf[q*spdim+k]*weights[q]; } } ierr = PetscFree(Bf);CHKERRQ(ierr); } n = spdim; PetscStackCallBLAS("LAPACKgetrf", LAPACKgetrf_(&n, &n, &invVscalar[s*spdim*spdim], &n, pivots, &info)); PetscStackCallBLAS("LAPACKgetri", LAPACKgetri_(&n, &invVscalar[s*spdim*spdim], &n, pivots, work, &n, &info)); } #if defined(PETSC_USE_COMPLEX) for (s = 0; s <cmp->numSubelements*spdim*spdim; s++) fem->invV[s] = PetscRealPart(invVscalar[s]); ierr = PetscFree(invVscalar);CHKERRQ(ierr); #endif ierr = PetscFree2(pivots,work);CHKERRQ(ierr); PetscFunctionReturn(0); }
PetscErrorCode MatGetSubMatrices_MPIDense_Local(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submats) { Mat_MPIDense *c = (Mat_MPIDense*)C->data; Mat A = c->A; Mat_SeqDense *a = (Mat_SeqDense*)A->data,*mat; PetscErrorCode ierr; PetscMPIInt rank,size,tag0,tag1,idex,end,i; PetscInt N = C->cmap->N,rstart = C->rmap->rstart,count; const PetscInt **irow,**icol,*irow_i; PetscInt *nrow,*ncol,*w1,*w3,*w4,*rtable,start; PetscInt **sbuf1,m,j,k,l,ct1,**rbuf1,row,proc; PetscInt nrqs,msz,**ptr,*ctr,*pa,*tmp,bsz,nrqr; PetscInt is_no,jmax,**rmap,*rmap_i; PetscInt ctr_j,*sbuf1_j,*rbuf1_i; MPI_Request *s_waits1,*r_waits1,*s_waits2,*r_waits2; MPI_Status *r_status1,*r_status2,*s_status1,*s_status2; MPI_Comm comm; PetscScalar **rbuf2,**sbuf2; PetscBool sorted; PetscFunctionBegin; ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr); tag0 = ((PetscObject)C)->tag; size = c->size; rank = c->rank; m = C->rmap->N; /* Get some new tags to keep the communication clean */ ierr = PetscObjectGetNewTag((PetscObject)C,&tag1);CHKERRQ(ierr); /* Check if the col indices are sorted */ for (i=0; i<ismax; i++) { ierr = ISSorted(isrow[i],&sorted);CHKERRQ(ierr); if (!sorted) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"ISrow is not sorted"); ierr = ISSorted(iscol[i],&sorted);CHKERRQ(ierr); if (!sorted) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"IScol is not sorted"); } ierr = PetscMalloc5(ismax,const PetscInt*,&irow,ismax,const PetscInt*,&icol,ismax,PetscInt,&nrow,ismax,PetscInt,&ncol,m,PetscInt,&rtable);CHKERRQ(ierr); for (i=0; i<ismax; i++) { ierr = ISGetIndices(isrow[i],&irow[i]);CHKERRQ(ierr); ierr = ISGetIndices(iscol[i],&icol[i]);CHKERRQ(ierr); ierr = ISGetLocalSize(isrow[i],&nrow[i]);CHKERRQ(ierr); ierr = ISGetLocalSize(iscol[i],&ncol[i]);CHKERRQ(ierr); } /* Create hash table for the mapping :row -> proc*/ for (i=0,j=0; i<size; i++) { jmax = C->rmap->range[i+1]; for (; j<jmax; j++) rtable[j] = i; } /* evaluate communication - mesg to who,length of mesg, and buffer space required. Based on this, buffers are allocated, and data copied into them*/ ierr = PetscMalloc3(2*size,PetscInt,&w1,size,PetscInt,&w3,size,PetscInt,&w4);CHKERRQ(ierr); ierr = PetscMemzero(w1,size*2*sizeof(PetscInt));CHKERRQ(ierr); /* initialize work vector*/ ierr = PetscMemzero(w3,size*sizeof(PetscInt));CHKERRQ(ierr); /* initialize work vector*/ for (i=0; i<ismax; i++) { ierr = PetscMemzero(w4,size*sizeof(PetscInt));CHKERRQ(ierr); /* initialize work vector*/ jmax = nrow[i]; irow_i = irow[i]; for (j=0; j<jmax; j++) { row = irow_i[j]; proc = rtable[row]; w4[proc]++; } for (j=0; j<size; j++) { if (w4[j]) { w1[2*j] += w4[j]; w3[j]++;} } } nrqs = 0; /* no of outgoing messages */ msz = 0; /* total mesg length (for all procs) */ w1[2*rank] = 0; /* no mesg sent to self */ w3[rank] = 0; for (i=0; i<size; i++) { if (w1[2*i]) { w1[2*i+1] = 1; nrqs++;} /* there exists a message to proc i */ } ierr = PetscMalloc((nrqs+1)*sizeof(PetscInt),&pa);CHKERRQ(ierr); /*(proc -array)*/ for (i=0,j=0; i<size; i++) { if (w1[2*i]) { pa[j] = i; j++; } } /* Each message would have a header = 1 + 2*(no of IS) + data */ for (i=0; i<nrqs; i++) { j = pa[i]; w1[2*j] += w1[2*j+1] + 2* w3[j]; msz += w1[2*j]; } /* Do a global reduction to determine how many messages to expect*/ ierr = PetscMaxSum(comm,w1,&bsz,&nrqr);CHKERRQ(ierr); /* Allocate memory for recv buffers . Make sure rbuf1[0] exists by adding 1 to the buffer length */ ierr = PetscMalloc((nrqr+1)*sizeof(PetscInt*),&rbuf1);CHKERRQ(ierr); ierr = PetscMalloc(nrqr*bsz*sizeof(PetscInt),&rbuf1[0]);CHKERRQ(ierr); for (i=1; i<nrqr; ++i) rbuf1[i] = rbuf1[i-1] + bsz; /* Post the receives */ ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Request),&r_waits1);CHKERRQ(ierr); for (i=0; i<nrqr; ++i) { ierr = MPI_Irecv(rbuf1[i],bsz,MPIU_INT,MPI_ANY_SOURCE,tag0,comm,r_waits1+i);CHKERRQ(ierr); } /* Allocate Memory for outgoing messages */ ierr = PetscMalloc4(size,PetscInt*,&sbuf1,size,PetscInt*,&ptr,2*msz,PetscInt,&tmp,size,PetscInt,&ctr);CHKERRQ(ierr); ierr = PetscMemzero(sbuf1,size*sizeof(PetscInt*));CHKERRQ(ierr); ierr = PetscMemzero(ptr,size*sizeof(PetscInt*));CHKERRQ(ierr); { PetscInt *iptr = tmp,ict = 0; for (i=0; i<nrqs; i++) { j = pa[i]; iptr += ict; sbuf1[j] = iptr; ict = w1[2*j]; } } /* Form the outgoing messages */ /* Initialize the header space */ for (i=0; i<nrqs; i++) { j = pa[i]; sbuf1[j][0] = 0; ierr = PetscMemzero(sbuf1[j]+1,2*w3[j]*sizeof(PetscInt));CHKERRQ(ierr); ptr[j] = sbuf1[j] + 2*w3[j] + 1; } /* Parse the isrow and copy data into outbuf */ for (i=0; i<ismax; i++) { ierr = PetscMemzero(ctr,size*sizeof(PetscInt));CHKERRQ(ierr); irow_i = irow[i]; jmax = nrow[i]; for (j=0; j<jmax; j++) { /* parse the indices of each IS */ row = irow_i[j]; proc = rtable[row]; if (proc != rank) { /* copy to the outgoing buf*/ ctr[proc]++; *ptr[proc] = row; ptr[proc]++; } } /* Update the headers for the current IS */ for (j=0; j<size; j++) { /* Can Optimise this loop too */ if ((ctr_j = ctr[j])) { sbuf1_j = sbuf1[j]; k = ++sbuf1_j[0]; sbuf1_j[2*k] = ctr_j; sbuf1_j[2*k-1] = i; } } } /* Now post the sends */ ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Request),&s_waits1);CHKERRQ(ierr); for (i=0; i<nrqs; ++i) { j = pa[i]; ierr = MPI_Isend(sbuf1[j],w1[2*j],MPIU_INT,j,tag0,comm,s_waits1+i);CHKERRQ(ierr); } /* Post recieves to capture the row_data from other procs */ ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits2);CHKERRQ(ierr); ierr = PetscMalloc((nrqs+1)*sizeof(PetscScalar*),&rbuf2);CHKERRQ(ierr); for (i=0; i<nrqs; i++) { j = pa[i]; count = (w1[2*j] - (2*sbuf1[j][0] + 1))*N; ierr = PetscMalloc((count+1)*sizeof(PetscScalar),&rbuf2[i]);CHKERRQ(ierr); ierr = MPI_Irecv(rbuf2[i],count,MPIU_SCALAR,j,tag1,comm,r_waits2+i);CHKERRQ(ierr); } /* Receive messages(row_nos) and then, pack and send off the rowvalues to the correct processors */ ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits2);CHKERRQ(ierr); ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Status),&r_status1);CHKERRQ(ierr); ierr = PetscMalloc((nrqr+1)*sizeof(PetscScalar*),&sbuf2);CHKERRQ(ierr); { PetscScalar *sbuf2_i,*v_start; PetscInt s_proc; for (i=0; i<nrqr; ++i) { ierr = MPI_Waitany(nrqr,r_waits1,&idex,r_status1+i);CHKERRQ(ierr); s_proc = r_status1[i].MPI_SOURCE; /* send processor */ rbuf1_i = rbuf1[idex]; /* Actual message from s_proc */ /* no of rows = end - start; since start is array idex[], 0idex, whel end is length of the buffer - which is 1idex */ start = 2*rbuf1_i[0] + 1; ierr = MPI_Get_count(r_status1+i,MPIU_INT,&end);CHKERRQ(ierr); /* allocate memory sufficinet to hold all the row values */ ierr = PetscMalloc((end-start)*N*sizeof(PetscScalar),&sbuf2[idex]);CHKERRQ(ierr); sbuf2_i = sbuf2[idex]; /* Now pack the data */ for (j=start; j<end; j++) { row = rbuf1_i[j] - rstart; v_start = a->v + row; for (k=0; k<N; k++) { sbuf2_i[0] = v_start[0]; sbuf2_i++; v_start += C->rmap->n; } } /* Now send off the data */ ierr = MPI_Isend(sbuf2[idex],(end-start)*N,MPIU_SCALAR,s_proc,tag1,comm,s_waits2+i);CHKERRQ(ierr); } } /* End Send-Recv of IS + row_numbers */ ierr = PetscFree(r_status1);CHKERRQ(ierr); ierr = PetscFree(r_waits1);CHKERRQ(ierr); ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Status),&s_status1);CHKERRQ(ierr); if (nrqs) {ierr = MPI_Waitall(nrqs,s_waits1,s_status1);CHKERRQ(ierr);} ierr = PetscFree(s_status1);CHKERRQ(ierr); ierr = PetscFree(s_waits1);CHKERRQ(ierr); /* Create the submatrices */ if (scall == MAT_REUSE_MATRIX) { for (i=0; i<ismax; i++) { mat = (Mat_SeqDense*)(submats[i]->data); if ((submats[i]->rmap->n != nrow[i]) || (submats[i]->cmap->n != ncol[i])) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong size"); ierr = PetscMemzero(mat->v,submats[i]->rmap->n*submats[i]->cmap->n*sizeof(PetscScalar));CHKERRQ(ierr); submats[i]->factortype = C->factortype; } } else { for (i=0; i<ismax; i++) { ierr = MatCreate(PETSC_COMM_SELF,submats+i);CHKERRQ(ierr); ierr = MatSetSizes(submats[i],nrow[i],ncol[i],nrow[i],ncol[i]);CHKERRQ(ierr); ierr = MatSetType(submats[i],((PetscObject)A)->type_name);CHKERRQ(ierr); ierr = MatSeqDenseSetPreallocation(submats[i],NULL);CHKERRQ(ierr); } } /* Assemble the matrices */ { PetscInt col; PetscScalar *imat_v,*mat_v,*imat_vi,*mat_vi; for (i=0; i<ismax; i++) { mat = (Mat_SeqDense*)submats[i]->data; mat_v = a->v; imat_v = mat->v; irow_i = irow[i]; m = nrow[i]; for (j=0; j<m; j++) { row = irow_i[j]; proc = rtable[row]; if (proc == rank) { row = row - rstart; mat_vi = mat_v + row; imat_vi = imat_v + j; for (k=0; k<ncol[i]; k++) { col = icol[i][k]; imat_vi[k*m] = mat_vi[col*C->rmap->n]; } } } } } /* Create row map-> This maps c->row to submat->row for each submat*/ /* this is a very expensive operation wrt memory usage */ ierr = PetscMalloc(ismax*sizeof(PetscInt*),&rmap);CHKERRQ(ierr); ierr = PetscMalloc(ismax*C->rmap->N*sizeof(PetscInt),&rmap[0]);CHKERRQ(ierr); ierr = PetscMemzero(rmap[0],ismax*C->rmap->N*sizeof(PetscInt));CHKERRQ(ierr); for (i=1; i<ismax; i++) rmap[i] = rmap[i-1] + C->rmap->N; for (i=0; i<ismax; i++) { rmap_i = rmap[i]; irow_i = irow[i]; jmax = nrow[i]; for (j=0; j<jmax; j++) { rmap_i[irow_i[j]] = j; } } /* Now Receive the row_values and assemble the rest of the matrix */ ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status2);CHKERRQ(ierr); { PetscInt is_max,tmp1,col,*sbuf1_i,is_sz; PetscScalar *rbuf2_i,*imat_v,*imat_vi; for (tmp1=0; tmp1<nrqs; tmp1++) { /* For each message */ ierr = MPI_Waitany(nrqs,r_waits2,&i,r_status2+tmp1);CHKERRQ(ierr); /* Now dig out the corresponding sbuf1, which contains the IS data_structure */ sbuf1_i = sbuf1[pa[i]]; is_max = sbuf1_i[0]; ct1 = 2*is_max+1; rbuf2_i = rbuf2[i]; for (j=1; j<=is_max; j++) { /* For each IS belonging to the message */ is_no = sbuf1_i[2*j-1]; is_sz = sbuf1_i[2*j]; mat = (Mat_SeqDense*)submats[is_no]->data; imat_v = mat->v; rmap_i = rmap[is_no]; m = nrow[is_no]; for (k=0; k<is_sz; k++,rbuf2_i+=N) { /* For each row */ row = sbuf1_i[ct1]; ct1++; row = rmap_i[row]; imat_vi = imat_v + row; for (l=0; l<ncol[is_no]; l++) { /* For each col */ col = icol[is_no][l]; imat_vi[l*m] = rbuf2_i[col]; } } } } } /* End Send-Recv of row_values */ ierr = PetscFree(r_status2);CHKERRQ(ierr); ierr = PetscFree(r_waits2);CHKERRQ(ierr); ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status2);CHKERRQ(ierr); if (nrqr) {ierr = MPI_Waitall(nrqr,s_waits2,s_status2);CHKERRQ(ierr);} ierr = PetscFree(s_status2);CHKERRQ(ierr); ierr = PetscFree(s_waits2);CHKERRQ(ierr); /* Restore the indices */ for (i=0; i<ismax; i++) { ierr = ISRestoreIndices(isrow[i],irow+i);CHKERRQ(ierr); ierr = ISRestoreIndices(iscol[i],icol+i);CHKERRQ(ierr); } /* Destroy allocated memory */ ierr = PetscFree5(irow,icol,nrow,ncol,rtable);CHKERRQ(ierr); ierr = PetscFree3(w1,w3,w4);CHKERRQ(ierr); ierr = PetscFree(pa);CHKERRQ(ierr); for (i=0; i<nrqs; ++i) { ierr = PetscFree(rbuf2[i]);CHKERRQ(ierr); } ierr = PetscFree(rbuf2);CHKERRQ(ierr); ierr = PetscFree4(sbuf1,ptr,tmp,ctr);CHKERRQ(ierr); ierr = PetscFree(rbuf1[0]);CHKERRQ(ierr); ierr = PetscFree(rbuf1);CHKERRQ(ierr); for (i=0; i<nrqr; ++i) { ierr = PetscFree(sbuf2[i]);CHKERRQ(ierr); } ierr = PetscFree(sbuf2);CHKERRQ(ierr); ierr = PetscFree(rmap[0]);CHKERRQ(ierr); ierr = PetscFree(rmap);CHKERRQ(ierr); for (i=0; i<ismax; i++) { ierr = MatAssemblyBegin(submats[i],MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(submats[i],MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); } PetscFunctionReturn(0); }
static PetscErrorCode KSPSolve_CR(KSP ksp) { PetscErrorCode ierr; PetscInt i = 0; MatStructure pflag; PetscReal dp; PetscScalar ai, bi; PetscScalar apq,btop, bbot; Vec X,B,R,RT,P,AP,ART,Q; Mat Amat, Pmat; PetscFunctionBegin; X = ksp->vec_sol; B = ksp->vec_rhs; R = ksp->work[0]; RT = ksp->work[1]; P = ksp->work[2]; AP = ksp->work[3]; ART = ksp->work[4]; Q = ksp->work[5]; /* R is the true residual norm, RT is the preconditioned residual norm */ ierr = PCGetOperators(ksp->pc,&Amat,&Pmat,&pflag);CHKERRQ(ierr); if (!ksp->guess_zero) { ierr = KSP_MatMult(ksp,Amat,X,R);CHKERRQ(ierr); /* R <- A*X */ ierr = VecAYPX(R,-1.0,B);CHKERRQ(ierr); /* R <- B-R == B-A*X */ } else { ierr = VecCopy(B,R);CHKERRQ(ierr); /* R <- B (X is 0) */ } ierr = KSP_PCApply(ksp,R,P);CHKERRQ(ierr); /* P <- B*R */ ierr = KSP_MatMult(ksp,Amat,P,AP);CHKERRQ(ierr); /* AP <- A*P */ ierr = VecCopy(P,RT);CHKERRQ(ierr); /* RT <- P */ ierr = VecCopy(AP,ART);CHKERRQ(ierr); /* ART <- AP */ ierr = VecDotBegin(RT,ART,&btop);CHKERRQ(ierr); /* (RT,ART) */ if (ksp->normtype == KSP_NORM_PRECONDITIONED) { ierr = VecNormBegin(RT,NORM_2,&dp);CHKERRQ(ierr); /* dp <- RT'*RT */ ierr = VecDotEnd (RT,ART,&btop) ;CHKERRQ(ierr); /* (RT,ART) */ ierr = VecNormEnd (RT,NORM_2,&dp);CHKERRQ(ierr); /* dp <- RT'*RT */ } else if (ksp->normtype == KSP_NORM_UNPRECONDITIONED) { ierr = VecNormBegin(R,NORM_2,&dp);CHKERRQ(ierr); /* dp <- R'*R */ ierr = VecDotEnd (RT,ART,&btop);CHKERRQ(ierr); /* (RT,ART) */ ierr = VecNormEnd (R,NORM_2,&dp);CHKERRQ(ierr); /* dp <- RT'*RT */ } else if (ksp->normtype == KSP_NORM_NATURAL) { ierr = VecDotEnd (RT,ART,&btop) ;CHKERRQ(ierr); /* (RT,ART) */ dp = sqrt(PetscAbsScalar(btop)); /* dp = sqrt(R,AR) */ } if (PetscAbsScalar(btop) < 0.0) { ksp->reason = KSP_DIVERGED_INDEFINITE_MAT; ierr = PetscInfo(ksp,"diverging due to indefinite or negative definite matrix\n");CHKERRQ(ierr); PetscFunctionReturn(0); } ksp->its = 0; KSPMonitor(ksp,0,dp); ierr = PetscObjectTakeAccess(ksp);CHKERRQ(ierr); ksp->rnorm = dp; ierr = PetscObjectGrantAccess(ksp);CHKERRQ(ierr); KSPLogResidualHistory(ksp,dp); ierr = (*ksp->converged)(ksp,0,dp,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); if (ksp->reason) PetscFunctionReturn(0); i = 0; do { ierr = KSP_PCApply(ksp,AP,Q);CHKERRQ(ierr);/* Q <- B* AP */ ierr = VecDot(AP,Q,&apq);CHKERRQ(ierr); if (PetscAbsScalar(apq) <= 0.0) { ksp->reason = KSP_DIVERGED_INDEFINITE_PC; ierr = PetscInfo(ksp,"KSPSolve_CR:diverging due to indefinite or negative definite PC\n");CHKERRQ(ierr); break; } ai = btop/apq; /* ai = (RT,ART)/(AP,Q) */ ierr = VecAXPY(X,ai,P);CHKERRQ(ierr); /* X <- X + ai*P */ ierr = VecAXPY(RT,-ai,Q);CHKERRQ(ierr); /* RT <- RT - ai*Q */ ierr = KSP_MatMult(ksp,Amat,RT,ART);CHKERRQ(ierr);/* ART <- A*RT */ bbot = btop; ierr = VecDotBegin(RT,ART,&btop);CHKERRQ(ierr); if (ksp->normtype == KSP_NORM_PRECONDITIONED) { ierr = VecNormBegin(RT,NORM_2,&dp);CHKERRQ(ierr); /* dp <- || RT || */ ierr = VecDotEnd (RT,ART,&btop) ;CHKERRQ(ierr); ierr = VecNormEnd (RT,NORM_2,&dp);CHKERRQ(ierr); /* dp <- || RT || */ } else if (ksp->normtype == KSP_NORM_NATURAL) { ierr = VecDotEnd(RT,ART,&btop);CHKERRQ(ierr); dp = sqrt(PetscAbsScalar(btop)); /* dp = sqrt(R,AR) */ } else if (ksp->normtype == KSP_NORM_NO) { ierr = VecDotEnd(RT,ART,&btop);CHKERRQ(ierr); dp = 0.0; } else if (ksp->normtype == KSP_NORM_UNPRECONDITIONED) { ierr = VecAXPY(R,ai,AP);CHKERRQ(ierr); /* R <- R - ai*AP */ ierr = VecNormBegin(R,NORM_2,&dp);CHKERRQ(ierr); /* dp <- R'*R */ ierr = VecDotEnd (RT,ART,&btop);CHKERRQ(ierr); ierr = VecNormEnd (R,NORM_2,&dp);CHKERRQ(ierr); /* dp <- R'*R */ } else { SETERRQ1(PETSC_ERR_SUP,"KSPNormType of %d not supported",(int)ksp->normtype); } if (PetscAbsScalar(btop) < 0.0) { ksp->reason = KSP_DIVERGED_INDEFINITE_MAT; ierr = PetscInfo(ksp,"diverging due to indefinite or negative definite PC\n");CHKERRQ(ierr); break; } ierr = PetscObjectTakeAccess(ksp);CHKERRQ(ierr); ksp->its++; ksp->rnorm = dp; ierr = PetscObjectGrantAccess(ksp);CHKERRQ(ierr); KSPLogResidualHistory(ksp,dp); KSPMonitor(ksp,i+1,dp); ierr = (*ksp->converged)(ksp,i+1,dp,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); if (ksp->reason) break; bi = btop/bbot; ierr = VecAYPX(P,bi,RT);CHKERRQ(ierr); /* P <- RT + Bi P */ ierr = VecAYPX(AP,bi,ART);CHKERRQ(ierr); /* AP <- ART + Bi AP */ i++; } while (i<ksp->max_it); if (i >= ksp->max_it) { ksp->reason = KSP_DIVERGED_ITS; } PetscFunctionReturn(0); }
PetscErrorCode KSPSolve_STCG(KSP ksp) { #ifdef PETSC_USE_COMPLEX SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_SUP, "STCG is not available for complex systems"); #else KSP_STCG *cg = (KSP_STCG *)ksp->data; PetscErrorCode ierr; MatStructure pflag; Mat Qmat, Mmat; Vec r, z, p, d; PC pc; PetscReal norm_r, norm_d, norm_dp1, norm_p, dMp; PetscReal alpha, beta, kappa, rz, rzm1; PetscReal rr, r2, step; PetscInt max_cg_its; PetscBool diagonalscale; PetscFunctionBegin; /***************************************************************************/ /* Check the arguments and parameters. */ /***************************************************************************/ ierr = PCGetDiagonalScale(ksp->pc, &diagonalscale);CHKERRQ(ierr); if (diagonalscale) SETERRQ1(((PetscObject)ksp)->comm,PETSC_ERR_SUP, "Krylov method %s does not support diagonal scaling", ((PetscObject)ksp)->type_name); if (cg->radius < 0.0) SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_ARG_OUTOFRANGE, "Input error: radius < 0"); /***************************************************************************/ /* Get the workspace vectors and initialize variables */ /***************************************************************************/ r2 = cg->radius * cg->radius; r = ksp->work[0]; z = ksp->work[1]; p = ksp->work[2]; d = ksp->vec_sol; pc = ksp->pc; ierr = PCGetOperators(pc, &Qmat, &Mmat, &pflag);CHKERRQ(ierr); ierr = VecGetSize(d, &max_cg_its);CHKERRQ(ierr); max_cg_its = PetscMin(max_cg_its, ksp->max_it); ksp->its = 0; /***************************************************************************/ /* Initialize objective function and direction. */ /***************************************************************************/ cg->o_fcn = 0.0; ierr = VecSet(d, 0.0);CHKERRQ(ierr); /* d = 0 */ cg->norm_d = 0.0; /***************************************************************************/ /* Begin the conjugate gradient method. Check the right-hand side for */ /* numerical problems. The check for not-a-number and infinite values */ /* need be performed only once. */ /***************************************************************************/ ierr = VecCopy(ksp->vec_rhs, r);CHKERRQ(ierr); /* r = -grad */ ierr = VecDot(r, r, &rr);CHKERRQ(ierr); /* rr = r^T r */ if (PetscIsInfOrNanScalar(rr)) { /*************************************************************************/ /* The right-hand side contains not-a-number or an infinite value. */ /* The gradient step does not work; return a zero value for the step. */ /*************************************************************************/ ksp->reason = KSP_DIVERGED_NAN; ierr = PetscInfo1(ksp, "KSPSolve_STCG: bad right-hand side: rr=%g\n", rr);CHKERRQ(ierr); PetscFunctionReturn(0); } /***************************************************************************/ /* Check the preconditioner for numerical problems and for positive */ /* definiteness. The check for not-a-number and infinite values need be */ /* performed only once. */ /***************************************************************************/ ierr = KSP_PCApply(ksp, r, z);CHKERRQ(ierr); /* z = inv(M) r */ ierr = VecDot(r, z, &rz);CHKERRQ(ierr); /* rz = r^T inv(M) r */ if (PetscIsInfOrNanScalar(rz)) { /*************************************************************************/ /* The preconditioner contains not-a-number or an infinite value. */ /* Return the gradient direction intersected with the trust region. */ /*************************************************************************/ ksp->reason = KSP_DIVERGED_NAN; ierr = PetscInfo1(ksp, "KSPSolve_STCG: bad preconditioner: rz=%g\n", rz);CHKERRQ(ierr); if (cg->radius != 0) { if (r2 >= rr) { alpha = 1.0; cg->norm_d = PetscSqrtReal(rr); } else { alpha = PetscSqrtReal(r2 / rr); cg->norm_d = cg->radius; } ierr = VecAXPY(d, alpha, r);CHKERRQ(ierr); /* d = d + alpha r */ /***********************************************************************/ /* Compute objective function. */ /***********************************************************************/ ierr = KSP_MatMult(ksp, Qmat, d, z);CHKERRQ(ierr); ierr = VecAYPX(z, -0.5, ksp->vec_rhs);CHKERRQ(ierr); ierr = VecDot(d, z, &cg->o_fcn);CHKERRQ(ierr); cg->o_fcn = -cg->o_fcn; ++ksp->its; } PetscFunctionReturn(0); } if (rz < 0.0) { /*************************************************************************/ /* The preconditioner is indefinite. Because this is the first */ /* and we do not have a direction yet, we use the gradient step. Note */ /* that we cannot use the preconditioned norm when computing the step */ /* because the matrix is indefinite. */ /*************************************************************************/ ksp->reason = KSP_DIVERGED_INDEFINITE_PC; ierr = PetscInfo1(ksp, "KSPSolve_STCG: indefinite preconditioner: rz=%g\n", rz);CHKERRQ(ierr); if (cg->radius != 0.0) { if (r2 >= rr) { alpha = 1.0; cg->norm_d = PetscSqrtReal(rr); } else { alpha = PetscSqrtReal(r2 / rr); cg->norm_d = cg->radius; } ierr = VecAXPY(d, alpha, r);CHKERRQ(ierr); /* d = d + alpha r */ /***********************************************************************/ /* Compute objective function. */ /***********************************************************************/ ierr = KSP_MatMult(ksp, Qmat, d, z);CHKERRQ(ierr); ierr = VecAYPX(z, -0.5, ksp->vec_rhs);CHKERRQ(ierr); ierr = VecDot(d, z, &cg->o_fcn);CHKERRQ(ierr); cg->o_fcn = -cg->o_fcn; ++ksp->its; } PetscFunctionReturn(0); } /***************************************************************************/ /* As far as we know, the preconditioner is positive semidefinite. */ /* Compute and log the residual. Check convergence because this */ /* initializes things, but do not terminate until at least one conjugate */ /* gradient iteration has been performed. */ /***************************************************************************/ switch(ksp->normtype) { case KSP_NORM_PRECONDITIONED: ierr = VecNorm(z, NORM_2, &norm_r);CHKERRQ(ierr); /* norm_r = |z| */ break; case KSP_NORM_UNPRECONDITIONED: norm_r = PetscSqrtReal(rr); /* norm_r = |r| */ break; case KSP_NORM_NATURAL: norm_r = PetscSqrtReal(rz); /* norm_r = |r|_M */ break; default: norm_r = 0.0; break; } KSPLogResidualHistory(ksp, norm_r); ierr = KSPMonitor(ksp, ksp->its, norm_r);CHKERRQ(ierr); ksp->rnorm = norm_r; ierr = (*ksp->converged)(ksp, ksp->its, norm_r, &ksp->reason, ksp->cnvP);CHKERRQ(ierr); /***************************************************************************/ /* Compute the first direction and update the iteration. */ /***************************************************************************/ ierr = VecCopy(z, p);CHKERRQ(ierr); /* p = z */ ierr = KSP_MatMult(ksp, Qmat, p, z);CHKERRQ(ierr); /* z = Q * p */ ++ksp->its; /***************************************************************************/ /* Check the matrix for numerical problems. */ /***************************************************************************/ ierr = VecDot(p, z, &kappa);CHKERRQ(ierr); /* kappa = p^T Q p */ if (PetscIsInfOrNanScalar(kappa)) { /*************************************************************************/ /* The matrix produced not-a-number or an infinite value. In this case, */ /* we must stop and use the gradient direction. This condition need */ /* only be checked once. */ /*************************************************************************/ ksp->reason = KSP_DIVERGED_NAN; ierr = PetscInfo1(ksp, "KSPSolve_STCG: bad matrix: kappa=%g\n", kappa);CHKERRQ(ierr); if (cg->radius) { if (r2 >= rr) { alpha = 1.0; cg->norm_d = PetscSqrtReal(rr); } else { alpha = PetscSqrtReal(r2 / rr); cg->norm_d = cg->radius; } ierr = VecAXPY(d, alpha, r);CHKERRQ(ierr); /* d = d + alpha r */ /***********************************************************************/ /* Compute objective function. */ /***********************************************************************/ ierr = KSP_MatMult(ksp, Qmat, d, z);CHKERRQ(ierr); ierr = VecAYPX(z, -0.5, ksp->vec_rhs);CHKERRQ(ierr); ierr = VecDot(d, z, &cg->o_fcn);CHKERRQ(ierr); cg->o_fcn = -cg->o_fcn; ++ksp->its; } PetscFunctionReturn(0); } /***************************************************************************/ /* Initialize variables for calculating the norm of the direction. */ /***************************************************************************/ dMp = 0.0; norm_d = 0.0; switch(cg->dtype) { case STCG_PRECONDITIONED_DIRECTION: norm_p = rz; break; default: ierr = VecDot(p, p, &norm_p);CHKERRQ(ierr); break; } /***************************************************************************/ /* Check for negative curvature. */ /***************************************************************************/ if (kappa <= 0.0) { /*************************************************************************/ /* In this case, the matrix is indefinite and we have encountered a */ /* direction of negative curvature. Because negative curvature occurs */ /* during the first step, we must follow a direction. */ /*************************************************************************/ ksp->reason = KSP_CONVERGED_CG_NEG_CURVE; ierr = PetscInfo1(ksp, "KSPSolve_STCG: negative curvature: kappa=%g\n", kappa);CHKERRQ(ierr); if (cg->radius != 0.0 && norm_p > 0.0) { /***********************************************************************/ /* Follow direction of negative curvature to the boundary of the */ /* trust region. */ /***********************************************************************/ step = PetscSqrtReal(r2 / norm_p); cg->norm_d = cg->radius; ierr = VecAXPY(d, step, p);CHKERRQ(ierr); /* d = d + step p */ /***********************************************************************/ /* Update objective function. */ /***********************************************************************/ cg->o_fcn += step * (0.5 * step * kappa - rz); } else if (cg->radius != 0.0) { /***********************************************************************/ /* The norm of the preconditioned direction is zero; use the gradient */ /* step. */ /***********************************************************************/ if (r2 >= rr) { alpha = 1.0; cg->norm_d = PetscSqrtReal(rr); } else { alpha = PetscSqrtReal(r2 / rr); cg->norm_d = cg->radius; } ierr = VecAXPY(d, alpha, r);CHKERRQ(ierr); /* d = d + alpha r */ /***********************************************************************/ /* Compute objective function. */ /***********************************************************************/ ierr = KSP_MatMult(ksp, Qmat, d, z);CHKERRQ(ierr); ierr = VecAYPX(z, -0.5, ksp->vec_rhs);CHKERRQ(ierr); ierr = VecDot(d, z, &cg->o_fcn);CHKERRQ(ierr); cg->o_fcn = -cg->o_fcn; ++ksp->its; } PetscFunctionReturn(0); } /***************************************************************************/ /* Run the conjugate gradient method until either the problem is solved, */ /* we encounter the boundary of the trust region, or the conjugate */ /* gradient method breaks down. */ /***************************************************************************/ while(1) { /*************************************************************************/ /* Know that kappa is nonzero, because we have not broken down, so we */ /* can compute the steplength. */ /*************************************************************************/ alpha = rz / kappa; /*************************************************************************/ /* Compute the steplength and check for intersection with the trust */ /* region. */ /*************************************************************************/ norm_dp1 = norm_d + alpha*(2.0*dMp + alpha*norm_p); if (cg->radius != 0.0 && norm_dp1 >= r2) { /***********************************************************************/ /* In this case, the matrix is positive definite as far as we know. */ /* However, the full step goes beyond the trust region. */ /***********************************************************************/ ksp->reason = KSP_CONVERGED_CG_CONSTRAINED; ierr = PetscInfo1(ksp, "KSPSolve_STCG: constrained step: radius=%g\n", cg->radius);CHKERRQ(ierr); if (norm_p > 0.0) { /*********************************************************************/ /* Follow the direction to the boundary of the trust region. */ /*********************************************************************/ step = (PetscSqrtReal(dMp*dMp+norm_p*(r2-norm_d))-dMp)/norm_p; cg->norm_d = cg->radius; ierr = VecAXPY(d, step, p);CHKERRQ(ierr); /* d = d + step p */ /*********************************************************************/ /* Update objective function. */ /*********************************************************************/ cg->o_fcn += step * (0.5 * step * kappa - rz); } else { /*********************************************************************/ /* The norm of the direction is zero; there is nothing to follow. */ /*********************************************************************/ } break; } /*************************************************************************/ /* Now we can update the direction and residual. */ /*************************************************************************/ ierr = VecAXPY(d, alpha, p);CHKERRQ(ierr); /* d = d + alpha p */ ierr = VecAXPY(r, -alpha, z); /* r = r - alpha Q p */ ierr = KSP_PCApply(ksp, r, z);CHKERRQ(ierr); /* z = inv(M) r */ switch(cg->dtype) { case STCG_PRECONDITIONED_DIRECTION: norm_d = norm_dp1; break; default: ierr = VecDot(d, d, &norm_d);CHKERRQ(ierr); break; } cg->norm_d = PetscSqrtReal(norm_d); /*************************************************************************/ /* Update objective function. */ /*************************************************************************/ cg->o_fcn -= 0.5 * alpha * rz; /*************************************************************************/ /* Check that the preconditioner appears positive semidefinite. */ /*************************************************************************/ rzm1 = rz; ierr = VecDot(r, z, &rz);CHKERRQ(ierr); /* rz = r^T z */ if (rz < 0.0) { /***********************************************************************/ /* The preconditioner is indefinite. */ /***********************************************************************/ ksp->reason = KSP_DIVERGED_INDEFINITE_PC; ierr = PetscInfo1(ksp, "KSPSolve_STCG: cg indefinite preconditioner: rz=%g\n", rz);CHKERRQ(ierr); break; } /*************************************************************************/ /* As far as we know, the preconditioner is positive semidefinite. */ /* Compute the residual and check for convergence. */ /*************************************************************************/ switch(ksp->normtype) { case KSP_NORM_PRECONDITIONED: ierr = VecNorm(z, NORM_2, &norm_r);CHKERRQ(ierr);/* norm_r = |z| */ break; case KSP_NORM_UNPRECONDITIONED: ierr = VecNorm(r, NORM_2, &norm_r);CHKERRQ(ierr);/* norm_r = |r| */ break; case KSP_NORM_NATURAL: norm_r = PetscSqrtReal(rz); /* norm_r = |r|_M */ break; default: norm_r = 0.0; break; } KSPLogResidualHistory(ksp, norm_r); ierr = KSPMonitor(ksp, ksp->its, norm_r);CHKERRQ(ierr); ksp->rnorm = norm_r; ierr = (*ksp->converged)(ksp, ksp->its, norm_r, &ksp->reason, ksp->cnvP);CHKERRQ(ierr); if (ksp->reason) { /***********************************************************************/ /* The method has converged. */ /***********************************************************************/ ierr = PetscInfo2(ksp, "KSPSolve_STCG: truncated step: rnorm=%g, radius=%g\n", norm_r, cg->radius);CHKERRQ(ierr); break; } /*************************************************************************/ /* We have not converged yet. Check for breakdown. */ /*************************************************************************/ beta = rz / rzm1; if (fabs(beta) <= 0.0) { /***********************************************************************/ /* Conjugate gradients has broken down. */ /***********************************************************************/ ksp->reason = KSP_DIVERGED_BREAKDOWN; ierr = PetscInfo1(ksp, "KSPSolve_STCG: breakdown: beta=%g\n", beta);CHKERRQ(ierr); break; } /*************************************************************************/ /* Check iteration limit. */ /*************************************************************************/ if (ksp->its >= max_cg_its) { ksp->reason = KSP_DIVERGED_ITS; ierr = PetscInfo1(ksp, "KSPSolve_STCG: iterlim: its=%d\n", ksp->its);CHKERRQ(ierr); break; } /*************************************************************************/ /* Update p and the norms. */ /*************************************************************************/ ierr = VecAYPX(p, beta, z);CHKERRQ(ierr); /* p = z + beta p */ switch(cg->dtype) { case STCG_PRECONDITIONED_DIRECTION: dMp = beta*(dMp + alpha*norm_p); norm_p = beta*(rzm1 + beta*norm_p); break; default: ierr = VecDot(d, p, &dMp);CHKERRQ(ierr); ierr = VecDot(p, p, &norm_p);CHKERRQ(ierr); break; } /*************************************************************************/ /* Compute the new direction and update the iteration. */ /*************************************************************************/ ierr = KSP_MatMult(ksp, Qmat, p, z);CHKERRQ(ierr); /* z = Q * p */ ierr = VecDot(p, z, &kappa);CHKERRQ(ierr); /* kappa = p^T Q p */ ++ksp->its; /*************************************************************************/ /* Check for negative curvature. */ /*************************************************************************/ if (kappa <= 0.0) { /***********************************************************************/ /* In this case, the matrix is indefinite and we have encountered */ /* a direction of negative curvature. Follow the direction to the */ /* boundary of the trust region. */ /***********************************************************************/ ksp->reason = KSP_CONVERGED_CG_NEG_CURVE; ierr = PetscInfo1(ksp, "KSPSolve_STCG: negative curvature: kappa=%g\n", kappa);CHKERRQ(ierr); if (cg->radius != 0.0 && norm_p > 0.0) { /*********************************************************************/ /* Follow direction of negative curvature to boundary. */ /*********************************************************************/ step = (PetscSqrtReal(dMp*dMp+norm_p*(r2-norm_d))-dMp)/norm_p; cg->norm_d = cg->radius; ierr = VecAXPY(d, step, p);CHKERRQ(ierr); /* d = d + step p */ /*********************************************************************/ /* Update objective function. */ /*********************************************************************/ cg->o_fcn += step * (0.5 * step * kappa - rz); } else if (cg->radius != 0.0) { /*********************************************************************/ /* The norm of the direction is zero; there is nothing to follow. */ /*********************************************************************/ } break; } } PetscFunctionReturn(0); #endif }
PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat mat) { Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(baij->B->data); PetscErrorCode ierr; PetscInt i,j,*aj = B->j,ec = 0,*garray; PetscInt bs = mat->rmap->bs,*stmp; IS from,to; Vec gvec; #if defined (PETSC_USE_CTABLE) PetscTable gid1_lid1; PetscTablePosition tpos; PetscInt gid,lid; #else PetscInt Nbs = baij->Nbs,*indices; #endif PetscFunctionBegin; #if defined (PETSC_USE_CTABLE) /* use a table - Mark Adams */ PetscTableCreate(B->mbs,baij->Nbs+1,&gid1_lid1); for (i=0; i<B->mbs; i++) { for (j=0; j<B->ilen[i]; j++) { PetscInt data,gid1 = aj[B->i[i]+j] + 1; ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr); if (!data) { /* one based table */ ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr); } } } /* form array of columns we need */ ierr = PetscMalloc(ec*sizeof(PetscInt),&garray);CHKERRQ(ierr); ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); while (tpos) { ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); gid--; lid--; garray[lid] = gid; } ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); for (i=0; i<ec; i++) { ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); } /* compact out the extra columns in B */ for (i=0; i<B->mbs; i++) { for (j=0; j<B->ilen[i]; j++) { PetscInt gid1 = aj[B->i[i] + j] + 1; ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); lid --; aj[B->i[i]+j] = lid; } } B->nbs = ec; baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs; ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr); ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr); #else /* For the first stab we make an array as long as the number of columns */ /* mark those columns that are in baij->B */ ierr = PetscMalloc(Nbs*sizeof(PetscInt),&indices);CHKERRQ(ierr); ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr); for (i=0; i<B->mbs; i++) { for (j=0; j<B->ilen[i]; j++) { if (!indices[aj[B->i[i] + j]]) ec++; indices[aj[B->i[i] + j] ] = 1; } } /* form array of columns we need */ ierr = PetscMalloc(ec*sizeof(PetscInt),&garray);CHKERRQ(ierr); ec = 0; for (i=0; i<Nbs; i++) { if (indices[i]) { garray[ec++] = i; } } /* make indices now point into garray */ for (i=0; i<ec; i++) { indices[garray[i]] = i; } /* compact out the extra columns in B */ for (i=0; i<B->mbs; i++) { for (j=0; j<B->ilen[i]; j++) { aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; } } B->nbs = ec; baij->B->cmap->n = ec*mat->rmap->bs; ierr = PetscFree(indices);CHKERRQ(ierr); #endif /* create local vector that is used to scatter into */ ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr); /* create two temporary index sets for building scatter-gather */ ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); ierr = PetscMalloc(ec*sizeof(PetscInt),&stmp);CHKERRQ(ierr); for (i=0; i<ec; i++) { stmp[i] = i; } ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to);CHKERRQ(ierr); /* create temporary global vector to generate scatter context */ ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,1,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr); ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr); ierr = VecDestroy(&gvec);CHKERRQ(ierr); ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr); ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr); ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr); ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr); baij->garray = garray; ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr); ierr = ISDestroy(&from);CHKERRQ(ierr); ierr = ISDestroy(&to);CHKERRQ(ierr); PetscFunctionReturn(0); }
static PetscErrorCode ComputeMatrix(DomainData dd, Mat *A) { PetscErrorCode ierr; GLLData gll; Mat local_mat =0,temp_A=0; ISLocalToGlobalMapping matis_map =0; IS dirichletIS=0; PetscFunctionBeginUser; /* Compute some stuff of Gauss-Legendre-Lobatto quadrature rule */ ierr = GLLStuffs(dd,&gll);CHKERRQ(ierr); /* Compute matrix of subdomain Neumann problem */ ierr = ComputeSubdomainMatrix(dd,gll,&local_mat);CHKERRQ(ierr); /* Compute global mapping of local dofs */ ierr = ComputeMapping(dd,&matis_map);CHKERRQ(ierr); /* Create MATIS object needed by BDDC */ ierr = MatCreateIS(dd.gcomm,1,PETSC_DECIDE,PETSC_DECIDE,dd.xm*dd.ym*dd.zm,dd.xm*dd.ym*dd.zm,matis_map,NULL,&temp_A);CHKERRQ(ierr); /* Set local subdomain matrices into MATIS object */ ierr = MatScale(local_mat,dd.scalingfactor);CHKERRQ(ierr); ierr = MatISSetLocalMat(temp_A,local_mat);CHKERRQ(ierr); /* Call assembly functions */ ierr = MatAssemblyBegin(temp_A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(temp_A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); if (dd.DBC_zerorows) { PetscInt dirsize; ierr = ComputeSpecialBoundaryIndices(dd,&dirichletIS,NULL);CHKERRQ(ierr); ierr = MatSetOption(local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); ierr = MatZeroRowsLocalIS(temp_A,dirichletIS,1.0,NULL,NULL);CHKERRQ(ierr); ierr = ISGetLocalSize(dirichletIS,&dirsize);CHKERRQ(ierr); /* giving hints to local and global matrices could be useful for the BDDC */ if (!dirsize) { ierr = MatSetOption(local_mat,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); ierr = MatSetOption(local_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); } else { ierr = MatSetOption(local_mat,MAT_SYMMETRIC,PETSC_FALSE);CHKERRQ(ierr); ierr = MatSetOption(local_mat,MAT_SPD,PETSC_FALSE);CHKERRQ(ierr); } ierr = ISDestroy(&dirichletIS);CHKERRQ(ierr); } else { /* safe to set the options for the global matrices (they will be communicated to the matis local matrices) */ ierr = MatSetOption(temp_A,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); ierr = MatSetOption(temp_A,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); } #if DEBUG { Vec lvec,rvec; PetscReal norm; ierr = MatCreateVecs(temp_A,&lvec,&rvec);CHKERRQ(ierr); ierr = VecSet(lvec,1.0);CHKERRQ(ierr); ierr = MatMult(temp_A,lvec,rvec);CHKERRQ(ierr); ierr = VecNorm(rvec,NORM_INFINITY,&norm);CHKERRQ(ierr); printf("Test null space of global mat % 1.14e\n",norm); ierr = VecDestroy(&lvec);CHKERRQ(ierr); ierr = VecDestroy(&rvec);CHKERRQ(ierr); } #endif /* free allocated workspace */ ierr = PetscFree(gll.zGL);CHKERRQ(ierr); ierr = PetscFree(gll.rhoGL);CHKERRQ(ierr); ierr = PetscFree(gll.A[0]);CHKERRQ(ierr); ierr = PetscFree(gll.A);CHKERRQ(ierr); ierr = MatDestroy(&gll.elem_mat);CHKERRQ(ierr); ierr = MatDestroy(&local_mat);CHKERRQ(ierr); ierr = ISLocalToGlobalMappingDestroy(&matis_map);CHKERRQ(ierr); /* give back the pointer to te MATIS object */ *A = temp_A; PetscFunctionReturn(0); }
static PetscErrorCode ComputeKSPBDDC(DomainData dd,Mat A,KSP *ksp) { PetscErrorCode ierr; KSP temp_ksp; PC pc; IS dirichletIS=0,neumannIS=0,*bddc_dofs_splitting; PetscInt localsize,*xadj=NULL,*adjncy=NULL; MatNullSpace near_null_space; PetscFunctionBeginUser; ierr = KSPCreate(dd.gcomm,&temp_ksp);CHKERRQ(ierr); ierr = KSPSetOperators(temp_ksp,A,A);CHKERRQ(ierr); ierr = KSPSetType(temp_ksp,KSPCG);CHKERRQ(ierr); ierr = KSPGetPC(temp_ksp,&pc);CHKERRQ(ierr); ierr = PCSetType(pc,PCBDDC);CHKERRQ(ierr); localsize = dd.xm_l*dd.ym_l*dd.zm_l; /* BDDC customization */ /* jumping coefficients case */ ierr = PCISSetSubdomainScalingFactor(pc,dd.scalingfactor);CHKERRQ(ierr); /* Dofs splitting Simple stride-1 IS It is not needed since, by default, PCBDDC assumes a stride-1 split */ ierr = PetscMalloc1(1,&bddc_dofs_splitting);CHKERRQ(ierr); #if 1 ierr = ISCreateStride(PETSC_COMM_WORLD,localsize,0,1,&bddc_dofs_splitting[0]);CHKERRQ(ierr); ierr = PCBDDCSetDofsSplittingLocal(pc,1,bddc_dofs_splitting);CHKERRQ(ierr); #else /* examples for global ordering */ /* each process lists the nodes it owns */ PetscInt sr,er; ierr = MatGetOwnershipRange(A,&sr,&er);CHKERRQ(ierr); ierr = ISCreateStride(PETSC_COMM_WORLD,er-sr,sr,1,&bddc_dofs_splitting[0]);CHKERRQ(ierr); ierr = PCBDDCSetDofsSplitting(pc,1,bddc_dofs_splitting);CHKERRQ(ierr); /* Split can be passed in a more general way since any process can list any node */ #endif ierr = ISDestroy(&bddc_dofs_splitting[0]);CHKERRQ(ierr); ierr = PetscFree(bddc_dofs_splitting);CHKERRQ(ierr); /* Primal constraints implemented by using a near null space attached to A -> now it passes in only the constants (which in practice is not needed since, by default, PCBDDC build the primal space using constants for quadrature formulas */ #if 0 Vec vecs[2]; PetscRandom rctx; ierr = MatCreateVecs(A,&vecs[0],&vecs[1]);CHKERRQ(ierr); ierr = PetscRandomCreate(dd.gcomm,&rctx);CHKERRQ(ierr); ierr = VecSetRandom(vecs[0],rctx);CHKERRQ(ierr); ierr = VecSetRandom(vecs[1],rctx);CHKERRQ(ierr); ierr = MatNullSpaceCreate(dd.gcomm,PETSC_TRUE,2,vecs,&near_null_space);CHKERRQ(ierr); ierr = VecDestroy(&vecs[0]);CHKERRQ(ierr); ierr = VecDestroy(&vecs[1]);CHKERRQ(ierr); ierr = PetscRandomDestroy(&rctx);CHKERRQ(ierr); #else ierr = MatNullSpaceCreate(dd.gcomm,PETSC_TRUE,0,NULL,&near_null_space);CHKERRQ(ierr); #endif ierr = MatSetNearNullSpace(A,near_null_space);CHKERRQ(ierr); ierr = MatNullSpaceDestroy(&near_null_space);CHKERRQ(ierr); /* CSR graph of subdomain dofs */ ierr = BuildCSRGraph(dd,&xadj,&adjncy);CHKERRQ(ierr); ierr = PCBDDCSetLocalAdjacencyGraph(pc,localsize,xadj,adjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); /* Neumann/Dirichlet indices on the global boundary */ if (dd.DBC_zerorows) { /* Only in case you eliminate some rows matrix with zerorows function, you need to set dirichlet indices into PCBDDC data */ ierr = ComputeSpecialBoundaryIndices(dd,&dirichletIS,&neumannIS);CHKERRQ(ierr); ierr = PCBDDCSetNeumannBoundariesLocal(pc,neumannIS);CHKERRQ(ierr); ierr = PCBDDCSetDirichletBoundariesLocal(pc,dirichletIS);CHKERRQ(ierr); } else { if (dd.pure_neumann) { /* In such a case, all interface nodes lying on the global boundary are neumann nodes */ ierr = ComputeSpecialBoundaryIndices(dd,NULL,&neumannIS);CHKERRQ(ierr); ierr = PCBDDCSetNeumannBoundariesLocal(pc,neumannIS);CHKERRQ(ierr); } else { /* It is wrong setting dirichlet indices without having zeroed the corresponding rows in the global matrix */ /* But we can still compute them since nodes near the dirichlet boundaries does not need to be defined as neumann nodes */ ierr = ComputeSpecialBoundaryIndices(dd,&dirichletIS,&neumannIS);CHKERRQ(ierr); ierr = PCBDDCSetNeumannBoundariesLocal(pc,neumannIS);CHKERRQ(ierr); } } /* Pass local null space information to local matrices (needed when using approximate local solvers) */ if (dd.ipx || dd.pure_neumann) { MatNullSpace nsp; Mat local_mat; ierr = MatISGetLocalMat(A,&local_mat);CHKERRQ(ierr); ierr = MatNullSpaceCreate(PETSC_COMM_SELF,PETSC_TRUE,0,NULL,&nsp);CHKERRQ(ierr); ierr = MatSetNullSpace(local_mat,nsp);CHKERRQ(ierr); ierr = MatNullSpaceDestroy(&nsp);CHKERRQ(ierr); } ierr = KSPSetComputeSingularValues(temp_ksp,PETSC_TRUE);CHKERRQ(ierr); ierr = KSPSetFromOptions(temp_ksp);CHKERRQ(ierr); ierr = KSPSetUp(temp_ksp);CHKERRQ(ierr); *ksp = temp_ksp; ierr = ISDestroy(&dirichletIS);CHKERRQ(ierr); ierr = ISDestroy(&neumannIS);CHKERRQ(ierr); PetscFunctionReturn(0); }
static PetscErrorCode GLLStuffs(DomainData dd, GLLData *glldata) { PetscErrorCode ierr; PetscReal *M,si; PetscScalar x,z0,z1,z2,Lpj,Lpr,rhoGLj,rhoGLk; PetscBLASInt pm1,lierr; PetscInt i,j,n,k,s,r,q,ii,jj,p=dd.p; PetscInt xloc,yloc,zloc,xyloc,xyzloc; PetscFunctionBeginUser; /* Gauss-Lobatto-Legendre nodes zGL on [-1,1] */ ierr = PetscMalloc1(p+1,&glldata->zGL);CHKERRQ(ierr); ierr = PetscMemzero(glldata->zGL,(p+1)*sizeof(*glldata->zGL));CHKERRQ(ierr); glldata->zGL[0]=-1.0; glldata->zGL[p]= 1.0; if (p > 1) { if (p == 2) glldata->zGL[1]=0.0; else { ierr = PetscMalloc1(p-1,&M);CHKERRQ(ierr); for (i=0; i<p-1; i++) { si = (PetscReal)(i+1.0); M[i]=0.5*PetscSqrtReal(si*(si+2.0)/((si+0.5)*(si+1.5))); } pm1 = p-1; ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); PetscStackCallBLAS("LAPACKsteqr",LAPACKsteqr_("N",&pm1,&glldata->zGL[1],M,&x,&pm1,M,&lierr)); if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in STERF Lapack routine %d",(int)lierr); ierr = PetscFPTrapPop();CHKERRQ(ierr); ierr = PetscFree(M);CHKERRQ(ierr); } } /* Weights for 1D quadrature */ ierr = PetscMalloc1(p+1,&glldata->rhoGL);CHKERRQ(ierr); glldata->rhoGL[0]=2.0/(PetscScalar)(p*(p+1.0)); glldata->rhoGL[p]=glldata->rhoGL[0]; z2 = -1; /* Dummy value to avoid -Wmaybe-initialized */ for (i=1; i<p; i++) { x = glldata->zGL[i]; z0 = 1.0; z1 = x; for (n=1; n<p; n++) { z2 = x*z1*(2.0*n+1.0)/(n+1.0)-z0*(PetscScalar)(n/(n+1.0)); z0 = z1; z1 = z2; } glldata->rhoGL[i]=2.0/(p*(p+1.0)*z2*z2); } /* Auxiliary mat for laplacian */ ierr = PetscMalloc1(p+1,&glldata->A);CHKERRQ(ierr); ierr = PetscMalloc1((p+1)*(p+1),&glldata->A[0]);CHKERRQ(ierr); for (i=1; i<p+1; i++) glldata->A[i]=glldata->A[i-1]+p+1; for (j=1; j<p; j++) { x =glldata->zGL[j]; z0=1.0; z1=x; for (n=1; n<p; n++) { z2=x*z1*(2.0*n+1.0)/(n+1.0)-z0*(PetscScalar)(n/(n+1.0)); z0=z1; z1=z2; } Lpj=z2; for (r=1; r<p; r++) { if (r == j) { glldata->A[j][j]=2.0/(3.0*(1.0-glldata->zGL[j]*glldata->zGL[j])*Lpj*Lpj); } else { x = glldata->zGL[r]; z0 = 1.0; z1 = x; for (n=1; n<p; n++) { z2=x*z1*(2.0*n+1.0)/(n+1.0)-z0*(PetscScalar)(n/(n+1.0)); z0=z1; z1=z2; } Lpr = z2; glldata->A[r][j]=4.0/(p*(p+1.0)*Lpj*Lpr*(glldata->zGL[j]-glldata->zGL[r])*(glldata->zGL[j]-glldata->zGL[r])); } } } for (j=1; j<p+1; j++) { x = glldata->zGL[j]; z0 = 1.0; z1 = x; for (n=1; n<p; n++) { z2=x*z1*(2.0*n+1.0)/(n+1.0)-z0*(PetscScalar)(n/(n+1.0)); z0=z1; z1=z2; } Lpj = z2; glldata->A[j][0]=4.0*PetscPowRealInt(-1.0,p)/(p*(p+1.0)*Lpj*(1.0+glldata->zGL[j])*(1.0+glldata->zGL[j])); glldata->A[0][j]=glldata->A[j][0]; } for (j=0; j<p; j++) { x = glldata->zGL[j]; z0 = 1.0; z1 = x; for (n=1; n<p; n++) { z2=x*z1*(2.0*n+1.0)/(n+1.0)-z0*(PetscScalar)(n/(n+1.0)); z0=z1; z1=z2; } Lpj=z2; glldata->A[p][j]=4.0/(p*(p+1.0)*Lpj*(1.0-glldata->zGL[j])*(1.0-glldata->zGL[j])); glldata->A[j][p]=glldata->A[p][j]; } glldata->A[0][0]=0.5+(p*(p+1.0)-2.0)/6.0; glldata->A[p][p]=glldata->A[0][0]; /* compute element matrix */ xloc = p+1; yloc = p+1; zloc = p+1; if (dd.dim<2) yloc=1; if (dd.dim<3) zloc=1; xyloc = xloc*yloc; xyzloc = xloc*yloc*zloc; ierr = MatCreate(PETSC_COMM_SELF,&glldata->elem_mat);CHKERRQ(ierr); ierr = MatSetSizes(glldata->elem_mat,xyzloc,xyzloc,xyzloc,xyzloc);CHKERRQ(ierr); ierr = MatSetType(glldata->elem_mat,MATSEQAIJ);CHKERRQ(ierr); ierr = MatSeqAIJSetPreallocation(glldata->elem_mat,xyzloc,NULL);CHKERRQ(ierr); /* overestimated */ ierr = MatZeroEntries(glldata->elem_mat);CHKERRQ(ierr); ierr = MatSetOption(glldata->elem_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); for (k=0; k<zloc; k++) { if (dd.dim>2) rhoGLk=glldata->rhoGL[k]; else rhoGLk=1.0; for (j=0; j<yloc; j++) { if (dd.dim>1) rhoGLj=glldata->rhoGL[j]; else rhoGLj=1.0; for (i=0; i<xloc; i++) { ii = k*xyloc+j*xloc+i; s = k; r = j; for (q=0; q<xloc; q++) { jj = s*xyloc+r*xloc+q; ierr = MatSetValue(glldata->elem_mat,jj,ii,glldata->A[i][q]*rhoGLj*rhoGLk,ADD_VALUES);CHKERRQ(ierr); } if (dd.dim>1) { s=k; q=i; for (r=0; r<yloc; r++) { jj = s*xyloc+r*xloc+q; ierr = MatSetValue(glldata->elem_mat,jj,ii,glldata->A[j][r]*glldata->rhoGL[i]*rhoGLk,ADD_VALUES);CHKERRQ(ierr); } } if (dd.dim>2) { r=j; q=i; for (s=0; s<zloc; s++) { jj = s*xyloc+r*xloc+q; ierr = MatSetValue(glldata->elem_mat,jj,ii,glldata->A[k][s]*rhoGLj*glldata->rhoGL[i],ADD_VALUES);CHKERRQ(ierr); } } } } } ierr = MatAssemblyBegin(glldata->elem_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd (glldata->elem_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); #if DEBUG { Vec lvec,rvec; PetscReal norm; ierr = MatCreateVecs(glldata->elem_mat,&lvec,&rvec);CHKERRQ(ierr); ierr = VecSet(lvec,1.0);CHKERRQ(ierr); ierr = MatMult(glldata->elem_mat,lvec,rvec);CHKERRQ(ierr); ierr = VecNorm(rvec,NORM_INFINITY,&norm);CHKERRQ(ierr); printf("Test null space of elem mat % 1.14e\n",norm); ierr = VecDestroy(&lvec);CHKERRQ(ierr); ierr = VecDestroy(&rvec);CHKERRQ(ierr); } #endif PetscFunctionReturn(0); }
static PetscErrorCode DomainDecomposition(DomainData *dd) { PetscMPIInt rank; PetscInt i,j,k; PetscFunctionBeginUser; /* Subdomain index in cartesian coordinates */ MPI_Comm_rank(dd->gcomm,&rank); dd->ipx = rank%dd->npx; if (dd->dim>1) dd->ipz = rank/(dd->npx*dd->npy); else dd->ipz = 0; dd->ipy = rank/dd->npx-dd->ipz*dd->npy; /* number of local elements */ dd->nex_l = dd->nex/dd->npx; if (dd->ipx < dd->nex%dd->npx) dd->nex_l++; if (dd->dim>1) { dd->ney_l = dd->ney/dd->npy; if (dd->ipy < dd->ney%dd->npy) dd->ney_l++; } else { dd->ney_l = 0; } if (dd->dim>2) { dd->nez_l = dd->nez/dd->npz; if (dd->ipz < dd->nez%dd->npz) dd->nez_l++; } else { dd->nez_l = 0; } /* local and global number of dofs */ dd->xm_l = dd->nex_l*dd->p+1; dd->xm = dd->nex*dd->p+1; dd->ym_l = dd->ney_l*dd->p+1; dd->ym = dd->ney*dd->p+1; dd->zm_l = dd->nez_l*dd->p+1; dd->zm = dd->nez*dd->p+1; if (!dd->pure_neumann) { if (!dd->DBC_zerorows && !dd->ipx) dd->xm_l--; if (!dd->DBC_zerorows) dd->xm--; } /* starting global index for local dofs (simple lexicographic order) */ dd->startx = 0; j = dd->nex/dd->npx; for (i=0; i<dd->ipx; i++) { k = j; if (i<dd->nex%dd->npx) k++; dd->startx = dd->startx+k*dd->p; } if (!dd->pure_neumann && !dd->DBC_zerorows && dd->ipx) dd->startx--; dd->starty = 0; if (dd->dim > 1) { j = dd->ney/dd->npy; for (i=0; i<dd->ipy; i++) { k = j; if (i<dd->ney%dd->npy) k++; dd->starty = dd->starty+k*dd->p; } } dd->startz = 0; if (dd->dim > 2) { j = dd->nez/dd->npz; for (i=0; i<dd->ipz; i++) { k = j; if (i<dd->nez%dd->npz) k++; dd->startz = dd->startz+k*dd->p; } } PetscFunctionReturn(0); }
static PetscErrorCode ComputeSubdomainMatrix(DomainData dd, GLLData glldata, Mat *local_mat) { PetscErrorCode ierr; PetscInt localsize,zloc,yloc,xloc,auxnex,auxney,auxnez; PetscInt ie,je,ke,i,j,k,ig,jg,kg,ii,ming; PetscInt *indexg,*cols,*colsg; PetscScalar *vals; Mat temp_local_mat,elem_mat_DBC=0,*usedmat; IS submatIS; PetscFunctionBeginUser; ierr = MatGetSize(glldata.elem_mat,&i,&j);CHKERRQ(ierr); ierr = PetscMalloc1(i,&indexg);CHKERRQ(ierr); ierr = PetscMalloc1(i,&colsg);CHKERRQ(ierr); /* get submatrix of elem_mat without dirichlet nodes */ if (!dd.pure_neumann && !dd.DBC_zerorows && !dd.ipx) { xloc = dd.p+1; yloc = 1; zloc = 1; if (dd.dim>1) yloc = dd.p+1; if (dd.dim>2) zloc = dd.p+1; ii = 0; for (k=0;k<zloc;k++) { for (j=0;j<yloc;j++) { for (i=1;i<xloc;i++) { indexg[ii]=k*xloc*yloc+j*xloc+i; ii++; } } } ierr = ISCreateGeneral(PETSC_COMM_SELF,ii,indexg,PETSC_COPY_VALUES,&submatIS);CHKERRQ(ierr); ierr = MatGetSubMatrix(glldata.elem_mat,submatIS,submatIS,MAT_INITIAL_MATRIX,&elem_mat_DBC);CHKERRQ(ierr); ierr = ISDestroy(&submatIS);CHKERRQ(ierr); } /* Assemble subdomain matrix */ localsize = dd.xm_l*dd.ym_l*dd.zm_l; ierr = MatCreate(PETSC_COMM_SELF,&temp_local_mat);CHKERRQ(ierr); ierr = MatSetSizes(temp_local_mat,localsize,localsize,localsize,localsize);CHKERRQ(ierr); ierr = MatSetOptionsPrefix(temp_local_mat,"subdomain_");CHKERRQ(ierr); /* set local matrices type: here we use SEQSBAIJ primarily for testing purpose */ /* in order to avoid conversions inside the BDDC code, use SeqAIJ if possible */ if (dd.DBC_zerorows && !dd.ipx) { /* in this case, we need to zero out some of the rows, so use seqaij */ ierr = MatSetType(temp_local_mat,MATSEQAIJ);CHKERRQ(ierr); } else { ierr = MatSetType(temp_local_mat,MATSEQSBAIJ);CHKERRQ(ierr); } ierr = MatSetFromOptions(temp_local_mat);CHKERRQ(ierr); i = PetscPowRealInt(3.0*(dd.p+1.0),dd.dim); ierr = MatSeqAIJSetPreallocation(temp_local_mat,i,NULL);CHKERRQ(ierr); /* very overestimated */ ierr = MatSeqSBAIJSetPreallocation(temp_local_mat,1,i,NULL);CHKERRQ(ierr); /* very overestimated */ ierr = MatSetOption(temp_local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); yloc = dd.p+1; zloc = dd.p+1; if (dd.dim < 3) zloc = 1; if (dd.dim < 2) yloc = 1; auxnez = dd.nez_l; auxney = dd.ney_l; auxnex = dd.nex_l; if (dd.dim < 3) auxnez = 1; if (dd.dim < 2) auxney = 1; for (ke=0; ke<auxnez; ke++) { for (je=0; je<auxney; je++) { for (ie=0; ie<auxnex; ie++) { /* customize element accounting for BC */ xloc = dd.p+1; ming = 0; usedmat = &glldata.elem_mat; if (!dd.pure_neumann && !dd.DBC_zerorows && !dd.ipx) { if (ie == 0) { xloc = dd.p; usedmat = &elem_mat_DBC; } else { ming = -1; usedmat = &glldata.elem_mat; } } /* local to the element/global to the subdomain indexing */ for (k=0; k<zloc; k++) { kg = ke*dd.p+k; for (j=0; j<yloc; j++) { jg = je*dd.p+j; for (i=0; i<xloc; i++) { ig = ie*dd.p+i+ming; ii = k*xloc*yloc+j*xloc+i; indexg[ii] = kg*dd.xm_l*dd.ym_l+jg*dd.xm_l+ig; } } } /* Set values */ for (i=0; i<xloc*yloc*zloc; i++) { ierr = MatGetRow(*usedmat,i,&j,(const PetscInt**)&cols,(const PetscScalar**)&vals);CHKERRQ(ierr); for (k=0; k<j; k++) colsg[k] = indexg[cols[k]]; ierr = MatSetValues(temp_local_mat,1,&indexg[i],j,colsg,vals,ADD_VALUES);CHKERRQ(ierr); ierr = MatRestoreRow(*usedmat,i,&j,(const PetscInt**)&cols,(const PetscScalar**)&vals);CHKERRQ(ierr); } } } } ierr = PetscFree(indexg);CHKERRQ(ierr); ierr = PetscFree(colsg);CHKERRQ(ierr); ierr = MatAssemblyBegin(temp_local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd (temp_local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); #if DEBUG { Vec lvec,rvec; PetscReal norm; ierr = MatCreateVecs(temp_local_mat,&lvec,&rvec);CHKERRQ(ierr); ierr = VecSet(lvec,1.0);CHKERRQ(ierr); ierr = MatMult(temp_local_mat,lvec,rvec);CHKERRQ(ierr); ierr = VecNorm(rvec,NORM_INFINITY,&norm);CHKERRQ(ierr); printf("Test null space of local mat % 1.14e\n",norm); ierr = VecDestroy(&lvec);CHKERRQ(ierr); ierr = VecDestroy(&rvec);CHKERRQ(ierr); } #endif *local_mat = temp_local_mat; ierr = MatDestroy(&elem_mat_DBC);CHKERRQ(ierr); PetscFunctionReturn(0); }
static PetscErrorCode ComputeSpecialBoundaryIndices(DomainData dd,IS *dirichlet,IS *neumann) { PetscErrorCode ierr; IS temp_dirichlet=0,temp_neumann=0; PetscInt localsize,i,j,k,*indices; PetscBool *touched; PetscFunctionBeginUser; localsize = dd.xm_l*dd.ym_l*dd.zm_l; ierr = PetscMalloc1(localsize,&indices);CHKERRQ(ierr); ierr = PetscMalloc1(localsize,&touched);CHKERRQ(ierr); for (i=0; i<localsize; i++) touched[i] = PETSC_FALSE; if (dirichlet) { i = 0; /* west boundary */ if (dd.ipx == 0) { for (k=0;k<dd.zm_l;k++) { for (j=0;j<dd.ym_l;j++) { indices[i]=k*dd.ym_l*dd.xm_l+j*dd.xm_l; touched[indices[i]]=PETSC_TRUE; i++; } } } ierr = ISCreateGeneral(dd.gcomm,i,indices,PETSC_COPY_VALUES,&temp_dirichlet);CHKERRQ(ierr); } if (neumann) { i = 0; /* west boundary */ if (dd.ipx == 0) { for (k=0;k<dd.zm_l;k++) { for (j=0;j<dd.ym_l;j++) { indices[i]=k*dd.ym_l*dd.xm_l+j*dd.xm_l; if (!touched[indices[i]]) { touched[indices[i]]=PETSC_TRUE; i++; } } } } /* east boundary */ if (dd.ipx == dd.npx-1) { for (k=0;k<dd.zm_l;k++) { for (j=0;j<dd.ym_l;j++) { indices[i]=k*dd.ym_l*dd.xm_l+j*dd.xm_l+dd.xm_l-1; if (!touched[indices[i]]) { touched[indices[i]]=PETSC_TRUE; i++; } } } } /* south boundary */ if (dd.ipy == 0 && dd.dim > 1) { for (k=0;k<dd.zm_l;k++) { for (j=0;j<dd.xm_l;j++) { indices[i]=k*dd.ym_l*dd.xm_l+j; if (!touched[indices[i]]) { touched[indices[i]]=PETSC_TRUE; i++; } } } } /* north boundary */ if (dd.ipy == dd.npy-1 && dd.dim > 1) { for (k=0;k<dd.zm_l;k++) { for (j=0;j<dd.xm_l;j++) { indices[i]=k*dd.ym_l*dd.xm_l+(dd.ym_l-1)*dd.xm_l+j; if (!touched[indices[i]]) { touched[indices[i]]=PETSC_TRUE; i++; } } } } /* bottom boundary */ if (dd.ipz == 0 && dd.dim > 2) { for (k=0;k<dd.ym_l;k++) { for (j=0;j<dd.xm_l;j++) { indices[i]=k*dd.xm_l+j; if (!touched[indices[i]]) { touched[indices[i]]=PETSC_TRUE; i++; } } } } /* top boundary */ if (dd.ipz == dd.npz-1 && dd.dim > 2) { for (k=0;k<dd.ym_l;k++) { for (j=0;j<dd.xm_l;j++) { indices[i]=(dd.zm_l-1)*dd.ym_l*dd.xm_l+k*dd.xm_l+j; if (!touched[indices[i]]) { touched[indices[i]]=PETSC_TRUE; i++; } } } } ierr = ISCreateGeneral(dd.gcomm,i,indices,PETSC_COPY_VALUES,&temp_neumann);CHKERRQ(ierr); } if (dirichlet) *dirichlet = temp_dirichlet; if (neumann) *neumann = temp_neumann; ierr = PetscFree(indices);CHKERRQ(ierr); ierr = PetscFree(touched);CHKERRQ(ierr); PetscFunctionReturn(0); }
PetscErrorCode MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense(Mat R,Mat A,Mat B,Mat RAB,PetscScalar *work) { Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*r=(Mat_SeqAIJ*)R->data; PetscErrorCode ierr; PetscScalar *b,r1,r2,r3,r4,*b1,*b2,*b3,*b4; MatScalar *aa,*ra; PetscInt cn=B->cmap->n,bm=B->rmap->n,col,i,j,n,*ai=a->i,*aj,am=A->rmap->n; PetscInt am2=2*am,am3=3*am,bm4=4*bm; PetscScalar *d,*c,*c2,*c3,*c4; PetscInt *rj,rm=R->rmap->n,dm=RAB->rmap->n,dn=RAB->cmap->n; PetscInt rm2=2*rm,rm3=3*rm,colrm; PetscFunctionBegin; if (!dm || !dn) PetscFunctionReturn(0); if (bm != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in A %D not equal rows in B %D\n",A->cmap->n,bm); if (am != R->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in R %D not equal rows in A %D\n",R->cmap->n,am); if (R->rmap->n != RAB->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows in RAB %D not equal rows in R %D\n",RAB->rmap->n,R->rmap->n); if (B->cmap->n != RAB->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in RAB %D not equal columns in B %D\n",RAB->cmap->n,B->cmap->n); ierr = MatDenseGetArray(B,&b);CHKERRQ(ierr); ierr = MatDenseGetArray(RAB,&d);CHKERRQ(ierr); b1 = b; b2 = b1 + bm; b3 = b2 + bm; b4 = b3 + bm; c = work; c2 = c + am; c3 = c2 + am; c4 = c3 + am; for (col=0; col<cn-4; col += 4){ /* over columns of C */ for (i=0; i<am; i++) { /* over rows of A in those columns */ r1 = r2 = r3 = r4 = 0.0; n = ai[i+1] - ai[i]; aj = a->j + ai[i]; aa = a->a + ai[i]; for (j=0; j<n; j++) { r1 += (*aa)*b1[*aj]; r2 += (*aa)*b2[*aj]; r3 += (*aa)*b3[*aj]; r4 += (*aa++)*b4[*aj++]; } c[i] = r1; c[am + i] = r2; c[am2 + i] = r3; c[am3 + i] = r4; } b1 += bm4; b2 += bm4; b3 += bm4; b4 += bm4; /* RAB[:,col] = R*C[:,col] */ colrm = col*rm; for (i=0; i<rm; i++) { /* over rows of R in those columns */ r1 = r2 = r3 = r4 = 0.0; n = r->i[i+1] - r->i[i]; rj = r->j + r->i[i]; ra = r->a + r->i[i]; for (j=0; j<n; j++) { r1 += (*ra)*c[*rj]; r2 += (*ra)*c2[*rj]; r3 += (*ra)*c3[*rj]; r4 += (*ra++)*c4[*rj++]; } d[colrm + i] = r1; d[colrm + rm + i] = r2; d[colrm + rm2 + i] = r3; d[colrm + rm3 + i] = r4; } } for (;col<cn; col++){ /* over extra columns of C */ for (i=0; i<am; i++) { /* over rows of A in those columns */ r1 = 0.0; n = a->i[i+1] - a->i[i]; aj = a->j + a->i[i]; aa = a->a + a->i[i]; for (j=0; j<n; j++) { r1 += (*aa++)*b1[*aj++]; } c[i] = r1; } b1 += bm; for (i=0; i<rm; i++) { /* over rows of R in those columns */ r1 = 0.0; n = r->i[i+1] - r->i[i]; rj = r->j + r->i[i]; ra = r->a + r->i[i]; for (j=0; j<n; j++) { r1 += (*ra++)*c[*rj++]; } d[col*rm + i] = r1; } } ierr = PetscLogFlops(cn*2.0*(a->nz + r->nz));CHKERRQ(ierr); ierr = MatDenseRestoreArray(B,&b);CHKERRQ(ierr); ierr = MatDenseRestoreArray(RAB,&d);CHKERRQ(ierr); ierr = MatAssemblyBegin(RAB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(RAB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); PetscFunctionReturn(0); }
PetscErrorCode VecMDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z) { Vec_Seq *xv = (Vec_Seq *)xin->data; PetscErrorCode ierr; PetscInt n = xin->map->n,i,j,nv_rem,j_rem; PetscScalar sum0,sum1,sum2,sum3,x0,x1,x2,x3; const PetscScalar * PETSC_RESTRICT yy0,* PETSC_RESTRICT yy1,* PETSC_RESTRICT yy2,*PETSC_RESTRICT yy3,* PETSC_RESTRICT x; Vec *yy; PetscFunctionBegin; sum0 = 0.; sum1 = 0.; sum2 = 0.; i = nv; nv_rem = nv&0x3; yy = (Vec *)yin; j = n; x = xv->array; switch (nv_rem) { case 3: ierr = VecGetArray(yy[0],(PetscScalar **)&yy0);CHKERRQ(ierr); ierr = VecGetArray(yy[1],(PetscScalar **)&yy1);CHKERRQ(ierr); ierr = VecGetArray(yy[2],(PetscScalar **)&yy2);CHKERRQ(ierr); switch (j_rem=j&0x3) { case 3: x2 = x[2]; sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]); sum2 += x2*PetscConj(yy2[2]); case 2: x1 = x[1]; sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]); sum2 += x1*PetscConj(yy2[1]); case 1: x0 = x[0]; sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]); sum2 += x0*PetscConj(yy2[0]); case 0: x += j_rem; yy0 += j_rem; yy1 += j_rem; yy2 += j_rem; j -= j_rem; break; } while (j>0) { x0 = x[0]; x1 = x[1]; x2 = x[2]; x3 = x[3]; x += 4; sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4; sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4; sum2 += x0*PetscConj(yy2[0]) + x1*PetscConj(yy2[1]) + x2*PetscConj(yy2[2]) + x3*PetscConj(yy2[3]); yy2+=4; j -= 4; } z[0] = sum0; z[1] = sum1; z[2] = sum2; ierr = VecRestoreArray(yy[0],(PetscScalar **)&yy0);CHKERRQ(ierr); ierr = VecRestoreArray(yy[1],(PetscScalar **)&yy1);CHKERRQ(ierr); ierr = VecRestoreArray(yy[2],(PetscScalar **)&yy2);CHKERRQ(ierr); break; case 2: ierr = VecGetArray(yy[0],(PetscScalar **)&yy0);CHKERRQ(ierr); ierr = VecGetArray(yy[1],(PetscScalar **)&yy1);CHKERRQ(ierr); switch (j_rem=j&0x3) { case 3: x2 = x[2]; sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]); case 2: x1 = x[1]; sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]); case 1: x0 = x[0]; sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]); case 0: x += j_rem; yy0 += j_rem; yy1 += j_rem; j -= j_rem; break; } while (j>0) { x0 = x[0]; x1 = x[1]; x2 = x[2]; x3 = x[3]; x += 4; sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4; sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4; j -= 4; } z[0] = sum0; z[1] = sum1; ierr = VecRestoreArray(yy[0],(PetscScalar **)&yy0);CHKERRQ(ierr); ierr = VecRestoreArray(yy[1],(PetscScalar **)&yy1);CHKERRQ(ierr); break; case 1: ierr = VecGetArray(yy[0],(PetscScalar **)&yy0);CHKERRQ(ierr); switch (j_rem=j&0x3) { case 3: x2 = x[2]; sum0 += x2*PetscConj(yy0[2]); case 2: x1 = x[1]; sum0 += x1*PetscConj(yy0[1]); case 1: x0 = x[0]; sum0 += x0*PetscConj(yy0[0]); case 0: x += j_rem; yy0 += j_rem; j -= j_rem; break; } while (j>0) { sum0 += x[0]*PetscConj(yy0[0]) + x[1]*PetscConj(yy0[1]) + x[2]*PetscConj(yy0[2]) + x[3]*PetscConj(yy0[3]); yy0+=4; j -= 4; x+=4; } z[0] = sum0; ierr = VecRestoreArray(yy[0],(PetscScalar **)&yy0);CHKERRQ(ierr); break; case 0: break; } z += nv_rem; i -= nv_rem; yy += nv_rem; while (i >0) { sum0 = 0.; sum1 = 0.; sum2 = 0.; sum3 = 0.; ierr = VecGetArray(yy[0],(PetscScalar **)&yy0);CHKERRQ(ierr); ierr = VecGetArray(yy[1],(PetscScalar **)&yy1);CHKERRQ(ierr); ierr = VecGetArray(yy[2],(PetscScalar **)&yy2);CHKERRQ(ierr); ierr = VecGetArray(yy[3],(PetscScalar **)&yy3);CHKERRQ(ierr); j = n; x = xv->array; switch (j_rem=j&0x3) { case 3: x2 = x[2]; sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]); sum2 += x2*PetscConj(yy2[2]); sum3 += x2*PetscConj(yy3[2]); case 2: x1 = x[1]; sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]); sum2 += x1*PetscConj(yy2[1]); sum3 += x1*PetscConj(yy3[1]); case 1: x0 = x[0]; sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]); sum2 += x0*PetscConj(yy2[0]); sum3 += x0*PetscConj(yy3[0]); case 0: x += j_rem; yy0 += j_rem; yy1 += j_rem; yy2 += j_rem; yy3 += j_rem; j -= j_rem; break; } while (j>0) { x0 = x[0]; x1 = x[1]; x2 = x[2]; x3 = x[3]; x += 4; sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4; sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4; sum2 += x0*PetscConj(yy2[0]) + x1*PetscConj(yy2[1]) + x2*PetscConj(yy2[2]) + x3*PetscConj(yy2[3]); yy2+=4; sum3 += x0*PetscConj(yy3[0]) + x1*PetscConj(yy3[1]) + x2*PetscConj(yy3[2]) + x3*PetscConj(yy3[3]); yy3+=4; j -= 4; } z[0] = sum0; z[1] = sum1; z[2] = sum2; z[3] = sum3; z += 4; i -= 4; ierr = VecRestoreArray(yy[0],(PetscScalar **)&yy0);CHKERRQ(ierr); ierr = VecRestoreArray(yy[1],(PetscScalar **)&yy1);CHKERRQ(ierr); ierr = VecRestoreArray(yy[2],(PetscScalar **)&yy2);CHKERRQ(ierr); ierr = VecRestoreArray(yy[3],(PetscScalar **)&yy3);CHKERRQ(ierr); yy += 4; } ierr = PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));CHKERRQ(ierr); PetscFunctionReturn(0); }
static PetscErrorCode InitializeDomainData(DomainData *dd) { PetscErrorCode ierr; PetscMPIInt sizes,rank; PetscInt factor; PetscFunctionBeginUser; dd->gcomm = PETSC_COMM_WORLD; ierr = MPI_Comm_size(dd->gcomm,&sizes);CHKERRQ(ierr); ierr = MPI_Comm_rank(dd->gcomm,&rank);CHKERRQ(ierr); /* test data passed in */ if (sizes<2) SETERRQ(dd->gcomm,PETSC_ERR_USER,"This is not a uniprocessor test"); /* Get informations from command line */ /* Processors/subdomains per dimension */ /* Default is 1d problem */ dd->npx = sizes; dd->npy = 0; dd->npz = 0; dd->dim = 1; ierr = PetscOptionsGetInt(NULL,NULL,"-npx",&dd->npx,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetInt(NULL,NULL,"-npy",&dd->npy,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetInt(NULL,NULL,"-npz",&dd->npz,NULL);CHKERRQ(ierr); if (dd->npy) dd->dim++; if (dd->npz) dd->dim++; /* Number of elements per dimension */ /* Default is one element per subdomain */ dd->nex = dd->npx; dd->ney = dd->npy; dd->nez = dd->npz; ierr = PetscOptionsGetInt(NULL,NULL,"-nex",&dd->nex,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetInt(NULL,NULL,"-ney",&dd->ney,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetInt(NULL,NULL,"-nez",&dd->nez,NULL);CHKERRQ(ierr); if (!dd->npy) { dd->ney=0; dd->nez=0; } if (!dd->npz) dd->nez=0; /* Spectral degree */ dd->p = 3; ierr = PetscOptionsGetInt(NULL,NULL,"-p",&dd->p,NULL);CHKERRQ(ierr); /* pure neumann problem? */ dd->pure_neumann = PETSC_FALSE; ierr = PetscOptionsGetBool(NULL,NULL,"-pureneumann",&dd->pure_neumann,NULL);CHKERRQ(ierr); /* How to enforce dirichlet boundary conditions (in case pureneumann has not been requested explicitly) */ dd->DBC_zerorows = PETSC_FALSE; ierr = PetscOptionsGetBool(NULL,NULL,"-usezerorows",&dd->DBC_zerorows,NULL);CHKERRQ(ierr); if (dd->pure_neumann) dd->DBC_zerorows = PETSC_FALSE; dd->scalingfactor = 1.0; factor = 0.0; ierr = PetscOptionsGetInt(NULL,NULL,"-jump",&factor,NULL);CHKERRQ(ierr); /* checkerboard pattern */ dd->scalingfactor = PetscPowScalar(10.0,(PetscScalar)factor*PetscPowScalar(-1.0,(PetscScalar)rank)); /* test data passed in */ if (dd->dim==1) { if (sizes!=dd->npx) SETERRQ(dd->gcomm,PETSC_ERR_USER,"Number of mpi procs in 1D must be equal to npx"); if (dd->nex<dd->npx) SETERRQ(dd->gcomm,PETSC_ERR_USER,"Number of elements per dim must be greater/equal than number of procs per dim"); } else if (dd->dim==2) { if (sizes!=dd->npx*dd->npy) SETERRQ(dd->gcomm,PETSC_ERR_USER,"Number of mpi procs in 2D must be equal to npx*npy"); if (dd->nex<dd->npx || dd->ney<dd->npy) SETERRQ(dd->gcomm,PETSC_ERR_USER,"Number of elements per dim must be greater/equal than number of procs per dim"); } else { if (sizes!=dd->npx*dd->npy*dd->npz) SETERRQ(dd->gcomm,PETSC_ERR_USER,"Number of mpi procs in 3D must be equal to npx*npy*npz"); if (dd->nex<dd->npx || dd->ney<dd->npy || dd->nez<dd->npz) SETERRQ(dd->gcomm,PETSC_ERR_USER,"Number of elements per dim must be greater/equal than number of procs per dim"); } PetscFunctionReturn(0); }
PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering(Mat C,Mat A,const MatFactorInfo *info) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ*)C->data; PetscErrorCode ierr; PetscInt i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; PetscInt *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; MatScalar *ba = b->a,*aa,*ap,*dk,*uik; MatScalar *u,*d,*w,*wp,u0,u1,u2,u3,u4,u5,u6,u7,u8,u9,u10,u11,u12; MatScalar u13,u14,u15,u16,u17,u18,u19,u20,u21,u22,u23,u24,u25,u26,u27; MatScalar u28,u29,u30,u31,u32,u33,u34,u35; MatScalar d0,d1,d2,d3,d4,d5,d6,d7,d8,d9,d10,d11,d12; MatScalar d13,d14,d15,d16,d17,d18,d19,d20,d21,d22,d23,d24,d25,d26,d27; MatScalar d28,d29,d30,d31,d32,d33,d34,d35; MatScalar m0,m1,m2,m3,m4,m5,m6,m7,m8,m9,m10,m11,m12; MatScalar m13,m14,m15,m16,m17,m18,m19,m20,m21,m22,m23,m24,m25,m26,m27; MatScalar m28,m29,m30,m31,m32,m33,m34,m35; PetscReal shift = info->shiftamount; PetscBool allowzeropivot,zeropivotdetected; PetscFunctionBegin; /* initialization */ allowzeropivot = PetscNot(A->erroriffailure); ierr = PetscCalloc1(36*mbs,&w);CHKERRQ(ierr); ierr = PetscMalloc2(mbs,&il,mbs,&jl);CHKERRQ(ierr); for (i=0; i<mbs; i++) { jl[i] = mbs; il[0] = 0; } ierr = PetscMalloc2(36,&dk,36,&uik);CHKERRQ(ierr); ai = a->i; aj = a->j; aa = a->a; /* for each row k */ for (k = 0; k<mbs; k++) { /*initialize k-th row with elements nonzero in row k of A */ jmin = ai[k]; jmax = ai[k+1]; if (jmin < jmax) { ap = aa + jmin*36; for (j = jmin; j < jmax; j++) { vj = aj[j]; /* block col. index */ wp = w + vj*36; for (i=0; i<36; i++) *wp++ = *ap++; } } /* modify k-th row by adding in those rows i with U(i,k) != 0 */ ierr = PetscMemcpy(dk,w+k*36,36*sizeof(MatScalar));CHKERRQ(ierr); i = jl[k]; /* first row to be added to k_th row */ while (i < mbs) { nexti = jl[i]; /* next row to be added to k_th row */ /* compute multiplier */ ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ /* uik = -inv(Di)*U_bar(i,k) */ d = ba + i*36; u = ba + ili*36; u0 = u[0]; u1 = u[1]; u2 = u[2]; u3 = u[3]; u4 = u[4]; u5 = u[5]; u6 = u[6]; u7 = u[7]; u8 = u[8]; u9 = u[9]; u10 = u[10]; u11 = u[11]; u12 = u[12]; u13 = u[13]; u14 = u[14]; u15 = u[15]; u16 = u[16]; u17 = u[17]; u18 = u[18]; u19 = u[19]; u20 = u[20]; u21 = u[21]; u22 = u[22]; u23 = u[23]; u24 = u[24]; u25 = u[25]; u26 = u[26]; u27 = u[27]; u28 = u[28]; u29 = u[29]; u30 = u[30]; u31 = u[31]; u32 = u[32]; u33 = u[33]; u34 = u[34]; u35 = u[35]; d0 = d[0]; d1 = d[1]; d2 = d[2]; d3 = d[3]; d4 = d[4]; d5 = d[5]; d6 = d[6]; d7 = d[7]; d8 = d[8]; d9 = d[9]; d10 = d[10]; d11 = d[11]; d12 = d[12]; d13 = d[13]; d14 = d[14]; d15 = d[15]; d16 = d[16]; d17 = d[17]; d18 = d[18]; d19 = d[19]; d20 = d[20]; d21 = d[21]; d22 = d[22]; d23 = d[23]; d24 = d[24]; d25 = d[25]; d26 = d[26]; d27 = d[27]; d28 = d[28]; d29 = d[29]; d30 = d[30]; d31 = d[31]; d32 = d[32]; d33 = d[33]; d34 = d[34]; d35 = d[35]; m0 = uik[0] = -(d0*u0 + d6*u1 + d12*u2 + d18*u3 + d24*u4 + d30*u5); m1 = uik[1] = -(d1*u0 + d7*u1 + d13*u2 + d19*u3 + d25*u4 + d31*u5); m2 = uik[2] = -(d2*u0 + d8*u1 + d14*u2 + d20*u3 + d26*u4 + d32*u5); m3 = uik[3] = -(d3*u0 + d9*u1 + d15*u2 + d21*u3 + d27*u4 + d33*u5); m4 = uik[4] = -(d4*u0+ d10*u1 + d16*u2 + d22*u3 + d28*u4 + d34*u5); m5 = uik[5] = -(d5*u0+ d11*u1 + d17*u2 + d23*u3 + d29*u4 + d35*u5); m6 = uik[6] = -(d0*u6 + d6*u7 + d12*u8 + d18*u9 + d24*u10 + d30*u11); m7 = uik[7] = -(d1*u6 + d7*u7 + d13*u8 + d19*u9 + d25*u10 + d31*u11); m8 = uik[8] = -(d2*u6 + d8*u7 + d14*u8 + d20*u9 + d26*u10 + d32*u11); m9 = uik[9] = -(d3*u6 + d9*u7 + d15*u8 + d21*u9 + d27*u10 + d33*u11); m10 = uik[10]= -(d4*u6+ d10*u7 + d16*u8 + d22*u9 + d28*u10 + d34*u11); m11 = uik[11]= -(d5*u6+ d11*u7 + d17*u8 + d23*u9 + d29*u10 + d35*u11); m12 = uik[12] = -(d0*u12 + d6*u13 + d12*u14 + d18*u15 + d24*u16 + d30*u17); m13 = uik[13] = -(d1*u12 + d7*u13 + d13*u14 + d19*u15 + d25*u16 + d31*u17); m14 = uik[14] = -(d2*u12 + d8*u13 + d14*u14 + d20*u15 + d26*u16 + d32*u17); m15 = uik[15] = -(d3*u12 + d9*u13 + d15*u14 + d21*u15 + d27*u16 + d33*u17); m16 = uik[16] = -(d4*u12+ d10*u13 + d16*u14 + d22*u15 + d28*u16 + d34*u17); m17 = uik[17] = -(d5*u12+ d11*u13 + d17*u14 + d23*u15 + d29*u16 + d35*u17); m18 = uik[18] = -(d0*u18 + d6*u19 + d12*u20 + d18*u21 + d24*u22 + d30*u23); m19 = uik[19] = -(d1*u18 + d7*u19 + d13*u20 + d19*u21 + d25*u22 + d31*u23); m20 = uik[20] = -(d2*u18 + d8*u19 + d14*u20 + d20*u21 + d26*u22 + d32*u23); m21 = uik[21] = -(d3*u18 + d9*u19 + d15*u20 + d21*u21 + d27*u22 + d33*u23); m22 = uik[22] = -(d4*u18+ d10*u19 + d16*u20 + d22*u21 + d28*u22 + d34*u23); m23 = uik[23] = -(d5*u18+ d11*u19 + d17*u20 + d23*u21 + d29*u22 + d35*u23); m24 = uik[24] = -(d0*u24 + d6*u25 + d12*u26 + d18*u27 + d24*u28 + d30*u29); m25 = uik[25] = -(d1*u24 + d7*u25 + d13*u26 + d19*u27 + d25*u28 + d31*u29); m26 = uik[26] = -(d2*u24 + d8*u25 + d14*u26 + d20*u27 + d26*u28 + d32*u29); m27 = uik[27] = -(d3*u24 + d9*u25 + d15*u26 + d21*u27 + d27*u28 + d33*u29); m28 = uik[28] = -(d4*u24+ d10*u25 + d16*u26 + d22*u27 + d28*u28 + d34*u29); m29 = uik[29] = -(d5*u24+ d11*u25 + d17*u26 + d23*u27 + d29*u28 + d35*u29); m30 = uik[30] = -(d0*u30 + d6*u31 + d12*u32 + d18*u33 + d24*u34 + d30*u35); m31 = uik[31] = -(d1*u30 + d7*u31 + d13*u32 + d19*u33 + d25*u34 + d31*u35); m32 = uik[32] = -(d2*u30 + d8*u31 + d14*u32 + d20*u33 + d26*u34 + d32*u35); m33 = uik[33] = -(d3*u30 + d9*u31 + d15*u32 + d21*u33 + d27*u34 + d33*u35); m34 = uik[34] = -(d4*u30+ d10*u31 + d16*u32 + d22*u33 + d28*u34 + d34*u35); m35 = uik[35] = -(d5*u30+ d11*u31 + d17*u32 + d23*u33 + d29*u34 + d35*u35); /* update D(k) += -U(i,k)^T * U_bar(i,k) */ dk[0] += m0*u0 + m1*u1 + m2*u2 + m3*u3 + m4*u4 + m5*u5; dk[1] += m6*u0 + m7*u1 + m8*u2 + m9*u3+ m10*u4+ m11*u5; dk[2] += m12*u0+ m13*u1+ m14*u2+ m15*u3+ m16*u4+ m17*u5; dk[3] += m18*u0+ m19*u1+ m20*u2+ m21*u3+ m22*u4+ m23*u5; dk[4] += m24*u0+ m25*u1+ m26*u2+ m27*u3+ m28*u4+ m29*u5; dk[5] += m30*u0+ m31*u1+ m32*u2+ m33*u3+ m34*u4+ m35*u5; dk[6] += m0*u6 + m1*u7 + m2*u8 + m3*u9 + m4*u10 + m5*u11; dk[7] += m6*u6 + m7*u7 + m8*u8 + m9*u9+ m10*u10+ m11*u11; dk[8] += m12*u6+ m13*u7+ m14*u8+ m15*u9+ m16*u10+ m17*u11; dk[9] += m18*u6+ m19*u7+ m20*u8+ m21*u9+ m22*u10+ m23*u11; dk[10]+= m24*u6+ m25*u7+ m26*u8+ m27*u9+ m28*u10+ m29*u11; dk[11]+= m30*u6+ m31*u7+ m32*u8+ m33*u9+ m34*u10+ m35*u11; dk[12]+= m0*u12 + m1*u13 + m2*u14 + m3*u15 + m4*u16 + m5*u17; dk[13]+= m6*u12 + m7*u13 + m8*u14 + m9*u15+ m10*u16+ m11*u17; dk[14]+= m12*u12+ m13*u13+ m14*u14+ m15*u15+ m16*u16+ m17*u17; dk[15]+= m18*u12+ m19*u13+ m20*u14+ m21*u15+ m22*u16+ m23*u17; dk[16]+= m24*u12+ m25*u13+ m26*u14+ m27*u15+ m28*u16+ m29*u17; dk[17]+= m30*u12+ m31*u13+ m32*u14+ m33*u15+ m34*u16+ m35*u17; dk[18]+= m0*u18 + m1*u19 + m2*u20 + m3*u21 + m4*u22 + m5*u23; dk[19]+= m6*u18 + m7*u19 + m8*u20 + m9*u21+ m10*u22+ m11*u23; dk[20]+= m12*u18+ m13*u19+ m14*u20+ m15*u21+ m16*u22+ m17*u23; dk[21]+= m18*u18+ m19*u19+ m20*u20+ m21*u21+ m22*u22+ m23*u23; dk[22]+= m24*u18+ m25*u19+ m26*u20+ m27*u21+ m28*u22+ m29*u23; dk[23]+= m30*u18+ m31*u19+ m32*u20+ m33*u21+ m34*u22+ m35*u23; dk[24]+= m0*u24 + m1*u25 + m2*u26 + m3*u27 + m4*u28 + m5*u29; dk[25]+= m6*u24 + m7*u25 + m8*u26 + m9*u27+ m10*u28+ m11*u29; dk[26]+= m12*u24+ m13*u25+ m14*u26+ m15*u27+ m16*u28+ m17*u29; dk[27]+= m18*u24+ m19*u25+ m20*u26+ m21*u27+ m22*u28+ m23*u29; dk[28]+= m24*u24+ m25*u25+ m26*u26+ m27*u27+ m28*u28+ m29*u29; dk[29]+= m30*u24+ m31*u25+ m32*u26+ m33*u27+ m34*u28+ m35*u29; dk[30]+= m0*u30 + m1*u31 + m2*u32 + m3*u33 + m4*u34 + m5*u35; dk[31]+= m6*u30 + m7*u31 + m8*u32 + m9*u33+ m10*u34+ m11*u35; dk[32]+= m12*u30+ m13*u31+ m14*u32+ m15*u33+ m16*u34+ m17*u35; dk[33]+= m18*u30+ m19*u31+ m20*u32+ m21*u33+ m22*u34+ m23*u35; dk[34]+= m24*u30+ m25*u31+ m26*u32+ m27*u33+ m28*u34+ m29*u35; dk[35]+= m30*u30+ m31*u31+ m32*u32+ m33*u33+ m34*u34+ m35*u35; ierr = PetscLogFlops(216.0*4.0);CHKERRQ(ierr); /* update -U(i,k) */ ierr = PetscMemcpy(ba+ili*36,uik,36*sizeof(MatScalar));CHKERRQ(ierr); /* add multiple of row i to k-th row ... */ jmin = ili + 1; jmax = bi[i+1]; if (jmin < jmax) { for (j=jmin; j<jmax; j++) { /* w += -U(i,k)^T * U_bar(i,j) */ wp = w + bj[j]*36; u = ba + j*36; u0 = u[0]; u1 = u[1]; u2 = u[2]; u3 = u[3]; u4 = u[4]; u5 = u[5]; u6 = u[6]; u7 = u[7]; u8 = u[8]; u9 = u[9]; u10 = u[10]; u11 = u[11]; u12 = u[12]; u13 = u[13]; u14 = u[14]; u15 = u[15]; u16 = u[16]; u17 = u[17]; u18 = u[18]; u19 = u[19]; u20 = u[20]; u21 = u[21]; u22 = u[22]; u23 = u[23]; u24 = u[24]; u25 = u[25]; u26 = u[26]; u27 = u[27]; u28 = u[28]; u29 = u[29]; u30 = u[30]; u31 = u[31]; u32 = u[32]; u33 = u[33]; u34 = u[34]; u35 = u[35]; wp[0] += m0*u0 + m1*u1 + m2*u2 + m3*u3 + m4*u4 + m5*u5; wp[1] += m6*u0 + m7*u1 + m8*u2 + m9*u3+ m10*u4+ m11*u5; wp[2] += m12*u0+ m13*u1+ m14*u2+ m15*u3+ m16*u4+ m17*u5; wp[3] += m18*u0+ m19*u1+ m20*u2+ m21*u3+ m22*u4+ m23*u5; wp[4] += m24*u0+ m25*u1+ m26*u2+ m27*u3+ m28*u4+ m29*u5; wp[5] += m30*u0+ m31*u1+ m32*u2+ m33*u3+ m34*u4+ m35*u5; wp[6] += m0*u6 + m1*u7 + m2*u8 + m3*u9 + m4*u10 + m5*u11; wp[7] += m6*u6 + m7*u7 + m8*u8 + m9*u9+ m10*u10+ m11*u11; wp[8] += m12*u6+ m13*u7+ m14*u8+ m15*u9+ m16*u10+ m17*u11; wp[9] += m18*u6+ m19*u7+ m20*u8+ m21*u9+ m22*u10+ m23*u11; wp[10]+= m24*u6+ m25*u7+ m26*u8+ m27*u9+ m28*u10+ m29*u11; wp[11]+= m30*u6+ m31*u7+ m32*u8+ m33*u9+ m34*u10+ m35*u11; wp[12]+= m0*u12 + m1*u13 + m2*u14 + m3*u15 + m4*u16 + m5*u17; wp[13]+= m6*u12 + m7*u13 + m8*u14 + m9*u15+ m10*u16+ m11*u17; wp[14]+= m12*u12+ m13*u13+ m14*u14+ m15*u15+ m16*u16+ m17*u17; wp[15]+= m18*u12+ m19*u13+ m20*u14+ m21*u15+ m22*u16+ m23*u17; wp[16]+= m24*u12+ m25*u13+ m26*u14+ m27*u15+ m28*u16+ m29*u17; wp[17]+= m30*u12+ m31*u13+ m32*u14+ m33*u15+ m34*u16+ m35*u17; wp[18]+= m0*u18 + m1*u19 + m2*u20 + m3*u21 + m4*u22 + m5*u23; wp[19]+= m6*u18 + m7*u19 + m8*u20 + m9*u21+ m10*u22+ m11*u23; wp[20]+= m12*u18+ m13*u19+ m14*u20+ m15*u21+ m16*u22+ m17*u23; wp[21]+= m18*u18+ m19*u19+ m20*u20+ m21*u21+ m22*u22+ m23*u23; wp[22]+= m24*u18+ m25*u19+ m26*u20+ m27*u21+ m28*u22+ m29*u23; wp[23]+= m30*u18+ m31*u19+ m32*u20+ m33*u21+ m34*u22+ m35*u23; wp[24]+= m0*u24 + m1*u25 + m2*u26 + m3*u27 + m4*u28 + m5*u29; wp[25]+= m6*u24 + m7*u25 + m8*u26 + m9*u27+ m10*u28+ m11*u29; wp[26]+= m12*u24+ m13*u25+ m14*u26+ m15*u27+ m16*u28+ m17*u29; wp[27]+= m18*u24+ m19*u25+ m20*u26+ m21*u27+ m22*u28+ m23*u29; wp[28]+= m24*u24+ m25*u25+ m26*u26+ m27*u27+ m28*u28+ m29*u29; wp[29]+= m30*u24+ m31*u25+ m32*u26+ m33*u27+ m34*u28+ m35*u29; wp[30]+= m0*u30 + m1*u31 + m2*u32 + m3*u33 + m4*u34 + m5*u35; wp[31]+= m6*u30 + m7*u31 + m8*u32 + m9*u33+ m10*u34+ m11*u35; wp[32]+= m12*u30+ m13*u31+ m14*u32+ m15*u33+ m16*u34+ m17*u35; wp[33]+= m18*u30+ m19*u31+ m20*u32+ m21*u33+ m22*u34+ m23*u35; wp[34]+= m24*u30+ m25*u31+ m26*u32+ m27*u33+ m28*u34+ m29*u35; wp[35]+= m30*u30+ m31*u31+ m32*u32+ m33*u33+ m34*u34+ m35*u35; } ierr = PetscLogFlops(2.0*216.0*(jmax-jmin));CHKERRQ(ierr); /* ... add i to row list for next nonzero entry */ il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; /* update jl */ } i = nexti; } /* save nonzero entries in k-th row of U ... */ /* invert diagonal block */ d = ba+k*36; ierr = PetscMemcpy(d,dk,36*sizeof(MatScalar));CHKERRQ(ierr); ierr = PetscKernel_A_gets_inverse_A_6(d,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); if (zeropivotdetected) C->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; jmin = bi[k]; jmax = bi[k+1]; if (jmin < jmax) { for (j=jmin; j<jmax; j++) { vj = bj[j]; /* block col. index of U */ u = ba + j*36; wp = w + vj*36; for (k1=0; k1<36; k1++) { *u++ = *wp; *wp++ = 0.0; } } /* ... add k to row list for first nonzero entry in k-th row */ il[k] = jmin; i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; } } ierr = PetscFree(w);CHKERRQ(ierr); ierr = PetscFree2(il,jl);CHKERRQ(ierr); ierr = PetscFree2(dk,uik);CHKERRQ(ierr); C->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering_inplace; C->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering_inplace; C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_6_NaturalOrdering_inplace; C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering_inplace; C->assembled = PETSC_TRUE; C->preallocated = PETSC_TRUE; ierr = PetscLogFlops(1.3333*216*b->mbs);CHKERRQ(ierr); /* from inverting diagonal blocks */ PetscFunctionReturn(0); }
static PetscErrorCode SNESLineSearchApply_CP(SNESLineSearch linesearch) { PetscBool changed_y, changed_w, domainerror; PetscErrorCode ierr; Vec X, Y, F, W; SNES snes; PetscReal xnorm, ynorm, gnorm, steptol, atol, rtol, ltol, maxstep; PetscReal lambda, lambda_old, lambda_update, delLambda; PetscScalar fty, fty_init, fty_old, fty_mid1, fty_mid2, s; PetscInt i, max_its; PetscViewer monitor; PetscFunctionBegin; ierr = SNESLineSearchGetVecs(linesearch, &X, &F, &Y, &W, PETSC_NULL);CHKERRQ(ierr); ierr = SNESLineSearchGetNorms(linesearch, &xnorm, &gnorm, &ynorm);CHKERRQ(ierr); ierr = SNESLineSearchGetSNES(linesearch, &snes);CHKERRQ(ierr); ierr = SNESLineSearchGetLambda(linesearch, &lambda);CHKERRQ(ierr); ierr = SNESLineSearchGetTolerances(linesearch, &steptol, &maxstep, &rtol, &atol, <ol, &max_its);CHKERRQ(ierr); ierr = SNESLineSearchSetSuccess(linesearch, PETSC_TRUE);CHKERRQ(ierr); ierr = SNESLineSearchGetMonitor(linesearch, &monitor);CHKERRQ(ierr); /* precheck */ ierr = SNESLineSearchPreCheck(linesearch,X,Y,&changed_y);CHKERRQ(ierr); lambda_old = 0.0; ierr = VecDot(F, Y, &fty_old);CHKERRQ(ierr); fty_init = fty_old; for (i = 0; i < max_its; i++) { /* compute the norm at lambda */ ierr = VecCopy(X, W);CHKERRQ(ierr); ierr = VecAXPY(W, -lambda, Y);CHKERRQ(ierr); if (linesearch->ops->viproject) { ierr = (*linesearch->ops->viproject)(snes, W);CHKERRQ(ierr); } ierr = SNESComputeFunction(snes, W, F);CHKERRQ(ierr); ierr = VecDot(F, Y, &fty);CHKERRQ(ierr); delLambda = lambda - lambda_old; /* check for convergence */ if (PetscAbsReal(delLambda) < steptol*lambda) break; if (PetscAbsScalar(fty) / PetscAbsScalar(fty_init) < rtol) break; if (PetscAbsScalar(fty) < atol && i > 0) break; if (monitor) { ierr = PetscViewerASCIIAddTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(monitor," Line search: lambdas = [%g, %g], ftys = [%g, %g]\n",(double)lambda, (double)lambda_old, (double)PetscRealPart(fty), (double)PetscRealPart(fty_old));CHKERRQ(ierr); ierr = PetscViewerASCIISubtractTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr); } /* compute the search direction */ if (linesearch->order == SNES_LINESEARCH_ORDER_LINEAR) { s = (fty - fty_old) / delLambda; } else if (linesearch->order == SNES_LINESEARCH_ORDER_QUADRATIC) { ierr = VecCopy(X, W);CHKERRQ(ierr); ierr = VecAXPY(W, -0.5*(lambda + lambda_old), Y);CHKERRQ(ierr); if (linesearch->ops->viproject) { ierr = (*linesearch->ops->viproject)(snes, W);CHKERRQ(ierr); } ierr = SNESComputeFunction(snes, W, F);CHKERRQ(ierr); ierr = VecDot(F, Y, &fty_mid1);CHKERRQ(ierr); s = (3.*fty - 4.*fty_mid1 + fty_old) / delLambda; } else { ierr = VecCopy(X, W);CHKERRQ(ierr); ierr = VecAXPY(W, -0.5*(lambda + lambda_old), Y);CHKERRQ(ierr); if (linesearch->ops->viproject) { ierr = (*linesearch->ops->viproject)(snes, W);CHKERRQ(ierr); } ierr = SNESComputeFunction(snes, W, F);CHKERRQ(ierr); ierr = VecDot(F, Y, &fty_mid1);CHKERRQ(ierr); ierr = VecCopy(X, W);CHKERRQ(ierr); ierr = VecAXPY(W, -(lambda + 0.5*(lambda - lambda_old)), Y);CHKERRQ(ierr); if (linesearch->ops->viproject) { ierr = (*linesearch->ops->viproject)(snes, W);CHKERRQ(ierr); } ierr = SNESComputeFunction(snes, W, F);CHKERRQ(ierr); ierr = VecDot(F, Y, &fty_mid2);CHKERRQ(ierr); s = (2.*fty_mid2 + 3.*fty - 6.*fty_mid1 + fty_old) / (3.*delLambda); } /* if the solve is going in the wrong direction, fix it */ if (PetscRealPart(s) > 0.) s = -s; lambda_update = lambda - PetscRealPart(fty / s); /* switch directions if we stepped out of bounds */ if (lambda_update < steptol) { lambda_update = lambda + PetscRealPart(fty / s); } if (PetscIsInfOrNanScalar(lambda_update)) break; if (lambda_update > maxstep) { break; } /* compute the new state of the line search */ lambda_old = lambda; lambda = lambda_update; fty_old = fty; } /* construct the solution */ ierr = VecCopy(X, W);CHKERRQ(ierr); ierr = VecAXPY(W, -lambda, Y);CHKERRQ(ierr); if (linesearch->ops->viproject) { ierr = (*linesearch->ops->viproject)(snes, W);CHKERRQ(ierr); } /* postcheck */ ierr = SNESLineSearchPostCheck(linesearch,X,Y,W,&changed_y,&changed_w);CHKERRQ(ierr); if (changed_y) { ierr = VecAXPY(X, -lambda, Y);CHKERRQ(ierr); if (linesearch->ops->viproject) { ierr = (*linesearch->ops->viproject)(snes, X);CHKERRQ(ierr); } } else { ierr = VecCopy(W, X);CHKERRQ(ierr); } ierr = SNESComputeFunction(snes,X,F);CHKERRQ(ierr); ierr = SNESGetFunctionDomainError(snes, &domainerror);CHKERRQ(ierr); if (domainerror) { ierr = SNESLineSearchSetSuccess(linesearch, PETSC_FALSE);CHKERRQ(ierr); PetscFunctionReturn(0); } ierr = SNESLineSearchComputeNorms(linesearch);CHKERRQ(ierr); ierr = SNESLineSearchGetNorms(linesearch, &xnorm, &gnorm, &ynorm);CHKERRQ(ierr); ierr = SNESLineSearchSetLambda(linesearch, lambda);CHKERRQ(ierr); if (monitor) { ierr = PetscViewerASCIIAddTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(monitor," Line search terminated: lambda = %g, fnorms = %g\n", (double)lambda, (double)gnorm);CHKERRQ(ierr); ierr = PetscViewerASCIISubtractTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr); } if (lambda <= steptol) { ierr = SNESLineSearchSetSuccess(linesearch, PETSC_FALSE);CHKERRQ(ierr); } PetscFunctionReturn(0); }
PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat) { Mat_MPISBAIJ *sbaij = (Mat_MPISBAIJ*)mat->data; Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(sbaij->B->data); PetscErrorCode ierr; PetscInt Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ec = 0,*garray,*sgarray; PetscInt bs = mat->rmap->bs,*stmp,mbs=sbaij->mbs, vec_size,nt; IS from,to; Vec gvec; PetscMPIInt rank=sbaij->rank,lsize,size=sbaij->size; PetscInt *owners=sbaij->rangebs,*sowners,*ec_owner,k; PetscScalar *ptr; PetscFunctionBegin; ierr = VecScatterDestroy(&sbaij->sMvctx);CHKERRQ(ierr); /* For the first stab we make an array as long as the number of columns */ /* mark those columns that are in sbaij->B */ ierr = PetscMalloc(Nbs*sizeof(PetscInt),&indices);CHKERRQ(ierr); ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr); for (i=0; i<mbs; i++) { for (j=0; j<B->ilen[i]; j++) { if (!indices[aj[B->i[i] + j]]) ec++; indices[aj[B->i[i] + j] ] = 1; } } /* form arrays of columns we need */ ierr = PetscMalloc(ec*sizeof(PetscInt),&garray);CHKERRQ(ierr); ierr = PetscMalloc2(2*ec,PetscInt,&sgarray,ec,PetscInt,&ec_owner);CHKERRQ(ierr); ec = 0; for (j=0; j<size; j++){ for (i=owners[j]; i<owners[j+1]; i++){ if (indices[i]) { garray[ec] = i; ec_owner[ec] = j; ec++; } } } /* make indices now point into garray */ for (i=0; i<ec; i++) indices[garray[i]] = i; /* compact out the extra columns in B */ for (i=0; i<mbs; i++) { for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; } B->nbs = ec; sbaij->B->cmap->n = sbaij->B->cmap->N = ec*mat->rmap->bs; ierr = PetscLayoutSetUp((sbaij->B->cmap));CHKERRQ(ierr); ierr = PetscFree(indices);CHKERRQ(ierr); /* create local vector that is used to scatter into */ ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr); /* create two temporary index sets for building scatter-gather */ ierr = PetscMalloc(2*ec*sizeof(PetscInt),&stmp);CHKERRQ(ierr); ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); for (i=0; i<ec; i++) { stmp[i] = i; } ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_COPY_VALUES,&to);CHKERRQ(ierr); /* generate the scatter context -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but usefule for some applications */ ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,1,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr); ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr); ierr = VecDestroy(&gvec);CHKERRQ(ierr); sbaij->garray = garray; ierr = PetscLogObjectParent(mat,sbaij->Mvctx);CHKERRQ(ierr); ierr = PetscLogObjectParent(mat,sbaij->lvec);CHKERRQ(ierr); ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr); ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr); ierr = ISDestroy(&from);CHKERRQ(ierr); ierr = ISDestroy(&to);CHKERRQ(ierr); /* create parallel vector that is used by SBAIJ matrix to scatter from/into */ lsize = (mbs + ec)*bs; ierr = VecCreateMPI(((PetscObject)mat)->comm,lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr); ierr = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr); ierr = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr); sowners = sbaij->slvec0->map->range; /* x index in the IS sfrom */ for (i=0; i<ec; i++) { j = ec_owner[i]; sgarray[i] = garray[i] + (sowners[j]/bs - owners[j]); } /* b index in the IS sfrom */ k = sowners[rank]/bs + mbs; for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j; ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,sgarray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); /* x index in the IS sto */ k = sowners[rank]/bs + mbs; for (i=0; i<ec; i++) stmp[i] = (k + i); /* b index in the IS sto */ for (i=ec; i<2*ec; i++) stmp[i] = sgarray[i-ec]; ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,PETSC_COPY_VALUES,&to);CHKERRQ(ierr); ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr); ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr); ierr = VecGetArray(sbaij->slvec1,&ptr);CHKERRQ(ierr); ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr); ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr); ierr = VecRestoreArray(sbaij->slvec1,&ptr);CHKERRQ(ierr); ierr = VecGetArray(sbaij->slvec0,&ptr);CHKERRQ(ierr); ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr); ierr = VecRestoreArray(sbaij->slvec0,&ptr);CHKERRQ(ierr); ierr = PetscFree(stmp);CHKERRQ(ierr); ierr = MPI_Barrier(((PetscObject)mat)->comm);CHKERRQ(ierr); ierr = PetscLogObjectParent(mat,sbaij->sMvctx);CHKERRQ(ierr); ierr = PetscLogObjectParent(mat,sbaij->slvec0);CHKERRQ(ierr); ierr = PetscLogObjectParent(mat,sbaij->slvec1);CHKERRQ(ierr); ierr = PetscLogObjectParent(mat,sbaij->slvec0b);CHKERRQ(ierr); ierr = PetscLogObjectParent(mat,sbaij->slvec1a);CHKERRQ(ierr); ierr = PetscLogObjectParent(mat,sbaij->slvec1b);CHKERRQ(ierr); ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr); ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr); ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr); ierr = ISDestroy(&from);CHKERRQ(ierr); ierr = ISDestroy(&to);CHKERRQ(ierr); ierr = PetscFree2(sgarray,ec_owner);CHKERRQ(ierr); PetscFunctionReturn(0); }
PetscErrorCode RHSJacobian(TS ts, PetscReal t, Vec X, Mat J, Mat B, void *ptr) { AppCtx *user = (AppCtx*)ptr; PetscInt nb_cells, i, idx; PetscReal alpha, beta; PetscReal mu_a, D_a; PetscReal mu_h, D_h; PetscReal a, h; const PetscScalar *x; PetscScalar va[4], vh[4]; PetscInt ca[4], ch[4], rowa, rowh; PetscErrorCode ierr; PetscFunctionBegin; nb_cells = user->nb_cells; alpha = user->alpha; beta = user->beta; mu_a = user->mu_a; D_a = user->D_a; mu_h = user->mu_h; D_h = user->D_h; ierr = VecGetArrayRead(X, &x);CHKERRQ(ierr); for(i = 0; i < nb_cells ; ++i) { rowa = 2*i; rowh = 2*i+1; a = x[2*i]; h = x[2*i+1]; ca[0] = ch[1] = 2*i; va[0] = 2*alpha*a / (1.+beta*h) - mu_a; vh[1] = 2*alpha*a; ca[1] = ch[0] = 2*i+1; va[1] = -beta*alpha*a*a / ((1.+beta*h)*(1.+beta*h)); vh[0] = -mu_h; idx = 2; if(i > 0) { ca[idx] = 2*(i-1); ch[idx] = 2*(i-1)+1; va[idx] = D_a; vh[idx] = D_h; va[0] -= D_a; vh[0] -= D_h; idx++; } if(i < nb_cells-1) { ca[idx] = 2*(i+1); ch[idx] = 2*(i+1)+1; va[idx] = D_a; vh[idx] = D_h; va[0] -= D_a; vh[0] -= D_h; idx++; } ierr = MatSetValues(B, 1, &rowa, idx, ca, va, INSERT_VALUES);CHKERRQ(ierr); ierr = MatSetValues(B, 1, &rowh, idx, ch, vh, INSERT_VALUES);CHKERRQ(ierr); } ierr = VecRestoreArrayRead(X, &x);CHKERRQ(ierr); ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); if (J != B) { ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); } PetscFunctionReturn(0); }
PetscErrorCode MatDisAssemble_MPISBAIJ(Mat A) { Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)A->data; Mat B = baij->B,Bnew; Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; PetscErrorCode ierr; PetscInt i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray; PetscInt k,bs=A->rmap->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->rmap->n; MatScalar *a = Bbaij->a; PetscScalar *atmp; #if defined(PETSC_USE_REAL_MAT_SINGLE) PetscInt l; #endif PetscFunctionBegin; #if defined(PETSC_USE_REAL_MAT_SINGLE) ierr = PetscMalloc(A->rmap->bs*sizeof(PetscScalar),&atmp); #endif /* free stuff related to matrix-vec multiply */ ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr); ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr); ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr); ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr); ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr); if (baij->colmap) { #if defined (PETSC_USE_CTABLE) ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); #else ierr = PetscFree(baij->colmap);CHKERRQ(ierr); ierr = PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr); #endif } /* make sure that B is assembled so we can access its values */ ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); /* invent new B and copy stuff over */ ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr); for (i=0; i<mbs; i++) { nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; } ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr); ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr); ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr); ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr); ((Mat_SeqSBAIJ*)Bnew->data)->nonew = Bbaij->nonew; /* Inherit insertion error options. */ ierr = PetscFree(nz);CHKERRQ(ierr); ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); for (i=0; i<mbs; i++) { rvals[0] = bs*i; for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) { col = garray[Bbaij->j[j]]*bs; for (k=0; k<bs; k++) { #if defined(PETSC_USE_REAL_MAT_SINGLE) for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l]; #else atmp = a+j*bs2 + k*bs; #endif ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr); col++; } } } #if defined(PETSC_USE_REAL_MAT_SINGLE) ierr = PetscFree(atmp);CHKERRQ(ierr); #endif ierr = PetscFree(baij->garray);CHKERRQ(ierr); baij->garray = 0; ierr = PetscFree(rvals);CHKERRQ(ierr); ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr); ierr = MatDestroy(&B);CHKERRQ(ierr); ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr); baij->B = Bnew; A->was_assembled = PETSC_FALSE; PetscFunctionReturn(0); }
PetscErrorCode KSPSolve_GROPPCG(KSP ksp) { PetscErrorCode ierr; PetscInt i; PetscScalar alpha,beta = 0.0,gamma,gammaNew,t; PetscReal dp = 0.0; Vec x,b,r,p,s,S,z,Z; Mat Amat,Pmat; PetscBool diagonalscale; PetscFunctionBegin; ierr = PCGetDiagonalScale(ksp->pc,&diagonalscale);CHKERRQ(ierr); if (diagonalscale) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"Krylov method %s does not support diagonal scaling",((PetscObject)ksp)->type_name); x = ksp->vec_sol; b = ksp->vec_rhs; r = ksp->work[0]; p = ksp->work[1]; s = ksp->work[2]; S = ksp->work[3]; z = ksp->work[4]; Z = ksp->work[5]; ierr = PCGetOperators(ksp->pc,&Amat,&Pmat);CHKERRQ(ierr); ksp->its = 0; if (!ksp->guess_zero) { ierr = KSP_MatMult(ksp,Amat,x,r);CHKERRQ(ierr); /* r <- b - Ax */ ierr = VecAYPX(r,-1.0,b);CHKERRQ(ierr); } else { ierr = VecCopy(b,r);CHKERRQ(ierr); /* r <- b (x is 0) */ } ierr = KSP_PCApply(ksp,r,z);CHKERRQ(ierr); /* z <- Br */ ierr = VecCopy(z,p);CHKERRQ(ierr); /* p <- z */ ierr = VecDotBegin(r,z,&gamma);CHKERRQ(ierr); /* gamma <- z'*r */ ierr = PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)r));CHKERRQ(ierr); ierr = KSP_MatMult(ksp,Amat,p,s);CHKERRQ(ierr); /* s <- Ap */ ierr = VecDotEnd(r,z,&gamma);CHKERRQ(ierr); /* gamma <- z'*r */ switch (ksp->normtype) { case KSP_NORM_PRECONDITIONED: /* This could be merged with the computation of gamma above */ ierr = VecNorm(z,NORM_2,&dp);CHKERRQ(ierr); /* dp <- z'*z = e'*A'*B'*B*A'*e' */ break; case KSP_NORM_UNPRECONDITIONED: /* This could be merged with the computation of gamma above */ ierr = VecNorm(r,NORM_2,&dp);CHKERRQ(ierr); /* dp <- r'*r = e'*A'*A*e */ break; case KSP_NORM_NATURAL: KSPCheckDot(ksp,gamma); dp = PetscSqrtReal(PetscAbsScalar(gamma)); /* dp <- r'*z = r'*B*r = e'*A'*B*A*e */ break; case KSP_NORM_NONE: dp = 0.0; break; default: SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"%s",KSPNormTypes[ksp->normtype]); } ierr = KSPLogResidualHistory(ksp,dp);CHKERRQ(ierr); ierr = KSPMonitor(ksp,0,dp);CHKERRQ(ierr); ksp->rnorm = dp; ierr = (*ksp->converged)(ksp,0,dp,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); /* test for convergence */ if (ksp->reason) PetscFunctionReturn(0); i = 0; do { ksp->its = i+1; i++; ierr = VecDotBegin(p,s,&t);CHKERRQ(ierr); ierr = PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)p));CHKERRQ(ierr); ierr = KSP_PCApply(ksp,s,S);CHKERRQ(ierr); /* S <- Bs */ ierr = VecDotEnd(p,s,&t);CHKERRQ(ierr); alpha = gamma / t; ierr = VecAXPY(x, alpha,p);CHKERRQ(ierr); /* x <- x + alpha * p */ ierr = VecAXPY(r,-alpha,s);CHKERRQ(ierr); /* r <- r - alpha * s */ ierr = VecAXPY(z,-alpha,S);CHKERRQ(ierr); /* z <- z - alpha * S */ if (ksp->normtype == KSP_NORM_UNPRECONDITIONED) { ierr = VecNormBegin(r,NORM_2,&dp);CHKERRQ(ierr); } else if (ksp->normtype == KSP_NORM_PRECONDITIONED) { ierr = VecNormBegin(z,NORM_2,&dp);CHKERRQ(ierr); } ierr = VecDotBegin(r,z,&gammaNew);CHKERRQ(ierr); ierr = PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)r));CHKERRQ(ierr); ierr = KSP_MatMult(ksp,Amat,z,Z);CHKERRQ(ierr); /* Z <- Az */ if (ksp->normtype == KSP_NORM_UNPRECONDITIONED) { ierr = VecNormEnd(r,NORM_2,&dp);CHKERRQ(ierr); } else if (ksp->normtype == KSP_NORM_PRECONDITIONED) { ierr = VecNormEnd(z,NORM_2,&dp);CHKERRQ(ierr); } ierr = VecDotEnd(r,z,&gammaNew);CHKERRQ(ierr); if (ksp->normtype == KSP_NORM_NATURAL) { KSPCheckDot(ksp,gammaNew); dp = PetscSqrtReal(PetscAbsScalar(gammaNew)); /* dp <- r'*z = r'*B*r = e'*A'*B*A*e */ } else if (ksp->normtype == KSP_NORM_NONE) { dp = 0.0; } ksp->rnorm = dp; ierr = KSPLogResidualHistory(ksp,dp);CHKERRQ(ierr); ierr = KSPMonitor(ksp,i,dp);CHKERRQ(ierr); ierr = (*ksp->converged)(ksp,i,dp,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); if (ksp->reason) break; beta = gammaNew / gamma; gamma = gammaNew; ierr = VecAYPX(p,beta,z);CHKERRQ(ierr); /* p <- z + beta * p */ ierr = VecAYPX(s,beta,Z);CHKERRQ(ierr); /* s <- Z + beta * s */ } while (i<ksp->max_it); if (i >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS; PetscFunctionReturn(0); }
static PetscErrorCode FormJacobian_All(SNES snes,Vec X,Mat J,Mat B,void *ctx) { User user = (User)ctx; DM dau,dak; DMDALocalInfo infou,infok; PetscScalar *u,*k; PetscErrorCode ierr; Vec Uloc,Kloc; PetscFunctionBeginUser; ierr = DMCompositeGetEntries(user->pack,&dau,&dak);CHKERRQ(ierr); ierr = DMDAGetLocalInfo(dau,&infou);CHKERRQ(ierr); ierr = DMDAGetLocalInfo(dak,&infok);CHKERRQ(ierr); ierr = DMCompositeGetLocalVectors(user->pack,&Uloc,&Kloc);CHKERRQ(ierr); switch (user->ptype) { case 0: ierr = DMGlobalToLocalBegin(dau,X,INSERT_VALUES,Uloc);CHKERRQ(ierr); ierr = DMGlobalToLocalEnd (dau,X,INSERT_VALUES,Uloc);CHKERRQ(ierr); ierr = DMDAVecGetArray(dau,Uloc,&u);CHKERRQ(ierr); ierr = DMDAVecGetArray(dak,user->Kloc,&k);CHKERRQ(ierr); ierr = FormJacobianLocal_U(user,&infou,u,k,B);CHKERRQ(ierr); ierr = DMDAVecRestoreArray(dau,Uloc,&u);CHKERRQ(ierr); ierr = DMDAVecRestoreArray(dak,user->Kloc,&k);CHKERRQ(ierr); break; case 1: ierr = DMGlobalToLocalBegin(dak,X,INSERT_VALUES,Kloc);CHKERRQ(ierr); ierr = DMGlobalToLocalEnd (dak,X,INSERT_VALUES,Kloc);CHKERRQ(ierr); ierr = DMDAVecGetArray(dau,user->Uloc,&u);CHKERRQ(ierr); ierr = DMDAVecGetArray(dak,Kloc,&k);CHKERRQ(ierr); ierr = FormJacobianLocal_K(user,&infok,u,k,B);CHKERRQ(ierr); ierr = DMDAVecRestoreArray(dau,user->Uloc,&u);CHKERRQ(ierr); ierr = DMDAVecRestoreArray(dak,Kloc,&k);CHKERRQ(ierr); break; case 2: { Mat Buu,Buk,Bku,Bkk; IS *is; ierr = DMCompositeScatter(user->pack,X,Uloc,Kloc);CHKERRQ(ierr); ierr = DMDAVecGetArray(dau,Uloc,&u);CHKERRQ(ierr); ierr = DMDAVecGetArray(dak,Kloc,&k);CHKERRQ(ierr); ierr = DMCompositeGetLocalISs(user->pack,&is);CHKERRQ(ierr); ierr = MatGetLocalSubMatrix(B,is[0],is[0],&Buu);CHKERRQ(ierr); ierr = MatGetLocalSubMatrix(B,is[0],is[1],&Buk);CHKERRQ(ierr); ierr = MatGetLocalSubMatrix(B,is[1],is[0],&Bku);CHKERRQ(ierr); ierr = MatGetLocalSubMatrix(B,is[1],is[1],&Bkk);CHKERRQ(ierr); ierr = FormJacobianLocal_U(user,&infou,u,k,Buu);CHKERRQ(ierr); ierr = FormJacobianLocal_UK(user,&infou,&infok,u,k,Buk);CHKERRQ(ierr); ierr = FormJacobianLocal_KU(user,&infou,&infok,u,k,Bku);CHKERRQ(ierr); ierr = FormJacobianLocal_K(user,&infok,u,k,Bkk);CHKERRQ(ierr); ierr = MatRestoreLocalSubMatrix(B,is[0],is[0],&Buu);CHKERRQ(ierr); ierr = MatRestoreLocalSubMatrix(B,is[0],is[1],&Buk);CHKERRQ(ierr); ierr = MatRestoreLocalSubMatrix(B,is[1],is[0],&Bku);CHKERRQ(ierr); ierr = MatRestoreLocalSubMatrix(B,is[1],is[1],&Bkk);CHKERRQ(ierr); ierr = DMDAVecRestoreArray(dau,Uloc,&u);CHKERRQ(ierr); ierr = DMDAVecRestoreArray(dak,Kloc,&k);CHKERRQ(ierr); ierr = ISDestroy(&is[0]);CHKERRQ(ierr); ierr = ISDestroy(&is[1]);CHKERRQ(ierr); ierr = PetscFree(is);CHKERRQ(ierr); } break; } ierr = DMCompositeRestoreLocalVectors(user->pack,&Uloc,&Kloc);CHKERRQ(ierr); ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd (B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); if (J != B) { ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd (J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); } PetscFunctionReturn(0); }
/*@C AOCreateMapping - Creates a basic application mapping using two integer arrays. Input Parameters: + comm - MPI communicator that is to share AO . napp - size of integer arrays . myapp - integer array that defines an ordering - mypetsc - integer array that defines another ordering (may be NULL to indicate the identity ordering) Output Parameter: . aoout - the new application mapping Options Database Key: . -ao_view : call AOView() at the conclusion of AOCreateMapping() Level: beginner Notes: the arrays myapp and mypetsc need NOT contain the all the integers 0 to napp-1, that is there CAN be "holes" in the indices. Use AOCreateBasic() or AOCreateBasicIS() if they do not have holes for better performance. .keywords: AO, create .seealso: AOCreateBasic(), AOCreateBasic(), AOCreateMappingIS(), AODestroy() @*/ PetscErrorCode AOCreateMapping(MPI_Comm comm,PetscInt napp,const PetscInt myapp[],const PetscInt mypetsc[],AO *aoout) { AO ao; AO_Mapping *aomap; PetscInt *allpetsc, *allapp; PetscInt *petscPerm, *appPerm; PetscInt *petsc; PetscMPIInt size, rank,*lens, *disp,nnapp; PetscInt N, start; PetscInt i; PetscErrorCode ierr; PetscFunctionBegin; PetscValidPointer(aoout,5); *aoout = 0; ierr = AOInitializePackage();CHKERRQ(ierr); ierr = PetscHeaderCreate(ao, AO_CLASSID, "AO", "Application Ordering", "AO", comm, AODestroy, AOView);CHKERRQ(ierr); ierr = PetscNewLog(ao,&aomap);CHKERRQ(ierr); ierr = PetscMemcpy(ao->ops, &AOps, sizeof(AOps));CHKERRQ(ierr); ao->data = (void*) aomap; /* transmit all lengths to all processors */ ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); ierr = PetscMalloc2(size, &lens,size,&disp);CHKERRQ(ierr); nnapp = napp; ierr = MPI_Allgather(&nnapp, 1, MPI_INT, lens, 1, MPI_INT, comm);CHKERRQ(ierr); N = 0; for (i = 0; i < size; i++) { disp[i] = N; N += lens[i]; } aomap->N = N; ao->N = N; ao->n = N; /* If mypetsc is 0 then use "natural" numbering */ if (!mypetsc) { start = disp[rank]; ierr = PetscMalloc1(napp+1, &petsc);CHKERRQ(ierr); for (i = 0; i < napp; i++) petsc[i] = start + i; } else { petsc = (PetscInt*)mypetsc; } /* get all indices on all processors */ ierr = PetscMalloc4(N, &allapp,N,&appPerm,N,&allpetsc,N,&petscPerm);CHKERRQ(ierr); ierr = MPI_Allgatherv((void*)myapp, napp, MPIU_INT, allapp, lens, disp, MPIU_INT, comm);CHKERRQ(ierr); ierr = MPI_Allgatherv((void*)petsc, napp, MPIU_INT, allpetsc, lens, disp, MPIU_INT, comm);CHKERRQ(ierr); ierr = PetscFree2(lens,disp);CHKERRQ(ierr); /* generate a list of application and PETSc node numbers */ ierr = PetscMalloc4(N, &aomap->app,N,&aomap->appPerm,N,&aomap->petsc,N,&aomap->petscPerm);CHKERRQ(ierr); ierr = PetscLogObjectMemory((PetscObject)ao, 4*N * sizeof(PetscInt));CHKERRQ(ierr); for (i = 0; i < N; i++) { appPerm[i] = i; petscPerm[i] = i; } ierr = PetscSortIntWithPermutation(N, allpetsc, petscPerm);CHKERRQ(ierr); ierr = PetscSortIntWithPermutation(N, allapp, appPerm);CHKERRQ(ierr); /* Form sorted arrays of indices */ for (i = 0; i < N; i++) { aomap->app[i] = allapp[appPerm[i]]; aomap->petsc[i] = allpetsc[petscPerm[i]]; } /* Invert petscPerm[] into aomap->petscPerm[] */ for (i = 0; i < N; i++) aomap->petscPerm[petscPerm[i]] = i; /* Form map between aomap->app[] and aomap->petsc[] */ for (i = 0; i < N; i++) aomap->appPerm[i] = aomap->petscPerm[appPerm[i]]; /* Invert appPerm[] into allapp[] */ for (i = 0; i < N; i++) allapp[appPerm[i]] = i; /* Form map between aomap->petsc[] and aomap->app[] */ for (i = 0; i < N; i++) aomap->petscPerm[i] = allapp[petscPerm[i]]; #if defined(PETSC_USE_DEBUG) /* Check that the permutations are complementary */ for (i = 0; i < N; i++) { if (i != aomap->appPerm[aomap->petscPerm[i]]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Invalid ordering"); } #endif /* Cleanup */ if (!mypetsc) { ierr = PetscFree(petsc);CHKERRQ(ierr); } ierr = PetscFree4(allapp,appPerm,allpetsc,petscPerm);CHKERRQ(ierr); ierr = AOViewFromOptions(ao,NULL,"-ao_view");CHKERRQ(ierr); *aoout = ao; PetscFunctionReturn(0); }
/*@C PetscViewerASCIIPrintf - Prints to a file, only from the first processor in the PetscViewer Not Collective, but only first processor in set has any effect Input Parameters: + viewer - optained with PetscViewerASCIIOpen() - format - the usual printf() format string Level: developer Fortran Note: The call sequence is PetscViewerASCIIPrintf(PetscViewer, character(*), int ierr) from Fortran. That is, you can only pass a single character string from Fortran. Concepts: PetscViewerASCII^printing Concepts: printing^to file Concepts: printf .seealso: PetscPrintf(), PetscSynchronizedPrintf(), PetscViewerASCIIOpen(), PetscViewerASCIIPushTab(), PetscViewerASCIIPopTab(), PetscViewerASCIISynchronizedPrintf(), PetscViewerCreate(), PetscViewerDestroy(), PetscViewerSetType(), PetscViewerASCIIGetPointer(), PetscViewerASCIISynchronizedAllow() @*/ PetscErrorCode PetscViewerASCIIPrintf(PetscViewer viewer,const char format[],...) { PetscViewer_ASCII *ascii = (PetscViewer_ASCII*)viewer->data; PetscMPIInt rank; PetscInt tab,intab = ascii->tab; PetscErrorCode ierr; FILE *fd = ascii->fd; PetscBool iascii,issingleton = PETSC_FALSE; int err; PetscFunctionBegin; PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,1); PetscValidCharPointer(format,2); ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); if (!iascii) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not ASCII PetscViewer"); if (ascii->bviewer) { viewer = ascii->bviewer; ascii = (PetscViewer_ASCII*)viewer->data; fd = ascii->fd; issingleton = PETSC_TRUE; } ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)viewer),&rank);CHKERRQ(ierr); if (!rank) { va_list Argp; tab = intab; while (tab--) { ierr = PetscFPrintf(PETSC_COMM_SELF,fd," ");CHKERRQ(ierr); } va_start(Argp,format); ierr = (*PetscVFPrintf)(fd,format,Argp);CHKERRQ(ierr); err = fflush(fd); if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file"); if (petsc_history) { va_start(Argp,format); tab = intab; while (tab--) { ierr = PetscFPrintf(PETSC_COMM_SELF,petsc_history," ");CHKERRQ(ierr); } ierr = (*PetscVFPrintf)(petsc_history,format,Argp);CHKERRQ(ierr); err = fflush(petsc_history); if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file"); } va_end(Argp); } else if (issingleton) { char *string; va_list Argp; size_t fullLength; PrintfQueue next; ierr = PetscNew(&next);CHKERRQ(ierr); if (petsc_printfqueue) { petsc_printfqueue->next = next; petsc_printfqueue = next; } else { petsc_printfqueuebase = petsc_printfqueue = next; } petsc_printfqueuelength++; next->size = QUEUESTRINGSIZE; ierr = PetscCalloc1(next->size, &next->string);CHKERRQ(ierr); string = next->string; tab = intab; tab *= 2; while (tab--) { *string++ = ' '; } va_start(Argp,format); ierr = PetscVSNPrintf(string,next->size-2*ascii->tab,format,&fullLength,Argp);CHKERRQ(ierr); va_end(Argp); } PetscFunctionReturn(0); }
PetscErrorCode VecMDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z) { Vec_Seq *xv = (Vec_Seq *)xin->data; PetscErrorCode ierr; PetscInt i,nv_rem,n = xin->map->n; PetscScalar sum0,sum1,sum2,sum3; const PetscScalar *yy0,*yy1,*yy2,*yy3,*x; Vec *yy; PetscFunctionBegin; sum0 = 0.0; sum1 = 0.0; sum2 = 0.0; i = nv; nv_rem = nv&0x3; yy = (Vec*)yin; x = xv->array; switch (nv_rem) { case 3: ierr = VecGetArray(yy[0],(PetscScalar**)&yy0);CHKERRQ(ierr); ierr = VecGetArray(yy[1],(PetscScalar**)&yy1);CHKERRQ(ierr); ierr = VecGetArray(yy[2],(PetscScalar**)&yy2);CHKERRQ(ierr); fortranmdot3_(x,yy0,yy1,yy2,&n,&sum0,&sum1,&sum2); ierr = VecRestoreArray(yy[0],(PetscScalar**)&yy0);CHKERRQ(ierr); ierr = VecRestoreArray(yy[1],(PetscScalar**)&yy1);CHKERRQ(ierr); ierr = VecRestoreArray(yy[2],(PetscScalar**)&yy2);CHKERRQ(ierr); z[0] = sum0; z[1] = sum1; z[2] = sum2; break; case 2: ierr = VecGetArray(yy[0],(PetscScalar**)&yy0);CHKERRQ(ierr); ierr = VecGetArray(yy[1],(PetscScalar**)&yy1);CHKERRQ(ierr); fortranmdot2_(x,yy0,yy1,&n,&sum0,&sum1); ierr = VecRestoreArray(yy[0],(PetscScalar**)&yy0);CHKERRQ(ierr); ierr = VecRestoreArray(yy[1],(PetscScalar**)&yy1);CHKERRQ(ierr); z[0] = sum0; z[1] = sum1; break; case 1: ierr = VecGetArray(yy[0],(PetscScalar**)&yy0);CHKERRQ(ierr); fortranmdot1_(x,yy0,&n,&sum0); ierr = VecRestoreArray(yy[0],(PetscScalar**)&yy0);CHKERRQ(ierr); z[0] = sum0; break; case 0: break; } z += nv_rem; i -= nv_rem; yy += nv_rem; while (i >0) { sum0 = 0.; sum1 = 0.; sum2 = 0.; sum3 = 0.; ierr = VecGetArray(yy[0],(PetscScalar**)&yy0);CHKERRQ(ierr); ierr = VecGetArray(yy[1],(PetscScalar**)&yy1);CHKERRQ(ierr); ierr = VecGetArray(yy[2],(PetscScalar**)&yy2);CHKERRQ(ierr); ierr = VecGetArray(yy[3],(PetscScalar**)&yy3);CHKERRQ(ierr); fortranmdot4_(x,yy0,yy1,yy2,yy3,&n,&sum0,&sum1,&sum2,&sum3); ierr = VecRestoreArray(yy[0],(PetscScalar**)&yy0);CHKERRQ(ierr); ierr = VecRestoreArray(yy[1],(PetscScalar**)&yy1);CHKERRQ(ierr); ierr = VecRestoreArray(yy[2],(PetscScalar**)&yy2);CHKERRQ(ierr); ierr = VecRestoreArray(yy[3],(PetscScalar**)&yy3);CHKERRQ(ierr); yy += 4; z[0] = sum0; z[1] = sum1; z[2] = sum2; z[3] = sum3; z += 4; i -= 4; } ierr = PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));CHKERRQ(ierr); PetscFunctionReturn(0); }
/*@C PetscViewerASCIISynchronizedPrintf - Prints synchronized output to the specified file from several processors. Output of the first processor is followed by that of the second, etc. Not Collective, must call collective PetscViewerFlush() to get the results out Input Parameters: + viewer - the ASCII PetscViewer - format - the usual printf() format string Level: intermediate Notes: You must have previously called PetscViewerASCIISynchronizeAllow() to allow this routine to be called. Fortran Note: Can only print a single character* string .seealso: PetscSynchronizedPrintf(), PetscSynchronizedFlush(), PetscFPrintf(), PetscFOpen(), PetscViewerFlush(), PetscViewerASCIIGetPointer(), PetscViewerDestroy(), PetscViewerASCIIOpen(), PetscViewerASCIIPrintf(), PetscViewerASCIISynchronizedAllow() @*/ PetscErrorCode PetscViewerASCIISynchronizedPrintf(PetscViewer viewer,const char format[],...) { PetscViewer_ASCII *vascii = (PetscViewer_ASCII*)viewer->data; PetscErrorCode ierr; PetscMPIInt rank,size; PetscInt tab = vascii->tab; MPI_Comm comm; FILE *fp; PetscBool iascii; int err; PetscFunctionBegin; PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,1); PetscValidCharPointer(format,2); ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); if (!iascii) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not ASCII PetscViewer"); ierr = MPI_Comm_size(PetscObjectComm((PetscObject)viewer),&size);CHKERRQ(ierr); if (size > 1 && !vascii->allowsynchronized) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"First call PetscViewerASCIISynchronizedAllow() to allow this call"); if (!viewer->ops->flush) PetscFunctionReturn(0); /* This viewer obtained via PetscViewerGetSubcomm_ASCII(), should not participate. */ ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); fp = vascii->fd; ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); /* First processor prints immediately to fp */ if (!rank) { va_list Argp; while (tab--) { ierr = PetscFPrintf(PETSC_COMM_SELF,fp," ");CHKERRQ(ierr); } va_start(Argp,format); ierr = (*PetscVFPrintf)(fp,format,Argp);CHKERRQ(ierr); err = fflush(fp); if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file"); if (petsc_history) { va_start(Argp,format); ierr = (*PetscVFPrintf)(petsc_history,format,Argp);CHKERRQ(ierr); err = fflush(petsc_history); if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fflush() failed on file"); } va_end(Argp); } else { /* other processors add to local queue */ char *string; va_list Argp; size_t fullLength; PrintfQueue next; ierr = PetscNew(&next);CHKERRQ(ierr); if (petsc_printfqueue) { petsc_printfqueue->next = next; petsc_printfqueue = next; } else { petsc_printfqueuebase = petsc_printfqueue = next; } petsc_printfqueuelength++; next->size = QUEUESTRINGSIZE; ierr = PetscMalloc1(next->size, &next->string);CHKERRQ(ierr); ierr = PetscMemzero(next->string,next->size);CHKERRQ(ierr); string = next->string; tab *= 2; while (tab--) { *string++ = ' '; } va_start(Argp,format); ierr = PetscVSNPrintf(string,next->size-2*vascii->tab,format,&fullLength,Argp);CHKERRQ(ierr); va_end(Argp); } PetscFunctionReturn(0); }
PetscErrorCode VecMAXPY_Seq(Vec xin, PetscInt nv,const PetscScalar *alpha,Vec *y) { Vec_Seq *xdata = (Vec_Seq*)xin->data; PetscErrorCode ierr; PetscInt n = xin->map->n,j,j_rem; const PetscScalar *yy0,*yy1,*yy2,*yy3; PetscScalar *xx,alpha0,alpha1,alpha2,alpha3; #if defined(PETSC_HAVE_PRAGMA_DISJOINT) #pragma disjoint(*xx,*yy0,*yy1,*yy2,*yy3,*alpha) #endif PetscFunctionBegin; ierr = PetscLogFlops(nv*2.0*n);CHKERRQ(ierr); xx = xdata->array; switch (j_rem=nv&0x3) { case 3: ierr = VecGetArray(y[0],(PetscScalar**)&yy0);CHKERRQ(ierr); ierr = VecGetArray(y[1],(PetscScalar**)&yy1);CHKERRQ(ierr); ierr = VecGetArray(y[2],(PetscScalar**)&yy2);CHKERRQ(ierr); alpha0 = alpha[0]; alpha1 = alpha[1]; alpha2 = alpha[2]; alpha += 3; PetscAXPY3(xx,alpha0,alpha1,alpha2,yy0,yy1,yy2,n); ierr = VecRestoreArray(y[0],(PetscScalar**)&yy0);CHKERRQ(ierr); ierr = VecRestoreArray(y[1],(PetscScalar**)&yy1);CHKERRQ(ierr); ierr = VecRestoreArray(y[2],(PetscScalar**)&yy2);CHKERRQ(ierr); y += 3; break; case 2: ierr = VecGetArray(y[0],(PetscScalar**)&yy0);CHKERRQ(ierr); ierr = VecGetArray(y[1],(PetscScalar**)&yy1);CHKERRQ(ierr); alpha0 = alpha[0]; alpha1 = alpha[1]; alpha +=2; PetscAXPY2(xx,alpha0,alpha1,yy0,yy1,n); ierr = VecRestoreArray(y[0],(PetscScalar**)&yy0);CHKERRQ(ierr); ierr = VecRestoreArray(y[1],(PetscScalar**)&yy1);CHKERRQ(ierr); y +=2; break; case 1: ierr = VecGetArray(y[0],(PetscScalar**)&yy0);CHKERRQ(ierr); alpha0 = *alpha++; PetscAXPY(xx,alpha0,yy0,n); ierr = VecRestoreArray(y[0],(PetscScalar**)&yy0);CHKERRQ(ierr); y +=1; break; } for (j=j_rem; j<nv; j+=4) { ierr = VecGetArray(y[0],(PetscScalar**)&yy0);CHKERRQ(ierr); ierr = VecGetArray(y[1],(PetscScalar**)&yy1);CHKERRQ(ierr); ierr = VecGetArray(y[2],(PetscScalar**)&yy2);CHKERRQ(ierr); ierr = VecGetArray(y[3],(PetscScalar**)&yy3);CHKERRQ(ierr); alpha0 = alpha[0]; alpha1 = alpha[1]; alpha2 = alpha[2]; alpha3 = alpha[3]; alpha += 4; PetscAXPY4(xx,alpha0,alpha1,alpha2,alpha3,yy0,yy1,yy2,yy3,n); ierr = VecRestoreArray(y[0],(PetscScalar**)&yy0);CHKERRQ(ierr); ierr = VecRestoreArray(y[1],(PetscScalar**)&yy1);CHKERRQ(ierr); ierr = VecRestoreArray(y[2],(PetscScalar**)&yy2);CHKERRQ(ierr); ierr = VecRestoreArray(y[3],(PetscScalar**)&yy3);CHKERRQ(ierr); y += 4; } PetscFunctionReturn(0); }
PetscErrorCode PetscFEGetTabulation_Composite(PetscFE fem, PetscInt npoints, const PetscReal points[], PetscReal *B, PetscReal *D, PetscReal *H) { PetscFE_Composite *cmp = (PetscFE_Composite *) fem->data; DM dm; PetscInt pdim; /* Dimension of FE space P */ PetscInt spdim; /* Dimension of subelement FE space P */ PetscInt dim; /* Spatial dimension */ PetscInt comp; /* Field components */ PetscInt *subpoints; PetscReal *tmpB, *tmpD, *tmpH, *subpoint; PetscInt p, s, d, e, j, k; PetscErrorCode ierr; PetscFunctionBegin; ierr = PetscDualSpaceGetDM(fem->dualSpace, &dm);CHKERRQ(ierr); ierr = DMGetDimension(dm, &dim);CHKERRQ(ierr); ierr = PetscSpaceGetDimension(fem->basisSpace, &spdim);CHKERRQ(ierr); ierr = PetscDualSpaceGetDimension(fem->dualSpace, &pdim);CHKERRQ(ierr); ierr = PetscFEGetNumComponents(fem, &comp);CHKERRQ(ierr); /* Divide points into subelements */ ierr = DMGetWorkArray(dm, npoints, MPIU_INT, &subpoints);CHKERRQ(ierr); ierr = DMGetWorkArray(dm, dim, MPIU_REAL, &subpoint);CHKERRQ(ierr); for (p = 0; p < npoints; ++p) { for (s = 0; s < cmp->numSubelements; ++s) { PetscBool inside; /* Apply transform, and check that point is inside cell */ for (d = 0; d < dim; ++d) { subpoint[d] = -1.0; for (e = 0; e < dim; ++e) subpoint[d] += cmp->invjac[(s*dim + d)*dim+e]*(points[p*dim+e] - cmp->v0[s*dim+e]); } ierr = CellRefinerInCellTest_Internal(cmp->cellRefiner, subpoint, &inside);CHKERRQ(ierr); if (inside) {subpoints[p] = s; break;} } if (s >= cmp->numSubelements) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Point %d was not found in any subelement", p); } ierr = DMRestoreWorkArray(dm, dim, MPIU_REAL, &subpoint);CHKERRQ(ierr); /* Evaluate the prime basis functions at all points */ if (B) {ierr = DMGetWorkArray(dm, npoints*spdim, MPIU_REAL, &tmpB);CHKERRQ(ierr);} if (D) {ierr = DMGetWorkArray(dm, npoints*spdim*dim, MPIU_REAL, &tmpD);CHKERRQ(ierr);} if (H) {ierr = DMGetWorkArray(dm, npoints*spdim*dim*dim, MPIU_REAL, &tmpH);CHKERRQ(ierr);} ierr = PetscSpaceEvaluate(fem->basisSpace, npoints, points, B ? tmpB : NULL, D ? tmpD : NULL, H ? tmpH : NULL);CHKERRQ(ierr); /* Translate to the nodal basis */ if (B) {ierr = PetscMemzero(B, npoints*pdim*comp * sizeof(PetscReal));CHKERRQ(ierr);} if (D) {ierr = PetscMemzero(D, npoints*pdim*comp*dim * sizeof(PetscReal));CHKERRQ(ierr);} if (H) {ierr = PetscMemzero(H, npoints*pdim*comp*dim*dim * sizeof(PetscReal));CHKERRQ(ierr);} for (p = 0; p < npoints; ++p) { const PetscInt s = subpoints[p]; if (B) { /* Multiply by V^{-1} (spdim x spdim) */ for (j = 0; j < spdim; ++j) { const PetscInt i = (p*pdim + cmp->embedding[s*spdim+j])*comp; B[i] = 0.0; for (k = 0; k < spdim; ++k) { B[i] += fem->invV[(s*spdim + k)*spdim+j] * tmpB[p*spdim + k]; } } } if (D) { /* Multiply by V^{-1} (spdim x spdim) */ for (j = 0; j < spdim; ++j) { for (d = 0; d < dim; ++d) { const PetscInt i = ((p*pdim + cmp->embedding[s*spdim+j])*comp + 0)*dim + d; D[i] = 0.0; for (k = 0; k < spdim; ++k) { D[i] += fem->invV[(s*spdim + k)*spdim+j] * tmpD[(p*spdim + k)*dim + d]; } } } } if (H) { /* Multiply by V^{-1} (pdim x pdim) */ for (j = 0; j < spdim; ++j) { for (d = 0; d < dim*dim; ++d) { const PetscInt i = ((p*pdim + cmp->embedding[s*spdim+j])*comp + 0)*dim*dim + d; H[i] = 0.0; for (k = 0; k < spdim; ++k) { H[i] += fem->invV[(s*spdim + k)*spdim+j] * tmpH[(p*spdim + k)*dim*dim + d]; } } } } } ierr = DMRestoreWorkArray(dm, npoints, MPIU_INT, &subpoints);CHKERRQ(ierr); if (B) {ierr = DMRestoreWorkArray(dm, npoints*spdim, MPIU_REAL, &tmpB);CHKERRQ(ierr);} if (D) {ierr = DMRestoreWorkArray(dm, npoints*spdim*dim, MPIU_REAL, &tmpD);CHKERRQ(ierr);} if (H) {ierr = DMRestoreWorkArray(dm, npoints*spdim*dim*dim, MPIU_REAL, &tmpH);CHKERRQ(ierr);} PetscFunctionReturn(0); }
PetscErrorCode MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat *C) { /* Note: This code is virtually identical to that of MatApplyPtAP_SeqAIJ_Symbolic */ /* and MatMatMult_SeqAIJ_SeqAIJ_Symbolic. Perhaps they could be merged nicely. */ PetscErrorCode ierr; PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; PetscInt *ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pti,*ptj,*ptjj; PetscInt *ci,*cj,*paj,*padenserow,*pasparserow,*denserow,*sparserow; PetscInt an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N; PetscInt i,j,k,pnzi,arow,anzj,panzi,ptrow,ptnzj,cnzi; MatScalar *ca; PetscFunctionBegin; /* some error checking which could be moved into interface layer */ if (pn!=am) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",pn,am); if (am!=an) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %D != %D",am, an); /* Set up timers */ ierr = PetscLogEventBegin(MAT_Applypapt_symbolic,A,P,0,0);CHKERRQ(ierr); /* Create ij structure of P^T */ ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); /* Allocate ci array, arrays for fill computation and */ /* free space for accumulating nonzero column info */ ierr = PetscMalloc(((pm+1)*1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); ci[0] = 0; ierr = PetscMalloc4(an,PetscInt,&padenserow,an,PetscInt,&pasparserow,pm,PetscInt,&denserow,pm,PetscInt,&sparserow);CHKERRQ(ierr); ierr = PetscMemzero(padenserow,an*sizeof(PetscInt));CHKERRQ(ierr); ierr = PetscMemzero(pasparserow,an*sizeof(PetscInt));CHKERRQ(ierr); ierr = PetscMemzero(denserow,pm*sizeof(PetscInt));CHKERRQ(ierr); ierr = PetscMemzero(sparserow,pm*sizeof(PetscInt));CHKERRQ(ierr); /* Set initial free space to be nnz(A) scaled by aspect ratio of Pt. */ /* This should be reasonable if sparsity of PAPt is similar to that of A. */ ierr = PetscFreeSpaceGet((ai[am]/pn)*pm,&free_space);CHKERRQ(ierr); current_space = free_space; /* Determine fill for each row of C: */ for (i=0;i<pm;i++) { pnzi = pi[i+1] - pi[i]; panzi = 0; /* Get symbolic sparse row of PA: */ for (j=0;j<pnzi;j++) { arow = *pj++; anzj = ai[arow+1] - ai[arow]; ajj = aj + ai[arow]; for (k=0;k<anzj;k++) { if (!padenserow[ajj[k]]) { padenserow[ajj[k]] = -1; pasparserow[panzi++] = ajj[k]; } } } /* Using symbolic row of PA, determine symbolic row of C: */ paj = pasparserow; cnzi = 0; for (j=0;j<panzi;j++) { ptrow = *paj++; ptnzj = pti[ptrow+1] - pti[ptrow]; ptjj = ptj + pti[ptrow]; for (k=0;k<ptnzj;k++) { if (!denserow[ptjj[k]]) { denserow[ptjj[k]] = -1; sparserow[cnzi++] = ptjj[k]; } } } /* sort sparse representation */ ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); /* If free space is not available, make more free space */ /* Double the amount of total space in the list */ if (current_space->local_remaining<cnzi) { ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); } /* Copy data into free space, and zero out dense row */ ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr); current_space->array += cnzi; current_space->local_used += cnzi; current_space->local_remaining -= cnzi; for (j=0;j<panzi;j++) { padenserow[pasparserow[j]] = 0; } for (j=0;j<cnzi;j++) { denserow[sparserow[j]] = 0; } ci[i+1] = ci[i] + cnzi; } /* column indices are in the list of free space */ /* Allocate space for cj, initialize cj, and */ /* destroy list of free space and other temporary array(s) */ ierr = PetscMalloc((ci[pm]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); ierr = PetscFree4(padenserow,pasparserow,denserow,sparserow);CHKERRQ(ierr); /* Allocate space for ca */ ierr = PetscMalloc((ci[pm]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); ierr = PetscMemzero(ca,(ci[pm]+1)*sizeof(MatScalar));CHKERRQ(ierr); /* put together the new matrix */ ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,pm,pm,ci,cj,ca,C);CHKERRQ(ierr); (*C)->rmap->bs = P->cmap->bs; (*C)->cmap->bs = P->cmap->bs; /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ /* Since these are PETSc arrays, change flags to free them as necessary. */ c = (Mat_SeqAIJ *)((*C)->data); c->free_a = PETSC_TRUE; c->free_ij = PETSC_TRUE; c->nonew = 0; /* Clean up. */ ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); ierr = PetscLogEventEnd(MAT_Applypapt_symbolic,A,P,0,0);CHKERRQ(ierr); PetscFunctionReturn(0); }