int main(int argc,char **argv) { KSP solver; PC pc; Mat A,B; Vec X,Y,Z; MatScalar *a; PetscScalar *b,*x,*y,*z; PetscReal nrm; PetscErrorCode ierr,size=8,lda=10, i,j; ierr = PetscInitialize(&argc,&argv,0,help);if (ierr) return ierr; /* Create matrix and three vectors: these are all normal */ ierr = PetscMalloc1(lda*size,&b);CHKERRQ(ierr); for (i=0; i<size; i++) { for (j=0; j<size; j++) { b[i+j*lda] = rand(); } } ierr = MatCreate(MPI_COMM_SELF,&A);CHKERRQ(ierr); ierr = MatSetSizes(A,size,size,size,size);CHKERRQ(ierr); ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr); ierr = MatSeqDenseSetPreallocation(A,NULL);CHKERRQ(ierr); ierr = MatDenseGetArray(A,&a);CHKERRQ(ierr); for (i=0; i<size; i++) { for (j=0; j<size; j++) { a[i+j*size] = b[i+j*lda]; } } ierr = MatDenseRestoreArray(A,&a);CHKERRQ(ierr); ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatCreate(MPI_COMM_SELF,&B);CHKERRQ(ierr); ierr = MatSetSizes(B,size,size,size,size);CHKERRQ(ierr); ierr = MatSetType(B,MATSEQDENSE);CHKERRQ(ierr); ierr = MatSeqDenseSetPreallocation(B,b);CHKERRQ(ierr); ierr = MatSeqDenseSetLDA(B,lda);CHKERRQ(ierr); ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = PetscMalloc1(size,&x);CHKERRQ(ierr); for (i=0; i<size; i++) x[i] = 1.0; ierr = VecCreateSeqWithArray(MPI_COMM_SELF,1,size,x,&X);CHKERRQ(ierr); ierr = VecAssemblyBegin(X);CHKERRQ(ierr); ierr = VecAssemblyEnd(X);CHKERRQ(ierr); ierr = PetscMalloc1(size,&y);CHKERRQ(ierr); ierr = VecCreateSeqWithArray(MPI_COMM_SELF,1,size,y,&Y);CHKERRQ(ierr); ierr = VecAssemblyBegin(Y);CHKERRQ(ierr); ierr = VecAssemblyEnd(Y);CHKERRQ(ierr); ierr = PetscMalloc1(size,&z);CHKERRQ(ierr); ierr = VecCreateSeqWithArray(MPI_COMM_SELF,1,size,z,&Z);CHKERRQ(ierr); ierr = VecAssemblyBegin(Z);CHKERRQ(ierr); ierr = VecAssemblyEnd(Z);CHKERRQ(ierr); /* * Solve with A and B */ ierr = 
KSPCreate(MPI_COMM_SELF,&solver);CHKERRQ(ierr); ierr = KSPSetType(solver,KSPPREONLY);CHKERRQ(ierr); ierr = KSPGetPC(solver,&pc);CHKERRQ(ierr); ierr = PCSetType(pc,PCLU);CHKERRQ(ierr); ierr = KSPSetOperators(solver,A,A);CHKERRQ(ierr); ierr = KSPSolve(solver,X,Y);CHKERRQ(ierr); ierr = KSPSetOperators(solver,B,B);CHKERRQ(ierr); ierr = KSPSolve(solver,X,Z);CHKERRQ(ierr); ierr = VecAXPY(Z,-1.0,Y);CHKERRQ(ierr); ierr = VecNorm(Z,NORM_2,&nrm); ierr = PetscPrintf(PETSC_COMM_SELF,"Test1; error norm=%e\n",nrm);CHKERRQ(ierr); /* Free spaces */ ierr = PetscFree(b);CHKERRQ(ierr); ierr = PetscFree(x);CHKERRQ(ierr); ierr = PetscFree(y);CHKERRQ(ierr); ierr = PetscFree(z);CHKERRQ(ierr); ierr = VecDestroy(&X);CHKERRQ(ierr); ierr = VecDestroy(&Y);CHKERRQ(ierr); ierr = VecDestroy(&Z);CHKERRQ(ierr); ierr = MatDestroy(&A);CHKERRQ(ierr); ierr = MatDestroy(&B);CHKERRQ(ierr); ierr = KSPDestroy(&solver);CHKERRQ(ierr); ierr = PetscFinalize(); return ierr; }
int main(int argc,char **argv) { Mat A,R,C,C_dense,C_sparse,Rt_dense,P,PtAP; PetscInt row,col,m,n; PetscErrorCode ierr; MatScalar one =1.0,val; MatColoring mc; MatTransposeColoring matcoloring = 0; ISColoring iscoloring; PetscBool equal; PetscMPIInt size; ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr; ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); if (size != 1) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This is a uniprocessor example only!"); /* Create seqaij A */ ierr = MatCreate(PETSC_COMM_SELF,&A);CHKERRQ(ierr); ierr = MatSetSizes(A,4,4,4,4);CHKERRQ(ierr); ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); ierr = MatSetFromOptions(A);CHKERRQ(ierr); ierr = MatSetUp(A);CHKERRQ(ierr); row = 0; col=0; val=1.0; ierr = MatSetValues(A,1,&row,1,&col,&val,ADD_VALUES);CHKERRQ(ierr); row = 1; col=3; val=2.0; ierr = MatSetValues(A,1,&row,1,&col,&val,ADD_VALUES);CHKERRQ(ierr); row = 2; col=2; val=3.0; ierr = MatSetValues(A,1,&row,1,&col,&val,ADD_VALUES);CHKERRQ(ierr); row = 3; col=0; val=4.0; ierr = MatSetValues(A,1,&row,1,&col,&val,ADD_VALUES);CHKERRQ(ierr); ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatSetOptionsPrefix(A,"A_");CHKERRQ(ierr); ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_SELF,"\n");CHKERRQ(ierr); /* Create seqaij R */ ierr = MatCreate(PETSC_COMM_SELF,&R);CHKERRQ(ierr); ierr = MatSetSizes(R,2,4,2,4);CHKERRQ(ierr); ierr = MatSetType(R,MATSEQAIJ);CHKERRQ(ierr); ierr = MatSetFromOptions(R);CHKERRQ(ierr); ierr = MatSetUp(R);CHKERRQ(ierr); row = 0; col=0; ierr = MatSetValues(R,1,&row,1,&col,&one,ADD_VALUES);CHKERRQ(ierr); row = 0; col=1; ierr = MatSetValues(R,1,&row,1,&col,&one,ADD_VALUES);CHKERRQ(ierr); row = 1; col=1; ierr = MatSetValues(R,1,&row,1,&col,&one,ADD_VALUES);CHKERRQ(ierr); row = 1; col=2; ierr = MatSetValues(R,1,&row,1,&col,&one,ADD_VALUES);CHKERRQ(ierr); row = 1; col=3; ierr = 
MatSetValues(R,1,&row,1,&col,&one,ADD_VALUES);CHKERRQ(ierr); ierr = MatAssemblyBegin(R,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(R,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatSetOptionsPrefix(R,"R_");CHKERRQ(ierr); ierr = MatView(R,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_SELF,"\n");CHKERRQ(ierr); /* C = A*R^T */ ierr = MatMatTransposeMult(A,R,MAT_INITIAL_MATRIX,2.0,&C);CHKERRQ(ierr); ierr = MatSetOptionsPrefix(C,"ARt_");CHKERRQ(ierr); ierr = MatView(C,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_SELF,"\n");CHKERRQ(ierr); /* Create MatTransposeColoring from symbolic C=A*R^T */ ierr = MatColoringCreate(C,&mc);CHKERRQ(ierr); ierr = MatColoringSetDistance(mc,2);CHKERRQ(ierr); /* ierr = MatColoringSetType(mc,MATCOLORINGSL);CHKERRQ(ierr); */ ierr = MatColoringSetFromOptions(mc);CHKERRQ(ierr); ierr = MatColoringApply(mc,&iscoloring);CHKERRQ(ierr); ierr = MatColoringDestroy(&mc);CHKERRQ(ierr); ierr = MatTransposeColoringCreate(C,iscoloring,&matcoloring);CHKERRQ(ierr); ierr = ISColoringDestroy(&iscoloring);CHKERRQ(ierr); /* Create Rt_dense */ ierr = MatCreate(PETSC_COMM_WORLD,&Rt_dense);CHKERRQ(ierr); ierr = MatSetSizes(Rt_dense,4,matcoloring->ncolors,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); ierr = MatSetType(Rt_dense,MATDENSE);CHKERRQ(ierr); ierr = MatSeqDenseSetPreallocation(Rt_dense,NULL);CHKERRQ(ierr); ierr = MatAssemblyBegin(Rt_dense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(Rt_dense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatGetLocalSize(Rt_dense,&m,&n);CHKERRQ(ierr); printf("Rt_dense: %d,%d\n",(int)m,(int)n); /* Get Rt_dense by Apply MatTransposeColoring to R */ ierr = MatTransColoringApplySpToDen(matcoloring,R,Rt_dense);CHKERRQ(ierr); /* C_dense = A*Rt_dense */ ierr = MatMatMult(A,Rt_dense,MAT_INITIAL_MATRIX,2.0,&C_dense);CHKERRQ(ierr); ierr = MatSetOptionsPrefix(C_dense,"ARt_dense_");CHKERRQ(ierr); /*ierr = MatView(C_dense,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); */ /*ierr 
= PetscPrintf(PETSC_COMM_SELF,"\n");CHKERRQ(ierr); */ /* Recover C from C_dense */ ierr = MatDuplicate(C,MAT_DO_NOT_COPY_VALUES,&C_sparse);CHKERRQ(ierr); ierr = MatTransColoringApplyDenToSp(matcoloring,C_dense,C_sparse);CHKERRQ(ierr); ierr = MatSetOptionsPrefix(C_sparse,"ARt_color_");CHKERRQ(ierr); ierr = MatView(C_sparse,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_SELF,"\n");CHKERRQ(ierr); ierr = MatDestroy(&C_dense);CHKERRQ(ierr); ierr = MatDestroy(&C_sparse);CHKERRQ(ierr); ierr = MatDestroy(&Rt_dense);CHKERRQ(ierr); ierr = MatTransposeColoringDestroy(&matcoloring);CHKERRQ(ierr); ierr = MatDestroy(&C);CHKERRQ(ierr); /* Test PtAP = P^T*A*P, P = R^T */ ierr = MatTranspose(R,MAT_INITIAL_MATRIX,&P);CHKERRQ(ierr); ierr = MatPtAP(A,P,MAT_INITIAL_MATRIX,2.0,&PtAP);CHKERRQ(ierr); ierr = MatSetOptionsPrefix(PtAP,"PtAP_");CHKERRQ(ierr); ierr = MatView(PtAP,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); ierr = MatDestroy(&P);CHKERRQ(ierr); /* Test C = RARt */ ierr = MatRARt(A,R,MAT_INITIAL_MATRIX,2.0,&C);CHKERRQ(ierr); ierr = MatRARt(A,R,MAT_REUSE_MATRIX,2.0,&C);CHKERRQ(ierr); ierr = MatEqual(C,PtAP,&equal);CHKERRQ(ierr); if (!equal) { ierr = PetscPrintf(PETSC_COMM_SELF,"Error: PtAP != RARt");CHKERRQ(ierr); } /* Free spaces */ ierr = MatDestroy(&C);CHKERRQ(ierr); ierr = MatDestroy(&A);CHKERRQ(ierr); ierr = MatDestroy(&R);CHKERRQ(ierr); ierr = MatDestroy(&PtAP);CHKERRQ(ierr); ierr = PetscFinalize(); return ierr; }
/*
   MatGetSubMatrices_MPIDense_Local - Extracts ismax sequential dense
   submatrices from the parallel dense matrix C; submatrix i is built from
   the rows isrow[i] and the columns iscol[i].

   Input Parameters:
     C      - the parallel dense matrix (its data is a Mat_MPIDense whose local block is c->A)
     ismax  - number of index-set pairs / submatrices
     isrow  - row index sets, one per submatrix (each must be sorted)
     iscol  - column index sets, one per submatrix (each must be sorted)
     scall  - MAT_REUSE_MATRIX to refill existing submats[] (sizes must match,
              values are zeroed first); any other value creates new matrices

   Output Parameter:
     submats - the ismax submatrices, created on PETSC_COMM_SELF unless reused

   Outline of the communication, as implemented below:
     1. Every requested row is mapped to its owning rank through rtable[].
     2. For each rank that owns requested rows, one integer message is sent
        (tag0) laid out as
           [0]            number of index sets with rows in this message
           [2k-1],[2k]    for the k-th such set: its number and its row count
           remainder      the requested global row numbers
     3. The owner answers (tag1) with N scalars for every requested row, i.e.
        complete rows of C; column selection happens on the requesting side.
     4. Locally owned rows are copied directly from a->v.

   Returns 0 on success or a PETSc error code (PETSC_ERR_ARG_WRONGSTATE for an
   unsorted index set, PETSC_ERR_ARG_SIZ for a size mismatch on reuse).
*/
PetscErrorCode MatGetSubMatrices_MPIDense_Local(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submats)
{
  Mat_MPIDense *c = (Mat_MPIDense*)C->data;
  Mat A = c->A;
  Mat_SeqDense *a = (Mat_SeqDense*)A->data,*mat;
  PetscErrorCode ierr;
  PetscMPIInt rank,size,tag0,tag1,idex,end,i;
  PetscInt N = C->cmap->N,rstart = C->rmap->rstart,count;
  const PetscInt **irow,**icol,*irow_i;
  PetscInt *nrow,*ncol,*w1,*w3,*w4,*rtable,start;
  PetscInt **sbuf1,m,j,k,l,ct1,**rbuf1,row,proc;
  PetscInt nrqs,msz,**ptr,*ctr,*pa,*tmp,bsz,nrqr;
  PetscInt is_no,jmax,**rmap,*rmap_i;
  PetscInt ctr_j,*sbuf1_j,*rbuf1_i;
  MPI_Request *s_waits1,*r_waits1,*s_waits2,*r_waits2;
  MPI_Status *r_status1,*r_status2,*s_status1,*s_status2;
  MPI_Comm comm;
  PetscScalar **rbuf2,**sbuf2;
  PetscBool sorted;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  tag0 = ((PetscObject)C)->tag;
  size = c->size;
  rank = c->rank;
  m = C->rmap->N; /* global number of rows; reused further down as the row count of a submatrix */

  /* Get a second tag so that index requests (tag0) and value replies (tag1) cannot be confused */
  ierr = PetscObjectGetNewTag((PetscObject)C,&tag1);CHKERRQ(ierr);

  /* Check that both the row and the column index sets are sorted */
  for (i=0; i<ismax; i++) {
    ierr = ISSorted(isrow[i],&sorted);CHKERRQ(ierr);
    if (!sorted) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"ISrow is not sorted");
    ierr = ISSorted(iscol[i],&sorted);CHKERRQ(ierr);
    if (!sorted) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"IScol is not sorted");
  }

  /* irow[i]/icol[i]: raw indices of the i-th index sets; nrow[i]/ncol[i]: their lengths;
     rtable: one entry per global row */
  ierr = PetscMalloc5(ismax,const PetscInt*,&irow,ismax,const PetscInt*,&icol,ismax,PetscInt,&nrow,ismax,PetscInt,&ncol,m,PetscInt,&rtable);CHKERRQ(ierr);
  for (i=0; i<ismax; i++) {
    ierr = ISGetIndices(isrow[i],&irow[i]);CHKERRQ(ierr);
    ierr = ISGetIndices(iscol[i],&icol[i]);CHKERRQ(ierr);
    ierr = ISGetLocalSize(isrow[i],&nrow[i]);CHKERRQ(ierr);
    ierr = ISGetLocalSize(iscol[i],&ncol[i]);CHKERRQ(ierr);
  }

  /* Create lookup table for the mapping: global row -> owning proc, filled from the ownership ranges */
  for (i=0,j=0; i<size; i++) {
    jmax = C->rmap->range[i+1];
    for (; j<jmax; j++) rtable[j] = i;
  }

  /* Evaluate communication - message to whom, length of message, and buffer space
     required. Based on this, buffers are allocated, and data copied into them.
       w1[2*p]   : number of integers to send to proc p (rows now, header added below)
       w1[2*p+1] : 1 if a message goes to proc p, else 0
       w3[p]     : number of index sets that have rows on proc p
       w4[p]     : per-index-set scratch counter of rows on proc p */
  ierr = PetscMalloc3(2*size,PetscInt,&w1,size,PetscInt,&w3,size,PetscInt,&w4);CHKERRQ(ierr);
  ierr = PetscMemzero(w1,size*2*sizeof(PetscInt));CHKERRQ(ierr); /* initialize work vector */
  ierr = PetscMemzero(w3,size*sizeof(PetscInt));CHKERRQ(ierr); /* initialize work vector */
  for (i=0; i<ismax; i++) {
    ierr = PetscMemzero(w4,size*sizeof(PetscInt));CHKERRQ(ierr); /* initialize work vector */
    jmax = nrow[i];
    irow_i = irow[i];
    for (j=0; j<jmax; j++) {
      row = irow_i[j];
      proc = rtable[row];
      w4[proc]++;
    }
    for (j=0; j<size; j++) {
      if (w4[j]) { w1[2*j] += w4[j]; w3[j]++;}
    }
  }

  nrqs = 0; /* number of outgoing messages */
  msz = 0; /* total message length (for all procs) */
  w1[2*rank] = 0; /* no message sent to self */
  w3[rank] = 0;
  for (i=0; i<size; i++) {
    if (w1[2*i]) { w1[2*i+1] = 1; nrqs++;} /* there exists a message to proc i */
  }
  ierr = PetscMalloc((nrqs+1)*sizeof(PetscInt),&pa);CHKERRQ(ierr); /* pa[]: ranks that receive a message from us */
  for (i=0,j=0; i<size; i++) {
    if (w1[2*i]) { pa[j] = i; j++; }
  }

  /* Each message would have a header = 1 + 2*(no of IS) + data */
  for (i=0; i<nrqs; i++) {
    j = pa[i];
    w1[2*j] += w1[2*j+1] + 2* w3[j];
    msz += w1[2*j];
  }

  /* Do a global reduction to determine how many messages to expect;
     bsz is used below as the length of every receive buffer, nrqr as the number of receives */
  ierr = PetscMaxSum(comm,w1,&bsz,&nrqr);CHKERRQ(ierr);

  /* Allocate memory for recv buffers. The pointer array gets nrqr+1 entries so that
     the slot rbuf1[0] exists even when nrqr is 0; all buffers share one allocation */
  ierr = PetscMalloc((nrqr+1)*sizeof(PetscInt*),&rbuf1);CHKERRQ(ierr);
  ierr = PetscMalloc(nrqr*bsz*sizeof(PetscInt),&rbuf1[0]);CHKERRQ(ierr);
  for (i=1; i<nrqr; ++i) rbuf1[i] = rbuf1[i-1] + bsz;

  /* Post the receives */
  ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Request),&r_waits1);CHKERRQ(ierr);
  for (i=0; i<nrqr; ++i) {
    ierr = MPI_Irecv(rbuf1[i],bsz,MPIU_INT,MPI_ANY_SOURCE,tag0,comm,r_waits1+i);CHKERRQ(ierr);
  }

  /* Allocate memory for outgoing messages: sbuf1[p] is the message for proc p (carved out
     of tmp), ptr[p] the current write position in it, ctr[p] a per-index-set row counter */
  ierr = PetscMalloc4(size,PetscInt*,&sbuf1,size,PetscInt*,&ptr,2*msz,PetscInt,&tmp,size,PetscInt,&ctr);CHKERRQ(ierr);
  ierr = PetscMemzero(sbuf1,size*sizeof(PetscInt*));CHKERRQ(ierr);
  ierr = PetscMemzero(ptr,size*sizeof(PetscInt*));CHKERRQ(ierr);
  {
    PetscInt *iptr = tmp,ict = 0;
    for (i=0; i<nrqs; i++) {
      j = pa[i];
      iptr += ict;
      sbuf1[j] = iptr;
      ict = w1[2*j];
    }
  }

  /* Form the outgoing messages */
  /* Initialize the header space; ptr[j] is left pointing just past the header */
  for (i=0; i<nrqs; i++) {
    j = pa[i];
    sbuf1[j][0] = 0;
    ierr = PetscMemzero(sbuf1[j]+1,2*w3[j]*sizeof(PetscInt));CHKERRQ(ierr);
    ptr[j] = sbuf1[j] + 2*w3[j] + 1;
  }

  /* Parse the isrow and copy data into outbuf */
  for (i=0; i<ismax; i++) {
    ierr = PetscMemzero(ctr,size*sizeof(PetscInt));CHKERRQ(ierr);
    irow_i = irow[i];
    jmax = nrow[i];
    for (j=0; j<jmax; j++) { /* parse the indices of each IS */
      row = irow_i[j];
      proc = rtable[row];
      if (proc != rank) { /* copy to the outgoing buf */
        ctr[proc]++;
        *ptr[proc] = row;
        ptr[proc]++;
      }
    }
    /* Update the headers for the current IS: append the pair (IS number, row count) */
    for (j=0; j<size; j++) { /* Can optimise this loop too */
      if ((ctr_j = ctr[j])) {
        sbuf1_j = sbuf1[j];
        k = ++sbuf1_j[0];
        sbuf1_j[2*k] = ctr_j;
        sbuf1_j[2*k-1] = i;
      }
    }
  }

  /* Now post the sends */
  ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Request),&s_waits1);CHKERRQ(ierr);
  for (i=0; i<nrqs; ++i) {
    j = pa[i];
    ierr = MPI_Isend(sbuf1[j],w1[2*j],MPIU_INT,j,tag0,comm,s_waits1+i);CHKERRQ(ierr);
  }

  /* Post receives to capture the row data from other procs:
     (message length - header length) rows, N scalars per row */
  ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits2);CHKERRQ(ierr);
  ierr = PetscMalloc((nrqs+1)*sizeof(PetscScalar*),&rbuf2);CHKERRQ(ierr);
  for (i=0; i<nrqs; i++) {
    j = pa[i];
    count = (w1[2*j] - (2*sbuf1[j][0] + 1))*N;
    ierr = PetscMalloc((count+1)*sizeof(PetscScalar),&rbuf2[i]);CHKERRQ(ierr);
    ierr = MPI_Irecv(rbuf2[i],count,MPIU_SCALAR,j,tag1,comm,r_waits2+i);CHKERRQ(ierr);
  }

  /* Receive messages (row numbers) and then pack and send off the row values
     to the correct processors */
  ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits2);CHKERRQ(ierr);
  ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Status),&r_status1);CHKERRQ(ierr);
  ierr = PetscMalloc((nrqr+1)*sizeof(PetscScalar*),&sbuf2);CHKERRQ(ierr);
  {
    PetscScalar *sbuf2_i,*v_start;
    PetscInt s_proc;
    for (i=0; i<nrqr; ++i) {
      ierr = MPI_Waitany(nrqr,r_waits1,&idex,r_status1+i);CHKERRQ(ierr);
      s_proc = r_status1[i].MPI_SOURCE; /* send processor */
      rbuf1_i = rbuf1[idex]; /* Actual message from s_proc */
      /* number of rows = end - start: start is the 0-based position of the first row
         number (just past the header), while end is the length of the received buffer */
      start = 2*rbuf1_i[0] + 1;
      ierr = MPI_Get_count(r_status1+i,MPIU_INT,&end);CHKERRQ(ierr);
      /* allocate memory sufficient to hold all the row values */
      ierr = PetscMalloc((end-start)*N*sizeof(PetscScalar),&sbuf2[idex]);CHKERRQ(ierr);
      sbuf2_i = sbuf2[idex];
      /* Now pack the data: for each requested row, gather its N entries from the local
         array, stepping C->rmap->n scalars from one column to the next */
      for (j=start; j<end; j++) {
        row = rbuf1_i[j] - rstart; /* global -> local row number */
        v_start = a->v + row;
        for (k=0; k<N; k++) {
          sbuf2_i[0] = v_start[0];
          sbuf2_i++;
          v_start += C->rmap->n;
        }
      }
      /* Now send off the data */
      ierr = MPI_Isend(sbuf2[idex],(end-start)*N,MPIU_SCALAR,s_proc,tag1,comm,s_waits2+i);CHKERRQ(ierr);
    }
  }

  /* End Send-Recv of IS + row_numbers */
  ierr = PetscFree(r_status1);CHKERRQ(ierr);
  ierr = PetscFree(r_waits1);CHKERRQ(ierr);
  ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Status),&s_status1);CHKERRQ(ierr);
  if (nrqs) {ierr = MPI_Waitall(nrqs,s_waits1,s_status1);CHKERRQ(ierr);}
  ierr = PetscFree(s_status1);CHKERRQ(ierr);
  ierr = PetscFree(s_waits1);CHKERRQ(ierr);

  /* Create the submatrices, or validate and zero the ones being reused */
  if (scall == MAT_REUSE_MATRIX) {
    for (i=0; i<ismax; i++) {
      mat = (Mat_SeqDense*)(submats[i]->data);
      if ((submats[i]->rmap->n != nrow[i]) || (submats[i]->cmap->n != ncol[i])) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong size");
      ierr = PetscMemzero(mat->v,submats[i]->rmap->n*submats[i]->cmap->n*sizeof(PetscScalar));CHKERRQ(ierr);
      submats[i]->factortype = C->factortype;
    }
  } else {
    for (i=0; i<ismax; i++) {
      ierr = MatCreate(PETSC_COMM_SELF,submats+i);CHKERRQ(ierr);
      ierr = MatSetSizes(submats[i],nrow[i],ncol[i],nrow[i],ncol[i]);CHKERRQ(ierr);
      ierr = MatSetType(submats[i],((PetscObject)A)->type_name);CHKERRQ(ierr);
      ierr = MatSeqDenseSetPreallocation(submats[i],NULL);CHKERRQ(ierr);
    }
  }

  /* Assemble the matrices: copy the locally owned rows straight from a->v.
     The submatrix is written with column stride m = nrow[i]. */
  {
    PetscInt col;
    PetscScalar *imat_v,*mat_v,*imat_vi,*mat_vi;
    for (i=0; i<ismax; i++) {
      mat = (Mat_SeqDense*)submats[i]->data;
      mat_v = a->v;
      imat_v = mat->v;
      irow_i = irow[i];
      m = nrow[i];
      for (j=0; j<m; j++) {
        row = irow_i[j];
        proc = rtable[row];
        if (proc == rank) {
          row = row - rstart;
          mat_vi = mat_v + row;
          imat_vi = imat_v + j;
          for (k=0; k<ncol[i]; k++) {
            col = icol[i][k];
            imat_vi[k*m] = mat_vi[col*C->rmap->n];
          }
        }
      }
    }
  }

  /* Create row map: rmap[i][global row] = row of that entry within submatrix i */
  /* this is a very expensive operation wrt memory usage (ismax * global rows integers) */
  ierr = PetscMalloc(ismax*sizeof(PetscInt*),&rmap);CHKERRQ(ierr);
  ierr = PetscMalloc(ismax*C->rmap->N*sizeof(PetscInt),&rmap[0]);CHKERRQ(ierr);
  ierr = PetscMemzero(rmap[0],ismax*C->rmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=1; i<ismax; i++) rmap[i] = rmap[i-1] + C->rmap->N;
  for (i=0; i<ismax; i++) {
    rmap_i = rmap[i];
    irow_i = irow[i];
    jmax = nrow[i];
    for (j=0; j<jmax; j++) {
      rmap_i[irow_i[j]] = j;
    }
  }

  /* Now receive the row values and assemble the rest of the matrix */
  ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status2);CHKERRQ(ierr);
  {
    PetscInt is_max,tmp1,col,*sbuf1_i,is_sz;
    PetscScalar *rbuf2_i,*imat_v,*imat_vi;
    for (tmp1=0; tmp1<nrqs; tmp1++) { /* For each message */
      ierr = MPI_Waitany(nrqs,r_waits2,&i,r_status2+tmp1);CHKERRQ(ierr);
      /* Now dig out the corresponding sbuf1, which contains the IS data structure;
         the reply holds rows in the same order as the request listed them */
      sbuf1_i = sbuf1[pa[i]];
      is_max = sbuf1_i[0];
      ct1 = 2*is_max+1; /* position of the first row number in the request */
      rbuf2_i = rbuf2[i];
      for (j=1; j<=is_max; j++) { /* For each IS belonging to the message */
        is_no = sbuf1_i[2*j-1];
        is_sz = sbuf1_i[2*j];
        mat = (Mat_SeqDense*)submats[is_no]->data;
        imat_v = mat->v;
        rmap_i = rmap[is_no];
        m = nrow[is_no];
        for (k=0; k<is_sz; k++,rbuf2_i+=N) { /* For each row (N received scalars per row) */
          row = sbuf1_i[ct1];
          ct1++;
          row = rmap_i[row];
          imat_vi = imat_v + row;
          for (l=0; l<ncol[is_no]; l++) { /* For each col */
            col = icol[is_no][l];
            imat_vi[l*m] = rbuf2_i[col];
          }
        }
      }
    }
  }

  /* End Send-Recv of row_values */
  ierr = PetscFree(r_status2);CHKERRQ(ierr);
  ierr = PetscFree(r_waits2);CHKERRQ(ierr);
  ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status2);CHKERRQ(ierr);
  if (nrqr) {ierr = MPI_Waitall(nrqr,s_waits2,s_status2);CHKERRQ(ierr);}
  ierr = PetscFree(s_status2);CHKERRQ(ierr);
  ierr = PetscFree(s_waits2);CHKERRQ(ierr);

  /* Restore the indices */
  for (i=0; i<ismax; i++) {
    ierr = ISRestoreIndices(isrow[i],irow+i);CHKERRQ(ierr);
    ierr = ISRestoreIndices(iscol[i],icol+i);CHKERRQ(ierr);
  }

  /* Destroy allocated memory */
  ierr = PetscFree5(irow,icol,nrow,ncol,rtable);CHKERRQ(ierr);
  ierr = PetscFree3(w1,w3,w4);CHKERRQ(ierr);
  ierr = PetscFree(pa);CHKERRQ(ierr);

  for (i=0; i<nrqs; ++i) {
    ierr = PetscFree(rbuf2[i]);CHKERRQ(ierr);
  }
  ierr = PetscFree(rbuf2);CHKERRQ(ierr);
  ierr = PetscFree4(sbuf1,ptr,tmp,ctr);CHKERRQ(ierr);
  ierr = PetscFree(rbuf1[0]);CHKERRQ(ierr);
  ierr = PetscFree(rbuf1);CHKERRQ(ierr);

  for (i=0; i<nrqr; ++i) {
    ierr = PetscFree(sbuf2[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree(sbuf2);CHKERRQ(ierr);
  ierr = PetscFree(rmap[0]);CHKERRQ(ierr);
  ierr = PetscFree(rmap);CHKERRQ(ierr);

  for (i=0; i<ismax; i++) {
    ierr = MatAssemblyBegin(submats[i],MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(submats[i],MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
int main(int argc,char **argv) { Mat A,B,C,D; PetscInt i,M,N,Istart,Iend,n=7,j,J,Ii,m=8,am,an; PetscScalar v; PetscErrorCode ierr; PetscRandom r; PetscBool equal=PETSC_FALSE; PetscReal fill = 1.0; PetscMPIInt size; PetscInitialize(&argc,&argv,(char*)0,help); ierr = PetscOptionsGetInt(NULL,"-m",&m,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetInt(NULL,"-n",&n,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetReal(NULL,"-fill",&fill,NULL);CHKERRQ(ierr); ierr = PetscRandomCreate(PETSC_COMM_WORLD,&r);CHKERRQ(ierr); ierr = PetscRandomSetFromOptions(r);CHKERRQ(ierr); /* Create a aij matrix A */ M = N = m*n; ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr); ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr); ierr = MatSetFromOptions(A);CHKERRQ(ierr); ierr = MatMPIAIJSetPreallocation(A,5,NULL,5,NULL);CHKERRQ(ierr); ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr); ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr); am = Iend - Istart; for (Ii=Istart; Ii<Iend; Ii++) { v = -1.0; i = Ii/n; j = Ii - i*n; if (i>0) {J = Ii - n; ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (i<m-1) {J = Ii + n; ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (j>0) {J = Ii - 1; ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} if (j<n-1) {J = Ii + 1; ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} v = 4.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr); } ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); /* Create a dense matrix B */ ierr = MatGetLocalSize(A,&am,&an);CHKERRQ(ierr); ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr); ierr = MatSetSizes(B,an,PETSC_DECIDE,PETSC_DECIDE,M);CHKERRQ(ierr); ierr = MatSetType(B,MATDENSE);CHKERRQ(ierr); ierr = MatSeqDenseSetPreallocation(B,NULL);CHKERRQ(ierr); ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr); ierr 
= MatSetFromOptions(B);CHKERRQ(ierr); ierr = MatSetRandom(B,r);CHKERRQ(ierr); ierr = PetscRandomDestroy(&r);CHKERRQ(ierr); ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); /* Test C = A*B (aij*dense) */ ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,fill,&C);CHKERRQ(ierr); ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,fill,&C);CHKERRQ(ierr); ierr = MatMatMultSymbolic(A,B,fill,&D);CHKERRQ(ierr); for (i=0; i<2; i++) { ierr = MatMatMultNumeric(A,B,D);CHKERRQ(ierr); } ierr = MatEqual(C,D,&equal);CHKERRQ(ierr); if (!equal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"C != D"); ierr = MatDestroy(&D);CHKERRQ(ierr); /* Test D = C*A (dense*aij) */ ierr = MatMatMult(C,A,MAT_INITIAL_MATRIX,fill,&D);CHKERRQ(ierr); ierr = MatMatMult(C,A,MAT_REUSE_MATRIX,fill,&D);CHKERRQ(ierr); ierr = MatDestroy(&D);CHKERRQ(ierr); /* Test D = A*C (aij*dense) */ ierr = MatMatMult(A,C,MAT_INITIAL_MATRIX,fill,&D);CHKERRQ(ierr); ierr = MatMatMult(A,C,MAT_REUSE_MATRIX,fill,&D);CHKERRQ(ierr); ierr = MatDestroy(&D);CHKERRQ(ierr); /* Test D = B*C (dense*dense) */ ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); if (size == 1) { ierr = MatMatMult(B,C,MAT_INITIAL_MATRIX,fill,&D);CHKERRQ(ierr); ierr = MatMatMult(B,C,MAT_REUSE_MATRIX,fill,&D);CHKERRQ(ierr); ierr = MatDestroy(&D);CHKERRQ(ierr); } ierr = MatDestroy(&C);CHKERRQ(ierr); ierr = MatDestroy(&B);CHKERRQ(ierr); ierr = MatDestroy(&A);CHKERRQ(ierr); PetscFinalize(); return(0); }
/*
   MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart - Symbolic phase of C = R*A*R^T for
   SeqAIJ matrices using the coloring-based algorithm.

   The nonzero pattern of C is obtained as P^T*A*P with a symbolic P = R^T.
   A distance-2 coloring of C is then turned into a MatTransposeColoring, and
   the dense work matrices used by the numeric phase are created and stored
   in a Mat_RARt structure attached to C.

   Input Parameters:
     A    - the middle matrix
     R    - the restriction matrix
     fill - fill estimate forwarded to the symbolic PtAP routine

   Output Parameter:
     C    - the product matrix with its numeric and destroy operations overridden

   Raises PETSC_ERR_SUP when C uses inodes.
*/
PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(Mat A,Mat R,PetscReal fill,Mat *C)
{
  PetscErrorCode       ierr;
  Mat                  Psym,Cmat;
  Mat                  RtD,RARtD;
  PetscInt             *tr_i,*tr_j;
  PetscInt             ncolors;
  Mat_RARt             *ctx;
  Mat_SeqAIJ           *cseq;
  MatColoring          mcolor;
  MatTransposeColoring tcolor;
  ISColoring           isc;

  PetscFunctionBegin;
  /* create symbolic P=Rt (pattern only, no values) */
  ierr = MatGetSymbolicTranspose_SeqAIJ(R,&tr_i,&tr_j);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,R->cmap->n,R->rmap->n,tr_i,tr_j,NULL,&Psym);CHKERRQ(ierr);

  /* get symbolic C=Pt*A*P */
  ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ_SparseAxpy(A,Psym,fill,C);CHKERRQ(ierr);
  Cmat = *C;
  ierr = MatSetBlockSizes(Cmat,PetscAbs(R->rmap->bs),PetscAbs(R->rmap->bs));CHKERRQ(ierr);
  Cmat->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ_colorrart;

  /* create a supporting struct and hang it on C */
  ierr       = PetscNew(&ctx);CHKERRQ(ierr);
  cseq       = (Mat_SeqAIJ*)Cmat->data;
  cseq->rart = ctx;

  /* ------ Use coloring ---------- */
  /* inode causes memory problem, don't know why */
  if (cseq->inode.use) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MAT_USE_INODES is not supported. Use '-mat_no_inode'");
  }

  /* Create MatTransposeColoring from symbolic C=R*A*R^T */
  ierr = MatColoringCreate(Cmat,&mcolor);CHKERRQ(ierr);
  ierr = MatColoringSetDistance(mcolor,2);CHKERRQ(ierr);
  ierr = MatColoringSetType(mcolor,MATCOLORINGSL);CHKERRQ(ierr);
  ierr = MatColoringSetFromOptions(mcolor);CHKERRQ(ierr);
  ierr = MatColoringApply(mcolor,&isc);CHKERRQ(ierr);
  ierr = MatColoringDestroy(&mcolor);CHKERRQ(ierr);
  ierr = MatTransposeColoringCreate(Cmat,isc,&tcolor);CHKERRQ(ierr);
  ierr = ISColoringDestroy(&isc);CHKERRQ(ierr);

  ctx->matcoloring = tcolor;
  ncolors          = tcolor->ncolors;

  /* Create Rt_dense: one column per color */
  ierr = MatCreate(PETSC_COMM_SELF,&RtD);CHKERRQ(ierr);
  ierr = MatSetSizes(RtD,A->cmap->n,ncolors,A->cmap->n,ncolors);CHKERRQ(ierr);
  ierr = MatSetType(RtD,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(RtD,NULL);CHKERRQ(ierr);

  RtD->assembled = PETSC_TRUE;
  ctx->Rt        = RtD;

  /* Create RARt_dense = R*A*Rt_dense */
  ierr = MatCreate(PETSC_COMM_SELF,&RARtD);CHKERRQ(ierr);
  ierr = MatSetSizes(RARtD,Cmat->rmap->n,ncolors,Cmat->rmap->n,ncolors);CHKERRQ(ierr);
  ierr = MatSetType(RARtD,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(RARtD,NULL);CHKERRQ(ierr);

  ctx->RARt = RARtD;

  /* Allocate work array to store columns of A*R^T used in MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense() */
  ierr = PetscMalloc1(A->rmap->n*4,&ctx->work);CHKERRQ(ierr);

  /* remember the original destroy routine and install the RARt-aware one */
  ctx->destroy       = Cmat->ops->destroy;
  Cmat->ops->destroy = MatDestroy_SeqAIJ_RARt;

  /* clean up */
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(R,&tr_i,&tr_j);CHKERRQ(ierr);
  ierr = MatDestroy(&Psym);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  {
    /* ratio of nonzeros in C to the size of the dense work matrix */
    PetscReal density= (PetscReal)(cseq->nz)/(RARtD->rmap->n*RARtD->cmap->n);
    ierr = PetscInfo(Cmat,"C=R*(A*Rt) via coloring C - use sparse-dense inner products\n");CHKERRQ(ierr);
    ierr = PetscInfo6(Cmat,"RARt_den %D %D; Rt %D %D (RARt->nz %D)/(m*ncolors)=%g\n",RARtD->rmap->n,RARtD->cmap->n,R->cmap->n,R->rmap->n,cseq->nz,density);CHKERRQ(ierr);
  }
#endif
  PetscFunctionReturn(0);
}
/*
   Fortran-callable wrapper around MatSeqDenseSetPreallocation().

   All arguments arrive by reference:
     mat  - pointer to the matrix handle
     data - user-provided storage, forwarded to MatSeqDenseSetPreallocation()
     ierr - receives the error code returned by MatSeqDenseSetPreallocation()
*/
void PETSC_STDCALL matseqdensesetpreallocation_(Mat *mat,PetscScalar *data,PetscErrorCode *ierr)
{
  /* NOTE(review): presumably translates the Fortran "null scalar" sentinel into a C NULL
     so that PETSc allocates the storage itself - confirm against the macro definition */
  CHKFORTRANNULLSCALAR(data);
  *ierr = MatSeqDenseSetPreallocation(*mat,data);
}
For MATSEQDENSE matrix, the factorization is just a thin wrapper to LAPACK \n\n"; #include <petscmat.h> #undef __FUNCT__ #define __FUNCT__ "main" int main(int argc,char **argv) { Mat mat,F,RHS,SOLU; MatInfo info; PetscErrorCode ierr; PetscInt m = 10,n = 10,i,j,rstart,rend,nrhs=2; PetscScalar value = 1.0; Vec x,y,b,ytmp; PetscReal norm,tol=1.e-15; PetscMPIInt size; PetscScalar *rhs_array,*solu_array; PetscRandom rand; PetscScalar *array,rval; PetscInitialize(&argc,&argv,(char*) 0,help); ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); if (size != 1) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This is a uniprocessor example only!"); /* create single vectors */ ierr = VecCreate(PETSC_COMM_WORLD,&y);CHKERRQ(ierr); ierr = VecSetSizes(y,PETSC_DECIDE,m);CHKERRQ(ierr); ierr = VecSetFromOptions(y);CHKERRQ(ierr); ierr = VecDuplicate(y,&x);CHKERRQ(ierr); ierr = VecDuplicate(y,&ytmp);CHKERRQ(ierr); ierr = VecSet(x,value);CHKERRQ(ierr); ierr = VecCreate(PETSC_COMM_WORLD,&b);CHKERRQ(ierr); ierr = VecSetSizes(b,PETSC_DECIDE,n);CHKERRQ(ierr); ierr = VecSetFromOptions(b);CHKERRQ(ierr); /* create multiple vectors RHS and SOLU */ ierr = MatCreate(PETSC_COMM_WORLD,&RHS);CHKERRQ(ierr); ierr = MatSetSizes(RHS,PETSC_DECIDE,PETSC_DECIDE,n,nrhs);CHKERRQ(ierr); ierr = MatSetType(RHS,MATDENSE);CHKERRQ(ierr); ierr = MatSetFromOptions(RHS);CHKERRQ(ierr); ierr = MatSeqDenseSetPreallocation(RHS,NULL);CHKERRQ(ierr); ierr = PetscRandomCreate(PETSC_COMM_WORLD,&rand);CHKERRQ(ierr); ierr = PetscRandomSetFromOptions(rand);CHKERRQ(ierr); ierr = MatDenseGetArray(RHS,&array);CHKERRQ(ierr); for (j=0; j<nrhs; j++) { for (i=0; i<n; i++) { ierr = PetscRandomGetValue(rand,&rval);CHKERRQ(ierr); array[n*j+i] = rval; } } ierr = MatDenseRestoreArray(RHS,&array);CHKERRQ(ierr); ierr = MatDuplicate(RHS,MAT_DO_NOT_COPY_VALUES,&SOLU);CHKERRQ(ierr); /* create matrix */ ierr = MatCreateSeqDense(PETSC_COMM_WORLD,m,n,NULL,&mat);CHKERRQ(ierr); ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr); for 
(i=rstart; i<rend; i++) { value = (PetscReal)i+1; ierr = MatSetValues(mat,1,&i,1,&i,&value,INSERT_VALUES);CHKERRQ(ierr); } ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"matrix nonzeros = %D, allocated nonzeros = %D\n", (PetscInt)info.nz_used,(PetscInt)info.nz_allocated);CHKERRQ(ierr); /* Cholesky factorization - perm and factinfo are ignored by LAPACK */ /* in-place Cholesky */ ierr = MatMult(mat,x,b);CHKERRQ(ierr); ierr = MatDuplicate(mat,MAT_COPY_VALUES,&F);CHKERRQ(ierr); ierr = MatCholeskyFactor(F,0,0);CHKERRQ(ierr); ierr = MatSolve(F,b,y);CHKERRQ(ierr); ierr = MatDestroy(&F);CHKERRQ(ierr); value = -1.0; ierr = VecAXPY(y,value,x);CHKERRQ(ierr); ierr = VecNorm(y,NORM_2,&norm);CHKERRQ(ierr); if (norm > tol) { ierr = PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for Cholesky %G\n",norm);CHKERRQ(ierr); } /* out-place Cholesky */ ierr = MatGetFactor(mat,MATSOLVERPETSC,MAT_FACTOR_CHOLESKY,&F);CHKERRQ(ierr); ierr = MatCholeskyFactorSymbolic(F,mat,0,0);CHKERRQ(ierr); ierr = MatCholeskyFactorNumeric(F,mat,0);CHKERRQ(ierr); ierr = MatSolve(F,b,y);CHKERRQ(ierr); value = -1.0; ierr = VecAXPY(y,value,x);CHKERRQ(ierr); ierr = VecNorm(y,NORM_2,&norm);CHKERRQ(ierr); if (norm > tol) { ierr = PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for Cholesky %G\n",norm);CHKERRQ(ierr); } ierr = MatDestroy(&F);CHKERRQ(ierr); /* LU factorization - perms and factinfo are ignored by LAPACK */ i = m-1; value = 1.0; ierr = MatSetValues(mat,1,&i,1,&i,&value,INSERT_VALUES);CHKERRQ(ierr); ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatMult(mat,x,b);CHKERRQ(ierr); ierr = MatDuplicate(mat,MAT_COPY_VALUES,&F);CHKERRQ(ierr); /* in-place LU */ ierr = MatLUFactor(F,0,0,0);CHKERRQ(ierr); ierr = MatSolve(F,b,y);CHKERRQ(ierr); value = -1.0; 
ierr = VecAXPY(y,value,x);CHKERRQ(ierr); ierr = VecNorm(y,NORM_2,&norm);CHKERRQ(ierr); if (norm > tol) { ierr = PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for LU %G\n",norm);CHKERRQ(ierr); } ierr = MatMatSolve(F,RHS,SOLU);CHKERRQ(ierr); ierr = MatDenseGetArray(SOLU,&solu_array);CHKERRQ(ierr); ierr = MatDenseGetArray(RHS,&rhs_array);CHKERRQ(ierr); for (j=0; j<nrhs; j++) { ierr = VecPlaceArray(y,solu_array+j*m);CHKERRQ(ierr); ierr = VecPlaceArray(b,rhs_array+j*m);CHKERRQ(ierr); ierr = MatMult(mat,y,ytmp);CHKERRQ(ierr); ierr = VecAXPY(ytmp,-1.0,b);CHKERRQ(ierr); /* ytmp = mat*SOLU[:,j] - RHS[:,j] */ ierr = VecNorm(ytmp,NORM_2,&norm);CHKERRQ(ierr); if (norm > tol) { ierr = PetscPrintf(PETSC_COMM_WORLD,"Error: Norm of residual for LU %G\n",norm);CHKERRQ(ierr); } ierr = VecResetArray(b);CHKERRQ(ierr); ierr = VecResetArray(y);CHKERRQ(ierr); } ierr = MatDenseRestoreArray(RHS,&rhs_array);CHKERRQ(ierr); ierr = MatDenseRestoreArray(SOLU,&solu_array);CHKERRQ(ierr); ierr = MatDestroy(&F);CHKERRQ(ierr); /* out-place LU */ ierr = MatGetFactor(mat,MATSOLVERPETSC,MAT_FACTOR_LU,&F);CHKERRQ(ierr); ierr = MatLUFactorSymbolic(F,mat,0,0,0);CHKERRQ(ierr); ierr = MatLUFactorNumeric(F,mat,0);CHKERRQ(ierr); ierr = MatSolve(F,b,y);CHKERRQ(ierr); value = -1.0; ierr = VecAXPY(y,value,x);CHKERRQ(ierr); ierr = VecNorm(y,NORM_2,&norm);CHKERRQ(ierr); if (norm > tol) { ierr = PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for LU %G\n",norm);CHKERRQ(ierr); } /* free space */ ierr = MatDestroy(&F);CHKERRQ(ierr); ierr = MatDestroy(&mat);CHKERRQ(ierr); ierr = MatDestroy(&RHS);CHKERRQ(ierr); ierr = MatDestroy(&SOLU);CHKERRQ(ierr); ierr = PetscRandomDestroy(&rand);CHKERRQ(ierr); ierr = VecDestroy(&x);CHKERRQ(ierr); ierr = VecDestroy(&b);CHKERRQ(ierr); ierr = VecDestroy(&y);CHKERRQ(ierr); ierr = VecDestroy(&ytmp);CHKERRQ(ierr); ierr = PetscFinalize(); return 0; }
/*@
    MatComputeExplicitOperator - Computes the explicit matrix

    Collective on Mat

    Input Parameter:
.   inmat - the matrix

    Output Parameter:
.   mat - the explicit operator

    Notes:
    This computation is done by applying the operators to columns of the
    identity matrix.

    Currently, this routine uses a dense matrix format when 1 processor
    is used and a sparse format otherwise.  This routine is costly in general,
    and is recommended for use only with relatively small systems.

    Level: advanced

.keywords: Mat, compute, explicit, operator
@*/
PetscErrorCode MatComputeExplicitOperator(Mat inmat,Mat *mat)
{
  PetscErrorCode ierr;
  MPI_Comm       comm;
  PetscMPIInt    nproc;
  Vec            unitvec,image;
  PetscInt       mloc,nloc,Mglob,Nglob;
  PetscInt       first,last,col,k;
  PetscInt       *rowidx;
  PetscScalar    *colvals;
  PetscScalar    zero = 0.0,one = 1.0;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(inmat,MAT_CLASSID,1);
  PetscValidPointer(mat,2);

  ierr = PetscObjectGetComm((PetscObject)inmat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&nproc);CHKERRQ(ierr);

  ierr = MatGetLocalSize(inmat,&mloc,&nloc);CHKERRQ(ierr);
  ierr = MatGetSize(inmat,&Mglob,&Nglob);CHKERRQ(ierr);

  /* unitvec holds one identity column at a time, image receives inmat*unitvec */
  ierr = MatCreateVecs(inmat,&unitvec,&image);CHKERRQ(ierr);
  ierr = VecSetOption(unitvec,VEC_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  ierr = VecGetOwnershipRange(image,&first,&last);CHKERRQ(ierr);

  /* global numbers of the locally owned rows */
  ierr = PetscMalloc1(mloc,&rowidx);CHKERRQ(ierr);
  for (k=0; k<mloc; k++) {
    rowidx[k] = first + k;
  }

  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,mloc,nloc,Mglob,Nglob);CHKERRQ(ierr);
  if (nproc != 1) {
    /* parallel: sparse format, preallocated for full rows */
    ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*mat,nloc,NULL,Nglob-nloc,NULL);CHKERRQ(ierr);
  } else {
    /* single process: dense format */
    ierr = MatSetType(*mat,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*mat,NULL);CHKERRQ(ierr);
  }

  /* column col of the result is inmat applied to the col-th identity column */
  for (col=0; col<Nglob; col++) {
    ierr = VecSet(unitvec,zero);CHKERRQ(ierr);
    ierr = VecSetValues(unitvec,1,&col,&one,INSERT_VALUES);CHKERRQ(ierr);
    ierr = VecAssemblyBegin(unitvec);CHKERRQ(ierr);
    ierr = VecAssemblyEnd(unitvec);CHKERRQ(ierr);

    ierr = MatMult(inmat,unitvec,image);CHKERRQ(ierr);

    ierr = VecGetArray(image,&colvals);CHKERRQ(ierr);
    ierr = MatSetValues(*mat,mloc,rowidx,1,&col,colvals,INSERT_VALUES);CHKERRQ(ierr);
    ierr = VecRestoreArray(image,&colvals);CHKERRQ(ierr);
  }

  ierr = PetscFree(rowidx);CHKERRQ(ierr);
  ierr = VecDestroy(&image);CHKERRQ(ierr);
  ierr = VecDestroy(&unitvec);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
int main(int argc,char **argv) { Mat A,A11,A12,A21,A22; Vec X,X1,X2,Y,Z,Z1,Z2; PetscScalar *a,*b,*x,*y,*z,v,one=1; PetscReal nrm; PetscErrorCode ierr; PetscInt size=8,size1=6,size2=2, i,j; PetscInitialize(&argc,&argv,0,0); /* * Create matrix and three vectors: these are all normal */ ierr = PetscMalloc(size*size*sizeof(PetscScalar),&a);CHKERRQ(ierr); ierr = PetscMalloc(size*size*sizeof(PetscScalar),&b);CHKERRQ(ierr); for (i=0; i<size; i++) { for (j=0; j<size; j++) { a[i+j*size] = rand(); b[i+j*size] = a[i+j*size]; } } ierr = MatCreate(MPI_COMM_SELF,&A);CHKERRQ(ierr); ierr = MatSetSizes(A,size,size,size,size);CHKERRQ(ierr); ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr); ierr = MatSeqDenseSetPreallocation(A,a);CHKERRQ(ierr); ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = PetscMalloc(size*sizeof(PetscScalar),&x);CHKERRQ(ierr); for (i=0; i<size; i++) { x[i] = one; } ierr = VecCreateSeqWithArray(MPI_COMM_SELF,size,x,&X);CHKERRQ(ierr); ierr = VecAssemblyBegin(X);CHKERRQ(ierr); ierr = VecAssemblyEnd(X);CHKERRQ(ierr); ierr = PetscMalloc(size*sizeof(PetscScalar),&y);CHKERRQ(ierr); ierr = VecCreateSeqWithArray(MPI_COMM_SELF,size,y,&Y);CHKERRQ(ierr); ierr = VecAssemblyBegin(Y);CHKERRQ(ierr); ierr = VecAssemblyEnd(Y);CHKERRQ(ierr); ierr = PetscMalloc(size*sizeof(PetscScalar),&z);CHKERRQ(ierr); ierr = VecCreateSeqWithArray(MPI_COMM_SELF,size,z,&Z);CHKERRQ(ierr); ierr = VecAssemblyBegin(Z);CHKERRQ(ierr); ierr = VecAssemblyEnd(Z);CHKERRQ(ierr); /* * Now create submatrices and subvectors */ ierr = MatCreate(MPI_COMM_SELF,&A11);CHKERRQ(ierr); ierr = MatSetSizes(A11,size1,size1,size1,size1);CHKERRQ(ierr); ierr = MatSetType(A11,MATSEQDENSE);CHKERRQ(ierr); ierr = MatSeqDenseSetPreallocation(A11,b);CHKERRQ(ierr); ierr = MatSeqDenseSetLDA(A11,size);CHKERRQ(ierr); ierr = MatAssemblyBegin(A11,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(A11,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = 
MatCreate(MPI_COMM_SELF,&A12);CHKERRQ(ierr); ierr = MatSetSizes(A12,size1,size2,size1,size2);CHKERRQ(ierr); ierr = MatSetType(A12,MATSEQDENSE);CHKERRQ(ierr); ierr = MatSeqDenseSetPreallocation(A12,b+size1*size);CHKERRQ(ierr); ierr = MatSeqDenseSetLDA(A12,size);CHKERRQ(ierr); ierr = MatAssemblyBegin(A12,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(A12,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatCreate(MPI_COMM_SELF,&A21);CHKERRQ(ierr); ierr = MatSetSizes(A21,size2,size1,size2,size1);CHKERRQ(ierr); ierr = MatSetType(A21,MATSEQDENSE);CHKERRQ(ierr); ierr = MatSeqDenseSetPreallocation(A21,b+size1);CHKERRQ(ierr); ierr = MatSeqDenseSetLDA(A21,size);CHKERRQ(ierr); ierr = MatAssemblyBegin(A21,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(A21,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatCreate(MPI_COMM_SELF,&A22);CHKERRQ(ierr); ierr = MatSetSizes(A22,size2,size2,size2,size2);CHKERRQ(ierr); ierr = MatSetType(A22,MATSEQDENSE);CHKERRQ(ierr); ierr = MatSeqDenseSetPreallocation(A22,b+size1*size+size1);CHKERRQ(ierr); ierr = MatSeqDenseSetLDA(A22,size);CHKERRQ(ierr); ierr = MatAssemblyBegin(A22,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(A22,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = VecCreateSeqWithArray(MPI_COMM_SELF,size1,x,&X1);CHKERRQ(ierr); ierr = VecCreateSeqWithArray(MPI_COMM_SELF,size2,x+size1,&X2);CHKERRQ(ierr); ierr = VecCreateSeqWithArray(MPI_COMM_SELF,size1,z,&Z1);CHKERRQ(ierr); ierr = VecCreateSeqWithArray(MPI_COMM_SELF,size2,z+size1,&Z2);CHKERRQ(ierr); /* * Now multiple matrix times input in two ways; * compare the results */ ierr = MatMult(A,X,Y);CHKERRQ(ierr); ierr = MatMult(A11,X1,Z1);CHKERRQ(ierr); ierr = MatMultAdd(A12,X2,Z1,Z1);CHKERRQ(ierr); ierr = MatMult(A22,X2,Z2);CHKERRQ(ierr); ierr = MatMultAdd(A21,X1,Z2,Z2);CHKERRQ(ierr); ierr = VecAXPY(Z,-1.0,Y);CHKERRQ(ierr); ierr = VecNorm(Z,NORM_2,&nrm); printf("Test1; error norm=%e\n",nrm); /* printf("MatMult the usual way:\n"); VecView(Y,0); printf("MatMult by subblock:\n"); 
VecView(Z,0); */ /* * Next test: change both matrices */ v = rand(); i=1; j=size-2; ierr = MatSetValues(A,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr); j -= size1; ierr = MatSetValues(A12,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr); v = rand(); i=j=size1+1; ierr = MatSetValues(A,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr); i=j=1; ierr = MatSetValues(A22,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr); ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyBegin(A12,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(A12,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyBegin(A22,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(A22,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatMult(A,X,Y);CHKERRQ(ierr); ierr = MatMult(A11,X1,Z1);CHKERRQ(ierr); ierr = MatMultAdd(A12,X2,Z1,Z1);CHKERRQ(ierr); ierr = MatMult(A22,X2,Z2);CHKERRQ(ierr); ierr = MatMultAdd(A21,X1,Z2,Z2);CHKERRQ(ierr); ierr = VecAXPY(Z,-1.0,Y);CHKERRQ(ierr); ierr = VecNorm(Z,NORM_2,&nrm); printf("Test2; error norm=%e\n",nrm); /* * Transpose product */ ierr = MatMultTranspose(A,X,Y);CHKERRQ(ierr); ierr = MatMultTranspose(A11,X1,Z1);CHKERRQ(ierr); ierr = MatMultTransposeAdd(A21,X2,Z1,Z1);CHKERRQ(ierr); ierr = MatMultTranspose(A22,X2,Z2);CHKERRQ(ierr); ierr = MatMultTransposeAdd(A12,X1,Z2,Z2);CHKERRQ(ierr); ierr = VecAXPY(Z,-1.0,Y);CHKERRQ(ierr); ierr = VecNorm(Z,NORM_2,&nrm); printf("Test3; error norm=%e\n",nrm); ierr = PetscFree(a);CHKERRQ(ierr); ierr = PetscFree(b);CHKERRQ(ierr); ierr = PetscFree(x);CHKERRQ(ierr); ierr = PetscFree(y);CHKERRQ(ierr); ierr = PetscFree(z);CHKERRQ(ierr); ierr = MatDestroy(A);CHKERRQ(ierr); ierr = MatDestroy(A11);CHKERRQ(ierr); ierr = MatDestroy(A12);CHKERRQ(ierr); ierr = MatDestroy(A21);CHKERRQ(ierr); ierr = MatDestroy(A22);CHKERRQ(ierr); ierr = VecDestroy(X);CHKERRQ(ierr); ierr = VecDestroy(Y);CHKERRQ(ierr); ierr = VecDestroy(Z);CHKERRQ(ierr); ierr = VecDestroy(X1);CHKERRQ(ierr); ierr = 
VecDestroy(X2);CHKERRQ(ierr); ierr = VecDestroy(Z1);CHKERRQ(ierr); ierr = VecDestroy(Z2);CHKERRQ(ierr); /*ierr = PetscLogPrintSummary(MPI_COMM_SELF,"ex2.log");CHKERRQ(ierr);*/ ierr = PetscFinalize();CHKERRQ(ierr); return 0; }
/*
   MatRARtSymbolic_SeqAIJ_SeqAIJ - Symbolic phase of C = R*A*R^T for SeqAIJ matrices.

   Input Parameters:
+  A    - the middle matrix
.  R    - the outer matrix
-  fill - fill estimate, passed on unchanged to MatPtAPSymbolic_SeqAIJ_SeqAIJ()

   Output Parameter:
.  C    - the symbolic product

   Notes:
   The routine
     1. forms the symbolic transpose P = R^T and obtains C from the symbolic
        P^T*A*P product;
     2. attaches a Mat_RARt struct to C through a container composed under the
        name "Mat_RARt";
     3. colors C (MATCOLORINGLF) and stores the resulting MatTransposeColoring
        in the struct;
     4. creates two dense matrices with one column per color, Rt (A->cmap->n
        rows) and RARt ((*C)->rmap->n rows), plus a work array of
        4*A->rmap->n scalars, all stored in the struct;
     5. saves C's destroy routine in the struct and installs
        MatDestroy_SeqAIJ_RARt in its place.

   When PETSC_USE_INFO is defined, the sizes of the dense matrices, the density
   nz(C)/(rows*ncolors) and the time spent in each stage are reported through
   PetscInfo.
*/
PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat R,PetscReal fill,Mat *C)
{
  PetscErrorCode       ierr;
  Mat                  P;
  PetscInt             *rti,*rtj;
  Mat_RARt             *rart;
  PetscContainer       container;
  MatTransposeColoring matcoloring;
  ISColoring           iscoloring;
  Mat                  Rt_dense,RARt_dense;
  PetscLogDouble       GColor=0.0,MCCreate=0.0,MDenCreate=0.0,t0,tf,etime=0.0;
  Mat_SeqAIJ           *c;

  PetscFunctionBegin;
  ierr = PetscGetTime(&t0);CHKERRQ(ierr);
  /* create symbolic P=Rt */
  ierr = MatGetSymbolicTranspose_SeqAIJ(R,&rti,&rtj);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,R->cmap->n,R->rmap->n,rti,rtj,PETSC_NULL,&P);CHKERRQ(ierr);

  /* get symbolic C=Pt*A*P */
  ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr);
  /* both block sizes of C are taken from the row block size of R */
  (*C)->rmap->bs = R->rmap->bs;
  (*C)->cmap->bs = R->rmap->bs;

  /* create a supporting struct */
  ierr = PetscNew(Mat_RARt,&rart);CHKERRQ(ierr);

  /* attach the supporting struct to C */
  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,rart);CHKERRQ(ierr);
  ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_Mat_RARt);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)(*C),"Mat_RARt",(PetscObject)container);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
  ierr = PetscGetTime(&tf);CHKERRQ(ierr);
  etime += tf - t0;

  /* Create MatTransposeColoring from symbolic C=R*A*R^T */
  c    = (Mat_SeqAIJ*)(*C)->data;
  ierr = PetscGetTime(&t0);CHKERRQ(ierr);
  ierr = MatGetColoring(*C,MATCOLORINGLF,&iscoloring);CHKERRQ(ierr);
  ierr = PetscGetTime(&tf);CHKERRQ(ierr);
  GColor += tf - t0;

  ierr = PetscGetTime(&t0);CHKERRQ(ierr);
  ierr = MatTransposeColoringCreate(*C,iscoloring,&matcoloring);CHKERRQ(ierr);
  rart->matcoloring = matcoloring;
  ierr = ISColoringDestroy(&iscoloring);CHKERRQ(ierr);
  ierr = PetscGetTime(&tf);CHKERRQ(ierr);
  MCCreate += tf - t0;

  ierr = PetscGetTime(&t0);CHKERRQ(ierr);
  /* Create Rt_dense */
  ierr = MatCreate(PETSC_COMM_SELF,&Rt_dense);CHKERRQ(ierr);
  ierr = MatSetSizes(Rt_dense,A->cmap->n,matcoloring->ncolors,A->cmap->n,matcoloring->ncolors);CHKERRQ(ierr);
  ierr = MatSetType(Rt_dense,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(Rt_dense,PETSC_NULL);CHKERRQ(ierr);
  /* flagged as assembled directly; no MatAssemblyBegin/End is called on Rt_dense here */
  Rt_dense->assembled = PETSC_TRUE;
  rart->Rt            = Rt_dense;

  /* Create RARt_dense = R*A*Rt_dense */
  ierr = MatCreate(PETSC_COMM_SELF,&RARt_dense);CHKERRQ(ierr);
  ierr = MatSetSizes(RARt_dense,(*C)->rmap->n,matcoloring->ncolors,(*C)->rmap->n,matcoloring->ncolors);CHKERRQ(ierr);
  ierr = MatSetType(RARt_dense,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(RARt_dense,PETSC_NULL);CHKERRQ(ierr);
  rart->RARt = RARt_dense;

  /* Allocate work array to store columns of A*R^T used in MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense() */
  ierr = PetscMalloc(A->rmap->n*4*sizeof(PetscScalar),&rart->work);CHKERRQ(ierr);
  ierr = PetscGetTime(&tf);CHKERRQ(ierr);
  MDenCreate += tf - t0;

  /* keep C's own destroy routine so that the replacement can refer to it */
  rart->destroy      = (*C)->ops->destroy;
  (*C)->ops->destroy = MatDestroy_SeqAIJ_RARt;

  /* clean up */
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(R,&rti,&rtj);CHKERRQ(ierr);
  ierr = MatDestroy(&P);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  {
    /* multiply in floating point: rows*ncolors can exceed the range of PetscInt */
    PetscReal density = (PetscReal)(c->nz)/((PetscReal)RARt_dense->rmap->n*(PetscReal)RARt_dense->cmap->n);
    /* %g expects a double, whatever precision PetscReal is configured with */
    ierr = PetscInfo6(*C,"RARt_den %D %D; Rt_den %D %D, (RARt->nz %D)/(m*ncolors)=%g\n",RARt_dense->rmap->n,RARt_dense->cmap->n,Rt_dense->rmap->n,Rt_dense->cmap->n,c->nz,(double)density);CHKERRQ(ierr);
    ierr = PetscInfo5(*C,"Sym = GetColor %g + MColorCreate %g + MDenCreate %g + other %g = %g\n",GColor,MCCreate,MDenCreate,etime,GColor+MCCreate+MDenCreate+etime);CHKERRQ(ierr);
  }
#endif
  PetscFunctionReturn(0);
}