static PetscErrorCode PCSetUp_PBJacobi(PC pc) { PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; PetscErrorCode ierr; Mat A = pc->pmat; PetscFunctionBegin; if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supported only for square matrices and square storage"); ierr = MatInvertBlockDiagonal(A,&jac->diag);CHKERRQ(ierr); jac->bs = A->rmap->bs; jac->mbs = A->rmap->n/A->rmap->bs; switch (jac->bs){ case 1: pc->ops->apply = PCApply_PBJacobi_1; break; case 2: pc->ops->apply = PCApply_PBJacobi_2; break; case 3: pc->ops->apply = PCApply_PBJacobi_3; break; case 4: pc->ops->apply = PCApply_PBJacobi_4; break; case 5: pc->ops->apply = PCApply_PBJacobi_5; break; case 6: pc->ops->apply = PCApply_PBJacobi_6; break; case 7: pc->ops->apply = PCApply_PBJacobi_7; break; default: SETERRQ1(((PetscObject)pc)->comm,PETSC_ERR_SUP,"not supported for block size %D",jac->bs); } PetscFunctionReturn(0); }
PetscErrorCode MatSOR_SeqBSTRM_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) { Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; const MatScalar *idiag,*mdiag; const PetscScalar *b; PetscErrorCode ierr; PetscInt m = a->mbs,i,i2,nz,idx; const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; Mat_SeqBSTRM *bstrm = (Mat_SeqBSTRM*)A->spptr; MatScalar *v1, *v2, *v3, *v4, *v5, *v10, *v20, *v30, *v40, *v50; PetscInt slen; PetscFunctionBegin; if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); its = its*lits; if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} diag = a->diag; idiag = a->idiag; ierr = VecGetArray(xx,&x);CHKERRQ(ierr); ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); slen = 5*(ai[m]-ai[0]); v10 = bstrm->as; v20 = v10 + slen; v30 = v20 + slen; v40 = v30 + slen; v50 = v40 + slen; if (flag & SOR_ZERO_INITIAL_GUESS) { if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; i2 = 5; idiag += 25; for (i=1; i<m; i++) { v1 = v10 + 5*ai[i]; v2 = v20 + 5*ai[i]; v3 = v30 + 5*ai[i]; v4 = v40 + 5*ai[i]; v5 = v50 + 5*ai[i]; vi = aj + ai[i]; nz = diag[i] - ai[i]; s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; while (nz--) { idx = 5*(*vi++); x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; s1 -= v1[0]*x1 + v1[1]*x2 + v1[2]*x3 + v1[3]*x4 + v1[4]*x5; s2 -= v2[0]*x1 + v2[1]*x2 + v2[2]*x3 + v2[3]*x4 + v2[4]*x5; s3 -= v3[0]*x1 + v3[1]*x2 + v3[2]*x3 + v3[3]*x4 + v3[4]*x5; s4 -= v4[0]*x1 + v4[1]*x2 + v4[2]*x3 + v4[3]*x4 + v4[4]*x5; s5 -= v5[0]*x1 + v5[1]*x2 + v5[2]*x3 + v5[3]*x4 + v5[4]*x5; v1 += 5; v2 += 5; v3 += 5; v4 += 5; v5 += 5; } x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; idiag += 25; i2 += 5; } /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); } if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { i2 = 0; mdiag = a->idiag+25*a->mbs; for (i=0; i<m; i++) { x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; mdiag += 25; i2 += 5; } ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr); } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); } if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { idiag = a->idiag+25*a->mbs - 25; i2 = 5*m - 5; x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; idiag -= 25; i2 -= 5; for (i=m-2; i>=0; i--) { v1 = v10 + 5*(ai[i+1]-1); v2 = v20 + 5*(ai[i+1]-1); v3 = v30 + 5*(ai[i+1]-1); v4 = v40 + 5*(ai[i+1]-1); v5 = v50 + 5*(ai[i+1]-1); vi = aj + ai[i+1] - 1; nz = ai[i+1] - diag[i] - 1; s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; while (nz--) { idx = 5*(*vi--); x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; s1 -= v1[4]*x5 + v1[3]*x4 + v1[2]*x3 + v1[1]*x2 + v1[0]*x1; s2 -= v2[4]*x5 + v2[3]*x4 + v2[2]*x3 + v2[1]*x2 + v2[0]*x1; s3 -= v3[4]*x5 + v3[3]*x4 + v3[2]*x3 + v3[1]*x2 + v3[0]*x1; s4 -= v4[4]*x5 + v4[3]*x4 + v4[2]*x3 + v4[1]*x2 + v4[0]*x1; s5 -= v5[4]*x5 + v5[3]*x4 + v5[2]*x3 + v5[1]*x2 + v5[0]*x1; v1 -= 5; v2 -= 5; v3 -= 5; v4 -= 5; v5 -= 5; } x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; idiag -= 25; i2 -= 5; } ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); } } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); PetscFunctionReturn(0); }