PetscErrorCode MatLUFactorSymbolic_SeqBAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) { Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b; PetscInt n =a->mbs,bs = A->rmap->bs,bs2=a->bs2; PetscBool row_identity,col_identity,both_identity; IS isicol; PetscErrorCode ierr; const PetscInt *r,*ic; PetscInt i,*ai=a->i,*aj=a->j; PetscInt *bi,*bj,*ajtmp; PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; PetscReal f; PetscInt nlnk,*lnk,k,**bi_ptr; PetscFreeSpaceList free_space=NULL,current_space=NULL; PetscBT lnkbt; PetscBool missing; PetscFunctionBegin; if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square"); ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ ierr = PetscMalloc1(n+1,&bi);CHKERRQ(ierr); ierr = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr); bi[0] = bdiag[0] = 0; /* linked list for storing column indices of the active row */ nlnk = n + 1; ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); ierr = PetscMalloc2(n+1,&bi_ptr,n+1,&im);CHKERRQ(ierr); /* initial FreeSpace size is f*(ai[n]+1) */ f = info->fill; ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);CHKERRQ(ierr); current_space = free_space; for (i=0; i<n; i++) { /* copy previous fill into linked list */ nzi = 0; nnz = ai[r[i]+1] - ai[r[i]]; ajtmp = aj + ai[r[i]]; ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); nzi += nlnk; /* add pivot rows into linked list */ row = lnk[n]; while (row < i) { nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); nzi += nlnk; row = lnk[row]; } bi[i+1] = bi[i] + nzi; im[i] = nzi; /* mark bdiag */ nzbd = 0; nnz = nzi; k = lnk[n]; while (nnz-- && k < i) { nzbd++; k = lnk[k]; } bdiag[i] = bi[i] + nzbd; /* if free space is not available, make more free space */ if (current_space->local_remaining<nzi) { nnz = PetscIntMultTruncate(n - i,nzi); /* estimated and max additional space needed */ ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); reallocs++; } /* copy data into free space, then initialize lnk */ ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); bi_ptr[i] = current_space->array; current_space->array += nzi; current_space->local_used += nzi; current_space->local_remaining -= nzi; } #if defined(PETSC_USE_INFO) if (ai[n] != 0) { PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr); ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);CHKERRQ(ierr); ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); } else { ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); } #endif ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); /* destroy list of free space and other temporary array(s) */ ierr = PetscMalloc1(bi[n]+1,&bj);CHKERRQ(ierr); ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); /* put together the new matrix */ ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);CHKERRQ(ierr); b = (Mat_SeqBAIJ*)(B)->data; b->free_a = PETSC_TRUE; b->free_ij = PETSC_TRUE; b->singlemalloc = PETSC_FALSE; ierr = PetscMalloc1((bi[n]+1)*bs2,&b->a);CHKERRQ(ierr); b->j = bj; b->i = bi; b->diag = bdiag; b->free_diag = PETSC_TRUE; b->ilen = 0; b->imax = 0; b->row = isrow; b->col = iscol; b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE; ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); b->icol = isicol; ierr = PetscMalloc1(bs*n+bs,&b->solve_work);CHKERRQ(ierr); ierr = PetscLogObjectMemory((PetscObject)B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2));CHKERRQ(ierr); b->maxnz = b->nz = bi[n]; (B)->factortype = MAT_FACTOR_LU; (B)->info.factor_mallocs = reallocs; (B)->info.fill_ratio_given = f; if (ai[n] != 0) { (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); } else { (B)->info.fill_ratio_needed = 0.0; } ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); both_identity = (PetscBool) (row_identity && col_identity); ierr = MatSeqBAIJSetNumericFactorization_inplace(B,both_identity);CHKERRQ(ierr); PetscFunctionReturn(0); }
PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ_SparseAxpy(Mat A,Mat P,PetscReal fill,Mat *C) { PetscErrorCode ierr; PetscFreeSpaceList free_space=NULL,current_space=NULL; Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*p = (Mat_SeqAIJ*)P->data,*c; PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*ptaj,nspacedouble=0; PetscInt an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N; PetscInt i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,nlnk,*lnk; MatScalar *ca; PetscBT lnkbt; PetscReal afill; PetscFunctionBegin; /* Get ij structure of P^T */ ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); ptJ = ptj; /* Allocate ci array, arrays for fill computation and */ /* free space for accumulating nonzero column info */ ierr = PetscMalloc1(pn+1,&ci);CHKERRQ(ierr); ci[0] = 0; ierr = PetscCalloc1(2*an+1,&ptadenserow);CHKERRQ(ierr); ptasparserow = ptadenserow + an; /* create and initialize a linked list */ nlnk = pn+1; ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr); /* Set initial free space to be fill*(nnz(A)+ nnz(P)) */ ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],pi[pm])),&free_space);CHKERRQ(ierr); current_space = free_space; /* Determine symbolic info for each row of C: */ for (i=0; i<pn; i++) { ptnzi = pti[i+1] - pti[i]; ptanzi = 0; /* Determine symbolic row of PtA: */ for (j=0; j<ptnzi; j++) { arow = *ptJ++; anzj = ai[arow+1] - ai[arow]; ajj = aj + ai[arow]; for (k=0; k<anzj; k++) { if (!ptadenserow[ajj[k]]) { ptadenserow[ajj[k]] = -1; ptasparserow[ptanzi++] = ajj[k]; } } } /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ ptaj = ptasparserow; cnzi = 0; for (j=0; j<ptanzi; j++) { prow = *ptaj++; pnzj = pi[prow+1] - pi[prow]; pjj = pj + pi[prow]; /* add non-zero cols of P into the sorted linked list lnk */ ierr = PetscLLAddSorted(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr); cnzi += nlnk; } /* If free space is not available, make more free space */ /* Double the amount of total space in the list */ if (current_space->local_remaining<cnzi) { ierr = PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); nspacedouble++; } /* Copy data into free space, and zero out denserows */ ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); current_space->array += cnzi; current_space->local_used += cnzi; current_space->local_remaining -= cnzi; for (j=0; j<ptanzi; j++) ptadenserow[ptasparserow[j]] = 0; /* Aside: Perhaps we should save the pta info for the numerical factorization. */ /* For now, we will recompute what is needed. */ ci[i+1] = ci[i] + cnzi; } /* nnz is now stored in ci[ptm], column indices are in the list of free space */ /* Allocate space for cj, initialize cj, and */ /* destroy list of free space and other temporary array(s) */ ierr = PetscMalloc1(ci[pn]+1,&cj);CHKERRQ(ierr); ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); ierr = PetscFree(ptadenserow);CHKERRQ(ierr); ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); ierr = PetscCalloc1(ci[pn]+1,&ca);CHKERRQ(ierr); /* put together the new matrix */ ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),pn,pn,ci,cj,ca,C);CHKERRQ(ierr); ierr = MatSetBlockSizes(*C,PetscAbs(P->cmap->bs),PetscAbs(P->cmap->bs));CHKERRQ(ierr); /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ /* Since these are PETSc arrays, change flags to free them as necessary. */ c = (Mat_SeqAIJ*)((*C)->data); c->free_a = PETSC_TRUE; c->free_ij = PETSC_TRUE; c->nonew = 0; (*C)->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqAIJ_SparseAxpy; /* set MatInfo */ afill = (PetscReal)ci[pn]/(ai[am]+pi[pm] + 1.e-5); if (afill < 1.0) afill = 1.0; c->maxnz = ci[pn]; c->nz = ci[pn]; (*C)->info.mallocs = nspacedouble; (*C)->info.fill_ratio_given = fill; (*C)->info.fill_ratio_needed = afill; /* Clean up. */ ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); #if defined(PETSC_USE_INFO) if (ci[pn] != 0) { ierr = PetscInfo3((*C),"Reallocs %D; Fill ratio: given %g needed %g.\n",nspacedouble,(double)fill,(double)afill);CHKERRQ(ierr); ierr = PetscInfo1((*C),"Use MatPtAP(A,P,MatReuse,%g,&C) for best performance.\n",(double)afill);CHKERRQ(ierr); } else { ierr = PetscInfo((*C),"Empty matrix product\n");CHKERRQ(ierr); } #endif PetscFunctionReturn(0); }
PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ_SparseAxpy2(Mat A,Mat P,PetscReal fill,Mat *C) { PetscErrorCode ierr; PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*p = (Mat_SeqAIJ*)P->data,*c; PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*ptaj,nspacedouble=0; PetscInt an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N; PetscInt i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,nlnk,*lnk; MatScalar *ca; PetscBT lnkbt; PetscFunctionBegin; /* Get ij structure of P^T */ ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); ptJ=ptj; /* Allocate ci array, arrays for fill computation and */ /* free space for accumulating nonzero column info */ ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); ci[0] = 0; ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr); ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr); ptasparserow = ptadenserow + an; /* create and initialize a linked list */ nlnk = pn+1; ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr); /* Set initial free space to be fill*nnz(A). */ /* This should be reasonable if sparsity of PtAP is similar to that of A. */ ierr = PetscFreeSpaceGet((PetscInt)(fill*ai[am]),&free_space); current_space = free_space; /* Determine symbolic info for each row of C: */ for (i=0;i<pn;i++) { ptnzi = pti[i+1] - pti[i]; ptanzi = 0; /* Determine symbolic row of PtA: */ for (j=0;j<ptnzi;j++) { arow = *ptJ++; anzj = ai[arow+1] - ai[arow]; ajj = aj + ai[arow]; for (k=0;k<anzj;k++) { if (!ptadenserow[ajj[k]]) { ptadenserow[ajj[k]] = -1; ptasparserow[ptanzi++] = ajj[k]; } } } /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ ptaj = ptasparserow; cnzi = 0; for (j=0;j<ptanzi;j++) { prow = *ptaj++; pnzj = pi[prow+1] - pi[prow]; pjj = pj + pi[prow]; /* add non-zero cols of P into the sorted linked list lnk */ ierr = PetscLLAddSorted(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr); cnzi += nlnk; } /* If free space is not available, make more free space */ /* Double the amount of total space in the list */ if (current_space->local_remaining<cnzi) { ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); nspacedouble++; } /* Copy data into free space, and zero out denserows */ ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); current_space->array += cnzi; current_space->local_used += cnzi; current_space->local_remaining -= cnzi; for (j=0;j<ptanzi;j++) { ptadenserow[ptasparserow[j]] = 0; } /* Aside: Perhaps we should save the pta info for the numerical factorization. */ /* For now, we will recompute what is needed. */ ci[i+1] = ci[i] + cnzi; } /* nnz is now stored in ci[ptm], column indices are in the list of free space */ /* Allocate space for cj, initialize cj, and */ /* destroy list of free space and other temporary array(s) */ ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); ierr = PetscFree(ptadenserow);CHKERRQ(ierr); ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); /* Allocate space for ca */ ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); /* put together the new matrix */ ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); (*C)->rmap->bs = P->cmap->bs; (*C)->cmap->bs = P->cmap->bs; PetscPrintf(PETSC_COMM_SELF,"************%s C.bs=%d,%d\n",__FUNCT__,(*C)->rmap->bs,(*C)->cmap->bs); /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ /* Since these are PETSc arrays, change flags to free them as necessary. */ c = (Mat_SeqAIJ *)((*C)->data); c->free_a = PETSC_TRUE; c->free_ij = PETSC_TRUE; c->nonew = 0; A->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqAIJ_SparseAxpy2; /* should use *C->ops until PtAP insterface is updated to double dispatch as MatMatMult() */ /* Clean up. */ ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); #if defined(PETSC_USE_INFO) if (ci[pn] != 0) { PetscReal afill = ((PetscReal)ci[pn])/ai[am]; if (afill < 1.0) afill = 1.0; ierr = PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",nspacedouble,fill,afill);CHKERRQ(ierr); ierr = PetscInfo1((*C),"Use MatPtAP(A,P,MatReuse,%G,&C) for best performance.\n",afill);CHKERRQ(ierr); } else { ierr = PetscInfo((*C),"Empty matrix product\n");CHKERRQ(ierr); } #endif PetscFunctionReturn(0); }