/*
  MatPtAPSymbolic_SeqAIJ_SeqAIJ_DenseAxpy - Symbolic phase of C = P^T * A * P for
  SeqAIJ matrices, paired with the dense-axpy numeric kernel.

  The sparsity structure (api/apj) of the intermediate product A*P is handed off to
  the Mat_PtAP context attached to C, so MatPtAPNumeric_SeqAIJ_SeqAIJ() can reuse it;
  a dense scratch row of length pn for one row of A*P is preallocated there as well.
*/
PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ_DenseAxpy(Mat A,Mat P,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  Mat            Pt,AP;
  Mat_SeqAIJ     *aps,*cs;
  Mat_PtAP       *ptap;
  PetscInt       *api,*apj,*ci,pn = P->cmap->N;
  MatScalar      *ca;

  PetscFunctionBegin;
  /* Symbolic transpose: Pt = P^T */
  ierr = MatTransposeSymbolic_SeqAIJ(P,&Pt);CHKERRQ(ierr);

  /* Symbolic product: AP = A*P */
  ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,P,fill,&AP);CHKERRQ(ierr);
  aps = (Mat_SeqAIJ*)AP->data;
  api = aps->i;
  apj = aps->j;
  /* api/apj are transferred into ptap below; they must survive MatDestroy(&AP) */
  aps->free_ij = PETSC_FALSE;

  /* Symbolic product: C = Pt*AP */
  ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(Pt,AP,fill,C);CHKERRQ(ierr);

  /* Attach a zero-initialized value array to C */
  cs = (Mat_SeqAIJ*)(*C)->data;
  ci = cs->i;
  ierr = PetscCalloc1(ci[pn]+1,&ca);CHKERRQ(ierr);
  cs->a      = ca;
  cs->free_a = PETSC_TRUE;

  /* Context reused by MatPtAPNumeric(); chain C's destroy so the context is freed with C */
  ierr = PetscNew(&ptap);CHKERRQ(ierr);
  cs->ptap           = ptap;
  ptap->destroy      = (*C)->ops->destroy;
  (*C)->ops->destroy = MatDestroy_SeqAIJ_PtAP;
  ptap->api          = api;
  ptap->apj          = apj;

  /* Dense scratch row holding one row of A*P during the numeric phase */
  ierr = PetscCalloc1(pn+1,&ptap->apa);CHKERRQ(ierr);

  (*C)->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqAIJ;

  /* Release the symbolic intermediates */
  ierr = MatDestroy(&Pt);CHKERRQ(ierr);
  ierr = MatDestroy(&AP);CHKERRQ(ierr);
#if defined(PETSC_USE_INFO)
  ierr = PetscInfo1((*C),"given fill %g\n",(double)fill);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
/*
  MatPtAPSymbolic_SeqAIJ_SeqAIJ - Symbolic phase of C = P^T * A * P for SeqAIJ matrices.

  The Boolean option -matptap_scalable selects the implementation:
    PETSC_TRUE  (default): sparse axpy - slower numeric phase, but does not store the
                           structure of A*P;
    PETSC_FALSE:           dense axpy  - fastest, requires storing the structure of A*P.
*/
PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  Mat_SeqAIJ     *ap,*c;
  PetscInt       *api,*apj,*ci,pn = P->cmap->N;
  MatScalar      *ca;
  Mat_PtAP       *ptap;
  Mat            Pt,AP;
  PetscBool      sparse_axpy = PETSC_TRUE;

  PetscFunctionBegin;
  ierr = PetscObjectOptionsBegin((PetscObject)A);CHKERRQ(ierr);
  /* flag 'sparse_axpy' determines which implementation to use:
       PETSC_FALSE: dense axpy in MatPtAPNumeric() - fastest, but requires storage of struct A*P;
       PETSC_TRUE:  two sparse axpys in MatPtAPNumeric() - slowest, does not store structure of A*P. */
  ierr = PetscOptionsBool("-matptap_scalable","Use sparse axpy but slower MatPtAPNumeric()","",sparse_axpy,&sparse_axpy,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  if (sparse_axpy) {
    ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ_SparseAxpy(A,P,fill,C);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* Get symbolic Pt = P^T */
  ierr = MatTransposeSymbolic_SeqAIJ(P,&Pt);CHKERRQ(ierr);

  /* Get symbolic AP = A*P */
  ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,P,fill,&AP);CHKERRQ(ierr);
  ap          = (Mat_SeqAIJ*)AP->data;
  api         = ap->i;
  apj         = ap->j;
  ap->free_ij = PETSC_FALSE; /* api and apj are kept in struct ptap, cannot be destroyed with AP */

  /* Get C = Pt*AP */
  ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(Pt,AP,fill,C);CHKERRQ(ierr);
  c  = (Mat_SeqAIJ*)(*C)->data;
  ci = c->i;
  /* PetscCalloc1() zeroes the values; replaces the PetscMalloc()+PetscMemzero() pair
     for consistency with the DenseAxpy variant in this file */
  ierr = PetscCalloc1(ci[pn]+1,&ca);CHKERRQ(ierr);
  c->a      = ca;
  c->free_a = PETSC_TRUE;

  /* Create a supporting struct for reuse by MatPtAPNumeric(); one-argument PetscNew()
     matches the file's current convention (two-argument form is the removed old API) */
  ierr = PetscNew(&ptap);CHKERRQ(ierr);
  c->ptap            = ptap;
  ptap->destroy      = (*C)->ops->destroy;
  (*C)->ops->destroy = MatDestroy_SeqAIJ_PtAP;

  /* Allocate zeroed temporary array for storage of one row of A*P */
  ierr = PetscCalloc1(pn+1,&ptap->apa);CHKERRQ(ierr);

  (*C)->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqAIJ;
  ptap->api = api;
  ptap->apj = apj;

  /* Clean up */
  ierr = MatDestroy(&Pt);CHKERRQ(ierr);
  ierr = MatDestroy(&AP);CHKERRQ(ierr);
#if defined(PETSC_USE_INFO)
  /* %G is not a valid PetscInfo format; use %g with an explicit (double) cast and
     pass the PetscBool as int, matching the PetscInfo1() call in the DenseAxpy variant */
  ierr = PetscInfo2((*C),"given fill %g, use scalable %d\n",(double)fill,(int)sparse_axpy);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
/*
  MatPtAPSymbolic_SeqAIJ_SeqAIJ - Symbolic phase of C = P^T * A * P for SeqAIJ matrices
  (older variant: an integer option -matptap_sparseaxpy selects among three numeric kernels).
*/
PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  Mat_SeqAIJ     *ap,*c;
  PetscInt       *api,*apj,*ci,pn = P->cmap->N,sparse_axpy = 0;
  MatScalar      *ca;
  Mat_PtAP       *ptap;
  Mat            Pt,AP;

  PetscFunctionBegin;
  /* flag 'sparse_axpy' determines which implementations to be used:
       0: do dense axpy in MatPtAPNumeric() - fastest, but requires storage of struct A*P; (default)
       1: do one sparse axpy - uses same memory as sparse_axpy=0 and might execute less flops
          (apnz vs. cnz in the outerproduct), slower than case '0' when cnz is not too large than apnz;
       2: do two sparse axpy in MatPtAPNumeric() - slowest, does not store structure of A*P. */
  ierr = PetscOptionsGetInt(PETSC_NULL,"-matptap_sparseaxpy",&sparse_axpy,PETSC_NULL);CHKERRQ(ierr);
  if (sparse_axpy == 2) {
    ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ_SparseAxpy2(A,P,fill,C);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* Get symbolic Pt = P^T */
  ierr = MatTransposeSymbolic_SeqAIJ(P,&Pt);CHKERRQ(ierr);

  /* Get symbolic AP = A*P */
  ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,P,fill,&AP);CHKERRQ(ierr);
  ap          = (Mat_SeqAIJ*)AP->data;
  api         = ap->i;
  apj         = ap->j;
  ap->free_ij = PETSC_FALSE; /* api and apj are kept in struct ptap, cannot be destroyed with AP */

  /* Get C = Pt*AP */
  ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(Pt,AP,fill,C);CHKERRQ(ierr);
  c  = (Mat_SeqAIJ*)(*C)->data;
  ci = c->i;
  ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
  c->a      = ca;
  c->free_a = PETSC_TRUE;

  /* Create a supporting struct for reuse by MatPtAPNumeric() */
  ierr = PetscNew(Mat_PtAP,&ptap);CHKERRQ(ierr);
  c->ptap            = ptap;
  ptap->destroy      = (*C)->ops->destroy;
  (*C)->ops->destroy = MatDestroy_SeqAIJ_PtAP;

  /* Allocate temporary array for storage of one row of A*P */
  ierr = PetscMalloc((pn+1)*sizeof(PetscScalar),&ptap->apa);CHKERRQ(ierr);
  ierr = PetscMemzero(ptap->apa,(pn+1)*sizeof(PetscScalar));CHKERRQ(ierr);

  /* Bug fix: install the numeric kernel on the product C, not on the input A.
     The numeric phase dispatches through (*C)->ops->ptapnumeric (as the sibling
     implementations in this file do), and a symbolic product must not mutate A. */
  if (sparse_axpy == 1) {
    (*C)->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqAIJ_SparseAxpy;
  } else {
    (*C)->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqAIJ;
  }
  ptap->api = api;
  ptap->apj = apj;

  /* Clean up. */
  ierr = MatDestroy(&Pt);CHKERRQ(ierr);
  ierr = MatDestroy(&AP);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}