/*@
   ISEqual - Compares if two index sets have the same set of indices.

   Collective on IS

   Input Parameters:
.  is1, is2 - The index sets being compared

   Output Parameters:
.  flg - output flag, either PETSC_TRUE (if both index sets have the same indices),
         or PETSC_FALSE if the index sets differ by size or by the set of indices)

   Level: intermediate

   Note:
   This routine sorts the contents of the index sets before the comparison is made,
   so the order of the indices on a processor is immaterial.

   Each processor has to have the same indices in the two sets, for example,
$          Processor
$             0      1
$    is1 = {0, 1} {2, 3}
$    is2 = {2, 3} {0, 1}
   will return false.

   Concepts: index sets^equal
   Concepts: IS^equal
@*/
PetscErrorCode PETSCVEC_DLLEXPORT ISEqual(IS is1,IS is2,PetscTruth *flg)
{
  PetscInt       sz1,sz2,*a1,*a2;
  const PetscInt *ptr1,*ptr2;
  PetscTruth     flag;             /* local (per-rank) comparison result */
  MPI_Comm       comm;
  PetscErrorCode ierr;
  PetscMPIInt    mflg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(is1,IS_COOKIE,1);
  PetscValidHeaderSpecific(is2,IS_COOKIE,2);
  PetscValidIntPointer(flg,3);

  /* Identical object: trivially equal, no communication needed. */
  if (is1 == is2) {
    *flg = PETSC_TRUE;
    PetscFunctionReturn(0);
  }
  /* Communicators must describe the same process group; otherwise the sets
     cannot be compared and the collective below would be ill-defined. */
  ierr = MPI_Comm_compare(((PetscObject)is1)->comm,((PetscObject)is2)->comm,&mflg);CHKERRQ(ierr);
  if (mflg != MPI_CONGRUENT && mflg != MPI_IDENT) {
    *flg = PETSC_FALSE;
    PetscFunctionReturn(0);
  }

  /* Global sizes agree on every rank, so returning early here (without the
     Allreduce below) is still collectively consistent. */
  ierr = ISGetSize(is1,&sz1);CHKERRQ(ierr);
  ierr = ISGetSize(is2,&sz2);CHKERRQ(ierr);
  if (sz1 != sz2) {
    *flg = PETSC_FALSE;
  } else {
    ierr = ISGetLocalSize(is1,&sz1);CHKERRQ(ierr);
    ierr = ISGetLocalSize(is2,&sz2);CHKERRQ(ierr);
    if (sz1 != sz2) {
      /* Local sizes differ on this rank only; record locally, decide globally
         in the Allreduce below. */
      flag = PETSC_FALSE;
    } else {
      /* Compare sorted copies so index order within a rank does not matter. */
      ierr = ISGetIndices(is1,&ptr1);CHKERRQ(ierr);
      ierr = ISGetIndices(is2,&ptr2);CHKERRQ(ierr);
      ierr = PetscMalloc(sz1*sizeof(PetscInt),&a1);CHKERRQ(ierr);
      ierr = PetscMalloc(sz2*sizeof(PetscInt),&a2);CHKERRQ(ierr);
      ierr = PetscMemcpy(a1,ptr1,sz1*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMemcpy(a2,ptr2,sz2*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr);
      ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr);
      ierr = PetscMemcmp(a1,a2,sz1*sizeof(PetscInt),&flag);CHKERRQ(ierr);
      ierr = ISRestoreIndices(is1,&ptr1);CHKERRQ(ierr);
      ierr = ISRestoreIndices(is2,&ptr2);CHKERRQ(ierr);
      ierr = PetscFree(a1);CHKERRQ(ierr);
      ierr = PetscFree(a2);CHKERRQ(ierr);
    }
    /* All ranks in this branch reach the Allreduce: equal only if equal everywhere. */
    ierr = PetscObjectGetComm((PetscObject)is1,&comm);CHKERRQ(ierr);
    ierr = MPI_Allreduce(&flag,flg,1,MPI_INT,MPI_MIN,comm);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
int main(int argc,char **argv) { PetscInt i,n = 1000,*values; PetscRandom rnd; PetscScalar value,avg = 0.0; PetscErrorCode ierr; PetscMPIInt rank; PetscInt view_rank=-1; #if defined(PETSC_USE_LOG) PetscLogEvent event; #endif ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr; ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); ierr = PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetInt(NULL,NULL,"-view_randomvalues",&view_rank,NULL);CHKERRQ(ierr); ierr = PetscRandomCreate(PETSC_COMM_WORLD,&rnd);CHKERRQ(ierr); /* force imaginary part of random number to always be zero; thus obtain reproducible results with real and complex numbers */ ierr = PetscRandomSetInterval(rnd,0.0,1.0);CHKERRQ(ierr); ierr = PetscRandomSetFromOptions(rnd);CHKERRQ(ierr); ierr = PetscMalloc1(n,&values);CHKERRQ(ierr); for (i=0; i<n; i++) { ierr = PetscRandomGetValue(rnd,&value);CHKERRQ(ierr); avg += value; if (view_rank == (PetscInt)rank) { ierr = PetscPrintf(PETSC_COMM_SELF,"[%d] value[%D] = %6.4e\n",rank,i,(double)PetscRealPart(value));CHKERRQ(ierr); } values[i] = (PetscInt)(n*PetscRealPart(value) + 2.0); } avg = avg/((PetscReal)n); if (view_rank == (PetscInt)rank) { ierr = PetscPrintf(PETSC_COMM_SELF,"[%d] Average value %6.4e\n",rank,(double)PetscRealPart(avg));CHKERRQ(ierr); } ierr = PetscSortInt(n,values);CHKERRQ(ierr); ierr = PetscLogEventRegister("Sort",0,&event);CHKERRQ(ierr); ierr = PetscLogEventBegin(event,0,0,0,0);CHKERRQ(ierr); ierr = PetscRandomSeed(rnd);CHKERRQ(ierr); for (i=0; i<n; i++) { ierr = PetscRandomGetValue(rnd,&value);CHKERRQ(ierr); values[i] = (PetscInt)(n*PetscRealPart(value) + 2.0); /* printf("value[%d] = %g\n",i,value); */ } ierr = PetscSortInt(n,values);CHKERRQ(ierr); ierr = PetscLogEventEnd(event,0,0,0,0);CHKERRQ(ierr); for (i=1; i<n; i++) { if (values[i] < values[i-1]) SETERRQ(PETSC_COMM_SELF,1,"Values not sorted"); } ierr = PetscFree(values);CHKERRQ(ierr); ierr = PetscRandomDestroy(&rnd);CHKERRQ(ierr); 
ierr = PetscFinalize(); return ierr; }
/* DMLabelPermute - Create a new label whose points are the image of the old
   label's points under the given permutation; each stratum is re-sorted. */
PetscErrorCode DMLabelPermute(DMLabel label, IS permutation, DMLabel *labelNew)
{
  const PetscInt *perm;
  PetscInt        numValues, numPoints, v, q;
  PetscErrorCode  ierr;

  PetscFunctionBegin;
  /* Ensure strata are in the sorted-array representation before duplicating. */
  ierr = DMLabelMakeAllValid_Private(label);CHKERRQ(ierr);
  ierr = DMLabelDuplicate(label, labelNew);CHKERRQ(ierr);
  ierr = DMLabelGetNumValues(*labelNew, &numValues);CHKERRQ(ierr);
  ierr = ISGetLocalSize(permutation, &numPoints);CHKERRQ(ierr);
  ierr = ISGetIndices(permutation, &perm);CHKERRQ(ierr);
  for (v = 0; v < numValues; ++v) {
    const PetscInt size = (*labelNew)->stratumSizes[v];
    for (q = 0; q < size; ++q) {
      const PetscInt point = (*labelNew)->points[v][q];

      /* every labeled point must be inside the permutation's domain */
      if ((point < 0) || (point >= numPoints)) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Label point %D is not in [0, %D) for the remapping", point, numPoints);
      (*labelNew)->points[v][q] = perm[point];
    }
    /* restore the sorted-stratum invariant after remapping */
    ierr = PetscSortInt(size, &(*labelNew)->points[v][0]);CHKERRQ(ierr);
  }
  ierr = ISRestoreIndices(permutation, &perm);CHKERRQ(ierr);
  /* NOTE(review): this block destroys and rebuilds the BT index on the INPUT
     `label`, not on `*labelNew`, even though only *labelNew was modified —
     looks suspicious; confirm whether it should act on *labelNew. */
  if (label->bt) {
    ierr = PetscBTDestroy(&label->bt);CHKERRQ(ierr);
    ierr = DMLabelCreateIndex(label, label->pStart, label->pEnd);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
/*@
   ISSetPermutation - Informs the index set that it is a permutation.

   Logically Collective on IS

   Input Parameters:
.  is - the index set

   Level: intermediate

   Concepts: permutation
   Concepts: index sets^permutation

   Notes:
   The debug version of the libraries (./configure --with-debugging=1) checks
   (on one process only) that the index set really is a permutation; the
   optimized version simply trusts the caller.

.seealso: ISPermutation()
@*/
PetscErrorCode ISSetPermutation(IS is)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(is,IS_CLASSID,1);
#if defined(PETSC_USE_DEBUG)
  {
    PetscMPIInt    nproc;
    PetscErrorCode ierr;

    ierr = MPI_Comm_size(PetscObjectComm((PetscObject)is),&nproc);CHKERRQ(ierr);
    if (nproc == 1) {
      const PetscInt *indices;
      PetscInt       *work,len,k;

      /* Copy and sort the indices; a permutation of [0,n) sorts to 0,1,...,n-1. */
      ierr = ISGetSize(is,&len);CHKERRQ(ierr);
      ierr = PetscMalloc(len*sizeof(PetscInt),&work);CHKERRQ(ierr);
      ierr = ISGetIndices(is,&indices);CHKERRQ(ierr);
      ierr = PetscMemcpy(work,indices,len*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscSortInt(len,work);CHKERRQ(ierr);
      for (k=0; k<len; k++) {
        if (work[k] != k) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Index set is not a permutation");
      }
      ierr = PetscFree(work);CHKERRQ(ierr);
      ierr = ISRestoreIndices(is,&indices);CHKERRQ(ierr);
    }
  }
#endif
  is->isperm = PETSC_TRUE;
  PetscFunctionReturn(0);
}
/*
  DMLabelMakeValid_Private - Transfer stratum data from the hash format to the sorted list format

  Input parameter:
+ label - The DMLabel
- v - The stratum value

  Output parameter:
. label - The DMLabel with stratum in sorted list format

  Level: developer

.seealso: DMLabelCreate()
*/
static PetscErrorCode DMLabelMakeValid_Private(DMLabel label, PetscInt v)
{
  PetscInt       off;
  PetscErrorCode ierr;

  /* Fast path: stratum already in array form.  (These two returns precede
     PetscFunctionBegin, so they bypass the usual stack bookkeeping.) */
  if (label->arrayValid[v]) return 0;
  if (v >= label->numStrata) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Trying to access invalid stratum %D in DMLabelMakeValid_Private\n", v);
  PetscFunctionBegin;
  /* Drain the hash set for this stratum into a freshly allocated array... */
  PetscHashISize(label->ht[v], label->stratumSizes[v]);
  ierr = PetscMalloc1(label->stratumSizes[v], &label->points[v]);CHKERRQ(ierr);
  off  = 0;
  ierr = PetscHashIGetKeys(label->ht[v], &off, &(label->points[v][0]));CHKERRQ(ierr);
  if (off != label->stratumSizes[v]) SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid number of contributed points %D from value %D should be %D", off, label->stratumValues[v], label->stratumSizes[v]);
  PetscHashIClear(label->ht[v]);
  /* ...and keep it sorted, which is the invariant of the list format. */
  ierr = PetscSortInt(label->stratumSizes[v], label->points[v]);CHKERRQ(ierr);
  /* Keep the fast-membership bit table in sync when one exists. */
  if (label->bt) {
    PetscInt p;

    for (p = 0; p < label->stratumSizes[v]; ++p) {
      const PetscInt point = label->points[v][p];

      if ((point < label->pStart) || (point >= label->pEnd)) SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Label point %D is not in [%D, %D)", point, label->pStart, label->pEnd);
      ierr = PetscBTSet(label->bt, point - label->pStart);CHKERRQ(ierr);
    }
  }
  label->arrayValid[v] = PETSC_TRUE;
  ++label->state;   /* bump state so cached users notice the change */
  PetscFunctionReturn(0);
}
/* PCBDDCScalingSetUp_Deluxe - Set up deluxe scaling for BDDC: builds the
   sequential sub-problem data and the list of interface dofs (vertices and
   Dirichlet dofs) that instead receive simple diagonal scaling. */
static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC pc)
{
  PC_IS               *pcis=(PC_IS*)pc->data;
  PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
  PCBDDCDeluxeScaling deluxe_ctx=pcbddc->deluxe_ctx;
  PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
  PetscErrorCode      ierr;

  PetscFunctionBegin;
  /* reset data structures if the topology has changed */
  if (pcbddc->recompute_topography) {
    ierr = PCBDDCScalingReset_Deluxe_Solvers(deluxe_ctx);CHKERRQ(ierr);
  }

  /* Compute data structures to solve sequential problems */
  ierr = PCBDDCScalingSetUp_Deluxe_Private(pc);CHKERRQ(ierr);

  /* diagonal scaling on interface dofs not contained in cc */
  if (sub_schurs->is_vertices || sub_schurs->is_dir) {
    PetscInt n_com,n_dir;

    n_com = 0;
    if (sub_schurs->is_vertices) {
      ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_com);CHKERRQ(ierr);
    }
    n_dir = 0;
    if (sub_schurs->is_dir) {
      ierr = ISGetLocalSize(sub_schurs->is_dir,&n_dir);CHKERRQ(ierr);
    }
    if (!deluxe_ctx->n_simple) {
      /* First setup: collect the simply-scaled dofs (vertices first, then
         Dirichlet) mapped into local boundary numbering via BtoNmap. */
      deluxe_ctx->n_simple = n_dir + n_com;
      ierr = PetscMalloc1(deluxe_ctx->n_simple,&deluxe_ctx->idx_simple_B);CHKERRQ(ierr);
      if (sub_schurs->is_vertices) {
        PetscInt       nmap;
        const PetscInt *idxs;

        ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
        /* every vertex dof must be on the boundary, so no index may be dropped */
        ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_com,idxs,&nmap,deluxe_ctx->idx_simple_B);CHKERRQ(ierr);
        if (nmap != n_com) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error when mapping simply scaled dofs (is_vertices)! %D != %D",nmap,n_com);
        ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
      }
      if (sub_schurs->is_dir) {
        PetscInt       nmap;
        const PetscInt *idxs;

        ierr = ISGetIndices(sub_schurs->is_dir,&idxs);CHKERRQ(ierr);
        /* Dirichlet dofs are appended after the n_com vertex entries */
        ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_dir,idxs,&nmap,deluxe_ctx->idx_simple_B+n_com);CHKERRQ(ierr);
        if (nmap != n_dir) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error when mapping simply scaled dofs (sub_schurs->is_dir)! %D != %D",nmap,n_dir);
        ierr = ISRestoreIndices(sub_schurs->is_dir,&idxs);CHKERRQ(ierr);
      }
      ierr = PetscSortInt(deluxe_ctx->n_simple,deluxe_ctx->idx_simple_B);CHKERRQ(ierr);
    } else {
      /* Reuse path: the count must agree with the previously computed one. */
      if (deluxe_ctx->n_simple != n_dir + n_com) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Number of simply scaled dofs %D is different from the previous one computed %D",n_dir + n_com,deluxe_ctx->n_simple);
    }
  } else {
    deluxe_ctx->n_simple = 0;
    deluxe_ctx->idx_simple_B = 0;
  }
  PetscFunctionReturn(0);
}
int main(int argc,char **argv) { PetscInt i,n = 1000,*values; int event; PetscRandom rand; PetscReal value; PetscErrorCode ierr; PetscBool values_view=PETSC_FALSE; PetscMPIInt rank; ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr; ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); ierr = PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetBool(NULL,0,"-values_view",&values_view,NULL);CHKERRQ(ierr); ierr = PetscRandomCreate(PETSC_COMM_SELF,&rand);CHKERRQ(ierr); ierr = PetscRandomSetFromOptions(rand);CHKERRQ(ierr); ierr = PetscMalloc1(n,&values);CHKERRQ(ierr); for (i=0; i<n; i++) { ierr = PetscRandomGetValueReal(rand,&value);CHKERRQ(ierr); values[i] = (PetscInt)(n*value + 2.0); } ierr = PetscSortInt(n,values);CHKERRQ(ierr); ierr = PetscLogEventRegister("Sort",0,&event);CHKERRQ(ierr); ierr = PetscLogEventBegin(event,0,0,0,0);CHKERRQ(ierr); for (i=0; i<n; i++) { ierr = PetscRandomGetValueReal(rand,&value);CHKERRQ(ierr); values[i] = (PetscInt)(n*value + 2.0); } ierr = PetscSortInt(n,values);CHKERRQ(ierr); ierr = PetscLogEventEnd(event,0,0,0,0);CHKERRQ(ierr); for (i=1; i<n; i++) { if (values[i] < values[i-1]) SETERRQ(PETSC_COMM_SELF,1,"Values not sorted"); if (values_view && !rank) {ierr = PetscPrintf(PETSC_COMM_SELF,"%D %D\n",i,values[i]);CHKERRQ(ierr);} } ierr = PetscFree(values);CHKERRQ(ierr); ierr = PetscRandomDestroy(&rand);CHKERRQ(ierr); ierr = PetscFinalize(); return ierr; }
/*@
   PetscSortRemoveDupsInt - Sorts an array of integers in place in increasing
   order and removes all duplicate entries.

   Not Collective

   Input Parameters:
+  n  - number of values
-  ii - array of integers

   Output Parameter:
.  n - number of non-redundant values

   Level: intermediate

   Concepts: sorting^ints

.seealso: PetscSortReal(), PetscSortIntWithPermutation(), PetscSortInt(), PetscSortedRemoveDupsInt()
@*/
PetscErrorCode PetscSortRemoveDupsInt(PetscInt *n,PetscInt ii[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* sort first, then a single linear pass removes adjacent duplicates */
  ierr = PetscSortInt(*n,ii);CHKERRQ(ierr);
  ierr = PetscSortedRemoveDupsInt(n,ii);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/* Sort a block index set's (block) indices in increasing order; no-op when
   already marked sorted. */
PetscErrorCode ISSort_Block(IS is)
{
  IS_Block       *blk = (IS_Block*)is->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!blk->sorted) {
    ierr = PetscSortInt(blk->n,blk->idx);CHKERRQ(ierr);
    blk->sorted = PETSC_TRUE;   /* cache the result for subsequent calls */
  }
  PetscFunctionReturn(0);
}
int main(int argc,char **argv) { PetscInt i,n = 1000,*values; int event; PetscRandom rand; PetscScalar value; PetscErrorCode ierr; PetscInitialize(&argc,&argv,(char *)0,help); ierr = PetscOptionsGetInt(PETSC_NULL,"-n",&n,PETSC_NULL);CHKERRQ(ierr); ierr = PetscRandomCreate(PETSC_COMM_SELF,&rand);CHKERRQ(ierr); ierr = PetscRandomSetFromOptions(rand);CHKERRQ(ierr); ierr = PetscMalloc(n*sizeof(PetscInt),&values);CHKERRQ(ierr); for (i=0; i<n; i++) { ierr = PetscRandomGetValue(rand,&value);CHKERRQ(ierr); values[i] = (PetscInt)(n*PetscRealPart(value) + 2.0); } ierr = PetscSortInt(n,values);CHKERRQ(ierr); ierr = PetscLogEventRegister("Sort",0,&event);CHKERRQ(ierr); ierr = PetscLogEventBegin(event,0,0,0,0);CHKERRQ(ierr); for (i=0; i<n; i++) { ierr = PetscRandomGetValue(rand,&value);CHKERRQ(ierr); values[i] = (PetscInt)(n*PetscRealPart(value) + 2.0); } ierr = PetscSortInt(n,values);CHKERRQ(ierr); ierr = PetscLogEventEnd(event,0,0,0,0);CHKERRQ(ierr); for (i=1; i<n; i++) { if (values[i] < values[i-1]) SETERRQ(1,"Values not sorted"); } ierr = PetscFree(values);CHKERRQ(ierr); ierr = PetscRandomDestroy(rand);CHKERRQ(ierr); ierr = PetscFinalize();CHKERRQ(ierr); return 0; }
/* Sort a general index set's local indices in increasing order; no-op when
   already marked sorted. */
PetscErrorCode ISSort_General(IS is)
{
  IS_General     *gen = (IS_General*)is->data;
  PetscInt       len;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!gen->sorted) {
    ierr = PetscLayoutGetLocalSize(is->map, &len);CHKERRQ(ierr);
    ierr = PetscSortInt(len,gen->idx);CHKERRQ(ierr);
    gen->sorted = PETSC_TRUE;   /* cache the result for subsequent calls */
  }
  PetscFunctionReturn(0);
}
/*@
   PetscSortRemoveDupsInt - Sorts an array of integers in place in increasing
   order and removes all duplicate entries.

   Not Collective

   Input Parameters:
+  n  - number of values
-  ii - array of integers

   Output Parameter:
.  n - number of non-redundant values

   Level: intermediate

   Concepts: sorting^ints

.seealso: PetscSortReal(), PetscSortIntWithPermutation(), PetscSortInt()
@*/
PetscErrorCode PetscSortRemoveDupsInt(PetscInt *n,PetscInt ii[])
{
  PetscErrorCode ierr;
  PetscInt       len = *n,write,read;

  PetscFunctionBegin;
  ierr = PetscSortInt(len,ii);CHKERRQ(ierr);
  /* single pass: `write` marks the last kept element; copy forward each
     value that differs from it */
  write = 0;
  for (read = 1; read < len; read++) {
    if (ii[read] != ii[write]) ii[++write] = ii[read];
  }
  if (len > 0) *n = write + 1;   /* empty input: *n stays 0 */
  PetscFunctionReturn(0);
}
/* Sort a block index set's block indices in increasing order; no-op when
   already marked sorted.  The idx array holds one entry per block, so the
   sort length is localSize/blockSize. */
PetscErrorCode ISSort_Block(IS is)
{
  IS_Block       *blk = (IS_Block*)is->data;
  PetscInt       blockSize, localSize;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!blk->sorted) {
    ierr = PetscLayoutGetBlockSize(is->map, &blockSize);CHKERRQ(ierr);
    ierr = PetscLayoutGetLocalSize(is->map, &localSize);CHKERRQ(ierr);
    ierr = PetscSortInt(localSize/blockSize,blk->idx);CHKERRQ(ierr);
    blk->sorted = PETSC_TRUE;   /* cache the result for subsequent calls */
  }
  PetscFunctionReturn(0);
}
/* MatSetUpMultiply_MPIAIJ - Build the machinery for MatMult on an MPIAIJ
   matrix: compact the off-diagonal block B to use local column numbering,
   record the needed global columns in garray, and create the lvec/scatter
   used to gather off-process vector entries. */
PetscErrorCode MatSetUpMultiply_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)(aij->B->data);
  PetscErrorCode ierr;
  PetscInt       i,j,*aj = B->j,ec = 0,*garray;   /* ec = # distinct off-diagonal columns */
  IS             from,to;
  Vec            gvec;
#if defined(PETSC_USE_CTABLE)
  PetscTable         gid1_lid1;    /* maps (global col + 1) -> (local col + 1) */
  PetscTablePosition tpos;
  PetscInt           gid,lid;
#else
  PetscInt N = mat->cmap->N,*indices;
#endif

  PetscFunctionBegin;
  if (!aij->garray) {
#if defined(PETSC_USE_CTABLE)
    /* use a table */
    ierr = PetscTableCreate(aij->B->rmap->n,mat->cmap->N+1,&gid1_lid1);CHKERRQ(ierr);
    for (i=0; i<aij->B->rmap->n; i++) {
      for (j=0; j<B->ilen[i]; j++) {
        PetscInt data,gid1 = aj[B->i[i] + j] + 1;   /* shift by 1: table treats 0 as "absent" */
        ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
        if (!data) {
          /* one based table */
          ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr);
        }
      }
    }
    /* form array of columns we need */
    ierr = PetscMalloc1(ec+1,&garray);CHKERRQ(ierr);
    ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
    while (tpos) {
      ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
      gid--; lid--;
      garray[lid] = gid;
    }
    ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); /* sort, and rebuild */
    ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
    /* re-insert so local ids follow the sorted global order */
    for (i=0; i<ec; i++) {
      ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
    }
    /* compact out the extra columns in B: rewrite B's column indices in local numbering */
    for (i=0; i<aij->B->rmap->n; i++) {
      for (j=0; j<B->ilen[i]; j++) {
        PetscInt gid1 = aj[B->i[i] + j] + 1;
        ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
        lid--;
        aj[B->i[i] + j] = lid;
      }
    }
    aij->B->cmap->n = aij->B->cmap->N = ec;   /* B now has only ec (local) columns */
    aij->B->cmap->bs = 1;
    ierr = PetscLayoutSetUp((aij->B->cmap));CHKERRQ(ierr);
    ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr);
#else
    /* Make an array as long as the number of columns */
    /* mark those columns that are in aij->B */
    ierr = PetscCalloc1(N+1,&indices);CHKERRQ(ierr);
    for (i=0; i<aij->B->rmap->n; i++) {
      for (j=0; j<B->ilen[i]; j++) {
        if (!indices[aj[B->i[i] + j]]) ec++;
        indices[aj[B->i[i] + j]] = 1;
      }
    }
    /* form array of columns we need */
    ierr = PetscMalloc1(ec+1,&garray);CHKERRQ(ierr);
    ec   = 0;
    for (i=0; i<N; i++) {
      if (indices[i]) garray[ec++] = i;
    }
    /* make indices now point into garray */
    for (i=0; i<ec; i++) {
      indices[garray[i]] = i;
    }
    /* compact out the extra columns in B */
    for (i=0; i<aij->B->rmap->n; i++) {
      for (j=0; j<B->ilen[i]; j++) {
        aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
      }
    }
    aij->B->cmap->n = aij->B->cmap->N = ec;
    aij->B->cmap->bs = 1;
    ierr = PetscLayoutSetUp((aij->B->cmap));CHKERRQ(ierr);
    ierr = PetscFree(indices);CHKERRQ(ierr);
#endif
  } else {
    /* reuse path: garray already built by an earlier call */
    garray = aij->garray;
  }

  if (!aij->lvec) {
    /* create local vector that is used to scatter into */
    ierr = VecCreateSeq(PETSC_COMM_SELF,ec,&aij->lvec);CHKERRQ(ierr);
  } else {
    /* reuse path: recover ec from the existing local vector */
    ierr = VecGetSize(aij->lvec,&ec);CHKERRQ(ierr);
  }

  /* create two temporary Index sets for build scatter gather */
  ierr = ISCreateGeneral(((PetscObject)mat)->comm,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
  ierr = ISCreateStride(PETSC_COMM_SELF,ec,0,1,&to);CHKERRQ(ierr);

  /* create temporary global vector to generate scatter context */
  /* This does not allocate the array's memory so is efficient */
  ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr);

  /* generate the scatter context */
  if (aij->Mvctx_mpi1_flg) {
    ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);
    ierr = VecScatterCreate(gvec,from,aij->lvec,to,&aij->Mvctx_mpi1);CHKERRQ(ierr);
    ierr = VecScatterSetType(aij->Mvctx_mpi1,VECSCATTERMPI1);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)aij->Mvctx_mpi1);CHKERRQ(ierr);
  } else {
    ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterCreate(gvec,from,aij->lvec,to,&aij->Mvctx);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)aij->Mvctx);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)aij->lvec);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
  }
  aij->garray = garray;

  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr);
  ierr = ISDestroy(&from);CHKERRQ(ierr);
  ierr = ISDestroy(&to);CHKERRQ(ierr);
  ierr = VecDestroy(&gvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/* This interpolates faces for cells at some stratum:
   pass 1 counts the distinct faces (via a hash keyed on sorted face vertices)
   so the chart/cone sizes of idm can be set; pass 2 rebuilds the same hash to
   assign the same face numbering, fills in face cones, and computes cone
   orientations for faces shared between cells. */
static PetscErrorCode DMPlexInterpolateFaces_Internal(DM dm, PetscInt cellDepth, DM idm)
{
  PetscHashIJKL  faceTable;
  PetscInt      *pStart, *pEnd;
  PetscInt       cellDim, depth, faceDepth = cellDepth, numPoints = 0, faceSizeAll = 0, face, c, d;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = DMPlexGetDimension(dm, &cellDim);CHKERRQ(ierr);
  ierr = DMPlexGetDepth(dm, &depth);CHKERRQ(ierr);
  /* the interpolated mesh gains one stratum (the faces) */
  ++depth;
  ++cellDepth;
  cellDim -= depth - cellDepth;
  ierr = PetscMalloc2(depth+1,PetscInt,&pStart,depth+1,PetscInt,&pEnd);CHKERRQ(ierr);
  /* strata above the face level keep their ranges, shifted up by one */
  for (d = depth-1; d >= faceDepth; --d) {
    ierr = DMPlexGetDepthStratum(dm, d, &pStart[d+1], &pEnd[d+1]);CHKERRQ(ierr);
  }
  /* face stratum starts empty at the end of the existing chart */
  ierr = DMPlexGetDepthStratum(dm, -1, NULL, &pStart[faceDepth]);CHKERRQ(ierr);
  pEnd[faceDepth] = pStart[faceDepth];
  for (d = faceDepth-1; d >= 0; --d) {
    ierr = DMPlexGetDepthStratum(dm, d, &pStart[d], &pEnd[d]);CHKERRQ(ierr);
  }
  /* probe one cell to learn the (uniform) number of vertices per face */
  if (pEnd[cellDepth] > pStart[cellDepth]) {ierr = DMPlexGetFaces_Internal(dm, cellDim, pStart[cellDepth], NULL, &faceSizeAll, NULL);CHKERRQ(ierr);}
  if (faceSizeAll > 4) SETERRQ1(PetscObjectComm((PetscObject) dm), PETSC_ERR_ARG_WRONG, "Do not support interpolation of meshes with faces of %D vertices", faceSizeAll);

  /* Pass 1: count distinct faces.  The hash key is the face's vertex set. */
  ierr = PetscHashIJKLCreate(&faceTable);CHKERRQ(ierr);
  ierr = PetscHashIJKLSetMultivalued(faceTable, PETSC_FALSE);CHKERRQ(ierr);
  for (c = pStart[cellDepth], face = pStart[faceDepth]; c < pEnd[cellDepth]; ++c) {
    const PetscInt *cellFaces;
    PetscInt        numCellFaces, faceSize, cf, f;

    ierr = DMPlexGetFaces_Internal(dm, cellDim, c, &numCellFaces, &faceSize, &cellFaces);CHKERRQ(ierr);
    if (faceSize != faceSizeAll) SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Inconsistent face for cell %D of size %D != %D", c, faceSize, faceSizeAll);
    for (cf = 0; cf < numCellFaces; ++cf) {
      const PetscInt   *cellFace = &cellFaces[cf*faceSize];
      PetscHashIJKLKey key;

      /* canonicalize the key by sorting the vertices so the same face hashes
         identically from either side */
      if (faceSize == 2) {
        key.i = PetscMin(cellFace[0], cellFace[1]);
        key.j = PetscMax(cellFace[0], cellFace[1]);
      } else {
        key.i = cellFace[0]; key.j = cellFace[1]; key.k = cellFace[2]; key.l = faceSize > 3 ? cellFace[3] : 0;
        /* NOTE(review): sorts the key struct through a PetscInt* cast (assumes
           i,j,k,l are contiguous PetscInts) and the returned ierr is not
           checked with CHKERRQ here — confirm upstream. */
        ierr = PetscSortInt(faceSize, (PetscInt *) &key);
      }
      ierr = PetscHashIJKLGet(faceTable, key, &f);CHKERRQ(ierr);
      if (f < 0) {
        /* first time this face is seen: give it the next face number */
        ierr = PetscHashIJKLAdd(faceTable, key, face);CHKERRQ(ierr);
        f    = face++;
      }
    }
  }
  pEnd[faceDepth] = face;
  ierr = PetscHashIJKLDestroy(&faceTable);CHKERRQ(ierr);

  /* Count new points */
  for (d = 0; d <= depth; ++d) {
    numPoints += pEnd[d]-pStart[d];
  }
  ierr = DMPlexSetChart(idm, 0, numPoints);CHKERRQ(ierr);
  /* Set cone sizes */
  for (d = 0; d <= depth; ++d) {
    PetscInt coneSize, p;

    if (d == faceDepth) {
      for (p = pStart[d]; p < pEnd[d]; ++p) {
        /* I see no way to do this if we admit faces of different shapes */
        ierr = DMPlexSetConeSize(idm, p, faceSizeAll);CHKERRQ(ierr);
      }
    } else if (d == cellDepth) {
      for (p = pStart[d]; p < pEnd[d]; ++p) {
        /* Number of cell faces may be different from number of cell vertices */
        ierr = DMPlexGetFaces_Internal(dm, cellDim, p, &coneSize, NULL, NULL);CHKERRQ(ierr);
        ierr = DMPlexSetConeSize(idm, p, coneSize);CHKERRQ(ierr);
      }
    } else {
      for (p = pStart[d]; p < pEnd[d]; ++p) {
        ierr = DMPlexGetConeSize(dm, p, &coneSize);CHKERRQ(ierr);
        ierr = DMPlexSetConeSize(idm, p, coneSize);CHKERRQ(ierr);
      }
    }
  }
  ierr = DMSetUp(idm);CHKERRQ(ierr);

  /* Get face cones from subsets of cell vertices */
  if (faceSizeAll > 4) SETERRQ1(PetscObjectComm((PetscObject) dm), PETSC_ERR_ARG_WRONG, "Do not support interpolation of meshes with faces of %D vertices", faceSizeAll);
  ierr = PetscHashIJKLCreate(&faceTable);CHKERRQ(ierr);
  ierr = PetscHashIJKLSetMultivalued(faceTable, PETSC_FALSE);CHKERRQ(ierr);
  /* copy the unchanged cones of strata above the cell depth */
  for (d = depth; d > cellDepth; --d) {
    const PetscInt *cone;
    PetscInt        p;

    for (p = pStart[d]; p < pEnd[d]; ++p) {
      ierr = DMPlexGetCone(dm, p, &cone);CHKERRQ(ierr);
      ierr = DMPlexSetCone(idm, p, cone);CHKERRQ(ierr);
      ierr = DMPlexGetConeOrientation(dm, p, &cone);CHKERRQ(ierr);
      ierr = DMPlexSetConeOrientation(idm, p, cone);CHKERRQ(ierr);
    }
  }
  /* Pass 2: same traversal order as pass 1, so faces get identical numbers */
  for (c = pStart[cellDepth], face = pStart[faceDepth]; c < pEnd[cellDepth]; ++c) {
    const PetscInt *cellFaces;
    PetscInt        numCellFaces, faceSize, cf, f;

    ierr = DMPlexGetFaces_Internal(dm, cellDim, c, &numCellFaces, &faceSize, &cellFaces);CHKERRQ(ierr);
    if (faceSize != faceSizeAll) SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Inconsistent face for cell %D of size %D != %D", c, faceSize, faceSizeAll);
    for (cf = 0; cf < numCellFaces; ++cf) {
      const PetscInt   *cellFace = &cellFaces[cf*faceSize];
      PetscHashIJKLKey key;

      if (faceSize == 2) {
        key.i = PetscMin(cellFace[0], cellFace[1]);
        key.j = PetscMax(cellFace[0], cellFace[1]);
      } else {
        key.i = cellFace[0]; key.j = cellFace[1]; key.k = cellFace[2]; key.l = faceSize > 3 ? cellFace[3] : 0;
        /* NOTE(review): same unchecked ierr / struct-cast as in pass 1 */
        ierr = PetscSortInt(faceSize, (PetscInt *) &key);
      }
      ierr = PetscHashIJKLGet(faceTable, key, &f);CHKERRQ(ierr);
      if (f < 0) {
        /* first cell touching this face defines its cone (and orientation 0) */
        ierr = DMPlexSetCone(idm, face, cellFace);CHKERRQ(ierr);
        ierr = PetscHashIJKLAdd(faceTable, key, face);CHKERRQ(ierr);
        f    = face++;
        ierr = DMPlexInsertCone(idm, c, cf, f);CHKERRQ(ierr);
      } else {
        const PetscInt *cone;
        PetscInt        coneSize, ornt, i, j;

        ierr = DMPlexInsertCone(idm, c, cf, f);CHKERRQ(ierr);
        /* Orient face: compare this cell's vertex order against the stored cone */
        ierr = DMPlexGetConeSize(idm, f, &coneSize);CHKERRQ(ierr);
        ierr = DMPlexGetCone(idm, f, &cone);CHKERRQ(ierr);
        if (coneSize != faceSize) SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of face vertices %D for face %D should be %D", coneSize, f, faceSize);
        /* - First find the initial vertex */
        for (i = 0; i < faceSize; ++i) if (cellFace[0] == cone[i]) break;
        /* - Try forward comparison */
        for (j = 0; j < faceSize; ++j) if (cellFace[j] != cone[(i+j)%faceSize]) break;
        if (j == faceSize) {
          if ((faceSize == 2) && (i == 1)) ornt = -2;
          else ornt = i;
        } else {
          /* - Try backward comparison */
          for (j = 0; j < faceSize; ++j) if (cellFace[j] != cone[(i+faceSize-j)%faceSize]) break;
          if (j == faceSize) ornt = -(i+1);
          else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Could not determine face orientation");
        }
        ierr = DMPlexInsertConeOrientation(idm, c, cf, ornt);CHKERRQ(ierr);
      }
    }
  }
  /* pass 2 must reproduce exactly the face count of pass 1 */
  if (face != pEnd[faceDepth]) SETERRQ2(PetscObjectComm((PetscObject) dm), PETSC_ERR_PLIB, "Invalid number of faces %D should be %D", face-pStart[faceDepth], pEnd[faceDepth]-pStart[faceDepth]);
  ierr = PetscFree2(pStart,pEnd);CHKERRQ(ierr);
  ierr = PetscHashIJKLDestroy(&faceTable);CHKERRQ(ierr);
  ierr = DMPlexSymmetrize(idm);CHKERRQ(ierr);
  ierr = DMPlexStratify(idm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/* MatApplyPAPt_Numeric_SeqAIJ_SeqAIJ - Numeric phase of C = P*A*P^T for
   sequential AIJ matrices; assumes the symbolic phase already built C's
   nonzero pattern (ci/cj). */
PetscErrorCode MatApplyPAPt_Numeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C)
{
  PetscErrorCode ierr;
  PetscInt       flops=0;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ *) A->data;
  Mat_SeqAIJ     *p = (Mat_SeqAIJ *) P->data;
  Mat_SeqAIJ     *c = (Mat_SeqAIJ *) C->data;
  PetscInt       *ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj=p->j,*paj,*pajdense,*ptj;
  PetscInt       *ci=c->i,*cj=c->j;
  PetscInt       an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,cn=C->cmap->N,cm=C->rmap->N;
  PetscInt       i,j,k,k1,k2,pnzi,anzj,panzj,arow,ptcol,ptnzj,cnzi;
  MatScalar      *aa=a->a,*pa=p->a,*pta=p->a,*ptaj,*paa,*aaj,*ca=c->a,sum;

  PetscFunctionBegin;
  /* This error checking should be unnecessary if the symbolic was performed */
  if (pm!=cm) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",pm,cm);
  if (pn!=am) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",pn,am);
  if (am!=an) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %D != %D",am, an);
  if (pm!=cn) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",pm, cn);

  /* Set up timers */
  ierr = PetscLogEventBegin(MAT_Applypapt_numeric,A,P,C,0);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  /* Workspace: paa = dense accumulator for one row of P*A, paj = its sparse
     column list, pajdense = occupancy flags.  The single memzero below relies
     on PetscMalloc3 laying the three arrays out contiguously — a PetscMalloc3
     implementation detail. */
  ierr = PetscMalloc3(an,MatScalar,&paa,an,PetscInt,&paj,an,PetscInt,&pajdense);CHKERRQ(ierr);
  ierr = PetscMemzero(paa,an*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);

  for (i=0;i<pm;i++) {
    /* Form sparse row of P*A: pj/pa walk P's row i; each P entry scales a row of A */
    pnzi  = pi[i+1] - pi[i];
    panzj = 0;
    for (j=0;j<pnzi;j++) {
      arow = *pj++;
      anzj = ai[arow+1] - ai[arow];
      ajj  = aj + ai[arow];
      aaj  = aa + ai[arow];
      for (k=0;k<anzj;k++) {
        if (!pajdense[ajj[k]]) {
          pajdense[ajj[k]] = -1;
          paj[panzj++]     = ajj[k];
        }
        paa[ajj[k]] += (*pa)*aaj[k];
      }
      flops += 2*anzj;
      pa++;
    }

    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(panzj,paj);CHKERRQ(ierr);

    /* Compute P*A*P^T using sparse inner products. */
    /* Take advantage of pre-computed (i,j) of C for locations of non-zeros. */
    cnzi = ci[i+1] - ci[i];
    for (j=0;j<cnzi;j++) {
      /* Form sparse inner product of current row of P*A with (*cj++) col of P^T. */
      ptcol = *cj++;
      ptnzj = pi[ptcol+1] - pi[ptcol];
      ptj   = pjj + pi[ptcol];
      ptaj  = pta + pi[ptcol];
      sum   = 0.;
      k1    = 0;
      k2    = 0;
      /* merge-style walk over the two sorted index lists */
      while ((k1<panzj) && (k2<ptnzj)) {
        if (paj[k1]==ptj[k2]) {
          sum += paa[paj[k1++]]*ptaj[k2++];
        } else if (paj[k1] < ptj[k2]) {
          k1++;
        } else /* if (paj[k1] > ptj[k2]) */ {
          k2++;
        }
      }
      *ca++ = sum;
    }

    /* Zero the current row info for P*A so the accumulator is clean for row i+1 */
    for (j=0;j<panzj;j++) {
      paa[paj[j]]      = 0.;
      pajdense[paj[j]] = 0;
    }
  }

  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree3(paa,paj,pajdense);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Applypapt_numeric,A,P,C,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/* MatSetUpMultiply_MPIBAIJ - Build the machinery for MatMult on an MPIBAIJ
   matrix: compact the off-diagonal block B to local block-column numbering,
   record the needed global block columns in garray, and create the
   lvec/scatter used to gather off-process vector entries (block size bs). */
PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)(baij->B->data);
  PetscErrorCode ierr;
  PetscInt       i,j,*aj = B->j,ec = 0,*garray;   /* ec = # distinct off-diagonal block columns */
  PetscInt       bs = mat->rmap->bs,*stmp;
  IS             from,to;
  Vec            gvec;
#if defined(PETSC_USE_CTABLE)
  PetscTable         gid1_lid1;    /* maps (global block col + 1) -> (local block col + 1) */
  PetscTablePosition tpos;
  PetscInt           gid,lid;
#else
  PetscInt Nbs = baij->Nbs,*indices;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  /* use a table - Mark Adams */
  ierr = PetscTableCreate(B->mbs,baij->Nbs+1,&gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt data,gid1 = aj[B->i[i]+j] + 1;   /* shift by 1: table treats 0 as "absent" */
      ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
      if (!data) {
        /* one based table */
        ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
  }
  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
  while (tpos) {
    ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
    gid--; lid--;
    garray[lid] = gid;
  }
  ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
  ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
  /* re-insert so local ids follow the sorted global order */
  for (i=0; i<ec; i++) {
    ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
  /* compact out the extra columns in B: rewrite B's column indices in local numbering */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
      lid--;
      aj[B->i[i]+j] = lid;
    }
  }
  B->nbs           = ec;
  baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs;   /* scalar columns = blocks * bs */
  ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr);
  ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr);
#else
  /* Make an array as long as the number of columns */
  /* mark those columns that are in baij->B */
  ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }
  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ec   = 0;
  for (i=0; i<Nbs; i++) {
    if (indices[i]) {
      garray[ec++] = i;
    }
  }
  /* make indices now point into garray */
  for (i=0; i<ec; i++) {
    indices[garray[i]] = i;
  }
  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
    }
  }
  B->nbs           = ec;
  baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs;
  ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr);
  ierr = PetscFree(indices);CHKERRQ(ierr);
#endif

  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building scatter-gather */
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
  /* destination is simply blocks 0..ec-1 of lvec; stmp ownership passes to `to` */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
  for (i=0; i<ec; i++) stmp[i] = i;
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to);CHKERRQ(ierr);

  /* create temporary global vector to generate scatter context */
  ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr);
  ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);

  ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);

  baij->garray = garray;

  ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = ISDestroy(&from);CHKERRQ(ierr);
  ierr = ISDestroy(&to);CHKERRQ(ierr);
  ierr = VecDestroy(&gvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
  MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ - Computes the nonzero structure of
  C = P*A*P' for SeqAIJ matrices, allocating C with zeroed values.

  Input Parameters:
+ A - square SeqAIJ matrix (am x am)
- P - SeqAIJ matrix (pm x pn) with pn == am

  Output Parameter:
. C - pm x pm SeqAIJ matrix holding the symbolic product (values zeroed)

  For each row i of P the routine first accumulates the symbolic sparse row
  of P*A (using a dense marker array over A's columns), then expands it
  through the symbolic transpose of P to get row i of C.

  Fix: the address-of operator in the PetscFreeSpaceGet() reallocation call
  had been corrupted to the character sequence "currency sign + t_space"
  (an HTML-entity mangling of "&current_space"); restored.
*/
PetscErrorCode MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat *C)
{
  /* Note: This code is virtually identical to that of MatApplyPtAP_SeqAIJ_Symbolic */
  /* and MatMatMult_SeqAIJ_SeqAIJ_Symbolic.  Perhaps they could be merged nicely. */
  PetscErrorCode     ierr;
  PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
  Mat_SeqAIJ         *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
  PetscInt           *ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pti,*ptj,*ptjj;
  PetscInt           *ci,*cj,*paj,*padenserow,*pasparserow,*denserow,*sparserow;
  PetscInt           an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N;
  PetscInt           i,j,k,pnzi,arow,anzj,panzi,ptrow,ptnzj,cnzi;
  MatScalar          *ca;

  PetscFunctionBegin;
  /* some error checking which could be moved into interface layer */
  if (pn!=am) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",pn,am);
  if (am!=an) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %D != %D",am, an);

  /* Set up timers */
  ierr = PetscLogEventBegin(MAT_Applypapt_symbolic,A,P,0,0);CHKERRQ(ierr);

  /* Create ij structure of P^T */
  ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);

  /* Allocate ci array, arrays for fill computation and */
  /* free space for accumulating nonzero column info */
  ierr  = PetscMalloc(((pm+1)*1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
  ci[0] = 0;

  /* padenserow/pasparserow: dense marker + sparse list for a row of P*A;
     denserow/sparserow: the same pair for a row of C */
  ierr = PetscMalloc4(an,PetscInt,&padenserow,an,PetscInt,&pasparserow,pm,PetscInt,&denserow,pm,PetscInt,&sparserow);CHKERRQ(ierr);
  ierr = PetscMemzero(padenserow,an*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(pasparserow,an*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(denserow,pm*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(sparserow,pm*sizeof(PetscInt));CHKERRQ(ierr);

  /* Set initial free space to be nnz(A) scaled by aspect ratio of Pt. */
  /* This should be reasonable if sparsity of PAPt is similar to that of A. */
  ierr          = PetscFreeSpaceGet((ai[am]/pn)*pm,&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* Determine fill for each row of C: */
  for (i=0;i<pm;i++) {
    pnzi  = pi[i+1] - pi[i];
    panzi = 0;
    /* Get symbolic sparse row of PA: */
    for (j=0;j<pnzi;j++) {
      arow = *pj++;
      anzj = ai[arow+1] - ai[arow];
      ajj  = aj + ai[arow];
      for (k=0;k<anzj;k++) {
        if (!padenserow[ajj[k]]) {
          padenserow[ajj[k]]   = -1;
          pasparserow[panzi++] = ajj[k];
        }
      }
    }
    /* Using symbolic row of PA, determine symbolic row of C: */
    paj  = pasparserow;
    cnzi = 0;
    for (j=0;j<panzi;j++) {
      ptrow = *paj++;
      ptnzj = pti[ptrow+1] - pti[ptrow];
      ptjj  = ptj + pti[ptrow];
      for (k=0;k<ptnzj;k++) {
        if (!denserow[ptjj[k]]) {
          denserow[ptjj[k]] = -1;
          sparserow[cnzi++] = ptjj[k];
        }
      }
    }

    /* sort sparse representation */
    ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);

    /* If free space is not available, make more free space */
    /* Double the amount of total space in the list */
    if (current_space->local_remaining<cnzi) {
      ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
    }

    /* Copy data into free space, and zero out dense row */
    ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);

    current_space->array           += cnzi;
    current_space->local_used      += cnzi;
    current_space->local_remaining -= cnzi;

    for (j=0;j<panzi;j++) {
      padenserow[pasparserow[j]] = 0;
    }
    for (j=0;j<cnzi;j++) {
      denserow[sparserow[j]] = 0;
    }
    ci[i+1] = ci[i] + cnzi;
  }

  /* column indices are in the list of free space */
  /* Allocate space for cj, initialize cj, and */
  /* destroy list of free space and other temporary array(s) */
  ierr = PetscMalloc((ci[pm]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
  ierr = PetscFree4(padenserow,pasparserow,denserow,sparserow);CHKERRQ(ierr);

  /* Allocate space for ca */
  ierr = PetscMalloc((ci[pm]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,(ci[pm]+1)*sizeof(MatScalar));CHKERRQ(ierr);

  /* put together the new matrix */
  ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,pm,pm,ci,cj,ca,C);CHKERRQ(ierr);

  (*C)->rmap->bs = P->cmap->bs;
  (*C)->cmap->bs = P->cmap->bs;

  /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
  /* Since these are PETSc arrays, change flags to free them as necessary. */
  c          = (Mat_SeqAIJ*)((*C)->data);
  c->free_a  = PETSC_TRUE;
  c->free_ij = PETSC_TRUE;
  c->nonew   = 0;

  /* Clean up. */
  ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Applypapt_symbolic,A,P,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
  PCBDDCGraphGetCandidatesIS - Classifies the connected components of a
  PCBDDCGraph into faces, edges and vertices, returning (optionally) one IS
  per face, one per edge, and a single IS collecting all vertex dofs.

  Input Parameter:
. graph - the PCBDDCGraph (must have cptr/queue/count/special_dof populated)

  Output Parameters (each may be NULL if not wanted):
+ n_faces    - number of face components
. FacesIS    - array of ISs, one per face (array and ISs owned by the caller)
. n_edges    - number of edge components
. EdgesIS    - array of ISs, one per edge (array and ISs owned by the caller)
- VerticesIS - single IS with all vertex dofs, sorted by local ordering

  Classification per component i (mark[] codes): 2 = face, 1 = edge,
  0 = contributes its dofs to the vertex set.

  Face/edge ISs are created with PETSC_USE_POINTER into graph->queue, so they
  remain valid only as long as the graph does.
*/
PetscErrorCode PCBDDCGraphGetCandidatesIS(PCBDDCGraph graph, PetscInt *n_faces, IS *FacesIS[], PetscInt *n_edges, IS *EdgesIS[], IS *VerticesIS)
{
  IS             *ISForFaces,*ISForEdges,ISForVertices;
  PetscInt       i,nfc,nec,nvc,*idx,*mark;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscCalloc1(graph->ncc,&mark);CHKERRQ(ierr);
  /* loop on ccs to evalute number of faces, edges and vertices */
  nfc = 0;
  nec = 0;
  nvc = 0;
  for (i=0;i<graph->ncc;i++) {
    /* representative dof of the component */
    PetscInt repdof = graph->queue[graph->cptr[i]];
    if (graph->cptr[i+1]-graph->cptr[i] > graph->custom_minimal_size && graph->count[repdof] < graph->maxcount) {
      if (!graph->twodim && graph->count[repdof] == 1 && graph->special_dof[repdof] != PCBDDCGRAPH_NEUMANN_MARK) {
        nfc++;
        mark[i] = 2;
      } else {
        nec++;
        mark[i] = 1;
      }
    } else {
      /* small (or highly shared) components become vertices */
      nvc += graph->cptr[i+1]-graph->cptr[i];
    }
  }
  /* allocate IS arrays for faces, edges. Vertices need a single index set. */
  if (FacesIS) {
    ierr = PetscMalloc1(nfc,&ISForFaces);CHKERRQ(ierr);
  }
  if (EdgesIS) {
    ierr = PetscMalloc1(nec,&ISForEdges);CHKERRQ(ierr);
  }
  if (VerticesIS) {
    ierr = PetscMalloc1(nvc,&idx);CHKERRQ(ierr);
  }
  /* loop on ccs to compute index sets for faces and edges */
  if (!graph->queue_sorted) {
    /* sort each component by global ordering (once); the face/edge ISs
       point into graph->queue, so the sorted order must be stable */
    PetscInt *queue_global;

    ierr = PetscMalloc1(graph->cptr[graph->ncc],&queue_global);CHKERRQ(ierr);
    ierr = ISLocalToGlobalMappingApply(graph->l2gmap,graph->cptr[graph->ncc],graph->queue,queue_global);CHKERRQ(ierr);
    for (i=0;i<graph->ncc;i++) {
      ierr = PetscSortIntWithArray(graph->cptr[i+1]-graph->cptr[i],&queue_global[graph->cptr[i]],&graph->queue[graph->cptr[i]]);CHKERRQ(ierr);
    }
    ierr = PetscFree(queue_global);CHKERRQ(ierr);
    graph->queue_sorted = PETSC_TRUE;
  }
  nfc = 0;
  nec = 0;
  for (i=0;i<graph->ncc;i++) {
    if (mark[i] == 2) {
      if (FacesIS) {
        ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],&graph->queue[graph->cptr[i]],PETSC_USE_POINTER,&ISForFaces[nfc]);CHKERRQ(ierr);
      }
      nfc++;
    } else if (mark[i] == 1) {
      if (EdgesIS) {
        ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],&graph->queue[graph->cptr[i]],PETSC_USE_POINTER,&ISForEdges[nec]);CHKERRQ(ierr);
      }
      nec++;
    }
  }
  /* index set for vertices */
  if (VerticesIS) {
    nvc = 0;
    for (i=0;i<graph->ncc;i++) {
      if (!mark[i]) {
        PetscInt j;

        for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
          idx[nvc]=graph->queue[j];
          nvc++;
        }
      }
    }
    /* sort vertex set (by local ordering) */
    ierr = PetscSortInt(nvc,idx);CHKERRQ(ierr);
    /* idx ownership passes to the IS */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,nvc,idx,PETSC_OWN_POINTER,&ISForVertices);CHKERRQ(ierr);
  }
  ierr = PetscFree(mark);CHKERRQ(ierr);
  /* get back info */
  if (n_faces) *n_faces = nfc;
  if (FacesIS) *FacesIS = ISForFaces;
  if (n_edges) *n_edges = nec;
  if (EdgesIS) *EdgesIS = ISForEdges;
  if (VerticesIS) *VerticesIS = ISForVertices;
  PetscFunctionReturn(0);
}
/*@
   ISBuildTwoSided - Takes an IS that describes the target rank of each local entry and
   generates an IS containing the indices received from all processes that targeted this one.

   Collective on IS

   Input Parameters:
+  ito    - an IS giving, for each local entry, the rank it is sent to; negative target ranks are ignored
-  toindx - an IS giving the indices to send; NULL means the natural numbering (local index plus the layout's first global index)

   Output Parameter:
.  rows - contains the received indices, sorted in increasing order

   Level: advanced

.seealso: MatPartitioningCreate(), ISPartitioningToNumbering(), ISPartitioningCount()
@*/
PetscErrorCode ISBuildTwoSided(IS ito,IS toindx, IS *rows)
{
  const PetscInt *ito_indices,*toindx_indices;
  PetscInt       *send_indices,rstart,*recv_indices,nrecvs,nsends;
  PetscInt       *tosizes,*fromsizes,i,j,*tosizes_tmp,*tooffsets_tmp,ito_ln;
  PetscMPIInt    *toranks,*fromranks,size,target_rank,*fromperm_newtoold,nto,nfrom;
  PetscLayout    isrmap;
  MPI_Comm       comm;
  PetscSF        sf;
  PetscSFNode    *iremote;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)ito,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = ISGetLocalSize(ito,&ito_ln);CHKERRQ(ierr);
  /* why we do not have ISGetLayout? */
  isrmap = ito->map;
  ierr = PetscLayoutGetRange(isrmap,&rstart,NULL);CHKERRQ(ierr);
  ierr = ISGetIndices(ito,&ito_indices);CHKERRQ(ierr);
  /* count how many entries go to each rank (tosizes_tmp) and build prefix offsets */
  ierr = PetscCalloc2(size,&tosizes_tmp,size+1,&tooffsets_tmp);CHKERRQ(ierr);
  for(i=0; i<ito_ln; i++){
    if(ito_indices[i]<0) continue;
#if defined(PETSC_USE_DEBUG)
    if(ito_indices[i]>=size) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"target rank %d is larger than communicator size %d ",ito_indices[i],size);
#endif
    tosizes_tmp[ito_indices[i]]++;
  }
  nto = 0;
  for(i=0; i<size; i++){
    tooffsets_tmp[i+1] = tooffsets_tmp[i]+tosizes_tmp[i];
    if(tosizes_tmp[i]>0) nto++;
  }
  /* compress to the ranks actually targeted; tosizes packs (size,offset) pairs */
  ierr = PetscCalloc2(nto,&toranks,2*nto,&tosizes);CHKERRQ(ierr);
  nto = 0;
  for(i=0; i<size; i++){
    if(tosizes_tmp[i]>0){
      toranks[nto]     = i;
      tosizes[2*nto]   = tosizes_tmp[i];/* size */
      tosizes[2*nto+1] = tooffsets_tmp[i];/* offset */
      nto++;
    }
  }
  nsends = tooffsets_tmp[size];
  ierr = PetscCalloc1(nsends,&send_indices);CHKERRQ(ierr);
  if(toindx){
    ierr = ISGetIndices(toindx,&toindx_indices);CHKERRQ(ierr);
  }
  /* pack the send buffer grouped by target rank; tooffsets_tmp is consumed as a cursor */
  for(i=0; i<ito_ln; i++){
    if(ito_indices[i]<0) continue;
    target_rank = ito_indices[i];
    send_indices[tooffsets_tmp[target_rank]] = toindx? toindx_indices[i]:(i+rstart);
    tooffsets_tmp[target_rank]++;
  }
  if(toindx){
    ierr = ISRestoreIndices(toindx,&toindx_indices);CHKERRQ(ierr);
  }
  ierr = ISRestoreIndices(ito,&ito_indices);CHKERRQ(ierr);
  ierr = PetscFree2(tosizes_tmp,tooffsets_tmp);CHKERRQ(ierr);
  /* discover who sends to us and how much ((size,offset) pairs, hence count 2) */
  ierr = PetscCommBuildTwoSided(comm,2,MPIU_INT,nto,toranks,tosizes,&nfrom,&fromranks,&fromsizes);CHKERRQ(ierr);
  ierr = PetscFree2(toranks,tosizes);CHKERRQ(ierr);
  /* sort source ranks, keeping a permutation back to the unsorted fromsizes */
  ierr = PetscCalloc1(nfrom,&fromperm_newtoold);CHKERRQ(ierr);
  for(i=0; i<nfrom; i++){
    fromperm_newtoold[i] = i;
  }
  ierr = PetscSortMPIIntWithArray(nfrom,fromranks,fromperm_newtoold);CHKERRQ(ierr);
  nrecvs = 0;
  for(i=0; i<nfrom; i++){
    nrecvs += fromsizes[i*2];
  }
  ierr = PetscCalloc1(nrecvs,&recv_indices);CHKERRQ(ierr);
  ierr = PetscCalloc1(nrecvs,&iremote);CHKERRQ(ierr);
  /* each receive location points at (rank, offset+j) in the sender's send buffer */
  nrecvs = 0;
  for(i=0; i<nfrom; i++){
    for(j=0; j<fromsizes[2*fromperm_newtoold[i]]; j++){
      iremote[nrecvs].rank    = fromranks[i];
      iremote[nrecvs++].index = fromsizes[2*fromperm_newtoold[i]+1]+j;
    }
  }
  ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf,nsends,nrecvs,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);
  /* how to put a prefix ? */
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  /* pull the packed send indices from their owners into recv_indices */
  ierr = PetscSFBcastBegin(sf,MPIU_INT,send_indices,recv_indices);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_INT,send_indices,recv_indices);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  ierr = PetscFree(fromranks);CHKERRQ(ierr);
  ierr = PetscFree(fromsizes);CHKERRQ(ierr);
  ierr = PetscFree(fromperm_newtoold);CHKERRQ(ierr);
  ierr = PetscFree(send_indices);CHKERRQ(ierr);
  if(rows){
    ierr = PetscSortInt(nrecvs,recv_indices);CHKERRQ(ierr);
    /* recv_indices ownership passes to the IS */
    ierr = ISCreateGeneral(comm, nrecvs,recv_indices,PETSC_OWN_POINTER,rows);CHKERRQ(ierr);
  }else{
    ierr = PetscFree(recv_indices);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
/*
  PCBDDCGraphSetUp - Populates a PCBDDCGraph: per-node sharing-subdomain
  sets (count/neighbours_set), local mirrors for periodic dofs, dof-splitting
  marks (which_dof), Neumann/Dirichlet/special marks (special_dof), and the
  partition of the interface nodes into connected components (cptr/queue/
  subset), each sorted by global ordering with a reference node renumbered
  across processes.

  Input Parameters:
+ graph                  - the PCBDDCGraph to set up (l2gmap must be set)
. custom_minimal_size    - minimal size of a candidate component (stored in the graph)
. neumann_is             - IS of Neumann boundary dofs (may be NULL)
. dirichlet_is           - IS of Dirichlet boundary dofs (may be NULL)
. n_ISForDofs, ISForDofs - dof-splitting ISs; dofs not listed fall in the complement field
- custom_primal_vertices - IS of user-required primal vertices (may be NULL)

  Fixes applied:
  - the CSR sorting loop body is braced so CHKERRQ checks the error code of
    EVERY PetscSortInt call (previously only the last iteration was checked);
  - PetscValidHeaderSpecific for custom_primal_vertices now reports argument
    number 7 (it is the 7th parameter, as the adjacent PetscCheckSameComm
    already stated).
*/
PetscErrorCode PCBDDCGraphSetUp(PCBDDCGraph graph, PetscInt custom_minimal_size, IS neumann_is, IS dirichlet_is, PetscInt n_ISForDofs, IS ISForDofs[], IS custom_primal_vertices)
{
  IS             subset,subset_n;
  MPI_Comm       comm;
  const PetscInt *is_indices;
  PetscInt       n_neigh,*neigh,*n_shared,**shared,*queue_global;
  PetscInt       i,j,k,s,total_counts,nodes_touched,is_size;
  PetscMPIInt    commsize;
  PetscBool      same_set,mirrors_found;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidLogicalCollectiveInt(graph->l2gmap,custom_minimal_size,2);
  if (neumann_is) {
    PetscValidHeaderSpecific(neumann_is,IS_CLASSID,3);
    PetscCheckSameComm(graph->l2gmap,1,neumann_is,3);
  }
  graph->has_dirichlet = PETSC_FALSE;
  if (dirichlet_is) {
    PetscValidHeaderSpecific(dirichlet_is,IS_CLASSID,4);
    PetscCheckSameComm(graph->l2gmap,1,dirichlet_is,4);
    graph->has_dirichlet = PETSC_TRUE;
  }
  PetscValidLogicalCollectiveInt(graph->l2gmap,n_ISForDofs,5);
  for (i=0;i<n_ISForDofs;i++) {
    PetscValidHeaderSpecific(ISForDofs[i],IS_CLASSID,6);
    PetscCheckSameComm(graph->l2gmap,1,ISForDofs[i],6);
  }
  if (custom_primal_vertices) {
    PetscValidHeaderSpecific(custom_primal_vertices,IS_CLASSID,7); /* 7th argument (was wrongly reported as 6) */
    PetscCheckSameComm(graph->l2gmap,1,custom_primal_vertices,7);
  }
  ierr = PetscObjectGetComm((PetscObject)(graph->l2gmap),&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
  /* custom_minimal_size */
  graph->custom_minimal_size = custom_minimal_size;
  /* get info l2gmap and allocate work vectors */
  ierr = ISLocalToGlobalMappingGetInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr);
  /* check if we have any local periodic nodes (periodic BCs) */
  mirrors_found = PETSC_FALSE;
  if (graph->nvtxs && n_neigh) {
    for (i=0; i<n_shared[0]; i++) graph->count[shared[0][i]] += 1;
    for (i=0; i<n_shared[0]; i++) {
      if (graph->count[shared[0][i]] > 1) {
        mirrors_found = PETSC_TRUE;
        break;
      }
    }
  }
  /* compute local mirrors (if any) */
  if (mirrors_found) {
    IS       to,from;
    PetscInt *local_indices,*global_indices;

    ierr = ISCreateStride(PETSC_COMM_SELF,graph->nvtxs,0,1,&to);CHKERRQ(ierr);
    ierr = ISLocalToGlobalMappingApplyIS(graph->l2gmap,to,&from);CHKERRQ(ierr);
    /* get arrays of local and global indices */
    ierr = PetscMalloc1(graph->nvtxs,&local_indices);CHKERRQ(ierr);
    ierr = ISGetIndices(to,(const PetscInt**)&is_indices);CHKERRQ(ierr);
    ierr = PetscMemcpy(local_indices,is_indices,graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = ISRestoreIndices(to,(const PetscInt**)&is_indices);CHKERRQ(ierr);
    ierr = PetscMalloc1(graph->nvtxs,&global_indices);CHKERRQ(ierr);
    ierr = ISGetIndices(from,(const PetscInt**)&is_indices);CHKERRQ(ierr);
    ierr = PetscMemcpy(global_indices,is_indices,graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = ISRestoreIndices(from,(const PetscInt**)&is_indices);CHKERRQ(ierr);
    /* allocate space for mirrors */
    ierr = PetscMalloc2(graph->nvtxs,&graph->mirrors,graph->nvtxs,&graph->mirrors_set);CHKERRQ(ierr);
    ierr = PetscMemzero(graph->mirrors,graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr);
    graph->mirrors_set[0] = 0;

    /* count mirrors per node (k = total number of mirrored entries) */
    k = 0;
    for (i=0;i<n_shared[0];i++) {
      j = shared[0][i];
      if (graph->count[j] > 1) {
        graph->mirrors[j]++;
        k++;
      }
    }
    /* allocate space for set of mirrors */
    ierr = PetscMalloc1(k,&graph->mirrors_set[0]);CHKERRQ(ierr);
    for (i=1;i<graph->nvtxs;i++) graph->mirrors_set[i]=graph->mirrors_set[i-1]+graph->mirrors[i-1];
    /* fill arrays */
    ierr = PetscMemzero(graph->mirrors,graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr);
    for (j=0;j<n_shared[0];j++) {
      i = shared[0][j];
      if (graph->count[i] > 1) graph->mirrors_set[i][graph->mirrors[i]++]=global_indices[i];
    }
    ierr = PetscSortIntWithArray(graph->nvtxs,global_indices,local_indices);CHKERRQ(ierr);
    for (i=0;i<graph->nvtxs;i++) {
      if (graph->mirrors[i] > 0) {
        /* find the run of equal global indices and collect their local ids */
        ierr = PetscFindInt(graph->mirrors_set[i][0],graph->nvtxs,global_indices,&k);CHKERRQ(ierr);
        j = global_indices[k];
        while (k > 0 && global_indices[k-1] == j) k--;
        for (j=0;j<graph->mirrors[i];j++) {
          graph->mirrors_set[i][j]=local_indices[k+j];
        }
        ierr = PetscSortInt(graph->mirrors[i],graph->mirrors_set[i]);CHKERRQ(ierr);
      }
    }
    ierr = PetscFree(local_indices);CHKERRQ(ierr);
    ierr = PetscFree(global_indices);CHKERRQ(ierr);
    ierr = ISDestroy(&to);CHKERRQ(ierr);
    ierr = ISDestroy(&from);CHKERRQ(ierr);
  }
  ierr = PetscMemzero(graph->count,graph->nvtxs*sizeof(*graph->count));CHKERRQ(ierr);
  /* Count total number of neigh per node */
  k = 0;
  for (i=1;i<n_neigh;i++) {
    k += n_shared[i];
    for (j=0;j<n_shared[i];j++) {
      graph->count[shared[i][j]] += 1;
    }
  }
  /* Allocate space for storing the set of neighbours for each node */
  if (graph->nvtxs) {
    ierr = PetscMalloc1(k,&graph->neighbours_set[0]);CHKERRQ(ierr);
  }
  for (i=1;i<graph->nvtxs;i++) { /* dont count myself */
    graph->neighbours_set[i]=graph->neighbours_set[i-1]+graph->count[i-1];
  }
  /* Get information for sharing subdomains */
  ierr = PetscMemzero(graph->count,graph->nvtxs*sizeof(*graph->count));CHKERRQ(ierr);
  for (i=1;i<n_neigh;i++) { /* dont count myself */
    s = n_shared[i];
    for (j=0;j<s;j++) {
      k = shared[i][j];
      graph->neighbours_set[k][graph->count[k]] = neigh[i];
      graph->count[k] += 1;
    }
  }
  /* sort set of sharing subdomains */
  for (i=0;i<graph->nvtxs;i++) {
    ierr = PetscSortRemoveDupsInt(&graph->count[i],graph->neighbours_set[i]);CHKERRQ(ierr);
  }
  /* free memory allocated by ISLocalToGlobalMappingGetInfo */
  ierr = ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr);
  /* Get info for dofs splitting
     User can specify just a subset; an additional field is considered as a complementary field */
  for (i=0;i<graph->nvtxs;i++) graph->which_dof[i] = n_ISForDofs; /* by default a dof belongs to the complement set */
  for (i=0;i<n_ISForDofs;i++) {
    ierr = ISGetLocalSize(ISForDofs[i],&is_size);CHKERRQ(ierr);
    ierr = ISGetIndices(ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr);
    for (j=0;j<is_size;j++) {
      if (is_indices[j] > -1 && is_indices[j] < graph->nvtxs) { /* out of bounds indices (if any) are skipped */
        graph->which_dof[is_indices[j]] = i;
      }
    }
    ierr = ISRestoreIndices(ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr);
  }
  /* Take into account Neumann nodes */
  if (neumann_is) {
    ierr = ISGetLocalSize(neumann_is,&is_size);CHKERRQ(ierr);
    ierr = ISGetIndices(neumann_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
    for (i=0;i<is_size;i++) {
      if (is_indices[i] > -1 && is_indices[i] < graph->nvtxs) { /* out of bounds indices (if any) are skipped */
        graph->special_dof[is_indices[i]] = PCBDDCGRAPH_NEUMANN_MARK;
      }
    }
    ierr = ISRestoreIndices(neumann_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
  }
  /* Take into account Dirichlet nodes (they overwrite any neumann boundary mark previously set) */
  if (dirichlet_is) {
    ierr = ISGetLocalSize(dirichlet_is,&is_size);CHKERRQ(ierr);
    ierr = ISGetIndices(dirichlet_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
    for (i=0;i<is_size;i++){
      if (is_indices[i] > -1 && is_indices[i] < graph->nvtxs) { /* out of bounds indices (if any) are skipped */
        if (commsize > graph->commsizelimit) { /* dirichlet nodes treated as internal */
          ierr = PetscBTSet(graph->touched,is_indices[i]);CHKERRQ(ierr);
          graph->subset[is_indices[i]] = 0;
        }
        graph->special_dof[is_indices[i]] = PCBDDCGRAPH_DIRICHLET_MARK;
      }
    }
    ierr = ISRestoreIndices(dirichlet_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
  }
  /* mark local periodic nodes (if any) and adapt CSR graph (if any) */
  if (graph->mirrors) {
    for (i=0;i<graph->nvtxs;i++)
      if (graph->mirrors[i])
        graph->special_dof[i] = PCBDDCGRAPH_LOCAL_PERIODIC_MARK;

    if (graph->xadj) {
      PetscInt *new_xadj,*new_adjncy;

      /* sort CSR graph; body braced so CHKERRQ checks every iteration */
      for (i=0;i<graph->nvtxs;i++) {
        ierr = PetscSortInt(graph->xadj[i+1]-graph->xadj[i],&graph->adjncy[graph->xadj[i]]);CHKERRQ(ierr);
      }
      /* adapt local CSR graph in case of local periodicity */
      k = 0;
      for (i=0;i<graph->nvtxs;i++)
        for (j=graph->xadj[i];j<graph->xadj[i+1];j++)
          k += graph->mirrors[graph->adjncy[j]];

      ierr = PetscMalloc1(graph->nvtxs+1,&new_xadj);CHKERRQ(ierr);
      ierr = PetscMalloc1(k+graph->xadj[graph->nvtxs],&new_adjncy);CHKERRQ(ierr);
      new_xadj[0] = 0;
      for (i=0;i<graph->nvtxs;i++) {
        k = graph->xadj[i+1]-graph->xadj[i];
        ierr = PetscMemcpy(&new_adjncy[new_xadj[i]],&graph->adjncy[graph->xadj[i]],k*sizeof(PetscInt));CHKERRQ(ierr);
        new_xadj[i+1] = new_xadj[i]+k;
        /* append the mirror sets of every neighbour, then compress duplicates */
        for (j=graph->xadj[i];j<graph->xadj[i+1];j++) {
          k = graph->mirrors[graph->adjncy[j]];
          ierr = PetscMemcpy(&new_adjncy[new_xadj[i+1]],graph->mirrors_set[graph->adjncy[j]],k*sizeof(PetscInt));CHKERRQ(ierr);
          new_xadj[i+1] += k;
        }
        k = new_xadj[i+1]-new_xadj[i];
        ierr = PetscSortRemoveDupsInt(&k,&new_adjncy[new_xadj[i]]);CHKERRQ(ierr);
        new_xadj[i+1] = new_xadj[i]+k;
      }
      /* set new CSR into graph */
      ierr = PetscFree(graph->xadj);CHKERRQ(ierr);
      ierr = PetscFree(graph->adjncy);CHKERRQ(ierr);
      graph->xadj   = new_xadj;
      graph->adjncy = new_adjncy;
    }
  }
  /* mark special nodes (if any) -> each will become a single node equivalence class */
  if (custom_primal_vertices) {
    ierr = ISGetLocalSize(custom_primal_vertices,&is_size);CHKERRQ(ierr);
    ierr = ISGetIndices(custom_primal_vertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
    for (i=0,j=0;i<is_size;i++){
      if (is_indices[i] > -1 && is_indices[i] < graph->nvtxs && graph->special_dof[is_indices[i]] != PCBDDCGRAPH_DIRICHLET_MARK) { /* out of bounds indices (if any) are skipped */
        graph->special_dof[is_indices[i]] = PCBDDCGRAPH_SPECIAL_MARK-j;
        j++;
      }
    }
    ierr = ISRestoreIndices(custom_primal_vertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
  }
  /* mark interior nodes (if commsize > graph->commsizelimit) as touched and belonging to partition number 0 */
  if (commsize > graph->commsizelimit) {
    for (i=0;i<graph->nvtxs;i++) {
      if (!graph->count[i]) {
        ierr = PetscBTSet(graph->touched,i);CHKERRQ(ierr);
        graph->subset[i] = 0;
      }
    }
  }
  /* init graph structure and compute default subsets */
  nodes_touched = 0;
  for (i=0;i<graph->nvtxs;i++) {
    if (PetscBTLookup(graph->touched,i)) {
      nodes_touched++;
    }
  }
  i = 0;
  graph->ncc   = 0;
  total_counts = 0;

  /* allocated space for queues */
  if (commsize == graph->commsizelimit) {
    ierr = PetscMalloc2(graph->nvtxs+1,&graph->cptr,graph->nvtxs,&graph->queue);CHKERRQ(ierr);
  } else {
    PetscInt nused = graph->nvtxs - nodes_touched;
    ierr = PetscMalloc2(nused+1,&graph->cptr,nused,&graph->queue);CHKERRQ(ierr);
  }
  while (nodes_touched<graph->nvtxs) {
    /* find first untouched node in local ordering */
    while (PetscBTLookup(graph->touched,i)) i++;
    ierr = PetscBTSet(graph->touched,i);CHKERRQ(ierr);
    graph->subset[i]           = graph->ncc+1;
    graph->cptr[graph->ncc]    = total_counts;
    graph->queue[total_counts] = i;
    total_counts++;
    nodes_touched++;
    /* now find all other nodes having the same set of sharing subdomains */
    for (j=i+1;j<graph->nvtxs;j++) {
      /* check for same number of sharing subdomains, dof number and same special mark */
      if (!PetscBTLookup(graph->touched,j) && graph->count[i] == graph->count[j] && graph->which_dof[i] == graph->which_dof[j] && graph->special_dof[i] == graph->special_dof[j]) {
        /* check for same set of sharing subdomains */
        same_set = PETSC_TRUE;
        for (k=0;k<graph->count[j];k++){
          if (graph->neighbours_set[i][k] != graph->neighbours_set[j][k]) {
            same_set = PETSC_FALSE;
          }
        }
        /* I found a friend of mine */
        if (same_set) {
          ierr = PetscBTSet(graph->touched,j);CHKERRQ(ierr);
          graph->subset[j]           = graph->ncc+1;
          nodes_touched++;
          graph->queue[total_counts] = j;
          total_counts++;
        }
      }
    }
    graph->ncc++;
  }
  /* set default number of subsets (at this point no info on csr and/or local_subs has been taken into account, so n_subsets = ncc) */
  graph->n_subsets = graph->ncc;
  ierr = PetscMalloc1(graph->n_subsets,&graph->subset_ncc);CHKERRQ(ierr);
  for (i=0;i<graph->n_subsets;i++) {
    graph->subset_ncc[i] = 1;
  }
  /* final pointer */
  graph->cptr[graph->ncc] = total_counts;

  /* For consistency reasons (among neighbours), I need to sort (by global ordering) each connected component */
  /* Get a reference node (min index in global ordering) for each subset for tagging messages */
  ierr = PetscMalloc1(graph->ncc,&graph->subset_ref_node);CHKERRQ(ierr);
  ierr = PetscMalloc1(graph->cptr[graph->ncc],&queue_global);CHKERRQ(ierr);
  ierr = ISLocalToGlobalMappingApply(graph->l2gmap,graph->cptr[graph->ncc],graph->queue,queue_global);CHKERRQ(ierr);
  for (j=0;j<graph->ncc;j++) {
    ierr = PetscSortIntWithArray(graph->cptr[j+1]-graph->cptr[j],&queue_global[graph->cptr[j]],&graph->queue[graph->cptr[j]]);CHKERRQ(ierr);
    graph->subset_ref_node[j] = graph->queue[graph->cptr[j]];
  }
  ierr = PetscFree(queue_global);CHKERRQ(ierr);
  graph->queue_sorted = PETSC_TRUE;
  /* save information on subsets (needed when analyzing the connected components) */
  if (graph->ncc) {
    ierr = PetscMalloc2(graph->ncc,&graph->subset_size,graph->ncc,&graph->subset_idxs);CHKERRQ(ierr);
    ierr = PetscMalloc1(graph->cptr[graph->ncc],&graph->subset_idxs[0]);CHKERRQ(ierr);
    ierr = PetscMemzero(graph->subset_idxs[0],graph->cptr[graph->ncc]*sizeof(PetscInt));CHKERRQ(ierr);
    for (j=1;j<graph->ncc;j++) {
      graph->subset_size[j-1] = graph->cptr[j] - graph->cptr[j-1];
      graph->subset_idxs[j]   = graph->subset_idxs[j-1] + graph->subset_size[j-1];
    }
    graph->subset_size[graph->ncc-1] = graph->cptr[graph->ncc] - graph->cptr[graph->ncc-1];
    ierr = PetscMemcpy(graph->subset_idxs[0],graph->queue,graph->cptr[graph->ncc]*sizeof(PetscInt));CHKERRQ(ierr);
  }
  /* renumber reference nodes */
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(graph->l2gmap)),graph->ncc,graph->subset_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr);
  ierr = ISLocalToGlobalMappingApplyIS(graph->l2gmap,subset_n,&subset);CHKERRQ(ierr);
  ierr = ISDestroy(&subset_n);CHKERRQ(ierr);
  ierr = ISRenumber(subset,NULL,NULL,&subset_n);CHKERRQ(ierr);
  ierr = ISDestroy(&subset);CHKERRQ(ierr);
  ierr = ISGetLocalSize(subset_n,&k);CHKERRQ(ierr);
  if (k != graph->ncc) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid size of new subset! %D != %D",k,graph->ncc);
  ierr = ISGetIndices(subset_n,&is_indices);CHKERRQ(ierr);
  ierr = PetscMemcpy(graph->subset_ref_node,is_indices,graph->ncc*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = ISRestoreIndices(subset_n,&is_indices);CHKERRQ(ierr);
  ierr = ISDestroy(&subset_n);CHKERRQ(ierr);
  /* free workspace */
  graph->setupcalled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
PetscErrorCode PCBDDCSetupFETIDPMatContext(FETIDPMat_ctx fetidpmat_ctx ) { PetscErrorCode ierr; PC_IS *pcis=(PC_IS*)fetidpmat_ctx->pc->data; PC_BDDC *pcbddc=(PC_BDDC*)fetidpmat_ctx->pc->data; PCBDDCGraph mat_graph=pcbddc->mat_graph; Mat_IS *matis = (Mat_IS*)fetidpmat_ctx->pc->pmat->data; MPI_Comm comm; Mat ScalingMat; Vec lambda_global; IS IS_l2g_lambda; PetscBool skip_node,fully_redundant; PetscInt i,j,k,s,n_boundary_dofs,n_global_lambda,n_vertices,partial_sum; PetscInt n_local_lambda,n_lambda_for_dof,dual_size,n_neg_values,n_pos_values; PetscMPIInt rank,size,buf_size,neigh; PetscScalar scalar_value; PetscInt *vertex_indices; PetscInt *dual_dofs_boundary_indices,*aux_local_numbering_1,*aux_global_numbering; PetscInt *aux_sums,*cols_B_delta,*l2g_indices; PetscScalar *array,*scaling_factors,*vals_B_delta; PetscInt *aux_local_numbering_2; /* For communication of scaling factors */ PetscInt *ptrs_buffer,neigh_position; PetscScalar **all_factors,*send_buffer,*recv_buffer; MPI_Request *send_reqs,*recv_reqs; /* tests */ Vec test_vec; PetscBool test_fetidp; PetscViewer viewer; PetscFunctionBegin; ierr = PetscObjectGetComm((PetscObject)(fetidpmat_ctx->pc),&comm);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); /* Default type of lagrange multipliers is non-redundant */ fully_redundant = PETSC_FALSE; ierr = PetscOptionsGetBool(NULL,"-fetidp_fullyredundant",&fully_redundant,NULL);CHKERRQ(ierr); /* Evaluate local and global number of lagrange multipliers */ ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); n_local_lambda = 0; partial_sum = 0; n_boundary_dofs = 0; s = 0; /* Get Vertices used to define the BDDC */ ierr = PCBDDCGetPrimalVerticesLocalIdx(fetidpmat_ctx->pc,&n_vertices,&vertex_indices);CHKERRQ(ierr); dual_size = pcis->n_B-n_vertices; ierr = PetscSortInt(n_vertices,vertex_indices);CHKERRQ(ierr); ierr = PetscMalloc1(dual_size,&dual_dofs_boundary_indices);CHKERRQ(ierr); ierr = 
PetscMalloc1(dual_size,&aux_local_numbering_1);CHKERRQ(ierr); ierr = PetscMalloc1(dual_size,&aux_local_numbering_2);CHKERRQ(ierr); ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); for (i=0;i<pcis->n;i++){ j = mat_graph->count[i]; /* RECALL: mat_graph->count[i] does not count myself */ if ( j > 0 ) { n_boundary_dofs++; } skip_node = PETSC_FALSE; if ( s < n_vertices && vertex_indices[s]==i) { /* it works for a sorted set of vertices */ skip_node = PETSC_TRUE; s++; } if (j < 1) { skip_node = PETSC_TRUE; } if ( !skip_node ) { if (fully_redundant) { /* fully redundant set of lagrange multipliers */ n_lambda_for_dof = (j*(j+1))/2; } else { n_lambda_for_dof = j; } n_local_lambda += j; /* needed to evaluate global number of lagrange multipliers */ array[i]=(1.0*n_lambda_for_dof)/(j+1.0); /* already scaled for the next global sum */ /* store some data needed */ dual_dofs_boundary_indices[partial_sum] = n_boundary_dofs-1; aux_local_numbering_1[partial_sum] = i; aux_local_numbering_2[partial_sum] = n_lambda_for_dof; partial_sum++; } } ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = VecSum(pcis->vec1_global,&scalar_value);CHKERRQ(ierr); fetidpmat_ctx->n_lambda = (PetscInt)PetscRealPart(scalar_value); /* compute global ordering of lagrange multipliers and associate l2g map */ ierr = PCBDDCSubsetNumbering(comm,matis->mapping,partial_sum,aux_local_numbering_1,aux_local_numbering_2,&i,&aux_global_numbering);CHKERRQ(ierr); if (i != fetidpmat_ctx->n_lambda) { SETERRQ3(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"Error in %s: global number of multipliers mismatch! 
(%d!=%d)\n",__FUNCT__,fetidpmat_ctx->n_lambda,i); } ierr = PetscFree(aux_local_numbering_2);CHKERRQ(ierr); /* init data for scaling factors exchange */ partial_sum = 0; j = 0; ierr = PetscMalloc1(pcis->n_neigh,&ptrs_buffer);CHKERRQ(ierr); ierr = PetscMalloc1(pcis->n_neigh-1,&send_reqs);CHKERRQ(ierr); ierr = PetscMalloc1(pcis->n_neigh-1,&recv_reqs);CHKERRQ(ierr); ierr = PetscMalloc1(pcis->n,&all_factors);CHKERRQ(ierr); ptrs_buffer[0]=0; for (i=1;i<pcis->n_neigh;i++) { partial_sum += pcis->n_shared[i]; ptrs_buffer[i] = ptrs_buffer[i-1]+pcis->n_shared[i]; } ierr = PetscMalloc1(partial_sum,&send_buffer);CHKERRQ(ierr); ierr = PetscMalloc1(partial_sum,&recv_buffer);CHKERRQ(ierr); ierr = PetscMalloc1(partial_sum,&all_factors[0]);CHKERRQ(ierr); for (i=0;i<pcis->n-1;i++) { j = mat_graph->count[i]; all_factors[i+1]=all_factors[i]+j; } /* scatter B scaling to N vec */ ierr = VecScatterBegin(pcis->N_to_B,pcis->D,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = VecScatterEnd(pcis->N_to_B,pcis->D,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); /* communications */ ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); for (i=1;i<pcis->n_neigh;i++) { for (j=0;j<pcis->n_shared[i];j++) { send_buffer[ptrs_buffer[i-1]+j]=array[pcis->shared[i][j]]; } ierr = PetscMPIIntCast(ptrs_buffer[i]-ptrs_buffer[i-1],&buf_size);CHKERRQ(ierr); ierr = PetscMPIIntCast(pcis->neigh[i],&neigh);CHKERRQ(ierr); ierr = MPI_Isend(&send_buffer[ptrs_buffer[i-1]],buf_size,MPIU_SCALAR,neigh,0,comm,&send_reqs[i-1]);CHKERRQ(ierr); ierr = MPI_Irecv(&recv_buffer[ptrs_buffer[i-1]],buf_size,MPIU_SCALAR,neigh,0,comm,&recv_reqs[i-1]);CHKERRQ(ierr); } ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); ierr = MPI_Waitall((pcis->n_neigh-1),recv_reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); /* put values in correct places */ for (i=1;i<pcis->n_neigh;i++) { for (j=0;j<pcis->n_shared[i];j++) { k = pcis->shared[i][j]; neigh_position = 0; while(mat_graph->neighbours_set[k][neigh_position] != 
pcis->neigh[i]) {neigh_position++;} all_factors[k][neigh_position]=recv_buffer[ptrs_buffer[i-1]+j]; } } ierr = MPI_Waitall((pcis->n_neigh-1),send_reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); ierr = PetscFree(send_reqs);CHKERRQ(ierr); ierr = PetscFree(recv_reqs);CHKERRQ(ierr); ierr = PetscFree(send_buffer);CHKERRQ(ierr); ierr = PetscFree(recv_buffer);CHKERRQ(ierr); ierr = PetscFree(ptrs_buffer);CHKERRQ(ierr); /* Compute B and B_delta (local actions) */ ierr = PetscMalloc1(pcis->n_neigh,&aux_sums);CHKERRQ(ierr); ierr = PetscMalloc1(n_local_lambda,&l2g_indices);CHKERRQ(ierr); ierr = PetscMalloc1(n_local_lambda,&vals_B_delta);CHKERRQ(ierr); ierr = PetscMalloc1(n_local_lambda,&cols_B_delta);CHKERRQ(ierr); ierr = PetscMalloc1(n_local_lambda,&scaling_factors);CHKERRQ(ierr); n_global_lambda=0; partial_sum=0; for (i=0;i<dual_size;i++) { n_global_lambda = aux_global_numbering[i]; j = mat_graph->count[aux_local_numbering_1[i]]; aux_sums[0]=0; for (s=1;s<j;s++) { aux_sums[s]=aux_sums[s-1]+j-s+1; } array = all_factors[aux_local_numbering_1[i]]; n_neg_values = 0; while(n_neg_values < j && mat_graph->neighbours_set[aux_local_numbering_1[i]][n_neg_values] < rank) {n_neg_values++;} n_pos_values = j - n_neg_values; if (fully_redundant) { for (s=0;s<n_neg_values;s++) { l2g_indices [partial_sum+s]=aux_sums[s]+n_neg_values-s-1+n_global_lambda; cols_B_delta [partial_sum+s]=dual_dofs_boundary_indices[i]; vals_B_delta [partial_sum+s]=-1.0; scaling_factors[partial_sum+s]=array[s]; } for (s=0;s<n_pos_values;s++) { l2g_indices [partial_sum+s+n_neg_values]=aux_sums[n_neg_values]+s+n_global_lambda; cols_B_delta [partial_sum+s+n_neg_values]=dual_dofs_boundary_indices[i]; vals_B_delta [partial_sum+s+n_neg_values]=1.0; scaling_factors[partial_sum+s+n_neg_values]=array[s+n_neg_values]; } partial_sum += j; } else { /* l2g_indices and default cols and vals of B_delta */ for (s=0;s<j;s++) { l2g_indices [partial_sum+s]=n_global_lambda+s; cols_B_delta [partial_sum+s]=dual_dofs_boundary_indices[i]; 
vals_B_delta [partial_sum+s]=0.0; } /* B_delta */ if ( n_neg_values > 0 ) { /* there's a rank next to me to the left */ vals_B_delta [partial_sum+n_neg_values-1]=-1.0; } if ( n_neg_values < j ) { /* there's a rank next to me to the right */ vals_B_delta [partial_sum+n_neg_values]=1.0; } /* scaling as in Klawonn-Widlund 1999*/ for (s=0;s<n_neg_values;s++) { scalar_value = 0.0; for (k=0;k<s+1;k++) { scalar_value += array[k]; } scaling_factors[partial_sum+s] = -scalar_value; } for (s=0;s<n_pos_values;s++) { scalar_value = 0.0; for (k=s+n_neg_values;k<j;k++) { scalar_value += array[k]; } scaling_factors[partial_sum+s+n_neg_values] = scalar_value; } partial_sum += j; } } ierr = PetscFree(aux_global_numbering);CHKERRQ(ierr); ierr = PetscFree(aux_sums);CHKERRQ(ierr); ierr = PetscFree(aux_local_numbering_1);CHKERRQ(ierr); ierr = PetscFree(dual_dofs_boundary_indices);CHKERRQ(ierr); ierr = PetscFree(all_factors[0]);CHKERRQ(ierr); ierr = PetscFree(all_factors);CHKERRQ(ierr); /* Local to global mapping of fetidpmat */ ierr = VecCreate(PETSC_COMM_SELF,&fetidpmat_ctx->lambda_local);CHKERRQ(ierr); ierr = VecSetSizes(fetidpmat_ctx->lambda_local,n_local_lambda,n_local_lambda);CHKERRQ(ierr); ierr = VecSetType(fetidpmat_ctx->lambda_local,VECSEQ);CHKERRQ(ierr); ierr = VecCreate(comm,&lambda_global);CHKERRQ(ierr); ierr = VecSetSizes(lambda_global,PETSC_DECIDE,fetidpmat_ctx->n_lambda);CHKERRQ(ierr); ierr = VecSetType(lambda_global,VECMPI);CHKERRQ(ierr); ierr = ISCreateGeneral(comm,n_local_lambda,l2g_indices,PETSC_OWN_POINTER,&IS_l2g_lambda);CHKERRQ(ierr); ierr = VecScatterCreate(fetidpmat_ctx->lambda_local,(IS)0,lambda_global,IS_l2g_lambda,&fetidpmat_ctx->l2g_lambda);CHKERRQ(ierr); ierr = ISDestroy(&IS_l2g_lambda);CHKERRQ(ierr); /* Create local part of B_delta */ ierr = MatCreate(PETSC_COMM_SELF,&fetidpmat_ctx->B_delta); ierr = MatSetSizes(fetidpmat_ctx->B_delta,n_local_lambda,pcis->n_B,n_local_lambda,pcis->n_B);CHKERRQ(ierr); ierr = 
MatSetType(fetidpmat_ctx->B_delta,MATSEQAIJ);CHKERRQ(ierr); ierr = MatSeqAIJSetPreallocation(fetidpmat_ctx->B_delta,1,NULL);CHKERRQ(ierr); ierr = MatSetOption(fetidpmat_ctx->B_delta,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); for (i=0;i<n_local_lambda;i++) { ierr = MatSetValue(fetidpmat_ctx->B_delta,i,cols_B_delta[i],vals_B_delta[i],INSERT_VALUES);CHKERRQ(ierr); } ierr = PetscFree(vals_B_delta);CHKERRQ(ierr); ierr = MatAssemblyBegin(fetidpmat_ctx->B_delta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd (fetidpmat_ctx->B_delta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); if (fully_redundant) { ierr = MatCreate(PETSC_COMM_SELF,&ScalingMat); ierr = MatSetSizes(ScalingMat,n_local_lambda,n_local_lambda,n_local_lambda,n_local_lambda);CHKERRQ(ierr); ierr = MatSetType(ScalingMat,MATSEQAIJ);CHKERRQ(ierr); ierr = MatSeqAIJSetPreallocation(ScalingMat,1,NULL);CHKERRQ(ierr); for (i=0;i<n_local_lambda;i++) { ierr = MatSetValue(ScalingMat,i,i,scaling_factors[i],INSERT_VALUES);CHKERRQ(ierr); } ierr = MatAssemblyBegin(ScalingMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd (ScalingMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatMatMult(ScalingMat,fetidpmat_ctx->B_delta,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&fetidpmat_ctx->B_Ddelta);CHKERRQ(ierr); ierr = MatDestroy(&ScalingMat);CHKERRQ(ierr); } else { ierr = MatCreate(PETSC_COMM_SELF,&fetidpmat_ctx->B_Ddelta); ierr = MatSetSizes(fetidpmat_ctx->B_Ddelta,n_local_lambda,pcis->n_B,n_local_lambda,pcis->n_B);CHKERRQ(ierr); ierr = MatSetType(fetidpmat_ctx->B_Ddelta,MATSEQAIJ);CHKERRQ(ierr); ierr = MatSeqAIJSetPreallocation(fetidpmat_ctx->B_Ddelta,1,NULL);CHKERRQ(ierr); for (i=0;i<n_local_lambda;i++) { ierr = MatSetValue(fetidpmat_ctx->B_Ddelta,i,cols_B_delta[i],scaling_factors[i],INSERT_VALUES);CHKERRQ(ierr); } ierr = MatAssemblyBegin(fetidpmat_ctx->B_Ddelta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd (fetidpmat_ctx->B_Ddelta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); } ierr = PetscFree(scaling_factors);CHKERRQ(ierr); 
ierr = PetscFree(cols_B_delta);CHKERRQ(ierr); /* Create some vectors needed by fetidp */ ierr = VecDuplicate(pcis->vec1_B,&fetidpmat_ctx->temp_solution_B);CHKERRQ(ierr); ierr = VecDuplicate(pcis->vec1_D,&fetidpmat_ctx->temp_solution_D);CHKERRQ(ierr); test_fetidp = PETSC_FALSE; ierr = PetscOptionsGetBool(NULL,"-fetidp_check",&test_fetidp,NULL);CHKERRQ(ierr); if (test_fetidp && !pcbddc->use_deluxe_scaling) { PetscReal real_value; ierr = PetscViewerASCIIGetStdout(comm,&viewer);CHKERRQ(ierr); ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(viewer,"----------FETI_DP TESTS--------------\n");CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(viewer,"All tests should return zero!\n");CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(viewer,"FETIDP MAT context in the ");CHKERRQ(ierr); if (fully_redundant) { ierr = PetscViewerASCIIPrintf(viewer,"fully redundant case for lagrange multipliers.\n");CHKERRQ(ierr); } else { ierr = PetscViewerASCIIPrintf(viewer,"Non-fully redundant case for lagrange multiplier.\n");CHKERRQ(ierr); } ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); /******************************************************************/ /* TEST A/B: Test numbering of global lambda dofs */ /******************************************************************/ ierr = VecDuplicate(fetidpmat_ctx->lambda_local,&test_vec);CHKERRQ(ierr); ierr = VecSet(lambda_global,1.0);CHKERRQ(ierr); ierr = VecSet(test_vec,1.0);CHKERRQ(ierr); ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); scalar_value = -1.0; ierr = VecAXPY(test_vec,scalar_value,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); ierr = VecNorm(test_vec,NORM_INFINITY,&real_value);CHKERRQ(ierr); ierr = VecDestroy(&test_vec);CHKERRQ(ierr); ierr = 
PetscViewerASCIISynchronizedPrintf(viewer,"A[%04d]: CHECK glob to loc: % 1.14e\n",rank,real_value);CHKERRQ(ierr); ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); if (fully_redundant) { ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); ierr = VecSet(fetidpmat_ctx->lambda_local,0.5);CHKERRQ(ierr); ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecSum(lambda_global,&scalar_value);CHKERRQ(ierr); ierr = PetscViewerASCIISynchronizedPrintf(viewer,"B[%04d]: CHECK loc to glob: % 1.14e\n",rank,PetscRealPart(scalar_value)-fetidpmat_ctx->n_lambda);CHKERRQ(ierr); ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); } /******************************************************************/ /* TEST C: It should holds B_delta*w=0, w\in\widehat{W} */ /* This is the meaning of the B matrix */ /******************************************************************/ ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); /* Action of B_delta */ ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); ierr = 
VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecNorm(lambda_global,NORM_INFINITY,&real_value);CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(viewer,"C[coll]: CHECK infty norm of B_delta*w (w continuous): % 1.14e\n",real_value);CHKERRQ(ierr); ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); /******************************************************************/ /* TEST D: It should holds E_Dw = w - P_Dw w\in\widetilde{W} */ /* E_D = R_D^TR */ /* P_D = B_{D,delta}^T B_{delta} */ /* eq.44 Mandel Tezaur and Dohrmann 2005 */ /******************************************************************/ /* compute a random vector in \widetilde{W} */ ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); scalar_value = 0.0; /* set zero at vertices */ ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); for (i=0;i<n_vertices;i++) { array[vertex_indices[i]]=scalar_value; } ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); /* store w for final comparison */ ierr = VecDuplicate(pcis->vec1_B,&test_vec);CHKERRQ(ierr); ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,test_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,test_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); /* Jump operator P_D : results stored in pcis->vec1_B */ ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); /* Action of B_delta */ ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); ierr = 
VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); /* Action of B_Ddelta^T */ ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); /* Average operator E_D : results stored in pcis->vec2_B */ ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = PCBDDCScalingExtension(fetidpmat_ctx->pc,pcis->vec2_B,pcis->vec1_global);CHKERRQ(ierr); ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_global,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); /* test E_D=I-P_D */ scalar_value = 1.0; ierr = VecAXPY(pcis->vec1_B,scalar_value,pcis->vec2_B);CHKERRQ(ierr); scalar_value = -1.0; ierr = VecAXPY(pcis->vec1_B,scalar_value,test_vec);CHKERRQ(ierr); ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&real_value);CHKERRQ(ierr); ierr = VecDestroy(&test_vec);CHKERRQ(ierr); ierr = PetscViewerASCIISynchronizedPrintf(viewer,"D[%04d] CHECK infty norm of E_D + P_D - I: % 1.14e\n",rank,real_value);CHKERRQ(ierr); ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); /******************************************************************/ /* TEST E: It should holds R_D^TP_Dw=0 w\in\widetilde{W} */ /* eq.48 Mandel Tezaur and Dohrmann 2005 */ 
/******************************************************************/ ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); scalar_value = 0.0; /* set zero at vertices */ for (i=0;i<n_vertices;i++) { array[vertex_indices[i]]=scalar_value; } ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); /* Jump operator P_D : results stored in pcis->vec1_B */ ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); /* Action of B_delta */ ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); /* Action of B_Ddelta^T */ ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); /* scaling */ ierr = PCBDDCScalingExtension(fetidpmat_ctx->pc,pcis->vec1_B,pcis->vec1_global);CHKERRQ(ierr); ierr = VecNorm(pcis->vec1_global,NORM_INFINITY,&real_value);CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(viewer,"E[coll]: CHECK infty norm of R^T_D P_D: % 1.14e\n",real_value);CHKERRQ(ierr); ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); if (!fully_redundant) { /******************************************************************/ /* TEST F: It should holds B_{delta}B^T_{D,delta}=I */ /* Corollary thm 14 Mandel Tezaur and 
Dohrmann 2005 */ /******************************************************************/ ierr = VecDuplicate(lambda_global,&test_vec);CHKERRQ(ierr); ierr = VecSetRandom(lambda_global,NULL);CHKERRQ(ierr); /* Action of B_Ddelta^T */ ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); ierr = MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); /* Action of B_delta */ ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); ierr = VecSet(test_vec,0.0);CHKERRQ(ierr); ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,test_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,test_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); scalar_value = -1.0; ierr = VecAXPY(lambda_global,scalar_value,test_vec);CHKERRQ(ierr); ierr = VecNorm(lambda_global,NORM_INFINITY,&real_value);CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(viewer,"E[coll]: CHECK infty norm of P^T_D - I: % 1.14e\n",real_value);CHKERRQ(ierr); ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); ierr = VecDestroy(&test_vec);CHKERRQ(ierr); } } /* final cleanup */ ierr = PetscFree(vertex_indices);CHKERRQ(ierr); ierr = VecDestroy(&lambda_global);CHKERRQ(ierr); PetscFunctionReturn(0); }
/*
   MatPtAPNumeric_SeqAIJ_SeqAIJ_SparseAxpy - Numeric phase of C = P^T * A * P for
   sequential AIJ matrices, assuming the symbolic structure of C (c->i/c->j) has
   already been computed.

   For each row i of A it densifies the sparse row (A*P)[i,:] into the work
   arrays apa/apj/apjdense, then scatters that row into C via sparse axpy
   against each row of P^T (i.e. each column index stored in row i of P).

   Input Parameters:
.  A - left/right outer matrix (SeqAIJ)
.  P - projection matrix (SeqAIJ)

   Output Parameter:
.  C - preallocated product matrix (SeqAIJ); values are overwritten
*/
PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ_SparseAxpy(Mat A,Mat P,Mat C)
{
  PetscErrorCode ierr;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*) A->data;
  Mat_SeqAIJ     *p = (Mat_SeqAIJ*) P->data;
  Mat_SeqAIJ     *c = (Mat_SeqAIJ*) C->data;
  /* aj and aa are walked as cursors over A's column/value arrays; pJ/pA walk P's */
  PetscInt       *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
  PetscInt       *ci=c->i,*cj=c->j,*cjj;
  PetscInt       am =A->rmap->N,cn=C->cmap->N,cm=C->rmap->N;
  PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;

  PetscFunctionBegin;
  /* Allocate temporary array for storage of one row of A*P (cn: non-scalable) */
  ierr = PetscMalloc3(cn,&apa,cn,&apjdense,cn,&apj);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,cn*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = PetscMemzero(apjdense,cn*sizeof(PetscInt));CHKERRQ(ierr);

  /* Clear old values in C */
  ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0; i<am; i++) {
    /* Form sparse row of A*P: accumulate row i of A times the matching rows of P */
    anzi  = ai[i+1] - ai[i];
    apnzj = 0;
    for (j=0; j<anzi; j++) {
      prow = *aj++;                       /* next column of A's row i == row of P */
      pnzj = pi[prow+1] - pi[prow];
      pjj  = pj + pi[prow];
      paj  = pa + pi[prow];
      for (k=0; k<pnzj; k++) {
        if (!apjdense[pjj[k]]) {
          /* first time we hit this column: record it in the sparse index list */
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aa)*paj[k];
      }
      ierr = PetscLogFlops(2.0*pnzj);CHKERRQ(ierr);
      aa++;
    }

    /* Sort the j index array for quick sparse axpy. */
    /* Note: a array does not need sorting as it is in dense storage locations. */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
    pnzi = pi[i+1] - pi[i];
    for (j=0; j<pnzi; j++) {
      nextap = 0;
      crow   = *pJ++;                     /* destination row of C = column of P in row i */
      cjj    = cj + ci[crow];
      caj    = ca + ci[crow];
      /* Perform sparse axpy operation. Note cjj includes apj (C's structure
         is a superset of the A*P row), so scanning cjj finds every apj entry. */
      for (k=0; nextap<apnzj; k++) {
#if defined(PETSC_USE_DEBUG)
        if (k >= ci[crow+1] - ci[crow]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"k too large k %d, crow %d",k,crow);
#endif
        if (cjj[k]==apj[nextap]) {
          caj[k] += (*pA)*apa[apj[nextap++]];
        }
      }
      ierr = PetscLogFlops(2.0*apnzj);CHKERRQ(ierr);
      pA++;
    }

    /* Zero the current row info for A*P so the work arrays are clean for row i+1 */
    for (j=0; j<apnzj; j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* Assemble the final matrix and clean up */
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree3(apa,apjdense,apj);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
   AOCreate_Basic - Creates a basic application ordering: every process stores
   the COMPLETE app<->petsc permutation (size N), gathered from the local
   pieces of ao->isapp (and ao->ispetsc when provided).

   Collective on the communicator of ao->isapp.

   If ispetsc is NULL the "natural" numbering 0..N-1 (in gather order) is used
   for the PETSc side.
*/
PETSC_EXTERN PetscErrorCode AOCreate_Basic(AO ao)
{
  AO_Basic       *aobasic;
  PetscMPIInt    size,rank,count,*lens,*disp;
  PetscInt       napp,*allpetsc,*allapp,ip,ia,N,i,*petsc=NULL,start;
  PetscErrorCode ierr;
  IS             isapp=ao->isapp,ispetsc=ao->ispetsc;
  MPI_Comm       comm;
  const PetscInt *myapp,*mypetsc=NULL;

  PetscFunctionBegin;
  /* create special struct aobasic */
  ierr     = PetscNewLog(ao,&aobasic);CHKERRQ(ierr);
  ao->data = (void*) aobasic;
  ierr     = PetscMemcpy(ao->ops,&AOOps_Basic,sizeof(struct _AOOps));CHKERRQ(ierr);
  ierr     = PetscObjectChangeTypeName((PetscObject)ao,AOBASIC);CHKERRQ(ierr);

  ierr = ISGetLocalSize(isapp,&napp);CHKERRQ(ierr);
  ierr = ISGetIndices(isapp,&myapp);CHKERRQ(ierr);
  ierr = PetscMPIIntCast(napp,&count);CHKERRQ(ierr);

  /* transmit all lengths to all processors */
  ierr = PetscObjectGetComm((PetscObject)isapp,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
  ierr = PetscMalloc2(size, &lens,size,&disp);CHKERRQ(ierr);
  ierr = MPI_Allgather(&count, 1, MPI_INT, lens, 1, MPI_INT, comm);CHKERRQ(ierr);
  N    = 0;
  for (i = 0; i < size; i++) {
    ierr = PetscMPIIntCast(N,disp+i);CHKERRQ(ierr); /* = sum(lens[j]), j< i */
    N   += lens[i];
  }
  /* every process stores the whole ordering, so local size == global size */
  ao->N = N;
  ao->n = N;

  /* If mypetsc is 0 then use "natural" numbering */
  if (napp) {
    if (!ispetsc) {
      /* natural numbering: this rank owns global positions disp[rank]..disp[rank]+napp-1 */
      start = disp[rank];
      ierr  = PetscMalloc1((napp+1), &petsc);CHKERRQ(ierr);
      for (i=0; i<napp; i++) petsc[i] = start + i;
    } else {
      ierr  = ISGetIndices(ispetsc,&mypetsc);CHKERRQ(ierr);
      petsc = (PetscInt*)mypetsc;  /* alias the IS data; must NOT be freed below */
    }
  }

  /* get all indices on all processors */
  ierr = PetscMalloc2(N,&allpetsc,N,&allapp);CHKERRQ(ierr);
  ierr = MPI_Allgatherv(petsc, count, MPIU_INT, allpetsc, lens, disp, MPIU_INT, comm);CHKERRQ(ierr);
  ierr = MPI_Allgatherv((void*)myapp, count, MPIU_INT, allapp, lens, disp, MPIU_INT, comm);CHKERRQ(ierr);
  ierr = PetscFree2(lens,disp);CHKERRQ(ierr);

#if defined(PETSC_USE_DEBUG)
  /* verify both gathered orderings are permutations of 0..N-1 */
  {
    PetscInt *sorted;
    ierr = PetscMalloc1(N,&sorted);CHKERRQ(ierr);

    ierr = PetscMemcpy(sorted,allpetsc,N*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscSortInt(N,sorted);CHKERRQ(ierr);
    for (i=0; i<N; i++) {
      if (sorted[i] != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"PETSc ordering requires a permutation of numbers 0 to N-1\n it is missing %D has %D",i,sorted[i]);
    }

    ierr = PetscMemcpy(sorted,allapp,N*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscSortInt(N,sorted);CHKERRQ(ierr);
    for (i=0; i<N; i++) {
      if (sorted[i] != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Application ordering requires a permutation of numbers 0 to N-1\n it is missing %D has %D",i,sorted[i]);
    }

    ierr = PetscFree(sorted);CHKERRQ(ierr);
  }
#endif

  /* generate a list of application and PETSc node numbers */
  ierr = PetscMalloc2(N, &aobasic->app,N,&aobasic->petsc);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)ao,2*N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aobasic->app, N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aobasic->petsc, N*sizeof(PetscInt));CHKERRQ(ierr);
  /* entries are stored shifted by +1 so that 0 can mean "unset" for the
     duplicate check; they are shifted back down below */
  for (i = 0; i < N; i++) {
    ip = allpetsc[i];
    ia = allapp[i];
    /* check there are no duplicates */
    if (aobasic->app[ip]) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Duplicate in PETSc ordering at position %d. Already mapped to %d, not %d.", i, aobasic->app[ip]-1, ia);
    aobasic->app[ip] = ia + 1;
    if (aobasic->petsc[ia]) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Duplicate in Application ordering at position %d. Already mapped to %d, not %d.", i, aobasic->petsc[ia]-1, ip);
    aobasic->petsc[ia] = ip + 1;
  }
  /* NOTE(review): petsc is freed here when it was locally allocated
     (!mypetsc), and PetscFree() is invoked on it AGAIN at the end of this
     function. That second call looks safe only because PetscFree() zeroes
     its argument after freeing — confirm, and consider removing one of the
     two calls. */
  if (napp && !mypetsc) {
    ierr = PetscFree(petsc);CHKERRQ(ierr);
  }
  ierr = PetscFree2(allpetsc,allapp);CHKERRQ(ierr);

  /* shift indices down by one */
  for (i = 0; i < N; i++) {
    aobasic->app[i]--;
    aobasic->petsc[i]--;
  }

  ierr = ISRestoreIndices(isapp,&myapp);CHKERRQ(ierr);
  if (napp) {
    if (ispetsc) {
      ierr = ISRestoreIndices(ispetsc,&mypetsc);CHKERRQ(ierr);
    } else {
      ierr = PetscFree(petsc);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}
/*
  Fortran binding stub for PetscSortInt(): sorts the first *n entries of
  the array i[] in place and reports the PETSc error code through *__ierr.
*/
void PETSC_STDCALL petscsortint_(PetscInt *n,PetscInt i[],int *__ierr)
{
  *__ierr = PetscSortInt(*n,i);
}
/*
   MatSetUpMultiply_MPIAIJ - Builds the communication machinery needed for
   MatMult with an MPIAIJ matrix: determines which off-diagonal (ghost)
   columns this process references, compacts the off-diagonal block B to use
   local column numbers, and creates the VecScatter (aij->Mvctx) plus the
   local work vector (aij->lvec) used to gather ghost values.

   Fixes vs. previous revision:
   - PetscInfo() return value is now checked with CHKERRQ like every other
     call in this function.
   - typos in the debug error message corrected ("consistant"/"usning").

   Input Parameter:
.  mat - an assembled MPIAIJ matrix

   Side effects: fills aij->garray, aij->lvec, aij->Mvctx and renumbers the
   column indices of aij->B in place.
*/
PetscErrorCode MatSetUpMultiply_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ         *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ         *B   = (Mat_SeqAIJ*)(aij->B->data);
  PetscErrorCode     ierr;
  PetscInt           i,j,*aj = B->j,ec = 0,*garray;
  IS                 from,to;
  Vec                gvec;
  PetscBool          useblockis;
#if defined (PETSC_USE_CTABLE)
  PetscTable         gid1_lid1;
  PetscTablePosition tpos;
  PetscInt           gid,lid;
#else
  PetscInt           N = mat->cmap->N,*indices;
#endif

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  /* use a table to collect the distinct global columns appearing in B */
  ierr = PetscTableCreate(aij->B->rmap->n,mat->cmap->N+1,&gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<aij->B->rmap->n; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt data,gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
      if (!data) {
        /* one based table */
        ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
  }
  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
  while (tpos) {
    ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
    gid--; lid--;
    garray[lid] = gid;
  }
  ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); /* sort, and rebuild */
  ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
  /* compact out the extra columns in B: global column id -> local index into garray */
  for (i=0; i<aij->B->rmap->n; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
      lid--;
      aj[B->i[i] + j] = lid;
    }
  }
  aij->B->cmap->n = aij->B->cmap->N = ec;
  ierr = PetscLayoutSetUp((aij->B->cmap));CHKERRQ(ierr);
  ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr);
#else
  /* Make an array as long as the number of columns */
  /* mark those columns that are in aij->B */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<aij->B->rmap->n; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j] ]) ec++;
      indices[aj[B->i[i] + j] ] = 1;
    }
  }
  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ec   = 0;
  for (i=0; i<N; i++) {
    if (indices[i]) garray[ec++] = i;
  }
  /* make indices now point into garray */
  for (i=0; i<ec; i++) {
    indices[garray[i]] = i;
  }
  /* compact out the extra columns in B */
  for (i=0; i<aij->B->rmap->n; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
    }
  }
  aij->B->cmap->n = aij->B->cmap->N = ec;
  ierr = PetscLayoutSetUp((aij->B->cmap));CHKERRQ(ierr);
  ierr = PetscFree(indices);CHKERRQ(ierr);
#endif
  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec,&aij->lvec);CHKERRQ(ierr);

  /* create two temporary Index sets for build scatter gather */
  /* check for the special case where blocks are communicated for faster VecScatterXXX */
  useblockis = PETSC_FALSE;
  if (mat->cmap->bs > 1) {
    PetscInt bs = mat->cmap->bs,ibs,ga;
    if (!(ec % bs)) {
      useblockis = PETSC_TRUE;
      for (i=0; i<ec/bs; i++) {
        /* each block of bs ghost columns must be contiguous and block-aligned */
        if ((ga = garray[ibs = i*bs]) % bs) {
          useblockis = PETSC_FALSE;
          break;
        }
        for (j=1; j<bs; j++) {
          if (garray[ibs+j] != ga+j) {
            useblockis = PETSC_FALSE;
            break;
          }
        }
        if (!useblockis) break;
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  /* all ranks must agree on whether the blocked scatter path is used */
  i    = (PetscInt)useblockis;
  ierr = MPI_Allreduce(&i,&j,1,MPIU_INT,MPI_MIN,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (j!=i) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Use of blocked not consistent (I am using blocked)");
#endif

  if (useblockis) {
    PetscInt *ga,bs = mat->cmap->bs,iec = ec/bs;
    if (ec%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"ec=%D bs=%D",ec,bs);
    ierr = PetscInfo(mat,"Using block index set to define scatter\n");CHKERRQ(ierr);
    ierr = PetscMalloc(iec*sizeof(PetscInt),&ga);CHKERRQ(ierr);
    for (i=0; i<iec; i++) ga[i] = garray[i*bs]/bs;
    ierr = ISCreateBlock(((PetscObject)mat)->comm,bs,iec,ga,PETSC_OWN_POINTER,&from);CHKERRQ(ierr);
  } else {
    ierr = ISCreateGeneral(((PetscObject)mat)->comm,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
  }
  ierr = ISCreateStride(PETSC_COMM_SELF,ec,0,1,&to);CHKERRQ(ierr);

  /* create temporary global vector to generate scatter context */
  /* This does not allocate the array's memory so is efficient */
  ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,1,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr);

  /* generate the scatter context */
  ierr = VecScatterCreate(gvec,from,aij->lvec,to,&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,aij->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);

  aij->garray = garray;

  ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = ISDestroy(&from);CHKERRQ(ierr);
  ierr = ISDestroy(&to);CHKERRQ(ierr);
  ierr = VecDestroy(&gvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}