/*@ DMPlexCreateExodus - Create a DMPlex mesh from an ExodusII file ID. Collective on comm Input Parameters: + comm - The MPI communicator . exoid - The ExodusII id associated with a exodus file and obtained using ex_open - interpolate - Create faces and edges in the mesh Output Parameter: . dm - The DM object representing the mesh Level: beginner .keywords: mesh,ExodusII .seealso: DMPLEX, DMCreate() @*/ PetscErrorCode DMPlexCreateExodus(MPI_Comm comm, PetscInt exoid, PetscBool interpolate, DM *dm) { #if defined(PETSC_HAVE_EXODUSII) PetscMPIInt num_proc, rank; PetscSection coordSection; Vec coordinates; PetscScalar *coords; PetscInt coordSize, v; PetscErrorCode ierr; /* Read from ex_get_init() */ char title[PETSC_MAX_PATH_LEN+1]; int dim = 0, numVertices = 0, numCells = 0; int num_cs = 0, num_vs = 0, num_fs = 0; #endif PetscFunctionBegin; #if defined(PETSC_HAVE_EXODUSII) ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); ierr = MPI_Comm_size(comm, &num_proc);CHKERRQ(ierr); ierr = DMCreate(comm, dm);CHKERRQ(ierr); ierr = DMSetType(*dm, DMPLEX);CHKERRQ(ierr); /* Open EXODUS II file and read basic informations on rank 0, then broadcast to all processors */ if (!rank) { ierr = PetscMemzero(title,(PETSC_MAX_PATH_LEN+1)*sizeof(char));CHKERRQ(ierr); ierr = ex_get_init(exoid, title, &dim, &numVertices, &numCells, &num_cs, &num_vs, &num_fs); if (ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ExodusII ex_get_init() failed with error code %D\n",ierr); if (!num_cs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Exodus file does not contain any cell set\n"); } ierr = MPI_Bcast(title, PETSC_MAX_PATH_LEN+1, MPI_CHAR, 0, comm);CHKERRQ(ierr); ierr = MPI_Bcast(&dim, 1, MPI_INT, 0, comm);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject) *dm, title);CHKERRQ(ierr); ierr = DMSetDimension(*dm, dim);CHKERRQ(ierr); ierr = DMPlexSetChart(*dm, 0, numCells+numVertices);CHKERRQ(ierr); /* Read cell sets information */ if (!rank) { PetscInt *cone; int c, cs, c_loc, v, v_loc; /* Read from ex_get_elem_blk_ids() */ int *cs_id; /* Read from ex_get_elem_block() */ char buffer[PETSC_MAX_PATH_LEN+1]; int num_cell_in_set, num_vertex_per_cell, num_attr; /* Read from ex_get_elem_conn() */ int *cs_connect; /* Get cell sets IDs */ ierr = PetscMalloc1(num_cs, &cs_id);CHKERRQ(ierr); ierr = ex_get_elem_blk_ids(exoid, cs_id);CHKERRQ(ierr); /* Read the cell set connectivity table and build mesh topology EXO standard requires that cells in cell sets be numbered sequentially and be pairwise disjoint. */ /* First set sizes */ for (cs = 0, c = 0; cs < num_cs; cs++) { ierr = ex_get_elem_block(exoid, cs_id[cs], buffer, &num_cell_in_set, &num_vertex_per_cell, &num_attr);CHKERRQ(ierr); for (c_loc = 0; c_loc < num_cell_in_set; ++c_loc, ++c) { ierr = DMPlexSetConeSize(*dm, c, num_vertex_per_cell);CHKERRQ(ierr); } } ierr = DMSetUp(*dm);CHKERRQ(ierr); for (cs = 0, c = 0; cs < num_cs; cs++) { ierr = ex_get_elem_block(exoid, cs_id[cs], buffer, &num_cell_in_set, &num_vertex_per_cell, &num_attr);CHKERRQ(ierr); ierr = PetscMalloc2(num_vertex_per_cell*num_cell_in_set,&cs_connect,num_vertex_per_cell,&cone);CHKERRQ(ierr); ierr = ex_get_elem_conn(exoid, cs_id[cs], cs_connect);CHKERRQ(ierr); /* EXO uses Fortran-based indexing, sieve uses C-style and numbers cell first then vertices. */ for (c_loc = 0, v = 0; c_loc < num_cell_in_set; ++c_loc, ++c) { for (v_loc = 0; v_loc < num_vertex_per_cell; ++v_loc, ++v) { cone[v_loc] = cs_connect[v]+numCells-1; } if (dim == 3) { /* Tetrahedra are inverted */ if (num_vertex_per_cell == 4) { PetscInt tmp = cone[0]; cone[0] = cone[1]; cone[1] = tmp; } /* Hexahedra are inverted */ if (num_vertex_per_cell == 8) { PetscInt tmp = cone[1]; cone[1] = cone[3]; cone[3] = tmp; } } ierr = DMPlexSetCone(*dm, c, cone);CHKERRQ(ierr); ierr = DMSetLabelValue(*dm, "Cell Sets", c, cs_id[cs]);CHKERRQ(ierr); } ierr = PetscFree2(cs_connect,cone);CHKERRQ(ierr); } ierr = PetscFree(cs_id);CHKERRQ(ierr); } ierr = DMPlexSymmetrize(*dm);CHKERRQ(ierr); ierr = DMPlexStratify(*dm);CHKERRQ(ierr); if (interpolate) { DM idm = NULL; ierr = DMPlexInterpolate(*dm, &idm);CHKERRQ(ierr); /* Maintain Cell Sets label */ { DMLabel label; ierr = DMRemoveLabel(*dm, "Cell Sets", &label);CHKERRQ(ierr); if (label) {ierr = DMAddLabel(idm, label);CHKERRQ(ierr);} } ierr = DMDestroy(dm);CHKERRQ(ierr); *dm = idm; } /* Create vertex set label */ if (!rank && (num_vs > 0)) { int vs, v; /* Read from ex_get_node_set_ids() */ int *vs_id; /* Read from ex_get_node_set_param() */ int num_vertex_in_set, num_attr; /* Read from ex_get_node_set() */ int *vs_vertex_list; /* Get vertex set ids */ ierr = PetscMalloc1(num_vs, &vs_id);CHKERRQ(ierr); ierr = ex_get_node_set_ids(exoid, vs_id);CHKERRQ(ierr); for (vs = 0; vs < num_vs; ++vs) { ierr = ex_get_node_set_param(exoid, vs_id[vs], &num_vertex_in_set, &num_attr);CHKERRQ(ierr); ierr = PetscMalloc1(num_vertex_in_set, &vs_vertex_list);CHKERRQ(ierr); ierr = ex_get_node_set(exoid, vs_id[vs], vs_vertex_list);CHKERRQ(ierr); for (v = 0; v < num_vertex_in_set; ++v) { ierr = DMSetLabelValue(*dm, "Vertex Sets", vs_vertex_list[v]+numCells-1, vs_id[vs]);CHKERRQ(ierr); } ierr = PetscFree(vs_vertex_list);CHKERRQ(ierr); } ierr = PetscFree(vs_id);CHKERRQ(ierr); } /* Read coordinates */ ierr = DMGetCoordinateSection(*dm, &coordSection);CHKERRQ(ierr); ierr = PetscSectionSetNumFields(coordSection, 1);CHKERRQ(ierr); ierr = PetscSectionSetFieldComponents(coordSection, 0, dim);CHKERRQ(ierr); ierr = PetscSectionSetChart(coordSection, numCells, numCells + numVertices);CHKERRQ(ierr); for (v = numCells; v < numCells+numVertices; ++v) { ierr = PetscSectionSetDof(coordSection, v, dim);CHKERRQ(ierr); ierr = PetscSectionSetFieldDof(coordSection, v, 0, dim);CHKERRQ(ierr); } ierr = PetscSectionSetUp(coordSection);CHKERRQ(ierr); ierr = PetscSectionGetStorageSize(coordSection, &coordSize);CHKERRQ(ierr); ierr = VecCreate(PETSC_COMM_SELF, &coordinates);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject) coordinates, "coordinates");CHKERRQ(ierr); ierr = VecSetSizes(coordinates, coordSize, PETSC_DETERMINE);CHKERRQ(ierr); ierr = VecSetBlockSize(coordinates, dim);CHKERRQ(ierr); ierr = VecSetType(coordinates,VECSTANDARD);CHKERRQ(ierr); ierr = VecGetArray(coordinates, &coords);CHKERRQ(ierr); if (!rank) { float *x, *y, *z; ierr = PetscMalloc3(numVertices,&x,numVertices,&y,numVertices,&z);CHKERRQ(ierr); ierr = ex_get_coord(exoid, x, y, z);CHKERRQ(ierr); if (dim > 0) { for (v = 0; v < numVertices; ++v) coords[v*dim+0] = x[v]; } if (dim > 1) { for (v = 0; v < numVertices; ++v) coords[v*dim+1] = y[v]; } if (dim > 2) { for (v = 0; v < numVertices; ++v) coords[v*dim+2] = z[v]; } ierr = PetscFree3(x,y,z);CHKERRQ(ierr); } ierr = VecRestoreArray(coordinates, &coords);CHKERRQ(ierr); ierr = DMSetCoordinatesLocal(*dm, coordinates);CHKERRQ(ierr); ierr = VecDestroy(&coordinates);CHKERRQ(ierr); /* Create side set label */ if (!rank && interpolate && (num_fs > 0)) { int fs, f, voff; /* Read from ex_get_side_set_ids() */ int *fs_id; /* Read from ex_get_side_set_param() */ int num_side_in_set, num_dist_fact_in_set; /* Read from ex_get_side_set_node_list() */ int *fs_vertex_count_list, *fs_vertex_list; /* Get side set ids */ ierr = PetscMalloc1(num_fs, &fs_id);CHKERRQ(ierr); ierr = ex_get_side_set_ids(exoid, fs_id);CHKERRQ(ierr); for (fs = 0; fs < num_fs; ++fs) { ierr = ex_get_side_set_param(exoid, fs_id[fs], &num_side_in_set, &num_dist_fact_in_set);CHKERRQ(ierr); ierr = PetscMalloc2(num_side_in_set,&fs_vertex_count_list,num_side_in_set*4,&fs_vertex_list);CHKERRQ(ierr); ierr = ex_get_side_set_node_list(exoid, fs_id[fs], fs_vertex_count_list, fs_vertex_list);CHKERRQ(ierr); for (f = 0, voff = 0; f < num_side_in_set; ++f) { const PetscInt *faces = NULL; PetscInt faceSize = fs_vertex_count_list[f], numFaces; PetscInt faceVertices[4], v; if (faceSize > 4) SETERRQ1(comm, PETSC_ERR_ARG_WRONG, "ExodusII side cannot have %d > 4 vertices", faceSize); for (v = 0; v < faceSize; ++v, ++voff) { faceVertices[v] = fs_vertex_list[voff]+numCells-1; } ierr = DMPlexGetFullJoin(*dm, faceSize, faceVertices, &numFaces, &faces);CHKERRQ(ierr); if (numFaces != 1) SETERRQ3(comm, PETSC_ERR_ARG_WRONG, "Invalid ExodusII side %d in set %d maps to %d faces", f, fs, numFaces); ierr = DMSetLabelValue(*dm, "Face Sets", faces[0], fs_id[fs]);CHKERRQ(ierr); ierr = DMPlexRestoreJoin(*dm, faceSize, faceVertices, &numFaces, &faces);CHKERRQ(ierr); } ierr = PetscFree2(fs_vertex_count_list,fs_vertex_list);CHKERRQ(ierr); } ierr = PetscFree(fs_id);CHKERRQ(ierr); } #else SETERRQ(comm, PETSC_ERR_SUP, "This method requires ExodusII support. Reconfigure using --download-exodusii"); #endif PetscFunctionReturn(0); }
PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat P,Mat C) { PetscErrorCode ierr; Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data,*c=(Mat_MPIAIJ*)C->data; Mat_SeqAIJ *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data; Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data; Mat_SeqAIJ *p_loc,*p_oth; Mat_PtAPMPI *ptap; Mat_Merge_SeqsToMPI *merge; PetscInt *adi=ad->i,*aoi=ao->i,*adj,*aoj,*apJ,nextp; PetscInt *pi_loc,*pj_loc,*pi_oth,*pj_oth,*pJ,*pj; PetscInt i,j,k,anz,pnz,apnz,nextap,row,*cj; MatScalar *ada,*aoa,*apa,*pa,*ca,*pa_loc,*pa_oth,valtmp; PetscInt am =A->rmap->n,cm=C->rmap->n,pon=(p->B)->cmap->n; MPI_Comm comm; PetscMPIInt size,rank,taga,*len_s; PetscInt *owners,proc,nrows,**buf_ri_k,**nextrow,**nextci; PetscInt **buf_ri,**buf_rj; PetscInt cnz=0,*bj_i,*bi,*bj,bnz,nextcj; /* bi,bj,ba: local array of C(mpi mat) */ MPI_Request *s_waits,*r_waits; MPI_Status *status; MatScalar **abuf_r,*ba_i,*pA,*coa,*ba; PetscInt *api,*apj,*coi,*coj; PetscInt *poJ=po->j,*pdJ=pd->j,pcstart=P->cmap->rstart,pcend=P->cmap->rend; PetscBool scalable; #if defined(PTAP_PROFILE) PetscLogDouble t0,t1,t2,t3,t4,et2_AP=0.0,et2_PtAP=0.0,t2_0,t2_1,t2_2; #endif PetscFunctionBegin; ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr); #if defined(PTAP_PROFILE) ierr = PetscTime(&t0);CHKERRQ(ierr); #endif ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); ptap = c->ptap; if (!ptap) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_ARG_INCOMP,"MatPtAP() has not been called to create matrix C yet, cannot use MAT_REUSE_MATRIX"); merge = ptap->merge; apa = ptap->apa; scalable = ptap->scalable; /* 1) get P_oth = ptap->P_oth and P_loc = ptap->P_loc */ /*--------------------------------------------------*/ if (ptap->reuse == MAT_INITIAL_MATRIX) { /* P_oth and P_loc are obtained in MatPtASymbolic(), skip calling MatGetBrowsOfAoCols() and MatMPIAIJGetLocalMat() */ ptap->reuse = MAT_REUSE_MATRIX; } else { /* update numerical values of P_oth and P_loc */ ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_REUSE_MATRIX,&ptap->startsj_s,&ptap->startsj_r,&ptap->bufa,&ptap->P_oth);CHKERRQ(ierr); ierr = MatMPIAIJGetLocalMat(P,MAT_REUSE_MATRIX,&ptap->P_loc);CHKERRQ(ierr); } #if defined(PTAP_PROFILE) ierr = PetscTime(&t1);CHKERRQ(ierr); #endif /* 2) compute numeric C_seq = P_loc^T*A_loc*P - dominating part */ /*--------------------------------------------------------------*/ /* get data from symbolic products */ p_loc = (Mat_SeqAIJ*)(ptap->P_loc)->data; p_oth = (Mat_SeqAIJ*)(ptap->P_oth)->data; pi_loc=p_loc->i; pj_loc=p_loc->j; pJ=pj_loc; pa_loc=p_loc->a; pi_oth=p_oth->i; pj_oth=p_oth->j; pa_oth=p_oth->a; coi = merge->coi; coj = merge->coj; ierr = PetscCalloc1(coi[pon]+1,&coa);CHKERRQ(ierr); bi = merge->bi; bj = merge->bj; owners = merge->rowmap->range; ierr = PetscCalloc1(bi[cm]+1,&ba);CHKERRQ(ierr); /* ba: Cseq->a */ api = ptap->api; apj = ptap->apj; if (!scalable) { /* Do dense axpy on apa (length of pN, stores A[i,:]*P) - nonscalable, but fast */ ierr = PetscInfo(C,"Using non-scalable dense axpy\n");CHKERRQ(ierr); /*-----------------------------------------------------------------------------------------------------*/ for (i=0; i<am; i++) { #if defined(PTAP_PROFILE) ierr = PetscTime(&t2_0);CHKERRQ(ierr); #endif /* 2-a) form i-th sparse row of A_loc*P = Ad*P_loc + Ao*P_oth */ /*------------------------------------------------------------*/ apJ = apj + api[i]; /* diagonal portion of A */ anz = adi[i+1] - adi[i]; adj = ad->j + adi[i]; ada = ad->a + adi[i]; for (j=0; j<anz; j++) { row = adj[j]; pnz = pi_loc[row+1] - pi_loc[row]; pj = pj_loc + pi_loc[row]; pa = pa_loc + pi_loc[row]; /* perform dense axpy */ valtmp = ada[j]; for (k=0; k<pnz; k++) { apa[pj[k]] += valtmp*pa[k]; } ierr = PetscLogFlops(2.0*pnz);CHKERRQ(ierr); } /* off-diagonal portion of A */ anz = aoi[i+1] - aoi[i]; aoj = ao->j + aoi[i]; aoa = ao->a + aoi[i]; for (j=0; j<anz; j++) { row = aoj[j]; pnz = pi_oth[row+1] - pi_oth[row]; pj = pj_oth + pi_oth[row]; pa = pa_oth + pi_oth[row]; /* perform dense axpy */ valtmp = aoa[j]; for (k=0; k<pnz; k++) { apa[pj[k]] += valtmp*pa[k]; } ierr = PetscLogFlops(2.0*pnz);CHKERRQ(ierr); } #if defined(PTAP_PROFILE) ierr = PetscTime(&t2_1);CHKERRQ(ierr); et2_AP += t2_1 - t2_0; #endif /* 2-b) Compute Cseq = P_loc[i,:]^T*AP[i,:] using outer product */ /*--------------------------------------------------------------*/ apnz = api[i+1] - api[i]; /* put the value into Co=(p->B)^T*AP (off-diagonal part, send to others) */ pnz = po->i[i+1] - po->i[i]; poJ = po->j + po->i[i]; pA = po->a + po->i[i]; for (j=0; j<pnz; j++) { row = poJ[j]; cnz = coi[row+1] - coi[row]; cj = coj + coi[row]; ca = coa + coi[row]; /* perform dense axpy */ valtmp = pA[j]; for (k=0; k<cnz; k++) { ca[k] += valtmp*apa[cj[k]]; } ierr = PetscLogFlops(2.0*cnz);CHKERRQ(ierr); } /* put the value into Cd (diagonal part) */ pnz = pd->i[i+1] - pd->i[i]; pdJ = pd->j + pd->i[i]; pA = pd->a + pd->i[i]; for (j=0; j<pnz; j++) { row = pdJ[j]; cnz = bi[row+1] - bi[row]; cj = bj + bi[row]; ca = ba + bi[row]; /* perform dense axpy */ valtmp = pA[j]; for (k=0; k<cnz; k++) { ca[k] += valtmp*apa[cj[k]]; } ierr = PetscLogFlops(2.0*cnz);CHKERRQ(ierr); } /* zero the current row of A*P */ for (k=0; k<apnz; k++) apa[apJ[k]] = 0.0; #if defined(PTAP_PROFILE) ierr = PetscTime(&t2_2);CHKERRQ(ierr); et2_PtAP += t2_2 - t2_1; #endif } } else { /* Do sparse axpy on apa (length of ap_rmax, stores A[i,:]*P) - scalable, but slower */ ierr = PetscInfo(C,"Using scalable sparse axpy\n");CHKERRQ(ierr); /*-----------------------------------------------------------------------------------------*/ pA=pa_loc; for (i=0; i<am; i++) { #if defined(PTAP_PROFILE) ierr = PetscTime(&t2_0);CHKERRQ(ierr); #endif /* form i-th sparse row of A*P */ apnz = api[i+1] - api[i]; apJ = apj + api[i]; /* diagonal portion of A */ anz = adi[i+1] - adi[i]; adj = ad->j + adi[i]; ada = ad->a + adi[i]; for (j=0; j<anz; j++) { row = adj[j]; pnz = pi_loc[row+1] - pi_loc[row]; pj = pj_loc + pi_loc[row]; pa = pa_loc + pi_loc[row]; valtmp = ada[j]; nextp = 0; for (k=0; nextp<pnz; k++) { if (apJ[k] == pj[nextp]) { /* col of AP == col of P */ apa[k] += valtmp*pa[nextp++]; } } ierr = PetscLogFlops(2.0*pnz);CHKERRQ(ierr); } /* off-diagonal portion of A */ anz = aoi[i+1] - aoi[i]; aoj = ao->j + aoi[i]; aoa = ao->a + aoi[i]; for (j=0; j<anz; j++) { row = aoj[j]; pnz = pi_oth[row+1] - pi_oth[row]; pj = pj_oth + pi_oth[row]; pa = pa_oth + pi_oth[row]; valtmp = aoa[j]; nextp = 0; for (k=0; nextp<pnz; k++) { if (apJ[k] == pj[nextp]) { /* col of AP == col of P */ apa[k] += valtmp*pa[nextp++]; } } ierr = PetscLogFlops(2.0*pnz);CHKERRQ(ierr); } #if defined(PTAP_PROFILE) ierr = PetscTime(&t2_1);CHKERRQ(ierr); et2_AP += t2_1 - t2_0; #endif /* 2-b) Compute Cseq = P_loc[i,:]^T*AP[i,:] using outer product */ /*--------------------------------------------------------------*/ pnz = pi_loc[i+1] - pi_loc[i]; pJ = pj_loc + pi_loc[i]; for (j=0; j<pnz; j++) { nextap = 0; row = pJ[j]; /* global index */ if (row < pcstart || row >=pcend) { /* put the value into Co */ row = *poJ; cj = coj + coi[row]; ca = coa + coi[row]; poJ++; } else { /* put the value into Cd */ row = *pdJ; cj = bj + bi[row]; ca = ba + bi[row]; pdJ++; } valtmp = pA[j]; for (k=0; nextap<apnz; k++) { if (cj[k]==apJ[nextap]) ca[k] += valtmp*apa[nextap++]; } ierr = PetscLogFlops(2.0*apnz);CHKERRQ(ierr); } pA += pnz; /* zero the current row info for A*P */ ierr = PetscMemzero(apa,apnz*sizeof(MatScalar));CHKERRQ(ierr); #if defined(PTAP_PROFILE) ierr = PetscTime(&t2_2);CHKERRQ(ierr); et2_PtAP += t2_2 - t2_1; #endif } } #if defined(PTAP_PROFILE) ierr = PetscTime(&t2);CHKERRQ(ierr); #endif /* 3) send and recv matrix values coa */ /*------------------------------------*/ buf_ri = merge->buf_ri; buf_rj = merge->buf_rj; len_s = merge->len_s; ierr = PetscCommGetNewTag(comm,&taga);CHKERRQ(ierr); ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); ierr = PetscMalloc2(merge->nsend+1,&s_waits,size,&status);CHKERRQ(ierr); for (proc=0,k=0; proc<size; proc++) { if (!len_s[proc]) continue; i = merge->owners_co[proc]; ierr = MPI_Isend(coa+coi[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); k++; } if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} ierr = PetscFree2(s_waits,status);CHKERRQ(ierr); ierr = PetscFree(r_waits);CHKERRQ(ierr); ierr = PetscFree(coa);CHKERRQ(ierr); #if defined(PTAP_PROFILE) ierr = PetscTime(&t3);CHKERRQ(ierr); #endif /* 4) insert local Cseq and received values into Cmpi */ /*------------------------------------------------------*/ ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextci);CHKERRQ(ierr); for (k=0; k<merge->nrecv; k++) { buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ nrows = *(buf_ri_k[k]); nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ nextci[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ } for (i=0; i<cm; i++) { row = owners[rank] + i; /* global row index of C_seq */ bj_i = bj + bi[i]; /* col indices of the i-th row of C */ ba_i = ba + bi[i]; bnz = bi[i+1] - bi[i]; /* add received vals into ba */ for (k=0; k<merge->nrecv; k++) { /* k-th received message */ /* i-th row */ if (i == *nextrow[k]) { cnz = *(nextci[k]+1) - *nextci[k]; cj = buf_rj[k] + *(nextci[k]); ca = abuf_r[k] + *(nextci[k]); nextcj = 0; for (j=0; nextcj<cnz; j++) { if (bj_i[j] == cj[nextcj]) { /* bcol == ccol */ ba_i[j] += ca[nextcj++]; } } nextrow[k]++; nextci[k]++; ierr = PetscLogFlops(2.0*cnz);CHKERRQ(ierr); } } ierr = MatSetValues(C,1,&row,bnz,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); } ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = PetscFree(ba);CHKERRQ(ierr); ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); ierr = PetscFree(abuf_r);CHKERRQ(ierr); ierr = PetscFree3(buf_ri_k,nextrow,nextci);CHKERRQ(ierr); #if defined(PTAP_PROFILE) ierr = PetscTime(&t4);CHKERRQ(ierr); if (rank==1) PetscPrintf(MPI_COMM_SELF," [%d] PtAPNum %g/P + %g/PtAP( %g + %g ) + %g/comm + %g/Cloc = %g\n\n",rank,t1-t0,t2-t1,et2_AP,et2_PtAP,t3-t2,t4-t3,t4-t0);CHKERRQ(ierr); #endif PetscFunctionReturn(0); }
/*@C DMPlexDistribute - Distributes the mesh and any associated sections. Not Collective Input Parameter: + dm - The original DMPlex object . partitioner - The partitioning package, or NULL for the default - overlap - The overlap of partitions, 0 is the default Output Parameter: + sf - The PetscSF used for point distribution - parallelMesh - The distributed DMPlex object, or NULL Note: If the mesh was not distributed, the return value is NULL. The user can control the definition of adjacency for the mesh using DMPlexGetAdjacencyUseCone() and DMPlexSetAdjacencyUseClosure(). They should choose the combination appropriate for the function representation on the mesh. Level: intermediate .keywords: mesh, elements .seealso: DMPlexCreate(), DMPlexDistributeByFace(), DMPlexSetAdjacencyUseCone(), DMPlexSetAdjacencyUseClosure() @*/ PetscErrorCode DMPlexDistribute(DM dm, const char partitioner[], PetscInt overlap, PetscSF *sf, DM *dmParallel) { DM_Plex *mesh = (DM_Plex*) dm->data, *pmesh; MPI_Comm comm; const PetscInt height = 0; PetscInt dim, numRemoteRanks; IS origCellPart, origPart, cellPart, part; PetscSection origCellPartSection, origPartSection, cellPartSection, partSection; PetscSFNode *remoteRanks; PetscSF partSF, pointSF, coneSF; ISLocalToGlobalMapping renumbering; PetscSection originalConeSection, newConeSection; PetscInt *remoteOffsets; PetscInt *cones, *newCones, newConesSize; PetscBool flg; PetscMPIInt rank, numProcs, p; PetscErrorCode ierr; PetscFunctionBegin; PetscValidHeaderSpecific(dm, DM_CLASSID, 1); if (sf) PetscValidPointer(sf,4); PetscValidPointer(dmParallel,5); ierr = PetscLogEventBegin(DMPLEX_Distribute,dm,0,0,0);CHKERRQ(ierr); ierr = PetscObjectGetComm((PetscObject)dm,&comm);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); ierr = MPI_Comm_size(comm, &numProcs);CHKERRQ(ierr); *dmParallel = NULL; if (numProcs == 1) PetscFunctionReturn(0); ierr = DMPlexGetDimension(dm, &dim);CHKERRQ(ierr); /* Create cell partition - We need to rewrite to use IS, use the MatPartition stuff */ ierr = PetscLogEventBegin(DMPLEX_Partition,dm,0,0,0);CHKERRQ(ierr); if (overlap > 1) SETERRQ(PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Overlap > 1 not yet implemented"); ierr = DMPlexCreatePartition(dm, partitioner, height, overlap > 0 ? PETSC_TRUE : PETSC_FALSE, &cellPartSection, &cellPart, &origCellPartSection, &origCellPart);CHKERRQ(ierr); /* Create SF assuming a serial partition for all processes: Could check for IS length here */ if (!rank) numRemoteRanks = numProcs; else numRemoteRanks = 0; ierr = PetscMalloc1(numRemoteRanks, &remoteRanks);CHKERRQ(ierr); for (p = 0; p < numRemoteRanks; ++p) { remoteRanks[p].rank = p; remoteRanks[p].index = 0; } ierr = PetscSFCreate(comm, &partSF);CHKERRQ(ierr); ierr = PetscSFSetGraph(partSF, 1, numRemoteRanks, NULL, PETSC_OWN_POINTER, remoteRanks, PETSC_OWN_POINTER);CHKERRQ(ierr); ierr = PetscOptionsHasName(((PetscObject) dm)->prefix, "-partition_view", &flg);CHKERRQ(ierr); if (flg) { ierr = PetscPrintf(comm, "Cell Partition:\n");CHKERRQ(ierr); ierr = PetscSectionView(cellPartSection, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); ierr = ISView(cellPart, NULL);CHKERRQ(ierr); if (origCellPart) { ierr = PetscPrintf(comm, "Original Cell Partition:\n");CHKERRQ(ierr); ierr = PetscSectionView(origCellPartSection, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); ierr = ISView(origCellPart, NULL);CHKERRQ(ierr); } ierr = PetscSFView(partSF, NULL);CHKERRQ(ierr); } /* Close the partition over the mesh */ ierr = DMPlexCreatePartitionClosure(dm, cellPartSection, cellPart, &partSection, &part);CHKERRQ(ierr); ierr = ISDestroy(&cellPart);CHKERRQ(ierr); ierr = PetscSectionDestroy(&cellPartSection);CHKERRQ(ierr); /* Create new mesh */ ierr = DMPlexCreate(comm, dmParallel);CHKERRQ(ierr); ierr = DMPlexSetDimension(*dmParallel, dim);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject) *dmParallel, "Parallel Mesh");CHKERRQ(ierr); pmesh = (DM_Plex*) (*dmParallel)->data; /* Distribute sieve points and the global point numbering (replaces creating remote bases) */ ierr = PetscSFConvertPartition(partSF, partSection, part, &renumbering, &pointSF);CHKERRQ(ierr); if (flg) { ierr = PetscPrintf(comm, "Point Partition:\n");CHKERRQ(ierr); ierr = PetscSectionView(partSection, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); ierr = ISView(part, NULL);CHKERRQ(ierr); ierr = PetscSFView(pointSF, NULL);CHKERRQ(ierr); ierr = PetscPrintf(comm, "Point Renumbering after partition:\n");CHKERRQ(ierr); ierr = ISLocalToGlobalMappingView(renumbering, NULL);CHKERRQ(ierr); } ierr = PetscLogEventEnd(DMPLEX_Partition,dm,0,0,0);CHKERRQ(ierr); ierr = PetscLogEventBegin(DMPLEX_DistributeCones,dm,0,0,0);CHKERRQ(ierr); /* Distribute cone section */ ierr = DMPlexGetConeSection(dm, &originalConeSection);CHKERRQ(ierr); ierr = DMPlexGetConeSection(*dmParallel, &newConeSection);CHKERRQ(ierr); ierr = PetscSFDistributeSection(pointSF, originalConeSection, &remoteOffsets, newConeSection);CHKERRQ(ierr); ierr = DMSetUp(*dmParallel);CHKERRQ(ierr); { PetscInt pStart, pEnd, p; ierr = PetscSectionGetChart(newConeSection, &pStart, &pEnd);CHKERRQ(ierr); for (p = pStart; p < pEnd; ++p) { PetscInt coneSize; ierr = PetscSectionGetDof(newConeSection, p, &coneSize);CHKERRQ(ierr); pmesh->maxConeSize = PetscMax(pmesh->maxConeSize, coneSize); } } /* Communicate and renumber cones */ ierr = PetscSFCreateSectionSF(pointSF, originalConeSection, remoteOffsets, newConeSection, &coneSF);CHKERRQ(ierr); ierr = DMPlexGetCones(dm, &cones);CHKERRQ(ierr); ierr = DMPlexGetCones(*dmParallel, &newCones);CHKERRQ(ierr); ierr = PetscSFBcastBegin(coneSF, MPIU_INT, cones, newCones);CHKERRQ(ierr); ierr = PetscSFBcastEnd(coneSF, MPIU_INT, cones, newCones);CHKERRQ(ierr); ierr = PetscSectionGetStorageSize(newConeSection, &newConesSize);CHKERRQ(ierr); ierr = ISGlobalToLocalMappingApply(renumbering, IS_GTOLM_MASK, newConesSize, newCones, NULL, newCones);CHKERRQ(ierr); ierr = PetscOptionsHasName(((PetscObject) dm)->prefix, "-cones_view", &flg);CHKERRQ(ierr); if (flg) { ierr = PetscPrintf(comm, "Serial Cone Section:\n");CHKERRQ(ierr); ierr = PetscSectionView(originalConeSection, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); ierr = PetscPrintf(comm, "Parallel Cone Section:\n");CHKERRQ(ierr); ierr = PetscSectionView(newConeSection, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); ierr = PetscSFView(coneSF, NULL);CHKERRQ(ierr); } ierr = DMPlexGetConeOrientations(dm, &cones);CHKERRQ(ierr); ierr = DMPlexGetConeOrientations(*dmParallel, &newCones);CHKERRQ(ierr); ierr = PetscSFBcastBegin(coneSF, MPIU_INT, cones, newCones);CHKERRQ(ierr); ierr = PetscSFBcastEnd(coneSF, MPIU_INT, cones, newCones);CHKERRQ(ierr); ierr = PetscSFDestroy(&coneSF);CHKERRQ(ierr); ierr = PetscLogEventEnd(DMPLEX_DistributeCones,dm,0,0,0);CHKERRQ(ierr); /* Create supports and stratify sieve */ { PetscInt pStart, pEnd; ierr = PetscSectionGetChart(pmesh->coneSection, &pStart, &pEnd);CHKERRQ(ierr); ierr = PetscSectionSetChart(pmesh->supportSection, pStart, pEnd);CHKERRQ(ierr); } ierr = DMPlexSymmetrize(*dmParallel);CHKERRQ(ierr); ierr = DMPlexStratify(*dmParallel);CHKERRQ(ierr); /* Distribute Coordinates */ { PetscSection originalCoordSection, newCoordSection; Vec originalCoordinates, newCoordinates; const char *name; ierr = DMGetCoordinateSection(dm, &originalCoordSection);CHKERRQ(ierr); ierr = DMGetCoordinateSection(*dmParallel, &newCoordSection);CHKERRQ(ierr); ierr = DMGetCoordinatesLocal(dm, &originalCoordinates);CHKERRQ(ierr); ierr = VecCreate(comm, &newCoordinates);CHKERRQ(ierr); ierr = PetscObjectGetName((PetscObject) originalCoordinates, &name);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject) newCoordinates, name);CHKERRQ(ierr); ierr = DMPlexDistributeField(dm, pointSF, originalCoordSection, originalCoordinates, newCoordSection, newCoordinates);CHKERRQ(ierr); ierr = DMSetCoordinatesLocal(*dmParallel, newCoordinates);CHKERRQ(ierr); ierr = VecDestroy(&newCoordinates);CHKERRQ(ierr); } /* Distribute labels */ ierr = PetscLogEventBegin(DMPLEX_DistributeLabels,dm,0,0,0);CHKERRQ(ierr); { DMLabel next = mesh->labels, newNext = pmesh->labels; PetscInt numLabels = 0, l; /* Bcast number of labels */ while (next) {++numLabels; next = next->next;} ierr = MPI_Bcast(&numLabels, 1, MPIU_INT, 0, comm);CHKERRQ(ierr); next = mesh->labels; for (l = 0; l < numLabels; ++l) { DMLabel labelNew; PetscBool isdepth; /* Skip "depth" because it is recreated */ if (!rank) {ierr = PetscStrcmp(next->name, "depth", &isdepth);CHKERRQ(ierr);} ierr = MPI_Bcast(&isdepth, 1, MPIU_BOOL, 0, comm);CHKERRQ(ierr); if (isdepth) {if (!rank) next = next->next; continue;} ierr = DMLabelDistribute(next, partSection, part, renumbering, &labelNew);CHKERRQ(ierr); /* Insert into list */ if (newNext) newNext->next = labelNew; else pmesh->labels = labelNew; newNext = labelNew; if (!rank) next = next->next; } } ierr = PetscLogEventEnd(DMPLEX_DistributeLabels,dm,0,0,0);CHKERRQ(ierr); /* Setup hybrid structure */ { const PetscInt *gpoints; PetscInt depth, n, d; for (d = 0; d <= dim; ++d) {pmesh->hybridPointMax[d] = mesh->hybridPointMax[d];} ierr = MPI_Bcast(pmesh->hybridPointMax, dim+1, MPIU_INT, 0, comm);CHKERRQ(ierr); ierr = ISLocalToGlobalMappingGetSize(renumbering, &n);CHKERRQ(ierr); ierr = ISLocalToGlobalMappingGetIndices(renumbering, &gpoints);CHKERRQ(ierr); ierr = DMPlexGetDepth(dm, &depth);CHKERRQ(ierr); for (d = 0; d <= dim; ++d) { PetscInt pmax = pmesh->hybridPointMax[d], newmax = 0, pEnd, stratum[2], p; if (pmax < 0) continue; ierr = DMPlexGetDepthStratum(dm, d > depth ? depth : d, &stratum[0], &stratum[1]);CHKERRQ(ierr); ierr = DMPlexGetDepthStratum(*dmParallel, d, NULL, &pEnd);CHKERRQ(ierr); ierr = MPI_Bcast(stratum, 2, MPIU_INT, 0, comm);CHKERRQ(ierr); for (p = 0; p < n; ++p) { const PetscInt point = gpoints[p]; if ((point >= stratum[0]) && (point < stratum[1]) && (point >= pmax)) ++newmax; } if (newmax > 0) pmesh->hybridPointMax[d] = pEnd - newmax; else pmesh->hybridPointMax[d] = -1; } ierr = ISLocalToGlobalMappingRestoreIndices(renumbering, &gpoints);CHKERRQ(ierr); } /* Cleanup Partition */ ierr = ISLocalToGlobalMappingDestroy(&renumbering);CHKERRQ(ierr); ierr = PetscSFDestroy(&partSF);CHKERRQ(ierr); ierr = PetscSectionDestroy(&partSection);CHKERRQ(ierr); ierr = ISDestroy(&part);CHKERRQ(ierr); /* Create point SF for parallel mesh */ ierr = PetscLogEventBegin(DMPLEX_DistributeSF,dm,0,0,0);CHKERRQ(ierr); { const PetscInt *leaves; PetscSFNode *remotePoints, *rowners, *lowners; PetscInt numRoots, numLeaves, numGhostPoints = 0, p, gp, *ghostPoints; PetscInt pStart, pEnd; ierr = DMPlexGetChart(*dmParallel, &pStart, &pEnd);CHKERRQ(ierr); ierr = PetscSFGetGraph(pointSF, &numRoots, &numLeaves, &leaves, NULL);CHKERRQ(ierr); ierr = PetscMalloc2(numRoots,&rowners,numLeaves,&lowners);CHKERRQ(ierr); for (p=0; p<numRoots; p++) { rowners[p].rank = -1; rowners[p].index = -1; } if (origCellPart) { /* Make sure points in the original partition are not assigned to other procs */ const PetscInt *origPoints; ierr = DMPlexCreatePartitionClosure(dm, origCellPartSection, origCellPart, &origPartSection, &origPart);CHKERRQ(ierr); ierr = ISGetIndices(origPart, &origPoints);CHKERRQ(ierr); for (p = 0; p < numProcs; ++p) { PetscInt dof, off, d; ierr = PetscSectionGetDof(origPartSection, p, &dof);CHKERRQ(ierr); ierr = PetscSectionGetOffset(origPartSection, p, &off);CHKERRQ(ierr); for (d = off; d < off+dof; ++d) { rowners[origPoints[d]].rank = p; } } ierr = ISRestoreIndices(origPart, &origPoints);CHKERRQ(ierr); ierr = ISDestroy(&origPart);CHKERRQ(ierr); ierr = PetscSectionDestroy(&origPartSection);CHKERRQ(ierr); } ierr = ISDestroy(&origCellPart);CHKERRQ(ierr); ierr = PetscSectionDestroy(&origCellPartSection);CHKERRQ(ierr); ierr = PetscSFBcastBegin(pointSF, MPIU_2INT, rowners, lowners);CHKERRQ(ierr); ierr = PetscSFBcastEnd(pointSF, MPIU_2INT, rowners, lowners);CHKERRQ(ierr); for (p = 0; p < numLeaves; ++p) { if (lowners[p].rank < 0 || lowners[p].rank == rank) { /* Either put in a bid or we know we own it */ lowners[p].rank = rank; lowners[p].index = leaves ? leaves[p] : p; } else if (lowners[p].rank >= 0) { /* Point already claimed so flag so that MAXLOC does not listen to us */ lowners[p].rank = -2; lowners[p].index = -2; } } for (p=0; p<numRoots; p++) { /* Root must not participate in the rediction, flag so that MAXLOC does not use */ rowners[p].rank = -3; rowners[p].index = -3; } ierr = PetscSFReduceBegin(pointSF, MPIU_2INT, lowners, rowners, MPI_MAXLOC);CHKERRQ(ierr); ierr = PetscSFReduceEnd(pointSF, MPIU_2INT, lowners, rowners, MPI_MAXLOC);CHKERRQ(ierr); ierr = PetscSFBcastBegin(pointSF, MPIU_2INT, rowners, lowners);CHKERRQ(ierr); ierr = PetscSFBcastEnd(pointSF, MPIU_2INT, rowners, lowners);CHKERRQ(ierr); for (p = 0; p < numLeaves; ++p) { if (lowners[p].rank < 0 || lowners[p].index < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cell partition corrupt: point not claimed"); if (lowners[p].rank != rank) ++numGhostPoints; } ierr = PetscMalloc1(numGhostPoints, &ghostPoints);CHKERRQ(ierr); ierr = PetscMalloc1(numGhostPoints, &remotePoints);CHKERRQ(ierr); for (p = 0, gp = 0; p < numLeaves; ++p) { if (lowners[p].rank != rank) { ghostPoints[gp] = leaves ? leaves[p] : p; remotePoints[gp].rank = lowners[p].rank; remotePoints[gp].index = lowners[p].index; ++gp; } } ierr = PetscFree2(rowners,lowners);CHKERRQ(ierr); ierr = PetscSFSetGraph((*dmParallel)->sf, pEnd - pStart, numGhostPoints, ghostPoints, PETSC_OWN_POINTER, remotePoints, PETSC_OWN_POINTER);CHKERRQ(ierr); ierr = PetscSFSetFromOptions((*dmParallel)->sf);CHKERRQ(ierr); } pmesh->useCone = mesh->useCone; pmesh->useClosure = mesh->useClosure; ierr = PetscLogEventEnd(DMPLEX_DistributeSF,dm,0,0,0);CHKERRQ(ierr); /* Cleanup */ if (sf) {*sf = pointSF;} else {ierr = PetscSFDestroy(&pointSF);CHKERRQ(ierr);} ierr = DMSetFromOptions(*dmParallel);CHKERRQ(ierr); ierr = PetscLogEventEnd(DMPLEX_Distribute,dm,0,0,0);CHKERRQ(ierr); PetscFunctionReturn(0); }
int main(int argc,char *argv[]) { char mat_type[256] = "aij"; /* default matrix type */ PetscErrorCode ierr; MPI_Comm comm; PetscMPIInt rank,size; DM slice; PetscInt i,bs=1,N=5,n,m,rstart,ghosts[2],*d_nnz,*o_nnz,dfill[4]={1,0,0,1},ofill[4]={1,1,1,1}; PetscReal alpha =1,K=1,rho0=1,u0=0,sigma=0.2; PetscBool useblock=PETSC_TRUE; PetscScalar *xx; Mat A; Vec x,b,lf; ierr = PetscInitialize(&argc,&argv,0,help);CHKERRQ(ierr); comm = PETSC_COMM_WORLD; ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); ierr = PetscOptionsBegin(comm,0,"Options for DMSliced test",0);CHKERRQ(ierr); { ierr = PetscOptionsInt("-n","Global number of nodes","",N,&N,NULL);CHKERRQ(ierr); ierr = PetscOptionsInt("-bs","Block size (1 or 2)","",bs,&bs,NULL);CHKERRQ(ierr); if (bs != 1) { if (bs != 2) SETERRQ(PETSC_COMM_WORLD,1,"Block size must be 1 or 2"); ierr = PetscOptionsReal("-alpha","Inverse time step for wave operator","",alpha,&alpha,NULL);CHKERRQ(ierr); ierr = PetscOptionsReal("-K","Bulk modulus of compressibility","",K,&K,NULL);CHKERRQ(ierr); ierr = PetscOptionsReal("-rho0","Reference density","",rho0,&rho0,NULL);CHKERRQ(ierr); ierr = PetscOptionsReal("-u0","Reference velocity","",u0,&u0,NULL);CHKERRQ(ierr); ierr = PetscOptionsReal("-sigma","Width of Gaussian density perturbation","",sigma,&sigma,NULL);CHKERRQ(ierr); ierr = PetscOptionsBool("-block","Use block matrix assembly","",useblock,&useblock,NULL);CHKERRQ(ierr); } ierr = PetscOptionsString("-sliced_mat_type","Matrix type to use (aij or baij)","",mat_type,mat_type,sizeof(mat_type),NULL);CHKERRQ(ierr); } ierr = PetscOptionsEnd();CHKERRQ(ierr); /* Split ownership, set up periodic grid in 1D */ n = PETSC_DECIDE; ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); rstart = 0; ierr = MPI_Scan(&n,&rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); rstart -= n; ghosts[0] = (N+rstart-1)%N; ghosts[1] = (rstart+n)%N; ierr = PetscMalloc2(n,PetscInt,&d_nnz,n,PetscInt,&o_nnz);CHKERRQ(ierr); for (i=0; i<n; i++) { if (size > 1 && (i==0 || i==n-1)) { d_nnz[i] = 2; o_nnz[i] = 1; } else { d_nnz[i] = 3; o_nnz[i] = 0; } } ierr = DMSlicedCreate(comm,bs,n,2,ghosts,d_nnz,o_nnz,&slice);CHKERRQ(ierr); /* Currently does not copy X_nnz so we can't free them until after DMSlicedGetMatrix */ if (!useblock) {ierr = DMSlicedSetBlockFills(slice,dfill,ofill);CHKERRQ(ierr);} /* Irrelevant for baij formats */ ierr = DMSetMatType(slice,mat_type);CHKERRQ(ierr); ierr = DMCreateMatrix(slice,&A);CHKERRQ(ierr); ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); ierr = MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); ierr = DMCreateGlobalVector(slice,&x);CHKERRQ(ierr); ierr = VecDuplicate(x,&b);CHKERRQ(ierr); ierr = VecGhostGetLocalForm(x,&lf);CHKERRQ(ierr); ierr = VecGetSize(lf,&m);CHKERRQ(ierr); if (m != (n+2)*bs) SETERRQ2(PETSC_COMM_SELF,1,"size of local form %D, expected %D",m,(n+2)*bs); ierr = VecGetArray(lf,&xx);CHKERRQ(ierr); for (i=0; i<n; i++) { PetscInt row[2],col[9],im,ip; PetscScalar v[12]; const PetscReal xref = 2.0*(rstart+i)/N - 1; /* [-1,1] */ const PetscReal h = 1.0/N; /* grid spacing */ im = (i==0) ? n : i-1; ip = (i==n-1) ? n+1 : i+1; switch (bs) { case 1: /* Laplacian with periodic boundaries */ col[0] = im; col[1] = i; col[2] = ip; v[0] = -h; v[1] = 2*h; v[2] = -h; ierr = MatSetValuesLocal(A,1,&i,3,col,v,INSERT_VALUES);CHKERRQ(ierr); xx[i] = sin(xref*PETSC_PI); break; case 2: /* Linear acoustic wave operator in variables [rho, u], central differences, periodic, timestep 1/alpha */ v[0] = -0.5*u0; v[1] = -0.5*K; v[2] = alpha; v[3] = 0; v[4] = 0.5*u0; v[5] = 0.5*K; v[6] = -0.5/rho0; v[7] = -0.5*u0; v[8] = 0; v[9] = alpha; v[10] = 0.5/rho0; v[11] = 0.5*u0; if (useblock) { row[0] = i; col[0] = im; col[1] = i; col[2] = ip; ierr = MatSetValuesBlockedLocal(A,1,row,3,col,v,INSERT_VALUES);CHKERRQ(ierr); } else { row[0] = 2*i; row[1] = 2*i+1; col[0] = 2*im; col[1] = 2*im+1; col[2] = 2*i; col[3] = 2*ip; col[4] = 2*ip+1; v[3] = v[4]; v[4] = v[5]; /* pack values in first row */ ierr = MatSetValuesLocal(A,1,row,5,col,v,INSERT_VALUES);CHKERRQ(ierr); col[2] = 2*i+1; v[8] = v[9]; v[9] = v[10]; v[10] = v[11]; /* pack values in second row */ ierr = MatSetValuesLocal(A,1,row+1,5,col,v+6,INSERT_VALUES);CHKERRQ(ierr); } /* Set current state (gaussian density perturbation) */ xx[2*i] = 0.2*exp(-PetscSqr(xref)/(2*PetscSqr(sigma))); xx[2*i+1] = 0; break; default: SETERRQ1(PETSC_COMM_SELF,1,"not implemented for block size %D",bs); } } ierr = VecRestoreArray(lf,&xx);CHKERRQ(ierr); ierr = VecGhostRestoreLocalForm(x,&lf);CHKERRQ(ierr); ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatMult(A,x,b);CHKERRQ(ierr); ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); ierr = VecView(x,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); ierr = VecView(b,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); /* Update the ghosted values, view the result on rank 0. */ ierr = VecGhostUpdateBegin(b,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecGhostUpdateEnd(b,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); if (!rank) { ierr = VecGhostGetLocalForm(b,&lf);CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_SELF,"Local form of b on rank 0, last two nodes are ghost nodes\n");CHKERRQ(ierr); ierr = VecView(lf,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); ierr = VecGhostRestoreLocalForm(b,&lf);CHKERRQ(ierr); } ierr = DMDestroy(&slice);CHKERRQ(ierr); ierr = VecDestroy(&x);CHKERRQ(ierr); ierr = VecDestroy(&b);CHKERRQ(ierr); ierr = MatDestroy(&A);CHKERRQ(ierr); ierr = PetscFinalize(); return 0; }
static PetscErrorCode DMDAVTKWriteAll_VTS(DM da,PetscViewer viewer) { #if defined(PETSC_USE_REAL_SINGLE) const char precision[] = "Float32"; #elif defined(PETSC_USE_REAL_DOUBLE) const char precision[] = "Float64"; #else const char precision[] = "UnknownPrecision"; #endif MPI_Comm comm; Vec Coords; PetscViewer_VTK *vtk = (PetscViewer_VTK*)viewer->data; PetscViewerVTKObjectLink link; FILE *fp; PetscMPIInt rank,size,tag; DMDALocalInfo info; PetscInt dim,mx,my,mz,cdim,bs,boffset,maxnnodes,i,j,k,f,r; PetscInt rloc[6],(*grloc)[6] = NULL; PetscScalar *array,*array2; PetscReal gmin[3],gmax[3]; PetscErrorCode ierr; PetscFunctionBegin; ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); #if defined(PETSC_USE_COMPLEX) SETERRQ(comm,PETSC_ERR_SUP,"Complex values not supported"); #endif ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); ierr = DMDAGetInfo(da,&dim, &mx,&my,&mz, 0,0,0, &bs,0,0,0,0,0);CHKERRQ(ierr); ierr = DMDAGetLocalInfo(da,&info);CHKERRQ(ierr); ierr = DMDAGetBoundingBox(da,gmin,gmax);CHKERRQ(ierr); ierr = DMGetCoordinates(da,&Coords);CHKERRQ(ierr); if (Coords) { PetscInt csize; ierr = VecGetSize(Coords,&csize);CHKERRQ(ierr); if (csize % (mx*my*mz)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Coordinate vector size mismatch"); cdim = csize/(mx*my*mz); if (cdim < dim || cdim > 3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Coordinate vector size mismatch"); } else { cdim = dim; } ierr = PetscFOpen(comm,vtk->filename,"wb",&fp);CHKERRQ(ierr); ierr = PetscFPrintf(comm,fp,"<?xml version=\"1.0\"?>\n");CHKERRQ(ierr); #if defined(PETSC_WORDS_BIGENDIAN) ierr = PetscFPrintf(comm,fp,"<VTKFile type=\"StructuredGrid\" version=\"0.1\" byte_order=\"BigEndian\">\n");CHKERRQ(ierr); #else ierr = PetscFPrintf(comm,fp,"<VTKFile type=\"StructuredGrid\" version=\"0.1\" byte_order=\"LittleEndian\">\n");CHKERRQ(ierr); #endif ierr = PetscFPrintf(comm,fp," <StructuredGrid WholeExtent=\"%D %D %D %D %D %D\">\n",0,mx-1,0,my-1,0,mz-1);CHKERRQ(ierr); if (!rank) {ierr = PetscMalloc1(size*6,&grloc);CHKERRQ(ierr);} rloc[0] = info.xs; rloc[1] = info.xm; rloc[2] = info.ys; rloc[3] = info.ym; rloc[4] = info.zs; rloc[5] = info.zm; ierr = MPI_Gather(rloc,6,MPIU_INT,&grloc[0][0],6,MPIU_INT,0,comm);CHKERRQ(ierr); /* Write XML header */ maxnnodes = 0; /* Used for the temporary array size on rank 0 */ boffset = 0; /* Offset into binary file */ for (r=0; r<size; r++) { PetscInt xs=-1,xm=-1,ys=-1,ym=-1,zs=-1,zm=-1,nnodes = 0; if (!rank) { xs = grloc[r][0]; xm = grloc[r][1]; ys = grloc[r][2]; ym = grloc[r][3]; zs = grloc[r][4]; zm = grloc[r][5]; nnodes = xm*ym*zm; } maxnnodes = PetscMax(maxnnodes,nnodes); #if 0 switch (dim) { case 1: ierr = PetscFPrintf(comm,fp," <Piece Extent=\"%D %D %D %D %D %D\">\n",xs,xs+xm-1,0,0,0,0);CHKERRQ(ierr); break; case 2: ierr = PetscFPrintf(comm,fp," <Piece Extent=\"%D %D %D %D %D %D\">\n",xs,xs+xm,ys+ym-1,xs,xs+xm-1,0,0);CHKERRQ(ierr); break; case 3: ierr = PetscFPrintf(comm,fp," <Piece Extent=\"%D %D %D %D %D %D\">\n",xs,xs+xm-1,ys,ys+ym-1,zs,zs+zm-1);CHKERRQ(ierr); break; default: SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"No support for dimension %D",dim); } #endif ierr = PetscFPrintf(comm,fp," <Piece Extent=\"%D %D %D %D %D %D\">\n",xs,xs+xm-1,ys,ys+ym-1,zs,zs+zm-1);CHKERRQ(ierr); ierr = PetscFPrintf(comm,fp," <Points>\n");CHKERRQ(ierr); ierr = PetscFPrintf(comm,fp," <DataArray type=\"%s\" Name=\"Position\" NumberOfComponents=\"3\" format=\"appended\" offset=\"%D\" />\n",precision,boffset);CHKERRQ(ierr); boffset += 3*nnodes*sizeof(PetscScalar) + sizeof(int); ierr = PetscFPrintf(comm,fp," </Points>\n");CHKERRQ(ierr); ierr = PetscFPrintf(comm,fp," <PointData Scalars=\"ScalarPointData\">\n");CHKERRQ(ierr); for (link=vtk->link; link; link=link->next) { Vec X = (Vec)link->vec; const char *vecname = ""; if (((PetscObject)X)->name || link != vtk->link) { /* If the object is already named, use it. If it is past the first link, name it to disambiguate. */ ierr = PetscObjectGetName((PetscObject)X,&vecname);CHKERRQ(ierr); } for (i=0; i<bs; i++) { char buf[256]; const char *fieldname; ierr = DMDAGetFieldName(da,i,&fieldname);CHKERRQ(ierr); if (!fieldname) { ierr = PetscSNPrintf(buf,sizeof(buf),"Unnamed%D",i);CHKERRQ(ierr); fieldname = buf; } ierr = PetscFPrintf(comm,fp," <DataArray type=\"%s\" Name=\"%s%s\" NumberOfComponents=\"1\" format=\"appended\" offset=\"%D\" />\n",precision,vecname,fieldname,boffset);CHKERRQ(ierr); boffset += nnodes*sizeof(PetscScalar) + sizeof(int); } } ierr = PetscFPrintf(comm,fp," </PointData>\n");CHKERRQ(ierr); ierr = PetscFPrintf(comm,fp," </Piece>\n");CHKERRQ(ierr); } ierr = PetscFPrintf(comm,fp," </StructuredGrid>\n");CHKERRQ(ierr); ierr = PetscFPrintf(comm,fp," <AppendedData encoding=\"raw\">\n");CHKERRQ(ierr); ierr = PetscFPrintf(comm,fp,"_");CHKERRQ(ierr); /* Now write the arrays. */ tag = ((PetscObject)viewer)->tag; ierr = PetscMalloc2(maxnnodes*PetscMax(3,bs),&array,maxnnodes*3,&array2);CHKERRQ(ierr); for (r=0; r<size; r++) { MPI_Status status; PetscInt xs=-1,xm=-1,ys=-1,ym=-1,zs=-1,zm=-1,nnodes = 0; if (!rank) { xs = grloc[r][0]; xm = grloc[r][1]; ys = grloc[r][2]; ym = grloc[r][3]; zs = grloc[r][4]; zm = grloc[r][5]; nnodes = xm*ym*zm; } else if (r == rank) { nnodes = info.xm*info.ym*info.zm; } /* Write the coordinates */ if (Coords) { const PetscScalar *coords; ierr = VecGetArrayRead(Coords,&coords);CHKERRQ(ierr); if (!rank) { if (r) { PetscMPIInt nn; ierr = MPI_Recv(array,nnodes*cdim,MPIU_SCALAR,r,tag,comm,&status);CHKERRQ(ierr); ierr = MPI_Get_count(&status,MPIU_SCALAR,&nn);CHKERRQ(ierr); if (nn != nnodes*cdim) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Array size mismatch"); } else { ierr = PetscMemcpy(array,coords,nnodes*cdim*sizeof(PetscScalar));CHKERRQ(ierr); } /* Transpose coordinates to VTK (C-style) ordering */ for (k=0; k<zm; k++) { for (j=0; j<ym; j++) { for (i=0; i<xm; i++) { PetscInt Iloc = i+xm*(j+ym*k); array2[Iloc*3+0] = array[Iloc*cdim + 0]; array2[Iloc*3+1] = cdim > 1 ? array[Iloc*cdim + 1] : 0.0; array2[Iloc*3+2] = cdim > 2 ? array[Iloc*cdim + 2] : 0.0; } } } } else if (r == rank) { ierr = MPI_Send((void*)coords,nnodes*cdim,MPIU_SCALAR,0,tag,comm);CHKERRQ(ierr); } ierr = VecRestoreArrayRead(Coords,&coords);CHKERRQ(ierr); } else { /* Fabricate some coordinates using grid index */ for (k=0; k<zm; k++) { for (j=0; j<ym; j++) { for (i=0; i<xm; i++) { PetscInt Iloc = i+xm*(j+ym*k); array2[Iloc*3+0] = xs+i; array2[Iloc*3+1] = ys+j; array2[Iloc*3+2] = zs+k; } } } } ierr = PetscViewerVTKFWrite(viewer,fp,array2,nnodes*3,PETSC_SCALAR);CHKERRQ(ierr); /* Write each of the objects queued up for this file */ for (link=vtk->link; link; link=link->next) { Vec X = (Vec)link->vec; const PetscScalar *x; ierr = VecGetArrayRead(X,&x);CHKERRQ(ierr); if (!rank) { if (r) { PetscMPIInt nn; ierr = MPI_Recv(array,nnodes*bs,MPIU_SCALAR,r,tag,comm,&status);CHKERRQ(ierr); ierr = MPI_Get_count(&status,MPIU_SCALAR,&nn);CHKERRQ(ierr); if (nn != nnodes*bs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Array size mismatch receiving from rank %D",r); } else { ierr = PetscMemcpy(array,x,nnodes*bs*sizeof(PetscScalar));CHKERRQ(ierr); } for (f=0; f<bs; f++) { /* Extract and transpose the f'th field */ for (k=0; k<zm; k++) { for (j=0; j<ym; j++) { for (i=0; i<xm; i++) { PetscInt Iloc = i+xm*(j+ym*k); array2[Iloc] = array[Iloc*bs + f]; } } } ierr = PetscViewerVTKFWrite(viewer,fp,array2,nnodes,PETSC_SCALAR);CHKERRQ(ierr); } } else if (r == rank) { ierr = MPI_Send((void*)x,nnodes*bs,MPIU_SCALAR,0,tag,comm);CHKERRQ(ierr); } ierr = VecRestoreArrayRead(X,&x);CHKERRQ(ierr); } } ierr = PetscFree2(array,array2);CHKERRQ(ierr); ierr = PetscFree(grloc);CHKERRQ(ierr); ierr = PetscFPrintf(comm,fp,"\n </AppendedData>\n");CHKERRQ(ierr); ierr = PetscFPrintf(comm,fp,"</VTKFile>\n");CHKERRQ(ierr); ierr = PetscFClose(comm,fp);CHKERRQ(ierr); PetscFunctionReturn(0); }
PetscErrorCode AOCreateMemoryScalable_private(MPI_Comm comm,PetscInt napp,const PetscInt from_array[],const PetscInt to_array[],AO ao, PetscInt *aomap_loc) { PetscErrorCode ierr; AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data; PetscLayout map = aomems->map; PetscInt n_local = map->n,i,j; PetscMPIInt rank,size,tag; PetscInt *owner,*start,*sizes,nsends,nreceives; PetscInt nmax,count,*sindices,*rindices,idx,lastidx; PetscInt *owners = aomems->map->range; MPI_Request *send_waits,*recv_waits; MPI_Status recv_status; PetscMPIInt nindices,widx; PetscInt *rbuf; PetscInt n=napp,ip,ia; MPI_Status *send_status; PetscFunctionBegin; ierr = PetscMemzero(aomap_loc,n_local*sizeof(PetscInt));CHKERRQ(ierr); ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); /* first count number of contributors (of from_array[]) to each processor */ ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr); ierr = PetscMalloc1(n,&owner);CHKERRQ(ierr); j = 0; lastidx = -1; for (i=0; i<n; i++) { /* if indices are NOT locally sorted, need to start search at the beginning */ if (lastidx > (idx = from_array[i])) j = 0; lastidx = idx; for (; j<size; j++) { if (idx >= owners[j] && idx < owners[j+1]) { sizes[2*j] += 2; /* num of indices to be sent - in pairs (ip,ia) */ sizes[2*j+1] = 1; /* send to proc[j] */ owner[i] = j; break; } } } sizes[2*rank]=sizes[2*rank+1]=0; /* do not receive from self! */ nsends = 0; for (i=0; i<size; i++) nsends += sizes[2*i+1]; /* inform other processors of number of messages and max length*/ ierr = PetscMaxSum(comm,sizes,&nmax,&nreceives);CHKERRQ(ierr); /* allocate arrays */ ierr = PetscObjectGetNewTag((PetscObject)ao,&tag);CHKERRQ(ierr); ierr = PetscMalloc2(nreceives*nmax,&rindices,nreceives,&recv_waits);CHKERRQ(ierr); ierr = PetscMalloc3(2*n,&sindices,nsends,&send_waits,nsends,&send_status);CHKERRQ(ierr); ierr = PetscMalloc1(size,&start);CHKERRQ(ierr); /* post receives: */ for (i=0; i<nreceives; i++) { ierr = MPI_Irecv(rindices+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); } /* do sends: 1) starts[i] gives the starting index in svalues for stuff going to the ith processor */ start[0] = 0; for (i=1; i<size; i++) start[i] = start[i-1] + sizes[2*i-2]; for (i=0; i<n; i++) { j = owner[i]; if (j != rank) { ip = from_array[i]; ia = to_array[i]; sindices[start[j]++] = ip; sindices[start[j]++] = ia; } else { /* compute my own map */ ip = from_array[i] - owners[rank]; ia = to_array[i]; aomap_loc[ip] = ia; } } start[0] = 0; for (i=1; i<size; i++) start[i] = start[i-1] + sizes[2*i-2]; for (i=0,count=0; i<size; i++) { if (sizes[2*i+1]) { ierr = MPI_Isend(sindices+start[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count);CHKERRQ(ierr); count++; } } if (nsends != count) SETERRQ2(comm,PETSC_ERR_SUP,"nsends %d != count %d",nsends,count); /* wait on sends */ if (nsends) { ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); } /* recvs */ count=0; for (j= nreceives; j>0; j--) { ierr = MPI_Waitany(nreceives,recv_waits,&widx,&recv_status);CHKERRQ(ierr); ierr = MPI_Get_count(&recv_status,MPIU_INT,&nindices);CHKERRQ(ierr); rbuf = rindices+nmax*widx; /* global index */ /* compute local mapping */ for (i=0; i<nindices; i+=2) { /* pack aomap_loc */ ip = rbuf[i] - owners[rank]; /* local index */ ia = rbuf[i+1]; aomap_loc[ip] = ia; } count++; } ierr = PetscFree(start);CHKERRQ(ierr); ierr = PetscFree3(sindices,send_waits,send_status);CHKERRQ(ierr); ierr = PetscFree2(rindices,recv_waits);CHKERRQ(ierr); ierr = PetscFree(owner);CHKERRQ(ierr); ierr = PetscFree(sizes);CHKERRQ(ierr); PetscFunctionReturn(0); }
/* MatConvert_Basic - Converts from any input format to another format. For parallel formats, the new matrix distribution is determined by PETSc. Does not do preallocation so in general will be slow */ PetscErrorCode MatConvert_Basic(Mat mat, MatType newtype,MatReuse reuse,Mat *newmat) { Mat M; const PetscScalar *vwork; PetscErrorCode ierr; PetscInt i,j,nz,m,n,rstart,rend,lm,ln,prbs,pcbs,cstart,cend,*dnz,*onz; const PetscInt *cwork; PetscBool isseqsbaij,ismpisbaij,isseqbaij,ismpibaij,isseqdense,ismpidense; PetscFunctionBegin; ierr = MatGetSize(mat,&m,&n);CHKERRQ(ierr); ierr = MatGetLocalSize(mat,&lm,&ln);CHKERRQ(ierr); if (ln == n) ln = PETSC_DECIDE; /* try to preserve column ownership */ ierr = MatCreate(PetscObjectComm((PetscObject)mat),&M);CHKERRQ(ierr); ierr = MatSetSizes(M,lm,ln,m,n);CHKERRQ(ierr); ierr = MatSetBlockSizesFromMats(M,mat,mat);CHKERRQ(ierr); ierr = MatSetType(M,newtype);CHKERRQ(ierr); ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr); ierr = PetscObjectTypeCompare((PetscObject)M,MATSEQSBAIJ,&isseqsbaij);CHKERRQ(ierr); ierr = PetscObjectTypeCompare((PetscObject)M,MATMPISBAIJ,&ismpisbaij);CHKERRQ(ierr); if (isseqsbaij || ismpisbaij) {ierr = MatSetOption(M,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);} ierr = PetscObjectTypeCompare((PetscObject)M,MATSEQBAIJ,&isseqbaij);CHKERRQ(ierr); ierr = PetscObjectTypeCompare((PetscObject)M,MATMPIBAIJ,&ismpibaij);CHKERRQ(ierr); ierr = PetscObjectTypeCompare((PetscObject)M,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); ierr = PetscObjectTypeCompare((PetscObject)M,MATMPIDENSE,&ismpidense);CHKERRQ(ierr); if (isseqdense) { ierr = MatSeqDenseSetPreallocation(M,NULL);CHKERRQ(ierr); } else if (ismpidense) { ierr = MatMPIDenseSetPreallocation(M,NULL);CHKERRQ(ierr); } else { /* Preallocation block sizes. (S)BAIJ matrices will have one index per block. */ prbs = (isseqbaij || ismpibaij || isseqsbaij || ismpisbaij) ? PetscAbs(M->rmap->bs) : 1; pcbs = (isseqbaij || ismpibaij || isseqsbaij || ismpisbaij) ? PetscAbs(M->cmap->bs) : 1; ierr = PetscMalloc2(lm/prbs,&dnz,lm/prbs,&onz);CHKERRQ(ierr); ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); for (i=0; i<lm; i+=prbs) { ierr = MatGetRow(mat,rstart+i,&nz,&cwork,NULL);CHKERRQ(ierr); dnz[i] = 0; onz[i] = 0; for (j=0; j<nz; j+=pcbs) { if ((isseqsbaij || ismpisbaij) && cwork[j] < rstart+i) continue; if (cstart <= cwork[j] && cwork[j] < cend) dnz[i]++; else onz[i]++; } ierr = MatRestoreRow(mat,rstart+i,&nz,&cwork,NULL);CHKERRQ(ierr); } ierr = MatXAIJSetPreallocation(M,PETSC_DECIDE,dnz,onz,dnz,onz);CHKERRQ(ierr); ierr = PetscFree2(dnz,onz);CHKERRQ(ierr); } for (i=rstart; i<rend; i++) { ierr = MatGetRow(mat,i,&nz,&cwork,&vwork);CHKERRQ(ierr); ierr = MatSetValues(M,1,&i,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); ierr = MatRestoreRow(mat,i,&nz,&cwork,&vwork);CHKERRQ(ierr); } ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); if (reuse == MAT_REUSE_MATRIX) { ierr = MatHeaderReplace(mat,M);CHKERRQ(ierr); } else { *newmat = M; } PetscFunctionReturn(0); }
PetscErrorCode MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering(Mat B,Mat A,const MatFactorInfo *info) { Mat C=B; Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ *)C->data; PetscErrorCode ierr; PetscInt i,j,k,nz,nzL,row; const PetscInt n=a->mbs,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; const PetscInt *ajtmp,*bjtmp,*bdiag=b->diag,*pj,bs2=a->bs2; MatScalar *rtmp,*pc,*mwork,*v,*vv,*pv,*aa=a->a,work[25]; PetscInt flg,ipvt[5]; PetscReal shift = info->shiftamount; PetscFunctionBegin; /* generate work space needed by the factorization */ ierr = PetscMalloc2(bs2*n,MatScalar,&rtmp,bs2,MatScalar,&mwork);CHKERRQ(ierr); ierr = PetscMemzero(rtmp,bs2*n*sizeof(MatScalar));CHKERRQ(ierr); for (i=0; i<n; i++){ /* zero rtmp */ /* L part */ nz = bi[i+1] - bi[i]; bjtmp = bj + bi[i]; for (j=0; j<nz; j++){ ierr = PetscMemzero(rtmp+bs2*bjtmp[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); } /* U part */ nz = bdiag[i] - bdiag[i+1]; bjtmp = bj + bdiag[i+1]+1; for (j=0; j<nz; j++){ ierr = PetscMemzero(rtmp+bs2*bjtmp[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); } /* load in initial (unfactored row) */ nz = ai[i+1] - ai[i]; ajtmp = aj + ai[i]; v = aa + bs2*ai[i]; for (j=0; j<nz; j++) { ierr = PetscMemcpy(rtmp+bs2*ajtmp[j],v+bs2*j,bs2*sizeof(MatScalar));CHKERRQ(ierr); } /* elimination */ bjtmp = bj + bi[i]; nzL = bi[i+1] - bi[i]; for(k=0;k < nzL;k++) { row = bjtmp[k]; pc = rtmp + bs2*row; for (flg=0,j=0; j<bs2; j++) { if (pc[j]!=0.0) { flg = 1; break; }} if (flg) { pv = b->a + bs2*bdiag[row]; /* Kernel_A_gets_A_times_B(bs,pc,pv,mwork); *pc = *pc * (*pv); */ ierr = Kernel_A_gets_A_times_B_5(pc,pv,mwork);CHKERRQ(ierr); pj = b->j + bdiag[row+1]+1; /* begining of U(row,:) */ pv = b->a + bs2*(bdiag[row+1]+1); nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries inU(row,:), excluding diag */ for (j=0; j<nz; j++) { /* Kernel_A_gets_A_minus_B_times_C(bs,rtmp+bs2*pj[j],pc,pv+bs2*j); */ /* rtmp+bs2*pj[j] = rtmp+bs2*pj[j] - (*pc)*(pv+bs2*j) */ vv = rtmp + bs2*pj[j]; ierr = Kernel_A_gets_A_minus_B_times_C_5(vv,pc,pv);CHKERRQ(ierr); pv += bs2; } ierr = PetscLogFlops(250*nz+225);CHKERRQ(ierr); /* flops = 2*bs^3*nz + 2*bs^3 - bs2) */ } } /* finished row so stick it into b->a */ /* L part */ pv = b->a + bs2*bi[i] ; pj = b->j + bi[i] ; nz = bi[i+1] - bi[i]; for (j=0; j<nz; j++) { ierr = PetscMemcpy(pv+bs2*j,rtmp+bs2*pj[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); } /* Mark diagonal and invert diagonal for simplier triangular solves */ pv = b->a + bs2*bdiag[i]; pj = b->j + bdiag[i]; ierr = PetscMemcpy(pv,rtmp+bs2*pj[0],bs2*sizeof(MatScalar));CHKERRQ(ierr); /* ierr = Kernel_A_gets_inverse_A(bs,pv,v_pivots,v_work);CHKERRQ(ierr); */ ierr = Kernel_A_gets_inverse_A_5(pv,ipvt,work,shift);CHKERRQ(ierr); /* U part */ pv = b->a + bs2*(bdiag[i+1]+1); pj = b->j + bdiag[i+1]+1; nz = bdiag[i] - bdiag[i+1] - 1; for (j=0; j<nz; j++){ ierr = PetscMemcpy(pv+bs2*j,rtmp+bs2*pj[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); } } ierr = PetscFree2(rtmp,mwork);CHKERRQ(ierr); C->ops->solve = MatSolve_SeqBAIJ_5_NaturalOrdering; C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_5_NaturalOrdering; C->assembled = PETSC_TRUE; ierr = PetscLogFlops(1.333333333333*5*5*5*n);CHKERRQ(ierr); /* from inverting diagonal blocks */ PetscFunctionReturn(0); }
int main(int argc,char **args) { Mat C,F,Cpetsc,Csymm; Vec u,x,b,bpla; PetscErrorCode ierr; PetscMPIInt rank,nproc; PetscInt i,j,k,M = 10,m,nfact,nsolve,Istart,Iend,*im,*in,start,end; PetscScalar *array,rval; PetscReal norm,tol=1.e-12; IS perm,iperm; MatFactorInfo info; PetscRandom rand; PetscInitialize(&argc,&args,(char *)0,help); ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr); ierr = MPI_Comm_size(PETSC_COMM_WORLD, &nproc);CHKERRQ(ierr); /* Test non-symmetric operations */ /*-------------------------------*/ /* Create a Plapack dense matrix C */ ierr = PetscOptionsGetInt(PETSC_NULL,"-M",&M,PETSC_NULL);CHKERRQ(ierr); ierr = MatCreate(PETSC_COMM_WORLD,&C);CHKERRQ(ierr); ierr = MatSetSizes(C,PETSC_DECIDE,PETSC_DECIDE,M,M);CHKERRQ(ierr); ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr); ierr = MatSetFromOptions(C);CHKERRQ(ierr); ierr = MatSetUp(C);CHKERRQ(ierr); /* Create vectors */ ierr = MatGetOwnershipRange(C,&start,&end);CHKERRQ(ierr); m = end - start; /* printf("[%d] C - local size m: %d\n",rank,m); */ ierr = VecCreate(PETSC_COMM_WORLD,&x);CHKERRQ(ierr); ierr = VecSetSizes(x,m,PETSC_DECIDE);CHKERRQ(ierr); ierr = VecSetFromOptions(x);CHKERRQ(ierr); ierr = VecDuplicate(x,&b);CHKERRQ(ierr); ierr = VecDuplicate(x,&bpla);CHKERRQ(ierr); ierr = VecDuplicate(x,&u);CHKERRQ(ierr); /* save the true solution */ /* Create a petsc dense matrix Cpetsc */ ierr = PetscOptionsGetInt(PETSC_NULL,"-M",&M,PETSC_NULL);CHKERRQ(ierr); ierr = MatCreate(PETSC_COMM_WORLD,&Cpetsc);CHKERRQ(ierr); ierr = MatSetSizes(Cpetsc,m,m,M,M);CHKERRQ(ierr); ierr = MatSetType(Cpetsc,MATDENSE);CHKERRQ(ierr); ierr = MatMPIDenseSetPreallocation(Cpetsc,PETSC_NULL);CHKERRQ(ierr); ierr = MatSetFromOptions(Cpetsc);CHKERRQ(ierr); ierr = MatSetUp(Cpetsc);CHKERRQ(ierr); ierr = MatSetOption(Cpetsc,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); ierr = MatSetOption(C,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* Assembly */ /* PLAPACK doesn't support INSERT_VALUES mode, zero all entries before calling MatSetValues() */ ierr = MatZeroEntries(C);CHKERRQ(ierr); ierr = MatZeroEntries(Cpetsc);CHKERRQ(ierr); ierr = PetscRandomCreate(PETSC_COMM_WORLD,&rand);CHKERRQ(ierr); ierr = PetscRandomSetFromOptions(rand);CHKERRQ(ierr); ierr = MatGetOwnershipRange(C,&Istart,&Iend);CHKERRQ(ierr); /* printf(" [%d] C m: %d, Istart/end: %d %d\n",rank,m,Istart,Iend); */ ierr = PetscMalloc((m*M+1)*sizeof(PetscScalar),&array);CHKERRQ(ierr); ierr = PetscMalloc2(m,PetscInt,&im,M,PetscInt,&in);CHKERRQ(ierr); k = 0; for (j=0; j<M; j++){ /* column oriented! */ in[j] = j; for (i=0; i<m; i++){ im[i] = i+Istart; ierr = PetscRandomGetValue(rand,&rval);CHKERRQ(ierr); array[k++] = rval; } } ierr = MatSetValues(Cpetsc,m,im,M,in,array,ADD_VALUES);CHKERRQ(ierr); ierr = MatSetValues(C,m,im,M,in,array,ADD_VALUES);CHKERRQ(ierr); ierr = PetscFree(array);CHKERRQ(ierr); ierr = PetscFree2(im,in);CHKERRQ(ierr); ierr = MatAssemblyBegin(Cpetsc,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(Cpetsc,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); /* if (!rank) {printf("main, Cpetsc: \n");} ierr = MatView(Cpetsc,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); */ ierr = MatGetOrdering(C,MATORDERINGNATURAL,&perm,&iperm);CHKERRQ(ierr); /* Test nonsymmetric MatMult() */ ierr = VecGetArray(x,&array);CHKERRQ(ierr); for (i=0; i<m; i++){ ierr = PetscRandomGetValue(rand,&rval);CHKERRQ(ierr); array[i] = rval; } ierr = VecRestoreArray(x,&array);CHKERRQ(ierr); ierr = MatMult(Cpetsc,x,b);CHKERRQ(ierr); ierr = MatMult(C,x,bpla);CHKERRQ(ierr); ierr = VecAXPY(bpla,-1.0,b);CHKERRQ(ierr); ierr = VecNorm(bpla,NORM_2,&norm);CHKERRQ(ierr); if (norm > 1.e-12 && !rank){ ierr = PetscPrintf(PETSC_COMM_SELF,"Nonsymmetric MatMult_Plapack error: |b_pla - b|= %g\n",norm);CHKERRQ(ierr); } /* Test LU Factorization */ if (nproc == 1){ ierr = MatGetFactor(C,MATSOLVERPETSC,MAT_FACTOR_LU,&F);CHKERRQ(ierr); } else { ierr = MatGetFactor(C,MATSOLVERPLAPACK,MAT_FACTOR_LU,&F);CHKERRQ(ierr); } ierr = MatLUFactorSymbolic(F,C,perm,iperm,&info);CHKERRQ(ierr); for (nfact = 0; nfact < 2; nfact++){ if (!rank) printf(" LU nfact %d\n",nfact); if (nfact>0){ /* change matrix value for testing repeated MatLUFactorNumeric() */ if (!rank){ i = j = 0; rval = nfact; ierr = MatSetValues(Cpetsc,1,&i,1,&j,&rval,ADD_VALUES);CHKERRQ(ierr); ierr = MatSetValues(C,1,&i,1,&j,&rval,ADD_VALUES);CHKERRQ(ierr); } else { /* PLAPACK seems requiring all processors call MatSetValues(), so we add 0.0 on processesses with rank>0! */ i = j = 0; rval = 0.0; ierr = MatSetValues(Cpetsc,1,&i,1,&j,&rval,ADD_VALUES);CHKERRQ(ierr); ierr = MatSetValues(C,1,&i,1,&j,&rval,ADD_VALUES);CHKERRQ(ierr); } ierr = MatAssemblyBegin(Cpetsc,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(Cpetsc,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); } ierr = MatLUFactorNumeric(F,C,&info);CHKERRQ(ierr); /* Test MatSolve() */ for (nsolve = 0; nsolve < 2; nsolve++){ ierr = VecGetArray(x,&array);CHKERRQ(ierr); for (i=0; i<m; i++){ ierr = PetscRandomGetValue(rand,&rval);CHKERRQ(ierr); array[i] = rval; /* array[i] = rank + 1; */ } ierr = VecRestoreArray(x,&array);CHKERRQ(ierr); ierr = VecCopy(x,u);CHKERRQ(ierr); ierr = MatMult(C,x,b);CHKERRQ(ierr); ierr = MatSolve(F,b,x);CHKERRQ(ierr); /* Check the error */ ierr = VecAXPY(u,-1.0,x);CHKERRQ(ierr); /* u <- (-1.0)x + u */ ierr = VecNorm(u,NORM_2,&norm);CHKERRQ(ierr); if (norm > tol){ if (!rank){ ierr = PetscPrintf(PETSC_COMM_SELF,"Error: Norm of error %g, LU nfact %d\n",norm,nfact);CHKERRQ(ierr); } } } } ierr = MatDestroy(&F);CHKERRQ(ierr); /* Test non-symmetric operations */ /*-------------------------------*/ /* Create a symmetric Plapack dense matrix Csymm */ ierr = MatCreate(PETSC_COMM_WORLD,&Csymm);CHKERRQ(ierr); ierr = MatSetSizes(Csymm,PETSC_DECIDE,PETSC_DECIDE,M,M);CHKERRQ(ierr); ierr = MatSetType(Csymm,MATDENSE);CHKERRQ(ierr); ierr = MatSetFromOptions(Csymm);CHKERRQ(ierr); ierr = MatSetUp(Csymm);CHKERRQ(ierr); ierr = MatSetOption(Csymm,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); ierr = MatSetOption(Csymm,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); ierr = MatSetOption(Csymm,MAT_SYMMETRY_ETERNAL,PETSC_TRUE);CHKERRQ(ierr); ierr = MatZeroEntries(Csymm);CHKERRQ(ierr); ierr = MatZeroEntries(Cpetsc);CHKERRQ(ierr); for (i=Istart; i<Iend; i++){ for (j=0; j<=i; j++){ ierr = PetscRandomGetValue(rand,&rval);CHKERRQ(ierr); ierr = MatSetValues(Cpetsc,1,&i,1,&j,&rval,ADD_VALUES);CHKERRQ(ierr); ierr = MatSetValues(Csymm,1,&i,1,&j,&rval,ADD_VALUES);CHKERRQ(ierr); if (j<i){ /* Although PLAPACK only requires lower triangular entries, we must add all the entries. MatSetValues_Plapack() will ignore the upper triangular entries AFTER an index map! */ ierr = MatSetValues(Cpetsc,1,&j,1,&i,&rval,ADD_VALUES);CHKERRQ(ierr); ierr = MatSetValues(Csymm,1,&j,1,&i,&rval,ADD_VALUES);CHKERRQ(ierr); } } } ierr = MatAssemblyBegin(Cpetsc,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(Cpetsc,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyBegin(Csymm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(Csymm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); /* Test symmetric MatMult() */ ierr = VecGetArray(x,&array);CHKERRQ(ierr); for (i=0; i<m; i++){ ierr = PetscRandomGetValue(rand,&rval);CHKERRQ(ierr); array[i] = rval; } ierr = VecRestoreArray(x,&array);CHKERRQ(ierr); ierr = MatMult(Cpetsc,x,b);CHKERRQ(ierr); ierr = MatMult(Csymm,x,bpla);CHKERRQ(ierr); ierr = VecAXPY(bpla,-1.0,b);CHKERRQ(ierr); ierr = VecNorm(bpla,NORM_2,&norm);CHKERRQ(ierr); if (norm > 1.e-12 && !rank){ ierr = PetscPrintf(PETSC_COMM_SELF,"Symmetric MatMult_Plapack error: |b_pla - b|= %g\n",norm);CHKERRQ(ierr); } /* Test Cholesky Factorization */ ierr = MatShift(Csymm,M);CHKERRQ(ierr); /* make Csymm positive definite */ if (nproc == 1){ ierr = MatGetFactor(Csymm,MATSOLVERPETSC,MAT_FACTOR_CHOLESKY,&F);CHKERRQ(ierr); } else { ierr = MatGetFactor(Csymm,MATSOLVERPLAPACK,MAT_FACTOR_CHOLESKY,&F);CHKERRQ(ierr); } ierr = MatCholeskyFactorSymbolic(F,Csymm,perm,&info);CHKERRQ(ierr); for (nfact = 0; nfact < 2; nfact++){ if (!rank) printf(" Cholesky nfact %d\n",nfact); ierr = MatCholeskyFactorNumeric(F,Csymm,&info);CHKERRQ(ierr); /* Test MatSolve() */ for (nsolve = 0; nsolve < 2; nsolve++){ ierr = VecGetArray(x,&array);CHKERRQ(ierr); for (i=0; i<m; i++){ ierr = PetscRandomGetValue(rand,&rval);CHKERRQ(ierr); array[i] = rval; } ierr = VecRestoreArray(x,&array);CHKERRQ(ierr); ierr = VecCopy(x,u);CHKERRQ(ierr); ierr = MatMult(Csymm,x,b);CHKERRQ(ierr); ierr = MatSolve(F,b,x);CHKERRQ(ierr); /* Check the error */ ierr = VecAXPY(u,-1.0,x);CHKERRQ(ierr); /* u <- (-1.0)x + u */ ierr = VecNorm(u,NORM_2,&norm);CHKERRQ(ierr); if (norm > tol){ if (!rank){ ierr = PetscPrintf(PETSC_COMM_SELF,"Error: Norm of error %g, Cholesky nfact %d\n",norm,nfact);CHKERRQ(ierr); } } } } ierr = MatDestroy(&F);CHKERRQ(ierr); /* Free data structures */ ierr = ISDestroy(&perm);CHKERRQ(ierr); ierr = ISDestroy(&iperm);CHKERRQ(ierr); ierr = PetscRandomDestroy(&rand);CHKERRQ(ierr); ierr = VecDestroy(&x);CHKERRQ(ierr); ierr = VecDestroy(&b);CHKERRQ(ierr); ierr = VecDestroy(&bpla);CHKERRQ(ierr); ierr = VecDestroy(&u);CHKERRQ(ierr); ierr = MatDestroy(&Cpetsc);CHKERRQ(ierr); ierr = MatDestroy(&C);CHKERRQ(ierr); ierr = MatDestroy(&Csymm);CHKERRQ(ierr); ierr = PetscFinalize(); return 0; }
int main(int argc,char **args) { const ptrdiff_t N0=2056,N1=2056; fftw_plan bplan,fplan; fftw_complex *out; double *in1,*in2; ptrdiff_t alloc_local,local_n0,local_0_start; ptrdiff_t local_n1,local_1_start; PetscInt i,j; PetscMPIInt size,rank; int n,N,N_factor,NM; PetscScalar one=2.0,zero=0.5; PetscScalar two=4.0,three=8.0,four=16.0; PetscScalar a,*x_arr,*y_arr,*z_arr,enorm; Vec fin,fout,fout1; Vec ini,final; PetscRandom rnd; PetscErrorCode ierr; PetscInt *indx3,tempindx,low,*indx4,tempindx1; ierr = PetscInitialize(&argc,&args,(char*)0,help);if (ierr) return ierr; ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr); ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr); ierr = PetscRandomCreate(PETSC_COMM_WORLD,&rnd);CHKERRQ(ierr); alloc_local = fftw_mpi_local_size_2d_transposed(N0,N1/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); #if defined(DEBUGGING) printf("The value alloc_local is %ld from process %d\n",alloc_local,rank); printf("The value local_n0 is %ld from process %d\n",local_n0,rank); printf("The value local_0_start is %ld from process %d\n",local_0_start,rank); /* printf("The value local_n1 is %ld from process %d\n",local_n1,rank); */ /* printf("The value local_1_start is %ld from process %d\n",local_1_start,rank); */ /* printf("The value local_n0 is %ld from process %d\n",local_n0,rank); */ #endif /* Allocate space for input and output arrays */ in1=(double*)fftw_malloc(sizeof(double)*alloc_local*2); in2=(double*)fftw_malloc(sizeof(double)*alloc_local*2); out=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); N = 2*N0*(N1/2+1); N_factor = N0*N1; n = 2*local_n0*(N1/2+1); /* printf("The value N is %d from process %d\n",N,rank); */ /* printf("The value n is %d from process %d\n",n,rank); */ /* printf("The value n1 is %d from process %d\n",n1,rank);*/ /* Creating data vector and accompanying array with VeccreateMPIWithArray */ ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,1,n,N,(PetscScalar*)in1,&fin);CHKERRQ(ierr); ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,1,n,N,(PetscScalar*)out,&fout);CHKERRQ(ierr); ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,1,n,N,(PetscScalar*)in2,&fout1);CHKERRQ(ierr); /* Set the vector with random data */ ierr = VecSet(fin,zero);CHKERRQ(ierr); /* for (i=0;i<N0*N1;i++) */ /* { */ /* VecSetValues(fin,1,&i,&one,INSERT_VALUES); */ /* } */ /* VecSet(fin,one); */ i =0; ierr = VecSetValues(fin,1,&i,&one,INSERT_VALUES);CHKERRQ(ierr); i =1; ierr = VecSetValues(fin,1,&i,&two,INSERT_VALUES);CHKERRQ(ierr); i =4; ierr = VecSetValues(fin,1,&i,&three,INSERT_VALUES);CHKERRQ(ierr); i =5; ierr = VecSetValues(fin,1,&i,&four,INSERT_VALUES);CHKERRQ(ierr); ierr = VecAssemblyBegin(fin);CHKERRQ(ierr); ierr = VecAssemblyEnd(fin);CHKERRQ(ierr); ierr = VecSet(fout,zero);CHKERRQ(ierr); ierr = VecSet(fout1,zero);CHKERRQ(ierr); /* Get the meaningful portion of array */ ierr = VecGetArray(fin,&x_arr);CHKERRQ(ierr); ierr = VecGetArray(fout1,&z_arr);CHKERRQ(ierr); ierr = VecGetArray(fout,&y_arr);CHKERRQ(ierr); fplan=fftw_mpi_plan_dft_r2c_2d(N0,N1,(double*)x_arr,(fftw_complex*)y_arr,PETSC_COMM_WORLD,FFTW_ESTIMATE); bplan=fftw_mpi_plan_dft_c2r_2d(N0,N1,(fftw_complex*)y_arr,(double*)z_arr,PETSC_COMM_WORLD,FFTW_ESTIMATE); fftw_execute(fplan); fftw_execute(bplan); ierr = VecRestoreArray(fin,&x_arr); ierr = VecRestoreArray(fout1,&z_arr); ierr = VecRestoreArray(fout,&y_arr); /* VecView(fin,PETSC_VIEWER_STDOUT_WORLD); */ ierr = VecCreate(PETSC_COMM_WORLD,&ini);CHKERRQ(ierr); ierr = VecCreate(PETSC_COMM_WORLD,&final);CHKERRQ(ierr); ierr = VecSetSizes(ini,local_n0*N1,N0*N1);CHKERRQ(ierr); ierr = VecSetSizes(final,local_n0*N1,N0*N1);CHKERRQ(ierr); ierr = VecSetFromOptions(ini);CHKERRQ(ierr); ierr = VecSetFromOptions(final);CHKERRQ(ierr); if (N1%2==0) { NM = N1+2; } else { NM = N1+1; } /*printf("The Value of NM is %d",NM); */ ierr = VecGetOwnershipRange(fin,&low,NULL); /*printf("The local index is %d from %d\n",low,rank); */ ierr = PetscMalloc1(local_n0*N1,&indx3); ierr = PetscMalloc1(local_n0*N1,&indx4); for (i=0;i<local_n0;i++) { for (j=0;j<N1;j++) { tempindx = i*N1 + j; tempindx1 = i*NM + j; indx3[tempindx]=local_0_start*N1+tempindx; indx4[tempindx]=low+tempindx1; /* printf("index3 %d from proc %d is \n",indx3[tempindx],rank); */ /* printf("index4 %d from proc %d is \n",indx4[tempindx],rank); */ } } ierr = PetscMalloc2(local_n0*N1,&x_arr,local_n0*N1,&y_arr);CHKERRQ(ierr); /* arr must be allocated for VecGetValues() */ ierr = VecGetValues(fin,local_n0*N1,indx4,(PetscScalar*)x_arr);CHKERRQ(ierr); ierr = VecSetValues(ini,local_n0*N1,indx3,x_arr,INSERT_VALUES);CHKERRQ(ierr); ierr = VecAssemblyBegin(ini);CHKERRQ(ierr); ierr = VecAssemblyEnd(ini);CHKERRQ(ierr); ierr = VecGetValues(fout1,local_n0*N1,indx4,y_arr); ierr = VecSetValues(final,local_n0*N1,indx3,y_arr,INSERT_VALUES); ierr = VecAssemblyBegin(final); ierr = VecAssemblyEnd(final); ierr = PetscFree2(x_arr,y_arr);CHKERRQ(ierr); /* VecScatter vecscat; IS indx1,indx2; for (i=0;i<N0;i++) { indx = i*NM; ISCreateStride(PETSC_COMM_WORLD,N1,indx,1,&indx1); indx = i*N1; ISCreateStride(PETSC_COMM_WORLD,N1,indx,1,&indx2); VecScatterCreate(fin,indx1,ini,indx2,&vecscat); VecScatterBegin(vecscat,fin,ini,INSERT_VALUES,SCATTER_FORWARD); VecScatterEnd(vecscat,fin,ini,INSERT_VALUES,SCATTER_FORWARD); VecScatterBegin(vecscat,fout1,final,INSERT_VALUES,SCATTER_FORWARD); VecScatterEnd(vecscat,fout1,final,INSERT_VALUES,SCATTER_FORWARD); } */ a = 1.0/(PetscReal)N_factor; ierr = VecScale(fout1,a);CHKERRQ(ierr); ierr = VecScale(final,a);CHKERRQ(ierr); /* VecView(ini,PETSC_VIEWER_STDOUT_WORLD); */ /* VecView(final,PETSC_VIEWER_STDOUT_WORLD); */ ierr = VecAXPY(final,-1.0,ini);CHKERRQ(ierr); ierr = VecNorm(final,NORM_1,&enorm);CHKERRQ(ierr); if (enorm > 1.e-10) { ierr = PetscPrintf(PETSC_COMM_WORLD," Error norm of |x - z| = %e\n",enorm);CHKERRQ(ierr); } /* Execute fftw with function fftw_execute and destory it after execution */ fftw_destroy_plan(fplan); fftw_destroy_plan(bplan); fftw_free(in1); ierr = VecDestroy(&fin);CHKERRQ(ierr); fftw_free(out); ierr = VecDestroy(&fout);CHKERRQ(ierr); fftw_free(in2); ierr = VecDestroy(&fout1);CHKERRQ(ierr); ierr = VecDestroy(&ini);CHKERRQ(ierr); ierr = VecDestroy(&final);CHKERRQ(ierr); ierr = PetscRandomDestroy(&rnd);CHKERRQ(ierr); ierr = PetscFree(indx3);CHKERRQ(ierr); ierr = PetscFree(indx4);CHKERRQ(ierr); ierr = PetscFinalize(); return ierr; }
PetscErrorCode ISPairToList(IS xis, IS yis, PetscInt *listlen, IS **islist) { PetscErrorCode ierr; IS indis = xis, coloris = yis; PetscInt *inds, *colors, llen, ilen, lstart, lend, lcount,l; PetscMPIInt rank, size, llow, lhigh, low, high,color,subsize; const PetscInt *ccolors, *cinds; MPI_Comm comm, subcomm; PetscFunctionBegin; PetscValidHeaderSpecific(xis, IS_CLASSID, 1); PetscValidHeaderSpecific(yis, IS_CLASSID, 2); PetscCheckSameComm(xis,1,yis,2); PetscValidIntPointer(listlen,3); PetscValidPointer(islist,4); ierr = PetscObjectGetComm((PetscObject)xis,&comm);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm, &size);CHKERRQ(ierr); /* Extract, copy and sort the local indices and colors on the color. */ ierr = ISGetLocalSize(coloris, &llen);CHKERRQ(ierr); ierr = ISGetLocalSize(indis, &ilen);CHKERRQ(ierr); if (llen != ilen) SETERRQ2(comm, PETSC_ERR_ARG_SIZ, "Incompatible IS sizes: %D and %D", ilen, llen); ierr = ISGetIndices(coloris, &ccolors);CHKERRQ(ierr); ierr = ISGetIndices(indis, &cinds);CHKERRQ(ierr); ierr = PetscMalloc2(ilen,&inds,llen,&colors);CHKERRQ(ierr); ierr = PetscMemcpy(inds,cinds,ilen*sizeof(PetscInt));CHKERRQ(ierr); ierr = PetscMemcpy(colors,ccolors,llen*sizeof(PetscInt));CHKERRQ(ierr); ierr = PetscSortIntWithArray(llen, colors, inds);CHKERRQ(ierr); /* Determine the global extent of colors. */ llow = 0; lhigh = -1; lstart = 0; lcount = 0; while (lstart < llen) { lend = lstart+1; while (lend < llen && colors[lend] == colors[lstart]) ++lend; llow = PetscMin(llow,colors[lstart]); lhigh = PetscMax(lhigh,colors[lstart]); ++lcount; } ierr = MPI_Allreduce(&llow,&low,1,MPI_INT,MPI_MIN,comm);CHKERRQ(ierr); ierr = MPI_Allreduce(&lhigh,&high,1,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); *listlen = 0; if (low <= high) { if (lcount > 0) { *listlen = lcount; if (!*islist) { ierr = PetscMalloc(sizeof(IS)*lcount, islist);CHKERRQ(ierr); } } /* Traverse all possible global colors, and participate in the subcommunicators for the locally-supported colors. */ lcount = 0; lstart = 0; lend = 0; for (l = low; l <= high; ++l) { /* Find the range of indices with the same color, which is not smaller than l. Observe that, since colors is sorted, and is a subsequence of [low,high], as soon as we find a new color, it is >= l. */ if (lstart < llen) { /* The start of the next locally-owned color is identified. Now look for the end. */ if (lstart == lend) { lend = lstart+1; while (lend < llen && colors[lend] == colors[lstart]) ++lend; } /* Now check whether the identified color segment matches l. */ if (colors[lstart] < l) SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Locally owned color %D at location %D is < than the next global color %D", colors[lstart], lcount, l); } color = (PetscMPIInt)(colors[lstart] == l); /* Check whether a proper subcommunicator exists. */ ierr = MPI_Allreduce(&color,&subsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); if (subsize == 1) subcomm = PETSC_COMM_SELF; else if (subsize == size) subcomm = comm; else { /* a proper communicator is necessary, so we create it. */ ierr = MPI_Comm_split(comm, color, rank, &subcomm);CHKERRQ(ierr); } if (colors[lstart] == l) { /* If we have l among the local colors, we create an IS to hold the corresponding indices. */ ierr = ISCreateGeneral(subcomm, lend-lstart,inds+lstart,PETSC_COPY_VALUES,*islist+lcount);CHKERRQ(ierr); /* Position lstart at the beginning of the next local color. */ lstart = lend; /* Increment the counter of the local colors split off into an IS. */ ++lcount; } if (subsize > 0 && subsize < size) { /* Irrespective of color, destroy the split off subcomm: a subcomm used in the IS creation above is duplicated into a proper PETSc comm. */ ierr = MPI_Comm_free(&subcomm);CHKERRQ(ierr); } } /* for (l = low; l < high; ++l) */ } /* if (low <= high) */ ierr = PetscFree2(inds,colors);CHKERRQ(ierr); PetscFunctionReturn(0); }
/*@ PetscConvEstGetConvRate - Returns an estimate of the convergence rate for the discretization Not collective Input Parameter: . ce - The PetscConvEst object Output Parameter: . alpha - The convergence rate for each field Note: The convergence rate alpha is defined by $ || u_h - u_exact || < C h^alpha where u_h is the discrete solution, and h is a measure of the discretization size. We solve a series of problems on refined meshes, calculate an error based upon the exact solution in the DS, and then fit the result to our model above using linear regression. Options database keys: . -snes_convergence_estimate : Execute convergence estimation and print out the rate Level: intermediate .keywords: PetscConvEst, convergence .seealso: PetscConvEstSetSolver(), PetscConvEstCreate(), PetscConvEstGetConvRate() @*/ PetscErrorCode PetscConvEstGetConvRate(PetscConvEst ce, PetscReal alpha[]) { DM *dm; PetscObject disc; MPI_Comm comm; const char *uname, *dmname; void *ctx; Vec u; PetscReal t = 0.0, *x, *y, slope, intercept; PetscInt *dof, dim, Nr = ce->Nr, r, f, oldlevel, oldnlev; PetscLogEvent event; PetscErrorCode ierr; PetscFunctionBegin; ierr = PetscObjectGetComm((PetscObject) ce, &comm);CHKERRQ(ierr); ierr = DMGetDimension(ce->idm, &dim);CHKERRQ(ierr); ierr = DMGetApplicationContext(ce->idm, &ctx);CHKERRQ(ierr); ierr = DMPlexSetRefinementUniform(ce->idm, PETSC_TRUE);CHKERRQ(ierr); ierr = DMGetRefineLevel(ce->idm, &oldlevel);CHKERRQ(ierr); ierr = PetscMalloc2((Nr+1), &dm, (Nr+1)*ce->Nf, &dof);CHKERRQ(ierr); dm[0] = ce->idm; for (f = 0; f < ce->Nf; ++f) alpha[f] = 0.0; /* Loop over meshes */ ierr = PetscLogEventRegister("ConvEst Error", PETSC_OBJECT_CLASSID, &event);CHKERRQ(ierr); for (r = 0; r <= Nr; ++r) { PetscLogStage stage; char stageName[PETSC_MAX_PATH_LEN]; ierr = PetscSNPrintf(stageName, PETSC_MAX_PATH_LEN-1, "ConvEst Refinement Level %D", r);CHKERRQ(ierr); ierr = PetscLogStageRegister(stageName, &stage);CHKERRQ(ierr); ierr = PetscLogStagePush(stage);CHKERRQ(ierr); if (r > 0) { ierr = DMRefine(dm[r-1], MPI_COMM_NULL, &dm[r]);CHKERRQ(ierr); ierr = DMSetCoarseDM(dm[r], dm[r-1]);CHKERRQ(ierr); ierr = DMCopyDisc(ce->idm, dm[r]);CHKERRQ(ierr); ierr = DMCopyTransform(ce->idm, dm[r]);CHKERRQ(ierr); ierr = PetscObjectGetName((PetscObject) dm[r-1], &dmname);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject) dm[r], dmname);CHKERRQ(ierr); for (f = 0; f <= ce->Nf; ++f) { PetscErrorCode (*nspconstr)(DM, PetscInt, MatNullSpace *); ierr = DMGetNullSpaceConstructor(dm[r-1], f, &nspconstr);CHKERRQ(ierr); ierr = DMSetNullSpaceConstructor(dm[r], f, nspconstr);CHKERRQ(ierr); } } ierr = DMViewFromOptions(dm[r], NULL, "-conv_dm_view");CHKERRQ(ierr); /* Create solution */ ierr = DMCreateGlobalVector(dm[r], &u);CHKERRQ(ierr); ierr = DMGetField(dm[r], 0, NULL, &disc);CHKERRQ(ierr); ierr = PetscObjectGetName(disc, &uname);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject) u, uname);CHKERRQ(ierr); /* Setup solver */ ierr = SNESReset(ce->snes);CHKERRQ(ierr); ierr = SNESSetDM(ce->snes, dm[r]);CHKERRQ(ierr); ierr = DMPlexSetSNESLocalFEM(dm[r], ctx, ctx, ctx);CHKERRQ(ierr); ierr = SNESSetFromOptions(ce->snes);CHKERRQ(ierr); /* Create initial guess */ ierr = DMProjectFunction(dm[r], t, ce->initGuess, ce->ctxs, INSERT_VALUES, u);CHKERRQ(ierr); ierr = SNESSolve(ce->snes, NULL, u);CHKERRQ(ierr); ierr = PetscLogEventBegin(event, ce, 0, 0, 0);CHKERRQ(ierr); ierr = DMComputeL2FieldDiff(dm[r], t, ce->exactSol, ce->ctxs, u, &ce->errors[r*ce->Nf]);CHKERRQ(ierr); ierr = PetscLogEventEnd(event, ce, 0, 0, 0);CHKERRQ(ierr); for (f = 0; f < ce->Nf; ++f) { PetscSection s, fs; PetscInt lsize; /* Could use DMGetOutputDM() to add in Dirichlet dofs */ ierr = DMGetSection(dm[r], &s);CHKERRQ(ierr); ierr = PetscSectionGetField(s, f, &fs);CHKERRQ(ierr); ierr = PetscSectionGetConstrainedStorageSize(fs, &lsize);CHKERRQ(ierr); ierr = MPI_Allreduce(&lsize, &dof[r*ce->Nf+f], 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject) ce->snes));CHKERRQ(ierr); ierr = PetscLogEventSetDof(event, f, dof[r*ce->Nf+f]);CHKERRQ(ierr); ierr = PetscLogEventSetError(event, f, ce->errors[r*ce->Nf+f]);CHKERRQ(ierr); } /* Monitor */ if (ce->monitor) { PetscReal *errors = &ce->errors[r*ce->Nf]; ierr = PetscPrintf(comm, "L_2 Error: ");CHKERRQ(ierr); if (ce->Nf > 1) {ierr = PetscPrintf(comm, "[");CHKERRQ(ierr);} for (f = 0; f < ce->Nf; ++f) { if (f > 0) {ierr = PetscPrintf(comm, ", ");CHKERRQ(ierr);} if (errors[f] < 1.0e-11) {ierr = PetscPrintf(comm, "< 1e-11");CHKERRQ(ierr);} else {ierr = PetscPrintf(comm, "%g", (double)errors[f]);CHKERRQ(ierr);} } if (ce->Nf > 1) {ierr = PetscPrintf(comm, "]");CHKERRQ(ierr);} ierr = PetscPrintf(comm, "\n");CHKERRQ(ierr); } if (!r) { /* PCReset() does not wipe out the level structure */ KSP ksp; PC pc; ierr = SNESGetKSP(ce->snes, &ksp);CHKERRQ(ierr); ierr = KSPGetPC(ksp, &pc);CHKERRQ(ierr); ierr = PCMGGetLevels(pc, &oldnlev);CHKERRQ(ierr); } /* Cleanup */ ierr = VecDestroy(&u);CHKERRQ(ierr); ierr = PetscLogStagePop();CHKERRQ(ierr); } for (r = 1; r <= Nr; ++r) { ierr = DMDestroy(&dm[r]);CHKERRQ(ierr); } /* Fit convergence rate */ ierr = PetscMalloc2(Nr+1, &x, Nr+1, &y);CHKERRQ(ierr); for (f = 0; f < ce->Nf; ++f) { for (r = 0; r <= Nr; ++r) { x[r] = PetscLog10Real(dof[r*ce->Nf+f]); y[r] = PetscLog10Real(ce->errors[r*ce->Nf+f]); } ierr = PetscLinearRegression(Nr+1, x, y, &slope, &intercept);CHKERRQ(ierr); /* Since h^{-dim} = N, lg err = s lg N + b = -s dim lg h + b */ alpha[f] = -slope * dim; } ierr = PetscFree2(x, y);CHKERRQ(ierr); ierr = PetscFree2(dm, dof);CHKERRQ(ierr); /* Restore solver */ ierr = SNESReset(ce->snes);CHKERRQ(ierr); { /* PCReset() does not wipe out the level structure */ KSP ksp; PC pc; ierr = SNESGetKSP(ce->snes, &ksp);CHKERRQ(ierr); ierr = KSPGetPC(ksp, &pc);CHKERRQ(ierr); ierr = PCMGSetLevels(pc, oldnlev, NULL);CHKERRQ(ierr); ierr = DMSetRefineLevel(ce->idm, oldlevel);CHKERRQ(ierr); /* The damn DMCoarsen() calls in PCMG can reset this */ } ierr = SNESSetDM(ce->snes, ce->idm);CHKERRQ(ierr); ierr = DMPlexSetSNESLocalFEM(ce->idm, ctx, ctx, ctx);CHKERRQ(ierr); ierr = SNESSetFromOptions(ce->snes);CHKERRQ(ierr); PetscFunctionReturn(0); }
PetscErrorCode DMSetUp_DA_2D(DM da) { DM_DA *dd = (DM_DA*)da->data; const PetscInt M = dd->M; const PetscInt N = dd->N; PetscInt m = dd->m; PetscInt n = dd->n; const PetscInt dof = dd->w; const PetscInt s = dd->s; DMBoundaryType bx = dd->bx; DMBoundaryType by = dd->by; DMDAStencilType stencil_type = dd->stencil_type; PetscInt *lx = dd->lx; PetscInt *ly = dd->ly; MPI_Comm comm; PetscMPIInt rank,size; PetscInt xs,xe,ys,ye,x,y,Xs,Xe,Ys,Ye,IXs,IXe,IYs,IYe; PetscInt up,down,left,right,i,n0,n1,n2,n3,n5,n6,n7,n8,*idx,nn; PetscInt xbase,*bases,*ldims,j,x_t,y_t,s_t,base,count; PetscInt s_x,s_y; /* s proportionalized to w */ PetscInt sn0 = 0,sn2 = 0,sn6 = 0,sn8 = 0; Vec local,global; VecScatter gtol; IS to,from; PetscErrorCode ierr; PetscFunctionBegin; if (stencil_type == DMDA_STENCIL_BOX && (bx == DM_BOUNDARY_MIRROR || by == DM_BOUNDARY_MIRROR)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Mirror boundary and box stencil"); ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); #if !defined(PETSC_USE_64BIT_INDICES) if (((Petsc64bitInt) M)*((Petsc64bitInt) N)*((Petsc64bitInt) dof) > (Petsc64bitInt) PETSC_MPI_INT_MAX) SETERRQ3(comm,PETSC_ERR_INT_OVERFLOW,"Mesh of %D by %D by %D (dof) is too large for 32 bit indices",M,N,dof); #endif if (dof < 1) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Must have 1 or more degrees of freedom per node: %D",dof); if (s < 0) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Stencil width cannot be negative: %D",s); ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); if (m != PETSC_DECIDE) { if (m < 1) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Non-positive number of processors in X direction: %D",m); else if (m > size) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"Too many processors in X direction: %D %d",m,size); } if (n != PETSC_DECIDE) { if (n < 1) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Non-positive number of processors in Y direction: %D",n); else if (n > size) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"Too many processors in Y direction: %D %d",n,size); } if (m == PETSC_DECIDE || n == PETSC_DECIDE) { if (n != PETSC_DECIDE) { m = size/n; } else if (m != PETSC_DECIDE) { n = size/m; } else { /* try for squarish distribution */ m = (PetscInt)(0.5 + PetscSqrtReal(((PetscReal)M)*((PetscReal)size)/((PetscReal)N))); if (!m) m = 1; while (m > 0) { n = size/m; if (m*n == size) break; m--; } if (M > N && m < n) {PetscInt _m = m; m = n; n = _m;} } if (m*n != size) SETERRQ(comm,PETSC_ERR_PLIB,"Unable to create partition, check the size of the communicator and input m and n "); } else if (m*n != size) SETERRQ(comm,PETSC_ERR_ARG_OUTOFRANGE,"Given Bad partition"); if (M < m) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"Partition in x direction is too fine! %D %D",M,m); if (N < n) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"Partition in y direction is too fine! %D %D",N,n); /* Determine locally owned region xs is the first local node number, x is the number of local nodes */ if (!lx) { ierr = PetscMalloc1(m, &dd->lx);CHKERRQ(ierr); lx = dd->lx; for (i=0; i<m; i++) { lx[i] = M/m + ((M % m) > i); } } x = lx[rank % m]; xs = 0; for (i=0; i<(rank % m); i++) { xs += lx[i]; } #if defined(PETSC_USE_DEBUG) left = xs; for (i=(rank % m); i<m; i++) { left += lx[i]; } if (left != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Sum of lx across processors not equal to M: %D %D",left,M); #endif /* Determine locally owned region ys is the first local node number, y is the number of local nodes */ if (!ly) { ierr = PetscMalloc1(n, &dd->ly);CHKERRQ(ierr); ly = dd->ly; for (i=0; i<n; i++) { ly[i] = N/n + ((N % n) > i); } } y = ly[rank/m]; ys = 0; for (i=0; i<(rank/m); i++) { ys += ly[i]; } #if defined(PETSC_USE_DEBUG) left = ys; for (i=(rank/m); i<n; i++) { left += ly[i]; } if (left != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Sum of ly across processors not equal to N: %D %D",left,N); #endif /* check if the scatter requires more than one process neighbor or wraps around the domain more than once */ if ((x < s) && ((m > 1) || (bx == DM_BOUNDARY_PERIODIC))) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local x-width of domain x %D is smaller than stencil width s %D",x,s); if ((y < s) && ((n > 1) || (by == DM_BOUNDARY_PERIODIC))) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local y-width of domain y %D is smaller than stencil width s %D",y,s); xe = xs + x; ye = ys + y; /* determine ghost region (Xs) and region scattered into (IXs) */ if (xs-s > 0) { Xs = xs - s; IXs = xs - s; } else { if (bx) { Xs = xs - s; } else { Xs = 0; } IXs = 0; } if (xe+s <= M) { Xe = xe + s; IXe = xe + s; } else { if (bx) { Xs = xs - s; Xe = xe + s; } else { Xe = M; } IXe = M; } if (bx == DM_BOUNDARY_PERIODIC || bx == DM_BOUNDARY_MIRROR) { IXs = xs - s; IXe = xe + s; Xs = xs - s; Xe = xe + s; } if (ys-s > 0) { Ys = ys - s; IYs = ys - s; } else { if (by) { Ys = ys - s; } else { Ys = 0; } IYs = 0; } if (ye+s <= N) { Ye = ye + s; IYe = ye + s; } else { if (by) { Ye = ye + s; } else { Ye = N; } IYe = N; } if (by == DM_BOUNDARY_PERIODIC || by == DM_BOUNDARY_MIRROR) { IYs = ys - s; IYe = ye + s; Ys = ys - s; Ye = ye + s; } /* stencil length in each direction */ s_x = s; s_y = s; /* determine starting point of each processor */ nn = x*y; ierr = PetscMalloc2(size+1,&bases,size,&ldims);CHKERRQ(ierr); ierr = MPI_Allgather(&nn,1,MPIU_INT,ldims,1,MPIU_INT,comm);CHKERRQ(ierr); bases[0] = 0; for (i=1; i<=size; i++) { bases[i] = ldims[i-1]; } for (i=1; i<=size; i++) { bases[i] += bases[i-1]; } base = bases[rank]*dof; /* allocate the base parallel and sequential vectors */ dd->Nlocal = x*y*dof; ierr = VecCreateMPIWithArray(comm,dof,dd->Nlocal,PETSC_DECIDE,NULL,&global);CHKERRQ(ierr); dd->nlocal = (Xe-Xs)*(Ye-Ys)*dof; ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,dof,dd->nlocal,NULL,&local);CHKERRQ(ierr); /* generate appropriate vector scatters */ /* local to global inserts non-ghost point region into global */ ierr = PetscMalloc1((IXe-IXs)*(IYe-IYs),&idx);CHKERRQ(ierr); left = xs - Xs; right = left + x; down = ys - Ys; up = down + y; count = 0; for (i=down; i<up; i++) { for (j=left; j<right; j++) { idx[count++] = i*(Xe-Xs) + j; } } /* global to local must include ghost points within the domain, but not ghost points outside the domain that aren't periodic */ if (stencil_type == DMDA_STENCIL_BOX) { left = IXs - Xs; right = left + (IXe-IXs); down = IYs - Ys; up = down + (IYe-IYs); count = 0; for (i=down; i<up; i++) { for (j=left; j<right; j++) { idx[count++] = j + i*(Xe-Xs); } } ierr = ISCreateBlock(comm,dof,count,idx,PETSC_OWN_POINTER,&to);CHKERRQ(ierr); } else { /* must drop into cross shape region */ /* ---------| | top | |--- ---| up | middle | | | ---- ---- down | bottom | ----------- Xs xs xe Xe */ left = xs - Xs; right = left + x; down = ys - Ys; up = down + y; count = 0; /* bottom */ for (i=(IYs-Ys); i<down; i++) { for (j=left; j<right; j++) { idx[count++] = j + i*(Xe-Xs); } } /* middle */ for (i=down; i<up; i++) { for (j=(IXs-Xs); j<(IXe-Xs); j++) { idx[count++] = j + i*(Xe-Xs); } } /* top */ for (i=up; i<up+IYe-ye; i++) { for (j=left; j<right; j++) { idx[count++] = j + i*(Xe-Xs); } } ierr = ISCreateBlock(comm,dof,count,idx,PETSC_OWN_POINTER,&to);CHKERRQ(ierr); } /* determine who lies on each side of us stored in n6 n7 n8 n3 n5 n0 n1 n2 */ /* Assume the Non-Periodic Case */ n1 = rank - m; if (rank % m) { n0 = n1 - 1; } else { n0 = -1; } if ((rank+1) % m) { n2 = n1 + 1; n5 = rank + 1; n8 = rank + m + 1; if (n8 >= m*n) n8 = -1; } else { n2 = -1; n5 = -1; n8 = -1; } if (rank % m) { n3 = rank - 1; n6 = n3 + m; if (n6 >= m*n) n6 = -1; } else { n3 = -1; n6 = -1; } n7 = rank + m; if (n7 >= m*n) n7 = -1; if (bx == DM_BOUNDARY_PERIODIC && by == DM_BOUNDARY_PERIODIC) { /* Modify for Periodic Cases */ /* Handle all four corners */ if ((n6 < 0) && (n7 < 0) && (n3 < 0)) n6 = m-1; if ((n8 < 0) && (n7 < 0) && (n5 < 0)) n8 = 0; if ((n2 < 0) && (n5 < 0) && (n1 < 0)) n2 = size-m; if ((n0 < 0) && (n3 < 0) && (n1 < 0)) n0 = size-1; /* Handle Top and Bottom Sides */ if (n1 < 0) n1 = rank + m * (n-1); if (n7 < 0) n7 = rank - m * (n-1); if ((n3 >= 0) && (n0 < 0)) n0 = size - m + rank - 1; if ((n3 >= 0) && (n6 < 0)) n6 = (rank%m)-1; if ((n5 >= 0) && (n2 < 0)) n2 = size - m + rank + 1; if ((n5 >= 0) && (n8 < 0)) n8 = (rank%m)+1; /* Handle Left and Right Sides */ if (n3 < 0) n3 = rank + (m-1); if (n5 < 0) n5 = rank - (m-1); if ((n1 >= 0) && (n0 < 0)) n0 = rank-1; if ((n1 >= 0) && (n2 < 0)) n2 = rank-2*m+1; if ((n7 >= 0) && (n6 < 0)) n6 = rank+2*m-1; if ((n7 >= 0) && (n8 < 0)) n8 = rank+1; } else if (by == DM_BOUNDARY_PERIODIC) { /* Handle Top and Bottom Sides */ if (n1 < 0) n1 = rank + m * (n-1); if (n7 < 0) n7 = rank - m * (n-1); if ((n3 >= 0) && (n0 < 0)) n0 = size - m + rank - 1; if ((n3 >= 0) && (n6 < 0)) n6 = (rank%m)-1; if ((n5 >= 0) && (n2 < 0)) n2 = size - m + rank + 1; if ((n5 >= 0) && (n8 < 0)) n8 = (rank%m)+1; } else if (bx == DM_BOUNDARY_PERIODIC) { /* Handle Left and Right Sides */ if (n3 < 0) n3 = rank + (m-1); if (n5 < 0) n5 = rank - (m-1); if ((n1 >= 0) && (n0 < 0)) n0 = rank-1; if ((n1 >= 0) && (n2 < 0)) n2 = rank-2*m+1; if ((n7 >= 0) && (n6 < 0)) n6 = rank+2*m-1; if ((n7 >= 0) && (n8 < 0)) n8 = rank+1; } ierr = PetscMalloc1(9,&dd->neighbors);CHKERRQ(ierr); dd->neighbors[0] = n0; dd->neighbors[1] = n1; dd->neighbors[2] = n2; dd->neighbors[3] = n3; dd->neighbors[4] = rank; dd->neighbors[5] = n5; dd->neighbors[6] = n6; dd->neighbors[7] = n7; dd->neighbors[8] = n8; if (stencil_type == DMDA_STENCIL_STAR) { /* save corner processor numbers */ sn0 = n0; sn2 = n2; sn6 = n6; sn8 = n8; n0 = n2 = n6 = n8 = -1; } ierr = PetscMalloc1((Xe-Xs)*(Ye-Ys),&idx);CHKERRQ(ierr); nn = 0; xbase = bases[rank]; for (i=1; i<=s_y; i++) { if (n0 >= 0) { /* left below */ x_t = lx[n0 % m]; y_t = ly[(n0/m)]; s_t = bases[n0] + x_t*y_t - (s_y-i)*x_t - s_x; for (j=0; j<s_x; j++) idx[nn++] = s_t++; } if (n1 >= 0) { /* directly below */ x_t = x; y_t = ly[(n1/m)]; s_t = bases[n1] + x_t*y_t - (s_y+1-i)*x_t; for (j=0; j<x_t; j++) idx[nn++] = s_t++; } else if (by == DM_BOUNDARY_MIRROR) { for (j=0; j<x; j++) idx[nn++] = bases[rank] + x*(s_y - i + 1) + j; } if (n2 >= 0) { /* right below */ x_t = lx[n2 % m]; y_t = ly[(n2/m)]; s_t = bases[n2] + x_t*y_t - (s_y+1-i)*x_t; for (j=0; j<s_x; j++) idx[nn++] = s_t++; } } for (i=0; i<y; i++) { if (n3 >= 0) { /* directly left */ x_t = lx[n3 % m]; /* y_t = y; */ s_t = bases[n3] + (i+1)*x_t - s_x; for (j=0; j<s_x; j++) idx[nn++] = s_t++; } else if (bx == DM_BOUNDARY_MIRROR) { for (j=0; j<s_x; j++) idx[nn++] = bases[rank] + x*i + s_x - j; } for (j=0; j<x; j++) idx[nn++] = xbase++; /* interior */ if (n5 >= 0) { /* directly right */ x_t = lx[n5 % m]; /* y_t = y; */ s_t = bases[n5] + (i)*x_t; for (j=0; j<s_x; j++) idx[nn++] = s_t++; } else if (bx == DM_BOUNDARY_MIRROR) { for (j=0; j<s_x; j++) idx[nn++] = bases[rank] + x*(i + 1) - 2 - j; } } for (i=1; i<=s_y; i++) { if (n6 >= 0) { /* left above */ x_t = lx[n6 % m]; /* y_t = ly[(n6/m)]; */ s_t = bases[n6] + (i)*x_t - s_x; for (j=0; j<s_x; j++) idx[nn++] = s_t++; } if (n7 >= 0) { /* directly above */ x_t = x; /* y_t = ly[(n7/m)]; */ s_t = bases[n7] + (i-1)*x_t; for (j=0; j<x_t; j++) idx[nn++] = s_t++; } else if (by == DM_BOUNDARY_MIRROR) { for (j=0; j<x; j++) idx[nn++] = bases[rank] + x*(y - i - 1) + j; } if (n8 >= 0) { /* right above */ x_t = lx[n8 % m]; /* y_t = ly[(n8/m)]; */ s_t = bases[n8] + (i-1)*x_t; for (j=0; j<s_x; j++) idx[nn++] = s_t++; } } ierr = ISCreateBlock(comm,dof,nn,idx,PETSC_USE_POINTER,&from);CHKERRQ(ierr); ierr = VecScatterCreate(global,from,local,to,>ol);CHKERRQ(ierr); ierr = PetscLogObjectParent((PetscObject)da,(PetscObject)gtol);CHKERRQ(ierr); ierr = ISDestroy(&to);CHKERRQ(ierr); ierr = ISDestroy(&from);CHKERRQ(ierr); if (stencil_type == DMDA_STENCIL_STAR) { n0 = sn0; n2 = sn2; n6 = sn6; n8 = sn8; } if (((stencil_type == DMDA_STENCIL_STAR) || (bx && bx != DM_BOUNDARY_PERIODIC) || (by && by != DM_BOUNDARY_PERIODIC))) { /* Recompute the local to global mappings, this time keeping the information about the cross corner processor numbers and any ghosted but not periodic indices. */ nn = 0; xbase = bases[rank]; for (i=1; i<=s_y; i++) { if (n0 >= 0) { /* left below */ x_t = lx[n0 % m]; y_t = ly[(n0/m)]; s_t = bases[n0] + x_t*y_t - (s_y-i)*x_t - s_x; for (j=0; j<s_x; j++) idx[nn++] = s_t++; } else if (xs-Xs > 0 && ys-Ys > 0) { for (j=0; j<s_x; j++) idx[nn++] = -1; } if (n1 >= 0) { /* directly below */ x_t = x; y_t = ly[(n1/m)]; s_t = bases[n1] + x_t*y_t - (s_y+1-i)*x_t; for (j=0; j<x_t; j++) idx[nn++] = s_t++; } else if (ys-Ys > 0) { if (by == DM_BOUNDARY_MIRROR) { for (j=0; j<x; j++) idx[nn++] = bases[rank] + x*(s_y - i + 1) + j; } else { for (j=0; j<x; j++) idx[nn++] = -1; } } if (n2 >= 0) { /* right below */ x_t = lx[n2 % m]; y_t = ly[(n2/m)]; s_t = bases[n2] + x_t*y_t - (s_y+1-i)*x_t; for (j=0; j<s_x; j++) idx[nn++] = s_t++; } else if (Xe-xe> 0 && ys-Ys > 0) { for (j=0; j<s_x; j++) idx[nn++] = -1; } } for (i=0; i<y; i++) { if (n3 >= 0) { /* directly left */ x_t = lx[n3 % m]; /* y_t = y; */ s_t = bases[n3] + (i+1)*x_t - s_x; for (j=0; j<s_x; j++) idx[nn++] = s_t++; } else if (xs-Xs > 0) { if (bx == DM_BOUNDARY_MIRROR) { for (j=0; j<s_x; j++) idx[nn++] = bases[rank] + x*i + s_x - j; } else { for (j=0; j<s_x; j++) idx[nn++] = -1; } } for (j=0; j<x; j++) idx[nn++] = xbase++; /* interior */ if (n5 >= 0) { /* directly right */ x_t = lx[n5 % m]; /* y_t = y; */ s_t = bases[n5] + (i)*x_t; for (j=0; j<s_x; j++) idx[nn++] = s_t++; } else if (Xe-xe > 0) { if (bx == DM_BOUNDARY_MIRROR) { for (j=0; j<s_x; j++) idx[nn++] = bases[rank] + x*(i + 1) - 2 - j; } else { for (j=0; j<s_x; j++) idx[nn++] = -1; } } } for (i=1; i<=s_y; i++) { if (n6 >= 0) { /* left above */ x_t = lx[n6 % m]; /* y_t = ly[(n6/m)]; */ s_t = bases[n6] + (i)*x_t - s_x; for (j=0; j<s_x; j++) idx[nn++] = s_t++; } else if (xs-Xs > 0 && Ye-ye > 0) { for (j=0; j<s_x; j++) idx[nn++] = -1; } if (n7 >= 0) { /* directly above */ x_t = x; /* y_t = ly[(n7/m)]; */ s_t = bases[n7] + (i-1)*x_t; for (j=0; j<x_t; j++) idx[nn++] = s_t++; } else if (Ye-ye > 0) { if (by == DM_BOUNDARY_MIRROR) { for (j=0; j<x; j++) idx[nn++] = bases[rank] + x*(y - i - 1) + j; } else { for (j=0; j<x; j++) idx[nn++] = -1; } } if (n8 >= 0) { /* right above */ x_t = lx[n8 % m]; /* y_t = ly[(n8/m)]; */ s_t = bases[n8] + (i-1)*x_t; for (j=0; j<s_x; j++) idx[nn++] = s_t++; } else if (Xe-xe > 0 && Ye-ye > 0) { for (j=0; j<s_x; j++) idx[nn++] = -1; } } } /* Set the local to global ordering in the global vector, this allows use of VecSetValuesLocal(). */ ierr = ISLocalToGlobalMappingCreate(comm,dof,nn,idx,PETSC_OWN_POINTER,&da->ltogmap);CHKERRQ(ierr); ierr = PetscLogObjectParent((PetscObject)da,(PetscObject)da->ltogmap);CHKERRQ(ierr); ierr = PetscFree2(bases,ldims);CHKERRQ(ierr); dd->m = m; dd->n = n; /* note petsc expects xs/xe/Xs/Xe to be multiplied by #dofs in many places */ dd->xs = xs*dof; dd->xe = xe*dof; dd->ys = ys; dd->ye = ye; dd->zs = 0; dd->ze = 1; dd->Xs = Xs*dof; dd->Xe = Xe*dof; dd->Ys = Ys; dd->Ye = Ye; dd->Zs = 0; dd->Ze = 1; ierr = VecDestroy(&local);CHKERRQ(ierr); ierr = VecDestroy(&global);CHKERRQ(ierr); dd->gtol = gtol; dd->base = base; da->ops->view = DMView_DA_2d; dd->ltol = NULL; dd->ao = NULL; PetscFunctionReturn(0); }
static PetscErrorCode VecAssemblyEnd_MPI_BTS(Vec X) { Vec_MPI *x = (Vec_MPI*)X->data; PetscInt bs = X->map->bs; PetscMPIInt npending,*some_indices,r; MPI_Status *some_statuses; PetscScalar *xarray; PetscErrorCode ierr; VecAssemblyFrame *frame; PetscFunctionBegin; if (X->stash.donotstash) { X->stash.insertmode = NOT_SET_VALUES; X->bstash.insertmode = NOT_SET_VALUES; PetscFunctionReturn(0); } ierr = VecGetArray(X,&xarray);CHKERRQ(ierr); ierr = PetscSegBufferExtractInPlace(x->segrecvframe,&frame);CHKERRQ(ierr); ierr = PetscMalloc2(4*x->nrecvranks,&some_indices,x->use_status?4*x->nrecvranks:0,&some_statuses);CHKERRQ(ierr); for (r=0,npending=0; r<x->nrecvranks; r++) npending += frame[r].pendings + frame[r].pendingb; while (npending>0) { PetscMPIInt ndone,ii; /* Filling MPI_Status fields requires some resources from the MPI library. We skip it on the first assembly, or * when VEC_SUBSET_OFF_PROC_ENTRIES has not been set, because we could exchange exact sizes in the initial * rendezvous. When the rendezvous is elided, however, we use MPI_Status to get actual message lengths, so that * subsequent assembly can set a proper subset of the values. */ ierr = MPI_Waitsome(4*x->nrecvranks,x->recvreqs,&ndone,some_indices,x->use_status?some_statuses:MPI_STATUSES_IGNORE);CHKERRQ(ierr); for (ii=0; ii<ndone; ii++) { PetscInt i = some_indices[ii]/4,j,k; InsertMode imode = (InsertMode)x->recvhdr[i].insertmode; PetscInt *recvint; PetscScalar *recvscalar; PetscBool intmsg = (PetscBool)(some_indices[ii]%2 == 0); PetscBool blockmsg = (PetscBool)((some_indices[ii]%4)/2 == 1); npending--; if (!blockmsg) { /* Scalar stash */ PetscMPIInt count; if (--frame[i].pendings > 0) continue; if (x->use_status) { ierr = MPI_Get_count(&some_statuses[ii],intmsg ? MPIU_INT : MPIU_SCALAR,&count);CHKERRQ(ierr); } else count = x->recvhdr[i].count; for (j=0,recvint=frame[i].ints,recvscalar=frame[i].scalars; j<count; j++,recvint++) { PetscInt loc = *recvint - X->map->rstart; if (*recvint < X->map->rstart || X->map->rend <= *recvint) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Received vector entry %D out of local range [%D,%D)]",*recvint,X->map->rstart,X->map->rend); switch (imode) { case ADD_VALUES: xarray[loc] += *recvscalar++; break; case INSERT_VALUES: xarray[loc] = *recvscalar++; break; default: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Insert mode not supported 0x%x",imode); } } } else { /* Block stash */ PetscMPIInt count; if (--frame[i].pendingb > 0) continue; if (x->use_status) { ierr = MPI_Get_count(&some_statuses[ii],intmsg ? MPIU_INT : MPIU_SCALAR,&count);CHKERRQ(ierr); if (!intmsg) count /= bs; /* Convert from number of scalars to number of blocks */ } else count = x->recvhdr[i].bcount; for (j=0,recvint=frame[i].intb,recvscalar=frame[i].scalarb; j<count; j++,recvint++) { PetscInt loc = (*recvint)*bs - X->map->rstart; switch (imode) { case ADD_VALUES: for (k=loc; k<loc+bs; k++) xarray[k] += *recvscalar++; break; case INSERT_VALUES: for (k=loc; k<loc+bs; k++) xarray[k] = *recvscalar++; break; default: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Insert mode not supported 0x%x",imode); } } } } } ierr = VecRestoreArray(X,&xarray);CHKERRQ(ierr); ierr = MPI_Waitall(4*x->nsendranks,x->sendreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); ierr = PetscFree2(some_indices,some_statuses);CHKERRQ(ierr); if (x->assembly_subset) { void *dummy; /* reset segbuffers */ ierr = PetscSegBufferExtractInPlace(x->segrecvint,&dummy);CHKERRQ(ierr); ierr = PetscSegBufferExtractInPlace(x->segrecvscalar,&dummy);CHKERRQ(ierr); } else { ierr = VecAssemblyReset_MPI(X);CHKERRQ(ierr); } X->stash.insertmode = NOT_SET_VALUES; X->bstash.insertmode = NOT_SET_VALUES; ierr = VecStashScatterEnd_Private(&X->stash);CHKERRQ(ierr); ierr = VecStashScatterEnd_Private(&X->bstash);CHKERRQ(ierr); PetscFunctionReturn(0); }
/* DMDAGetFaceInterpolation - Gets the interpolation for a face based coarse space */ PetscErrorCode DMDAGetFaceInterpolation(DM da,PC_Exotic *exotic,Mat Aglobal,MatReuse reuse,Mat *P) { PetscErrorCode ierr; PetscInt dim,i,j,k,m,n,p,dof,Nint,Nface,Nwire,Nsurf,*Iint,*Isurf,cint = 0,csurf = 0,istart,jstart,kstart,*II,N,c = 0; PetscInt mwidth,nwidth,pwidth,cnt,mp,np,pp,Ntotal,gl[6],*globals,Ng,*IIint,*IIsurf,Nt; Mat Xint, Xsurf,Xint_tmp; IS isint,issurf,is,row,col; ISLocalToGlobalMapping ltg; MPI_Comm comm; Mat A,Aii,Ais,Asi,*Aholder,iAii; MatFactorInfo info; PetscScalar *xsurf,*xint; #if defined(PETSC_USE_DEBUG_foo) PetscScalar tmp; #endif PetscTable ht; PetscFunctionBegin; ierr = DMDAGetInfo(da,&dim,0,0,0,&mp,&np,&pp,&dof,0,0,0,0,0); CHKERRQ(ierr); if (dof != 1) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Only for single field problems"); if (dim != 3) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Only coded for 3d problems"); ierr = DMDAGetCorners(da,0,0,0,&m,&n,&p); CHKERRQ(ierr); ierr = DMDAGetGhostCorners(da,&istart,&jstart,&kstart,&mwidth,&nwidth,&pwidth); CHKERRQ(ierr); istart = istart ? -1 : 0; jstart = jstart ? -1 : 0; kstart = kstart ? -1 : 0; /* the columns of P are the interpolation of each coarse grid point (one for each vertex and edge) to all the local degrees of freedom (this includes the vertices, edges and faces). Xint are the subset of the interpolation into the interior Xface are the interpolation onto faces but not into the interior Xsurf are the interpolation onto the vertices and edges (the surfbasket) Xint Symbolically one could write P = (Xface) after interchanging the rows to match the natural ordering on the domain Xsurf */ N = (m - istart)*(n - jstart)*(p - kstart); Nint = (m-2-istart)*(n-2-jstart)*(p-2-kstart); Nface = 2*((m-2-istart)*(n-2-jstart) + (m-2-istart)*(p-2-kstart) + (n-2-jstart)*(p-2-kstart)); Nwire = 4*((m-2-istart) + (n-2-jstart) + (p-2-kstart)) + 8; Nsurf = Nface + Nwire; ierr = MatCreateSeqDense(MPI_COMM_SELF,Nint,6,NULL,&Xint); CHKERRQ(ierr); ierr = MatCreateSeqDense(MPI_COMM_SELF,Nsurf,6,NULL,&Xsurf); CHKERRQ(ierr); ierr = MatDenseGetArray(Xsurf,&xsurf); CHKERRQ(ierr); /* Require that all 12 edges and 6 faces have at least one grid point. Otherwise some of the columns of Xsurf will be all zero (thus making the coarse matrix singular). */ if (m-istart < 3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Number of grid points per process in X direction must be at least 3"); if (n-jstart < 3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Number of grid points per process in Y direction must be at least 3"); if (p-kstart < 3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Number of grid points per process in Z direction must be at least 3"); cnt = 0; for (j=1; j<n-1-jstart; j++) { for (i=1; i<m-istart-1; i++) xsurf[cnt++ + 0*Nsurf] = 1; } for (k=1; k<p-1-kstart; k++) { for (i=1; i<m-istart-1; i++) xsurf[cnt++ + 1*Nsurf] = 1; for (j=1; j<n-1-jstart; j++) { xsurf[cnt++ + 2*Nsurf] = 1; /* these are the interior nodes */ xsurf[cnt++ + 3*Nsurf] = 1; } for (i=1; i<m-istart-1; i++) xsurf[cnt++ + 4*Nsurf] = 1; } for (j=1; j<n-1-jstart; j++) { for (i=1; i<m-istart-1; i++) xsurf[cnt++ + 5*Nsurf] = 1; } #if defined(PETSC_USE_DEBUG_foo) for (i=0; i<Nsurf; i++) { tmp = 0.0; for (j=0; j<6; j++) tmp += xsurf[i+j*Nsurf]; if (PetscAbsScalar(tmp-1.0) > 1.e-10) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Wrong Xsurf interpolation at i %D value %g",i,(double)PetscAbsScalar(tmp)); } #endif ierr = MatDenseRestoreArray(Xsurf,&xsurf); CHKERRQ(ierr); /* ierr = MatView(Xsurf,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);*/ /* I are the indices for all the needed vertices (in global numbering) Iint are the indices for the interior values, I surf for the surface values (This is just for the part of the global matrix obtained with MatGetSubMatrix(), it is NOT the local DMDA ordering.) IIint and IIsurf are the same as the Iint, Isurf except they are in the global numbering */ #define Endpoint(a,start,b) (a == 0 || a == (b-1-start)) ierr = PetscMalloc3(N,&II,Nint,&Iint,Nsurf,&Isurf); CHKERRQ(ierr); ierr = PetscMalloc2(Nint,&IIint,Nsurf,&IIsurf); CHKERRQ(ierr); for (k=0; k<p-kstart; k++) { for (j=0; j<n-jstart; j++) { for (i=0; i<m-istart; i++) { II[c++] = i + j*mwidth + k*mwidth*nwidth; if (!Endpoint(i,istart,m) && !Endpoint(j,jstart,n) && !Endpoint(k,kstart,p)) { IIint[cint] = i + j*mwidth + k*mwidth*nwidth; Iint[cint++] = i + j*(m-istart) + k*(m-istart)*(n-jstart); } else { IIsurf[csurf] = i + j*mwidth + k*mwidth*nwidth; Isurf[csurf++] = i + j*(m-istart) + k*(m-istart)*(n-jstart); } } } } if (c != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"c != N"); if (cint != Nint) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"cint != Nint"); if (csurf != Nsurf) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"csurf != Nsurf"); ierr = DMGetLocalToGlobalMapping(da,<g); CHKERRQ(ierr); ierr = ISLocalToGlobalMappingApply(ltg,N,II,II); CHKERRQ(ierr); ierr = ISLocalToGlobalMappingApply(ltg,Nint,IIint,IIint); CHKERRQ(ierr); ierr = ISLocalToGlobalMappingApply(ltg,Nsurf,IIsurf,IIsurf); CHKERRQ(ierr); ierr = PetscObjectGetComm((PetscObject)da,&comm); CHKERRQ(ierr); ierr = ISCreateGeneral(comm,N,II,PETSC_COPY_VALUES,&is); CHKERRQ(ierr); ierr = ISCreateGeneral(PETSC_COMM_SELF,Nint,Iint,PETSC_COPY_VALUES,&isint); CHKERRQ(ierr); ierr = ISCreateGeneral(PETSC_COMM_SELF,Nsurf,Isurf,PETSC_COPY_VALUES,&issurf); CHKERRQ(ierr); ierr = PetscFree3(II,Iint,Isurf); CHKERRQ(ierr); ierr = ISSort(is); CHKERRQ(ierr); ierr = MatGetSubMatrices(Aglobal,1,&is,&is,MAT_INITIAL_MATRIX,&Aholder); CHKERRQ(ierr); A = *Aholder; ierr = PetscFree(Aholder); CHKERRQ(ierr); ierr = MatGetSubMatrix(A,isint,isint,MAT_INITIAL_MATRIX,&Aii); CHKERRQ(ierr); ierr = MatGetSubMatrix(A,isint,issurf,MAT_INITIAL_MATRIX,&Ais); CHKERRQ(ierr); ierr = MatGetSubMatrix(A,issurf,isint,MAT_INITIAL_MATRIX,&Asi); CHKERRQ(ierr); /* Solve for the interpolation onto the interior Xint */ ierr = MatMatMult(Ais,Xsurf,MAT_INITIAL_MATRIX,PETSC_DETERMINE,&Xint_tmp); CHKERRQ(ierr); ierr = MatScale(Xint_tmp,-1.0); CHKERRQ(ierr); if (exotic->directSolve) { ierr = MatGetFactor(Aii,MATSOLVERPETSC,MAT_FACTOR_LU,&iAii); CHKERRQ(ierr); ierr = MatFactorInfoInitialize(&info); CHKERRQ(ierr); ierr = MatGetOrdering(Aii,MATORDERINGND,&row,&col); CHKERRQ(ierr); ierr = MatLUFactorSymbolic(iAii,Aii,row,col,&info); CHKERRQ(ierr); ierr = ISDestroy(&row); CHKERRQ(ierr); ierr = ISDestroy(&col); CHKERRQ(ierr); ierr = MatLUFactorNumeric(iAii,Aii,&info); CHKERRQ(ierr); ierr = MatMatSolve(iAii,Xint_tmp,Xint); CHKERRQ(ierr); ierr = MatDestroy(&iAii); CHKERRQ(ierr); } else { Vec b,x; PetscScalar *xint_tmp; ierr = MatDenseGetArray(Xint,&xint); CHKERRQ(ierr); ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,Nint,0,&x); CHKERRQ(ierr); ierr = MatDenseGetArray(Xint_tmp,&xint_tmp); CHKERRQ(ierr); ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,Nint,0,&b); CHKERRQ(ierr); ierr = KSPSetOperators(exotic->ksp,Aii,Aii); CHKERRQ(ierr); for (i=0; i<6; i++) { ierr = VecPlaceArray(x,xint+i*Nint); CHKERRQ(ierr); ierr = VecPlaceArray(b,xint_tmp+i*Nint); CHKERRQ(ierr); ierr = KSPSolve(exotic->ksp,b,x); CHKERRQ(ierr); ierr = VecResetArray(x); CHKERRQ(ierr); ierr = VecResetArray(b); CHKERRQ(ierr); } ierr = MatDenseRestoreArray(Xint,&xint); CHKERRQ(ierr); ierr = MatDenseRestoreArray(Xint_tmp,&xint_tmp); CHKERRQ(ierr); ierr = VecDestroy(&x); CHKERRQ(ierr); ierr = VecDestroy(&b); CHKERRQ(ierr); } ierr = MatDestroy(&Xint_tmp); CHKERRQ(ierr); #if defined(PETSC_USE_DEBUG_foo) ierr = MatDenseGetArray(Xint,&xint); CHKERRQ(ierr); for (i=0; i<Nint; i++) { tmp = 0.0; for (j=0; j<6; j++) tmp += xint[i+j*Nint]; if (PetscAbsScalar(tmp-1.0) > 1.e-10) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Wrong Xint interpolation at i %D value %g",i,(double)PetscAbsScalar(tmp)); } ierr = MatDenseRestoreArray(Xint,&xint); CHKERRQ(ierr); /* ierr =MatView(Xint,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); */ #endif /* total faces */ Ntotal = mp*np*(pp+1) + mp*pp*(np+1) + np*pp*(mp+1); /* For each vertex, edge, face on process (in the same orderings as used above) determine its local number including ghost points */ cnt = 0; { gl[cnt++] = mwidth+1; } { { gl[cnt++] = mwidth*nwidth+1; } { gl[cnt++] = mwidth*nwidth + mwidth; /* these are the interior nodes */ gl[cnt++] = mwidth*nwidth + mwidth+m-istart-1; } { gl[cnt++] = mwidth*nwidth+mwidth*(n-jstart-1)+1; } } { gl[cnt++] = mwidth*nwidth*(p-kstart-1) + mwidth+1; } /* PetscIntView(6,gl,PETSC_VIEWER_STDOUT_WORLD); */ /* convert that to global numbering and get them on all processes */ ierr = ISLocalToGlobalMappingApply(ltg,6,gl,gl); CHKERRQ(ierr); /* PetscIntView(6,gl,PETSC_VIEWER_STDOUT_WORLD); */ ierr = PetscMalloc1(6*mp*np*pp,&globals); CHKERRQ(ierr); ierr = MPI_Allgather(gl,6,MPIU_INT,globals,6,MPIU_INT,PetscObjectComm((PetscObject)da)); CHKERRQ(ierr); /* Number the coarse grid points from 0 to Ntotal */ ierr = MatGetSize(Aglobal,&Nt,NULL); CHKERRQ(ierr); ierr = PetscTableCreate(Ntotal/3,Nt+1,&ht); CHKERRQ(ierr); for (i=0; i<6*mp*np*pp; i++) { ierr = PetscTableAddCount(ht,globals[i]+1); CHKERRQ(ierr); } ierr = PetscTableGetCount(ht,&cnt); CHKERRQ(ierr); if (cnt != Ntotal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hash table size %D not equal to total number coarse grid points %D",cnt,Ntotal); ierr = PetscFree(globals); CHKERRQ(ierr); for (i=0; i<6; i++) { ierr = PetscTableFind(ht,gl[i]+1,&gl[i]); CHKERRQ(ierr); gl[i]--; } ierr = PetscTableDestroy(&ht); CHKERRQ(ierr); /* PetscIntView(6,gl,PETSC_VIEWER_STDOUT_WORLD); */ /* construct global interpolation matrix */ ierr = MatGetLocalSize(Aglobal,&Ng,NULL); CHKERRQ(ierr); if (reuse == MAT_INITIAL_MATRIX) { ierr = MatCreateAIJ(PetscObjectComm((PetscObject)da),Ng,PETSC_DECIDE,PETSC_DECIDE,Ntotal,Nint+Nsurf,NULL,Nint,NULL,P); CHKERRQ(ierr); } else { ierr = MatZeroEntries(*P); CHKERRQ(ierr); } ierr = MatSetOption(*P,MAT_ROW_ORIENTED,PETSC_FALSE); CHKERRQ(ierr); ierr = MatDenseGetArray(Xint,&xint); CHKERRQ(ierr); ierr = MatSetValues(*P,Nint,IIint,6,gl,xint,INSERT_VALUES); CHKERRQ(ierr); ierr = MatDenseRestoreArray(Xint,&xint); CHKERRQ(ierr); ierr = MatDenseGetArray(Xsurf,&xsurf); CHKERRQ(ierr); ierr = MatSetValues(*P,Nsurf,IIsurf,6,gl,xsurf,INSERT_VALUES); CHKERRQ(ierr); ierr = MatDenseRestoreArray(Xsurf,&xsurf); CHKERRQ(ierr); ierr = MatAssemblyBegin(*P,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); ierr = MatAssemblyEnd(*P,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); ierr = PetscFree2(IIint,IIsurf); CHKERRQ(ierr); #if defined(PETSC_USE_DEBUG_foo) { Vec x,y; PetscScalar *yy; ierr = VecCreateMPI(PetscObjectComm((PetscObject)da),Ng,PETSC_DETERMINE,&y); CHKERRQ(ierr); ierr = VecCreateMPI(PetscObjectComm((PetscObject)da),PETSC_DETERMINE,Ntotal,&x); CHKERRQ(ierr); ierr = VecSet(x,1.0); CHKERRQ(ierr); ierr = MatMult(*P,x,y); CHKERRQ(ierr); ierr = VecGetArray(y,&yy); CHKERRQ(ierr); for (i=0; i<Ng; i++) { if (PetscAbsScalar(yy[i]-1.0) > 1.e-10) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Wrong p interpolation at i %D value %g",i,(double)PetscAbsScalar(yy[i])); } ierr = VecRestoreArray(y,&yy); CHKERRQ(ierr); ierr = VecDestroy(x); CHKERRQ(ierr); ierr = VecDestroy(y); CHKERRQ(ierr); } #endif ierr = MatDestroy(&Aii); CHKERRQ(ierr); ierr = MatDestroy(&Ais); CHKERRQ(ierr); ierr = MatDestroy(&Asi); CHKERRQ(ierr); ierr = MatDestroy(&A); CHKERRQ(ierr); ierr = ISDestroy(&is); CHKERRQ(ierr); ierr = ISDestroy(&isint); CHKERRQ(ierr); ierr = ISDestroy(&issurf); CHKERRQ(ierr); ierr = MatDestroy(&Xint); CHKERRQ(ierr); ierr = MatDestroy(&Xsurf); CHKERRQ(ierr); PetscFunctionReturn(0); }
static PetscErrorCode PCSetUp_Redundant(PC pc) { PC_Redundant *red = (PC_Redundant*)pc->data; PetscErrorCode ierr; PetscInt mstart,mend,mlocal,M; PetscMPIInt size; MPI_Comm comm,subcomm; Vec x; PetscFunctionBegin; ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); /* if pmatrix set by user is sequential then we do not need to gather the parallel matrix */ ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); if (size == 1) red->useparallelmat = PETSC_FALSE; if (!pc->setupcalled) { PetscInt mloc_sub; if (!red->psubcomm) { /* create red->psubcomm, new ksp and pc over subcomm */ KSP ksp; ierr = PCRedundantGetKSP(pc,&ksp);CHKERRQ(ierr); } subcomm = PetscSubcommChild(red->psubcomm); if (red->useparallelmat) { /* grab the parallel matrix and put it into processors of a subcomminicator */ ierr = MatCreateRedundantMatrix(pc->pmat,red->psubcomm->n,subcomm,MAT_INITIAL_MATRIX,&red->pmats);CHKERRQ(ierr); ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); if (size > 1) { PetscBool foundpack; ierr = MatGetFactorAvailable(red->pmats,NULL,MAT_FACTOR_LU,&foundpack);CHKERRQ(ierr); if (!foundpack) { /* reset default ksp and pc */ ierr = KSPSetType(red->ksp,KSPGMRES);CHKERRQ(ierr); ierr = PCSetType(red->pc,PCBJACOBI);CHKERRQ(ierr); } else { ierr = PCFactorSetMatSolverType(red->pc,NULL);CHKERRQ(ierr); } } ierr = KSPSetOperators(red->ksp,red->pmats,red->pmats);CHKERRQ(ierr); /* get working vectors xsub and ysub */ ierr = MatCreateVecs(red->pmats,&red->xsub,&red->ysub);CHKERRQ(ierr); /* create working vectors xdup and ydup. xdup concatenates all xsub's contigously to form a mpi vector over dupcomm (see PetscSubcommCreate_interlaced()) ydup concatenates all ysub and has empty local arrays because ysub's arrays will be place into it. Note: we use communicator dupcomm, not PetscObjectComm((PetscObject)pc)! */ ierr = MatGetLocalSize(red->pmats,&mloc_sub,NULL);CHKERRQ(ierr); ierr = VecCreateMPI(PetscSubcommContiguousParent(red->psubcomm),mloc_sub,PETSC_DECIDE,&red->xdup);CHKERRQ(ierr); ierr = VecCreateMPIWithArray(PetscSubcommContiguousParent(red->psubcomm),1,mloc_sub,PETSC_DECIDE,NULL,&red->ydup);CHKERRQ(ierr); /* create vecscatters */ if (!red->scatterin) { /* efficiency of scatterin is independent from psubcomm_type! */ IS is1,is2; PetscInt *idx1,*idx2,i,j,k; ierr = MatCreateVecs(pc->pmat,&x,0);CHKERRQ(ierr); ierr = VecGetSize(x,&M);CHKERRQ(ierr); ierr = VecGetOwnershipRange(x,&mstart,&mend);CHKERRQ(ierr); mlocal = mend - mstart; ierr = PetscMalloc2(red->psubcomm->n*mlocal,&idx1,red->psubcomm->n*mlocal,&idx2);CHKERRQ(ierr); j = 0; for (k=0; k<red->psubcomm->n; k++) { for (i=mstart; i<mend; i++) { idx1[j] = i; idx2[j++] = i + M*k; } } ierr = ISCreateGeneral(comm,red->psubcomm->n*mlocal,idx1,PETSC_COPY_VALUES,&is1);CHKERRQ(ierr); ierr = ISCreateGeneral(comm,red->psubcomm->n*mlocal,idx2,PETSC_COPY_VALUES,&is2);CHKERRQ(ierr); ierr = VecScatterCreateWithData(x,is1,red->xdup,is2,&red->scatterin);CHKERRQ(ierr); ierr = ISDestroy(&is1);CHKERRQ(ierr); ierr = ISDestroy(&is2);CHKERRQ(ierr); /* Impl below is good for PETSC_SUBCOMM_INTERLACED (no inter-process communication) and PETSC_SUBCOMM_CONTIGUOUS (communication within subcomm) */ ierr = ISCreateStride(comm,mlocal,mstart+ red->psubcomm->color*M,1,&is1);CHKERRQ(ierr); ierr = ISCreateStride(comm,mlocal,mstart,1,&is2);CHKERRQ(ierr); ierr = VecScatterCreateWithData(red->xdup,is1,x,is2,&red->scatterout);CHKERRQ(ierr); ierr = ISDestroy(&is1);CHKERRQ(ierr); ierr = ISDestroy(&is2);CHKERRQ(ierr); ierr = PetscFree2(idx1,idx2);CHKERRQ(ierr); ierr = VecDestroy(&x);CHKERRQ(ierr); } } else { /* !red->useparallelmat */ ierr = KSPSetOperators(red->ksp,pc->mat,pc->pmat);CHKERRQ(ierr); } } else { /* pc->setupcalled */ if (red->useparallelmat) { MatReuse reuse; /* grab the parallel matrix and put it into processors of a subcomminicator */ /*--------------------------------------------------------------------------*/ if (pc->flag == DIFFERENT_NONZERO_PATTERN) { /* destroy old matrices */ ierr = MatDestroy(&red->pmats);CHKERRQ(ierr); reuse = MAT_INITIAL_MATRIX; } else { reuse = MAT_REUSE_MATRIX; } ierr = MatCreateRedundantMatrix(pc->pmat,red->psubcomm->n,PetscSubcommChild(red->psubcomm),reuse,&red->pmats);CHKERRQ(ierr); ierr = KSPSetOperators(red->ksp,red->pmats,red->pmats);CHKERRQ(ierr); } else { /* !red->useparallelmat */ ierr = KSPSetOperators(red->ksp,pc->mat,pc->pmat);CHKERRQ(ierr); } } if (pc->setfromoptionscalled) { ierr = KSPSetFromOptions(red->ksp);CHKERRQ(ierr); } ierr = KSPSetUp(red->ksp);CHKERRQ(ierr); PetscFunctionReturn(0); }
PetscErrorCode AOMap_MemoryScalable_private(AO ao,PetscInt n,PetscInt *ia,PetscInt *maploc) { PetscErrorCode ierr; AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data; MPI_Comm comm; PetscMPIInt rank,size,tag1,tag2; PetscInt *owner,*start,*sizes,nsends,nreceives; PetscInt nmax,count,*sindices,*rindices,i,j,idx,lastidx,*sindices2,*rindices2; PetscInt *owners = aomems->map->range; MPI_Request *send_waits,*recv_waits,*send_waits2,*recv_waits2; MPI_Status recv_status; PetscMPIInt nindices,source,widx; PetscInt *rbuf,*sbuf; MPI_Status *send_status,*send_status2; PetscFunctionBegin; ierr = PetscObjectGetComm((PetscObject)ao,&comm);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); /* first count number of contributors to each processor */ ierr = PetscMalloc2(2*size,&sizes,size,&start);CHKERRQ(ierr); ierr = PetscMemzero(sizes,2*size*sizeof(PetscInt));CHKERRQ(ierr); ierr = PetscCalloc1(n,&owner);CHKERRQ(ierr); j = 0; lastidx = -1; for (i=0; i<n; i++) { /* if indices are NOT locally sorted, need to start search at the beginning */ if (lastidx > (idx = ia[i])) j = 0; lastidx = idx; for (; j<size; j++) { if (idx >= owners[j] && idx < owners[j+1]) { sizes[2*j]++; /* num of indices to be sent */ sizes[2*j+1] = 1; /* send to proc[j] */ owner[i] = j; break; } } } sizes[2*rank]=sizes[2*rank+1]=0; /* do not receive from self! */ nsends = 0; for (i=0; i<size; i++) nsends += sizes[2*i+1]; /* inform other processors of number of messages and max length*/ ierr = PetscMaxSum(comm,sizes,&nmax,&nreceives);CHKERRQ(ierr); /* allocate arrays */ ierr = PetscObjectGetNewTag((PetscObject)ao,&tag1);CHKERRQ(ierr); ierr = PetscObjectGetNewTag((PetscObject)ao,&tag2);CHKERRQ(ierr); ierr = PetscMalloc2(nreceives*nmax,&rindices,nreceives,&recv_waits);CHKERRQ(ierr); ierr = PetscMalloc2(nsends*nmax,&rindices2,nsends,&recv_waits2);CHKERRQ(ierr); ierr = PetscMalloc3(n,&sindices,nsends,&send_waits,nsends,&send_status);CHKERRQ(ierr); ierr = PetscMalloc3(n,&sindices2,nreceives,&send_waits2,nreceives,&send_status2);CHKERRQ(ierr); /* post 1st receives: receive others requests since we don't know how long each individual message is we allocate the largest needed buffer for each receive. Potentially this is a lot of wasted space. */ for (i=0,count=0; i<nreceives; i++) { ierr = MPI_Irecv(rindices+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,recv_waits+count++);CHKERRQ(ierr); } /* do 1st sends: 1) starts[i] gives the starting index in svalues for stuff going to the ith processor */ start[0] = 0; for (i=1; i<size; i++) start[i] = start[i-1] + sizes[2*i-2]; for (i=0; i<n; i++) { j = owner[i]; if (j != rank) { sindices[start[j]++] = ia[i]; } else { /* compute my own map */ if (ia[i] >= owners[rank] && ia[i] < owners[rank+1]) { ia[i] = maploc[ia[i]-owners[rank]]; } else { ia[i] = -1; /* ia[i] is not in the range of 0 and N-1, maps it to -1 */ } } } start[0] = 0; for (i=1; i<size; i++) start[i] = start[i-1] + sizes[2*i-2]; for (i=0,count=0; i<size; i++) { if (sizes[2*i+1]) { /* send my request to others */ ierr = MPI_Isend(sindices+start[i],sizes[2*i],MPIU_INT,i,tag1,comm,send_waits+count);CHKERRQ(ierr); /* post receive for the answer of my request */ ierr = MPI_Irecv(sindices2+start[i],sizes[2*i],MPIU_INT,i,tag2,comm,recv_waits2+count);CHKERRQ(ierr); count++; } } if (nsends != count) SETERRQ2(comm,PETSC_ERR_SUP,"nsends %d != count %d",nsends,count); /* wait on 1st sends */ if (nsends) { ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); } /* 1st recvs: other's requests */ for (j=0; j< nreceives; j++) { ierr = MPI_Waitany(nreceives,recv_waits,&widx,&recv_status);CHKERRQ(ierr); /* idx: index of handle for operation that completed */ ierr = MPI_Get_count(&recv_status,MPIU_INT,&nindices);CHKERRQ(ierr); rbuf = rindices+nmax*widx; /* global index */ source = recv_status.MPI_SOURCE; /* compute mapping */ sbuf = rbuf; for (i=0; i<nindices; i++) sbuf[i] = maploc[rbuf[i]-owners[rank]]; /* send mapping back to the sender */ ierr = MPI_Isend(sbuf,nindices,MPIU_INT,source,tag2,comm,send_waits2+widx);CHKERRQ(ierr); } /* wait on 2nd sends */ if (nreceives) { ierr = MPI_Waitall(nreceives,send_waits2,send_status2);CHKERRQ(ierr); } /* 2nd recvs: for the answer of my request */ for (j=0; j< nsends; j++) { ierr = MPI_Waitany(nsends,recv_waits2,&widx,&recv_status);CHKERRQ(ierr); ierr = MPI_Get_count(&recv_status,MPIU_INT,&nindices);CHKERRQ(ierr); source = recv_status.MPI_SOURCE; /* pack output ia[] */ rbuf = sindices2+start[source]; count = 0; for (i=0; i<n; i++) { if (source == owner[i]) ia[i] = rbuf[count++]; } } /* free arrays */ ierr = PetscFree2(sizes,start);CHKERRQ(ierr); ierr = PetscFree(owner);CHKERRQ(ierr); ierr = PetscFree2(rindices,recv_waits);CHKERRQ(ierr); ierr = PetscFree2(rindices2,recv_waits2);CHKERRQ(ierr); ierr = PetscFree3(sindices,send_waits,send_status);CHKERRQ(ierr); ierr = PetscFree3(sindices2,send_waits2,send_status2);CHKERRQ(ierr); PetscFunctionReturn(0); }
/*@C PetscSFSetGraph - Set a parallel star forest Collective Input Arguments: + sf - star forest . nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) . nleaves - number of leaf vertices on the current process, each of these references a root on any process . ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage . localmode - copy mode for ilocal . iremote - remote locations of root vertices for each leaf on the current process - remotemode - copy mode for iremote Level: intermediate .seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph() @*/ PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode) { PetscErrorCode ierr; PetscTable table; PetscTablePosition pos; PetscMPIInt size; PetscInt i,*rcount,*ranks; PetscFunctionBegin; PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr); if (nleaves && ilocal) PetscValidIntPointer(ilocal,4); if (nleaves) PetscValidPointer(iremote,6); if (nroots < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"roots %D, cannot be negative",nroots); if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves); ierr = PetscSFReset(sf);CHKERRQ(ierr); sf->nroots = nroots; sf->nleaves = nleaves; if (ilocal) { switch (localmode) { case PETSC_COPY_VALUES: ierr = PetscMalloc1(nleaves,&sf->mine_alloc);CHKERRQ(ierr); sf->mine = sf->mine_alloc; ierr = PetscMemcpy(sf->mine,ilocal,nleaves*sizeof(*sf->mine));CHKERRQ(ierr); sf->minleaf = PETSC_MAX_INT; sf->maxleaf = PETSC_MIN_INT; for (i=0; i<nleaves; i++) { sf->minleaf = PetscMin(sf->minleaf,ilocal[i]); sf->maxleaf = PetscMax(sf->maxleaf,ilocal[i]); } break; case PETSC_OWN_POINTER: sf->mine_alloc = (PetscInt*)ilocal; sf->mine = sf->mine_alloc; break; case PETSC_USE_POINTER: sf->mine = (PetscInt*)ilocal; break; default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode"); } } if (!ilocal || nleaves > 0) { sf->minleaf = 0; sf->maxleaf = nleaves - 1; } switch (remotemode) { case PETSC_COPY_VALUES: ierr = PetscMalloc1(nleaves,&sf->remote_alloc);CHKERRQ(ierr); sf->remote = sf->remote_alloc; ierr = PetscMemcpy(sf->remote,iremote,nleaves*sizeof(*sf->remote));CHKERRQ(ierr); break; case PETSC_OWN_POINTER: sf->remote_alloc = (PetscSFNode*)iremote; sf->remote = sf->remote_alloc; break; case PETSC_USE_POINTER: sf->remote = (PetscSFNode*)iremote; break; default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode"); } ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr); ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr); for (i=0; i<nleaves; i++) { /* Log 1-based rank */ ierr = PetscTableAdd(table,iremote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr); } ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr); ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,nleaves,&sf->rmine,nleaves,&sf->rremote);CHKERRQ(ierr); ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr); ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr); for (i=0; i<sf->nranks; i++) { ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr); ranks[i]--; /* Convert back to 0-based */ } ierr = PetscTableDestroy(&table);CHKERRQ(ierr); ierr = PetscSortIntWithArray(sf->nranks,ranks,rcount);CHKERRQ(ierr); sf->roffset[0] = 0; for (i=0; i<sf->nranks; i++) { ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr); sf->roffset[i+1] = sf->roffset[i] + rcount[i]; rcount[i] = 0; } for (i=0; i<nleaves; i++) { PetscInt lo,hi,irank; /* Search for index of iremote[i].rank in sf->ranks */ lo = 0; hi = sf->nranks; while (hi - lo > 1) { PetscInt mid = lo + (hi - lo)/2; if (iremote[i].rank < sf->ranks[mid]) hi = mid; else lo = mid; } if (hi - lo == 1 && iremote[i].rank == sf->ranks[lo]) irank = lo; else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",iremote[i].rank); sf->rmine[sf->roffset[irank] + rcount[irank]] = ilocal ? ilocal[i] : i; sf->rremote[sf->roffset[irank] + rcount[irank]] = iremote[i].index; rcount[irank]++; } ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr); #if !defined(PETSC_USE_64BIT_INDICES) if (nroots == PETSC_DETERMINE) { /* Jed, if you have a better way to do this, put it in */ PetscInt *numRankLeaves, *leafOff, *leafIndices, *numRankRoots, *rootOff, *rootIndices, maxRoots = 0; /* All to all to determine number of leaf indices from each (you can do this using Scan and asynch messages) */ ierr = PetscMalloc4(size,&numRankLeaves,size+1,&leafOff,size,&numRankRoots,size+1,&rootOff);CHKERRQ(ierr); ierr = PetscMemzero(numRankLeaves, size * sizeof(PetscInt));CHKERRQ(ierr); for (i = 0; i < nleaves; ++i) ++numRankLeaves[iremote[i].rank]; ierr = MPI_Alltoall(numRankLeaves, 1, MPIU_INT, numRankRoots, 1, MPIU_INT, PetscObjectComm((PetscObject)sf));CHKERRQ(ierr); /* Could set nroots to this maximum */ for (i = 0; i < size; ++i) maxRoots += numRankRoots[i]; /* Gather all indices */ ierr = PetscMalloc2(nleaves,&leafIndices,maxRoots,&rootIndices);CHKERRQ(ierr); leafOff[0] = 0; for (i = 0; i < size; ++i) leafOff[i+1] = leafOff[i] + numRankLeaves[i]; for (i = 0; i < nleaves; ++i) leafIndices[leafOff[iremote[i].rank]++] = iremote[i].index; leafOff[0] = 0; for (i = 0; i < size; ++i) leafOff[i+1] = leafOff[i] + numRankLeaves[i]; rootOff[0] = 0; for (i = 0; i < size; ++i) rootOff[i+1] = rootOff[i] + numRankRoots[i]; ierr = MPI_Alltoallv(leafIndices, numRankLeaves, leafOff, MPIU_INT, rootIndices, numRankRoots, rootOff, MPIU_INT, PetscObjectComm((PetscObject)sf));CHKERRQ(ierr); /* Sort and reduce */ ierr = PetscSortRemoveDupsInt(&maxRoots, rootIndices);CHKERRQ(ierr); ierr = PetscFree2(leafIndices,rootIndices);CHKERRQ(ierr); ierr = PetscFree4(numRankLeaves,leafOff,numRankRoots,rootOff);CHKERRQ(ierr); sf->nroots = maxRoots; } #endif sf->graphset = PETSC_TRUE; ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr); PetscFunctionReturn(0); }
PETSC_EXTERN PetscErrorCode AOCreate_MemoryScalable(AO ao) { PetscErrorCode ierr; IS isapp=ao->isapp,ispetsc=ao->ispetsc; const PetscInt *mypetsc,*myapp; PetscInt napp,n_local,N,i,start,*petsc,*lens,*disp; MPI_Comm comm; AO_MemoryScalable *aomems; PetscLayout map; PetscMPIInt size,rank; PetscFunctionBegin; /* create special struct aomems */ ierr = PetscNewLog(ao,&aomems);CHKERRQ(ierr); ao->data = (void*) aomems; ierr = PetscMemcpy(ao->ops,&AOOps_MemoryScalable,sizeof(struct _AOOps));CHKERRQ(ierr); ierr = PetscObjectChangeTypeName((PetscObject)ao,AOMEMORYSCALABLE);CHKERRQ(ierr); /* transmit all local lengths of isapp to all processors */ ierr = PetscObjectGetComm((PetscObject)isapp,&comm);CHKERRQ(ierr); ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); ierr = PetscMalloc2(size,&lens,size,&disp);CHKERRQ(ierr); ierr = ISGetLocalSize(isapp,&napp);CHKERRQ(ierr); ierr = MPI_Allgather(&napp, 1, MPIU_INT, lens, 1, MPIU_INT, comm);CHKERRQ(ierr); N = 0; for (i = 0; i < size; i++) { disp[i] = N; N += lens[i]; } /* If ispetsc is 0 then use "natural" numbering */ if (napp) { if (!ispetsc) { start = disp[rank]; ierr = PetscMalloc1((napp+1), &petsc);CHKERRQ(ierr); for (i=0; i<napp; i++) petsc[i] = start + i; } else { ierr = ISGetIndices(ispetsc,&mypetsc);CHKERRQ(ierr); petsc = (PetscInt*)mypetsc; } } /* create a map with global size N - used to determine the local sizes of ao - shall we use local napp instead of N? */ ierr = PetscLayoutCreate(comm,&map);CHKERRQ(ierr); map->bs = 1; map->N = N; ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); ao->N = N; ao->n = map->n; aomems->map = map; /* create distributed indices app_loc: petsc->app and petsc_loc: app->petsc */ n_local = map->n; ierr = PetscMalloc2(n_local, &aomems->app_loc,n_local,&aomems->petsc_loc);CHKERRQ(ierr); ierr = PetscLogObjectMemory((PetscObject)ao,2*n_local*sizeof(PetscInt));CHKERRQ(ierr); ierr = PetscMemzero(aomems->app_loc,n_local*sizeof(PetscInt));CHKERRQ(ierr); ierr = PetscMemzero(aomems->petsc_loc,n_local*sizeof(PetscInt));CHKERRQ(ierr); ierr = ISGetIndices(isapp,&myapp);CHKERRQ(ierr); ierr = AOCreateMemoryScalable_private(comm,napp,petsc,myapp,ao,aomems->app_loc);CHKERRQ(ierr); ierr = AOCreateMemoryScalable_private(comm,napp,myapp,petsc,ao,aomems->petsc_loc);CHKERRQ(ierr); ierr = ISRestoreIndices(isapp,&myapp);CHKERRQ(ierr); if (napp) { if (ispetsc) { ierr = ISRestoreIndices(ispetsc,&mypetsc);CHKERRQ(ierr); } else { ierr = PetscFree(petsc);CHKERRQ(ierr); } } ierr = PetscFree2(lens,disp);CHKERRQ(ierr); PetscFunctionReturn(0); }
static PetscErrorCode CreateMesh(MPI_Comm comm, AppCtx *user, DM *dm) { DM dmDist = NULL; PetscInt dim = user->dim; PetscBool cellSimplex = user->cellSimplex; const char *filename = user->filename; const PetscInt cells[3] = {2, 2, 2}; size_t len; PetscErrorCode ierr; PetscFunctionBeginUser; ierr = PetscStrlen(filename, &len);CHKERRQ(ierr); if (len) {ierr = DMPlexCreateFromFile(comm, filename, PETSC_TRUE, dm);CHKERRQ(ierr);} else if (cellSimplex) {ierr = DMPlexCreateBoxMesh(comm, dim, PETSC_TRUE, dm);CHKERRQ(ierr);} else {ierr = DMPlexCreateHexBoxMesh(comm, dim, cells, PETSC_FALSE, PETSC_FALSE, PETSC_FALSE, dm);CHKERRQ(ierr);} if (user->testPartition) { PetscPartitioner part; const PetscInt *sizes = NULL; const PetscInt *points = NULL; PetscMPIInt rank, numProcs; ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); ierr = MPI_Comm_size(comm, &numProcs);CHKERRQ(ierr); if (!rank) { if (dim == 2 && cellSimplex && numProcs == 2) { switch (user->testNum) { case 0: { PetscInt triSizes_p2[2] = {4, 4}; PetscInt triPoints_p2[8] = {3, 5, 6, 7, 0, 1, 2, 4}; ierr = PetscMalloc2(2, &sizes, 8, &points);CHKERRQ(ierr); ierr = PetscMemcpy(sizes, triSizes_p2, 2 * sizeof(PetscInt));CHKERRQ(ierr); ierr = PetscMemcpy(points, triPoints_p2, 8 * sizeof(PetscInt));CHKERRQ(ierr);break;} case 1: { PetscInt triSizes_p2[2] = {6, 2}; PetscInt triPoints_p2[8] = {1, 2, 3, 4, 6, 7, 0, 5}; ierr = PetscMalloc2(2, &sizes, 8, &points);CHKERRQ(ierr); ierr = PetscMemcpy(sizes, triSizes_p2, 2 * sizeof(PetscInt));CHKERRQ(ierr); ierr = PetscMemcpy(points, triPoints_p2, 8 * sizeof(PetscInt));CHKERRQ(ierr);break;} default: SETERRQ1(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "Could not find matching test number %d for triangular mesh on 2 procs", user->testNum); } } else if (dim == 2 && cellSimplex && numProcs == 3) { PetscInt triSizes_p3[3] = {3, 3, 2}; PetscInt triPoints_p3[8] = {1, 2, 4, 3, 6, 7, 0, 5}; ierr = PetscMalloc2(3, &sizes, 8, &points);CHKERRQ(ierr); ierr = PetscMemcpy(sizes, triSizes_p3, 3 * sizeof(PetscInt));CHKERRQ(ierr); ierr = PetscMemcpy(points, triPoints_p3, 8 * sizeof(PetscInt));CHKERRQ(ierr); } else if (dim == 2 && !cellSimplex && numProcs == 2) { PetscInt quadSizes_p2[2] = {2, 2}; PetscInt quadPoints_p2[4] = {2, 3, 0, 1}; ierr = PetscMalloc2(2, &sizes, 4, &points);CHKERRQ(ierr); ierr = PetscMemcpy(sizes, quadSizes_p2, 2 * sizeof(PetscInt));CHKERRQ(ierr); ierr = PetscMemcpy(points, quadPoints_p2, 4 * sizeof(PetscInt));CHKERRQ(ierr); } else SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "Could not find matching test partition"); } ierr = DMPlexGetPartitioner(*dm, &part);CHKERRQ(ierr); ierr = PetscPartitionerSetType(part, PETSCPARTITIONERSHELL);CHKERRQ(ierr); ierr = PetscPartitionerShellSetPartition(part, numProcs, sizes, points);CHKERRQ(ierr); ierr = PetscFree2(sizes, points);CHKERRQ(ierr); } ierr = DMPlexDistribute(*dm, 0, NULL, &dmDist);CHKERRQ(ierr); if (dmDist) { ierr = DMDestroy(dm);CHKERRQ(ierr); *dm = dmDist; } ierr = PetscObjectSetName((PetscObject) *dm, cellSimplex ? "Simplicial Mesh" : "Tensor Product Mesh");CHKERRQ(ierr); ierr = DMViewFromOptions(*dm, NULL, "-dm_view");CHKERRQ(ierr); PetscFunctionReturn(0); }
/*@ ISBuildTwoSided - Takes an IS that describes where we will go. Generates an IS that contains new numbers from remote or local on the IS. Collective on IS Input Parameters . to - an IS describes where we will go. Negative target rank will be ignored . toindx - an IS describes what indices should send. NULL means sending natural numbering Output Parameter: . rows - contains new numbers from remote or local Level: advanced .seealso: MatPartitioningCreate(), ISPartitioningToNumbering(), ISPartitioningCount() @*/ PetscErrorCode ISBuildTwoSided(IS ito,IS toindx, IS *rows) { const PetscInt *ito_indices,*toindx_indices; PetscInt *send_indices,rstart,*recv_indices,nrecvs,nsends; PetscInt *tosizes,*fromsizes,i,j,*tosizes_tmp,*tooffsets_tmp,ito_ln; PetscMPIInt *toranks,*fromranks,size,target_rank,*fromperm_newtoold,nto,nfrom; PetscLayout isrmap; MPI_Comm comm; PetscSF sf; PetscSFNode *iremote; PetscErrorCode ierr; PetscFunctionBegin; ierr = PetscObjectGetComm((PetscObject)ito,&comm);CHKERRQ(ierr); ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); ierr = ISGetLocalSize(ito,&ito_ln);CHKERRQ(ierr); /* why we do not have ISGetLayout? */ isrmap = ito->map; ierr = PetscLayoutGetRange(isrmap,&rstart,NULL);CHKERRQ(ierr); ierr = ISGetIndices(ito,&ito_indices);CHKERRQ(ierr); ierr = PetscCalloc2(size,&tosizes_tmp,size+1,&tooffsets_tmp);CHKERRQ(ierr); for(i=0; i<ito_ln; i++){ if(ito_indices[i]<0) continue; #if defined(PETSC_USE_DEBUG) if(ito_indices[i]>=size) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"target rank %d is larger than communicator size %d ",ito_indices[i],size); #endif tosizes_tmp[ito_indices[i]]++; } nto = 0; for(i=0; i<size; i++){ tooffsets_tmp[i+1] = tooffsets_tmp[i]+tosizes_tmp[i]; if(tosizes_tmp[i]>0) nto++; } ierr = PetscCalloc2(nto,&toranks,2*nto,&tosizes);CHKERRQ(ierr); nto = 0; for(i=0; i<size; i++){ if(tosizes_tmp[i]>0){ toranks[nto] = i; tosizes[2*nto] = tosizes_tmp[i];/* size */ tosizes[2*nto+1] = tooffsets_tmp[i];/* offset */ nto++; } } nsends = tooffsets_tmp[size]; ierr = PetscCalloc1(nsends,&send_indices);CHKERRQ(ierr); if(toindx){ ierr = ISGetIndices(toindx,&toindx_indices);CHKERRQ(ierr); } for(i=0; i<ito_ln; i++){ if(ito_indices[i]<0) continue; target_rank = ito_indices[i]; send_indices[tooffsets_tmp[target_rank]] = toindx? toindx_indices[i]:(i+rstart); tooffsets_tmp[target_rank]++; } if(toindx){ ierr = ISRestoreIndices(toindx,&toindx_indices);CHKERRQ(ierr); } ierr = ISRestoreIndices(ito,&ito_indices);CHKERRQ(ierr); ierr = PetscFree2(tosizes_tmp,tooffsets_tmp);CHKERRQ(ierr); ierr = PetscCommBuildTwoSided(comm,2,MPIU_INT,nto,toranks,tosizes,&nfrom,&fromranks,&fromsizes);CHKERRQ(ierr); ierr = PetscFree2(toranks,tosizes);CHKERRQ(ierr); ierr = PetscCalloc1(nfrom,&fromperm_newtoold);CHKERRQ(ierr); for(i=0; i<nfrom; i++){ fromperm_newtoold[i] = i; } ierr = PetscSortMPIIntWithArray(nfrom,fromranks,fromperm_newtoold);CHKERRQ(ierr); nrecvs = 0; for(i=0; i<nfrom; i++){ nrecvs += fromsizes[i*2]; } ierr = PetscCalloc1(nrecvs,&recv_indices);CHKERRQ(ierr); ierr = PetscCalloc1(nrecvs,&iremote);CHKERRQ(ierr); nrecvs = 0; for(i=0; i<nfrom; i++){ for(j=0; j<fromsizes[2*fromperm_newtoold[i]]; j++){ iremote[nrecvs].rank = fromranks[i]; iremote[nrecvs++].index = fromsizes[2*fromperm_newtoold[i]+1]+j; } } ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); ierr = PetscSFSetGraph(sf,nsends,nrecvs,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr); /* how to put a prefix ? */ ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); ierr = PetscSFBcastBegin(sf,MPIU_INT,send_indices,recv_indices);CHKERRQ(ierr); ierr = PetscSFBcastEnd(sf,MPIU_INT,send_indices,recv_indices);CHKERRQ(ierr); ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); ierr = PetscFree(fromranks);CHKERRQ(ierr); ierr = PetscFree(fromsizes);CHKERRQ(ierr); ierr = PetscFree(fromperm_newtoold);CHKERRQ(ierr); ierr = PetscFree(send_indices);CHKERRQ(ierr); if(rows){ ierr = PetscSortInt(nrecvs,recv_indices);CHKERRQ(ierr); ierr = ISCreateGeneral(comm, nrecvs,recv_indices,PETSC_OWN_POINTER,rows);CHKERRQ(ierr); }else{ ierr = PetscFree(recv_indices);CHKERRQ(ierr); } PetscFunctionReturn(0); }
PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering(Mat C,Mat A,const MatFactorInfo *info) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; PetscErrorCode ierr; PetscInt i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; PetscInt *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; MatScalar *ba = b->a,*aa,*ap,*dk,*uik; MatScalar *u,*diag,*rtmp,*rtmp_ptr; PetscBool pivotinblocks = b->pivotinblocks; PetscReal shift = info->shiftamount; PetscFunctionBegin; /* initialization */ ierr = PetscMalloc(16*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); ierr = PetscMemzero(rtmp,16*mbs*sizeof(MatScalar));CHKERRQ(ierr); ierr = PetscMalloc2(mbs,PetscInt,&il,mbs,PetscInt,&jl);CHKERRQ(ierr); for (i=0; i<mbs; i++) { jl[i] = mbs; il[0] = 0; } ierr = PetscMalloc2(16,MatScalar,&dk,16,MatScalar,&uik);CHKERRQ(ierr); ai = a->i; aj = a->j; aa = a->a; /* for each row k */ for (k = 0; k<mbs; k++){ /*initialize k-th row with elements nonzero in row k of A */ jmin = ai[k]; jmax = ai[k+1]; if (jmin < jmax) { ap = aa + jmin*16; for (j = jmin; j < jmax; j++){ vj = aj[j]; /* block col. index */ rtmp_ptr = rtmp + vj*16; for (i=0; i<16; i++) *rtmp_ptr++ = *ap++; } } /* modify k-th row by adding in those rows i with U(i,k) != 0 */ ierr = PetscMemcpy(dk,rtmp+k*16,16*sizeof(MatScalar));CHKERRQ(ierr); i = jl[k]; /* first row to be added to k_th row */ while (i < mbs){ nexti = jl[i]; /* next row to be added to k_th row */ /* compute multiplier */ ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ /* uik = -inv(Di)*U_bar(i,k) */ diag = ba + i*16; u = ba + ili*16; uik[0] = -(diag[0]*u[0] + diag[4]*u[1] + diag[8]*u[2] + diag[12]*u[3]); uik[1] = -(diag[1]*u[0] + diag[5]*u[1] + diag[9]*u[2] + diag[13]*u[3]); uik[2] = -(diag[2]*u[0] + diag[6]*u[1] + diag[10]*u[2]+ diag[14]*u[3]); uik[3] = -(diag[3]*u[0] + diag[7]*u[1] + diag[11]*u[2]+ diag[15]*u[3]); uik[4] = -(diag[0]*u[4] + diag[4]*u[5] + diag[8]*u[6] + diag[12]*u[7]); uik[5] = -(diag[1]*u[4] + diag[5]*u[5] + diag[9]*u[6] + diag[13]*u[7]); uik[6] = -(diag[2]*u[4] + diag[6]*u[5] + diag[10]*u[6]+ diag[14]*u[7]); uik[7] = -(diag[3]*u[4] + diag[7]*u[5] + diag[11]*u[6]+ diag[15]*u[7]); uik[8] = -(diag[0]*u[8] + diag[4]*u[9] + diag[8]*u[10] + diag[12]*u[11]); uik[9] = -(diag[1]*u[8] + diag[5]*u[9] + diag[9]*u[10] + diag[13]*u[11]); uik[10]= -(diag[2]*u[8] + diag[6]*u[9] + diag[10]*u[10]+ diag[14]*u[11]); uik[11]= -(diag[3]*u[8] + diag[7]*u[9] + diag[11]*u[10]+ diag[15]*u[11]); uik[12]= -(diag[0]*u[12] + diag[4]*u[13] + diag[8]*u[14] + diag[12]*u[15]); uik[13]= -(diag[1]*u[12] + diag[5]*u[13] + diag[9]*u[14] + diag[13]*u[15]); uik[14]= -(diag[2]*u[12] + diag[6]*u[13] + diag[10]*u[14]+ diag[14]*u[15]); uik[15]= -(diag[3]*u[12] + diag[7]*u[13] + diag[11]*u[14]+ diag[15]*u[15]); /* update D(k) += -U(i,k)^T * U_bar(i,k) */ dk[0] += uik[0]*u[0] + uik[1]*u[1] + uik[2]*u[2] + uik[3]*u[3]; dk[1] += uik[4]*u[0] + uik[5]*u[1] + uik[6]*u[2] + uik[7]*u[3]; dk[2] += uik[8]*u[0] + uik[9]*u[1] + uik[10]*u[2]+ uik[11]*u[3]; dk[3] += uik[12]*u[0]+ uik[13]*u[1]+ uik[14]*u[2]+ uik[15]*u[3]; dk[4] += uik[0]*u[4] + uik[1]*u[5] + uik[2]*u[6] + uik[3]*u[7]; dk[5] += uik[4]*u[4] + uik[5]*u[5] + uik[6]*u[6] + uik[7]*u[7]; dk[6] += uik[8]*u[4] + uik[9]*u[5] + uik[10]*u[6]+ uik[11]*u[7]; dk[7] += uik[12]*u[4]+ uik[13]*u[5]+ uik[14]*u[6]+ uik[15]*u[7]; dk[8] += uik[0]*u[8] + uik[1]*u[9] + uik[2]*u[10] + uik[3]*u[11]; dk[9] += uik[4]*u[8] + uik[5]*u[9] + uik[6]*u[10] + uik[7]*u[11]; dk[10]+= uik[8]*u[8] + uik[9]*u[9] + uik[10]*u[10]+ uik[11]*u[11]; dk[11]+= uik[12]*u[8]+ uik[13]*u[9]+ uik[14]*u[10]+ uik[15]*u[11]; dk[12]+= uik[0]*u[12] + uik[1]*u[13] + uik[2]*u[14] + uik[3]*u[15]; dk[13]+= uik[4]*u[12] + uik[5]*u[13] + uik[6]*u[14] + uik[7]*u[15]; dk[14]+= uik[8]*u[12] + uik[9]*u[13] + uik[10]*u[14]+ uik[11]*u[15]; dk[15]+= uik[12]*u[12]+ uik[13]*u[13]+ uik[14]*u[14]+ uik[15]*u[15]; ierr = PetscLogFlops(64.0*4.0);CHKERRQ(ierr); /* update -U(i,k) */ ierr = PetscMemcpy(ba+ili*16,uik,16*sizeof(MatScalar));CHKERRQ(ierr); /* add multiple of row i to k-th row ... */ jmin = ili + 1; jmax = bi[i+1]; if (jmin < jmax){ for (j=jmin; j<jmax; j++) { /* rtmp += -U(i,k)^T * U_bar(i,j) */ rtmp_ptr = rtmp + bj[j]*16; u = ba + j*16; rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1] + uik[2]*u[2] + uik[3]*u[3]; rtmp_ptr[1] += uik[4]*u[0] + uik[5]*u[1] + uik[6]*u[2] + uik[7]*u[3]; rtmp_ptr[2] += uik[8]*u[0] + uik[9]*u[1] + uik[10]*u[2]+ uik[11]*u[3]; rtmp_ptr[3] += uik[12]*u[0]+ uik[13]*u[1]+ uik[14]*u[2]+ uik[15]*u[3]; rtmp_ptr[4] += uik[0]*u[4] + uik[1]*u[5] + uik[2]*u[6] + uik[3]*u[7]; rtmp_ptr[5] += uik[4]*u[4] + uik[5]*u[5] + uik[6]*u[6] + uik[7]*u[7]; rtmp_ptr[6] += uik[8]*u[4] + uik[9]*u[5] + uik[10]*u[6]+ uik[11]*u[7]; rtmp_ptr[7] += uik[12]*u[4]+ uik[13]*u[5]+ uik[14]*u[6]+ uik[15]*u[7]; rtmp_ptr[8] += uik[0]*u[8] + uik[1]*u[9] + uik[2]*u[10] + uik[3]*u[11]; rtmp_ptr[9] += uik[4]*u[8] + uik[5]*u[9] + uik[6]*u[10] + uik[7]*u[11]; rtmp_ptr[10]+= uik[8]*u[8] + uik[9]*u[9] + uik[10]*u[10]+ uik[11]*u[11]; rtmp_ptr[11]+= uik[12]*u[8]+ uik[13]*u[9]+ uik[14]*u[10]+ uik[15]*u[11]; rtmp_ptr[12]+= uik[0]*u[12] + uik[1]*u[13] + uik[2]*u[14] + uik[3]*u[15]; rtmp_ptr[13]+= uik[4]*u[12] + uik[5]*u[13] + uik[6]*u[14] + uik[7]*u[15]; rtmp_ptr[14]+= uik[8]*u[12] + uik[9]*u[13] + uik[10]*u[14]+ uik[11]*u[15]; rtmp_ptr[15]+= uik[12]*u[12]+ uik[13]*u[13]+ uik[14]*u[14]+ uik[15]*u[15]; } ierr = PetscLogFlops(2.0*64.0*(jmax-jmin));CHKERRQ(ierr); /* ... add i to row list for next nonzero entry */ il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; /* update jl */ } i = nexti; } /* save nonzero entries in k-th row of U ... */ /* invert diagonal block */ diag = ba+k*16; ierr = PetscMemcpy(diag,dk,16*sizeof(MatScalar));CHKERRQ(ierr); if (pivotinblocks) { ierr = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); } else { ierr = PetscKernel_A_gets_inverse_A_4_nopivot(diag);CHKERRQ(ierr); } jmin = bi[k]; jmax = bi[k+1]; if (jmin < jmax) { for (j=jmin; j<jmax; j++){ vj = bj[j]; /* block col. index of U */ u = ba + j*16; rtmp_ptr = rtmp + vj*16; for (k1=0; k1<16; k1++){ *u++ = *rtmp_ptr; *rtmp_ptr++ = 0.0; } } /* ... add k to row list for first nonzero entry in k-th row */ il[k] = jmin; i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; } } ierr = PetscFree(rtmp);CHKERRQ(ierr); ierr = PetscFree2(il,jl);CHKERRQ(ierr); ierr = PetscFree2(dk,uik);CHKERRQ(ierr); C->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering_inplace; C->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering_inplace; C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_4_NaturalOrdering_inplace; C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering_inplace; C->assembled = PETSC_TRUE; C->preallocated = PETSC_TRUE; ierr = PetscLogFlops(1.3333*64*b->mbs);CHKERRQ(ierr); /* from inverting diagonal blocks */ PetscFunctionReturn(0); }
PetscErrorCode TSMonitorSPEig(TS ts,PetscInt step,PetscReal ptime,Vec v,void *monctx) { TSMonitorSPEigCtx ctx = (TSMonitorSPEigCtx) monctx; PetscErrorCode ierr; KSP ksp = ctx->ksp; PetscInt n,N,nits,neig,i,its = 200; PetscReal *r,*c,time_step_save; PetscDrawSP drawsp = ctx->drawsp; Mat A,B; Vec xdot; SNES snes; PetscFunctionBegin; if (!step) PetscFunctionReturn(0); if (((ctx->howoften > 0) && (!(step % ctx->howoften))) || ((ctx->howoften == -1) && ts->reason)) { ierr = VecDuplicate(v,&xdot);CHKERRQ(ierr); ierr = TSGetSNES(ts,&snes);CHKERRQ(ierr); ierr = SNESGetJacobian(snes,&A,&B,NULL,NULL);CHKERRQ(ierr); ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,&B);CHKERRQ(ierr); /* This doesn't work because methods keep and use internal information about the shift so it seems we would need code for each method to trick the correct Jacobian in being computed. */ time_step_save = ts->time_step; ts->time_step = PETSC_MAX_REAL; ierr = SNESComputeJacobian(snes,v,A,B);CHKERRQ(ierr); ts->time_step = time_step_save; ierr = KSPSetOperators(ksp,B,B);CHKERRQ(ierr); ierr = VecGetSize(v,&n);CHKERRQ(ierr); if (n < 200) its = n; ierr = KSPSetTolerances(ksp,1.e-10,PETSC_DEFAULT,PETSC_DEFAULT,its);CHKERRQ(ierr); ierr = VecSetRandom(xdot,ctx->rand);CHKERRQ(ierr); ierr = KSPSolve(ksp,xdot,xdot);CHKERRQ(ierr); ierr = VecDestroy(&xdot);CHKERRQ(ierr); ierr = KSPGetIterationNumber(ksp,&nits);CHKERRQ(ierr); N = nits+2; if (nits) { PetscDraw draw; PetscReal pause; PetscDrawAxis axis; PetscReal xmin,xmax,ymin,ymax; ierr = PetscDrawSPReset(drawsp);CHKERRQ(ierr); ierr = PetscDrawSPSetLimits(drawsp,ctx->xmin,ctx->xmax,ctx->ymin,ctx->ymax);CHKERRQ(ierr); ierr = PetscMalloc2(PetscMax(n,N),&r,PetscMax(n,N),&c);CHKERRQ(ierr); if (ctx->computeexplicitly) { ierr = KSPComputeEigenvaluesExplicitly(ksp,n,r,c);CHKERRQ(ierr); neig = n; } else { ierr = KSPComputeEigenvalues(ksp,N,r,c,&neig);CHKERRQ(ierr); } /* We used the positive operator to be able to reuse KSPs that require positive definiteness, now flip the spectrum as is conventional for ODEs */ for (i=0; i<neig; i++) r[i] = -r[i]; for (i=0; i<neig; i++) { if (ts->ops->linearstability) { PetscReal fr,fi; ierr = TSComputeLinearStability(ts,r[i],c[i],&fr,&fi);CHKERRQ(ierr); if ((fr*fr + fi*fi) > 1.0) { ierr = PetscPrintf(ctx->comm,"Linearized Eigenvalue %g + %g i linear stability function %g norm indicates unstable scheme \n",(double)r[i],(double)c[i],(double)(fr*fr + fi*fi));CHKERRQ(ierr); } } ierr = PetscDrawSPAddPoint(drawsp,r+i,c+i);CHKERRQ(ierr); } ierr = PetscFree2(r,c);CHKERRQ(ierr); ierr = PetscDrawSPGetDraw(drawsp,&draw);CHKERRQ(ierr); ierr = PetscDrawGetPause(draw,&pause);CHKERRQ(ierr); ierr = PetscDrawSetPause(draw,0.0);CHKERRQ(ierr); ierr = PetscDrawSPDraw(drawsp,PETSC_TRUE);CHKERRQ(ierr); ierr = PetscDrawSetPause(draw,pause);CHKERRQ(ierr); if (ts->ops->linearstability) { ierr = PetscDrawSPGetAxis(drawsp,&axis);CHKERRQ(ierr); ierr = PetscDrawAxisGetLimits(axis,&xmin,&xmax,&ymin,&ymax);CHKERRQ(ierr); ierr = PetscDrawIndicatorFunction(draw,xmin,xmax,ymin,ymax,PETSC_DRAW_CYAN,(PetscErrorCode (*)(void*,PetscReal,PetscReal,PetscBool*))TSLinearStabilityIndicator,ts);CHKERRQ(ierr); ierr = PetscDrawSPDraw(drawsp,PETSC_FALSE);CHKERRQ(ierr); } } ierr = MatDestroy(&B);CHKERRQ(ierr); } PetscFunctionReturn(0); }
/* DMPatchZoom - Create a version of the coarse patch (identified by rank) with halo on communicator commz Collective on DM Input Parameters: + dm - the DM . rank - the rank which holds the given patch - commz - the new communicator for the patch Output Parameters: + dmz - the patch DM . sfz - the PetscSF mapping the patch+halo to the zoomed version . sfzr - the PetscSF mapping the patch to the restricted zoomed version Level: intermediate Note: All processes in commz should have the same rank (could autosplit comm) .seealso: DMPatchSolve() */ PetscErrorCode DMPatchZoom(DM dm, Vec X, MatStencil lower, MatStencil upper, MPI_Comm commz, DM *dmz, PetscSF *sfz, PetscSF *sfzr) { DMDAStencilType st; MatStencil blower, bupper, loclower, locupper; IS is; const PetscInt *ranges, *indices; PetscInt *localPoints = NULL; PetscSFNode *remotePoints = NULL; PetscInt dim, dof; PetscInt M, N, P, rM, rN, rP, halo = 1, sxb, syb, szb, sxr, syr, szr, exr, eyr, ezr, mxb, myb, mzb, i, j, k, q; PetscMPIInt size; PetscErrorCode ierr; PetscFunctionBegin; ierr = MPI_Comm_size(PetscObjectComm((PetscObject)dm), &size);CHKERRQ(ierr); /* Create patch DM */ ierr = DMDAGetInfo(dm, &dim, &M, &N, &P, 0,0,0, &dof, 0,0,0,0, &st);CHKERRQ(ierr); /* Get piece for rank r, expanded by halo */ bupper.i = PetscMin(M, upper.i + halo); blower.i = PetscMax(lower.i - halo, 0); bupper.j = PetscMin(N, upper.j + halo); blower.j = PetscMax(lower.j - halo, 0); bupper.k = PetscMin(P, upper.k + halo); blower.k = PetscMax(lower.k - halo, 0); rM = bupper.i - blower.i; rN = bupper.j - blower.j; rP = bupper.k - blower.k; if (commz != MPI_COMM_NULL) { ierr = DMDACreate(commz, dmz);CHKERRQ(ierr); ierr = DMSetDimension(*dmz, dim);CHKERRQ(ierr); ierr = DMDASetSizes(*dmz, rM, rN, rP);CHKERRQ(ierr); ierr = DMDASetNumProcs(*dmz, PETSC_DECIDE, PETSC_DECIDE, PETSC_DECIDE);CHKERRQ(ierr); ierr = DMDASetBoundaryType(*dmz, DM_BOUNDARY_NONE, DM_BOUNDARY_NONE, DM_BOUNDARY_NONE);CHKERRQ(ierr); ierr = DMDASetDof(*dmz, dof);CHKERRQ(ierr); ierr = DMDASetStencilType(*dmz, st);CHKERRQ(ierr); ierr = DMDASetStencilWidth(*dmz, 0);CHKERRQ(ierr); ierr = DMDASetOwnershipRanges(*dmz, NULL, NULL, NULL);CHKERRQ(ierr); ierr = DMSetFromOptions(*dmz);CHKERRQ(ierr); ierr = DMSetUp(*dmz);CHKERRQ(ierr); ierr = DMDAGetCorners(*dmz, &sxb, &syb, &szb, &mxb, &myb, &mzb);CHKERRQ(ierr); sxr = PetscMax(sxb, lower.i - blower.i); syr = PetscMax(syb, lower.j - blower.j); szr = PetscMax(szb, lower.k - blower.k); exr = PetscMin(sxb+mxb, upper.i - blower.i); eyr = PetscMin(syb+myb, upper.j - blower.j); ezr = PetscMin(szb+mzb, upper.k - blower.k); ierr = PetscMalloc2(rM*rN*rP,&localPoints,rM*rN*rP,&remotePoints);CHKERRQ(ierr); } else { sxr = syr = szr = exr = eyr = ezr = sxb = syb = szb = mxb = myb = mzb = 0; } /* Create SF for restricted map */ ierr = VecGetOwnershipRanges(X,&ranges);CHKERRQ(ierr); loclower.i = blower.i + sxr; locupper.i = blower.i + exr; loclower.j = blower.j + syr; locupper.j = blower.j + eyr; loclower.k = blower.k + szr; locupper.k = blower.k + ezr; ierr = DMDACreatePatchIS(dm, &loclower, &locupper, &is);CHKERRQ(ierr); ierr = ISGetIndices(is, &indices);CHKERRQ(ierr); q = 0; for (k = szb; k < szb+mzb; ++k) { if ((k < szr) || (k >= ezr)) continue; for (j = syb; j < syb+myb; ++j) { if ((j < syr) || (j >= eyr)) continue; for (i = sxb; i < sxb+mxb; ++i) { const PetscInt lp = ((k-szb)*rN + (j-syb))*rM + i-sxb; PetscInt r; if ((i < sxr) || (i >= exr)) continue; localPoints[q] = lp; ierr = PetscFindInt(indices[q], size+1, ranges, &r);CHKERRQ(ierr); remotePoints[q].rank = r < 0 ? -(r+1) - 1 : r; remotePoints[q].index = indices[q] - ranges[remotePoints[q].rank]; ++q; } } } ierr = ISRestoreIndices(is, &indices);CHKERRQ(ierr); ierr = ISDestroy(&is);CHKERRQ(ierr); ierr = PetscSFCreate(PetscObjectComm((PetscObject)dm), sfzr);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject) *sfzr, "Restricted Map");CHKERRQ(ierr); ierr = PetscSFSetGraph(*sfzr, M*N*P, q, localPoints, PETSC_COPY_VALUES, remotePoints, PETSC_COPY_VALUES);CHKERRQ(ierr); /* Create SF for buffered map */ loclower.i = blower.i + sxb; locupper.i = blower.i + sxb+mxb; loclower.j = blower.j + syb; locupper.j = blower.j + syb+myb; loclower.k = blower.k + szb; locupper.k = blower.k + szb+mzb; ierr = DMDACreatePatchIS(dm, &loclower, &locupper, &is);CHKERRQ(ierr); ierr = ISGetIndices(is, &indices);CHKERRQ(ierr); q = 0; for (k = szb; k < szb+mzb; ++k) { for (j = syb; j < syb+myb; ++j) { for (i = sxb; i < sxb+mxb; ++i, ++q) { PetscInt r; localPoints[q] = q; ierr = PetscFindInt(indices[q], size+1, ranges, &r);CHKERRQ(ierr); remotePoints[q].rank = r < 0 ? -(r+1) - 1 : r; remotePoints[q].index = indices[q] - ranges[remotePoints[q].rank]; } } } ierr = ISRestoreIndices(is, &indices);CHKERRQ(ierr); ierr = ISDestroy(&is);CHKERRQ(ierr); ierr = PetscSFCreate(PetscObjectComm((PetscObject)dm), sfz);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject) *sfz, "Buffered Map");CHKERRQ(ierr); ierr = PetscSFSetGraph(*sfz, M*N*P, q, localPoints, PETSC_COPY_VALUES, remotePoints, PETSC_COPY_VALUES);CHKERRQ(ierr); ierr = PetscFree2(localPoints, remotePoints);CHKERRQ(ierr); PetscFunctionReturn(0); }
int main(int argc,char **argv) { PetscErrorCode ierr; PetscInt i,n,*ix,*iy,*tomap,start; Vec x,y; PetscMPIInt nproc,rank; IS isx,isy; const PetscInt *ranges; VecScatter vscat; PetscFunctionBegin; ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr; ierr = MPI_Comm_size(PETSC_COMM_WORLD,&nproc);CHKERRQ(ierr); ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); if (nproc != 2) SETERRQ(PETSC_COMM_SELF,1,"This test must run with exactly two MPI ranks\n"); /* ==================================================================== (1) test VecScatterRemap on a parallel to parallel (PtoP) vecscatter ==================================================================== */ n = 64; /* long enough to trigger memcpy optimizations both in local scatter and remote scatter */ /* create two MPI vectors x, y of length n=64, N=128 */ ierr = VecCreateMPI(PETSC_COMM_WORLD,n,PETSC_DECIDE,&x);CHKERRQ(ierr); ierr = VecDuplicate(x,&y);CHKERRQ(ierr); /* Initialize x as {0~127} */ ierr = VecGetOwnershipRanges(x,&ranges);CHKERRQ(ierr); for (i=ranges[rank]; i<ranges[rank+1]; i++) { ierr = VecSetValue(x,i,(PetscScalar)i,INSERT_VALUES);CHKERRQ(ierr); } ierr = VecAssemblyBegin(x);CHKERRQ(ierr); ierr = VecAssemblyEnd(x);CHKERRQ(ierr); /* create two general index sets isx = {0~127} and isy = {32~63,64~95,96~127,0~31}. isx is sequential, but we use it as general and let PETSc detect the pattern and optimize it. indices in isy are set to make the vecscatter have both local scatter and remote scatter (i.e., MPI communication) */ ierr = PetscMalloc2(n,&ix,n,&iy);CHKERRQ(ierr); start = ranges[rank]; for (i=ranges[rank]; i<ranges[rank+1]; i++) ix[i-start] = i; ierr = ISCreateGeneral(PETSC_COMM_WORLD,n,ix,PETSC_COPY_VALUES,&isx);CHKERRQ(ierr); if (!rank) { for (i=0; i<n; i++) iy[i] = i+32; } else for (i=0; i<n/2; i++) { iy[i] = i+96; iy[i+n/2] = i; } ierr = ISCreateGeneral(PETSC_COMM_WORLD,n,iy,PETSC_COPY_VALUES,&isy);CHKERRQ(ierr); /* create a vecscatter that shifts x to the tail by quater periodically and puts the results in y */ ierr = VecScatterCreateWithData(x,isx,y,isy,&vscat);CHKERRQ(ierr); ierr = VecScatterBegin(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); /* view y to check the result. y should be {Q3,Q0,Q1,Q2} of x, that is {96~127,0~31,32~63,64~95} */ ierr = PetscPrintf(PETSC_COMM_WORLD,"Before VecScatterRemap on PtoP, MPI vector y is:\n");CHKERRQ(ierr); ierr = VecView(y,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); /* now call the weird subroutine VecScatterRemap to slightly change the vecscatter. It changes where we read vector x entries to send out, but does not change the communication pattern (i.e., send/recv pairs and msg lengths). We create tomap as {32~63,0~31}. Originaly, we read from indices {0~64} of the local x to send out. The remap does indices[i] = tomap[indices[i]]. Therefore, after the remap, we read from indices {32~63,0~31} of the local x. isy is unchanged. So, we will shift x to {Q2,Q1,Q0,Q3}, that is {64~95,32~63,0~31,96~127} */ ierr = PetscMalloc1(n,&tomap);CHKERRQ(ierr); for (i=0; i<n/2; i++) { tomap[i] = i+n/2; tomap[i+n/2] = i; }; ierr = VecScatterRemap(vscat,tomap,NULL);CHKERRQ(ierr); ierr = VecScatterBegin(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); /* view y to check the result. y should be {64~95,32~63,0~31,96~127} */ ierr = PetscPrintf(PETSC_COMM_WORLD,"After VecScatterRemap on PtoP, MPI vector y is:\n");CHKERRQ(ierr); ierr = VecView(y,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); /* destroy everything before we recreate them in different types */ ierr = PetscFree2(ix,iy);CHKERRQ(ierr); ierr = VecDestroy(&x);CHKERRQ(ierr); ierr = VecDestroy(&y);CHKERRQ(ierr); ierr = ISDestroy(&isx);CHKERRQ(ierr); ierr = ISDestroy(&isy);CHKERRQ(ierr); ierr = PetscFree(tomap);CHKERRQ(ierr); ierr = VecScatterDestroy(&vscat);CHKERRQ(ierr); /* ========================================================================================== (2) test VecScatterRemap on a sequential general to sequential general (SGToSG) vecscatter ========================================================================================== */ n = 64; /* long enough to trigger memcpy optimizations in local scatter */ /* create two seq vectors x, y of length n */ ierr = VecCreateSeq(PETSC_COMM_SELF,n,&x);CHKERRQ(ierr); ierr = VecDuplicate(x,&y);CHKERRQ(ierr); /* Initialize x as {0~63} */ for (i=0; i<n; i++) { ierr = VecSetValue(x,i,(PetscScalar)i,INSERT_VALUES);CHKERRQ(ierr); } ierr = VecAssemblyBegin(x);CHKERRQ(ierr); ierr = VecAssemblyEnd(x);CHKERRQ(ierr); /* create two general index sets isx = isy = {0~63}, which are sequential, but we use them as general and let PETSc detect the pattern and optimize it */ ierr = PetscMalloc2(n,&ix,n,&iy);CHKERRQ(ierr); for (i=0; i<n; i++) ix[i] = i; ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ix,PETSC_COPY_VALUES,&isx);CHKERRQ(ierr); ierr = ISDuplicate(isx,&isy);CHKERRQ(ierr); /* create a vecscatter that just copies x to y */ ierr = VecScatterCreateWithData(x,isx,y,isy,&vscat);CHKERRQ(ierr); ierr = VecScatterBegin(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); /* view y to check the result. y should be {0~63} */ ierr = PetscPrintf(PETSC_COMM_WORLD,"\nBefore VecScatterRemap on SGToSG, SEQ vector y is:\n");CHKERRQ(ierr); ierr = VecView(y,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); /* now call the weird subroutine VecScatterRemap to slightly change the vecscatter. Create tomap as {32~63,0~31}. Originaly, we read from indices {0~64} of seq x to write to y. The remap does indices[i] = tomap[indices[i]]. Therefore, after the remap, we read from indices{32~63,0~31} of seq x. */ ierr = PetscMalloc1(n,&tomap);CHKERRQ(ierr); for (i=0; i<n/2; i++) { tomap[i] = i+n/2; tomap[i+n/2] = i; }; ierr = VecScatterRemap(vscat,tomap,NULL);CHKERRQ(ierr); ierr = VecScatterBegin(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); /* view y to check the result. y should be {32~63,0~31} */ ierr = PetscPrintf(PETSC_COMM_WORLD,"After VecScatterRemap on SGToSG, SEQ vector y is:\n");CHKERRQ(ierr); ierr = VecView(y,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); /* destroy everything before we recreate them in different types */ ierr = PetscFree2(ix,iy);CHKERRQ(ierr); ierr = VecDestroy(&x);CHKERRQ(ierr); ierr = VecDestroy(&y);CHKERRQ(ierr); ierr = ISDestroy(&isx);CHKERRQ(ierr); ierr = ISDestroy(&isy);CHKERRQ(ierr); ierr = PetscFree(tomap);CHKERRQ(ierr); ierr = VecScatterDestroy(&vscat);CHKERRQ(ierr); /* =================================================================================================== (3) test VecScatterRemap on a sequential general to sequential stride 1 (SGToSS_Stride1) vecscatter =================================================================================================== */ n = 64; /* long enough to trigger memcpy optimizations in local scatter */ /* create two seq vectors x of length n, and y of length n/2 */ ierr = VecCreateSeq(PETSC_COMM_SELF,n,&x);CHKERRQ(ierr); ierr = VecCreateSeq(PETSC_COMM_SELF,n/2,&y);CHKERRQ(ierr); /* Initialize x as {0~63} */ for (i=0; i<n; i++) { ierr = VecSetValue(x,i,(PetscScalar)i,INSERT_VALUES);CHKERRQ(ierr); } ierr = VecAssemblyBegin(x);CHKERRQ(ierr); ierr = VecAssemblyEnd(x);CHKERRQ(ierr); /* create a general index set isx = {0:63:2}, which actually is a stride IS with first=0, n=32, step=2, but we use it as general and let PETSc detect the pattern and optimize it. */ ierr = PetscMalloc2(n/2,&ix,n/2,&iy);CHKERRQ(ierr); for (i=0; i<n/2; i++) ix[i] = i*2; ierr = ISCreateGeneral(PETSC_COMM_SELF,n/2,ix,PETSC_COPY_VALUES,&isx);CHKERRQ(ierr); /* create a stride1 index set isy = {0~31}. We intentionally set the step to 1 to trigger optimizations */ ierr = ISCreateStride(PETSC_COMM_SELF,32,0,1,&isy);CHKERRQ(ierr); /* create a vecscatter that just copies even entries of x to y */ ierr = VecScatterCreateWithData(x,isx,y,isy,&vscat);CHKERRQ(ierr); ierr = VecScatterBegin(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); /* view y to check the result. y should be {0:63:2} */ ierr = PetscPrintf(PETSC_COMM_WORLD,"\nBefore VecScatterRemap on SGToSS_Stride1, SEQ vector y is:\n");CHKERRQ(ierr); ierr = VecView(y,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); /* now call the weird subroutine VecScatterRemap to slightly change the vecscatter. Create tomap as {32~63,0~31}. Originaly, we read from indices{0:63:2} of seq x to write to y. The remap does indices[i] = tomap[indices[i]]. Therefore, after the remap, we read from indices{32:63:2,0:31:2} of seq x. */ ierr = PetscMalloc1(n,&tomap);CHKERRQ(ierr); for (i=0; i<n/2; i++) { tomap[i] = i+n/2; tomap[i+n/2] = i; }; ierr = VecScatterRemap(vscat,tomap,NULL);CHKERRQ(ierr); ierr = VecScatterBegin(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd(vscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); /* view y to check the result. y should be {32:63:2,0:31:2} */ ierr = PetscPrintf(PETSC_COMM_WORLD,"After VecScatterRemap on SGToSS_Stride1, SEQ vector y is:\n");CHKERRQ(ierr); ierr = VecView(y,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); /* destroy everything before PetscFinalize */ ierr = PetscFree2(ix,iy);CHKERRQ(ierr); ierr = VecDestroy(&x);CHKERRQ(ierr); ierr = VecDestroy(&y);CHKERRQ(ierr); ierr = ISDestroy(&isx);CHKERRQ(ierr); ierr = ISDestroy(&isy);CHKERRQ(ierr); ierr = PetscFree(tomap);CHKERRQ(ierr); ierr = VecScatterDestroy(&vscat);CHKERRQ(ierr); ierr = PetscFinalize(); return ierr; }
static PetscErrorCode GreedyColoringLocalDistanceOne_Private(MatColoring mc,PetscReal *wts,PetscInt *lperm,ISColoringValue *colors) { PetscInt i,j,k,s,e,n,no,nd,nd_global,n_global,idx,ncols,maxcolors,masksize,ccol,*mask; PetscErrorCode ierr; Mat m=mc->mat; Mat_MPIAIJ *aij = (Mat_MPIAIJ*)m->data; Mat md=NULL,mo=NULL; const PetscInt *md_i,*mo_i,*md_j,*mo_j; PetscBool isMPIAIJ,isSEQAIJ; ISColoringValue pcol; const PetscInt *cidx; PetscInt *lcolors,*ocolors; PetscReal *owts=NULL; PetscSF sf; PetscLayout layout; PetscFunctionBegin; ierr = MatGetSize(m,&n_global,NULL);CHKERRQ(ierr); ierr = MatGetOwnershipRange(m,&s,&e);CHKERRQ(ierr); n=e-s; masksize=20; nd_global = 0; /* get the matrix communication structures */ ierr = PetscObjectTypeCompare((PetscObject)m, MATMPIAIJ, &isMPIAIJ); CHKERRQ(ierr); ierr = PetscObjectTypeCompare((PetscObject)m, MATSEQAIJ, &isSEQAIJ); CHKERRQ(ierr); if (isMPIAIJ) { /* get the CSR data for on and off diagonal portions of m */ Mat_SeqAIJ *dseq; Mat_SeqAIJ *oseq; md=aij->A; dseq = (Mat_SeqAIJ*)md->data; mo=aij->B; oseq = (Mat_SeqAIJ*)mo->data; md_i = dseq->i; md_j = dseq->j; mo_i = oseq->i; mo_j = oseq->j; } else if (isSEQAIJ) { /* get the CSR data for m */ Mat_SeqAIJ *dseq; /* no off-processor nodes */ md=m; dseq = (Mat_SeqAIJ*)md->data; mo=NULL; no=0; md_i = dseq->i; md_j = dseq->j; mo_i = NULL; mo_j = NULL; } else SETERRQ(PetscObjectComm((PetscObject)mc),PETSC_ERR_ARG_WRONG,"Matrix must be AIJ for greedy coloring"); ierr = MatColoringGetMaxColors(mc,&maxcolors);CHKERRQ(ierr); if (mo) { ierr = VecGetSize(aij->lvec,&no);CHKERRQ(ierr); ierr = PetscMalloc2(no,&ocolors,no,&owts);CHKERRQ(ierr); for(i=0;i<no;i++) { ocolors[i]=maxcolors; } } ierr = PetscMalloc1(masksize,&mask);CHKERRQ(ierr); ierr = PetscMalloc1(n,&lcolors);CHKERRQ(ierr); for(i=0;i<n;i++) { lcolors[i]=maxcolors; } for (i=0;i<masksize;i++) { mask[i]=-1; } if (mo) { /* transfer neighbor weights */ ierr = PetscSFCreate(PetscObjectComm((PetscObject)m),&sf);CHKERRQ(ierr); ierr = MatGetLayouts(m,&layout,NULL);CHKERRQ(ierr); ierr = PetscSFSetGraphLayout(sf,layout,no,NULL,PETSC_COPY_VALUES,aij->garray);CHKERRQ(ierr); ierr = PetscSFBcastBegin(sf,MPIU_REAL,wts,owts);CHKERRQ(ierr); ierr = PetscSFBcastEnd(sf,MPIU_REAL,wts,owts);CHKERRQ(ierr); } while (nd_global < n_global) { nd=n; /* assign lowest possible color to each local vertex */ ierr = PetscLogEventBegin(MATCOLORING_Local,mc,0,0,0);CHKERRQ(ierr); for (i=0;i<n;i++) { idx=lperm[i]; if (lcolors[idx] == maxcolors) { ncols = md_i[idx+1]-md_i[idx]; cidx = &(md_j[md_i[idx]]); for (j=0;j<ncols;j++) { if (lcolors[cidx[j]] != maxcolors) { ccol=lcolors[cidx[j]]; if (ccol>=masksize) { PetscInt *newmask; ierr = PetscMalloc1(masksize*2,&newmask);CHKERRQ(ierr); for(k=0;k<2*masksize;k++) { newmask[k]=-1; } for(k=0;k<masksize;k++) { newmask[k]=mask[k]; } ierr = PetscFree(mask);CHKERRQ(ierr); mask=newmask; masksize*=2; } mask[ccol]=idx; } } if (mo) { ncols = mo_i[idx+1]-mo_i[idx]; cidx = &(mo_j[mo_i[idx]]); for (j=0;j<ncols;j++) { if (ocolors[cidx[j]] != maxcolors) { ccol=ocolors[cidx[j]]; if (ccol>=masksize) { PetscInt *newmask; ierr = PetscMalloc1(masksize*2,&newmask);CHKERRQ(ierr); for(k=0;k<2*masksize;k++) { newmask[k]=-1; } for(k=0;k<masksize;k++) { newmask[k]=mask[k]; } ierr = PetscFree(mask);CHKERRQ(ierr); mask=newmask; masksize*=2; } mask[ccol]=idx; } } } for (j=0;j<masksize;j++) { if (mask[j]!=idx) { break; } } pcol=j; if (pcol>maxcolors)pcol=maxcolors; lcolors[idx]=pcol; } } ierr = PetscLogEventEnd(MATCOLORING_Local,mc,0,0,0);CHKERRQ(ierr); if (mo) { /* transfer neighbor colors */ ierr = PetscLogEventBegin(MATCOLORING_Comm,mc,0,0,0);CHKERRQ(ierr); ierr = PetscSFBcastBegin(sf,MPIU_INT,lcolors,ocolors);CHKERRQ(ierr); ierr = PetscSFBcastEnd(sf,MPIU_INT,lcolors,ocolors);CHKERRQ(ierr); /* check for conflicts -- this is merely checking if any adjacent off-processor rows have the same color and marking the ones that are lower weight locally for changing */ for (i=0;i<n;i++) { ncols = mo_i[i+1]-mo_i[i]; cidx = &(mo_j[mo_i[i]]); for (j=0;j<ncols;j++) { /* in the case of conflicts, the highest weight one stays and the others go */ if ((ocolors[cidx[j]] == lcolors[i]) && (owts[cidx[j]] > wts[i]) && lcolors[i] < maxcolors) { lcolors[i]=maxcolors; nd--; } } } nd_global=0; } ierr = MPIU_Allreduce(&nd,&nd_global,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mc));CHKERRQ(ierr); } for (i=0;i<n;i++) { colors[i] = (ISColoringValue)lcolors[i]; } ierr = PetscFree(mask);CHKERRQ(ierr); ierr = PetscFree(lcolors);CHKERRQ(ierr); if (mo) { ierr = PetscFree2(ocolors,owts);CHKERRQ(ierr); ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); } PetscFunctionReturn(0); }
PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat P,PetscReal fill,Mat *C) { PetscErrorCode ierr; Mat Cmpi; Mat_PtAPMPI *ptap; PetscFreeSpaceList free_space=NULL,current_space=NULL; Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data,*c; Mat_SeqAIJ *ad =(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data; Mat_SeqAIJ *p_loc,*p_oth; PetscInt *pi_loc,*pj_loc,*pi_oth,*pj_oth,*pdti,*pdtj,*poti,*potj,*ptJ; PetscInt *adi=ad->i,*aj,*aoi=ao->i,nnz; PetscInt *lnk,*owners_co,*coi,*coj,i,k,pnz,row; PetscInt am=A->rmap->n,pN=P->cmap->N,pm=P->rmap->n,pn=P->cmap->n; PetscBT lnkbt; MPI_Comm comm; PetscMPIInt size,rank,tagi,tagj,*len_si,*len_s,*len_ri,icompleted=0; PetscInt **buf_rj,**buf_ri,**buf_ri_k; PetscInt len,proc,*dnz,*onz,*owners; PetscInt nzi,*pti,*ptj; PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextci; MPI_Request *swaits,*rwaits; MPI_Status *sstatus,rstatus; Mat_Merge_SeqsToMPI *merge; PetscInt *api,*apj,*Jptr,apnz,*prmap=p->garray,pon,nspacedouble=0,j,ap_rmax=0; PetscReal afill=1.0,afill_tmp; PetscInt rmax; #if defined(PTAP_PROFILE) PetscLogDouble t0,t1,t2,t3,t4; #endif PetscFunctionBegin; ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); #if defined(PTAP_PROFILE) ierr = PetscTime(&t0);CHKERRQ(ierr); #endif /* check if matrix local sizes are compatible */ if (A->rmap->rstart != P->rmap->rstart || A->rmap->rend != P->rmap->rend) { SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, Arow (%D, %D) != Prow (%D,%D)",A->rmap->rstart,A->rmap->rend,P->rmap->rstart,P->rmap->rend); } if (A->cmap->rstart != P->rmap->rstart || A->cmap->rend != P->rmap->rend) { SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, Acol (%D, %D) != Prow (%D,%D)",A->cmap->rstart,A->cmap->rend,P->rmap->rstart,P->rmap->rend); } ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); /* create struct Mat_PtAPMPI and attached it to C later */ ierr = PetscNew(&ptap);CHKERRQ(ierr); ierr = PetscNew(&merge);CHKERRQ(ierr); ptap->merge = merge; ptap->reuse = MAT_INITIAL_MATRIX; /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */ ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&ptap->startsj_s,&ptap->startsj_r,&ptap->bufa,&ptap->P_oth);CHKERRQ(ierr); /* get P_loc by taking all local rows of P */ ierr = MatMPIAIJGetLocalMat(P,MAT_INITIAL_MATRIX,&ptap->P_loc);CHKERRQ(ierr); p_loc = (Mat_SeqAIJ*)(ptap->P_loc)->data; p_oth = (Mat_SeqAIJ*)(ptap->P_oth)->data; pi_loc = p_loc->i; pj_loc = p_loc->j; pi_oth = p_oth->i; pj_oth = p_oth->j; #if defined(PTAP_PROFILE) ierr = PetscTime(&t1);CHKERRQ(ierr); #endif /* first, compute symbolic AP = A_loc*P = A_diag*P_loc + A_off*P_oth */ /*-------------------------------------------------------------------*/ ierr = PetscMalloc1((am+1),&api);CHKERRQ(ierr); api[0] = 0; /* create and initialize a linked list */ ierr = PetscLLCondensedCreate(pN,pN,&lnk,&lnkbt);CHKERRQ(ierr); /* Initial FreeSpace size is fill*(nnz(A) + nnz(P)) -OOM for ex56, np=8k on Intrepid! */ ierr = PetscFreeSpaceGet((PetscInt)(fill*(adi[am]+aoi[am]+pi_loc[pm])),&free_space);CHKERRQ(ierr); current_space = free_space; for (i=0; i<am; i++) { /* diagonal portion of A */ nzi = adi[i+1] - adi[i]; aj = ad->j + adi[i]; for (j=0; j<nzi; j++) { row = aj[j]; pnz = pi_loc[row+1] - pi_loc[row]; Jptr = pj_loc + pi_loc[row]; /* add non-zero cols of P into the sorted linked list lnk */ ierr = PetscLLCondensedAddSorted(pnz,Jptr,lnk,lnkbt);CHKERRQ(ierr); } /* off-diagonal portion of A */ nzi = aoi[i+1] - aoi[i]; aj = ao->j + aoi[i]; for (j=0; j<nzi; j++) { row = aj[j]; pnz = pi_oth[row+1] - pi_oth[row]; Jptr = pj_oth + pi_oth[row]; ierr = PetscLLCondensedAddSorted(pnz,Jptr,lnk,lnkbt);CHKERRQ(ierr); } apnz = lnk[0]; api[i+1] = api[i] + apnz; if (ap_rmax < apnz) ap_rmax = apnz; /* if free space is not available, double the total space in the list */ if (current_space->local_remaining<apnz) { ierr = PetscFreeSpaceGet(apnz+current_space->total_array_size,¤t_space);CHKERRQ(ierr); nspacedouble++; } /* Copy data into free space, then initialize lnk */ ierr = PetscLLCondensedClean(pN,apnz,current_space->array,lnk,lnkbt);CHKERRQ(ierr); current_space->array += apnz; current_space->local_used += apnz; current_space->local_remaining -= apnz; } /* Allocate space for apj, initialize apj, and */ /* destroy list of free space and other temporary array(s) */ ierr = PetscMalloc1((api[am]+1),&apj);CHKERRQ(ierr); ierr = PetscFreeSpaceContiguous(&free_space,apj);CHKERRQ(ierr); afill_tmp = (PetscReal)api[am]/(adi[am]+aoi[am]+pi_loc[pm]+1); if (afill_tmp > afill) afill = afill_tmp; #if defined(PTAP_PROFILE) ierr = PetscTime(&t2);CHKERRQ(ierr); #endif /* determine symbolic Co=(p->B)^T*AP - send to others */ /*----------------------------------------------------*/ ierr = MatGetSymbolicTranspose_SeqAIJ(p->B,&poti,&potj);CHKERRQ(ierr); /* then, compute symbolic Co = (p->B)^T*AP */ pon = (p->B)->cmap->n; /* total num of rows to be sent to other processors >= (num of nonzero rows of C_seq) - pn */ ierr = PetscMalloc1((pon+1),&coi);CHKERRQ(ierr); coi[0] = 0; /* set initial free space to be fill*(nnz(p->B) + nnz(AP)) */ nnz = fill*(poti[pon] + api[am]); ierr = PetscFreeSpaceGet(nnz,&free_space);CHKERRQ(ierr); current_space = free_space; for (i=0; i<pon; i++) { pnz = poti[i+1] - poti[i]; ptJ = potj + poti[i]; for (j=0; j<pnz; j++) { row = ptJ[j]; /* row of AP == col of Pot */ apnz = api[row+1] - api[row]; Jptr = apj + api[row]; /* add non-zero cols of AP into the sorted linked list lnk */ ierr = PetscLLCondensedAddSorted(apnz,Jptr,lnk,lnkbt);CHKERRQ(ierr); } nnz = lnk[0]; /* If free space is not available, double the total space in the list */ if (current_space->local_remaining<nnz) { ierr = PetscFreeSpaceGet(nnz+current_space->total_array_size,¤t_space);CHKERRQ(ierr); nspacedouble++; } /* Copy data into free space, and zero out denserows */ ierr = PetscLLCondensedClean(pN,nnz,current_space->array,lnk,lnkbt);CHKERRQ(ierr); current_space->array += nnz; current_space->local_used += nnz; current_space->local_remaining -= nnz; coi[i+1] = coi[i] + nnz; } ierr = PetscMalloc1((coi[pon]+1),&coj);CHKERRQ(ierr); ierr = PetscFreeSpaceContiguous(&free_space,coj);CHKERRQ(ierr); afill_tmp = (PetscReal)coi[pon]/(poti[pon] + api[am]+1); if (afill_tmp > afill) afill = afill_tmp; ierr = MatRestoreSymbolicTranspose_SeqAIJ(p->B,&poti,&potj);CHKERRQ(ierr); /* send j-array (coj) of Co to other processors */ /*----------------------------------------------*/ /* determine row ownership */ ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); merge->rowmap->n = pn; merge->rowmap->bs = 1; ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); owners = merge->rowmap->range; /* determine the number of messages to send, their lengths */ ierr = PetscMalloc2(size,&len_si,size,&sstatus);CHKERRQ(ierr); ierr = PetscMemzero(len_si,size*sizeof(PetscMPIInt));CHKERRQ(ierr); ierr = PetscCalloc1(size,&merge->len_s);CHKERRQ(ierr); len_s = merge->len_s; merge->nsend = 0; ierr = PetscMalloc1((size+2),&owners_co);CHKERRQ(ierr); proc = 0; for (i=0; i<pon; i++) { while (prmap[i] >= owners[proc+1]) proc++; len_si[proc]++; /* num of rows in Co to be sent to [proc] */ len_s[proc] += coi[i+1] - coi[i]; } len = 0; /* max length of buf_si[] */ owners_co[0] = 0; for (proc=0; proc<size; proc++) { owners_co[proc+1] = owners_co[proc] + len_si[proc]; if (len_si[proc]) { merge->nsend++; len_si[proc] = 2*(len_si[proc] + 1); len += len_si[proc]; } } /* determine the number and length of messages to receive for coi and coj */ ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); /* post the Irecv and Isend of coj */ ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rwaits);CHKERRQ(ierr); ierr = PetscMalloc1((merge->nsend+1),&swaits);CHKERRQ(ierr); for (proc=0, k=0; proc<size; proc++) { if (!len_s[proc]) continue; i = owners_co[proc]; ierr = MPI_Isend(coj+coi[i],len_s[proc],MPIU_INT,proc,tagj,comm,swaits+k);CHKERRQ(ierr); k++; } /* receives and sends of coj are complete */ for (i=0; i<merge->nrecv; i++) { ierr = MPI_Waitany(merge->nrecv,rwaits,&icompleted,&rstatus);CHKERRQ(ierr); } ierr = PetscFree(rwaits);CHKERRQ(ierr); if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,swaits,sstatus);CHKERRQ(ierr);} /* send and recv coi */ /*-------------------*/ ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&rwaits);CHKERRQ(ierr); ierr = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr); buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ for (proc=0,k=0; proc<size; proc++) { if (!len_s[proc]) continue; /* form outgoing message for i-structure: buf_si[0]: nrows to be sent [1:nrows]: row index (global) [nrows+1:2*nrows+1]: i-structure index */ /*-------------------------------------------*/ nrows = len_si[proc]/2 - 1; buf_si_i = buf_si + nrows+1; buf_si[0] = nrows; buf_si_i[0] = 0; nrows = 0; for (i=owners_co[proc]; i<owners_co[proc+1]; i++) { nzi = coi[i+1] - coi[i]; buf_si_i[nrows+1] = buf_si_i[nrows] + nzi; /* i-structure */ buf_si[nrows+1] = prmap[i] -owners[proc]; /* local row index */ nrows++; } ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,swaits+k);CHKERRQ(ierr); k++; buf_si += len_si[proc]; } i = merge->nrecv; while (i--) { ierr = MPI_Waitany(merge->nrecv,rwaits,&icompleted,&rstatus);CHKERRQ(ierr); } ierr = PetscFree(rwaits);CHKERRQ(ierr); if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,swaits,sstatus);CHKERRQ(ierr);} ierr = PetscFree2(len_si,sstatus);CHKERRQ(ierr); ierr = PetscFree(len_ri);CHKERRQ(ierr); ierr = PetscFree(swaits);CHKERRQ(ierr); ierr = PetscFree(buf_s);CHKERRQ(ierr); #if defined(PTAP_PROFILE) ierr = PetscTime(&t3);CHKERRQ(ierr); #endif /* compute the local portion of C (mpi mat) */ /*------------------------------------------*/ ierr = MatGetSymbolicTranspose_SeqAIJ(p->A,&pdti,&pdtj);CHKERRQ(ierr); /* allocate pti array and free space for accumulating nonzero column info */ ierr = PetscMalloc1((pn+1),&pti);CHKERRQ(ierr); pti[0] = 0; /* set initial free space to be fill*(nnz(P) + nnz(AP)) */ nnz = fill*(pi_loc[pm] + api[am]); ierr = PetscFreeSpaceGet(nnz,&free_space);CHKERRQ(ierr); current_space = free_space; ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextci);CHKERRQ(ierr); for (k=0; k<merge->nrecv; k++) { buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ nrows = *buf_ri_k[k]; nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ nextci[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ } ierr = MatPreallocateInitialize(comm,pn,pn,dnz,onz);CHKERRQ(ierr); rmax = 0; for (i=0; i<pn; i++) { /* add pdt[i,:]*AP into lnk */ pnz = pdti[i+1] - pdti[i]; ptJ = pdtj + pdti[i]; for (j=0; j<pnz; j++) { row = ptJ[j]; /* row of AP == col of Pt */ apnz = api[row+1] - api[row]; Jptr = apj + api[row]; /* add non-zero cols of AP into the sorted linked list lnk */ ierr = PetscLLCondensedAddSorted(apnz,Jptr,lnk,lnkbt);CHKERRQ(ierr); } /* add received col data into lnk */ for (k=0; k<merge->nrecv; k++) { /* k-th received message */ if (i == *nextrow[k]) { /* i-th row */ nzi = *(nextci[k]+1) - *nextci[k]; Jptr = buf_rj[k] + *nextci[k]; ierr = PetscLLCondensedAddSorted(nzi,Jptr,lnk,lnkbt);CHKERRQ(ierr); nextrow[k]++; nextci[k]++; } } nnz = lnk[0]; /* if free space is not available, make more free space */ if (current_space->local_remaining<nnz) { ierr = PetscFreeSpaceGet(nnz+current_space->total_array_size,¤t_space);CHKERRQ(ierr); nspacedouble++; } /* copy data into free space, then initialize lnk */ ierr = PetscLLCondensedClean(pN,nnz,current_space->array,lnk,lnkbt);CHKERRQ(ierr); ierr = MatPreallocateSet(i+owners[rank],nnz,current_space->array,dnz,onz);CHKERRQ(ierr); current_space->array += nnz; current_space->local_used += nnz; current_space->local_remaining -= nnz; pti[i+1] = pti[i] + nnz; if (nnz > rmax) rmax = nnz; } ierr = MatRestoreSymbolicTranspose_SeqAIJ(p->A,&pdti,&pdtj);CHKERRQ(ierr); ierr = PetscFree3(buf_ri_k,nextrow,nextci);CHKERRQ(ierr); ierr = PetscMalloc1((pti[pn]+1),&ptj);CHKERRQ(ierr); ierr = PetscFreeSpaceContiguous(&free_space,ptj);CHKERRQ(ierr); afill_tmp = (PetscReal)pti[pn]/(pi_loc[pm] + api[am]+1); if (afill_tmp > afill) afill = afill_tmp; ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); /* create symbolic parallel matrix Cmpi */ /*--------------------------------------*/ ierr = MatCreate(comm,&Cmpi);CHKERRQ(ierr); ierr = MatSetSizes(Cmpi,pn,pn,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); ierr = MatSetBlockSizes(Cmpi,P->cmap->bs,P->cmap->bs);CHKERRQ(ierr); ierr = MatSetType(Cmpi,MATMPIAIJ);CHKERRQ(ierr); ierr = MatMPIAIJSetPreallocation(Cmpi,0,dnz,0,onz);CHKERRQ(ierr); ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); merge->bi = pti; /* Cseq->i */ merge->bj = ptj; /* Cseq->j */ merge->coi = coi; /* Co->i */ merge->coj = coj; /* Co->j */ merge->buf_ri = buf_ri; merge->buf_rj = buf_rj; merge->owners_co = owners_co; merge->destroy = Cmpi->ops->destroy; merge->duplicate = Cmpi->ops->duplicate; /* Cmpi is not ready for use - assembly will be done by MatPtAPNumeric() */ Cmpi->assembled = PETSC_FALSE; Cmpi->ops->destroy = MatDestroy_MPIAIJ_PtAP; Cmpi->ops->duplicate = MatDuplicate_MPIAIJ_MatPtAP; /* attach the supporting struct to Cmpi for reuse */ c = (Mat_MPIAIJ*)Cmpi->data; c->ptap = ptap; ptap->api = api; ptap->apj = apj; ptap->rmax = ap_rmax; *C = Cmpi; /* flag 'scalable' determines which implementations to be used: 0: do dense axpy in MatPtAPNumeric() - fast, but requires storage of a nonscalable dense array apa; 1: do sparse axpy in MatPtAPNumeric() - might slow, uses a sparse array apa */ /* set default scalable */ ptap->scalable = PETSC_TRUE; ierr = PetscOptionsGetBool(((PetscObject)Cmpi)->prefix,"-matptap_scalable",&ptap->scalable,NULL);CHKERRQ(ierr); if (!ptap->scalable) { /* Do dense axpy */ ierr = PetscCalloc1(pN,&ptap->apa);CHKERRQ(ierr); } else { ierr = PetscCalloc1(ap_rmax+1,&ptap->apa);CHKERRQ(ierr); } #if defined(PTAP_PROFILE) ierr = PetscTime(&t4);CHKERRQ(ierr); if (rank==1) PetscPrintf(MPI_COMM_SELF," [%d] PtAPSymbolic %g/P + %g/AP + %g/comm + %g/PtAP = %g\n",rank,t1-t0,t2-t1,t3-t2,t4-t3,t4-t0);CHKERRQ(ierr); #endif #if defined(PETSC_USE_INFO) if (pti[pn] != 0) { ierr = PetscInfo3(Cmpi,"Reallocs %D; Fill ratio: given %G needed %G.\n",nspacedouble,fill,afill);CHKERRQ(ierr); ierr = PetscInfo1(Cmpi,"Use MatPtAP(A,P,MatReuse,%G,&C) for best performance.\n",afill);CHKERRQ(ierr); } else { ierr = PetscInfo(Cmpi,"Empty matrix product\n");CHKERRQ(ierr); } #endif PetscFunctionReturn(0); }
static PetscErrorCode GreedyColoringLocalDistanceTwo_Private(MatColoring mc,PetscReal *wts,PetscInt *lperm,ISColoringValue *colors) { MC_Greedy *gr = (MC_Greedy *) mc->data; PetscInt i,j,k,l,s,e,n,nd,nd_global,n_global,idx,ncols,maxcolors,mcol,mcol_global,nd1cols,*mask,masksize,*d1cols,*bad,*badnext,nbad,badsize,ccol,no,cbad; Mat m = mc->mat, mt; Mat_MPIAIJ *aij = (Mat_MPIAIJ*)m->data; Mat md=NULL,mo=NULL; const PetscInt *md_i,*mo_i,*md_j,*mo_j; const PetscInt *rmd_i,*rmo_i,*rmd_j,*rmo_j; PetscBool isMPIAIJ,isSEQAIJ; PetscInt pcol,*dcolors,*ocolors; ISColoringValue *badidx; const PetscInt *cidx; PetscReal *owts,*colorweights; PetscInt *oconf,*conf; PetscSF sf; PetscLayout layout; PetscErrorCode ierr; PetscFunctionBegin; ierr = MatGetSize(m,&n_global,NULL);CHKERRQ(ierr); ierr = MatGetOwnershipRange(m,&s,&e);CHKERRQ(ierr); n=e-s; nd_global = 0; /* get the matrix communication structures */ ierr = PetscObjectTypeCompare((PetscObject)m, MATMPIAIJ, &isMPIAIJ); CHKERRQ(ierr); ierr = PetscObjectTypeCompare((PetscObject)m, MATSEQAIJ, &isSEQAIJ); CHKERRQ(ierr); if (isMPIAIJ) { Mat_SeqAIJ *dseq; Mat_SeqAIJ *oseq; md=aij->A; dseq = (Mat_SeqAIJ*)md->data; mo=aij->B; oseq = (Mat_SeqAIJ*)mo->data; md_i = dseq->i; md_j = dseq->j; mo_i = oseq->i; mo_j = oseq->j; rmd_i = dseq->i; rmd_j = dseq->j; rmo_i = oseq->i; rmo_j = oseq->j; } else if (isSEQAIJ) { Mat_SeqAIJ *dseq; /* no off-processor nodes */ md=m; dseq = (Mat_SeqAIJ*)md->data; md_i = dseq->i; md_j = dseq->j; mo_i = NULL; mo_j = NULL; rmd_i = dseq->i; rmd_j = dseq->j; rmo_i = NULL; rmo_j = NULL; } else SETERRQ(PetscObjectComm((PetscObject)mc),PETSC_ERR_ARG_WRONG,"Matrix must be AIJ for greedy coloring"); if (!gr->symmetric) { ierr = MatTranspose(m, MAT_INITIAL_MATRIX, &mt);CHKERRQ(ierr); if (isSEQAIJ) { Mat_SeqAIJ *dseq = (Mat_SeqAIJ*) mt->data; rmd_i = dseq->i; rmd_j = dseq->j; rmo_i = NULL; rmo_j = NULL; } else SETERRQ(PetscObjectComm((PetscObject) mc), PETSC_ERR_SUP, "Nonsymmetric greedy coloring only works in serial"); } /* create the vectors and communication structures if necessary */ no=0; if (mo) { ierr = VecGetLocalSize(aij->lvec,&no);CHKERRQ(ierr); ierr = PetscSFCreate(PetscObjectComm((PetscObject)m),&sf);CHKERRQ(ierr); ierr = MatGetLayouts(m,&layout,NULL);CHKERRQ(ierr); ierr = PetscSFSetGraphLayout(sf,layout,no,NULL,PETSC_COPY_VALUES,aij->garray);CHKERRQ(ierr); } ierr = MatColoringGetMaxColors(mc,&maxcolors);CHKERRQ(ierr); masksize=n; nbad=0; badsize=n; ierr = PetscMalloc1(masksize,&mask);CHKERRQ(ierr); ierr = PetscMalloc4(n,&d1cols,n,&dcolors,n,&conf,n,&bad);CHKERRQ(ierr); ierr = PetscMalloc2(badsize,&badidx,badsize,&badnext);CHKERRQ(ierr); for(i=0;i<masksize;i++) { mask[i]=-1; } for (i=0;i<n;i++) { dcolors[i]=maxcolors; bad[i]=-1; } for (i=0;i<badsize;i++) { badnext[i]=-1; } if (mo) { ierr = PetscMalloc3(no,&owts,no,&oconf,no,&ocolors);CHKERRQ(ierr); ierr = PetscSFBcastBegin(sf,MPIU_REAL,wts,owts);CHKERRQ(ierr); ierr = PetscSFBcastEnd(sf,MPIU_REAL,wts,owts);CHKERRQ(ierr); for (i=0;i<no;i++) { ocolors[i]=maxcolors; } } else { /* Appease overzealous -Wmaybe-initialized */ owts = NULL; oconf = NULL; ocolors = NULL; } mcol=0; while (nd_global < n_global) { nd=n; /* assign lowest possible color to each local vertex */ mcol_global=0; ierr = PetscLogEventBegin(MATCOLORING_Local,mc,0,0,0);CHKERRQ(ierr); for (i=0;i<n;i++) { idx=lperm[i]; if (dcolors[idx] == maxcolors) { /* entries in bad */ cbad=bad[idx]; while (cbad>=0) { ccol=badidx[cbad]; if (ccol>=masksize) { PetscInt *newmask; ierr = PetscMalloc1(masksize*2,&newmask);CHKERRQ(ierr); for(k=0;k<2*masksize;k++) { newmask[k]=-1; } for(k=0;k<masksize;k++) { newmask[k]=mask[k]; } ierr = PetscFree(mask);CHKERRQ(ierr); mask=newmask; masksize*=2; } mask[ccol]=idx; cbad=badnext[cbad]; } /* diagonal distance-one rows */ nd1cols=0; ncols = rmd_i[idx+1]-rmd_i[idx]; cidx = &(rmd_j[rmd_i[idx]]); for (j=0;j<ncols;j++) { d1cols[nd1cols] = cidx[j]; nd1cols++; ccol=dcolors[cidx[j]]; if (ccol != maxcolors) { if (ccol>=masksize) { PetscInt *newmask; ierr = PetscMalloc1(masksize*2,&newmask);CHKERRQ(ierr); for(k=0;k<2*masksize;k++) { newmask[k]=-1; } for(k=0;k<masksize;k++) { newmask[k]=mask[k]; } ierr = PetscFree(mask);CHKERRQ(ierr); mask=newmask; masksize*=2; } mask[ccol]=idx; } } /* off-diagonal distance-one rows */ if (mo) { ncols = rmo_i[idx+1]-rmo_i[idx]; cidx = &(rmo_j[rmo_i[idx]]); for (j=0;j<ncols;j++) { ccol=ocolors[cidx[j]]; if (ccol != maxcolors) { if (ccol>=masksize) { PetscInt *newmask; ierr = PetscMalloc1(masksize*2,&newmask);CHKERRQ(ierr); for(k=0;k<2*masksize;k++) { newmask[k]=-1; } for(k=0;k<masksize;k++) { newmask[k]=mask[k]; } ierr = PetscFree(mask);CHKERRQ(ierr); mask=newmask; masksize*=2; } mask[ccol]=idx; } } } /* diagonal distance-two rows */ for (j=0;j<nd1cols;j++) { ncols = md_i[d1cols[j]+1]-md_i[d1cols[j]]; cidx = &(md_j[md_i[d1cols[j]]]); for (l=0;l<ncols;l++) { ccol=dcolors[cidx[l]]; if (ccol != maxcolors) { if (ccol>=masksize) { PetscInt *newmask; ierr = PetscMalloc1(masksize*2,&newmask);CHKERRQ(ierr); for(k=0;k<2*masksize;k++) { newmask[k]=-1; } for(k=0;k<masksize;k++) { newmask[k]=mask[k]; } ierr = PetscFree(mask);CHKERRQ(ierr); mask=newmask; masksize*=2; } mask[ccol]=idx; } } } /* off-diagonal distance-two rows */ if (mo) { for (j=0;j<nd1cols;j++) { ncols = mo_i[d1cols[j]+1]-mo_i[d1cols[j]]; cidx = &(mo_j[mo_i[d1cols[j]]]); for (l=0;l<ncols;l++) { ccol=ocolors[cidx[l]]; if (ccol != maxcolors) { if (ccol>=masksize) { PetscInt *newmask; ierr = PetscMalloc1(masksize*2,&newmask);CHKERRQ(ierr); for(k=0;k<2*masksize;k++) { newmask[k]=-1; } for(k=0;k<masksize;k++) { newmask[k]=mask[k]; } ierr = PetscFree(mask);CHKERRQ(ierr); mask=newmask; masksize*=2; } mask[ccol]=idx; } } } } /* assign this one the lowest color possible by seeing if there's a gap in the sequence of sorted neighbor colors */ for (j=0;j<masksize;j++) { if (mask[j]!=idx) { break; } } pcol=j; if (pcol>maxcolors) pcol=maxcolors; dcolors[idx]=pcol; if (pcol>mcol) mcol=pcol; } } ierr = PetscLogEventEnd(MATCOLORING_Local,mc,0,0,0);CHKERRQ(ierr); if (mo) { /* transfer neighbor colors */ ierr = PetscSFBcastBegin(sf,MPIU_INT,dcolors,ocolors);CHKERRQ(ierr); ierr = PetscSFBcastEnd(sf,MPIU_INT,dcolors,ocolors);CHKERRQ(ierr); /* find the maximum color assigned locally and allocate a mask */ ierr = MPIU_Allreduce(&mcol,&mcol_global,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)mc));CHKERRQ(ierr); ierr = PetscMalloc1(mcol_global+1,&colorweights);CHKERRQ(ierr); /* check for conflicts */ for (i=0;i<n;i++) { conf[i]=PETSC_FALSE; } for (i=0;i<no;i++) { oconf[i]=PETSC_FALSE; } for (i=0;i<n;i++) { ncols = mo_i[i+1]-mo_i[i]; cidx = &(mo_j[mo_i[i]]); if (ncols > 0) { /* fill in the mask */ for (j=0;j<mcol_global+1;j++) { colorweights[j]=0; } colorweights[dcolors[i]]=wts[i]; /* fill in the off-diagonal part of the mask */ for (j=0;j<ncols;j++) { ccol=ocolors[cidx[j]]; if (ccol < maxcolors) { if (colorweights[ccol] < owts[cidx[j]]) { colorweights[ccol] = owts[cidx[j]]; } } } /* fill in the on-diagonal part of the mask */ ncols = md_i[i+1]-md_i[i]; cidx = &(md_j[md_i[i]]); for (j=0;j<ncols;j++) { ccol=dcolors[cidx[j]]; if (ccol < maxcolors) { if (colorweights[ccol] < wts[cidx[j]]) { colorweights[ccol] = wts[cidx[j]]; } } } /* go back through and set up on and off-diagonal conflict vectors */ ncols = md_i[i+1]-md_i[i]; cidx = &(md_j[md_i[i]]); for (j=0;j<ncols;j++) { ccol=dcolors[cidx[j]]; if (ccol < maxcolors) { if (colorweights[ccol] > wts[cidx[j]]) { conf[cidx[j]]=PETSC_TRUE; } } } ncols = mo_i[i+1]-mo_i[i]; cidx = &(mo_j[mo_i[i]]); for (j=0;j<ncols;j++) { ccol=ocolors[cidx[j]]; if (ccol < maxcolors) { if (colorweights[ccol] > owts[cidx[j]]) { oconf[cidx[j]]=PETSC_TRUE; } } } } } nd_global=0; ierr = PetscFree(colorweights);CHKERRQ(ierr); ierr = PetscLogEventBegin(MATCOLORING_Comm,mc,0,0,0);CHKERRQ(ierr); ierr = PetscSFReduceBegin(sf,MPIU_INT,oconf,conf,MPIU_SUM);CHKERRQ(ierr); ierr = PetscSFReduceEnd(sf,MPIU_INT,oconf,conf,MPIU_SUM);CHKERRQ(ierr); ierr = PetscLogEventEnd(MATCOLORING_Comm,mc,0,0,0);CHKERRQ(ierr); /* go through and unset local colors that have conflicts */ for (i=0;i<n;i++) { if (conf[i]>0) { /* push this color onto the bad stack */ badidx[nbad]=dcolors[i]; badnext[nbad]=bad[i]; bad[i]=nbad; nbad++; if (nbad>=badsize) { PetscInt *newbadnext; ISColoringValue *newbadidx; ierr = PetscMalloc2(badsize*2,&newbadidx,badsize*2,&newbadnext);CHKERRQ(ierr); for(k=0;k<2*badsize;k++) { newbadnext[k]=-1; } for(k=0;k<badsize;k++) { newbadidx[k]=badidx[k]; newbadnext[k]=badnext[k]; } ierr = PetscFree2(badidx,badnext);CHKERRQ(ierr); badidx=newbadidx; badnext=newbadnext; badsize*=2; } dcolors[i] = maxcolors; nd--; } } } ierr = MPIU_Allreduce(&nd,&nd_global,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mc));CHKERRQ(ierr); } if (mo) { ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); ierr = PetscFree3(owts,oconf,ocolors);CHKERRQ(ierr); } for (i=0;i<n;i++) { colors[i]=dcolors[i]; } ierr = PetscFree(mask);CHKERRQ(ierr); ierr = PetscFree4(d1cols,dcolors,conf,bad);CHKERRQ(ierr); ierr = PetscFree2(badidx,badnext);CHKERRQ(ierr); if (!gr->symmetric) {ierr = MatDestroy(&mt);CHKERRQ(ierr);} PetscFunctionReturn(0); }
PETSC_EXTERN PetscErrorCode MatColoringApply_LF(MatColoring mc,ISColoring *iscoloring) { PetscErrorCode ierr; PetscInt *list,*work,*seq,*coloring,n; const PetscInt *ria,*rja,*cia,*cja; PetscInt n1, none,ncolors,i; PetscBool done; Mat mat = mc->mat; Mat mat_seq = mat; PetscMPIInt size; MPI_Comm comm; ISColoring iscoloring_seq; PetscInt bs = 1,rstart,rend,N_loc,nc; ISColoringValue *colors_loc; PetscBool flg1,flg2; PetscFunctionBegin; if (mc->dist != 2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LF may only do distance 2 coloring"); /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1 */ ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr); ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIBAIJ,&flg2);CHKERRQ(ierr); if (flg1 || flg2) { ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); } ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); if (size > 1) { /* create a sequential iscoloring on all processors */ ierr = MatGetSeqNonzeroStructure(mat,&mat_seq);CHKERRQ(ierr); } ierr = MatGetRowIJ(mat_seq,1,PETSC_FALSE,PETSC_TRUE,&n,&ria,&rja,&done);CHKERRQ(ierr); ierr = MatGetColumnIJ(mat_seq,1,PETSC_FALSE,PETSC_TRUE,&n,&cia,&cja,&done);CHKERRQ(ierr); if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Ordering requires IJ"); ierr = MatFDColoringDegreeSequence_Minpack(n,cja,cia,rja,ria,&seq);CHKERRQ(ierr); ierr = PetscMalloc2(n,&list,4*n,&work);CHKERRQ(ierr); n1 = n - 1; none = -1; MINPACKnumsrt(&n,&n1,seq,&none,list,work+2*n,work+n); ierr = PetscMalloc1(n,&coloring);CHKERRQ(ierr); MINPACKseq(&n,cja,cia,rja,ria,list,coloring,&ncolors,work); ierr = PetscFree2(list,work);CHKERRQ(ierr); ierr = PetscFree(seq);CHKERRQ(ierr); ierr = MatRestoreRowIJ(mat_seq,1,PETSC_FALSE,PETSC_TRUE,NULL,&ria,&rja,&done);CHKERRQ(ierr); ierr = MatRestoreColumnIJ(mat_seq,1,PETSC_FALSE,PETSC_TRUE,NULL,&cia,&cja,&done);CHKERRQ(ierr); /* shift coloring numbers to start at zero and shorten */ if (ncolors > IS_COLORING_MAX-1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Maximum color size exceeded"); { ISColoringValue *s = (ISColoringValue*) coloring; for (i=0; i<n; i++) s[i] = (ISColoringValue) (coloring[i]-1); ierr = MatColoringPatch(mat_seq,ncolors,n,s,iscoloring);CHKERRQ(ierr); } if (size > 1) { ierr = MatDestroySeqNonzeroStructure(&mat_seq);CHKERRQ(ierr); /* convert iscoloring_seq to a parallel iscoloring */ iscoloring_seq = *iscoloring; rstart = mat->rmap->rstart/bs; rend = mat->rmap->rend/bs; N_loc = rend - rstart; /* number of local nodes */ /* get local colors for each local node */ ierr = PetscMalloc1((N_loc+1),&colors_loc);CHKERRQ(ierr); for (i=rstart; i<rend; i++) colors_loc[i-rstart] = iscoloring_seq->colors[i]; /* create a parallel iscoloring */ nc = iscoloring_seq->n; ierr = ISColoringCreate(comm,nc,N_loc,colors_loc,iscoloring);CHKERRQ(ierr); ierr = ISColoringDestroy(&iscoloring_seq);CHKERRQ(ierr); } PetscFunctionReturn(0); }
/*@ KSPComputeEigenvaluesExplicitly - Computes all of the eigenvalues of the preconditioned operator using LAPACK. Collective on KSP Input Parameter: + ksp - iterative context obtained from KSPCreate() - n - size of arrays r and c Output Parameters: + r - real part of computed eigenvalues, provided by user with a dimension at least of n - c - complex part of computed eigenvalues, provided by user with a dimension at least of n Notes: This approach is very slow but will generally provide accurate eigenvalue estimates. This routine explicitly forms a dense matrix representing the preconditioned operator, and thus will run only for relatively small problems, say n < 500. Many users may just want to use the monitoring routine KSPMonitorSingularValue() (which can be set with option -ksp_monitor_singular_value) to print the singular values at each iteration of the linear solve. The preconditoner operator, rhs vector, solution vectors should be set before this routine is called. i.e use KSPSetOperators(),KSPSolve() or KSPSetOperators() Level: advanced .keywords: KSP, compute, eigenvalues, explicitly .seealso: KSPComputeEigenvalues(), KSPMonitorSingularValue(), KSPComputeExtremeSingularValues(), KSPSetOperators(), KSPSolve() @*/ PetscErrorCode KSPComputeEigenvaluesExplicitly(KSP ksp,PetscInt nmax,PetscReal r[],PetscReal c[]) { Mat BA; PetscErrorCode ierr; PetscMPIInt size,rank; MPI_Comm comm; PetscScalar *array; Mat A; PetscInt m,row,nz,i,n,dummy; const PetscInt *cols; const PetscScalar *vals; PetscFunctionBegin; ierr = PetscObjectGetComm((PetscObject)ksp,&comm);CHKERRQ(ierr); ierr = KSPComputeExplicitOperator(ksp,&BA);CHKERRQ(ierr); ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); ierr = MatGetSize(BA,&n,&n);CHKERRQ(ierr); if (size > 1) { /* assemble matrix on first processor */ ierr = MatCreate(PetscObjectComm((PetscObject)ksp),&A);CHKERRQ(ierr); if (!rank) { ierr = MatSetSizes(A,n,n,n,n);CHKERRQ(ierr); } else { ierr = MatSetSizes(A,0,0,n,n);CHKERRQ(ierr); } ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr); ierr = PetscLogObjectParent((PetscObject)BA,(PetscObject)A);CHKERRQ(ierr); ierr = MatGetOwnershipRange(BA,&row,&dummy);CHKERRQ(ierr); ierr = MatGetLocalSize(BA,&m,&dummy);CHKERRQ(ierr); for (i=0; i<m; i++) { ierr = MatGetRow(BA,row,&nz,&cols,&vals);CHKERRQ(ierr); ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); ierr = MatRestoreRow(BA,row,&nz,&cols,&vals);CHKERRQ(ierr); row++; } ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatDenseGetArray(A,&array);CHKERRQ(ierr); } else { ierr = MatDenseGetArray(BA,&array);CHKERRQ(ierr); } #if defined(PETSC_HAVE_ESSL) /* ESSL has a different calling sequence for dgeev() and zgeev() than standard LAPACK */ if (!rank) { PetscScalar sdummy,*cwork; PetscReal *work,*realpart; PetscBLASInt clen,idummy,lwork,bn,zero = 0; PetscInt *perm; #if !defined(PETSC_USE_COMPLEX) clen = n; #else clen = 2*n; #endif ierr = PetscMalloc1(clen,&cwork);CHKERRQ(ierr); idummy = -1; /* unused */ ierr = PetscBLASIntCast(n,&bn);CHKERRQ(ierr); lwork = 5*n; ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); ierr = PetscMalloc1(n,&realpart);CHKERRQ(ierr); ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_(&zero,array,&bn,cwork,&sdummy,&idummy,&idummy,&bn,work,&lwork)); ierr = PetscFPTrapPop();CHKERRQ(ierr); ierr = PetscFree(work);CHKERRQ(ierr); /* For now we stick with the convention of storing the real and imaginary components of evalues separately. But is this what we really want? */ ierr = PetscMalloc1(n,&perm);CHKERRQ(ierr); #if !defined(PETSC_USE_COMPLEX) for (i=0; i<n; i++) { realpart[i] = cwork[2*i]; perm[i] = i; } ierr = PetscSortRealWithPermutation(n,realpart,perm);CHKERRQ(ierr); for (i=0; i<n; i++) { r[i] = cwork[2*perm[i]]; c[i] = cwork[2*perm[i]+1]; } #else for (i=0; i<n; i++) { realpart[i] = PetscRealPart(cwork[i]); perm[i] = i; } ierr = PetscSortRealWithPermutation(n,realpart,perm);CHKERRQ(ierr); for (i=0; i<n; i++) { r[i] = PetscRealPart(cwork[perm[i]]); c[i] = PetscImaginaryPart(cwork[perm[i]]); } #endif ierr = PetscFree(perm);CHKERRQ(ierr); ierr = PetscFree(realpart);CHKERRQ(ierr); ierr = PetscFree(cwork);CHKERRQ(ierr); } #elif !defined(PETSC_USE_COMPLEX) if (!rank) { PetscScalar *work; PetscReal *realpart,*imagpart; PetscBLASInt idummy,lwork; PetscInt *perm; idummy = n; lwork = 5*n; ierr = PetscMalloc2(n,&realpart,n,&imagpart);CHKERRQ(ierr); ierr = PetscMalloc1(5*n,&work);CHKERRQ(ierr); #if defined(PETSC_MISSING_LAPACK_GEEV) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GEEV - Lapack routine is unavailable\nNot able to provide eigen values."); #else { PetscBLASInt lierr; PetscScalar sdummy; PetscBLASInt bn; ierr = PetscBLASIntCast(n,&bn);CHKERRQ(ierr); ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_("N","N",&bn,array,&bn,realpart,imagpart,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,&lierr)); if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in LAPACK routine %d",(int)lierr); ierr = PetscFPTrapPop();CHKERRQ(ierr); } #endif ierr = PetscFree(work);CHKERRQ(ierr); ierr = PetscMalloc1(n,&perm);CHKERRQ(ierr); for (i=0; i<n; i++) perm[i] = i; ierr = PetscSortRealWithPermutation(n,realpart,perm);CHKERRQ(ierr); for (i=0; i<n; i++) { r[i] = realpart[perm[i]]; c[i] = imagpart[perm[i]]; } ierr = PetscFree(perm);CHKERRQ(ierr); ierr = PetscFree2(realpart,imagpart);CHKERRQ(ierr); } #else if (!rank) { PetscScalar *work,*eigs; PetscReal *rwork; PetscBLASInt idummy,lwork; PetscInt *perm; idummy = n; lwork = 5*n; ierr = PetscMalloc1(5*n,&work);CHKERRQ(ierr); ierr = PetscMalloc1(2*n,&rwork);CHKERRQ(ierr); ierr = PetscMalloc1(n,&eigs);CHKERRQ(ierr); #if defined(PETSC_MISSING_LAPACK_GEEV) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GEEV - Lapack routine is unavailable\nNot able to provide eigen values."); #else { PetscBLASInt lierr; PetscScalar sdummy; PetscBLASInt nb; ierr = PetscBLASIntCast(n,&nb);CHKERRQ(ierr); ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_("N","N",&nb,array,&nb,eigs,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,rwork,&lierr)); if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in LAPACK routine %d",(int)lierr); ierr = PetscFPTrapPop();CHKERRQ(ierr); } #endif ierr = PetscFree(work);CHKERRQ(ierr); ierr = PetscFree(rwork);CHKERRQ(ierr); ierr = PetscMalloc1(n,&perm);CHKERRQ(ierr); for (i=0; i<n; i++) perm[i] = i; for (i=0; i<n; i++) r[i] = PetscRealPart(eigs[i]); ierr = PetscSortRealWithPermutation(n,r,perm);CHKERRQ(ierr); for (i=0; i<n; i++) { r[i] = PetscRealPart(eigs[perm[i]]); c[i] = PetscImaginaryPart(eigs[perm[i]]); } ierr = PetscFree(perm);CHKERRQ(ierr); ierr = PetscFree(eigs);CHKERRQ(ierr); } #endif if (size > 1) { ierr = MatDenseRestoreArray(A,&array);CHKERRQ(ierr); ierr = MatDestroy(&A);CHKERRQ(ierr); } else { ierr = MatDenseRestoreArray(BA,&array);CHKERRQ(ierr); } ierr = MatDestroy(&BA);CHKERRQ(ierr); PetscFunctionReturn(0); }