Example #1
static PetscErrorCode MatStashSortCompress_Private(MatStash *stash,InsertMode insertmode)
{
  PetscErrorCode ierr;
  PetscMatStashSpace space;
  PetscInt n = stash->n,bs = stash->bs,bs2 = bs*bs,cnt,*row,*col,*perm,rowstart,i;
  PetscScalar **valptr;

  PetscFunctionBegin;
  ierr = PetscMalloc4(n,&row,n,&col,n,&valptr,n,&perm);CHKERRQ(ierr);
  for (space=stash->space_head,cnt=0; space; space=space->next) {
    for (i=0; i<space->local_used; i++) {
      row[cnt] = space->idx[i];
      col[cnt] = space->idy[i];
      valptr[cnt] = &space->val[i*bs2];
      perm[cnt] = cnt;          /* Will tell us where to find valptr after sorting row[] and col[] */
      cnt++;
    }
  }
  if (cnt != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatStash n %D, but counted %D entries",n,cnt);
  ierr = PetscSortIntWithArrayPair(n,row,col,perm);CHKERRQ(ierr);
  /* Scan through the rows, sorting each one, combining duplicates, and packing send buffers */
  for (rowstart=0,cnt=0,i=1; i<=n; i++) {
    if (i == n || row[i] != row[rowstart]) {         /* Sort the last row. */
      PetscInt colstart;
      ierr = PetscSortIntWithArray(i-rowstart,&col[rowstart],&perm[rowstart]);CHKERRQ(ierr);
      for (colstart=rowstart; colstart<i; ) { /* Compress multiple insertions to the same location */
        PetscInt j,l;
        MatStashBlock *block;
        ierr = PetscSegBufferGet(stash->segsendblocks,1,&block);CHKERRQ(ierr);
        block->row = row[rowstart];
        block->col = col[colstart];
        ierr = PetscMemcpy(block->vals,valptr[perm[colstart]],bs2*sizeof(block->vals[0]));CHKERRQ(ierr);
        for (j=colstart+1; j<i && col[j] == col[colstart]; j++) { /* Add any extra stashed blocks at the same (row,col) */
          if (insertmode == ADD_VALUES) {
            for (l=0; l<bs2; l++) block->vals[l] += valptr[perm[j]][l];
          } else {
            ierr = PetscMemcpy(block->vals,valptr[perm[j]],bs2*sizeof(block->vals[0]));CHKERRQ(ierr);
          }
        }
        colstart = j;
      }
      rowstart = i;
    }
  }
  ierr = PetscFree4(row,col,valptr,perm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
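
The routine above leans on two PETSc sorting utilities: PetscSortIntWithArrayPair() to co-sort (row, col, perm) and PetscSortIntWithArray() to co-sort each row's columns together with the permutation. Below is a minimal standalone sketch (hypothetical data, not part of the routine above) showing how PetscSortIntWithArray() permutes the companion array along with the keys.

/* Minimal sketch (hypothetical data): PetscSortIntWithArray() sorts keys[] ascending
   and applies the same permutation to vals[], which here records original positions. */
#include <petscsys.h>

int main(int argc,char **argv)
{
  PetscErrorCode ierr;
  PetscInt       keys[4] = {42,7,19,7};
  PetscInt       vals[4] = {0,1,2,3};
  PetscInt       i,n = 4;

  ierr = PetscInitialize(&argc,&argv,NULL,NULL);if (ierr) return ierr;
  ierr = PetscSortIntWithArray(n,keys,vals);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscPrintf(PETSC_COMM_SELF,"key %D came from position %D\n",keys[i],vals[i]);CHKERRQ(ierr);
  }
  ierr = PetscFinalize();
  return ierr;
}
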
Example #2
/*@C
    ISLocalToGlobalMappingGetInfo - Gets the neighbor information for each processor and 
     each index shared by more than one processor 

    Collective on ISLocalToGlobalMapping

    Input Parameter:
.   mapping - the mapping from local to global indexing

    Output Parameters:
+   nproc - number of processors that are connected to this one
.   proc - neighboring processors
.   numproc - number of indices for each subdomain (processor)
-   indices - indices of nodes (in local numbering) shared with neighbors (sorted by global numbering)

    Level: advanced

    Concepts: mapping^local to global

    Fortran Usage: 
$        ISLocalToGlobalMpngGetInfoSize(ISLocalToGlobalMapping,PetscInt nproc,PetscInt numprocmax,ierr) followed by 
$        ISLocalToGlobalMappingGetInfo(ISLocalToGlobalMapping,PetscInt nproc, PetscInt procs[nproc],PetscInt numprocs[nproc],
          PetscInt indices[nproc][numprocmax],ierr)
        There is no ISLocalToGlobalMappingRestoreInfo() in Fortran. You must make sure that procs[], numprocs[] and 
        indices[][] are large enough arrays, either by allocating them dynamically or defining static ones large enough.


.seealso: ISLocalToGlobalMappingDestroy(), ISLocalToGlobalMappingCreateIS(), ISLocalToGlobalMappingCreate(),
          ISLocalToGlobalMappingRestoreInfo()
@*/
PetscErrorCode PETSCVEC_DLLEXPORT ISLocalToGlobalMappingGetInfo(ISLocalToGlobalMapping mapping,PetscInt *nproc,PetscInt *procs[],PetscInt *numprocs[],PetscInt **indices[])
{
  PetscErrorCode ierr;
  PetscMPIInt    size,rank,tag1,tag2,tag3,*len,*source,imdex;
  PetscInt       i,n = mapping->n,Ng,ng,max = 0,*lindices = mapping->indices;
  PetscInt       *nprocs,*owner,nsends,*sends,j,*starts,nmax,nrecvs,*recvs,proc;
  PetscInt       cnt,scale,*ownedsenders,*nownedsenders,rstart,nowned;
  PetscInt       node,nownedm,nt,*sends2,nsends2,*starts2,*lens2,*dest,nrecvs2,*starts3,*recvs2,k,*bprocs,*tmp;
  PetscInt       first_procs,first_numprocs,*first_indices;
  MPI_Request    *recv_waits,*send_waits;
  MPI_Status     recv_status,*send_status,*recv_statuses;
  MPI_Comm       comm = ((PetscObject)mapping)->comm;
  PetscTruth     debug = PETSC_FALSE;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mapping,IS_LTOGM_COOKIE,1);
  ierr   = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr   = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (size == 1) {
    *nproc         = 0;
    *procs         = PETSC_NULL;
    ierr           = PetscMalloc(sizeof(PetscInt),numprocs);CHKERRQ(ierr);
    (*numprocs)[0] = 0;
    ierr           = PetscMalloc(sizeof(PetscInt*),indices);CHKERRQ(ierr); 
    (*indices)[0]  = PETSC_NULL;
    PetscFunctionReturn(0);
  }

  ierr = PetscOptionsGetTruth(PETSC_NULL,"-islocaltoglobalmappinggetinfo_debug",&debug,PETSC_NULL);CHKERRQ(ierr);

  /*
    Notes on ISLocalToGlobalMappingGetInfo

    globally owned node - the nodes that have been assigned to this processor in global
           numbering, just for this routine.

    nontrivial globally owned node - node assigned to this processor that is on a subdomain
           boundary (i.e. it has more than one local owner)

    locally owned node - node that exists on this processor's subdomain

    nontrivial locally owned node - node that is not in the interior (i.e. has more than one
           local subdomain)
  */
  ierr = PetscObjectGetNewTag((PetscObject)mapping,&tag1);CHKERRQ(ierr);
  ierr = PetscObjectGetNewTag((PetscObject)mapping,&tag2);CHKERRQ(ierr);
  ierr = PetscObjectGetNewTag((PetscObject)mapping,&tag3);CHKERRQ(ierr);

  for (i=0; i<n; i++) {
    if (lindices[i] > max) max = lindices[i];
  }
  ierr   = MPI_Allreduce(&max,&Ng,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
  Ng++;
  ierr   = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr   = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  scale  = Ng/size + 1;
  ng     = scale; if (rank == size-1) ng = Ng - scale*(size-1); ng = PetscMax(1,ng);
  rstart = scale*rank;

  /* determine ownership ranges of global indices */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);

  /* determine owners of each local node  */
  ierr = PetscMalloc(n*sizeof(PetscInt),&owner);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    proc             = lindices[i]/scale; /* processor that globally owns this index */
    nprocs[2*proc+1] = 1;                 /* processor globally owns at least one of ours */
    owner[i]         = proc;              
    nprocs[2*proc]++;                     /* count of how many that processor globally owns of ours */
  }
  nsends = 0; for (i=0; i<size; i++) nsends += nprocs[2*i+1];
  ierr = PetscInfo1(mapping,"Number of global owners for my local data %d\n",nsends);CHKERRQ(ierr);

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
  ierr = PetscInfo1(mapping,"Number of local owners for my global data %d\n",nrecvs);CHKERRQ(ierr);

  /* post receives for owned rows */
  ierr = PetscMalloc((2*nrecvs+1)*(nmax+1)*sizeof(PetscInt),&recvs);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(recvs+2*nmax*i,2*nmax,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* pack messages containing lists of local nodes to owners */
  ierr       = PetscMalloc((2*n+1)*sizeof(PetscInt),&sends);CHKERRQ(ierr);
  ierr       = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0]  = 0; 
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + 2*nprocs[2*i-2];} 
  for (i=0; i<n; i++) {
    sends[starts[owner[i]]++] = lindices[i];
    sends[starts[owner[i]]++] = i;
  }
  ierr = PetscFree(owner);CHKERRQ(ierr);
  starts[0]  = 0; 
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + 2*nprocs[2*i-2];} 

  /* send the messages */
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(PetscInt),&dest);CHKERRQ(ierr);
  cnt = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i]) {
      ierr      = MPI_Isend(sends+starts[i],2*nprocs[2*i],MPIU_INT,i,tag1,comm,send_waits+cnt);CHKERRQ(ierr);
      dest[cnt] = i;
      cnt++;
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  /* wait on receives */
  ierr = PetscMalloc((nrecvs+1)*sizeof(PetscMPIInt),&source);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(PetscMPIInt),&len);CHKERRQ(ierr);
  cnt  = nrecvs; 
  ierr = PetscMalloc((ng+1)*sizeof(PetscInt),&nownedsenders);CHKERRQ(ierr);
  ierr = PetscMemzero(nownedsenders,ng*sizeof(PetscInt));CHKERRQ(ierr);
  while (cnt) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr           = MPI_Get_count(&recv_status,MPIU_INT,&len[imdex]);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    len[imdex]     = len[imdex]/2;
    /* count how many local owners for each of my global owned indices */
    for (i=0; i<len[imdex]; i++) nownedsenders[recvs[2*imdex*nmax+2*i]-rstart]++;
    cnt--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* count how many globally owned indices are on an edge multiplied by how many processors own them. */
  nowned  = 0;
  nownedm = 0;
  for (i=0; i<ng; i++) {
    if (nownedsenders[i] > 1) {nownedm += nownedsenders[i]; nowned++;}
  }

  /* create single array to contain rank of all local owners of each globally owned index */
  ierr      = PetscMalloc((nownedm+1)*sizeof(PetscInt),&ownedsenders);CHKERRQ(ierr);
  ierr      = PetscMalloc((ng+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<ng; i++) {
    if (nownedsenders[i-1] > 1) starts[i] = starts[i-1] + nownedsenders[i-1];
    else starts[i] = starts[i-1];
  }

  /* for each nontrivial globally owned node, list all arriving processors */
  for (i=0; i<nrecvs; i++) {
    for (j=0; j<len[i]; j++) {
      node = recvs[2*i*nmax+2*j]-rstart;
      if (nownedsenders[node] > 1) {
        ownedsenders[starts[node]++] = source[i];
      }
    }
  }

  if (debug) { /* -----------------------------------  */
    starts[0]    = 0;
    for (i=1; i<ng; i++) {
      if (nownedsenders[i-1] > 1) starts[i] = starts[i-1] + nownedsenders[i-1];
      else starts[i] = starts[i-1];
    }
    for (i=0; i<ng; i++) {
      if (nownedsenders[i] > 1) {
        ierr = PetscSynchronizedPrintf(comm,"[%d] global node %d local owner processors: ",rank,i+rstart);CHKERRQ(ierr);
        for (j=0; j<nownedsenders[i]; j++) {
          ierr = PetscSynchronizedPrintf(comm,"%d ",ownedsenders[starts[i]+j]);CHKERRQ(ierr);
        }
        ierr = PetscSynchronizedPrintf(comm,"\n");CHKERRQ(ierr);
      }
    }
    ierr = PetscSynchronizedFlush(comm);CHKERRQ(ierr);
  }/* -----------------------------------  */

  /* wait on original sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(sends);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* pack messages to send back to local owners */
  starts[0]    = 0;
  for (i=1; i<ng; i++) {
    if (nownedsenders[i-1] > 1) starts[i] = starts[i-1] + nownedsenders[i-1];
    else starts[i] = starts[i-1];
  }
  nsends2 = nrecvs;
  ierr    = PetscMalloc((nsends2+1)*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); /* length of each message */
  for (i=0; i<nrecvs; i++) {
    nprocs[i] = 1;
    for (j=0; j<len[i]; j++) {
      node = recvs[2*i*nmax+2*j]-rstart;
      if (nownedsenders[node] > 1) {
        nprocs[i] += 2 + nownedsenders[node];
      }
    }
  }
  nt = 0; for (i=0; i<nsends2; i++) nt += nprocs[i];
  ierr = PetscMalloc((nt+1)*sizeof(PetscInt),&sends2);CHKERRQ(ierr); 
  ierr = PetscMalloc((nsends2+1)*sizeof(PetscInt),&starts2);CHKERRQ(ierr);
  starts2[0] = 0; for (i=1; i<nsends2; i++) starts2[i] = starts2[i-1] + nprocs[i-1];
  /*
     Each message is 1 + nprocs[i] long, and consists of 
       (0) the number of nodes being sent back 
       (1) the local node number,
       (2) the number of processors sharing it,
       (3) the processors sharing it
  */
  for (i=0; i<nsends2; i++) {
    cnt = 1;
    sends2[starts2[i]] = 0;
    for (j=0; j<len[i]; j++) {
      node = recvs[2*i*nmax+2*j]-rstart;
      if (nownedsenders[node] > 1) {
        sends2[starts2[i]]++;
        sends2[starts2[i]+cnt++] = recvs[2*i*nmax+2*j+1];
        sends2[starts2[i]+cnt++] = nownedsenders[node];
        ierr = PetscMemcpy(&sends2[starts2[i]+cnt],&ownedsenders[starts[node]],nownedsenders[node]*sizeof(PetscInt));CHKERRQ(ierr);
        cnt += nownedsenders[node];
      }
    }
  }

  /* receive the message lengths */
  nrecvs2 = nsends;
  ierr = PetscMalloc((nrecvs2+1)*sizeof(PetscInt),&lens2);CHKERRQ(ierr);  
  ierr = PetscMalloc((nrecvs2+1)*sizeof(PetscInt),&starts3);CHKERRQ(ierr);  
  ierr = PetscMalloc((nrecvs2+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs2; i++) {
    ierr = MPI_Irecv(&lens2[i],1,MPIU_INT,dest[i],tag2,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* send the message lengths */
  for (i=0; i<nsends2; i++) {
    ierr = MPI_Send(&nprocs[i],1,MPIU_INT,source[i],tag2,comm);CHKERRQ(ierr);
  }

  /* wait on receives of lens */
  if (nrecvs2) {
    ierr = PetscMalloc(nrecvs2*sizeof(MPI_Status),&recv_statuses);CHKERRQ(ierr);
    ierr = MPI_Waitall(nrecvs2,recv_waits,recv_statuses);CHKERRQ(ierr);
    ierr = PetscFree(recv_statuses);CHKERRQ(ierr);
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  starts3[0] = 0;
  nt         = 0;
  for (i=0; i<nrecvs2-1; i++) {
    starts3[i+1] = starts3[i] + lens2[i];
    nt          += lens2[i];
  }
  nt += lens2[nrecvs2-1];

  ierr = PetscMalloc((nt+1)*sizeof(PetscInt),&recvs2);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs2+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs2; i++) {
    ierr = MPI_Irecv(recvs2+starts3[i],lens2[i],MPIU_INT,dest[i],tag3,comm,recv_waits+i);CHKERRQ(ierr);
  }
  
  /* send the messages */
  ierr = PetscMalloc((nsends2+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  for (i=0; i<nsends2; i++) {
    ierr = MPI_Isend(sends2+starts2[i],nprocs[i],MPIU_INT,source[i],tag3,comm,send_waits+i);CHKERRQ(ierr);
  }

  /* wait on receives */
  if (nrecvs2) {
    ierr = PetscMalloc(nrecvs2*sizeof(MPI_Status),&recv_statuses);CHKERRQ(ierr);
    ierr = MPI_Waitall(nrecvs2,recv_waits,recv_statuses);CHKERRQ(ierr);
    ierr = PetscFree(recv_statuses);CHKERRQ(ierr);
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  if (debug) { /* -----------------------------------  */
    cnt = 0;
    for (i=0; i<nrecvs2; i++) {
      nt = recvs2[cnt++];
      for (j=0; j<nt; j++) {
        ierr = PetscSynchronizedPrintf(comm,"[%d] local node %d number of subdomains %d: ",rank,recvs2[cnt],recvs2[cnt+1]);CHKERRQ(ierr);
        for (k=0; k<recvs2[cnt+1]; k++) {
          ierr = PetscSynchronizedPrintf(comm,"%d ",recvs2[cnt+2+k]);CHKERRQ(ierr);
        }
        cnt += 2 + recvs2[cnt+1];
        ierr = PetscSynchronizedPrintf(comm,"\n");CHKERRQ(ierr);
      }
    }
    ierr = PetscSynchronizedFlush(comm);CHKERRQ(ierr);
  } /* -----------------------------------  */

  /* count the number of subdomains for each local node */
  ierr = PetscMalloc(size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,size*sizeof(PetscInt));CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<nrecvs2; i++) {
    nt = recvs2[cnt++];
    for (j=0; j<nt; j++) {
      for (k=0; k<recvs2[cnt+1]; k++) {
        nprocs[recvs2[cnt+2+k]]++;
      }
      cnt += 2 + recvs2[cnt+1];
    }
  }
  nt = 0; for (i=0; i<size; i++) nt += (nprocs[i] > 0);
  *nproc    = nt;
  ierr = PetscMalloc((nt+1)*sizeof(PetscInt),procs);CHKERRQ(ierr);
  ierr = PetscMalloc((nt+1)*sizeof(PetscInt),numprocs);CHKERRQ(ierr);
  ierr = PetscMalloc((nt+1)*sizeof(PetscInt*),indices);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscInt),&bprocs);CHKERRQ(ierr);
  cnt       = 0;
  for (i=0; i<size; i++) {
    if (nprocs[i] > 0) {
      bprocs[i]        = cnt;
      (*procs)[cnt]    = i;
      (*numprocs)[cnt] = nprocs[i];
      ierr             = PetscMalloc(nprocs[i]*sizeof(PetscInt),&(*indices)[cnt]);CHKERRQ(ierr);
      cnt++;
    }
  }

  /* make the list of subdomains for each nontrivial local node */
  ierr = PetscMemzero(*numprocs,nt*sizeof(PetscInt));CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<nrecvs2; i++) {
    nt = recvs2[cnt++];
    for (j=0; j<nt; j++) {
      for (k=0; k<recvs2[cnt+1]; k++) {
        (*indices)[bprocs[recvs2[cnt+2+k]]][(*numprocs)[bprocs[recvs2[cnt+2+k]]]++] = recvs2[cnt];
      }
      cnt += 2 + recvs2[cnt+1];
    }
  }
  ierr = PetscFree(bprocs);CHKERRQ(ierr);
  ierr = PetscFree(recvs2);CHKERRQ(ierr);

  /* sort the node indexing by their global numbers */
  nt = *nproc;
  for (i=0; i<nt; i++) {
    ierr = PetscMalloc(((*numprocs)[i])*sizeof(PetscInt),&tmp);CHKERRQ(ierr);
    for (j=0; j<(*numprocs)[i]; j++) {
      tmp[j] = lindices[(*indices)[i][j]];
    }
    ierr = PetscSortIntWithArray((*numprocs)[i],tmp,(*indices)[i]);CHKERRQ(ierr); 
    ierr = PetscFree(tmp);CHKERRQ(ierr);
  }

  if (debug) { /* -----------------------------------  */
    nt = *nproc;
    for (i=0; i<nt; i++) {
      ierr = PetscSynchronizedPrintf(comm,"[%d] subdomain %d number of indices %d: ",rank,(*procs)[i],(*numprocs)[i]);CHKERRQ(ierr);
      for (j=0; j<(*numprocs)[i]; j++) {
        ierr = PetscSynchronizedPrintf(comm,"%d ",(*indices)[i][j]);CHKERRQ(ierr);
      }
      ierr = PetscSynchronizedPrintf(comm,"\n");CHKERRQ(ierr);
    }
    ierr = PetscSynchronizedFlush(comm);CHKERRQ(ierr);
  } /* -----------------------------------  */

  /* wait on sends */
  if (nsends2) {
    ierr = PetscMalloc(nsends2*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends2,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }

  ierr = PetscFree(starts3);CHKERRQ(ierr);
  ierr = PetscFree(dest);CHKERRQ(ierr);
  ierr = PetscFree(send_waits);CHKERRQ(ierr);

  ierr = PetscFree(nownedsenders);CHKERRQ(ierr);
  ierr = PetscFree(ownedsenders);CHKERRQ(ierr);
  ierr = PetscFree(starts);CHKERRQ(ierr);
  ierr = PetscFree(starts2);CHKERRQ(ierr);
  ierr = PetscFree(lens2);CHKERRQ(ierr);

  ierr = PetscFree(source);CHKERRQ(ierr);
  ierr = PetscFree(len);CHKERRQ(ierr);
  ierr = PetscFree(recvs);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);
  ierr = PetscFree(sends2);CHKERRQ(ierr);

  /* put the information about myself as the first entry in the list */
  first_procs    = (*procs)[0];
  first_numprocs = (*numprocs)[0];
  first_indices  = (*indices)[0];
  for (i=0; i<*nproc; i++) {
    if ((*procs)[i] == rank) {
      (*procs)[0]    = (*procs)[i];
      (*numprocs)[0] = (*numprocs)[i];
      (*indices)[0]  = (*indices)[i];
      (*procs)[i]    = first_procs; 
      (*numprocs)[i] = first_numprocs;
      (*indices)[i]  = first_indices;
      break;
    }
  }
  PetscFunctionReturn(0);
}
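
A minimal usage sketch for the routine above, assuming a valid ISLocalToGlobalMapping is already in hand; the companion ISLocalToGlobalMappingRestoreInfo() listed in .seealso is assumed to take the same arguments and is used to release the arrays.

/* Sketch only: query the neighbor info, print it, and hand the arrays back. */
#include <petscis.h>

static PetscErrorCode PrintNeighborInfo(ISLocalToGlobalMapping mapping)
{
  PetscErrorCode ierr;
  PetscInt       nproc,*procs,*numprocs,**indices,i,j;

  PetscFunctionBegin;
  ierr = ISLocalToGlobalMappingGetInfo(mapping,&nproc,&procs,&numprocs,&indices);CHKERRQ(ierr);
  for (i=0; i<nproc; i++) {
    ierr = PetscPrintf(PETSC_COMM_SELF,"neighbor %D shares %D indices:",procs[i],numprocs[i]);CHKERRQ(ierr);
    for (j=0; j<numprocs[i]; j++) {
      ierr = PetscPrintf(PETSC_COMM_SELF," %D",indices[i][j]);CHKERRQ(ierr);
    }
    ierr = PetscPrintf(PETSC_COMM_SELF,"\n");CHKERRQ(ierr);
  }
  ierr = ISLocalToGlobalMappingRestoreInfo(mapping,&nproc,&procs,&numprocs,&indices);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
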
Example #3
/*
 * Increase overlap for the sub-matrix across sub communicator
 * sub-matrix could be a graph or numerical matrix
 * */
PetscErrorCode  MatIncreaseOverlapSplit_Single(Mat mat,IS *is,PetscInt ov)
{
  PetscInt         i,nindx,*indices_sc,*indices_ov,localsize,*localsizes_sc,localsize_tmp;
  PetscInt         *indices_ov_rd,nroots,nleaves,*localoffsets,*indices_recv,*sources_sc,*sources_sc_rd;
  const PetscInt   *indices;
  PetscMPIInt      srank,ssize,issamecomm,k,grank;
  IS               is_sc,allis_sc,partitioning;
  MPI_Comm         gcomm,dcomm,scomm;
  PetscSF          sf;
  PetscSFNode      *remote;
  Mat              *smat;
  MatPartitioning  part;
  PetscErrorCode   ierr;

  PetscFunctionBegin;
  /* get a sub-communicator before calling the individual MatIncreaseOverlap,
   * since the sub-communicator may be changed.
   * */
  ierr = PetscObjectGetComm((PetscObject)(*is),&dcomm);CHKERRQ(ierr);
  /*make a copy before the original one is deleted*/
  ierr = PetscCommDuplicate(dcomm,&scomm,NULL);CHKERRQ(ierr);
  /*get a global communicator, where mat should be a global matrix  */
  ierr = PetscObjectGetComm((PetscObject)mat,&gcomm);CHKERRQ(ierr);
  /*increase overlap on each individual subdomain*/
  ierr = (*mat->ops->increaseoverlap)(mat,1,is,ov);CHKERRQ(ierr);
  /*compare communicators */
  ierr = MPI_Comm_compare(gcomm,scomm,&issamecomm);CHKERRQ(ierr);
  /* if the sub-communicator is the same as the global communicator,
   * user does not want to use a sub-communicator
   * */
  if(issamecomm == MPI_IDENT || issamecomm == MPI_CONGRUENT) PetscFunctionReturn(0);
  /* if the sub-communicator is petsc_comm_self,
   * the user also does not care about the sub-communicator
   * */
  ierr = MPI_Comm_compare(scomm,PETSC_COMM_SELF,&issamecomm);CHKERRQ(ierr);
  if(issamecomm == MPI_IDENT || issamecomm == MPI_CONGRUENT){PetscFunctionReturn(0);}
  /*local rank, size in a sub-communicator  */
  ierr = MPI_Comm_rank(scomm,&srank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(scomm,&ssize);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(gcomm,&grank);CHKERRQ(ierr);
  /*create a new IS based on sub-communicator
   * since the old IS is often based on petsc_comm_self
   * */
  ierr = ISGetLocalSize(*is,&nindx);CHKERRQ(ierr);
  ierr = PetscCalloc1(nindx,&indices_sc);CHKERRQ(ierr);
  ierr = ISGetIndices(*is,&indices);CHKERRQ(ierr);
  ierr = PetscMemcpy(indices_sc,indices,sizeof(PetscInt)*nindx);CHKERRQ(ierr);
  ierr = ISRestoreIndices(*is,&indices);CHKERRQ(ierr);
  /*we do not need it any more*/
  ierr = ISDestroy(is);CHKERRQ(ierr);
  /*create an index set based on the sub communicator  */
  ierr = ISCreateGeneral(scomm,nindx,indices_sc,PETSC_OWN_POINTER,&is_sc);CHKERRQ(ierr);
  /*gather all indices within  the sub communicator*/
  ierr = ISAllGather(is_sc,&allis_sc);CHKERRQ(ierr);
  ierr = ISDestroy(&is_sc);CHKERRQ(ierr);
  /* gather local sizes */
  ierr = PetscMalloc1(ssize,&localsizes_sc);CHKERRQ(ierr);
  /*get individual local sizes for all index sets*/
  ierr = MPI_Gather(&nindx,1,MPIU_INT,localsizes_sc,1,MPIU_INT,0,scomm);CHKERRQ(ierr);
  /*only root does these computations */
  if(!srank){
   /*get local size for the big index set*/
   ierr = ISGetLocalSize(allis_sc,&localsize);CHKERRQ(ierr);
   ierr = PetscCalloc2(localsize,&indices_ov,localsize,&sources_sc);CHKERRQ(ierr);
   ierr = PetscCalloc2(localsize,&indices_ov_rd,localsize,&sources_sc_rd);CHKERRQ(ierr);
   ierr = ISGetIndices(allis_sc,&indices);CHKERRQ(ierr);
   ierr = PetscMemcpy(indices_ov,indices,sizeof(PetscInt)*localsize);CHKERRQ(ierr);
   ierr = ISRestoreIndices(allis_sc,&indices);CHKERRQ(ierr);
   /*we do not need it any more */
   ierr = ISDestroy(&allis_sc);CHKERRQ(ierr);
   /*assign corresponding sources */
   localsize_tmp = 0;
   for(k=0; k<ssize; k++){
     for(i=0; i<localsizes_sc[k]; i++){
       sources_sc[localsize_tmp++] = k;
     }
   }
   /*record where indices come from */
   ierr = PetscSortIntWithArray(localsize,indices_ov,sources_sc);CHKERRQ(ierr);
   /*count local sizes for reduced indices */
   ierr = PetscMemzero(localsizes_sc,sizeof(PetscInt)*ssize);CHKERRQ(ierr);
   /*initialize the first entry*/
   if(localsize){
     indices_ov_rd[0] = indices_ov[0];
     sources_sc_rd[0] = sources_sc[0];
     localsizes_sc[sources_sc[0]]++;
   }
   localsize_tmp = 1;
   /*remove duplicate integers */
   for(i=1; i<localsize; i++){
     if(indices_ov[i] != indices_ov[i-1]){
       indices_ov_rd[localsize_tmp]   = indices_ov[i];
       sources_sc_rd[localsize_tmp++] = sources_sc[i];
       localsizes_sc[sources_sc[i]]++;
     }
   }
   ierr = PetscFree2(indices_ov,sources_sc);CHKERRQ(ierr);
   ierr = PetscCalloc1(ssize+1,&localoffsets);CHKERRQ(ierr);
   for(k=0; k<ssize; k++){
     localoffsets[k+1] = localoffsets[k] + localsizes_sc[k];
   }
   /*construct a star forest to send data back */
   nleaves = localoffsets[ssize];
   ierr = PetscMemzero(localoffsets,(ssize+1)*sizeof(PetscInt));CHKERRQ(ierr);
   nroots  = localsizes_sc[srank];
   ierr = PetscCalloc1(nleaves,&remote);CHKERRQ(ierr);
   for(i=0; i<nleaves; i++){
     remote[i].rank  = sources_sc_rd[i];
     remote[i].index = localoffsets[sources_sc_rd[i]]++;
   }
   ierr = PetscFree(localoffsets);CHKERRQ(ierr);
  }else{
   ierr = ISDestroy(&allis_sc);CHKERRQ(ierr);
   /*Allocate a 'zero' pointer */
   ierr = PetscCalloc1(0,&remote);CHKERRQ(ierr);
   nleaves = 0;
   indices_ov_rd = 0;
   sources_sc_rd = 0;
  }
  /*scatter sizes to everybody */
  ierr = MPI_Scatter(localsizes_sc,1, MPIU_INT,&nroots,1, MPIU_INT,0,scomm);CHKERRQ(ierr);
  /*free memory */
  ierr = PetscFree(localsizes_sc);CHKERRQ(ierr);
  ierr = PetscCalloc1(nroots,&indices_recv);CHKERRQ(ierr);
  /*ierr = MPI_Comm_dup(scomm,&dcomm);CHKERRQ(ierr);*/
  /*send data back to everybody */
  ierr = PetscSFCreate(scomm,&sf);CHKERRQ(ierr);
  ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf,nroots,nleaves,PETSC_NULL,PETSC_OWN_POINTER,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  ierr = PetscSFReduceBegin(sf,MPIU_INT,indices_ov_rd,indices_recv,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_INT,indices_ov_rd,indices_recv,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* free memory */
  ierr = PetscFree2(indices_ov_rd,sources_sc_rd);CHKERRQ(ierr);
  /*create an index set*/
  ierr = ISCreateGeneral(scomm,nroots,indices_recv,PETSC_OWN_POINTER,&is_sc);CHKERRQ(ierr);
  /*construct a parallel submatrix */
  ierr = MatGetSubMatricesMPI(mat,1,&is_sc,&is_sc,MAT_INITIAL_MATRIX,&smat);CHKERRQ(ierr);
  /* we do not need them any more */
  ierr = ISDestroy(&allis_sc);CHKERRQ(ierr);
  /*create a partitioner to repartition the sub-matrix*/
  ierr = MatPartitioningCreate(scomm,&part);CHKERRQ(ierr);
  ierr = MatPartitioningSetAdjacency(part,smat[0]);CHKERRQ(ierr);
#if PETSC_HAVE_PARMETIS
  /* if there exists a ParMETIS installation, we try to use ParMETIS
   * because its repartitioning routine may work better
   * */
  ierr = MatPartitioningSetType(part,MATPARTITIONINGPARMETIS);CHKERRQ(ierr);
  /*try to use the repartition function, instead of the partition function */
  ierr = MatPartitioningParmetisSetRepartition(part);CHKERRQ(ierr);
#else
  /*we at least provide a default partitioner to rebalance the computation  */
  ierr = MatPartitioningSetType(part,MATPARTITIONINGAVERAGE);CHKERRQ(ierr);
#endif
  /*the user can pick any partitioner by using an option*/
  ierr = MatPartitioningSetFromOptions(part);CHKERRQ(ierr);
  /* apply partition */
  ierr = MatPartitioningApply(part,&partitioning);CHKERRQ(ierr);
  ierr = MatPartitioningDestroy(&part);CHKERRQ(ierr);
  ierr = MatDestroy(&(smat[0]));CHKERRQ(ierr);
  ierr = PetscFree(smat);CHKERRQ(ierr);
  /* get local rows including  overlap */
  ierr = ISBuildTwoSided(partitioning,is_sc,is);CHKERRQ(ierr);
  /* destroy */
  ierr = ISDestroy(&is_sc);CHKERRQ(ierr);
  ierr = ISDestroy(&partitioning);CHKERRQ(ierr);
  ierr = PetscCommDestroy(&scomm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
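
The heart of the root-rank branch above is a sort-then-deduplicate pass in which PetscSortIntWithArray() keeps the source rank of every index aligned with the index itself. A compact standalone sketch of that pattern (function and variable names here are hypothetical, not from the routine above):

/* Sketch: sort idx[] and carry src[] along, then compact duplicates in place. */
#include <petscsys.h>

static PetscErrorCode DeduplicateWithSources(PetscInt n,PetscInt idx[],PetscInt src[],PetscInt *nunique)
{
  PetscErrorCode ierr;
  PetscInt       i,m = 0;

  PetscFunctionBegin;
  ierr = PetscSortIntWithArray(n,idx,src);CHKERRQ(ierr); /* src[i] follows idx[i] */
  if (n) m = 1;                                          /* the first entry always survives */
  for (i=1; i<n; i++) {
    if (idx[i] != idx[i-1]) {
      idx[m] = idx[i];
      src[m] = src[i];
      m++;
    }
  }
  *nunique = m;
  PetscFunctionReturn(0);
}
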
Example #4
/*@
   ISPairToList   -   convert an IS pair encoding an integer map to a list of ISs.
                     Each IS on the output list contains the preimage for each index on the second input IS.
                     The ISs on the output list are constructed on the subcommunicators of the input IS pair.
                     Each subcommunicator corresponds to the preimage of some index j -- this subcomm contains
                     exactly the ranks that assign some indices i to j.  This is essentially the inverse of
                     ISListToPair().

  Collective on xis.

  Input arguments:
+ xis -  domain IS
- yis -  range IS

  Output arguments:
+ listlen -  length of islist
- islist  -  list of ISs breaking up xis by color

  Note:
+ xis and yis must be of the same length and have congruent communicators.
- The resulting ISs have subcommunicators in a "deadlock-free" order (see ISListToPair()).

  Level: advanced

.seealso: ISListToPair()
 @*/
PetscErrorCode ISPairToList(IS xis, IS yis, PetscInt *listlen, IS **islist)
{
  PetscErrorCode ierr;
  IS             indis = xis, coloris = yis;
  PetscInt       *inds, *colors, llen, ilen, lstart, lend, lcount,l;
  PetscMPIInt    rank, size, llow, lhigh, low, high,color,subsize;
  const PetscInt *ccolors, *cinds;
  MPI_Comm       comm, subcomm;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(xis, IS_CLASSID, 1);
  PetscValidHeaderSpecific(yis, IS_CLASSID, 2);
  PetscCheckSameComm(xis,1,yis,2);
  PetscValidIntPointer(listlen,3);
  PetscValidPointer(islist,4);
  ierr = PetscObjectGetComm((PetscObject)xis,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
  /* Extract, copy and sort the local indices and colors by color. */
  ierr = ISGetLocalSize(coloris, &llen);CHKERRQ(ierr);
  ierr = ISGetLocalSize(indis,   &ilen);CHKERRQ(ierr);
  if (llen != ilen) SETERRQ2(comm, PETSC_ERR_ARG_SIZ, "Incompatible IS sizes: %D and %D", ilen, llen);
  ierr = ISGetIndices(coloris, &ccolors);CHKERRQ(ierr);
  ierr = ISGetIndices(indis, &cinds);CHKERRQ(ierr);
  ierr = PetscMalloc2(ilen,&inds,llen,&colors);CHKERRQ(ierr);
  ierr = PetscMemcpy(inds,cinds,ilen*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(colors,ccolors,llen*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscSortIntWithArray(llen, colors, inds);CHKERRQ(ierr);
  /* Determine the global extent of colors. */
  llow   = 0; lhigh  = -1;
  lstart = 0; lcount = 0;
  while (lstart < llen) {
    lend = lstart+1;
    while (lend < llen && colors[lend] == colors[lstart]) ++lend;
    llow   = PetscMin(llow,colors[lstart]);
    lhigh  = PetscMax(lhigh,colors[lstart]);
    ++lcount;
    lstart = lend;
  }
  ierr     = MPIU_Allreduce(&llow,&low,1,MPI_INT,MPI_MIN,comm);CHKERRQ(ierr);
  ierr     = MPIU_Allreduce(&lhigh,&high,1,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  *listlen = 0;
  if (low <= high) {
    if (lcount > 0) {
      *listlen = lcount;
      if (!*islist) {
        ierr = PetscMalloc1(lcount, islist);CHKERRQ(ierr);
      }
    }
    /*
     Traverse all possible global colors, and participate in the subcommunicators
     for the locally-supported colors.
     */
    lcount = 0;
    lstart = 0; lend = 0;
    for (l = low; l <= high; ++l) {
      /*
       Find the range of indices with the same color, which is not smaller than l.
       Observe that, since colors is sorted, and is a subsequence of [low,high],
       as soon as we find a new color, it is >= l.
       */
      if (lstart < llen) {
        /* The start of the next locally-owned color is identified.  Now look for the end. */
        if (lstart == lend) {
          lend = lstart+1;
          while (lend < llen && colors[lend] == colors[lstart]) ++lend;
        }
        /* Now check whether the identified color segment matches l. */
        if (colors[lstart] < l) SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Locally owned color %D at location %D is smaller than the next global color %D", colors[lstart], lcount, l);
      }
      color = (PetscMPIInt)(colors[lstart] == l);
      /* Check whether a proper subcommunicator exists. */
      ierr = MPIU_Allreduce(&color,&subsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);

      if (subsize == 1) subcomm = PETSC_COMM_SELF;
      else if (subsize == size) subcomm = comm;
      else {
        /* a proper communicator is necessary, so we create it. */
        ierr = MPI_Comm_split(comm, color, rank, &subcomm);CHKERRQ(ierr);
      }
      if (colors[lstart] == l) {
        /* If we have l among the local colors, we create an IS to hold the corresponding indices. */
        ierr = ISCreateGeneral(subcomm, lend-lstart,inds+lstart,PETSC_COPY_VALUES,*islist+lcount);CHKERRQ(ierr);
        /* Position lstart at the beginning of the next local color. */
        lstart = lend;
        /* Increment the counter of the local colors split off into an IS. */
        ++lcount;
      }
      if (subsize > 0 && subsize < size) {
        /*
         Irrespective of color, destroy the split off subcomm:
         a subcomm used in the IS creation above is duplicated
         into a proper PETSc comm.
         */
        ierr = MPI_Comm_free(&subcomm);CHKERRQ(ierr);
      }
    } /* for (l = low; l <= high; ++l) */
  } /* if (low <= high) */
  ierr = PetscFree2(inds,colors);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
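
A minimal calling sketch, assuming xis and yis already exist with equal local lengths; since the routine above allocates islist with PetscMalloc1() and creates each IS itself, the caller is assumed to be responsible for destroying them (the cleanup convention is an assumption, not stated in the manual page).

/* Sketch only: split xis into per-color ISs and clean up afterwards. */
#include <petscis.h>

static PetscErrorCode SplitByColor(IS xis,IS yis)
{
  PetscErrorCode ierr;
  PetscInt       listlen,i;
  IS             *islist = NULL;   /* NULL so ISPairToList() allocates the list */

  PetscFunctionBegin;
  ierr = ISPairToList(xis,yis,&listlen,&islist);CHKERRQ(ierr);
  /* ... use islist[0..listlen-1], each living on its own subcommunicator ... */
  for (i=0; i<listlen; i++) {
    ierr = ISDestroy(&islist[i]);CHKERRQ(ierr);
  }
  ierr = PetscFree(islist);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
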
Example #5
void PETSC_STDCALL petscsortintwitharray_(PetscInt *n,PetscInt i[],PetscInt Ii[],int *__ierr)
{
  *__ierr = PetscSortIntWithArray(*n,i,Ii);
}
Example #6
void array_sort_ui(unsigned int NRows, unsigned int NCols, unsigned int *A, unsigned int *Indices, const char ordering,
                   const char trans)
{
	unsigned int i, count, swap,
	             row, col, *colMs, *colMe, SortLen, rowM, rowS,
	             *Indicestmp, *IndicesInter, *Atmp;

	if ((ordering == 'R' && trans == 'T') || (ordering == 'C' && trans == 'N')) {
		mkl_simatcopy(ordering,trans,NRows,NCols,1.,(float *) A,NCols,NRows);
		swap = NRows; NRows = NCols; NCols = swap;
	} else if ((ordering == 'R' && trans == 'N') || (ordering == 'C' && trans == 'T')) {
		// Don't do anything.
	} else {
		printf("Error: Invalid ordering/trans input.\n"), EXIT_MSG;
	}
// array_print_i(NRows,NCols,A,'R');
	colMs = malloc(NRows * sizeof *colMs); // free
	colMe = malloc(NRows * sizeof *colMe); // free

	for (row = 0; row < NRows; row++) {
		if (row == 0) {
			colMs[row] = 0;
			colMe[row] = NCols-1;
			SortLen = colMe[row] - colMs[row] + 1;

			IndicesInter = malloc(SortLen * sizeof *IndicesInter); // free
			Indicestmp   = malloc(SortLen * sizeof *Indicestmp); // free
			for (i = 0; i < SortLen; i++)
				Indicestmp[i] = i;

			PetscSortIntWithArray(SortLen,(int *)&A[row*NCols],(int *)Indicestmp);
			for (i = 0; i < SortLen; i++) IndicesInter[i] = Indices[Indicestmp[i]];
			for (i = 0; i < SortLen; i++) Indices[i]      = IndicesInter[i];
			free(IndicesInter);

			Atmp = malloc(NCols * sizeof *Atmp); // free
			// Rearrange other rows accordingly
			for (rowM = row+1; rowM < NRows; rowM++) {
				for (col = 0; col < NCols; col++) Atmp[col]         = A[rowM*NCols+col];
				for (col = 0; col < NCols; col++) A[rowM*NCols+col] = Atmp[Indicestmp[col]];
			}
			free(Indicestmp);
			free(Atmp);
		} else {
			colMs[0] = colMe[row-1] = 0;
			while (colMe[row-1] < NCols-1) {
				for (rowM = 0; rowM < row; rowM++) {
					if (rowM > 0)
						colMs[rowM] = colMs[rowM-1];
					col = colMs[rowM]+1;
					while (col < NCols && A[rowM*NCols+col] == A[rowM*NCols+colMs[rowM]]) {
						// Ensure that all rows are taken into account and not only the previous row
						if (rowM > 0 && col > colMe[rowM-1])
							break;
						col++;
					}
					colMe[rowM] = col-1;
				}
				rowM -= 1;
				SortLen = colMe[rowM] - colMs[rowM]+1;

//printf("%d %d %d %d %d %d %d %d \n",row,col,rowM,colMs[0],colMe[0],colMs[rowM],colMe[rowM],SortLen);

				IndicesInter = malloc(SortLen * sizeof *IndicesInter); // free
				Indicestmp   = malloc(SortLen * sizeof *Indicestmp); // free
				for (i = 0; i < SortLen; i++)
					Indicestmp[i] = i;

				PetscSortIntWithArray(SortLen,(int *)&A[row*NCols+colMs[rowM]],(int *)Indicestmp);
				for (i = 0; i < SortLen; i++) IndicesInter[i]        = Indices[Indicestmp[i]+colMs[rowM]];
				for (i = 0; i < SortLen; i++) Indices[i+colMs[rowM]] = IndicesInter[i];
				free(IndicesInter);

				Atmp = malloc(SortLen * sizeof *Atmp); // free

				// Rearrange slave rows accordingly
				for (rowS = row+1; rowS < NRows; rowS++) {
					for (count = 0, col = colMs[rowM]; col <= colMe[rowM]; col++) {
						Atmp[count]         = A[rowS*NCols+col];
						count++;
					}
					for (count = 0, col = colMs[rowM]; col <= colMe[rowM]; col++) {
						A[rowS*NCols+col] = Atmp[Indicestmp[count]];
						count++;
					}
				}
				free(Indicestmp);
				free(Atmp);

				colMs[0] = colMe[row-1]+1;
			}
		}
//array_print_ui(NCols,NRows,A,'C');
//array_print_ui(1,NCols,Indices,'R');
	}

	free(colMs);
	free(colMe);

// array_print_i(NRows,NCols,A,'R');

	if ((ordering == 'R' && trans == 'T') || (ordering == 'C' && trans == 'N')) {
		mkl_simatcopy(ordering,trans,NRows,NCols,1.,(float *) A,NCols,NRows);
		swap = NRows; NRows = NCols; NCols = swap;
	} else if ((ordering == 'R' && trans == 'N') || (ordering == 'C' && trans == 'T')) {
		// Don't do anything.
	}
// array_print_i(NRows,NCols,A,'R');
}
Example #7
/*@C
   PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters

   Collective

   Input Argument:
.  sf - star forest that may contain roots with 0 or with more than 1 vertex

   Output Arguments:
.  multi - star forest with split roots, such that each root has degree exactly 1

   Level: developer

   Notes:

   In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
   directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
   edge, it is a candidate for future optimization that might involve its removal.

.seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidPointer(multi,2);
  if (sf->nroots < 0) {         /* Graph has not been set yet; why do we need this? */
    ierr   = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
    *multi = sf->multi;
    PetscFunctionReturn(0);
  }
  if (!sf->multi) {
    const PetscInt *indegree;
    PetscInt       i,*inoffset,*outones,*outoffset,maxlocal;
    PetscSFNode    *remote;
    ierr        = PetscSFComputeDegreeBegin(sf,&indegree);CHKERRQ(ierr);
    ierr        = PetscSFComputeDegreeEnd(sf,&indegree);CHKERRQ(ierr);
    for (i=0,maxlocal=0; i<sf->nleaves; i++) maxlocal = PetscMax(maxlocal,(sf->mine ? sf->mine[i] : i)+1);
    ierr        = PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);CHKERRQ(ierr);
    inoffset[0] = 0;
    for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
    for (i=0; i<maxlocal; i++) outones[i] = 1;
    ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
    for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
#if 0
#if defined(PETSC_USE_DEBUG)                                 /* Check that the expected number of increments occurred */
    for (i=0; i<sf->nroots; i++) {
      if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
    }
#endif
#endif
    ierr = PetscMalloc1(sf->nleaves,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nleaves; i++) {
      remote[i].rank  = sf->remote[i].rank;
      remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    if (sf->rankorder) {        /* Sort the ranks */
      PetscMPIInt rank;
      PetscInt    *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
      PetscSFNode *newremote;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
      for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
      ierr = PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);CHKERRQ(ierr);
      for (i=0; i<maxlocal; i++) outranks[i] = rank;
      ierr = PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr);
      ierr = PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr);
      /* Sort the incoming ranks at each vertex, build the inverse map */
      for (i=0; i<sf->nroots; i++) {
        PetscInt j;
        for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
        ierr = PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);CHKERRQ(ierr);
        for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
      }
      ierr = PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr);
      ierr = PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr);
      ierr = PetscMalloc1(sf->nleaves,&newremote);CHKERRQ(ierr);
      for (i=0; i<sf->nleaves; i++) {
        newremote[i].rank  = sf->remote[i].rank;
        newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
      }
      ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
      ierr = PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);CHKERRQ(ierr);
    }
    ierr = PetscFree3(inoffset,outones,outoffset);CHKERRQ(ierr);
  }
  *multi = sf->multi;
  PetscFunctionReturn(0);
}
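
Per the Notes above, most callers go through PetscSFGatherBegin()/PetscSFScatterBegin() rather than touching the multi-SF directly; the sketch below only retrieves it and reports its shape (since the multi-SF is cached on sf, the caller is not expected to destroy it).

/* Sketch: fetch the multi-SF and print its root/leaf counts. */
#include <petscsf.h>

static PetscErrorCode InspectMultiSF(PetscSF sf)
{
  PetscErrorCode ierr;
  PetscSF        multi;
  PetscInt       nroots,nleaves;

  PetscFunctionBegin;
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);                      /* owned by sf */
  ierr = PetscSFGetGraph(multi,&nroots,&nleaves,NULL,NULL);CHKERRQ(ierr);
  ierr = PetscPrintf(PETSC_COMM_SELF,"multi-SF: %D roots (each of degree 1), %D leaves\n",nroots,nleaves);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
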
Example #8
/*@C
   PetscSFSetGraph - Set a parallel star forest

   Collective

   Input Arguments:
+  sf - star forest
.  nroots - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
.  nleaves - number of leaf vertices on the current process, each of these references a root on any process
.  ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage
.  localmode - copy mode for ilocal
.  iremote - remote locations of root vertices for each leaf on the current process
-  remotemode - copy mode for iremote

   Level: intermediate

.seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode)
{
  PetscErrorCode     ierr;
  PetscTable         table;
  PetscTablePosition pos;
  PetscMPIInt        size;
  PetscInt           i,*rcount,*ranks;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);
  if (nleaves && ilocal) PetscValidIntPointer(ilocal,4);
  if (nleaves) PetscValidPointer(iremote,6);
  if (nroots < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"roots %D, cannot be negative",nroots);
  if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves);
  ierr        = PetscSFReset(sf);CHKERRQ(ierr);
  sf->nroots  = nroots;
  sf->nleaves = nleaves;
  if (ilocal) {
    switch (localmode) {
    case PETSC_COPY_VALUES:
      ierr        = PetscMalloc1(nleaves,&sf->mine_alloc);CHKERRQ(ierr);
      sf->mine    = sf->mine_alloc;
      ierr        = PetscMemcpy(sf->mine,ilocal,nleaves*sizeof(*sf->mine));CHKERRQ(ierr);
      sf->minleaf = PETSC_MAX_INT;
      sf->maxleaf = PETSC_MIN_INT;
      for (i=0; i<nleaves; i++) {
        sf->minleaf = PetscMin(sf->minleaf,ilocal[i]);
        sf->maxleaf = PetscMax(sf->maxleaf,ilocal[i]);
      }
      break;
    case PETSC_OWN_POINTER:
      sf->mine_alloc = (PetscInt*)ilocal;
      sf->mine       = sf->mine_alloc;
      break;
    case PETSC_USE_POINTER:
      sf->mine = (PetscInt*)ilocal;
      break;
    default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode");
    }
  }
  if (!ilocal || nleaves > 0) {
    sf->minleaf = 0;
    sf->maxleaf = nleaves - 1;
  }
  switch (remotemode) {
  case PETSC_COPY_VALUES:
    ierr       = PetscMalloc1(nleaves,&sf->remote_alloc);CHKERRQ(ierr);
    sf->remote = sf->remote_alloc;
    ierr       = PetscMemcpy(sf->remote,iremote,nleaves*sizeof(*sf->remote));CHKERRQ(ierr);
    break;
  case PETSC_OWN_POINTER:
    sf->remote_alloc = (PetscSFNode*)iremote;
    sf->remote       = sf->remote_alloc;
    break;
  case PETSC_USE_POINTER:
    sf->remote = (PetscSFNode*)iremote;
    break;
  default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode");
  }

  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
  ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr);
  for (i=0; i<nleaves; i++) {
    /* Log 1-based rank */
    ierr = PetscTableAdd(table,iremote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr);
  }
  ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr);
  ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,nleaves,&sf->rmine,nleaves,&sf->rremote);CHKERRQ(ierr);
  ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr);
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr);
    ranks[i]--;             /* Convert back to 0-based */
  }
  ierr = PetscTableDestroy(&table);CHKERRQ(ierr);
  ierr = PetscSortIntWithArray(sf->nranks,ranks,rcount);CHKERRQ(ierr);
  sf->roffset[0] = 0;
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr);
    sf->roffset[i+1] = sf->roffset[i] + rcount[i];
    rcount[i]        = 0;
  }
  for (i=0; i<nleaves; i++) {
    PetscInt lo,hi,irank;
    /* Search for index of iremote[i].rank in sf->ranks */
    lo = 0; hi = sf->nranks;
    while (hi - lo > 1) {
      PetscInt mid = lo + (hi - lo)/2;
      if (iremote[i].rank < sf->ranks[mid]) hi = mid;
      else                                  lo = mid;
    }
    if (hi - lo == 1 && iremote[i].rank == sf->ranks[lo]) irank = lo;
    else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",iremote[i].rank);
    sf->rmine[sf->roffset[irank] + rcount[irank]]   = ilocal ? ilocal[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = iremote[i].index;
    rcount[irank]++;
  }
  ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr);
#if !defined(PETSC_USE_64BIT_INDICES)
  if (nroots == PETSC_DETERMINE) {
    /* Jed, if you have a better way to do this, put it in */
    PetscInt *numRankLeaves, *leafOff, *leafIndices, *numRankRoots, *rootOff, *rootIndices, maxRoots = 0;

    /* All to all to determine number of leaf indices from each (you can do this using Scan and asynch messages) */
    ierr = PetscMalloc4(size,&numRankLeaves,size+1,&leafOff,size,&numRankRoots,size+1,&rootOff);CHKERRQ(ierr);
    ierr = PetscMemzero(numRankLeaves, size * sizeof(PetscInt));CHKERRQ(ierr);
    for (i = 0; i < nleaves; ++i) ++numRankLeaves[iremote[i].rank];
    ierr = MPI_Alltoall(numRankLeaves, 1, MPIU_INT, numRankRoots, 1, MPIU_INT, PetscObjectComm((PetscObject)sf));CHKERRQ(ierr);
    /* Could set nroots to this maximum */
    for (i = 0; i < size; ++i) maxRoots += numRankRoots[i];

    /* Gather all indices */
    ierr = PetscMalloc2(nleaves,&leafIndices,maxRoots,&rootIndices);CHKERRQ(ierr);
    leafOff[0] = 0;
    for (i = 0; i < size; ++i) leafOff[i+1] = leafOff[i] + numRankLeaves[i];
    for (i = 0; i < nleaves; ++i) leafIndices[leafOff[iremote[i].rank]++] = iremote[i].index;
    leafOff[0] = 0;
    for (i = 0; i < size; ++i) leafOff[i+1] = leafOff[i] + numRankLeaves[i];
    rootOff[0] = 0;
    for (i = 0; i < size; ++i) rootOff[i+1] = rootOff[i] + numRankRoots[i];
    ierr = MPI_Alltoallv(leafIndices, numRankLeaves, leafOff, MPIU_INT, rootIndices, numRankRoots, rootOff, MPIU_INT, PetscObjectComm((PetscObject)sf));CHKERRQ(ierr);
    /* Sort and reduce */
    ierr       = PetscSortRemoveDupsInt(&maxRoots, rootIndices);CHKERRQ(ierr);
    ierr       = PetscFree2(leafIndices,rootIndices);CHKERRQ(ierr);
    ierr       = PetscFree4(numRankLeaves,leafOff,numRankRoots,rootOff);CHKERRQ(ierr);
    sf->nroots = maxRoots;
  }
#endif

  sf->graphset = PETSC_TRUE;
  ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
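
A minimal construction sketch following the documented arguments: every process contributes one leaf referencing root 0 on rank 0, ilocal is NULL for contiguous leaf storage, and iremote is handed over with PETSC_OWN_POINTER. The helper name and the particular graph are illustrative assumptions, not part of the routine above.

/* Sketch only: build a tiny star forest in which all leaves point at one root on rank 0. */
#include <petscsf.h>

static PetscErrorCode BuildTinySF(MPI_Comm comm,PetscSF *newsf)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  PetscSF        sf;
  PetscSFNode    *iremote;
  PetscInt       nroots,nleaves = 1;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  nroots = rank ? 0 : 1;                              /* only rank 0 owns a root */
  ierr = PetscMalloc1(nleaves,&iremote);CHKERRQ(ierr);
  iremote[0].rank  = 0;                               /* every leaf targets rank 0, root index 0 */
  iremote[0].index = 0;
  ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf,nroots,nleaves,NULL,PETSC_COPY_VALUES,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  *newsf = sf;
  PetscFunctionReturn(0);
}
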
Example #9
PetscErrorCode PCBDDCGraphSetUp(PCBDDCGraph graph, PetscInt custom_minimal_size, IS neumann_is, IS dirichlet_is, PetscInt n_ISForDofs, IS ISForDofs[], IS custom_primal_vertices)
{
  IS             subset,subset_n;
  MPI_Comm       comm;
  const PetscInt *is_indices;
  PetscInt       n_neigh,*neigh,*n_shared,**shared,*queue_global;
  PetscInt       i,j,k,s,total_counts,nodes_touched,is_size;
  PetscMPIInt    commsize;
  PetscBool      same_set,mirrors_found;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidLogicalCollectiveInt(graph->l2gmap,custom_minimal_size,2);
  if (neumann_is) {
    PetscValidHeaderSpecific(neumann_is,IS_CLASSID,3);
    PetscCheckSameComm(graph->l2gmap,1,neumann_is,3);
  }
  graph->has_dirichlet = PETSC_FALSE;
  if (dirichlet_is) {
    PetscValidHeaderSpecific(dirichlet_is,IS_CLASSID,4);
    PetscCheckSameComm(graph->l2gmap,1,dirichlet_is,4);
    graph->has_dirichlet = PETSC_TRUE;
  }
  PetscValidLogicalCollectiveInt(graph->l2gmap,n_ISForDofs,5);
  for (i=0;i<n_ISForDofs;i++) {
    PetscValidHeaderSpecific(ISForDofs[i],IS_CLASSID,6);
    PetscCheckSameComm(graph->l2gmap,1,ISForDofs[i],6);
  }
  if (custom_primal_vertices) {
    PetscValidHeaderSpecific(custom_primal_vertices,IS_CLASSID,7);
    PetscCheckSameComm(graph->l2gmap,1,custom_primal_vertices,7);
  }
  ierr = PetscObjectGetComm((PetscObject)(graph->l2gmap),&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);

  /* custom_minimal_size */
  graph->custom_minimal_size = custom_minimal_size;
  /* get info l2gmap and allocate work vectors  */
  ierr = ISLocalToGlobalMappingGetInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr);
  /* check if we have any local periodic nodes (periodic BCs) */
  mirrors_found = PETSC_FALSE;
  if (graph->nvtxs && n_neigh) {
    for (i=0; i<n_shared[0]; i++) graph->count[shared[0][i]] += 1;
    for (i=0; i<n_shared[0]; i++) {
      if (graph->count[shared[0][i]] > 1) {
        mirrors_found = PETSC_TRUE;
        break;
      }
    }
  }
  /* compute local mirrors (if any) */
  if (mirrors_found) {
    IS       to,from;
    PetscInt *local_indices,*global_indices;

    ierr = ISCreateStride(PETSC_COMM_SELF,graph->nvtxs,0,1,&to);CHKERRQ(ierr);
    ierr = ISLocalToGlobalMappingApplyIS(graph->l2gmap,to,&from);CHKERRQ(ierr);
    /* get arrays of local and global indices */
    ierr = PetscMalloc1(graph->nvtxs,&local_indices);CHKERRQ(ierr);
    ierr = ISGetIndices(to,(const PetscInt**)&is_indices);CHKERRQ(ierr);
    ierr = PetscMemcpy(local_indices,is_indices,graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = ISRestoreIndices(to,(const PetscInt**)&is_indices);CHKERRQ(ierr);
    ierr = PetscMalloc1(graph->nvtxs,&global_indices);CHKERRQ(ierr);
    ierr = ISGetIndices(from,(const PetscInt**)&is_indices);CHKERRQ(ierr);
    ierr = PetscMemcpy(global_indices,is_indices,graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = ISRestoreIndices(from,(const PetscInt**)&is_indices);CHKERRQ(ierr);
    /* allocate space for mirrors */
    ierr = PetscMalloc2(graph->nvtxs,&graph->mirrors,graph->nvtxs,&graph->mirrors_set);CHKERRQ(ierr);
    ierr = PetscMemzero(graph->mirrors,graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr);
    graph->mirrors_set[0] = 0;

    k=0;
    for (i=0;i<n_shared[0];i++) {
      j=shared[0][i];
      if (graph->count[j] > 1) {
        graph->mirrors[j]++;
        k++;
      }
    }
    /* allocate space for set of mirrors */
    ierr = PetscMalloc1(k,&graph->mirrors_set[0]);CHKERRQ(ierr);
    for (i=1;i<graph->nvtxs;i++)
      graph->mirrors_set[i]=graph->mirrors_set[i-1]+graph->mirrors[i-1];

    /* fill arrays */
    ierr = PetscMemzero(graph->mirrors,graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr);
    for (j=0;j<n_shared[0];j++) {
      i=shared[0][j];
      if (graph->count[i] > 1)
        graph->mirrors_set[i][graph->mirrors[i]++]=global_indices[i];
    }
    ierr = PetscSortIntWithArray(graph->nvtxs,global_indices,local_indices);CHKERRQ(ierr);
    for (i=0;i<graph->nvtxs;i++) {
      if (graph->mirrors[i] > 0) {
        ierr = PetscFindInt(graph->mirrors_set[i][0],graph->nvtxs,global_indices,&k);CHKERRQ(ierr);
        j = global_indices[k];
        while ( k > 0 && global_indices[k-1] == j) k--;
        for (j=0;j<graph->mirrors[i];j++) {
          graph->mirrors_set[i][j]=local_indices[k+j];
        }
        ierr = PetscSortInt(graph->mirrors[i],graph->mirrors_set[i]);CHKERRQ(ierr);
      }
    }
    ierr = PetscFree(local_indices);CHKERRQ(ierr);
    ierr = PetscFree(global_indices);CHKERRQ(ierr);
    ierr = ISDestroy(&to);CHKERRQ(ierr);
    ierr = ISDestroy(&from);CHKERRQ(ierr);
  }
  ierr = PetscMemzero(graph->count,graph->nvtxs*sizeof(*graph->count));CHKERRQ(ierr);

  /* Count total number of neigh per node */
  k = 0;
  for (i=1;i<n_neigh;i++) {
    k += n_shared[i];
    for (j=0;j<n_shared[i];j++) {
      graph->count[shared[i][j]] += 1;
    }
  }
  /* Allocate space for storing the set of neighbours for each node */
  if (graph->nvtxs) {
    ierr = PetscMalloc1(k,&graph->neighbours_set[0]);CHKERRQ(ierr);
  }
  for (i=1;i<graph->nvtxs;i++) { /* don't count myself */
    graph->neighbours_set[i]=graph->neighbours_set[i-1]+graph->count[i-1];
  }
  /* Get information for sharing subdomains */
  ierr = PetscMemzero(graph->count,graph->nvtxs*sizeof(*graph->count));CHKERRQ(ierr);
  for (i=1;i<n_neigh;i++) { /* don't count myself */
    s = n_shared[i];
    for (j=0;j<s;j++) {
      k = shared[i][j];
      graph->neighbours_set[k][graph->count[k]] = neigh[i];
      graph->count[k] += 1;
    }
  }
  /* sort set of sharing subdomains */
  for (i=0;i<graph->nvtxs;i++) {
    ierr = PetscSortRemoveDupsInt(&graph->count[i],graph->neighbours_set[i]);CHKERRQ(ierr);
  }
  /* free memory allocated by ISLocalToGlobalMappingGetInfo */
  ierr = ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr);

  /*
     Get info for dofs splitting
     User can specify just a subset; an additional field is considered as a complementary field
  */
  for (i=0;i<graph->nvtxs;i++) graph->which_dof[i] = n_ISForDofs; /* by default a dof belongs to the complement set */
  for (i=0;i<n_ISForDofs;i++) {
    ierr = ISGetLocalSize(ISForDofs[i],&is_size);CHKERRQ(ierr);
    ierr = ISGetIndices(ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr);
    for (j=0;j<is_size;j++) {
      if (is_indices[j] > -1 && is_indices[j] < graph->nvtxs) { /* out of bounds indices (if any) are skipped */
        graph->which_dof[is_indices[j]] = i;
      }
    }
    ierr = ISRestoreIndices(ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr);
  }

  /* Take into account Neumann nodes */
  if (neumann_is) {
    ierr = ISGetLocalSize(neumann_is,&is_size);CHKERRQ(ierr);
    ierr = ISGetIndices(neumann_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
    for (i=0;i<is_size;i++) {
      if (is_indices[i] > -1 && is_indices[i] < graph->nvtxs) { /* out of bounds indices (if any) are skipped */
        graph->special_dof[is_indices[i]] = PCBDDCGRAPH_NEUMANN_MARK;
      }
    }
    ierr = ISRestoreIndices(neumann_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
  }
  /* Take into account Dirichlet nodes (they overwrite any neumann boundary mark previously set) */
  if (dirichlet_is) {
    ierr = ISGetLocalSize(dirichlet_is,&is_size);CHKERRQ(ierr);
    ierr = ISGetIndices(dirichlet_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
    for (i=0;i<is_size;i++){
      if (is_indices[i] > -1 && is_indices[i] < graph->nvtxs) { /* out of bounds indices (if any) are skipped */
        if (commsize > graph->commsizelimit) { /* dirichlet nodes treated as internal */
          ierr = PetscBTSet(graph->touched,is_indices[i]);CHKERRQ(ierr);
          graph->subset[is_indices[i]] = 0;
        }
        graph->special_dof[is_indices[i]] = PCBDDCGRAPH_DIRICHLET_MARK;
      }
    }
    ierr = ISRestoreIndices(dirichlet_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
  }
  /* mark local periodic nodes (if any) and adapt CSR graph (if any) */
  if (graph->mirrors) {
    for (i=0;i<graph->nvtxs;i++)
      if (graph->mirrors[i])
        graph->special_dof[i] = PCBDDCGRAPH_LOCAL_PERIODIC_MARK;

    if (graph->xadj) {
      PetscInt *new_xadj,*new_adjncy;
      /* sort CSR graph */
      for (i=0;i<graph->nvtxs;i++)
        ierr = PetscSortInt(graph->xadj[i+1]-graph->xadj[i],&graph->adjncy[graph->xadj[i]]);CHKERRQ(ierr);

      /* adapt local CSR graph in case of local periodicity */
      k = 0;
      for (i=0;i<graph->nvtxs;i++)
        for (j=graph->xadj[i];j<graph->xadj[i+1];j++)
          k += graph->mirrors[graph->adjncy[j]];

      ierr = PetscMalloc1(graph->nvtxs+1,&new_xadj);CHKERRQ(ierr);
      ierr = PetscMalloc1(k+graph->xadj[graph->nvtxs],&new_adjncy);CHKERRQ(ierr);
      new_xadj[0] = 0;
      for (i=0;i<graph->nvtxs;i++) {
        k = graph->xadj[i+1]-graph->xadj[i];
        ierr = PetscMemcpy(&new_adjncy[new_xadj[i]],&graph->adjncy[graph->xadj[i]],k*sizeof(PetscInt));CHKERRQ(ierr);
        new_xadj[i+1] = new_xadj[i]+k;
        for (j=graph->xadj[i];j<graph->xadj[i+1];j++) {
          k = graph->mirrors[graph->adjncy[j]];
          ierr = PetscMemcpy(&new_adjncy[new_xadj[i+1]],graph->mirrors_set[graph->adjncy[j]],k*sizeof(PetscInt));CHKERRQ(ierr);
          new_xadj[i+1] += k;
        }
        k = new_xadj[i+1]-new_xadj[i];
        ierr = PetscSortRemoveDupsInt(&k,&new_adjncy[new_xadj[i]]);CHKERRQ(ierr);
        new_xadj[i+1] = new_xadj[i]+k;
      }
      /* set new CSR into graph */
      ierr = PetscFree(graph->xadj);CHKERRQ(ierr);
      ierr = PetscFree(graph->adjncy);CHKERRQ(ierr);
      graph->xadj = new_xadj;
      graph->adjncy = new_adjncy;
    }
  }

  /* mark special nodes (if any) -> each will become a single node equivalence class */
  if (custom_primal_vertices) {
    ierr = ISGetLocalSize(custom_primal_vertices,&is_size);CHKERRQ(ierr);
    ierr = ISGetIndices(custom_primal_vertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
    for (i=0,j=0;i<is_size;i++){
      if (is_indices[i] > -1 && is_indices[i] < graph->nvtxs && graph->special_dof[is_indices[i]] != PCBDDCGRAPH_DIRICHLET_MARK) { /* out of bounds indices and dofs already marked as Dirichlet are skipped */
        graph->special_dof[is_indices[i]] = PCBDDCGRAPH_SPECIAL_MARK-j;
        j++;
      }
    }
    ierr = ISRestoreIndices(custom_primal_vertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
  }

  /* mark interior nodes (if commsize > graph->commsizelimit) as touched and belonging to partition number 0 */
  if (commsize > graph->commsizelimit) {
    for (i=0;i<graph->nvtxs;i++) {
      if (!graph->count[i]) {
        ierr = PetscBTSet(graph->touched,i);CHKERRQ(ierr);
        graph->subset[i] = 0;
      }
    }
  }

  /* init graph structure and compute default subsets */
  nodes_touched = 0;
  for (i=0;i<graph->nvtxs;i++) {
    if (PetscBTLookup(graph->touched,i)) {
      nodes_touched++;
    }
  }
  i = 0;
  graph->ncc = 0;
  total_counts = 0;

  /* allocate space for queues */
  if (commsize == graph->commsizelimit) {
    ierr = PetscMalloc2(graph->nvtxs+1,&graph->cptr,graph->nvtxs,&graph->queue);CHKERRQ(ierr);
  } else {
    PetscInt nused = graph->nvtxs - nodes_touched;
    ierr = PetscMalloc2(nused+1,&graph->cptr,nused,&graph->queue);CHKERRQ(ierr);
  }

  while (nodes_touched<graph->nvtxs) {
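    /* each pass of the outer loop opens a new equivalence class: the first untouched
       node seeds it, and the sweep below adds every remaining node with the same
       neighbour set, dof field and special mark */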
    /*  find first untouched node in local ordering */
    while (PetscBTLookup(graph->touched,i)) i++;
    ierr = PetscBTSet(graph->touched,i);CHKERRQ(ierr);
    graph->subset[i] = graph->ncc+1;
    graph->cptr[graph->ncc] = total_counts;
    graph->queue[total_counts] = i;
    total_counts++;
    nodes_touched++;
    /* now find all other nodes having the same set of sharing subdomains */
    for (j=i+1;j<graph->nvtxs;j++) {
      /* check for same number of sharing subdomains, dof number and same special mark */
      if (!PetscBTLookup(graph->touched,j) && graph->count[i] == graph->count[j] && graph->which_dof[i] == graph->which_dof[j] && graph->special_dof[i] == graph->special_dof[j]) {
        /* check for same set of sharing subdomains */
        same_set = PETSC_TRUE;
        for (k=0;k<graph->count[j];k++) {
          if (graph->neighbours_set[i][k] != graph->neighbours_set[j][k]) {
            same_set = PETSC_FALSE;
            break;
          }
        }
        /* I found a friend of mine */
        if (same_set) {
          ierr = PetscBTSet(graph->touched,j);CHKERRQ(ierr);
          graph->subset[j] = graph->ncc+1;
          nodes_touched++;
          graph->queue[total_counts] = j;
          total_counts++;
        }
      }
    }
    graph->ncc++;
  }
  /* set default number of subsets (at this point no info on csr and/or local_subs has been taken into account, so n_subsets = ncc) */
  graph->n_subsets = graph->ncc;
  ierr = PetscMalloc1(graph->n_subsets,&graph->subset_ncc);CHKERRQ(ierr);
  for (i=0;i<graph->n_subsets;i++) {
    graph->subset_ncc[i] = 1;
  }
  /* final pointer */
  graph->cptr[graph->ncc] = total_counts;

  /* For consistency among neighbouring subdomains, each connected component needs to be sorted by global ordering */
  /* Get a reference node (min index in global ordering) for each subset for tagging messages */
  ierr = PetscMalloc1(graph->ncc,&graph->subset_ref_node);CHKERRQ(ierr);
  ierr = PetscMalloc1(graph->cptr[graph->ncc],&queue_global);CHKERRQ(ierr);
  ierr = ISLocalToGlobalMappingApply(graph->l2gmap,graph->cptr[graph->ncc],graph->queue,queue_global);CHKERRQ(ierr);
  for (j=0;j<graph->ncc;j++) {
    ierr = PetscSortIntWithArray(graph->cptr[j+1]-graph->cptr[j],&queue_global[graph->cptr[j]],&graph->queue[graph->cptr[j]]);CHKERRQ(ierr);
    graph->subset_ref_node[j] = graph->queue[graph->cptr[j]];
  }
  ierr = PetscFree(queue_global);CHKERRQ(ierr);
  graph->queue_sorted = PETSC_TRUE;
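  /* queue_sorted records that each connected component is sorted by global ordering,
     so later consumers (e.g. PCBDDCGraphGetCandidatesIS below) can skip re-sorting */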

  /* save information on subsets (needed when analyzing the connected components) */
  if (graph->ncc) {
    ierr = PetscMalloc2(graph->ncc,&graph->subset_size,graph->ncc,&graph->subset_idxs);CHKERRQ(ierr);
    ierr = PetscMalloc1(graph->cptr[graph->ncc],&graph->subset_idxs[0]);CHKERRQ(ierr);
    ierr = PetscMemzero(graph->subset_idxs[0],graph->cptr[graph->ncc]*sizeof(PetscInt));CHKERRQ(ierr);
    for (j=1;j<graph->ncc;j++) {
      graph->subset_size[j-1] = graph->cptr[j] - graph->cptr[j-1];
      graph->subset_idxs[j] = graph->subset_idxs[j-1] + graph->subset_size[j-1];
    }
    graph->subset_size[graph->ncc-1] = graph->cptr[graph->ncc] - graph->cptr[graph->ncc-1];
    ierr = PetscMemcpy(graph->subset_idxs[0],graph->queue,graph->cptr[graph->ncc]*sizeof(PetscInt));CHKERRQ(ierr);
  }

  /* renumber reference nodes */
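  /* (the local reference nodes are mapped to global numbering and then renumbered
      contiguously via ISRenumber, so that each connected component gets an id that
      is consistent among neighbouring subdomains) */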
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(graph->l2gmap)),graph->ncc,graph->subset_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr);
  ierr = ISLocalToGlobalMappingApplyIS(graph->l2gmap,subset_n,&subset);CHKERRQ(ierr);
  ierr = ISDestroy(&subset_n);CHKERRQ(ierr);
  ierr = ISRenumber(subset,NULL,NULL,&subset_n);CHKERRQ(ierr);
  ierr = ISDestroy(&subset);CHKERRQ(ierr);
  ierr = ISGetLocalSize(subset_n,&k);CHKERRQ(ierr);
  if (k != graph->ncc) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid size of new subset! %D != %D",k,graph->ncc);
  ierr = ISGetIndices(subset_n,&is_indices);CHKERRQ(ierr);
  ierr = PetscMemcpy(graph->subset_ref_node,is_indices,graph->ncc*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = ISRestoreIndices(subset_n,&is_indices);CHKERRQ(ierr);
  ierr = ISDestroy(&subset_n);CHKERRQ(ierr);

  /* mark graph setup as completed */
  graph->setupcalled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
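The while loop above groups interface dofs into equivalence classes with a quadratic greedy sweep, comparing the neighbour set, the dof field and the special mark of each untouched node against the current seed. The following self-contained sketch is not part of PETSc (GroupByKey and its signature are invented for illustration); it shows the same greedy grouping pattern reduced to a single integer key per node, which may make the full version easier to follow.

#include <petscsys.h>
#include <petscbt.h>

/* Hypothetical helper illustrating the greedy grouping pattern: items with equal
   key[] end up in the same class; cptr[] (length nclasses+1) stores class offsets
   into queue[] (length n), mirroring graph->cptr and graph->queue above. */
static PetscErrorCode GroupByKey(PetscInt n,const PetscInt key[],PetscInt *nclasses,PetscInt cptr[],PetscInt queue[])
{
  PetscBT        touched;
  PetscInt       i,j,ncc = 0,cnt = 0;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscBTCreate(n,&touched);CHKERRQ(ierr);
  for (i=0;i<n;i++) {
    if (PetscBTLookup(touched,i)) continue;   /* already assigned to a class */
    cptr[ncc]    = cnt;                       /* open a new class seeded by i */
    queue[cnt++] = i;
    ierr = PetscBTSet(touched,i);CHKERRQ(ierr);
    for (j=i+1;j<n;j++) {                     /* sweep the remaining nodes */
      if (!PetscBTLookup(touched,j) && key[j] == key[i]) {
        queue[cnt++] = j;
        ierr = PetscBTSet(touched,j);CHKERRQ(ierr);
      }
    }
    ncc++;
  }
  cptr[ncc] = cnt;                            /* final pointer, as in the code above */
  *nclasses = ncc;
  ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}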
Example #10
PetscErrorCode PCBDDCGraphGetCandidatesIS(PCBDDCGraph graph, PetscInt *n_faces, IS *FacesIS[], PetscInt *n_edges, IS *EdgesIS[], IS *VerticesIS)
{
  IS             *ISForFaces,*ISForEdges,ISForVertices;
  PetscInt       i,nfc,nec,nvc,*idx,*mark;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscCalloc1(graph->ncc,&mark);CHKERRQ(ierr);
  /* loop on ccs to evaluate number of faces, edges and vertices */
  nfc = 0;
  nec = 0;
  nvc = 0;
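  /* A cc that is large enough and not shared by too many subdomains becomes a face
     candidate when it is shared with exactly one other subdomain, is not marked
     Neumann and the problem is not 2D, and an edge candidate otherwise; dofs of
     the remaining ccs are collected as vertex candidates. */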
  for (i=0;i<graph->ncc;i++) {
    PetscInt repdof = graph->queue[graph->cptr[i]];
    if (graph->cptr[i+1]-graph->cptr[i] > graph->custom_minimal_size && graph->count[repdof] < graph->maxcount) {
      if (!graph->twodim && graph->count[repdof] == 1 && graph->special_dof[repdof] != PCBDDCGRAPH_NEUMANN_MARK) {
        nfc++;
        mark[i] = 2;
      } else {
        nec++;
        mark[i] = 1;
      }
    } else {
      nvc += graph->cptr[i+1]-graph->cptr[i];
    }
  }

  /* allocate IS arrays for faces, edges. Vertices need a single index set. */
  if (FacesIS) {
    ierr = PetscMalloc1(nfc,&ISForFaces);CHKERRQ(ierr);
  }
  if (EdgesIS) {
    ierr = PetscMalloc1(nec,&ISForEdges);CHKERRQ(ierr);
  }
  if (VerticesIS) {
    ierr = PetscMalloc1(nvc,&idx);CHKERRQ(ierr);
  }

  /* loop on ccs to compute index sets for faces and edges */
  if (!graph->queue_sorted) {
    PetscInt *queue_global;

    ierr = PetscMalloc1(graph->cptr[graph->ncc],&queue_global);CHKERRQ(ierr);
    ierr = ISLocalToGlobalMappingApply(graph->l2gmap,graph->cptr[graph->ncc],graph->queue,queue_global);CHKERRQ(ierr);
    for (i=0;i<graph->ncc;i++) {
      ierr = PetscSortIntWithArray(graph->cptr[i+1]-graph->cptr[i],&queue_global[graph->cptr[i]],&graph->queue[graph->cptr[i]]);CHKERRQ(ierr);
    }
    ierr = PetscFree(queue_global);CHKERRQ(ierr);
    graph->queue_sorted = PETSC_TRUE;
  }
  nfc = 0;
  nec = 0;
  for (i=0;i<graph->ncc;i++) {
    if (mark[i] == 2) {
      if (FacesIS) {
        ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],&graph->queue[graph->cptr[i]],PETSC_USE_POINTER,&ISForFaces[nfc]);CHKERRQ(ierr);
      }
      nfc++;
    } else if (mark[i] == 1) {
      if (EdgesIS) {
        ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],&graph->queue[graph->cptr[i]],PETSC_USE_POINTER,&ISForEdges[nec]);CHKERRQ(ierr);
      }
      nec++;
    }
  }

  /* index set for vertices */
  if (VerticesIS) {
    nvc = 0;
    for (i=0;i<graph->ncc;i++) {
      if (!mark[i]) {
        PetscInt j;

        for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
          idx[nvc]=graph->queue[j];
          nvc++;
        }
      }
    }
    /* sort vertex set (by local ordering) */
    ierr = PetscSortInt(nvc,idx);CHKERRQ(ierr);
    ierr = ISCreateGeneral(PETSC_COMM_SELF,nvc,idx,PETSC_OWN_POINTER,&ISForVertices);CHKERRQ(ierr);
  }
  ierr = PetscFree(mark);CHKERRQ(ierr);

  /* get back info */
  if (n_faces)       *n_faces = nfc;
  if (FacesIS)       *FacesIS = ISForFaces;
  if (n_edges)       *n_edges = nec;
  if (EdgesIS)       *EdgesIS = ISForEdges;
  if (VerticesIS) *VerticesIS = ISForVertices;
  PetscFunctionReturn(0);
}
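As a minimal usage sketch, the driver below is hypothetical (ExtractCandidates is an invented name, and it assumes the private BDDC headers declaring PCBDDCGraph and PCBDDCGraphGetCandidatesIS are included). The cleanup simply mirrors what the routine above allocates: one IS per face/edge candidate stored in PetscMalloc1'd arrays, plus a single IS owning the vertex indices.

/* Hypothetical driver: error handling follows the usual PETSc style; the
   face/edge index sets reference graph->queue (PETSC_USE_POINTER), so destroying
   them does not touch the graph's own storage. */
static PetscErrorCode ExtractCandidates(PCBDDCGraph graph)
{
  PetscErrorCode ierr;
  PetscInt       i,nf,ne;
  IS             *faces,*edges,vertices;

  PetscFunctionBegin;
  ierr = PCBDDCGraphGetCandidatesIS(graph,&nf,&faces,&ne,&edges,&vertices);CHKERRQ(ierr);
  /* ... inspect or use the candidate sets, e.g. to select a primal space ... */
  for (i=0;i<nf;i++) { ierr = ISDestroy(&faces[i]);CHKERRQ(ierr); }
  for (i=0;i<ne;i++) { ierr = ISDestroy(&edges[i]);CHKERRQ(ierr); }
  ierr = ISDestroy(&vertices);CHKERRQ(ierr);
  ierr = PetscFree(faces);CHKERRQ(ierr);
  ierr = PetscFree(edges);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}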