Example #1
/*@C
   PetscThreadReductionBegin - Initiates a threaded reduction and returns a
                               reduction object to be passed to PetscThreadCommRunKernel

   Input Parameters:
+  comm  - the MPI communicator
.  op    - the reduction operation
.  type  - the data type for the reduction
-  nreds - number of reductions

   Output Parameter:
.  redout - the reduction context

   Level: developer

   Notes:
   See include/petscthreadcomm.h for the available reduction operations

   To be called from the main thread before calling PetscThreadCommRunKernel

.seealso: PetscThreadCommReductionKernelPost(), PetscThreadCommReductionKernelEnd(), PetscThreadCommReductionEnd()
@*/
PetscErrorCode PetscThreadReductionBegin(MPI_Comm comm,PetscThreadCommReductionOp op, PetscDataType type,PetscInt nreds,PetscThreadCommReduction *redout)
{
  PetscErrorCode           ierr;
  PetscThreadComm          tcomm;
  PetscInt                 i;
  PetscThreadCommRedCtx    redctx;
  PetscThreadCommReduction red;

  PetscFunctionBegin;
  ierr = PetscCommGetThreadComm(comm,&tcomm);CHKERRQ(ierr);
  red  = tcomm->red;
  if (red->ctr+nreds > PETSC_REDUCTIONS_MAX) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Reductions in operation: %D Max. allowed: %D",red->ctr+nreds,PETSC_REDUCTIONS_MAX);
  for (i=red->ctr; i<red->ctr+nreds; i++) {
    redctx             = &red->redctx[i];
    redctx->op         = op;
    redctx->type       = type;
    redctx->red_status = THREADCOMM_REDUCTION_NEW;
    redctx->tcomm      = tcomm;
  }
  red->nreds += nreds;
  red->ctr    = red->ctr+nreds;
  *redout     = red;
  PetscFunctionReturn(0);
}
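
A minimal usage sketch of the begin/run/end flow, assuming the companion routines named in the .seealso list behave as their names suggest. The kernel TRSum, its argument layout, and the helper it would call to post its partial result are assumptions for illustration, not taken from the source above:

#include <petscthreadcomm.h>

/* Hypothetical kernel: each thread would deposit its local partial sum into
   the reduction context, e.g. via PetscThreadCommReductionKernelPost()
   (see include/petscthreadcomm.h for the exact signature). */
PetscErrorCode TRSum(PetscInt trank,PetscScalar *x,PetscThreadCommReduction red);

PetscErrorCode SumExample(PetscScalar *x)
{
  PetscErrorCode           ierr;
  PetscThreadCommReduction red;
  PetscScalar              sum;

  ierr = PetscThreadReductionBegin(PETSC_COMM_WORLD,THREADCOMM_SUM,PETSC_SCALAR,1,&red);CHKERRQ(ierr);
  ierr = PetscThreadCommRunKernel(PETSC_COMM_WORLD,(PetscThreadKernel)TRSum,2,x,red);CHKERRQ(ierr);
  ierr = PetscThreadReductionEnd(red,&sum);CHKERRQ(ierr);
  return 0;
}

Example #2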
PetscErrorCode PetscSubcommCreate_interlaced(PetscSubcomm psubcomm)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,*subsize,duprank,subrank;
  PetscMPIInt    np_subcomm,nleftover,i,j,color,nsubcomm=psubcomm->n;
  MPI_Comm       subcomm=0,dupcomm=0,comm=psubcomm->parent;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* get size of each subcommunicator */
  ierr = PetscMalloc((1+nsubcomm)*sizeof(PetscMPIInt),&subsize);CHKERRQ(ierr);

  np_subcomm = size/nsubcomm;
  nleftover  = size - nsubcomm*np_subcomm;
  for (i=0; i<nsubcomm; i++) {
    subsize[i] = np_subcomm;
    if (i<nleftover) subsize[i]++;
  }

  /* find color for this proc */
  color   = rank%nsubcomm;
  subrank = rank/nsubcomm;

  ierr = MPI_Comm_split(comm,color,subrank,&subcomm);CHKERRQ(ierr);

  j = 0; duprank = 0;
  for (i=0; i<nsubcomm; i++) {
    if (j == color) {
      duprank += subrank;
      break;
    }
    duprank += subsize[i]; j++;
  }

  /* create dupcomm with the same size as comm, but whose rank, duprank, maps the subcomms contiguously into dupcomm */
  ierr = MPI_Comm_split(comm,0,duprank,&dupcomm);CHKERRQ(ierr);
  {
    PetscThreadComm tcomm;
    ierr = PetscCommGetThreadComm(comm,&tcomm);CHKERRQ(ierr);
    ierr = MPI_Attr_put(dupcomm,Petsc_ThreadComm_keyval,tcomm);CHKERRQ(ierr);
    tcomm->refct++;
    ierr = MPI_Attr_put(subcomm,Petsc_ThreadComm_keyval,tcomm);CHKERRQ(ierr);
    tcomm->refct++;
  }
  ierr = PetscCommDuplicate(dupcomm,&psubcomm->dupparent,NULL);CHKERRQ(ierr);
  ierr = PetscCommDuplicate(subcomm,&psubcomm->comm,NULL);CHKERRQ(ierr);
  ierr = MPI_Comm_free(&dupcomm);CHKERRQ(ierr);
  ierr = MPI_Comm_free(&subcomm);CHKERRQ(ierr);

  psubcomm->color   = color;
  psubcomm->subsize = subsize;
  psubcomm->type    = PETSC_SUBCOMM_INTERLACED;
  PetscFunctionReturn(0);
}
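
To see what the interlaced mapping computes, here is a small standalone sketch (plain C, no MPI needed) that reproduces the color/subrank/duprank arithmetic above for a hypothetical communicator of 5 ranks split into 2 subcommunicators:

#include <stdio.h>

int main(void)
{
  int size = 5, nsubcomm = 2;        /* hypothetical sizes */
  int subsize[2], np, nleftover, i, rank;

  /* same subcommunicator sizing as the routine above */
  np        = size/nsubcomm;
  nleftover = size - nsubcomm*np;
  for (i=0; i<nsubcomm; i++) subsize[i] = np + (i<nleftover ? 1 : 0);

  for (rank=0; rank<size; rank++) {
    int color   = rank%nsubcomm;     /* interlaced: adjacent ranks alternate subcomms */
    int subrank = rank/nsubcomm;
    int duprank = 0, j;
    for (j=0; j<color; j++) duprank += subsize[j];
    duprank += subrank;
    printf("rank %d -> color %d subrank %d duprank %d\n",rank,color,subrank,duprank);
  }
  return 0;
}

It prints dupranks 0,3,1,4,2 for ranks 0..4: subcomm 0's three members occupy dupranks 0..2 and subcomm 1's two members occupy dupranks 3..4, which is exactly the contiguous layout dupcomm is built to achieve.

Example #3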
PetscErrorCode PetscSubcommCreate_contiguous(PetscSubcomm psubcomm)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,*subsize,duprank=-1,subrank=-1;
  PetscMPIInt    np_subcomm,nleftover,i,color=-1,rankstart,nsubcomm=psubcomm->n;
  MPI_Comm       subcomm=0,dupcomm=0,comm=psubcomm->parent;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* get size of each subcommunicator */
  ierr = PetscMalloc((1+nsubcomm)*sizeof(PetscMPIInt),&subsize);CHKERRQ(ierr);

  np_subcomm = size/nsubcomm;
  nleftover  = size - nsubcomm*np_subcomm;
  for (i=0; i<nsubcomm; i++) {
    subsize[i] = np_subcomm;
    if (i<nleftover) subsize[i]++;
  }

  /* get color and subrank of this proc */
  rankstart = 0;
  for (i=0; i<nsubcomm; i++) {
    if (rank >= rankstart && rank < rankstart+subsize[i]) {
      color   = i;
      subrank = rank - rankstart;
      duprank = rank;
      break;
    } else rankstart += subsize[i];
  }

  ierr = MPI_Comm_split(comm,color,subrank,&subcomm);CHKERRQ(ierr);

  /* create dupcomm with the same size as comm, but whose rank, duprank, maps the subcomms contiguously into dupcomm */
  ierr = MPI_Comm_split(comm,0,duprank,&dupcomm);CHKERRQ(ierr);
  {
    PetscThreadComm tcomm;
    ierr = PetscCommGetThreadComm(comm,&tcomm);CHKERRQ(ierr);
    ierr = MPI_Attr_put(dupcomm,Petsc_ThreadComm_keyval,tcomm);CHKERRQ(ierr);
    tcomm->refct++;
    ierr = MPI_Attr_put(subcomm,Petsc_ThreadComm_keyval,tcomm);CHKERRQ(ierr);
    tcomm->refct++;
  }
  ierr = PetscCommDuplicate(dupcomm,&psubcomm->dupparent,NULL);CHKERRQ(ierr);
  ierr = PetscCommDuplicate(subcomm,&psubcomm->comm,NULL);CHKERRQ(ierr);
  ierr = MPI_Comm_free(&dupcomm);CHKERRQ(ierr);
  ierr = MPI_Comm_free(&subcomm);CHKERRQ(ierr);

  psubcomm->color   = color;
  psubcomm->subsize = subsize;
  psubcomm->type    = PETSC_SUBCOMM_CONTIGUOUS;
  PetscFunctionReturn(0);
}
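
Both creation routines above are normally reached through the public PetscSubcomm interface rather than called directly. A minimal sketch, assuming the standard PetscSubcommCreate()/PetscSubcommSetNumber()/PetscSubcommSetType() calls dispatch to them:

#include <petscsys.h>

PetscErrorCode SplitExample(MPI_Comm comm)
{
  PetscErrorCode ierr;
  PetscSubcomm   psubcomm;

  ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr);
  ierr = PetscSubcommSetNumber(psubcomm,4);CHKERRQ(ierr);                 /* 4 subcommunicators */
  ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr);
  /* ... use psubcomm->comm and psubcomm->dupparent ... */
  ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr);
  return 0;
}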
Example #4
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "PetscThreadCommRunKernel_OpenMP"
PetscErrorCode PetscThreadCommRunKernel_OpenMP(MPI_Comm comm,PetscThreadCommJobCtx job)
{
  PetscErrorCode ierr;
  PetscThreadComm tcomm;
  PetscInt        trank=0;

  PetscFunctionBegin;
  ierr = PetscCommGetThreadComm(comm,&tcomm);CHKERRQ(ierr);
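  /* Fork the thread team: each thread queries its rank with omp_get_thread_num(),
     runs the kernel, and marks its slot in the job status array completed */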
#pragma omp parallel num_threads(tcomm->nworkThreads) shared(comm,job) private(trank,ierr)
  {
    trank = omp_get_thread_num();
    PetscRunKernel(trank,job->nargs,job);
    job->job_status[trank] = THREAD_JOB_COMPLETED;
  }
  PetscFunctionReturn(0);
}
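
For context, a sketch of the kind of user kernel this backend executes: PetscRunKernel() above unpacks job->nargs arguments and calls the user function with the thread rank as the leading argument. The kernel name, its arguments, and the commented launch line are assumptions for illustration:

/* Hypothetical user kernel: the leading PetscInt thread rank is the calling
   convention; the rest is illustrative. */
PetscErrorCode MyKernel(PetscInt trank,PetscScalar *x)
{
  x[trank] = (PetscScalar)trank;   /* each thread writes only its own slot */
  return 0;
}

/* Launched from the main thread, e.g.:
   ierr = PetscThreadCommRunKernel(comm,(PetscThreadKernel)MyKernel,1,x);CHKERRQ(ierr); */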
Example #5
PETSC_EXTERN PetscErrorCode PetscThreadCommCreate_PThread(PetscThreadComm tcomm)
{
  PetscThreadComm_PThread ptcomm;
  PetscErrorCode          ierr;
  PetscInt                i;

  PetscFunctionBegin;
  ptcommcrtct++;
  ierr = PetscStrcpy(tcomm->type,PTHREAD);CHKERRQ(ierr);
  ierr = PetscNew(&ptcomm);CHKERRQ(ierr);

  tcomm->data              = (void*)ptcomm;
  ptcomm->nthreads         = 0;
  ptcomm->sync             = PTHREADSYNC_LOCKFREE;
  ptcomm->aff              = PTHREADAFFPOLICY_ONECORE;
  ptcomm->spark            = PTHREADPOOLSPARK_SELF;
  ptcomm->ismainworker     = PETSC_TRUE;
  ptcomm->synchronizeafter = PETSC_TRUE;
  tcomm->ops->destroy      = PetscThreadCommDestroy_PThread;
  tcomm->ops->runkernel    = PetscThreadCommRunKernel_PThread_LockFree;
  tcomm->ops->barrier      = PetscThreadCommBarrier_PThread_LockFree;
  tcomm->ops->getrank      = PetscThreadCommGetRank_PThread;

  ierr = PetscMalloc1(tcomm->nworkThreads,&ptcomm->granks);CHKERRQ(ierr);

  if (!PetscPThreadCommInitializeCalled) { /* Only done for PETSC_THREAD_COMM_WORLD */
    PetscBool flg1,flg2,flg3,flg4;
    PetscPThreadCommInitializeCalled = PETSC_TRUE;

    ierr = PetscOptionsBegin(PETSC_COMM_WORLD,NULL,"PThread communicator options",NULL);CHKERRQ(ierr);
    ierr = PetscOptionsBool("-threadcomm_pthread_main_is_worker","Main thread is also a worker thread",NULL,PETSC_TRUE,&ptcomm->ismainworker,&flg1);CHKERRQ(ierr);
    ierr = PetscOptionsEnum("-threadcomm_pthread_affpolicy","Thread affinity policy"," ",PetscPThreadCommAffinityPolicyTypes,(PetscEnum)ptcomm->aff,(PetscEnum*)&ptcomm->aff,&flg2);CHKERRQ(ierr);
    ierr = PetscOptionsEnum("-threadcomm_pthread_type","Thread pool type"," ",PetscPThreadCommSynchronizationTypes,(PetscEnum)ptcomm->sync,(PetscEnum*)&ptcomm->sync,&flg3);CHKERRQ(ierr);
    ierr = PetscOptionsEnum("-threadcomm_pthread_spark","Thread pool spark type"," ",PetscPThreadCommPoolSparkTypes,(PetscEnum)ptcomm->spark,(PetscEnum*)&ptcomm->spark,&flg4);CHKERRQ(ierr);
    ierr = PetscOptionsBool("-threadcomm_pthread_synchronizeafter","Puts a barrier after every kernel call",NULL,PETSC_TRUE,&ptcomm->synchronizeafter,&flg1);CHKERRQ(ierr);
    ierr = PetscOptionsEnd();CHKERRQ(ierr);

    if (ptcomm->ismainworker) {
      ptcomm->nthreads         = tcomm->nworkThreads-1;
      ptcomm->thread_num_start = 1;
    } else {
      ptcomm->nthreads         = tcomm->nworkThreads;
      ptcomm->thread_num_start = 0;
    }

    switch (ptcomm->sync) {
    case PTHREADSYNC_LOCKFREE:
      ptcomm->initialize    = PetscPThreadCommInitialize_LockFree;
      ptcomm->finalize      = PetscPThreadCommFinalize_LockFree;
      tcomm->ops->runkernel = PetscThreadCommRunKernel_PThread_LockFree;
      tcomm->ops->barrier   = PetscThreadCommBarrier_PThread_LockFree;
      break;
    default:
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only Lock-free synchronization scheme supported currently");
    }
    /* Set up thread ranks */
    for (i=0; i< tcomm->nworkThreads; i++) ptcomm->granks[i] = i;

    if (ptcomm->ismainworker) {
#if defined(PETSC_PTHREAD_LOCAL)
      PetscPThreadRank=0; /* Main thread rank */
#else
      ierr = pthread_key_create(&PetscPThreadRankkey,NULL);CHKERRQ(ierr);
      ierr = pthread_setspecific(PetscPThreadRankkey,&ptcomm->granks[0]);CHKERRQ(ierr);
#endif
    }
    /* Set the leader thread rank */
    if (ptcomm->nthreads) {
      if (ptcomm->ismainworker) tcomm->leader = ptcomm->granks[1];
      else tcomm->leader = ptcomm->granks[0];
    }

    /* Create array holding pthread ids */
    ierr = PetscMalloc1(tcomm->nworkThreads,&ptcomm->tid);CHKERRQ(ierr);
    /* Create thread attributes */
    ierr = PetscMalloc1(tcomm->nworkThreads,&ptcomm->attr);CHKERRQ(ierr);
    ierr = PetscThreadCommSetPThreadAttributes(tcomm);CHKERRQ(ierr);
    if (ptcomm->ismainworker) {
      /* Pin main thread */
#if defined(PETSC_HAVE_SCHED_CPU_SET_T)
      cpu_set_t mset;
      PetscInt  ncores,icore;

      ierr = PetscGetNCores(&ncores);CHKERRQ(ierr);
      CPU_ZERO(&mset);
      icore = tcomm->affinities[0]%ncores;
      CPU_SET(icore,&mset);
      sched_setaffinity(0,sizeof(cpu_set_t),&mset);
#endif
    }
    /* Initialize thread pool */
    ierr = (*ptcomm->initialize)(tcomm);CHKERRQ(ierr);

  } else {
    PetscThreadComm         gtcomm;
    PetscThreadComm_PThread gptcomm;
    PetscInt                *granks,j,*gaffinities;

    ierr        = PetscCommGetThreadComm(PETSC_COMM_WORLD,&gtcomm);CHKERRQ(ierr);
    gaffinities = gtcomm->affinities;
    gptcomm     = (PetscThreadComm_PThread)gtcomm->data;
    granks      = gptcomm->granks;
    /* Copy over the data from the global thread communicator structure */
    ptcomm->ismainworker     = gptcomm->ismainworker;
    ptcomm->thread_num_start = gptcomm->thread_num_start;
    ptcomm->sync             = gptcomm->sync;
    ptcomm->aff              = gptcomm->aff;
    tcomm->ops->runkernel    = gtcomm->ops->runkernel;
    tcomm->ops->barrier      = gtcomm->ops->barrier;

    for (i=0; i < tcomm->nworkThreads; i++) {
      for (j=0; j < gtcomm->nworkThreads; j++) {
        if (tcomm->affinities[i] == gaffinities[j]) ptcomm->granks[i] = granks[j];
      }
    }
  }
  PetscFunctionReturn(0);
}
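
Since the options block above registers command-line switches, the backend is typically tuned at run time. A plausible invocation, with the flag names taken verbatim from the PetscOptions calls above (the executable name and the -threadcomm_type selector are assumptions):

./app -threadcomm_type pthread -threadcomm_pthread_main_is_worker false -threadcomm_pthread_synchronizeafter true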