Example #1
void
_mp_copyin_move_cpp(void *blk_tp, int off, int class_size,
                    int vector_size, assign_func_ptr assign_op)
{
  int lcpu;
  char *to, *fr;
  char *garbage = 0;
  int i;

  lcpu = __kmpc_global_thread_num(0);

  __kmpc_barrier(0, lcpu);
  if (lcpu != 0) {
    fr = __kmpc_threadprivate_cached(0, 0, garbage,
                                     (size_t)(class_size * vector_size),
                                     blk_tp);
    to = __kmpc_threadprivate_cached(0, lcpu, garbage,
                                     (size_t)(class_size * vector_size),
                                     blk_tp);

    for (i = 0; i < vector_size; i++) {
      if (to != fr)
        (*assign_op)(to, fr);
      to += class_size;
      fr += class_size;
    }
  }
  __kmpc_barrier(0, lcpu);
} 
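For context, a minimal sketch of what a call site for _mp_copyin_move_cpp might look like: the compiler passes a thunk wrapping the class's copy-assignment operator as assign_op. The widget type, widget_assign thunk, and widget_tp block are illustrative assumptions, not symbols from the runtime.

/* Hypothetical call-site sketch (assumed names; C analogue of the C++ case). */
typedef void (*assign_func_ptr)(void *, void *);
extern void _mp_copyin_move_cpp(void *blk_tp, int off, int class_size,
                                int vector_size, assign_func_ptr assign_op);

struct widget { int a; double b; };

/* Stand-in for the compiler-emitted wrapper around operator=. */
static void
widget_assign(void *to, void *fr)
{
  *(struct widget *)to = *(struct widget *)fr;
}

static struct widget widget_tp[4];  /* threadprivate master copy, keyed by its address */

/* On entry to a parallel region with copyin(widget_tp), each thread would run: */
void
copyin_widget_tp(void)
{
  _mp_copyin_move_cpp(widget_tp, 0, (int)sizeof(struct widget), 4, widget_assign);
}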
Example #2
void
_mp_copypriv_move_tls(void **blk_tp, int off, int size, int single_thread)
{
  int lcpu;
  char *to;
  char *garbage = 0;

  if (single_thread != -1) {  /* single thread */
    if (*blk_tp == 0)
      singadr = (char *)__kmpc_threadprivate(0, single_thread, garbage, (size_t)size);
    else
      singadr = *blk_tp;
    singlen = size;
  }
  __kmpc_barrier(0, __kmpc_global_thread_num(0));
  if (single_thread == -1) {  /* not the single thread */
    lcpu = __kmpc_global_thread_num(0);
    if (*blk_tp == 0)
      to = __kmpc_threadprivate(0, lcpu, garbage, (size_t)size);
    else
      to = *blk_tp;
    memcpy(to, singadr, size);
  }
  __kmpc_barrier(0, __kmpc_global_thread_num(0));
}
Example #3
/* C/C++: copy a private stack or other variable */
void
_mp_copypriv(char *adr, long len, int thread)
{
  if (thread == 0) {
    singadr = adr;
    singlen = len;
  }
  __kmpc_barrier(0, __kmpc_global_thread_num(0));
  if (thread)
    memcpy(adr, singadr, singlen);
  __kmpc_barrier(0, __kmpc_global_thread_num(0));
}
Example #4
/*!
@ingroup CANCELLATION
@param loc_ref location of the original task directive
@param gtid Global thread ID of encountering thread

@return returns true if a matching cancellation request has been flagged in the
RTL and the encountering thread has to cancel.

Barrier with cancellation point to send threads from the barrier to the
end of the parallel region.  Needs a special code pattern as documented
in the design document for the cancellation feature.
*/
kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 gtid) {
  int ret = 0 /* false */;
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *this_team = this_thr->th.th_team;

  KMP_DEBUG_ASSERT(__kmp_get_gtid() == gtid);

  // call into the standard barrier
  __kmpc_barrier(loc, gtid);

  // if cancellation is active, check cancellation flag
  if (__kmp_omp_cancellation) {
    // depending on which construct to cancel, check the flag and
    // reset the flag
    switch (KMP_ATOMIC_LD_RLX(&(this_team->t.t_cancel_request))) {
    case cancel_parallel:
      ret = 1;
      // ensure that threads have checked the flag, when
      // leaving the above barrier
      __kmpc_barrier(loc, gtid);
      this_team->t.t_cancel_request = cancel_noreq;
      // the next barrier is the fork/join barrier, which
      // synchronizes the threads leaving here
      break;
    case cancel_loop:
    case cancel_sections:
      ret = 1;
      // ensure that threads have checked the flag, when
      // leaving the above barrier
      __kmpc_barrier(loc, gtid);
      this_team->t.t_cancel_request = cancel_noreq;
      // synchronize the threads again to make sure we do not have any run-away
      // threads that cause a race on the cancellation flag
      __kmpc_barrier(loc, gtid);
      break;
    case cancel_taskgroup:
      // this case should not occur
      KMP_ASSERT(0 /* false */);
      break;
    case cancel_noreq:
      // do nothing
      break;
    default:
      KMP_ASSERT(0 /* false */);
    }
  }

  return ret;
}
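To illustrate the special code pattern mentioned in the comment above, here is a hedged sketch of what a compiler might emit at a cancellation barrier inside a cancellable parallel region; the function name and label are illustrative assumptions, not taken from the design document, and the ident_t/kmp_int32 types are assumed to come from kmp.h.

/* Hypothetical compiler-emitted outlined body (assumed names). */
void
emitted_parallel_body(ident_t *loc, kmp_int32 gtid)
{
  /* ... first part of the region ... */
  if (__kmpc_cancel_barrier(loc, gtid))
    goto region_end;  /* cancellation observed: skip to the region end */
  /* ... second part of the region ... */
region_end:
  ;  /* threads re-join at the implicit fork/join barrier */
}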
Example #5
/* Copy multiple items from the master thread to the worker threads.
 * Deprecated: kept only for backward compatibility.
 */
void
_mp_copyin_move_multiple(int n_entries, void *data)
{
  int i;
  const int tid = __kmpc_global_thread_num(NULL);
  struct pair_t { size_t size; void *data; };

  if (tid != 0) {
    for (i = 0; i < n_entries; ++i) {
      struct pair_t *item = (struct pair_t *)data + i;
      void *key = item->data;
      const size_t size = item->size;
      void *to = __kmpc_threadprivate_cached(NULL, tid, NULL, size, key);
      /* FIXME: should this be 0 or the team master?
       * Probably the gtid of the team master.
       */
      void *fr = __kmpc_threadprivate_cached(NULL, 0, NULL, size, key);
      if (to != fr)
        memcpy(to, fr, size);
    }
  }

  __kmpc_barrier(0, tid);
}
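A hedged sketch of the caller side: the data argument is an array of (size, data) pairs whose layout must match the pair_t declared inside the function. The blocks xs and y are illustrative assumptions, not runtime symbols.

#include <stddef.h>

extern void _mp_copyin_move_multiple(int n_entries, void *data);

static int xs[100];  /* threadprivate block; its address is the lookup key */
static double y;     /* another threadprivate block */

void
copyin_two_blocks(void)
{
  struct pair_t { size_t size; void *data; };
  struct pair_t items[2] = {
    { sizeof(xs), xs },
    { sizeof(y),  &y },
  };
  _mp_copyin_move_multiple(2, items);
}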
Example #6
void
_mp_copypriv_al(char **adr, long len, int thread)
{
  if (thread == 0) {
    singadr = *adr;
    singlen = len;
  }
  __kmpc_barrier(0, __kmpc_global_thread_num(0));
  if (thread)
    memcpy(*adr, singadr, singlen);
  __kmpc_barrier(0, __kmpc_global_thread_num(0));

  /* The second barrier is needed because there is only one singadr:
   * we must wait until every thread has finished copying so that a
   * subsequent mp_copypriv... call cannot overwrite singadr too early.
   */
}
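Making the comment above concrete: a minimal sketch of two consecutive copyprivate broadcasts. Without the trailing barrier in the first call, the single thread of the second call could overwrite singadr while other threads are still copying from it. The buffer names are assumptions for illustration.

extern void _mp_copypriv_al(char **adr, long len, int thread);

void
emitted_two_singles(int thread)   /* thread == 0 executes both singles here */
{
  char buf1[64], buf2[64];        /* each thread's private buffers */
  char *p1 = buf1, *p2 = buf2;

  /* First single construct: thread 0 publishes buf1, the others copy it in. */
  _mp_copypriv_al(&p1, (long)sizeof buf1, thread);
  /* Safe only because the first call's trailing barrier guaranteed all
   * copies finished before singadr is reused for the second broadcast. */
  _mp_copypriv_al(&p2, (long)sizeof buf2, thread);
}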
Example #7
void
_mp_copypriv_move(void *blk_tp, int off, int size, int single_thread)
{
  int lcpu;
  char *to;
  char *garbage = 0;

  if (single_thread != -1) {  /* single thread */
    singadr = __kmpc_threadprivate_cached(0, single_thread, garbage, (size_t)size, blk_tp);
    singlen = size;
  }
  __kmpc_barrier(0, __kmpc_global_thread_num(0));
  if (single_thread == -1) {  /* not the single thread */
    lcpu = __kmpc_global_thread_num(0);
    to = __kmpc_threadprivate_cached(0, lcpu, garbage, (size_t)size, blk_tp);
    memcpy(to, singadr, size);
  }
  __kmpc_barrier(0, __kmpc_global_thread_num(0));
}
Example #8
void
_mp_copyin_move_al(void *blk_tp, int off, long size)
{
  int lcpu;
  char *to, *fr;
  char *garbage = 0;

  lcpu = __kmpc_global_thread_num(0);
  if (lcpu != 0) {
    fr = __kmpc_threadprivate_cached(0, 0, garbage, (size_t)size, blk_tp);
    to = __kmpc_threadprivate_cached(0, lcpu, garbage, (size_t)size, blk_tp);
    if (to && to != fr) {
      memcpy(to, fr, size);
    }
  }
  __kmpc_barrier(0, __kmpc_global_thread_num(0));
}
Example #9
void
_mp_copyin_move_tls(void *blk_tp, int off, int size)
{
  int lcpu;
  char *to, *fr;
  char *garbage = 0;

  lcpu = __kmpc_global_thread_num(0);

  if (lcpu != 0) {
    fr = __kmpc_threadprivate(0, 0, garbage, (size_t)size);
    to = __kmpc_threadprivate(0, lcpu, garbage, (size_t)size);
    if (to != fr)
      memcpy(to, fr, size);
  }
  __kmpc_barrier(0, __kmpc_global_thread_num(0));
}
// ---------------------------------------------------------------------------
void
run_loop(
    int loop_lb,   // Loop lower bound.
    int loop_ub,   // Loop upper bound.
    int loop_st,   // Loop stride.
    int lchunk
) {
  static int volatile loop_sync = 0;
  int lb;   // Chunk lower bound.
  int ub;   // Chunk upper bound.
  int st;   // Chunk stride.
  int rc;
  int tid = omp_get_thread_num();
  int gtid = __kmpc_global_thread_num(&loc);
  int last;
  int tc = (loop_ub - loop_lb) / loop_st + 1;  // Loop trip count.
  int ch;
  int no_chunk = 0;
  if (lchunk == 0) {
    no_chunk = 1;
    lchunk = 1;
  }
  ch = lchunk * SIMD_LEN;
#if _DEBUG > 1
  printf("run_loop gtid %d tid %d (lb=%d, ub=%d, st=%d, ch=%d)\n",
         gtid, tid, (int)loop_lb, (int)loop_ub, (int)loop_st, lchunk);
#endif
  // Don't test degenerate cases that should have been discovered by codegen.
  if (loop_st == 0)
    return;
  if (loop_st > 0 ? loop_lb > loop_ub : loop_lb < loop_ub)
    return;
  __kmpc_dispatch_init_4(&loc, gtid, kmp_sch_runtime_simd,
                         loop_lb, loop_ub, loop_st, SIMD_LEN);
  {
    // Each thread pulls chunks from the dispatcher and validates them.
    int chunk;      // Number of the current chunk.
    int last_ub;    // Upper bound of the last processed chunk.
    u64 cur;        // Number of iterations in the current chunk.
    u64 max;        // Max allowed iterations for the current chunk.
    int undersized = 0;
    last_ub = loop_ub;
    chunk = 0;
    max = (loop_ub - loop_lb) / loop_st + 1;
    // The first chunk can consume all iterations.
    while (__kmpc_dispatch_next_4(&loc, gtid, &last, &lb, &ub, &st)) {
      ++ chunk;
#if _DEBUG
      printf("th %d: chunk=%d, lb=%d, ub=%d ch %d\n",
             tid, chunk, (int)lb, (int)ub, (int)(ub-lb+1));
#endif
      // Only the final chunk may be undersized; if the previous chunk
      // was undersized, it was not the final one, which is an error.
      if (undersized)
        printf("Error with chunk %d, th %d, err %d\n", chunk, tid, ++err);
      if (loop_st > 0) {
        if (!(ub <= loop_ub))
          printf("Error with ub %d, %d, ch %d, err %d\n",
                 (int)ub, (int)loop_ub, chunk, ++err);
        if (!(lb <= ub))
          printf("Error with bounds %d, %d, %d, err %d\n",
                 (int)lb, (int)ub, chunk, ++err);
      } else {
        if (!(ub >= loop_ub))
          printf("Error with ub %d, %d, %d, err %d\n",
                 (int)ub, (int)loop_ub, chunk, ++err);
        if (!(lb >= ub))
          printf("Error with bounds %d, %d, %d, err %d\n",
                 (int)lb, (int)ub, chunk, ++err);
      } // if
      // Stride should not change.
      if (!(st == loop_st))
        printf("Error with st %d, %d, ch %d, err %d\n",
               (int)st, (int)loop_st, chunk, ++err);
      cur = (ub - lb) / loop_st + 1;
      // Guided scheduling uses FP computations, so the current chunk may
      // be slightly bigger (+1) than the allowed maximum.
      if (!(cur <= max + 1))
        printf("Error with iter %d, %d, err %d\n", (int)cur, (int)max, ++err);
      if (last) {
        if (!no_chunk && cur > ch)
          printf("Error: too big last chunk %d (%d), tid %d, err %d\n",
                 (int)cur, ch, tid, ++err);
      } else {
        if (cur % ch)
          printf("Error with chunk %d, %d, ch %d, tid %d, err %d\n",
                 chunk, (int)cur, ch, tid, ++err);
      }
      // Update the maximum allowed iterations for the next chunk.
      if (cur < max)
        max = cur;
      last_ub = ub;
      undersized = (cur < ch);
#if _DEBUG > 1
      if (last)
        printf("undersized=%d cur %d, ch %d, tid %d, ub %d, lb %d, st %d =======\n",
               undersized, (int)cur, ch, tid, ub, lb, loop_st);
#endif
    } // while
    // Must have the right last iteration index.
    if (loop_st > 0) {
      if (!(last_ub <= loop_ub))
        printf("Error with last1 %d, %d, ch %d, err %d\n",
               (int)last_ub, (int)loop_ub, chunk, ++err);
      if (last && !(last_ub + loop_st > loop_ub))
        printf("Error with last2 %d, %d, %d, ch %d, err %d\n",
               (int)last_ub, (int)loop_st, (int)loop_ub, chunk, ++err);
    } else {
      if (!(last_ub >= loop_ub))
        printf("Error with last1 %d, %d, ch %d, err %d\n",
               (int)last_ub, (int)loop_ub, chunk, ++err);
      if (last && !(last_ub + loop_st < loop_ub))
        printf("Error with last2 %d, %d, %d, ch %d, err %d\n",
               (int)last_ub, (int)loop_st, (int)loop_ub, chunk, ++err);
    } // if
  }
  __kmpc_barrier(&loc, gtid);
} // run_loop
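For completeness, a hedged sketch of a driver for run_loop; it assumes the globals err and loc from the surrounding test file, and that OMP_SCHEDULE is set to something like "guided,2", since kmp_sch_runtime_simd reads the runtime schedule. The bounds and thread count are illustrative.

// Hypothetical driver (assumed to live next to the test globals err/loc).
int
main(void)
{
  // kmp_sch_runtime_simd follows OMP_SCHEDULE, e.g. OMP_SCHEDULE="guided,2".
  #pragma omp parallel num_threads(4)
  {
    run_loop(0, 99, 1, 2);   // lb=0, ub=99, st=1, chunk=2
  }
  if (err)
    printf("failed with %d errors\n", err);
  else
    printf("passed\n");
  return err ? 1 : 0;
}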