/*
 * Demo program: sums a[0..n-1] in an OpenMP worksharing loop that uses a
 * dynamic schedule, then prints a few runtime queries from inside and from
 * outside the parallel region.
 *
 * Usage: <program> <iterations> <chunk>
 *   iterations  loop trip count; values above MAX_ITERATIONS are clamped
 *   chunk       chunk size for schedule(dynamic,chunk); raised to 1 if lower
 *
 * Exits with status -1 when fewer than two arguments are supplied.
 */
int main(int argc, char **argv) { 

  /* Capacity of a[].  The array is sized with the same constant that n is
     clamped to, so neither loop below can index past its end. */
  enum { MAX_ITERATIONS = 200 };

  int i, n, chunk, a[MAX_ITERATIONS], suma=0;

  if(argc < 3) { 
    fprintf(stderr,"\nFalta iteraciones o chunk \n"); 
    exit(-1); 
  } 
  n = atoi(argv[1]);
  if (n > MAX_ITERATIONS) n = MAX_ITERATIONS;
  chunk = atoi(argv[2]);
  /* The chunk size of a schedule clause has to be a positive value. */
  if (chunk < 1) chunk = 1;
 
  for (i=0; i<n; i++) a[i] = i; 
 
  #pragma omp parallel
  { 
    /* Every thread starts from the outer suma (firstprivate); the copy of
       the thread that runs the last iteration is written back (lastprivate). */
    #pragma omp for firstprivate(suma) lastprivate(suma) schedule(dynamic,chunk) 
    for (i=0; i<n; i++) { 
      suma = suma + a[i]; 
      //printf(" thread %d suma a[%d]=%d suma=%d \n", omp_get_thread_num(),i,a[i],suma); 
    } 
    /* Only the master thread reports the state seen inside the region. */
    #pragma omp master
    {
      printf("Dentro de 'parallel':\n");
      printf("num-threads: %d\n", omp_get_num_threads());
      printf("num-procs: %d\n", omp_get_num_procs());
      printf("in-parallel: %d\n", omp_in_parallel());
    }
  }
  //printf("Fuera de 'parallel for' suma=%d\n",suma);
  printf("Fuera de 'parallel':\n");
  printf("num-threads: %d\n", omp_get_num_threads());
  printf("num-procs: %d\n", omp_get_num_procs());
  printf("in-parallel: %d\n", omp_in_parallel());

  return 0;
}
Exemplo n.º 2
0
/*
 * Compares the OpenMP runtime state with the expectation encoded in thds.
 *
 * thds == 0: omp_in_parallel() has to return 0 and omp_get_num_threads()
 *            has to return 1.
 * otherwise: omp_in_parallel() has to return non-zero and
 *            omp_get_num_threads() has to return thds.
 *
 * Every mismatch invokes ERROR (errors) inside a critical section.
 */
void
check_parallel (int thds)
{
  const int expect_serial = (thds == 0);
  const int expected_team = expect_serial ? 1 : thds;

  /* The in-parallel flag is wrong when it is set while a serial state is
     expected, or clear while a parallel state is expected. */
  if ((omp_in_parallel () != 0) == expect_serial) {
    #pragma omp critical
    {
      ERROR (errors);
    }
  }

  if (omp_get_num_threads () != expected_team) {
    #pragma omp critical
    {
      ERROR (errors);
    }
  }
}
/*
 * Demo program: accumulates a[0..n-1] in a "parallel for" with a dynamic
 * schedule, tracing every iteration, and prints a set of OpenMP runtime
 * queries from inside the loop (thread 0 only) and again after it.
 *
 * Usage: <program> <iterations> <chunk>; iterations is capped at 200.
 * Exits with status -1 when fewer than two arguments are supplied.
 */
int main(int argc, char **argv)
{
	int n = 200;
	int a[n];
	int i;
	int chunk;
	int suma = 0;
	omp_sched_t sched_kind;
	int sched_chunk;

	if (argc < 3) {
		fprintf(stderr,"\nFalta iteraciones o chunk \n");
		exit(-1);
	}

	n = atoi(argv[1]);
	n = (n > 200) ? 200 : n;
	chunk = atoi(argv[2]);

	for (i = 0; i < n; i++) {
		a[i] = i;
	}

	#pragma omp parallel for firstprivate(suma) lastprivate(suma) schedule(dynamic,chunk)
	for (i = 0; i < n; i++) {
		const int tid = omp_get_thread_num();

		suma += a[i];
		printf(" thread %d suma a[%d]=%d suma=%d \n", tid, i, a[i], suma);

		/* Thread 0 additionally dumps the runtime state on each of its iterations. */
		if (tid == 0) {
			printf(" Dentro de 'parallel for':\n");

			printf("   static = 1, dynamic = 2, guided = 3, auto = 4\n");
			omp_get_schedule(&sched_kind, &sched_chunk);
			printf("   dyn-var: %d, nthreads-var:%d, thread-limit-var:%d,run-sched-var: %d, chunk: %d\n",
			       omp_get_dynamic(),
			       omp_get_max_threads(),
			       omp_get_thread_limit(),
			       sched_kind,
			       sched_chunk);

			printf(" get_num_threads: %d,get_num_procs: %d,in_parallel():%d \n",
			       omp_get_num_threads(),
			       omp_get_num_procs(),
			       omp_in_parallel());
		}
	}

	/* lastprivate leaves the value from the sequentially last iteration in suma. */
	printf("Fuera de 'parallel for' suma=%d\n", suma);

	/* Same report as above, now taken outside the parallel construct. */
	printf("   static = 1, dynamic = 2, guided = 3, auto = 4\n");
	omp_get_schedule(&sched_kind, &sched_chunk);
	printf("   dyn-var: %d, nthreads-var:%d, thread-limit-var:%d,run-sched-var: %d, chunk: %d\n",
	       omp_get_dynamic(),
	       omp_get_max_threads(),
	       omp_get_thread_limit(),
	       sched_kind,
	       sched_chunk);

	printf(" get_num_threads: %d,get_num_procs: %d,in_parallel():%d \n",
	       omp_get_num_threads(),
	       omp_get_num_procs(),
	       omp_in_parallel());
}
Exemplo n.º 4
0
// Copies N elements of type Dtype from X to Y; a no-op when both pointers
// are equal.  In OpenMP builds the copy is split into fixed-size blocks that
// are copied in a parallel loop, provided the caller is the major thread,
// Caffe is not in GPU mode, no parallel region is active and N reaches a
// threshold derived from the thread count and the processor speed.
void caffe_cpu_copy(const int N, const Dtype* X, Dtype* Y) {
  if (X == Y) {
    return;
  }

#ifdef _OPENMP
  const int max_threads = omp_get_max_threads();
  const int min_parallel_count =
    max_threads * caffe::cpu::OpenMpManager::getProcessorSpeedMHz() / 3;

  const bool run_parallel =
    caffe::cpu::OpenMpManager::isMajorThread(boost::this_thread::get_id()) &&
    (Caffe::mode() != Caffe::GPU) &&
    (omp_in_parallel() == 0) &&
    (N >= min_parallel_count);

  if (run_parallel) {
    // Number of elements in one 256 KiB block.
    const int block = 256*1024/sizeof(Dtype);
    const int remainder = N%block;

    // Whole blocks are independent of each other and copied concurrently.
    #pragma omp parallel for
    for (int i = 0; i <= N-block; i += block) {
      memcpy(Y+i, X+i, sizeof(Dtype) * block);  // NOLINT(caffe/alt_fn)
    }

    // The elements past the last whole block are copied by the caller.
    if (remainder != 0) {
      const int tail_start = N - remainder;
      memcpy(Y + tail_start, X + tail_start,  // NOLINT(caffe/alt_fn)
          sizeof(Dtype) * remainder);
    }
    return;
  }
#endif

  // Serial path: one copy for the whole range.
  memcpy(Y, X, sizeof(Dtype) * N);  // NOLINT(caffe/alt_fn)
}
Exemplo n.º 5
0
// Requests n Bernoulli samples with parameter p into r through the VSL calls
// at the bottom of the function.
//
// In OpenMP builds the range [0, n) is divided into nthr contiguous slices,
// one per thread; each thread creates its own stream from the same seed and
// advances it by its slice offset before generating.  Without OpenMP a
// single slice covers the whole range.
static void bernoulli_generate(int n, double p, int* r) {
  // One seed per call, shared by every slice.
  int seed = 17 + caffe_rng_rand() % 4096;

#ifdef _OPENMP
  int nthr = omp_get_max_threads();
  int threshold = nthr * caffe::cpu::OpenMpManager::getProcessorSpeedMHz() / 3;
  // Go parallel only in CPU mode, outside of any active parallel region and
  // for inputs that reach the threshold.
  bool run_parallel =
    (Caffe::mode() != Caffe::GPU) &&
    (omp_in_parallel() == 0) &&
    (n >= threshold);
  // A team of one thread turns the region below into the serial path.
  if (!run_parallel) nthr = 1;

  // NOTE(review): the slicing assumes the team really has nthr threads; if
  // the runtime grants fewer, the trailing slices would not be generated --
  // confirm.
# pragma omp parallel num_threads(nthr)
  {
    const int ithr = omp_get_thread_num();
    // Slice length, rounded up so that nthr slices cover all n elements.
    const int avg_amount = (n + nthr - 1) / nthr;
    const int my_offset = ithr * avg_amount;
    // The last slice is shortened to end at n.
    // NOTE(review): this becomes negative if my_offset exceeds n (small n
    // relative to nthr) -- presumably excluded by the threshold; verify.
    const int my_amount = std::min(my_offset + avg_amount, n) - my_offset;
#else
  {
    const int my_amount = n;
    const int my_offset = 0;
#endif

    // Per-slice stream: same generator and seed, skipped ahead by the slice
    // offset.
    VSLStreamStatePtr stream;
    vslNewStream(&stream, VSL_BRNG_MCG31, seed);
    vslSkipAheadStream(stream, my_offset);
    viRngBernoulli(VSL_RNG_METHOD_BERNOULLI_ICDF, stream, my_amount,
      r + my_offset, p);
    vslDeleteStream(&stream);
  }
}
Exemplo n.º 6
0
	/// Reports whether omp_in_parallel() signals a parallel execution
	/// state, i.e. other threads may currently be executing.  Builds
	/// without OpenMP always report false.
	static bool in_parallel(void)
	{
# if defined(_OPENMP)
		const int active = omp_in_parallel();
		return active != 0;
# else
		return false;
# endif
	}
Exemplo n.º 7
0
// Throws a Kokkos runtime exception, prefixed with label, when called while
// omp_in_parallel() reports a parallel region; otherwise does nothing.
void OpenMPexec::verify_is_process( const char * const label )
{
  if ( ! omp_in_parallel() ) {
    return;
  }

  const std::string msg = std::string( label ) + " ERROR: in parallel";
  Kokkos::Impl::throw_runtime_exception( msg );
}
Exemplo n.º 8
0
	/// Tells whether execution is currently in a parallel state, meaning
	/// other threads may be running at the same time.  Without OpenMP
	/// support the answer is always false.
	static bool in_parallel(void)
	{
# ifndef _OPENMP
		return false;
# else
		const bool active = static_cast<bool>( omp_in_parallel() );
		return active;
# endif
	}
Exemplo n.º 9
0
/*
 * Verifies omp_in_parallel(): it has to return false when called from a
 * serial region and true when called from within a parallel region.
 * Returns 1 when both observations hold and 0 otherwise.
 */
int test_omp_in_parallel()
{
  /* Sampled before any parallel region has been opened. */
  int seen_in_serial = omp_in_parallel();
  int seen_in_parallel = 0;

  #pragma omp parallel
  {
    /* One thread of the team records what the region reports. */
    #pragma omp single
    {
      seen_in_parallel = omp_in_parallel();
    }
  }

  if (seen_in_serial) {
    return 0;
  }
  return seen_in_parallel ? 1 : 0;
}
Exemplo n.º 10
0
// Records a (function name, class name) pair on the log context stack.
// There is no multithread support, so in OpenMP builds the call is ignored
// while a parallel region is active.
void push_log_context(const char *functionname, const void *classname) {
#ifdef _OPENMP
  if (omp_in_parallel()) {
    return;
  }
#endif
  internal::log_contexts.push_back(std::make_pair(functionname, classname));
}
Exemplo n.º 11
0
// Tears down the OpenMP execution instance of the calling thread.
//
// Throws a runtime exception when called while omp_in_parallel() reports a
// parallel region.  If an instance exists it is destroyed and deallocated,
// the per-thread state of every thread in a freshly opened team is reset,
// and the recorded hardware thread maximum is set back to 1.
void OpenMP::impl_finalize()
#endif
{
  if ( omp_in_parallel() )
  {
    std::string msg("Kokkos::OpenMP::finalize ERROR ");
    // NOTE(review): the "not initialized" text is only ever produced on this
    // in-parallel path -- confirm that is intended.
    if( !Impl::t_openmp_instance ) msg.append(": not initialized");
    if( omp_in_parallel() ) msg.append(": in parallel");
    Kokkos::Impl::throw_runtime_exception(msg);
  }

  if ( Impl::t_openmp_instance ) {
    // Silence Cuda Warning
    // Larger of the instance pool size and the recorded hardware maximum;
    // read before the instance is destroyed below.
    const int nthreads = Impl::t_openmp_instance->m_pool_size <= Impl::g_openmp_hardware_max_threads
                       ? Impl::g_openmp_hardware_max_threads
                       : Impl::t_openmp_instance->m_pool_size;
    (void) nthreads;

    // Explicit destructor call followed by a manual deallocation through
    // the OpenMP memory space.
    using Exec = Impl::OpenMPExec;
    Exec * instance = Impl::t_openmp_instance;
    instance->~Exec();

    OpenMP::memory_space space;
    space.deallocate( instance, sizeof(Exec) );

    // Every thread of the team clears its own per-thread state.
    #pragma omp parallel num_threads(nthreads)
    {
      Impl::t_openmp_hardware_id = 0;
      Impl::t_openmp_instance    = nullptr;
      Impl::SharedAllocationRecord< void, void >::tracking_disable();
    }

    // allow main thread to track
    Impl::SharedAllocationRecord< void, void >::tracking_enable();

    Impl::g_openmp_hardware_max_threads = 1;
  }

  #if defined(KOKKOS_ENABLE_PROFILING)
    Kokkos::Profiling::finalize();
  #endif
}
Exemplo n.º 12
0
/*
 * Expects a serial execution state: omp_in_parallel() has to return 0 and
 * omp_get_num_threads() has to return 1.  Each violated expectation adds
 * one to the global error counter inside a critical section.
 */
void
check_parallel ()
{
  if (omp_in_parallel ()) {
    #pragma omp critical
    {
      errors += 1;
    }
  }

  if (!(omp_get_num_threads () == 1)) {
    #pragma omp critical
    {
      errors += 1;
    }
  }
}
Exemplo n.º 13
0
/**
 * Folds two arrays into one value: for every index i the accumulator is
 * updated as acc = reop(acc, binop(vec1[i], vec2[i])), starting from 0.
 *
 * Inputs with fewer than 2000 elements are folded serially.  Larger inputs
 * are split into 4 equal slices that are folded in an OpenMP parallel loop;
 * the slice results are then added together and the n % 4 leftover elements
 * are folded in serially.
 *
 * NOTE(review): the slice results are combined with += rather than reop and
 * every slice is seeded with 0, so the parallel path only matches the serial
 * one for reductions that are compatible with that -- confirm with callers.
 *
 * @param n      number of elements in vec1 and vec2
 * @param vec1   first input array
 * @param vec2   second input array
 * @param binop  callable applied to each pair of elements
 * @param reop   callable merging the accumulator with a binop result
 * @return the final accumulator, converted to double
 */
double pmap_reduce2(unsigned n, T* vec1,T* vec2, BinaryOp binop, ReduceOp reop){
  T acc = 0;
  if(n < 2000){
    for(unsigned i = 0; i < n; ++i){
      acc = reop(acc, binop(vec1[i], vec2[i]));
    }
    return acc;
  }else{
    const unsigned thread_num = 4;
    unsigned  multi = n/thread_num;
    T temp_result[thread_num] = {0};
    int thread, n_ele;
    // reop and binop are firstprivate so that every thread works on a copy
    // of the caller's callables; a plain private clause would hand each
    // thread an uninitialized object.
#pragma omp parallel for default(none)					\
  shared(vec1, vec2,  temp_result, multi) firstprivate(reop, binop)	\
  private(thread, n_ele)
    for(thread = 0; thread < (int) thread_num; ++thread){
      for(n_ele = 0; n_ele < (int) multi; ++n_ele){
	temp_result[thread] = reop(
				   temp_result[thread], 
				   binop(
					 vec1[thread*multi + n_ele], 
					 vec2[thread*multi + n_ele]
					 )
				   );
      }
      // Diagnostic output: one digit per slice showing omp_in_parallel().
      printf("%i", omp_in_parallel());
    }

    for(unsigned thread = 0; thread < thread_num; ++thread){
      acc += temp_result[thread];
    }

    // Elements left over after the equal slices (fewer than thread_num).
    for(unsigned idx = multi*thread_num; idx < n; ++idx){
      acc = reop(acc, binop(vec1[idx], vec2[idx]));
    }
    return acc;
  }
}
Exemplo n.º 14
0
/*
 * Barrier that only takes effect while omp_in_parallel() reports a parallel
 * region.  When POSIX barriers are available the caller waits on the
 * barrier stored for the node of its current NUMA info; otherwise an OpenMP
 * barrier is issued.
 */
void ULIBC_node_barrier(void) {
  if ( !omp_in_parallel() ) {
    return;
  }

#if _POSIX_BARRIERS > 0
  const struct numainfo_t ni = ULIBC_get_current_numainfo();
  struct NUMA_barrier_t *barrier = __barrier[ni.node];

  assert( barrier );
  pthread_barrier_wait(&barrier->barrier);
#else
  OMP("omp barrier");
#endif /* _POSIX_BARRIERS */
}
Exemplo n.º 15
0
/*
 * Region-entry hook: registers the region described by p the first time it
 * is seen and then records an enter event for it.
 *
 * - Runs __rouinit() first while rou_init is set.
 * - On first sight (p->isseen is zero) a ".mod" infix is removed from
 *   p->file and the region is defined with ESD_DEF_REGION.  In OpenMP
 *   builds, registration from inside a parallel region happens in a named
 *   critical section that re-checks p->isseen.
 *   NOTE(review): nothing visible here sets p->isseen; presumably
 *   ESD_DEF_REGION does -- confirm.
 * - With CHECK_STACK the per-thread call stack bookkeeping is updated and
 *   frames deeper than EpkMaxFrames produce no enter event.
 */
void ___rouent2(struct s1 *p) {

  elg_ui4 rid;

  if (rou_init)
    {
      __rouinit();
    }

  if (!p->isseen)
    {
      char* modpos;

      /* fix OPARI output file names */
      if ( (modpos = strstr(p->file, ".mod.")) != NULL )
        {
          /* Source and destination overlap, which strcpy does not permit;
             memmove shifts the tail, including its terminator, safely. */
          memmove(modpos, modpos+4, strlen(modpos+4) + 1);
        }
      
#if (defined (ELG_OMPI) || defined (ELG_OMP))
      if (omp_in_parallel())
	{
	  /* Several threads may get here at once: serialize and re-check. */
#pragma omp critical (epk_comp_register_region)
	  {
	    if (!p->isseen) ESD_DEF_REGION(p);
	  }
	}
      else
          ESD_DEF_REGION(p);
#else
      ESD_DEF_REGION(p);
#endif
      elg_cntl_msg("Register [0x%lx]:\"%s\" with id=%d", (long)(p->rout), p->rout, p->rid);
    }
      
  rid = p->rid; /* local copy of region identifier */

#ifdef CHECK_STACK
  {
      /* update callstack tracking structures */
      long frame = cstkszv[ELG_MY_THREAD]++;
      if (frame > cstkmxv[ELG_MY_THREAD]) cstkmxv[ELG_MY_THREAD] = frame;
      if (frame < EpkMaxFrames) cstackv[ELG_MY_THREAD][frame] = rid;
      else rid = ELG_NO_ID; /* truncate frame when too deep */
  }

  /* -- enter event, if neither filtered nor truncated -- */
  if (rid != ELG_NO_ID)
      esd_enter(rid); /* -- region enter event -- */
#else
  esd_enter(rid); /* -- region enter event -- */
#endif
}
Exemplo n.º 16
0
/*
 * Function-entry hook: writes an enter record for the region named by str.
 *
 * str  region name; names containing '$' are ignored
 * id   in/out region identifier cached by the caller; -1 means the region
 *      has not been registered yet and is replaced with the registered id
 *
 * The first call opens VampirTrace and enters a "main" region.  Calls made
 * after VampirTrace is no longer alive return without doing anything.
 */
void phat_enter(char *str, int *id) {
  uint64_t time;

  /* -- if not yet initialized, initialize VampirTrace -- */
  if ( phat_init ) {
    uint32_t main_id;
    VT_MEMHOOKS_OFF();
    phat_init = 0;
    vt_open();

    main_id = register_region("main");
    time = vt_pform_wtime();
    vt_enter(&time, main_id);
    VT_MEMHOOKS_ON();
  }

  /* -- if VampirTrace already finalized, return -- */
  if ( !vt_is_alive ) return;

  /* -- ignore SUN OMP runtime functions -- */
  if ( strchr(str, '$') != NULL ) return;

  VT_MEMHOOKS_OFF();

  /* timestamp is taken before the (possibly slow) registration below */
  time = vt_pform_wtime();

  /* -- get region identifier -- */
  if ( *id == -1 ) {
    /* -- region entered the first time, register region -- */
#   if defined (VT_OMPI) || defined (VT_OMP)
    if (omp_in_parallel()) {
      /* inside a parallel region: serialize, and register only if the
         hash lookup by address does not already yield an id */
#     pragma omp critical (vt_comp_phat_1)
      {
        if ( (*id = hash_get((long) str)) == VT_NO_ID ) {
          *id = register_region(str);
        }
      }
    } else {
      *id = register_region(str);
    }
#   else
    *id = register_region(str);
#   endif
  }

  /* -- write enter record -- */
  vt_enter(&time, *id);

  VT_MEMHOOKS_ON();
}
Exemplo n.º 17
0
    // Arms a timer for the given event handler, using a platform specific
    // mechanism:
    //  - Windows: a timer-queue timer that calls abort_proc.
    //  - Mac OS X: a dedicated thread running thread_func.
    //  - otherwise: a one-shot POSIX timer delivering signal SIG, unless a
    //    parallel region is active, in which case nothing is set up.
    // ms is used as the timer period on Windows, stored as the interval on
    // Mac OS X, and is the one-shot delay in milliseconds elsewhere.
    // The non-Windows branches throw default_exception when a system call
    // fails.
    imp(unsigned ms, event_handler * eh):
        m_eh(eh) {
#ifdef _WINDOWS
        m_first = true;
        CreateTimerQueueTimer(&m_timer,			
                              NULL,				
                              abort_proc,
                              this,
                              0,				
                              ms,				
                              WT_EXECUTEINTIMERTHREAD);	
#elif defined(__APPLE__) && defined(__MACH__)
        // Mac OS X
        m_interval = ms;
        if (pthread_attr_init(&m_attributes) != 0)
            throw default_exception("failed to initialize timer thread attributes");
        if (pthread_create(&m_thread_id, &m_attributes, &thread_func, this) != 0)
            throw default_exception("failed to start timer thread");
#else
	// Linux version
        if (omp_in_parallel()) {
            // It doesn't work with more than one thread.
            // SIGEV_SIGNAL: the event is handled by the process not by the thread that installed the handler.
            // SIGEV_THREAD: the event is handled by a new thread (Z3 crashes with this configuration).
            // 
            // It seems the way to go is SIGEV_SIGNAL, but I have to find a way to identify the thread the event is meant for.
            return;
        }
	// Remember the previous global timer and signal handler.
	m_old_timer   = g_timer;
	g_timer       = this;
	m_old_handler = signal(SIG, sig_handler);

	struct sigevent sev;
        memset(&sev, 0, sizeof(sigevent));
	sev.sigev_notify = SIGEV_SIGNAL;
	sev.sigev_signo  = SIG;
	sev.sigev_value.sival_ptr = &m_timerid;
	if (timer_create(CLOCKID, &sev, &m_timerid) == -1)
	    throw default_exception("failed to create timer");

	// Convert milliseconds to nanoseconds, then split into seconds and remainder.
	unsigned long long nano = static_cast<unsigned long long>(ms) * 1000000ull;
	struct itimerspec its;
	its.it_value.tv_sec  = nano / 1000000000ull;
	its.it_value.tv_nsec = nano % 1000000000ull;
	its.it_interval.tv_sec  = 0; // timer expires once
	its.it_interval.tv_nsec = 0;
	if (timer_settime(m_timerid, 0, &its, NULL) == -1)
	    throw default_exception("failed to set timer");
#endif
    }
Exemplo n.º 18
0
 // Returns the result of omp_in_parallel() as a bool when OpenMP use is
 // enabled, and false otherwise.
 arma_inline
 static
 bool
 in_parallel()
   {
   #if !defined(ARMA_USE_OPENMP)
     {
     return false;
     }
   #else
     {
     return (omp_in_parallel() != 0);
     }
   #endif
   }
Exemplo n.º 19
0
/*
 * Compares the OpenMP runtime state with an expected team size n.
 *
 * n == 1:    omp_in_parallel() has to return 0.
 * otherwise: omp_in_parallel() has to return non-zero.
 * In both cases omp_get_num_threads() has to return n.
 *
 * Each violated expectation adds one to the global error counter inside a
 * critical section.
 */
void
check_parallel (int n)
{
  const int expect_serial = (n == 1);

  /* Wrong when the flag is set for a serial expectation, or clear for a
     parallel one. */
  if ((omp_in_parallel() != 0) == expect_serial) {
    #pragma omp critical
    {
      errors += 1;
    }
  }

  if (omp_get_num_threads() != n) {
    #pragma omp critical
    {
      errors += 1;
    }
  }
}
Exemplo n.º 20
0
Arquivo: hi.cpp Projeto: VinInn/ctest
// On the first call made by each thread, prints that thread's OpenMP number
// and the team size while holding the global output lock, plus a note when
// a parallel region is active.  Later calls from the same thread do nothing.
inline
void runparallel() {

  static __thread bool pending=true;
  if (!pending)
  {
    return;
  }
  pending=false;

  // Held until the function returns so the output lines are not interleaved.
  Lock guard(global::coutLock);

  std::cout << "thread " << omp_get_thread_num() << " of " << omp_get_num_threads() << std::endl;

  if(omp_in_parallel())
  {
    std::cout << "in parallel" << std::endl;
  }

}
Exemplo n.º 21
0
/*
 * Function-entry hook: records an enter event for the function at func.
 *
 * func      address of the entered function (on ia64 it is dereferenced
 *           once to obtain the address used for the lookup)
 * callsite  not used in this function
 *
 * The first call initializes EPIK; functions entered while that
 * initialization is in progress are reported and ignored.  Functions
 * without an entry in the hash table produce no event.
 */
void __pat_tp_func_entry(void* func, void* callsite) {
  HashNode *hn;

  void * funcptr = func;

#ifdef __ia64__
  funcptr = *( void ** )func;
#endif

  /* -- if not yet initialized, initialize EPIK -- */
  if ( cce_init ) {
    /* any value other than 1 means initialization is already under way */
    if (cce_init != 1) {
        elg_cntl_msg("Ignoring function @%p entered during initialization", func);
        return;
    }
    cce_init = -1;
    epk_open_exe();
    esd_open();
    epk_comp_status = &epk_filter_status;
    epk_comp_finalize = &cyg_profile_finalize;
    cce_init = 0;
  }

  if ( (hn = epk_hash_get((long)funcptr)) ) {
    if ( hn->elgid == ELG_NO_ID ) {
      /* -- region entered the first time, register region -- */
#     if defined (ELG_OMPI) || defined (ELG_OMP)
      if (omp_in_parallel()) {
        /* several threads may get here at once: serialize and re-check */
#       pragma omp critical (epk_comp_register_region)
        {
          if ( hn->elgid == ELG_NO_ID ) {
            hn->elgid = epk_register_region(hn->name, hn->fname, hn->lno);
          }
        }
      } else {
        hn->elgid = epk_register_region(hn->name, hn->fname, hn->lno);
      }
#     else
      hn->elgid = epk_register_region(hn->name, hn->fname, hn->lno);
#     endif
    }

    esd_enter(hn->elgid);
  }
}
Exemplo n.º 22
0
/*
 * Compares the OpenMP runtime state with the expectation v.
 *
 * v  value omp_in_parallel() is expected to return.  When v is non-zero,
 *    omp_get_num_threads() has to equal the global thds; when v is zero it
 *    has to equal 1.
 *
 * Each violated expectation adds one to the global error counter inside a
 * critical section.  The function produces no value, so it is declared void
 * instead of relying on an implicit int return type.
 */
void
check_parallel (int v)
{
  if (omp_in_parallel () != v) {
    #pragma omp critical
    errors += 1;
  }
  if (v) {
    if (omp_get_num_threads () != thds) {
      #pragma omp critical
      errors += 1;
    }
  } else {
    if (omp_get_num_threads () != 1) {
      #pragma omp critical
      errors += 1;
    }
  }
}
// Returns the point-edges addressing.  When it is not available yet,
// calcPointEdges() is called first (presumably that populates pePtr_ --
// confirm).  In USE_OMP builds, requesting the calculation from inside a
// parallel region raises a fatal error because it is not thread safe.
const VRWGraph& polyMeshGenAddressing::pointEdges() const
{
    if( !pePtr_ )
    {
        # ifdef USE_OMP
        const bool insideParallelRegion = omp_in_parallel();
        if( insideParallelRegion )
        {
            FatalErrorIn("const VRWGraph& polyMeshGenAddressing::pointEdges() const")
                << "Calculating addressing inside a parallel region."
                << " This is not thread safe"
                << exit(FatalError);
        }
        # endif

        calcPointEdges();
    }

    return *pePtr_;
}
Exemplo n.º 24
0
// Removes the most recent entry from the log context stack.  If that entry
// counts as initialized, the indent is reduced by two, an "end <name>" line
// is written to the log stream and the initialized counter is decremented.
// In OpenMP builds the call is ignored while a parallel region is active.
void pop_log_context() {
#ifdef _OPENMP
  if (omp_in_parallel()) {
    return;
  }
#endif

  if (internal::log_context_initializeds >=
      static_cast<int>(internal::log_contexts.size() - 1)) {
    internal::log_indent -= 2;

    std::string message("end ");
    message += get_context_name(internal::log_contexts.size() - 1);
    message += "\n";

    internal::stream.write(message.c_str(), message.size());
    internal::stream.strict_sync();
    --internal::log_context_initializeds;
  }

  internal::log_contexts.pop_back();
}
Exemplo n.º 25
0
/*
 * Prints the host name and a summary of the OpenMP runtime environment
 * (processors, team size, maximum threads, in-parallel flag, dynamic and
 * nested settings), as observed by thread 0 of a parallel region.
 */
int main (int argc, char *argv[]) 
{
  int nthreads, tid, procs, maxt, inpar, dynamic, nested;
  char name[50];
  
  /* Start parallel region */
#pragma omp parallel private(nthreads, tid)
  {
    
    /* Obtain thread number */
    tid = omp_get_thread_num();
    
    /* Only master thread does this
       We could also use #pragma omp master
     */
    if (tid == 0) 
      {
	printf("Thread %d getting environment info...\n", tid);
	
	/* Get host name.  A failed call leaves the buffer contents
	   unspecified, so substitute a placeholder; a truncated name is not
	   guaranteed to be terminated, so terminate it explicitly. */
	if (gethostname(name, sizeof name) != 0)
	  snprintf(name, sizeof name, "unknown");
	name[sizeof name - 1] = '\0';

	/* Get environment information */
	procs = omp_get_num_procs();
	nthreads = omp_get_num_threads();
	maxt = omp_get_max_threads();
	inpar = omp_in_parallel();
	dynamic = omp_get_dynamic();
	nested = omp_get_nested();
	
	/* Print environment information */
	printf("Hostname = %s\n", name);
	printf("Number of processors = %d\n", procs);
	printf("Number of threads = %d\n", nthreads);
	printf("Max threads = %d\n", maxt);
	printf("In parallel? = %d\n", inpar);
	printf("Dynamic threads enabled? = %d\n", dynamic);
	printf("Nested parallelism supported? = %d\n", nested);
	
      }
    
  }  /* Done */
  exit(0);
}
Exemplo n.º 26
0
void GOMP_parallel_start(void (*fn)(void *),
                         void *data,
                         unsigned nthreads)
{
    debug_printf("GOMP_parallel_start(%p, %p, %u)\n", fn, data, nthreads);

    /* Identify the number of threads that can be spawned and start the processing */
    if (!omp_in_parallel()) {
        debug_printf("not in parallel\n");

        struct omp_icv_task *icv_task = bomp_icv_task_new();
        if (!icv_task) {
            debug_printf("no icv task\n");
            return;
        }

        icv_task->active_levels = 1;
        icv_task->nthreads = omp_get_max_threads();
        debug_printf("omp_get_max_threads = %u\n", icv_task->nthreads);

        if (nthreads == 0 || (icv_task->dynamic && icv_task->nthreads < nthreads)) {
            icv_task->nthreads = OMP_GET_ICV_GLOBAL(thread_limit);
            debug_printf("resetting to = %u\n", icv_task->nthreads);
        }

        bomp_icv_set_task(icv_task);
        debug_printf("icv task set %u\n", icv_task->nthreads);

        /* start processing */
        bomp_start_processing(fn, data, 0, icv_task->nthreads);
    } else {
        if (omp_get_nested()) {
            // handle nested paralellism
            assert(!"Handling nested paralellism\n");
        }

        /* we have already started enough threads */
        uint32_t active_levels = OMP_GET_ICV_TASK(active_levels);
        //debug_printf("setting active_levels to %u\n", active_levels+1);

        OMP_SET_ICV_TASK(active_levels, active_levels+1);
    }
}
Exemplo n.º 27
0
// Copies N elements from X to Y; a no-op when both pointers are equal.
// The copy goes through cudaMemcpy when Caffe is in GPU mode (NO_GPU in
// CPU_ONLY builds) and through caffe_cpu_copy otherwise.
void caffe_copy(const int N, const Dtype* X, Dtype* Y) {
  if (X == Y) {
    return;
  }

  // If there is more than one OpenMP thread (we are in an active region)
  // then checking Caffe::mode can create an additional GPU context, so the
  // mode is only queried outside of parallel regions.
#ifdef _OPENMP
  const bool in_active_region = (omp_in_parallel() != 0);
#else
  const bool in_active_region = false;
#endif

  if (!in_active_region && (Caffe::mode() == Caffe::GPU)) {
#ifndef CPU_ONLY
    // NOLINT_NEXT_LINE(caffe/alt_fn)
    CUDA_CHECK(cudaMemcpy(Y, X, sizeof(Dtype) * N, cudaMemcpyDefault));
#else
    NO_GPU;
#endif
    return;
  }

  caffe_cpu_copy<Dtype>(N, X, Y);
}
Exemplo n.º 28
0
// Sets Y[0..N) to alpha.  In OpenMP builds the fill runs in a parallel loop
// when the caller is the major thread and N reaches a threshold derived from
// the thread count and processor speed; outside of an active parallel region
// Caffe must additionally not be in GPU mode.  Otherwise the fill is serial,
// using memset for a zero alpha and std::fill for anything else.
void caffe_set(const int N, const Dtype alpha, Dtype* Y) {
#ifdef _OPENMP
  const int max_threads = omp_get_max_threads();
  const int threshold =
      max_threads * caffe::cpu::OpenMpManager::getProcessorSpeedMHz() / 3;

  // Do not do parallel computation from non major threads
  bool run_parallel =
      caffe::cpu::OpenMpManager::isMajorThread(boost::this_thread::get_id());

  // Note: we assume GPU's CPU path is single threaded.  An inactive parallel
  // region may also mean batch 1, but no new threads are to be created for
  // GPU mode; inside an active parallel region the mode is taken to be CPU.
  const bool in_active_region = (omp_in_parallel() != 0);
  if (!in_active_region) {
    run_parallel = run_parallel && (Caffe::mode() != Caffe::GPU);
  }
  run_parallel = run_parallel && (N >= threshold);

  if (run_parallel) {
    #pragma omp parallel for
    for (int i = 0; i < N; ++i) {
      Y[i] = alpha;
    }
    return;
  }
#endif

  if (alpha != 0) {
    std::fill(Y, Y + N, alpha);
    return;
  }
  memset(Y, 0, sizeof(Dtype) * N);  // NOLINT(caffe/alt_fn)
}
Exemplo n.º 29
0
    // Releases the platform specific timer resources:
    //  - Windows: deletes the timer-queue timer.
    //  - Mac OS X: signals the condition variable, joins the timer thread
    //    and destroys the thread attributes.
    //  - otherwise: deletes the POSIX timer and restores the previous signal
    //    handler and global timer, unless a parallel region is active.
    // NOTE(review): the Mac OS X branch throws from a destructor -- confirm
    // this is acceptable for the callers.
    ~imp() {
#ifdef _WINDOWS
        DeleteTimerQueueTimer(NULL,
                              m_timer,
                              INVALID_HANDLE_VALUE);
#elif defined(__APPLE__) && defined(__MACH__)
        // Mac OS X
        pthread_cond_signal(&m_condition_var); // this is okay to fail
        if (pthread_join(m_thread_id, NULL) != 0)
            throw default_exception("failed to join thread");
        if (pthread_attr_destroy(&m_attributes) != 0)
            throw default_exception("failed to destroy pthread attributes object");
#else
	// Linux version
        if (omp_in_parallel())
            return; // see comments in the constructor.
	timer_delete(m_timerid);
	// Restore the previous handler only if one was captured successfully.
	if (m_old_handler != SIG_ERR)
	    signal(SIG, m_old_handler);
	g_timer = m_old_timer;
#endif
    }
Exemplo n.º 30
0
/*
 * Records an OpenMP fork for region rid.
 *
 * Returns without doing anything when esd_status is set, when the thread id
 * check fails, or (with a warning) when a fork path is already recorded,
 * i.e. for a nested fork.  Otherwise the fork is pushed onto the calling
 * thread's call path, that path is published in esd_forkpath, and the same
 * frame is pushed onto the path of every other thread.  In OpenMP builds a
 * fork trace event is emitted when tracing is on and no parallel region is
 * active.
 */
void esd_omp_fork (elg_ui4 rid)
{
    extern EsdPathIndex_t* esd_forkpath;

    if (esd_status) return;
    if (esd_check_thrd_id(ESD_MY_THREAD)) return;

    /* a fork path that is still set means a fork is already in progress */
    if (esd_forkpath != NULL) {
        unsigned stkframe = ESDTHRD_PATHS(thrdv[ESD_MY_THREAD])->stkframe;
        elg_warning("[%u]FORK ignoring nested fork(rid=%u) fork_rid=%u fork=%p!",
                stkframe, rid, esd_forkpath->nodeid, esd_forkpath);
        return;
    }

    esd_fork_time = elg_pform_wtime();
    esd_measurement(ESDTHRD_VALV(thrdv[ESD_MY_THREAD]), esd_fork_time);
    /* push fork path on (master's) callpath */
    esd_path_push(ESDTHRD_PATHS(thrdv[ESD_MY_THREAD]), rid, esd_fork_time);
    esd_frame_stash(ESDTHRD_PATHS(thrdv[ESD_MY_THREAD]), ESDTHRD_VALV(thrdv[ESD_MY_THREAD]));

    /* determine forkpath on master and set it for worker threads */
    esd_forkpath = ESDTHRD_PATHS(thrdv[ESD_MY_THREAD])->currpath;
    /* index of the fork path within the path vector, used for the log message */
    int currpathid = esd_forkpath - ESDTHRD_PATHS(thrdv[ESD_MY_THREAD])->pathv;
    elg_cntl_msg("fork(rid=%u, pathid=%d) fork=%p", rid, currpathid, esd_forkpath);

    /* duplicate master's fork callpath & measurement on each worker thread */
    unsigned t;
    for (t=1; t<ElgThrd_get_num_thrds(); t++) {
        /* XXXX esd_fork_time expected to be identical for all threads,
           however, HWC values would be different for each thread! */
        esd_path_push(ESDTHRD_PATHS(thrdv[t]), rid, esd_fork_time);
        /* note: the calling thread's values are stashed on every worker path */
        esd_frame_stash(ESDTHRD_PATHS(thrdv[t]), ESDTHRD_VALV(thrdv[ESD_MY_THREAD]));
    }

#if (defined (ELG_OMPI) || defined (ELG_OMP))
    if (!omp_in_parallel()) /* XXXX nested? */
        if (esd_tracing) elg_omp_fork();
#endif
}