/*
 * Runs a dynamically scheduled OpenMP loop over a[0..n-1] and prints what the
 * OpenMP query routines report inside and outside the parallel region.
 *
 * Usage: <program> <iterations> <chunk>
 *   iterations  loop trip count, capped at 200
 *   chunk       chunk size for schedule(dynamic); raised to 1 when not positive
 *
 * Exits with status -1 when fewer than two arguments are supplied.
 */
int main(int argc, char **argv)
{
    /* Largest accepted iteration count. The array is sized for it so every
     * accepted n stays in bounds (the array used to hold 16 elements while n
     * could be raised to 200). */
    enum { MAX_N = 200 };
    int i, n, chunk, a[MAX_N], suma = 0;

    if (argc < 3) {
        fprintf(stderr,"\nFalta iteraciones o chunk \n");
        exit(-1);
    }

    n = atoi(argv[1]);
    if (n > MAX_N) n = MAX_N;

    chunk = atoi(argv[2]);
    /* atoi yields 0 for unparsable text; the schedule clause needs a
     * positive chunk size. */
    if (chunk < 1) chunk = 1;

    for (i = 0; i < n; i++) a[i] = i;

    #pragma omp parallel
    {
        /* Each thread starts from the incoming suma (firstprivate); after the
         * loop suma holds the value from the thread that ran the last
         * iteration (lastprivate). */
        #pragma omp for firstprivate(suma) lastprivate(suma) schedule(dynamic,chunk)
        for (i = 0; i < n; i++) {
            suma = suma + a[i];
            //printf(" thread %d suma a[%d]=%d suma=%d \n", omp_get_thread_num(),i,a[i],suma);
        }

        #pragma omp master
        {
            printf("Dentro de 'parallel':\n");
            printf("num-threads: %d\n", omp_get_num_threads());
            printf("num-procs: %d\n", omp_get_num_procs());
            printf("in-parallel: %d\n", omp_in_parallel());
        }
    }

    //printf("Fuera de 'parallel for' suma=%d\n",suma);
    printf("Fuera de 'parallel':\n");
    printf("num-threads: %d\n", omp_get_num_threads());
    printf("num-procs: %d\n", omp_get_num_procs());
    printf("in-parallel: %d\n", omp_in_parallel());

    return 0;
}
void check_parallel (int thds) { if(thds == 0) { if (omp_in_parallel () != 0) { #pragma omp critical { ERROR (errors); } } if (omp_get_num_threads() != 1) { #pragma omp critical { ERROR (errors); } } } else { if (omp_in_parallel () == 0) { #pragma omp critical { ERROR (errors); } } if (omp_get_num_threads() != thds) { #pragma omp critical { ERROR (errors); } } } }
/*
 * Runs a dynamically scheduled "parallel for" over a[0..n-1], printing each
 * thread's running sum, and dumps the OpenMP control values seen by thread 0
 * inside the loop and by the caller after it.
 *
 * Usage: <program> <iterations> <chunk>
 *   iterations  loop trip count, capped at 200
 *   chunk       chunk size for schedule(dynamic); raised to 1 when not positive
 *
 * Exits with status -1 when fewer than two arguments are supplied.
 */
int main(int argc, char **argv)
{
    /* Fixed capacity instead of a variable-length array; n is capped to it. */
    enum { MAX_N = 200 };
    int i, n, chunk, a[MAX_N], suma = 0;
    omp_sched_t schedule_type;
    int chunk_value;

    if (argc < 3) {
        fprintf(stderr,"\nFalta iteraciones o chunk \n");
        exit(-1);
    }

    n = atoi(argv[1]);
    if (n > MAX_N) n = MAX_N;

    chunk = atoi(argv[2]);
    /* atoi yields 0 for unparsable text; the schedule clause needs a
     * positive chunk size. */
    if (chunk < 1) chunk = 1;

    for (i = 0; i < n; i++) a[i] = i;

    /* suma is copied into every thread (firstprivate) and, after the loop,
     * takes the value from the thread that ran the last iteration
     * (lastprivate). */
    #pragma omp parallel for firstprivate(suma) lastprivate(suma) \
            schedule(dynamic,chunk)
    for (i = 0; i < n; i++) {
        suma = suma + a[i];
        printf(" thread %d suma a[%d]=%d suma=%d \n",
               omp_get_thread_num(),i,a[i],suma);

        /* Only thread 0 reports the control values, once per iteration it runs. */
        if (omp_get_thread_num() == 0) {
            printf(" Dentro de 'parallel for':\n");
            printf(" static = 1, dynamic = 2, guided = 3, auto = 4\n");
            omp_get_schedule(&schedule_type, &chunk_value);
            printf(" dyn-var: %d, nthreads-var:%d, thread-limit-var:%d,run-sched-var: %d, chunk: %d\n",
                   omp_get_dynamic(),
                   omp_get_max_threads(), omp_get_thread_limit(),
                   schedule_type, chunk_value);
            printf(" get_num_threads: %d,get_num_procs: %d,in_parallel():%d \n",
                   omp_get_num_threads(),omp_get_num_procs(),omp_in_parallel());
        }
    }

    printf("Fuera de 'parallel for' suma=%d\n",suma);
    printf(" static = 1, dynamic = 2, guided = 3, auto = 4\n");
    omp_get_schedule(&schedule_type, &chunk_value);
    printf(" dyn-var: %d, nthreads-var:%d, thread-limit-var:%d,run-sched-var: %d, chunk: %d\n",
           omp_get_dynamic(),
           omp_get_max_threads(), omp_get_thread_limit(),
           schedule_type, chunk_value);
    printf(" get_num_threads: %d,get_num_procs: %d,in_parallel():%d \n",
           omp_get_num_threads(),omp_get_num_procs(),omp_in_parallel());

    return 0;
}
void caffe_cpu_copy(const int N, const Dtype* X, Dtype* Y) { if (X == Y) return; #ifdef _OPENMP int nthr = omp_get_max_threads(); int threshold = nthr * caffe::cpu::OpenMpManager::getProcessorSpeedMHz() / 3; const bool run_parallel = caffe::cpu::OpenMpManager::isMajorThread(boost::this_thread::get_id()) && (Caffe::mode() != Caffe::GPU) && (omp_in_parallel() == 0) && (N >= threshold); if (run_parallel) { const int block = 256*1024/sizeof(Dtype), remainder = N%block; #pragma omp parallel for for (int i = 0; i <= N-block; i += block) memcpy(Y+i, X+i, sizeof(Dtype) * block); // NOLINT(caffe/alt_fn) if (remainder != 0) memcpy(Y+N-remainder, X+N-remainder, // NOLINT(caffe/alt_fn) sizeof(Dtype) * remainder); return; } #endif memcpy(Y, X, sizeof(Dtype) * N); // NOLINT(caffe/alt_fn) }
static void bernoulli_generate(int n, double p, int* r) { int seed = 17 + caffe_rng_rand() % 4096; #ifdef _OPENMP int nthr = omp_get_max_threads(); int threshold = nthr * caffe::cpu::OpenMpManager::getProcessorSpeedMHz() / 3; bool run_parallel = (Caffe::mode() != Caffe::GPU) && (omp_in_parallel() == 0) && (n >= threshold); if (!run_parallel) nthr = 1; # pragma omp parallel num_threads(nthr) { const int ithr = omp_get_thread_num(); const int avg_amount = (n + nthr - 1) / nthr; const int my_offset = ithr * avg_amount; const int my_amount = std::min(my_offset + avg_amount, n) - my_offset; #else { const int my_amount = n; const int my_offset = 0; #endif VSLStreamStatePtr stream; vslNewStream(&stream, VSL_BRNG_MCG31, seed); vslSkipAheadStream(stream, my_offset); viRngBernoulli(VSL_RNG_METHOD_BERNOULLI_ICDF, stream, my_amount, r + my_offset, p); vslDeleteStream(&stream); } }
/// Reports whether execution is currently in a parallel state, i.e. whether
/// other threads may be executing at the same time.
///
/// @return true when OpenMP is enabled and omp_in_parallel() is non-zero;
///         always false in builds without OpenMP.
static bool in_parallel(void) {
#ifdef _OPENMP
    if (omp_in_parallel()) {
        return true;
    }
#endif
    return false;
}
/// Throws a Kokkos runtime exception when called from inside an active OpenMP
/// parallel region.
///
/// @param label  text placed at the front of the exception message
void OpenMPexec::verify_is_process( const char * const label )
{
  // Outside of a parallel region there is nothing to report.
  if ( ! omp_in_parallel() ) {
    return ;
  }

  std::string msg = std::string( label ) + " ERROR: in parallel" ;
  Kokkos::Impl::throw_runtime_exception( msg );
}
/// Reports whether execution is currently in a parallel state, i.e. whether
/// other threads may be executing at the same time.
///
/// @return the OpenMP in-parallel flag as a bool when OpenMP is enabled;
///         always false in builds without OpenMP.
static bool in_parallel(void) {
#ifdef _OPENMP
    const int omp_flag = omp_in_parallel();
    return omp_flag != 0;
#else
    return false;
#endif
}
/*
 * Checks omp_in_parallel(): it must report false when called from a serial
 * region and true when called from within a parallel region.
 *
 * Returns non-zero when both observations match, zero otherwise.
 */
int test_omp_in_parallel()
{
  int seen_inside = 0;

  /* Sample once before any parallel region is opened. */
  const int seen_outside = omp_in_parallel();

#pragma omp parallel
  {
    /* A single thread records what the routine reports inside the region. */
#pragma omp single
    {
      seen_inside = omp_in_parallel();
    }
  }

  if (seen_outside) {
    return 0;
  }
  return seen_inside != 0;
}
void push_log_context(const char *functionname, const void *classname) { // we don't have multithread support #ifdef _OPENMP if (!omp_in_parallel()) #endif { internal::log_contexts.push_back(std::make_pair(functionname, classname)); } }
// Tears down the OpenMP execution space instance.
//
// Throws a Kokkos runtime exception when called from inside a parallel region.
// Otherwise, if an instance exists, it is destroyed and its storage released,
// the per-thread instance state is cleared on every thread of a parallel team,
// and the recorded hardware thread maximum is reset to 1. When profiling is
// enabled, the profiling layer is finalized last.
//
// NOTE(review): the signature sits inside a preprocessor conditional whose
// opening directive is not visible from here.
void OpenMP::impl_finalize()
#endif
{
  // Finalizing from inside a parallel region is an error. The
  // ": not initialized" text can only be appended on this path.
  if ( omp_in_parallel() )
  {
    std::string msg("Kokkos::OpenMP::finalize ERROR ");
    if( !Impl::t_openmp_instance ) msg.append(": not initialized");
    if( omp_in_parallel() ) msg.append(": in parallel");
    Kokkos::Impl::throw_runtime_exception(msg);
  }

  if ( Impl::t_openmp_instance ) {
    // Silence Cuda Warning
    // Team size for the reset below: the larger of the instance's pool size
    // and the recorded hardware maximum. Read before the instance is destroyed.
    const int nthreads = Impl::t_openmp_instance->m_pool_size <= Impl::g_openmp_hardware_max_threads ? Impl::g_openmp_hardware_max_threads : Impl::t_openmp_instance->m_pool_size;
    (void) nthreads;

    using Exec = Impl::OpenMPExec;
    Exec * instance = Impl::t_openmp_instance;
    // Run the destructor explicitly, then hand the raw storage back to the
    // memory space.
    instance->~Exec();

    OpenMP::memory_space space;
    space.deallocate( instance, sizeof(Exec) );

    // Every thread of the team clears its own copy of the instance state and
    // disables allocation tracking.
    #pragma omp parallel num_threads(nthreads)
    {
      Impl::t_openmp_hardware_id = 0;
      Impl::t_openmp_instance = nullptr;
      Impl::SharedAllocationRecord< void, void >::tracking_disable();
    }

    // allow main thread to track
    Impl::SharedAllocationRecord< void, void >::tracking_enable();

    Impl::g_openmp_hardware_max_threads = 1;
  }

  #if defined(KOKKOS_ENABLE_PROFILING)
  Kokkos::Profiling::finalize();
  #endif
}
void check_parallel () { if (omp_in_parallel () != 0) { #pragma omp critical errors += 1; } if (omp_get_num_threads () != 1) { #pragma omp critical errors += 1; } }
double pmap_reduce2(unsigned n, T* vec1,T* vec2, BinaryOp binop, ReduceOp reop){ T acc = 0; if(n < 2000){ for(unsigned i = 0; i < n; ++i){ acc = reop(acc, binop(vec1[i], vec2[i])); } return acc; }else{ const unsigned thread_num = 4; unsigned multi = n/thread_num; unsigned remainder = n%thread_num; T temp_result[thread_num] = {0}; int thread, n_ele; #pragma omp parallel for default(none) \ shared(vec1, vec2, temp_result, multi) private(reop, binop, thread, n_ele) for(thread = 0; thread < (int) thread_num; ++thread){ for(n_ele = 0; n_ele < (int) multi; ++n_ele){ temp_result[thread] = reop( temp_result[thread], binop( vec1[thread*multi + n_ele], vec2[thread*multi + n_ele] ) ); } printf("%i", omp_in_parallel()); } for(unsigned thread = 0; thread < thread_num; ++thread){ acc += temp_result[thread]; } unsigned idx = multi*thread_num; switch(remainder){ case 0: break; case 1: acc = reop(acc, binop(vec1[idx], vec2[idx])); idx++; break; case 2: acc = reop(acc, binop(vec1[idx], vec2[idx])); idx++; acc = reop(acc, binop(vec1[idx], vec2[idx])); idx++; break; case 3: acc = reop(acc, binop(vec1[idx], vec2[idx])); idx++; acc = reop(acc, binop(vec1[idx], vec2[idx])); idx++; acc = reop(acc, binop(vec1[idx], vec2[idx])); idx++; break; } return acc; } };
/*
 * Barrier used from inside OpenMP parallel regions; a no-op when no parallel
 * region is active.
 *
 * With POSIX barriers available, waits on the pthread barrier stored for the
 * NUMA node reported for the calling thread. Otherwise falls back to an
 * OpenMP barrier.
 */
void ULIBC_node_barrier(void) {
  if ( !omp_in_parallel() ) {
    return;
  }

#if _POSIX_BARRIERS > 0
  {
    const struct numainfo_t here = ULIBC_get_current_numainfo();
    struct NUMA_barrier_t *node_barrier = __barrier[here.node];
    assert( node_barrier );
    pthread_barrier_wait(&node_barrier->barrier);
  }
#else
  {
    OMP("omp barrier");
  }
#endif // _POSIX_BARRIERS
}
/*
 * Region-entry hook: registers the region described by p on first use and
 * records an enter event for it.
 *
 * On the first call for a region the ".mod" part of an OPARI-generated file
 * name is stripped, the region is defined (inside a named critical section
 * when called from an OpenMP parallel region) and a control message is
 * logged. With CHECK_STACK defined, the per-thread call stack bookkeeping is
 * updated and frames beyond EpkMaxFrames are not recorded.
 */
void ___rouent2(struct s1 *p) {
  elg_ui4 rid;

  if (rou_init) {
    __rouinit();
  }

  if (!p->isseen) {
    char* modpos;

    /* fix OPARI output file names */
    if ( (modpos = strstr(p->file, ".mod.")) != NULL ) {
      /* Source and destination overlap, which strcpy does not permit;
       * memmove handles the overlap. The +1 carries the terminator along. */
      memmove(modpos, modpos+4, strlen(modpos+4) + 1);
    }

#if (defined (ELG_OMPI) || defined (ELG_OMP))
    if (omp_in_parallel()) {
      /* Re-check under the lock: another thread may have registered the
       * region in the meantime. */
#pragma omp critical (epk_comp_register_region)
      {
        if (!p->isseen) ESD_DEF_REGION(p);
      }
    } else
      ESD_DEF_REGION(p);
#else
    ESD_DEF_REGION(p);
#endif

    elg_cntl_msg("Register [0x%lx]:\"%s\" with id=%d",
                 (long)(p->rout), p->rout, p->rid);
  }

  rid = p->rid; /* local copy of region identifier */

#ifdef CHECK_STACK
  {
    /* update callstack tracking structures */
    long frame = cstkszv[ELG_MY_THREAD]++;
    if (frame > cstkmxv[ELG_MY_THREAD]) cstkmxv[ELG_MY_THREAD] = frame;
    if (frame < EpkMaxFrames) cstackv[ELG_MY_THREAD][frame] = rid;
    else rid = ELG_NO_ID; /* truncate frame when too deep */
  }
  /* -- enter event, if neither filtered nor truncated -- */
  if (rid != ELG_NO_ID) esd_enter(rid); /* -- region enter event -- */
#else
  esd_enter(rid); /* -- region enter event -- */
#endif
}
void phat_enter(char *str, int *id) { uint64_t time; /* -- if not yet initialized, initialize VampirTrace -- */ if ( phat_init ) { uint32_t main_id; VT_MEMHOOKS_OFF(); phat_init = 0; vt_open(); main_id = register_region("main"); time = vt_pform_wtime(); vt_enter(&time, main_id); VT_MEMHOOKS_ON(); } /* -- if VampirTrace already finalized, return -- */ if ( !vt_is_alive ) return; /* -- ignore SUN OMP runtime functions -- */ if ( strchr(str, '$') != NULL ) return; VT_MEMHOOKS_OFF(); time = vt_pform_wtime(); /* -- get region identifier -- */ if ( *id == -1 ) { /* -- region entered the first time, register region -- */ # if defined (VT_OMPI) || defined (VT_OMP) if (omp_in_parallel()) { # pragma omp critical (vt_comp_phat_1) { if ( (*id = hash_get((long) str)) == VT_NO_ID ) { *id = register_region(str); } } } else { *id = register_region(str); } # else *id = register_region(str); # endif } /* -- write enter record -- */ vt_enter(&time, *id); VT_MEMHOOKS_ON(); }
imp(unsigned ms, event_handler * eh): m_eh(eh) { #ifdef _WINDOWS m_first = true; CreateTimerQueueTimer(&m_timer, NULL, abort_proc, this, 0, ms, WT_EXECUTEINTIMERTHREAD); #elif defined(__APPLE__) && defined(__MACH__) // Mac OS X m_interval = ms; if (pthread_attr_init(&m_attributes) != 0) throw default_exception("failed to initialize timer thread attributes"); if (pthread_create(&m_thread_id, &m_attributes, &thread_func, this) != 0) throw default_exception("failed to start timer thread"); #else // Linux version if (omp_in_parallel()) { // It doesn't work in with more than one thread. // SIGEV_SIGNAL: the event is handled by the process not by the thread that installed the handler. // SIGEV_THREAD: the event is handled by a new thread (Z3 crashes with this configuration). // // It seems the way to go is SIGEV_SIGNAL, but I have to find a way to identify the thread the event is meant to. return; } m_old_timer = g_timer; g_timer = this; m_old_handler = signal(SIG, sig_handler); struct sigevent sev; memset(&sev, 0, sizeof(sigevent)); sev.sigev_notify = SIGEV_SIGNAL; sev.sigev_signo = SIG; sev.sigev_value.sival_ptr = &m_timerid; if (timer_create(CLOCKID, &sev, &m_timerid) == -1) throw default_exception("failed to create timer"); unsigned long long nano = static_cast<unsigned long long>(ms) * 1000000ull; struct itimerspec its; its.it_value.tv_sec = nano / 1000000000ull; its.it_value.tv_nsec = nano % 1000000000ull; its.it_interval.tv_sec = 0; // timer experies once its.it_interval.tv_nsec = 0; if (timer_settime(m_timerid, 0, &its, NULL) == -1) throw default_exception("failed to set timer"); #endif }
//! Reports whether the caller is inside an active OpenMP parallel region.
//! Always false when ARMA_USE_OPENMP is not defined.
arma_inline
static
bool
in_parallel()
  {
  #if defined(ARMA_USE_OPENMP)
    return (omp_in_parallel() != 0);
  #else
    return false;
  #endif
  }
void check_parallel (int n) { if (n == 1) { if (omp_in_parallel() != 0) { #pragma omp critical errors += 1; } if (omp_get_num_threads() != 1) { #pragma omp critical errors += 1; } } else { if (omp_in_parallel() == 0) { #pragma omp critical errors += 1; } if (omp_get_num_threads() != n) { #pragma omp critical errors += 1; } } }
// Prints, once per thread, the calling thread's OpenMP number and team size,
// plus a line saying whether a parallel region is active. Output is
// serialized through global::coutLock. Later calls on the same thread do
// nothing.
inline void runparallel()
{
    static __thread bool first=true;

    if (!first) {
        return;
    }
    first = false;

    Lock guard(global::coutLock);
    const int thread_no = omp_get_thread_num();
    const int team_size = omp_get_num_threads();
    std::cout << "thread " << thread_no << " of " << team_size << std::endl;
    if (omp_in_parallel()) {
        std::cout << "in parallel" << std::endl;
    }
}
void __pat_tp_func_entry(void* func, void* callsite) { HashNode *hn; void * funcptr = func; #ifdef __ia64__ funcptr = *( void ** )func; #endif /* -- if not yet initialized, initialize EPIK -- */ if ( cce_init ) { if (cce_init != 1) { elg_cntl_msg("Ignoring function @%p entered during initialization", func); return; } cce_init = -1; epk_open_exe(); esd_open(); epk_comp_status = &epk_filter_status; epk_comp_finalize = &cyg_profile_finalize; cce_init = 0; } if ( (hn = epk_hash_get((long)funcptr)) ) { if ( hn->elgid == ELG_NO_ID ) { /* -- region entered the first time, register region -- */ # if defined (ELG_OMPI) || defined (ELG_OMP) if (omp_in_parallel()) { # pragma omp critical (epk_comp_register_region) { if ( hn->elgid == ELG_NO_ID ) { hn->elgid = epk_register_region(hn->name, hn->fname, hn->lno); } } } else { hn->elgid = epk_register_region(hn->name, hn->fname, hn->lno); } # else hn->elgid = epk_register_region(hn->name, hn->fname, hn->lno); # endif } esd_enter(hn->elgid); } }
check_parallel (int v) { if (omp_in_parallel () != v) { #pragma omp critical errors += 1; } if (v) { if (omp_get_num_threads () != thds) { #pragma omp critical errors += 1; } } else { if (omp_get_num_threads () != 1) { #pragma omp critical errors += 1; } } }
// Returns the point-edges addressing, calculating it on first access.
//
// With USE_OMP defined, requesting the calculation from inside a parallel
// region raises a fatal error because the calculation is not thread safe.
const VRWGraph& polyMeshGenAddressing::pointEdges() const
{
    const bool needsCalculation = !pePtr_;

    if( needsCalculation )
    {
        # ifdef USE_OMP
        if( omp_in_parallel() )
        {
            FatalErrorIn
            (
                "const VRWGraph& polyMeshGenAddressing::pointEdges() const"
            )   << "Calculating addressing inside a parallel region."
                << " This is not thread safe" << exit(FatalError);
        }
        # endif

        calcPointEdges();
    }

    return *pePtr_;
}
void pop_log_context() { #ifdef _OPENMP if (!omp_in_parallel()) #endif { if (internal::log_context_initializeds >= static_cast<int>(internal::log_contexts.size() - 1)) { internal::log_indent -= 2; std::string message = std::string("end ") + get_context_name(internal::log_contexts.size() - 1) + "\n"; internal::stream.write(message.c_str(), message.size()); internal::stream.strict_sync(); --internal::log_context_initializeds; } internal::log_contexts.pop_back(); } }
/*
 * Opens a parallel region and has thread 0 print the host name together with
 * the values reported by the OpenMP environment query routines.
 *
 * The command-line arguments are not used. Always exits with status 0.
 */
int main (int argc, char *argv[])
{
    int nthreads, tid, procs, maxt, inpar, dynamic, nested;
    char name[50];

    /* Start parallel region */
    #pragma omp parallel private(nthreads, tid)
    {
        /* Obtain thread number */
        tid = omp_get_thread_num();

        /* Only the master thread does this.
           We could also use #pragma omp master */
        if (tid == 0) {
            printf("Thread %d getting environment info...\n", tid);

            /* Get host name. On failure print an empty name rather than an
               uninitialized buffer. */
            if (gethostname(name, sizeof(name)) != 0) {
                name[0] = '\0';
            }
            /* A truncated name is not guaranteed to be terminated. */
            name[sizeof(name) - 1] = '\0';

            /* Get environment information */
            procs = omp_get_num_procs();
            nthreads = omp_get_num_threads();
            maxt = omp_get_max_threads();
            inpar = omp_in_parallel();
            dynamic = omp_get_dynamic();
            nested = omp_get_nested();

            /* Print environment information */
            printf("Hostname = %s\n", name);
            printf("Number of processors = %d\n", procs);
            printf("Number of threads = %d\n", nthreads);
            printf("Max threads = %d\n", maxt);
            printf("In parallel? = %d\n", inpar);
            printf("Dynamic threads enabled? = %d\n", dynamic);
            printf("Nested parallelism supported? = %d\n", nested);
        }
    }

    /* Done */
    exit(0);
}
void GOMP_parallel_start(void (*fn)(void *), void *data, unsigned nthreads) { debug_printf("GOMP_parallel_start(%p, %p, %u)\n", fn, data, nthreads); /* Identify the number of threads that can be spawned and start the processing */ if (!omp_in_parallel()) { debug_printf("not in parallel\n"); struct omp_icv_task *icv_task = bomp_icv_task_new(); if (!icv_task) { debug_printf("no icv task\n"); return; } icv_task->active_levels = 1; icv_task->nthreads = omp_get_max_threads(); debug_printf("omp_get_max_threads = %u\n", icv_task->nthreads); if (nthreads == 0 || (icv_task->dynamic && icv_task->nthreads < nthreads)) { icv_task->nthreads = OMP_GET_ICV_GLOBAL(thread_limit); debug_printf("resetting to = %u\n", icv_task->nthreads); } bomp_icv_set_task(icv_task); debug_printf("icv task set %u\n", icv_task->nthreads); /* start processing */ bomp_start_processing(fn, data, 0, icv_task->nthreads); } else { if (omp_get_nested()) { // handle nested paralellism assert(!"Handling nested paralellism\n"); } /* we have already started enough threads */ uint32_t active_levels = OMP_GET_ICV_TASK(active_levels); //debug_printf("setting active_levels to %u\n", active_levels+1); OMP_SET_ICV_TASK(active_levels, active_levels+1); } }
void caffe_copy(const int N, const Dtype* X, Dtype* Y) { if (X != Y) { // If there are more than one openmp thread (we are in active region) // then checking Caffe::mode can create additional GPU Context // if ( #ifdef _OPENMP (omp_in_parallel() == 0) && #endif (Caffe::mode() == Caffe::GPU)) { #ifndef CPU_ONLY // NOLINT_NEXT_LINE(caffe/alt_fn) CUDA_CHECK(cudaMemcpy(Y, X, sizeof(Dtype) * N, cudaMemcpyDefault)); #else NO_GPU; #endif } else { caffe_cpu_copy<Dtype>(N, X, Y); } } }
void caffe_set(const int N, const Dtype alpha, Dtype* Y) { // If we are executing parallel region already then do not start another one // if also number of data to be processed is smaller than arbitrary: // threashold 12*4 cachelines per thread then no parallelization is to be made #ifdef _OPENMP int nthr = omp_get_max_threads(); int threshold = nthr * caffe::cpu::OpenMpManager::getProcessorSpeedMHz() / 3; bool run_parallel = // Do not do parallel computation from non major threads caffe::cpu::OpenMpManager::isMajorThread(boost::this_thread::get_id()); // Note: we Assume GPU's CPU path is single threaded if (omp_in_parallel() == 0) { // inactive parallel region may mean also batch 1, // but no new threads are to be created run_parallel = run_parallel && (Caffe::mode() != Caffe::GPU) && (N >= threshold); } else { // If we are running active parallel region then it is CPU run_parallel = run_parallel && (N >= threshold); } if (run_parallel) { #pragma omp parallel for for (int i = 0; i < N; ++i) { Y[i] = alpha; } return; } #endif if (alpha == 0) { memset(Y, 0, sizeof(Dtype) * N); // NOLINT(caffe/alt_fn) } else { std::fill(Y, Y + N, alpha); } }
// Stops the timer using the mechanism of the build platform:
//   Windows  - deletes the timer-queue timer
//   Mac OS X - signals and joins the timer thread, then destroys its attributes
//   other    - deletes the POSIX timer and restores the previous signal
//              handler and the previous current timer
// NOTE(review): the Mac OS X branch throws from a destructor - confirm that is
// intended.
~imp() {
#ifdef _WINDOWS
    DeleteTimerQueueTimer(NULL,
                          m_timer,
                          INVALID_HANDLE_VALUE);
#elif defined(__APPLE__) && defined(__MACH__)
    // Mac OS X
    pthread_cond_signal(&m_condition_var); // this is okay to fail

    const int join_rc = pthread_join(m_thread_id, NULL);
    if (join_rc != 0)
        throw default_exception("failed to join thread");

    const int destroy_rc = pthread_attr_destroy(&m_attributes);
    if (destroy_rc != 0)
        throw default_exception("failed to destroy pthread attributes object");
#else
    // Linux version
    // The constructor installs nothing inside a parallel region (see the
    // comments there), so nothing is removed here in that case.
    if (!omp_in_parallel()) {
        timer_delete(m_timerid);
        if (m_old_handler != SIG_ERR) {
            signal(SIG, m_old_handler);
        }
        g_timer = m_old_timer;
    }
#endif
}
/*
 * Records an OpenMP fork for region rid on the calling thread and mirrors the
 * fork call path onto every worker thread.
 *
 * Does nothing when esd_status is set, when the thread id check fails, or
 * when a fork path is already active (a nested fork is only warned about).
 */
void esd_omp_fork (elg_ui4 rid)
{
    extern EsdPathIndex_t* esd_forkpath;

    if (esd_status || esd_check_thrd_id(ESD_MY_THREAD)) {
        return;
    }

    /* A fork path that is still set means this fork is nested: warn and skip. */
    if (esd_forkpath != NULL) {
        unsigned stkframe = ESDTHRD_PATHS(thrdv[ESD_MY_THREAD])->stkframe;
        elg_warning("[%u]FORK ignoring nested fork(rid=%u) fork_rid=%u fork=%p!",
                    stkframe, rid, esd_forkpath->nodeid, esd_forkpath);
        return;
    }

    esd_fork_time = elg_pform_wtime();
    esd_measurement(ESDTHRD_VALV(thrdv[ESD_MY_THREAD]), esd_fork_time);

    /* push fork path on (master's) callpath */
    esd_path_push(ESDTHRD_PATHS(thrdv[ESD_MY_THREAD]), rid, esd_fork_time);
    esd_frame_stash(ESDTHRD_PATHS(thrdv[ESD_MY_THREAD]),
                    ESDTHRD_VALV(thrdv[ESD_MY_THREAD]));

    /* determine forkpath on master and set it for worker threads */
    esd_forkpath = ESDTHRD_PATHS(thrdv[ESD_MY_THREAD])->currpath;
    int fork_path_id = esd_forkpath - ESDTHRD_PATHS(thrdv[ESD_MY_THREAD])->pathv;
    elg_cntl_msg("fork(rid=%u, pathid=%d) fork=%p", rid, fork_path_id, esd_forkpath);

    /* duplicate master's fork callpath & measurement on each worker thread */
    unsigned worker;
    for (worker = 1; worker < ElgThrd_get_num_thrds(); worker++) {
        /* XXXX esd_fork_time expected to be identical for all threads,
           however, HWC values would be different for each thread! */
        esd_path_push(ESDTHRD_PATHS(thrdv[worker]), rid, esd_fork_time);
        esd_frame_stash(ESDTHRD_PATHS(thrdv[worker]),
                        ESDTHRD_VALV(thrdv[ESD_MY_THREAD]));
    }

#if (defined (ELG_OMPI) || defined (ELG_OMP))
    /* XXXX nested? */
    if (!omp_in_parallel() && esd_tracing) {
        elg_omp_fork();
    }
#endif
}