//reduce a double vector among threads double *glb_threads_reduce_double_vect(double *vect,int nel) { GET_THREAD_ID(); //fill all the threads pointers double **ptr=nissa_malloc("ptr",NACTIVE_THREADS,double*); ptr[thread_id]=vect; THREAD_BARRIER(); //reduce among threads on thread 0 for(unsigned int jthread=1;jthread<NACTIVE_THREADS;jthread++) { if(ptr[0]==ptr[jthread]) crash("overlapping vectors for threads %d and 0",jthread); NISSA_PARALLEL_LOOP(iel,0,nel) ptr[0][iel]+=ptr[jthread][iel]; } //copy to the output for(unsigned int jthread=1;jthread<NACTIVE_THREADS;jthread++) NISSA_PARALLEL_LOOP(iel,0,nel) ptr[jthread][iel]=ptr[0][iel]; //return ptr 0 double *ret=ptr[0]; nissa_free(ptr); return ret; }
/* =============================================================================
 * thread_shutdown
 * -- Raises the shutdown flag, passes the barrier as thread 0, joins the
 *    threads with index 1..global_numThread-1, then releases the barrier and
 *    the bookkeeping arrays and resets the corresponding globals
 * =============================================================================
 */
void
thread_shutdown ()
{
    /* Secondary threads test this flag right after the barrier and leave */
    global_doShutdown = TRUE;
    THREAD_BARRIER(global_barrierPtr, 0);

    /* Index 0 is skipped: only the threads from index 1 on are joined */
    long threadCount = global_numThread;
    long joinIdx = 1;
    while (joinIdx < threadCount) {
        THREAD_JOIN(global_threads[joinIdx]);
        joinIdx++;
    }

    global_numThread = 1;

    /* Drop the barrier and clear its pointer */
    THREAD_BARRIER_FREE(global_barrierPtr);
    global_barrierPtr = NULL;

    /* Drop the thread id table */
    free(global_threadIds);
    global_threadIds = NULL;

    /* Drop the thread handle table */
    free(global_threads);
    global_threads = NULL;
}
/* =============================================================================
 * thread_barrier_wait
 * -- Call after thread_start() to synchronize threads inside parallel region
 * -- Takes no arguments and returns nothing; it issues THREAD_BARRIER on the
 *    global barrier, passing the caller's id as obtained from thread_getId()
 * =============================================================================
 */
void
thread_barrier_wait()
{
#ifndef SIMULATOR
    /* The caller's id is looked up only when SIMULATOR is not defined */
    long threadId = thread_getId();
#endif /* !SIMULATOR */
    /*
     * NOTE(review): when SIMULATOR is defined, threadId is not declared, so
     * THREAD_BARRIER presumably discards its second argument in that
     * configuration -- confirm against the macro definition.
     */
    THREAD_BARRIER(global_barrierPtr, threadId);
}
/* =============================================================================
 * threadWait
 * -- Synchronizes all threads to start/stop parallel section
 * -- argPtr points to a long holding the id of the calling thread
 * -- Thread 0 runs a single round and returns; every other thread keeps
 *    cycling until global_doShutdown is found set after the start barrier
 * =============================================================================
 */
static void
threadWait (void* argPtr)
{
    long selfId = *(long*)argPtr;

    /* Record the id in the global_threadId thread-local slot */
    THREAD_LOCAL_SET(global_threadId, (long)selfId);

    do {
        THREAD_BARRIER(global_barrierPtr, selfId); /* wait for start parallel */

        /* The flag is tested only after the start barrier has been passed */
        if (global_doShutdown) {
            break;
        }

        /* Run the currently registered work function on its argument */
        global_funcPtr(global_argPtr);

        THREAD_BARRIER(global_barrierPtr, selfId); /* wait for end parallel */
    } while (selfId != 0); /* id 0 leaves after one round */
}
//evolve the momenta with force THREADABLE_FUNCTION_3ARG(evolve_lx_momenta_with_force, quad_su3*,H, quad_su3*,F, double,dt) { GET_THREAD_ID(); NISSA_PARALLEL_LOOP(ivol,0,loc_vol) for(int mu=0; mu<NDIM; mu++) for(int ic1=0; ic1<NCOL; ic1++) for(int ic2=0; ic2<NCOL; ic2++) complex_subt_the_prod_idouble(H[ivol][mu][ic1][ic2],F[ivol][mu][ic1][ic2],dt); THREAD_BARRIER(); }
//reduce a double double glb_reduce_double(double in_loc,double (*thread_op)(double,double),MPI_Op mpi_op) { double out_glb; #ifdef USE_THREADS if(!thread_pool_locked) { GET_THREAD_ID(); //copy loc in the buf and sync all the threads glb_double_reduction_buf[thread_id]=in_loc; THREAD_BARRIER(); //within master thread summ all the pieces and between MPI if(IS_MASTER_THREAD) { for(unsigned int ith=1;ith<nthreads;ith++) in_loc=thread_op(in_loc,glb_double_reduction_buf[ith]); MPI_Allreduce(&in_loc,&(glb_double_reduction_buf[0]),1,MPI_DOUBLE,mpi_op,MPI_COMM_WORLD); cache_flush(); } //read glb val THREAD_ATOMIC_EXEC(out_glb=glb_double_reduction_buf[0];);
//init the global random generator void simul_t::init_glb_rnd_gen(int seed) { GET_THREAD_ID(); if(IS_MASTER_THREAD) glb_rnd_gen.init(seed); THREAD_BARRIER(); }
/* =============================================================================
 * thread_barrier_wait
 * -- Issues THREAD_BARRIER on the global barrier, passing the id that
 *    thread_getId() reports for the calling thread
 * =============================================================================
 */
void
thread_barrier_wait()
{
    long callerId;

    /* Id of the calling thread, handed to the barrier macro below */
    callerId = thread_getId();

    THREAD_BARRIER(global_barrierPtr, callerId);
}
/* =============================================================================
 * thread_barrier_wait
 * -- Call after thread_start() to synchronize threads inside parallel region
 * -- Takes no arguments and returns nothing; the whole body is one
 *    THREAD_BARRIER invocation on the global barrier
 * =============================================================================
 */
void
thread_barrier_wait()
{
    /* In this variant the barrier macro takes only the barrier pointer */
    THREAD_BARRIER(global_barrierPtr);
}