RTR3DECL(int) RTThreadSetAffinity(PCRTCPUSET pCpuSet) { int rc; if (pCpuSet == NULL) rc = processor_bind(P_LWPID, P_MYID, PBIND_NONE, NULL); else { RTCPUSET PresentSet; int cCpusInSet = RTCpuSetCount(pCpuSet); if (cCpusInSet == 1) { unsigned iCpu = 0; while ( iCpu < RTCPUSET_MAX_CPUS && !RTCpuSetIsMemberByIndex(pCpuSet, iCpu)) iCpu++; rc = processor_bind(P_LWPID, P_MYID, iCpu, NULL); } else if ( cCpusInSet == RTCPUSET_MAX_CPUS || RTCpuSetIsEqual(pCpuSet, RTMpGetPresentSet(&PresentSet))) rc = processor_bind(P_LWPID, P_MYID, PBIND_NONE, NULL); else return VERR_NOT_SUPPORTED; } if (!rc) return VINF_SUCCESS; return RTErrConvertFromErrno(errno); }
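Unlike the cpu-set style APIs used on Linux, Solaris processor_bind() binds a process or LWP to at most one processor, or clears the binding with PBIND_NONE, which is why the wrapper above only accepts single-CPU sets or the full present set. A minimal sketch of the two underlying calls for the calling LWP (the helper names and error handling are illustrative, not taken from any of the projects quoted here):

#include <stdio.h>
#include <sys/types.h>
#include <sys/processor.h>
#include <sys/procset.h>

/* Illustrative only: bind the calling LWP to a single CPU. */
static int bind_self_to_cpu(processorid_t cpu)
{
    /* P_LWPID + P_MYID selects the calling LWP; the last argument could
       receive the previous binding but is not needed here. */
    if (processor_bind(P_LWPID, P_MYID, cpu, NULL) != 0) {
        perror("processor_bind");
        return -1;
    }
    return 0;
}

/* Illustrative only: remove any existing binding of the calling LWP. */
static int unbind_self(void)
{
    if (processor_bind(P_LWPID, P_MYID, PBIND_NONE, NULL) != 0) {
        perror("processor_bind(PBIND_NONE)");
        return -1;
    }
    return 0;
}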
static int hwloc_solaris_get_sth_cpubind(hwloc_topology_t topology, idtype_t idtype, id_t id, hwloc_bitmap_t hwloc_set, int flags __hwloc_attribute_unused) { processorid_t binding; int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE); int n; int i; if (depth < 0) { errno = ENOSYS; return -1; } /* first check if processor_bind() was used to bind to a single processor rather than to an lgroup */ if ( processor_bind(idtype, id, PBIND_QUERY, &binding) == 0 && binding != PBIND_NONE ) { hwloc_bitmap_only(hwloc_set, binding); return 0; } /* if not, check lgroups */ hwloc_bitmap_zero(hwloc_set); n = hwloc_get_nbobjs_by_depth(topology, depth); for (i = 0; i < n; i++) { hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, i); lgrp_affinity_t aff = lgrp_affinity_get(idtype, id, obj->os_index); if (aff == LGRP_AFF_STRONG) hwloc_bitmap_or(hwloc_set, hwloc_set, obj->cpuset); } if (hwloc_bitmap_iszero(hwloc_set)) hwloc_bitmap_copy(hwloc_set, hwloc_topology_get_complete_cpuset(topology)); return 0; }
void Affinity_unBind(){ if(processor_bind(P_LWPID, P_MYID, PBIND_NONE, NULL)<0){ printf("Couldn't unbind thread\n"); exit(1); /* exit with a failure status when the unbind fails */ } return; }
void Affinity_Bind_Thread(uint32_t thread){ //thread = ApplicationToPhysicalMapping[thread]; if(processor_bind(P_LWPID, P_MYID, thread, NULL)<0){ printf("Couldn't bind thread %u\n",thread); /* %u: thread is unsigned */ } return; }
void Affinity_Init(){ uint32_t i; // printf("implementing NUMA via <sys/processor.h>\n"); //for(i=0;i<MAX_THREADS;i++)ApplicationToPhysicalMapping[i]=0; int NUMA_Cores=0; for(i=0;i<MAX_THREADS;i++)if(processor_bind(P_LWPID, P_MYID, i, NULL)>=0){ //ApplicationToPhysicalMapping[NUMA_Cores]=i; NUMA_Cores++; } NUMA_CoresPerNode = NUMA_Cores; NUMA_Sockets=1; if(NUMA_Cores> 96){NUMA_CoresPerNode = NUMA_Cores/2;NUMA_Sockets=2;} if(NUMA_Cores>192){NUMA_CoresPerNode = NUMA_Cores/4;NUMA_Sockets=4;} // printf("Found %d sockets each with %d cores(%d)\n",NUMA_Sockets,NUMA_CoresPerNode,NUMA_Cores); processor_bind(P_LWPID, P_MYID, PBIND_NONE, NULL); return; }
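Probing by trying to bind to every id below MAX_THREADS works, but it temporarily rebinds the calling thread and assumes processor ids are dense. A sketch of a non-intrusive alternative, assuming only the standard Solaris p_online() and sysconf(_SC_CPUID_MAX) interfaces (list_online_cpus() is an invented name):

#include <unistd.h>
#include <sys/types.h>
#include <sys/processor.h>

/* Illustrative only: collect the ids of online processors without
 * touching the caller's binding.  Processor ids need not be contiguous,
 * so every id up to the configured maximum is checked. */
static int list_online_cpus(processorid_t *ids, int max_ids)
{
    int count = 0;
    processorid_t id;
    processorid_t max_id = (processorid_t)sysconf(_SC_CPUID_MAX);

    for (id = 0; id <= max_id && count < max_ids; id++) {
        int status = p_online(id, P_STATUS);
        if (status == P_ONLINE || status == P_NOINTR)
            ids[count++] = id;
    }
    return count; /* number of online processors found */
}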
bool ph_thread_set_affinity(ph_thread_t *me, int affinity) { #ifdef HAVE_PTHREAD_SETAFFINITY_NP # ifdef __linux__ cpu_set_t set; # else /* FreeBSD */ cpuset_t set; # endif CPU_ZERO(&set); CPU_SET(affinity, &set); return pthread_setaffinity_np(me->thr, sizeof(set), &set) == 0; #elif defined(__APPLE__) thread_affinity_policy_data_t data; data.affinity_tag = affinity + 1; return thread_policy_set(pthread_mach_thread_np(me->thr), THREAD_AFFINITY_POLICY, (thread_policy_t)&data, THREAD_AFFINITY_POLICY_COUNT) == 0; #elif defined(HAVE_CPUSET_SETAFFINITY) /* untested bsdish */ cpuset_t set; CPU_ZERO(&set); CPU_SET(affinity, &set); return cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, sizeof(set), &set) == 0; #elif defined(HAVE_PROCESSOR_BIND) return processor_bind(P_LWPID, me->lwpid, affinity, NULL) == 0; #endif return true; }
void ProcessorMap::BindToPhysicalCPU( int pproc ) const { /* Verify pproc is in the physical cpu array */ int lcpu = -1; for( int i=0; i<m_nProcs;i++ ) { if( m_p_nProcessor_Ids[i] == pproc ) { lcpu = i; break; } } if( lcpu != -1 ) { #ifdef OS_SOLARIS if( processor_bind(P_LWPID, P_MYID, pproc, NULL) < 0 ) { fatal("Call to processor_bind() failed for physical CPU %i\n",pproc); } #endif #ifdef OS_LINUX cpu_set_t myProc; CPU_ZERO( &myProc ); CPU_SET( pproc, &myProc ); if( sched_setaffinity(0, sizeof(myProc), &myProc) < 0 ) { fatal("Call to sched_setaffinity() failed for physical CPU %i\n",pproc); } #endif } else { fatal("Failed to bind to processor %i\n -- Processor does not exist!",pproc ); } }
int proc_unbind_thread () { #ifdef _LINUX_ return sched_setaffinity (0, sizeof (cpu_set_t), proc_get_full_set()); #elif defined (_SOLARIS_) return processor_bind (P_LWPID, P_MYID, PBIND_NONE, NULL); #endif }
static int ATL_setmyaffinity() /* * Attempts to sets the affinity of an already-running thread. The * aff_set flag is set to true whether we succeed or not (no point in * trying multiple times). * RETURNS: 0 on success, non-zero error code on error */ { int bindID; bindID = omp_get_thread_num(); #ifdef ATL_RANK_IS_PROCESSORID bindID = bindID % ATL_AFF_NUMID; #else bindID = ATL_affinityIDs[bindID%ATL_AFF_NUMID]; #endif #ifdef ATL_PAFF_PLPA plpa_cpu_set_t cpuset; PLPA_CPU_ZERO(&cpuset); PLPA_CPU_SET(bindID, &cpuset); if (me->paff_set) return(0); me->paff_set = 1; return(plpa_sched_setaffinity((pid_t)0, sizeof(cpuset), &cpuset)); #elif defined(ATL_PAFF_PBIND) return(processor_bind(P_LWPID, P_MYID, bindID, NULL)); #elif defined(ATL_PAFF_SCHED) cpu_set_t cpuset; CPU_ZERO(&cpuset); CPU_SET(bindID, &cpuset); if (me->paff_set) return(0); me->paff_set = 1; return(sched_setaffinity(0, sizeof(cpuset), &cpuset)); #elif defined (ATL_PAFF_RUNON) if (me->paff_set) return(0); me->paff_set = 1; return(pthread_setrunon_np(bindID)); #elif defined(ATL_PAFF_BINDP) if (me->paff_set) return(0); me->paff_set = 1; return(bindprocessor(BINDTHREAD, thread_self(), bindID)); #elif defined(ATL_PAFF_CPUSET) /* untried FreeBSD code */ cpuset_t mycpuset; CPU_ZERO(&mycpuset); /* no manpage, so guess works like linux */ CPU_SET(bindID, &mycpuset); if (me->paff_set) return(0); me->paff_set = 1; return(cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, sizeof(mycpuset), &mycpuset)); #endif return(0); }
int pr_processor_bind(struct ps_prochandle *Pr, idtype_t idtype, id_t id, int processorid, int *obind) { sysret_t rval; /* return value */ argdes_t argd[4]; /* arg descriptors */ argdes_t *adp = &argd[0]; /* first argument */ int error; if (Pr == NULL) /* no subject process */ return (processor_bind(idtype, id, processorid, obind)); adp->arg_value = idtype; /* idtype */ adp->arg_object = NULL; adp->arg_type = AT_BYVAL; adp->arg_inout = AI_INPUT; adp->arg_size = 0; adp++; adp->arg_value = id; /* id */ adp->arg_object = NULL; adp->arg_type = AT_BYVAL; adp->arg_inout = AI_INPUT; adp->arg_size = 0; adp++; adp->arg_value = processorid; /* processorid */ adp->arg_object = NULL; adp->arg_type = AT_BYVAL; adp->arg_inout = AI_INPUT; adp->arg_size = 0; adp++; if (obind == NULL) { adp->arg_value = 0; /* obind */ adp->arg_object = NULL; adp->arg_type = AT_BYVAL; adp->arg_inout = AI_INPUT; adp->arg_size = 0; } else { adp->arg_value = 0; adp->arg_object = obind; adp->arg_type = AT_BYREF; adp->arg_inout = AI_INOUT; adp->arg_size = sizeof (int); } error = Psyscall(Pr, &rval, SYS_processor_bind, 4, &argd[0]); if (error) { errno = (error < 0)? ENOSYS : error; return (-1); } return (rval.sys_rval1); }
bool threadBindToProcessor(threadid_t tid, int pnumber) { #if defined(_WIN32) HANDLE hThread = OpenThread(THREAD_ALL_ACCESS, 0, tid); if (!hThread) return false; bool success = SetThreadAffinityMask(hThread, 1 << pnumber) != 0; CloseHandle(hThread); return success; #elif (defined(__FreeBSD__) && (__FreeBSD_version >= 702106)) \ || defined(__linux) || defined(linux) cpu_set_t cpuset; CPU_ZERO(&cpuset); CPU_SET(pnumber, &cpuset); return pthread_setaffinity_np(tid, sizeof(cpuset), &cpuset) == 0; #elif defined(__sun) || defined(sun) return processor_bind(P_LWPID, MAKE_LWPID_PTHREAD(tid), pnumber, NULL) == 0; #elif defined(_AIX) return bindprocessor(BINDTHREAD, (tid_t)tid, pnumber) == 0; #elif defined(__hpux) || defined(hpux) pthread_spu_t answer; return pthread_processor_bind_np(PTHREAD_BIND_ADVISORY_NP, &answer, pnumber, tid) == 0; #elif defined(__APPLE__) struct thread_affinity_policy tapol; thread_port_t threadport = pthread_mach_thread_np(tid); tapol.affinity_tag = pnumber + 1; return thread_policy_set(threadport, THREAD_AFFINITY_POLICY, (thread_policy_t)&tapol, THREAD_AFFINITY_POLICY_COUNT) == KERN_SUCCESS; #else return false; #endif }
/* Bind the calling thread to run on CPU_ID. Returns 0 if successful, -1 if failed. */ int proc_bind_thread (int cpu_id) { #ifdef _LINUX_ cpu_set_t cpu_set; CPU_ZERO (&cpu_set); CPU_SET (cpu_id, &cpu_set); return sched_setaffinity (0, sizeof (cpu_set), &cpu_set); #elif defined (_SOLARIS_) return processor_bind (P_LWPID, P_MYID, cpu_id, NULL); #endif }
int proc_unbind_thread () { // Forbid thread binding if (getenv("MAPRED_NO_BINDING") != NULL) { return 0; } #ifdef _LINUX_ return sched_setaffinity (0, sizeof (cpu_set_t), proc_get_full_set()); #elif defined (_SOLARIS_) return processor_bind (P_LWPID, P_MYID, PBIND_NONE, NULL); #endif }
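A usage sketch of the two helpers above; run_pinned() and its error messages are illustrative names, not taken from the original code:

#include <stdio.h>

int proc_bind_thread (int cpu_id);   /* defined above */
int proc_unbind_thread ();           /* defined above */

/* Illustrative only: pin the calling thread around a phase of work,
 * then release the binding again. */
static void run_pinned(int cpu_id)
{
    if (proc_bind_thread(cpu_id) != 0)
        fprintf(stderr, "warning: could not bind to CPU %d\n", cpu_id);

    /* ... work that should stay on cpu_id ... */

    if (proc_unbind_thread() != 0)
        fprintf(stderr, "warning: could not clear the CPU binding\n");
}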
static void * soaker(void *arg) { struct tstate *state = arg; pcparms_t pcparms; fxparms_t *fx = (fxparms_t *)pcparms.pc_clparms; if (processor_bind(P_LWPID, P_MYID, state->cpuid, NULL) != 0) (void) fprintf(stderr, gettext("%s: couldn't bind soaker " "thread to cpu%d: %s\n"), opts->pgmname, state->cpuid, strerror(errno)); /* * Put the soaker thread in the fixed priority (FX) class so it runs * at the lowest possible global priority. */ pcparms.pc_cid = fxinfo.pc_cid; fx->fx_upri = 0; fx->fx_uprilim = 0; fx->fx_tqsecs = fx->fx_tqnsecs = FX_TQDEF; if (priocntl(P_LWPID, P_MYID, PC_SETPARMS, &pcparms) != 0) (void) fprintf(stderr, gettext("%s: couldn't put soaker " "thread in FX sched class: %s\n"), opts->pgmname, strerror(errno)); /* * Let the parent thread know we're ready to roll. */ (void) mutex_lock(&state->soak_lock); state->soak_state = SOAK_RUN; (void) cond_signal(&state->soak_cv); (void) mutex_unlock(&state->soak_lock); for (;;) { spin: (void) mutex_lock(&state->soak_lock); if (state->soak_state == SOAK_RUN) { (void) mutex_unlock(&state->soak_lock); goto spin; } while (state->soak_state == SOAK_PAUSE) (void) cond_wait(&state->soak_cv, &state->soak_lock); (void) mutex_unlock(&state->soak_lock); } /*NOTREACHED*/ return (NULL); }
void *worker_thread(void *arg) { struct worker_args *args = arg; long *block = blocks[args->block]; pthread_mutex_t *lock = locks[args->block]; #if defined(AFFINITY) && defined(__sun) if (processor_bind(P_LWPID, P_MYID, args->cpu, NULL)) { perror("processor_bind"); } #endif // initialize per-thread random number generator myrandstate_t r; init_myrand(&r, args->seed1, args->seed2); int c; long swap; int seq_i[batchsize]; int seq_j[batchsize]; for (;;) { for (c = 0; c < batchsize; c++) { seq_i[c] = get_uniform(&r) * blocksize; seq_j[c] = get_uniform(&r) * blocksize; } pthread_mutex_lock(lock); for (c = 0; c < batchsize; c++) { swap = block[seq_i[c]]; block[seq_i[c]] = block[seq_j[c]]; block[seq_j[c]] = swap; } pthread_mutex_unlock(lock); #if defined(__sun) atomic_add_int(&counters[args->id], 1); #endif #if defined(linux) __sync_fetch_and_add(&counters[args->id], 1); #endif //printf("[Thread %d] Using CPU %d\n", args->id, sched_getcpu()); } return NULL; }
bool Thread::bindToProcessor(unsigned int proc_number) { #if defined(__ANDROID__) return false; #elif defined(_WIN32) return SetThreadAffinityMask(m_thread_handle, 1 << proc_number); #elif __FreeBSD_version >= 702106 || defined(__linux) || defined(linux) cpu_set_t cpuset; CPU_ZERO(&cpuset); CPU_SET(proc_number, &cpuset); return pthread_setaffinity_np(m_thread_handle, sizeof(cpuset), &cpuset) == 0; #elif defined(__sun) || defined(sun) return processor_bind(P_LWPID, P_MYID, proc_number, NULL) == 0; #elif defined(_AIX) return bindprocessor(BINDTHREAD, m_kernel_thread_id, proc_number) == 0; #elif defined(__hpux) || defined(hpux) pthread_spu_t answer; return pthread_processor_bind_np(PTHREAD_BIND_ADVISORY_NP, &answer, proc_number, m_thread_handle) == 0; #elif defined(__APPLE__) struct thread_affinity_policy tapol; thread_port_t threadport = pthread_mach_thread_np(m_thread_handle); tapol.affinity_tag = proc_number + 1; return thread_policy_set(threadport, THREAD_AFFINITY_POLICY, (thread_policy_t)&tapol, THREAD_AFFINITY_POLICY_COUNT) == KERN_SUCCESS; #else return false; #endif }
void setaffinity(int c) { #if defined (__SVR4) && defined (__sun) processorid_t obind; if (processor_bind(P_LWPID, P_MYID, c, &obind) < 0) edie("setaffinity, processor_bind failed"); #else cpu_set_t cpuset; CPU_ZERO(&cpuset); CPU_SET(c, &cpuset); if (sched_setaffinity(0, sizeof(cpuset), &cpuset) < 0) edie("setaffinity, sched_setaffinity failed"); #endif }
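The obind out-parameter captured above can also be used to restore the previous binding once a pinned section is finished; a minimal sketch, assuming the caller is a Solaris LWP (with_temporary_binding() is an invented name):

#include <sys/types.h>
#include <sys/processor.h>
#include <sys/procset.h>

/* Illustrative only: temporarily bind the calling LWP to `cpu`,
 * remembering the previous binding so it can be restored afterwards. */
static int with_temporary_binding(processorid_t cpu)
{
    processorid_t old = PBIND_NONE;

    if (processor_bind(P_LWPID, P_MYID, cpu, &old) != 0)
        return -1;

    /* ... work that must run on `cpu` ... */

    /* Restore whatever was in effect before (possibly PBIND_NONE). */
    return processor_bind(P_LWPID, P_MYID, old, NULL);
}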
RTR3DECL(int) RTThreadGetAffinity(PRTCPUSET pCpuSet) { processorid_t iOldCpu; int rc = processor_bind(P_LWPID, P_MYID, PBIND_QUERY, &iOldCpu); if (rc) return RTErrConvertFromErrno(errno); if (iOldCpu == PBIND_NONE) RTMpGetPresentSet(pCpuSet); else { RTCpuSetEmpty(pCpuSet); if (RTCpuSetAdd(pCpuSet, iOldCpu) != 0) return VERR_INTERNAL_ERROR_5; } return VINF_SUCCESS; }
/* This get function returns the CPU id that is currently bound, * and then sets the cpumask. */ static int solaris_module_get(opal_paffinity_base_cpu_set_t *cpumask) { processorid_t obind; if (0 != processor_bind(P_PID, P_MYID, PBIND_QUERY, &obind)) { return OPAL_ERR_IN_ERRNO; } opal_output_verbose(5, opal_paffinity_base_output, "paffinity:solaris: obind=%d", obind); /* if no processor is bound, just zero out and return */ OPAL_PAFFINITY_CPU_ZERO(*cpumask); if (PBIND_NONE != obind) { OPAL_PAFFINITY_CPU_SET(obind, *cpumask); } return OPAL_SUCCESS; }
/* Bind the calling thread to run on CPU_ID. Returns 0 if successful, -1 if failed. */ int proc_bind_thread (int cpu_id) { // Forbid thread binding if (getenv("MAPRED_NO_BINDING") != NULL) { return 0; } #ifdef _LINUX_ cpu_set_t cpu_set; CPU_ZERO (&cpu_set); CPU_SET (cpu_id, &cpu_set); return sched_setaffinity (0, sizeof (cpu_set), &cpu_set); #elif defined (_SOLARIS_) return processor_bind (P_LWPID, P_MYID, cpu_id, NULL); #endif }
// interface for pthreads_create function static void *thread_run( void *data ) { processorid_t oproc_bind; // previous processor the thread was bound to Thread *thread = (Thread *) data; // bind this process to a specific processor (if specified) if (thread->m_procid != -1) { int status = processor_bind( P_LWPID, P_MYID, thread->m_procid, &oproc_bind ); if ( status != 0 ) { printf( "warning: processor bind status to proc %d == %d\n", thread->m_procid, status ); } // printf( "old proc %d\n", oproc_bind ); } // calls thread "run()" action in a separate thread return (thread->run()); }
/* this gives us a cpumask which tells which CPU to bind */ static int solaris_module_set(opal_paffinity_base_cpu_set_t cpumask) { processorid_t cpuid; /* Find the location of the target CPU in the cpumask. */ cpuid = cpumask_to_id(cpumask); if (-1 == cpuid) { opal_output(0, "paffinity:solaris: Error when converting cpumask to id"); return OPAL_ERR_IN_ERRNO; } if (0 != processor_bind(P_PID, P_MYID, cpuid, NULL)) { opal_output(0, "paffinity:solaris: Error when binding to CPU #%d: %s", cpuid, strerror(errno)); return OPAL_ERR_IN_ERRNO; } opal_output_verbose(5, opal_paffinity_base_output, "paffinity:solaris: Successfully bound to CPU #%d", cpuid); return OPAL_SUCCESS; }
void* thread_loop(void* tmp) { int threadId = (int) tmp; int ret; /* Bind the thread to a processor. This will make sure that each of * threads are on a different processor. processorIds[threadId] * specifies the processor ID which the thread is binding to. */ ret = processor_bind(P_LWPID, P_MYID, processorIds[threadId], NULL); assert(ret == 0); #ifdef DEBUG printf("thread id = %d, proc id = %d\n", threadId, processorIds[threadId]); #endif // Each thread accesses locks in random order and increments the counter for(int i=0; i<acquires; ++i){ int lock_index = access_order[threadId][i]; #ifdef DEBUG printf("Thread %d trying lock %d\n", threadId, lock_index); #endif acquire_lock(&(lock_array[lock_index]), threadId, &(counter_array[lock_index])); // update counter counter_array[lock_index].count_var = threadId; #ifdef DEBUG printf("Thread %d releasing lock %d\n", threadId, lock_index); #endif release_lock(&(lock_array[lock_index])); // wait a small number of cycles delay(small_spin_length); } }
int ATL_thread_start(ATL_thread_t *thr, int proc, int JOINABLE, void *(*rout)(void*), void *arg) /* * Creates a thread that will run only on processor proc. * RETURNS: 0 on success, non-zero on error * NOTE: present implementation dies on error, so 0 is always returned. */ { #ifdef ATL_WINTHREADS #ifdef ATL_WIN32THREADS DWORD thrID; #else unsigned thrID; #endif #ifdef ATL_NOAFFINITY #ifdef ATL_WIN32THREADS thr->thrH = CreateThread(NULL, 0, rout, arg, 0, &thrID); #else thr->thrH = (HANDLE)_beginthreadex(NULL, 0, rout, arg, 0, &thrID); #endif ATL_assert(thr->thrH); #else thr->rank = proc; #ifdef ATL_WIN32THREADS thr->thrH = CreateThread(NULL, 0, rout, arg, CREATE_SUSPENDED, &thrID); #else thr->thrH = (HANDLE)_beginthreadex(NULL, 0, rout, arg, CREATE_SUSPENDED, &thrID); #endif ATL_assert(thr->thrH); #ifdef ATL_RANK_IS_PROCESSORID ATL_assert(SetThreadAffinityMask(thr->thrH, (1<<proc))); #else ATL_assert(SetThreadAffinityMask(thr->thrH, (1<<ATL_affinityIDs[proc%ATL_AFF_NUMID]))); #endif ATL_assert(ResumeThread(thr->thrH) == 1); #endif #elif defined(ATL_OMP_THREADS) fprintf(stderr, "Should not call thread_start when using OpenMP!"); ATL_assert(0); #elif 0 && defined(ATL_OS_OSX) /* unchecked special OSX code */ /* http://developer.apple.com/library/mac/#releasenotes/Performance/RN-AffinityAPI/_index.html */ pthread_attr_t attr; #define ATL_OSX_AFF_SETS 2 /* should be probed for */ thread_affinity_policy ap; ap.affinity_tag = proc % ATL_OSX_AFF_SETS; ATL_assert(!pthread_attr_init(&attr)); if (JOINABLE) ATL_assert(!pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE)); else ATL_assert(!pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED)); pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM); /* no chk, OK to fail */ ATL_assert(!pthread_create(&thr->thrH, &attr, rout, arg)); ATL_assert(!thread_policy_set(thr->thrH, THREAD_AFFINITY_POLICY, (integer_t*)&ap, THREAD_AFFINITY_POLICY_COUNT)); ATL_assert(!pthread_attr_destroy(&attr)); #else pthread_attr_t attr; #ifndef ATL_NOAFFINITY #if defined(ATL_PAFF_SETAFFNP) || defined(ATL_PAFF_SCHED) cpu_set_t cpuset; #elif defined(ATL_PAFF_PLPA) plpa_cpu_set_t cpuset; #elif defined(ATL_PAFF_CPUSET) /* untried FreeBSD code */ cpuset_t mycpuset; #endif #ifdef ATL_RANK_IS_PROCESSORID const int affID = proc; #else const int affID = ATL_affinityIDs[proc%ATL_AFF_NUMID]; #endif #ifdef ATL_PAFF_SELF thr->paff_set = 0; /* affinity must be set by created thread */ #endif #endif thr->rank = proc; ATL_assert(!pthread_attr_init(&attr)); if (JOINABLE) { #ifdef IBM_PT_ERROR ATL_assert(!pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_UNDETACHED)); #else ATL_assert(!pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE)); #endif } else ATL_assert(!pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED)); pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM); /* no chk, OK to fail */ #ifdef ATL_PAFF_SETAFFNP CPU_ZERO(&cpuset); CPU_SET(affID, &cpuset); ATL_assert(!pthread_attr_setaffinity_np(&attr, sizeof(cpuset), &cpuset)); #elif defined(ATL_PAFF_SETPROCNP) ATL_assert(!pthread_attr_setprocessor_np(&attr, (pthread_spu_t)affID, PTHREAD_BIND_FORCED_NP)); #endif ATL_assert(!pthread_create(&thr->thrH, &attr, rout, arg)); #if defined(ATL_PAFF_PBIND) ATL_assert(!processor_bind(P_LWPID, thr->thrH, affID, NULL)); thr->paff_set = 0; /* affinity set by spawner */ #elif defined(ATL_PAFF_BINDP) ATL_assert(!bindprocessor(BINDTHREAD, thr->thrH, bindID)); thr->paff_set = 0; /* affinity set by spawner */ #elif defined(ATL_PAFF_CPUSET) /* untried FreeBSD code */ 
CPU_ZERO(&mycpuset); /* no manpage, so guess works like linux */ CPU_SET(affID, &mycpuset); if (!cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_TID, thr->thrH, sizeof(mycpuset), &mycpuset)) thr->paff_set = 0; /* affinity set by spawner */ #endif ATL_assert(!pthread_attr_destroy(&attr)); #endif return(0); }
static void bind_to_processor(int child_num) { /* This routine will bind the calling process to a particular */ /* processor. We are not choosy as to which processor, so it will be */ /* the process id mod the number of processors - shifted by one for */ /* those systems which name processor starting from one instead of */ /* zero. on those systems where I do not yet know how to bind a */ /* process to a processor, this routine will be a no-op raj 10/95 */ /* just as a reminder, this is *only* for the looper processes, not */ /* the actual measurement processes. those will, should, MUST float */ /* or not float from CPU to CPU as controlled by the operating */ /* system defaults. raj 12/95 */ #ifdef __hpux #include <sys/syscall.h> #include <sys/mp.h> int old_cpu = -2; if (debug) { fprintf(where, "child %d asking for CPU %d as pid %d with %d CPUs\n", child_num, (child_num % lib_num_loc_cpus), getpid(), lib_num_loc_cpus); fflush(where); } SETPROCESS((child_num % lib_num_loc_cpus), getpid()); return; #else #if defined(__sun) && defined(__SVR4) /* should only be Solaris */ #include <sys/processor.h> #include <sys/procset.h> int old_binding; if (debug) { fprintf(where, "bind_to_processor: child %d asking for CPU %d as pid %d with %d CPUs\n", child_num, (child_num % lib_num_loc_cpus), getpid(), lib_num_loc_cpus); fflush(where); } if (processor_bind(P_PID, getpid(), (child_num % lib_num_loc_cpus), &old_binding) != 0) { fprintf(where,"bind_to_processor: unable to perform processor binding\n"); fprintf(where," errno %d\n",errno); fflush(where); } return; #else #ifdef WIN32 if (!SetThreadAffinityMask(GetCurrentThread(), (ULONG_PTR)1 << (child_num % lib_num_loc_cpus))) { perror("SetThreadAffinityMask failed"); fflush(stderr); } if (debug) { fprintf(where, "bind_to_processor: child %d asking for CPU %d of %d CPUs\n", child_num, (child_num % lib_num_loc_cpus), lib_num_loc_cpus); fflush(where); } #endif return; #endif /* __sun && _SVR4 */ #endif /* __hpux */ }
/** schedule_tasks() * thread_func - function pointer to process splitter data * splitter_func - splitter function pointer * splitter_init - splitter_init function pointer * runs map tasks in a new thread on each the available processors. * returns pointer intermediate value array */ static inline void schedule_tasks(thread_wrapper_arg_t *th_arg) { assert(th_arg); pthread_attr_t attr; // parameter for pthread creation thread_wrapper_arg_t * curr_th_arg; // arg for thread_wrapper() int thread_cnt; // counter of number threads assigned assigned int curr_proc; int curr_thread; int num_threads = getNumTaskThreads(th_arg->func_type); int threads_per_proc = num_threads / g_state.num_procs; int threads_mod_procs = num_threads % g_state.num_procs; int pos = 0; // position of next result in the array pthread_mutex_t splitter_lock; // lock for splitter function g_state.tinfo = (thread_info_t *)CALLOC(num_threads, sizeof(thread_info_t)); CHECK_ERROR(pthread_mutex_init(&splitter_lock, NULL) != 0); dprintf("Number of available processors = %d\n", g_state.num_procs); dprintf("Number of Threads to schedule = %d per(%d) mod(%d)\n", num_threads, threads_per_proc, threads_mod_procs); th_arg->pos = &pos; th_arg->splitter_lock = &splitter_lock; // thread must be scheduled systemwide pthread_attr_init(&attr); pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM); #ifdef _LINUX_ unsigned long cpu_set; // bit array of available processors // Create a thread for each availble processor to handle the split data CHECK_ERROR(sched_getaffinity(0, sizeof(cpu_set), &cpu_set) == -1); for (thread_cnt = curr_proc = 0; curr_proc < sizeof(cpu_set) && thread_cnt < num_threads; curr_proc++) { if (isCpuAvailable(cpu_set, curr_proc)) { #endif #ifdef _SOLARIS_ int max_procs = sysconf(_SC_NPROCESSORS_ONLN); for (thread_cnt = curr_proc = 0; thread_cnt < num_threads; curr_proc++) { if (P_ONLINE == p_online(curr_proc, P_STATUS)) { #endif for (curr_thread = !(threads_mod_procs-- > 0); curr_thread <= threads_per_proc && thread_cnt < num_threads; curr_thread++, thread_cnt++) { // Setup data to be passed to each thread curr_th_arg = (thread_wrapper_arg_t*)MALLOC(sizeof(thread_wrapper_arg_t)); memcpy(curr_th_arg, th_arg, sizeof(thread_wrapper_arg_t)); curr_th_arg->cpu_id = curr_proc; g_state.tinfo[thread_cnt].cpuid = curr_proc; //fprintf(stderr, "Starting thread %d on cpu %d\n", thread_cnt, curr_th_arg->cpu_id); switch (th_arg->func_type) { case MAP: CHECK_ERROR(pthread_create(&g_state.tinfo[thread_cnt].tid, &attr, map_worker, curr_th_arg) != 0); break; case REDUCE: CHECK_ERROR(pthread_create(&g_state.tinfo[thread_cnt].tid, &attr, reduce_worker, curr_th_arg) != 0); break; case MERGE: CHECK_ERROR(pthread_create(&g_state.tinfo[thread_cnt].tid, &attr, merge_worker, curr_th_arg) != 0); break; default: assert(0); break; } } } /*** ADDED BY RAM TO ASSIGN EACH PTHREAD TO HARDWARE THREADS ON DIFFERENT PROCESSORS ON THE ULTRASPARC T1 ****/ if (getenv("MR_AFARA") != NULL) { //fprintf(stderr, "Using sparse threads\n"); curr_proc += 3; if (curr_proc >= max_procs-1) { curr_proc++; curr_proc = curr_proc % max_procs; } } } dprintf("Status: All %d threads have been created\n", num_threads); // barrier, wait for all threads to finish for (thread_cnt = 0; thread_cnt < num_threads; thread_cnt++) { int ret_val; CHECK_ERROR(pthread_join(g_state.tinfo[thread_cnt].tid, (void **)(void *)&ret_val) != 0); // The thread returned and error. Restart the thread. 
//if (ret_val != 0) //{ //} } pthread_attr_destroy(&attr); free(g_state.tinfo); dprintf("Status: All tasks have completed\n"); return; } /** map_worker() * args - pointer to thread_wrapper_arg_t * returns 0 on success * This runs thread_func() until there is no more data from the splitter(). * The pointer to results are stored in return_values array. */ static void *map_worker(void *args) { thread_wrapper_arg_t *th_arg = (thread_wrapper_arg_t *)args; int thread_index = getCurrThreadIndex(MAP); map_args_t thread_func_arg; int num_assigned = 0; int ret; // return value of splitter func. 0 = no more data to provide int isOneQueuePerTask = g_state.isOneQueuePerTask; assert(th_arg); #ifdef _LINUX_ // Bind thread to run on cpu_id unsigned long cpu_set = 0; setCpuAvailable(&cpu_set, th_arg->cpu_id); CHECK_ERROR(sched_setaffinity(0, sizeof(cpu_set), &cpu_set) != 0); #endif #ifdef _SOLARIS_ dprintf("Binding thread to processor %d\n", th_arg->cpu_id); CHECK_ERROR(processor_bind(P_LWPID, P_MYID, th_arg->cpu_id, NULL)!= 0); /*if (processor_bind(P_LWPID, P_MYID, th_arg->cpu_id, NULL)!= 0) { switch(errno) { case EFAULT: dprintf("EFAULT\n"); break; case EINVAL: dprintf("EINVAL\n"); break; case EPERM: dprintf("EPERM\n"); break; case ESRCH: dprintf("ESRCH\n"); break; default: dprintf("Errno is %d\n",errno); } }*/ #endif while (1) { pthread_mutex_lock(th_arg->splitter_lock); ret = g_state.splitter(g_state.args->task_data, g_state.chunk_size, &thread_func_arg); if (ret != 0) { int alloc_len = g_state.intermediate_task_alloc_len; g_state.tinfo[thread_index].curr_task = g_state.map_tasks++; num_assigned++; if (isOneQueuePerTask && g_state.map_tasks > alloc_len) { dprintf("MAP TASK QUEUE REALLOC\n"); int i; g_state.intermediate_task_alloc_len *= 2; for (i = 0; i < g_state.reduce_tasks; i++) { g_state.intermediate_vals[i] = (keyvals_arr_t *)REALLOC( g_state.intermediate_vals[i], g_state.intermediate_task_alloc_len*sizeof(keyvals_arr_t)); memset(&g_state.intermediate_vals[i][alloc_len], 0, alloc_len*sizeof(keyvals_arr_t)); } } } pthread_mutex_unlock(th_arg->splitter_lock); // Stop if there is no more data if (ret == 0) break; dprintf("Task %d: cpu_id -> %d - Started\n", num_assigned, th_arg->cpu_id); g_state.args->map(&thread_func_arg); dprintf("Task %d: cpu_id -> %d - Done\n", num_assigned, th_arg->cpu_id); } dprintf("Status: Total of %d tasks were assigned to cpu_id %d\n", num_assigned, th_arg->cpu_id); free(args); return (void *)0; } static void *reduce_worker(void *args) { thread_wrapper_arg_t *th_arg = (thread_wrapper_arg_t *)args; int thread_index = getCurrThreadIndex(REDUCE); int isOneQueuePerTask = g_state.isOneQueuePerTask; assert(th_arg); #ifdef _LINUX_ // Bind thread to run on cpu_id unsigned long cpu_set = 0; setCpuAvailable(&cpu_set, th_arg->cpu_id); CHECK_ERROR(sched_setaffinity(0, sizeof(cpu_set), &cpu_set) != 0); #endif #ifdef _SOLARIS_ CHECK_ERROR(processor_bind(P_LWPID, P_MYID, th_arg->cpu_id, NULL)!= 0); /*if (processor_bind(P_LWPID, P_MYID, th_arg->cpu_id, NULL)!= 0) { switch(errno) { case EFAULT: dprintf("EFAULT\n"); break; case EINVAL: dprintf("EINVAL\n"); break; case EPERM: dprintf("EPERM\n"); break; case ESRCH: dprintf("ESRCH\n"); break; default: dprintf("Errno is %d\n",errno); } }*/ #endif int curr_thread, done; int curr_reduce_task = 0; int ret; int num_map_threads; if (isOneQueuePerTask) num_map_threads = g_state.map_tasks; else num_map_threads = g_state.num_map_threads; int startsize = DEFAULT_VALS_ARR_LEN; keyvals_arr_t* thread_array; int vals_len, max_len, next_min_pos; keyvals_t 
*curr_key_val, *min_key_val, *next_min; int * thread_position = (int *)MALLOC(num_map_threads * sizeof(int)); void** vals = MALLOC(sizeof(char*)*startsize); while (1) { // Get the next reduce task pthread_mutex_lock(th_arg->splitter_lock); ret = (*th_arg->pos >= g_state.reduce_tasks); if (!ret) { g_state.tinfo[thread_index].curr_task = curr_reduce_task = (*th_arg->pos)++; } pthread_mutex_unlock(th_arg->splitter_lock); // No more reduce tasks if(ret) break; bzero((char *)thread_position, num_map_threads*sizeof(int)); vals_len = 0; max_len = startsize; min_key_val = NULL; next_min = NULL; done = 0; while (!done) { for (curr_thread = 0; curr_thread < num_map_threads; curr_thread++) { /* Find the next array to search */ thread_array = &g_state.intermediate_vals[curr_reduce_task][curr_thread]; /* Check if the current processor array has been completely searched */ if (thread_position[curr_thread] >= thread_array->len) continue; /* Get the next key in the processor array */ curr_key_val = &thread_array->arr[thread_position[curr_thread]]; /* If the key matches the minimum value. Then add the value to the list of values for that key */ if (min_key_val != NULL && !g_state.args->key_cmp(curr_key_val->key, min_key_val->key)) { if (g_state.reduce == identity_reduce) { int j; for (j = 0; j < curr_key_val->len; j++) { emit_inline(min_key_val->key, curr_key_val->vals[j]); } } else { if (vals_len + curr_key_val->len >= max_len) { while (vals_len + curr_key_val->len >= max_len) max_len *= 2; vals = REALLOC(vals, sizeof(char*)*(max_len)); } memcpy(&vals[vals_len], curr_key_val->vals, curr_key_val->len*sizeof(char*)); vals_len += curr_key_val->len; } thread_position[curr_thread--]++; } /* Find the location of the next min */ else if (next_min == NULL || g_state.args->key_cmp(curr_key_val->key, next_min->key) < 0) { next_min = curr_key_val; next_min_pos = curr_thread; } } if(min_key_val != NULL) { if (g_state.reduce != identity_reduce) { g_state.reduce(min_key_val->key, vals, vals_len); } vals_len = 0; min_key_val = NULL; } if (next_min != NULL) { min_key_val = next_min; next_min = NULL; } // See if there are any elements left for(curr_thread = 0; curr_thread < num_map_threads && thread_position[curr_thread] >= g_state.intermediate_vals[curr_reduce_task][curr_thread].len; curr_thread++); done = (curr_thread == num_map_threads); } for (curr_thread = 0; curr_thread < num_map_threads; curr_thread++) { keyvals_arr_t * arr = &g_state.intermediate_vals[curr_reduce_task][curr_thread]; int j; for(j = 0; j < arr->len; j++) { free(arr->arr[j].vals); } free(arr->arr); } free(g_state.intermediate_vals[curr_reduce_task]); } free(thread_position); free(vals); free(args); return (void *)0; }
/** merge_worker() * args - pointer to thread_wrapper_arg_t * returns 0 on success */ static void *merge_worker(void *args) { thread_wrapper_arg_t *th_arg = (thread_wrapper_arg_t *)args; int curr_thread = getCurrThreadIndex(MERGE); assert(th_arg); #ifdef _LINUX_ // Bind thread to run on cpu_id unsigned long cpu_set = 0; setCpuAvailable(&cpu_set, th_arg->cpu_id); CHECK_ERROR(sched_setaffinity(0, sizeof(cpu_set), &cpu_set) != 0); #endif #ifdef _SOLARIS_ CHECK_ERROR(processor_bind(P_LWPID, P_MYID, th_arg->cpu_id, NULL)!= 0); /*if (processor_bind(P_LWPID, P_MYID, th_arg->cpu_id, NULL)!= 0) { switch(errno) { case EFAULT: dprintf("EFAULT\n"); break; case EINVAL: dprintf("EINVAL\n"); break; case EPERM: dprintf("EPERM\n"); break; case ESRCH: dprintf("ESRCH\n"); break; default: dprintf("Errno is %d\n",errno); } }*/ #endif // Assumes num_merge_threads is modified before each call int length = th_arg->merge_len / g_state.num_merge_threads; int modlen = th_arg->merge_len % g_state.num_merge_threads; // Let's make some progress here if (length <= 1) { length = 2; modlen = th_arg->merge_len % 2; } int pos = curr_thread*length + ((curr_thread < modlen) ? curr_thread : modlen); if (pos < th_arg->merge_len) { keyval_arr_t *vals = &th_arg->merge_input[pos]; dprintf("Thread %d: cpu_id -> %d - Started\n", curr_thread, th_arg->cpu_id); merge_results(vals, length + (curr_thread < modlen)); dprintf("Thread %d: cpu_id -> %d - Done\n", curr_thread, th_arg->cpu_id); } free(args); return (void *)0; }
static bool apply_affinity(ph_cpu_set_t *set, ph_thread_t *me) { return processor_bind(P_LWPID, me->lwpid, *set, NULL) == 0; }
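apply_affinity() binds a specific LWP rather than the caller, so me->lwpid has to be captured on the target thread itself. A sketch of that capture, assuming _lwp_self() from <sys/lwp.h> is available (the struct and function names are illustrative):

#include <sys/types.h>
#include <sys/lwp.h>
#include <sys/processor.h>
#include <sys/procset.h>

/* Illustrative only: each thread records its own LWP id at startup so
 * that another thread can later rebind it with processor_bind(). */
struct thread_info {
    lwpid_t lwpid;
};

static void record_self(struct thread_info *ti)
{
    ti->lwpid = _lwp_self();   /* LWP id of the calling thread */
}

static int bind_other(const struct thread_info *ti, processorid_t cpu)
{
    return processor_bind(P_LWPID, (id_t)ti->lwpid, cpu, NULL) == 0 ? 0 : -1;
}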
int thread_bind_native(__unused_variable struct cpuid_state_t *state, uint32_t id) { #ifdef TARGET_OS_WINDOWS BOOL ret = FALSE; HANDLE hThread = GetCurrentThread(); #if _WIN32_WINNT < 0x0601 if (is_windows7_or_greater()) { #endif DWORD threadsInGroup = 0; WORD groupId, groupCount; GROUP_AFFINITY affinity; ZeroMemory(&affinity, sizeof(GROUP_AFFINITY)); groupCount = GetActiveProcessorGroupCount(); for (groupId = 0; groupId < groupCount; groupId++) { threadsInGroup = GetActiveProcessorCount(groupId); if (id < threadsInGroup) break; id -= threadsInGroup; } if (groupId < groupCount && id < threadsInGroup) { affinity.Group = groupId; affinity.Mask = 1ULL << id; ret = SetThreadGroupAffinity(hThread, &affinity, NULL); } #if _WIN32_WINNT < 0x0601 } else { DWORD mask; if (id > 32) return 1; mask = (1 << id); ret = SetThreadAffinityMask(hThread, mask); } #endif if (state && ret != FALSE) state->cpu_bound_index = id; return (ret != FALSE) ? 0 : 1; #elif defined(TARGET_OS_LINUX) || defined(TARGET_OS_FREEBSD) int ret; #ifdef CPU_SET_S size_t setsize = CPU_ALLOC_SIZE(MAX_CPUS); CPUSET_T *set = CPU_ALLOC(MAX_CPUS); pthread_t pth; pth = pthread_self(); CPU_ZERO_S(setsize, set); CPU_SET_S(id, setsize, set); ret = pthread_setaffinity_np(pth, setsize, set); CPU_FREE(set); #else size_t bits_per_set = sizeof(CPUSET_T) * 8; size_t bits_per_subset = sizeof(CPUSET_MASK_T) * 8; size_t setsize = sizeof(CPUSET_T) * (MAX_CPUS / bits_per_set); size_t set_id, subset_id; unsigned long long mask; CPUSET_T *set = malloc(setsize); pthread_t pth; pth = pthread_self(); for (set_id = 0; set_id < (MAX_CPUS / bits_per_set); set_id++) CPU_ZERO(&set[set_id]); set_id = id / bits_per_set; id %= bits_per_set; subset_id = id / bits_per_subset; id %= bits_per_subset; mask = 1ULL << (unsigned long long)id; ((unsigned long *)set[set_id].__bits)[subset_id] |= mask; ret = pthread_setaffinity_np(pth, setsize, set); free(set); #endif if (state && ret == 0) state->cpu_bound_index = id; return (ret == 0) ? 0 : 1; #elif defined(TARGET_OS_SOLARIS) /* * This requires permissions, so can easily fail. */ if (processor_bind(P_LWPID, P_MYID, id, NULL) != 0) { fprintf(stderr, "warning: failed to bind to CPU%u: %s\n", id, strerror(errno)); return 1; } if (state) state->cpu_bound_index = id; return 0; #elif defined(TARGET_OS_MACOSX) int ret = 1; #ifdef USE_CHUD ret = (utilBindThreadToCPU(id) == 0) ? 0 : 1; #endif if (state && ret == 0) state->cpu_bound_index = id; return ret == 0 ? 0 : 1; #else #error "thread_bind_native() not defined for this platform" #endif }
static int hwloc_solaris_set_sth_cpubind(hwloc_topology_t topology, idtype_t idtype, id_t id, hwloc_const_bitmap_t hwloc_set, int flags) { unsigned target_cpu; /* The resulting binding is always strict */ if (hwloc_bitmap_isequal(hwloc_set, hwloc_topology_get_complete_cpuset(topology))) { if (processor_bind(idtype, id, PBIND_NONE, NULL) != 0) return -1; #ifdef HAVE_LIBLGRP if (!(flags & HWLOC_CPUBIND_NOMEMBIND)) { int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE); if (depth >= 0) { int n = hwloc_get_nbobjs_by_depth(topology, depth); int i; for (i = 0; i < n; i++) { hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, i); lgrp_affinity_set(idtype, id, obj->os_index, LGRP_AFF_NONE); } } } #endif /* HAVE_LIBLGRP */ return 0; } #ifdef HAVE_LIBLGRP if (!(flags & HWLOC_CPUBIND_NOMEMBIND)) { int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE); if (depth >= 0) { int n = hwloc_get_nbobjs_by_depth(topology, depth); int i; int ok; hwloc_bitmap_t target = hwloc_bitmap_alloc(); for (i = 0; i < n; i++) { hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, i); if (hwloc_bitmap_isincluded(obj->cpuset, hwloc_set)) hwloc_bitmap_or(target, target, obj->cpuset); } ok = hwloc_bitmap_isequal(target, hwloc_set); hwloc_bitmap_free(target); if (ok) { /* Ok, managed to achieve hwloc_set by just combining NUMA nodes */ for (i = 0; i < n; i++) { hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, i); if (hwloc_bitmap_isincluded(obj->cpuset, hwloc_set)) { lgrp_affinity_set(idtype, id, obj->os_index, LGRP_AFF_STRONG); } else { if (flags & HWLOC_CPUBIND_STRICT) lgrp_affinity_set(idtype, id, obj->os_index, LGRP_AFF_NONE); else lgrp_affinity_set(idtype, id, obj->os_index, LGRP_AFF_WEAK); } } return 0; } } } #endif /* HAVE_LIBLGRP */ if (hwloc_bitmap_weight(hwloc_set) != 1) { errno = EXDEV; return -1; } target_cpu = hwloc_bitmap_first(hwloc_set); if (processor_bind(idtype, id, (processorid_t) (target_cpu), NULL) != 0) return -1; return 0; }
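For completeness, a minimal sketch of the lgroup side used above, assuming liblgrp (<sys/lgrp_user.h>, link with -llgrp) is available: giving the calling LWP strong affinity for its home locality group instead of a single processor (bind_to_home_lgroup() is an invented name):

#include <sys/types.h>
#include <sys/lgrp_user.h>
#include <sys/procset.h>

/* Illustrative only: pin the calling LWP's affinity to its home lgroup. */
static int bind_to_home_lgroup(void)
{
    lgrp_id_t home = lgrp_home(P_LWPID, P_MYID);
    if (home == -1)
        return -1;
    return lgrp_affinity_set(P_LWPID, P_MYID, home, LGRP_AFF_STRONG);
}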