__attribute__((constructor(103))) static void liblock_init() {
	int i;
	char get_cmd[128];
	char *tmp_env;
	int num_servers;

	tmp_env = getenv("NUM_SERVERS");
	if (!tmp_env)
		tmp_env = "1";
	num_servers = atoi(tmp_env);

	liblock_server_cores = malloc(NUM_LOCKS * sizeof(struct core *));

	/*
	for (i = 0; i < NUM_LOCKS; i++) {
		if (num_servers > 6)
			printf("!!! warning: too many server cores (> 6 not supported).\n");
		liblock_server_cores[i] = topology->nodes[0].cores[i % num_servers];
	}
	*/

	/* Fast config 1: all locks on node 0, core 2, except lock 7 on core 1.
	   Fast config 2: all locks on node 0, core 1, except lock 9 on core 2.
	   Conclusion: conflict between locks 7 and 9! */

#ifdef ONE_SERVER
	liblock_server_cores[0]  = topology->nodes[0].cores[0];
	liblock_server_cores[1]  = topology->nodes[0].cores[0];
	liblock_server_cores[2]  = topology->nodes[0].cores[0];
	liblock_server_cores[3]  = topology->nodes[0].cores[0];
	liblock_server_cores[4]  = topology->nodes[0].cores[0];
	liblock_server_cores[5]  = topology->nodes[0].cores[0];
	liblock_server_cores[6]  = topology->nodes[0].cores[0];
	liblock_server_cores[7]  = topology->nodes[0].cores[0];
	liblock_server_cores[8]  = topology->nodes[0].cores[0];
	liblock_server_cores[9]  = topology->nodes[0].cores[0];
	liblock_server_cores[10] = topology->nodes[0].cores[0];
#else
	liblock_server_cores[0]  = topology->nodes[0].cores[0];
	liblock_server_cores[1]  = topology->nodes[0].cores[0];
	liblock_server_cores[2]  = topology->nodes[0].cores[0];
	liblock_server_cores[3]  = topology->nodes[0].cores[0];
	liblock_server_cores[4]  = topology->nodes[0].cores[0];
	liblock_server_cores[5]  = topology->nodes[0].cores[1];
	liblock_server_cores[6]  = topology->nodes[0].cores[1];
	liblock_server_cores[7]  = topology->nodes[0].cores[0];
	liblock_server_cores[8]  = topology->nodes[0].cores[1];
	liblock_server_cores[9]  = topology->nodes[0].cores[1];
	liblock_server_cores[10] = topology->nodes[0].cores[1];
#endif

	liblock_lock_name = getenv("LIBLOCK_LOCK_NAME");
	if (!liblock_lock_name)
		liblock_lock_name = "rcl";
	is_rcl = !strcmp(liblock_lock_name, "rcl") ||
	         !strcmp(liblock_lock_name, "multircl");

	liblock_start_server_threads_by_hand = 1;
	liblock_servers_always_up = 1;

	sprintf(get_cmd, "/proc/%d/cmdline", getpid());
	FILE *f = fopen(get_cmd, "r");
	char buf[1024];
	buf[0] = 0;
	if (!f) {
		printf("!!! warning: unable to find command line\n");
	} else {
		/* only read from the file when it was actually opened */
		if (!fgets(buf, 1024, f))
			printf("!!! warning: fgets failed on %s\n", get_cmd);
		fclose(f);
	}
	printf("**** testing %s with lock %s\n", buf, liblock_lock_name);

	/* Pre-bind */
	cpu_set_t cpuset;
	CPU_ZERO(&cpuset);
	CPU_SET(topology->nodes[0].cores[2]->core_id, &cpuset);
	if (sched_setaffinity(0, sizeof(cpu_set_t), &cpuset))
		fatal("sched_setaffinity"); /* the failing call is sched_setaffinity */
	/* /Pre-bind */

	if (is_rcl) {
		go = 0;
		liblock_reserve_core_for(topology->nodes[0].cores[0], liblock_lock_name);
#ifndef ONE_SERVER
		liblock_reserve_core_for(topology->nodes[0].cores[1], liblock_lock_name);
#endif
		/*
		for (i = 0; i < NUM_LOCKS; i++)
			liblock_reserve_core_for(liblock_server_cores[i], liblock_lock_name);
		*/
		/* launch the liblock threads */
		liblock_lookup(liblock_lock_name)->run(do_go);
		while (!go)
			PAUSE();
	}

	client_cores = malloc(sizeof(int) * topology->nb_cores);
	int j, k, z;
	for (i = 0, z = 0; i < topology->nb_nodes; i++) {
		for (j = 0; j < topology->nodes[i].nb_cores; j++) {
			int is_server_core = 0;
			if (is_rcl) {
				for (k = 0; k < NUM_LOCKS; k++)
					if (topology->nodes[i].cores[j] == liblock_server_cores[k])
						is_server_core = 1;
			}
			if (!is_server_core)
				client_cores[z++] = topology->nodes[i].cores[j]->core_id;
		}
	}
	n_available_cores = z;
	printf("**** %d available cores for clients.\n", z);

	liblock_auto_bind();
}
static int pfring_zc_daq_initialize(const DAQ_Config_t *config, void **ctxt_ptr, char *errbuf, size_t len) {
  Pfring_Context_t *context;
  DAQ_Dict *entry;
  u_int numCPU = get_nprocs();
  int i, max_buffer_len = 0;
  int num_buffers;

  context = calloc(1, sizeof(Pfring_Context_t));
  if (context == NULL) {
    snprintf(errbuf, len, "%s: Couldn't allocate memory for context!", __FUNCTION__);
    return DAQ_ERROR_NOMEM;
  }

  context->mode = config->mode;
  context->snaplen = config->snaplen;
  context->promisc_flag = (config->flags & DAQ_CFG_PROMISC);
  context->timeout = (config->timeout > 0) ? (int) config->timeout : -1;
  context->devices[DAQ_PF_RING_PASSIVE_DEV_IDX] = strdup(config->name);
  context->num_devices = 1;
  context->ids_bridge = 0;
  context->clusterid = 0;
  context->max_buffer_len = 0;
  context->bindcpu = 0;

  if (!context->devices[DAQ_PF_RING_PASSIVE_DEV_IDX]) {
    snprintf(errbuf, len, "%s: Couldn't allocate memory for the device string!", __FUNCTION__);
    free(context);
    return DAQ_ERROR_NOMEM;
  }

  for (entry = config->values; entry; entry = entry->next) {
    if (!entry->value || !*entry->value) {
      snprintf(errbuf, len, "%s: variable needs value (%s)\n", __FUNCTION__, entry->key);
      free(context); /* avoid leaking the context on the error paths in this loop */
      return DAQ_ERROR;
    } else if (!strcmp(entry->key, "bindcpu")) {
      char *end = entry->value;
      context->bindcpu = (int) strtol(entry->value, &end, 0);
      if (*end || (context->bindcpu >= numCPU)) {
        snprintf(errbuf, len, "%s: bad bindcpu (%s)\n", __FUNCTION__, entry->value);
        free(context);
        return DAQ_ERROR;
      } else {
        cpu_set_t mask;

        CPU_ZERO(&mask);
        CPU_SET((int)context->bindcpu, &mask);
        if (sched_setaffinity(0, sizeof(mask), &mask) < 0) {
          snprintf(errbuf, len, "%s: failed to set bindcpu (%u) on pid %i\n", __FUNCTION__, context->bindcpu, getpid());
          free(context);
          return DAQ_ERROR;
        }
      }
    } else if (!strcmp(entry->key, "timeout")) {
      char *end = entry->value;
      context->timeout = (int) strtol(entry->value, &end, 0);
      if (*end || (context->timeout < 0)) {
        snprintf(errbuf, len, "%s: bad timeout (%s)\n", __FUNCTION__, entry->value);
        free(context);
        return DAQ_ERROR;
      }
    } else if (!strcmp(entry->key, "idsbridge")) {
      if (context->mode == DAQ_MODE_PASSIVE) {
        char *end = entry->value;
        context->ids_bridge = (int) strtol(entry->value, &end, 0);
        if (*end || (context->ids_bridge < 0) || (context->ids_bridge > 2)) {
          snprintf(errbuf, len, "%s: bad ids bridge mode (%s)\n", __FUNCTION__, entry->value);
          free(context);
          return DAQ_ERROR;
        }
      } else {
        snprintf(errbuf, len, "%s: idsbridge is for passive mode only\n", __FUNCTION__);
        free(context);
        return DAQ_ERROR;
      }
    } else if (!strcmp(entry->key, "clusterid")) {
      char *end = entry->value;
      context->clusterid = (int) strtol(entry->value, &end, 0);
      if (*end || (context->clusterid < 0)) {
        snprintf(errbuf, len, "%s: bad clusterid (%s)\n", __FUNCTION__, entry->value);
        free(context);
        return DAQ_ERROR;
      }
    } else {
      snprintf(errbuf, len, "%s: unsupported variable (%s=%s)\n", __FUNCTION__, entry->key, entry->value);
      free(context);
      return DAQ_ERROR;
    }
  }

  if (context->mode == DAQ_MODE_READ_FILE) {
    snprintf(errbuf, len, "%s: function not supported on PF_RING", __FUNCTION__);
    free(context);
    return DAQ_ERROR;
  } else if (context->mode == DAQ_MODE_INLINE || (context->mode == DAQ_MODE_PASSIVE && context->ids_bridge)) {
    /* zc:ethX+zc:ethY,zc:ethZ+zc:ethJ */
    char *twins, *twins_pos = NULL;

    context->num_devices = 0;
    twins = strtok_r(context->devices[DAQ_PF_RING_PASSIVE_DEV_IDX], ",", &twins_pos);
    while (twins != NULL) {
      char *dev, *dev_pos = NULL, *tx_dev;
      int last_twin = 0;

      dev = strtok_r(twins, "+", &dev_pos);
      while (dev != NULL) {
        if (context->num_devices >= DAQ_PF_RING_MAX_NUM_DEVICES) {
          snprintf(errbuf, len, "%s: Maximum num of devices reached (%d), you should increase "
                   "DAQ_PF_RING_MAX_NUM_DEVICES.\n", __FUNCTION__, DAQ_PF_RING_MAX_NUM_DEVICES);
          free(context);
          return DAQ_ERROR;
        }

        last_twin = context->num_devices;
        context->devices[context->num_devices] = dev;

        tx_dev = strchr(dev, '-');
        if (tx_dev != NULL) { /* use the specified device for tx */
          tx_dev[0] = '\0';
          tx_dev++;
          context->tx_devices[context->num_devices] = tx_dev;
        } else {
          context->tx_devices[context->num_devices] = dev;
        }

        context->num_devices++;
        dev = strtok_r(NULL, "+", &dev_pos);
      }

      if (context->num_devices & 0x1) {
        snprintf(errbuf, len, "%s: Wrong format: %s requires pairs of devices",
                 __FUNCTION__, context->mode == DAQ_MODE_INLINE ? "inline mode" : "ids bridge");
        free(context);
        return DAQ_ERROR;
      }

      if (last_twin > 0) /* new dev pair */
        printf("%s <-> %s\n", context->devices[last_twin - 1], context->devices[last_twin]);

      twins = strtok_r(NULL, ",", &twins_pos);
    }
  } else if (context->mode == DAQ_MODE_PASSIVE) {
    /* zc:ethX,zc:ethY */
    char *dev, *dev_pos = NULL;

    context->num_devices = 0;
    dev = strtok_r(context->devices[DAQ_PF_RING_PASSIVE_DEV_IDX], ",", &dev_pos);
    while (dev != NULL) {
      context->devices[context->num_devices++] = dev;
      dev = strtok_r(NULL, ",", &dev_pos);
    }
  }

#ifdef SIG_RELOAD
  /* catching the SIGRELOAD signal, replacing the default snort handler */
  if ((default_sig_reload_handler = signal(SIGHUP, pfring_zc_daq_sig_reload)) == SIG_ERR)
    default_sig_reload_handler = NULL;
#endif

  num_buffers = 2 /* buffer, buffer_inject */;

#ifdef DAQ_PF_RING_BEST_EFFORT_BOOST
  if (context->mode == DAQ_MODE_PASSIVE && context->ids_bridge == 2)
    num_buffers += QUEUE_LEN;
#endif

  for (i = 0; i < context->num_devices; i++) {
    max_buffer_len = max_packet_len(context, context->devices[i], i);
    if (max_buffer_len > context->max_buffer_len)
      context->max_buffer_len = max_buffer_len;
    if (strstr(context->devices[i], "zc:") != NULL)
      num_buffers += MAX_CARD_SLOTS;
    if (context->tx_devices[i] != NULL) {
      max_buffer_len = max_packet_len(context, context->tx_devices[i], i);
      if (max_buffer_len > context->max_buffer_len)
        context->max_buffer_len = max_buffer_len;
      if (strstr(context->tx_devices[i], "zc:") != NULL)
        num_buffers += MAX_CARD_SLOTS;
    }
  }

  context->cluster = pfring_zc_create_cluster(context->clusterid, context->max_buffer_len, 0, num_buffers,
                                              context->bindcpu == 0 ? -1 : numa_node_of_cpu(context->bindcpu),
                                              NULL);

  if (context->cluster == NULL) {
    DPE(context->errbuf, "%s: Cluster failed: %s (%d)", __FUNCTION__, strerror(errno), errno);
    return DAQ_ERROR;
  }

  context->buffer = pfring_zc_get_packet_handle(context->cluster);
  if (context->buffer == NULL) {
    DPE(context->errbuf, "%s: Buffer allocation failed: %s (%d)", __FUNCTION__, strerror(errno), errno);
    return DAQ_ERROR;
  }

  context->buffer_inject = pfring_zc_get_packet_handle(context->cluster);
  if (context->buffer_inject == NULL) {
    DPE(context->errbuf, "%s: Buffer allocation failed: %s (%d)", __FUNCTION__, strerror(errno), errno);
    return DAQ_ERROR;
  }

  for (i = 0; i < context->num_devices; i++) {
    if (pfring_zc_daq_open(context, i) == -1)
      return DAQ_ERROR;
  }

#ifdef DAQ_PF_RING_BEST_EFFORT_BOOST
  if (context->mode == DAQ_MODE_PASSIVE && context->ids_bridge == 2) {
    context->q = pfring_zc_create_queue(context->cluster, QUEUE_LEN);
    if (context->q == NULL) {
      snprintf(errbuf, len, "%s: Couldn't create queue: '%s'", __FUNCTION__, strerror(errno));
      return DAQ_ERROR_NOMEM;
    }
    context->mq_queues[context->num_devices] = context->q;
    context->mq = pfring_zc_create_multi_queue(context->mq_queues, context->num_devices + 1);
  }
#endif

  context->state = DAQ_STATE_INITIALIZED;

  *ctxt_ptr = context;
  return DAQ_SUCCESS;
}
void start(std::vector<uint64_t> const & procs)
{
	if ( ! thread.get() )
	{
		thread_ptr_type tthread(new pthread_t);
		thread = UNIQUE_PTR_MOVE(tthread);

		pthread_attr_t attr;
		int err;
		/* pthread_* functions return the error code instead of setting
		 * errno, so report strerror(err), not strerror(errno) */
		if ( (err = pthread_attr_init(&attr)) )
		{
			::libmaus2::exception::LibMausException se;
			se.getStream() << "pthread_attr_init failed:" << strerror(err);
			se.finish();
			throw se;
		}

		cpu_set_t cpuset;
		CPU_ZERO(&cpuset);
		for ( uint64_t i = 0; i < procs.size(); ++i )
			CPU_SET(procs[i],&cpuset);

		if ( (err = pthread_attr_setaffinity_np(&attr,sizeof(cpu_set_t),&cpuset)) )
		{
			pthread_attr_destroy(&attr);
			::libmaus2::exception::LibMausException se;
			se.getStream() << "pthread_attr_setaffinity_np failed:" << strerror(err);
			se.finish();
			throw se;
		}

		#if 0
		std::cerr << "Creating thread with affinity." << std::endl;
		std::cerr << ::libmaus2::util::StackTrace::getStackTrace() << std::endl;
		#endif

		if ( pthread_create(thread.get(),&attr,dispatch,this) )
		{
			pthread_attr_destroy(&attr);
			::libmaus2::exception::LibMausException se;
			se.getStream() << "pthread_create() failed in PosixThread::start()";
			se.finish();
			throw se;
		}

		if ( (err = pthread_attr_destroy(&attr)) )
		{
			::libmaus2::exception::LibMausException se;
			se.getStream() << "pthread_attr_destroy failed:" << strerror(err);
			se.finish();
			throw se;
		}
	}
	else
	{
		::libmaus2::exception::LibMausException se;
		se.getStream() << "PosixThread::start() called but object is already in use.";
		se.finish();
		throw se;
	}
}
affinity_set::affinity_set() { CPU_ZERO(&set); }
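/* The affinity_set constructor above only zeroes the embedded cpu_set_t; the
 * CPUs themselves are presumably added and applied elsewhere in the class.
 * Below is a minimal, hypothetical C sketch of the full zero/fill/apply life
 * cycle such a wrapper manages; bind_self_to_cpus() is illustrative and not
 * part of the original code. */
#define _GNU_SOURCE
#include <sched.h>

static int bind_self_to_cpus(const int *cpus, int n)
{
    cpu_set_t set;
    int i;

    CPU_ZERO(&set);                /* start empty, as the constructor does */
    for (i = 0; i < n; i++)
        CPU_SET(cpus[i], &set);    /* mark each allowed CPU */
    /* pid 0 == the calling thread/process */
    return sched_setaffinity(0, sizeof(set), &set);
}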
int scheprocess(MTN *mtn, MTNJOB *job, int job_max, int cpu_lim, int cpu_num)
{
  int i;
  int cpu_id;
  int cpu_use;
  cpu_set_t cpumask;

  cpu_id  = 0;
  cpu_use = 0;
  scanprocess(job, job_max);
  for(i=0;i<job_max;i++){
    if(!job[i].pid){
      continue;
    }
    getjobusage(job + i);
    if(cpu_id != job[i].cid){
      CPU_ZERO(&cpumask);
      CPU_SET(cpu_id, &cpumask);
      /* sched_setaffinity() takes the mask size in bytes, not the CPU count */
      if(sched_setaffinity(job[i].pid, sizeof(cpumask), &cpumask) == -1){
        mtnlogger(mtn, 0, "[error] %s: sched_setaffinity: %s\n", __func__, strerror(errno));
        job[i].cid = -1; /* record the result on this job, not job[0] */
      }else{
        job[i].cid = cpu_id;
      }
    }
    cpu_id += 1;
    cpu_id %= cpu_num;
    cpu_use += job[i].cpu;
    //MTNDEBUG("CMD=%s STATE=%c CPU=%d.%d\n", job->cmd, job->pstat[0].state, job->cpu / 10, job->cpu % 10);
  }
  //MTNDEBUG("[CPU=%d.%d%% LIM=%d CPU=%d]\n", ctx->cpu_use / 10, ctx->cpu_use % 10, ctx->cpu_lim / 10, ctx->cnt.cpu);
  if(!cpu_lim){
    return(cpu_use);
  }
  for(i=0;i<job_max;i++){
    if(!job[i].pid){
      continue;
    }
    if(cpu_lim * cpu_num < cpu_use){
      /* overloaded */
      if(job[i].pstat[0].state != 'T'){
        if(job[i].cpu > cpu_lim){
          kill(-(job[i].pid), SIGSTOP);
          return(cpu_use);
        }
      }
    }else{
      /* idle */
      if(job[i].pstat[0].state == 'T'){
        if(job[i].cpu < cpu_lim){
          kill(-(job[i].pid), SIGCONT);
          return(cpu_use);
        }
      }
    }
  }
  for(i=0;i<job_max;i++){
    if(!job[i].pid){
      continue;
    }
    if(job[i].pstat[0].state != 'T'){
      if(job[i].cpu > cpu_lim){
        kill(-(job[i].pid), SIGSTOP);
      }
    }else{
      if(job[i].cpu < cpu_lim){
        kill(-(job[i].pid), SIGCONT);
      }
    }
  }
  return(cpu_use);
}
int main(int argc, char *argv[])
{
	unsigned long long num_nodes, ram_size;
	unsigned long num_forks = 1;
	struct sysinfo info;
	void *shm;
	int *cond;
	struct sigaction zig;
	int c, add_wait = -1, is_parent = 1;
#ifdef __cpu_set_t_defined
	int affinity = 0;
	cpu_set_t my_cpu_mask;
#endif

	/* By default we'll use 1/16th of total RAM, rounded
	 * down to the nearest page. */
	if (sysinfo(&info) != 0) {
		perror("sysinfo");
		return 1;
	}
	ram_size = info.totalram / 16;
	ram_size = ram_size & ~(getpagesize() - 1);
	num_nodes = ram_size / sizeof(void *);

	/* Parse command line args */
	while ((c = getopt(argc, argv, "a:p:n:d:s:t")) != -1) {
		switch (c) {
		case 'p':
			num_forks = atoi(optarg);
			break;
		case 'd':
			ram_size = info.totalram / atoi(optarg);
			ram_size = ram_size & ~(getpagesize() - 1);
			num_nodes = ram_size / sizeof(void *);
			break;
		case 'n':
			num_nodes = atoi(optarg);
			ram_size = num_nodes * sizeof(void *);
			break;
		case 's':
			report_interval = atoi(optarg);
			break;
		case 'a':
			add_wait = atoi(optarg);
			break;
#ifdef __cpu_set_t_defined
		case 't':
			affinity = 1;
			break;
#endif
		default:
			print_help(argv[0]);
			return 0;
		}
	}

	/* Will we exceed half the address space size? Use 1/4 of it at most. */
	if (ram_size > ((unsigned long long)1 << ((sizeof(void *) * 8) - 1))) {
		printf("Was going to use %lluKB (%llu nodes) but that's too big.\n",
		       ram_size / 1024, num_nodes);
		/* quarter of the address space; shifting a 64-bit value by the
		 * full word width (as the old "1 << bits, then /4" did) would
		 * be undefined behaviour */
		ram_size = (unsigned long long)1 << ((sizeof(void *) * 8) - 2);
		num_nodes = ram_size / sizeof(void *);
		printf("Clamping to %lluKB (%llu nodes) instead.\n",
		       ram_size / 1024, num_nodes);
	}

	/* Talk about what we're going to do. */
	printf("Going to use %lluKB (%llu nodes).\n", ram_size / 1024, num_nodes);

	/* Make a shared anonymous map of the RAM;
	 * MAP_ANONYMOUS portably wants fd == -1 */
	shm = mmap(NULL, ram_size, PROT_READ | PROT_WRITE,
		   MAP_SHARED | MAP_ANONYMOUS, -1, 0);
	if (shm == MAP_FAILED) {
		perror("mmap");
		return 2;
	}
	printf("mmap region: %p (%llu nodes)\n", shm, num_nodes);

	/* Create an SHM condition variable. Bogus, I know... */
	cond = mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE,
		    MAP_SHARED | MAP_ANONYMOUS, -1, 0);
	if (cond == MAP_FAILED) {
		perror("mmap");
		return 4;
	}
	*cond = 1;

	/* Create a "graph" by populating it with random pointers. */
	printf("Populating nodes...");
	fflush(stdout);
	populate_graph(shm, num_nodes);
	printf("done.\n");

	printf("Creating %lu processes with reports every %lu seconds "
	       "and %d seconds between adding children.\n",
	       num_forks, report_interval, add_wait);

	/* Fork off separate processes. The shared region is shared
	 * across all children. If we only wanted one thread, we shouldn't
	 * fork anything. Note that the "cond" mmap is a really crappy
	 * condition variable kludge that works well enough for HERE ONLY. */
	for (c = (add_wait >= 0 ? 0 : 1); c < num_forks; c++) {
		/* Child should wait for the condition and then break. */
		if (!fork()) {
#ifdef __cpu_set_t_defined
			if (affinity) {
				CPU_ZERO(&my_cpu_mask);
				CPU_SET(c, &my_cpu_mask);
				if (0 != sched_setaffinity(0, sizeof(cpu_set_t), &my_cpu_mask)) {
					perror("sched_setaffinity");
				}
			}
#endif
			is_parent = 0;
			while (*cond) {
				usleep(10000);
			}
			break;
		}
	}

	if (is_parent) {
#ifdef __cpu_set_t_defined
		if (affinity) {
			CPU_ZERO(&my_cpu_mask);
			CPU_SET(0, &my_cpu_mask);
			if (0 != sched_setaffinity(0, sizeof(cpu_set_t), &my_cpu_mask)) {
				perror("sched_setaffinity");
			}
		}
#endif
		printf("All threads created. Launching!\n");
		*cond = 0;
	}

	/* now start the work */
	if (!is_parent) {
start_thread:
		/* Set up the alarm handler to print speed info. */
		memset(&zig, 0x00, sizeof(zig));
		zig.sa_handler = alarm_func;
		sigaction(SIGALRM, &zig, NULL);
		gettimeofday(&last, NULL);
		alarm(report_interval);

		/* Walk the graph. */
		walk_graph(shm);

		/* This function never returns */
	} else {
		/* Start the ramp-up. The children will never die,
		 * so we don't need to wait() for 'em. */
		while (add_wait != -1) {
			sleep(add_wait);
			if (fork() == 0) {
				/* goto is cheesy, but works. */
				goto start_thread;
			} else {
				printf("Added thread.\n");
			}
		}
		goto start_thread;
	}

	return 0;
}
/* Check the process affinity mask and if it is found to be non-zero,
 * will honor it and disable mdrun internal affinity setting.
 * Note that this will only work on Linux as we use a GNU feature.
 */
void gmx_check_thread_affinity_set(FILE            *fplog,
                                   const t_commrec *cr,
                                   gmx_hw_opt_t    *hw_opt,
                                   int  gmx_unused  nthreads_hw_avail,
                                   gmx_bool         bAfterOpenmpInit)
{
    GMX_RELEASE_ASSERT(hw_opt, "hw_opt must be a non-NULL pointer");

    if (!bAfterOpenmpInit)
    {
        /* Check for externally set OpenMP affinity and turn off internal
         * pinning if any is found. We need to do this check early to tell
         * thread-MPI whether it should do pinning when spawning threads.
         * TODO: the above no longer holds, we should move these checks later
         */
        if (hw_opt->thread_affinity != threadaffOFF)
        {
            char *message;
            if (!gmx_omp_check_thread_affinity(&message))
            {
                /* TODO: with -pin auto we should only warn when using all cores */
                md_print_warn(cr, fplog, "%s", message);
                sfree(message);
                hw_opt->thread_affinity = threadaffOFF;
            }
        }

        /* With thread-MPI this is needed as pinning might get turned off,
         * which needs to be known before starting thread-MPI.
         * With thread-MPI hw_opt is processed here on the master rank
         * and passed to the other ranks later, so we only do this on master.
         */
        if (!SIMMASTER(cr))
        {
            return;
        }
#ifndef GMX_THREAD_MPI
        return;
#endif
    }

#ifdef HAVE_SCHED_AFFINITY
    int       ret;
    cpu_set_t mask_current;

    if (hw_opt->thread_affinity == threadaffOFF)
    {
        /* internal affinity setting is off, don't bother checking process affinity */
        return;
    }

    CPU_ZERO(&mask_current);
    if ((ret = sched_getaffinity(0, sizeof(cpu_set_t), &mask_current)) != 0)
    {
        /* failed to query affinity mask, will just return */
        if (debug)
        {
            fprintf(debug, "Failed to query affinity mask (error %d)", ret);
        }
        return;
    }

    /* Before proceeding with the actual check, make sure that the number of
     * detected CPUs is >= the CPUs in the current set.
     * We need to check for CPU_COUNT as it was added only in glibc 2.6. */
#ifdef CPU_COUNT
    if (nthreads_hw_avail < CPU_COUNT(&mask_current))
    {
        if (debug)
        {
            fprintf(debug, "%d hardware threads detected, but %d was returned by CPU_COUNT",
                    nthreads_hw_avail, CPU_COUNT(&mask_current));
        }
        return;
    }
#endif /* CPU_COUNT */

    gmx_bool bAllSet = TRUE;
    for (int i = 0; (i < nthreads_hw_avail && i < CPU_SETSIZE); i++)
    {
        bAllSet = bAllSet && (CPU_ISSET(i, &mask_current) != 0);
    }

#ifdef GMX_LIB_MPI
    gmx_bool bAllSet_All;

    MPI_Allreduce(&bAllSet, &bAllSet_All, 1, MPI_INT, MPI_LAND, MPI_COMM_WORLD);
    bAllSet = bAllSet_All;
#endif

    if (!bAllSet)
    {
        if (hw_opt->thread_affinity == threadaffAUTO)
        {
            if (!bAfterOpenmpInit)
            {
                md_print_warn(cr, fplog,
                              "Non-default thread affinity set, disabling internal thread affinity");
            }
            else
            {
                md_print_warn(cr, fplog,
                              "Non-default thread affinity set probably by the OpenMP library,\n"
                              "disabling internal thread affinity");
            }
            hw_opt->thread_affinity = threadaffOFF;
        }
        else
        {
            /* Only warn once, at the last check (bAfterOpenmpInit==TRUE) */
            if (bAfterOpenmpInit)
            {
                md_print_warn(cr, fplog, "Overriding thread affinity set outside %s\n",
                              ShortProgram());
            }
        }
        if (debug)
        {
            fprintf(debug, "Non-default affinity mask found\n");
        }
    }
    else
    {
        if (debug)
        {
            fprintf(debug, "Default affinity mask found\n");
        }
    }
#endif /* HAVE_SCHED_AFFINITY */
}
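/* Distilled, standalone sketch of the core check performed above: is every
 * hardware thread still allowed by the process affinity mask? As in the
 * GROMACS code, a failed sched_getaffinity() is treated as "unrestricted".
 * The function name is illustrative only, not part of the original code. */
#define _GNU_SOURCE
#include <sched.h>
#include <stdbool.h>

static bool affinity_mask_covers_all(int nthreads_hw_avail)
{
    cpu_set_t mask;
    int i;

    CPU_ZERO(&mask);
    if (sched_getaffinity(0, sizeof(mask), &mask) != 0)
        return true;   /* cannot tell; assume no external mask was set */
    for (i = 0; i < nthreads_hw_avail && i < CPU_SETSIZE; i++)
        if (!CPU_ISSET(i, &mask))
            return false;  /* some CPU was excluded externally */
    return true;
}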
//-----------------------------------------------------------------------------
// Simulator Entry Point
//-----------------------------------------------------------------------------
int main( int argc, char* argv[] ) {
    int result;

    controller_pid = 0;
    sim_pid = getpid();

    cpu_set_t cpuset_mask;
    // zero out the cpu set
    CPU_ZERO( &cpuset_mask );
    // set the cpu set s.t. controller only runs on 1 processor specified by DEFAULT_CONTROLLER_PROCESSOR
    CPU_SET( 0, &cpuset_mask );
    if ( sched_setaffinity( sim_pid, sizeof(cpuset_mask), &cpuset_mask ) == -1 ) {
        printf( "ERROR: Failed to set affinity for sim process.\n" );
        _exit( EXIT_FAILURE );
    }

    /*
    struct sched_param sim_params;
    sim_thread = pthread_self();
    //sim_params.sched_priority = 0;
    result = pthread_setschedparam( sim_thread, SCHED_RR, &sim_params );
    if( result != 0 ) {
        switch( errno ) {
        case EINVAL: printf( "errno: EINVAL\n" ); break;
        case EPERM:  printf( "errno: EPERM\n" );  break;
        case ESRCH:  printf( "errno: ESRCH\n" );  break;
        default:     printf( "errno: Unenumerated\n" ); break;
        }
        _exit( EXIT_FAILURE );
    }
    */

    /*
    int sched_policy = sched_getscheduler( 0 );
    if ( sched_policy == -1 ) {
        printf( "ERROR: Failed to get scheduler policy for controller process.\n" );
        _exit( EXIT_FAILURE );
    } else {
        printf( "Sim Scheduling Policy: %d\n", sched_policy );
        struct sched_param params;
        params.sched_priority = 1;
        result = sched_setscheduler( 0, SCHED_RR, &params );
        if( result != 0 ) {
            switch( errno ) {
            case EINVAL: printf( "errno: EINVAL\n" ); break;
            case EPERM:  printf( "errno: EPERM\n" );  break;
            case ESRCH:  printf( "errno: ESRCH\n" );  break;
            default:     printf( "errno: Unenumerated\n" ); break;
            }
            // error
            _exit( EXIT_FAILURE );
        }
    }
    */

    sim_priority = getpriority( PRIO_PROCESS, sim_pid );
    //fork_controller();

    result = pthread_attr_init( &monitor_thread_attr );
    if( result != 0 ) {
        // error
        _exit( EXIT_FAILURE );
    }

    struct sched_param monitor_sched_param;
    //monitor_sched_param.sched_priority = sched_getscheduler( pthread_self() ) + 2;

    result = pthread_attr_setinheritsched( &monitor_thread_attr, PTHREAD_EXPLICIT_SCHED );
    if( result != 0 ) {
        // error
        _exit( EXIT_FAILURE );
    }

    result = pthread_attr_setschedpolicy( &monitor_thread_attr, SCHED_RR );
    if( result != 0 ) {
        // error
        _exit( EXIT_FAILURE );
    }

    monitor_sched_param.sched_priority = sim_priority + 2;
    printf( "App Priority: %d\n", sim_priority );
    printf( "Monitor Priority: %d\n", monitor_sched_param.sched_priority );

    result = pthread_attr_setschedparam( &monitor_thread_attr, &monitor_sched_param );
    if( result != 0 ) {
        // error
        _exit( EXIT_FAILURE );
    }

    result = pthread_create( &monitor_thread, &monitor_thread_attr, &monitor, NULL );
    if( result != 0 ) {
        switch( result ) {
        case EAGAIN: printf( "result: EAGAIN\n" ); break;
        case EPERM:  printf( "result: EPERM\n" );  break;
        case EINVAL: printf( "result: EINVAL\n" ); break;
        default:     printf( "result: Unenumerated %d\n", result ); break;
        }
        // error
        perror( "Error pthread_create" );
        _exit( EXIT_FAILURE );
    }

    void* return_value;
    result = pthread_join( monitor_thread, &return_value );
    if( result != 0 ) {
        // error
        _exit( EXIT_FAILURE );
    }

    return 0;
}
int sageBlockStreamer::streamLoop() {
  while (streamerOn) {
    //int syncFrame = 0;
    //sage::printLog("\n========= wait for a frame ========\n");
    sageBlockFrame *buf = (sageBlockFrame *)doubleBuf->getBackBuffer();
    //sage::printLog("\n========= got a frame ==========\n");

    /* sungwon experimental */
    if ( affinityFlag ) {
#if ! defined (__APPLE__)
      cpu_set_t cpuset;
      CPU_ZERO(&cpuset);

      pthread_mutex_lock(&affinityMutex);
      std::list<int>::iterator it;
      for ( it=cpulist.begin(); it!=cpulist.end(); it++) {
        CPU_SET((*it), &cpuset);
      }
      affinityFlag = false; // reset flag
      pthread_mutex_unlock(&affinityMutex);

      if ( pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset) != 0 ) {
        perror("\n\npthread_setaffinity_np\n");
      }
      if ( pthread_getaffinity_np(pthread_self(), sizeof(cpuset), &cpuset) != 0 ) {
        perror("pthread_getaffinity_np");
      }
      else {
        fprintf(stderr,"SBS::%s() : current CPU affinity : ", __FUNCTION__);
        for (int i=0; i<CPU_SETSIZE; i++) {
          if (CPU_ISSET(i, &cpuset)) {
            fprintf(stderr, "%d ", i);
          }
        }
        fprintf(stderr,"\n");
      }
#endif
    }

    if ( config.swexp ) {
      //buf->updateBufferHeader(frameID, config.resX, config.resY);
      if ( nwObj->sendpixelonly(0, buf) <= 0 ) {
        streamerOn = false;
      }
      else {
        //fprintf(stderr, "sageBlockStreamer::%s() : frame %d sent \n", __FUNCTION__, frameID);
      }
      doubleBuf->releaseBackBuffer();
      frameID++;
      frameCounter++;
      continue;
    }

    char *msgStr = NULL;
    if (config.nodeNum > 1) {
      config.syncClientObj->sendSlaveUpdate(frameID);
      //sage::printLog("send update %d", config.rank);
      config.syncClientObj->waitForSyncData(msgStr);
      //sage::printLog("receive sync %d", config.rank);
      if (msgStr) {
        //std::cout << "reconfigure " << msgStr << std::endl;
        reconfigureStreams(msgStr);
        //firstConfiguration = false;
      }
    }
    else {
      pthread_mutex_lock(reconfigMutex);
      if (msgQueue.size() > 0) {
        msgStr = msgQueue.front();
        reconfigureStreams(msgStr);
        //std::cout << "config ID : " << configID << std::endl;
        msgQueue.pop_front();
        firstConfiguration = false;
      }
      pthread_mutex_unlock(reconfigMutex);
    }

    if (config.nodeNum == 1)
      checkInterval();

    if (streamPixelData(buf) < 0) {
      streamerOn = false;
    }
    doubleBuf->releaseBackBuffer();
    //std::cout << "pt1" << std::endl;
  }

  // for quiting other processes waiting a sync signal
  if (config.nodeNum > 1) {
    config.syncClientObj->sendSlaveUpdate(frameID);
  }

  sage::printLog("sageStreamer : network thread exit");

  return 0;
}
/**
 * @brief Linux-specific version of do_cpu_set_init().
 *
 * @param cpu_set The CPU set.
 *
 * @throw ThreadException or BadAlloc on error (the function returns void).
 */
static void cpu_set_init_Linux(cpu_set_p cpu_set)
{
    int i;
    os_cpu_set_t original_affinity_set;
    int num_cpus = sysconf(_SC_NPROCESSORS_CONF);

    /* get current affinity set so we can restore it when we're done */
    if ( sched_getaffinity( 0, sizeof(os_cpu_set_t), &original_affinity_set ) )
        throw EXCEPTION2(ThreadException,
                         "sched_getaffinity() failed with %s",
                         errno_to_str().data());

    /* test restoration */
    if ( sched_setaffinity( 0, sizeof(os_cpu_set_t), &original_affinity_set ) )
        throw EXCEPTION2(ThreadException,
                         "sched_setaffinity() failed with %s",
                         errno_to_str().data());

    /* allocate cpus */
    cpu_t cpus = (cpu_t)malloc( num_cpus * sizeof(struct cpu_s) );
    if ( cpus == NULL )
        throw EXCEPTION1(BadAlloc, "cpu array");
    for (i = 0; i < num_cpus; i++)
        /* initialize fields */
        CPU_ZERO( &cpus[i].cpu_set );

    /* find the CPUs on the system; _SC_NPROCESSORS_CONF may count CPUs we
     * cannot bind to (e.g. offline ones), so bound the probe at CPU_SETSIZE
     * instead of looping until num_cpus are found */
    int num_found = 0;
    int cpu_num;
    for (cpu_num = 0; cpu_num < CPU_SETSIZE; cpu_num++) {
        os_cpu_set_t test_set;
        CPU_ZERO( &test_set );
        CPU_SET ( cpu_num, &test_set );
        if ( !sched_setaffinity( 0, sizeof(os_cpu_set_t), &test_set ) ) {
            /* found a new CPU */
            cpus[num_found].cpu_unique_id = cpu_num;
            cpu_set_copy( &cpus[num_found].cpu_set, &test_set );
            num_found++;
            if ( num_found == num_cpus )
                break;
        }
    }

    /* restore original affinity set */
    if ( sched_setaffinity( 0, sizeof(os_cpu_set_t), &original_affinity_set ) )
        throw EXCEPTION2(ThreadException,
                         "sched_setaffinity() failed with %s",
                         errno_to_str().data());

    /* return parameters */
    cpu_set->cpuset_num_cpus = num_cpus;
    cpu_set->cpuset_cpus = cpus;
}
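/* The loop above relies on a save/probe/restore idiom: remember the current
 * mask, try to bind to a single candidate CPU id, and put the original mask
 * back afterwards. A minimal standalone sketch of that idiom (the function
 * name is illustrative, not from the original code): */
#define _GNU_SOURCE
#include <sched.h>

static int cpu_id_is_usable(int cpu)
{
    cpu_set_t saved, probe;
    int ok;

    if (sched_getaffinity(0, sizeof(saved), &saved))
        return 0;                                   /* cannot probe safely */
    CPU_ZERO(&probe);
    CPU_SET(cpu, &probe);
    ok = (sched_setaffinity(0, sizeof(probe), &probe) == 0);
    sched_setaffinity(0, sizeof(saved), &saved);    /* always restore */
    return ok;
}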
/**
 * worker main loop
 */
static int swFactoryProcess_worker_loop(swFactory *factory, int worker_pti)
{
    swServer *serv = factory->ptr;

    struct
    {
        long pti;
        swEventData req;
    } rdata;
    int n;

    int pipe_rd = serv->workers[worker_pti].pipe_worker;

#ifdef HAVE_CPU_AFFINITY
    if (serv->open_cpu_affinity == 1)
    {
        cpu_set_t cpu_set;
        CPU_ZERO(&cpu_set);
        CPU_SET(worker_pti % SW_CPU_NUM, &cpu_set);
        if (0 != sched_setaffinity(getpid(), sizeof(cpu_set), &cpu_set))
        {
            /* this path uses sched_setaffinity, not pthread_setaffinity_np */
            swWarn("sched_setaffinity() failed");
        }
    }
#endif

    //signal init
    swWorker_signal_init();
    //worker_id
    SwooleWG.id = worker_pti;

#ifndef SW_USE_RINGBUFFER
    int i;
    //for open_check_eof and open_check_length
    if (serv->open_eof_check || serv->open_length_check || serv->open_http_protocol)
    {
        SwooleWG.buffer_input = sw_malloc(sizeof(swString*) * serv->reactor_num);
        if (SwooleWG.buffer_input == NULL)
        {
            swError("malloc for SwooleWG.buffer_input failed.");
            return SW_ERR;
        }
        for (i = 0; i < serv->reactor_num; i++)
        {
            SwooleWG.buffer_input[i] = swString_new(serv->buffer_input_size);
            if (SwooleWG.buffer_input[i] == NULL)
            {
                swError("buffer_input init failed.");
                return SW_ERR;
            }
        }
    }
#endif

    if (serv->ipc_mode == SW_IPC_MSGQUEUE)
    {
        //preemptive mode: all workers read from the same queue type
        if (serv->dispatch_mode == SW_DISPATCH_QUEUE)
        {
            //must add 1 here (a message type of 0 is invalid)
            rdata.pti = serv->worker_num + 1;
        }
        else
        {
            //must add 1
            rdata.pti = worker_pti + 1;
        }
    }
    else
    {
        SwooleG.main_reactor = sw_malloc(sizeof(swReactor));
        if (SwooleG.main_reactor == NULL)
        {
            swError("[Worker] malloc for reactor failed.");
            return SW_ERR;
        }
        if (swReactor_auto(SwooleG.main_reactor, SW_REACTOR_MAXEVENTS) < 0)
        {
            swError("[Worker] create worker_reactor failed.");
            return SW_ERR;
        }
        swSetNonBlock(pipe_rd);
        SwooleG.main_reactor->ptr = serv;
        SwooleG.main_reactor->add(SwooleG.main_reactor, pipe_rd, SW_FD_PIPE);
        SwooleG.main_reactor->setHandle(SwooleG.main_reactor, SW_FD_PIPE, swFactoryProcess_worker_onPipeReceive);

#ifdef HAVE_SIGNALFD
        if (SwooleG.use_signalfd)
        {
            swSignalfd_setup(SwooleG.main_reactor);
        }
#endif
    }

    if (serv->max_request < 1)
    {
        SwooleWG.run_always = 1;
    }
    else
    {
        worker_task_num = serv->max_request;
        worker_task_num += swRandom(worker_pti);
    }

    //worker start
    swServer_worker_onStart(serv);

    if (serv->ipc_mode == SW_IPC_MSGQUEUE)
    {
        while (SwooleG.running > 0)
        {
            n = serv->read_queue.out(&serv->read_queue, (swQueue_data *) &rdata, sizeof(rdata.req));
            if (n < 0)
            {
                if (errno == EINTR)
                {
                    if (SwooleG.signal_alarm)
                    {
                        swTimer_select(&SwooleG.timer);
                    }
                }
                else
                {
                    swWarn("[Worker%ld] read_queue->out() failed. Error: %s [%d]", rdata.pti, strerror(errno), errno);
                }
                continue;
            }
            swFactoryProcess_worker_excute(factory, &rdata.req);
        }
    }
    else
    {
        struct timeval timeo;
        timeo.tv_sec = SW_REACTOR_TIMEO_SEC;
        timeo.tv_usec = SW_REACTOR_TIMEO_USEC;
        SwooleG.main_reactor->wait(SwooleG.main_reactor, &timeo);
    }

    //worker shutdown
    swServer_worker_onStop(serv);
    swTrace("[Worker]max request");

    return SW_OK;
}
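/* The worker pinning above is the classic round-robin idiom: worker i is
 * bound to CPU (i mod N). A self-contained sketch under that assumption;
 * bind_worker_round_robin() is illustrative, not a Swoole API. */
#define _GNU_SOURCE
#include <sched.h>
#include <unistd.h>

static int bind_worker_round_robin(int worker_id)
{
    long ncpu = sysconf(_SC_NPROCESSORS_ONLN);
    cpu_set_t set;

    if (ncpu < 1)
        ncpu = 1;                               /* defensive fallback */
    CPU_ZERO(&set);
    CPU_SET((int)(worker_id % ncpu), &set);     /* wrap worker ids over CPUs */
    return sched_setaffinity(0, sizeof(set), &set);
}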
static void *magma_ssytrd_hb2st_parallel_section(void *arg)
{
    magma_int_t my_core_id     = ((magma_sbulge_id_data*)arg) -> id;
    magma_sbulge_data* data    = ((magma_sbulge_id_data*)arg) -> data;

    magma_int_t allcores_num   = data -> threads_num;
    magma_int_t n              = data -> n;
    magma_int_t nb             = data -> nb;
    magma_int_t nbtiles        = data -> nbtiles;
    magma_int_t grsiz          = data -> grsiz;
    magma_int_t Vblksiz        = data -> Vblksiz;
    magma_int_t compT          = data -> compT;
    float *A                   = data -> A;
    magma_int_t lda            = data -> lda;
    float *V                   = data -> V;
    magma_int_t ldv            = data -> ldv;
    float *TAU                 = data -> TAU;
    float *T                   = data -> T;
    magma_int_t ldt            = data -> ldt;
    volatile magma_int_t* prog = data -> prog;
    pthread_barrier_t* barrier = &(data -> barrier);

    magma_int_t sys_corenbr = 1;
    float timeB = 0.0, timeT = 0.0;

#if defined(SETAFFINITY)
    // bind this thread to its own core
    cpu_set_t set;
    CPU_ZERO( &set );
    CPU_SET( my_core_id, &set );
    sched_setaffinity( 0, sizeof(set), &set );
#endif

    if (compT == 1) {
        /* compute the Q1 overlapped with the bulge chasing+T.
         * if all_cores_num=1 it call Q1 on GPU and then bulgechasing.
         * otherwise the first thread run Q1 on GPU and
         * the other threads run the bulgechasing.
         * */
        if (allcores_num == 1) {
            //=========================
            //    bulge chasing
            //=========================
            timeB = magma_wtime();
            magma_stile_bulge_parallel(0, 1, A, lda, V, ldv, TAU, n, nb, nbtiles, grsiz, Vblksiz, prog);
            timeB = magma_wtime()-timeB;
            printf("  Finish BULGE  timing= %f \n", timeB);

            //=========================
            // compute the T's to be used when applying Q2
            //=========================
            timeT = magma_wtime();
            magma_stile_bulge_computeT_parallel(0, 1, V, ldv, TAU, T, ldt, n, nb, Vblksiz);
            timeT = magma_wtime()-timeT;
            printf("  Finish T's    timing= %f \n", timeT);
        } else { // allcore_num > 1
            magma_int_t id  = my_core_id;
            magma_int_t tot = allcores_num;

            //=========================
            //    bulge chasing
            //=========================
            if (id == 0)
                timeB = magma_wtime();

            magma_stile_bulge_parallel(id, tot, A, lda, V, ldv, TAU, n, nb, nbtiles, grsiz, Vblksiz, prog);
            pthread_barrier_wait(barrier);

            if (id == 0) {
                timeB = magma_wtime()-timeB;
                printf("  Finish BULGE  timing= %f \n", timeB);
            }

            //=========================
            // compute the T's to be used when applying Q2
            //=========================
            if (id == 0)
                timeT = magma_wtime();

            magma_stile_bulge_computeT_parallel(id, tot, V, ldv, TAU, T, ldt, n, nb, Vblksiz);
            pthread_barrier_wait(barrier);

            if (id == 0) {
                timeT = magma_wtime()-timeT;
                printf("  Finish T's    timing= %f \n", timeT);
            }
        } // allcore == 1
    } else { // WANTZ = 0
        //=========================
        //    bulge chasing
        //=========================
        if (my_core_id == 0)
            timeB = magma_wtime();

        magma_stile_bulge_parallel(my_core_id, allcores_num, A, lda, V, ldv, TAU, n, nb, nbtiles, grsiz, Vblksiz, prog);
        pthread_barrier_wait(barrier);

        if (my_core_id == 0) {
            timeB = magma_wtime()-timeB;
            printf("  Finish BULGE  timing= %f \n", timeB);
        }
    } // WANTZ > 0

#if defined(SETAFFINITY)
    // unbind: restore affinity to all online cores
    sys_corenbr = sysconf(_SC_NPROCESSORS_ONLN);
    CPU_ZERO( &set );
    for (magma_int_t i = 0; i < sys_corenbr; i++)
        CPU_SET( i, &set );
    sched_setaffinity( 0, sizeof(set), &set );
#endif

    return 0;
}
/* HVM mode suspension. */
static void
xctrl_suspend()
{
#ifdef SMP
	cpuset_t cpu_suspend_map;
#endif
	int suspend_cancelled;

	EVENTHANDLER_INVOKE(power_suspend);

	if (smp_started) {
		thread_lock(curthread);
		sched_bind(curthread, 0);
		thread_unlock(curthread);
	}
	KASSERT((PCPU_GET(cpuid) == 0), ("Not running on CPU#0"));

	/*
	 * Clear our XenStore node so the toolstack knows we are
	 * responding to the suspend request.
	 */
	xs_write(XST_NIL, "control", "shutdown", "");

	/*
	 * Be sure to hold Giant across DEVICE_SUSPEND/RESUME since non-MPSAFE
	 * drivers need this.
	 */
	mtx_lock(&Giant);
	if (DEVICE_SUSPEND(root_bus) != 0) {
		mtx_unlock(&Giant);
		printf("%s: device_suspend failed\n", __func__);
		return;
	}
	mtx_unlock(&Giant);

#ifdef SMP
	CPU_ZERO(&cpu_suspend_map);	/* silence gcc */
	if (smp_started) {
		/*
		 * Suspend other CPUs. This prevents IPIs while we
		 * are resuming, and will allow us to reset per-cpu
		 * vcpu_info on resume.
		 */
		cpu_suspend_map = all_cpus;
		CPU_CLR(PCPU_GET(cpuid), &cpu_suspend_map);
		if (!CPU_EMPTY(&cpu_suspend_map))
			suspend_cpus(cpu_suspend_map);
	}
#endif

	/*
	 * Prevent any races with evtchn_interrupt() handler.
	 */
	disable_intr();
	intr_suspend();
	xen_hvm_suspend();

	suspend_cancelled = HYPERVISOR_suspend(0);

	xen_hvm_resume(suspend_cancelled != 0);
	intr_resume(suspend_cancelled != 0);
	enable_intr();

	/*
	 * Reset grant table info.
	 */
	gnttab_resume();

#ifdef SMP
	if (smp_started && !CPU_EMPTY(&cpu_suspend_map)) {
		/*
		 * Now that event channels have been initialized,
		 * resume CPUs.
		 */
		resume_cpus(cpu_suspend_map);
	}
#endif

	/*
	 * FreeBSD really needs to add DEVICE_SUSPEND_CANCEL or
	 * similar.
	 */
	mtx_lock(&Giant);
	DEVICE_RESUME(root_bus);
	mtx_unlock(&Giant);

	if (smp_started) {
		thread_lock(curthread);
		sched_unbind(curthread);
		thread_unlock(curthread);
	}

	EVENTHANDLER_INVOKE(power_resume);

	if (bootverbose)
		printf("System resumed after suspension\n");
}
void *semathread(void *param)
{
	int mustgetcpu = 0;
	int first = 1;
	struct params *par = param;
	cpu_set_t mask;
	int policy = SCHED_FIFO;
	struct sched_param schedp;

	memset(&schedp, 0, sizeof(schedp));
	schedp.sched_priority = par->priority;
	sched_setscheduler(0, policy, &schedp);

	if (par->cpu != -1) {
		CPU_ZERO(&mask);
		CPU_SET(par->cpu, &mask);
		if (sched_setaffinity(0, sizeof(mask), &mask) == -1)
			fprintf(stderr, "WARNING: Could not set CPU affinity "
				"to CPU #%d\n", par->cpu);
	} else
		mustgetcpu = 1;

	par->tid = gettid();

	while (!par->shutdown) {
		if (par->sender) {
			pthread_mutex_lock(&syncmutex[par->num]);

			/* Release lock: Start of latency measurement ... */
			gettimeofday(&par->unblocked, NULL);
			pthread_mutex_unlock(&testmutex[par->num]);
			par->samples++;
			if (par->max_cycles && par->samples >= par->max_cycles)
				par->shutdown = 1;
			if (mustgetcpu)
				par->cpu = get_cpu();
		} else {
			/* Receiver */
			if (!first) {
				pthread_mutex_lock(&syncmutex[par->num]);
				first = 1;
			}
			pthread_mutex_lock(&testmutex[par->num]);

			/* ... Got the lock: End of latency measurement */
			gettimeofday(&par->received, NULL);
			par->samples++;
			timersub(&par->received, &par->neighbor->unblocked,
				 &par->diff);

			if (par->diff.tv_usec < par->mindiff)
				par->mindiff = par->diff.tv_usec;
			if (par->diff.tv_usec > par->maxdiff)
				par->maxdiff = par->diff.tv_usec;
			par->sumdiff += (double) par->diff.tv_usec;
			if (par->tracelimit && par->maxdiff > par->tracelimit) {
				char tracing_enabled_file[MAX_PATH];

				strcpy(tracing_enabled_file, get_debugfileprefix());
				strcat(tracing_enabled_file, "tracing_enabled");
				int tracing_enabled =
				    open(tracing_enabled_file, O_WRONLY);
				if (tracing_enabled >= 0) {
					write(tracing_enabled, "0", 1);
					close(tracing_enabled);
				} else
					snprintf(par->error, sizeof(par->error),
						 "Could not access %s\n",
						 tracing_enabled_file);
				par->shutdown = 1;
				par->neighbor->shutdown = 1;
			}

			if (par->max_cycles && par->samples >= par->max_cycles)
				par->shutdown = 1;
			if (mustgetcpu)
				par->cpu = get_cpu();
			nanosleep(&par->delay, NULL);
			pthread_mutex_unlock(&syncmutex[par->num]);
		}
	}
	par->stopped = 1;
	return NULL;
}
int server_main(void)
{
	int fd, r, len;
	void *binder, *cookie;
	bwr_t bwr;
	unsigned char rbuf[RBUF_SIZE], *p;
	bcmd_txn_t *reply;
	tdata_t *tdata = NULL;
	inst_buf_t *inst;
	inst_entry_t copy;

	if (!share_cpus) {
		cpu_set_t cpuset;

		CPU_ZERO(&cpuset);
		CPU_SET(0, &cpuset);
		r = sched_setaffinity(0, sizeof(cpuset), &cpuset);
		if (!r)
			printf("server is bound to CPU 0\n");
		else
			fprintf(stderr, "server failed to be bound to CPU 0\n");
	}

	fd = open("/dev/binder", O_RDWR);
	if (fd < 0) {
		fprintf(stderr, "failed to open binder device\n");
		return -1;
	}

#if (!defined(INLINE_TRANSACTION_DATA))
	if (mmap(NULL, 128 * 1024, PROT_READ, MAP_PRIVATE, fd, 0) == MAP_FAILED) {
		fprintf(stderr, "server failed to mmap shared buffer\n");
		return -1;
	}
#endif

	binder = SVC_BINDER;
	cookie = SVC_COOKIE;

	r = add_service(fd, binder, cookie, service, sizeof(service) / 2);
	if (r < 0) {
		printf("server failed to add instrumentation service\n");
		return -1;
	}
	printf("server added instrumentation service\n");

	r = start_looper(fd);
	if (r < 0) {
		printf("server failed to start looper\n");
		return -1;
	}

	bwr.read_buffer = (unsigned long)rbuf;

	while (1) {
		bwr.read_size = sizeof(rbuf);
		bwr.read_consumed = 0;
		bwr.write_size = 0;

		ioctl_read++;
		r = ioctl(fd, BINDER_WRITE_READ, &bwr);
		if (r < 0) {
			fprintf(stderr, "server failed ioctl\n");
			return r;
		}
		INST_RECORD(&copy);

		p = rbuf;
		len = bwr.read_consumed;
		while (len > 0) {
			r = server_parse_command(p, len, &tdata, &reply);
			//hexdump(tdata, bwr.read_consumed);
			if (r < 0)
				return r;
			p += r;
			len -= r;

#if (defined(SIMULATE_FREE_BUFFER) || !defined(INLINE_TRANSACTION_DATA))
			if (tdata)
				FREE_BUFFER(fd, (void *)tdata->data.ptr.buffer);
#endif
			if (!reply) {
				//hexdump(rbuf, bwr.read_consumed);
				continue;
			}

			inst = (inst_buf_t *)reply->tdata.data.ptr.buffer;
			INST_ENTRY_COPY(inst, "S_RECV", &copy);
			//acsiidump(inst,sizeof(*inst)+data_SZ);

			bwr.write_buffer = (unsigned long)reply;
			bwr.write_size = sizeof(*reply);
			bwr.write_consumed = 0;
			bwr.read_size = 0;

			INST_ENTRY(inst, "S_REPLY");

			ioctl_write++;
			r = ioctl(fd, BINDER_WRITE_READ, &bwr);
			if (r < 0) {
				fprintf(stderr, "server failed reply ioctl\n");
				return r;
			}

#if (!defined(INLINE_TRANSACTION_DATA))
			free(reply);
#endif
		}
	}

	/* not reached: the loop above only exits via return */
	free(reply);
	return 0;
}
REALIGN
#endif
void WrapperInit(void)
{
#ifndef SWAP_WINDOW_AND_GL_THREAD
	initializeSDL2();
#endif
	BOOL useOnlyOneCPU = true;
	uint32_t msaa = 0;
	FILE *f = NULL;

	SDL_JoystickEventState(SDL_IGNORE);
	SDL_ShowCursor(false);

#ifdef WIN32
	const char *homeDir = getenv("AppData");
#else
	const char *homeDir = getenv("HOME");
#endif
	if (homeDir && *homeDir)
	{
		static const char subdirsToCreate[5][10] = {
			"config", "save", "stats", "stats/prh", "tmptrk"
		};
		char buffer[MAX_PATH];
		uint32_t pos, i;

		/* Creating ~/.nfs2se directory and subdirectories */
		strcpy(buffer, homeDir);
#ifdef __APPLE__
		strcat(buffer, "/Library/Application Support/nfs2se/");
#else
		strcat(buffer, "/.nfs2se/");
#endif
		mkdir_wrap(buffer, 0755);
		pos = strlen(buffer);
		for (i = 0; i < 5; ++i)
		{
			strcpy(buffer + pos, subdirsToCreate[i]);
			mkdir_wrap(buffer, 0755);
		}

		/* Checking for nfs2se.conf and copying it at first time to home dir */
		strcpy(buffer + pos, "nfs2se.conf");
		FILE *fSrc = fopen("nfs2se.conf.template", "r");
		if (fSrc)
		{
			FILE *fDst = fopen(buffer, "r");
			if (!fDst && (fDst = fopen(buffer, "w")))
			{
				char *buffer2 = (char *)malloc(1024);
				uint32_t bread;
				do
				{
					bread = fread(buffer2, 1, 1024, fSrc);
					fwrite(buffer2, 1, bread, fDst);
				} while (bread == 1024);
				free(buffer2);
			}
			if (fDst) /* the destination may have failed to open */
				fclose(fDst);
			fclose(fSrc);
		}

		/* Open the config file */
		if ((f = fopen(buffer, "r")))
		{
			buffer[pos] = '\0';
			settingsDir = strdup(buffer);
		}
	}

#ifndef WIN32
	uint32_t i;
	event_mutex = SDL_CreateMutex();
	event_cond = SDL_CreateCond();
	signal(SIGINT, signal_handler);
	signal(SIGILL, signal_handler);
	signal(SIGABRT, signal_handler);
	signal(SIGBUS, signal_handler);
	signal(SIGFPE, signal_handler);
	signal(SIGUSR1, signal_handler);
	signal(SIGSEGV, signal_handler);
	signal(SIGUSR2, signal_handler);
	signal(SIGPIPE, signal_handler);
	signal(SIGALRM, signal_handler);
	signal(SIGTERM, signal_handler);
	atexit(exit_func);
#endif

	if (!f)
		f = fopen("nfs2se.conf", "r");
	if (!f)
		fprintf(stderr, "Cannot open configuration file \"nfs2se.conf\"\n");
	else
	{
		BOOL canParseNextLine = true;
		char line[128];
		while (fgets(line, sizeof line, f))
		{
			uint32_t nPos = strlen(line) - 1;
			if (line[nPos] != '\n')
			{
				canParseNextLine = false;
				continue;
			}
			if (line[0] == '\0' || line[0] == ' ' || line[0] == '#')
				continue;
			if (!canParseNextLine)
			{
				canParseNextLine = true;
				continue;
			}
			line[nPos] = '\0';
			if (!strncasecmp("UseOnlyOneCPU=", line, 14))
				useOnlyOneCPU = !!atoi(line + 14);
			else if (!strncasecmp("StartInFullScreen=", line, 18))
				startInFullScreen = !!atoi(line + 18);
			else if (!strncasecmp("VSync=", line, 6))
				vSync = atoi(line + 6);
			else if (!strncasecmp("MSAA=", line, 5))
			{
				msaa = atoi(line + 5);
				if (msaa > 16 || (msaa & (msaa - 1)))
					msaa = 0;
			}
			else if (!strncasecmp("UseWindowSizeForFullScreen=", line, 27))
			{
				if (atoi(line + 27))
					fullScreenFlag = SDL_WINDOW_FULLSCREEN;
			}
			else if (!strncasecmp("WindowSize=", line, 11))
				sscanf(line + 11, "%dx%d", &winWidth, &winHeight);
			else if (!strncasecmp("KeepAspectRatio=", line, 16))
				sscanf(line + 16, "%d", &keepAspectRatio);
			else if (!strncasecmp("Joystick0AxisValueShift=", line, 24))
			{
				joystickAxisValueShift[0] = atoi(line + 24);
				if (joystickAxisValueShift[0] < 0 || joystickAxisValueShift[0] > 32767)
					joystickAxisValueShift[0] = 0;
			}
			else if (!strncasecmp("Joystick1AxisValueShift=", line, 24))
			{
				joystickAxisValueShift[1] = atoi(line + 24);
				if (joystickAxisValueShift[1] < 0 || joystickAxisValueShift[1] > 32767)
					joystickAxisValueShift[1] = 0;
			}
			else if (!strncasecmp("Joystick0Axes=", line, 14))
				sscanf(line + 14, "%d,%d,%d,%d:%d,%d,%d,%d",
				       joystickAxes[0]+0, joystickAxes[0]+1, joystickAxes[0]+2, joystickAxes[0]+3,
				       joystickAxes[0]+4, joystickAxes[0]+5, joystickAxes[0]+6, joystickAxes[0]+7);
			else if (!strncasecmp("Joystick1Axes=", line, 14))
				sscanf(line + 14, "%d,%d,%d,%d:%d,%d,%d,%d",
				       joystickAxes[1]+0, joystickAxes[1]+1, joystickAxes[1]+2, joystickAxes[1]+3,
				       joystickAxes[1]+4, joystickAxes[1]+5, joystickAxes[1]+6, joystickAxes[1]+7);
			else if (!strncasecmp("Joystick0Buttons=", line, 17))
				sscanf(line + 17, "%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d",
				       joystickButtons[0]+0, joystickButtons[0]+1, joystickButtons[0]+2, joystickButtons[0]+3,
				       joystickButtons[0]+4, joystickButtons[0]+5, joystickButtons[0]+6, joystickButtons[0]+7,
				       joystickButtons[0]+8, joystickButtons[0]+9, joystickButtons[0]+10, joystickButtons[0]+11,
				       joystickButtons[0]+12, joystickButtons[0]+13, joystickButtons[0]+14);
			else if (!strncasecmp("Joystick1Buttons=", line, 17))
				sscanf(line + 17, "%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d",
				       joystickButtons[1]+0, joystickButtons[1]+1, joystickButtons[1]+2, joystickButtons[1]+3,
				       joystickButtons[1]+4, joystickButtons[1]+5, joystickButtons[1]+6, joystickButtons[1]+7,
				       joystickButtons[1]+8, joystickButtons[1]+9, joystickButtons[1]+10, joystickButtons[1]+11,
				       joystickButtons[1]+12, joystickButtons[1]+13, joystickButtons[1]+14);
			else if (!strncasecmp("UseSpringForceFeedbackEffect=", line, 29))
				useSpringForceFeedbackEffect = atoi(line + 29);
#ifdef WIN32
			else if (!strncasecmp("WindowsForceFeedbackDevice=", line, 27))
				windowsForceFeedbackDevice = atoi(line + 27);
#endif
			else if (!strncasecmp("LinearSoundInterpolation=", line, 25))
				linearSoundInterpolation = !!atoi(line + 25);
			else if (!strncasecmp("UseGlBleginGlEnd=", line, 17))
				useGlBleginGlEnd = !!atoi(line + 17);
			else if (!strncasecmp("Port1=", line, 6))
				PORT1 = atoi(line + 6);
			else if (!strncasecmp("Port2=", line, 6))
				PORT2 = atoi(line + 6);
			else if (!strncasecmp("Bcast=", line, 6))
			{
				uint32_t a, b, c, d;
				/* %u matches the unsigned operands */
				if (sscanf(line + 6, "%u.%u.%u.%u", &a, &b, &c, &d) && a <= 0xFF && b <= 0xFF && c <= 0xFF && d <= 0xFF)
					broadcast = d << 24 | c << 16 | b << 8 | a;
			}
#ifndef WIN32
			else if (!strncasecmp("LinuxCOM1=", line, 10))
				serialPort[0] = strdup(line + 10);
			else if (!strncasecmp("LinuxCOM2=", line, 10))
				serialPort[1] = strdup(line + 10);
			else if (!strncasecmp("LinuxCOM3=", line, 10))
				serialPort[2] = strdup(line + 10);
			else if (!strncasecmp("LinuxCOM4=", line, 10))
				serialPort[3] = strdup(line + 10);
#endif
		}
		fclose(f);
	}

	if (msaa)
	{
		SDL_GL_SetAttribute(SDL_GL_MULTISAMPLEBUFFERS, msaa == 1 ? 0 : 1);
		SDL_GL_SetAttribute(SDL_GL_MULTISAMPLESAMPLES, msaa);
	}

#ifndef WIN32
	for (i = 0; i < 4; ++i)
	{
		if (!serialPort[i])
		{
			char tmp[11];
			sprintf(tmp, "/dev/ttyS%d", i);
			serialPort[i] = strdup(tmp);
		}
	}
#endif

	if (useOnlyOneCPU)
	{
#if defined WIN32
		SetProcessAffinityMask(GetCurrentProcess(), 1);
#elif !defined linux
	#warning "TODO: thread affinity"
#else
		cpu_set_t set;
		CPU_ZERO(&set);
		CPU_SET(0, &set);
		if (sched_setaffinity(0, sizeof set, &set))
			perror("sched_setaffinity");
#endif
	}

	checkGameDirs();
}
int client_main(void)
{
	int fd, r, n, m, wait = 0, retries, pid;
	void *binder, *cookie;
	bcmd_txn_t *txn;
	bwr_t bwr;
	inst_buf_t *inst, *inst_reply;
	inst_entry_t *entry, copy;
	unsigned char rbuf[RBUF_SIZE], *ibuf, *p;
	struct timeval ref, delta;
	char labels[INST_MAX_ENTRIES][8];
	unsigned long long total_usecs[INST_MAX_ENTRIES];
	unsigned long long min[INST_MAX_ENTRIES], max[INST_MAX_ENTRIES], record[INST_MAX_ENTRIES];
	FILE *fp;

	if (!share_cpus) {
		cpu_set_t cpuset;

		CPU_ZERO(&cpuset);
		CPU_SET(id + 1, &cpuset);
		r = sched_setaffinity(0, sizeof(cpuset), &cpuset);
		if (!r)
			printf("client %d is bound to CPU %d\n", id, id + 1);
		else
			fprintf(stderr, "client %d failed to be bound to CPU %d\n", id, id + 1);
	}

	fd = open("/dev/binder", O_RDWR);
	if (fd < 0) {
		fprintf(stderr, "client %d failed to open binder device\n", id);
		return -1;
	}

#if (!defined(INLINE_TRANSACTION_DATA))
	if (mmap(NULL, 128 * 1024, PROT_READ, MAP_PRIVATE, fd, 0) == MAP_FAILED) {
		fprintf(stderr, "client %d failed to mmap shared buffer\n", id);
		return -1;
	}
#endif

	while (1) {
		r = lookup_service(fd, service, sizeof(service) / 2, &binder, &cookie);
		if (r < 0) {
			fprintf(stderr, "client %d failed to find the instrumentation service\n", id);
			return -1;
		} else if (r > 0)
			break;

		if (wait++ > 1)
			fprintf(stderr, "client %d still waiting on instrumentation service to be ready\n", id);
		sleep(1);
	}
	printf("client %d found instrumentation service\n", id);

	txn = create_transaction(0, binder, cookie, 0, NULL, sizeof(inst_buf_t) + data_SZ, NULL, 0);
	if (!txn) {
		fprintf(stderr, "client %d failed to prepare transaction buffer\n", id);
		return -1;
	}

	bwr.write_buffer = (unsigned long)txn;
	bwr.read_buffer = (unsigned long)rbuf;

	inst = (inst_buf_t *)txn->tdata.data.ptr.buffer;
	INST_INIT(inst);

	ibuf = malloc((iterations + 1) * sizeof(inst_entry_t) * INST_MAX_ENTRIES);
	if (!ibuf) {
		fprintf(stderr, "client %d failed to allocate instrumentation buffer\n", id);
		return -1; /* the buffer is written to below, so bail out here */
	}
	p = ibuf;

	n = iterations + 1;
	while (n-- > 0) {
		INST_BEGIN(inst);

		retries = 2;

		bwr.write_size = sizeof(*txn);
		bwr.write_consumed = 0;
		bwr.read_size = sizeof(rbuf);
		bwr.read_consumed = 0;

		INST_ENTRY(inst, "C_SEND");

		ioctl_write++;
wait_reply:
		ioctl_read++;
		r = ioctl(fd, BINDER_WRITE_READ, &bwr);
		if (r < 0) {
			fprintf(stderr, "client %d failed ioctl\n", id);
			return r;
		}
		INST_RECORD(&copy);

		r = client_parse_command(id, rbuf, bwr.read_consumed, &inst_reply);
		if (r < 0)
			return r;

		if (!inst_reply) {
			//hexdump(rbuf, bwr.read_consumed);
			if (retries-- > 0) {
				bwr.write_size = 0;
				bwr.read_consumed = 0;
				goto wait_reply;
			} else {
				fprintf(stderr, "client %d failed to receive reply\n", id);
				return -1;
			}
		}

		memcpy(inst, inst_reply, sizeof(*inst) + data_SZ);
		//acsiidump(inst_reply,sizeof(*inst)+data_SZ);
		INST_ENTRY_COPY(inst, "C_RECV", &copy);
		INST_END(inst, &p);

#if (defined(SIMULATE_FREE_BUFFER) || !defined(INLINE_TRANSACTION_DATA))
		if (FREE_BUFFER(fd, inst_reply) < 0) {
			fprintf(stderr, "client %d: failed to free shared buffer\n", id);
			return -1;
		}
#endif
	}

	if (output_file) {
		if (clients > 1) {
			char *p = malloc(strlen(output_file) + 16);
			if (!p) {
				fprintf(stderr, "client %d failed to alloc memory for filename\n", id);
				return -1;
			}
			sprintf(p, "%s-%d", output_file, id);
			output_file = p;
		}

		fp = fopen(output_file, "w");
		if (!fp) {
			fprintf(stderr, "client %d failed to open dump file\n", id);
			return -1;
		}
	} else
		fp = stdout;

	memset(total_usecs, 0, sizeof(total_usecs));
	memset(max, 0, sizeof(max));
	memset(min, 255, sizeof(min)); /* all bytes 0xff == ULLONG_MAX */
	entry = (inst_entry_t *)ibuf;
	int i;
	//acsiidump(ibuf,80);
	for (n = 0; n < inst->seq; n++) {
		for (m = 0; m < inst->next_entry; m++) {
			if (n > 0) {
				if (m == 0) {
					if (time_ref == 0) // absolute time
						ref.tv_sec = ref.tv_usec = 0;
					else
						ref = entry->tv;
				}
				delta.tv_sec = entry->tv.tv_sec - ref.tv_sec;
				delta.tv_usec = entry->tv.tv_usec - ref.tv_usec;
				if (delta.tv_usec < 0) {
					delta.tv_sec--;
					delta.tv_usec += 1000000;
				}
				record[m] = delta.tv_sec * 1000000 + delta.tv_usec;
				//fprintf(fp, "%ld.%06ld\t", delta.tv_sec, delta.tv_usec);

				if (time_ref > 0) {
					total_usecs[m] += delta.tv_sec * 1000000 + delta.tv_usec;
					if (m == inst->next_entry - 1) {
						if (min[m] > record[m]) {
							for (i = 0; i < inst->next_entry; i++)
								min[i] = record[i];
						}
						if (max[m] < record[m]) {
							for (i = 0; i < inst->next_entry; i++)
								max[i] = record[i];
						}
					}
				}

				if (time_ref > 1) // relative to the previous entry
					ref = entry->tv;
			} else {
				//fprintf(fp, "%8s\t", entry->label);
				if (time_ref > 0)
					strcpy(labels[m], entry->label);
			}
			entry++;
		}
		//fprintf(fp, "\n");
	}
	if (fp != stdout)
		fclose(fp);
	free(txn);

	pid = getpid();
	printf("client(%d) %d: ioctl read: %u\n", pid, id, ioctl_read);
	printf("client(%d) %d: ioctl write: %u\n", pid, id, ioctl_write);
	printf("client(%d) %d: ioctl buffer: %u\n", pid, id, ioctl_buffer);

	if (time_ref > 0 && iterations > 0) {
		int pos = 0, minpos = 0, maxpos = 0;
		char *buf = malloc(64 * m);
		char *minbuf = malloc(64 * m);
		char *maxbuf = malloc(64 * m);

		if (!buf || !minbuf || !maxbuf)
			return 1;
		for (n = 0; n < m; n++) {
			/* %llu: these counters are unsigned long long */
			pos += sprintf(buf + pos, "\t%s: %llu.%02lluus\n", labels[n],
				       (total_usecs[n] / iterations),
				       (total_usecs[n] % iterations) * 100 / iterations);
			minpos += sprintf(minbuf + minpos, "\t%s: %lluus\n", labels[n], min[n]);
			maxpos += sprintf(maxbuf + maxpos, "\t%s: %lluus\n", labels[n], max[n]);
		}
		printf("client %d: average results:\n%s\n", id, buf);
		printf("client %d: min results:\n%s\n", id, minbuf);
		printf("client %d: max results:\n%s\n", id, maxbuf);
		free(buf);
		free(minbuf);
		free(maxbuf);
	}

	return 0;
}
static void *t_srio_send(void *arg)
{
	struct task_arg_type *args = arg;
	struct dma_ch *dmadev = args->dmadev;
	struct srio_port_data_thread send_data = args->port_data_thread;
	uint32_t size = PACKET_LENGTH;
	uint32_t port = args->port;
	int err = 0;
	uint8_t srio_type = args->srio_type;
	uint64_t src_phys, dest_phys;
	uint32_t buf_number = 0, usebuf_number = 0;
	uint32_t total_buf = BUFFER_NUM;
	uint32_t send_num = 0;
	struct atb_clock *atb_clock = NULL;
	uint64_t atb_multiplier = 0;
	int atb_flag = 0;
	uint32_t test_packets_num = cmd_param.test_num;
	cpu_set_t cpuset;

	CPU_ZERO(&cpuset);
	CPU_SET(args->cpu, &cpuset);
	err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset);
	if (err) { /* check the result, as the receive thread does */
		printf("(%d)fail:pthread_setaffinity_np()\n", args->cpu);
		return NULL;
	}

	while (1) {
		buf_number = *(volatile uint32_t *)send_data.virt.write_recv_data;
		for (; usebuf_number < buf_number;) {
			/* ctx add send packet */
			uint32_t offset = (usebuf_number % total_buf) * sizeof(struct srio_packet);

			if (srio_type != 3) {
				src_phys = send_data.phys.write_data_prep + offset;
				dest_phys = send_data.port_info.range_start + offset;
			} else {
				src_phys = send_data.port_info.range_start + offset;
				dest_phys = send_data.phys.read_recv_data + offset;
			}
			memcpy(send_data.virt.write_data_prep + offset, &size, sizeof(uint32_t));
			memset((send_data.virt.write_data_prep + offset + 4), usebuf_number + 1, (size - 4));

			/* ctx send */
			if (test_packets_num && (!atb_flag)) {
				atb_flag = 1;
				atb_clock = malloc(sizeof(struct atb_clock));
				if (!atb_clock) {
					printf("show performance error!\n");
					exit(1);
				}
				atb_multiplier = atb_get_multiplier();
				atb_clock_init(atb_clock);
				atb_clock_reset(atb_clock);
			}
			if (test_packets_num && atb_flag)
				atb_clock_start(atb_clock);
			fsl_dma_direct_start(dmadev, src_phys, dest_phys, size);
			err = fsl_dma_wait(dmadev);
			if (err < 0) {
				printf("port %d: dma task error!\n", port + 1);
				goto err_dma;
			}
			if (test_packets_num && atb_flag)
				atb_clock_stop(atb_clock);
			test_packets_num--;
			if (!test_packets_num && cmd_param.test_num) {
				printf("CPU:%d PORT:%d %s %-15u %s %-15f %s %-15f %s %-15f\n",
				       args->cpu, port,
				       perf_str[0], size,
				       perf_str[1],
				       atb_to_seconds(atb_clock_total(atb_clock), atb_multiplier)
				           / cmd_param.test_num * ATB_MHZ,
				       perf_str[2],
				       size * 8.0 * cmd_param.test_num /
				           (atb_to_seconds(atb_clock_total(atb_clock), atb_multiplier) * 1000000000.0),
				       perf_str[3],
				       size * 8.0 /
				           (atb_to_seconds(atb_clock_min(atb_clock), atb_multiplier) * 1000000000.0));
				atb_clock_finish(atb_clock);
				test_packets_num = cmd_param.test_num;
				atb_flag = 0;
			}

			/* send success */
			usebuf_number++;
			memset((send_data.virt.write_data_prep + offset), 0, sizeof(struct srio_packet));
			send_num++;
			if (send_num == SEND_TOTAL_NUM) {
				memcpy((send_data.virt.write_data_prep + SEND_NUM_OFFSET),
				       &usebuf_number, sizeof(uint32_t));
				src_phys = send_data.phys.write_data_prep + SEND_NUM_OFFSET;
				dest_phys = send_data.port_info.range_start + SEND_NUM_OFFSET;
				fsl_dma_direct_start(dmadev, src_phys, dest_phys, sizeof(uint32_t));
				err = fsl_dma_wait(dmadev);
				if (err < 0) {
					printf("port %d: dma task error!\n", port + 1);
					goto err_dma;
				}
				send_num = 0;
			}
			/* end */
		}
	}
err_dma:
	printf("Send error!\n");
	pthread_exit(NULL);
}
int _get_sched_cpuset(hwloc_topology_t topology,
		      hwloc_obj_type_t hwtype, hwloc_obj_type_t req_hwtype,
		      cpu_set_t *mask, stepd_step_rec_t *job)
{
	int nummasks, maskid, i, threads;
	char *curstr, *selstr;
	char mstr[1 + CPU_SETSIZE / 4];
	uint32_t local_id = job->envtp->localid;
	char buftype[1024];

	/* For CPU_BIND_RANK, CPU_BIND_MASK and CPU_BIND_MAP, generate sched
	 * cpuset directly from cpu numbers.
	 * For CPU_BIND_LDRANK, CPU_BIND_LDMASK and CPU_BIND_LDMAP, generate
	 * sched cpuset from hwloc topology.
	 */
	slurm_sprint_cpu_bind_type(buftype, job->cpu_bind_type);
	debug3("task/cgroup: (%s[%d]) %s", buftype,
	       job->cpu_bind_type, job->cpu_bind);
	CPU_ZERO(mask);

	if (job->cpu_bind_type & CPU_BIND_NONE)
		return true;

	if (job->cpu_bind_type & CPU_BIND_RANK) {
		threads = MAX(conf->threads, 1);
		CPU_SET(job->envtp->localid % (job->cpus * threads), mask);
		return true;
	}

	if (job->cpu_bind_type & CPU_BIND_LDRANK)
		return _get_ldom_sched_cpuset(topology, hwtype, req_hwtype,
					      local_id, mask);

	if (!job->cpu_bind)
		return false;

	nummasks = 1;
	maskid = 0;
	selstr = NULL;

	/* get number of strings present in cpu_bind */
	curstr = job->cpu_bind;
	while (*curstr) {
		if (nummasks == local_id + 1) {
			selstr = curstr;
			maskid = local_id;
			break;
		}
		if (*curstr == ',')
			nummasks++;
		curstr++;
	}

	/* if we didn't already find the mask... */
	if (!selstr) {
		/* ...select mask string by wrapping task ID into list */
		maskid = local_id % nummasks;
		i = maskid;
		curstr = job->cpu_bind;
		while (*curstr && i) {
			if (*curstr == ',')
				i--;
			curstr++;
		}
		if (!*curstr)
			return false;
		selstr = curstr;
	}

	/* extract the selected mask from the list */
	i = 0;
	curstr = mstr;
	while (*selstr && *selstr != ',' && i++ < (CPU_SETSIZE / 4))
		*curstr++ = *selstr++;
	*curstr = '\0';

	if (job->cpu_bind_type & CPU_BIND_MASK) {
		/* convert mask string into cpu_set_t mask */
		if (str_to_cpuset(mask, mstr) < 0) {
			error("task/cgroup: str_to_cpuset %s", mstr);
			return false;
		}
		return true;
	}

	if (job->cpu_bind_type & CPU_BIND_MAP) {
		unsigned int mycpu = 0;
		if (strncmp(mstr, "0x", 2) == 0)
			mycpu = strtoul(&(mstr[2]), NULL, 16);
		else
			mycpu = strtoul(mstr, NULL, 10);
		CPU_SET(mycpu, mask);
		return true;
	}

	if (job->cpu_bind_type & CPU_BIND_LDMASK) {
		int len = strlen(mstr);
		char *ptr = mstr + len - 1;
		uint32_t base = 0;

		curstr = mstr;
		/* skip 0x, it's all hex anyway */
		if (len > 1 && !memcmp(mstr, "0x", 2L))
			curstr += 2;
		while (ptr >= curstr) {
			char val = char_to_val(*ptr);
			if (val == (char) -1)
				return false;
			if (val & 1)
				_get_ldom_sched_cpuset(topology, hwtype,
						       req_hwtype, base, mask);
			if (val & 2)
				_get_ldom_sched_cpuset(topology, hwtype,
						       req_hwtype, base + 1,
						       mask);
			if (val & 4)
				_get_ldom_sched_cpuset(topology, hwtype,
						       req_hwtype, base + 2,
						       mask);
			if (val & 8)
				_get_ldom_sched_cpuset(topology, hwtype,
						       req_hwtype, base + 3,
						       mask);
			len--;
			ptr--;
			base += 4;
		}
		return true;
	}

	if (job->cpu_bind_type & CPU_BIND_LDMAP) {
		uint32_t myldom = 0;
		if (strncmp(mstr, "0x", 2) == 0)
			myldom = strtoul(&(mstr[2]), NULL, 16);
		else
			myldom = strtoul(mstr, NULL, 10);
		return _get_ldom_sched_cpuset(topology, hwtype, req_hwtype,
					      myldom, mask);
	}

	return false;
}
/* Receiver thread: grants buffer credit to the sender, verifies incoming
 * payloads, and extends the credit as buffers are released. */
static void *t_srio_receive(void *arg)
{
	struct task_arg_type *args = arg;
	struct dma_ch *dmadev = args->dmadev;
	struct srio_port_data_thread receive_data = args->port_data_thread;
	uint32_t port = args->port;
	int err = 0, i = 0;
	uint8_t srio_type = args->srio_type;
	uint64_t src_phys, dest_phys;
	uint32_t count = 0, total_buf = BUFFER_NUM, buf_number = BUFFER_NUM;
	cpu_set_t cpuset;
	uint32_t receive_num = 0, use_num = 0, packet_num = 0;

	CPU_ZERO(&cpuset);
	CPU_SET(args->cpu, &cpuset);
	err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset);
	if (err) {
		printf("(%d)fail:pthread_setaffinity_np()\n", args->cpu);
		return NULL;
	}

	if (srio_type != 3) {
		src_phys = receive_data.phys.write_data_prep;
		dest_phys = receive_data.port_info.range_start;
	} else {
		src_phys = receive_data.port_info.range_start;
		dest_phys = receive_data.phys.read_recv_data;
	}

	/* Advertise the initial number of free buffers to the sender. */
	memcpy(receive_data.virt.write_data_prep, &buf_number, sizeof(uint32_t));
	fsl_dma_direct_start(dmadev, src_phys, dest_phys, sizeof(uint32_t));
	err = fsl_dma_wait(dmadev);
	if (err < 0) {
		printf("port %d: dma task error!\n", port + 1);
		return NULL;
	}

	while (1) {
		uint32_t offset = (count % buf_number) * sizeof(struct srio_packet);

		/* The sender publishes how many packets it has pushed so far. */
		receive_num = *(volatile uint32_t *)
			(receive_data.virt.write_recv_data + SEND_NUM_OFFSET);
		if (receive_num > use_num) {
			packet_num = receive_num - use_num;

			/* Spot-check the first packet's payload. The sender filled
			 * it with (use_num + 1) truncated to a byte, so compare
			 * as uint8_t. */
			if (use_num == 0 || use_num == BUFFER_NUM) {
				printf("CPU:%d PORT:%d\n", args->cpu, port);
				uint8_t *p = (uint8_t *)
					(receive_data.virt.write_recv_data + offset + 4);
				int error_count = 0;
				for (i = 0; i < 10; i++) {
					if (*p != (uint8_t)(use_num + 1))
						error_count++;
					p++;
				}
				if (error_count != 0)
					printf("Receive Data:%02x Test Data:%02x Error Number:%d\n",
					       *p, (uint8_t)(use_num + 1), error_count);
				else
					printf("Data Right!\n");
			}

			/* Release the consumed buffers and publish new credit. */
			memset(receive_data.virt.write_recv_data + offset, 0,
			       sizeof(struct srio_packet) * packet_num);
			count = count + packet_num;
			total_buf = total_buf + packet_num;
			memcpy(receive_data.virt.write_data_prep, &total_buf,
			       sizeof(uint32_t));
			fsl_dma_direct_start(dmadev, src_phys, dest_phys,
					     sizeof(uint32_t));
			err = fsl_dma_wait(dmadev);
			if (err < 0) {
				printf("port %d: dma task error!\n", port + 1);
				break;
			}
			use_num = receive_num;
		}
	}
	printf("Receive error!\n");
	pthread_exit(NULL);
}
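/*
 * The two SRIO threads above implement credit-based flow control: the
 * receiver advertises how many buffer slots it has granted, the sender
 * publishes how many packets it has pushed, and each side polls the
 * other's counter. The sketch below is a minimal local model of that
 * protocol, with C11 atomics standing in for the DMA-visible counters;
 * all names are illustrative and not part of the SRIO test code.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define RING_SLOTS 8	/* stands in for BUFFER_NUM */
#define TOTAL_PKTS 64

static atomic_uint credit = RING_SLOTS; /* receiver: slots granted so far */
static atomic_uint sent;		/* sender: packets pushed so far */

static void *sender(void *unused)
{
	(void)unused;
	unsigned done = 0;
	while (done < TOTAL_PKTS) {
		/* Send only while the receiver has granted us slots. */
		while (done < atomic_load(&credit) && done < TOTAL_PKTS) {
			/* ...fill slot (done % RING_SLOTS) and DMA it here... */
			done++;
			atomic_store(&sent, done);	/* publish progress */
		}
	}
	return NULL;
}

static void *receiver(void *unused)
{
	(void)unused;
	unsigned used = 0;
	while (used < TOTAL_PKTS) {
		unsigned avail = atomic_load(&sent);
		if (avail > used) {
			/* ...consume slots used..avail-1 here... */
			used = avail;
			/* Grant the freed slots back to the sender. */
			atomic_store(&credit, used + RING_SLOTS);
		}
	}
	printf("consumed %u packets\n", used);
	return NULL;
}

int main(void)
{
	pthread_t s, r;
	pthread_create(&s, NULL, sender, NULL);
	pthread_create(&r, NULL, receiver, NULL);
	pthread_join(s, NULL);
	pthread_join(r, NULL);
	return 0;
}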
int ATL_thread_start(ATL_thread_t *thr, int proc, int JOINABLE,
                     void *(*rout)(void*), void *arg)
/*
 * Creates a thread that will run only on processor proc.
 * RETURNS: 0 on success, non-zero on error.
 * NOTE: present implementation dies on error, so 0 is always returned.
 */
{
#ifdef ATL_WINTHREADS
   #ifdef ATL_WIN32THREADS
      DWORD thrID;
   #else
      unsigned thrID;
   #endif

   #ifdef ATL_NOAFFINITY
      #ifdef ATL_WIN32THREADS
         thr->thrH = CreateThread(NULL, 0, rout, arg, 0, &thrID);
      #else
         thr->thrH = (HANDLE)_beginthreadex(NULL, 0, rout, arg, 0, &thrID);
      #endif
      ATL_assert(thr->thrH);
   #else
      thr->rank = proc;
      #ifdef ATL_WIN32THREADS
         thr->thrH = CreateThread(NULL, 0, rout, arg, CREATE_SUSPENDED, &thrID);
      #else
         thr->thrH = (HANDLE)_beginthreadex(NULL, 0, rout, arg,
                                            CREATE_SUSPENDED, &thrID);
      #endif
      ATL_assert(thr->thrH);
      #ifdef ATL_RANK_IS_PROCESSORID
         ATL_assert(SetThreadAffinityMask(thr->thrH, (1<<proc)));
      #else
         ATL_assert(SetThreadAffinityMask(thr->thrH,
                       (1<<ATL_affinityIDs[proc%ATL_AFF_NUMID])));
      #endif
      ATL_assert(ResumeThread(thr->thrH) == 1);
   #endif
#elif defined(ATL_OMP_THREADS)
   fprintf(stderr, "Should not call thread_start when using OpenMP!\n");
   ATL_assert(0);
#elif 0 && defined(ATL_OS_OSX)  /* unchecked special OSX code */
/* http://developer.apple.com/library/mac/#releasenotes/Performance/RN-AffinityAPI/_index.html */
   pthread_attr_t attr;
   #define ATL_OSX_AFF_SETS 2  /* should be probed for */
   thread_affinity_policy ap;

   ap.affinity_tag = proc % ATL_OSX_AFF_SETS;
   ATL_assert(!pthread_attr_init(&attr));
   if (JOINABLE)
      ATL_assert(!pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE));
   else
      ATL_assert(!pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED));
   pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM); /* no chk, OK to fail */
   ATL_assert(!pthread_create(&thr->thrH, &attr, rout, arg));
   ATL_assert(!thread_policy_set(thr->thrH, THREAD_AFFINITY_POLICY,
                                 (integer_t*)&ap,
                                 THREAD_AFFINITY_POLICY_COUNT));
   ATL_assert(!pthread_attr_destroy(&attr));
#else
   pthread_attr_t attr;
   #ifndef ATL_NOAFFINITY
      #if defined(ATL_PAFF_SETAFFNP) || defined(ATL_PAFF_SCHED)
         cpu_set_t cpuset;
      #elif defined(ATL_PAFF_PLPA)
         plpa_cpu_set_t cpuset;
      #elif defined(ATL_PAFF_CPUSET)  /* untried FreeBSD code */
         cpuset_t mycpuset;
      #endif
      #ifdef ATL_RANK_IS_PROCESSORID
         const int affID = proc;
      #else
         const int affID = ATL_affinityIDs[proc%ATL_AFF_NUMID];
      #endif
      #ifdef ATL_PAFF_SELF
         thr->paff_set = 0;  /* affinity must be set by created thread */
      #endif
   #endif
   thr->rank = proc;
   ATL_assert(!pthread_attr_init(&attr));
   if (JOINABLE)
   {
      #ifdef IBM_PT_ERROR
         ATL_assert(!pthread_attr_setdetachstate(&attr,
                                                 PTHREAD_CREATE_UNDETACHED));
      #else
         ATL_assert(!pthread_attr_setdetachstate(&attr,
                                                 PTHREAD_CREATE_JOINABLE));
      #endif
   }
   else
      ATL_assert(!pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED));
   pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM); /* no chk, OK to fail */
   #ifdef ATL_PAFF_SETAFFNP
      CPU_ZERO(&cpuset);
      CPU_SET(affID, &cpuset);
      ATL_assert(!pthread_attr_setaffinity_np(&attr, sizeof(cpuset), &cpuset));
   #elif defined(ATL_PAFF_SETPROCNP)
      ATL_assert(!pthread_attr_setprocessor_np(&attr, (pthread_spu_t)affID,
                                               PTHREAD_BIND_FORCED_NP));
   #endif
   ATL_assert(!pthread_create(&thr->thrH, &attr, rout, arg));
   #if defined(ATL_PAFF_PBIND)
      ATL_assert(!processor_bind(P_LWPID, thr->thrH, affID, NULL));
      thr->paff_set = 0;  /* affinity set by spawner */
   #elif defined(ATL_PAFF_BINDP)
      ATL_assert(!bindprocessor(BINDTHREAD, thr->thrH, affID));
      thr->paff_set = 0;  /* affinity set by spawner */
   #elif defined(ATL_PAFF_CPUSET)  /* untried FreeBSD code */
      CPU_ZERO(&mycpuset);  /* no manpage, so guess works like linux */
      CPU_SET(affID, &mycpuset);
      ATL_assert(!cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_TID,
                                     thr->thrH, sizeof(mycpuset), &mycpuset));
      thr->paff_set = 0;  /* affinity set by spawner */
   #endif
   ATL_assert(!pthread_attr_destroy(&attr));
#endif
   return(0);
}
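/*
 * Hypothetical usage sketch for ATL_thread_start(): launch one pinned
 * worker per core, then join. It assumes the pthreads backend, where
 * thr->thrH is a pthread_t (see the pthread_create call above), plus the
 * ATLAS threading header for ATL_thread_t; NWORKERS and worker() are
 * illustrative only.
 */
#include <pthread.h>
#include <stdio.h>

#define NWORKERS 4

static void *worker(void *arg)
{
   int rank = *(int*)arg;
   printf("worker %d running on its assigned core\n", rank);
   return NULL;
}

int main(void)
{
   ATL_thread_t thr[NWORKERS];
   int ranks[NWORKERS], i;

   for (i=0; i < NWORKERS; i++)
   {
      ranks[i] = i;
      ATL_thread_start(&thr[i], i, 1, worker, &ranks[i]); /* JOINABLE=1 */
   }
   for (i=0; i < NWORKERS; i++)
      pthread_join(thr[i].thrH, NULL);
   return 0;
}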
/*
 * The format pattern: --lcores='<lcores[@cpus]>[<,lcores[@cpus]>...]'
 * Both lcores and cpus can be a single number, a range, or a group.
 * '(' and ')' are necessary if it's a group.
 * If '@cpus' is not supplied, the cpu set defaults to the lcore set.
 * e.g. '1,2@(5-7),(3-5)@(0,2),(0,6),7-8' starts 9 EAL threads as below:
 *   lcore 0 runs on cpuset 0x41 (cpu 0,6)
 *   lcore 1 runs on cpuset 0x2 (cpu 1)
 *   lcore 2 runs on cpuset 0xe0 (cpu 5,6,7)
 *   lcore 3,4,5 runs on cpuset 0x5 (cpu 0,2)
 *   lcore 6 runs on cpuset 0x41 (cpu 0,6)
 *   lcore 7 runs on cpuset 0x80 (cpu 7)
 *   lcore 8 runs on cpuset 0x100 (cpu 8)
 */
static int
eal_parse_lcores(const char *lcores)
{
	struct rte_config *cfg = rte_eal_get_configuration();
	static uint16_t set[RTE_MAX_LCORE];
	unsigned idx = 0;
	int i;
	unsigned count = 0;
	const char *lcore_start = NULL;
	const char *end = NULL;
	int offset;
	rte_cpuset_t cpuset;
	int lflags = 0;
	int ret = -1;

	if (lcores == NULL)
		return -1;

	/* Remove all blank characters ahead and after */
	while (isblank(*lcores))
		lcores++;
	i = strlen(lcores);
	while ((i > 0) && isblank(lcores[i - 1]))
		i--;

	CPU_ZERO(&cpuset);

	/* Reset lcore config */
	for (idx = 0; idx < RTE_MAX_LCORE; idx++) {
		cfg->lcore_role[idx] = ROLE_OFF;
		lcore_config[idx].core_index = -1;
		CPU_ZERO(&lcore_config[idx].cpuset);
	}

	/* Get list of cores */
	do {
		while (isblank(*lcores))
			lcores++;
		if (*lcores == '\0')
			goto err;

		/* record lcore_set start point */
		lcore_start = lcores;

		/* go across a complete bracket */
		if (*lcore_start == '(') {
			lcores += strcspn(lcores, ")");
			if (*lcores++ == '\0')
				goto err;
		}

		/* scan the separator '@', ','(next) or '\0'(finish) */
		lcores += strcspn(lcores, "@,");

		if (*lcores == '@') {
			/* explicit assign cpu_set */
			offset = eal_parse_set(lcores + 1, set, RTE_DIM(set));
			if (offset < 0)
				goto err;

			/* prepare cpu_set and update the end cursor */
			if (0 > convert_to_cpuset(&cpuset, set, RTE_DIM(set)))
				goto err;
			end = lcores + 1 + offset;
		} else { /* ',' or '\0' */
			/* haven't given cpu_set, current loop done */
			end = lcores;

			/* go back to check <number>-<number> */
			offset = strcspn(lcore_start, "(-");
			if (offset < (end - lcore_start) &&
			    *(lcore_start + offset) != '(')
				lflags = 1;
		}

		if (*end != ',' && *end != '\0')
			goto err;

		/* parse lcore_set from start point */
		if (0 > eal_parse_set(lcore_start, set, RTE_DIM(set)))
			goto err;

		/* without '@', by default using lcore_set as cpu_set */
		if (*lcores != '@' &&
		    0 > convert_to_cpuset(&cpuset, set, RTE_DIM(set)))
			goto err;

		/* start to update lcore_set */
		for (idx = 0; idx < RTE_MAX_LCORE; idx++) {
			if (!set[idx])
				continue;

			if (cfg->lcore_role[idx] != ROLE_RTE) {
				lcore_config[idx].core_index = count;
				cfg->lcore_role[idx] = ROLE_RTE;
				count++;
			}

			if (lflags) {
				CPU_ZERO(&cpuset);
				CPU_SET(idx, &cpuset);
			}
			rte_memcpy(&lcore_config[idx].cpuset, &cpuset,
				   sizeof(rte_cpuset_t));
		}

		lcores = end + 1;
	} while (*end != '\0');

	if (count == 0)
		goto err;

	cfg->lcore_count = count;
	lcores_parsed = 1;
	ret = 0;

err:
	return ret;
}
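/*
 * A much-simplified sketch of the token parsing that eal_parse_set()
 * performs for eal_parse_lcores() above: it handles a single "<lo>-<hi>"
 * or "<n>" token (no groups), and reproduces the "lcore 2 runs on cpuset
 * 0xe0" example from the comment. parse_range() is illustrative, not a
 * DPDK API.
 */
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>

static int parse_range(const char *s, cpu_set_t *set)
{
	char *end = NULL;
	unsigned long lo = strtoul(s, &end, 10);
	unsigned long hi = lo;

	if (end == s)
		return -1;		/* no digits */
	if (*end == '-') {
		const char *p = end + 1;
		hi = strtoul(p, &end, 10);
		if (end == p || hi < lo)
			return -1;
	}
	if (*end != '\0')
		return -1;		/* trailing junk */
	for (unsigned long cpu = lo; cpu <= hi && cpu < CPU_SETSIZE; cpu++)
		CPU_SET(cpu, set);
	return 0;
}

int main(void)
{
	cpu_set_t set;

	CPU_ZERO(&set);
	if (parse_range("5-7", &set))
		return 1;
	/* CPUs 5,6,7 -> mask 0xe0, matching lcore 2 in the comment above */
	printf("count=%d\n", CPU_COUNT(&set));	/* prints 3 */
	return 0;
}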
affinity_set::affinity_set(int cpu_nr)
{
	CPU_ZERO(&set);
	CPU_SET(cpu_nr, &set);
}
/* Sender thread (throughput variant): fills each packet with a rolling
 * byte pattern and publishes progress through a struct srio_ctl record.
 * Unused measurement locals from an earlier variant have been dropped. */
static void *t_srio_send(void *arg)
{
	struct task_arg_type *args = arg;
	struct dma_ch *dmadev = args->dmadev;
	struct srio_port_data_thread send_data = args->port_data_thread;
	uint32_t size = PACKET_LENGTH;
	uint32_t port = args->port;
	int err = 0;
	uint8_t srio_type = args->srio_type;
	uint8_t test_type = args->test_type;
	uint64_t src_phys, dest_phys;
	uint64_t buf_number = 0, usebuf_number = 0;
	uint64_t total_buf = BUFFER_NUM;
	uint32_t send_num = 0;
	cpu_set_t cpuset;
	uint8_t data = 0;
	volatile struct srio_ctl *pcnt = NULL;
	struct srio_ctl ctl_info;

	CPU_ZERO(&cpuset);
	CPU_SET(args->cpu, &cpuset);
	err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset);
	if (err) {
		printf("(%d)fail:pthread_setaffinity_np()\n", args->cpu);
		fflush(stdout);
		return NULL;
	}

	memset(&ctl_info, 0, sizeof(struct srio_ctl));
	pcnt = (struct srio_ctl *)(send_data.virt.write_recv_data);

	while (1) {
		if (test_type)
			continue;

		/* Credit published by the receiver. */
		buf_number = pcnt->number;
		for (; usebuf_number < buf_number;) {
			uint32_t offset = ((uint32_t)(usebuf_number % total_buf)) *
				sizeof(struct srio_packet);
			if (srio_type != 3) {
				src_phys = send_data.phys.write_data_prep + offset;
				dest_phys = send_data.port_info.range_start + offset;
			} else {
				src_phys = send_data.port_info.range_start + offset;
				dest_phys = send_data.phys.read_recv_data + offset;
			}
			/* Fill the packet with a rolling byte pattern. */
			memset(send_data.virt.write_data_prep + offset, data, size);
			data++;

			fsl_dma_direct_start(dmadev, src_phys, dest_phys, size);
			err = fsl_dma_wait(dmadev);
			if (err < 0) {
				printf("port %d: dma task error!\n", port + 1);
				fflush(stdout);
			}
			usebuf_number++;
			send_num++;

			/* Publish progress every SEND_TOTAL_NUM packets. */
			if (send_num == SEND_TOTAL_NUM) {
				ctl_info.number = usebuf_number;
				memcpy(send_data.virt.write_data_prep + SEND_NUM_OFFSET,
				       &ctl_info, sizeof(struct srio_ctl));
				src_phys = send_data.phys.write_data_prep + SEND_NUM_OFFSET;
				dest_phys = send_data.port_info.range_start + SEND_NUM_OFFSET;
				fsl_dma_direct_start(dmadev, src_phys, dest_phys,
						     sizeof(struct srio_ctl));
				err = fsl_dma_wait(dmadev);
				if (err < 0) {
					printf("port %d: dma task error!\n", port + 1);
					fflush(stdout);
				}
				send_num = 0;
			}
		}
	}
	pthread_exit(NULL);
}
int test_affinity1(void)
{
	unsigned int cpu;
	cpu_set_t newmask;
	cpu_set_t src1mask;
	cpu_set_t src2mask;
	cpu_set_t src3mask;

	CPU_ZERO(&newmask);
	CPU_ZERO(&src1mask);
	memset(&src2mask, 0, sizeof(cpu_set_t));
	assert(memcmp(&src1mask, &src2mask, sizeof(cpu_set_t)) == 0);
	assert(CPU_EQUAL(&src1mask, &src2mask));
	assert(CPU_COUNT(&src1mask) == 0);

	CPU_ZERO(&src1mask);
	CPU_ZERO(&src2mask);
	CPU_ZERO(&src3mask);

	for (cpu = 0; cpu < sizeof(cpu_set_t)*8; cpu += 2)
		CPU_SET(cpu, &src1mask);	/* even bits set: 0b...0101 */
	for (cpu = 0; cpu < sizeof(cpu_set_t)*4; cpu++)
		CPU_SET(cpu, &src2mask);	/* lower half all set */
	for (cpu = sizeof(cpu_set_t)*4; cpu < sizeof(cpu_set_t)*8; cpu += 2)
		CPU_SET(cpu, &src2mask);	/* plus even bits of upper half */
	for (cpu = 0; cpu < sizeof(cpu_set_t)*8; cpu += 2)
		CPU_SET(cpu, &src3mask);	/* same pattern as src1mask */

	assert(CPU_COUNT(&src1mask) == (sizeof(cpu_set_t)*4));
	assert(CPU_COUNT(&src2mask) == (sizeof(cpu_set_t)*4 + sizeof(cpu_set_t)*2));
	assert(CPU_COUNT(&src3mask) == (sizeof(cpu_set_t)*4));

	CPU_SET(0, &newmask);
	CPU_SET(1, &newmask);
	CPU_SET(3, &newmask);
	assert(CPU_ISSET(1, &newmask));
	CPU_CLR(1, &newmask);
	assert(!CPU_ISSET(1, &newmask));

	/* src1mask is a subset of src2mask, so OR yields src2mask and
	 * AND yields src1mask. */
	CPU_OR(&newmask, &src1mask, &src2mask);
	assert(CPU_EQUAL(&newmask, &src2mask));
	CPU_AND(&newmask, &src1mask, &src2mask);
	assert(CPU_EQUAL(&newmask, &src1mask));
	/* src1mask equals src3mask, so XOR yields the empty set. */
	CPU_XOR(&newmask, &src1mask, &src3mask);
	memset(&src2mask, 0, sizeof(cpu_set_t));
	assert(memcmp(&newmask, &src2mask, sizeof(cpu_set_t)) == 0);

	/*
	 * Need to confirm the bitwise logical right-shift in CpuCount().
	 * i.e. zeros inserted into MSB on shift because cpu_set_t is
	 * unsigned.
	 */
	CPU_ZERO(&src1mask);
	for (cpu = 1; cpu < sizeof(cpu_set_t)*8; cpu += 2)
		CPU_SET(cpu, &src1mask);	/* odd bits set: 0b...1010 */
	assert(CPU_ISSET(sizeof(cpu_set_t)*8-1, &src1mask));
	assert(CPU_COUNT(&src1mask) == (sizeof(cpu_set_t)*4));

	return 0;
}
/* Receiver thread (throughput variant): verifies every byte of every
 * packet against the sender's rolling pattern and extends the credit
 * through a struct srio_ctl record. */
static void *t_srio_receive(void *arg)
{
	struct task_arg_type *args = arg;
	struct dma_ch *dmadev = args->dmadev;
	struct srio_port_data_thread receive_data = args->port_data_thread;
	uint32_t port = args->port;
	int err = 0;
	uint32_t i = 0, k = 0;
	uint8_t srio_type = args->srio_type;
	uint64_t src_phys, dest_phys;
	uint32_t count = 0, buf_number = BUFFER_NUM;
	cpu_set_t cpuset;
	uint64_t receive_num = 0, use_num = 0, total_buf = 100;
	uint32_t packet_num = 0;

	CPU_ZERO(&cpuset);
	CPU_SET(args->cpu, &cpuset);
	err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset);
	if (err) {
		printf("(%d)fail:pthread_setaffinity_np()\n", args->cpu);
		fflush(stdout);
		return NULL;
	}

	if (srio_type != 3) {
		src_phys = receive_data.phys.write_data_prep;
		dest_phys = receive_data.port_info.range_start;
	} else {
		src_phys = receive_data.port_info.range_start;
		dest_phys = receive_data.phys.read_recv_data;
	}

	volatile struct srio_ctl *pcnt = NULL;
	struct srio_ctl ctl_info;

	printf("ctl size:%zu\n", sizeof(struct srio_ctl));
	memset(&ctl_info, 0, sizeof(struct srio_ctl));

	/* Grant the initial credit to the sender. */
	ctl_info.number = total_buf;
	memcpy(receive_data.virt.write_data_prep, &ctl_info,
	       sizeof(struct srio_ctl));
	printf("before send ctl ########################\n");
	fsl_dma_direct_start(dmadev, src_phys, dest_phys,
			     sizeof(struct srio_ctl));
	err = fsl_dma_wait(dmadev);
	if (err < 0) {
		printf("port %d: dma task error!\n", port + 1);
		fflush(stdout);
		return NULL;
	}
	printf(" send ctl ########################\n");

	pcnt = (struct srio_ctl *)(receive_data.virt.write_recv_data +
				   SEND_NUM_OFFSET);

	uint8_t data = 0;
	uint32_t receive_total = 0;

	while (1) {
		uint32_t offset = ((uint32_t)(count % buf_number)) *
			sizeof(struct srio_packet);

		receive_num = pcnt->number;
		if (receive_num > use_num) {
			packet_num = receive_num - use_num;
			/* Never verify past the end of the ring. */
			if (packet_num > (buf_number - (count % buf_number)))
				packet_num = buf_number - (count % buf_number);

			uint8_t *p = (uint8_t *)(receive_data.virt.write_recv_data +
						 offset);
			uint32_t error_count = 0;
			fflush(stdout);
			for (k = use_num; k < (use_num + packet_num); k++) {
				uint8_t pdata = *p;
				for (i = 0; i < PACKET_LENGTH; i++) {
					if (*p != data) {
						error_count++;
						printf("###Receive ERROR Data:%02x addr:%p Test Data:%02x port:%d byte:%u\n",
						       *p, (void *)p, data, port, i);
						fflush(stdout);
					}
					p++;
				}
				if (error_count != 0) {
					printf("Receive ERROR Data:%02x Test Data:%02x error Number:%08x port:%d\n",
					       pdata, data, error_count, port);
					fflush(stdout);
					error_count = 0;
				} else {
					receive_total = receive_total + 1;
				}
				data++;
			}
			if (receive_total == 1000) {
				printf("port:%d Data Right!\n", port);
				fflush(stdout);
				receive_total = 0;
			}

			/* Release buffers and extend the sender's credit. */
			count = count + packet_num;
			total_buf = total_buf + packet_num;
			ctl_info.number = total_buf;
			memcpy(receive_data.virt.write_data_prep, &ctl_info,
			       sizeof(struct srio_ctl));
			fsl_dma_direct_start(dmadev, src_phys, dest_phys,
					     sizeof(struct srio_ctl));
			err = fsl_dma_wait(dmadev);
			if (err < 0) {
				printf("port %d: dma task error!\n", port + 1);
				fflush(stdout);
				break;
			}
			use_num = use_num + packet_num;
		}
	}
	pthread_exit(NULL);
}
static int _get_cpu_masks(int num_numa_nodes, int32_t *numa_array,
			  cpu_set_t **cpuMasks)
{
	struct bitmask **remaining_numa_node_cpus = NULL, *collective;
	unsigned long **numa_node_cpus = NULL;
	int i, j, at_least_one_cpu = 0, rc = 0;
	cpu_set_t *cpusetptr;
	char *bitmask_str = NULL;

	if (numa_available()) {
		CRAY_ERR("Libnuma not available");
		return -1;
	}
	/*
	 * numa_node_cpus: The CPUs available to the NUMA node.
	 * numa_all_cpus_ptr: all CPUs on which the calling task may execute.
	 * remaining_numa_node_cpus: Bitwise-AND of the above two to get all of
	 *			     the CPUs that the task can run on in this
	 *			     NUMA node.
	 * collective: Collects all of the CPUs as a precaution.
	 */
	remaining_numa_node_cpus = xmalloc(num_numa_nodes *
					   sizeof(struct bitmask *));
	collective = numa_allocate_cpumask();
	numa_node_cpus = xmalloc(num_numa_nodes * sizeof(unsigned long *));
	for (i = 0; i < num_numa_nodes; i++) {
		remaining_numa_node_cpus[i] = numa_allocate_cpumask();
		numa_node_cpus[i] = xmalloc(sizeof(unsigned long) *
					    NUM_INTS_TO_HOLD_ALL_CPUS);
		rc = numa_node_to_cpus(numa_array[i], numa_node_cpus[i],
				       NUM_INTS_TO_HOLD_ALL_CPUS);
		if (rc) {
			CRAY_ERR("numa_node_to_cpus failed: Return code %d",
				 rc);
		}
		for (j = 0; j < NUM_INTS_TO_HOLD_ALL_CPUS; j++) {
			(remaining_numa_node_cpus[i]->maskp[j]) =
				(numa_node_cpus[i][j]) &
				(numa_all_cpus_ptr->maskp[j]);
			collective->maskp[j] |=
				(remaining_numa_node_cpus[i]->maskp[j]);
		}
	}

	/*
	 * Ensure that we have not masked off all of the CPUs.
	 * If we have, just re-enable them all. Better to clear them all than
	 * none of them.
	 */
	for (j = 0; j < collective->size; j++) {
		if (numa_bitmask_isbitset(collective, j)) {
			at_least_one_cpu = 1;
		}
	}
	if (!at_least_one_cpu) {
		for (i = 0; i < num_numa_nodes; i++) {
			for (j = 0; j < (remaining_numa_node_cpus[i]->size /
					 (sizeof(unsigned long) * 8)); j++) {
				(remaining_numa_node_cpus[i]->maskp[j]) =
					(numa_all_cpus_ptr->maskp[j]);
			}
		}
	}

	if (debug_flags & DEBUG_FLAG_TASK) {
		bitmask_str = NULL;
		for (i = 0; i < num_numa_nodes; i++) {
			for (j = 0; j < NUM_INTS_TO_HOLD_ALL_CPUS; j++) {
				xstrfmtcat(bitmask_str, "%6lx ",
					   numa_node_cpus[i][j]);
			}
		}
		info("%sBitmask: Allowed CPUs for NUMA Node", bitmask_str);
		xfree(bitmask_str);
		bitmask_str = NULL;

		for (i = 0; i < num_numa_nodes; i++) {
			for (j = 0; j < NUM_INTS_TO_HOLD_ALL_CPUS; j++) {
				xstrfmtcat(bitmask_str, "%6lx ",
					   numa_all_cpus_ptr->maskp[j]);
			}
		}
		info("%sBitmask: Allowed CPUs for cpuset", bitmask_str);
		xfree(bitmask_str);
		bitmask_str = NULL;

		for (i = 0; i < num_numa_nodes; i++) {
			for (j = 0; j < NUM_INTS_TO_HOLD_ALL_CPUS; j++) {
				xstrfmtcat(bitmask_str, "%6lx ",
					   remaining_numa_node_cpus[i]->
					   maskp[j]);
			}
		}
		info("%sBitmask: Allowed CPUs between cpuset and NUMA Node",
		     bitmask_str);
		xfree(bitmask_str);
	}

	/* Convert bitmasks to cpu_set_t types */
	cpusetptr = xmalloc(num_numa_nodes * sizeof(cpu_set_t));

	for (i = 0; i < num_numa_nodes; i++) {
		CPU_ZERO(&cpusetptr[i]);
		for (j = 0; j < remaining_numa_node_cpus[i]->size; j++) {
			if (numa_bitmask_isbitset(remaining_numa_node_cpus[i],
						  j)) {
				CPU_SET(j, &cpusetptr[i]);
			}
		}
		if (debug_flags & DEBUG_FLAG_TASK) {
			info("CPU_COUNT() of set: %d",
			     CPU_COUNT(&cpusetptr[i]));
		}
	}

	*cpuMasks = cpusetptr;

	/* Free everything */
	numa_free_cpumask(collective);
	for (i = 0; i < num_numa_nodes; i++) {
		xfree(numa_node_cpus[i]);
		numa_free_cpumask(remaining_numa_node_cpus[i]);
	}
	xfree(numa_node_cpus);
	xfree(remaining_numa_node_cpus);

	return 0;
}
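/*
 * A minimal sketch of the bitmask-to-cpu_set_t conversion performed above,
 * for a single NUMA node, using libnuma's v2 API (numa_node_to_cpus taking
 * a struct bitmask *) rather than the raw unsigned-long form; error
 * handling is mostly elided and the program is illustrative only.
 */
#define _GNU_SOURCE
#include <numa.h>	/* link with -lnuma */
#include <sched.h>
#include <stdio.h>

int main(void)
{
	if (numa_available() < 0)
		return 1;

	struct bitmask *bm = numa_allocate_cpumask();
	cpu_set_t set;
	CPU_ZERO(&set);

	/* CPUs of NUMA node 0, intersected with the CPUs we may run on. */
	if (numa_node_to_cpus(0, bm) == 0) {
		for (unsigned j = 0; j < bm->size; j++)
			if (numa_bitmask_isbitset(bm, j) &&
			    numa_bitmask_isbitset(numa_all_cpus_ptr, j))
				CPU_SET(j, &set);
	}
	printf("node 0: %d usable CPUs\n", CPU_COUNT(&set));
	numa_free_cpumask(bm);
	return 0;
}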
/**
 * Bind the current process to a specified CPU. This function is to ensure
 * that the OS won't schedule the process to different processors, which
 * would make values read by rdtsc unreliable.
 *
 * @param cpu_id  the id of the logical cpu to be bound to
 *
 * @author cjiang
 */
static void BindToCPU(uint32_t cpu_id)
{
	cpu_set_t new_mask;
	CPU_ZERO(&new_mask);
	CPU_SET(cpu_id, &new_mask);
	SET_AFFINITY(0, sizeof(cpu_set_t), &new_mask);
}
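/*
 * A sketch of why BindToCPU() matters: TSC deltas are only meaningful when
 * both reads happen on the same core. This assumes x86 with GCC/Clang;
 * since SET_AFFINITY is a project-local macro, plain sched_setaffinity()
 * is used here instead.
 */
#define _GNU_SOURCE
#include <sched.h>
#include <stdint.h>
#include <stdio.h>
#include <x86intrin.h>	/* __rdtsc() */

int main(void)
{
	cpu_set_t mask;
	CPU_ZERO(&mask);
	CPU_SET(0, &mask);	/* stay on logical CPU 0 */
	if (sched_setaffinity(0, sizeof(mask), &mask))
		return 1;

	uint64_t t0 = __rdtsc();
	/* ... timed region ... */
	uint64_t t1 = __rdtsc();
	printf("elapsed cycles: %llu\n", (unsigned long long)(t1 - t0));
	return 0;
}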
/*
 * Test pthread creation at different thread priorities.
 */
int main(int argc, char *argv[])
{
	int i, retc, nopi = 0;
	cpu_set_t mask;

	CPU_ZERO(&mask);
	CPU_SET(0, &mask);
	setup();

	rt_init("h", parse_args, argc, argv);

	retc = sched_setaffinity(0, sizeof(mask), &mask);
	if (retc < 0) {
		printf("Main Thread: Can't set affinity: %d %s\n",
		       errno, strerror(errno));
		exit(1);
	}
	retc = sched_getaffinity(0, sizeof(mask), &mask);

	/*
	 * XXX: Have you ever heard of structures with c89/c99?
	 * Inline assignment is a beautiful thing.
	 */
	arg1.policy = SCHED_OTHER;
	arg1.priority = 0;
	arg1.func = func_nonrt;
	arg2.policy = SCHED_RR;
	arg2.priority = 20;
	arg2.func = func_rt;
	arg3.policy = SCHED_RR;
	arg3.priority = 30;
	arg3.func = func_rt;
	arg4.policy = SCHED_RR;
	arg4.priority = 40;
	arg4.func = func_rt;
	arg5.policy = SCHED_RR;
	arg5.priority = 40;
	arg5.func = func_noise;

	for (i = 0; i < argc; i++) {
		if (strcmp(argv[i], "nopi") == 0)
			nopi = 1;
	}

	printf("Start %s\n", argv[0]);

#if HAVE_DECL_PTHREAD_PRIO_INHERIT
	if (!nopi) {
		pthread_mutexattr_t mutexattr;
		int protocol;

		if (pthread_mutexattr_init(&mutexattr) != 0)
			printf("Failed to init mutexattr\n");
		if (pthread_mutexattr_setprotocol(&mutexattr,
						  PTHREAD_PRIO_INHERIT) != 0)
			printf("Can't set protocol prio inherit\n");
		if (pthread_mutexattr_getprotocol(&mutexattr, &protocol) != 0)
			printf("Can't get mutexattr protocol\n");
		else
			printf("protocol in mutexattr is %d\n", protocol);
		if ((retc = pthread_mutex_init(&glob_mutex, &mutexattr)) != 0)
			printf("Failed to init mutex: %d\n", retc);
	}
#endif

	startThread(&arg1);
	startThread(&arg2);
	startThread(&arg3);
	startThread(&arg4);
	startThread(&arg5);

	sleep(10);

	printf("Stopping threads\n");
	stopThread(&arg1);
	stopThread(&arg2);
	stopThread(&arg3);
	stopThread(&arg4);
	stopThread(&arg5);

	printf("Thread counts %d %d %d %d %d\n",
	       arg1.id, arg2.id, arg3.id, arg4.id, arg5.id);
	printf("Done\n");

	return 0;
}
PETSC_EXTERN PetscErrorCode PetscThreadCommCreate_PThread(PetscThreadComm tcomm)
{
  PetscThreadComm_PThread ptcomm;
  PetscErrorCode          ierr;
  PetscInt                i;

  PetscFunctionBegin;
  ptcommcrtct++;
  ierr = PetscStrcpy(tcomm->type,PTHREAD);CHKERRQ(ierr);
  ierr = PetscNew(struct _p_PetscThreadComm_PThread,&ptcomm);CHKERRQ(ierr);

  tcomm->data              = (void*)ptcomm;
  ptcomm->nthreads         = 0;
  ptcomm->sync             = PTHREADSYNC_LOCKFREE;
  ptcomm->aff              = PTHREADAFFPOLICY_ONECORE;
  ptcomm->spark            = PTHREADPOOLSPARK_SELF;
  ptcomm->ismainworker     = PETSC_TRUE;
  ptcomm->synchronizeafter = PETSC_TRUE;

  tcomm->ops->destroy   = PetscThreadCommDestroy_PThread;
  tcomm->ops->runkernel = PetscThreadCommRunKernel_PThread_LockFree;
  tcomm->ops->barrier   = PetscThreadCommBarrier_PThread_LockFree;
  tcomm->ops->getrank   = PetscThreadCommGetRank_PThread;

  ierr = PetscMalloc(tcomm->nworkThreads*sizeof(PetscInt),&ptcomm->granks);CHKERRQ(ierr);

  if (!PetscPThreadCommInitializeCalled) { /* Only done for PETSC_THREAD_COMM_WORLD */
    PetscBool flg1,flg2,flg3,flg4;

    PetscPThreadCommInitializeCalled = PETSC_TRUE;

    ierr = PetscOptionsBegin(PETSC_COMM_WORLD,NULL,"PThread communicator options",NULL);CHKERRQ(ierr);
    ierr = PetscOptionsBool("-threadcomm_pthread_main_is_worker","Main thread is also a worker thread",NULL,PETSC_TRUE,&ptcomm->ismainworker,&flg1);CHKERRQ(ierr);
    ierr = PetscOptionsEnum("-threadcomm_pthread_affpolicy","Thread affinity policy"," ",PetscPThreadCommAffinityPolicyTypes,(PetscEnum)ptcomm->aff,(PetscEnum*)&ptcomm->aff,&flg2);CHKERRQ(ierr);
    ierr = PetscOptionsEnum("-threadcomm_pthread_type","Thread pool type"," ",PetscPThreadCommSynchronizationTypes,(PetscEnum)ptcomm->sync,(PetscEnum*)&ptcomm->sync,&flg3);CHKERRQ(ierr);
    ierr = PetscOptionsEnum("-threadcomm_pthread_spark","Thread pool spark type"," ",PetscPThreadCommPoolSparkTypes,(PetscEnum)ptcomm->spark,(PetscEnum*)&ptcomm->spark,&flg4);CHKERRQ(ierr);
    ierr = PetscOptionsBool("-threadcomm_pthread_synchronizeafter","Puts a barrier after every kernel call",NULL,PETSC_TRUE,&ptcomm->synchronizeafter,&flg1);CHKERRQ(ierr);
    ierr = PetscOptionsEnd();CHKERRQ(ierr);

    if (ptcomm->ismainworker) {
      ptcomm->nthreads         = tcomm->nworkThreads-1;
      ptcomm->thread_num_start = 1;
    } else {
      ptcomm->nthreads         = tcomm->nworkThreads;
      ptcomm->thread_num_start = 0;
    }

    switch (ptcomm->sync) {
    case PTHREADSYNC_LOCKFREE:
      ptcomm->initialize    = PetscPThreadCommInitialize_LockFree;
      ptcomm->finalize      = PetscPThreadCommFinalize_LockFree;
      tcomm->ops->runkernel = PetscThreadCommRunKernel_PThread_LockFree;
      tcomm->ops->barrier   = PetscThreadCommBarrier_PThread_LockFree;
      break;
    default:
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only Lock-free synchronization scheme supported currently");
    }

    /* Set up thread ranks */
    for (i=0; i<tcomm->nworkThreads; i++) ptcomm->granks[i] = i;

    if (ptcomm->ismainworker) {
#if defined(PETSC_PTHREAD_LOCAL)
      PetscPThreadRank = 0; /* Main thread rank */
#else
      ierr = pthread_key_create(&PetscPThreadRankkey,NULL);CHKERRQ(ierr);
      ierr = pthread_setspecific(PetscPThreadRankkey,&ptcomm->granks[0]);CHKERRQ(ierr);
#endif
    }
    /* Set the leader thread rank */
    if (ptcomm->nthreads) {
      if (ptcomm->ismainworker) tcomm->leader = ptcomm->granks[1];
      else tcomm->leader = ptcomm->granks[0];
    }

    /* Create array holding pthread ids */
    ierr = PetscMalloc(tcomm->nworkThreads*sizeof(pthread_t),&ptcomm->tid);CHKERRQ(ierr);
    /* Create thread attributes */
    ierr = PetscMalloc(tcomm->nworkThreads*sizeof(pthread_attr_t),&ptcomm->attr);CHKERRQ(ierr);
    ierr = PetscThreadCommSetPThreadAttributes(tcomm);CHKERRQ(ierr);

    if (ptcomm->ismainworker) {
      /* Pin the main thread to its affinity slot */
#if defined(PETSC_HAVE_SCHED_CPU_SET_T)
      cpu_set_t mset;
      PetscInt  ncores,icorr;

      ierr = PetscGetNCores(&ncores);CHKERRQ(ierr);
      CPU_ZERO(&mset);
      icorr = tcomm->affinities[0]%ncores;
      CPU_SET(icorr,&mset);
      sched_setaffinity(0,sizeof(cpu_set_t),&mset);
#endif
    }
    /* Initialize thread pool */
    ierr = (*ptcomm->initialize)(tcomm);CHKERRQ(ierr);
  } else {
    PetscThreadComm         gtcomm;
    PetscThreadComm_PThread gptcomm;
    PetscInt                *granks,j,*gaffinities;

    ierr        = PetscCommGetThreadComm(PETSC_COMM_WORLD,&gtcomm);CHKERRQ(ierr);
    gaffinities = gtcomm->affinities;
    gptcomm     = (PetscThreadComm_PThread)gtcomm->data;
    granks      = gptcomm->granks;

    /* Copy over the data from the global thread communicator structure */
    ptcomm->ismainworker     = gptcomm->ismainworker;
    ptcomm->thread_num_start = gptcomm->thread_num_start;
    ptcomm->sync             = gptcomm->sync;
    ptcomm->aff              = gptcomm->aff;
    tcomm->ops->runkernel    = gtcomm->ops->runkernel;
    tcomm->ops->barrier      = gtcomm->ops->barrier;

    for (i=0; i<tcomm->nworkThreads; i++) {
      for (j=0; j<gtcomm->nworkThreads; j++) {
        if (tcomm->affinities[i] == gaffinities[j]) ptcomm->granks[i] = granks[j];
      }
    }
  }
  PetscFunctionReturn(0);
}