/*
 * Derive the buffer count and the set count from the machine's cache
 * topology.
 *
 * Reads the hw.cacheconfig and hw.cachesize sysctls, takes the highest index
 * with a non-zero hw.cacheconfig entry as the last-level cache (LLC), and
 * stores:
 *   *nbufs - how many buffers of npages * 4096 bytes fit in 90% of the LLC
 *   *nsets - cacheconfig[0] / cacheconfig[llc]; the progress message below
 *            reports these two values as "cpus" and "cpus per LLC"
 *
 * Prints a message and exits with status 1 if a sysctl fails, if npages is
 * not positive, or if the selected hw.cacheconfig entry is zero (the last
 * two would otherwise divide by zero).
 */
static void
auto_config(int npages, int *nbufs, int *nsets)
{
	size_t	len;
	int	llc;
	int64_t	bufbytes;
	/*
	 * Zero-filled so that levels the kernel does not fill in read as
	 * "no cache" rather than stack garbage during the LLC search.
	 */
	int64_t	cacheconfig[MAX_CACHE_DEPTH] = { 0 };
	int64_t	cachesize[MAX_CACHE_DEPTH] = { 0 };

	mutter("Autoconfiguring...\n");

	if (npages <= 0) {
		printf("Invalid buffer size of %d pages\n", npages);
		exit(1);
	}

	len = sizeof(cacheconfig);
	if (sysctlbyname("hw.cacheconfig",
	    &cacheconfig[0], &len, NULL, 0) != 0) {
		printf("Unable to get hw.cacheconfig, %d\n", errno);
		exit(1);
	}
	len = sizeof(cachesize);
	if (sysctlbyname("hw.cachesize",
	    &cachesize[0], &len, NULL, 0) != 0) {
		printf("Unable to get hw.cachesize, %d\n", errno);
		exit(1);
	}

	/*
	 * Find LLC: walk down from the deepest level; stops at index 0 if
	 * no higher level has a non-zero entry.
	 */
	for (llc = MAX_CACHE_DEPTH - 1; llc > 0; llc--) {
		if (cacheconfig[llc] != 0)
			break;
	}
	if (cacheconfig[llc] == 0) {
		printf("hw.cacheconfig reports no cpus for L%d\n", llc);
		exit(1);
	}

	/*
	 * Calculate how many buffers of npages*4096 bytes fit into 90% of
	 * the LLC.  The arithmetic is done in 64 bits so a large page count
	 * cannot overflow an int.
	 */
	bufbytes = (int64_t) npages * 4096;
	*nbufs = (int) (cachesize[llc] * 9 / (bufbytes * 10));
	mutter(" L%d (LLC) cache %qd bytes: "
	    "using %d buffers of size %d bytes\n",
	    llc, cachesize[llc], *nbufs, (npages * 4096));

	/*
	 * Calculate how many sets:
	 */
	*nsets = (int) (cacheconfig[0] / cacheconfig[llc]);
	mutter(" %qd cpus; %qd cpus per L%d cache: using %d sets\n",
	    cacheconfig[0], cacheconfig[llc], llc, *nsets);
}
/*
 * For every input trace (n1 samples, read into vel) this program writes an
 * n1 x xn panel named dip, one column per offset x2 = xo + j*xd.  A second
 * input, "heterog", supplies nr samples per trace (read into ani) that enter
 * the traveltime expression alongside vel.  With mute=y each output column
 * is additionally passed through mutter().
 */
int main (int argc, char* argv[])
{
    int n1, n2, xn, i, j, k, m, mlast, nr;
    float o1, d1, xd, xo, *vel, *dip, t, t1, tt, x2, *ani;
    float t0=0., tp=0., slope0=0., slopep=0., x=0., x1=0., v0=0., *data=NULL;
    bool abs, half, inner, hyper, mute;
    sf_file in, out, heterog;

    sf_init (argc, argv);
    in = sf_input("in");
    out = sf_output("out");
    heterog = sf_input("heterog");
    /* heterogeneity */

    if (!sf_histint(heterog,"n1",&nr)) sf_error("No n1= in heterog");
    /* ani[0] is always read below, so an empty heterog trace is unusable */
    if (nr < 1) sf_error("Need n1 > 0 in heterog");

    if (!sf_histfloat(in,"o1",&o1)) sf_error("No o1= in input");
    if (!sf_histfloat(in,"d1",&d1)) sf_error("No d1= in input");
    if (!sf_histint(in,"n1",&n1)) sf_error("No n1= in input");
    n2 = sf_leftsize(in,1);

    if (!sf_getint("n",&xn)) xn = 32;
    /* offset number */
    if (!sf_getfloat("d",&xd)) xd = 12.5;
    /* offset interval */
    if (!sf_getfloat("o",&xo)) xo = 0.;
    /* offset origin */
    if (!sf_getbool("mute",&mute)) mute=false;
    /* if y, use mutter */
    if (!sf_getbool("half",&half)) half=false;
    /* if y, half-offset instead of full offset */

    if (half) {
        xd *= 2.;
        xo *= 2.;
    }

    vel = sf_floatalloc(n1);
    dip = sf_floatalloc(n1*xn);
    /* One buffer reused for every slice; allocating it inside the slice
       loop leaked nr floats per slice. */
    ani = sf_floatalloc(nr);

    /* Last index that is valid in both vel[] (n1) and ani[] (nr). */
    mlast = ((nr < n1) ? nr : n1) - 1;

    sf_shiftdim(in, out, 1);

    sf_putint(out,"n2",xn);
    sf_putfloat(out,"d2",xd);
    sf_putfloat(out,"o2",xo);

    if (mute) {
        if (!sf_getfloat("tp",&tp)) tp=0.150;
        /* end time (available when mute=y) */
        if (!sf_getfloat("t0",&t0)) t0=0.;
        /* starting time (available when mute=y) */
        if (!sf_getfloat("v0",&v0)) v0=10000;
        /* velocity (available when mute=y) */
        if (!sf_getfloat("x0",&x1)) x1=0.;
        /* starting space (available when mute=y) */
        if (!sf_getbool("abs",&abs)) abs=true;
        /* if y, use absolute value |x-x0| (available when mute=y) */
        if (!sf_getbool("inner",&inner)) inner=false;
        /* if y, do inner muter (available when mute=y) */
        if (!sf_getbool("hyper",&hyper)) hyper=false;
        /* if y, do hyperbolic mute (available when mute=y) */

        slope0=1./v0;
        slopep=slope0;
        if (hyper) {
            slope0 *= slope0;
            slopep *= slopep;
        }

        data = sf_floatalloc(n1);
        mutter_init(n1,o1-t0,d1,abs,inner,hyper);
    }

    for (k=0; k < n2; k++) {
        sf_warning("slice %d of %d;",k+1,n2);
        sf_floatread(vel,n1,in);
        sf_floatread(ani,nr,heterog);

        for (j=0; j < xn; j++) {
            for (i=0; i < n1; i++) {
                t = o1+i*d1;
                x2 = xo+j*xd;

                /* Advance m until the traveltime evaluated at sample m
                   is no longer below t.  The search is capped at mlast so
                   vel[m] and ani[m] are never read past their ends. */
                m = 0;
                t1 = o1 + m*d1;
                while (m < mlast &&
                       t>(t1*(1-1/ani[m])+
                          sqrt(t1*t1+ani[m]*x2*x2/(vel[m]*vel[m]+FLT_EPSILON))/
                          ani[m])) {
                    m++;
                    t1 = o1 + m*d1;
                }

                tt = (o1+i*d1)*(1-1/ani[m])+
                    sqrt((o1+i*d1)*(o1+i*d1)+ani[m]*x2*xd*xd*x2/
                         (vel[m]*vel[m]+FLT_EPSILON))/ani[m];

                dip[j*n1+i] = x2*xd/(FLT_EPSILON+d1*vel[m]*vel[m]*
                                     (ani[m]*tt-(o1+i*d1)*(ani[m]-1)));
            }
        }

        if (mute) {
            for (j=0; j < xn; j++) {
                /* offset relative to x0; squared for the hyperbolic mute */
                x = xo + j*xd;
                x -= x1;
                if (hyper) x *= x;

                /* mutter() is handed a scratch copy of the column, which
                   is then copied back into dip */
                for (i=0; i < n1; i++) {
                    data[i] = dip[j*n1+i];
                }
                mutter (tp,slope0,slopep, x, data, false);
                for (i=0; i < n1; i++) {
                    dip[j*n1+i] = data[i];
                }
            }
        }

        sf_floatwrite(dip,n1*xn,out);
    }
    sf_warning(".");

    exit (0);
}
int main(int argc, char *argv[]) { kern_return_t ret; mach_port_name_t port; int pid; int c; thread_act_t *thread_array; mach_msg_type_number_t num_threads; int i; boolean_t interactive = FALSE; int tag; if (geteuid() != 0) { printf("Must be run as root\n"); exit(1); } /* Do switch parsing: */ while ((c = getopt (argc, argv, "hiv:")) != -1) { switch (c) { case 'i': interactive = TRUE; break; case 'v': verbosity = atoi(optarg); break; case 'h': case '?': default: usage(); } } argc -= optind; argv += optind; if (argc > 0) pid = atoi(*argv); ret = task_for_pid(mach_task_self(), pid, &port); if (ret != KERN_SUCCESS) err(1, "task_for_pid(,%d,) returned %d", pid, ret); mutter("task %p\n", port); ret = task_threads(port, &thread_array, &num_threads); if (ret != KERN_SUCCESS) err(1, "task_threads() returned %d", pid, ret); for (i = 0; i < num_threads; i++) { printf(" %d: thread 0x%08x tag %d\n", i, thread_array[i], thread_tag_get(thread_array[i])); } while (interactive) { printf("Enter new tag or <return> to skip or ^D to quit\n"); for (i = 0; i < num_threads; i++) { tag = thread_tag_get(thread_array[i]); printf(" %d: thread 0x%08x tag %d: ", i, thread_array[i], tag); fflush(stdout); (void) fgets(input, 20, stdin); if (feof(stdin)) { printf("\n"); interactive = FALSE; break; } if (strlen(input) > 1) { tag = atoi(input); thread_tag_set(thread_array[i], tag); } } } return 0; }
int main(int argc, char *argv[]) { int i; int j; int k; int pages = 256; /* 1MB */ int buffers = 2; int producers = 2; int consumers = 2; int stages = 2; int *status; stage_info_t *stage_info; stage_info_t *sp; worker_info_t *worker_info; worker_info_t *wp; kern_return_t ret; int c; /* Do switch parsing: */ while ((c = getopt (argc, argv, "ab:i:p:s:twv:")) != -1) { switch (c) { case 'a': affinity = !affinity; break; case 'b': buffers = atoi(optarg); break; case 'i': iterations = atoi(optarg); break; case 'p': pages = atoi(optarg); break; case 's': stages = atoi(optarg); if (stages >= WORKERS_MAX) usage(); break; case 't': halting = TRUE; break; case 'w': consumer_fnp = &reader_writer_fn; break; case 'v': verbosity = atoi(optarg); break; case 'h': case '?': default: usage(); } } argc -= optind; argv += optind; if (argc > 0) producers = atoi(*argv); argc--; argv++; if (argc > 0) consumers = atoi(*argv); pthread_mutex_init(&funnel, NULL); pthread_cond_init(&barrier, NULL); /* * Fire up the worker threads. */ threads = consumers * (stages - 1) + producers; mutter("Launching %d producer%s with %d stage%s of %d consumer%s\n" " with %saffinity, consumer reads%s data\n", producers, s_if_plural(producers), stages - 1, s_if_plural(stages - 1), consumers, s_if_plural(consumers), affinity? "": "no ", (consumer_fnp == &reader_writer_fn)? 
" and writes" : ""); if (pages < 256) mutter(" %dkB bytes per buffer, ", pages * 4); else mutter(" %dMB bytes per buffer, ", pages / 256); mutter("%d buffer%s per producer ", buffers, s_if_plural(buffers)); if (buffers * pages < 256) mutter("(total %dkB)\n", buffers * pages * 4); else mutter("(total %dMB)\n", buffers * pages / 256); mutter(" processing %d buffer%s...\n", iterations, s_if_plural(iterations)); stage_info = (stage_info_t *) malloc(stages * sizeof(stage_info_t)); worker_info = (worker_info_t *) malloc(threads * sizeof(worker_info_t)); /* Set up the queue for the workers of this thread set: */ for (i = 0; i < stages; i++) { sp = &stage_info[i]; sp->stagenum = i; pthread_mutex_init(&sp->bufq.mtx, NULL); pthread_cond_init(&sp->bufq.cnd, NULL); TAILQ_INIT(&sp->bufq.queue); sp->bufq.waiters = 0; if (i == 0) { sp->fn = producer_fnp; sp->name = "producer"; } else { sp->fn = consumer_fnp; sp->name = "consumer"; } sp->input = &sp->bufq; sp->output = &stage_info[(i + 1) % stages].bufq; stage_info[i].work_todo = iterations; } /* Create the producers */ for (i = 0; i < producers; i++) { work_t *work_array; int *data; int isize; isize = pages * 4096 / sizeof(int); data = (int *) malloc(buffers * pages * 4096); /* Set up the empty work buffers */ work_array = (work_t *) malloc(buffers * sizeof(work_t)); for (j = 0; j < buffers; j++) { work_array[j].data = data + (isize * j); work_array[j].isize = isize; work_array[j].tag = 0; TAILQ_INSERT_TAIL(&stage_info[0].bufq.queue, &work_array[j], link); DBG(" empty work item %p for data %p\n", &work_array[j], work_array[j].data); } wp = &worker_info[i]; wp->setnum = i + 1; wp->stage = &stage_info[0]; if (ret = pthread_create(&wp->thread, NULL, &manager_fn, (void *) wp)) err(1, "pthread_create %d,%d", 0, i); } /* Create consumers */ for (i = 1; i < stages; i++) { for (j = 0; j < consumers; j++) { wp = &worker_info[producers + (consumers*(i-1)) + j]; wp->setnum = j + 1; wp->stage = &stage_info[i]; if (ret = 
pthread_create(&wp->thread, NULL, &manager_fn, (void *) wp)) err(1, "pthread_create %d,%d", i, j); } } /* * We sit back anf wait for the slaves to finish. */ for (k = 0; k < threads; k++) { int i; int j; wp = &worker_info[k]; if (k < producers) { i = 0; j = k; } else { i = (k - producers) / consumers; j = (k - producers) % consumers; } if(ret = pthread_join(wp->thread, (void **)&status)) err(1, "pthread_join %d,%d", i, j); DBG("Thread %d,%d status %d\n", i, j, status); } /* * See how long the work took. */ timer = mach_absolute_time() - timer; timer = timer / 1000000ULL; printf("%d.%03d seconds elapsed.\n", (int) (timer/1000ULL), (int) (timer % 1000ULL)); return 0; }
int main(int argc, char *argv[]) { int i; int j; int pages = 256; /* 1MB */ int buffers = 2; int sets = 2; int stages = 2; int *status; line_info_t *line_info; line_info_t *lp; stage_info_t *stage_info; stage_info_t *sp; kern_return_t ret; int c; /* Do switch parsing: */ while ((c = getopt (argc, argv, "ab:chi:p:s:twv:")) != -1) { switch (c) { case 'a': #ifdef AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER affinity = !affinity; break; #else usage(); #endif case 'b': buffers = atoi(optarg); break; case 'c': cache_config = TRUE; break; case 'i': iterations = atoi(optarg); break; case 'p': pages = atoi(optarg); break; case 's': stages = atoi(optarg); if (stages >= WORKERS_MAX) usage(); break; case 't': halting = TRUE; break; case 'w': consumer_fnp = &reader_writer_fn; break; case 'v': verbosity = atoi(optarg); break; case '?': case 'h': default: usage(); } } argc -= optind; argv += optind; if (argc > 0) sets = atoi(*argv); if (cache_config) auto_config(pages, &buffers, &sets); pthread_mutex_init(&funnel, NULL); pthread_cond_init(&barrier, NULL); /* * Fire up the worker threads. */ threads = sets * stages; mutter("Launching %d set%s of %d threads with %saffinity, " "consumer reads%s data\n", sets, s_if_plural(sets), stages, affinity? "": "no ", (consumer_fnp == &reader_writer_fn)? 
" and writes" : ""); if (pages < 256) mutter(" %dkB bytes per buffer, ", pages * 4); else mutter(" %dMB bytes per buffer, ", pages / 256); mutter("%d buffer%s per set ", buffers, s_if_plural(buffers)); if (buffers * pages < 256) mutter("(total %dkB)\n", buffers * pages * 4); else mutter("(total %dMB)\n", buffers * pages / 256); mutter(" processing %d buffer%s...\n", iterations, s_if_plural(iterations)); line_info = (line_info_t *) malloc(sets * sizeof(line_info_t)); stage_info = (stage_info_t *) malloc(sets * stages * sizeof(stage_info_t)); for (i = 0; i < sets; i++) { work_t *work_array; lp = &line_info[i]; lp->setnum = i + 1; lp->isize = pages * 4096 / sizeof(int); lp->data = (int *) malloc(buffers * pages * 4096); /* Set up the queue for the workers of this thread set: */ for (j = 0; j < stages; j++) { sp = &stage_info[(i*stages) + j]; sp->stagenum = j; sp->set = lp; lp->stage[j] = sp; pthread_mutex_init(&sp->bufq.mtx, NULL); pthread_cond_init(&sp->bufq.cnd, NULL); TAILQ_INIT(&sp->bufq.queue); sp->bufq.waiters = FALSE; } /* * Take a second pass through the stages * to define what the workers are and to interconnect their input/outputs */ for (j = 0; j < stages; j++) { sp = lp->stage[j]; if (j == 0) { sp->fn = producer_fnp; sp->name = "producer"; } else { sp->fn = consumer_fnp; sp->name = "consumer"; } sp->input = &lp->stage[j]->bufq; sp->output = &lp->stage[(j + 1) % stages]->bufq; } /* Set up the buffers on the first worker of the set. 
*/ work_array = (work_t *) malloc(buffers * sizeof(work_t)); for (j = 0; j < buffers; j++) { work_array[j].data = lp->data + (lp->isize * j); TAILQ_INSERT_TAIL(&lp->stage[0]->bufq.queue, &work_array[j], link); DBG(" empty work item %p for set %d data %p\n", &work_array[j], i, work_array[j].data); } /* Create this set of threads */ for (j = 0; j < stages; j++) { if (ret = pthread_create(&lp->stage[j]->thread, NULL, &manager_fn, (void *) lp->stage[j])) err(1, "pthread_create %d,%d", i, j); } } /* * We sit back anf wait for the slave to finish. */ for (i = 0; i < sets; i++) { lp = &line_info[i]; for (j = 0; j < stages; j++) { if(ret = pthread_join(lp->stage[j]->thread, (void **)&status)) err(1, "pthread_join %d,%d", i, j); DBG("Thread %d,%d status %d\n", i, j, status); } } /* * See how long the work took. */ timer = mach_absolute_time() - timer; timer = timer / 1000000ULL; printf("%d.%03d seconds elapsed.\n", (int) (timer/1000ULL), (int) (timer % 1000ULL)); return 0; }