/*
 * Tear down a thread pool and release its memory.
 *
 * Unless the pool is flagged `single`, it is cancelled first. Per-thread
 * cleanup then runs in two complete passes over the non-NULL thread slots,
 * after which both barriers are destroyed and the slot array and the pool
 * structure itself are freed. `pool` must not be used after this returns.
 */
void thread_pool_cleanup(struct thread_pool *pool)
{
	int idx;

	if (!pool->single)
		thread_pool_cancel(pool);

	/*
	 * Pass 1: clean every Lua state before anything else, so that the
	 * extensions are unloaded before the thread capture states go away.
	 */
	for (idx = 0; idx < pool->count; ++idx) {
		if (!pool->threads[idx])
			continue;
		cleanup_thread_state_lua(pool->threads[idx]);
	}

	/* Pass 2: finish the cleanup of each thread state. */
	for (idx = 0; idx < pool->count; ++idx) {
		if (!pool->threads[idx])
			continue;
		cleanup_thread_state(pool->threads[idx]);
	}

	barrier_destroy(&pool->thread_sync);
	barrier_destroy(&pool->thread_start_sync);

	free(pool->threads);
	free(pool);
}
/*
 * Benchmark hook: destroys the barrier `b`, deliberately ignoring the
 * result of barrier_destroy(), and always reports success (0).
 */
int
benchmark_finirun()
{
	(void) barrier_destroy(b);

	return (0);
}
/*
 * Run `function` on `count` threads and return once all of them finished.
 *
 * The calling thread takes slot 0: its handle is stored first in
 * global_threads and it calls function(NULL) directly. Slots 1..count-1 are
 * new pthreads that receive their slot index cast to a pointer. The global
 * barrier, lock and thread-specific key are created before the threads start
 * and torn down after they have been joined. Any failure is reported through
 * pcu_fail().
 */
void pcu_run_threads(int count, pcu_thread* function)
{
  if (count < 1)
    pcu_fail("thread count must be positive");

  global_nthreads = count;
  PCU_MALLOC(global_threads,(size_t)count);
  global_threads[0] = pthread_self();

  barrier_init(&global_barrier, count);
  pthread_mutex_init(&global_lock, NULL);

  int err = pthread_key_create(&global_key,NULL);
  if (err)
    pcu_fail("pthread_key_create failed");
  pthread_setspecific(global_key,0);

  /* Spawn the workers for slots 1..count-1. */
  int t = 1;
  while (t < count)
  {
    err = pthread_create(global_threads+t,NULL,function,(void*)(ptrdiff_t)t);
    if (err)
      pcu_fail("pthread_create failed");
    ++t;
  }

  /* The caller does its own share of the work. */
  function(NULL);

  t = 1;
  while (t < count)
  {
    err = pthread_join(global_threads[t],NULL);
    if (err)
      pcu_fail("pthread_join failed");
    ++t;
  }

  pthread_mutex_destroy(&global_lock);
  barrier_destroy(&global_barrier);

  err = pthread_key_delete(global_key);
  if (err)
    pcu_fail("pthread_key_delete failed");

  pcu_free(global_threads);
}
/*
 * UDF teardown: stop the worker threads and release the per-call state.
 *
 * When initid->extension holds the shared th_data, the exit flag is raised,
 * the start barrier is passed so the workers can observe it, all NTHREAD
 * workers are joined, and both barriers are destroyed. Afterwards
 * initid->ptr and initid->extension are freed and reset to NULL.
 *
 * The thread shutdown is skipped when initid->extension is NULL; the
 * function already treated that pointer as optional when freeing it, so it
 * must not be dereferenced unconditionally beforehand.
 */
void oph_sum_array_r_deinit(UDF_INIT * initid)
{
	th_data *data = (th_data *) (initid->extension);
	int i;

	if (data) {
		/* Set the flag first, then pass the start barrier the workers use. */
		data->exit_flag = 1;
		barrier_wait(&(data->barr_start));

		for (i = 0; i < NTHREAD; i++)
			pthread_join(data->thread[i], NULL);

		barrier_destroy(&(data->barr_start));
		barrier_destroy(&(data->barr_end));
	}

	/* Free allocated space */
	if (initid->ptr) {
		free(initid->ptr);
		initid->ptr = NULL;
	}
	if (initid->extension) {
		free(initid->extension);
		initid->extension = NULL;
	}
}
int main (int arg, char *argv[]) { int thread_count, array_count; int status; barrier_init (&barrier, THREADS); /* * Create a set of threads that will use the barrier. */ for (thread_count = 0; thread_count < THREADS; thread_count++) { thread[thread_count].increment = thread_count; thread[thread_count].number = thread_count; for (array_count = 0; array_count < ARRAY; array_count++) thread[thread_count].array[array_count] = array_count + 1; // for (array_count = 0; array_count < ARRAY; array_count++) // printf ("%010u ", thread[thread_count].array[array_count]); // printf ("\n"); status = pthread_create (&thread[thread_count].thread_id, NULL, thread_routine, (void*)&thread[thread_count]); if (status != 0) err_abort (status, "Create thread"); } /* * Now join with each of the threads. */ for (thread_count = 0; thread_count < THREADS; thread_count++) { status = pthread_join (thread[thread_count].thread_id, NULL); if (status != 0) err_abort (status, "Join thread"); printf ("%02d: (%d) ", thread_count, thread[thread_count].increment); for (array_count = 0; array_count < ARRAY; array_count++) printf ("%010u ", thread[thread_count].array[array_count]); printf ("\n"); } /* * To be thorough, destroy the barrier. */ barrier_destroy (&barrier); return 0; }
/*
 * Shut the package down.
 *
 * Drains the quit-callback list, invoking each callback and freeing its
 * entry, then frees every entry of the gc-mark list without calling
 * anything. Finally the cache, the node table and the gc barrier are
 * released.
 */
void sylvan_quit()
{
    struct reg_quit_entry *quit_e;
    struct reg_gc_mark_entry *mark_e;

    /* The list head is advanced before the callback runs. */
    for (quit_e = quit_register; quit_e != NULL; quit_e = quit_register) {
        quit_register = quit_e->next;
        quit_e->cb();
        free(quit_e);
    }

    for (mark_e = gc_mark_register; mark_e != NULL; mark_e = gc_mark_register) {
        gc_mark_register = mark_e->next;
        free(mark_e);
    }

    cache_free();
    llmsset_free(nodes);
    barrier_destroy(&gcbar);
}
/*
 * Public entry point that forwards both arguments, unchanged, to
 * barrier_destroy().
 */
void drd_barrier_destroy(const Addr barrier,
                         const BarrierT barrier_type)
{
   barrier_destroy(barrier, barrier_type);
}
int main(int argc, char **argv) { int alloc_size = 100; int num_allocs = 1000 * 1000; int pool_size; int pool_auto_size = 1; int use_malloc = 0; int touch = 0; int warm = 0; int concurrency = 1; uinet_pool_t pool; int i; struct timespec t1, t2; char ch; void **allocations; struct test_params *params; struct barrier barrier; int allocs_per_thread; int remainder; while ((ch = getopt(argc, argv, "c:hmn:p:s:tw")) != -1) { switch (ch) { case 'c': concurrency = atoi(optarg); if (concurrency < 1) concurrency = 1; break; case 'h': usage(argv[0]); return (0); break; case 'm': use_malloc = 1; break; case 'n': num_allocs = atoi(optarg); if (num_allocs < 1) num_allocs = 1; break; case 'p': pool_size = atoi(optarg); if (pool_size < 1) pool_size = 1; pool_auto_size = 0; break; case 's': alloc_size = atoi(optarg); if (alloc_size < 1) alloc_size = 1; break; case 't': touch = 1; break; case 'w': warm = 1; break; default: printf("Unknown option \"%c\"\n", ch); return (1); } } argc -= optind; argv += optind; /* * Unless otherwise requested, size the pool so the total number of * allocations can be made even with the maximum possible number of * pool members resident in per-thread caches. Allocations made by * one thread cannot be satisfied by pool members residing in the * caches of other threads, so it is possible that a pool sized too * closely to the number of allocations to be made can result in * allocation failures occurring. * * Add enough extra elements beyond the number of allocations to * fill two buckets of 128 elements per thread. 
*/ if (pool_auto_size) pool_size = num_allocs + concurrency * 256; params = malloc(sizeof(struct test_params) * concurrency); if (params == NULL) { printf("Failed to allocate params array\n"); return (1); } if (!use_malloc) { uinet_init(1, 128*1024, 0); printf("Creating pool of %d elements\n", pool_size); pool = uinet_pool_create("test pool", alloc_size, NULL, NULL, NULL, NULL, UINET_POOL_ALIGN_PTR, 0); if (NULL == pool) { printf("Pool creation failed\n"); return (1); } uinet_pool_set_max(pool, pool_size); } clock_getres(CLOCK_PROF, &t1); printf("Timing resolution is %ldms\n", t1.tv_nsec / 1000000); if (barrier_init(&barrier, concurrency)) { printf("Failed to initialize thread sync barrier\n"); return (1); } printf("Test plan: threads=%d size=%d count=%d warmup=%s\n", concurrency, alloc_size, num_allocs, warm ? "yes" : "no"); allocs_per_thread = num_allocs / concurrency; remainder = num_allocs % concurrency; printf("Thread 0: count=%d\n", allocs_per_thread); for (i = 0; i < concurrency; i++) { params[i].id = i; params[i].use_malloc = use_malloc; params[i].alloc_size = alloc_size; params[i].num_allocs = allocs_per_thread; if (remainder) { params[i].num_allocs++; remainder--; } params[i].touch = touch; params[i].pool = pool; params[i].barrier = &barrier; if (i > 0) if (pthread_create(¶ms[i].thread, NULL, start_test_thread, ¶ms[i])) { printf("Failed to create thread %d\n", i); return (1); } } if (warm) { allocations = malloc(sizeof(void *) * num_allocs); if (allocations == NULL) { printf("Failed to allocate results array\n"); return (1); } if (use_malloc) { for (i = 0; i < num_allocs; i++) { allocations[i] = malloc(alloc_size); if (allocations[i] == NULL) { printf("Alllocation %d failed during warmup\n", i); return (1); } } for (i = 0; i < num_allocs; i++) { free(allocations[i]); } } else { for (i = 0; i < num_allocs; i++) { allocations[i] = uinet_pool_alloc(pool, 0); if (allocations[i] == NULL) { printf("Alllocation %d failed during warmup\n", i); return (1); } } for 
(i = 0; i < num_allocs; i++) { uinet_pool_free(pool, allocations[i]); } } } /* * Give the other threads 100 ms to reach their barriers so timing * uncertainty is reduced. */ t1.tv_sec = 0; t1.tv_nsec = 100 * 1000 * 1000; nanosleep(&t1, NULL); clock_gettime(CLOCK_PROF, &t1); barrier_wait(params[0].barrier); do_test(¶ms[0]); for (i = 1; i < concurrency; i++) pthread_join(params[i].thread, NULL); clock_gettime(CLOCK_PROF, &t2); if (t1.tv_nsec > t2.tv_nsec) { t2.tv_nsec = 1000000000 + t2.tv_nsec - t1.tv_nsec; t2.tv_sec = t2.tv_sec - t1.tv_sec - 1; } else { t2.tv_nsec = t2.tv_nsec - t1.tv_nsec; t2.tv_sec = t2.tv_sec - t1.tv_sec; } printf("Time for %d allocations of %d bytes was %lds %ldms\n", num_allocs, alloc_size, t2.tv_sec, t2.tv_nsec / 1000000); barrier_destroy(&barrier); if (!use_malloc) { uinet_pool_destroy(pool); uinet_shutdown(0); } if (warm) free(allocations); free(params); return (0); }
// Note: lower half of R is not touched, and should be given as zero matrix * QR_decomposition_prealloc(matrix *A, matrix *R) { size_t n,m,i,j,k; size_t n_threads = numCPUs()*2; pthread_t thread[n_threads]; struct QR_worker_arg t_arg[n_threads]; barrier_t sync_barrier; pthread_attr_t attr; n = A->n; m = A->m; if (n < m ) { fprintf(stderr, "Cannot create orthogonal matrix from %zux%zu matrix A.\n", n, m); exit(-1); } else if (R->n != m || R->m != m) { fprintf(stderr, "QR decomposition: R matrix has wrong shape\n"); exit(-1); } // Use modified Gram-Schmidt orthogonalization // Do we need to parallelize? if (m < 4 * n_threads || numCPUs() == 1) { // no for (i = 0; i < m; i++) { // Loop over columns // Normalize the i'th column: R->a[i][i] = sqrt(matrix_dot_cols(A,A,i,i)); if (fabs(R->a[i][i]) < 1.0e-12) { fprintf(stderr, "QR decomposition hit a singular matrix.\n"); exit(-1); } for (k = 0; k < n; k++) A->a[k][i] = A->a[k][i]/R->a[i][i]; // Then make all remaining columns orthogonal to column i. for (j = i+1; j<m;j++) { R->a[i][j] = matrix_dot_cols(A,A,i,j); for (k = 0; k < n; k++) // Loop along rows A->a[k][j] -= A->a[k][i] * R->a[i][j]; } } } else { // Parallelize pthread_attr_init(&attr); pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); barrier_init(&sync_barrier, n_threads + 1); for (i = 0; i < n_threads; i++) { t_arg[i].Q = A; t_arg[i].R = R; t_arg[i].command = 1; t_arg[i].b = &sync_barrier; pthread_create(&thread[i], &attr, QR_worker, &t_arg[i]); } pthread_attr_destroy(&attr); /* We now have a number of threads running, awaiting * commands. 
Command 0 is for thread exit, * command 1 is for running orthogonalization */ for (i = 0; i < m; i++) { // Loop over columns // Normalize the i'th column: R->a[i][i] = sqrt(matrix_dot_cols(A,A,i,i)); if (fabs(R->a[i][i]) < 1.0e-12) { fprintf(stderr, "QR decomposition hit a singular matrix.\n"); exit(-1); } for (k = 0; k < n; k++) A->a[k][i] = A->a[k][i]/R->a[i][i]; // Then make all remaining columns orthogonal to column i, // parallelly if (m - (i+1) > 4*n_threads) { for (k = 0; k < n_threads; k++) { t_arg[k].curr_idx = i; t_arg[k].start = i+1 + (k*(m - (i+1)))/n_threads; t_arg[k].stop = i+1 + ((k+1)*(m - (i+1)))/n_threads; } // Start the orthogonalization barrier(&sync_barrier); // Then wait for it to be finished and run the // next column. barrier(&sync_barrier); } else { // Don't parallelize the last bit for (j = i+1; j<m;j++) { R->a[i][j] = matrix_dot_cols(A,A,i,j); for (k = 0; k < n; k++) // Loop along rows A->a[k][j] -= A->a[k][i] * R->a[i][j]; } } } for (i = 0; i < n_threads; i++) { t_arg[i].command = 0; } // Ready to send command 0 to threads. barrier(&sync_barrier); barrier_destroy(&sync_barrier); } return R; }
int main(int argc, char *argv[]) { pthread_t *threads; pthread_attr_t attr; uint32_t **ranks; void *status; #if defined(PAPI_ENABLED) && !defined(DEBUG) int num_sets; PAPI_event_set_wrapper_t* event_sets; #endif int rc; uint32_t t; printf("Optimized Stream benchmark (using SSE intrinsics)\n"); init_flush_cache_array(); malloc_arrays(argv); print_array_parameters(); select_code_variant(argv); print_code_variant_parameters(); threads = (pthread_t *) malloc(numThreads * sizeof(pthread_t)); ranks = (uint32_t **) malloc(numThreads * sizeof(uint32_t *)); #if !defined(DEBUG) #if defined(PAPI_ENABLED) papi_init(desired_events, num_desired, &event_sets, &num_sets); // initialize threaded PAPI if (PAPI_thread_init((unsigned long (*)(void)) (pthread_self)) != PAPI_OK) { printf("Error with PAPI_thread_init().\n"); exit(EXIT_FAILURE); } results = (double *) malloc(num_sets * numThreads * NUM_TRIALS * sizeof(double)); if (results==NULL) { printf("Error on array results malloc.\n"); exit(EXIT_FAILURE); } #else results = (double *) malloc(numThreads * NUM_TRIALS * sizeof(double)); if (results==NULL) { printf("Error on array results malloc.\n"); exit(EXIT_FAILURE); } #if defined(CYCLE_TIME) // calculate clock rate GET_CLOCK_RATE(results, NUM_TRIALS); median_counts_per_sec = find_median(results, NUM_TRIALS); //printf("Median ticks per second = %e\n", median_counts_per_sec); #else timer_init(); median_counts_per_sec = 1.0; #endif #endif #endif pthread_attr_init(&attr); pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); barrier_init(&my_barrier, numThreads); #if defined(AFFINITY_ENABLED) Affinity_Init(); #endif // run stream tests for (t=0; t < numThreads; t++) { ranks[t] = (uint32_t *) malloc(sizeof(uint32_t)); *ranks[t] = t; } for (t=1; t < numThreads; t++) { #if defined(DEBUG) printf("Creating thread %u\n", t); #endif rc = pthread_create(&threads[t], &attr, pthreads_each, (void *) ranks[t]); if (rc) { printf("ERROR; return code from pthread_create() is %d\n", rc); 
exit(EXIT_FAILURE); } } pthreads_each((void *) ranks[0]); // join the other threads for (t=1; t < numThreads; t++) { pthread_join(threads[t], &status); } #if defined(PAPI_ENABLED) && !defined(DEBUG) papi_cleanup(event_sets, num_sets); #endif pthread_attr_destroy(&attr); pthread_exit(NULL); barrier_destroy(&my_barrier); free_arrays(); return EXIT_SUCCESS; }
int main() { long int i, ret; pass1 = 1; pass2 = 1; pthread_mutex_init(&mtx, NULL); barrier_init(&barrier, NUM_THREADS); for (i = 0; i < NUM_THREADS; i++) { pthread_create(&pthreads[i], NULL, &thread_func_throwaway, (void *)i); } printf("Throwaway Test:\n"); for (i = 0; i < NUM_THREADS; i++) { pthread_join(pthreads[i], (void **)(&ret)); printf("%ld\n", ret); /*Desired output: * 5 * 5 * 5 * 5 * 5 */ if (ret != 5) pass1 = 0; } printf("Reusable Test:\n"); for (i = 0; i < NUM_THREADS; i++) { pthread_create(&pthreads[i], NULL, &thread_func_reusable, (void *)i); } for (i = 0; i < NUM_THREADS; i++) { pthread_join(pthreads[i], (void **)(&ret)); /*Desired output: * 10 * 10 * 10 * 10 * 10 * 15 * 15 * 15 * 15 * 15 * 20 * 20 * 20 * 20 * 20 */ } if (!pass1) printf("Test 1 failed\n"); else printf("Test 1 passed\n"); if (!pass2) printf("Test 2 failed\n"); else printf("Test 2 passed\n"); pthread_mutex_destroy(&mtx); barrier_destroy(&barrier); return 0; }
// // main // // Run a game of life simulation. // int main() { int i, g, rows, cols; int div; // The first several lines take input parameters // for the game. // printf("Welcome to the Game of Life.\n"); printf("How many generations would you like to watch? "); scanf("%d", &g); printf("Enter the width of the board: "); scanf("%d", &cols); printf("Enter the height of the board: "); scanf("%d", &rows); // Define our grids: G is our main grid, and T is our // temp grid. We also print the initial state of the grid // before actually running the simulation. // grid *G = initGrid(rows, cols); grid *T = initGrid(rows, cols); populate(G); printGrid(G); mgridUpdate(T, G, G->rows, 0); // Gets the desired number of threads from the user -- we repeatedly // ask for a number until we get a divisor of rows. Once we know how // many threads there will be, we initialize the barrier. // printf("Please enter a divisor of %d to determine the number of threads: ", rows); scanf("%d", &div); while (rows % div != 0) { printf("I'm sorry, %d does not divide %d. Please choose a divisor of %d: ", div, rows, rows); scanf("%d", &div); } barrier_init(&barr, div); // Creates an array of tinfo structs and // pthreads. We then place the necessary // info into each tinfo struct. // tinfo **I = malloc(div*sizeof(tinfo)); pthread_t threads[div]; for (i=0; i<div; i++) { I[i] = initTinfo(); I[i]->in = G; I[i]->out = T; I[i]->section = i; I[i]->divide = div; I[i]->gen = g; } // Initialize a number of threads. Each thread works on a portion of our // grid -- which portion it works on is decided by the I[i] tinfo struct. // for (i=0; i<div; i++) { pthread_create(&threads[i], NULL, &mFunc, (void *)I[i]); } // My implementation requires join, because the main thread // must wait for all of the child threads to complete before // destroying the barrier and printing the final grid. // for (i=0; i<div; i++) { pthread_join(threads[i], NULL); } // Destroy the barrier, print the final generation. 
// barrier_destroy(&barr); printGrid(G); return 0; }
/**
 * Runs the heat-propagation simulation with the synchronisation scheme
 * selected by `etape`.
 *
 * Sets the globals vraieTaille (taille plus the two border cells) and
 * nbCaseParThread, allocates the thread and wrapper arrays, then dispatches:
 *   - etape 0: single-thread run through lancerThread();
 *   - etape 1: pthread_barrier_t barriers (initBarrieres / rendreBarrieres);
 *   - etape 2: maBarriere barriers (barrier_init / barrier_destroy);
 *   - etape 3: maBarriereSem barriers (barrier_sem_init / barrier_sem_destroy).
 * Any other value of `etape` only allocates and frees the two arrays.
 * A failed allocation reports through perror() and calls exit(1).
 *
 * @author Lucas Martinez
 */
void initSimulation(int taille, int etape, int nbIter, int nbThread, caseDansMat * mat){
	pthread_t* threads;
	wrappedMatrice* wrappedMat;

	vraieTaille = taille + 2; // borders
	nbCaseParThread = sqrt(taille * taille / nbThread); // in fact: cells per thread along one row

	if (nbCaseParThread < 1) {
		nbCaseParThread = 1;
		nbThread = taille * taille;
	}

	threads = malloc(nbThread * sizeof(pthread_t));
	if (threads == NULL) {
		perror("Erreur d'allocation mémoire, arret du programme.");
		exit(1);
	}

	wrappedMat = malloc(nbThread * sizeof(wrappedMatrice));
	if (wrappedMat == NULL) {
		perror("Erreur d'allocation mémoire, arret du programme.");
		exit(1);
	}

	if (etape == 0) {
		// a single thread, handled differently
		lancerThread(taille, nbIter, mat, wrappedMat);
	} else if (etape == 1) {
		pthread_barrier_t* barriereHori;
		pthread_barrier_t* barriereVerti;

		barriereHori = malloc(sizeof(pthread_barrier_t));
		if (barriereHori == NULL) {
			perror("Erreur d'allocation mémoire, arret du programme.");
			exit(1);
		}
		barriereVerti = malloc(sizeof(pthread_barrier_t));
		if (barriereVerti == NULL) {
			perror("Erreur d'allocation mémoire, arret du programme.");
			exit(1);
		}

		initBarrieres(nbThread, barriereHori, barriereVerti);
		lancerThreads(taille, etape, nbIter, mat, threads, wrappedMat, barriereHori, barriereVerti);
		rendreBarrieres(barriereHori, barriereVerti);
	} else if (etape == 2) {
		maBarriere* maBarriereHori;
		maBarriere* maBarriereVerti;

		maBarriereHori = malloc(sizeof(maBarriere));
		if (maBarriereHori == NULL) {
			perror("Erreur d'allocation mémoire, arret du programme.");
			exit(1);
		}
		maBarriereVerti = malloc(sizeof(maBarriere));
		if (maBarriereVerti == NULL) {
			perror("Erreur d'allocation mémoire, arret du programme.");
			exit(1);
		}

		barrier_init(maBarriereHori, nbThread);
		barrier_init(maBarriereVerti, nbThread);
		lancerThreads(taille, etape, nbIter, mat, threads, wrappedMat, maBarriereHori, maBarriereVerti);
		barrier_destroy(maBarriereHori);
		barrier_destroy(maBarriereVerti);
		free(maBarriereHori);
		free(maBarriereVerti);
	} else if (etape == 3) {
		maBarriereSem* maBarriereSemHori;
		maBarriereSem* maBarriereSemVerti;

		maBarriereSemHori = malloc(sizeof(maBarriereSem));
		if (maBarriereSemHori == NULL) {
			perror("Erreur d'allocation mémoire, arret du programme.");
			exit(1);
		}
		maBarriereSemVerti = malloc(sizeof(maBarriereSem));
		if (maBarriereSemVerti == NULL) {
			perror("Erreur d'allocation mémoire, arret du programme.");
			exit(1);
		}

		barrier_sem_init(maBarriereSemHori, nbThread);
		barrier_sem_init(maBarriereSemVerti, nbThread);
		lancerThreads(taille, etape, nbIter, mat, threads, wrappedMat, maBarriereSemHori, maBarriereSemVerti);
		barrier_sem_destroy(maBarriereSemHori);
		barrier_sem_destroy(maBarriereSemVerti);
		free(maBarriereSemHori);
		free(maBarriereSemVerti);
	}

	free(wrappedMat);
	free(threads);
}