Exemple #1
0
/* Releases a thread pool and everything it owns.
 *
 * Unless the pool is flagged as single, thread_pool_cancel() is called
 * first. The per-thread states are then released in two passes, the
 * barriers are destroyed, and finally the thread table and the pool
 * itself are freed. `pool` must not be used after this call.
 */
void thread_pool_cleanup(struct thread_pool *pool)
{
	int idx;

	if (!pool->single) {
		thread_pool_cancel(pool);
	}

	/* Pass 1: clean every Lua state so that extensions get unloaded
	 * before any thread capture state goes away.
	 */
	for (idx = 0; idx < pool->count; idx++) {
		if (!pool->threads[idx]) {
			continue;
		}
		cleanup_thread_state_lua(pool->threads[idx]);
	}

	/* Pass 2: release the remaining per-thread state. */
	for (idx = 0; idx < pool->count; idx++) {
		if (!pool->threads[idx]) {
			continue;
		}
		cleanup_thread_state(pool->threads[idx]);
	}

	barrier_destroy(&pool->thread_sync);
	barrier_destroy(&pool->thread_start_sync);

	free(pool->threads);
	free(pool);
}
Exemple #2
0
/*
 * End-of-run hook: destroys the barrier `b`.
 *
 * The result of barrier_destroy() is deliberately discarded; this
 * function always returns 0.
 */
int
benchmark_finirun()
{
	(void) barrier_destroy(b);
	return 0;
}
Exemple #3
0
/*
 * Runs `function` on `count` threads and returns once all of them are done.
 *
 * The calling thread takes slot 0 and invokes function(NULL) itself; slots
 * 1..count-1 are new pthreads that receive their slot index cast to a
 * pointer. The global barrier, lock and thread-specific key are set up
 * before the threads start and torn down after they have been joined.
 * pthread failures are reported through pcu_fail().
 */
void pcu_run_threads(int count, pcu_thread* function)
{
  int rc;
  int t;

  if (count < 1)
    pcu_fail("thread count must be positive");

  global_nthreads = count;
  PCU_MALLOC(global_threads,(size_t)count);
  global_threads[0] = pthread_self();
  barrier_init(&global_barrier, count);
  pthread_mutex_init(&global_lock, NULL);

  rc = pthread_key_create(&global_key,NULL);
  if (rc != 0)
    pcu_fail("pthread_key_create failed");
  pthread_setspecific(global_key,0);

  /* Spawn the workers for slots 1..count-1. */
  for (t = 1; t < count; t++) {
    rc = pthread_create(&global_threads[t],NULL,function,(void*)(ptrdiff_t)t);
    if (rc != 0)
      pcu_fail("pthread_create failed");
  }

  /* The calling thread does its own share of the work. */
  function(NULL);

  for (t = 1; t < count; t++) {
    rc = pthread_join(global_threads[t],NULL);
    if (rc != 0)
      pcu_fail("pthread_join failed");
  }

  pthread_mutex_destroy(&global_lock);
  barrier_destroy(&global_barrier);

  rc = pthread_key_delete(global_key);
  if (rc != 0)
    pcu_fail("pthread_key_delete failed");

  pcu_free(global_threads);
}
/*
 * Releases everything attached to `initid`.
 *
 * When initid->extension holds a th_data, the exit flag is raised, the
 * start barrier is passed once, the NTHREAD threads are joined and both
 * barriers are destroyed. Afterwards initid->ptr and initid->extension
 * are freed and reset to NULL.
 *
 * A NULL initid->extension is tolerated: there is then nothing to shut
 * down, and only initid->ptr is released.
 */
void oph_sum_array_r_deinit(UDF_INIT * initid)
{
	int i;
	th_data *data = (th_data *) (initid->extension);

	if (data) {
		/* Set the flag before passing the start barrier; presumably the
		 * workers test exit_flag once the barrier releases them
		 * (NOTE(review): confirm against the worker routine). */
		data->exit_flag = 1;
		barrier_wait(&(data->barr_start));

		for (i = 0; i < NTHREAD; i++)
			pthread_join(data->thread[i], NULL);

		barrier_destroy(&(data->barr_start));
		barrier_destroy(&(data->barr_end));
	}

	/* Free allocated space; free(NULL) is a no-op, so no guards are needed. */
	free(initid->ptr);
	initid->ptr = NULL;

	free(initid->extension);
	initid->extension = NULL;
}
int main (int arg, char *argv[])
{
    int thread_count, array_count;
    int status;

    barrier_init (&barrier, THREADS);

    /*
     * Create a set of threads that will use the barrier.
     */
    for (thread_count = 0; thread_count < THREADS; thread_count++) {
        thread[thread_count].increment = thread_count;
        thread[thread_count].number = thread_count;

        for (array_count = 0; array_count < ARRAY; array_count++)
            thread[thread_count].array[array_count] = array_count + 1;
        // for (array_count = 0; array_count < ARRAY; array_count++)
        //     printf ("%010u ", thread[thread_count].array[array_count]);
        // printf ("\n");

        status = pthread_create (&thread[thread_count].thread_id,
            NULL, thread_routine, (void*)&thread[thread_count]);
        if (status != 0)
            err_abort (status, "Create thread");
    }

    /*
     * Now join with each of the threads.
     */
    for (thread_count = 0; thread_count < THREADS; thread_count++) {
        status = pthread_join (thread[thread_count].thread_id, NULL);
        if (status != 0)
            err_abort (status, "Join thread");

        printf ("%02d: (%d) ", thread_count, thread[thread_count].increment);

        for (array_count = 0; array_count < ARRAY; array_count++)
            printf ("%010u ", thread[thread_count].array[array_count]);
        printf ("\n");
    }

    /*
     * To be thorough, destroy the barrier.
     */
    barrier_destroy (&barrier);
    return 0;
}
Exemple #6
0
/**
 * Shuts the package down.
 *
 * Every entry of the quit register is unlinked, its callback is invoked and
 * the entry is freed; the gc-mark register entries are freed without being
 * invoked. Finally the cache, the node set and the gc barrier are released.
 */
void
sylvan_quit()
{
    struct reg_quit_entry *quit_entry;
    struct reg_gc_mark_entry *mark_entry;

    /* Each entry is unlinked before its callback runs. */
    while ((quit_entry = quit_register) != NULL) {
        quit_register = quit_entry->next;
        quit_entry->cb();
        free(quit_entry);
    }

    while ((mark_entry = gc_mark_register) != NULL) {
        gc_mark_register = mark_entry->next;
        free(mark_entry);
    }

    cache_free();
    llmsset_free(nodes);
    barrier_destroy(&gcbar);
}
Exemple #7
0
/* Thin wrapper: forwards `barrier` and `barrier_type` unchanged to
 * barrier_destroy(). */
void drd_barrier_destroy(const Addr barrier, const BarrierT barrier_type)
{
  barrier_destroy(barrier, barrier_type);
}
Exemple #8
0
/*
 * Allocation benchmark driver.
 *
 * Options:
 *   -c N  number of threads (default 1, minimum 1)
 *   -h    print usage and exit
 *   -m    use malloc()/free() instead of a uinet pool
 *   -n N  total number of allocations (default 1000000, minimum 1)
 *   -p N  pool size; when absent the pool is sized automatically
 *   -s N  size of each allocation in bytes (default 100, minimum 1)
 *   -t    set the per-thread `touch` flag
 *   -w    perform a warm-up round of allocations before timing
 *
 * The allocations are split across the threads, the main thread acting as
 * thread 0. All threads meet at a barrier before the timed section; the
 * elapsed time is printed once every thread has been joined.
 *
 * Returns 0 on success and 1 on any setup failure.
 */
int main(int argc, char **argv)
{
	int alloc_size = 100;
	int num_allocs = 1000 * 1000;
	int pool_size = 0;
	int pool_auto_size = 1;
	int use_malloc = 0;
	int touch = 0;
	int warm = 0;
	int concurrency = 1;
	/* Stays NULL in malloc mode so that params[i].pool is never assigned
	 * an indeterminate value. */
	uinet_pool_t pool = NULL;
	int i;
	struct timespec t1, t2;
	/* getopt() returns int; storing it in a plain char makes the -1
	 * comparison fail where char is unsigned. */
	int ch;
	void **allocations = NULL;
	struct test_params *params;
	struct barrier barrier;
	int allocs_per_thread;
	int remainder;

	while ((ch = getopt(argc, argv, "c:hmn:p:s:tw")) != -1) {
		switch (ch) {
		case 'c':
			concurrency = atoi(optarg);
			if (concurrency < 1)
				concurrency = 1;
			break;
		case 'h':
			usage(argv[0]);
			return (0);
		case 'm':
			use_malloc = 1;
			break;
		case 'n':
			num_allocs = atoi(optarg);
			if (num_allocs < 1)
				num_allocs = 1;
			break;
		case 'p':
			pool_size = atoi(optarg);
			if (pool_size < 1)
				pool_size = 1;
			pool_auto_size = 0;
			break;
		case 's':
			alloc_size = atoi(optarg);
			if (alloc_size < 1)
				alloc_size = 1;
			break;
		case 't':
			touch = 1;
			break;
		case 'w':
			warm = 1;
			break;
		default:
			printf("Unknown option \"%c\"\n", ch);
			return (1);
		}
	}
	argc -= optind;
	argv += optind;

	/*
	 * Unless otherwise requested, size the pool so the total number of
	 * allocations can be made even with the maximum possible number of
	 * pool members resident in per-thread caches.  Allocations made by
	 * one thread cannot be satisfied by pool members residing in the
	 * caches of other threads, so it is possible that a pool sized too
	 * closely to the number of allocations to be made can result in
	 * allocation failures occurring.
	 *
	 * Add enough extra elements beyond the number of allocations to
	 * fill two buckets of 128 elements per thread.
	 */
	if (pool_auto_size)
		pool_size = num_allocs + concurrency * 256;

	params = malloc(sizeof(struct test_params) * concurrency);
	if (params == NULL) {
		printf("Failed to allocate params array\n");
		return (1);
	}

	if (!use_malloc) {
		uinet_init(1, 128*1024, 0);
		printf("Creating pool of %d elements\n", pool_size);
		pool = uinet_pool_create("test pool", alloc_size, NULL, NULL, NULL, NULL, UINET_POOL_ALIGN_PTR, 0);
		if (NULL == pool) {
			printf("Pool creation failed\n");
			return (1);
		}
		uinet_pool_set_max(pool, pool_size);
	}

	clock_getres(CLOCK_PROF, &t1);
	printf("Timing resolution is %ldms\n", t1.tv_nsec / 1000000);

	if (barrier_init(&barrier, concurrency)) {
		printf("Failed to initialize thread sync barrier\n");
		return (1);
	}

	printf("Test plan: threads=%d size=%d count=%d warmup=%s\n",
	       concurrency, alloc_size, num_allocs, warm ? "yes" : "no");

	/* Split the work evenly; the first `remainder` threads take one extra. */
	allocs_per_thread = num_allocs / concurrency;
	remainder = num_allocs % concurrency;
	printf("Thread 0: count=%d\n", allocs_per_thread);
	for (i = 0; i < concurrency; i++) {
		params[i].id = i;
		params[i].use_malloc = use_malloc;
		params[i].alloc_size = alloc_size;
		params[i].num_allocs = allocs_per_thread;
		if (remainder) {
			params[i].num_allocs++;
			remainder--;
		}
		params[i].touch = touch;
		params[i].pool = pool;
		params[i].barrier = &barrier;

		/* Slot 0 is run by the main thread itself, further down. */
		if (i > 0) {
			if (pthread_create(&params[i].thread, NULL, start_test_thread, &params[i])) {
				printf("Failed to create thread %d\n", i);
				return (1);
			}
		}
	}

	if (warm) {
		allocations = malloc(sizeof(void *) * num_allocs);
		if (allocations == NULL) {
			printf("Failed to allocate results array\n");
			return (1);
		}

		if (use_malloc) {
			for (i = 0; i < num_allocs; i++) {
				allocations[i] = malloc(alloc_size);
				if (allocations[i] == NULL) {
					printf("Alllocation %d failed during warmup\n", i);
					return (1);
				}
			}
			for (i = 0; i < num_allocs; i++) {
				free(allocations[i]);
			}
		} else {
			for (i = 0; i < num_allocs; i++) {
				allocations[i] = uinet_pool_alloc(pool, 0);
				if (allocations[i] == NULL) {
					printf("Alllocation %d failed during warmup\n", i);
					return (1);
				}
			}
			for (i = 0; i < num_allocs; i++) {
				uinet_pool_free(pool, allocations[i]);
			}
		}
	}

	/*
	 * Give the other threads 100 ms to reach their barriers so timing
	 * uncertainty is reduced.
	 */
	t1.tv_sec = 0;
	t1.tv_nsec = 100 * 1000 * 1000;
	nanosleep(&t1, NULL);

	clock_gettime(CLOCK_PROF, &t1);
	barrier_wait(params[0].barrier);

	do_test(&params[0]);

	for (i = 1; i < concurrency; i++)
		pthread_join(params[i].thread, NULL);

	clock_gettime(CLOCK_PROF, &t2);

	/* t2 := t2 - t1, borrowing one second when the nanoseconds underflow. */
	if (t1.tv_nsec > t2.tv_nsec) {
		t2.tv_nsec = 1000000000 + t2.tv_nsec - t1.tv_nsec;
		t2.tv_sec = t2.tv_sec - t1.tv_sec - 1;
	} else {
		t2.tv_nsec = t2.tv_nsec - t1.tv_nsec;
		t2.tv_sec = t2.tv_sec - t1.tv_sec;
	}
	/* time_t is not guaranteed to be long; cast to match %ld. */
	printf("Time for %d allocations of %d bytes was %lds %ldms\n",
	       num_allocs, alloc_size, (long)t2.tv_sec, t2.tv_nsec / 1000000);

	barrier_destroy(&barrier);
	if (!use_malloc) {
		uinet_pool_destroy(pool);
		uinet_shutdown(0);
	}
	free(allocations);

	free(params);


	return (0);
}
Exemple #9
0
// Note: lower half of R is not touched, and should be given as zero
//
// QR decomposition by modified Gram-Schmidt orthogonalization, performed in
// place: the columns of A are normalized and made mutually orthogonal while
// the diagonal and upper triangle of R are filled in. A must have at least
// as many rows as columns and R must be square with A's column count;
// otherwise, or when a column norm drops below 1.0e-12, the process exits.
//
// For large enough matrices on multi-CPU machines the orthogonalization of
// the remaining columns is handed to 2*numCPUs() worker threads that are
// synchronized with the caller through a barrier.
//
// Returns R.
matrix * QR_decomposition_prealloc(matrix *A, matrix *R) {

	size_t n,m,i,j,k;
	size_t n_threads = numCPUs()*2;
	pthread_t thread[n_threads];
	struct QR_worker_arg t_arg[n_threads];
	barrier_t sync_barrier;
	int parallel;

	n = A->n;
	m = A->m;
	if (n < m ) {
		fprintf(stderr, "Cannot create orthogonal matrix from %zux%zu matrix A.\n", n, m);
		exit(-1);
	} else if (R->n != m || R->m != m) {
		fprintf(stderr, "QR decomposition: R matrix has wrong shape\n");
		exit(-1);
	}

	// Do we need to parallelize?
	parallel = !(m < 4 * n_threads || numCPUs() == 1);

	if (parallel) {
		// The caller takes part in the barrier too, hence the +1.
		barrier_init(&sync_barrier, n_threads + 1);

		// Workers are created joinable (default attributes) so that we
		// can wait for them before the barrier and t_arg, which live on
		// this stack frame, go away.
		for (i = 0; i < n_threads; i++) {
			t_arg[i].Q = A;
			t_arg[i].R = R;
			t_arg[i].command = 1;
			t_arg[i].b = &sync_barrier;
			if (pthread_create(&thread[i], NULL, QR_worker, &t_arg[i]) != 0) {
				// A missing participant would block the barrier forever.
				fprintf(stderr, "QR decomposition: could not create worker thread.\n");
				exit(-1);
			}
		}

		/* We now have a number of threads running, awaiting
		 * commands. Command 0 is for thread exit,
		 * command 1 is for running orthogonalization */
	}

	for (i = 0; i < m; i++) { // Loop over columns
		// Normalize the i'th column:
		R->a[i][i] = sqrt(matrix_dot_cols(A,A,i,i));
		if (fabs(R->a[i][i]) < 1.0e-12) {
			fprintf(stderr, "QR decomposition hit a singular matrix.\n");
			exit(-1);
		}
		for (k = 0; k < n; k++)
			A->a[k][i] = A->a[k][i]/R->a[i][i];

		// Then make all remaining columns orthogonal to column i.
		if (parallel && m - (i+1) > 4*n_threads) {
			// Split columns i+1..m-1 into n_threads contiguous ranges.
			for (k = 0; k < n_threads; k++) {
				t_arg[k].curr_idx = i;
				t_arg[k].start = i+1 + (k*(m - (i+1)))/n_threads;
				t_arg[k].stop = i+1 + ((k+1)*(m - (i+1)))/n_threads;
			}

			// Start the orthogonalization
			barrier(&sync_barrier);

			// Then wait for it to be finished and run the
			// next column.
			barrier(&sync_barrier);
		} else { // Serial path; also used for the last few columns
			for (j = i+1; j<m;j++) {
				R->a[i][j] = matrix_dot_cols(A,A,i,j);
				for (k = 0; k < n; k++) // Loop along rows
					A->a[k][j] -= A->a[k][i] * R->a[i][j];
			}
		}
	}

	if (parallel) {
		for (i = 0; i < n_threads; i++) {
			t_arg[i].command = 0;
		}
		// Ready to send command 0 to threads.
		barrier(&sync_barrier);

		// Wait until every worker has really exited before tearing down
		// the barrier and leaving the frame that holds t_arg.
		for (i = 0; i < n_threads; i++) {
			pthread_join(thread[i], NULL);
		}
		barrier_destroy(&sync_barrier);
	}

	return R;
}
Exemple #10
0
/*
 * Stream benchmark driver.
 *
 * Sets up the arrays and the selected code variant, prepares the result
 * storage (and PAPI / timer support depending on the build flags), then
 * runs pthreads_each on numThreads threads: ranks 1..numThreads-1 on new
 * joinable pthreads and rank 0 on the main thread. After all threads have
 * been joined, the barrier, the arrays and the per-thread bookkeeping are
 * released.
 *
 * Returns EXIT_SUCCESS; setup failures terminate with EXIT_FAILURE.
 */
int main(int argc, char *argv[]) {
  pthread_t *threads;
  pthread_attr_t attr;
  uint32_t **ranks;
  void *status;

#if defined(PAPI_ENABLED) && !defined(DEBUG)
  int num_sets;
  PAPI_event_set_wrapper_t* event_sets;
#endif
  int rc;
  uint32_t t;

  (void) argc;

  printf("Optimized Stream benchmark (using SSE intrinsics)\n");

  init_flush_cache_array();
  malloc_arrays(argv);
  print_array_parameters();
  select_code_variant(argv);
  print_code_variant_parameters();

  threads = (pthread_t *) malloc(numThreads * sizeof(pthread_t));
  ranks = (uint32_t **) malloc(numThreads * sizeof(uint32_t *));
  if (threads==NULL || ranks==NULL) {
    printf("Error on thread bookkeeping malloc.\n");
    exit(EXIT_FAILURE);
  }

#if !defined(DEBUG)
#if defined(PAPI_ENABLED)
  papi_init(desired_events, num_desired, &event_sets, &num_sets);

  // initialize threaded PAPI
  if (PAPI_thread_init((unsigned long (*)(void)) (pthread_self)) != PAPI_OK) {
    printf("Error with PAPI_thread_init().\n");
    exit(EXIT_FAILURE);
  }

  results = (double *) malloc(num_sets * numThreads * NUM_TRIALS * sizeof(double));
  if (results==NULL) {
    printf("Error on array results malloc.\n");
    exit(EXIT_FAILURE);
  }
#else
  results = (double *) malloc(numThreads * NUM_TRIALS * sizeof(double));
  if (results==NULL) {
    printf("Error on array results malloc.\n");
    exit(EXIT_FAILURE);
  }
#if defined(CYCLE_TIME)
  // calculate clock rate
  GET_CLOCK_RATE(results, NUM_TRIALS);
  median_counts_per_sec = find_median(results, NUM_TRIALS);
  //printf("Median ticks per second = %e\n", median_counts_per_sec);

#else
  timer_init();
  median_counts_per_sec = 1.0;
#endif
#endif
#endif

  pthread_attr_init(&attr);
  pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
  barrier_init(&my_barrier, numThreads);
#if defined(AFFINITY_ENABLED)
  Affinity_Init();
#endif

  // give every thread its own heap-allocated rank
  for (t=0; t < numThreads; t++) {
    ranks[t] = (uint32_t *) malloc(sizeof(uint32_t));
    if (ranks[t]==NULL) {
      printf("Error on rank malloc.\n");
      exit(EXIT_FAILURE);
    }
    *ranks[t] = t;
  }

  // run stream tests: ranks 1.. on new threads, rank 0 on this thread
  for (t=1; t < numThreads; t++) {
#if defined(DEBUG)
    printf("Creating thread %u\n", t);
#endif
    rc = pthread_create(&threads[t], &attr, pthreads_each, (void *) ranks[t]);
    if (rc) {
      printf("ERROR; return code from pthread_create() is %d\n", rc);
      exit(EXIT_FAILURE);
    }
  }
  pthreads_each((void *) ranks[0]);

  // join the other threads
  for (t=1; t < numThreads; t++) {
    pthread_join(threads[t], &status);
  }

#if defined(PAPI_ENABLED) && !defined(DEBUG)
  papi_cleanup(event_sets, num_sets);
#endif
  pthread_attr_destroy(&attr);

  // Every worker has been joined, so main can clean up and return normally.
  // (Calling pthread_exit() here would skip all of the cleanup below.)
  barrier_destroy(&my_barrier);
  free_arrays();

  for (t=0; t < numThreads; t++) {
    free(ranks[t]);
  }
  free(ranks);
  free(threads);

  return EXIT_SUCCESS;
}
/*
 * Test driver for the barrier.
 *
 * Runs NUM_THREADS threads on thread_func_throwaway and checks that each of
 * them returns 5 (test 1), then runs NUM_THREADS threads on
 * thread_func_reusable (test 2, whose verdict is read from pass2). Prints
 * the verdict of both tests.
 *
 * Returns 0, or 1 when a thread could not be created.
 */
int main() {
  long int i, ret;
  void *retval;
  pass1 = 1;
  pass2 = 1;

  pthread_mutex_init(&mtx, NULL);
  barrier_init(&barrier, NUM_THREADS);

  for (i = 0; i < NUM_THREADS; i++) {
    if (pthread_create(&pthreads[i], NULL, &thread_func_throwaway, (void *)i) != 0) {
      fprintf(stderr, "pthread_create failed\n");
      return 1;
    }
  }
  printf("Throwaway Test:\n");
  for (i = 0; i < NUM_THREADS; i++) {
    /* pthread_join() stores a void *; receive it in a real void * and
     * convert afterwards instead of aliasing a long through void **. */
    retval = NULL;
    pthread_join(pthreads[i], &retval);
    ret = (long int)retval;
    printf("%ld\n", ret);
    /*Desired output:
     * 5
     * 5
     * 5
     * 5
     * 5
     */
    if (ret != 5)
      pass1 = 0;
  }

  printf("Reusable Test:\n");
  for (i = 0; i < NUM_THREADS; i++) {
    if (pthread_create(&pthreads[i], NULL, &thread_func_reusable, (void *)i) != 0) {
      fprintf(stderr, "pthread_create failed\n");
      return 1;
    }
  }
  for (i = 0; i < NUM_THREADS; i++) {
    /* The return value is not inspected for this test. */
    pthread_join(pthreads[i], NULL);
    /*Desired output:
     * 10
     * 10
     * 10
     * 10
     * 10
     * 15
     * 15
     * 15
     * 15
     * 15
     * 20
     * 20
     * 20
     * 20
     * 20
     */
  }

  if (!pass1)
    printf("Test 1 failed\n");
  else
    printf("Test 1 passed\n");
  if (!pass2)
    printf("Test 2 failed\n");
  else
    printf("Test 2 passed\n");

  pthread_mutex_destroy(&mtx);
  barrier_destroy(&barrier);

  return 0;
}
Exemple #12
0
//
// main
//
// Run a game of life simulation.
//
// Reads the number of generations, the board size and the number of worker
// threads from standard input, starts one thread per board section and
// prints the grid before and after the run.
//
// Returns 0 on success and 1 on invalid input or allocation failure.
//
int main() {
  int i, g, rows, cols;
  int div;

  // The first several lines take input parameters
  // for the game.
  //
  printf("Welcome to the Game of Life.\n");
  printf("How many generations would you like to watch? ");
  if (scanf("%d", &g) != 1) {
    fprintf(stderr, "Invalid input.\n");
    return 1;
  }
  printf("Enter the width of the board: ");
  if (scanf("%d", &cols) != 1) {
    fprintf(stderr, "Invalid input.\n");
    return 1;
  }
  printf("Enter the height of the board: ");
  if (scanf("%d", &rows) != 1) {
    fprintf(stderr, "Invalid input.\n");
    return 1;
  }

  // Define our grids: G is our main grid, and T is our
  // temp grid. We also print the initial state of the grid
  // before actually running the simulation.
  //
  grid *G = initGrid(rows, cols);
  grid *T = initGrid(rows, cols);
  populate(G);
  printGrid(G);
  mgridUpdate(T, G, G->rows, 0);

  // Gets the desired number of threads from the user -- we repeatedly
  // ask for a number until we get a positive divisor of rows. Zero would
  // make the modulo below divide by zero, and a negative count cannot size
  // the thread array. Once we know how many threads there will be, we
  // initialize the barrier.
  //
  printf("Please enter a divisor of %d to determine the number of threads: ", rows);
  if (scanf("%d", &div) != 1) {
    fprintf(stderr, "Invalid input.\n");
    return 1;
  }
  while (div <= 0 || rows % div != 0) {
    if (div <= 0)
      printf("The number of threads must be positive. Please choose a divisor of %d: ", rows);
    else
      printf("I'm sorry, %d does not divide %d. Please choose a divisor of %d: ", div, rows, rows);
    if (scanf("%d", &div) != 1) {
      fprintf(stderr, "Invalid input.\n");
      return 1;
    }
  }

  barrier_init(&barr, div);

  // Creates an array of tinfo pointers and
  // pthreads. We then place the necessary
  // info into each tinfo struct.
  //
  tinfo **I = malloc(div * sizeof *I);
  if (I == NULL) {
    fprintf(stderr, "Out of memory.\n");
    return 1;
  }
  pthread_t threads[div];

  for (i=0; i<div; i++) {
    I[i] = initTinfo();
    I[i]->in = G;
    I[i]->out = T;
    I[i]->section = i;
    I[i]->divide = div;
    I[i]->gen = g;
  }

  // Initialize a number of threads. Each thread works on a portion of our
  // grid -- which portion it works on is decided by the I[i] tinfo struct.
  //
  for (i=0; i<div; i++) {
    pthread_create(&threads[i], NULL, &mFunc, (void *)I[i]);
  }

  // My implementation requires join, because the main thread
  // must wait for all of the child threads to complete before
  // destroying the barrier and printing the final grid.
  //
  for (i=0; i<div; i++) {
    pthread_join(threads[i], NULL);
  }

  // Destroy the barrier, print the final generation.
  //
  barrier_destroy(&barr);
  printGrid(G);

  // Release the pointer table; the tinfo structs themselves come from
  // initTinfo() and are left to whatever owns them.
  free(I);

  return 0;
}
/**
 * Allocates `taille` bytes. On failure the error is reported with perror()
 * and the program exits with status 1.
 */
static void* allouerOuQuitter(size_t taille){
	void* bloc = malloc(taille);
	if (!bloc){
		perror("Erreur d'allocation mémoire, arret du programme.");
		exit(1);
	}
	return bloc;
}

/**
 * Simulates one iteration of heat propagation.
 *
 * Computes the per-thread cell count, allocates the thread handles and the
 * wrapped matrices, then dispatches on `etape`:
 *   0 - lancerThread() without any barrier;
 *   1 - lancerThreads() with two pthread_barrier_t set up by initBarrieres()
 *       and handed to rendreBarrieres() afterwards;
 *   2 - lancerThreads() with two maBarriere barriers;
 *   3 - lancerThreads() with two maBarriereSem barriers.
 * Any other value of `etape` only allocates and frees the work arrays.
 *
 * @author   Lucas Martinez
 */
void initSimulation(int taille, int etape, int nbIter, int nbThread, caseDansMat * mat){
	pthread_t* threads;
	wrappedMatrice* wrappedMat;

	vraieTaille = taille + 2; // borders included

	// Number of cells per thread along one row.
	nbCaseParThread = sqrt(taille * taille / nbThread);
	if(nbCaseParThread < 1){
		nbCaseParThread = 1;
		nbThread = taille * taille;
	}

	threads = allouerOuQuitter(nbThread * sizeof(pthread_t));
	wrappedMat = allouerOuQuitter(nbThread * sizeof(wrappedMatrice));

	switch (etape){
		case 0:
			// single thread, different behaviour
			lancerThread(taille, nbIter, mat, wrappedMat);
			break;
		case 1: {
			pthread_barrier_t* barriereHori = allouerOuQuitter(sizeof(pthread_barrier_t));
			pthread_barrier_t* barriereVerti = allouerOuQuitter(sizeof(pthread_barrier_t));

			initBarrieres(nbThread, barriereHori, barriereVerti);
			lancerThreads(taille, etape, nbIter, mat, threads, wrappedMat, barriereHori, barriereVerti);
			rendreBarrieres(barriereHori, barriereVerti);
			break;
		}
		case 2: {
			maBarriere* maBarriereHori = allouerOuQuitter(sizeof(maBarriere));
			maBarriere* maBarriereVerti = allouerOuQuitter(sizeof(maBarriere));

			barrier_init(maBarriereHori, nbThread);
			barrier_init(maBarriereVerti, nbThread);

			lancerThreads(taille, etape, nbIter, mat, threads, wrappedMat, maBarriereHori, maBarriereVerti);

			barrier_destroy(maBarriereHori);
			barrier_destroy(maBarriereVerti);
			free(maBarriereHori);
			free(maBarriereVerti);
			break;
		}
		case 3: {
			maBarriereSem* maBarriereSemHori = allouerOuQuitter(sizeof(maBarriereSem));
			maBarriereSem* maBarriereSemVerti = allouerOuQuitter(sizeof(maBarriereSem));

			barrier_sem_init(maBarriereSemHori, nbThread);
			barrier_sem_init(maBarriereSemVerti, nbThread);

			lancerThreads(taille, etape, nbIter, mat, threads, wrappedMat, maBarriereSemHori, maBarriereSemVerti);

			barrier_sem_destroy(maBarriereSemHori);
			barrier_sem_destroy(maBarriereSemVerti);
			free(maBarriereSemHori);
			free(maBarriereSemVerti);
			break;
		}
	}

	free(wrappedMat);
	free(threads);

}