Example #1
/*
 * Allocate cpu_pda pointer table and array via alloc_bootmem.
 */
static void __init setup_cpu_pda_map(void)
{
	char *pda;
	struct x8664_pda **new_cpu_pda;
	unsigned long size;
	int cpu;

	size = roundup(sizeof(struct x8664_pda), cache_line_size());

	/* allocate cpu_pda array and pointer table */
	{
		unsigned long tsize = nr_cpu_ids * sizeof(void *);
		unsigned long asize = size * (nr_cpu_ids - 1);

		tsize = roundup(tsize, cache_line_size());
		new_cpu_pda = alloc_bootmem(tsize + asize);
		pda = (char *)new_cpu_pda + tsize;
	}

	/* initialize pointer table to static pda's */
	for_each_possible_cpu(cpu) {
		if (cpu == 0) {
			/* leave boot cpu pda in place */
			new_cpu_pda[0] = cpu_pda(0);
			continue;
		}
		new_cpu_pda[cpu] = (struct x8664_pda *)pda;
		new_cpu_pda[cpu]->in_bootmem = 1;
		pda += size;
	}

	/* point to new pointer table */
	_cpu_pda = new_cpu_pda;
}
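Several of these examples pad sizes with roundup() before carving out per-CPU or per-descriptor areas. For reference, a minimal userspace sketch of that rounding; the kernel macro behaves the same way for the cache_line_size() alignments passed in here (round_up_to is a hypothetical name):

#include <stddef.h>

/* Round x up to the next multiple of align, e.g. round_up_to(40, 64) == 64.
 * Mirrors the kernel's roundup() macro. */
static size_t round_up_to(size_t x, size_t align)
{
    return ((x + align - 1) / align) * align;
}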
Example #2
static int __init arm64_dma_init(void)
{
	WARN_TAINT(ARCH_DMA_MINALIGN < cache_line_size(),
		   TAINT_CPU_OUT_OF_SPEC,
		   "ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)",
		   ARCH_DMA_MINALIGN, cache_line_size());

	return atomic_pool_init();
}
Example #3
struct rxe_queue *rxe_queue_init(struct rxe_dev *rxe,
				 int *num_elem,
				 unsigned int elem_size)
{
	struct rxe_queue *q;
	size_t buf_size;
	unsigned int num_slots;

	/* num_elem == 0 is allowed, but uninteresting */
	if (*num_elem < 0)
		goto err1;

	q = kmalloc(sizeof(*q), GFP_KERNEL);
	if (!q)
		goto err1;

	q->rxe = rxe;

	/* used in resize, only need to copy used part of queue */
	q->elem_size = elem_size;

	/* pad element up to at least a cacheline and always a power of 2 */
	if (elem_size < cache_line_size())
		elem_size = cache_line_size();
	elem_size = roundup_pow_of_two(elem_size);

	q->log2_elem_size = order_base_2(elem_size);

	num_slots = *num_elem + 1;
	num_slots = roundup_pow_of_two(num_slots);
	q->index_mask = num_slots - 1;

	buf_size = sizeof(struct rxe_queue_buf) + num_slots * elem_size;

	q->buf = vmalloc_user(buf_size);
	if (!q->buf)
		goto err2;

	q->buf->log2_elem_size = q->log2_elem_size;
	q->buf->index_mask = q->index_mask;

	q->buf_size = buf_size;

	*num_elem = num_slots - 1;
	return q;

err2:
	kfree(q);
err1:
	return NULL;
}
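The roundup_pow_of_two() on the slot count is what makes index_mask work: with num_slots a power of two, wrapping a ring index is a single AND rather than a modulo. A minimal sketch with hypothetical values (not code from the rxe driver):

/* num_slots = 8, so index_mask = 7 (binary 111) */
unsigned int index_mask = 8 - 1;
unsigned int producer = 7;
producer = (producer + 1) & index_mask;  /* 8 & 7 == 0: wraps to the start */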
Example #4
char *ring_client(ring_t *ring, char *title) {
    char buf[32] = {0};
    int i = 0;
    int fd = -1;
    // set up shm: O_CREAT|O_EXCL so the loop actually probes for a name
    // not already in use (plain O_CREAT would succeed on the first try)
    while (fd < 0) {
        snprintf(buf, 32, "/%s.%d", title, i++);
        fd = shm_open(buf, O_RDWR | O_CREAT | O_EXCL, 0700);
        if (i > 65535) {
            fprintf(stderr, "panic: failed to shm_open() 65535 times, giving up.\n");
            abort();
        }
    }
    // size the segment and map it
    int size = RING_SIZE + cache_line_size() * 8;
    if (ftruncate(fd, size) < 0) {
        close(fd);
        shm_unlink(buf);
        return NULL;
    }
    void *addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    close(fd); // the mapping keeps the shm object alive
    if (addr == MAP_FAILED) {
        shm_unlink(buf);
        return NULL;
    }
    ring_set_pointers(ring, addr);
    ring->size = RING_SIZE;
    ring->me = 1;
    memset(addr, 0, size);
    return strdup(buf); // duplicated only on success; caller frees
}
Example #5
static void ring_set_pointers(ring_t *ring, void *addr) {
    size_t cache_line = cache_line_size();
    char *base = addr; // char * so the offset arithmetic is portable C
    int i = 0;
    // place each control word on its own cache line to avoid false sharing
#define next_line ((void *)(base + cache_line * i++))
    ring->read  = next_line;
    ring->write = next_line;
    ring->mark  = next_line;
    ring->wrap  = next_line;
    ring->dir   = next_line;
#undef next_line
    ring->buf   = base + cache_line * 8; // lines 5-7 are left as padding
}
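Examples #4, #5, and #7 all call a userspace cache_line_size() whose definition is not shown. A plausible sketch on Linux/glibc, assuming the sysconf key is available and falling back to a common 64-byte line:

#include <unistd.h>

/* Hedged sketch of the helper the ring examples assume exists.
 * _SC_LEVEL1_DCACHE_LINESIZE is glibc/Linux-specific and may report 0,
 * hence the fallback. */
static size_t cache_line_size(void)
{
    long sz = sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
    return sz > 0 ? (size_t)sz : 64;
}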
Example #6
static int ag71xx_ring_alloc(struct ag71xx_ring *ring, unsigned int size)
{
	int err;
	int i;

	ring->desc_size = sizeof(struct ag71xx_desc);
	if (ring->desc_size % cache_line_size()) {
		DBG("ag71xx: ring %p, desc size %u rounded to %u\n",
			ring, ring->desc_size,
			roundup(ring->desc_size, cache_line_size()));
		ring->desc_size = roundup(ring->desc_size, cache_line_size());
	}

	ring->descs_cpu = dma_alloc_coherent(NULL, size * ring->desc_size,
					     &ring->descs_dma, GFP_ATOMIC);
	if (!ring->descs_cpu) {
		err = -ENOMEM;
		goto err;
	}

	ring->size = size;

	ring->buf = kzalloc(size * sizeof(*ring->buf), GFP_KERNEL);
	if (!ring->buf) {
		err = -ENOMEM;
		goto err;
	}

	for (i = 0; i < size; i++) {
		int idx = i * ring->desc_size;
		ring->buf[i].desc = (struct ag71xx_desc *)&ring->descs_cpu[idx];
		DBG("ag71xx: ring %p, desc %d at %p\n",
			ring, i, ring->buf[i].desc);
	}

	return 0;

err:
	return err;
}
Example #7
int ring_server(ring_t *ring, char *name) {
    // set up shm
    int fd = shm_open(name, O_RDWR, 0700);
    if (fd < 0) {
        return -1;
    }
    int size = RING_SIZE + cache_line_size() * 8;
    void *addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    close(fd); // the mapping keeps the shm object alive
    if (addr == MAP_FAILED) {
        return -1;
    }
    shm_unlink(name); // segment is destroyed once both sides unmap
    ring_set_pointers(ring, addr);
    ring->size = RING_SIZE;
    ring->me = 0;
    return 0;
}
Example #8
/**
 * __percpu_alloc_mask - initial setup of per-cpu data
 * @size: size of per-cpu object
 * @gfp: may sleep or not etc.
 * @mask: populate per-cpu data only for the CPUs selected by the mask bits
 *
 * Populating per-cpu data for all online CPUs would be a typical use case,
 * which is simplified by the percpu_alloc() wrapper.
 * Per-cpu objects are populated with zeroed buffers.
 */
void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask)
{
	/*
	 * We allocate whole cache lines to avoid false sharing
	 */
	size_t sz = roundup(nr_cpu_ids * sizeof(void *), cache_line_size());
	void *pdata = kzalloc(sz, gfp);
	void *__pdata = __percpu_disguise(pdata);

	if (unlikely(!pdata))
		return NULL;
	if (likely(!__percpu_populate_mask(__pdata, size, gfp, mask)))
		return __pdata;
	kfree(pdata);
	return NULL;
}
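The "avoid false sharing" comment here is the same motive as Example #5's one-field-per-line layout: two hot variables sharing a cache line ping-pong that line between CPUs on every write. A minimal illustration with an assumed 64-byte line (hypothetical struct, not kernel code):

/* Each counter occupies a full (assumed 64-byte) cache line, so a CPU
 * writing counters[0] no longer invalidates another CPU's cached
 * copy of counters[1]. */
struct padded_counter {
    unsigned long value;
    char pad[64 - sizeof(unsigned long)];
};
static struct padded_counter counters[2];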
Example #9
/**
 * percpu_populate - populate per-cpu data for given cpu
 * @__pdata: per-cpu data to populate further
 * @size: size of per-cpu object
 * @gfp: may sleep or not etc.
 * @cpu: populate per-data for this cpu
 *
 * Populating per-cpu data for a cpu coming online would be a typical
 * use case. You need to register a cpu hotplug handler for that purpose.
 * Per-cpu object is populated with zeroed buffer.
 */
void *percpu_populate(void *__pdata, size_t size, gfp_t gfp, int cpu)
{
	struct percpu_data *pdata = __percpu_disguise(__pdata);
	int node = cpu_to_node(cpu);

	/*
	 * We should make sure each CPU gets private memory.
	 */
	size = roundup(size, cache_line_size());

	BUG_ON(pdata->ptrs[cpu]);
	if (node_online(node))
		pdata->ptrs[cpu] = kmalloc_node(size, gfp|__GFP_ZERO, node);
	else
		pdata->ptrs[cpu] = kzalloc(size, gfp);
	return pdata->ptrs[cpu];
}
Example #10
void *
kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
	size_t size = cachep->size;

	if (cachep->flags & SLAB_HWCACHE_ALIGN)
		size = max(cachep->size, (size_t)cache_line_size());

	void *objp = kmem_alloc(size);
	if (!objp) {
		if (cachep->flags & SLAB_PANIC)
			panic("kmem_cache_alloc() failed.");
		else
			return NULL;
	}

	if (cachep->ctor)
		cachep->ctor(objp);

	return objp;
}
Example #11
/*
 * Entry point of Multi-core Insense runtime.
 */
int main() {
	PRINTFMC("Cache line size: %dB\n", cache_line_size());PRINTFMC("Main thread: %u\n", (unsigned) pthread_self());

#if HEAPS // Small heaps
	// Initialize mutex
	if (pthread_mutex_init(&thread_lock, NULL) != 0) {
		PRINTF("Mutex initialization failed.\n");
		return 1;
	}
#else // Big heap
	// Initialize mutex
	if (pthread_mutex_init(&alloc_lock, NULL) != 0) {
		PRINTF("Mutex initialization failed.\n");
		return 1;
	}
#endif

	mainThread = pthread_self(); // Note the ID of the main thread.

	// Create a list for storing references to p-threads
	threadList = listCreate();

	// Create map used to store memory locations of small heaps (using Thread safe list)
	SHList = listCreate();

	// Create map used to store memory locations what is allocated using malloc
	mallocList = listCreate();

// Start recording execution time
#if TIMING
	// CPU time
	struct timespec start, finish;
	double elapsed;
	clock_gettime(CLOCK_MONOTONIC, &start);
	// User time
	time_t start_t, end_t;
	double diff_t;
	time(&start_t);
#endif

	// Call primordial_main.
	primordial_main(NULL );

	// Join all p-threads
	if (threadList != NULL ) {
		listJoinThreads(threadList);
	}

// Stop recording execution time
#if TIMING
	// CPU time
	clock_gettime(CLOCK_MONOTONIC, &finish);
	elapsed = (finish.tv_sec - start.tv_sec);
	elapsed += (finish.tv_nsec - start.tv_nsec) / 1000000000.0;
	printf("CPU:  %f seconds elapsed\n", elapsed);
	// User time
	time(&end_t);
	diff_t = difftime(end_t, start_t);
	printf("User: %f seconds elapsed\n", diff_t);
#endif

	// Destroy lists and free memory
	listDestroy(threadList);
	listDestroy(SHList);
	listDestroy(mallocList);
	pthread_mutex_destroy(&thread_lock); 	// Destroy mutex lock used with pthreads
	pthread_mutex_destroy(&alloc_lock); 	// Destroy mutex lock used with alloc and free in the big heap scheme

	return 0;
}
Example #12
int main(int argc, const char **argv)
{
	int err;
	const char *cmd;
	char sbuf[STRERR_BUFSIZE];
	int value;

	/* libsubcmd init */
	exec_cmd_init("perf", PREFIX, PERF_EXEC_PATH, EXEC_PATH_ENVIRONMENT);
	pager_init(PERF_PAGER_ENVIRONMENT);

	/* The page_size is placed in util object. */
	page_size = sysconf(_SC_PAGE_SIZE);
	cache_line_size(&cacheline_size);

	if (sysctl__read_int("kernel/perf_event_max_stack", &value) == 0)
		sysctl_perf_event_max_stack = value;

	if (sysctl__read_int("kernel/perf_event_max_contexts_per_stack", &value) == 0)
		sysctl_perf_event_max_contexts_per_stack = value;

	cmd = extract_argv0_path(argv[0]);
	if (!cmd)
		cmd = "perf-help";

	srandom(time(NULL));

	perf_config__init();
	err = perf_config(perf_default_config, NULL);
	if (err)
		return err;
	set_buildid_dir(NULL);

	/* get debugfs/tracefs mount point from /proc/mounts */
	tracing_path_mount();

	/*
	 * "perf-xxxx" is the same as "perf xxxx", but we obviously:
	 *
	 *  - cannot take flags in between the "perf" and the "xxxx".
	 *  - cannot execute it externally (since it would just do
	 *    the same thing over again)
	 *
	 * So we just directly call the internal command handler. If that one
	 * fails to handle this, then maybe we just run a renamed perf binary
	 * that contains a dash in its name. To handle this scenario, we just
	 * fall through and ignore the "xxxx" part of the command string.
	 */
	if (strstarts(cmd, "perf-")) {
		cmd += 5;
		argv[0] = cmd;
		handle_internal_command(argc, argv);
		/*
		 * If the command is handled, the above function does not
		 * return; otherwise, undo the changes and fall through.
		 */
		cmd -= 5;
		argv[0] = cmd;
	}
	if (strstarts(cmd, "trace")) {
#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE_SUPPORT)
		setup_path();
		argv[0] = "trace";
		return cmd_trace(argc, argv);
#else
		fprintf(stderr,
			"trace command not available: missing audit-libs devel package at build time.\n");
		goto out;
#endif
	}
	/* Look for flags.. */
	argv++;
	argc--;
	handle_options(&argv, &argc, NULL);
	commit_pager_choice();

	if (argc > 0) {
		if (strstarts(argv[0], "--"))
			argv[0] += 2;
	} else {
		/* The user didn't specify a command; give them help */
		printf("\n usage: %s\n\n", perf_usage_string);
		list_common_cmds_help();
		printf("\n %s\n\n", perf_more_info_string);
		goto out;
	}
	cmd = argv[0];

	test_attr__init();

	/*
	 * We use PATH to find perf commands, but we prepend some higher
	 * precedence paths: the "--exec-path" option, the PERF_EXEC_PATH
	 * environment, and the $(perfexecdir) from the Makefile at build
	 * time.
	 */
	setup_path();
	/*
	 * Block SIGWINCH notifications so that the thread that wants it can
	 * unblock and get syscalls like select interrupted instead of waiting
	 * forever while the signal goes to some other non interested thread.
	 */
	pthread__block_sigwinch();

	perf_debug_setup();

	while (1) {
		static int done_help;

		run_argv(&argc, &argv);

		if (errno != ENOENT)
			break;

		if (!done_help) {
			cmd = argv[0] = help_unknown_cmd(cmd);
			done_help = 1;
		} else
			break;
	}

	fprintf(stderr, "Failed to run command '%s': %s\n",
		cmd, str_error_r(errno, sbuf, sizeof(sbuf)));
out:
	return 1;
}
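Note that the cache_line_size(&cacheline_size) call in this example is perf's own out-parameter helper, not the value-returning cache_line_size() used in the kernel examples above; the two share a name but not a signature.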
Example #13
TCA emitFreeLocalsHelpers(CodeBlock& cb, DataBlock& data, UniqueStubs& us) {
  // The address of the first local is passed in the second argument register.
  // We use the third and fourth as scratch registers.
  auto const local = rarg(1);
  auto const last = rarg(2);
  auto const type = rarg(3);
  CGMeta fixups;

  // This stub is very hot; keep it cache-aligned.
  align(cb, &fixups, Alignment::CacheLine, AlignContext::Dead);
  auto const release =
    emitDecRefHelper(cb, data, fixups, local, type, local | last);

  auto const decref_local = [&] (Vout& v) {
    auto const sf = v.makeReg();

    // We can't do a byte load here---we have to sign-extend since we use
    // `type' as a 32-bit array index to the destructor table.
    v << loadzbl{local[TVOFF(m_type)], type};
    emitCmpTVType(v, sf, KindOfRefCountThreshold, type);

    ifThen(v, CC_G, sf, [&] (Vout& v) {
      auto const dword_size = sizeof(int64_t);

      // saving return value on the stack, but keeping it 16-byte aligned
      v << mflr{rfuncln()};
      v << lea {rsp()[-2 * dword_size], rsp()};
      v << store{rfuncln(), rsp()[0]};

      v << call{release, arg_regs(3)};

      // restore the return value from the stack
      v << load{rsp()[0], rfuncln()};
      v << lea {rsp()[2 * dword_size], rsp()};
      v << mtlr{rfuncln()};
    });
  };

  auto const next_local = [&] (Vout& v) {
    v << addqi{static_cast<int>(sizeof(TypedValue)),
               local, local, v.makeReg()};
  };

  alignJmpTarget(cb);

  us.freeManyLocalsHelper = vwrap(cb, data, fixups, [&] (Vout& v) {
    // We always unroll the final `kNumFreeLocalsHelpers' decrefs, so only loop
    // until we hit that point.
    v << lea{rvmfp()[localOffset(kNumFreeLocalsHelpers - 1)], last};

    doWhile(v, CC_NZ, {},
      [&] (const VregList& in, const VregList& out) {
        auto const sf = v.makeReg();

        decref_local(v);
        next_local(v);
        v << cmpq{local, last, sf};
        return sf;
      }
    );
  });

  for (auto i = kNumFreeLocalsHelpers - 1; i >= 0; --i) {
    us.freeLocalsHelpers[i] = vwrap(cb, data, [&] (Vout& v) {
      decref_local(v);
      if (i != 0) next_local(v);
    });
  }

  // All the stub entrypoints share the same ret.
  vwrap(cb, data, fixups, [] (Vout& v) { v << ret{}; });

  // This stub is hot, so make sure to keep it small.
#if 0
  // TODO(gut): Currently this assert fails.
  // Take a closer look when looking at performance
  always_assert(Stats::enabled() ||
                (cb.frontier() - release <= 4 * cache_line_size()));
#endif

  fixups.process(nullptr);
  return release;
}
Example #14
int
main(int argc, char **argv)
{
    int i;
    int cache_line = cache_line_size();
    int level;

    discover_caches();
    printf("cache line size: %d\n", cache_line);

    if (argc < 2) {
        printf("Usage: <prog> <narrays> [sfence]\n");
        return 0;
    }

    if (argc > 2 && !strcmp(argv[2], "sfence")) {
        want_sfence = 1;
    }

    narrays = atoi(argv[1]);
    data = calloc(narrays, sizeof(*data));

    for(level = 0; level < cache_level_cnt; level++) {
        uint64_t result;
        niters = iters[level];

        printf("Fit data to the level %d of memory hirarchy (%zdB)\n",
               level + 1, cache_sizes[level]);

        nitems = cache_sizes[level] / narrays;
        for(i = 0; i < narrays; i++ ){
            data[i] = calloc(nitems + cache_sizes[level], sizeof(*data[0]));
        }
        flush_array_sz = cache_sizes[level] * 2;
        flush_array = calloc(flush_array_sz, sizeof(char));


//      printf("\t#1 WOUT cache flush:\n");
        want_cache_flush = 0;
        result = testloop1();
        printf("\tseq:\tstride=1\t%lu cycles/B\n", result / niters / nitems / narrays);
        result = testloop2();
        printf("\tsplit2:\tstride=1\t%lu cycles/B\n", result / niters / nitems / narrays);
        for(i=2; i<=cache_line; i*=2) {
            result = testloop3(i);
            printf("\tsplit2:\tstride=%d\t%lu cycles/B\n", i, result / niters / nitems / narrays);
        }

//        printf("\t#2 WITH cache flush:\n");
//        want_cache_flush = 1;
//        result = testloop1();
//        printf("\t\tseq:\tstride=1\t%lu cycles/B\n", result / niters / nitems / narrays);
//        result = testloop2();
//        printf("\t\tsplit2:\tstride=1\t%lu cycles/B\n", result / niters / nitems / narrays);
//        for(i=2; i<=cache_line; i*=2) {
//            result = testloop3(i);
//            printf("\t\tsplit2:\tstride=%d\t%lu cycles/B\n", i, result / niters / nitems / narrays);
//        }

        for(i = 0; i < narrays; i++ ){
            free(data[i]);
        }
        free(flush_array);
    }
	return 0;
}
Example #15
static void __init setup_processor(void)
{
	u64 features;
	s64 block;
	u32 cwg;
	int cls;

	printk("CPU: AArch64 Processor [%08x] revision %d\n",
	       read_cpuid_id(), read_cpuid_id() & 15);

	sprintf(init_utsname()->machine, ELF_PLATFORM);
	elf_hwcap = 0;

	cpuinfo_store_boot_cpu();

	/*
	 * Check for sane CTR_EL0.CWG value.
	 */
	cwg = cache_type_cwg();
	cls = cache_line_size();
	if (!cwg)
		pr_warn("No Cache Writeback Granule information, assuming cache line size %d\n",
			cls);
	if (L1_CACHE_BYTES < cls)
		pr_warn("L1_CACHE_BYTES smaller than the Cache Writeback Granule (%d < %d)\n",
			L1_CACHE_BYTES, cls);

	/*
	 * ID_AA64ISAR0_EL1 contains 4-bit wide signed feature blocks.
	 * The blocks we test below represent incremental functionality
	 * for non-negative values. Negative values are reserved.
	 */
	features = read_cpuid(ID_AA64ISAR0_EL1);
	block = cpuid_feature_extract_field(features, 4);
	if (block > 0) {
		switch (block) {
		default:
		case 2:
			elf_hwcap |= HWCAP_PMULL;
		case 1:
			elf_hwcap |= HWCAP_AES;
		case 0:
			break;
		}
	}

	if (cpuid_feature_extract_field(features, 8) > 0)
		elf_hwcap |= HWCAP_SHA1;

	if (cpuid_feature_extract_field(features, 12) > 0)
		elf_hwcap |= HWCAP_SHA2;

	if (cpuid_feature_extract_field(features, 16) > 0)
		elf_hwcap |= HWCAP_CRC32;

	block = cpuid_feature_extract_field(features, 20);
	if (block > 0) {
		switch (block) {
		default:
		case 2:
			elf_hwcap |= HWCAP_ATOMICS;
		case 1:
			/* RESERVED */
		case 0:
			break;
		}
	}

#ifdef CONFIG_COMPAT
	/*
	 * ID_ISAR5_EL1 carries similar information as above, but pertaining to
	 * the AArch32 32-bit execution state.
	 */
	features = read_cpuid(ID_ISAR5_EL1);
	block = cpuid_feature_extract_field(features, 4);
	if (block > 0) {
		switch (block) {
		default:
		case 2:
			compat_elf_hwcap2 |= COMPAT_HWCAP2_PMULL;
		case 1:
			compat_elf_hwcap2 |= COMPAT_HWCAP2_AES;
		case 0:
			break;
		}
	}

	if (cpuid_feature_extract_field(features, 8) > 0)
		compat_elf_hwcap2 |= COMPAT_HWCAP2_SHA1;

	if (cpuid_feature_extract_field(features, 12) > 0)
		compat_elf_hwcap2 |= COMPAT_HWCAP2_SHA2;

	if (cpuid_feature_extract_field(features, 16) > 0)
		compat_elf_hwcap2 |= COMPAT_HWCAP2_CRC32;
#endif
}
Example #16
/*
 * Entry point of Multi-core Insense runtime.
 */
int main(int argc, char* argv[]) {
	PRINTFMC("Cache line size: %dB\n", cache_line_size());
	PRINTFMC("Main thread: %u\n", (unsigned) pthread_self());

	errval_t err;
	coreid_t mycore = disp_get_core_id();

	if (argc == 2) {
		num_to_span = atoi(argv[1]);
		if (num_to_span == 0)
			all_spanned = true;

		debug_printf("Spanning onto %d cores\n", num_to_span);
		for (int i = 1; i < num_to_span; i++) {
			err = domain_new_dispatcher(mycore + i, span_cb, NULL);
			if (err_is_fail(err)) {
				DEBUG_ERR(err, "failed span %d", i);
			}
		}
	} else {
		debug_printf("ERROR: Must specify number of cores to span\n");
		return EXIT_FAILURE;
	}

	posixcompat_pthread_set_placement_fn(rrPlacement);

	while (!all_spanned) {
		thread_yield();
	}

	my_mutex_init(&shared_heap_mutex);
#if HEAPS == HEAP_PRIVATE // Private heaps
	// Initialize mutex
	if (pthread_mutex_init(&thread_lock, NULL ) != 0) {
		PRINTF("Mutex initialization failed.\n");
		return -1;
	}
#endif

	mainThread = pthread_self(); // Note the ID of the main thread.

	// Create a list for storing references to p-threads
	threadList = listCreate();

	// Create map used to store memory locations of small heaps (using Thread safe list)
	SHList = listCreate();

	// Create map used to store memory locations what is allocated using malloc
	mallocList = listCreate();

// Start recording execution time
#if TIMING
	// CPU time
	uint64_t start, end;
	uint64_t tsc_per_ms = 0;
	sys_debug_get_tsc_per_ms(&tsc_per_ms);
	start = rdtsc();
#endif

	// Call primordial_main.
	primordial_main(NULL );

	// Join all p-threads
	if (threadList != NULL ) {
		listJoinThreads(threadList);
	}

// Stop recording execution time
#if TIMING
	end = rdtsc();
	
	uint64_t diff = (end - start) / tsc_per_ms;
	float elapsed = (diff / 1000) + ((diff % 1000) / 1000.0);

	printf("CPU:  %f seconds elapsed\n", elapsed);
#endif

	// Destroy lists and free memory
	listDestroy(threadList);
	listDestroy(SHList);
	listDestroy(mallocList);
#if HEAPS == HEAP_PRIVATE
	pthread_mutex_destroy(&thread_lock); 	// Destroy mutex lock used with pthreads
#endif
	return 0;
}