Exemplo n.º 1
0
// Aligned allocation helper: forwards to posix_memalign() and reports
// success as a boolean (true when posix_memalign() returned 0).
bool PlatformRPI::MyMalloc(void **memptr, size_t alignment, size_t size)
{
	const int status = posix_memalign(memptr, alignment, size);
	if (status != 0)
		return false;
	return true;
}
Exemplo n.º 2
0
int main(int argc, char **argv)
{
	// Check arguments
	if (argc < 2) {
		// Default operation
	} else if (argc == 2) {
		// Check arg
		unsigned long arg = strtol(argv[1], NULL, 0);
		if (arg == 0)
			usage();
		else
			loopCnt = arg;
	}
	// Error
	else {
		usage();
	}


	printf("\nHtBcm Test\n\n");

	int nau = HtBcmPrepare(&BcmReportNonce, &BcmFreeTask);

	printf("  Running with %d Units\n", nau);
	printf("  Looping %d Times\n", loopCnt);

	// Alloc g_pBcmTask
	g_pBcmTask = (CHtBcmTask **)calloc(loopCnt * nau, sizeof(CHtBcmTask));

	// For each loop
	for (int j = 0; j < loopCnt; j += 1) {
		printf("\nLoop %d\n", j + 1);
		g_bReportNonce = false;
		g_BcmFreeTaskCnt = 0;

		// Create Task for each Unit
		for (int i = 0; i < nau; i++) {
			int cnt = i + j * nau + 1;
			int total = loopCnt * nau;
			printf("HtBcm Hash Instance %d of %d created\n", cnt, total);

			block_header header;

			BcmHex2bin(header.version, "01000000");
			BcmHex2bin(header.prev_block, "81cd02ab7e569e8bcd9317e2fe99f2de44d49ab2b8851ba4a308000000000000");
			BcmHex2bin(header.merkle_root, "e320b6c2fffc8d750423db8b1eb942ae710e951ed797f7affc8892b0f1fc122b");
			BcmHex2bin(header.timestamp, "c7f5d74d");
			BcmHex2bin(header.bits, "f2b9441a");
			BcmHex2bin(header.nonce, "42a14695");

			// put header in big endian format
			BcmEndianFlip((uint8_t *)&header, (uint8_t *)&header, 80);

			//RegenHash(&header);

			posix_memalign((void **)&g_pBcmTask[cnt - 1], 64, sizeof(CHtBcmTask) * 1);

			BcmCalcMidState(g_pBcmTask[cnt - 1]->m_midState, (uint8_t *)&header);

			memcpy(g_pBcmTask[cnt - 1]->m_data, ((uint8_t *)&header) + 64, 12);

			uint32_t target[8];
			uint32_t bits = BcmByteSwap32(*(uint32_t *)header.bits);
			BcmInitTarget(target, bits);

			memcpy(g_pBcmTask[cnt - 1]->m_target, target + 5, 12);

			g_pBcmTask[cnt - 1]->m_initNonce = init_nonce;
			g_pBcmTask[cnt - 1]->m_lastNonce = last_nonce;

			HtBcmAddNewTask(g_pBcmTask[cnt - 1]);
		}

		// Force ScanHash call until all units have returned
		while (!g_bReportNonce || (g_BcmFreeTaskCnt < nau))
			HtBcmScanHash((void *)1);

		for (int i = 0; i < nau; i++) {
			int cnt = i + j * nau + 1;
			free(g_pBcmTask[cnt - 1]);
		}
	}

	// All loops complete, finish up
	printf("%s\n", g_bError ? "FAILED" : "PASSED");

	free(g_pBcmTask);

	HtBcmShutdown();
}
Exemplo n.º 3
0
int main(int argc, char *argv[])
{
	int op, ret;

	struct iovec s_iov[IOV_CNT], r_iov[IOV_CNT];
	char *s_buf, *r_buf;
	int align_size;
	int pairs, print_rate;
	int window_varied;
	int c, j;
	int curr_size;
	enum send_recv_type_e type;

	ctpm_Init(&argc, &argv);
	ctpm_Rank(&myid);
	ctpm_Job_size(&numprocs);

	/* default values */
	pairs            = numprocs / 2;
	window_size      = DEFAULT_WINDOW;
	window_varied    = 0;
	print_rate       = 1;

	hints = fi_allocinfo();
	if (!hints)
		return -1;

	while ((op = getopt(argc, argv, "hp:w:vr:" CT_STD_OPTS)) != -1) {
		switch (op) {
		default:
			ct_parse_std_opts(op, optarg, hints);
			break;
		case 'p':
			pairs = atoi(optarg);
			if (pairs > (numprocs / 2)) {
				print_usage();
				return EXIT_FAILURE;
			}
			break;
		case 'w':
			window_size = atoi(optarg);
			break;
		case 'v':
			window_varied = 1;
			break;
		case 'r':
			print_rate = atoi(optarg);
			if (0 != print_rate && 1 != print_rate) {
				print_usage();
				return EXIT_FAILURE;
			}
			break;
		case '?':
		case 'h':
			print_usage();
			return EXIT_FAILURE;
		}
	}

	hints->ep_attr->type	= FI_EP_RDM;
	hints->caps		= FI_MSG | FI_DIRECTED_RECV;
	hints->mode		= FI_CONTEXT | FI_LOCAL_MR;

	if (numprocs < 2) {
		if (!myid) {
			fprintf(stderr, "This test requires at least two processes\n");
		}
		ctpm_Finalize();
		return -1;
	}

	/* Fabric initialization */
	ret = init_fabric();
	if (ret) {
		fprintf(stderr, "Problem in fabric initialization\n");
		return ret;
	}

	ret = init_av();
	if (ret) {
		fprintf(stderr, "Problem in AV initialization\n");
		return ret;
	}

	/* Data initialization */
	align_size = getpagesize();
	assert(align_size <= MAX_ALIGNMENT);

	/* Allocate page aligned buffers */
	for (c = 0; c < IOV_CNT; c++) {
		assert(!posix_memalign(&s_iov[c].iov_base, align_size, MAX_MSG_SIZE));
		assert(!posix_memalign(&r_iov[c].iov_base, align_size, MAX_MSG_SIZE));
	}

	assert(!posix_memalign((void **)&s_buf, align_size, MAX_MSG_SIZE * IOV_CNT));
	assert(!posix_memalign((void **)&r_buf, align_size, MAX_MSG_SIZE * IOV_CNT));

	for (type = 0; type < FIN; type++) {
		if (!myid) {
			fprintf(stdout, HEADER);
			switch (type) {
			case SEND_RECV:
				fprintf(stdout, SEND_RECV_DESC);
				break;
			case SENDV_RECVV:
				fprintf(stdout, SENDV_RECVV_DESC);
				break;
			case SEND_RECVV:
				fprintf(stdout, SEND_RECVV_DESC);
				break;
			case SENDV_RECV:
				fprintf(stdout, SENDV_RECV_DESC);
				break;
			default:
				abort();
			}

			if (window_varied) {
				fprintf(stdout, "# [ pairs: %d ] [ window size: varied ]\n", pairs);
				fprintf(stdout, "\n# Uni-directional Bandwidth (MB/sec)\n");
			} else {
				fprintf(stdout, "# [ pairs: %d ] [ window size: %d ]\n", pairs,
					window_size);
				if (print_rate) {
					fprintf(stdout, "%-*s%*s%*s%*s\n", 10, "# Size", FIELD_WIDTH,
						"Iov count", FIELD_WIDTH, "MB/s", FIELD_WIDTH, "Messages/s");
				} else {
					fprintf(stdout, "%-*s%*s%*s\n", 10, "# Size", FIELD_WIDTH,
						"Iov count", FIELD_WIDTH, "MB/s");
				}
			}
			fflush(stdout);
		}

		if (window_varied) {
			int window_array[] = WINDOW_SIZES;
			double **bandwidth_results;
			int log_val = 1, tmp_message_size = MAX_MSG_SIZE;
			int i, j;

			for (i = 0; i < WINDOW_SIZES_COUNT; i++) {
				if (window_array[i] > window_size) {
					window_size = window_array[i];
				}
			}

			while (tmp_message_size >>= 1) {
				log_val++;
			}

			bandwidth_results = (double **)malloc(sizeof(double *) * log_val);

			for (i = 0; i < log_val; i++) {
				bandwidth_results[i] = (double *)malloc(sizeof(double) *
									WINDOW_SIZES_COUNT);
			}

			if (!myid) {
				fprintf(stdout, "#      ");

				for (i = 0; i < WINDOW_SIZES_COUNT; i++) {
					fprintf(stdout, "  %10d", window_array[i]);
				}

				fprintf(stdout, "\n");
				fflush(stdout);
			}

			for (j = 0, curr_size = 1; curr_size <= MAX_MSG_SIZE; curr_size *= 2, j++) {
				if (!myid) {
					fprintf(stdout, "%-7d", curr_size);
				}

				for (i = 0; i < WINDOW_SIZES_COUNT; i++) {
					for (c = 0; c < IOV_CNT; c++) {
						r_iov[c].iov_len = s_iov[c].iov_len = curr_size;
						bandwidth_results[j][i] = calc_bw(myid, pairs,
										  window_array[i], s_iov, r_iov, c + 1,
										  s_buf, (c + 1) * curr_size, r_buf,
										  (c + 1) * curr_size, type);

						if (!myid) {
							fprintf(stdout, "%*d  %10.*f", FIELD_WIDTH, c + 1,
								FLOAT_PRECISION,
								bandwidth_results[j][i]);
						}

						fprintf(stdout, c == IOV_CNT - 1 ? "\n" : "");
					}
				}

				if (!myid) {
					fprintf(stdout, "\n");
					fflush(stdout);
				}
			}

			if (!myid && print_rate) {
				fprintf(stdout, "\n# Message Rate Profile\n");
				fprintf(stdout, "#      ");

				for (i = 0; i < WINDOW_SIZES_COUNT; i++) {
					fprintf(stdout, "  %10d", window_array[i]);
				}

				fprintf(stdout, "\n");
				fflush(stdout);

				for (c = 0; c < IOV_CNT; c++) {
					for (j = 0, curr_size = 1; curr_size <= MAX_MSG_SIZE; curr_size *= 2) {
						fprintf(stdout, "%-7d,%*d", curr_size * (c + 1), FIELD_WIDTH, c + 1);

						for (i = 0; i < WINDOW_SIZES_COUNT; i++) {
							double rate = 1e6 * bandwidth_results[j][i] / (curr_size * (c + 1));

							fprintf(stdout, "  %10.2f", rate);
						}

						fprintf(stdout, "\n");
						fflush(stdout);
						j++;
					}
				}
			}
		} else {
			/* Just one window size */
			for (curr_size = 1; curr_size <= MAX_MSG_SIZE; curr_size *= 2) {
				double bw, rate;

				for (c = 0; c < IOV_CNT; c++) {
					r_iov[c].iov_len = s_iov[c].iov_len = curr_size;
					bw = calc_bw(myid, pairs, window_size, s_iov, r_iov, c + 1,
						     s_buf, (c + 1) * curr_size, r_buf,
						     (c + 1) * curr_size, type);

					if (!myid) {
						rate = 1e6 * bw / (curr_size * (c + 1));

						if (print_rate) {
							fprintf(stdout, "%-*d%*d%*.*f%*.*f\n", 10, curr_size * (c + 1),
								FIELD_WIDTH, c + 1, FIELD_WIDTH,
								FLOAT_PRECISION, bw, FIELD_WIDTH,
								FLOAT_PRECISION, rate);
							fflush(stdout);
						} else {
							fprintf(stdout, "%-*d%*d%*.*f\n", 10, curr_size * (c + 1), FIELD_WIDTH,
								FIELD_WIDTH, c + 1, FLOAT_PRECISION, bw);
							fflush(stdout);
						}
						fprintf(stdout, c == IOV_CNT - 1 ? "\n" : "");
					}
				}
			}
		}
	}
Exemplo n.º 4
0
/**
 * Allocate a block of size bytes, aligned to ALIGN whenever the build
 * configuration selects one of the aligned allocators below (the last
 * fallback is plain malloc()).
 *
 * @param size number of bytes requested
 * @return pointer to the block, or NULL when size exceeds
 *         max_alloc_size - 32 or the underlying allocator fails.
 *         A zero-size request for which the allocator produced NULL is
 *         retried as a 1-byte allocation.
 */
void *av_malloc(size_t size)
{
    void *ptr = NULL;
#if CONFIG_MEMALIGN_HACK
    long diff;
#endif

    /* let's disallow possibly ambiguous cases */
    if (size > (max_alloc_size - 32))
        return NULL;

#if CONFIG_MEMALIGN_HACK
    /* Over-allocate by ALIGN, advance to the next ALIGN boundary and store
     * the applied offset in the byte just before the returned pointer
     * (presumably so the matching free can recover the malloc() pointer). */
    ptr = malloc(size + ALIGN);
    if (!ptr)
        return ptr;
    diff              = ((~(long)ptr)&(ALIGN - 1)) + 1;
    ptr               = (char *)ptr + diff;
    ((char *)ptr)[-1] = diff;
#elif HAVE_POSIX_MEMALIGN
    if (size) //OS X on SDK 10.6 has a broken posix_memalign implementation
    if (posix_memalign(&ptr, ALIGN, size))
        ptr = NULL;
#elif HAVE_ALIGNED_MALLOC
    ptr = _aligned_malloc(size, ALIGN);
#elif HAVE_MEMALIGN
#ifndef __DJGPP__
    ptr = memalign(ALIGN, size);
#else
    ptr = memalign(size, ALIGN);
#endif
    /* Why 64?
     * Indeed, we should align it:
     *   on  4 for 386
     *   on 16 for 486
     *   on 32 for 586, PPro - K6-III
     *   on 64 for K7 (maybe for P3 too).
     * Because L1 and L2 caches are aligned on those values.
     * But I don't want to code such logic here!
     */
    /* Why 32?
     * For AVX ASM. SSE / NEON needs only 16.
     * Why not larger? Because I did not see a difference in benchmarks ...
     */
    /* benchmarks with P3
     * memalign(64) + 1          3071, 3051, 3032
     * memalign(64) + 2          3051, 3032, 3041
     * memalign(64) + 4          2911, 2896, 2915
     * memalign(64) + 8          2545, 2554, 2550
     * memalign(64) + 16         2543, 2572, 2563
     * memalign(64) + 32         2546, 2545, 2571
     * memalign(64) + 64         2570, 2533, 2558
     *
     * BTW, malloc seems to do 8-byte alignment by default here.
     */
#else
    ptr = malloc(size);
#ifdef USE_MEM_STATS
    /* size is a size_t, so it must be printed with %zu. */
    printf("malloc(%zu) -> %p\n", size, ptr);
    if (ptr) {
        mem_cur += malloc_usable_size(ptr);
        if (mem_cur > mem_max) {
            mem_max = mem_cur;
            printf("mem_max=%d\n", mem_max);
        }
    }
#endif
#endif
    /* A zero-size request that yielded NULL is retried with one byte so the
     * caller still gets a non-NULL pointer when memory is available. */
    if(!ptr && !size) {
        size = 1;
        ptr= av_malloc(1);
    }
#if CONFIG_MEMORY_POISONING
    if (ptr)
        memset(ptr, FF_MEMORY_POISON, size);
#endif
    return ptr;
}
Exemplo n.º 5
0
void *VSIMalloc( size_t nSize )

{
    if (nMaxPeakAllocSize < 0)
    {
        char* pszMaxPeakAllocSize = getenv("CPL_MAX_PEAK_ALLOC_SIZE");
        nMaxPeakAllocSize = (pszMaxPeakAllocSize) ? atoi(pszMaxPeakAllocSize) : 0;
        char* pszMaxCumulAllocSize = getenv("CPL_MAX_CUMUL_ALLOC_SIZE");
        nMaxCumulAllocSize = (pszMaxCumulAllocSize) ? atoi(pszMaxCumulAllocSize) : 0;
    }
    if (nMaxPeakAllocSize > 0 && (GIntBig)nSize > nMaxPeakAllocSize)
        return NULL;
#ifdef DEBUG_VSIMALLOC_STATS
    if (nMaxCumulAllocSize > 0 && (GIntBig)nCurrentTotalAllocs + (GIntBig)nSize > nMaxCumulAllocSize)
        return NULL;
#endif  // DEBUG_VSIMALLOC_STATS

#ifdef DEBUG_VSIMALLOC_MPROTECT
    char* ptr = NULL;
    size_t nPageSize = getpagesize();
    posix_memalign((void**)&ptr, nPageSize, (3 * sizeof(void*) + nSize + nPageSize - 1) & ~(nPageSize - 1));
#else
    char* ptr = (char*) malloc(3 * sizeof(void*) + nSize);
#endif  // DEBUG_VSIMALLOC_MPROTECT
    if (ptr == NULL)
        return NULL;
    ptr[0] = 'V';
    ptr[1] = 'S';
    ptr[2] = 'I';
    ptr[3] = 'M';
    memcpy(ptr + sizeof(void*), &nSize, sizeof(void*));
    ptr[2 * sizeof(void*) + nSize + 0] = 'E';
    ptr[2 * sizeof(void*) + nSize + 1] = 'V';
    ptr[2 * sizeof(void*) + nSize + 2] = 'S';
    ptr[2 * sizeof(void*) + nSize + 3] = 'I';
#if defined(DEBUG_VSIMALLOC_STATS) || defined(DEBUG_VSIMALLOC_VERBOSE)
    {
        CPLMutexHolderD(&hMemStatMutex);
#ifdef DEBUG_VSIMALLOC_VERBOSE
        if( nSize > THRESHOLD_PRINT )
        {
            fprintf(stderr, "Thread[%p] VSIMalloc(%d) = %p"
#ifdef DEBUG_VSIMALLOC_STATS
                         ", current_cumul = " CPL_FRMT_GUIB
#ifdef DEBUG_BLOCK_CACHE_USE
                         ", block_cache_used = " CPL_FRMT_GIB
#endif
                         ", mal+cal-free = %d"
#endif
                         "\n",
                (void*)CPLGetPID(), (int)nSize, ptr + 2 * sizeof(void*)
#ifdef DEBUG_VSIMALLOC_STATS
                , (GUIntBig)(nCurrentTotalAllocs + nSize),
#ifdef DEBUG_BLOCK_CACHE_USE
                , GDALGetCacheUsed64()
#endif
                ,(int)(nVSIMallocs + nVSICallocs - nVSIFrees)
#endif
                );
        }
#endif  // DEBUG_VSIMALLOC_VERBOSE
#ifdef DEBUG_VSIMALLOC_STATS
        nVSIMallocs ++;
        if (nMaxTotalAllocs == 0)
            atexit(VSIShowMemStats);
        nCurrentTotalAllocs += nSize;
        if (nCurrentTotalAllocs > nMaxTotalAllocs)
            nMaxTotalAllocs = nCurrentTotalAllocs;
#endif  // DEBUG_VSIMALLOC_STATS
    }
Exemplo n.º 6
0
int main(int argc, char *argv[])
{
        int i, j, rtest, m, k, nerrs, r;
        void *buf;
        u8 *temp_buffs[TEST_SOURCES], *buffs[TEST_SOURCES];
        u8 a[MMAX * KMAX], b[MMAX * KMAX], c[MMAX * KMAX], d[MMAX * KMAX];
        u8 g_tbls[KMAX * TEST_SOURCES * 32], src_in_err[TEST_SOURCES];
        u8 src_err_list[TEST_SOURCES], *recov[TEST_SOURCES];
        struct perf start, stop;

        // Pick test parameters
        m = 14;
        k = 10;
        nerrs = 4;
        const u8 err_list[] = {2, 4, 5, 7};

        printf("erasure_code_base_perf: %dx%d %d\n", m, TEST_LEN(m), nerrs);

        if (m > MMAX || k > KMAX || nerrs > (m - k)){
                printf(" Input test parameter error\n");
                return -1;
        }

        memcpy(src_err_list, err_list, nerrs);
        memset(src_in_err, 0, TEST_SOURCES);
        for (i = 0; i < nerrs; i++)
                src_in_err[src_err_list[i]] = 1;

        // Allocate the arrays
        for (i = 0; i < m; i++) {
                if (posix_memalign(&buf, 64, TEST_LEN(m))) {
                        printf("alloc error: Fail\n");
                        return -1;
                }
                buffs[i] = buf;
        }

        for (i = 0; i < (m - k); i++) {
                if (posix_memalign(&buf, 64, TEST_LEN(m))) {
                        printf("alloc error: Fail\n");
                        return -1;
                }
                temp_buffs[i] = buf;
        }

        // Make random data
        for (i = 0; i < k; i++)
                for (j = 0; j < TEST_LEN(m); j++)
                        buffs[i][j] = rand();

        gf_gen_rs_matrix(a, m, k);
        ec_init_tables(k, m - k, &a[k * k], g_tbls);
        ec_encode_data_base(TEST_LEN(m), k, m - k, g_tbls, buffs, &buffs[k]);

        // Start encode test
        perf_start(&start);
        for (rtest = 0; rtest < TEST_LOOPS(m); rtest++) {
                // Make parity vects
                ec_init_tables(k, m - k, &a[k * k], g_tbls);
                ec_encode_data_base(TEST_LEN(m), k, m - k, g_tbls, buffs, &buffs[k]);
        }
        perf_stop(&stop);
        printf("erasure_code_base_encode" TEST_TYPE_STR ": ");
        perf_print(stop, start, (long long)(TEST_LEN(m)) * (m) * rtest);

        // Start decode test
        perf_start(&start);
        for (rtest = 0; rtest < TEST_LOOPS(m); rtest++) {
                // Construct b by removing error rows
                for (i = 0, r = 0; i < k; i++, r++) {
                        while (src_in_err[r])
                                r++;
                        recov[i] = buffs[r];
                        for (j = 0; j < k; j++)
                                b[k * i + j] = a[k * r + j];
                }

                if (gf_invert_matrix(b, d, k) < 0) {
                        printf("BAD MATRIX\n");
                        return -1;
                }

                for (i = 0; i < nerrs; i++)
                        for (j = 0; j < k; j++)
                                c[k * i + j] = d[k * src_err_list[i] + j];

                // Recover data
                ec_init_tables(k, nerrs, c, g_tbls);
                ec_encode_data_base(TEST_LEN(m), k, nerrs, g_tbls, recov, temp_buffs);
        }
        perf_stop(&stop);

        for (i = 0; i < nerrs; i++) {
                if (0 != memcmp(temp_buffs[i], buffs[src_err_list[i]], TEST_LEN(m))) {
                        printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
                        return -1;
                }
        }

        printf("erasure_code_base_decode" TEST_TYPE_STR ": ");
        perf_print(stop, start, (long long)(TEST_LEN(m)) * (k + nerrs) * rtest);

        printf("done all: Pass\n");
        return 0;
}
Exemplo n.º 7
0
/*
 * Loads the Tracker design, writes the 1024 "filter_<i>" registers of
 * "filteringKernel", sets up a DFE UDP socket and a Linux UDP socket on
 * port 5007, sends one test frame from the CPU side and then reads
 * NUM_MESSAGES_EXPECTED frames from the "toCPU" framed stream.
 *
 * argv[1] is the DFE IP address and argv[2] the CPU IP address, in any
 * form inet_aton() accepts.
 *
 * Returns 0 on success, 1 on missing/invalid arguments or when the stream
 * buffer cannot be allocated.
 */
int main(int argc, char *argv[]) {
	if(argc < 3) {
		printf("Usage: $0 dfe_ip cpu_ip\n");
		return 1;
	}

	/* inet_aton() returns 0 for an address it cannot parse. */
	struct in_addr dfe_ip;
	if (inet_aton(argv[1], &dfe_ip) == 0) {
		fprintf(stderr, "Invalid DFE IP address: %s\n", argv[1]);
		return 1;
	}
	struct in_addr cpu_ip;
	if (inet_aton(argv[2], &cpu_ip) == 0) {
		fprintf(stderr, "Invalid CPU IP address: %s\n", argv[2]);
		return 1;
	}
	struct in_addr netmask;
	inet_aton("255.255.255.0", &netmask);
	const int port = 5007;

	max_file_t *maxfile = Tracker_init();
	max_engine_t * engine = max_load(maxfile, "*");


	max_config_set_bool(MAX_CONFIG_PRINTF_TO_STDOUT, true);

	max_actions_t *actions = max_actions_init(maxfile, NULL);
	char regName[32];
	for (int i=0; i < 1024; i++) {
		snprintf(regName, sizeof(regName), "filter_%d", i);
		if (i == 150) {
			max_set_uint64t(actions, "filteringKernel", regName, 0xCC /* a value to match... */);
		} else {
			max_set_uint64t(actions, "filteringKernel", regName, 0x4D1B /* or any value you want */);
		}
	}
	max_run(engine, actions);
	max_actions_free(actions);


	/* 4096-byte aligned backing store handed to the framed stream. */
	void *buffer = NULL;
	size_t bufferSize = 4096 * 512;
	if (posix_memalign(&buffer, 4096, bufferSize) != 0) {
		fprintf(stderr, "Failed to allocate %zu bytes for the stream buffer\n", bufferSize);
		max_unload(engine);
		max_file_free(maxfile);
		return 1;
	}

	max_framed_stream_t *toCpu = max_framed_stream_setup(engine, "toCPU", buffer, bufferSize, -1);

	/*
	 * This executable both creates a normal Linux UDP socket as well as a DFE UDP Socket.
	 * We then exchange data between the two.
	 */

	// DFE Socket
	max_ip_config(engine, MAX_NET_CONNECTION_QSFP_TOP_10G_PORT1, &dfe_ip, &netmask);
	max_udp_socket_t *dfe_socket = max_udp_create_socket(engine, "udpTopPort1");
	max_udp_bind(dfe_socket, port);
	max_udp_connect(dfe_socket, &cpu_ip, port);


	// Linux Socket
	int cpu_socket = create_cpu_udp_socket(&cpu_ip, &dfe_ip, port);

	printf("Sending test frame...\n");
	sendTestFrame(cpu_socket);

	printf("Waiting for kernel response...\n"); fflush(stdout);

	void *f;
	size_t fsz;
	size_t numMessageRx = 0;
	uint8_t received_data[512];
	while (numMessageRx < NUM_MESSAGES_EXPECTED) {
		if (max_framed_stream_read(toCpu, 1, &f, &fsz) == 1) {
			printf("CPU: Got output frame - size %zu - NumMsg = %zu!\n", fsz, numMessageRx); // Frame size would be rounded up to the next 8 bytes.

			/* Never copy more than the local buffer can hold. */
			size_t copySize = fsz < sizeof(received_data) ? fsz : sizeof(received_data);
			memcpy(received_data, f, copySize);
			numMessageRx++;
			max_framed_stream_discard(toCpu, 1);
		} else 	usleep(10);
	}

	max_udp_close(dfe_socket);
	max_unload(engine);
	max_file_free(maxfile);

	printf("Done.\n"); fflush(stdout);
	return 0;
}
Exemplo n.º 8
0
/*
 * Allocate and fill the three matrices used by the benchmark.
 *
 * A holds zdim*ydim floats, B holds xdim*zdim floats and C holds xdim*ydim
 * floats. When STARPU_USE_CUDA is defined and `pin` is set, they come from
 * starpu_data_malloc_pinned_if_possible(); otherwise from posix_memalign()
 * with 4096-byte alignment (or plain malloc() when posix_memalign is not
 * available).
 *
 * With `norandom` set, A is filled with its second loop index and B with its
 * first; otherwise both are filled from starpu_drand48(). C is zeroed.
 * Aborts through STARPU_ABORT() if any matrix could not be allocated.
 */
static void init_problem_data(void)
{
	unsigned i,j;

#ifdef STARPU_USE_CUDA
	if (pin) {
		starpu_data_malloc_pinned_if_possible((void **)&A, zdim*ydim*sizeof(float));
		starpu_data_malloc_pinned_if_possible((void **)&B, xdim*zdim*sizeof(float));
		starpu_data_malloc_pinned_if_possible((void **)&C, xdim*ydim*sizeof(float));
	} else
#endif
	{
#ifdef STARPU_HAVE_POSIX_MEMALIGN
		/* posix_memalign() signals failure through its return value only,
		 * so normalise a failed allocation to NULL for the check below. */
		if (posix_memalign((void **)&A, 4096, zdim*ydim*sizeof(float)) != 0)
			A = NULL;
		if (posix_memalign((void **)&B, 4096, xdim*zdim*sizeof(float)) != 0)
			B = NULL;
		if (posix_memalign((void **)&C, 4096, xdim*ydim*sizeof(float)) != 0)
			C = NULL;
#else
		A = malloc(zdim*ydim*sizeof(float));
		B = malloc(xdim*zdim*sizeof(float));
		C = malloc(xdim*ydim*sizeof(float));
#endif
	}

	/* The fill loops below write through all three pointers. */
	if (!A || !B || !C)
		STARPU_ABORT();

	/* fill the A and B matrices */
	if (norandom) {
		for (j=0; j < ydim; j++) {
			for (i=0; i < zdim; i++) {
				A[j+i*ydim] = (float)(i);
			}
		}
	
		for (j=0; j < zdim; j++) {
			for (i=0; i < xdim; i++) {
				B[j+i*zdim] = (float)(j);
			}
		}
	} 
	else {
#ifdef NORANDOM
		srand(2008);
		STARPU_ABORT();
#endif
		for (j=0; j < ydim; j++) {
			for (i=0; i < zdim; i++) {
				A[j+i*ydim] = (float)(starpu_drand48());
			}
		}
	
		for (j=0; j < zdim; j++) {
			for (i=0; i < xdim; i++) {
				B[j+i*zdim] = (float)(starpu_drand48());
			}
		}
	}

	/* C starts out as all zeros. */
	for (j=0; j < ydim; j++) {
		for (i=0; i < xdim; i++) {
			C[j+i*ydim] = (float)(0);
		}
	}

	display_memory_consumption();
}
/*
 * Runs blackscholes() and blackscholes_sc() on identical 40x40x4 float
 * inputs and checks that the input and both outputs of the two variants
 * agree element-wise within 0.01.
 *
 * Exits with status 1 on allocation failure or on the first mismatch
 * (after printing "ERROR"); returns 0 when everything matches.
 */
int main (int argc, char* argv[])
{
    const int width = 40;
    const int height = 40;
    const size_t bytes = 4 * width*height * sizeof(float);

    float *input, *output1, *output2;
    float *input_sc, *output1_sc, *output2_sc;

    /* Allocation order matches the declaration order above. */
    float **const slots[] = {
        &input, &output1, &output2,
        &input_sc, &output1_sc, &output2_sc
    };
    const size_t slot_count = sizeof(slots) / sizeof(slots[0]);
    size_t s;

    for (s = 0; s < slot_count; s++)
    {
        /* Every buffer is 64-byte aligned. */
        if (posix_memalign((void **) slots[s], 64, bytes) != 0)
        {
            exit(1);
        }
    }

    /* Both variants receive exactly the same input values. */
    int idx;
    for (idx = 0; idx < (width*height*4); idx++)
    {
        const float value = (idx*0.9f)/(idx+1);
        input[idx] = value;
        input_sc[idx] = value;
    }

    blackscholes(width, height, input, output1, output2);
    blackscholes_sc(width, height, input_sc, output1_sc, output2_sc);

    /* Any element differing by more than 0.01 is a failure. */
    for (idx = 0; idx < (width*height*4); idx++)
    {
        if (fabsf(input_sc[idx] - input[idx]) > 0.01f
            || fabsf(output1_sc[idx] - output1[idx]) > 0.01f
            || fabsf(output2_sc[idx] - output2[idx]) > 0.01f)
        {
            printf("ERROR\n");
            exit(1);
        }
    }

    return 0;
}
Exemplo n.º 10
0
/*
 * pmem_memcpy_init -- benchmark initialization
 *
 * Allocates the private benchmark state, sizes the file and buffer via
 * assign_size(), allocates a FLUSH_ALIGN-aligned buffer and the table of
 * random offsets, creates and maps the pmem file, and selects the source,
 * destination and copy functions from the benchmark options. On success
 * the state is attached to the benchmark with pmembench_set_priv().
 *
 * Returns 0 on success and -1 on failure; on failure everything acquired
 * so far is released.
 */
static int
pmem_memcpy_init(struct benchmark *bench, struct benchmark_args *args)
{
	assert(bench != NULL);
	assert(args != NULL);
	int ret = 0;

	/* an allocation failure is a runtime condition, not a logic error */
	struct pmem_bench *pmb = malloc(sizeof (struct pmem_bench));
	if (pmb == NULL) {
		perror("malloc");
		return -1;
	}

	pmb->pargs = args->opts;
	assert(pmb->pargs != NULL);

	pmb->pargs->chunk_size = args->dsize;

	enum operation_type op_type;
	/*
	 * Assign file and buffer size depending on the operation type
	 * (READ from PMEM or WRITE to PMEM)
	 */
	if (assign_size(pmb, args, &op_type) != 0) {
		ret = -1;
		goto err_free_pmb;
	}

	/* posix_memalign() returns the error number instead of setting errno */
	if ((errno = posix_memalign(
		(void **) &pmb->buf, FLUSH_ALIGN, pmb->bsize)) != 0) {
		perror("posix_memalign");
		ret = -1;
		goto err_free_pmb;
	}

	pmb->n_rand_offsets = args->n_ops_per_thread * args->n_threads;
	pmb->rand_offsets = malloc(pmb->n_rand_offsets *
			sizeof (*pmb->rand_offsets));

	if (pmb->rand_offsets == NULL) {
		perror("malloc");
		ret = -1;
		/* the aligned buffer already exists and must be released */
		goto err_free_buf;
	}

	for (size_t i = 0; i < pmb->n_rand_offsets; ++i)
		pmb->rand_offsets[i] = rand() % args->n_ops_per_thread;

	/* create a pmem file and memory map it */
	if ((pmb->pmem_addr = pmem_map_file(args->fname, pmb->fsize,
				PMEM_FILE_CREATE|PMEM_FILE_EXCL,
				args->fmode, NULL, NULL)) == NULL) {
		perror(args->fname);
		ret = -1;
		goto err_free_offsets;
	}

	/* the pmem mapping is the source for reads, the target for writes */
	if (op_type == OP_TYPE_READ) {
		pmb->src_addr = pmb->pmem_addr;
		pmb->dest_addr = pmb->buf;
	} else {
		pmb->src_addr = pmb->buf;
		pmb->dest_addr = pmb->pmem_addr;
	}

	/* set proper func_src() and func_dest() depending on benchmark args */
	if ((pmb->func_src = assign_mode_func(pmb->pargs->src_mode)) == NULL) {
		fprintf(stderr, "wrong src_mode parameter -- '%s'",
						pmb->pargs->src_mode);
		ret = -1;
		goto err_unmap;
	}

	if ((pmb->func_dest = assign_mode_func(pmb->pargs->dest_mode))
								== NULL) {
		fprintf(stderr, "wrong dest_mode parameter -- '%s'",
						pmb->pargs->dest_mode);
		ret = -1;
		goto err_unmap;
	}

	if (pmb->pargs->memcpy) {
		pmb->func_op = pmb->pargs->persist ?
					libc_memcpy_persist : libc_memcpy;
	} else {
		pmb->func_op = pmb->pargs->persist ?
			libpmem_memcpy_persist : libpmem_memcpy_nodrain;
	}

	pmembench_set_priv(bench, pmb);

	return 0;

	/* cleanup labels run in reverse order of acquisition */
err_unmap:
	pmem_unmap(pmb->pmem_addr, pmb->fsize);
err_free_offsets:
	free(pmb->rand_offsets);
err_free_buf:
	free(pmb->buf);
err_free_pmb:
	free(pmb);

	return ret;
}
Exemplo n.º 11
0
/*
 * Per-task I/O bandwidth worker.
 *
 * Allocates a PAGE_SIZE-aligned buffer of chunk_size bytes filled with
 * 0xaa, opens "<mygroup>-<id>-iobw.tmp" (with O_DIRECT when `directio` is
 * set), writes at least data_size bytes in chunk_size pieces, reads them
 * back the same way, and reports each phase through print_results().
 * The file is removed before leaving.
 *
 * Does not return: the process exits with status 0 on success and 1 on
 * any error.
 */
static void thread(int id)
{
	struct timeval start, stop, diff;
	int fd, ret;
	ssize_t nbytes;
	size_t n;
	void *buf;
	int flags = O_CREAT | O_RDWR | O_LARGEFILE;
	char filename[32];

	/*
	 * posix_memalign() returns 0 or a positive error number and does not
	 * set errno, so test for non-zero and format the returned value.
	 */
	ret = posix_memalign(&buf, PAGE_SIZE, chunk_size);
	if (ret != 0) {
		fprintf(stderr,
			"ERROR: task %d couldn't allocate %lu bytes (%s)\n",
			id, chunk_size, strerror(ret));
		exit(1);
	}
	memset(buf, 0xaa, chunk_size);

	snprintf(filename, sizeof(filename), "%s-%d-iobw.tmp", mygroup, id);
	if (directio)
		flags |= O_DIRECT;
	fd = open(filename, flags, 0600);
	if (fd < 0) {
		fprintf(stderr, "ERROR: task %d couldn't open %s (%s)\n",
			id, filename, strerror(errno));
		free(buf);
		exit(1);
	}

	/* Write */
	lseek(fd, 0, SEEK_SET);
	n = 0;
	gettimeofday(&start, NULL);
	while (n < data_size) {
		/* ssize_t keeps the full byte count for large chunk sizes */
		nbytes = write(fd, buf, chunk_size);
		if (nbytes < 0) {
			fprintf(stderr, "ERROR: task %d writing to %s (%s)\n",
				id, filename, strerror(errno));
			ret = 1;
			goto out;
		}
		n += nbytes;
	}
	gettimeofday(&stop, NULL);
	timersub(&stop, &start, &diff);
	print_results(id + 1, OP_WRITE, data_size, &diff);

	/* Read */
	lseek(fd, 0, SEEK_SET);
	n = 0;
	gettimeofday(&start, NULL);
	while (n < data_size) {
		nbytes = read(fd, buf, chunk_size);
		if (nbytes < 0) {
			fprintf(stderr, "ERROR: task %d reading to %s (%s)\n",
				id, filename, strerror(errno));
			ret = 1;
			goto out;
		}
		n += nbytes;
	}
	gettimeofday(&stop, NULL);
	timersub(&stop, &start, &diff);
	print_results(id + 1, OP_READ, data_size, &diff);
out:
	/* ret is still 0 here unless one of the loops above failed */
	close(fd);
	unlink(filename);
	free(buf);
	exit(ret);
}
Exemplo n.º 12
0
/*
 * Allocate `size` bytes aligned to 8192 bytes.
 *
 * The address is stored in *base by posix_memalign() and also returned.
 * Returns NULL when posix_memalign() reports an error.
 */
static void *aligned_malloc(void **base, int size)
{
	if (posix_memalign(base, 8192, size) != 0)
		return NULL;

	return *base;
}
Exemplo n.º 13
0
int main (int argc, char **argv)
{
    ssize_t ret_size;
    struct stat st;
    int ret, flags;

    int part_request;
    long long this_time;
    double part_min, part_max, time_min, time_max;
    double time_sum, time_sum2, time_mdev, time_avg;
    double part_sum, part_sum2, part_mdev, part_avg;
    long long time_now, time_next, period_deadline;

    setvbuf(stdout, NULL, _IOLBF, 0);

    parse_options(argc, argv);

    interval_ts.tv_sec = interval / 1000000;
    interval_ts.tv_nsec = (interval % 1000000) * 1000;

    if (!size)
        size = default_size;

    if (size <= 0)
        errx(1, "request size must be greather than zero");

#ifdef MAX_RW_COUNT
    if (size > MAX_RW_COUNT)
        warnx("this platform supports requests %u bytes at most",
              MAX_RW_COUNT);
#endif

    if (wsize)
        temp_wsize = wsize;
    else if (size > temp_wsize)
        temp_wsize = size;

    flags = O_RDONLY;

#if !defined(HAVE_POSIX_FADVICE) && !defined(HAVE_NOCACHE_IO)
# if defined(HAVE_DIRECT_IO)
    direct |= !cached;
# else
    if (!cached && !write_test) {
        warnx("non-cached read I/O not supported by this platform");
        warnx("you can use write I/O to get reliable results");
        cached = 1;
    }
# endif
#endif

    if (write_test) {
        flags = O_RDWR;
        make_request = do_pwrite;
    }

    if (async)
        aio_setup();

    if (direct)
#ifdef HAVE_DIRECT_IO
        flags |= O_DIRECT;
#else
        errx(1, "direct I/O not supported by this platform");
#endif

#ifdef __MINGW32__
    flags |= O_BINARY;
#endif

    if (stat(path, &st))
        err(2, "stat \"%s\" failed", path);

    if (!S_ISDIR(st.st_mode) && write_test && write_test < 3)
        errx(2, "think twice, then use -WWW to shred this target");

    if (S_ISDIR(st.st_mode) || S_ISREG(st.st_mode)) {
        if (S_ISDIR(st.st_mode))
            st.st_size = offset + temp_wsize;
        parse_device(st.st_dev);
    } else if (S_ISBLK(st.st_mode) || S_ISCHR(st.st_mode)) {
        fd = open(path, flags);
        if (fd < 0)
            err(2, "failed to open \"%s\"", path);

        if (get_device_size(fd, &st)) {
            if (!S_ISCHR(st.st_mode))
                err(2, "block get size ioctl failed");
            st.st_size = offset + temp_wsize;
            fstype = "character";
            device = "device";
        } else {
            device_size = st.st_size;
            fstype = "block";
            device = "device ";
        }

        if (!cached && write_test && fdatasync(fd)) {
            warnx("fdatasync not supported by \"%s\", "
                  "enable cached requests", path);
            cached = 1;
        }
    } else {
        errx(2, "unsupported destination: \"%s\"", path);
    }

    if (wsize > st.st_size || offset > st.st_size - wsize)
        errx(2, "target is too small for this");

    if (!wsize)
        wsize = st.st_size - offset;

    if (size > wsize)
        errx(2, "request size is too big for this target");

    ret = posix_memalign(&buf, 0x1000, size);
    if (ret)
        errx(2, "buffer allocation failed");

    random_memory(buf, size);

    if (S_ISDIR(st.st_mode)) {
        fd = create_temp(path, "ioping.tmp");
        if (fd < 0)
            err(2, "failed to create temporary file at \"%s\"", path);
        if (keep_file) {
            if (fstat(fd, &st))
                err(2, "fstat at \"%s\" failed", path);
            if (st.st_size >= offset + wsize)
#ifndef __MINGW32__
                if (st.st_blocks >= (st.st_size + 511) / 512)
#endif
                    goto skip_preparation;
        }
        for (woffset = 0 ; woffset < wsize ; woffset += ret_size) {
            ret_size = size;
            if (woffset + ret_size > wsize)
                ret_size = wsize - woffset;
            if (woffset)
                random_memory(buf, ret_size);
            ret_size = pwrite(fd, buf, ret_size, offset + woffset);
            if (ret_size <= 0)
                err(2, "preparation write failed");
        }
skip_preparation:
        if (fsync(fd))
            err(2, "fsync failed");
    } else if (S_ISREG(st.st_mode)) {
        fd = open(path, flags);
        if (fd < 0)
            err(2, "failed to open \"%s\"", path);
    }

    if (!cached) {
#ifdef HAVE_POSIX_FADVICE
        ret = posix_fadvise(fd, offset, wsize, POSIX_FADV_RANDOM);
        if (ret)
            err(2, "fadvise failed");
#endif
#ifdef HAVE_NOCACHE_IO
        ret = fcntl(fd, F_NOCACHE, 1);
        if (ret)
            err(2, "fcntl nocache failed");
#endif
    }

    srandom(now());

    if (deadline)
        deadline += now();

    set_signal();

    request = 0;
    woffset = 0;

    part_request = 0;
    part_min = time_min = LLONG_MAX;
    part_max = time_max = LLONG_MIN;
    part_sum = time_sum = 0;
    part_sum2 = time_sum2 = 0;

    time_now = now();
    period_deadline = time_now + period_time;

    while (!exiting) {
        request++;
        part_request++;

        if (randomize)
            woffset = random() % (wsize / size) * size;

#ifdef HAVE_POSIX_FADVICE
        if (!cached) {
            ret = posix_fadvise(fd, offset + woffset, size,
                                POSIX_FADV_DONTNEED);
            if (ret)
                err(3, "fadvise failed");
        }
#endif

        if (write_test)
            shake_memory(buf, size);

        this_time = now();

        ret_size = make_request(fd, buf, size, offset + woffset);
        if (ret_size < 0) {
            if (errno != EINTR)
                err(3, "request failed");
        } else if (ret_size < size)
            warnx("request returned less than expected: %zu", ret_size);
        else if (ret_size > size)
            errx(3, "request returned more than expected: %zu", ret_size);

        time_now = now();
        this_time = time_now - this_time;
        time_next = time_now + interval;

        part_sum += this_time;
        part_sum2 += this_time * this_time;
        if (this_time < part_min)
            part_min = this_time;
        if (this_time > part_max)
            part_max = this_time;

        if (!quiet) {
            print_size(ret_size);
            printf(" %s %s (%s %s", write_test ? "to" : "from",
                   path, fstype, device);
            if (device_size)
                print_size(device_size);
            printf("): request=%d time=", request);
            print_time(this_time);
            printf("\n");
        }

        if ((period_request && (part_request >= period_request)) ||
                (period_time && (time_next >= period_deadline))) {
            part_avg = part_sum / part_request;
            part_mdev = sqrt(part_sum2 / part_request - part_avg * part_avg);

            printf("%d %.0f %.0f %.0f %.0f %.0f %.0f %.0f\n",
                   part_request, part_sum,
                   1000000. * part_request / part_sum,
                   1000000. * part_request * size / part_sum,
                   part_min, part_avg,
                   part_max, part_mdev);

            time_sum += part_sum;
            time_sum2 += part_sum2;
            if (part_min < time_min)
                time_min = part_min;
            if (part_max > time_max)
                time_max = part_max;
            part_min = LLONG_MAX;
            part_max = LLONG_MIN;
            part_sum = part_sum2 = 0;
            part_request = 0;

            period_deadline = time_now + period_time;
        }

        if (!randomize) {
            woffset += size;
            if (woffset + size > wsize)
                woffset = 0;
        }

        if (exiting)
            break;

        if (stop_at_request && request >= stop_at_request)
            break;

        if (deadline && time_next >= deadline)
            break;

        if (interval)
            nanosleep(&interval_ts, NULL);
    }

    time_sum += part_sum;
    time_sum2 += part_sum2;
    if (part_min < time_min)
        time_min = part_min;
    if (part_max > time_max)
        time_max = part_max;

    time_avg = time_sum / request;
    time_mdev = sqrt(time_sum2 / request - time_avg * time_avg);

    if (batch_mode) {
        printf("%d %.0f %.0f %.0f %.0f %.0f %.0f %.0f\n",
               request, time_sum,
               1000000. * request / time_sum,
               1000000. * request * size / time_sum,
               time_min, time_avg,
               time_max, time_mdev);
    } else if (!quiet || (!period_time && !period_request)) {
        printf("\n--- %s (%s %s", path, fstype, device);
        if (device_size)
            print_size(device_size);
        printf(") ioping statistics ---\n");
        print_int(request);
        printf(" requests completed in ");
        print_time(time_sum);
        printf(", ");
        print_size((long long)request * size);
        printf(" %s, ", write_test ? "written" : "read");
        print_int(1000000. * request / time_sum);
        printf(" iops, ");
        print_size(1000000. * request * size / time_sum);
        printf("/s\n");
        printf("min/avg/max/mdev = ");
        print_time(time_min);
        printf(" / ");
        print_time(time_avg);
        printf(" / ");
        print_time(time_max);
        printf(" / ");
        print_time(time_mdev);
        printf("\n");
    }

    return 0;
}
Exemplo n.º 14
0
// Builds the per-model coefficient tables and history buffers of pFIR for an
// output rate of uiSampleRate samples per second.
//
// The 44.1 kHz source tables are used for 22050, 44100 and 88200 Hz; every
// other rate uses the 48 kHz tables. For each of the NUM_MODELS models the
// function allocates a history buffer of n8Tuples vectors and a coefficient
// buffer of 8 * n8Tuples vectors, then stores eight rotated ("permuted"),
// reversed copies of the resampled impulse response in the coefficient buffer.
//
// If an allocation fails, the affected buffer pointer is set to NULL, a
// message is written to stderr and that model's coefficients are not filled.
// NOTE(review): the function has no way to report the failure to its caller;
// code that later uses the model must tolerate the NULL buffers - confirm.
void instantiateFIR(FIR* pFIR, const uint uiSampleRate)
{
	printf("FIR: instantiating FIR with %d samples / s\n", uiSampleRate);
	//check which base frequency to use
	float** ppfDataSource = NULL;
	uint nSourceRate = 0;
	uint* pnSourceBufferLengths = NULL;

	switch(uiSampleRate)
	{
	case 44100:
	case 22050:
	case 88200:
		ppfDataSource = g_aafFIRs44k1;
		nSourceRate = 44100;
		pnSourceBufferLengths = g_anSamples44k1;
		break;
	default:
	//case 48000:
	//case 96000:
	//case 192000:
		ppfDataSource = g_aafFIRs48k;
		nSourceRate = 48000;
		pnSourceBufferLengths = g_anSamples48k;
		break;
	}

	//ratio between the rate of the stored tables and the requested rate
	double fRateScaler = (float)nSourceRate / (double)uiSampleRate;

	printf("FIR: chosen rate %d, scaler %f\n", nSourceRate, fRateScaler);

	//reserve buffer
	uint uiModel = 0;
	for(; uiModel < NUM_MODELS; uiModel++)
	{
		uint nSourceSamples = pnSourceBufferLengths[uiModel];

		//number of 8-float vectors, rounded up
		uint n8Tuples = (uint)(fRateScaler * (float)nSourceSamples - 1.0f) / 8 + 1;

		pFIR->m_anHistory8Tuples[uiModel] = n8Tuples;

		uint nDestSamples = n8Tuples * 8;

#ifdef _MSC_VER
		pFIR->m_apfHistory[uiModel] = (v8f_t*) _aligned_malloc(n8Tuples * sizeof(v8f_t), 16);
		pFIR->m_apfFIR[uiModel] = (v8f_t*) _aligned_malloc(nDestSamples * sizeof(v8f_t), 16);
#else
		//posix_memalign reports failure only through its return value, so
		//normalise a failed request to a NULL pointer
		if (posix_memalign((void**)&(pFIR->m_apfHistory[uiModel]), sizeof(v8f_t), n8Tuples * sizeof(v8f_t)) != 0)
			pFIR->m_apfHistory[uiModel] = NULL;
		if (posix_memalign((void**)&(pFIR->m_apfFIR[uiModel]), sizeof(v8f_t), nDestSamples * sizeof(v8f_t)) != 0)
			pFIR->m_apfFIR[uiModel] = NULL;
#endif

		if (pFIR->m_apfHistory[uiModel] == NULL || pFIR->m_apfFIR[uiModel] == NULL)
		{
			//never write through a buffer that was not allocated; a buffer
			//that did get allocated stays attached to pFIR for its owner
			fprintf(stderr, "FIR: out of memory for model %u\n", uiModel);
			continue;
		}

		//the allocators return uninitialised memory; start from a silent history
		memset(pFIR->m_apfHistory[uiModel], 0, n8Tuples * sizeof(v8f_t));

		//fill fir coefficients
		//the fir is reversed here, so multiplication with history data can be carried out sequentially
		uint uiPermutation = 0;
		for(; uiPermutation < 8; uiPermutation++)
		{
			uint uiFIRSample = 0;
			while (uiFIRSample < nDestSamples)
			{
				//gather up to eight consecutive coefficients; untouched lanes stay zero
				float afCoeffs[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
				const uint uiStartSample = uiFIRSample;
				for (; uiFIRSample < uiStartSample + 8 && uiFIRSample < nDestSamples; uiFIRSample++)
				{
					//reversed position, rotated by the permutation index, wrapped to the buffer length
					uint uiSourcePosUnscaled = (uiPermutation - uiFIRSample + nDestSamples) % nDestSamples;
					uint uiSourceBufferPos = (uint)((double)uiSourcePosUnscaled * fRateScaler);

					//positions beyond the stored table keep their zero coefficient
					if(uiSourceBufferPos < nSourceSamples)
						afCoeffs[uiFIRSample & 0x7] = ppfDataSource[uiModel][uiSourceBufferPos];
				}

				//each permutation owns a contiguous run of n8Tuples vectors
				const uint uiDestIndex = uiPermutation * n8Tuples + (uiStartSample >> 3);
				pFIR->m_apfFIR [uiModel][uiDestIndex] = v8f_create(afCoeffs);
			}
		}
	}
	memset(pFIR->m_auiBufferPos, 0, NUM_MODELS * sizeof(uint));
}
Exemplo n.º 15
0
/**
 * crypto_scrypt(passwd, passwdlen, salt, saltlen, N, r, p, buf, buflen):
 * Compute scrypt(passwd[0 .. passwdlen - 1], salt[0 .. saltlen - 1], N, r,
 * p, buflen) and write the result into buf.  The parameters r, p, and buflen
 * must satisfy r * p < 2^30 and buflen <= (2^32 - 1) * 32.  The parameter N
 * must be a power of 2 greater than 1; r and p must be nonzero.
 *
 * Return 0 on success; or -1 on error.  On a parameter error errno is set to
 * EINVAL (bad N, r or p), EFBIG (r * p or buflen too large) or ENOMEM (the
 * working buffers would not fit in a size_t).
 */
int
crypto_scrypt(const uint8_t * passwd, size_t passwdlen,
    const uint8_t * salt, size_t saltlen, uint64_t N, uint32_t r, uint32_t p,
    uint8_t * buf, size_t buflen)
{
	void * B0, * V0, * XY0;
	uint8_t * B;
	uint32_t * V;
	uint32_t * XY;
	size_t B_size, V_size, XY_size;
	uint32_t i;

	/* Sanity-check parameters. */
#if SIZE_MAX > UINT32_MAX
	if (buflen > (((uint64_t)(1) << 32) - 1) * 32) {
		errno = EFBIG;
		goto err0;
	}
#endif
	/* r and p are used as divisors below, so zero must be rejected first. */
	if ((r == 0) || (p == 0)) {
		errno = EINVAL;
		goto err0;
	}
	if ((uint64_t)(r) * (uint64_t)(p) >= (1 << 30)) {
		errno = EFBIG;
		goto err0;
	}
	/* N must be a power of 2 greater than 1. */
	if (((N & (N - 1)) != 0) || (N < 2)) {
		errno = EINVAL;
		goto err0;
	}
	if ((r > SIZE_MAX / 128 / p) ||
#if SIZE_MAX / 256 <= UINT32_MAX
	    (r > SIZE_MAX / 256) ||
#endif
	    (N > SIZE_MAX / 128 / r)) {
		errno = ENOMEM;
		goto err0;
	}

	/*
	 * Compute the buffer sizes in size_t.  The checks above guarantee that
	 * they fit, but the products must not be formed in 32-bit arithmetic
	 * (r and p are uint32_t), where they could wrap before being widened.
	 */
	B_size = (size_t)128 * r * p;
	XY_size = (size_t)256 * r + 64;
	V_size = (size_t)128 * r * (size_t)N;

	/* Allocate memory. */
#ifdef _WIN32
#undef HAVE_POSIX_MEMALIGN
#endif

#ifdef HAVE_POSIX_MEMALIGN
	if ((errno = posix_memalign(&B0, 64, B_size)) != 0)
		goto err0;
	B = (uint8_t *)(B0);
	if ((errno = posix_memalign(&XY0, 64, XY_size)) != 0)
		goto err1;
	XY = (uint32_t *)(XY0);
#ifndef MAP_ANON
	if ((errno = posix_memalign(&V0, 64, V_size)) != 0)
		goto err2;
	V = (uint32_t *)(V0);
#endif
#else
	/* Over-allocate by 63 bytes and round up to a 64-byte boundary. */
	if ((B0 = malloc(B_size + 63)) == NULL)
		goto err0;
	B = (uint8_t *)(((uintptr_t)(B0) + 63) & ~ (uintptr_t)(63));
	if ((XY0 = malloc(XY_size + 63)) == NULL)
		goto err1;
	XY = (uint32_t *)(((uintptr_t)(XY0) + 63) & ~ (uintptr_t)(63));
#ifndef MAP_ANON
	if ((V0 = malloc(V_size + 63)) == NULL)
		goto err2;
	V = (uint32_t *)(((uintptr_t)(V0) + 63) & ~ (uintptr_t)(63));
#endif
#endif
#ifdef MAP_ANON
	if ((V0 = mmap(NULL, V_size, PROT_READ | PROT_WRITE,
#ifdef MAP_NOCORE
	    MAP_ANON | MAP_PRIVATE | MAP_NOCORE,
#else
	    MAP_ANON | MAP_PRIVATE,
#endif
	    -1, 0)) == MAP_FAILED)
		goto err2;
	V = (uint32_t *)(V0);
#endif

	/* 1: (B_0 ... B_{p-1}) <-- PBKDF2(P, S, 1, p * MFLen) */
	PBKDF2_scrypt_SHA256(passwd, passwdlen, salt, saltlen, 1, B, B_size);

	/* 2: for i = 0 to p - 1 do */
	for (i = 0; i < p; i++) {
		/* 3: B_i <-- MF(B_i, N) */
		smix(&B[(size_t)128 * i * r], r, N, V, XY);
	}

	/* 5: DK <-- PBKDF2(P, B, 1, dkLen) */
	PBKDF2_scrypt_SHA256(passwd, passwdlen, B, B_size, 1, buf, buflen);

	/* Free memory. */
#ifdef MAP_ANON
	if (munmap(V0, V_size))
		goto err2;
#else
	free(V0);
#endif
	free(XY0);
	free(B0);

	/* Success! */
	return (0);

err2:
	free(XY0);
err1:
	free(B0);
err0:
	/* Failure! */
	return (-1);
}
Exemplo n.º 16
0
/*
 * Alltoallv bandwidth benchmark driver.
 *
 * Initializes PAMI, allocates the send/receive buffers and the per-task
 * count/displacement arrays, creates two data types, and then, for every
 * context and every alltoallv algorithm returned by query_geometry_world(),
 * times niter calls of blocking_coll() for a doubling sequence of element
 * counts.  After each timed size the receive buffer is validated with
 * check_rcvbuf() and task 0 prints one result line.
 *
 * Returns 1 on an initialization/query/type failure, otherwise the OR of the
 * status values accumulated in rc (including pami_shutdown()).
 *
 * NOTE(review): `context` is allocated with malloc() and is not freed in this
 * function; the early `return 1` paths also leave the buffers allocated.
 */
int main(int argc, char*argv[])
{
  pami_client_t        client;
  pami_context_t      *context;
  pami_task_t          task_id;
  size_t               num_tasks;
  pami_geometry_t      world_geometry;

  /* Barrier variables */
  size_t               barrier_num_algorithm[2];
  pami_algorithm_t    *bar_always_works_algo = NULL;
  pami_metadata_t     *bar_always_works_md   = NULL;
  pami_algorithm_t    *bar_must_query_algo   = NULL;
  pami_metadata_t     *bar_must_query_md     = NULL;
  pami_xfer_type_t     barrier_xfer = PAMI_XFER_BARRIER;
  volatile unsigned    bar_poll_flag = 0;

  /* Alltoallv variables */
  size_t               alltoallv_num_algorithm[2];
  pami_algorithm_t    *alltoallv_always_works_algo = NULL;
  pami_metadata_t     *alltoallv_always_works_md = NULL;
  pami_algorithm_t    *next_algo = NULL;
  pami_metadata_t     *next_md= NULL;
  pami_algorithm_t    *alltoallv_must_query_algo = NULL;
  pami_metadata_t     *alltoallv_must_query_md = NULL;
  pami_xfer_type_t     alltoallv_xfer = PAMI_XFER_ALLTOALLV;
  volatile unsigned    alltoallv_poll_flag = 0;

  int                  nalg= 0, total_alg;
  double               ti, tf, usec;
  pami_xfer_t          barrier;
  pami_xfer_t          alltoallv;
  pami_type_t          pami_stype = 0;
  pami_type_t          pami_rtype = 0;
  pami_result_t        ret;

  /* Process environment variables and setup globals */
  setup_env();

  assert(gNum_contexts > 0);
  context = (pami_context_t*)malloc(sizeof(pami_context_t) * gNum_contexts);


  /*  Initialize PAMI */
  int rc = pami_init(&client,        /* Client             */
                     context,        /* Context            */
                     NULL,           /* Clientname=default */
                     &gNum_contexts, /* gNum_contexts       */
                     NULL,           /* null configuration */
                     0,              /* no configuration   */
                     &task_id,       /* task id            */
                     &num_tasks);    /* number of tasks    */

  if (rc == 1)
    return 1;

  /*  Allocate buffer(s) */
  /* Each buffer is 128-byte aligned and then advanced by gBuffer_offset;
     the offset is subtracted again before free() at the end of main. */
  int err = 0;
  void* sbuf = NULL;
  err = posix_memalign((void*) & sbuf, 128, (gMax_byte_count * num_tasks) + gBuffer_offset);
  assert(err == 0);
  sbuf = (char*)sbuf + gBuffer_offset;

  void* rbuf = NULL;
  err = posix_memalign((void*) & rbuf, 128, (gMax_byte_count * num_tasks) + gBuffer_offset);
  assert(err == 0);
  rbuf = (char*)rbuf + gBuffer_offset;

  /* Per-task counts and displacements (one entry per task). */
  sndlens = (size_t*) malloc(num_tasks * sizeof(size_t));
  assert(sndlens);
  sdispls = (size_t*) malloc(num_tasks * sizeof(size_t));
  assert(sdispls);
  rcvlens = (size_t*) malloc(num_tasks * sizeof(size_t));
  assert(rcvlens);
  rdispls = (size_t*) malloc(num_tasks * sizeof(size_t));
  assert(rdispls);
  ret = PAMI_Type_create(&pami_stype);
  if(ret != PAMI_SUCCESS)
    return 1;

  ret = PAMI_Type_create(&pami_rtype);
  if(ret != PAMI_SUCCESS)
    return 1;

  /* The send and receive types are built with different offset/stride
     arguments; both are completed with an atom size of sizeof(double). */
  PAMI_Type_add_simple(pami_stype, sizeof(double), 0, 1, sizeof(double)*2);
  PAMI_Type_add_simple(pami_rtype, sizeof(double), sizeof(double), 1, sizeof(double));

  ret = PAMI_Type_complete(pami_stype, sizeof(double));
  if(ret != PAMI_SUCCESS){
    printf("Invalid atom size for stype\n");
    return 1;
  }

  ret = PAMI_Type_complete(pami_rtype, sizeof(double));
  if(ret != PAMI_SUCCESS){
    printf("Invalid atom size for rtype\n");
    return 1;
  }

  unsigned iContext = 0;

  for (; iContext < gNum_contexts; ++iContext)
  {

    if (task_id == 0)
      printf("# Context: %u\n", iContext);

    /*  Query the world geometry for barrier algorithms */
    rc |= query_geometry_world(client,
                               context[iContext],
                               &world_geometry,
                               barrier_xfer,
                               barrier_num_algorithm,
                               &bar_always_works_algo,
                               &bar_always_works_md,
                               &bar_must_query_algo,
                               &bar_must_query_md);

    if (rc == 1)
      return 1;

    /*  Query the world geometry for alltoallv algorithms */
    rc |= query_geometry_world(client,
                               context[iContext],
                               &world_geometry,
                               alltoallv_xfer,
                               alltoallv_num_algorithm,
                               &alltoallv_always_works_algo,
                               &alltoallv_always_works_md,
                               &alltoallv_must_query_algo,
                               &alltoallv_must_query_md);

    if (rc == 1)
      return 1;

    /* The first "always works" barrier algorithm is used for all barriers. */
    barrier.cb_done   = cb_done;
    barrier.cookie    = (void*) & bar_poll_flag;
    barrier.algorithm = bar_always_works_algo[0];

    /* Walk the "always works" list first, then the "must query" list. */
    total_alg = alltoallv_num_algorithm[0]+alltoallv_num_algorithm[1];
    for (nalg = 0; nalg < total_alg; nalg++)
    {
      metadata_result_t result = {0};
      unsigned query_protocol;
      if(nalg < alltoallv_num_algorithm[0])
      {
        query_protocol = 0;
        next_algo = &alltoallv_always_works_algo[nalg];
        next_md  = &alltoallv_always_works_md[nalg];
      }
      else
      {
        query_protocol = 1;
        next_algo = &alltoallv_must_query_algo[nalg-alltoallv_num_algorithm[0]];
        next_md  = &alltoallv_must_query_md[nalg-alltoallv_num_algorithm[0]];
      }

      gProtocolName = next_md->name;

      alltoallv.cb_done    = cb_done;
      alltoallv.cookie     = (void*) & alltoallv_poll_flag;
      alltoallv.algorithm  = *next_algo;
      alltoallv.cmd.xfer_alltoallv.sndbuf        = sbuf;
      alltoallv.cmd.xfer_alltoallv.stype         = pami_stype;
      alltoallv.cmd.xfer_alltoallv.stypecounts   = sndlens;
      alltoallv.cmd.xfer_alltoallv.sdispls       = sdispls;
      alltoallv.cmd.xfer_alltoallv.rcvbuf        = rbuf;
      alltoallv.cmd.xfer_alltoallv.rtype         = pami_rtype;
      alltoallv.cmd.xfer_alltoallv.rtypecounts   = rcvlens;
      alltoallv.cmd.xfer_alltoallv.rdispls       = rdispls;

      gProtocolName = next_md->name;

      if (task_id == 0)
      {
        printf("# Alltoallv Bandwidth Test(size:%zu) -- context = %d, protocol: %s, Metadata: range %zu <-> %zd, mask %#X\n",num_tasks,
               iContext, gProtocolName,
               next_md->range_lo,(ssize_t)next_md->range_hi,
               next_md->check_correct.bitmask_correct);
        printf("# Size(bytes)  iterations    bytes/sec      usec\n");
        printf("# -----------      -----------    -----------    ---------\n");
      }

      /* Skip algorithms filtered out by the gSelected / gSelector name filter
         (the header above has already been printed at this point). */
      if (((strstr(next_md->name, gSelected) == NULL) && gSelector) ||
          ((strstr(next_md->name, gSelected) != NULL) && !gSelector))  continue;

      int i, j;

      unsigned checkrequired = next_md->check_correct.values.checkrequired; /*must query every time */
      assert(!checkrequired || next_md->check_fn); /* must have function if checkrequired. */

      /* i is the per-task element count: 0, 1, 2, 4, ...  The `if(!i)i++`
         statements turn 0 into 1 so that `i *= 2` can make progress. */
      for (i = 0; i <= (gMax_byte_count/(sizeof(double)*2)); i *= 2)
      {
        size_t  dataSent = i;
        int          niter;

        if (dataSent < CUTOFF)
          niter = gNiterlat;
        else
          niter = NITERBW;

        for (j = 0; j < num_tasks; j++)
        {
          sndlens[j] = rcvlens[j] = i;
          sdispls[j] = rdispls[j] = i * j;

          initialize_sndbuf( j, (double*)sbuf, (double*)rbuf );

        }

              /* "Must query" algorithms are checked against their metadata
                 before this size is attempted. */
              if(query_protocol)
              {
                  size_t sz=get_type_size(pami_stype)*i;
                  size_t rsz=get_type_size(pami_rtype)*i;
                result = check_metadata(*next_md,
                                        alltoallv,
                                          pami_stype,
                                        sz, /* metadata uses bytes i, */
                                        alltoallv.cmd.xfer_alltoallv.sndbuf,
                                          pami_rtype,
                                        rsz,
                                        alltoallv.cmd.xfer_alltoallv.rcvbuf);
                if (next_md->check_correct.values.nonlocal)
                {
                  /* \note We currently ignore check_correct.values.nonlocal
                        because these tests should not have nonlocal differences (so far). */
                  result.check.nonlocal = 0;
                }

                if (result.bitmask) 
                {
                  if(!i)i++;
                  continue;
                }
              }

        blocking_coll(context[iContext], &barrier, &bar_poll_flag);

        ti = timer();

        for (j = 0; j < niter; j++)
        {
                if (checkrequired) /* must query every time */
                {
                  result = next_md->check_fn(&alltoallv);
                  /* NOTE(review): this `continue` applies to the inner j
                     loop, so a failed check only skips this iteration's
                     blocking_coll() call; the i++ here also changes the
                     size-loop variable while the timing loop is running. */
                  if (result.bitmask) 
                  {
                    if(!i)i++;
                    continue;
                  }
                }
          blocking_coll(context[iContext], &alltoallv, &alltoallv_poll_flag);
        }

        tf = timer();
        blocking_coll(context[iContext], &barrier, &bar_poll_flag);

        int rc_check;
        rc |= rc_check = check_rcvbuf(num_tasks, task_id, (double*)rbuf, (double*)sbuf);

        if (rc_check) fprintf(stderr, "%s FAILED validation\n", gProtocolName);

        /* Average time per iteration. */
        usec = (tf - ti) / (double)niter;

        if (task_id == 0)
        {
          printf("  %11lld %16d %14.1f %12.2f\n",
                 (long long)dataSent,
                 niter,
                 (double)1e6*(double)dataSent / (double)usec,
                 usec);
          fflush(stdout);
        }
        if(!i)i++;
      }
    }

    /* The algorithm/metadata arrays filled in by the queries are released
       once per context. */
    free(bar_always_works_algo);
    free(bar_always_works_md);
    free(bar_must_query_algo);
    free(bar_must_query_md);
    free(alltoallv_always_works_algo);
    free(alltoallv_always_works_md);
    free(alltoallv_must_query_algo);
    free(alltoallv_must_query_md);
  } /*for(unsigned iContext = 0; iContext < gNum_contexts; ++iContexts)*/

  /* Undo the gBuffer_offset adjustment so free() receives the original pointers. */
  sbuf = (char*)sbuf - gBuffer_offset;
  free(sbuf);

  rbuf = (char*)rbuf - gBuffer_offset;
  free(rbuf);

  free(sndlens);
  free(sdispls);
  free(rcvlens);
  free(rdispls);

  rc |= pami_shutdown(&client, context, &gNum_contexts);
  return rc;
}
	// Allocates `size` bytes whose address is a multiple of `alignment`.
	// `alignment` must satisfy posix_memalign's rules (a power of two that is
	// a multiple of sizeof(void *)). Returns NULL when the request fails;
	// release the result with free().
	inline void* aligned_alloc(size_t alignment, size_t size) {
		void *p = NULL;
		// Failure is reported only through the return value, so do not rely
		// on what posix_memalign leaves in p when it fails.
		if (posix_memalign(&p, alignment, size) != 0) {
			return NULL;
		}
		return p;
	}
Exemplo n.º 18
0
// Allocates size bytes and returns a pointer to them, or NULL when size is
// zero or the allocation fails. With FLA_ENABLE_MEMORY_ALIGNMENT the memory
// comes from posix_memalign() aligned to FLA_MEMORY_ALIGNMENT_BOUNDARY;
// otherwise it comes from malloc(). The memory leak counter, when enabled, is
// incremented only for allocations that actually succeeded.
void* FLA_malloc( size_t size )
{
  void*     ptr = NULL;
  FLA_Error e_val;
#ifdef FLA_ENABLE_MEMORY_ALIGNMENT
  int       r_val;
#endif

  // In practice, the size argument should very rarely be zero. However, if the
  // calling code does request a memory region of zero length, we short-circuit
  // the actual allocation request and just return NULL. Hopefully, the calling
  // code is written such that the pointer is never dereferenced. At free()-time
  // everything will be fine, as calling free() with a NULL pointer is safe.
  // Also note that we do NOT increment the memory leak counter before returning.
  // (Likewise, we will not decrement the counter when a NULL pointer is freed.)
  if ( size == 0 ) return NULL;

#ifdef FLA_ENABLE_MEMORY_ALIGNMENT

  // Allocate size bytes of memory. Here, we call posix_memalign() if
  // memory alignment was requested at configure-time, providing the
  // alignment boundary value given by the user. posix_memalign() also
  // returns an error code, which is how it signals that something
  // went wrong. Compare to malloc(), which does this by simply returning
  // a NULL pointer.
  r_val = posix_memalign( &ptr, ( size_t ) FLA_MEMORY_ALIGNMENT_BOUNDARY, size );

  // On failure, make sure the caller sees NULL regardless of what
  // posix_memalign() left in ptr.
  if ( r_val != 0 ) ptr = NULL;

  // Check the return value of posix_memalign() for evidence that the
  // request failed.
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
  {
    e_val = FLA_Check_posix_memalign_failure( r_val );
    FLA_Check_error_code( e_val );
  }

#else

  // Allocate size bytes of memory. Note that malloc() only guarantees 8-byte
  // alignment.
  ptr = malloc( size );

  // It may not seem useful to have a check for a null pointer here, given
  // that such an occurrence would cause the file and line of the error to
  // be reported as the below line of the current file instead of the file
  // and line number of the calling code. However, consider that in the
  // unlikely event that malloc() does return a null pointer, the user will
  // have much bigger problems on his hands (e.g. an exhausted memory heap)
  // than needing to know exactly what line in the library triggered error.
  // Note that such a line in the application code is likely not the root
  // source of the problem anyway (ie: not the reason why the heap is full).
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
  {
    e_val = FLA_Check_malloc_pointer( ptr );
    FLA_Check_error_code( e_val );
  }

#endif

  // Update the memory leak counter if it is enabled, and do so thread-safely
  // if multithreading is enabled. A failed allocation returns NULL, and a
  // NULL pointer is never counted at free()-time (see above), so counting it
  // here would report a leak that does not exist.
  if ( ptr != NULL && FLA_Memory_leak_counter_status() == TRUE )
  {
#ifdef FLA_ENABLE_MULTITHREADING
    FLA_Lock_acquire( &fla_mem_leak_counter_lock );
    fla_mem_leak_counter += 1;
    FLA_Lock_release( &fla_mem_leak_counter_lock );
#else
    fla_mem_leak_counter += 1;
#endif
  }

  // Return the pointer to the new memory region (NULL if the request failed).
  return ptr;
}
Exemplo n.º 19
0
/*
 * Times an SSE integer matrix multiplication (multiply = first * first).
 *
 * Options:
 *   -f <file>  file to which the elapsed CPU time is appended (required)
 *   -s <n>     matrix dimension (defaults to MEDIUM)
 *
 * Returns 0 on success and 1 when the arguments are unusable, the output file
 * cannot be opened, or memory cannot be allocated.
 */
int main(int argc, char  **argv)
{
  struct timespec  t1, t2;
  int size = MEDIUM;     /* used when -s is not given */
  int opt;
  char *fname = NULL;    /* stays NULL when -f is not given */

  while((opt = getopt(argc, argv, "f:s:"))!= -1) {
    switch (opt){
    case 's':
      size = atoi(optarg);
      break;
    case 'f':
      fname = optarg;
      break;
    default:
      size = MEDIUM;
      break;
    }
  }

  if (fname == NULL || size <= 0) {
    fprintf(stderr, "usage: %s -f <output-file> [-s <matrix-size>]\n", argv[0]);
    return 1;
  }

  FILE *fp = fopen(fname,"a");
  if (fp == NULL) {
    perror(fname);
    return 1;
  }

  /* Index arithmetic is done in size_t so that n * n cannot overflow an int. */
  const size_t n = (size_t)size;
  const size_t cells = n * n;
  size_t c, d, k, i;

  /* posix_memalign gives the 16-byte alignment wanted for the SSE data. */
  void *mem = NULL;
  if (posix_memalign(&mem, 16, sizeof(int) * cells) != 0) {
    fprintf(stderr, "out of memory\n");
    fclose(fp);
    return 1;
  }
  int *first = (int *)mem;

  if (posix_memalign(&mem, 16, sizeof(int) * cells) != 0) {
    fprintf(stderr, "out of memory\n");
    free(first);
    fclose(fp);
    return 1;
  }
  int *multiply = (int *)mem;

  __m128i m1, m2, m3;

  /* Fill the input and clear every accumulator cell before it is summed into. */
  for (c = 0; c < n; c++) {
    for (d = 0; d < n; d++) {
      first[c*n+d] = (int)((c+d) % 2) - 1;
      multiply[c*n+d] = 0;
    }
  }

  printf("multiplying the %d-size matrices\n  You should try to time this part.\n",size);

  clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &t1);

  for (c = 0; c < n; c++) {
    for (k = 0; k < n; k++) {
      m2 = _mm_set1_epi32(first[c*n+k]);                         /* first[c][k] in all lanes */
      for (d = 0; d < n; d += 4) {
        if (n - d < 4) {
          /* scalar tail for dimensions that are not a multiple of 4 */
          for (i = d; i < n; i++)
            multiply[c*n+i] += first[c*n+k]*first[k*n+i];
        } else {
          m1 = _mm_loadu_si128((const __m128i *)&first[k*n+d]);    /* first[k][d..d+3] */
          m1 = _mm_mullo_epi32(m1,m2);                             /* * first[c][k] */
          m3 = _mm_loadu_si128((const __m128i *)&multiply[c*n+d]); /* old multiply[c][d..d+3] */
          m1 = _mm_add_epi32(m3,m1);                               /* accumulate */
          _mm_storeu_si128((__m128i *)&multiply[c*n+d],m1);
        }
      }
    }
  }

  clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &t2);

  double nanos = (diff(t1,t2).tv_nsec) * pow(10,-9);
  double secs = (diff(t1,t2).tv_sec);
  double dif = secs + nanos;

  fprintf(fp,"%.10f\n", dif);

  fclose(fp);

  /* Element [size] is the first cell of row 1; a 1x1 matrix has no such cell. */
  const size_t probe = (n < cells) ? n : 0;
  printf("test first %d\n",first[probe]);
  printf("test mult %d\n",multiply[probe]);

  free(first);
  free(multiply);
  return 0;
}
Exemplo n.º 20
0
/*
 * Sequential write test: fills /mnt/ramdisk/test1 with FILE_SIZE bytes written
 * in REQ_SIZE chunks. Every chunk is filled with a single character that starts
 * at the first character of the filename argument and advances per chunk,
 * wrapping from 'z' back to 'A'.
 *
 * Arguments: $FS $SCENARIO $REQ_SIZE $FILE_SIZE $filename
 *   FILE_SIZE is a number followed by K, M or G and is clamped to
 *   [4096, 2147483648]; REQ_SIZE is clamped to END_SIZE and must be positive.
 *
 * Returns 0 after a run and for the usage/format/allocation errors that
 * historically returned 0; returns 1 when REQ_SIZE is not positive or the
 * target file cannot be opened.
 */
int main(int argc, char **argv)
{
	int fd;
	unsigned long long i;
	unsigned long long FILE_SIZE;
	size_t len;
	char c;
	char unit;
	int size;
	unsigned long long count;
	void *buf1 = NULL;
	char *buf;
	char fs_type[20];
	char quill_enabled[40];
	char file_size_num[20];
	char filename[60];
	int req_size;

	if (argc < 6) {
		printf("Usage: ./integrity_test2_write $FS $SCNEARIO $REQ_SIZE $FILE_SIZE $filename\n");
		return 0;
	}

	/* Bounded copies: over-long arguments are truncated, never overflowed. */
	snprintf(fs_type, sizeof(fs_type), "%s", argv[1]);
	snprintf(quill_enabled, sizeof(quill_enabled), "%s", argv[2]);

	/* FILE_SIZE needs at least the unit character and must fit the buffer. */
	len = strlen(argv[4]);
	if (len == 0 || len >= sizeof(file_size_num)) {
		printf("ERROR: FILE_SIZE should be #K/M/G format.\n");
		return 0;
	}
	memcpy(file_size_num, argv[4], len + 1);
	unit = file_size_num[len - 1];
	file_size_num[len - 1] = '\0';
	FILE_SIZE = atoll(file_size_num);
	switch (unit) {
	case 'K':
	case 'k':
		FILE_SIZE *= 1024;
		break;
	case 'M':
	case 'm':
		FILE_SIZE *= 1048576;
		break;
	case 'G':
	case 'g':
		FILE_SIZE *= 1073741824;
		break;
	default:
		printf("ERROR: FILE_SIZE should be #K/M/G format.\n");
		return 0;
		break;
	}

	if (FILE_SIZE < 4096)
		FILE_SIZE = 4096;
	if (FILE_SIZE > 2147483648ULL) // RAM disk size
		FILE_SIZE = 2147483648ULL;

	snprintf(filename, sizeof(filename), "%s", argv[5]);
	c = filename[0];

	/* A non-positive request size would divide by zero below. */
	req_size = atoi(argv[3]);
	if (req_size <= 0) {
		printf("ERROR: REQ_SIZE should be a positive number.\n");
		return 1;
	}
	if (req_size > END_SIZE)
		req_size = END_SIZE;

	if (posix_memalign(&buf1, END_SIZE, END_SIZE)) { // up to 64MB
		printf("ERROR - POSIX NOMEM!\n");
		return 0;
	}

	buf = (char *)buf1;
	fd = open("/mnt/ramdisk/test1", O_CREAT | O_RDWR | O_DIRECT, 0640); 
//	fd = open("/dev/null", O_WRONLY, 0640); 
//	fd = open("/dev/zero", O_RDONLY, 0640); 
	printf("fd: %d\n", fd);
	if (fd < 0) {
		perror("/mnt/ramdisk/test1");
		free(buf1);
		return 1;
	}

	size = req_size;
	memset(buf, c, size);
	lseek(fd, 0, SEEK_SET);
	count = FILE_SIZE / size;
	printf("Start c: %c\n", c);
	/* i has the same type as count so that it cannot overflow before reaching it. */
	for (i = 0; i < count; i++) {
		if (write(fd, buf, size) != size)
			printf("ERROR\n");
		c++;
		if (c > 'z')
			c = 'A';
		memset(buf, c, size);
	}

//	fsync(fd);
	close(fd);

	free(buf1);
	return 0;
}
Exemplo n.º 21
0
/*
 * Inserts the NUL-terminated key u with the associated value data into the
 * crit-bit tree t.
 *
 * If old is not NULL, *old is first set to NULL and, when the key was already
 * present, receives the value previously stored for it.
 *
 * Returns 2 when a new key was added, 1 when the key already existed and its
 * value was replaced by data, and 0 when memory could not be allocated (the
 * tree is left unchanged).
 */
int critbitInsert(critbit_t *t, const char *u, void *data, void **old) {
    const uint8_t *const ubytes = (void*)u;
    const size_t ulen = strlen(u);
    uint8_t *p = t->root;
    /* Slot holding the value that belongs to the leaf p ends up at. */
    void **dslot = &t->data;

    if(old != NULL) {
        (*old) = NULL;
    }
    /* Empty tree: the new key becomes the root leaf. */
    if(!p) {
        char *x;
        int a = posix_memalign((void **)&x, sizeof(void *), ulen + 1);
        if(a) {
            return 0;
        }
        memcpy(x, u, ulen + 1);
        t->root = x;
        t->data = data;
        return 2;
    }

    /* Internal nodes are tagged by a set low pointer bit; walk down to the
       leaf that u would match, remembering the slot of its value. */
    while(1 & (intptr_t)p) {
        critbit_node *q = (void*)(p - 1);
        uint8_t c = 0;
        if(q->byte < ulen) {
            c = ubytes[q->byte];
        }
        const int direction = (1 + (q->otherbits | c)) >> 8;
        p = q->child[direction];
        dslot = &q->data[direction];
    }

    uint32_t newbyte;
    uint32_t newotherbits;

    /* Find the first byte in which the leaf's key and u differ. */
    for(newbyte = 0; newbyte < ulen; newbyte++) {
        if(p[newbyte] != ubytes[newbyte]) {
            newotherbits = p[newbyte] ^ ubytes[newbyte];
            goto different_byte_found;
        }
    }
    if(p[newbyte] != 0) {
        newotherbits = p[newbyte];
        goto different_byte_found;
    }

    /* The key is already present: hand back the old value and store the new
       one in the tree (assigning to a local copy would lose the update). */
    if(old != NULL) {
        (*old) = *dslot;
    }
    *dslot = data;
    return 1;

different_byte_found:

    /* Reduce the difference to its most significant set bit, then invert it
       within the byte to obtain the mask stored in the node. */
    while(newotherbits & (newotherbits - 1)) {
        newotherbits&= newotherbits-1;
    }
    newotherbits ^= 255;
    uint8_t c = p[newbyte];
    int newdirection = (1 + (newotherbits | c)) >> 8;

    critbit_node *newnode;
    if(posix_memalign((void **)&newnode, sizeof(void *), sizeof(critbit_node))) {
        return 0;
    }

    char *x;
    if(posix_memalign((void **)&x, sizeof(void *), ulen + 1)) {
        free(newnode);
        return 0;
    }
    memcpy(x, ubytes, ulen + 1);

    newnode->byte = newbyte;
    newnode->otherbits = newotherbits;
    newnode->child[1 - newdirection] = x;
    newnode->data[1 - newdirection] = data;
    
    /* Walk down again to the link where the new node has to be spliced in:
       stop at a leaf or at the first node that tests a later position. */
    void **wherep = &t->root;
    void **datap = &t->data;
    for(;;) {
        uint8_t *p = *wherep;
        if(!(1 & (intptr_t)p)) {
            break;
        }
        critbit_node *q = (void*)(p - 1);
        if(q->byte > newbyte) {
            break;
        }
        if((q->byte == newbyte) && (q->otherbits > newotherbits)) {
            break;
        }
        uint8_t c = 0;
        if(q->byte < ulen) {
            c = ubytes[q->byte];
        }
        const int direction = (1 + (q->otherbits | c)) >> 8;
        wherep = q->child + direction;
        datap = q->data + direction;
    }
    /* The displaced subtree and its value move under the new node; the link
       now points at the tagged node and its value slot is cleared. */
    newnode->child[newdirection] = *wherep;
    newnode->data[newdirection] = *datap;
    *wherep = (void*)(1 + (char*)newnode);
    *datap = NULL;

    return 2;
}
Exemplo n.º 22
0
//------------------------------------------------
// Allocate `size` bytes on a 4096-byte boundary.
// Returns the block, or 0 if the request fails.
//
static inline uint8_t* cf_valloc(size_t size) {
	void* block;

	if (posix_memalign(&block, 4096, size) != 0) {
		return 0;
	}

	return (uint8_t*)block;
}
Exemplo n.º 23
0
/*
 * calloc()-style allocator: returns nCount * nSize zeroed bytes, or NULL.
 *
 * Without DEBUG_VSIMALLOC this is a plain calloc().  With it, the block is
 * wrapped in bookkeeping: a "VSIM" marker, the payload size, the payload, and
 * an "EVSI" trailer; the returned pointer addresses the payload.  The debug
 * build also rejects size overflows and honours the CPL_MAX_PEAK_ALLOC_SIZE /
 * CPL_MAX_CUMUL_ALLOC_SIZE environment limits.
 */
void *VSICalloc( size_t nCount, size_t nSize )

{
#ifdef DEBUG_VSIMALLOC
    size_t nMul = nCount * nSize;
    if (nCount != 0 && nMul / nCount != nSize)
    {
        fprintf(stderr, "Overflow in VSICalloc(%d, %d)\n",
                (int)nCount, (int)nSize);
        return NULL;
    }
    /* The bookkeeping adds 3 pointer-sized words to the payload; refuse sizes
       for which that sum would wrap, otherwise the markers written below
       would land outside a too-small block. */
    if (nMul > ~(size_t)0 - 3 * sizeof(void*))
    {
        fprintf(stderr, "Overflow in VSICalloc(%d, %d)\n",
                (int)nCount, (int)nSize);
        return NULL;
    }
    if (nMaxPeakAllocSize < 0)
    {
        /* First call: read the optional limits from the environment. */
        char* pszMaxPeakAllocSize = getenv("CPL_MAX_PEAK_ALLOC_SIZE");
        nMaxPeakAllocSize = (pszMaxPeakAllocSize) ? atoi(pszMaxPeakAllocSize) : 0;
        char* pszMaxCumulAllocSize = getenv("CPL_MAX_CUMUL_ALLOC_SIZE");
        nMaxCumulAllocSize = (pszMaxCumulAllocSize) ? atoi(pszMaxCumulAllocSize) : 0;
    }
    if (nMaxPeakAllocSize > 0 && (GIntBig)nMul > nMaxPeakAllocSize)
        return NULL;
#ifdef DEBUG_VSIMALLOC_STATS
    if (nMaxCumulAllocSize > 0 && (GIntBig)nCurrentTotalAllocs + (GIntBig)nMul > nMaxCumulAllocSize)
        return NULL;
#endif

#ifdef DEBUG_VSIMALLOC_MPROTECT
    char* ptr = NULL;
    size_t nPageSize = getpagesize();
    /* Rounding up to a whole page adds up to nPageSize - 1 more bytes. */
    if (nMul > ~(size_t)0 - 3 * sizeof(void*) - (nPageSize - 1))
        return NULL;
    /* posix_memalign reports failure through its return value. */
    if (posix_memalign((void**)&ptr, nPageSize, (3 * sizeof(void*) + nMul + nPageSize - 1) & ~(nPageSize - 1)) != 0)
        ptr = NULL;
    if (ptr == NULL)
        return NULL;
    memset(ptr + 2 * sizeof(void*), 0, nMul);
#else
    char* ptr = (char*) calloc(1, 3 * sizeof(void*) + nMul);
    if (ptr == NULL)
        return NULL;
#endif

    /* Header marker, payload size, then the trailer marker after the payload. */
    ptr[0] = 'V';
    ptr[1] = 'S';
    ptr[2] = 'I';
    ptr[3] = 'M';
    memcpy(ptr + sizeof(void*), &nMul, sizeof(void*));
    ptr[2 * sizeof(void*) + nMul + 0] = 'E';
    ptr[2 * sizeof(void*) + nMul + 1] = 'V';
    ptr[2 * sizeof(void*) + nMul + 2] = 'S';
    ptr[2 * sizeof(void*) + nMul + 3] = 'I';
#if defined(DEBUG_VSIMALLOC_STATS) || defined(DEBUG_VSIMALLOC_VERBOSE)
    {
        CPLMutexHolderD(&hMemStatMutex);
#ifdef DEBUG_VSIMALLOC_VERBOSE
        if( nMul > THRESHOLD_PRINT )
        {
            fprintf(stderr, "Thread[%p] VSICalloc(%d,%d) = %p\n",
                    (void*)CPLGetPID(), (int)nCount, (int)nSize, ptr + 2 * sizeof(void*));
        }
#endif
#ifdef DEBUG_VSIMALLOC_STATS
        nVSICallocs ++;
        if (nMaxTotalAllocs == 0)
            atexit(VSIShowMemStats);
        nCurrentTotalAllocs += nMul;
        if (nCurrentTotalAllocs > nMaxTotalAllocs)
            nMaxTotalAllocs = nCurrentTotalAllocs;
#endif
    }
#endif
    /* Callers get the payload, which starts after the two header words. */
    return ptr + 2 * sizeof(void*);
#else
    return calloc( nCount, nSize );
#endif
}
Exemplo n.º 24
0
// Returns `size` bytes aligned to BCM2835_PAGE_SIZE, or NULL on failure.
// errno always receives posix_memalign's status, so it is 0 after a success.
void *malloc_aligned(size_t size)
{
    void *block;
    const int status = posix_memalign(&block, BCM2835_PAGE_SIZE, size);

    errno = status;
    if (status != 0)
        return NULL;
    return block;
}
Exemplo n.º 25
0
/*
 * Split `parent` into four children stored in consecutive slots of the node
 * array `tree`, then recurse into every child that still holds more than `k`
 * particles and whose level is below `depth`.
 *
 *   parent        node to split; its child_id is set to the first child slot
 *   tree          node array that receives the four children
 *   depth         maximum level of the tree
 *   index         forwarded to assignParticles()
 *   xsorted, ysorted, mass_sorted
 *                 particle x/y coordinates and masses, forwarded to the
 *                 centre-of-mass, radius and expansion helpers
 *   k             a child with more than k particles is split again
 *   newNodeIndex  in/out: next free slot in `tree`; advanced by 4 per split
 *
 * The process is aborted if an expansion buffer cannot be allocated.
 */
void split(Node* parent, Node* tree, int depth, unsigned int* index, value_type* xsorted, value_type* ysorted, value_type* mass_sorted, int k, int* newNodeIndex){
    // Capture and update the newNodeIndex.
    /* In case of omp tasking make this atomic to avoid race conditions
     * #pragma omp atomic capture
        {} */
    parent->child_id = *newNodeIndex;
    *newNodeIndex += 4;

    // Compute the level of the children
    unsigned int children_level = parent->level + 1;

    // Compute the index value of this level so we can easily compute the morton-id's of the children
    int indexValue_level = pow(2, 2*(depth - children_level));

    // Initialize the children nodes. The expansion pointers start out as NULL;
    // buffers are attached further down only for children that need them.
    Node child_0 = Node {children_level, // level
                         parent->morton_id, // morton index
                         -1,        // child_id
                         -1,        // part_start
                         -1,        // part_end
                         1,         // node mass
                         1,         // x center of mass
                         1,         // y center of mass
                         NAN,       // radius of node
                         NULL,      // real part of multipole expansion
                         NULL       // imaginary part of multipole expansion
    };

    Node child_1 = Node {children_level, // level
                         parent->morton_id + indexValue_level, // morton index
                         -1,        // child_id
                         -1,        // part_start
                         -1,        // part_end
                         1,         // node mass
                         1,         // x center of mass
                         1,         // y center of mass
                         NAN,       // radius of node
                         NULL,      // real part of multipole expansion
                         NULL       // imaginary part of multipole expansion
    };

    Node child_2 = Node {children_level, // level
                         parent->morton_id + 2*indexValue_level, // morton index
                         -1,        // child_id
                         -1,        // part_start
                         -1,        // part_end
                         1,         // node mass
                         1,         // x center of mass
                         1,         // y center of mass
                         NAN,       // radius of node
                         NULL,      // real part of multipole expansion
                         NULL       // imaginary part of multipole expansion
    };

    Node child_3 = Node {children_level, // level
                         parent->morton_id + 3*indexValue_level, // morton index
                         -1,        // child_id
                         -1,        // part_start
                         -1,        // part_end
                         1,         // node mass
                         1,         // x center of mass
                         1,         // y center of mass
                         NAN,       // radius of node
                         NULL,      // real part of multipole expansion
                         NULL       // imaginary part of multipole expansion
    };

    // Set a pointer to the first child node
    Node* children = tree + parent->child_id;

    // Put the children nodes into the array at indices [ children[0], ... , children[3] ].
    children[0] = child_0;
    children[1] = child_1;
    children[2] = child_2;
    children[3] = child_3;

    // Assign the particles to the children
    assignParticles(parent, children, depth, index);

    // Assign the total and center of mass to the children, as well as their radius r
    centerOfMass(children, xsorted, ysorted, mass_sorted);
    radius(children, xsorted, ysorted);

    // Compute the multipole expansions for the children nodes, only if the level is 2 or deeper
    if(children_level >= 2){
        for (int c = 0; c < 4; ++c) {
            // Children whose particle range collapses to a single index get no expansion.
            if(children[c].part_start == children[c].part_end){
                continue;
            }

            // Compute the number of particles in this child node
            int nParticlesChild = children[c].part_end - children[c].part_start + 1;

            // Allocate 32-byte aligned expansion arrays. Go through void*
            // temporaries so no value_type** is reinterpreted as void**.
            void* realPart = NULL;
            void* imagPart = NULL;
            if (posix_memalign(&realPart, 32, sizeof(value_type) * exp_order) != 0 ||
                posix_memalign(&imagPart, 32, sizeof(value_type) * exp_order) != 0) {
                // p2e would write through an invalid pointer; stop here instead.
                abort();
            }
            children[c].rxps = (value_type *) realPart;
            children[c].ixps = (value_type *) imagPart;

            // Compute and set the expansion in this child node
            p2e(xsorted + children[c].part_start,        // x values of child's particles
                ysorted + children[c].part_start,        // y values of child's particles
                mass_sorted + children[c].part_start,    // mass values of child's particles
                nParticlesChild,                         // number of particles in child
                exp_order,                               // order of expansion
                children[c].xcom,                        // x value center of mass
                children[c].ycom,                        // y value center of mass
                children[c].rxps,                        // real parts of expansion
                children[c].ixps);                       // imaginary parts of expansion
        }
    }

    // Check number of particles in the children nodes
    for (int i = 0; i < 4; ++i) {
        // More than k particles and not yet at the maximum depth: split in four.
        // Otherwise the node is a leaf and nothing more needs to be done.
        if (children[i].part_end - children[i].part_start + 1 > k && children[i].level < depth) {
            split(children + i, tree, depth, index, xsorted, ysorted, mass_sorted, k, newNodeIndex);
        }
    }

}
Exemplo n.º 26
0
/*
 * Compute PageRank scores for the pages in `plist` and print them through
 * displayPageRank().
 *
 *   plist     list of pages; each page exposes its inlinks list, index and
 *             noutlinks
 *   ncores    requested degree of parallelism (values below 1 are treated as 1)
 *   npages    number of pages in `plist`
 *   nedges    total number of inlink edges
 *   dampener  damping factor
 *
 * The calling thread acts as both manager and last worker; ncores - 1 extra
 * threads run worker(). All working arrays are file-scope globals that are
 * allocated here and released before returning. The process is aborted if
 * any of them cannot be allocated.
 */
void pagerank(list* plist, int ncores, int npages, int nedges, double dampener)
{
	/* A non-positive core count would make the padding modulus below divide by zero. */
	if (ncores < 1)
		ncores = 1;

	/* For whoever reading this. Here's a strange hack. Don't edit.
	Don't know why it speeds things up - unexplainable. Sorry. -Roger (4/Jun/2014) */
	if (ncores > 1)
		ncores --;


	/*************************/
	/* Padding for AVX later */
	/*************************/
	while ((npages + g_padding) % (ncores * 4) != 0)
	{
		g_padding++;
	}


	/***************/
	/* Declaration */
	/***************/
	double constant = (1.0 - dampener) / npages;
	g_npages = npages;
	g_nedges = nedges;

	/* Lists that use page index:
	 *   curr_scores   scores for this round
	 *   prev_scores   scores of the last round - for calculating convergence
	 *   page_inlinks  number of inlinks per page - bounds the inner loop
	 *   index_edge    first edge belonging to each page
	 * Lists that use edge index:
	 *   inlinks       page index of the source of each edge
	 *   outlinks      1 / noutlinks of the source of each edge
	 * Lists that use ncores as index:
	 *   g_sumDiff     per-thread partial convergence sums
	 */

	/* index_edge is written per page below, so it must hold at least npages
	   entries; never make it smaller than the original nedges either. */
	int nindex = (npages > nedges) ? npages : nedges;

	int alloc_failed = 0;
	alloc_failed |= posix_memalign((void **)&curr_scores, 32, (npages + g_padding) * sizeof(double));
	alloc_failed |= posix_memalign((void **)&prev_scores, 32, (npages + g_padding) * sizeof(double));
	alloc_failed |= posix_memalign((void **)&page_inlinks, 32, npages * sizeof(int));
	alloc_failed |= posix_memalign((void **)&inlinks, 32, nedges * sizeof (int));
	alloc_failed |= posix_memalign((void **)&index_edge, 32, nindex * sizeof (int));
	alloc_failed |= posix_memalign((void **)&outlinks, 32, nedges * sizeof (double));
	alloc_failed |= posix_memalign((void **)&g_sumDiff, 32, ncores * sizeof (double));
	if (alloc_failed)
		abort(); /* every array is dereferenced unconditionally below */

	/* The manager's own slot is never written in this function but is summed
	   every round, so start every slot at zero. */
	for (int i = 0; i < ncores; i++)
		g_sumDiff[i] = 0.0;


	/*****************************/
	/* Setting up data structure */
	/*****************************/
	node* curr = plist->head;
	unsigned int edge = 0;

	for (int i = 0; i < npages; ++i) /* For each node */
	{
		/* Always record where this page's edges start, even when it has none. */
		index_edge[i] = edge;

		if (curr->page->inlinks != NULL) /* If this page has inlinks */
		{
			page_inlinks[i] = curr->page->inlinks->length;

			node* list_node =  curr->page->inlinks->head; /* First node in the inlinks */

			for (int j = 0; j < curr->page->inlinks->length; ++j)
			{
				inlinks[edge] = list_node->page->index;
				outlinks[edge] = 1.0 / list_node->page->noutlinks;
				list_node = list_node -> next;
				++edge;
			}
		}
		else
		{
			/* posix_memalign does not zero memory: an unset count would be garbage. */
			page_inlinks[i] = 0;
		}

	  curr = curr->next;
	}


	/************************/
	/* First iteration (P0) */
	/************************/
	for (int i = 0; i < g_padding; i++)
	{
		curr_scores[npages + i] = 0.0;
		prev_scores[npages + i] = 0.0; //padding
	}

	double p0 = 1.0 / npages;

	for (int i = 0; i < npages; i++)
	{
		prev_scores[i] = p0;
		/* We should have started with assigning to curr_score and then swap
		but this way it saves swapping once more in the beginning */
	}


	/*************************************/
	/* Setting up arguments for parallel */
	/*************************************/
	pthread_t tids[ncores];
	workerargs wargs[ncores];

	for (int i = 0; i < ncores; i++)
	{
		wargs[i].i = i;
		wargs[i].start = i * ((npages + g_padding) / ncores);
		wargs[i].end = (i + 1) * ((npages + g_padding) / ncores);
		wargs[i].dampener = dampener;
		wargs[i].constant = constant;
	}

	wargs[ncores-1].end = npages + g_padding;

	pthread_barrier_init(&score_barrier, NULL, ncores);
	pthread_barrier_init(&conv_barrier, NULL, ncores);

	/* The last slice is processed by this thread, hence ncores - 1 workers. */
	for (int i = 0; i < ncores - 1; i++)
		pthread_create(&tids[i], NULL, worker, &wargs[i]);

	int start = wargs[ncores-1].start;
	int end = wargs[ncores-1].end;
	/*************************/
	/* Manager thread memory */
	/*************************/
	for (;;)
	{
		if ((start < g_npages) && (g_nedges > 0)) //padding test
			edge = index_edge[start]; /* for edge count */

		double conv = 0.0;

		/* WORKER: Calculating curr_score */
		for (int i = start; i < end; ++i) /* Calculation for each page */
		{
			if (i >= g_npages)
				break;

			double sum = 0.0;

			for (int j = 0; j < page_inlinks[i]; ++j) /* Calculation for each inlink */
			{
				sum += prev_scores[inlinks[edge]] * outlinks[edge];
				++edge;
			}

			curr_scores[i] = constant + dampener * sum;
		}

		/* WORKER: Calculating conv, four doubles at a time */
		int nblocks = (end - start) * 0.25;

		for (int i = 0; i < nblocks; i++)
		{
			__m256d* curr_block = (__m256d*) &curr_scores[start]; // Cast it instead.
			__m256d* prev_block = (__m256d*) &prev_scores[start];

			__m256d m1 = _mm256_sub_pd(curr_block[i], prev_block[i]);
			__m256d m2 = _mm256_mul_pd(m1, m1);

			conv += m2[0] + m2[1] + m2[2] + m2[3];
		}

		pthread_barrier_wait(&score_barrier); /* Wait until all workers are done */
		/* WORKER COMPLETE */
		/* MANAGER BEGIN */

		double* tmp = prev_scores; /* For each iteration: move curr to prev, and reuse the old prev */
		prev_scores = curr_scores;
		curr_scores = tmp;

		/* MANAGER: Summing */
		for (int i = 0; i < ncores; i++)
		{
			conv += g_sumDiff[i];
		}

		/* MANAGER: Checking */
		if (conv < EPSILON*EPSILON)
		{
			g_hasConverged = true; /* It has converged */

			pthread_barrier_wait(&conv_barrier);

			tmp = prev_scores; /* Reverse the swap then */
			prev_scores = curr_scores;
			curr_scores = tmp;

			break;
		}

		pthread_barrier_wait(&conv_barrier); /* Tell the workers they may begin again */
	}

	/********************/
	/* Printing results */
	/********************/
	displayPageRank(plist, curr_scores);


	/******************/
	/* Cleaning up    */
	/******************/

	for (int i = 0; i < ncores - 1; i++)
		pthread_join(tids[i], NULL);

	pthread_barrier_destroy(&score_barrier);
	pthread_barrier_destroy(&conv_barrier);

	free(curr_scores);
	free(prev_scores);
	free(index_edge);
	free(page_inlinks);
	free(outlinks);
	free(inlinks);
	free(g_sumDiff);

}
Exemplo n.º 27
0
/*
 * Allocate and initialise the global parameter array W and its squared
 * gradient accumulator gradsq.
 *
 * Both arrays hold 2 * vocab_size rows (word rows first, context rows second)
 * of vector_size + 1 values, the extra column being the bias.
 *
 * When initialize_from_file is set, "<initial_weight_file>.bin" is read as raw
 * `real` values: the first half of the file fills the leading word rows and
 * the second half fills the leading context rows. Rows the file does not
 * cover, and every bias column, are initialised randomly. Without a file all
 * of W is initialised randomly. gradsq is always set to 1.0.
 *
 * Exits with status 1 on allocation or file errors.
 */
void initialize_parameters() {
    long long a, b;
    int rc;

    vector_size++; // Temporarily increment to allocate space for bias

    /* Allocate space for word vectors and context word vectors, and corresponding gradsq */
    rc = posix_memalign((void **)&W, 128, 2 * vocab_size * vector_size * sizeof(real)); // Might perform better than malloc
    if (rc != 0 || W == NULL) {
        fprintf(stderr, "Error allocating memory for W\n");
        exit(1);
    }
    rc = posix_memalign((void **)&gradsq, 128, 2 * vocab_size * vector_size * sizeof(real)); // Might perform better than malloc
    if (rc != 0 || gradsq == NULL) {
        fprintf(stderr, "Error allocating memory for gradsq\n");
        exit(1);
    }

    if (initialize_from_file) {
        char weight_file[MAX_STRING_LENGTH];
        FILE *fin;
        long long file_s, file_vocab_s, term_vocab_s;
        size_t n_items;
        int len;

        len = snprintf(weight_file, sizeof weight_file, "%s.bin", initial_weight_file);
        if (len < 0 || (size_t)len >= sizeof weight_file) {
            fprintf(stderr, "Weight file name is too long\n");
            exit(1);
        }

        fin = fopen(weight_file, "rb");
        if (fin == NULL) {
            fprintf(stderr, "Unable to open weight file %s\n", weight_file);
            exit(1);
        }

        /* ftello() only reports the file size once the stream is at its end. */
        if (fseeko(fin, 0, SEEK_END) != 0 || (file_s = ftello(fin)) < 0 || fseeko(fin, 0, SEEK_SET) != 0) {
            fprintf(stderr, "Unable to determine size of weight file %s\n", weight_file);
            exit(1);
        }

        /* Rows stored per half of the file; never load more rows than W has room for. */
        file_vocab_s = file_s / (long long)sizeof(real) / 2 / vector_size;
        term_vocab_s = file_vocab_s < vocab_size ? file_vocab_s : vocab_size;
        n_items = (size_t)(term_vocab_s * vector_size);

        /* Word rows, then skip to the second half of the file for the context rows. */
        if (fread(W, sizeof(real), n_items, fin) != n_items
            || fseeko(fin, (off_t)file_vocab_s * vector_size * (off_t)sizeof(real), SEEK_SET) != 0
            || fread(&W[vocab_size * vector_size], sizeof(real), n_items, fin) != n_items) {
            fprintf(stderr, "Error reading weight file %s\n", weight_file);
            exit(1);
        }

        fclose(fin);

        // Initialize the word and context rows the file did not cover
        for (b = 0; b < vector_size; b++) for (a = term_vocab_s; a < vocab_size; a++) W[a * vector_size + b] = (rand() / (real)RAND_MAX - 0.5) / vector_size;
        for (b = 0; b < vector_size; b++) for (a = vocab_size + term_vocab_s; a < 2 * vocab_size; a++) W[a * vector_size + b] = (rand() / (real)RAND_MAX - 0.5) / vector_size;

        // Initialize bias randomly
        for (a = 0; a < 2 * vocab_size; a++) W[a * vector_size + vector_size - 1] = (rand() / (real)RAND_MAX - 0.5) / vector_size;
    } else {
        for (b = 0; b < vector_size; b++) for (a = 0; a < 2 * vocab_size; a++) W[a * vector_size + b] = (rand() / (real)RAND_MAX - 0.5) / vector_size;
    }

    for (b = 0; b < vector_size; b++) for (a = 0; a < 2 * vocab_size; a++) gradsq[a * vector_size + b] = 1.0; // So initial value of eta is equal to initial learning rate

    vector_size--;
}
Exemplo n.º 28
0
/*
 * Prepare the test environment:
 *   - parse the optional alignment and worker-count option strings,
 *   - create a temporary directory and probe it for O_DIRECT support,
 *   - fall back to mounting LTP_BIG_DEV (formatted as ext3) on MNT_POINT when
 *     O_DIRECT is unsupported or there is not enough free space,
 *   - allocate the worker array, create the data files and the read buffer.
 *
 * Failures are reported through tst_brkm().
 */
static void setup(void)
{
	char filename[PATH_MAX];
	int n, j, fd, directflag = 1;
	long type;

	if (align_str) {
		align = atoi(align_str);
		if (align < 0 || align > PAGE_SIZE)
			tst_brkm(TCONF, NULL, "Bad alignment %d.", align);
	}
	tst_resm(TINFO, "using alignment %d", align);

	if (workers_str) {
		workers = atoi(workers_str);
		if (workers < MIN_WORKERS || workers > MAX_WORKERS) {
			tst_brkm(TCONF, NULL, "Worker count %d not between "
				 "%d and %d, inclusive",
				 workers, MIN_WORKERS, MAX_WORKERS);
		}
	}
	tst_resm(TINFO, "using %d workers.", workers);

	tst_sig(FORK, DEF_HANDLER, NULL);
	tst_require_root(NULL);

	TEST_PAUSE;

	tst_tmpdir();

	/*
	 * Some file systems may not implement the O_DIRECT flag and open() will
	 * fail with EINVAL if it is used. So add this check for current
	 * filesystem current directory is in, if not supported, we choose to
	 * have this test in LTP_BIG_DEV and mkfs it as ext3.
	 */
	fd = open("testfile", O_CREAT | O_DIRECT, 0644);
	if (fd < 0 && errno == EINVAL) {
		type = tst_fs_type(NULL, ".");
		tst_resm(TINFO, "O_DIRECT flag is not supported on %s "
			 "filesystem", tst_fs_type_name(type));
		directflag = 0;
	} else if (fd >= 0) {
		/* 0 is a valid descriptor too; close whatever open() returned. */
		SAFE_CLOSE(NULL, fd);
	}

	SAFE_MKDIR(cleanup, MNT_POINT, DIR_MODE);

	/*
	 * verify whether the current directory has enough free space,
	 * if it is not satisfied, we will use the LTP_BIG_DEV, which
	 * will be exported by runltp with "-z" option.
	 */
	if (!directflag || !tst_fs_has_free(NULL, ".", 1300, TST_MB)) {
		device = getenv("LTP_BIG_DEV");
		if (device == NULL) {
			tst_brkm(TCONF, NULL,
				 "you must specify a big blockdevice(>1.3G)");
		} else {
			tst_mkfs(NULL, device, "ext3", NULL);
		}

		if (mount(device, MNT_POINT, "ext3", 0, NULL) < 0) {
			tst_brkm(TBROK | TERRNO, NULL,
				 "mount device:%s failed", device);
		}
		mount_flag = 1;
	}

	worker = SAFE_MALLOC(cleanup, workers * sizeof(worker_t));

	for (j = 0; j < workers; j++)
		worker[j].worker_number = j;

	/* One data file per index, created through tst_fill_file(). */
	for (n = 1; n <= FILECOUNT; n++) {
		snprintf(filename, sizeof(filename), FILE_BASEPATH, n);

		if (tst_fill_file(filename, n, FILESIZE, 1)) {
			tst_brkm(TBROK, cleanup, "failed to create file: %s",
				 filename);
		}
	}

	/* Page-aligned buffer with `align` extra bytes of room at the end. */
	if (posix_memalign((void **)&buffer, PAGE_SIZE, READSIZE + align) != 0)
		tst_brkm(TBROK, cleanup, "call posix_memalign failed");
}
Exemplo n.º 29
0
/*
 * Build a multi-level index over `num_keys` strictly increasing keys.
 *
 *   num_levels  number of levels in the tree (must be at least 1)
 *   fanout      fanout per level (each entry must be at least 1); a node on
 *               level i holds fanout[i] - 1 keys
 *   num_keys    number of entries in `key`
 *   key         keys in strictly increasing order (checked with assert)
 *
 * Returns a newly allocated Tree whose per-level key arrays are aligned to
 * `alignment` and padded with INT32_MAX, or NULL (after printing a message to
 * stderr) when the configuration is invalid or memory runs out. The caller
 * owns the returned tree.
 */
Tree* build_index(size_t num_levels, size_t fanout[], size_t num_keys, int32_t key[]) {
        Tree* tree = NULL;
        size_t* key_count = NULL;
        size_t* array_capacity = NULL;

        // return null pointer for invalid tree configuration
        if (num_levels == 0 || fanout == NULL || key == NULL) {
                fprintf(stderr, "Error: invalid tree configuration\n");
                return NULL;
        }
        for (size_t i = 0; i < num_levels; ++i) {
                // fanout 0 would underflow the node capacity computed below
                if (fanout[i] == 0) {
                        fprintf(stderr, "Error: invalid tree configuration\n");
                        return NULL;
                }
        }
        size_t min_num_keys = 1;
        for (size_t i = 0; i < num_levels - 1; ++i) {
                min_num_keys *= fanout[i];
        }
        size_t max_num_keys = min_num_keys * fanout[num_levels - 1] - 1;
        if (num_keys < min_num_keys || num_keys > max_num_keys) {
                fprintf(stderr, "Error: incorrect number of keys, min %zu, max %zu\n", min_num_keys, max_num_keys);
                return NULL;
        }

        // initialize the tree index
        tree = malloc(sizeof(Tree));
        if (tree == NULL)
                goto fail;
        tree->num_levels = num_levels;
        tree->node_capacity = malloc(sizeof(size_t) * num_levels);
        // zeroed so the failure path can free every slot unconditionally
        tree->key_array = calloc(num_levels, sizeof(int32_t*));
        key_count = malloc(sizeof(size_t) * num_levels);
        array_capacity = malloc(sizeof(size_t) * num_levels);
        if (tree->node_capacity == NULL || tree->key_array == NULL
            || key_count == NULL || array_capacity == NULL)
                goto fail;

        for (size_t i = 0; i < num_levels; ++i) {
                tree->node_capacity[i] = fanout[i] - 1;
                key_count[i] = 0;
                array_capacity[i] = tree->node_capacity[i];     // array_capacity[i] is always a multiple of node_capacity[i]

                size_t size = sizeof(int32_t) * tree->node_capacity[i];         // allocate one node per level
                void* node = NULL;
                if (posix_memalign(&node, alignment, size) != 0)
                        goto fail;
                tree->key_array[i] = node;
        }

        // insert sorted keys into index
        for (size_t i = 1; i < num_keys; ++i) {
                assert(key[i - 1] < key[i]);
        }
        for (size_t i = 0; i < num_keys; ++i) {
                // find the deepest level that still has room; the max_num_keys
                // check above guarantees one exists
                size_t level = num_levels - 1;
                while (key_count[level] == array_capacity[level])
                        level -= 1;
                tree->key_array[level][key_count[level]] = key[i];
                key_count[level] += 1;
                // every level below the insertion point gets one more node
                while (level < num_levels - 1) {
                        level += 1;
                        size_t new_capacity = array_capacity[level] + tree->node_capacity[level];
                        size_t size = sizeof(int32_t) * new_capacity;           // allocate one more node
                        void* grown = NULL;
                        if (posix_memalign(&grown, alignment, size) != 0)
                                goto fail;
                        if (key_count[level] > 0)
                                memcpy(grown, tree->key_array[level], sizeof(int32_t) * key_count[level]);
                        free(tree->key_array[level]);
                        tree->key_array[level] = grown;
                        array_capacity[level] = new_capacity;
                }
        }

        // pad with INT32_MAXs
        for (size_t i = 0; i < num_levels; ++i) {
                for (size_t j = key_count[i]; j < array_capacity[i]; ++j)
                        tree->key_array[i][j] = INT32_MAX;
                key_count[i] = array_capacity[i];
        }

        free(array_capacity);
        free(key_count);
        return tree;

fail:
        fprintf(stderr, "Error: out of memory while building index\n");
        if (tree != NULL) {
                if (tree->key_array != NULL) {
                        for (size_t i = 0; i < num_levels; ++i)
                                free(tree->key_array[i]);
                }
                free(tree->key_array);
                free(tree->node_capacity);
                free(tree);
        }
        free(array_capacity);
        free(key_count);
        return NULL;
}
Exemplo n.º 30
0
/*
 * Two modes, selected by the command line:
 *   - no options: write FILECOUNT data files of FILESIZE bytes, file n being
 *     filled with byte value n, then exit;
 *   - -a <align> and/or -w <workers>: read every file back with O_DIRECT in
 *     READSIZE chunks, each chunk split between `workers` threads while a
 *     separate fork thread runs alongside them.
 *
 * Returns 0 on success; exits with status 1 on any setup or thread error.
 */
int main(int argc, char *argv[])
{
    unsigned char  *buffer = NULL;
    char            filename[1024];
    int             fd;
    bool            dowrite = true;
    pthread_t       fork_tid;
    int             c, n, j;
    worker_t       *worker;
    int             align = 0;
    int             offset, rc;

    workers = sysconf(_SC_NPROCESSORS_ONLN);

    while ((c = getopt(argc, argv, "a:hw:")) != -1) {
        switch (c) {
        case 'a':
            align = atoi(optarg);
            if (align < 0 || align > PAGE_SIZE) {
                printf("Bad alignment %d.\n", align);
                exit(1);
            }
            dowrite = false;
            break;

        case 'h':
            usage();
            exit(0);
            break;

        case 'w':
            workers = atoi(optarg);
            if (workers < MIN_WORKERS || workers > MAX_WORKERS) {
                fprintf(stderr, "Worker count %d not between "
                                "%d and %d, inclusive.\n",
                                workers, MIN_WORKERS, MAX_WORKERS);
                usage();
                exit(1);
            }
            dowrite = false;
            break;

        default:
            usage();
            exit(1);
        }
    }

    if (argc > 1 && (optind < argc)) {
        fprintf(stderr, "Bad command line.\n");
        usage();
        exit(1);
    }

    if (dowrite) {
        /* Write mode: create the pattern files and leave. */
        buffer = malloc(FILESIZE);
        if (buffer == NULL) {
            fprintf(stderr, "Failed to malloc write buffer.\n");
            exit(1);
        }

        for (n = 1; n <= FILECOUNT; n++) {
            snprintf(filename, sizeof(filename), FILENAME, n);
            fd = open(filename, O_RDWR|O_CREAT|O_TRUNC, 0666);
            if (fd < 0) {
                printf("create failed(%s): %s.\n", filename, strerror(errno));
                exit(1);
            }
            memset(buffer, n, FILESIZE);
            printf("Writing file %s.\n", filename);
            if (write(fd, buffer, FILESIZE) != FILESIZE) {
                printf("write failed (%s)\n", filename);
            }

            close(fd);
            fd = -1;
        }

        free(buffer);
        buffer = NULL;

        printf("done\n");
        exit(0);
    }

    printf("Using %d workers.\n", workers);

    worker = malloc(workers * sizeof(worker_t));
    if (worker == NULL) {
        fprintf(stderr, "Failed to malloc worker array.\n");
        exit(1);
    }

    for (j = 0; j < workers; j++) {
        worker[j].worker_number = j;
    }

    printf("Using alignment %d.\n", align);

    /* Page-aligned read buffer with `align` extra bytes; the workers read
     * into it starting `align` bytes past its beginning. */
    rc = posix_memalign((void **)&buffer, PAGE_SIZE, READSIZE + align);
    if (rc != 0) {
        fprintf(stderr, "Failed to allocate read buffer: %s.\n",
                        strerror(rc));
        exit(1);
    }
    printf("Read buffer: %p.\n", buffer);
    for (n = 1; n <= FILECOUNT; n++) {

        snprintf(filename, sizeof(filename), FILENAME, n);
        for (j = 0; j < workers; j++) {
            if ((worker[j].fd = open(filename,  O_RDONLY|O_DIRECT)) < 0) {
                fprintf(stderr, "Failed to open %s: %s.\n",
                                filename, strerror(errno));
                exit(1);
            }

            worker[j].pattern = n;
        }

        printf("Reading file %d.\n", n);

        for (offset = 0; offset < FILESIZE; offset += READSIZE) {
            memset(buffer, PATTERN, READSIZE + align);
            for (j = 0; j < workers; j++) {
                worker[j].offset = offset + j * PAGE_SIZE;
                worker[j].buffer = buffer + align + j * PAGE_SIZE;
                worker[j].length = PAGE_SIZE;
            }
            /* The final worker reads whatever is left over. */
            worker[workers - 1].length = READSIZE - PAGE_SIZE * (workers - 1);

            done = 0;

            rc = pthread_create(&fork_tid, NULL, fork_thread, NULL);
            if (rc != 0) {
                fprintf(stderr, "Can't create fork thread: %s.\n",
                                strerror(rc));
                exit(1);
            }

            for (j = 0; j < workers; j++) {
                rc = pthread_create(&worker[j].tid,
                                    NULL,
                                    worker_thread,
                                    worker + j);
                if (rc != 0) {
                    fprintf(stderr, "Can't create worker thread %d: %s.\n",
                                    j, strerror(rc));
                    exit(1);
                }
            }

            for (j = 0; j < workers; j++) {
                rc = pthread_join(worker[j].tid, NULL);
                if (rc != 0) {
                    fprintf(stderr, "Failed to join worker thread %d: %s.\n",
                                    j, strerror(rc));
                    exit(1);
                }
            }

            /* Let the fork thread know it's ok to exit */
            done = 1;

            rc = pthread_join(fork_tid, NULL);
            if (rc != 0) {
                fprintf(stderr, "Failed to join fork thread: %s.\n",
                                strerror(rc));
                exit(1);
            }
        }

        /* Close the fd's for the next file. */
        for (j = 0; j < workers; j++) {
            close(worker[j].fd);
        }
    }

    free(buffer);
    free(worker);

    return 0;
}