示例#1
0
int main(int argc, char **argv)
{
	CHtHif *pHtHif = new CHtHif();

	printf("#AUs = %d\n", pHtHif->GetUnitCnt());
	fflush(stdout);

	CHtAuUnit *pUnit = new CHtAuUnit(pHtHif);

	pUnit->SendCall_htmain();

	// wait for return
	uint32_t err_cnt;
	while (!pUnit->RecvReturn_htmain(err_cnt))
		usleep(1);

	delete pUnit;
	delete pHtHif;

	if (err_cnt > 0)
		printf("FAILED: detected %d issues!\n", err_cnt);
	else
		printf("PASSED\n");

	return err_cnt;
}
示例#2
0
int main(int argc, char **argv)
{
	printf("%s\n", argv[0]);

	CHtHif *pHtHif = new CHtHif();
	CHtAuUnit *pUnit = new CHtAuUnit(pHtHif);

	printf("#AUs = %d\n", pHtHif->GetUnitCnt());
	fflush(stdout);

	uint64_t data[(32*8)+LOOPCNT]; // Max 32 Cycles Per Test

	for (uint32_t i = 0; i < LOOPCNT; i++) {

		printf("Running Loop %d of %d...", i+1, LOOPCNT);

		while (!pUnit->SendCall_main(&data[i]))
			usleep(1000);

		while (!pUnit->RecvReturn_main())
			usleep(1000);

		printf("OK\n");

	}

	delete pUnit;
	delete pHtHif;

	printf("PASSED\n");

	return 0;
}
示例#3
0
/*
 * Host driver: sends one htmain call carrying LOOP_CNT to a single AU unit,
 * polls for the return, and reports the 16-bit error count it delivers.
 *
 * Returns the reported error count (0 when the run passed).
 */
int main(int argc, char **argv)
{
	CHtHif *pHtHif = new CHtHif();
	CHtAuUnit *pUnit = new CHtAuUnit(pHtHif);

	printf("#AUs = %d\n", pHtHif->GetUnitCnt());
	fflush(stdout);

	pUnit->SendCall_htmain(LOOP_CNT);

	// wait for return
	uint16_t rtn_errorCnt;
	while (!pUnit->RecvReturn_htmain(rtn_errorCnt))
		usleep(1);

	// Release the unit object as well as the interface; the unit was
	// allocated with new above and was previously never deleted.
	delete pUnit;
	delete pHtHif;

	printf("RTN: errorCnt = %d\n", rtn_errorCnt);

	if (rtn_errorCnt > 0)
		printf("FAILED: detected %d issues!\n", rtn_errorCnt);
	else
		printf("PASSED\n");

	return rtn_errorCnt;
}
示例#4
0
/*
 * Host driver: passes a four-word local buffer to a single AU unit through
 * one main call, polls for the return, and reports the error flag it delivers.
 *
 * Returns 1 when the unit reported an error, 0 otherwise.
 */
int main(int argc, char **argv)
{
    CHtHif *pHtHif = new CHtHif();
    CHtAuUnit *pUnit = new CHtAuUnit(pHtHif);

    printf("#AUs = %d\n", pHtHif->GetUnitCnt());
    fflush(stdout);

    uint64_t data[4];

    pUnit->SendCall_main(data);

    // wait for return
    bool bErr;
    while (!pUnit->RecvReturn_main(bErr))
        usleep(1);

    // Release the unit object as well as the interface; the unit was
    // allocated with new above and was previously never deleted.
    delete pUnit;
    delete pHtHif;

    if (bErr)
        printf("FAILED\n");
    else
        printf("PASSED\n");

    return bErr ? 1 : 0;
}
示例#5
0
/*
 * Host driver: fills the file-level array `arr` with 0..CNT-1, issues THREADS
 * htmain calls on one SU unit (each carrying the address of `arr`), then
 * collects THREADS returns and sums the error value delivered by each.
 *
 * Returns the summed error count over all returns (0 when the run passed).
 */
int main(int argc, char **argv)
{
	CHtHif *pHtHif = new CHtHif();
	CHtSuUnit *pSuUnit = new CHtSuUnit(pHtHif);

	printf("#AUs = %d\n", pHtHif->GetUnitCnt());
	fflush(stdout);

	for (int i = 0; i < CNT; i++)
		arr[i] = i;

	// Issue every call up front, spinning until each one is accepted.
	for (int i = 0; i < THREADS; i += 1)
		while (!pSuUnit->SendCall_htmain((uint64_t)&arr)) ;

	// wait for return
	uint32_t err;
	uint32_t errCnt = 0;

	for (int i = 0; i < THREADS; i += 1) {
		while (!pSuUnit->RecvReturn_htmain(err))
			usleep(1000);
		errCnt += err;
	}

	// Release the unit object as well as the interface.
	delete pSuUnit;
	delete pHtHif;

	// The verdict must be based on the accumulated total: `err` only holds
	// the value from the last return, so earlier failures would be lost.
	if (errCnt)
		printf("FAILED: detected %u issues!\n", errCnt);
	else
		printf("PASSED\n");

	return errCnt;
}
示例#6
0
文件: Main.cpp 项目: mdjamoos/OpenHT
int main(int argc, char **argv)
{
	for (int i = 0; i < CNT; i++) {
		arr[i * 2] = i;
		arr[i * 2 + 1] = 0xdeadbeefdeadbeefLL;
	}

	CHtHifParams htHifParams;
	htHifParams.m_bHtHifHugePage = true;

	CHtHif *pHtHif;
	try {
		pHtHif = new CHtHif(&htHifParams);
	}
	catch (CHtException &htException) {
		printf("new CHtHif threw an exception: '%s'\n", htException.GetMsg().c_str());
		exit(1);
	}

	CHtSuUnit *pUnit = new CHtSuUnit(pHtHif);

	printf("#AUs = %d\n", pHtHif->GetUnitCnt());
	fflush(stdout);

	pHtHif->SendAllHostMsg(SU_ARRAY_ADDR, (uint64_t)&arr);
	//pUnit->SendHostMsg(SU_ARRAY_ADDR, (uint64_t)&arr);

	pUnit->SendCall_htmain(CNT);

	// wait for return
	uint32_t rtn_elemCnt;
	while (!pUnit->RecvReturn_htmain(rtn_elemCnt))
		usleep(1);

	delete pUnit;
	delete pHtHif;

	printf("RTN: elemCnt = %d\n", rtn_elemCnt);

	// check results
	int err_cnt = 0;
	for (unsigned i = 0; i < CNT; i++) {
		if (arr[i * 2 + 1] != i + 1) {
			printf("arr[%d] is %lld, should be %d\n",
			       i, (long long)arr[i * 2 + 1], i + 1);
			err_cnt++;
		}
	}

	if (err_cnt)
		printf("FAILED: detected %d issues!\n", err_cnt);
	else
		printf("PASSED\n");

	return err_cnt;
}
示例#7
0
/*
 * Host driver: issues one argument-less htmain call on a single SU unit and
 * waits for it to return.
 *
 * Always returns 0; the program only completes once the call has returned.
 */
int main(int argc, char **argv)
{
	CHtHif *pHtHif = new CHtHif();
	CHtSuUnit *pSuUnit = new CHtSuUnit(pHtHif);

	printf("#AUs = %d\n", pHtHif->GetUnitCnt());
	fflush(stdout);

	pSuUnit->SendCall_htmain();

	// wait for return
	while (!pSuUnit->RecvReturn_htmain())
		usleep(1000);

	// Release the unit object as well as the interface; the unit was
	// allocated with new above and was previously never deleted.
	delete pSuUnit;
	delete pHtHif;

	printf("PASSED\n");

	return 0;
}
示例#8
0
/*
 * Host driver: fills the file-level array `arr` with 0..CNT-1, then issues
 * CALL_RTN_CNT htmain calls (each carrying the same ArgStruct) on one AU unit
 * while concurrently draining CALL_RTN_CNT returns.
 *
 * No result checking is performed: err_cnt is never incremented, so the
 * program reports PASSED and returns 0 once all calls have returned.
 */
int main(int argc, char **argv)
{
	for (int i = 0; i < CNT; i++)
		arr[i] = i;

	CHtHif *pHtHif = new CHtHif();
	CHtAuUnit *pUnit = new CHtAuUnit(pHtHif);

	printf("#AUs = %d\n", pHtHif->GetUnitCnt());
	fflush(stdout);

	ArgStruct argStruct;
	argStruct.a = 1;
	argStruct.b = 2;
	argStruct.c = 3;
	argStruct.d = 4;
	argStruct.e = true;

	int callCnt = 0;
	int rtnCnt = 0;
	while (callCnt < CALL_RTN_CNT || rtnCnt < CALL_RTN_CNT) {
		if (callCnt < CALL_RTN_CNT && pUnit->SendCall_htmain(argStruct, true))
			callCnt += 1;

		// The drain guard must use the same bound as the loop condition.
		// It was hard-coded to 2, which stops draining early and spins
		// forever whenever CALL_RTN_CNT is greater than 2.
		if (rtnCnt < CALL_RTN_CNT && pUnit->RecvReturn_htmain())
			rtnCnt += 1;

		usleep(1);
	}

	delete pUnit;
	delete pHtHif;

	int err_cnt = 0;
	if (err_cnt)
		printf("FAILED: detected %d issues!\n", err_cnt);
	else
		printf("PASSED\n");

	return err_cnt;
}
示例#9
0
/*
 * Runs the htmain entry point once on every available AU unit and sums the
 * values the units return.
 *
 * a1, a2, a3 - addresses broadcast to all units as the OP1_ADDR, OP2_ADDR and
 *              RES_ADDR host messages (presumably the two operand vectors and
 *              the result vector - confirm against the unit implementation).
 * vecLen     - value broadcast as the VEC_LEN host message.
 *
 * Each unit is called with its own index as the offset and the unit count as
 * the stride. Returns the sum of the per-unit return values.
 *
 * Everything allocated here (interface, unit objects, unit table) is released
 * before returning, so the function can be called repeatedly without leaking.
 */
uint64_t vadd(uint64_t *a1, uint64_t *a2, uint64_t *a3, uint64_t vecLen)
{
	CHtHif *pHtHif = new CHtHif();

	int unitCnt = pHtHif->GetUnitCnt();
	CHtAuUnit ** pAuUnits = new CHtAuUnit * [unitCnt];

	for (int unitId = 0; unitId < unitCnt; unitId += 1)
		pAuUnits[unitId] = new CHtAuUnit(pHtHif);

	pHtHif->SendAllHostMsg(OP1_ADDR, (uint64_t)a1);
	pHtHif->SendAllHostMsg(OP2_ADDR, (uint64_t)a2);
	pHtHif->SendAllHostMsg(RES_ADDR, (uint64_t)a3);
	pHtHif->SendAllHostMsg(VEC_LEN, (uint64_t)vecLen);

	for (int unit = 0; unit < unitCnt; unit++) {
		pAuUnits[unit]->SendCall_htmain(unit /*offset*/, unitCnt /*stride*/);
	}

	uint64_t sum = 0;
	uint64_t au_sum;
	for (int unit = 0; unit < unitCnt; unit++) {
		while (!pAuUnits[unit]->RecvReturn_htmain(au_sum))
			usleep(1000);
		// %llu expects an unsigned long long argument.
		printf("unit=%-2d: au_sum %llu \n", unit, (unsigned long long)au_sum);
		fflush(stdout);
		sum += au_sum;
	}

	printf("RTN: sum = %llu\n", (unsigned long long)sum);

	// Release the units and the table (allocated with new[]) before the
	// interface they were created from.
	for (int unit = 0; unit < unitCnt; unit++)
		delete pAuUnits[unit];
	delete[] pAuUnits;
	delete pHtHif;

	return sum;
}
示例#10
0
/*
 * Randomized host-data stress test.
 *
 * For every AU unit the host issues a random number of htmain calls. Each
 * call announces a random word count, after which the host pushes that many
 * tagged 64-bit words with SendHostData and expects to read the same words
 * back, in order, through RecvHostData. A word is tagged as
 * (unit << 56) | (call << 32) | word index.
 *
 * Per-unit progress is tracked in a state_t record (declared elsewhere).
 * The random seed is SEED, or the current microsecond count when SEED is -1,
 * and is printed so a run can be reproduced.
 *
 * Returns err_cnt, the number of host-side data mismatches and missing-word
 * errors. Errors reported by the units (aerr_cnt) influence the PASSED/FAILED
 * message but not the return value.
 */
int main(int argc, char **argv)
{
	// The leading "0 &&" disables opening the trace file here; tracing
	// below only happens if tfp was already set elsewhere.
	if (0 && !tfp && !(tfp = fopen(tfn, "w"))) {
		fprintf(stderr, "Could not open %s for writing\n", tfn);
		exit(-1);
	}

	CHtHifParams params;
	//params.m_iBlkTimerUSec = 0;
	//params.m_oBlkTimerUSec = 0;

	CHtHif *pHtHif = new CHtHif();
	int nau = pHtHif->GetUnitCnt();

	CHtAuUnit ** pAuUnits = new CHtAuUnit * [nau];
	for (int unit = 0; unit < nau; unit++)
		pAuUnits[unit] = new CHtAuUnit(pHtHif);

	time_t time_now = time(NULL);

	printf("start time = %s", ctime(&time_now));

	struct timeval st;
	gettimeofday(&st, NULL);
	unsigned int seed = (SEED != -1) ? SEED : (int)st.tv_usec;
	printf("seed = 0x%x\n", seed);
	fflush(stdout);
	srand(seed);

	int active_cnt = 0;
	int err_cnt = 0, aerr_cnt = 0;
	int tot_call = 0, tot_rtn = 0;
	long long tot_push = 0, tot_pop = 0;

	state_t *ast = (state_t *)calloc(nau, sizeof(state_t));
	if (!ast) {
		fprintf(stderr, "Could not allocate state for %d units\n", nau);
		exit(-1);
	}

	for (int au = 0; au < nau; au++) {
		active_cnt += 1;
		ast[au].done = ast[au].done_calls = false;
		ast[au].num_call = 1;
		if (MAX_CALLS > 1)
			ast[au].num_call = (rand() % (MAX_CALLS - 1)) + 1;
		// -1 means "no word transfer in progress" in either direction.
		ast[au].send_word = -1;
		ast[au].recv_word = -1;
	}

	// Round-robin over the units until every one has received all of its
	// returns. Nothing in this loop blocks; each step is retried on the
	// next pass when the interface is not ready.
	while (active_cnt) {
		for (int au = 0; au < nau; au++) {
			uint64_t d;

			// fineto
			if (ast[au].done)
				continue;

			// Call
			if (!ast[au].done_calls && ast[au].send_word < 0) {
				int cnt = rand() % MAX_WORDS;
				if (pAuUnits[au]->SendCall_htmain(au, ast[au].send_call, cnt)) {
					if (tfp) fprintf(tfp, "SendCall(%d, 0x%x, %d) unit=%d\n",
							 au, ast[au].send_call, cnt, au);

					tot_call += 1;
					if (!(tot_call % 250))
						fprintf(stderr, "tot_call=%d\n", tot_call);

					ast[au].send_word = 0;
					// -2 marks a call that carries no data words.
					ast[au].num_words[ast[au].send_call] = -2;
					if (cnt)
						ast[au].num_words[ast[au].send_call] = cnt;
					if (ast[au].num_call - 1 == ast[au].send_call)
						ast[au].done_calls = true;
				}
			}

			// iBlk
			if (ast[au].num_words[ast[au].send_call] == -2) {
				// Zero-word call: nothing to push, move to the next call.
				ast[au].send_call += 1;
				ast[au].send_word = -1;
			} else while (ast[au].send_word >= 0) {
				d = 0;
				d |= ast[au].send_word;
				d |= (uint64_t)ast[au].send_call << 32;
				d |= (uint64_t)au << 56;
				if (pAuUnits[au]->SendHostData(1, &d)) {
					if (tfp) fprintf(tfp, "SendHostData(0x%016llx)\n", (long long)d);
					tot_push += 1;
					ast[au].send_word += 1;
					if (ast[au].send_word ==
					    ast[au].num_words[ast[au].send_call]) {
						ast[au].send_call += 1;
						ast[au].send_word = -1;

						// flush data
						pAuUnits[au]->FlushHostData();
					}
				} else {
					// Word not accepted; retry on the next pass.
					break;
				}
			}

			// Return
			int errs;
			if (pAuUnits[au]->RecvReturn_htmain(errs)) {
				if (tfp) fprintf(tfp, "RecvReturn_htmain(%d) unit=%d\n", errs, au);
				tot_rtn += 1;

				aerr_cnt += errs;

				// A return arriving while words of the current call
				// are still outstanding means data went missing.
				if (ast[au].recv_word >= 0) {
					err_cnt += 1;
					int exp = ast[au].num_words[ast[au].recv_call];
					fprintf(stderr, "ERROR: unit%d call %d missing %d words\n",
						au, ast[au].recv_call, exp - ast[au].recv_word);
				}

				ast[au].recv_call += 1;

				if (ast[au].num_call == ast[au].recv_call) {
					active_cnt -= 1;
					ast[au].done = true;
				}

				int pcnt = rand() % MAX_WORDS;
				pAuUnits[au]->SendHostMsg(FLUSH_PERIOD, pcnt);
			}

			// oBlk
			while (pAuUnits[au]->RecvHostData(1, &d)) {
				if (tfp) fprintf(tfp, "RecvHostData(0x%016llx)\n",
						 (long long)d);
				tot_pop += 1;

				if (ast[au].recv_word < 0)
					ast[au].recv_word = 0;

				// Rebuild the tag this word must carry.
				uint64_t exp = 0;
				exp |= (uint64_t)ast[au].recv_word;
				exp |= (uint64_t)ast[au].recv_call << 32;
				exp |= (uint64_t)au << 56;

				ast[au].recv_word += 1;

				if (ast[au].recv_word ==
				    ast[au].num_words[ast[au].recv_call])
					ast[au].recv_word = -1;

				if (d != exp) {
					err_cnt += 1;
					fprintf(stderr, "ERROR: unit%d expected 0x%016llx != 0x%016llx\n",
						au, (long long)exp, (long long)d);
				}
				assert(!err_cnt);
			}
		}
	}

	time_now = time(NULL);
	printf("end time = %s", ctime(&time_now));

	for (int au = 0; au < nau; au += 1)
		delete pAuUnits[au];
	// The unit table came from new[], so it must be released with delete[].
	delete[] pAuUnits;
	delete pHtHif;
	free(ast);

	if (err_cnt || aerr_cnt)
		printf("FAILED: detected %d/%d issues!\n", err_cnt, aerr_cnt);
	else
		printf("PASSED\n");

	return err_cnt;
}
示例#11
0
/*
 * Matrix-multiply host driver.
 *
 * Usage (as parsed below):
 *   prog                          - default_size square matrices
 *   prog debug                    - same, debug must be 0 or 1
 *   prog aRow aCol bRow bCol      - explicit dimensions (all > 0)
 *   prog aRow aCol bRow bCol dbg  - explicit dimensions plus debug flag
 *
 * Builds matrices A and B on the host, copies them into memory obtained from
 * the host interface, lets every called unit run htmain with its own row
 * offset and a common stride, copies the product back, and compares it with a
 * product computed on the host.
 *
 * Returns the number of mismatching result elements (0 when the run passed).
 * Invalid dimension arguments print the usage text and return 0; a dimension
 * mismatch between A and B or a failed host allocation exits with status 1.
 */
int main(int argc, char **argv) {

	int debug = 0;
	int mat_int_len, temp;

#ifdef HT_MODEL
	default_size = 15;
#endif

#ifdef HT_SYSC
	default_size = 15;
#endif

#ifdef HT_VSIM
	default_size = 3;
#endif

	uint64_t i, j, k;
	uint64_t aRow, aCol, bRow, bCol;
	uint64_t *a1, *a2, *a3;

	// check command line args
	if (argc <= 2) {
		// Defaults
		aRow = default_size;
		aCol = default_size;
		bRow = default_size;
		bCol = default_size;
		if (argc == 2) {
			int flag = atoi(argv[1]);
			if (flag != 0 && flag != 1)
				usage(argv[0]);
			debug = (flag == 1) ? 1 : 0;
		}

	} else if (argc == 5 || argc == 6) {
		// Grab Command Line Values. Parse into signed ints first: the
		// dimensions are stored unsigned, so a "<= 0" test on them could
		// never reject a negative argument.
		int dim[4];
		for (int n = 0; n < 4; n++)
			dim[n] = atoi(argv[n + 1]);
		if (argc == 6)
			debug = atoi(argv[5]) == 1 ? 1 : 0;

		if (dim[0] <= 0 || dim[1] <= 0 || dim[2] <= 0 || dim[3] <= 0) {
			usage(argv[0]);
			return 0;
		}
		aRow = dim[0];
		aCol = dim[1];
		bRow = dim[2];
		bCol = dim[3];
	} else {
		usage(argv[0]);
		return 0;
	}

	if (aCol != bRow) {
		printf("ERROR: Number of columns in Matrix A (%lld) does not equal the number of rows in Matrix B (%lld)\n\n",
			(long long)aCol, (long long)bRow);
		exit(1);
	}

	printf("Running with: Matrix A: (%lldx%lld) and Matrix B (%lldx%lld)\n",
		(long long)aRow, (long long)aCol, (long long)bRow, (long long)bCol);
	printf("Initializing arrays\n");
	fflush(stdout);

	a1 = (uint64_t *)malloc(aRow * aCol * sizeof(uint64_t));
	a2 = (uint64_t *)malloc(bRow * bCol * sizeof(uint64_t));
	a3 = (uint64_t *)calloc(aRow * bCol, sizeof(uint64_t));
	if (!a1 || !a2 || !a3) {
		fprintf(stderr, "ERROR: host matrix allocation failed\n");
		exit(1);
	}

	/*
	* Matrix A and B are stored differently in memory (to make reads more efficient?)
	* The following are 3x3 examples: (A counts from 8 -> 0, B counts from 0 -> 8)
	*
	* Actual Numbers:         Memory Locations (by Index)
	*
	* A: 8  7  6              A: 0  3  6
	*    5  4  3                 1  4  7
	*    2  1  0                 2  5  8
	*
	* B: 0  1  2              B: 0  1  2
	*    3  4  5                 3  4  5
	*    6  7  8                 6  7  8
	*
	* I did this to try to make A Rows available on strides, as well as B Columns on strides...
	*
	*/

	// Fill Matrix A (column by column, values descending)
	k = 0;
	for (i = 0; i < aCol; i++) {
		for (j = aRow; j > 0; j--) {
			a1[k] = (aRow*aCol)-aCol*(aRow-j)-i-1;
			k++;
		}
	}

	// Fill Matrix B
	for (i = 0; i < bRow*bCol; i++) {
		a2[i] = i;
	}

	// Print Matrices. The column width is the longest value in the matrix
	// being printed, so each scan must cover exactly that matrix.
	mat_int_len = 1;
	for (i = 0; i < aRow*aCol; i++) {
		temp = num_length(a1[i]);
		mat_int_len = (temp > mat_int_len) ? temp : mat_int_len;
	}
	printf("Matrix A:\n");
	for (i = 0; i < aRow; i++) {
		for (j = 0; j < aCol; j++) {
			printf("%*lld ", mat_int_len, (long long)a1[i+aRow*j]);
		}
		printf("\n");
	}
	printf("\n\n");

	mat_int_len = 1;
	for (i = 0; i < bRow*bCol; i++) {
		temp = num_length(a2[i]);
		mat_int_len = (temp > mat_int_len) ? temp : mat_int_len;
	}
	printf("Matrix B:\n");
	for (i = 0; i < bRow*bCol; i++) {
		if (i > 0) {
			if (i%bCol == 0) {
				printf("\n");
			}
		}
		printf("%*lld ", mat_int_len, (long long)a2[i]);
	}
	printf("\n\n");


	// Debug - Print Matrix Values at memory locations
	if (debug) {
		printf("A - MEM\n");
		for (i = 0; i < aRow*aCol; i++) {
			printf("%lld - %lld\n", (long long)i, (long long)a1[i]);
		}

		printf("B - MEM\n");
		for (i = 0; i < bRow*bCol; i++) {
			printf("%lld - %lld\n", (long long)i, (long long)a2[i]);
		}
	}

	CHtHif *pHtHif = new CHtHif();

	// Coprocessor memory arrays
	uint64_t *cp_a1 = (uint64_t *)pHtHif->MemAllocAlign(4 * 1024, aRow * aCol * sizeof(uint64_t));
	uint64_t *cp_a2 = (uint64_t *)pHtHif->MemAllocAlign(4 * 1024, bRow * bCol * sizeof(uint64_t));
	uint64_t *cp_a3 = (uint64_t *)pHtHif->MemAllocAlign(4 * 1024, aRow * bCol * sizeof(uint64_t));

	pHtHif->MemCpy(cp_a1, a1, aRow * aCol * sizeof(uint64_t));
	pHtHif->MemCpy(cp_a2, a2, bRow * bCol * sizeof(uint64_t));
	pHtHif->MemSet(cp_a3, 0, aRow * bCol * sizeof(uint64_t));

	int unitCnt = pHtHif->GetUnitCnt();
	// unitCnt may be reduced below; remember how many units were created
	// so all of them can be released at the end.
	int const allocUnitCnt = unitCnt;
	CHtAuUnit ** pAuUnits = new CHtAuUnit * [allocUnitCnt];

	for (int unitId = 0; unitId < allocUnitCnt; unitId += 1)
		pAuUnits[unitId] = new CHtAuUnit(pHtHif);

	printf("#AUs = %d\n", unitCnt);

	// avoid bank aliasing for performance
	if (unitCnt > 16 && !(unitCnt & 1)) unitCnt -= 1;
	printf("stride = %d\n", unitCnt);

	fflush(stdout);

	pHtHif->SendAllHostMsg(MA_BASE, (uint64_t)cp_a1);
	pHtHif->SendAllHostMsg(MB_BASE, (uint64_t)cp_a2);
	pHtHif->SendAllHostMsg(MC_BASE, (uint64_t)cp_a3);
	pHtHif->SendAllHostMsg(MC_ROW, (uint32_t)aRow);
	pHtHif->SendAllHostMsg(MC_COL, (uint32_t)bCol);
	pHtHif->SendAllHostMsg(COMMON, (uint32_t)aCol);

	for (int unit = 0; unit < unitCnt; unit++)
		pAuUnits[unit]->SendCall_htmain(unit /*rowOffset*/, unitCnt /*stride*/);

	for (int unit = 0; unit < unitCnt; unit++) {
		while (!pAuUnits[unit]->RecvReturn_htmain())
			usleep(1000);
		fflush(stdout);
	}

	pHtHif->MemCpy(a3, cp_a3, aRow * bCol * sizeof(uint64_t));

	// Print Resulting Matrix
	mat_int_len = 1;
	for (i = 0; i < aRow*bCol; i++) {
		temp = num_length(a3[i]);
		mat_int_len = (temp > mat_int_len) ? temp : mat_int_len;
	}

	printf("\nMatrix C:\n");
	for (i = 0; i < aRow*bCol; i++) {
		if (i > 0) {
			if (i%bCol == 0) {
				printf("\n");
			}
		}
		printf("%*lld ", mat_int_len, (long long)a3[i]);
	}
	printf("\n\n");

	if (debug) {
		printf("C - MEM\n");
		for (i = 0; i < aRow*bCol; i++) {
			printf("%lld - %lld\n", (long long)i, (long long)a3[i]);
		}
	}

	// Do error checking
	int err_cnt = 0;
	uint64_t rowNum = 0, colNum = 0, calcNum = 0, eleNum = 0;
	uint64_t *val;

	val = (uint64_t *)calloc(aRow * bCol, sizeof(uint64_t));
	if (!val) {
		fprintf(stderr, "ERROR: host matrix allocation failed\n");
		exit(1);
	}

	// Calculate the resulting matrix to check against coprocessor results.
	// A is read column-major (index row + k*aRow), B row-major.
	for (rowNum = 0; rowNum < aRow; rowNum++) {
		for (colNum = 0; colNum < bCol; colNum++) {
			for (calcNum = 0; calcNum < aCol; calcNum++) {
				val[eleNum] += a1[rowNum+(calcNum*aRow)] * a2[colNum+(calcNum*bCol)];
			}
			eleNum++;
		}
	}

	// Check results
	for (eleNum = 0; eleNum < aRow*bCol; eleNum++) {
		if (val[eleNum] != a3[eleNum]) {
			err_cnt++;
			printf("Found element mismatch at matrix position %lld - found value %lld, expected value %lld.\n",
				(long long)eleNum, (long long)a3[eleNum], (long long)val[eleNum]);
		}
	}

	if (err_cnt == 0) {
		// Test Passed
		printf("PASSED\n\n");
	} else {
		// Test Failed
		printf("FAILED - error count %d\n\n", err_cnt);
	}

	// free memory
	free(a1);
	free(a2);
	free(a3);
	pHtHif->MemFreeAlign(cp_a1);
	pHtHif->MemFreeAlign(cp_a2);
	pHtHif->MemFreeAlign(cp_a3);
	free(val);

	// Release every unit that was created (not just the ones called), and
	// the table allocated with new[], before the interface itself.
	for (int unitId = 0; unitId < allocUnitCnt; unitId += 1)
		delete pAuUnits[unitId];
	delete[] pAuUnits;
	delete pHtHif;

	return err_cnt;
}
示例#12
0
/*
 * Stencil host driver.
 *
 * Usage (as parsed below): no arguments for a 100x100 grid, or "rows cols"
 * with both values in 1..1023.
 *
 * Builds a rows x cols grid surrounded by a zeroed border of STENCIL_RADIUS
 * cells, copies it into memory obtained from the host interface, and splits
 * the rows into contiguous bands, one htmain call per band. While the units
 * run, the host computes the expected output; afterwards the two padded
 * grids are compared cell by cell.
 *
 * Returns the number of mismatching cells (0 when the run passed). Invalid
 * arguments print the usage text and return 0; allocation failures exit
 * with status -1.
 */
int main(int argc, char **argv)
{
	uint32_t rows, cols;
	uint32_t *a1, *a3, *e3;

	int const radius = STENCIL_RADIUS;

	// check command line args
	if (argc == 1) {
		rows = 100;  // default rows
		cols = 100;  // default cols
	} else if (argc == 3) {
		// Parse as signed so that negative input is rejected explicitly.
		int const argRows = atoi(argv[1]);
		if (argRows <= 0 || argRows > 1023) {
			usage(argv[0]);
			return 0;
		}
		int const argCols = atoi(argv[2]);
		if (argCols <= 0 || argCols > 1023) {
			usage(argv[0]);
			return 0;
		}
		rows = argRows;
		cols = argCols;
	} else {
		usage(argv[0]);
		return 0;
	}

	printf("Running with rows = %u, cols = %u\n", rows, cols);
	printf("Initializing arrays\n");
	fflush(stdout);

	// Row pitch and byte size of one padded grid.
	uint32_t const pitch = cols + radius * 2;
	size_t const gridBytes = (size_t)(rows + radius * 2) * pitch * sizeof(uint32_t);

	a1 = (uint32_t *)malloc(gridBytes);
	a3 = (uint32_t *)malloc(gridBytes);
	e3 = (uint32_t *)malloc(gridBytes);
	if (!a1 || !a3 || !e3) {
		fprintf(stderr, "host malloc() failed.\n");
		exit(-1);
	}
	memset(a1, 0, gridBytes);
	memset(a3, 0, gridBytes);
	memset(e3, 0, gridBytes);

	// Interior cells encode their padded coordinates; the border stays 0.
	for (uint32_t r = 0; r < rows; r++) {
		for (uint32_t c = 0; c < cols; c++)
			a1[(r+radius)*pitch+(c+radius)] = ((r+radius) << 8) | (c+radius);
	}

	CHtHif *pHtHif = new CHtHif();
	int unitCnt = pHtHif->GetUnitCnt();
	printf("#AUs = %d\n", unitCnt);

	// unitCnt may be reduced below; remember how many units were created
	// so all of them can be released at the end.
	int const allocUnitCnt = unitCnt;
	CHtAuUnit ** pAuUnits = new CHtAuUnit * [allocUnitCnt];
	for (int unit = 0; unit < allocUnitCnt; unit++)
		pAuUnits[unit] = new CHtAuUnit(pHtHif);

	// Coprocessor memory arrays
	uint32_t *cp_a1 = (uint32_t*)pHtHif->MemAlloc(gridBytes);
	uint32_t *cp_a3 = (uint32_t*)pHtHif->MemAlloc(gridBytes);
	if (!cp_a1 || !cp_a3) {
		fprintf(stderr, "ht_cp_malloc() failed.\n");
		exit(-1);
	}
	pHtHif->MemCpy(cp_a1, a1, gridBytes);
	pHtHif->MemSet(cp_a3, 0, gridBytes);

	// avoid bank aliasing for performance
	if (unitCnt > 16 && !(unitCnt & 1)) unitCnt -= 1;
	printf("stride = %d\n", unitCnt);

	fflush(stdout);

	// Send calls to units. The band height is derived from the number of
	// ROWS (it was computed from cols, which is only right for square
	// grids). Only units whose band starts inside the grid are called;
	// otherwise "rows - unit*rowsPerUnit" would wrap around for the
	// surplus units when there are more units than bands.
	uint32_t const rowsPerUnit = (rows + unitCnt - 1) / unitCnt;
	int const activeCnt = (int)((rows + rowsPerUnit - 1) / rowsPerUnit);
	for (int unit = 0; unit < activeCnt; unit++) {
		uint32_t * pSrcAddr = cp_a1 + unit * pitch * rowsPerUnit;
		uint32_t * pDstAddr = cp_a3 + unit * pitch * rowsPerUnit;

		// The last band may be shorter than the others.
		uint32_t unitRows = (unit+1)*rowsPerUnit > rows ? (rows - unit*rowsPerUnit) : rowsPerUnit;

		pAuUnits[unit]->SendCall_htmain((uint64_t)pSrcAddr, (uint64_t)pDstAddr, unitRows, cols);
	}

	// generate expected results while waiting for returns
	uint32_t coef[5] = { STENCIL_COEF2, STENCIL_COEF1, STENCIL_COEF0/2, STENCIL_COEF1, STENCIL_COEF2 };

	for (uint32_t row = radius; row < rows+radius; row++) {
		for (uint32_t col = radius; col < cols+radius; col++) {
			uint32_t rslt = 0;
			// Horizontal and vertical passes each add half of the
			// centre coefficient (coef[2] is STENCIL_COEF0/2).
			for (uint32_t c = col - STENCIL_RADIUS; c <= col + STENCIL_RADIUS; c++)
				rslt += a1[row*pitch + c] * coef[c - col + STENCIL_RADIUS];

			for (uint32_t r = row - STENCIL_RADIUS; r <= row + STENCIL_RADIUS; r++)
				rslt += a1[r*pitch + col] * coef[r - row + STENCIL_RADIUS];

			e3[row*pitch + col] = rslt >> 8;
		}
	}

	// Wait for returns from the units that were called
	for (int unit = 0; unit < activeCnt; unit++) {
		while (!pAuUnits[unit]->RecvReturn_htmain())
			usleep(1000);
	}

	pHtHif->MemCpy(a3, cp_a3, gridBytes);

	// check results (border included)
	int err_cnt = 0;
	for (uint32_t col = 0; col < pitch; col++) {
		for (uint32_t row = 0; row < (rows+radius*2); row++) {
			if (a3[row*pitch + col] != e3[row*pitch + col]) {
				printf("a3[row=%u, col=%u] is %u, should be %u\n",
					   row, col, a3[row*pitch + col], e3[row*pitch + col]);
				err_cnt++;
			}
		}
	}

	if (err_cnt)
		printf("FAILED: detected %d issues!\n", err_cnt);
	else
		printf("PASSED\n");

	// free memory
	free(a1);
	free(a3);
	free(e3);
	pHtHif->MemFree(cp_a1);
	pHtHif->MemFree(cp_a3);

	// Release every unit that was created, and the table allocated with
	// new[], before the interface itself.
	for (int unit = 0; unit < allocUnitCnt; unit++)
		delete pAuUnits[unit];
	delete[] pAuUnits;
	delete pHtHif;

	return err_cnt;
}
示例#13
0
/*
 * Vector-add host driver.
 *
 * Usage (as parsed below): no arguments for a vector length of 100, or a
 * single positive length.
 *
 * Fills a1[i] = i and a2[i] = 2*i, copies both into memory obtained from the
 * host interface, calls htmain on every unit with its index as the offset and
 * the number of called units as the stride, and sums the values the units
 * return. The result vector is then copied back and each element, as well as
 * the returned sum, is checked against a1[i] + a2[i].
 *
 * Returns the number of detected errors (0 when the run passed). Invalid
 * arguments print the usage text and return 0; a failed host allocation
 * exits with status 1.
 */
int main(int argc, char **argv)
{
	uint64_t i;
	uint64_t vecLen;
	uint64_t *a1, *a2, *a3;

	// check command line args
	if (argc == 1) {
		vecLen = 100;  // default vecLen
	} else if (argc == 2) {
		// Validate as a signed value: vecLen itself is unsigned, so a
		// "<= 0" test on it would let a negative argument through as a
		// huge length.
		int const argLen = atoi(argv[1]);
		if (argLen <= 0) {
			usage(argv[0]);
			return 0;
		}
		vecLen = argLen;
	} else {
		usage(argv[0]);
		return 0;
	}

	printf("Running with vecLen = %llu\n", (unsigned long long)vecLen);
	printf("Initializing arrays\n");
	fflush(stdout);

	a1 = (uint64_t *)malloc(vecLen * sizeof(uint64_t));
	a2 = (uint64_t *)malloc(vecLen * sizeof(uint64_t));
	a3 = (uint64_t *)calloc(vecLen, sizeof(uint64_t));
	if (!a1 || !a2 || !a3) {
		fprintf(stderr, "host vector allocation failed\n");
		exit(1);
	}

	for (i = 0; i < vecLen; i++) {
		a1[i] = i;
		a2[i] = 2 * i;
	}

	CHtHif *pHtHif = new CHtHif();
	int unitCnt = pHtHif->GetUnitCnt();
	printf("#AUs = %d\n", unitCnt);

	// Coprocessor memory arrays
	uint64_t * cp_a1 = (uint64_t*)pHtHif->MemAllocAlign(4 * 1024, vecLen * sizeof(uint64_t));
	uint64_t * cp_a2 = (uint64_t*)pHtHif->MemAllocAlign(4 * 1024, vecLen * sizeof(uint64_t));
	uint64_t * cp_a3 = (uint64_t*)pHtHif->MemAllocAlign(4 * 1024, vecLen * sizeof(uint64_t));

	pHtHif->MemCpy(cp_a1, a1, vecLen * sizeof(uint64_t));
	pHtHif->MemCpy(cp_a2, a2, vecLen * sizeof(uint64_t));
	pHtHif->MemSet(cp_a3, 0, vecLen * sizeof(uint64_t));

	// unitCnt may be reduced below; remember how many units were created
	// so all of them can be released at the end.
	int const allocUnitCnt = unitCnt;
	CHtAuUnit ** pAuUnits = new CHtAuUnit * [allocUnitCnt];
	for (int unit = 0; unit < allocUnitCnt; unit++)
		pAuUnits[unit] = new CHtAuUnit(pHtHif);

	// avoid bank aliasing for performance
	if (unitCnt > 16 && !(unitCnt & 1)) unitCnt -= 1;
	printf("stride = %d\n", unitCnt);

	fflush(stdout);

	pHtHif->SendAllHostMsg(OP1_ADDR, (uint64_t)cp_a1);
	pHtHif->SendAllHostMsg(OP2_ADDR, (uint64_t)cp_a2);
	pHtHif->SendAllHostMsg(RES_ADDR, (uint64_t)cp_a3);
	pHtHif->SendAllHostMsg(VEC_LEN, (uint64_t)vecLen);

	for (int unit = 0; unit < unitCnt; unit++)
		pAuUnits[unit]->SendCall_htmain(unit /*offset*/, unitCnt /*stride*/);

	uint64_t act_sum = 0;
	uint64_t au_sum;
	for (int unit = 0; unit < unitCnt; unit++) {
		while (!pAuUnits[unit]->RecvReturn_htmain(au_sum))
			usleep(1000);
		printf("unit=%-2d: au_sum %llu \n", unit, (unsigned long long)au_sum);
		fflush(stdout);
		act_sum += au_sum;
	}

	printf("RTN: act_sum = %llu\n", (unsigned long long)act_sum);

	pHtHif->MemCpy(a3, cp_a3, vecLen * sizeof(uint64_t));

	// check results
	int err_cnt = 0;
	uint64_t exp_sum = 0;
	for (i = 0; i < vecLen; i++) {
		if (a3[i] != a1[i] + a2[i]) {
			printf("a3[%llu] is %llu, should be %llu\n",
			       (unsigned long long)i, (unsigned long long)a3[i],
			       (unsigned long long)(a1[i] + a2[i]));
			err_cnt++;
		}
		exp_sum += a1[i] + a2[i];
	}
	if (act_sum != exp_sum) {
		printf("act_sum %llu != exp_sum %llu\n",
		       (unsigned long long)act_sum, (unsigned long long)exp_sum);
		err_cnt++;
	}

	if (err_cnt)
		printf("FAILED: detected %d issues!\n", err_cnt);
	else
		printf("PASSED\n");

	// free memory
	free(a1);
	free(a2);
	free(a3);
	pHtHif->MemFreeAlign(cp_a1);
	pHtHif->MemFreeAlign(cp_a2);
	pHtHif->MemFreeAlign(cp_a3);

	// Release every unit that was created, and the table allocated with
	// new[], before the interface itself.
	for (int unit = 0; unit < allocUnitCnt; unit++)
		delete pAuUnits[unit];
	delete[] pAuUnits;
	delete pHtHif;

	return err_cnt;
}
示例#14
0
/*
 * Host driver for the MemSub call: hands one AU unit three input/output
 * buffer pairs (16-bit array, Align16 struct array starting at element 3,
 * 64-bit array starting at element 2) and, after the call returns, checks
 * that the output buffers hold the same patterns that were written into the
 * inputs.
 *
 * Checked ranges: all 11 entries of outArray, outStruct[3..9], and
 * outPrivArray[2..7].
 *
 * Returns the number of mismatching fields (0 when the run passed).
 */
int main(int argc, char **argv)
{
	uint16_t inArray[11];
	uint16_t outArray[11];
	for (int i = 0; i < 11; i++) {
		inArray[i] = 0x6900 | i;
		outArray[i] = 0x5A5A;	// filler that a correct run overwrites
	}

	Align16 inStruct[11];
	Align16 outStruct[11];
	for (int i = 0; i < 11; i += 1) {
		inStruct[i].m_i16[0] = 0x1110 | i;
		inStruct[i].m_i16[1] = 0x2220 | i;
		inStruct[i].m_i8[0] = 0x30 | i;
		inStruct[i].m_i8[1] = 0x40 | i;
		inStruct[i].m_i8[2] = 0x50 | i;
		inStruct[i].m_u16[0] = 0x6660 | i;
		inStruct[i].m_u16[1] = 0x7770 | i;
		inStruct[i].m_u16[2] = 0x8880 | i;
	}

	uint64_t inPrivArray[10];
	uint64_t outPrivArray[10];
	for (uint64_t i = 0; i < 10; i++) {
		inPrivArray[i] = 0x0f1e2d3c4b5a6900ULL | i;
		outPrivArray[i] = 0x5A5A5A5A5A5A5A5AULL;	// filler, as above
	}

	CHtHif *pHtHif = new CHtHif();
	CHtAuUnit *pUnit = new CHtAuUnit(pHtHif);

	printf("#AUs = %d\n", pHtHif->GetUnitCnt());
	fflush(stdout);

	pUnit->SendCall_MemSub(inArray, outArray, &inStruct[3], &outStruct[3], &inPrivArray[2], &outPrivArray[2]);

	// wait for return
	while (!pUnit->RecvReturn_MemSub())
		usleep(1);

	int err_cnt = 0;
	for (int i = 0; i < 11; i += 1) {
		if (outArray[i] != (0x6900 | i))
			err_cnt += 1;
	}

	for (int i = 3; i < 10; i += 1) {
		if (outStruct[i].m_i16[0] != (0x1110 | i)) err_cnt += 1;
		if (outStruct[i].m_i16[1] != (0x2220 | i)) err_cnt += 1;
		if (outStruct[i].m_i8[0] != (0x30 | i)) err_cnt += 1;
		if (outStruct[i].m_i8[1] != (0x40 | i)) err_cnt += 1;
		if (outStruct[i].m_i8[2] != (0x50 | i)) err_cnt += 1;
		if (outStruct[i].m_u16[0] != (0x6660 | i)) err_cnt += 1;
		if (outStruct[i].m_u16[1] != (0x7770 | i)) err_cnt += 1;
		if (outStruct[i].m_u16[2] != (0x8880 | i)) err_cnt += 1;
	}

	for (int i = 2; i < 8; i++) {
		if (outPrivArray[i] != (0x0f1e2d3c4b5a6900LL | i)) err_cnt += 1;
	}

	// Release the unit object as well as the interface; the unit was
	// allocated with new above and was previously never deleted.
	delete pUnit;
	delete pHtHif;

	if (err_cnt)
		printf("FAILED: detected %d issues!\n", err_cnt);
	else
		printf("PASSED\n");

	return err_cnt;
}