int main(int argc, char **argv) { CHtHif *pHtHif = new CHtHif(); printf("#AUs = %d\n", pHtHif->GetUnitCnt()); fflush(stdout); CHtAuUnit *pUnit = new CHtAuUnit(pHtHif); pUnit->SendCall_htmain(); // wait for return uint32_t err_cnt; while (!pUnit->RecvReturn_htmain(err_cnt)) usleep(1); delete pUnit; delete pHtHif; if (err_cnt > 0) printf("FAILED: detected %d issues!\n", err_cnt); else printf("PASSED\n"); return err_cnt; }
int main(int argc, char **argv) { printf("%s\n", argv[0]); CHtHif *pHtHif = new CHtHif(); CHtAuUnit *pUnit = new CHtAuUnit(pHtHif); printf("#AUs = %d\n", pHtHif->GetUnitCnt()); fflush(stdout); uint64_t data[(32*8)+LOOPCNT]; // Max 32 Cycles Per Test for (uint32_t i = 0; i < LOOPCNT; i++) { printf("Running Loop %d of %d...", i+1, LOOPCNT); while (!pUnit->SendCall_main(&data[i])) usleep(1000); while (!pUnit->RecvReturn_main()) usleep(1000); printf("OK\n"); } delete pUnit; delete pHtHif; printf("PASSED\n"); return 0; }
int main(int argc, char **argv) { CHtHif *pHtHif = new CHtHif(); CHtAuUnit *pUnit = new CHtAuUnit(pHtHif); printf("#AUs = %d\n", pHtHif->GetUnitCnt()); fflush(stdout); pUnit->SendCall_htmain(LOOP_CNT); // wait for return uint16_t rtn_errorCnt; while (!pUnit->RecvReturn_htmain(rtn_errorCnt)) usleep(1); delete pHtHif; printf("RTN: errorCnt = %d\n", rtn_errorCnt); if (rtn_errorCnt > 0) printf("FAILED: detected %d issues!\n", rtn_errorCnt); else printf("PASSED\n"); return rtn_errorCnt; }
/*
 * Host driver: issues one main call that is handed a four-word local
 * buffer, waits for the return, and reports the error flag it carries.
 *
 * Returns 1 if the unit reported an error, 0 otherwise.
 */
int main(int argc, char **argv)
{
	CHtHif *pHtHif = new CHtHif();
	CHtAuUnit *pUnit = new CHtAuUnit(pHtHif);

	printf("#AUs = %d\n", pHtHif->GetUnitCnt());
	fflush(stdout);

	// NOTE(review): the buffer is passed uninitialized; presumably the unit
	// only writes to it - confirm against the unit implementation.
	uint64_t data[4];
	pUnit->SendCall_main(data);

	bool bErr;
	while (!pUnit->RecvReturn_main(bErr))
		usleep(1);

	// Release the unit as well as the host interface; the unit was
	// allocated with new and was previously never deleted.
	delete pUnit;
	delete pHtHif;

	if (bErr)
		printf("FAILED\n");
	else
		printf("PASSED\n");

	return bErr ? 1 : 0;
}
int main(int argc, char **argv) { CHtHif *pHtHif = new CHtHif(); CHtSuUnit *pSuUnit = new CHtSuUnit(pHtHif); printf("#AUs = %d\n", pHtHif->GetUnitCnt()); fflush(stdout); for (int i = 0; i < CNT; i++) arr[i] = i; for (int i = 0; i < THREADS; i += 1) while (!pSuUnit->SendCall_htmain((uint64_t)&arr)) ; // wait for return uint32_t err; uint32_t errCnt = 0; for (int i = 0; i < THREADS; i += 1) { while (!pSuUnit->RecvReturn_htmain(err)) usleep(1000); errCnt += err; } delete pHtHif; if (err) printf("FAILED: detected %d issues!\n", err); else printf("PASSED\n"); return err; }
int main(int argc, char **argv) { for (int i = 0; i < CNT; i++) { arr[i * 2] = i; arr[i * 2 + 1] = 0xdeadbeefdeadbeefLL; } CHtHifParams htHifParams; htHifParams.m_bHtHifHugePage = true; CHtHif *pHtHif; try { pHtHif = new CHtHif(&htHifParams); } catch (CHtException &htException) { printf("new CHtHif threw an exception: '%s'\n", htException.GetMsg().c_str()); exit(1); } CHtSuUnit *pUnit = new CHtSuUnit(pHtHif); printf("#AUs = %d\n", pHtHif->GetUnitCnt()); fflush(stdout); pHtHif->SendAllHostMsg(SU_ARRAY_ADDR, (uint64_t)&arr); //pUnit->SendHostMsg(SU_ARRAY_ADDR, (uint64_t)&arr); pUnit->SendCall_htmain(CNT); // wait for return uint32_t rtn_elemCnt; while (!pUnit->RecvReturn_htmain(rtn_elemCnt)) usleep(1); delete pUnit; delete pHtHif; printf("RTN: elemCnt = %d\n", rtn_elemCnt); // check results int err_cnt = 0; for (unsigned i = 0; i < CNT; i++) { if (arr[i * 2 + 1] != i + 1) { printf("arr[%d] is %lld, should be %d\n", i, (long long)arr[i * 2 + 1], i + 1); err_cnt++; } } if (err_cnt) printf("FAILED: detected %d issues!\n", err_cnt); else printf("PASSED\n"); return err_cnt; }
int main(int argc, char **argv) { CHtHif *pHtHif = new CHtHif(); CHtSuUnit *pSuUnit = new CHtSuUnit(pHtHif); printf("#AUs = %d\n", pHtHif->GetUnitCnt()); fflush(stdout); pSuUnit->SendCall_htmain(); // wait for return while (!pSuUnit->RecvReturn_htmain()) usleep(1000); delete pHtHif; printf("PASSED\n"); return 0; }
int main(int argc, char **argv) { for (int i = 0; i < CNT; i++) arr[i] = i; CHtHif *pHtHif = new CHtHif(); CHtAuUnit *pUnit = new CHtAuUnit(pHtHif); printf("#AUs = %d\n", pHtHif->GetUnitCnt()); fflush(stdout); ArgStruct argStruct; argStruct.a = 1; argStruct.b = 2; argStruct.c = 3; argStruct.d = 4; argStruct.e = true; int callCnt = 0; int rtnCnt = 0; while (callCnt < CALL_RTN_CNT || rtnCnt < CALL_RTN_CNT) { if (callCnt < CALL_RTN_CNT && pUnit->SendCall_htmain(argStruct, true)) callCnt += 1; if (rtnCnt < 2 && pUnit->RecvReturn_htmain()) rtnCnt += 1; usleep(1); } delete pUnit; delete pHtHif; int err_cnt = 0; if (err_cnt) printf("FAILED: detected %d issues!\n", err_cnt); else printf("PASSED\n"); return err_cnt; }
uint64_t vadd(uint64_t *a1, uint64_t *a2, uint64_t *a3, uint64_t vecLen) { CHtHif *pHtHif = new CHtHif(); int unitCnt = pHtHif->GetUnitCnt(); CHtAuUnit ** pAuUnits = new CHtAuUnit * [unitCnt]; for (int unitId = 0; unitId < unitCnt; unitId += 1) pAuUnits[unitId] = new CHtAuUnit(pHtHif); int unit = 0; pHtHif->SendAllHostMsg(OP1_ADDR, (uint64_t)a1); pHtHif->SendAllHostMsg(OP2_ADDR, (uint64_t)a2); pHtHif->SendAllHostMsg(RES_ADDR, (uint64_t)a3); pHtHif->SendAllHostMsg(VEC_LEN, (uint64_t)vecLen); for (unit = 0; unit < unitCnt; unit++) { pAuUnits[unit]->SendCall_htmain(unit /*offset*/, unitCnt /*stride*/); } uint64_t sum = 0; uint64_t au_sum; for (int unit = 0; unit < unitCnt; unit++) { while (!pAuUnits[unit]->RecvReturn_htmain(au_sum)) usleep(1000); printf("unit=%-2d: au_sum %llu \n", unit, (long long)au_sum); fflush(stdout); sum += au_sum; } printf("RTN: sum = %llu\n", (long long) sum); delete pHtHif; return sum; }
int main(int argc, char **argv) { if (0 && !tfp && !(tfp = fopen(tfn, "w"))) { fprintf(stderr, "Could not open %s for writing\n", tfn); exit(-1); } CHtHifParams params; //params.m_iBlkTimerUSec = 0; //params.m_oBlkTimerUSec = 0; CHtHif *pHtHif = new CHtHif(); int nau = pHtHif->GetUnitCnt(); CHtAuUnit ** pAuUnits = new CHtAuUnit * [nau]; for (int unit = 0; unit < nau; unit++) pAuUnits[unit] = new CHtAuUnit(pHtHif); time_t time_now = time(NULL); printf("start time = %s", ctime(&time_now)); struct timeval st; gettimeofday(&st, NULL); unsigned int seed = (SEED != -1) ? SEED : (int)st.tv_usec; printf("seed = 0x%x\n", seed); fflush(stdout); srand(seed); int active_cnt = 0; int err_cnt = 0, aerr_cnt = 0; int tot_call = 0, tot_rtn = 0; long long tot_push = 0, tot_pop = 0; state_t *ast = (state_t *)calloc(nau, sizeof(state_t)); for (int au = 0; au < nau; au++) { active_cnt += 1; ast[au].done = ast[au].done_calls = false; ast[au].num_call = 1; if (MAX_CALLS > 1) ast[au].num_call = (rand() % (MAX_CALLS - 1)) + 1; ast[au].send_word = -1; ast[au].recv_word = -1; } while (active_cnt) { for (int au = 0; au < nau; au++) { uint64_t d; // fineto if (ast[au].done) continue; // Call if (!ast[au].done_calls && ast[au].send_word < 0) { int cnt = rand() % MAX_WORDS; if (pAuUnits[au]->SendCall_htmain(au, ast[au].send_call, cnt)) { if (tfp) fprintf(tfp, "SendCall(%d, 0x%x, %d) unit=%d\n", au, ast[au].send_call, cnt, au); tot_call += 1; if (!(tot_call % 250)) fprintf(stderr, "tot_call=%d\n", tot_call); ast[au].send_word = 0; ast[au].num_words[ast[au].send_call] = -2; if (cnt) ast[au].num_words[ast[au].send_call] = cnt; if (ast[au].num_call - 1 == ast[au].send_call) ast[au].done_calls = true; } } // iBlk if (ast[au].num_words[ast[au].send_call] == -2) { ast[au].send_call += 1; ast[au].send_word = -1; } else while (ast[au].send_word >= 0) { d = 0; d |= ast[au].send_word; d |= (uint64_t)ast[au].send_call << 32; d |= (uint64_t)au << 56; if (pAuUnits[au]->SendHostData(1, &d)) { if (tfp) 
fprintf(tfp, "SendHostData(0x%016llx)\n", (long long)d); tot_push += 1; ast[au].send_word += 1; if (ast[au].send_word == ast[au].num_words[ast[au].send_call]) { ast[au].send_call += 1; ast[au].send_word = -1; // flush data pAuUnits[au]->FlushHostData(); } } else { break; } } // Return int errs; if (pAuUnits[au]->RecvReturn_htmain(errs)) { if (tfp) fprintf(tfp, "RecvReturn_htmain(%d) unit=%d\n", errs, au); tot_rtn += 1; aerr_cnt += errs; if (ast[au].recv_word >= 0) { err_cnt += 1; int exp = ast[au].num_words[ast[au].recv_call]; fprintf(stderr, "ERROR: unit%d call %d missing %d words\n", au, ast[au].recv_call, exp - ast[au].recv_word); } ast[au].recv_call += 1; if (ast[au].num_call == ast[au].recv_call) { active_cnt -= 1; ast[au].done = true; } int pcnt = rand() % MAX_WORDS; pAuUnits[au]->SendHostMsg(FLUSH_PERIOD, pcnt); } // oBlk while (pAuUnits[au]->RecvHostData(1, &d)) { if (tfp) fprintf(tfp, "RecvHostData(0x%016llx)\n", (long long)d); tot_pop += 1; if (ast[au].recv_word < 0) ast[au].recv_word = 0; uint64_t exp = 0; exp |= (uint64_t)ast[au].recv_word; exp |= (uint64_t)ast[au].recv_call << 32; exp |= (uint64_t)au << 56; ast[au].recv_word += 1; if (ast[au].recv_word == ast[au].num_words[ast[au].recv_call]) ast[au].recv_word = -1; if (d != exp) { err_cnt += 1; fprintf(stderr, "ERROR: unit%d expected 0x%016llx != 0x%016llx\n", au, (long long)exp, (long long)d); } assert(!err_cnt); } } } time_now = time(NULL); printf("end time = %s", ctime(&time_now)); for (int au = 0; au < nau; au += 1) delete pAuUnits[au]; delete pAuUnits; delete pHtHif; if (err_cnt || aerr_cnt) printf("FAILED: detected %d/%d issues!\n", err_cnt, aerr_cnt); else printf("PASSED\n"); return err_cnt; }
int main(int argc, char **argv) { int debug = 0; int mat_int_len, temp; #ifdef HT_MODEL default_size = 15; #endif #ifdef HT_SYSC default_size = 15; #endif #ifdef HT_VSIM default_size = 3; #endif uint64_t i, j, k; uint64_t aRow, aCol, bRow, bCol; uint64_t *a1, *a2, *a3; // check command line args if (argc <= 2) { // Defaults aRow = default_size; aCol = default_size; bRow = default_size; bCol = default_size; if (argc==2) { if (atoi(argv[1]) == 1 || atoi(argv[1]) == 0) { debug = atoi(argv[1]); } else { usage(argv[0]); } debug = atoi(argv[1])==1 ? 1 : 0; } } else if (argc == 5 || argc == 6) { // Grab Command Line Values aRow = atoi(argv[1]); aCol = atoi(argv[2]); bRow = atoi(argv[3]); bCol = atoi(argv[4]); if (argc==6) debug = atoi(argv[5])==1 ? 1 : 0; if (aRow <= 0 || aCol <= 0 || bRow <= 0 || bCol <= 0) { usage(argv[0]); return 0; } } else { usage(argv[0]); return 0; } if (aCol != bRow) { printf("ERROR: Number of columns in Matrix A (%lld) does not equal the number of rows in Matrix B (%lld)\n\n", (long long)aCol, (long long)bRow); exit(1); } printf("Running with: Matrix A: (%lldx%lld) and Matrix B (%lldx%lld)\n", (long long)aRow, (long long)aCol, (long long)bRow, (long long)bCol); printf("Initializing arrays\n"); fflush(stdout); a1 = (uint64_t *)malloc(aRow * aCol * 8); a2 = (uint64_t *)malloc(bRow * bCol * 8); a3 = (uint64_t *)malloc(aRow * bCol * 8); memset(a3, 0, aRow * bCol * 8); /* * Matrix A and B are stored differently in memory (to make reads more efficient?) * The following are 3x3 examples: (A counts from 8 -> 0, B counts from 0 -> 8) * * Actual Numbers: Memory Locations (by Index) * * A: 8 7 6 A: 0 3 6 * 5 4 5 1 4 7 * 2 1 0 2 5 8 * * B: 0 1 2 B: 0 1 2 * 3 4 5 3 4 5 * 6 7 8 6 7 8 * * I did this to try to make A Rows available on strides, as well as B Columns on strides... 
* */ // Fill Matrix A k = 0; for (i = 0; i < aCol; i++) { for (j = aRow; j > 0; j--) { a1[k] = (aRow*aCol)-aCol*(aRow-j)-i-1; k++; } } // Fill Matrix B for (i = 0; i < bRow*bCol; i++) { a2[i] = i; } //Print Matrices mat_int_len = 1; temp = 0; for(i = 0; i < aRow*bCol; i++) { temp = num_length(a1[i]); mat_int_len = (temp > mat_int_len) ? temp : mat_int_len; } printf("Matrix A:\n"); for (i = 0; i < aRow; i++) { for (j = 0; j < aCol; j++) { printf("%*lld ", mat_int_len, (long long)a1[i+aRow*j]); } printf("\n"); } printf("\n\n"); mat_int_len = 1; temp = 0; for(i = 0; i < aRow*bCol; i++) { temp = num_length(a1[i]); mat_int_len = (temp > mat_int_len) ? temp : mat_int_len; } printf("Matrix B:\n"); for(i = 0; i < bRow*bCol; i++) { if (i > 0) { if (i%bCol == 0) { printf("\n"); } } printf("%*lld ", mat_int_len, (long long)a2[i]); } printf("\n\n"); // Debug - Print Matrix Values at memory locations if (debug) { printf("A - MEM\n"); for (i = 0; i < aRow*aCol; i++) { printf("%lld - %lld\n", (long long)i, (long long)a1[i]); } printf("B - MEM\n"); for (i = 0; i < bRow*bCol; i++) { printf("%lld - %lld\n", (long long)i, (long long)a2[i]); } } CHtHif *pHtHif = new CHtHif(); // Coprocessor memory arrays uint64_t *cp_a1 = (uint64_t *)pHtHif->MemAllocAlign(4 * 1024, aRow * aCol * sizeof(uint64_t)); uint64_t *cp_a2 = (uint64_t *)pHtHif->MemAllocAlign(4 * 1024, bRow * bCol * sizeof(uint64_t)); uint64_t *cp_a3 = (uint64_t *)pHtHif->MemAllocAlign(4 * 1024, aRow * bCol * sizeof(uint64_t)); pHtHif->MemCpy(cp_a1, a1, aRow * aCol * sizeof(uint64_t)); pHtHif->MemCpy(cp_a2, a2, bRow * bCol * sizeof(uint64_t)); pHtHif->MemSet(cp_a3, 0, aRow * bCol * sizeof(uint64_t)); int unitCnt = pHtHif->GetUnitCnt(); CHtAuUnit ** pAuUnits = new CHtAuUnit * [unitCnt]; for (int unitId = 0; unitId < unitCnt; unitId += 1) pAuUnits[unitId] = new CHtAuUnit(pHtHif); printf("#AUs = %d\n", unitCnt); // avoid bank aliasing for performance if (unitCnt > 16 && !(unitCnt & 1)) unitCnt -= 1; printf("stride = %d\n", 
unitCnt); fflush(stdout); pHtHif->SendAllHostMsg(MA_BASE, (uint64_t)cp_a1); pHtHif->SendAllHostMsg(MB_BASE, (uint64_t)cp_a2); pHtHif->SendAllHostMsg(MC_BASE, (uint64_t)cp_a3); pHtHif->SendAllHostMsg(MC_ROW, (uint32_t)aRow); pHtHif->SendAllHostMsg(MC_COL, (uint32_t)bCol); pHtHif->SendAllHostMsg(COMMON, (uint32_t)aCol); for (int unit = 0; unit < unitCnt; unit++) pAuUnits[unit]->SendCall_htmain(unit /*rowOffset*/, unitCnt /*stride*/); for (int unit = 0; unit < unitCnt; unit++) { while (!pAuUnits[unit]->RecvReturn_htmain()) usleep(1000); fflush(stdout); } pHtHif->MemCpy(a3, cp_a3, aRow * bCol * sizeof(uint64_t)); // Print Resulting Matrix mat_int_len = 1; temp = 0; for(i = 0; i < aRow*bCol; i++) { temp = num_length(a3[i]); mat_int_len = (temp > mat_int_len) ? temp : mat_int_len; } printf("\nMatrix C:\n"); for(i = 0; i < aRow*bCol; i++) { if (i > 0) { if (i%bCol == 0) { printf("\n"); } } printf("%*lld ", mat_int_len, (long long)a3[i]); } printf("\n\n"); if (debug) { printf("C - MEM\n"); for (i = 0; i < aRow*bCol; i++) { printf("%lld - %lld\n", (long long)i, (long long)a3[i]); } } // Do error checking int err_cnt = 0; uint64_t rowNum = 0, colNum = 0, calcNum = 0, eleNum = 0; uint64_t *val; val = (uint64_t *)malloc(aRow * bCol * 8); memset(val, 0, aRow * bCol * 8); // Calculate the resulting matrix to check against coprocessor results for (rowNum = 0; rowNum < aRow; rowNum++) { for (colNum = 0; colNum < bCol; colNum++) { for (calcNum = 0; calcNum < aCol; calcNum++) { val[eleNum] += a1[rowNum+(calcNum*aRow)] * a2[colNum+(calcNum*bCol)]; } eleNum++; } } // Check results for (eleNum = 0; eleNum < aRow*bCol; eleNum++) { if (val[eleNum] != a3[eleNum]) { err_cnt++; printf("Found element mismatch at matrix position %lld - found value %lld, expected value %lld.\n", (unsigned long long)eleNum, (unsigned long long)a3[eleNum], (unsigned long long)val[eleNum]); } } if (err_cnt == 0) { // Test Passed printf("PASSED\n\n"); } else { // Test Failed printf("FAILED - error count %d\n\n", 
err_cnt); } // free memory free(a1); free(a2); free(a3); pHtHif->MemFreeAlign(cp_a1); pHtHif->MemFreeAlign(cp_a2); pHtHif->MemFreeAlign(cp_a3); free(val); delete pHtHif; return err_cnt; }
int main(int argc, char **argv) { uint32_t rows, cols; uint32_t *a1, *a3, *e3; int const radius = STENCIL_RADIUS; // check command line args if (argc == 1) { rows = 100; // default rows cols = 100; // default cols } else if (argc == 3) { rows = atoi(argv[1]); if (rows <= 0 || rows > 1023) { usage(argv[1]); return 0; } cols = atoi(argv[2]); if (cols <= 0 || cols > 1023) { usage(argv[2]); return 0; } } else { usage(argv[0]); return 0; } printf("Running with rows = %u, cols = %u\n", rows, cols); printf("Initializing arrays\n"); fflush(stdout); a1 = (uint32_t *)malloc((rows + radius * 2) * (cols + radius * 2) * sizeof(uint32_t)); a3 = (uint32_t *)malloc((rows + radius * 2) * (cols + radius * 2) * sizeof(uint32_t)); memset(a1, 0, (rows + radius * 2) * (cols + radius * 2) * sizeof(uint32_t)); memset(a3, 0, (rows + radius * 2) * (cols + radius * 2) * sizeof(uint32_t)); for (uint32_t r = 0; r < rows; r++) for (uint32_t c = 0; c < cols; c++) { a1[(r+radius)*(cols + radius * 2)+(c+radius)] = ((r+radius) << 8) | (c+radius); } CHtHif *pHtHif = new CHtHif(); int unitCnt = pHtHif->GetUnitCnt(); printf("#AUs = %d\n", unitCnt); CHtAuUnit ** pAuUnits = new CHtAuUnit * [unitCnt]; for (int unit = 0; unit < unitCnt; unit++) pAuUnits[unit] = new CHtAuUnit(pHtHif); // Coprocessor memory arrays uint32_t *cp_a1 = (uint32_t*)pHtHif->MemAlloc((rows+radius*2) * (cols+radius*2) * sizeof(uint32_t)); uint32_t *cp_a3 = (uint32_t*)pHtHif->MemAlloc((rows+radius*2) * (cols+radius*2) * sizeof(uint32_t)); if (!cp_a1 || !cp_a3) { fprintf(stderr, "ht_cp_malloc() failed.\n"); exit(-1); } pHtHif->MemCpy(cp_a1, a1, (rows+radius*2) * (cols+radius*2) * sizeof(uint32_t)); pHtHif->MemSet(cp_a3, 0, (rows+radius*2) * (cols+radius*2) * sizeof(uint32_t)); // avoid bank aliasing for performance if (unitCnt > 16 && !(unitCnt & 1)) unitCnt -= 1; printf("stride = %d\n", unitCnt); fflush(stdout); // Send calls to units uint32_t rowsPerUnit = (cols + unitCnt - 1) / unitCnt; for (int unit = 0; unit < unitCnt; unit++) { 
uint32_t * pSrcAddr = cp_a1 + unit * (cols+radius*2) * rowsPerUnit; uint32_t * pDstAddr = cp_a3 + unit * (cols+radius*2) * rowsPerUnit; uint32_t unitRows = (unit+1)*rowsPerUnit > rows ? (rows - unit*rowsPerUnit) : rowsPerUnit; pAuUnits[unit]->SendCall_htmain((uint64_t)pSrcAddr, (uint64_t)pDstAddr, unitRows, cols); } // generate expected results while waiting for returns e3 = (uint32_t *)malloc((rows+radius*2) * (cols+radius*2) * sizeof(uint32_t)); memset(e3, 0, (rows + radius * 2) * (cols + radius * 2) * sizeof(uint32_t)); uint32_t coef[5] = { STENCIL_COEF2, STENCIL_COEF1, STENCIL_COEF0/2, STENCIL_COEF1, STENCIL_COEF2 }; for (uint32_t row = radius; row < rows+radius; row++) { for (uint32_t col = radius; col < cols+radius; col++) { uint32_t rslt = 0; for (uint32_t c = col - STENCIL_RADIUS; c <= col + STENCIL_RADIUS; c++) rslt += a1[row*(cols+radius*2) + c] * coef[c - col + STENCIL_RADIUS]; for (uint32_t r = row - STENCIL_RADIUS; r <= row + STENCIL_RADIUS; r++) rslt += a1[r*(cols+radius*2) + col] * coef[r - row + STENCIL_RADIUS]; e3[row*(cols+radius*2) + col] = rslt >> 8; } } // Wait for returns for (int unit = 0; unit < unitCnt; unit++) { while (!pAuUnits[unit]->RecvReturn_htmain()) usleep(1000); } pHtHif->MemCpy(a3, cp_a3, (rows+radius*2) * (cols+radius*2) * sizeof(uint32_t)); // check results int err_cnt = 0; for (uint32_t col = 0; col < (cols+radius*2); col++) { for (uint32_t row = 0; row < (rows+radius*2); row++) { if (a3[row*(cols+radius*2) + col] != e3[row*(cols+radius*2) + col]) { printf("a3[row=%u, col=%u] is %u, should be %u\n", row, col, a3[row*(cols+radius*2) + col], e3[row*(cols+radius*2) + col]); err_cnt++; } } } if (err_cnt) printf("FAILED: detected %d issues!\n", err_cnt); else printf("PASSED\n"); // free memory free(a1); free(a3); pHtHif->MemFree(cp_a1); pHtHif->MemFree(cp_a3); delete pHtHif; return err_cnt; }
int main(int argc, char **argv) { uint64_t i; uint64_t vecLen; uint64_t *a1, *a2, *a3; // check command line args if (argc == 1) { vecLen = 100; // default vecLen } else if (argc == 2) { vecLen = atoi(argv[1]); if (vecLen <= 0) { usage(argv[0]); return 0; } } else { usage(argv[0]); return 0; } printf("Running with vecLen = %llu\n", (long long)vecLen); printf("Initializing arrays\n"); fflush(stdout); a1 = (uint64_t *)malloc(vecLen * sizeof(uint64_t)); a2 = (uint64_t *)malloc(vecLen * sizeof(uint64_t)); a3 = (uint64_t *)malloc(vecLen * sizeof(uint64_t)); memset(a3, 0, vecLen * 8); for (i = 0; i < vecLen; i++) { a1[i] = i; a2[i] = 2 * i; } CHtHif *pHtHif = new CHtHif(); int unitCnt = pHtHif->GetUnitCnt(); printf("#AUs = %d\n", unitCnt); // Coprocessor memory arrays uint64_t * cp_a1 = (uint64_t*)pHtHif->MemAllocAlign(4 * 1024, vecLen * sizeof(uint64_t)); uint64_t * cp_a2 = (uint64_t*)pHtHif->MemAllocAlign(4 * 1024, vecLen * sizeof(uint64_t)); uint64_t * cp_a3 = (uint64_t*)pHtHif->MemAllocAlign(4 * 1024, vecLen * sizeof(uint64_t)); pHtHif->MemCpy(cp_a1, a1, vecLen * sizeof(uint64_t)); pHtHif->MemCpy(cp_a2, a2, vecLen * sizeof(uint64_t)); pHtHif->MemSet(cp_a3, 0, vecLen * sizeof(uint64_t)); CHtAuUnit ** pAuUnits = new CHtAuUnit * [unitCnt]; for (int unit = 0; unit < unitCnt; unit++) pAuUnits[unit] = new CHtAuUnit(pHtHif); // avoid bank aliasing for performance if (unitCnt > 16 && !(unitCnt & 1)) unitCnt -= 1; printf("stride = %d\n", unitCnt); fflush(stdout); pHtHif->SendAllHostMsg(OP1_ADDR, (uint64_t)cp_a1); pHtHif->SendAllHostMsg(OP2_ADDR, (uint64_t)cp_a2); pHtHif->SendAllHostMsg(RES_ADDR, (uint64_t)cp_a3); pHtHif->SendAllHostMsg(VEC_LEN, (uint64_t)vecLen); for (int unit = 0; unit < unitCnt; unit++) pAuUnits[unit]->SendCall_htmain(unit /*offset*/, unitCnt /*stride*/); uint64_t act_sum = 0; uint64_t au_sum; for (int unit = 0; unit < unitCnt; unit++) { while (!pAuUnits[unit]->RecvReturn_htmain(au_sum)) usleep(1000); printf("unit=%-2d: au_sum %llu \n", unit, (long 
long)au_sum); fflush(stdout); act_sum += au_sum; } printf("RTN: act_sum = %llu\n", (long long)act_sum); pHtHif->MemCpy(a3, cp_a3, vecLen * sizeof(uint64_t)); // check results int err_cnt = 0; uint64_t exp_sum = 0; for (i = 0; i < vecLen; i++) { if (a3[i] != a1[i] + a2[i]) { printf("a3[%llu] is %llu, should be %llu\n", (long long)i, (long long)a3[i], (long long)(a1[i] + a2[i])); err_cnt++; } exp_sum += a1[i] + a2[i]; //printf("i=%llu: a1=%llu + a2=%llu => a3=%llu\n", // (long long)i, (long long)a1[i], (long long)a2[i], (long long)a3[i]); } if (act_sum != exp_sum) { printf("act_sum %llu != exp_sum %llu\n", (long long)act_sum, (long long)exp_sum); err_cnt++; } if (err_cnt) printf("FAILED: detected %d issues!\n", err_cnt); else printf("PASSED\n"); // free memory free(a1); free(a2); free(a3); pHtHif->MemFreeAlign(cp_a1); pHtHif->MemFreeAlign(cp_a2); pHtHif->MemFreeAlign(cp_a3); delete pHtHif; return err_cnt; }
int main(int argc, char **argv) { uint16_t inArray[11]; uint16_t outArray[11]; for (int i = 0; i < 11; i++) { inArray[i] = 0x6900 | i; outArray[i] = 0x5A5A; } Align16 inStruct[11]; Align16 outStruct[11]; for (int i = 0; i < 11; i += 1) { inStruct[i].m_i16[0] = 0x1110 | i; inStruct[i].m_i16[1] = 0x2220 | i; inStruct[i].m_i8[0] = 0x30 | i; inStruct[i].m_i8[1] = 0x40 | i; inStruct[i].m_i8[2] = 0x50 | i; inStruct[i].m_u16[0] = 0x6660 | i; inStruct[i].m_u16[1] = 0x7770 | i; inStruct[i].m_u16[2] = 0x8880 | i; } uint64_t inPrivArray[10]; uint64_t outPrivArray[10]; for (uint64_t i = 0; i < 10; i++) { inPrivArray[i] = 0x0f1e2d3c4b5a6900ULL | i; outPrivArray[i] = 0x5A5A5A5A5A5A5A5AULL; } CHtHif *pHtHif = new CHtHif(); CHtAuUnit *pUnit = new CHtAuUnit(pHtHif); printf("#AUs = %d\n", pHtHif->GetUnitCnt()); fflush(stdout); pUnit->SendCall_MemSub(inArray, outArray, &inStruct[3], &outStruct[3], &inPrivArray[2], &outPrivArray[2]); while (!pUnit->RecvReturn_MemSub()) usleep(1); int err_cnt = 0; for (int i = 0; i < 11; i += 1) { if (outArray[i] != (0x6900 | i)) err_cnt += 1; } for (int i = 3; i < 10; i += 1) { if (outStruct[i].m_i16[0] != (0x1110 | i)) err_cnt += 1; if (outStruct[i].m_i16[1] != (0x2220 | i)) err_cnt += 1; if (outStruct[i].m_i8[0] != (0x30 | i)) err_cnt += 1; if (outStruct[i].m_i8[1] != (0x40 | i)) err_cnt += 1; if (outStruct[i].m_i8[2] != (0x50 | i)) err_cnt += 1; if (outStruct[i].m_u16[0] != (0x6660 | i)) err_cnt += 1; if (outStruct[i].m_u16[1] != (0x7770 | i)) err_cnt += 1; if (outStruct[i].m_u16[2] != (0x8880 | i)) err_cnt += 1; } for (int i = 2; i < 8; i++) { if (outPrivArray[i] != (0x0f1e2d3c4b5a6900LL | i)) err_cnt += 1; } delete pHtHif; if (err_cnt) printf("FAILED: detected %d issues!\n", err_cnt); else printf("PASSED\n"); return err_cnt; }