void CPersCtl::PersCtl() { if (PR_htValid) { switch (PR_htInst) { case CTL_INST1: { BUSY_RETRY(SendCallBusy_inst1()); SendCall_inst1(CTL_INST2); break; } case CTL_INST2: { BUSY_RETRY(SendCallBusy_Inst22()); SendCall_Inst22(CTL_RTN); break; } case CTL_RTN: { BUSY_RETRY(SendReturnBusy_htmain()); SendReturn_htmain(); break; } default: assert(0); } } }
void CPersTst::PersTst() { if (PR_htValid) { switch (PR_htInst) { case TST_READ: { if (ReadMemBusy()) { HtRetry(); break; } P_err = 0; P_gvarAddr = 0; ReadMem_gvar(P_memAddr, P_gvarAddr, P_gvarAddr, 0); ReadMemPause(TST_CHK); } break; case TST_CHK: { if (SendCallBusy_tst2()) { HtRetry(); break; } if (GR_gvar[0].data != 0xdeadbeef00001234ULL) { HtAssert(0, 0); P_err += 1; } SendCall_tst2(TST_CALL, P_memAddr, P_err); } break; case TST_CALL: { if (SendCallBusy_tst3()) { HtRetry(); break; } SendCall_tst3(TST_RTN, P_memAddr); } break; case TST_RTN: { if (SendReturnBusy_htmain()) { HtRetry(); break; } SendReturn_htmain(P_err); } break; default: assert(0); } } }
void CPersCtl::PersCtl() { if (PR_htValid) { switch (PR_htInst) { case CTL_ENTRY: { HtContinue(CTL_FORK); } break; case CTL_FORK: { if (SendCallBusy_barrier()) { HtRetry(); break; } uint16_t threadCnt = (1 << BARRIER_HTID_W) * BARRIER_REPL_CNT; SendCallFork_barrier(CTL_JOIN, threadCnt, P_threadCnt); // Check if end of forks P_threadCnt += 1; if (P_threadCnt == threadCnt) // Return to host interface RecvReturnPause_barrier(CTL_LOOP); else HtContinue(CTL_FORK); } break; case CTL_JOIN: { RecvReturnJoin_barrier(); if (P_error) P_errCnt += 1; } break; case CTL_LOOP: { if (SendReturnBusy_htmain()) { HtRetry(); break; } SendMsg_clr(true); P_testCnt += 1; P_threadCnt = 0; if (P_testCnt == 50) SendReturn_htmain(P_errCnt); else HtContinue(CTL_FORK); } break; default: assert(0); } } }
void CPersVimg::PersVimg() { if (PR_htValid) { switch (PR_htInst) { case VIMG_ENTRY: { if (SendCallBusy_veInfo()) { HtRetry(); break; } SendCall_veInfo(VIMG_FORK, 0, PR_jobId, PR_pJobInfo); } break; case VIMG_FORK: { if (SendCallBusy_vsm() || SendCallBusy_vctl() || SendCallBusy_vwm()) { HtRetry(); break; } SendCallFork_vsm(VIMG_VSM_JOIN, 0); SendCallFork_vctl(VIMG_VCTL_JOIN, 0); SendCallFork_vwm(VIMG_VWM_JOIN, 0); RecvReturnPause_vsm(VIMG_VSM_CONT); } break; case VIMG_VSM_JOIN: { RecvReturnJoin_vsm(); } break; case VIMG_VCTL_JOIN: { RecvReturnJoin_vctl(); } break; case VIMG_VWM_JOIN: { RecvReturnJoin_vwm(); } break; case VIMG_VSM_CONT: { RecvReturnPause_vctl(VIMG_VCTL_CONT); } break; case VIMG_VCTL_CONT: { RecvReturnPause_vwm(VIMG_VWM_CONT); } break; case VIMG_VWM_CONT: { if (SendReturnBusy_htmain()) { HtRetry(); break; } SendReturn_htmain(PR_jobId); } break; default: assert(0); } } }
void CPersCtl::PersCtl() { if (PR_htValid) { switch (PR_htInst) { case ENTRY: { if (SendCallBusy_gv1() || SendCallBusy_gv2()) { HtRetry(); break; } SendCallFork_gv1(JOIN_GV1); SendCallFork_gv2(JOIN_GV2); if (PR_forkCnt < 8) HtContinue(ENTRY); else HtContinue(PAUSE_GV1); P_forkCnt += 1; break; } case JOIN_GV1: { RecvReturnJoin_gv1(); break; } case JOIN_GV2: { RecvReturnJoin_gv2(); break; } case PAUSE_GV1: { RecvReturnPause_gv1(PAUSE_GV2); break; } case PAUSE_GV2: { RecvReturnPause_gv1(RETURN); break; } case RETURN: { if (SendReturnBusy_htmain()) { HtRetry(); break; } SendReturn_htmain(); break; } default: assert(0); } } }
void CPersStream::PersStream() { if (PR_htValid) { switch (PR_htInst) { // Main entry point from Host (htmain call) // P_rcvData => 0 (init) case STRM_RECV: { if (RecvHostDataBusy()) { HtRetry(); break; } // Receive data until we see a DataMarker from the host if (RecvHostDataMarker()) { HtContinue(STRM_RTN); } else { // Store received data into private variable recvData P_rcvData = RecvHostData(); HtContinue(STRM_ECHO); } } break; case STRM_ECHO: { if (SendHostDataBusy()) { HtRetry(); break; } // Echo data to back host SendHostData(PR_rcvData); HtContinue(STRM_RECV); } break; case STRM_RTN: { if (SendReturnBusy_htmain()) { HtRetry(); break; } // Return number of bytes seen to the host SendReturn_htmain(); } break; default: assert(0); } } }
void CPersCtl::PersCtl() { if (PR_htValid) { switch (PR_htInst) { case ENTRY: { P_errorCnt = 0; P_loopCnt = 0; HtContinue(CALL_A); } break; case CALL_A: { if (P_loopCnt == P_loopCntLimit) { RecvReturnPause_ModA(RETURN); break; } if (SendCallBusy_ModA()) { HtRetry(); break; } SendCallFork_ModA(CALL_A_JOIN, P_loopCnt & 0xff, 0); P_loopCnt += 1; HtContinue(CALL_A); } break; case CALL_A_JOIN: { ht_uint24 expData = 0x1B1B00 + P_rtnInData; if (expData != P_outData) P_errorCnt += 1; RecvReturnJoin_ModA(); } break; case RETURN: { if (SendReturnBusy_htmain()) { HtRetry(); break; } SendReturn_htmain(P_errorCnt); } break; default: assert(0); } } }
void CPersVadd::PersVadd() { if (PR_htValid) { switch (PR_htInst) { case VADD_ENTER: { BUSY_RETRY(ReadStreamBusy_A()); BUSY_RETRY(ReadStreamBusy_B()); BUSY_RETRY(WriteStreamBusy_C()); S_sum = 0; if (!PR_vecLen) { HtContinue(VADD_RETURN); break; } MemAddr_t addrA = SR_op1Addr + PR_offset * sizeof(uint64_t); MemAddr_t addrB = SR_op2Addr + PR_offset * sizeof(uint64_t); MemAddr_t addrC = SR_resAddr + PR_offset * sizeof(uint64_t); ReadStreamOpen_A(addrA, PR_vecLen); ReadStreamOpen_B(addrB, PR_vecLen); WriteStreamOpen_C(addrC, PR_vecLen); WriteStreamPause_C(VADD_RETURN); break; } case VADD_RETURN: { BUSY_RETRY(SendReturnBusy_htmain()); SendReturn_htmain(S_sum); } break; default: assert(0); } } if (ReadStreamReady_A() && ReadStreamReady_B() && WriteStreamReady_C()) { uint64_t a = ReadStream_A(); uint64_t b = ReadStream_B(); uint64_t c = a + b; S_sum += c; WriteStream_C(c); } }
void CPersCtl::PersCtl() { if (PR_htValid) { switch (PR_htInst) { case CTL_ENTRY: { if (SendCallBusy_echo()) { HtRetry(); break; } if (P_count < P_length) { //printf("Sending %d\n", P_count); SendCallFork_echo(CTL_JOIN, P_count); P_count++; HtContinue(CTL_ENTRY); } else { RecvReturnPause_echo(CTL_RTN); } } break; case CTL_JOIN: { //printf("Got %d, adding to %d, ", P_result, P_sum); P_sum += P_result; //printf("result %d\n", P_sum); RecvReturnJoin_echo(); } break; case CTL_RTN: { if (SendReturnBusy_htmain()) { HtRetry(); break; } SendReturn_htmain(P_sum); } break; default: assert(0); } } }
void CPersCtl::PersCtl() { if (PR_htValid) { switch (PR_htInst) { case CTL_ROW: { if (SendCallBusy_row()) { HtRetry(); break; } // Generate a seperate thread for each row of the result matrix if (P_rowIdx < SR_mcRow) { SendCallFork_row(CTL_JOIN, P_rowIdx, 0); HtContinue(CTL_ROW); P_rowIdx += P_rowStride; } else { RecvReturnPause_row(CTL_RTN); } } break; case CTL_JOIN: { RecvReturnJoin_row(); } break; case CTL_RTN: { if (SendReturnBusy_htmain()) { HtRetry(); break; } // Finished calculating result matrix SendReturn_htmain(); } break; default: assert(0); } } }
void CPersStart::PersStart() { if (PR_htValid) { switch (PR_htInst) { // Main entry point from Main.cpp // Receives data from calling funtion into private startData variable case START_ENTRY: { if (SendCallBusy_funcA()) { HtRetry(); break; } // Shift and append a byte to prove we are manipulating the expected data P_startData = (uint64_t)(P_startData << 4) | 0xb; // Pass control to the Exec1 module with a parameter - private startData variable // Upon return, start from START_EXIT instruction SendCall_funcA(START_EXIT, P_startData); } break; case START_EXIT: { if (SendReturnBusy_htmain()) { HtRetry(); break; } P_startData = (uint64_t)(P_startData << 4) | 0xf; // Exit application and return to Main.cpp with private startData variable SendReturn_htmain(P_startData); break; } default: assert(0); } } }
void CPersOver::PersOver() { if (PR_htValid) { switch (PR_htInstr) { case OVER_RD: { BUSY_RETRY(ReadMemBusy()); ReadMem_data(P_addr); ReadMemPause(OVER_WR); } break; case OVER_WR: { BUSY_RETRY(WriteMemBusy()); WriteMem(P_addr, ~PR_data); WriteMemPause(OVER_RSM); } break; case OVER_RSM: { S_bResume = true; HtPause(OVER_RTN); } break; case OVER_RTN: { BUSY_RETRY(SendReturnBusy_htmain()); SendReturn_htmain(); } break; default: assert(0); } } if (SR_bResume) { S_bResume = false; HtResume(0); } }
void CPersCtl::PersCtl() { if (PR_htValid) { switch (PR_htInst) { case CTL_ENTRY: { S_sum = 0; P_result = 0; HtContinue(CTL_ADD); } break; case CTL_ADD: { BUSY_RETRY(SendCallBusy_add()); if (P_vecIdx < SR_vecLen) { SendCallFork_add(CTL_JOIN, P_vecIdx); HtContinue(CTL_ADD); P_vecIdx += P_vecStride; } else { RecvReturnPause_add(CTL_RTN); } } break; case CTL_JOIN: { S_sum += P_result; RecvReturnJoin_add(); } break; case CTL_RTN: { BUSY_RETRY(SendReturnBusy_htmain()); SendReturn_htmain(S_sum); } break; default: assert(0); } } }
void CPersCtl::PersCtl() { if (PR_htValid) { switch (PR_htInst) { case CTL_ENTRY: { printf("Task: %d\n",PR_task); if(PR_task == CONV_FORWARD ){ BUSY_RETRY(SendCallBusy_conv_fwd()); SendCall_conv_fwd(CTL_RTN, PR_rank, PR_rankStride); } else if(PR_task == CONV_BACKWARD_DATA ){ BUSY_RETRY(SendCallBusy_load_filters()); SendCall_load_filters(CTL_RTN, PR_rank, PR_rankStride, PR_task); } else if(PR_task == CONV_BACKWARD_BIAS && PR_rank == 0 ){ BUSY_RETRY(SendCallBusy_conv_back_bias()); SendCall_conv_back_bias(CTL_RTN); } else if(PR_task == CONV_BACKWARD_FILTER ){ BUSY_RETRY(SendCallBusy_load_filters()); SendCall_load_filters(CTL_RTN, PR_rank, PR_rankStride, PR_task); } else{ HtContinue(CTL_RTN); } } break; case CTL_RTN: { BUSY_RETRY(SendReturnBusy_htmain()); SendReturn_htmain(); } break; default: assert(0); } } }
void CPersFunc::PersFunc() { if (PR1_htValid) { switch (PR1_htInst) { case TEST: { { uint8_t o = 6; uint8_t i = 7; f3(i, o); if (o != 13) { HtAssert(0, (uint32_t)o); P1_err += 1; } } { uint8_t a=3; uint8_t b=4; uint8_t c1 = f2(a, b); uint8_t c2 = f2(a); uint8_t c3 = f2(); if (c1 != 7 || c2 != 5 || c3 != 3) { HtAssert(0, (uint32_t)((c3 << 16) | (c2 << 8) | c1)); P1_err += 1; } } { int d=1; s1 a; a.m_f1[0] = 1; a.m_f1[0] += 1; a.m_f1[0] ++; a.m_f1[(ht_uint2)d] = 0; a.m_f1[(ht_uint2)d] += 1; a.m_f1[d&3] ++; f1(a.m_f1[0], a.m_f1[(ht_uint2)d]); if (a.m_f1[0] != 3) { HtAssert(0, (uint32_t)a.m_f1[0]); P1_err += 1; } if (a.m_f1[(ht_uint2)d] != 5) { HtAssert(0, (uint32_t)a.m_f1[(ht_uint2)d]); P1_err += 1; } } { int d=1; uint8_t a[2] = { 0 }; a[0] = 1; a[0] += 1; a[0] ++; a[(ht_uint1)d] = 0; a[(ht_uint1)d] += 1; a[d&1] ++; f1(a[0], a[(ht_uint1)d]); if (a[0] != 3) { HtAssert(0, (uint32_t)a[0]); P1_err += 1; } if (a[(ht_uint1)d] != 5) { HtAssert(0, (uint32_t)a[(ht_uint1)d]); P1_err += 1; } } { ht_uint2 d=2; ht_uint2 e = 1; s2 a[4]; a[0].m_f1[0] = 1; a[1].m_f1[0] = 1; a[2].m_f1[0] = 1; a[3].m_f1[0] = 1; a[d].m_f1[0] += 1; a[2].m_f1[0] ++; a[e].m_f1[(ht_uint2)d] = 0; a[1].m_f1[(ht_uint2)d] += 1; a[e].m_f1[d&3] ++; f1(a[d].m_f1[0], a[e].m_f1[d]); if (a[d].m_f1[0] != 3) { HtAssert(0, (uint32_t)a[d].m_f1[0]); P1_err += 1; } if (a[e].m_f1[d] != 5) { HtAssert(0, (uint32_t)a[e].m_f1[d]); P1_err += 1; } } { T1_bool = true; P1_s4.m_u8 = 1; f4(P1_s4); if (P1_s4.m_u8 != 2) { HtAssert(0, (uint32_t)P1_s4.m_u8); P1_err += 1; } } if (P1_err) P1_cnt = 128; HtContinue(RTN); } break; case RTN: { if (SendReturnBusy_htmain()) { HtRetry(); break; } // let HtAssert propagate if (P1_cnt) { P1_cnt -= 1; HtContinue(RTN); } SendReturn_htmain(P1_err); } break; default: assert(0); } } }
void CPersCtl::PersCtl() { if (PR_htValid) { switch (PR_htInst) { // Main Entry Point from Host -> CTL_ENTRY // Check if AU is unused (lastIdx == -1) and return if so // P_rcvAu is technically created here, but never used (is in for debug if needed) // Continue to CTL_RECV to listen for Host Data case CTL_ENTRY: { if (P_lastIdx == -1) { HtContinue(CTL_RTN); } HtContinue(CTL_RECV); } break; // Store Received Host Data into P_rcvIdx // Dispatch threads to process the received Index // Continue listening until P_rcvIdx == S_lastIdx, at which point // wait for all threads to return before continuing to CTL_RTN case CTL_RECV: { if (RecvHostDataBusy() || SendCallBusy_process()) { HtRetry(); break; } P_rcvIdx = (uint32_t)RecvHostData(); if (P_rcvIdx != (uint32_t)P_lastIdx) { SendCallFork_process(CTL_SEND, P_rcvIdx); HtContinue(CTL_RECV); } else { SendCallFork_process(CTL_SEND, P_rcvIdx); RecvReturnPause_process(CTL_RTN); } } break; // Join spawned Process threads // The P_sndIdx and P_errRcv variables are passed in as these threads return, // send that back to the Host to signal that index being completed // Accumulate errors from the returning thread // P_sndIdx should be the same as P_rcvIdx for that thread case CTL_SEND: { if (SendHostDataBusy()) { HtRetry(); break; } P_errs += P_errRcv; SendHostData((uint64_t)P_sndIdx); RecvReturnJoin_process(); } break; // Done with all work for this AU // Return to the Host with the number of total errors seen case CTL_RTN: { if (SendReturnBusy_htmain()) { HtRetry(); break; } SendReturn_htmain(P_errs); } break; default: assert(0); } } }
void CPersStream::PersStream() { if (PR_htValid) { switch (PR_htInst) { // Main entry point from Main.cpp // P_rcvAu, P_rcvCnt are populated through call. case STRM_IDLE: { if (SendReturnBusy_htmain()) { HtRetry(); break; } #ifndef _HTV printf("SysC: AU %2d - Processing\n", P_rcvAu); #endif if (P_rcvCnt) { HtContinue(STRM_RECV); } else { // There are no calls to receive...simply return SendReturn_htmain(0); } } break; case STRM_RECV: { if (RecvHostDataBusy()) { HtRetry(); break; } // Store received data into pricate variable recvData P_recvData = RecvHostData(); HtContinue(STRM_SEND); } break; case STRM_SEND: { if (SendHostDataBusy() || SendReturnBusy_htmain()) { HtRetry(); break; } // Generate an expected value to compare against received data uint64_t expectedData = 0LL; expectedData |= (uint64_t)((uint64_t)P_rcvAu << 48); expectedData |= (uint64_t)(P_wordCnt + 1); if (expectedData != P_recvData) { #ifndef _HTV printf("SysC: WARNING - Expected Data did not match Received data!\n"); printf(" 0x%016llx != 0x%016llx\n", (unsigned long long)expectedData, (unsigned long long)P_recvData); #endif P_errs += 1; } HtAssert(!P_errs, 0); // Send generated data back to the host // More error checking will be done there SendHostData(expectedData); P_wordCnt += 1; // Check count so far.. // Either return to Main.cpp or continue reading in values if (P_wordCnt == P_rcvCnt) { SendReturn_htmain(P_errs); } else { HtContinue(STRM_RECV); } } break; default: assert(0); } } }
void CPersInc::PersInc() { if (PR_htValid) { switch (PR_htInst) { case INC_INIT: { P_loopCnt = 0; P_reqCnt = 0; // Set address for reading memory response data P_arrayMemRdPtr = PR_htId; HtContinue(INC_READ); } break; case INC_READ: { if (ReadMemBusy() || SendReturnBusy_htmain()) { HtRetry(); break; } // Check if end of loop if (P_loopCnt == P_elemCnt) { // Return to host interface SendReturn_htmain(P_loopCnt); } else { // Calculate memory read address sc_uint<MEM_ADDR_W> memRdAddr = (sc_uint<MEM_ADDR_W>)(SR_arrayAddr + (((P_loopCnt + P_reqCnt) * 2) << 3)); sc_uint<2> rdDstId = P_reqCnt; bool bLast = P_reqCnt == 3; // Issue read request to memory switch (rdDstId) { case 0: ReadMem_arrayMem1Fld1(memRdAddr, PR_htId); break; case 1: ReadMem_arrayMem1Fld2(memRdAddr, PR_htId); break; case 2: ReadMem_arrayMem2Fld1(memRdAddr, PR_htId); break; case 3: ReadMem_arrayMem2Fld2(memRdAddr, PR_htId); break; } if (bLast) { P_reqCnt = 0; ReadMemPause(INC_WRITE); } else { P_reqCnt += 1; HtContinue(INC_READ); } } } break; case INC_WRITE: { if (WriteMemBusy()) { HtRetry(); break; } // Increment memory data uint64_t memWrData = 0; sc_uint<2> rdDstId = P_reqCnt; switch (rdDstId) { case 0: memWrData = GR_arrayMem1.fld1 + 1; break; case 1: memWrData = GR_arrayMem1.fld2 + 1; break; case 2: memWrData = GR_arrayMem2.fld1 + 1; break; case 3: memWrData = GR_arrayMem2.fld2 + 1; break; } // Calculate memory write address sc_uint<MEM_ADDR_W> memWrAddr = (sc_uint<MEM_ADDR_W>)(SR_arrayAddr + (((P_loopCnt + P_reqCnt) * 2 + 1) << 3)); bool bLast = P_reqCnt == 3; // Issue write memory request WriteMem(memWrAddr, memWrData); if (bLast) { // Increment loop count P_loopCnt = P_loopCnt + 4; P_reqCnt = 0; WriteMemPause(INC_READ); } else { P_reqCnt += 1; HtContinue(INC_WRITE); } } break; default: assert(0); } } }
// Vadd persona, variant with stream and shared state indexed by PR_htId.
// VADD_RESET sends a host message describing XDIM_LEN and TYPE_SIZE.
// VADD_ENTER initialises the per-thread shared state and stream addresses.
// VADD_OPEN opens read streams A and B and write stream C, once per y index.
// VADD_RETURN returns S_sum[PR_htId] to htmain.
// NOTE(review): in this view the function's closing brace and any code that
// follows the `if (PR_htValid)` block are not present — confirm against the
// full file.
void CPersVadd::PersVadd() {
	if (PR_htValid) {
		switch (PR_htInst) {
		case VADD_RESET:
			// Retry until the delay counter reaches 500 and the host message
			// interface is free
			if (SR_msgDelay < 500 || SendHostMsgBusy()) {
				S_msgDelay += 1;
				HtRetry();
				break;
			}
			SendHostMsg(VADD_TYPE_SIZE, (XDIM_LEN << 8) | TYPE_SIZE);
			HtTerminate();
			break;
		case VADD_ENTER:
			// Reset this thread's indices and sum; record its dimensions
			S_yIdx[PR_htId] = 0;
			S_yDimLen[PR_htId] = PR_yDimLen;
			S_xIdx[PR_htId] = 0;
			S_xDimLen[PR_htId] = PR_xDimLen;
			S_sum[PR_htId] = 0;
			// Starting addresses: base address plus this thread's y offset
			P_addrA = SR_addrA + PR_yAddrOff;
			P_addrB = SR_addrB + PR_yAddrOff;
			P_addrC = SR_addrC + PR_yAddrOff;
			HtContinue(VADD_OPEN);
			break;
		case VADD_OPEN:
			// Open read stream A, once for each xDim to be processed
			if (PR_yOpenAIdx < PR_yDimLen && !ReadStreamBusy_A(PR_htId)) {
				// Remaining element count, clamped to 0x3f in the open call
				ht_uint32 remLen = (ht_uint32)((PR_yDimLen - PR_yOpenAIdx) * PR_xDimLen);
				ReadStreamOpen_A(PR_htId, PR_addrA, remLen > 0x3f ? (ht_uint6)0x3f : (ht_uint6)remLen, P_yOpenAIdx);
				P_addrA+= PR_xDimLen * TYPE_SIZE;
				P_yOpenAIdx += 1;
			}
			// Open read stream B, once for each xDim to be processed
			if (PR_yOpenBIdx < PR_yDimLen && !ReadStreamBusy_B(PR_htId)) {
				ReadStreamOpen_B(PR_htId, PR_addrB, PR_xDimLen);
				P_addrB += PR_xDimLen * TYPE_SIZE;
				P_yOpenBIdx += 1;
			}
			// Open write stream, once for each xDim to be processed
			if (PR_yOpenCIdx < SR_yDimLen[PR_htId] && !WriteStreamBusy_C(PR_htId)) {
				// The open call's argument list depends on the build-time
				// stream configuration macros
#if VADD_STRM_RSP_GRP_HTID == 0 && VADD_HTID_W == 0 && VADD_RSP_GRP_W > 0
				WriteStreamOpen_C(PR_htId, 1u, PR_addrC);
#elif VADD_STRM_RSP_GRP_HTID || VADD_HTID_W == 0
				WriteStreamOpen_C(PR_htId, PR_addrC);
#else
				WriteStreamOpen_C(PR_htId, PR_htId ^ 1, PR_addrC);
#endif
				P_addrC += PR_xDimLen * TYPE_SIZE;
				P_yOpenCIdx += 1;
			}
			// Once the registered open counts for all three streams equal
			// yDimLen, pause on the write stream; otherwise repeat VADD_OPEN
			if (PR_yOpenAIdx == PR_yDimLen && PR_yOpenBIdx == PR_yDimLen && PR_yOpenCIdx == PR_yDimLen)
#if VADD_STRM_RSP_GRP_HTID == 0 && VADD_HTID_W == 0 && VADD_RSP_GRP_W > 0
				WriteStreamPause_C(1, VADD_RETURN);
#elif VADD_STRM_RSP_GRP_HTID || VADD_HTID_W == 0
				WriteStreamPause_C(VADD_RETURN);
#else
				WriteStreamPause_C(PR_htId ^ 1, VADD_RETURN);
#endif
			else
				HtContinue(VADD_OPEN);
			break;
		case VADD_RETURN: {
			BUSY_RETRY(SendReturnBusy_htmain());
			// Return this thread's accumulated sum
			SendReturn_htmain(S_sum[PR_htId]);
		}
		break;
		default:
			assert(0);
		}
	}
void CPersStencil::PersStencil() { if (PR_htValid) { switch (PR_htInst) { case STENCIL_ENTER: { // Split offset calucation from source stencil to destination location over // two cytles for timing. OFF = ((Y_ORIGIN * (PR_cols + X_SIZE-1) + X_ORIGIN) * sizeof(StType_t); // uint32_t offset = Y_ORIGIN * (PR_cols + X_SIZE-1); S_rdAddr = PR_rdAddr; S_wrAddr = offset; S_rdRowIdx = 0; S_wrRowIdx = Y_ORIGIN; S_cols = PR_cols; S_rows = PR_rows; S_coef = PR_coef; HtContinue(STENCIL_START); } break; case STENCIL_START: { S_bStart = true; S_wrAddr = ((uint32_t)S_wrAddr + X_ORIGIN) * sizeof(StType_t) + PR_wrAddr; StencilBufferInit_5x5r2((ht_uint11)PR_cols, (ht_uint11)PR_rows); HtContinue(STENCIL_WAIT); } break; case STENCIL_WAIT: { if (S_wrRowIdx == S_rows) WriteStreamPause(STENCIL_RETURN); else HtContinue(STENCIL_WAIT); } break; case STENCIL_RETURN: { BUSY_RETRY(SendReturnBusy_htmain()); SendReturn_htmain(); } break; default: assert(0); } } // start read stream per row if (SR_bStart && SR_rdRowIdx < SR_rows + Y_SIZE-1 && !ReadStreamBusy()) { ReadStreamOpen(SR_rdAddr, SR_cols + X_SIZE-1); S_rdAddr += (SR_cols + X_SIZE-1) * sizeof(StType_t); S_rdRowIdx += 1; } // start write stream per row if (SR_bStart && SR_wrRowIdx < SR_rows + Y_ORIGIN && !WriteStreamBusy()) { WriteStreamOpen(SR_wrAddr, SR_cols); S_wrAddr += (SR_cols + X_SIZE-1) * sizeof(StType_t); S_wrRowIdx += 1; } CStencilBufferIn_5x5r2 stIn; stIn.m_bValid = ReadStreamReady() && WriteStreamReady(); stIn.m_data = stIn.m_bValid ? 
ReadStream() : 0; CStencilBufferOut_5x5r2 stOut; StencilBuffer_5x5r2(stIn, stOut); // // compute stencil // T1_bValid = stOut.m_bValid; for (uint32_t x = 0; x < X_SIZE; x += 1) for (uint32_t y = 0; y < Y_SIZE; y += 1) T1_mult[y][x] = (StType_t)(stOut.m_data[y][x] * SR_coef.m_coef[y][x]); for (uint32_t x = 0; x < X_SIZE; x += 1) { T2_ysum[x] = 0; for (uint32_t y = 0; y < Y_SIZE; y += 1) T2_ysum[x] += T2_mult[y][x]; } T3_rslt = 0; for (uint32_t x = 0; x < X_SIZE; x += 1) T3_rslt += T3_ysum[x]; if (T3_bValid) WriteStream(T3_rslt); }
void CPersVadd::PersVadd() { if (PR_htValid) { switch (PR_htInst) { case VADD_RESET: if (SR_msgDelay < 500 || SendHostMsgBusy()) { S_msgDelay += 1; HtRetry(); break; } SendHostMsg(VADD_TYPE_SIZE, (XDIM_LEN << 8) | TYPE_SIZE); HtTerminate(); break; case VADD_ENTER: S_yIdx = 0; S_yDimLen = PR_yDimLen; S_xIdx = 0; S_xDimLen = PR_xDimLen; S_sum = 0; S_addrA += PR_yAddrOff; S_addrB += PR_yAddrOff; S_addrC += PR_yAddrOff; WriteStreamPause_C(VADD_OPEN); break; case VADD_OPEN: // Open read stream A, once for each xDim to be processed if (PR_yOpenAIdx < PR_yDimLen && !ReadStreamBusy_A()) { ht_uint32 remLen = (ht_uint32)((PR_yDimLen - PR_yOpenAIdx) * PR_xDimLen); ReadStreamOpen_A(SR_addrA, remLen > 0x3f ? (ht_uint6)0x3f : (ht_uint6)remLen, P_yOpenAIdx); S_addrA += PR_xDimLen * TYPE_SIZE; P_yOpenAIdx += 1; } // Open read stream B, once for each xDim to be processed if (PR_yOpenBIdx < PR_yDimLen && !ReadStreamBusy_B()) { ReadStreamOpen_B(SR_addrB, PR_xDimLen); S_addrB += PR_xDimLen * TYPE_SIZE; P_yOpenBIdx += 1; } // Open write stream, once for each xDim to be processed if (PR_yOpenCIdx < SR_yDimLen && !WriteStreamBusy_C()) { WriteStreamOpen_C(SR_addrC); S_addrC += PR_xDimLen * TYPE_SIZE; P_yOpenCIdx += 1; } if (PR_yOpenAIdx == PR_yDimLen && PR_yOpenBIdx == PR_yDimLen && PR_yOpenCIdx == PR_yDimLen) WriteStreamPause_C(VADD_RETURN); else HtContinue(VADD_OPEN); break; case VADD_RETURN: { BUSY_RETRY(SendReturnBusy_htmain()); SendReturn_htmain(S_sum); } break; default: assert(0); } } if (SR_yIdx < SR_yDimLen && ReadStreamReady_A() && ReadStreamReady_B() && WriteStreamReady_C()) { PersType_t a, b; a = ReadStream_A(); b = ReadStream_B(); PersType_t c = a + b; S_sum += (ht_uint32)c; WriteStream_C(c); assert_msg(SR_yIdx == ReadStreamTag_A(), "ReadStreamTag_A() error"); if (SR_xIdx + 1 < SR_xDimLen) S_xIdx += 1; else { ReadStreamClose_A(); WriteStreamClose_C(); S_xIdx = 0; S_yIdx += 1; } } }
void CPersCtl::PersCtl() { if (PR_htValid) { switch (PR_htInst) { case CTL_INIT: { HtContinue(CTL_A); } break; case CTL_A: { if (SendCallBusy_modA()) { HtRetry(); break; } SendCall_modA(CTL_B); } break; case CTL_B: { if (SendCallBusy_modB()) { HtRetry(); break; } SendCall_modB(CTL_C); } break; case CTL_C: { if (SendCallBusy_modC()) { HtRetry(); break; } SendCall_modC(CTL_D); } break; case CTL_D: { if (SendCallBusy_modD()) { HtRetry(); break; } SendCall_modD(CTL_E); } break; case CTL_E: { if (SendCallBusy_modE()) { HtRetry(); break; } SendCall_modE(CTL_SEND_MSG); } break; case CTL_SEND_MSG: { if (SendMsgBusy_CtlToA(2)) { HtRetry(); break; } S_msgRcvd = false; // Send messages to modA SendMsg_CtlToA(2, 0); HtContinue(CTL_RTN); } break; case CTL_RTN: { if (SendReturnBusy_htmain()) { HtRetry(); break; } if (!S_msgRcvd) { HtRetry(); break; } // Return to host interface SendReturn_htmain(P_elemCnt); } break; default: assert(0); } } if (!GR_htReset && !RecvMsgBusy_EtoCtl()) { ht_uint4 msg = RecvMsg_EtoCtl(); HtAssert(msg == 0, msg); S_msgRcvd = msg == 0; } }
void CPersCtl::PersCtl() { if (PR_htValid) { switch (PR_htInst) { case CTL_INIT: { P_loopCnt = 0; S_totalCnt = 0; HtContinue(CTL_T1_ASYNC); } break; case CTL_T1_ASYNC: { if (SendCallBusy_t1()) { HtRetry(); break; } SendCallFork_t1(CTL_T1_JOIN); P_loopCnt += 1; if (P_loopCnt == CTL_LOOP_CNT) RecvReturnPause_t1(CTL_T1_CALL); else HtContinue(CTL_T1_ASYNC); } break; case CTL_T1_JOIN: { S_totalCnt += P_sumCnt; RecvReturnJoin_t1(); } break; case CTL_T1_CALL: { if (SendCallBusy_t1()) { HtRetry(); break; } assert(S_totalCnt == CTL_LOOP_CNT * T1_LOOP_CNT * SUM_LOOP_CNT); SendCall_t1(CTL_T2_ASYNC); P_loopCnt = 0; S_totalCnt = 0; } break; case CTL_T2_ASYNC: { if (SendCallBusy_t2()) { HtRetry(); break; } assert(P_sumCnt == T1_LOOP_CNT * SUM_LOOP_CNT || P_loopCnt > 0); SendCallFork_t2(CTL_T2_JOIN); P_loopCnt += 1; if (P_loopCnt == CTL_LOOP_CNT) { P_loopCnt = 0; RecvReturnPause_t2(CTL_T2_CALL); } else { HtContinue(CTL_T2_ASYNC); } } break; case CTL_T2_JOIN: { S_totalCnt += P_sumCnt; RecvReturnJoin_t2(); } break; case CTL_T2_CALL: { if (SendCallBusy_t2()) { HtRetry(); break; } assert(S_totalCnt == CTL_LOOP_CNT * T2_LOOP_CNT * SUM_LOOP_CNT); SendCall_t2(CTL_T3_CALL); } break; case CTL_T3_CALL: { if (SendCallBusy_t3()) { HtRetry(); break; } assert(P_sumCnt == T2_LOOP_CNT * SUM_LOOP_CNT); SendCall_t3(CTL_T4_CALL); } break; case CTL_T4_CALL: { if (SendCallBusy_t4()) { HtRetry(); break; } assert(P_sumCnt == T3_LOOP_CNT * SUM_LOOP_CNT); SendCall_t4(CTL_RTN); } break; case CTL_RTN: { if (SendReturnBusy_htmain()) { HtRetry(); break; } assert(P_sumCnt == T4_LOOP_CNT * SUM_LOOP_CNT); SendReturn_htmain(); } break; default: assert(0); } } }