void DCT(float X[8][8], unsigned char function, float Y[8][8]) {_ssdm_SpecArrayDimSize(X,8);_ssdm_SpecArrayDimSize(Y,8); #pragma HLS DATAFLOW #10 "dct/dct.c" #pragma HLS INTERFACE ap_fifo port=Y #10 "dct/dct.c" #pragma HLS INTERFACE ap_fifo port=X #10 "dct/dct.c" float temp[8][8]; switch (function){ case 1: MAT_Multiply(Tinv,X,temp); MAT_Multiply2(temp, T, Y); break; case 0: default: MAT_Multiply(T,X,temp); MAT_Multiply2(temp, Tinv, Y); break; } }
/*
 * MAT_Multiply - C = A * B for integer matrices stored in 100x100 arrays.
 *
 * A, B   : input matrices; only the top-left mA x nA / mB x nB parts are used.
 * C      : output matrix; only elements with i < mC and j < nC are written.
 * mX, nX : row / column counts of matrix X.
 *
 * Nothing is written unless nA == mB, mA == mC and nB == nC.
 * The loops always run over the full 100x100 range and mask with the
 * runtime dimensions, so the trip counts are constant.
 */
void MAT_Multiply(int A[100][100], int B[100][100], long C[100][100], unsigned char mA, unsigned char nA, unsigned char mB, unsigned char nB, unsigned char mC, unsigned char nC)
{
    _ssdm_SpecArrayDimSize(A,100);
    _ssdm_SpecArrayDimSize(B,100);
    _ssdm_SpecArrayDimSize(C,100);
    unsigned char i, j, k;

    /* Multiplication only when the dimensions are suitable. */
    if ((nA == mB)&(mA == mC)&(nB==nC))
    {
        Row: for (i=0; i<100; i++)
        {
            Col: for (j=0; j<100; j++)
            {
#pragma HLS UNROLL factor=5
                if ((i<mC)&(j<nC))
                {
                    C[i][j] = 0;
                    /* Braced so the directive stays inside the loop body even
                     * when the tool lowers it to a statement. */
                    Product: for (k=0; k<100; k++)
                    {
#pragma HLS UNROLL factor=10
                        if (k<nA)
                        {
                            /* Widen before multiplying: the int product could
                             * overflow before being added to the long sum. */
                            C[i][j] += (long)A[i][k] * B[k][j];
                        }
                    }
                }
            }
        }
    }
}
/*
 * MAT_Multiply - C = A * B for 8x8 float matrices.
 *
 * B is first copied element by element into a local array, so each element
 * of the B argument is read exactly once; the products then use the copy.
 */
void MAT_Multiply(float A[8][8], float B[8][8], float C[8][8])
{
    _ssdm_SpecArrayDimSize(A,8);
    _ssdm_SpecArrayDimSize(B,8);
    _ssdm_SpecArrayDimSize(C,8);
    unsigned char row, col, inner;
    float sum;
    float B_cached[8][8];

    /* Stage 1: take a private copy of B. */
    LoadRow: for (row = 0; row < 8; row++) {
        LoadCol: for (col = 0; col < 8; col++) {
            B_cached[row][col] = B[row][col];
        }
    }

    /* Stage 2: row-by-column dot products, accumulated in ascending k order. */
    Row: for (row = 0; row < 8; row++) {
        Col: for (col = 0; col < 8; col++) {
            sum = 0;
            Product: for (inner = 0; inner < 8; inner++) {
                sum += A[row][inner] * B_cached[inner][col];
            }
            C[row][col] = sum;
        }
    }
}
/*
 * MAT_Multiply2 - C = A * B for 8x8 float matrices.
 *
 * For every row i, the row A[i][*] is copied into a local buffer once and
 * then reused for all eight columns, so each element of A is read once.
 */
void MAT_Multiply2(float A[8][8], float B[8][8], float C[8][8])
{
    _ssdm_SpecArrayDimSize(A,8);
    _ssdm_SpecArrayDimSize(B,8);
    _ssdm_SpecArrayDimSize(C,8);
    unsigned char i, j, k;
    float temp;
    float A_cached_row[8];

    /* The Row loop must enclose both inner loops.  Without these braces only
     * RowCaching belongs to Row, and Col runs a single time afterwards with
     * i == 8, writing C[8][j] outside the array. */
    Row: for (i=0; i<8; i++)
    {
        /* Cache the whole row of matrix A. */
        RowCaching: for (k=0; k<8; k++)
        {
            A_cached_row[k] = A[i][k];
        }

        Col: for (j=0; j<8; j++)
        {
            temp = 0;
            Product: for (k=0; k<8; k++)
            {
                temp += A_cached_row[k] * B[k][j];
            }
            C[i][j] = temp;
        }
    }
}
/*
 * MAT_Multiply - C = A * B for integer matrices stored in 50x50 arrays,
 * with A, B and C declared as ap_fifo interfaces.
 *
 * mX, nX : row / column counts of matrix X.
 *
 * Nothing is written unless nA == mB, mA == mC and nB == nC.  When the
 * check passes, every C[i][j] in the 50x50 range is written: the dot
 * product for i < mC and j < nC, and 0 elsewhere.
 *
 * A row of A is copied to a local buffer when j == 0 and column j of B is
 * copied when i == 0, so the products only touch the local copies.
 */
void MAT_Multiply(int A[50][50], int B[50][50], long C[50][50], unsigned char mA, unsigned char nA, unsigned char mB, unsigned char nB, unsigned char mC, unsigned char nC)
{
    _ssdm_SpecArrayDimSize(A,50);
    _ssdm_SpecArrayDimSize(B,50);
    _ssdm_SpecArrayDimSize(C,50);
    _ssdm_op_SpecInterface(C, "ap_fifo", 0, 0, 0, 100, "", "", "");
    _ssdm_op_SpecInterface(B, "ap_fifo", 0, 0, 0, 100, "", "", "");
    _ssdm_op_SpecInterface(A, "ap_fifo", 0, 0, 0, 100, "", "", "");
    unsigned char i, j, k;
    long temp;
    int A_cached_row[50];
    int B_cached[50][50];

    /* Multiplication only when the dimensions are suitable. */
    if ((nA == mB)&(mA == mC)&(nB==nC))
    {
        Row: for (i=0; i<50; i++)
        {
            Col: for (j=0; j<50; j++)
            {
                temp = 0;
                if ((i<mC)&(j<nC))
                {
                    if (j==0)
                    {
                        /* Cache the whole row of matrix A. */
                        RowCaching: for (k=0; k<50; k++)
                        {
                            A_cached_row[k] = A[i][k];
                        }
                    }
                    /* Cache column j of matrix B during the first row, so B
                     * is read only once. */
                    if (i==0)
                    {
                        ColCaching: for (k=0; k<50; k++)
                        {
                            B_cached[k][j] = B[k][j];
                        }
                    }
                    Product: for (k=0; k<50; k++)
                    {
                        if (k<nA)
                        {
                            /* Widen before multiplying: the int product could
                             * overflow before being added to the long sum. */
                            temp += (long)A_cached_row[k] * B_cached[k][j];
                        }
                    }
                }
                C[i][j] = temp;
            }
        }
    }
}
/*
 * DOT_Divide - element-wise division of two 8x8 float matrices:
 * C[i][j] = A[i][j] / B[i][j].  No check is made for zero divisors.
 */
void DOT_Divide(float A[8][8], float B[8][8], float C[8][8])
{
    _ssdm_SpecArrayDimSize(A,8);
    _ssdm_SpecArrayDimSize(B,8);
    _ssdm_SpecArrayDimSize(C,8);
    unsigned char row_idx, col_idx;

    row: for (row_idx = 0; row_idx < 8; row_idx++) {
        col: for (col_idx = 0; col_idx < 8; col_idx++) {
            const float numerator = A[row_idx][col_idx];
            const float denominator = B[row_idx][col_idx];
            C[row_idx][col_idx] = numerator / denominator;
        }
    }
}
/*
 * top - synthesis entry point that forwards its four arrays to
 * fully_connected_layer() with the sizes 1000 and 10.
 *
 * All four ports are declared as ap_memory interfaces backed by RAM_1P.
 */
void top(int out[10], int w[1000*10], int b[1000*10], int x[1000])
{
    _ssdm_SpecArrayDimSize(b,1000*10);
    _ssdm_SpecArrayDimSize(w,1000*10);
    _ssdm_SpecArrayDimSize(x,1000);
    _ssdm_SpecArrayDimSize(out,10);
#pragma HLS INTERFACE ap_memory port=out
#pragma HLS INTERFACE ap_memory port=w
#pragma HLS INTERFACE ap_memory port=b
#pragma HLS INTERFACE ap_memory port=x
#pragma HLS RESOURCE variable=out core=RAM_1P
#pragma HLS RESOURCE variable=w core=RAM_1P
#pragma HLS RESOURCE variable=b core=RAM_1P
#pragma HLS RESOURCE variable=x core=RAM_1P
    /* Lengths of the x[] and out[] arrays declared in the signature. */
    enum { X_LEN = 1000, OUT_LEN = 10 };

    fully_connected_layer(out, w, b, x, X_LEN, OUT_LEN);
}
/*
 * top - synthesis entry point that forwards its four arrays to
 * fully_connected_layer() with the sizes 1000 and 1.
 *
 * All four ports are declared as ap_memory interfaces backed by RAM_1P.
 */
void top(int out[1], int w[1000*1], int b[1000*1], int x[1000])
{
    _ssdm_SpecArrayDimSize(b,1000*1);
    _ssdm_SpecArrayDimSize(w,1000*1);
    _ssdm_SpecArrayDimSize(x,1000);
    _ssdm_SpecArrayDimSize(out,1);

    /* Port interfaces. */
    _ssdm_op_SpecInterface(out, "ap_memory", 0, 0, 0, 0, "", "", "");
    _ssdm_op_SpecInterface(w, "ap_memory", 0, 0, 0, 0, "", "", "");
    _ssdm_op_SpecInterface(b, "ap_memory", 0, 0, 0, 0, "", "", "");
    _ssdm_op_SpecInterface(x, "ap_memory", 0, 0, 0, 0, "", "", "");

    /* Memory cores bound to the ports. */
    _ssdm_op_SpecResource(out, "", "RAM_1P", "", -1, "", "", "");
    _ssdm_op_SpecResource(w, "", "RAM_1P", "", -1, "", "", "");
    _ssdm_op_SpecResource(b, "", "RAM_1P", "", -1, "", "", "");
    _ssdm_op_SpecResource(x, "", "RAM_1P", "", -1, "", "", "");

    /* Lengths of the x[] and out[] arrays declared in the signature. */
    enum { X_LEN = 1000, OUT_LEN = 1 };

    fully_connected_layer(out, w, b, x, X_LEN, OUT_LEN);
}
/*
 * Quant - combine the 8x8 block X element-wise with QMatrix into Y.
 *
 * function : 1 calls DOT_Multiply(X, QMatrix, Y); every other value
 *            (including 0) calls DOT_Divide(X, QMatrix, Y).
 *
 * QMatrix is declared outside this block.
 */
void Quant(float X[8][8], unsigned char function, float Y[8][8])
{
    _ssdm_SpecArrayDimSize(X,8);
    _ssdm_SpecArrayDimSize(Y,8);

    if (function == 1) {
        DOT_Multiply(X, QMatrix, Y);
    } else {
        DOT_Divide(X, QMatrix, Y);
    }
}
/*
 * MAT_Multiply2 - C = A * B for 8x8 float matrices (directive-lowered form).
 *
 * For every row i, the row A[i][*] is copied into a local buffer once and
 * then reused for all eight columns, so each element of A is read once.
 */
void MAT_Multiply2(float A[8][8], float B[8][8], float C[8][8])
{
    _ssdm_SpecArrayDimSize(A,8);
    _ssdm_SpecArrayDimSize(B,8);
    _ssdm_SpecArrayDimSize(C,8);
    _ssdm_op_SpecDataflowPipeline(-1, "");
    _ssdm_SpecArrayPartition( B, 1, "COMPLETE", 0, "");
    unsigned char i, j, k;
    float temp;
    float A_cached_row[8];
    _ssdm_SpecArrayPartition( A_cached_row, 0, "COMPLETE", 0, "");

    /* The Row loop must enclose both inner loops.  Without these braces only
     * RowCaching belongs to Row, and Col runs a single time afterwards with
     * i == 8, writing C[8][j] outside the array. */
    Row: for (i=0; i<8; i++)
    {
        /* Cache the whole row of matrix A.  The braces keep the copy inside
         * the loop: unbraced, the pipeline directive call is the entire loop
         * body and the copy executes once with k == 8. */
        RowCaching: for (k=0; k<8; k++)
        {
            _ssdm_op_SpecPipeline(1, 1, 1, 0, "");
            A_cached_row[k] = A[i][k];
        }

        Col: for (j=0; j<8; j++)
        {
            _ssdm_op_SpecPipeline(1, 1, 1, 0, "");
            temp = 0;
            Product: for (k=0; k<8; k++)
            {
                temp += A_cached_row[k] * B[k][j];
            }
            C[i][j] = temp;
        }
    }
}
/*
 * DCT - run the two-stage 8x8 matrix pipeline on X and store the result in Y.
 *
 * function : 1 selects the (Tinv, T) operand order; every other value
 *            (including 0) selects the (T, Tinv) order.
 *
 * T and Tinv are matrices declared outside this block.
 */
void DCT(float X[8][8], unsigned char function, float Y[8][8])
{
    _ssdm_SpecArrayDimSize(X,8);
    _ssdm_SpecArrayDimSize(Y,8);
    /* Intermediate result handed from the first stage to the second. */
    float temp[8][8];

    if (function == 1) {
        MAT_Multiply(Tinv, X, temp);
        MAT_Multiply2(temp, T, Y);
    } else {
        MAT_Multiply(T, X, temp);
        MAT_Multiply2(temp, Tinv, Y);
    }
}
/*
 * MAT_Multiply - C = A * B for 8x8 float matrices (directive-lowered form).
 *
 * B is first copied element by element into a local array, so each element
 * of the B argument is read exactly once; the products then use the copy.
 */
void MAT_Multiply(float A[8][8], float B[8][8], float C[8][8])
{
    _ssdm_SpecArrayDimSize(A,8);
    _ssdm_SpecArrayDimSize(B,8);
    _ssdm_SpecArrayDimSize(C,8);
    _ssdm_op_SpecDataflowPipeline(-1, "");
    _ssdm_SpecArrayPartition( A, 1, "COMPLETE", 0, "");
    unsigned char row, col, inner;
    float sum;
    float B_cached[8][8];
    _ssdm_SpecArrayPartition( B_cached, 0, "COMPLETE", 0, "");

    /* Stage 1: take a private copy of B. */
    LoadRow: for (row = 0; row < 8; row++) {
        LoadCol: for (col = 0; col < 8; col++) {
            _ssdm_op_SpecPipeline(1, 1, 1, 0, "");
            B_cached[row][col] = B[row][col];
        }
    }

    /* Stage 2: row-by-column dot products, accumulated in ascending k order. */
    Row: for (row = 0; row < 8; row++) {
        Col: for (col = 0; col < 8; col++) {
            _ssdm_op_SpecPipeline(1, 1, 1, 0, "");
            sum = 0;
            Product: for (inner = 0; inner < 8; inner++) {
                sum += A[row][inner] * B_cached[inner][col];
            }
            C[row][col] = sum;
        }
    }
}
/*
 * fir - push one sample into an 11-entry delay line and output the
 * weighted sum of the delay line with the coefficients c[0..10].
 *
 * y : receives the accumulated result.
 * c : 11 coefficients.
 * x : new input sample; it is stored in shift_reg[0].
 *
 * shift_reg is static, so its contents persist between calls.
 */
void fir(data_t *y, coef_t c[11], data_t x)
{
    _ssdm_SpecArrayDimSize(c,11);
#pragma HLS RESOURCE variable=c core=RAM_1P_BRAM
#pragma HLS INTERFACE ap_vld port=y
#pragma HLS INTERFACE ap_vld port=x
    static data_t shift_reg[11];
#pragma HLS ARRAY_PARTITION variable=shift_reg complete dim=1
    acc_t acc = 0;
    data_t data;
    int i;

    /* Walk from the last entry down to entry 0 so every entry is moved
     * before it is overwritten. */
    Shift_Accum_Loop: for (i = 11 - 1; i >= 0; i--) {
#pragma HLS UNROLL
        if (i != 0) {
            shift_reg[i] = shift_reg[i - 1];
            data = shift_reg[i];
        } else {
            shift_reg[0] = x;
            data = x;
        }
        acc += data * c[i];
    }

    *y = acc;
}
/*
 * fir - push one sample into an 11-entry delay line and output the
 * weighted sum of the delay line with the coefficients c[0..10]
 * (directive-lowered form).
 *
 * y : receives the accumulated result.
 * c : 11 coefficients.
 * x : new input sample; it is stored in shift_reg[0].
 *
 * shift_reg is static, so its contents persist between calls.
 */
void fir(data_t *y, coef_t c[11], data_t x)
{
    _ssdm_SpecArrayDimSize(c,11);
    _ssdm_op_SpecResource(c, "", "RAM_1P_BRAM", "", -1, "", "", "");
    _ssdm_op_SpecInterface(y, "ap_vld", 0, 0, 0, 0, "", "", "");
    _ssdm_op_SpecInterface(x, "ap_vld", 0, 0, 0, 0, "", "", "");
    static data_t shift_reg[11];
    _ssdm_SpecArrayPartition( shift_reg, 1, "COMPLETE", 0, "");
    acc_t acc = 0;
    data_t data;
    int i;

    /* Walk from the last entry down to entry 0 so every entry is moved
     * before it is overwritten. */
    Shift_Accum_Loop: for (i = 11 - 1; i >= 0; i--) {
        _ssdm_Unroll(0,0,0, "");
        if (i != 0) {
            shift_reg[i] = shift_reg[i - 1];
            data = shift_reg[i];
        } else {
            shift_reg[0] = x;
            data = x;
        }
        acc += data * c[i];
    }

    *y = acc;
}
/*
 * acc_vadd_hls - command-driven element-wise vector operation
 * (directive-lowered form).
 *
 * cmd    : read three times, in this order: operation code, end index
 *          (exclusive), start index.
 * resp   : set to 1 on the last loop iteration (i == end - 1).  Nothing is
 *          written when the range is empty or the operation code is
 *          neither 1 nor 2.
 * a, b   : input vectors.
 * result : output vector; elements [start, end) are written.
 *
 * The indices taken from cmd are used without any range check.
 */
void acc_vadd_hls(volatile int *cmd, volatile int *resp, int a[4096], int b[4096], int result[4096])
{
    _ssdm_SpecArrayDimSize(result,4096);
    _ssdm_SpecArrayDimSize(b,4096);
    _ssdm_SpecArrayDimSize(a,4096);
    _ssdm_op_SpecInterface(0, "ap_ctrl_none", 0, 0, 0, 0, "", "", "");
    _ssdm_op_SpecInterface(cmd, "axis", 0, 0, 0, 16, "", "", "");
    _ssdm_op_SpecInterface(resp, "axis", 0, 0, 0, 16, "", "", "");
    _ssdm_op_SpecInterface(a, "bram", 0, 0, 0, 1024, "", "", "");
    _ssdm_op_SpecInterface(b, "bram", 0, 0, 0, 1024, "", "", "");
    _ssdm_op_SpecInterface(result, "bram", 0, 0, 0, 1024, "", "", "");
    _ssdm_op_SpecResource(a, "", "RAM_1P_BRAM", "", "", "", "");
    _ssdm_op_SpecResource(b, "", "RAM_1P_BRAM", "", "", "", "");
    _ssdm_op_SpecResource(result, "", "RAM_1P_BRAM", "", "", "", "");
    int i, opcode, first, limit;

    /* Each dereference of the volatile cmd pointer is a separate read;
     * the order below is part of the command protocol. */
    opcode = *cmd;
    limit = *cmd;
    first = *cmd;

    switch (opcode) {
    case 1:
        add_Loop: for (i = first; i < limit; i++) {
            _ssdm_op_SpecLoopName("add_Loop");
            _ssdm_RegionBegin("add_Loop");
            result[i] = a[i] + b[i];
            if (i == limit - 1) {
                *resp = 1;  /* signal completion */
            }
            _ssdm_RegionEnd("add_Loop");
        }
        break;
    case 2:
        /* NOTE(review): the loop is labelled "sub" but performs an addition,
         * exactly like add_Loop.  Confirm whether a subtraction (and which
         * operand order) was intended. */
        sub_Loop: for (i = first; i < limit; i++) {
            _ssdm_op_SpecLoopName("sub_Loop");
            _ssdm_RegionBegin("sub_Loop");
            result[i] = b[i] + a[i];
            if (i == limit - 1) {
                *resp = 1;  /* signal completion */
            }
            _ssdm_RegionEnd("sub_Loop");
        }
        break;
    default:
        break;
    }
}
/*
 * MAT_Multiply - C = A * B for integer matrices stored in 100x100 arrays,
 * with A, B and C declared as ap_fifo interfaces.
 *
 * mX, nX : row / column counts of matrix X.
 *
 * Nothing is written unless nA == mB, mA == mC and nB == nC; only elements
 * with i < mC and j < nC are written.
 *
 * A row of A is copied to a local buffer when j == 0 and column j of B is
 * copied when i == 0, so the products only touch the local copies.
 */
void MAT_Multiply(int A[100][100], int B[100][100], long C[100][100], unsigned char mA, unsigned char nA, unsigned char mB, unsigned char nB, unsigned char mC, unsigned char nC)
{
    _ssdm_SpecArrayDimSize(A,100);
    _ssdm_SpecArrayDimSize(B,100);
    _ssdm_SpecArrayDimSize(C,100);
#pragma HLS INTERFACE ap_fifo port=C
#pragma HLS INTERFACE ap_fifo port=B
#pragma HLS INTERFACE ap_fifo port=A
    unsigned char i, j, k;
    long temp;
    int A_cached_row[100];
#pragma HLS ARRAY_PARTITION variable=A_cached_row cyclic factor=10 dim=1
    int B_cached[100][100];
#pragma HLS ARRAY_PARTITION variable=B_cached complete factor=10 dim=1

    /* Multiplication only when the dimensions are suitable. */
    if ((nA == mB)&(mA == mC)&(nB==nC))
    {
        Row: for (i=0; i<100; i++)
        {
            Col: for (j=0; j<100; j++)
            {
                if ((i<mC)&(j<nC))
                {
                    temp = 0;
                    if (j==0)
                    {
                        /* Cache the whole row of matrix A.  The loop is
                         * braced so the directive stays inside the body even
                         * when the tool lowers it to a statement. */
                        RowCaching: for (k=0; k<100; k++)
                        {
#pragma HLS UNROLL factor=20
                            A_cached_row[k] = A[i][k];
                        }
                    }
                    /* Cache column j of matrix B during the first row, so B
                     * is read only once. */
                    if (i==0)
                    {
                        ColCaching: for (k=0; k<100; k++)
                        {
#pragma HLS UNROLL factor=20
                            B_cached[k][j] = B[k][j];
                        }
                    }
                    Product: for (k=0; k<100; k++)
                    {
#pragma HLS UNROLL factor=20
                        if (k<nA)
                        {
                            /* Widen before multiplying: the int product could
                             * overflow before being added to the long sum. */
                            temp += (long)A_cached_row[k] * B_cached[k][j];
                        }
                    }
                    C[i][j] = temp;
                }
            }
        }
    }
}
/*
 * dma_filter - validate DMA settings written into input_config and forward
 * them to output_config.
 *
 * input_config  : 0x58-word register window (s_axilite); zeroed on entry
 *                 and again after every forwarded request.
 * output_config : 0x58-word register window (m_axi) that receives the
 *                 validated settings.
 * minAddress    : lowest non-zero address accepted for a transfer.
 * reset         : when true the function does nothing.
 *
 * Word indices used: 0 / 12 control, 6 / 18 address, 10 / 22 length for the
 * read / write channel.  In the control words bit 0 is the enable flag and
 * bit 12 (4096) is the interrupt-enable flag.
 * NOTE(review): these offsets look like the Xilinx AXI DMA register map
 * (MM2S/S2MM DMACR, address, LENGTH) - confirm against the target IP.
 *
 * WAIT_LOOP never clears `wait`, so the function polls forever once started.
 */
void dma_filter(volatile unsigned int input_config[0x58], volatile unsigned int output_config[0x58], volatile unsigned minAddress, bool reset)
{
    _ssdm_SpecArrayDimSize(input_config,0x58);
    _ssdm_SpecArrayDimSize(output_config,0x58);
    _ssdm_op_SpecWire(&reset, "ap_none", 0, 0, 0, 1, "", "", "");
    _ssdm_op_SpecWire(&minAddress, "s_axilite", 0, 0, 0, 0, "", "", "");
    _ssdm_op_SpecWire(output_config, "m_axi", 0, 0, 0, 0, "", "", "");
    _ssdm_op_SpecWire(input_config, "s_axilite", 0, 0, 0, 0, "", "", "");

    if(!reset){
        bool wait = true;
        int i;

        INITIAL_LOOP: for(i=0; i<0x58; i++){
            _ssdm_op_SpecLoopName("INITIAL_LOOP");_ssdm_RegionBegin("INITIAL_LOOP");
            _ssdm_op_SpecPipeline(1, 1, 1, 0, "");
            input_config[i] = 0;
            _ssdm_RegionEnd("INITIAL_LOOP");
        }

        /* These two flags are never reset, so they stay true after the
         * first accepted request. */
        bool read_ready = false;
        bool write_ready = false;

        WAIT_LOOP: while(wait){
            _ssdm_op_SpecLoopName("WAIT_LOOP");_ssdm_RegionBegin("WAIT_LOOP");
            bool clear = false;

            /* Snapshot the request.  The flags are extracted with '&', not
             * '&=': the compound form overwrote the control word with the
             * enable bit, so the bit-12 test that followed always saw 0. */
            unsigned read_config = input_config[0];
            bool read_enable = (read_config & 1) != 0;
            bool read_interrupts = (read_config & 4096) != 0;
            unsigned read_address = input_config[6];
            unsigned read_length = input_config[10];
            unsigned write_config = input_config[12];
            bool write_enable = (write_config & 1) != 0;
            bool write_interrupts = (write_config & 4096) != 0;
            unsigned write_address = input_config[18];
            unsigned write_length = input_config[22];

            /* Nothing requested yet: keep polling. */
            if(!read_enable && !write_enable){
                continue;
            }
            else if(read_address == 0 && write_address==0){
                continue;
            }

            /* Reject a read whose address is below minAddress or whose
             * length is zero. */
            if(read_address > 0 && read_address < minAddress){
                continue;
            }
            else if(read_length == 0){
                continue;
            }
            else{
                read_ready = true;
            }

            /* Same checks for the write channel. */
            if(write_address > 0 && write_address < minAddress){
                continue;
            }
            else if(write_length == 0){
                continue;
            }
            else{
                write_ready = true;
            }

            if(read_ready){
                /* enable read dma block */
                output_config[0] |= 1;
                /* enable read interrupts */
                if(read_interrupts){
                    output_config[0] |= 4096;
                }
                /* write source address and length */
                output_config[6] = read_address;
                output_config[10] = read_length;
                clear = true;
            }

            if(write_ready){
                /* enable s2mm on write dma block */
                output_config[12] |= 1;
                /* enable write interrupts: gated on the interrupt flag,
                 * not on the enable flag as before */
                if(write_interrupts){
                    output_config[12] |= 4096;
                }
                /* write dest address and length */
                output_config[18] = write_address;
                output_config[22] = write_length;
                clear = true;
            }

            /* Wipe the request window so the same request is not forwarded
             * again. */
            if(clear){
                CLEAR_LOOP: for(i=0; i<0x58; i++){
                    _ssdm_op_SpecLoopName("CLEAR_LOOP");_ssdm_RegionBegin("CLEAR_LOOP");
                    _ssdm_op_SpecPipeline(1, 1, 1, 0, "");
                    input_config[i] = 0;
                    _ssdm_RegionEnd("CLEAR_LOOP");
                }
            }
            _ssdm_RegionEnd("WAIT_LOOP");
        }
    }
}
/*
 * acc_vadd_hls - command-driven element-wise vector operation.
 *
 * cmd    : read three times, in this order: operation code, end index
 *          (exclusive), start index.
 * resp   : set to 1 on the last loop iteration (i == end - 1).  Nothing is
 *          written when the range is empty or the operation code is
 *          neither 1 nor 2.
 * a, b   : input vectors.
 * result : output vector; elements [start, end) are written.
 *
 * The indices taken from cmd are used without any range check.
 */
void acc_vadd_hls(volatile int *cmd, volatile int *resp, int a[4096], int b[4096], int result[4096])
{
    _ssdm_SpecArrayDimSize(result,4096);
    _ssdm_SpecArrayDimSize(b,4096);
    _ssdm_SpecArrayDimSize(a,4096);
#pragma HLS INTERFACE ap_ctrl_none port=return
#pragma HLS INTERFACE axis depth=16 port=cmd
#pragma HLS INTERFACE axis depth=16 port=resp
#pragma HLS INTERFACE bram depth=1024 port=a
#pragma HLS INTERFACE bram depth=1024 port=b
#pragma HLS INTERFACE bram depth=1024 port=result
#pragma HLS RESOURCE variable=a core=RAM_1P_BRAM
#pragma HLS RESOURCE variable=b core=RAM_1P_BRAM
#pragma HLS RESOURCE variable=result core=RAM_1P_BRAM
    int i, opcode, first, limit;

    /* Each dereference of the volatile cmd pointer is a separate read;
     * the order below is part of the command protocol. */
    opcode = *cmd;
    limit = *cmd;
    first = *cmd;

    switch (opcode) {
    case 1:
        add_Loop: for (i = first; i < limit; i++) {
            _ssdm_op_SpecLoopName("add_Loop");
            _ssdm_RegionBegin("add_Loop");
            result[i] = a[i] + b[i];
            if (i == limit - 1) {
                *resp = 1;  /* signal completion */
            }
            _ssdm_RegionEnd("add_Loop");
        }
        break;
    case 2:
        /* NOTE(review): the loop is labelled "sub" but performs an addition,
         * exactly like add_Loop.  Confirm whether a subtraction (and which
         * operand order) was intended. */
        sub_Loop: for (i = first; i < limit; i++) {
            _ssdm_op_SpecLoopName("sub_Loop");
            _ssdm_RegionBegin("sub_Loop");
            result[i] = b[i] + a[i];
            if (i == limit - 1) {
                *resp = 1;  /* signal completion */
            }
            _ssdm_RegionEnd("sub_Loop");
        }
        break;
    default:
        break;
    }
}
/*
 * MAT_Multiply - C = A * B for integer matrices stored in 100x100 arrays,
 * with A, B and C declared as ap_fifo interfaces (directive-lowered form).
 *
 * mX, nX : row / column counts of matrix X.
 *
 * Nothing is written unless nA == mB, mA == mC and nB == nC; only elements
 * with i < mC and j < nC are written.
 *
 * A row of A is copied to a local buffer when j == 0 and column j of B is
 * copied when i == 0, so the products only touch the local copies.
 */
void MAT_Multiply(int A[100][100], int B[100][100], long C[100][100], unsigned char mA, unsigned char nA, unsigned char mB, unsigned char nB, unsigned char mC, unsigned char nC)
{
    _ssdm_SpecArrayDimSize(A,100);
    _ssdm_SpecArrayDimSize(B,100);
    _ssdm_SpecArrayDimSize(C,100);
    _ssdm_op_SpecInterface(C, "ap_fifo", 0, 0, 0, 0, "", "", "");
    _ssdm_op_SpecInterface(B, "ap_fifo", 0, 0, 0, 0, "", "", "");
    _ssdm_op_SpecInterface(A, "ap_fifo", 0, 0, 0, 0, "", "", "");
    unsigned char i, j, k;
    long temp;
    int A_cached_row[100];
    _ssdm_SpecArrayPartition( A_cached_row, 1, "CYCLIC", 10, "");
    int B_cached[100][100];
    _ssdm_SpecArrayPartition( B_cached, 1, "COMPLETE", 10, "");

    /* Multiplication only when the dimensions are suitable. */
    if ((nA == mB)&(mA == mC)&(nB==nC))
    {
        Row: for (i=0; i<100; i++)
        {
            Col: for (j=0; j<100; j++)
            {
                if ((i<mC)&(j<nC))
                {
                    temp = 0;
                    if (j==0)
                    {
                        /* Cache the whole row of matrix A.  The braces keep
                         * the copy inside the loop: unbraced, the unroll
                         * directive call is the entire loop body and the copy
                         * executes once with k == 100, out of bounds. */
                        RowCaching: for (k=0; k<100; k++)
                        {
                            _ssdm_Unroll(1, 0, 20, "");
                            A_cached_row[k] = A[i][k];
                        }
                    }
                    /* Cache column j of matrix B during the first row, so B
                     * is read only once.  Braced for the same reason. */
                    if (i==0)
                    {
                        ColCaching: for (k=0; k<100; k++)
                        {
                            _ssdm_Unroll(1, 0, 20, "");
                            B_cached[k][j] = B[k][j];
                        }
                    }
                    Product: for (k=0; k<100; k++)
                    {
                        _ssdm_Unroll(1, 0, 20, "");
                        if (k<nA)
                        {
                            /* Widen before multiplying: the int product could
                             * overflow before being added to the long sum. */
                            temp += (long)A_cached_row[k] * B_cached[k][j];
                        }
                    }
                    C[i][j] = temp;
                }
            }
        }
    }
}