Esempio n. 1
0
/*
 * DCT - run an 8x8 block through a two-step matrix multiplication.
 *
 * X        : 8x8 input block.
 * function : 1 selects the Tinv-first sequence; every other value
 *            (0 included) selects the T-first sequence.
 * Y        : 8x8 output block, produced by the second multiplication.
 *
 * Both sequences go through a local 8x8 scratch matrix: MAT_Multiply
 * fills the scratch matrix and MAT_Multiply2 turns it into Y.  T and Tinv
 * are defined outside this function.
 * NOTE(review): function == 1 is presumably the inverse transform -
 * confirm against the definitions of T and Tinv.
 */
void DCT(float X[8][8],
  unsigned char function,
  float Y[8][8])
{_ssdm_SpecArrayDimSize(X,8);_ssdm_SpecArrayDimSize(Y,8);
#pragma HLS DATAFLOW
#10 "dct/dct.c"

#pragma HLS INTERFACE ap_fifo port=Y
#10 "dct/dct.c"

#pragma HLS INTERFACE ap_fifo port=X
#10 "dct/dct.c"


 float temp[8][8];

 if (function == 1) {
  /* Tinv on the left first, then T on the right. */
  MAT_Multiply(Tinv, X, temp);
  MAT_Multiply2(temp, T, Y);
 } else {
  /* T on the left first, then Tinv on the right. */
  MAT_Multiply(T, X, temp);
  MAT_Multiply2(temp, Tinv, Y);
 }
}
Esempio n. 2
0
/*
 * MAT_Multiply - integer matrix product C = A * B in fixed 100x100 storage.
 *
 * A, B   : input matrices.
 * C      : output matrix; only entries with row < mC and column < nC are
 *          written, all other entries are left untouched.
 * mA, nA : row / column count of A.
 * mB, nB : row / column count of B.
 * mC, nC : row / column count of C.
 *
 * Nothing is written unless nA == mB, mA == mC and nB == nC.
 * Every loop runs over the full 100 iterations and masks the unused ones
 * with a comparison, so the trip counts are compile-time constants.
 */
void MAT_Multiply(int A[100][100],
  int B[100][100], long C[100][100],
  unsigned char mA, unsigned char nA, unsigned char mB,
  unsigned char nB, unsigned char mC, unsigned char nC)
{_ssdm_SpecArrayDimSize(A,100);_ssdm_SpecArrayDimSize(B,100);_ssdm_SpecArrayDimSize(C,100);
 unsigned char i, j, k;
 if ((nA == mB)&(mA == mC)&(nB==nC))//Multiplication only when the dimensions are suitable
 {
  Row: for (i=0; i<100; i++)
   Col: for (j=0; j<100; j++)
   {
#pragma HLS UNROLL factor=5
#14 "parta/matrixmath.c"

    if ((i<mC)&(j<nC))
    {
     C[i][j] = 0;
     Product: for (k=0; k<100; k++)
     {
      /* The pragma must be the first thing inside the loop body; without
         the braces it sat between the loop header and its statement. */
#pragma HLS UNROLL factor=10
#19 "parta/matrixmath.c"

      if (k<nA)
       /* Widen before multiplying: int*int can overflow, the sum is long. */
       C[i][j] += (long)A[i][k] * B[k][j];
     }
    }
   }
 }
}
Esempio n. 3
0
/*
 * MAT_Multiply - 8x8 single-precision matrix product C = A * B.
 *
 * A, B : input matrices.
 * C    : output matrix; every element is overwritten.
 *
 * B is first copied element by element into a local array so that each
 * element of the B argument is read exactly once; the products then use
 * only the local copy.
 */
void MAT_Multiply(float A[8][8],
  float B[8][8], float C[8][8])
{_ssdm_SpecArrayDimSize(A,8);_ssdm_SpecArrayDimSize(B,8);_ssdm_SpecArrayDimSize(C,8);
 unsigned char row, col, idx;
 float sum;
 float B_cached[8][8];

 /* Local copy of B, filled in row-major order. */
 LoadRow: for (row = 0; row < 8; ++row) {
  LoadCol: for (col = 0; col < 8; ++col) {
   B_cached[row][col] = B[row][col];
  }
 }

 Row: for (row = 0; row < 8; ++row) {
  Col: for (col = 0; col < 8; ++col) {
   /* Dot product of row `row` of A with column `col` of the copy. */
   sum = 0;
   Product: for (idx = 0; idx < 8; ++idx) {
    sum += A[row][idx] * B_cached[idx][col];
   }
   C[row][col] = sum;
  }
 }
}
Esempio n. 4
0
/*
 * MAT_Multiply2 - 8x8 single-precision matrix product C = A * B.
 *
 * A, B : input matrices.
 * C    : output matrix; every element is overwritten.
 *
 * For each row of A the row is first copied into a local buffer, so each
 * element of A is read exactly once, and then multiplied with every
 * column of B.
 *
 * The Row loop body is braced explicitly.  Without the braces the Row loop
 * only repeated the RowCaching loop, and the Col loop ran a single time
 * afterwards with i == 8, writing to C[8][j] outside the array.
 */
void MAT_Multiply2(float A[8][8],
  float B[8][8], float C[8][8])
{_ssdm_SpecArrayDimSize(A,8);_ssdm_SpecArrayDimSize(B,8);_ssdm_SpecArrayDimSize(C,8);
 unsigned char i, j, k;
 float temp;
 float A_cached_row[8];

 Row: for (i=0; i<8; i++)
 {
  //Cache the whole row of matrix A
  RowCaching: for (k=0;k<8;k++)
  {
   A_cached_row[k]=A[i][k];
  }

  Col: for (j=0; j<8; j++)
  {
   //Dot product of the cached row with column j of B
   temp = 0;

   Product: for (k=0; k<8; k++)
   {
    temp += A_cached_row[k] * B[k][j];
   }
   C[i][j] = temp;
  }
 }
}
Esempio n. 5
0
/*
 * MAT_Multiply - integer matrix product C = A * B in fixed 50x50 storage,
 * with A, B and C declared as ap_fifo interfaces.
 *
 * A, B   : input matrices.
 * C      : output matrix.
 * mA, nA : row / column count of A.
 * mB, nB : row / column count of B.
 * mC, nC : row / column count of C.
 *
 * Nothing is written unless nA == mB, mA == mC and nB == nC.  When the
 * dimensions match, all 50x50 entries of C are written: entries with
 * row < mC and column < nC receive the dot product, every other entry
 * receives 0.
 *
 * The current row of A is copied into A_cached_row when j == 0, and each
 * used column of B is copied into B_cached while i == 0, so the products
 * read only the local copies.
 */
void MAT_Multiply(int A[50][50],
                  int B[50][50], long C[50][50],
                  unsigned char mA, unsigned char nA, unsigned char mB,
                  unsigned char nB, unsigned char mC, unsigned char nC)
{   _ssdm_SpecArrayDimSize(A,50);
    _ssdm_SpecArrayDimSize(B,50);
    _ssdm_SpecArrayDimSize(C,50);
    _ssdm_op_SpecInterface(C, "ap_fifo", 0, 0, 0, 100, "", "", "");
# 8 "parta_2/matrixmath.c"

    _ssdm_op_SpecInterface(B, "ap_fifo", 0, 0, 0, 100, "", "", "");
# 8 "parta_2/matrixmath.c"

    _ssdm_op_SpecInterface(A, "ap_fifo", 0, 0, 0, 100, "", "", "");
# 8 "parta_2/matrixmath.c"

    unsigned char i, j, k;
    long temp;
    int A_cached_row[50];
    int B_cached[50][50];

    if ((nA == mB)&(mA == mC)&(nB==nC))//Multiplication only when the dimensions are suitable
    {
Row:
        for (i=0; i<50; i++)
Col:
            for (j=0; j<50; j++)
            {
                //Stays 0 for entries outside the mC x nC result
                temp = 0;
                if ((i<mC)&(j<nC))
                {
                    if (j==0)
                    {
                        //Cache the whole row of matrix A
RowCaching:
                        for (k=0; k<50; k++)
                            A_cached_row[k]=A[i][k];
                    }

                    //Cache all the columns of matrix B, see Fig. 7.21. B will be read only once
                    if (i==0)
                    {
ColCaching:
                        for (k=0; k<50; k++)
                            B_cached[k][j]=B[k][j];
                    }

Product:
                    for (k=0; k<50; k++)
                    {
                        if (k<nA)
                            //Widen before multiplying: int*int can overflow, the sum is long
                            temp += (long)A_cached_row[k] * B_cached[k][j];
                    }
                }
                C[i][j] = temp;
            }
    }
}
Esempio n. 6
0
/*
 * DOT_Divide - element-wise division of two 8x8 matrices.
 *
 * A : dividends.
 * B : divisors.
 * C : output; C[r][q] = A[r][q] / B[r][q] for every element.
 *
 * No check is made for zero divisors.
 */
void DOT_Divide(float A[8][8],
  float B[8][8], float C[8][8])
{_ssdm_SpecArrayDimSize(A,8);_ssdm_SpecArrayDimSize(B,8);_ssdm_SpecArrayDimSize(C,8);
 unsigned char r, q;

 row: for (r = 0; r < 8; ++r) {
  col: for (q = 0; q < 8; ++q) {
   const float dividend = A[r][q];
   const float divisor = B[r][q];
   C[r][q] = dividend / divisor;
  }
 }
}
/*
 * top - top-level wrapper around fully_connected_layer.
 *
 * Forwards out, w, b and x unchanged, followed by the constants 1000 and
 * 10, which match the declared array extents (x[1000], out[10],
 * w[1000*10], b[1000*10]).  The pragmas request an ap_memory interface
 * and a RAM_1P core for each of the four array arguments.
 *
 * NOTE(review): the meaning of the last two arguments (presumably the
 * input and output counts) is defined by fully_connected_layer - confirm
 * there.
 */
void top(int out[10], int w[1000*10], int b[1000*10], int x[1000])
{_ssdm_SpecArrayDimSize(b,1000*10);_ssdm_SpecArrayDimSize(w,1000*10);_ssdm_SpecArrayDimSize(x,1000);_ssdm_SpecArrayDimSize(out,10);
#pragma HLS INTERFACE ap_memory port=out
#pragma HLS INTERFACE ap_memory port=w
#pragma HLS INTERFACE ap_memory port=b
#pragma HLS INTERFACE ap_memory port=x
#pragma HLS RESOURCE variable=out core=RAM_1P
#pragma HLS RESOURCE variable=w core=RAM_1P
#pragma HLS RESOURCE variable=b core=RAM_1P
#pragma HLS RESOURCE variable=x core=RAM_1P
#pragma empty_line
 fully_connected_layer(out, w, b, x, 1000, 10);
}
Esempio n. 8
0
/*
 * top - top-level wrapper around fully_connected_layer.
 *
 * Forwards out, w, b and x unchanged, followed by the constants 1000 and
 * 1, which match the declared array extents (x[1000], out[1], w[1000*1],
 * b[1000*1]).  The _ssdm_op_SpecInterface calls tag each array argument
 * with "ap_memory" and the _ssdm_op_SpecResource calls tag each with
 * "RAM_1P".
 *
 * NOTE(review): the meaning of the last two arguments (presumably the
 * input and output counts) is defined by fully_connected_layer - confirm
 * there.
 */
void top(int out[1], int w[1000*1], int b[1000*1], int x[1000])
{_ssdm_SpecArrayDimSize(b,1000*1);_ssdm_SpecArrayDimSize(w,1000*1);_ssdm_SpecArrayDimSize(x,1000);_ssdm_SpecArrayDimSize(out,1);
_ssdm_op_SpecInterface(out, "ap_memory", 0, 0, 0, 0, "", "", "");
_ssdm_op_SpecInterface(w, "ap_memory", 0, 0, 0, 0, "", "", "");
_ssdm_op_SpecInterface(b, "ap_memory", 0, 0, 0, 0, "", "", "");
_ssdm_op_SpecInterface(x, "ap_memory", 0, 0, 0, 0, "", "", "");
_ssdm_op_SpecResource(out, "", "RAM_1P", "", -1, "", "", "");
_ssdm_op_SpecResource(w, "", "RAM_1P", "", -1, "", "", "");
_ssdm_op_SpecResource(b, "", "RAM_1P", "", -1, "", "", "");
_ssdm_op_SpecResource(x, "", "RAM_1P", "", -1, "", "", "");

 fully_connected_layer(out, w, b, x, 1000, 1);
}
Esempio n. 9
0
/*
 * Quant - combine an 8x8 block element-wise with QMatrix.
 *
 * X        : 8x8 input block.
 * function : 1 calls DOT_Multiply(X, QMatrix, Y); every other value
 *            (0 included) calls DOT_Divide(X, QMatrix, Y).
 * Y        : 8x8 output block.
 *
 * QMatrix is defined outside this function.
 */
void Quant(float X[8][8],
  unsigned char function,
  float Y[8][8])
{_ssdm_SpecArrayDimSize(X,8);_ssdm_SpecArrayDimSize(Y,8);
 if (function == 1) {
  DOT_Multiply(X, QMatrix, Y);
 } else {
  DOT_Divide(X, QMatrix, Y);
 }
}
Esempio n. 10
0
/*
 * MAT_Multiply2 - 8x8 single-precision matrix product C = A * B, annotated
 * with _ssdm_* directives (dataflow, array partitioning, loop pipelining).
 *
 * A, B : input matrices.
 * C    : output matrix; every element is overwritten.
 *
 * For each row of A the row is first copied into A_cached_row, so each
 * element of A is read exactly once, and then multiplied with every
 * column of B.
 *
 * The Row and RowCaching loop bodies are braced explicitly.  Without the
 * braces the body of RowCaching was only the _ssdm_op_SpecPipeline call,
 * so the copy ran once after the loop with k == 8, and the Col loop ran
 * once after the Row loop with i == 8; both accessed memory outside the
 * arrays.
 */
void MAT_Multiply2(float A[8][8],
  float B[8][8], float C[8][8])
{_ssdm_SpecArrayDimSize(A,8);_ssdm_SpecArrayDimSize(B,8);_ssdm_SpecArrayDimSize(C,8);
_ssdm_op_SpecDataflowPipeline(-1, "");
#33 "dct/matrixmath.c"

_ssdm_SpecArrayPartition( B, 1, "COMPLETE", 0, "");
#33 "dct/matrixmath.c"

 unsigned char i, j, k;
 float temp;
 float A_cached_row[8];
_ssdm_SpecArrayPartition( A_cached_row, 0, "COMPLETE", 0, "");
#36 "dct/matrixmath.c"


 Row: for (i=0; i<8; i++)
 {
  //Cache the whole row of matrix A
  RowCaching: for (k=0;k<8;k++)
  {
_ssdm_op_SpecPipeline(1, 1, 1, 0, "");
#41 "dct/matrixmath.c"
   A_cached_row[k]=A[i][k];
  }

  Col: for (j=0; j<8; j++)
  {
_ssdm_op_SpecPipeline(1, 1, 1, 0, "");
#44 "dct/matrixmath.c"

   //Dot product of the cached row with column j of B
   temp = 0;

   Product: for (k=0; k<8; k++)
   {
    temp += A_cached_row[k] * B[k][j];
   }
   C[i][j] = temp;
  }
 }
}
Esempio n. 11
0
/*
 * DCT - run an 8x8 block through a two-step matrix multiplication.
 *
 * X        : 8x8 input block.
 * function : 1 selects the Tinv-first sequence; every other value
 *            (0 included) selects the T-first sequence.
 * Y        : 8x8 output block, produced by the second multiplication.
 *
 * Both sequences go through a local 8x8 scratch matrix: MAT_Multiply
 * fills the scratch matrix and MAT_Multiply2 turns it into Y.  T and Tinv
 * are defined outside this function.
 * NOTE(review): function == 1 is presumably the inverse transform -
 * confirm against the definitions of T and Tinv.
 */
void DCT(float X[8][8],
  unsigned char function,
  float Y[8][8])
{_ssdm_SpecArrayDimSize(X,8);_ssdm_SpecArrayDimSize(Y,8);
#pragma empty_line
 float temp[8][8];

 if (function == 1) {
  /* Tinv on the left first, then T on the right. */
  MAT_Multiply(Tinv, X, temp);
  MAT_Multiply2(temp, T, Y);
 } else {
  /* T on the left first, then Tinv on the right. */
  MAT_Multiply(T, X, temp);
  MAT_Multiply2(temp, Tinv, Y);
 }
}
Esempio n. 12
0
/*
 * MAT_Multiply - 8x8 single-precision matrix product C = A * B, annotated
 * with _ssdm_* directives (dataflow, array partitioning, loop pipelining).
 *
 * A, B : input matrices.
 * C    : output matrix; every element is overwritten.
 *
 * B is first copied element by element into B_cached so that each element
 * of the B argument is read exactly once; the products then use only the
 * local copy.
 */
void MAT_Multiply(float A[8][8],
  float B[8][8], float C[8][8])
{_ssdm_SpecArrayDimSize(A,8);_ssdm_SpecArrayDimSize(B,8);_ssdm_SpecArrayDimSize(C,8);
_ssdm_op_SpecDataflowPipeline(-1, "");
#6 "dct/matrixmath.c"

_ssdm_SpecArrayPartition( A, 1, "COMPLETE", 0, "");
#6 "dct/matrixmath.c"

 unsigned char row, col, idx;
 float sum;
 float B_cached[8][8];
_ssdm_SpecArrayPartition( B_cached, 0, "COMPLETE", 0, "");
#9 "dct/matrixmath.c"


 /* Local copy of B, filled in row-major order. */
 LoadRow: for (row = 0; row < 8; ++row) {
  LoadCol: for (col = 0; col < 8; ++col) {
_ssdm_op_SpecPipeline(1, 1, 1, 0, "");
#12 "dct/matrixmath.c"

   B_cached[row][col] = B[row][col];
  }
 }

 Row: for (row = 0; row < 8; ++row) {
  Col: for (col = 0; col < 8; ++col) {
_ssdm_op_SpecPipeline(1, 1, 1, 0, "");
#19 "dct/matrixmath.c"

   /* Dot product of row `row` of A with column `col` of the copy. */
   sum = 0;

   Product: for (idx = 0; idx < 8; ++idx) {
    sum += A[row][idx] * B_cached[idx][col];
   }
   C[row][col] = sum;
  }
 }
}
/*
 * fir - one step of an 11-tap FIR filter.
 *
 * y : receives the accumulated sum for this call.
 * c : the 11 coefficients.
 * x : the new input sample.
 *
 * The previous samples are kept in a static 11-element delay line.  Each
 * call walks the taps from the last one down to tap 0, moving every stored
 * sample one slot towards the end, placing x in slot 0, and accumulating
 * sample * c[tap] on the way.
 */
void fir (
  data_t *y,
  coef_t c[11],
  data_t x
  ) {_ssdm_SpecArrayDimSize(c,11);
#pragma HLS RESOURCE variable=c core=RAM_1P_BRAM
#pragma line 52 "fir.c"

#pragma HLS INTERFACE ap_vld port=y
#pragma HLS INTERFACE ap_vld port=x
#pragma empty_line
#pragma empty_line
#pragma empty_line
#pragma empty_line
 static data_t shift_reg[11];
#pragma HLS ARRAY_PARTITION variable=shift_reg complete dim=1
#pragma line 59 "fir.c"

  acc_t sum;
  data_t sample;
  int tap;
#pragma empty_line
  sum = 0;
  Shift_Accum_Loop: for (tap = 11 - 1; tap >= 0; tap--) {
#pragma HLS UNROLL
#pragma line 65 "fir.c"

    if (tap != 0) {
      /* Stored samples move one slot towards the end of the delay line. */
      shift_reg[tap] = shift_reg[tap - 1];
      sample = shift_reg[tap];
    } else {
      /* Slot 0 takes the new input sample. */
      shift_reg[0] = x;
      sample = x;
    }
    sum += sample * c[tap];
  }
  *y = sum;
}
Esempio n. 14
0
/*
 * fir - one step of an 11-tap FIR filter, annotated with _ssdm_*
 * directives (resource, interface, array partition, unroll).
 *
 * y : receives the accumulated sum for this call.
 * c : the 11 coefficients.
 * x : the new input sample.
 *
 * The previous samples are kept in a static 11-element delay line.  Each
 * call walks the taps from the last one down to tap 0, moving every stored
 * sample one slot towards the end, placing x in slot 0, and accumulating
 * sample * c[tap] on the way.
 */
void fir (
  data_t *y,
  coef_t c[11],
  data_t x
  ) {_ssdm_SpecArrayDimSize(c,11);
_ssdm_op_SpecResource(c, "", "RAM_1P_BRAM", "", -1, "", "", "");
# 52 "fir.c"

_ssdm_op_SpecInterface(y, "ap_vld", 0, 0, 0, 0, "", "", "");
_ssdm_op_SpecInterface(x, "ap_vld", 0, 0, 0, 0, "", "", "");




 static data_t shift_reg[11];
_ssdm_SpecArrayPartition( shift_reg, 1, "COMPLETE", 0, "");
# 59 "fir.c"

  acc_t sum;
  data_t sample;
  int tap;

  sum = 0;
  Shift_Accum_Loop: for (tap = 11 - 1; tap >= 0; tap--) {
_ssdm_Unroll(0,0,0, "");
# 65 "fir.c"

    if (tap != 0) {
      /* Stored samples move one slot towards the end of the delay line. */
      shift_reg[tap] = shift_reg[tap - 1];
      sample = shift_reg[tap];
    } else {
      /* Slot 0 takes the new input sample. */
      shift_reg[0] = x;
      sample = x;
    }
    sum += sample * c[tap];
  }
  *y = sum;
}
/*
 * acc_vadd_hls - element-wise vector operation driven by a command port.
 *
 * cmd    : read three times, in this order: operation code, end index,
 *          start index.
 * resp   : set to 1 during the last iteration (i == end-1) of the
 *          selected loop.
 * a, b   : input vectors.
 * result : output vector; elements [start, end) are written.
 *
 * op == 1 runs add_Loop and op == 2 runs sub_Loop; any other op does
 * nothing and leaves *resp unwritten.  When start >= end no element is
 * processed and *resp is not written either.
 *
 * NOTE(review): start and end are used as array indices without a range
 * check against the declared size of 4096 - confirm the sender guarantees
 * valid values.
 */
void acc_vadd_hls ( volatile int *cmd, volatile int *resp, int a[4096], int b[4096], int result[4096] ) {_ssdm_SpecArrayDimSize(result,4096);_ssdm_SpecArrayDimSize(b,4096);_ssdm_SpecArrayDimSize(a,4096);

_ssdm_op_SpecInterface(0, "ap_ctrl_none", 0, 0, 0, 0, "", "", "");
_ssdm_op_SpecInterface(cmd, "axis", 0, 0, 0, 16, "", "", "");
_ssdm_op_SpecInterface(resp, "axis", 0, 0, 0, 16, "", "", "");
_ssdm_op_SpecInterface(a, "bram", 0, 0, 0, 1024, "", "", "");
_ssdm_op_SpecInterface(b, "bram", 0, 0, 0, 1024, "", "", "");
_ssdm_op_SpecInterface(result, "bram", 0, 0, 0, 1024, "", "", "");

_ssdm_op_SpecResource(a, "", "RAM_1P_BRAM", "", "", "", "");
_ssdm_op_SpecResource(b, "", "RAM_1P_BRAM", "", "", "", "");
_ssdm_op_SpecResource(result, "", "RAM_1P_BRAM", "", "", "", "");

 int i,op, start,end;
 // Three consecutive reads of the volatile command port; the order matters.
 op = *cmd; // first word: operation code
 end = *cmd; // second word: exclusive end index
 start = *cmd; // third word: first index
 if (op == 1)
  add_Loop: for (i = start; i < end; i++) {_ssdm_op_SpecLoopName("add_Loop");_ssdm_RegionBegin("add_Loop");
     result[i]= a[i] + b[i];
      if (i == end-1) {
       *resp= 1; // signal completion on the last element
      }
 _ssdm_RegionEnd("add_Loop");}
 // NOTE(review): the loop is named sub_Loop but the statement adds b[i]
 // and a[i], giving the same result as add_Loop - confirm whether a
 // subtraction (and which operand order) was intended.
 else if (op == 2)
  sub_Loop: for (i = start; i < end; i++) {_ssdm_op_SpecLoopName("sub_Loop");_ssdm_RegionBegin("sub_Loop");
     result[i]= b[i] + a[i];
      if (i == end-1) {
       *resp= 1; // signal completion on the last element
      }
 _ssdm_RegionEnd("sub_Loop");}
}
Esempio n. 16
0
/*
 * MAT_Multiply - integer matrix product C = A * B in fixed 100x100
 * storage, with A, B and C declared as ap_fifo interfaces.
 *
 * A, B   : input matrices.
 * C      : output matrix; only entries with row < mC and column < nC are
 *          written.
 * mA, nA : row / column count of A.
 * mB, nB : row / column count of B.
 * mC, nC : row / column count of C.
 *
 * Nothing is written unless nA == mB, mA == mC and nB == nC.
 *
 * The current row of A is copied into A_cached_row when j == 0, and each
 * used column of B is copied into B_cached while i == 0, so the products
 * read only the local copies.
 *
 * Every loop that carries an UNROLL pragma has an explicit braced body
 * with the pragma as its first line.  Previously the pragmas of the two
 * caching loops sat between the loop header and an unbraced statement,
 * which is not "inside the loop body" and lets a tool that turns the
 * pragma into a statement push the copy out of the loop.
 */
void MAT_Multiply(int A[100][100],
  int B[100][100], long C[100][100],
  unsigned char mA, unsigned char nA, unsigned char mB,
  unsigned char nB, unsigned char mC, unsigned char nC)
{_ssdm_SpecArrayDimSize(A,100);_ssdm_SpecArrayDimSize(B,100);_ssdm_SpecArrayDimSize(C,100);
#pragma HLS INTERFACE ap_fifo port=C
#8 "parta_2/matrixmath.c"

#pragma HLS INTERFACE ap_fifo port=B
#8 "parta_2/matrixmath.c"

#pragma HLS INTERFACE ap_fifo port=A
#8 "parta_2/matrixmath.c"

 unsigned char i, j, k;
 long temp;
 int A_cached_row[100];
#pragma HLS ARRAY_PARTITION variable=A_cached_row cyclic factor=10 dim=1
#11 "parta_2/matrixmath.c"

 int B_cached[100][100];
#pragma HLS ARRAY_PARTITION variable=B_cached complete factor=10 dim=1
#12 "parta_2/matrixmath.c"


 if ((nA == mB)&(mA == mC)&(nB==nC))//Multiplication only when the dimensions are suitable
 {
  Row: for (i=0; i<100; i++)
   Col: for (j=0; j<100; j++)
   {
    if ((i<mC)&(j<nC))
    {
     temp = 0;
     if (j==0)
     {
      //Cache the whole row of matrix A
      RowCaching: for (k=0;k<100;k++)
      {
#pragma HLS UNROLL factor=20
#28 "parta_2/matrixmath.c"
       A_cached_row[k]=A[i][k];
      }
     }

     //Cache all the columns of matrix B, see Fig. 7.21. B will be read only once
     if (i==0)
     {
      ColCaching: for (k=0;k<100;k++)
      {
#pragma HLS UNROLL factor=20
#35 "parta_2/matrixmath.c"
       B_cached[k][j]=B[k][j];
      }
     }

     Product: for (k=0; k<100; k++)
     {
#pragma HLS UNROLL factor=20
#39 "parta_2/matrixmath.c"

        if (k<nA)
         //Widen before multiplying: int*int can overflow, the sum is long
         temp += (long)A_cached_row[k] * B_cached[k][j];
     }
     C[i][j] = temp;
    }
   }
 }
}
Esempio n. 17
0
/*
 * dma_filter - copy a DMA configuration from input_config to output_config
 * only after checking it.
 *
 * input_config  : 0x58-word register block written by the requester.
 * output_config : 0x58-word register block of the DMA engine.
 * minAddress    : lowest non-zero address that is accepted.
 * reset         : when true the function returns without doing anything.
 *
 * Word layout used here (same indices in both blocks):
 *   [0]  read control   (bit 0 = enable, bit 12 = interrupt enable)
 *   [6]  read address    [10] read length
 *   [12] write control  (bit 0 = enable, bit 12 = interrupt enable)
 *   [18] write address   [22] write length
 *
 * After clearing input_config the function polls it.  A request is
 * forwarded once at least one enable bit is set, at least one address is
 * non-zero, no non-zero address lies below minAddress and neither length
 * is zero.  After forwarding, input_config is cleared again.
 *
 * Fixes relative to the previous version:
 *  - the enable / interrupt flags were extracted with `&=`, which
 *    overwrote the control word with its bit 0, so the interrupt flag was
 *    always false; they are now extracted with `&`;
 *  - the write interrupt bit was gated on write_enable instead of
 *    write_interrupts;
 *  - unused outer locals that were shadowed inside the loop are removed.
 *
 * NOTE: `wait` is never cleared, so WAIT_LOOP does not terminate, and
 * `reset` is only examined on entry.
 */
void dma_filter(volatile unsigned int input_config[0x58], volatile unsigned int output_config[0x58],
  volatile unsigned minAddress, bool reset){_ssdm_SpecArrayDimSize(input_config,0x58);_ssdm_SpecArrayDimSize(output_config,0x58);
_ssdm_op_SpecWire(&reset, "ap_none", 0, 0, 0, 1, "", "", "");

_ssdm_op_SpecWire(&minAddress, "s_axilite", 0, 0, 0, 0, "", "", "");

_ssdm_op_SpecWire(output_config, "m_axi", 0, 0, 0, 0, "", "", "");

_ssdm_op_SpecWire(input_config, "s_axilite", 0, 0, 0, 0, "", "", "");

 if(!reset){
  bool wait = true;
  int i;
  INITIAL_LOOP: for(i=0; i<0x58; i++){_ssdm_op_SpecLoopName("INITIAL_LOOP");_ssdm_RegionBegin("INITIAL_LOOP");
_ssdm_op_SpecPipeline(1, 1, 1, 0, "");
 input_config[i] = 0;
  _ssdm_RegionEnd("INITIAL_LOOP");}

  //These two persist across iterations of WAIT_LOOP
  bool read_ready = false;
  bool write_ready = false;

  WAIT_LOOP: while(wait){_ssdm_op_SpecLoopName("WAIT_LOOP");_ssdm_RegionBegin("WAIT_LOOP");
   bool clear = false;

   //Snapshot of the requested read channel configuration
   const unsigned read_config = input_config[0];
   const bool read_enable = (read_config & 1) != 0;
   const bool read_interrupts = (read_config & 4096) != 0;
   const unsigned read_address = input_config[6];
   const unsigned read_length = input_config[10];

   //Snapshot of the requested write channel configuration
   const unsigned write_config = input_config[12];
   const bool write_enable = (write_config & 1) != 0;
   const bool write_interrupts = (write_config & 4096) != 0;
   const unsigned write_address = input_config[18];
   const unsigned write_length = input_config[22];

   //Nothing requested yet: keep polling
   if(!read_enable && !write_enable){
    continue;
   } else if(read_address == 0 && write_address==0){
    continue;
   }

   //Reject a read address below the allowed minimum, or an empty transfer
   if(read_address > 0 && read_address < minAddress){
    continue;
   } else if(read_length == 0){
    continue;
   } else{
    read_ready = true;
   }

   //Same checks for the write channel
   if(write_address > 0 && write_address < minAddress){
    continue;
   } else if(write_length == 0){
    continue;
   } else{
    write_ready = true;
   }
   if(read_ready){
    //enable read dma block
    output_config[0] |= 1;
    //enable read interrupts
    if(read_interrupts){
     output_config[0] |= 4096;
    }
    //write source address
    output_config[6] = read_address;
    output_config[10] = read_length;
    clear = true;
   }

   if(write_ready){
    //enable s2mm on write dma block
    output_config[12] |= 1;
    //enable write interrupts
    if(write_interrupts){
     output_config[12] |= 4096;
    }
    //write dest address
    output_config[18] = write_address;
    output_config[22] = write_length;
    clear = true;
   }

   //Request forwarded: wipe the input block so it is not processed twice
   if(clear){
    CLEAR_LOOP: for(i=0; i<0x58; i++){_ssdm_op_SpecLoopName("CLEAR_LOOP");_ssdm_RegionBegin("CLEAR_LOOP");
_ssdm_op_SpecPipeline(1, 1, 1, 0, "");
 input_config[i] = 0;
    _ssdm_RegionEnd("CLEAR_LOOP");}
   }
  _ssdm_RegionEnd("WAIT_LOOP");}

 }
}
/*
 * acc_vadd_hls - element-wise vector operation driven by a command port.
 *
 * cmd    : read three times, in this order: operation code, end index,
 *          start index.
 * resp   : set to 1 during the last iteration (i == end-1) of the
 *          selected loop.
 * a, b   : input vectors.
 * result : output vector; elements [start, end) are written.
 *
 * op == 1 runs add_Loop and op == 2 runs sub_Loop; any other op does
 * nothing and leaves *resp unwritten.  When start >= end no element is
 * processed and *resp is not written either.
 *
 * NOTE(review): start and end are used as array indices without a range
 * check against the declared size of 4096 - confirm the sender guarantees
 * valid values.
 */
void acc_vadd_hls ( volatile int *cmd, volatile int *resp, int a[4096], int b[4096], int result[4096] ) {_ssdm_SpecArrayDimSize(result,4096);_ssdm_SpecArrayDimSize(b,4096);_ssdm_SpecArrayDimSize(a,4096);
#pragma empty_line
#pragma HLS INTERFACE ap_ctrl_none port=return
#pragma HLS INTERFACE axis depth=16 port=cmd
#pragma HLS INTERFACE axis depth=16 port=resp
#pragma HLS INTERFACE bram depth=1024 port=a
#pragma HLS INTERFACE bram depth=1024 port=b
#pragma HLS INTERFACE bram depth=1024 port=result
#pragma empty_line
#pragma HLS RESOURCE variable=a core=RAM_1P_BRAM
#pragma HLS RESOURCE variable=b core=RAM_1P_BRAM
#pragma HLS RESOURCE variable=result core=RAM_1P_BRAM
#pragma empty_line
 int i,op, start,end;
 // Three consecutive reads of the volatile command port; the order matters.
 op = *cmd; // first word: operation code
 end = *cmd; // second word: exclusive end index
 start = *cmd; // third word: first index
 if (op == 1)
  add_Loop: for (i = start; i < end; i++) {_ssdm_op_SpecLoopName("add_Loop");_ssdm_RegionBegin("add_Loop");
     result[i]= a[i] + b[i];
      if (i == end-1) {
       *resp= 1; // signal completion on the last element
      }
 _ssdm_RegionEnd("add_Loop");}
 // NOTE(review): the loop is named sub_Loop but the statement adds b[i]
 // and a[i], giving the same result as add_Loop - confirm whether a
 // subtraction (and which operand order) was intended.
 else if (op == 2)
  sub_Loop: for (i = start; i < end; i++) {_ssdm_op_SpecLoopName("sub_Loop");_ssdm_RegionBegin("sub_Loop");
     result[i]= b[i] + a[i];
      if (i == end-1) {
       *resp= 1; // signal completion on the last element
      }
 _ssdm_RegionEnd("sub_Loop");}
}
Esempio n. 19
0
/*
 * MAT_Multiply - integer matrix product C = A * B in fixed 100x100
 * storage, annotated with _ssdm_* directives (ap_fifo interfaces, array
 * partitioning, loop unrolling).
 *
 * A, B   : input matrices.
 * C      : output matrix; only entries with row < mC and column < nC are
 *          written.
 * mA, nA : row / column count of A.
 * mB, nB : row / column count of B.
 * mC, nC : row / column count of C.
 *
 * Nothing is written unless nA == mB, mA == mC and nB == nC.
 *
 * The current row of A is copied into A_cached_row when j == 0, and each
 * used column of B is copied into B_cached while i == 0, so the products
 * read only the local copies.
 *
 * The RowCaching and ColCaching bodies are braced explicitly.  Without the
 * braces the loop body was only the _ssdm_Unroll call, so the copy ran a
 * single time after the loop with k == 100: an out-of-bounds access that
 * also left the caches unfilled.
 */
void MAT_Multiply(int A[100][100],
  int B[100][100], long C[100][100],
  unsigned char mA, unsigned char nA, unsigned char mB,
  unsigned char nB, unsigned char mC, unsigned char nC)
{_ssdm_SpecArrayDimSize(A,100);_ssdm_SpecArrayDimSize(B,100);_ssdm_SpecArrayDimSize(C,100);
_ssdm_op_SpecInterface(C, "ap_fifo", 0, 0, 0, 0, "", "", "");
#8 "parta_2/matrixmath.c"

_ssdm_op_SpecInterface(B, "ap_fifo", 0, 0, 0, 0, "", "", "");
#8 "parta_2/matrixmath.c"

_ssdm_op_SpecInterface(A, "ap_fifo", 0, 0, 0, 0, "", "", "");
#8 "parta_2/matrixmath.c"

 unsigned char i, j, k;
 long temp;
 int A_cached_row[100];
_ssdm_SpecArrayPartition( A_cached_row, 1, "CYCLIC", 10, "");
#11 "parta_2/matrixmath.c"

 int B_cached[100][100];
_ssdm_SpecArrayPartition( B_cached, 1, "COMPLETE", 10, "");
#12 "parta_2/matrixmath.c"


 if ((nA == mB)&(mA == mC)&(nB==nC))//Multiplication only when the dimensions are suitable
 {
  Row: for (i=0; i<100; i++)
   Col: for (j=0; j<100; j++)
   {
    if ((i<mC)&(j<nC))
    {
     temp = 0;
     if (j==0)
     {
      //Cache the whole row of matrix A
      RowCaching: for (k=0;k<100;k++)
      {
_ssdm_Unroll(1, 0, 20, "");
#28 "parta_2/matrixmath.c"
       A_cached_row[k]=A[i][k];
      }
     }

     //Cache all the columns of matrix B, see Fig. 7.21. B will be read only once
     if (i==0)
     {
      ColCaching: for (k=0;k<100;k++)
      {
_ssdm_Unroll(1, 0, 20, "");
#35 "parta_2/matrixmath.c"
       B_cached[k][j]=B[k][j];
      }
     }

     Product: for (k=0; k<100; k++)
     {
_ssdm_Unroll(1, 0, 20, "");
#39 "parta_2/matrixmath.c"

        if (k<nA)
         //Widen before multiplying: int*int can overflow, the sum is long
         temp += (long)A_cached_row[k] * B_cached[k][j];
     }
     C[i][j] = temp;
    }
   }
 }
}