Esempio n. 1
0
/*
 * Compile-only exercise of the vmov_n_s16 intrinsic.
 *
 * The scalar argument is intentionally left unassigned and the vector
 * result is stored in a local that is not used afterwards; the function
 * exists only so that a call to the intrinsic appears in the translation
 * unit.
 */
void test_vmov_ns16 (void)
{
  int16_t scalar_in;
  int16x4_t vector_out = vmov_n_s16 (scalar_in);
}
/*
 * Multiply two square matrices of int16 values with NEON and write the
 * 32-bit results into prod.
 *
 * mat1 is read as matrix_size * matrix_size consecutive elements.
 * mat2 is read four elements at a time; after each group of
 * matrix_size/4 loads the read index skips a further
 * (2 * matrix_size - matrix_size) elements, i.e. when matrix_size is a
 * multiple of 4 its rows are 2 * matrix_size elements apart.
 * prod receives one row each time a full row of mat1 has been consumed.
 *
 * Only the first (matrix_size / 4) * 4 columns are processed, so
 * matrix_size is expected to be a multiple of 4.
 *
 * NOTE(review): MAC4 is defined elsewhere; from its use here it
 * presumably computes out = add + a * b lane-wise -- confirm.
 *
 * The previous version stored the results into a malloc'ed scratch
 * buffer that was neither checked for NULL, copied into prod, nor freed.
 * The accumulators are now stored straight into prod, so no heap
 * allocation is needed.
 */
void matMult(int16_t mat1[], int16_t mat2[], int32_t prod[matrix_size][matrix_size])
{
	int output_size = 2 * matrix_size;
	int l,k;
	int16x4_t data1;
	int32x4_t mac_output[output_size/4];
	int32x4_t MAC_addvalue[output_size/4];
	int16x4_t constant_value;
	unsigned int index_input = 0;
	int row = 0;

	/* Start with cleared accumulators. */
	for(k = 0 ; k < matrix_size/4; k++)
	{
		MAC_addvalue[k] = vmovq_n_s32(0);
	}

	/* Perform the multiplication */
	for(l = 0; l < matrix_size*matrix_size; l++)
	{
		/* Replicate the current mat1 element across all four lanes. */
		constant_value = vmov_n_s16 (mat1[l]);
		for(k = 0 ; k < matrix_size/4 ; k++)
		{
			data1 = vld1_s16 (&mat2[index_input]);
			MAC4 (&MAC_addvalue[k], &constant_value, &data1,&mac_output[k]);
			MAC_addvalue[k] = mac_output[k];
			index_input +=4;
		}

		/* Skip the remainder of the current mat2 row. */
		index_input+=output_size-matrix_size;
		if ((l + 1) % matrix_size == 0 )
		{
			/* A whole row of mat1 has been consumed: rewind mat2,
			 * emit the finished output row and clear the
			 * accumulators for the next one. */
			index_input = 0;

			for(k = 0 ; k < matrix_size/4 ; k++)
			{
				vst1q_s32(&prod[row][4 * k],MAC_addvalue[k]);
				MAC_addvalue[k] = vmovq_n_s32(0);
			}

			row++;
		}
	}
}
Esempio n. 3
0
// Code-generation test for the vmov_n_s16 intrinsic: returns the result of
// calling vmov_n_s16 on the scalar argument v1.
// The comments inside the body are pattern directives for an output-matching
// tool and must stay exactly as written; they expect a `dup` from a w register
// into a .4h vector register.
int16x4_t test_vmov_n_s16(int16_t v1) {
  // CHECK: test_vmov_n_s16
  return vmov_n_s16(v1);
  // CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}}
}