コード例 #1
0
ファイル: vgetQ_laneu16.c プロジェクト: 0day-ci/gcc
void test_vgetQ_laneu16 (void)
{
  uint16_t out_uint16_t;
  uint16x8_t arg0_uint16x8_t;

  out_uint16_t = vgetq_lane_u16 (arg0_uint16x8_t, 1);
}
コード例 #2
0
ファイル: mw_neon.c プロジェクト: Daniel-Hwang/eeg
/* u16x8 mv mul */
void mw_neon_mv_mul_u16x8(unsigned short * A, int Row, int T, unsigned short * B, unsigned short * C)
{
	int i = 0;
	int k = 0;

	uint16x8_t neon_b, neon_c;
	uint16x8_t neon_a0, neon_a1, neon_a2, neon_a3, neon_a4, neon_a5, neon_a6, neon_a7;
	uint16x8_t neon_b0, neon_b1, neon_b2, neon_b3, neon_b4, neon_b5, neon_b6, neon_b7;

	for (i = 0; i < Row; i+=8)
	{
		neon_c = vmovq_n_u16(0);

		for (k = 0; k < T; k+=8)
		{
			int j = k * T + i;

			neon_a0 = vld1q_u16(A + j);
			j+=Row;
			neon_a1 = vld1q_u16(A + j);
			j+=Row;
			neon_a2 = vld1q_u16(A + j);
			j+=Row;
			neon_a3 = vld1q_u16(A + j);
			j+=Row;
			neon_a4 = vld1q_u16(A + j);
			j+=Row;
			neon_a5 = vld1q_u16(A + j);
			j+=Row;
			neon_a6 = vld1q_u16(A + j);
			j+=Row;
			neon_a7 = vld1q_u16(A + j);

			neon_b = vld1q_u16(B + k);
			neon_b0 = vdupq_n_u16(vgetq_lane_u16(neon_b, 0));
			neon_b1 = vdupq_n_u16(vgetq_lane_u16(neon_b, 1));
			neon_b2 = vdupq_n_u16(vgetq_lane_u16(neon_b, 2));
			neon_b3 = vdupq_n_u16(vgetq_lane_u16(neon_b, 3));
			neon_b4 = vdupq_n_u16(vgetq_lane_u16(neon_b, 4));
			neon_b5 = vdupq_n_u16(vgetq_lane_u16(neon_b, 5));
			neon_b6 = vdupq_n_u16(vgetq_lane_u16(neon_b, 6));
			neon_b7 = vdupq_n_u16(vgetq_lane_u16(neon_b, 7));

			neon_c = vaddq_u16(vmulq_u16(neon_a0, neon_b0), neon_c);
			neon_c = vaddq_u16(vmulq_u16(neon_a1, neon_b1), neon_c);
			neon_c = vaddq_u16(vmulq_u16(neon_a2, neon_b2), neon_c);
			neon_c = vaddq_u16(vmulq_u16(neon_a3, neon_b3), neon_c);
			neon_c = vaddq_u16(vmulq_u16(neon_a4, neon_b4), neon_c);
			neon_c = vaddq_u16(vmulq_u16(neon_a5, neon_b5), neon_c);
			neon_c = vaddq_u16(vmulq_u16(neon_a6, neon_b6), neon_c);
			neon_c = vaddq_u16(vmulq_u16(neon_a7, neon_b7), neon_c);

		}

		vst1q_u16(C + i, neon_c);
	}
}
コード例 #3
0
uint16_t test_vgetq_lane_u16(uint16x8_t v1) {
  // CHECK: test_vgetq_lane_u16
  return vgetq_lane_u16(v1, 6);
  // CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[6]
}
/**
 * Get the sum of absolute differences for a specific pixel location and disparity
 *
 * @param leftImage left image
 * @param rightImage right image
 * @param laplacianL laplacian-fitlered left image
 * @param laplacianR laplacian-filtered right image
 * @param pxX row pixel location
 * @param pxY column pixel location
 * @param state state structure that includes a number of parameters
 * @param left_interest optional parameter that will be filled with the value for the left interest operation
 * @param right_interest same as above, for the right image
 *
 * @retval scaled sum of absolute differences for this block --
 *      the value is the sum/numberOfPixels
 */
int PushbroomStereo::GetSAD(Mat leftImage, Mat rightImage, Mat laplacianL, Mat laplacianR, int pxX, int pxY, int *left_interest, int *right_interest, int *raw_sad)
{
    // top left corner of the SAD box
    int startX = pxX;
    int startY = pxY;

    // bottom right corner of the SAD box
    #ifndef USE_NEON
        int endX = pxX + m_iBlockSize - 1;
    #endif

    int endY = pxY + m_iBlockSize - 1;

    #if USE_SAFTEY_CHECKS
        int flag = false;
        if (startX < 0)
        {
            printf("Warning: startX < 0\n");
            flag = true;
        }

        if (endX > rightImage.cols)
        {
            printf("Warning: endX > leftImage.cols\n");
            flag = true;
        }

        if (startX + disparity < 0)
        {
            printf("Warning: startX + disparity < 0\n");
            flag = true;
        }

        if (endX + disparity > rightImage.cols)
        {
            printf("Warning: endX + disparity > leftImage.cols\n");
            flag = true;
        }

        if (endX + disparity > rightImage.cols)
        {
            printf("Warning: endX + disparity > rightImage.cols\n");
            endX = rightImage.cols - disparity;
            flag = true;
        }

        if (startY < 0) {
            printf("Warning: startY < 0\n");
            flag = true;
        }

        if (endY > rightImage.rows) {
            printf("Warning: endY > rightImage.rows\n");
            flag = true;
        }

        // disparity might be negative as well
        if (disparity < 0 && startX + disparity < 0)
        {
            printf("Warning: disparity < 0 && startX + disparity < 0\n");
            startX = -disparity;
            flag = true;
        }

        if (flag == true)
        {
            printf("startX = %d, endX = %d, disparity = %d, startY = %d, endY = %d\n", startX, endX, disparity, startY, endY);
        }



        startX = max(0, startX);
        startY = max(0, startY);

        endX = min(leftImage.cols - disparity, endX);
        endY = min(leftImage.rows, endY);
    #endif

    int leftVal = 0, rightVal = 0;

    int sad = 0;

    #ifdef USE_NEON
        uint16x8_t interest_op_sum_8x_L, interest_op_sum_8x_R, sad_sum_8x;

        // load zeros into everything
        interest_op_sum_8x_L = vdupq_n_u16(0);
        interest_op_sum_8x_R = vdupq_n_u16(0);
        sad_sum_8x = vdupq_n_u16(0);

    #endif
    for (int i=startY;i<=endY;i++) 
	{
		if(i>=leftImage.rows-1)
			continue;
        //Get a pointer for this row
        uchar *this_rowL = leftImage.ptr<uchar>(i);
        uchar *this_rowR = rightImage.ptr<uchar>(i);

        uchar *this_row_laplacianL = laplacianL.ptr<uchar>(i);
        uchar *this_row_laplacianR = laplacianR.ptr<uchar>(i);

        #ifdef USE_NEON
            // load this row into memory
            uint8x8_t this_row_8x8_L = vld1_u8(this_rowL + startX);
            uint8x8_t this_row_8x8_R = vld1_u8(this_rowR + startX + disparity);

            uint8x8_t interest_op_8x8_L = vld1_u8(this_row_laplacianL + startX);
            uint8x8_t interest_op_8x8_R = vld1_u8(this_row_laplacianR + startX + disparity);

            // do absolute differencing for the entire row in one operation!
            uint8x8_t sad_8x = vabd_u8(this_row_8x8_L, this_row_8x8_R);

            // sum up
            sad_sum_8x = vaddw_u8(sad_sum_8x, sad_8x);

            // sum laplacian values
            interest_op_sum_8x_L = vaddw_u8(interest_op_sum_8x_L, interest_op_8x8_L);
            interest_op_sum_8x_R = vaddw_u8(interest_op_sum_8x_R, interest_op_8x8_R);

        #else // USE_NEON
            for (int j=startX;j<=endX;j++) {
                // we are now looking at a single pixel value
                /*uchar pxL = leftImage.at<uchar>(i,j);
                uchar pxR = rightImage.at<uchar>(i,j + disparity);

                uchar sL = laplacianL.at<uchar>(i,j);
                uchar sR = laplacianR.at<uchar>(i,j + disparity);
                */


                uchar sL = this_row_laplacianL[j];//laplacianL.at<uchar>(i,j);
                uchar sR = this_row_laplacianR[j + m_iDisparity]; //laplacianR.at<uchar>(i,j + disparity);

                leftVal += sL;
                rightVal += sR;

                uchar pxL = this_rowL[j];
                uchar pxR = this_rowR[j + m_iDisparity];

                sad += abs(pxL - pxR);
            }
        #endif // USE_NEON
    }

    #ifdef USE_NEON
        // sum up
        sad = vgetq_lane_u16(sad_sum_8x, 0) + vgetq_lane_u16(sad_sum_8x, 1)
           + vgetq_lane_u16(sad_sum_8x, 2) + vgetq_lane_u16(sad_sum_8x, 3)
           + vgetq_lane_u16(sad_sum_8x, 4);// + vgetq_lane_u16(sad_sum_8x, 5)
    //           + vgetq_lane_u16(sad_sum_8x, 6) + vgetq_lane_u16(sad_sum_8x, 7);

        leftVal = vgetq_lane_u16(interest_op_sum_8x_L, 0)
                + vgetq_lane_u16(interest_op_sum_8x_L, 1)
                + vgetq_lane_u16(interest_op_sum_8x_L, 2)
                + vgetq_lane_u16(interest_op_sum_8x_L, 3)
                + vgetq_lane_u16(interest_op_sum_8x_L, 4);


        rightVal = vgetq_lane_u16(interest_op_sum_8x_R, 0)
                 + vgetq_lane_u16(interest_op_sum_8x_R, 1)
                 + vgetq_lane_u16(interest_op_sum_8x_R, 2)
                 + vgetq_lane_u16(interest_op_sum_8x_R, 3)
                 + vgetq_lane_u16(interest_op_sum_8x_R, 4);
    #endif

    //cout << "(" << leftVal << ", " << rightVal << ") vs. (" << leftVal2 << ", " << rightVal2 << ")" << endl;

    int laplacian_value = leftVal + rightVal;

	int fThresh = 200;
	if((leftVal<fThresh)||(rightVal<fThresh))
		laplacian_value /= 10;
    //cout << "sad with neon: " << sad << " without neon: " << sad2 << endl;
    if (left_interest != NULL)         *left_interest = leftVal; 
    if (right_interest != NULL)        *right_interest = rightVal;

    // percentage of total interest value that is different
    //float diff_score = 100*(float)abs(leftVal - rightVal)/(float)laplacian_value;

    if (raw_sad != NULL)        *raw_sad = sad;
    if (leftVal < m_iSobelLimit || rightVal < m_iSobelLimit)// || diff_score > state.interest_diff_limit)
        return -1;

    // weight laplacian_value into the score

    //return sobel;
    return NUMERIC_CONST*(float)sad/(float)laplacian_value;
}
コード例 #5
0
ファイル: aarch64-neon-vget.c プロジェクト: Blizzard/clang
uint16_t test_vgetq_lane_u16(uint16x8_t a) {
  // CHECK-LABEL: test_vgetq_lane_u16:
  // CHECK-NEXT:  umov.h w0, v0[7]
  // CHECK-NEXT:  ret
  return vgetq_lane_u16(a, 7);
}
コード例 #6
0
ファイル: mw_neon.c プロジェクト: Daniel-Hwang/eeg
/* u16x8 mm mul */
void mw_neon_mm_mul_u16x8(unsigned short * A, int Row, int T, unsigned short * B, int Col, unsigned short * C)
{
	int i, k, j;

	uint16x8_t neon_b, neon_c;
	uint16x8_t neon_a0, neon_a1, neon_a2, neon_a3, neon_a4, neon_a5, neon_a6, neon_a7;
	uint16x8_t neon_b0, neon_b1, neon_b2, neon_b3, neon_b4, neon_b5, neon_b6, neon_b7;

	for (i = 0; i < Row; i+=8)
	{

		for (k = 0; k < Col; k+=1)
		{
			neon_c = vmovq_n_u16(0);

			for (j = 0; j < T; j+=8)
			{

				int j_T = j * T + i;
				int k_Row = k * Row;

				neon_a0 = vld1q_u16(A + j_T);
				j_T+=Row;
				neon_a1 = vld1q_u16(A + j_T);
				j_T+=Row;
				neon_a2 = vld1q_u16(A + j_T);
				j_T+=Row;
				neon_a3 = vld1q_u16(A + j_T);
				j_T+=Row;
				neon_a4 = vld1q_u16(A + j_T);
				j_T+=Row;
				neon_a5 = vld1q_u16(A + j_T);
				j_T+=Row;
				neon_a6 = vld1q_u16(A + j_T);
				j_T+=Row;
				neon_a7 = vld1q_u16(A + j_T);

				neon_b = vld1q_u16(B + k_Row + j);
				neon_b0 = vdupq_n_u16(vgetq_lane_u16(neon_b, 0));
				neon_b1 = vdupq_n_u16(vgetq_lane_u16(neon_b, 1));
				neon_b2 = vdupq_n_u16(vgetq_lane_u16(neon_b, 2));
				neon_b3 = vdupq_n_u16(vgetq_lane_u16(neon_b, 3));
				neon_b4 = vdupq_n_u16(vgetq_lane_u16(neon_b, 4));
				neon_b5 = vdupq_n_u16(vgetq_lane_u16(neon_b, 5));
				neon_b6 = vdupq_n_u16(vgetq_lane_u16(neon_b, 6));
				neon_b7 = vdupq_n_u16(vgetq_lane_u16(neon_b, 7));

				neon_c = vaddq_u16(vmulq_u16(neon_a0, neon_b0), neon_c);
				neon_c = vaddq_u16(vmulq_u16(neon_a1, neon_b1), neon_c);
				neon_c = vaddq_u16(vmulq_u16(neon_a2, neon_b2), neon_c);
				neon_c = vaddq_u16(vmulq_u16(neon_a3, neon_b3), neon_c);
				neon_c = vaddq_u16(vmulq_u16(neon_a4, neon_b4), neon_c);
				neon_c = vaddq_u16(vmulq_u16(neon_a5, neon_b5), neon_c);
				neon_c = vaddq_u16(vmulq_u16(neon_a6, neon_b6), neon_c);
				neon_c = vaddq_u16(vmulq_u16(neon_a7, neon_b7), neon_c);

				vst1q_lane_u16(C + k_Row + i, neon_c, 0);
				vst1q_lane_u16(C + k_Row + i + 1, neon_c, 1);
				vst1q_lane_u16(C + k_Row + i + 2, neon_c, 2);
				vst1q_lane_u16(C + k_Row + i + 3, neon_c, 3);
				vst1q_lane_u16(C + k_Row + i + 4, neon_c, 4);
				vst1q_lane_u16(C + k_Row + i + 5, neon_c, 5);
				vst1q_lane_u16(C + k_Row + i + 6, neon_c, 6);
				vst1q_lane_u16(C + k_Row + i + 7, neon_c, 7);

			}
		}
	}
}
コード例 #7
0
static inline int setbits(uint16x8_t d){
        
	return (int) (vgetq_lane_u16 (d,0)+ vgetq_lane_u16 (d,1)+ vgetq_lane_u16 (d,2)+
		      vgetq_lane_u16 (d,3)+ vgetq_lane_u16 (d,4)+ vgetq_lane_u16 (d,5)+
		      vgetq_lane_u16 (d,6)+ vgetq_lane_u16 (d,7) );
}
コード例 #8
0
ファイル: aarch64-neon-vget.c プロジェクト: AlexDenisov/clang
// CHECK-LABEL: define i16 @test_vgetq_lane_u16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
// CHECK:   ret i16 [[VGETQ_LANE]]
uint16_t test_vgetq_lane_u16(uint16x8_t a) {
  return vgetq_lane_u16(a, 7);
}
コード例 #9
0
uint16_t
test_vgetq_lane_u16_before (uint16x8_t in)
{
  /* { dg-error "lane -1 out of range 0 - 7" "" {target *-*-*} 0 } */
  return vgetq_lane_u16 (in, -1);
}
コード例 #10
0
uint16_t
test_vgetq_lane_u16_beyond (uint16x8_t in)
{
  /* { dg-error "lane 8 out of range 0 - 7" "" {target *-*-*} 0 } */
  return vgetq_lane_u16 (in, 8);
}