/**
 * Add a 64-bit scalar to every 64-bit lane of a vector: y[k] = x[k] + a.
 *
 * Although the buffers are typed `short *`, they are accessed as packed
 * 64-bit integers, two lanes per 128-bit SSE2 vector.  The work is done in
 * groups of four vectors (eight 64-bit lanes) and the group count is
 * N >> 3, so when N is not a multiple of 8 the trailing N % 8 lanes of `y`
 * are left untouched.
 *
 * Additions use _mm_add_epi64 on each 64-bit lane.
 *
 * @param x  Input buffer, read as 64-bit lanes (no alignment requirement).
 * @param a  Scalar added to each 64-bit lane.
 * @param y  Output buffer, written as 64-bit lanes; may be the same as `x`.
 * @param N  Lane count; only the first (N >> 3) * 8 lanes are processed.
 * @return   Always 0.
 */
int add_real_vector64_scalar(short *x, long long int a, short *y, unsigned int N)
{
  const __m128i *src = (const __m128i *)x;
  __m128i *dst = (__m128i *)y;

  /* Broadcast the scalar straight from the integer; this avoids the MMX
   * __m64 detour that _mm_set1_epi64 requires. */
  const __m128i alpha_128 = _mm_set1_epi64x(a);

  /* Four 128-bit vectors per group of eight lanes. */
  const unsigned int vectors = (N >> 3) * 4u;

  /* Each vector is loaded, summed and stored before the next one is read,
   * matching the original per-vector ordering.  Unaligned accesses are used
   * so callers are not required to supply 16-byte-aligned buffers. */
  for (unsigned int v = 0; v < vectors; v++) {
    const __m128i in = _mm_loadu_si128(src + v);
    _mm_storeu_si128(dst + v, _mm_add_epi64(alpha_128, in));
  }

  return 0;
}
#else parasail_result_t *result = parasail_result_new_stats(); #endif #endif int32_t i = 0; int32_t j = 0; int32_t end_query = s1Len-1; int32_t end_ref = s2Len-1; const int64_t NEG_LIMIT = (-open < matrix->min ? INT64_MIN + open : INT64_MIN - matrix->min) + 1; const int64_t POS_LIMIT = INT64_MAX - matrix->max - 1; int64_t score = NEG_LIMIT; int64_t matches = NEG_LIMIT; int64_t similar = NEG_LIMIT; int64_t length = NEG_LIMIT; vec128i vNegLimit = _mm_set1_epi64(NEG_LIMIT); vec128i vPosLimit = _mm_set1_epi64(POS_LIMIT); vec128i vSaturationCheckMin = vPosLimit; vec128i vSaturationCheckMax = vNegLimit; vec128i vNegInf = _mm_set1_epi64(NEG_LIMIT); vec128i vOpen = _mm_set1_epi64(open); vec128i vGap = _mm_set1_epi64(gap); vec128i vZero = _mm_set1_epi64(0); vec128i vNegInf0 = _mm_insert_epi64(vZero, NEG_LIMIT, 1); vec128i vOne = _mm_set1_epi64(1); vec128i vN = _mm_set1_epi64(N); vec128i vGapN = _mm_set1_epi64(gap*N); vec128i vNegOne = _mm_set1_epi64(-1); vec128i vI = _mm_set_epi64(0,1); vec128i vJreset = _mm_set_epi64(0,-1); vec128i vMaxH = vNegInf;
int64_t * const restrict F_pr = _F_pr+PAD; #ifdef PARASAIL_TABLE parasail_result_t *result = parasail_result_new_table1(s1Len, s2Len); #else #ifdef PARASAIL_ROWCOL parasail_result_t *result = parasail_result_new_rowcol1(s1Len, s2Len); #else parasail_result_t *result = parasail_result_new(); #endif #endif int32_t i = 0; int32_t j = 0; int32_t end_query = 0; int32_t end_ref = 0; int64_t score = NEG_INF; vec128i vNegInf = _mm_set1_epi64(NEG_INF); vec128i vNegInf0 = _mm_srli_si128(vNegInf, 8); /* shift in a 0 */ vec128i vOpen = _mm_set1_epi64(open); vec128i vGap = _mm_set1_epi64(gap); vec128i vZero = _mm_set1_epi64(0); vec128i vOne = _mm_set1_epi64(1); vec128i vN = _mm_set1_epi64(N); vec128i vNegOne = _mm_set1_epi64(-1); vec128i vI = _mm_set_epi64(0,1); vec128i vJreset = _mm_set_epi64(0,-1); vec128i vMaxH = vNegInf; vec128i vEndI = vNegInf; vec128i vEndJ = vNegInf; vec128i vILimit = _mm_set1_epi64(s1Len); vec128i vJLimit = _mm_set1_epi64(s2Len);