/*
 * Scatter the four 32-bit lanes of vH into col.
 *
 * Lane k (k = 0..3) is written to col[k*seglen + t].
 *
 * col    - destination buffer; must be valid at the four indices above
 * vH     - vector whose four 32-bit lanes are stored
 * t      - offset added to every lane's index
 * seglen - distance, in elements, between consecutive lanes' slots
 *
 * The lane numbers are spelled out as literals rather than looped over;
 * presumably _mm_extract_epi32_rpl may need a compile-time constant lane
 * index on some builds (its definition is not visible here).
 */
static inline void arr_store_col(
        int *col,
        __m128i vH,
        int32_t t,
        int32_t seglen)
{
    const int32_t lane0 = (int32_t)_mm_extract_epi32_rpl(vH, 0);
    const int32_t lane1 = (int32_t)_mm_extract_epi32_rpl(vH, 1);
    const int32_t lane2 = (int32_t)_mm_extract_epi32_rpl(vH, 2);
    const int32_t lane3 = (int32_t)_mm_extract_epi32_rpl(vH, 3);

    col[0*seglen + t] = lane0;
    col[1*seglen + t] = lane1;
    col[2*seglen + t] = lane2;
    col[3*seglen + t] = lane3;
}
/*
 * Scatter the four 32-bit lanes of vH into a flat 2-D table.
 *
 * Lane k (k = 0..3) is written to array[(k*seglen + t)*dlen + d].
 *
 * array  - destination table; must be valid at the four indices above
 * vH     - vector whose four 32-bit lanes are stored
 * t      - offset added to every lane's row index
 * seglen - distance, in rows, between consecutive lanes
 * d      - column index within a row
 * dlen   - row length (number of ints per row)
 *
 * The row index is widened to long long before it is multiplied by dlen,
 * so the flat index is not computed in 32-bit arithmetic.
 */
static inline void arr_store_si128(
        int *array,
        __m128i vH,
        int32_t t,
        int32_t seglen,
        int32_t d,
        int32_t dlen)
{
    /* Row numbers are formed in int, then promoted for the multiply. */
    const long long r0 = 0*seglen + t;
    const long long r1 = 1*seglen + t;
    const long long r2 = 2*seglen + t;
    const long long r3 = 3*seglen + t;

    array[r0*dlen + d] = (int32_t)_mm_extract_epi32_rpl(vH, 0);
    array[r1*dlen + d] = (int32_t)_mm_extract_epi32_rpl(vH, 1);
    array[r2*dlen + d] = (int32_t)_mm_extract_epi32_rpl(vH, 2);
    array[r3*dlen + d] = (int32_t)_mm_extract_epi32_rpl(vH, 3);
}
/*
 * Record the lanes of vWscore that land on the last row or last column.
 *
 * Lane (3 - k), for k = 0..3, is associated with cell (i + k, j - k).
 * For each such cell:
 *   - if its row is s1Len-1 and its column is inside [0, s2Len),
 *     the lane is written to row[column];
 *   - if its column is s2Len-1 and its row is inside [0, s1Len),
 *     the lane is written to col[row].
 * A cell at (s1Len-1, s2Len-1) therefore updates both outputs.
 *
 * row     - output indexed by column, at least s2Len ints
 * col     - output indexed by row, at least s1Len ints
 * vWscore - vector holding the four cell values
 * i, j    - coordinates of the cell held in lane 3
 * s1Len   - number of rows
 * s2Len   - number of columns
 */
static inline void arr_store_rowcol(
        int *row,
        int *col,
        __m128i vWscore,
        int32_t i,
        int32_t s1Len,
        int32_t j,
        int32_t s2Len)
{
    const int32_t last_i = s1Len - 1;
    const int32_t last_j = s2Len - 1;

    /* lane 3 <-> cell (i, j) */
    {
        const int32_t ci = i + 0;
        const int32_t cj = j - 0;
        if (ci == last_i && cj >= 0 && cj < s2Len) {
            row[cj] = (int32_t)_mm_extract_epi32_rpl(vWscore, 3);
        }
        if (cj == last_j && ci >= 0 && ci < s1Len) {
            col[ci] = (int32_t)_mm_extract_epi32_rpl(vWscore, 3);
        }
    }

    /* lane 2 <-> cell (i+1, j-1) */
    {
        const int32_t ci = i + 1;
        const int32_t cj = j - 1;
        if (ci == last_i && cj >= 0 && cj < s2Len) {
            row[cj] = (int32_t)_mm_extract_epi32_rpl(vWscore, 2);
        }
        if (cj == last_j && ci >= 0 && ci < s1Len) {
            col[ci] = (int32_t)_mm_extract_epi32_rpl(vWscore, 2);
        }
    }

    /* lane 1 <-> cell (i+2, j-2) */
    {
        const int32_t ci = i + 2;
        const int32_t cj = j - 2;
        if (ci == last_i && cj >= 0 && cj < s2Len) {
            row[cj] = (int32_t)_mm_extract_epi32_rpl(vWscore, 1);
        }
        if (cj == last_j && ci >= 0 && ci < s1Len) {
            col[ci] = (int32_t)_mm_extract_epi32_rpl(vWscore, 1);
        }
    }

    /* lane 0 <-> cell (i+3, j-3) */
    {
        const int32_t ci = i + 3;
        const int32_t cj = j - 3;
        if (ci == last_i && cj >= 0 && cj < s2Len) {
            row[cj] = (int32_t)_mm_extract_epi32_rpl(vWscore, 0);
        }
        if (cj == last_j && ci >= 0 && ci < s1Len) {
            col[ci] = (int32_t)_mm_extract_epi32_rpl(vWscore, 0);
        }
    }
}
/*
 * Store the in-range lanes of vWscore into a row-major s1Len x s2Len table.
 *
 * Lane (3 - k), for k = 0..3, is associated with cell (i + k, j - k).
 * A lane is written to array[(i + k)*s2Len + (j - k)] only when that cell
 * lies inside the table, i.e. 0 <= i+k < s1Len and 0 <= j-k < s2Len;
 * out-of-range lanes are silently skipped.
 *
 * array   - destination table, at least s1Len*s2Len ints
 * vWscore - vector holding the four cell values
 * i, j    - coordinates of the cell held in lane 3
 * s1Len   - number of rows
 * s2Len   - number of columns (row stride)
 *
 * The flat index is computed in long long: with 32-bit int the product
 * (i + k)*s2Len overflows (undefined behaviour, wrong address) as soon as
 * the table has more than INT_MAX cells.
 */
static inline void arr_store_si128(
        int *array,
        __m128i vWscore,
        int32_t i,
        int32_t s1Len,
        int32_t j,
        int32_t s2Len)
{
    /* Widened stride: every index expression below is evaluated in 64 bits. */
    const long long stride = s2Len;

    /* lane 3 <-> cell (i, j) */
    {
        const int32_t ci = i + 0;
        const int32_t cj = j - 0;
        if (0 <= ci && ci < s1Len && 0 <= cj && cj < s2Len) {
            array[ci*stride + cj] = (int32_t)_mm_extract_epi32_rpl(vWscore, 3);
        }
    }

    /* lane 2 <-> cell (i+1, j-1) */
    {
        const int32_t ci = i + 1;
        const int32_t cj = j - 1;
        if (0 <= ci && ci < s1Len && 0 <= cj && cj < s2Len) {
            array[ci*stride + cj] = (int32_t)_mm_extract_epi32_rpl(vWscore, 2);
        }
    }

    /* lane 1 <-> cell (i+2, j-2) */
    {
        const int32_t ci = i + 2;
        const int32_t cj = j - 2;
        if (0 <= ci && ci < s1Len && 0 <= cj && cj < s2Len) {
            array[ci*stride + cj] = (int32_t)_mm_extract_epi32_rpl(vWscore, 1);
        }
    }

    /* lane 0 <-> cell (i+3, j-3) */
    {
        const int32_t ci = i + 3;
        const int32_t cj = j - 3;
        if (0 <= ci && ci < s1Len && 0 <= cj && cj < s2Len) {
            array[ci*stride + cj] = (int32_t)_mm_extract_epi32_rpl(vWscore, 0);
        }
    }
}
/*
 * Horizontal reduction of a vector to a single 32-bit value.
 *
 * The vector is combined with itself shifted right by 8 bytes, and the
 * result is combined with itself shifted right by 4 bytes, each time via
 * _mm_max_epi32_rpl; lane 0 of the final vector is returned.
 *
 * Presumably _mm_max_epi32_rpl is a lane-wise signed 32-bit maximum (its
 * definition is not visible here), in which case the return value is the
 * largest of the four lanes of a.
 */
static inline int32_t _mm_hmax_epi32_rpl(__m128i a)
{
    /* Step 1: bring lanes 2,3 down onto lanes 0,1 and combine. */
    const __m128i upper_half = _mm_srli_si128(a, 8);
    const __m128i pair_max = _mm_max_epi32_rpl(a, upper_half);

    /* Step 2: bring lane 1 down onto lane 0 and combine. */
    const __m128i neighbour = _mm_srli_si128(pair_max, 4);
    const __m128i reduced = _mm_max_epi32_rpl(pair_max, neighbour);

    return _mm_extract_epi32_rpl(reduced, 0);
}