/*
 * Scatter the four 64-bit lanes of vH into col.
 *
 * Lane k (k = 0..3) is written to col[k*seglen + t].  col is an int buffer,
 * so each 64-bit lane value is narrowed to int on assignment.
 *
 *   col     destination buffer; must be valid at index 3*seglen + t
 *   vH      vector whose four 64-bit lanes are stored
 *   t       offset within a segment
 *   seglen  distance (in elements) between consecutive lanes' slots
 */
static inline void arr_store_col(
        int *col,
        __m256i vH,
        int32_t t,
        int32_t seglen)
{
    /* Lane selectors are kept as literal constants for the extract helper. */
    const int64_t lanes[4] = {
        (int64_t)_mm256_extract_epi64_rpl(vH, 0),
        (int64_t)_mm256_extract_epi64_rpl(vH, 1),
        (int64_t)_mm256_extract_epi64_rpl(vH, 2),
        (int64_t)_mm256_extract_epi64_rpl(vH, 3)
    };
    int32_t k;

    for (k = 0; k < 4; ++k) {
        col[k * seglen + t] = lanes[k];
    }
}
/*
 * Scatter the four 64-bit lanes of vH into a flat int array.
 *
 * Lane k (k = 0..3) is written to array[(k*seglen + t)*dlen + d].  The
 * multiplication by dlen is carried out in long long so the flat index does
 * not wrap at 32 bits.  Each lane value is narrowed to int on assignment.
 *
 *   array   destination buffer
 *   vH      vector whose four 64-bit lanes are stored
 *   t       offset within a segment
 *   seglen  distance (in rows) between consecutive lanes
 *   d       offset within a row of length dlen
 *   dlen    row length of the flat array
 */
static inline void arr_store_si256(
        int *array,
        __m256i vH,
        int32_t t,
        int32_t seglen,
        int32_t d,
        int32_t dlen)
{
    /* Lane selectors are kept as literal constants for the extract helper. */
    const int64_t lanes[4] = {
        (int64_t)_mm256_extract_epi64_rpl(vH, 0),
        (int64_t)_mm256_extract_epi64_rpl(vH, 1),
        (int64_t)_mm256_extract_epi64_rpl(vH, 2),
        (int64_t)_mm256_extract_epi64_rpl(vH, 3)
    };
    int32_t k;

    for (k = 0; k < 4; ++k) {
        const long long cell = 1LL * (k * seglen + t) * dlen + d;
        array[cell] = lanes[k];
    }
}
/*
 * Record last-row / last-column values from one anti-diagonal vector.
 *
 * Lane (3 - k) of vWscore corresponds to cell (i + k, j - k) for k = 0..3.
 * For each such cell:
 *   - if its row is s1Len-1 and its column lies in [0, s2Len), the lane is
 *     written to row[column];
 *   - if its column is s2Len-1 and its row lies in [0, s1Len), the lane is
 *     written to col[row].
 * Values are narrowed to int on assignment.  Cells matching neither test
 * are ignored.
 */
static inline void arr_store_rowcol(
        int *row,
        int *col,
        __m256i vWscore,
        int32_t i,
        int32_t s1Len,
        int32_t j,
        int32_t s2Len)
{
    const int32_t lastRow = s1Len - 1;
    const int32_t lastCol = s2Len - 1;

    /* Coordinates of the cells held in lanes 3, 2, 1, 0 respectively. */
    const int32_t r0 = i + 0, c0 = j - 0;
    const int32_t r1 = i + 1, c1 = j - 1;
    const int32_t r2 = i + 2, c2 = j - 2;
    const int32_t r3 = i + 3, c3 = j - 3;

    /* lane 3 */
    if (r0 == lastRow && c0 >= 0 && c0 < s2Len) {
        row[c0] = (int64_t)_mm256_extract_epi64_rpl(vWscore, 3);
    }
    if (c0 == lastCol && r0 >= 0 && r0 < s1Len) {
        col[r0] = (int64_t)_mm256_extract_epi64_rpl(vWscore, 3);
    }

    /* lane 2 */
    if (r1 == lastRow && c1 >= 0 && c1 < s2Len) {
        row[c1] = (int64_t)_mm256_extract_epi64_rpl(vWscore, 2);
    }
    if (c1 == lastCol && r1 >= 0 && r1 < s1Len) {
        col[r1] = (int64_t)_mm256_extract_epi64_rpl(vWscore, 2);
    }

    /* lane 1 */
    if (r2 == lastRow && c2 >= 0 && c2 < s2Len) {
        row[c2] = (int64_t)_mm256_extract_epi64_rpl(vWscore, 1);
    }
    if (c2 == lastCol && r2 >= 0 && r2 < s1Len) {
        col[r2] = (int64_t)_mm256_extract_epi64_rpl(vWscore, 1);
    }

    /* lane 0 */
    if (r3 == lastRow && c3 >= 0 && c3 < s2Len) {
        row[c3] = (int64_t)_mm256_extract_epi64_rpl(vWscore, 0);
    }
    if (c3 == lastCol && r3 >= 0 && r3 < s1Len) {
        col[r3] = (int64_t)_mm256_extract_epi64_rpl(vWscore, 0);
    }
}
/*
 * Store one anti-diagonal vector into a row-major s1Len x s2Len int table.
 *
 * Lane (3 - k) of vWscore corresponds to cell (i + k, j - k) for k = 0..3.
 * A lane is written to array[(i + k)*s2Len + (j - k)] only when that cell
 * lies inside the table (0 <= row < s1Len and 0 <= column < s2Len); lanes
 * that fall outside are skipped.  Values are narrowed to int on assignment.
 *
 * The flat index is computed in long long: with 32-bit arithmetic the
 * product row*s2Len overflows (undefined behavior for signed int) as soon
 * as the table holds more than INT_MAX cells.
 */
static inline void arr_store_si256(
        int *array,
        __m256i vWscore,
        int32_t i,
        int32_t s1Len,
        int32_t j,
        int32_t s2Len)
{
    /* lane 3 <-> cell (i, j) */
    if (0 <= i+0 && i+0 < s1Len && 0 <= j-0 && j-0 < s2Len) {
        array[1LL*(i+0)*s2Len + (j-0)] = (int64_t)_mm256_extract_epi64_rpl(vWscore, 3);
    }
    /* lane 2 <-> cell (i+1, j-1) */
    if (0 <= i+1 && i+1 < s1Len && 0 <= j-1 && j-1 < s2Len) {
        array[1LL*(i+1)*s2Len + (j-1)] = (int64_t)_mm256_extract_epi64_rpl(vWscore, 2);
    }
    /* lane 1 <-> cell (i+2, j-2) */
    if (0 <= i+2 && i+2 < s1Len && 0 <= j-2 && j-2 < s2Len) {
        array[1LL*(i+2)*s2Len + (j-2)] = (int64_t)_mm256_extract_epi64_rpl(vWscore, 1);
    }
    /* lane 0 <-> cell (i+3, j-3) */
    if (0 <= i+3 && i+3 < s1Len && 0 <= j-3 && j-3 < s2Len) {
        array[1LL*(i+3)*s2Len + (j-3)] = (int64_t)_mm256_extract_epi64_rpl(vWscore, 0);
    }
}
static inline int64_t _mm256_hmax_epi64_rpl(__m256i a) { a = _mm256_max_epi64_rpl(a, _mm256_permute2x128_si256(a, a, _MM_SHUFFLE(0,0,0,0))); a = _mm256_max_epi64_rpl(a, _mm256_slli_si256(a, 8)); return _mm256_extract_epi64_rpl(a, 3); }