#else
#define FNAME parasail_sg_stats_diag_sse2_128_8
#endif
#endif

parasail_result_t* FNAME(
        const char * const restrict _s1, const int s1Len,
        const char * const restrict _s2, const int s2Len,
        const int open, const int gap, const parasail_matrix_t *matrix)
{
    const int32_t N = 16; /* number of values in vector */
    const int32_t PAD = N-1;
    const int32_t PAD2 = PAD*2;
    const int32_t s1Len_PAD = s1Len+PAD;
    const int32_t s2Len_PAD = s2Len+PAD;
    int8_t * const restrict s1      = parasail_memalign_int8_t(16, s1Len+PAD);
    int8_t * const restrict s2B     = parasail_memalign_int8_t(16, s2Len+PAD2);
    int8_t * const restrict _tbl_pr = parasail_memalign_int8_t(16, s2Len+PAD2);
    int8_t * const restrict _del_pr = parasail_memalign_int8_t(16, s2Len+PAD2);
    int8_t * const restrict _mch_pr = parasail_memalign_int8_t(16, s2Len+PAD2);
    int8_t * const restrict _sim_pr = parasail_memalign_int8_t(16, s2Len+PAD2);
    int8_t * const restrict _len_pr = parasail_memalign_int8_t(16, s2Len+PAD2);
    int8_t * const restrict s2 = s2B+PAD; /* will allow later for negative indices */
    int8_t * const restrict tbl_pr = _tbl_pr+PAD;
    int8_t * const restrict del_pr = _del_pr+PAD;
    int8_t * const restrict mch_pr = _mch_pr+PAD;
    int8_t * const restrict sim_pr = _sim_pr+PAD;
    int8_t * const restrict len_pr = _len_pr+PAD;
#ifdef PARASAIL_TABLE
    parasail_result_t *result = parasail_result_new_table3(s1Len, s2Len);
#else
#else
#define FNAME parasail_nw_stats_diag_neon_128_8
#endif
#endif

parasail_result_t* FNAME(
        const char * const restrict _s1, const int s1Len,
        const char * const restrict _s2, const int s2Len,
        const int open, const int gap, const parasail_matrix_t *matrix)
{
    const int32_t N = 16; /* number of values in vector */
    const int32_t PAD = N-1;
    const int32_t PAD2 = PAD*2;
    const int32_t s1Len_PAD = s1Len+PAD;
    const int32_t s2Len_PAD = s2Len+PAD;
    int8_t * const restrict s1      = parasail_memalign_int8_t(16, s1Len+PAD);
    int8_t * const restrict s2B     = parasail_memalign_int8_t(16, s2Len+PAD2);
    int8_t * const restrict _H_pr = parasail_memalign_int8_t(16, s2Len+PAD2);
    int8_t * const restrict _HM_pr = parasail_memalign_int8_t(16, s2Len+PAD2);
    int8_t * const restrict _HS_pr = parasail_memalign_int8_t(16, s2Len+PAD2);
    int8_t * const restrict _HL_pr = parasail_memalign_int8_t(16, s2Len+PAD2);
    int8_t * const restrict _F_pr = parasail_memalign_int8_t(16, s2Len+PAD2);
    int8_t * const restrict _FM_pr = parasail_memalign_int8_t(16, s2Len+PAD2);
    int8_t * const restrict _FS_pr = parasail_memalign_int8_t(16, s2Len+PAD2);
    int8_t * const restrict _FL_pr = parasail_memalign_int8_t(16, s2Len+PAD2);
    int8_t * const restrict s2 = s2B+PAD; /* will allow later for negative indices */
    int8_t * const restrict H_pr = _H_pr+PAD;
    int8_t * const restrict HM_pr = _HM_pr+PAD;
    int8_t * const restrict HS_pr = _HS_pr+PAD;
    int8_t * const restrict HL_pr = _HL_pr+PAD;
    int8_t * const restrict F_pr = _F_pr+PAD;
#else
#define FNAME parasail_sw_diag_avx2_256_8
#endif
#endif

parasail_result_t* FNAME(
        const char * const restrict _s1, const int s1Len,
        const char * const restrict _s2, const int s2Len,
        const int open, const int gap, const parasail_matrix_t *matrix)
{
    const int32_t N = 32; /* number of values in vector */
    const int32_t PAD = N-1;
    const int32_t PAD2 = PAD*2;
    const int32_t s1Len_PAD = s1Len+PAD;
    const int32_t s2Len_PAD = s2Len+PAD;
    int8_t * const restrict s1 = parasail_memalign_int8_t(32, s1Len+PAD);
    int8_t * const restrict s2B= parasail_memalign_int8_t(32, s2Len+PAD2);
    int8_t * const restrict _tbl_pr = parasail_memalign_int8_t(32, s2Len+PAD2);
    int8_t * const restrict _del_pr = parasail_memalign_int8_t(32, s2Len+PAD2);
    int8_t * const restrict s2 = s2B+PAD; /* will allow later for negative indices */
    int8_t * const restrict tbl_pr = _tbl_pr+PAD;
    int8_t * const restrict del_pr = _del_pr+PAD;
#ifdef PARASAIL_TABLE
    parasail_result_t *result = parasail_result_new_table1(s1Len, s2Len);
#else
#ifdef PARASAIL_ROWCOL
    parasail_result_t *result = parasail_result_new_rowcol1(s1Len, s2Len);
#else
    parasail_result_t *result = parasail_result_new();
#endif
#endif