void bla_trmm_check( char* dt_str, char* op_str, f77_char* sidea, f77_char* uploa, f77_char* transa, f77_char* diaga, f77_int* m, f77_int* n, f77_int* lda, f77_int* ldb ) { f77_int info = 0; f77_int left, right; f77_int lower, upper; f77_int nota, ta, conja; f77_int unita, nonua; f77_int nrowa; left = PASTEF770(lsame)( sidea, "L", (ftnlen)1, (ftnlen)1 ); right = PASTEF770(lsame)( sidea, "R", (ftnlen)1, (ftnlen)1 ); lower = PASTEF770(lsame)( uploa, "L", (ftnlen)1, (ftnlen)1 ); upper = PASTEF770(lsame)( uploa, "U", (ftnlen)1, (ftnlen)1 ); nota = PASTEF770(lsame)( transa, "N", (ftnlen)1, (ftnlen)1 ); ta = PASTEF770(lsame)( transa, "T", (ftnlen)1, (ftnlen)1 ); conja = PASTEF770(lsame)( transa, "C", (ftnlen)1, (ftnlen)1 ); unita = PASTEF770(lsame)( diaga, "U", (ftnlen)1, (ftnlen)1 ); nonua = PASTEF770(lsame)( diaga, "N", (ftnlen)1, (ftnlen)1 ); if ( left ) { nrowa = *m; } else { nrowa = *n; } if ( !left && !right ) info = 1; else if ( !lower && !upper ) info = 2; else if ( !nota && !ta && !conja ) info = 3; else if ( !unita && !nonua ) info = 4; else if ( *m < 0 ) info = 5; else if ( *n < 0 ) info = 6; else if ( *lda < bli_max( 1, nrowa ) ) info = 9; else if ( *ldb < bli_max( 1, *m ) ) info = 11; if ( info != 0 ) { char func_str[ BLIS_MAX_BLAS_FUNC_STR_LENGTH ]; sprintf( func_str, "%s%-5s", dt_str, op_str ); PASTEF770(xerbla)( func_str, &info, (ftnlen)6 ); } }
void bla_her_check( char* dt_str, char* op_str, f77_char* uploc, f77_int* m, f77_int* incx, f77_int* lda ) { f77_int info = 0; f77_int lower, upper; lower = PASTEF770(lsame)( uploc, "L", (ftnlen)1, (ftnlen)1 ); upper = PASTEF770(lsame)( uploc, "U", (ftnlen)1, (ftnlen)1 ); if ( !lower && !upper ) info = 1; else if ( *m < 0 ) info = 2; else if ( *incx == 0 ) info = 5; else if ( *lda < bli_max( 1, *m ) ) info = 7; if ( info != 0 ) { char func_str[ BLIS_MAX_BLAS_FUNC_STR_LENGTH ]; sprintf( func_str, "%s%-5s", dt_str, op_str ); PASTEF770(xerbla)( func_str, &info, (ftnlen)6 ); } }
void bla_ger_check( char* dt_str, char* op_str, f77_int* m, f77_int* n, f77_int* incx, f77_int* incy, f77_int* lda ) { f77_int info = 0; if ( *m < 0 ) info = 1; else if ( *n < 0 ) info = 2; else if ( *incx == 0 ) info = 5; else if ( *incy == 0 ) info = 7; else if ( *lda < bli_max( 1, *m ) ) info = 9; if ( info != 0 ) { char func_str[ BLIS_MAX_BLAS_FUNC_STR_LENGTH ]; sprintf( func_str, "%s%-5s", dt_str, op_str ); PASTEF770(xerbla)( func_str, &info, (ftnlen)6 ); } }
void bla_her2k_check( char* dt_str, char* op_str, f77_char* uploa, f77_char* trans, f77_int* m, f77_int* k, f77_int* lda, f77_int* ldb, f77_int* ldc ) { f77_int info = 0; f77_int nota, conja; f77_int lower, upper; f77_int nrowa; nota = PASTEF770(lsame)( trans, "N", (ftnlen)1, (ftnlen)1 ); conja = PASTEF770(lsame)( trans, "C", (ftnlen)1, (ftnlen)1 ); lower = PASTEF770(lsame)( uploa, "L", (ftnlen)1, (ftnlen)1 ); upper = PASTEF770(lsame)( uploa, "U", (ftnlen)1, (ftnlen)1 ); if ( nota ) { nrowa = *m; } else { nrowa = *k; } if ( !lower && !upper ) info = 1; else if ( !nota && !conja ) info = 2; else if ( *m < 0 ) info = 3; else if ( *k < 0 ) info = 4; else if ( *lda < bli_max( 1, nrowa ) ) info = 7; else if ( *ldb < bli_max( 1, nrowa ) ) info = 9; else if ( *ldc < bli_max( 1, *m ) ) info = 12; if ( info != 0 ) { char func_str[ BLIS_MAX_BLAS_FUNC_STR_LENGTH ]; sprintf( func_str, "%s%-5s", dt_str, op_str ); PASTEF770(xerbla)( func_str, &info, (ftnlen)6 ); } }
void bla_hemm_check( char* dt_str, char* op_str, f77_char* sidea, f77_char* uploa, f77_int* m, f77_int* n, f77_int* lda, f77_int* ldb, f77_int* ldc ) { f77_int info = 0; f77_int left, right; f77_int lower, upper; f77_int nrowa; left = PASTEF770(lsame)( sidea, "L", (ftnlen)1, (ftnlen)1 ); right = PASTEF770(lsame)( sidea, "R", (ftnlen)1, (ftnlen)1 ); lower = PASTEF770(lsame)( uploa, "L", (ftnlen)1, (ftnlen)1 ); upper = PASTEF770(lsame)( uploa, "U", (ftnlen)1, (ftnlen)1 ); if ( left ) { nrowa = *m; } else { nrowa = *n; } if ( !left && !right ) info = 1; else if ( !lower && !upper ) info = 2; else if ( *m < 0 ) info = 3; else if ( *n < 0 ) info = 4; else if ( *lda < bli_max( 1, nrowa ) ) info = 7; else if ( *ldb < bli_max( 1, *m ) ) info = 9; else if ( *ldc < bli_max( 1, *m ) ) info = 12; if ( info != 0 ) { char func_str[ BLIS_MAX_BLAS_FUNC_STR_LENGTH ]; sprintf( func_str, "%s%-5s", dt_str, op_str ); PASTEF770(xerbla)( func_str, &info, (ftnlen)6 ); } }
void bli_cconjmr( uplo_t uplo, int m, int n, scomplex* a, int a_rs, int a_cs ) { float m1 = bli_sm1(); float* a_conj; int lda, inca; int n_iter; int n_elem_max; int n_elem; int j; // Return early if possible. if ( bli_zero_dim2( m, n ) ) return; // We initialize for column-major. n_iter = n; n_elem_max = m; lda = a_cs; inca = a_rs; // An optimization: if A is row-major, then let's access the matrix // by rows instead of by columns to increase spatial locality. if ( bli_is_row_storage( a_rs, a_cs ) ) { bli_swap_ints( n_iter, n_elem_max ); bli_swap_ints( lda, inca ); bli_toggle_uplo( uplo ); } if ( bli_is_upper( uplo ) ) { for ( j = 0; j < n_iter; ++j ) { n_elem = bli_min( j + 1, n_elem_max ); a_conj = ( float* )( a + j*lda ) + 1; bli_sscal( n_elem, &m1, a_conj, 2*inca ); } } else // if ( bli_is_lower( uplo ) ) { for ( j = 0; j < n_iter; ++j ) { n_elem = bli_max( 0, n_elem_max - j ); a_conj = ( float* )( a + j*lda + j*inca ) + 1; if ( n_elem <= 0 ) break; bli_sscal( n_elem, &m1, a_conj, 2*inca ); } } }
void bli_dsetmr( uplo_t uplo, int m, int n, double* sigma, double* a, int a_rs, int a_cs ) { double* a_begin; int lda, inca; int n_iter; int n_elem_max; int n_elem; int j; // Return early if possible. if ( bli_zero_dim2( m, n ) ) return; // Initialize with optimal values for column-major storage. n_iter = n; n_elem_max = m; lda = a_cs; inca = a_rs; // An optimization: if A is row-major, then let's access the matrix by // rows instead of by columns to increase spatial locality. if ( bli_is_row_storage( a_rs, a_cs ) ) { bli_swap_ints( n_iter, n_elem_max ); bli_swap_ints( lda, inca ); bli_toggle_uplo( uplo ); } if ( bli_is_upper( uplo ) ) { for ( j = 0; j < n_iter; j++ ) { n_elem = bli_min( j, n_elem_max ); a_begin = a + j*lda; bli_dsetv( n_elem, sigma, a_begin, inca ); } } else // if ( bli_is_lower( uplo ) ) { for ( j = 0; j < n_iter; j++ ) { n_elem = bli_max( 0, n_elem_max - j - 1 ); a_begin = a + j*lda + (j + 1)*inca; bli_dsetv( n_elem, sigma, a_begin, inca ); } } }
void bli_get_range_weighted( void* thr, dim_t all_start, dim_t all_end, dim_t block_factor, bool_t forward, dim_t* start, dim_t* end) { thrinfo_t* thread = (thrinfo_t*) thr; dim_t n_way = thread->n_way; dim_t work_id = thread->work_id; dim_t size = all_end - all_start; *start = 0; *end = all_end - all_start; double num = size*size / (double) n_way; if( forward ) { dim_t curr_caucus = n_way - 1; dim_t len = 0; while(1){ dim_t width = ceil(sqrt( len*len + num )) - len; // The width of the current caucus width = (width % block_factor == 0) ? width : width + block_factor - (width % block_factor); if( curr_caucus == work_id ) { *start = bli_max( 0 , *end - width ) + all_start; *end = *end + all_start; return; } else{ *end -= width; len += width; curr_caucus--; } } } else{ while(1){ dim_t width = ceil(sqrt(*start * *start + num)) - *start; width = (width % block_factor == 0) ? width : width + block_factor - (width % block_factor); if( work_id == 0 ) { *start = *start + all_start; *end = bli_min( *start + width, all_end ); return; } else{ *start = *start + width; } work_id--; } } }