// Convert an element offset (offmn) within packed object p into the offset
// (in units of elements) of the panel containing that element, according to
// p's pack schema.
dim_t bli_packm_offset_to_panel_for( dim_t offmn, obj_t* p )
{
	pack_t schema = bli_obj_pack_schema( *p );
	dim_t  off_p;

	if ( schema == BLIS_PACKED_ROWS )
	{
		// Each row is effectively its own row panel, so offmn (a row
		// offset) scaled by the row stride gives the element offset.
		off_p = offmn * bli_obj_row_stride( *p );
	}
	else if ( schema == BLIS_PACKED_COLUMNS )
	{
		// Each column is effectively its own column panel, so offmn (a
		// column offset) scaled by the column stride gives the offset.
		off_p = offmn * bli_obj_col_stride( *p );
	}
	else if ( schema == BLIS_PACKED_ROW_PANELS )
	{
		// For row panels, the column stride equals the panel length.
		// offmn (a row offset) must land on a panel boundary.
		dim_t panel_len = bli_obj_col_stride( *p );

		// Sanity check: offmn must be a whole multiple of the panel length.
		if ( offmn % panel_len > 0 ) bli_abort();

		// Divide out the panel length to get a panel index, then scale
		// by the panel stride to get the total element offset.
		off_p = ( offmn / panel_len ) * bli_obj_panel_stride( *p );
	}
	else if ( schema == BLIS_PACKED_COL_PANELS )
	{
		// For column panels, the row stride equals the panel width.
		// offmn (a column offset) must land on a panel boundary.
		dim_t panel_wid = bli_obj_row_stride( *p );

		// Sanity check: offmn must be a whole multiple of the panel width.
		if ( offmn % panel_wid > 0 ) bli_abort();

		// Divide out the panel width to get a panel index, then scale
		// by the panel stride to get the total element offset.
		off_p = ( offmn / panel_wid ) * bli_obj_panel_stride( *p );
	}
	else
	{
		// Any other schema is unsupported here.
		off_p = 0;
		bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED );
	}

	return off_p;
}
// Map a complex datatype to a zero-based index: scomplex -> 0, dcomplex -> 1.
num_t bli_ind_map_cdt_to_index( num_t dt )
{
	// A non-complex datatype should never be passed in.
	if ( !bli_is_complex( dt ) ) bli_abort();

	// scomplex maps to index 0; the only remaining complex datatype
	// (dcomplex) maps to index 1.
	return ( bli_is_scomplex( dt ) ? 0 : 1 );
}
// Report an error code: print its message (with file/line context) and abort.
// Success codes pass through silently.
err_t bli_check_error_code_helper( gint_t code, char* file, guint_t line )
{
	// Nothing to report on success.
	if ( code == BLIS_SUCCESS ) return code;

	// Select which code to report: a code strictly inside the defined
	// range is reported as-is; anything else is reported as undefined.
	// NOTE(review): the MAX < code < MIN ordering presumes error codes
	// are negative (MAX numerically below MIN) -- confirm against the
	// error code enum.
	gint_t report =
		( BLIS_ERROR_CODE_MAX < code && code < BLIS_ERROR_CODE_MIN )
		? code
		: BLIS_UNDEFINED_ERROR_CODE;

	bli_print_msg( bli_error_string_for_code( report ), file, line );
	bli_abort();

	// Unreachable (bli_abort() does not return), kept for the compiler.
	return code;
}
// Allocate at least 'size' bytes via allocation function 'f' and return a
// pointer aligned to 'align_size'. The original (unaligned) pointer returned
// by f() is stashed in the pointer-sized slot immediately before the aligned
// address so the matching free routine can recover it.
void* bli_malloc_align
     (
       malloc_ft f,
       size_t    size,
       size_t    align_size
     )
{
	const size_t ptr_size     = sizeof( void* );
	size_t       align_offset = 0;
	void*        p_orig;
	int8_t*      p_byte;
	void**       p_addr;

	// Check parameters.
	if ( bli_error_checking_is_enabled() )
		bli_malloc_align_check( f, size, align_size );

	// Return early if zero bytes were requested.
	if ( size == 0 ) return NULL;

	// Guard against size_t wraparound when padding the request below.
	// A wrapped sum would silently request a too-small buffer, and the
	// pointer arithmetic that follows would then write out of bounds.
	{
		const size_t pad = align_size + ptr_size;

		if ( pad < align_size || size > ( ( size_t )-1 ) - pad )
			bli_abort();
	}

	// Add the alignment size and the size of a pointer to the number
	// of bytes to allocate, so we always have room to (a) shift forward
	// to an aligned address and (b) store the original pointer behind it.
	size += align_size + ptr_size;

	// Call the allocation function.
	p_orig = f( size );

	// If NULL was returned, something is probably very wrong.
	if ( p_orig == NULL ) bli_abort();

	// Advance the pointer by one pointer element so there is always a
	// slot behind the aligned address for the original pointer.
	p_byte = p_orig;
	p_byte += ptr_size;

	// Compute the offset to the desired alignment.
	if ( bli_is_unaligned_to( ( siz_t )p_byte, ( siz_t )align_size ) )
	{
		align_offset = align_size -
		               bli_offset_past_alignment( ( siz_t )p_byte,
		                                          ( siz_t )align_size );
	}

	// Advance the pointer using the difference between the alignment
	// size and the alignment offset.
	p_byte += align_offset;

	// Compute the address of the pointer element just before the start
	// of the aligned address, and store the original address there.
	p_addr = ( void** )(p_byte - ptr_size);
	*p_addr = p_orig;

	// Return the aligned pointer.
	return p_byte;
}
// Return the current time in seconds using the Windows high-resolution
// performance counter (ticks divided by ticks-per-second).
double bli_clock_helper()
{
	LARGE_INTEGER freq = {0};
	LARGE_INTEGER now;

	// Query the counter frequency (ticks per second); abort on API failure.
	if ( QueryPerformanceFrequency( &freq ) == 0 )
	{
		bli_print_msg( "QueryPerformanceFrequency() failed", __FILE__, __LINE__ );
		bli_abort();
	}

	// Query the current tick count; abort on API failure.
	if ( QueryPerformanceCounter( &now ) == 0 )
	{
		bli_print_msg( "QueryPerformanceCounter() failed", __FILE__, __LINE__ );
		bli_abort();
	}

	// Convert ticks to seconds.
	return ( ( double ) now.QuadPart / ( double ) freq.QuadPart );
}
/* Subroutine */ int PASTEF770(xerbla)(bla_character *srname, bla_integer *info, ftnlen srname_len)
{
/*  -- LAPACK auxiliary routine (preliminary version) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., */
/*     Courant Institute, Argonne National Lab, and Rice University */
/*     February 29, 1992 */

/*  Purpose */
/*  ======= */

/*  XERBLA is an error handler for the LAPACK routines. */
/*  It is called by an LAPACK routine if an input parameter has an */
/*  invalid value. A message is printed and execution stops. */

/*  Arguments */
/*  ========= */

/*  SRNAME  (input) CHARACTER*6 */
/*          The name of the routine which called XERBLA. */

/*  INFO    (input) INTEGER */
/*          The position of the invalid parameter in the parameter list */
/*          of the calling routine. */

	int i;

	// Uppercase the routine name in place. Cast through unsigned char:
	// passing a plain (possibly negative) char to toupper() is undefined
	// behavior (CERT STR37-C) on platforms where char is signed.
	for ( i = 0; i < srname_len; ++i )
		srname[i] = toupper( ( unsigned char )srname[i] );

	// NOTE(review): srname originates from Fortran and may not be
	// NUL-terminated; "%6s" relies on a terminator being present at or
	// shortly after srname_len -- confirm callers pass terminated names.
	printf("** On entry to %6s, parameter number %2i had an illegal value\n",
	       srname, (int)*info);

	bli_abort();

/*     End of XERBLA */

	return 0;
} /* xerbla */
// Descend to the leaf node of a gemm control tree and return its
// micro-kernel func_t. Aborts if the tree is deeper than expected, which
// guards against cycles or a malformed control tree.
func_t* bli_gemm_cntl_ukrs( gemm_t* cntl )
{
	const dim_t max_depth = 10;
	dim_t       depth     = 0;

	// Keep stepping down while a gemm sub-tree exists; the leaf node
	// has a NULL sub-tree.
	while ( cntl_sub_gemm( cntl ) != NULL )
	{
		cntl = cntl_sub_gemm( cntl );

		// Safeguard against infinite loops due to bad control tree
		// configuration.
		if ( ++depth > max_depth ) bli_abort();
	}

	return cntl_gemm_ukrs( cntl );
}
// Print an error message to stderr and abort execution.
void bli_abort_msg( char* message )
{
	// Guard against a NULL message: passing NULL for %s is undefined
	// behavior in printf-family functions.
	fprintf( stderr, "BLIS: %s\n", message ? message : "(unknown error)" );
	fprintf( stderr, "BLIS: Aborting.\n" );
	bli_abort();
}
// Unpack packed vector p back into vector a, dispatching to a variant
// implementation selected via the control tree. Typecasting on unpack is
// currently unsupported (aborts if datatypes differ).
void bli_unpackv_int( obj_t* p, obj_t* a, cntx_t* cntx, unpackv_t* cntl )
{
	// The unpackv operation consists of an optional casting post-process.
	// (This post-process is analogous to the cast pre-process in packv.)
	// Here are the following possible ways unpackv can execute:
	//  1. unpack and cast: Unpack to a temporary vector c and then cast
	//     c to a.
	//  2. unpack only: Unpack directly to vector a since typecasting is
	//     not needed.
	//  3. cast only: Not yet supported / not used.
	//  4. no-op: The control tree directs us to skip the unpack operation
	//     entirely. No action is taken.

	obj_t     c;

	varnum_t  n;
	impl_t    i;
	FUNCPTR_T f;

	// Check parameters.
	if ( bli_error_checking_is_enabled() )
		bli_unpackv_check( p, a, cntx );

	// Sanity check; A should never have a zero dimension. If we must support
	// it, then we should fold it into the next alias-and-early-exit block.
	if ( bli_obj_has_zero_dim( *a ) ) bli_abort();

	// First check if we are to skip this operation because the control tree
	// is NULL, and if so, simply return.
	if ( cntl_is_noop( cntl ) )
	{
		return;
	}

	// If p was aliased to a during the pack stage (because it was already
	// in an acceptable packed/contiguous format), then no unpack is actually
	// necessary, so we return.
	if ( bli_obj_is_alias_of( *p, *a ) )
	{
		return;
	}

	// Now, if we are not skipping the unpack operation, then the only
	// question left is whether we are to typecast vector a after unpacking.
	// The cast path is disabled (see commented block below), so mismatched
	// datatypes abort here.
	if ( bli_obj_datatype( *p ) != bli_obj_datatype( *a ) )
		bli_abort();
/*
	if ( bli_obj_datatype( *p ) != bli_obj_datatype( *a ) )
	{
		// Initialize an object c for the intermediate typecast vector.
		bli_unpackv_init_cast( p, a, &c );
	}
	else
*/
	{
		// If no cast is needed, then aliasing object c to the original
		// vector serves as a minor optimization. This causes the unpackv
		// implementation to unpack directly into vector a.
		bli_obj_alias_to( *a, c );
	}

	// Now we are ready to proceed with the unpacking.

	// Extract the variant number and implementation type.
	n = cntl_var_num( cntl );
	i = cntl_impl_type( cntl );

	// Index into the variant array to extract the correct function pointer.
	// NOTE(review): 'vars' is a file-level dispatch table defined outside
	// this view; n and i are assumed to be valid indices into it.
	f = vars[n][i];

	// Invoke the variant.
	f( p, &c, cntx, cntl );

	// Now, if necessary, we cast the contents of c to vector a. If casting
	// was not necessary, then we are done because the call to the unpackv
	// implementation would have unpacked directly to vector a.
/*
	if ( bli_obj_datatype( *p ) != bli_obj_datatype( *a ) )
	{
		// Copy/typecast vector c to vector a.
		// NOTE: Here, we use copynzv instead of copym because, in the cases
		// where we are unpacking/typecasting a real vector c to a complex
		// vector a, we want to touch only the real components of a, rather
		// than also set the imaginary components to zero. This comes about
		// because of the fact that, if we are unpacking real-to-complex,
		// then it is because all of the computation occurred in the real
		// domain, and so we would want to leave whatever imaginary values
		// there are in vector a untouched. Notice that for unpackings that
		// entail complex-to-complex data movements, the copynzv operation
		// behaves exactly as copym, so no use cases are lost (at least none
		// that I can think of).
		bli_copynzv( &c, a );

		// NOTE: The above code/comment is outdated. What should happen is
		// as follows:
		// - If dt(a) is complex and dt(p) is real, then create an alias of
		//   a and then tweak it so that it looks like a real domain object.
		//   This will involve:
		//   - projecting the datatype to real domain
		//   - scaling both the row and column strides by 2
		// ALL OF THIS should be done in the front-end, NOT here, as
		// unpackv() won't even be needed in that case.
	}
*/
}
// Initialize object p so that source vector a can be packed into it via one
// of the packv implementations, acquiring packing metadata from the control
// tree. If the control tree is a no-op, or a is already packed to the desired
// schema, p simply becomes an alias of a.
void bli_packv_init
     (
       obj_t*   a,
       obj_t*   p,
       cntx_t*  cntx,
       packv_t* cntl
     )
{
	// The purpose of packm_init() is to initialize an object P so that
	// a source object A can be packed into P via one of the packv
	// implementations. This initialization includes acquiring a suitable
	// block of memory from the memory allocator, if such a block of memory
	// has not already been allocated previously.

	pack_t  pack_schema;
	bszid_t bmult_id;

	// Check parameters.
	if ( bli_error_checking_is_enabled() )
		bli_packv_check( a, p, cntx );

	// First check if we are to skip this operation because the control tree
	// is NULL, and if so, simply alias the object to its packed counterpart.
	if ( bli_cntl_is_noop( cntl ) )
	{
		bli_obj_alias_to( a, p );
		return;
	}

	// At this point, we can be assured that cntl is not NULL. Let us now
	// check to see if the object has already been packed to the desired
	// schema (as encoded in the control tree). If so, we can alias and
	// return, as above.
	// Note that in most cases, bli_obj_pack_schema() will return
	// BLIS_NOT_PACKED and thus packing will be called for (but in some
	// cases packing has already taken place). Also, not all combinations
	// of current pack status and desired pack schema are valid.
	if ( bli_obj_pack_schema( a ) == cntl_pack_schema( cntl ) )
	{
		bli_obj_alias_to( a, p );
		return;
	}

	// Now, if we are not skipping the pack operation, then the only question
	// left is whether we are to typecast vector a before packing.
	// Typecasting is not supported here; abort if a's storage datatype
	// differs from its target datatype.
	if ( bli_obj_dt( a ) != bli_obj_target_dt( a ) )
		bli_abort();

	// Extract various fields from the control tree and pass them in
	// explicitly into _init_pack(). This allows external code generators
	// the option of bypassing usage of control trees altogether.
	pack_schema = cntl_pack_schema( cntl );
	bmult_id    = cntl_bmid( cntl );

	// Initialize object p for the final packed vector.
	// BUG FIX: pass 'a' itself (an obj_t*), not '&a' (an obj_t**). Every
	// other call in this function passes 'a' directly, and an obj_t**
	// here would be a type mismatch for the object parameter.
	bli_packv_init_pack
	(
	  pack_schema,
	  bmult_id,
	  a,
	  p,
	  cntx
	);

	// Now p is ready to be packed.
}
// Internal gemm driver: handles early exits for degenerate dimensions,
// applies attached alpha/beta scalars, grows the threading info tree, and
// dispatches to the variant function stored in the current control tree
// node (with substitutions for certain induced-method implementations).
void bli_gemm_int
     (
       obj_t*     alpha,
       obj_t*     a,
       obj_t*     b,
       obj_t*     beta,
       obj_t*     c,
       cntx_t*    cntx,
       cntl_t*    cntl,
       thrinfo_t* thread
     )
{
	obj_t     a_local;
	obj_t     b_local;
	obj_t     c_local;
	gemm_voft f;

	// Check parameters.
	if ( bli_error_checking_is_enabled() )
		bli_gemm_basic_check( alpha, a, b, beta, c, cntx );

	// If C has a zero dimension, return early.
	if ( bli_obj_has_zero_dim( *c ) ) return;

	// If A or B has a zero dimension, scale C by beta and return early.
	// Only the chief thread performs the scaling; all threads synchronize
	// at the barrier before returning.
	if ( bli_obj_has_zero_dim( *a ) ||
	     bli_obj_has_zero_dim( *b ) )
	{
		if ( bli_thread_am_ochief( thread ) )
			bli_scalm( beta, c );
		bli_thread_obarrier( thread );
		return;
	}

	// If A or B is marked as being filled with zeros, scale C by beta and
	// return early.
	if ( bli_obj_is_zeros( *a ) ||
	     bli_obj_is_zeros( *b ) )
	{
		// This should never execute.
		bli_abort();

		if ( bli_thread_am_ochief( thread ) )
			bli_scalm( beta, c );
		bli_thread_obarrier( thread );
		return;
	}

	// Alias A, B, and C in case we need to update attached scalars.
	// (Aliasing keeps the caller's objects unmodified.)
	bli_obj_alias_to( *a, a_local );
	bli_obj_alias_to( *b, b_local );
	bli_obj_alias_to( *c, c_local );

	// If alpha is non-unit, typecast and apply it to the scalar attached
	// to B.
	if ( !bli_obj_equals( alpha, &BLIS_ONE ) )
	{
		bli_obj_scalar_apply_scalar( alpha, &b_local );
	}

	// If beta is non-unit, typecast and apply it to the scalar attached
	// to C.
	if ( !bli_obj_equals( beta, &BLIS_ONE ) )
	{
		bli_obj_scalar_apply_scalar( beta, &c_local );
	}

	// Create the next node in the thrinfo_t structure.
	bli_thrinfo_grow( cntx, cntl, thread );

	// Extract the function pointer from the current control tree node.
	f = bli_cntl_var_func( cntl );

	// Somewhat hackish support for 3m3, 3m2, and 4m1b method implementations.
	// When an induced method is active, swap specific native variants for
	// their method-specific counterparts by comparing function pointers.
	{
		ind_t im = bli_cntx_get_ind_method( cntx );

		if ( im != BLIS_NAT )
		{
			if      ( im == BLIS_3M3  && f == bli_gemm_packa    ) f = bli_gemm3m3_packa;
			else if ( im == BLIS_3M2  && f == bli_gemm_ker_var2 ) f = bli_gemm3m2_ker_var2;
			else if ( im == BLIS_4M1B && f == bli_gemm_ker_var2 ) f = bli_gemm4mb_ker_var2;
		}
	}

	// Invoke the variant.
	f
	(
	  &a_local,
	  &b_local,
	  &c_local,
	  cntx,
	  cntl,
	  thread
	);
}