void libxsmm_generator_sparse( const char* i_file_out, const char* i_routine_name, const libxsmm_xgemm_descriptor* i_xgemm_desc, const char* i_arch, const char* i_csc_file_in ) { /* CSC structure */ unsigned int* l_row_idx = NULL; unsigned int* l_column_idx = NULL; double* l_values = NULL; unsigned int l_row_count; unsigned int l_column_count; unsigned int l_element_count; /* init generated code object */ libxsmm_generated_code l_generated_code; l_generated_code.generated_code = NULL; l_generated_code.buffer_size = 0; l_generated_code.code_size = 0; l_generated_code.code_type = 0; l_generated_code.last_error = 0; /* add signature to code string */ libxsmm_function_signature( &l_generated_code, i_routine_name, i_xgemm_desc ); /* read CSC file and consturct CSC datastructure */ libxsmm_sparse_csc_reader( &l_generated_code, i_csc_file_in, &l_row_idx, &l_column_idx, &l_values, &l_row_count, &l_column_count, &l_element_count ); #if !defined(NDEBUG) { double *const l_tmp = (double*)malloc(l_row_count * l_column_count * sizeof(double)); unsigned int l_n; unsigned int l_m; printf("CSC matrix data structure we just read:\n"); printf("rows: %u, columns: %u, elements: %u\n", l_row_count, l_column_count, l_element_count); if (l_tmp == NULL) { fprintf( stderr, "LIBXSMM ERROR, Could allocate dense value array to test CSC datastructure!\n"); exit(-1); } for ( l_n = 0; l_n < (l_row_count * l_column_count); l_n++) { l_tmp[l_n] = 0.0; } for ( l_n = 0; l_n < l_column_count; l_n++) { const unsigned int l_column_elems = l_column_idx[l_n+1] - l_column_idx[l_n]; assert(l_column_idx[l_n+1] >= l_column_idx[l_n]); for ( l_m = 0; l_m < l_column_elems; l_m++) { l_tmp[(l_n * l_row_count) + l_row_idx[l_column_idx[l_n] + l_m]] = l_values[l_column_idx[l_n] + l_m]; } } for ( l_n = 0; l_n < l_row_count; l_n++) { for ( l_m = 0; l_m < l_column_count; l_m++) { printf("%f ", l_tmp[(l_m * l_row_count) + l_n]); } printf("\n"); } free( l_tmp ); } #endif /* generate the actual kernel code for current description depending on the architecture */ libxsmm_generator_sparse_kernel( &l_generated_code, i_xgemm_desc, i_arch, l_row_idx, l_column_idx, l_values ); /* close current function */ libxsmm_close_function( &l_generated_code ); /* free if not NULL */ if ( l_row_idx != NULL ) { free( l_row_idx ); } if ( l_column_idx != NULL ) { free( l_column_idx ); } if ( l_values != NULL ) { free( l_values ); } /* check for errors during code generation */ if ( l_generated_code.last_error != 0 ) { fprintf(stderr, "LIBXSMM ERROR there was an error generating code. Last known error is:\n%s\n", libxsmm_strerror(l_generated_code.last_error)); exit(-1); } /* append code to source file */ { FILE *const l_file_handle = fopen( i_file_out, "a" ); if ( l_file_handle != NULL ) { fputs( l_generated_code.generated_code, l_file_handle ); fclose( l_file_handle ); } else { fprintf(stderr, "LIBXSMM ERROR, libxsmm_generator_sparse: could not write to into destination source file\n"); exit(-1); } } /* free code memory */ free( l_generated_code.generated_code ); }
LIBXSMM_API void libxsmm_generator_spgemm( const char* i_file_out, const char* i_routine_name, const libxsmm_gemm_descriptor* i_xgemm_desc, const char* i_arch, const char* i_file_in, const int i_is_csr ) { /* CSC/CSR structure */ unsigned int* l_row_idx = NULL; unsigned int* l_column_idx = NULL; double* l_values = NULL; unsigned int l_row_count; unsigned int l_column_count; unsigned int l_element_count; /* init generated code object */ libxsmm_generated_code l_generated_code; l_generated_code.generated_code = NULL; l_generated_code.buffer_size = 0; l_generated_code.code_size = 0; l_generated_code.code_type = 0; l_generated_code.last_error = 0; /* add signature to code string */ if (i_is_csr == 3) { libxsmm_mmfunction_signature_asparse_reg( &l_generated_code, i_routine_name, i_xgemm_desc ); } else { libxsmm_mmfunction_signature( &l_generated_code, i_routine_name, i_xgemm_desc ); } /* check if generate to CSC */ /* @TODO, this i_is_csr is very hacky.... change it in future */ if ( (i_is_csr == 0) || (i_is_csr > 9) ) { /* read CSC file and construct CSC data structure */ libxsmm_sparse_csc_reader( &l_generated_code, i_file_in, &l_row_idx, &l_column_idx, &l_values, &l_row_count, &l_column_count, &l_element_count ); #if !defined(NDEBUG) { double *const l_tmp = (double*)malloc(l_row_count * l_column_count * sizeof(double)); unsigned int l_n; unsigned int l_m; /* mute static analysis about garbage content */ memset(l_tmp, 0, l_row_count * l_column_count * sizeof(double)); printf("CSC matrix data structure we just read:\n"); printf("rows: %u, columns: %u, elements: %u\n", l_row_count, l_column_count, l_element_count); if (l_tmp == NULL) { fprintf(stderr, "LIBXSMM fatal error:Could allocate dense value array to test CSC data structure!\n"); exit(-1); } for ( l_n = 0; l_n < (l_row_count * l_column_count); l_n++) { l_tmp[l_n] = 0.0; } for ( l_n = 0; l_n < l_row_count+1; l_n++) { printf("%u ", l_column_idx[l_n]); } printf("\n"); for ( l_n = 0; l_n < l_element_count; l_n++) { printf("%u ", l_row_idx[l_n]); } printf("\n"); for ( l_n = 0; l_n < l_element_count; l_n++) { printf("%f ", l_values[l_n]); } printf("\n"); for ( l_n = 0; l_n < l_column_count; l_n++) { const unsigned int l_column_elems = l_column_idx[l_n+1] - l_column_idx[l_n]; assert(l_column_idx[l_n+1] >= l_column_idx[l_n]); for ( l_m = 0; l_m < l_column_elems; l_m++) { l_tmp[(l_row_idx[l_column_idx[l_n] + l_m]*l_column_count) + l_n] = l_values[l_column_idx[l_n] + l_m]; } } assert(0 != l_tmp); for ( l_n = 0; l_n < l_row_count; l_n++) { for ( l_m = 0; l_m < l_column_count; l_m++) { printf("%f ", l_tmp[(l_n * l_column_count) + l_m]); } printf("\n"); } free( l_tmp ); } #endif /* generate the actual kernel code for current description depending on the architecture */ if (i_is_csr == 0) { libxsmm_generator_spgemm_csc_kernel( &l_generated_code, i_xgemm_desc, i_arch, l_row_idx, l_column_idx, l_values ); } else if (i_is_csr == 10) { libxsmm_generator_spgemm_csc_soa_kernel( &l_generated_code, i_xgemm_desc, i_arch, l_row_idx, l_column_idx, l_values ); } else { assert(0/*should not happen*/); } } else { /* read CSR file and construct CSR data structure */ libxsmm_sparse_csr_reader( &l_generated_code, i_file_in, &l_row_idx, &l_column_idx, &l_values, &l_row_count, &l_column_count, &l_element_count ); #if !defined(NDEBUG) { double *const l_tmp = (double*)malloc(l_row_count * l_column_count * sizeof(double)); unsigned int l_n; unsigned int l_m; /* mute static analysis about garbage content */ memset(l_tmp, 0, l_row_count * l_column_count * sizeof(double)); printf("CSR matrix data structure we just read:\n"); printf("rows: %u, columns: %u, elements: %u\n", l_row_count, l_column_count, l_element_count); if (l_tmp == NULL) { fprintf(stderr, "LIBXSMM fatal error:Could allocate dense value array to test CSR data structure!\n"); exit(-1); } for ( l_n = 0; l_n < (l_row_count * l_column_count); l_n++) { l_tmp[l_n] = 0.0; } for ( l_n = 0; l_n < l_row_count+1; l_n++) { printf("%u ", l_row_idx[l_n]); } printf("\n"); for ( l_n = 0; l_n < l_element_count; l_n++) { printf("%u ", l_column_idx[l_n]); } printf("\n"); for ( l_n = 0; l_n < l_element_count; l_n++) { printf("%f ", l_values[l_n]); } printf("\n"); for ( l_n = 0; l_n < l_row_count; l_n++) { const unsigned int l_row_elems = l_row_idx[l_n+1] - l_row_idx[l_n]; assert(l_row_idx[l_n+1] >= l_row_idx[l_n]); for ( l_m = 0; l_m < l_row_elems; l_m++) { l_tmp[(l_n * l_column_count) + l_column_idx[l_row_idx[l_n] + l_m]] = l_values[l_row_idx[l_n] + l_m]; } } assert(0 != l_tmp); for ( l_n = 0; l_n < l_row_count; l_n++) { for ( l_m = 0; l_m < l_column_count; l_m++) { printf("%f ", l_tmp[(l_n * l_column_count) + l_m]); } printf("\n"); } free( l_tmp ); } #endif if (i_is_csr == 1) { /* generate the actual kernel code for current description depending on the architecture */ libxsmm_generator_spgemm_csr_kernel( &l_generated_code, i_xgemm_desc, i_arch, l_row_idx, l_column_idx, l_values ); } else if (i_is_csr == 2) { /* generate the actual kernel code for current description depending on the architecture */ libxsmm_generator_spgemm_csr_soa_kernel( &l_generated_code, i_xgemm_desc, i_arch, l_row_idx, l_column_idx, l_values ); } else if (i_is_csr == 3) { /* generate the actual kernel code for current description depending on the architecture */ libxsmm_generator_spgemm_csr_reg_kernel( &l_generated_code, i_xgemm_desc, i_arch, l_row_idx, l_column_idx, l_values ); } else { assert(0/*should not happen*/); } } /* close current function */ libxsmm_close_function( &l_generated_code ); /* free if not NULL */ if ( l_row_idx != NULL ) { free( l_row_idx ); } if ( l_column_idx != NULL ) { free( l_column_idx ); } if ( l_values != NULL ) { free( l_values ); } /* check for errors during code generation */ if ( l_generated_code.last_error != 0 ) { LIBXSMM_HANDLE_ERROR_VERBOSE( &l_generated_code, l_generated_code.last_error ); exit(-1); } /* append code to source file */ { FILE *const l_file_handle = fopen( i_file_out, "a" ); if ( l_file_handle != NULL ) { assert(l_generated_code.generated_code != NULL); fputs( (const char*)l_generated_code.generated_code, l_file_handle ); fclose( l_file_handle ); } else { fprintf(stderr, "LIBXSMM ERROR: libxsmm_generator_spgemm could not write to into destination source file\n"); exit(-1); } } /* free code memory */ free( l_generated_code.generated_code ); }