示例#1
0
/**
 * Generates the source code of a sparse kernel and appends it to a file.
 *
 * The routine emits the function signature, reads the sparse matrix through
 * the CSC reader, invokes the sparse kernel generator, closes the function
 * and appends the resulting code string to i_file_out. A non-zero last_error
 * after code generation, or an output file that cannot be opened, is reported
 * on stderr and terminates the process with exit(-1).
 *
 * In builds without NDEBUG the matrix that was read is additionally expanded
 * into a dense array and printed to stdout.
 *
 * @param i_file_out      path of the file the code is appended to (opened with mode "a").
 * @param i_routine_name  routine name forwarded to the signature generator.
 * @param i_xgemm_desc    descriptor forwarded to the signature and kernel generators.
 * @param i_arch          architecture string forwarded to the kernel generator.
 * @param i_csc_file_in   path forwarded to the CSC reader.
 */
void libxsmm_generator_sparse( const char*                     i_file_out,
                               const char*                     i_routine_name,
                               const libxsmm_xgemm_descriptor* i_xgemm_desc,
                               const char*                     i_arch,
                               const char*                     i_csc_file_in ) {
  /* CSC structure; the counts start at zero so they hold defined values
     even if the reader does not assign them */
  unsigned int* l_row_idx = NULL;
  unsigned int* l_column_idx = NULL;
  double* l_values = NULL;
  unsigned int l_row_count = 0;
  unsigned int l_column_count = 0;
  unsigned int l_element_count = 0;

  /* init generated code object */
  libxsmm_generated_code l_generated_code;
  l_generated_code.generated_code = NULL;
  l_generated_code.buffer_size = 0;
  l_generated_code.code_size = 0;
  l_generated_code.code_type = 0;
  l_generated_code.last_error = 0;

  /* add signature to code string */
  libxsmm_function_signature( &l_generated_code, i_routine_name, i_xgemm_desc );

  /* read CSC file and construct CSC data structure */
  libxsmm_sparse_csc_reader( &l_generated_code, i_csc_file_in, &l_row_idx, &l_column_idx, &l_values, &l_row_count, &l_column_count, &l_element_count );

#if !defined(NDEBUG)
  /* the arrays remain NULL unless the reader assigned them; dumping them in
     that state would dereference a NULL pointer */
  if ( (l_row_idx != NULL) && (l_column_idx != NULL) && (l_values != NULL) ) {
    /* the element count is computed in size_t to avoid wrapping in unsigned int;
       calloc returns zero-filled memory (0.0 for IEEE-754 doubles) */
    double *const l_tmp = (double*)calloc((size_t)l_row_count * l_column_count, sizeof(double));
    unsigned int l_n;
    unsigned int l_m;

    printf("CSC matrix data structure we just read:\n");
    printf("rows: %u, columns: %u, elements: %u\n", l_row_count, l_column_count, l_element_count);

    if (l_tmp == NULL) {
      fprintf( stderr, "LIBXSMM ERROR, Could not allocate dense value array to test CSC datastructure!\n");
      exit(-1);
    }

    /* scatter the compressed columns into the dense, column-major array */
    for ( l_n = 0; l_n < l_column_count; l_n++) {
      const unsigned int l_column_elems = l_column_idx[l_n+1] - l_column_idx[l_n];
      assert(l_column_idx[l_n+1] >= l_column_idx[l_n]);

      for ( l_m = 0; l_m < l_column_elems; l_m++) {
        l_tmp[((size_t)l_n * l_row_count) + l_row_idx[l_column_idx[l_n] + l_m]] = l_values[l_column_idx[l_n] + l_m];
      }
    }

    /* print the dense matrix row by row */
    for ( l_n = 0; l_n < l_row_count; l_n++) {
      for ( l_m = 0; l_m < l_column_count; l_m++) {
        printf("%f ", l_tmp[((size_t)l_m * l_row_count) + l_n]);
      }
      printf("\n");
    }

    free( l_tmp );
  }
#endif

  /* generate the actual kernel code for current description depending on the architecture */
  libxsmm_generator_sparse_kernel( &l_generated_code, i_xgemm_desc, i_arch, l_row_idx, l_column_idx, l_values );

  /* close current function */
  libxsmm_close_function( &l_generated_code );

  /* release the CSC arrays; free(NULL) is a no-op, hence no guards */
  free( l_row_idx );
  free( l_column_idx );
  free( l_values );

  /* check for errors during code generation */
  if ( l_generated_code.last_error != 0 ) {
    fprintf(stderr, "LIBXSMM ERROR there was an error generating code. Last known error is:\n%s\n",
      libxsmm_strerror(l_generated_code.last_error));
    exit(-1);
  }

  /* append code to source file */
  {
    FILE *const l_file_handle = fopen( i_file_out, "a" );
    if ( l_file_handle != NULL ) {
      /* passing a NULL string to fputs is undefined behavior */
      if ( l_generated_code.generated_code != NULL ) {
        fputs( l_generated_code.generated_code, l_file_handle );
      }
      fclose( l_file_handle );
    } else {
      fprintf(stderr, "LIBXSMM ERROR, libxsmm_generator_sparse: could not write to into destination source file\n");
      exit(-1);
    }
  }

  /* free code memory */
  free( l_generated_code.generated_code );
}
示例#2
0
/**
 * Generates the source code of a sparse GEMM kernel and appends it to a file.
 *
 * The routine emits the function signature, reads the sparse matrix from
 * i_file_in (as CSC or CSR, depending on i_is_csr), invokes the matching
 * kernel generator, closes the function and appends the resulting code to
 * i_file_out. A non-zero last_error after code generation is passed to
 * LIBXSMM_HANDLE_ERROR_VERBOSE and followed by exit(-1); an output file that
 * cannot be opened is reported on stderr and also ends in exit(-1).
 *
 * In builds without NDEBUG the index/value arrays and a dense expansion of
 * the matrix are additionally printed to stdout.
 *
 * @param i_file_out      path of the file the code is appended to (opened with mode "a").
 * @param i_routine_name  routine name forwarded to the signature generator.
 * @param i_xgemm_desc    descriptor forwarded to the signature and kernel generators.
 * @param i_arch          architecture string forwarded to the kernel generator.
 * @param i_file_in       path forwarded to the CSC or CSR reader.
 * @param i_is_csr        selects file format and kernel generator:
 *                          0  - CSC file, libxsmm_generator_spgemm_csc_kernel
 *                          10 - CSC file, libxsmm_generator_spgemm_csc_soa_kernel
 *                          1  - CSR file, libxsmm_generator_spgemm_csr_kernel
 *                          2  - CSR file, libxsmm_generator_spgemm_csr_soa_kernel
 *                          3  - CSR file, libxsmm_generator_spgemm_csr_reg_kernel
 *                               (uses the asparse_reg signature)
 *                        Any other value reads the file (CSC if greater than 9,
 *                        CSR otherwise) and then hits assert(0).
 */
LIBXSMM_API
void libxsmm_generator_spgemm( const char*                    i_file_out,
                               const char*                    i_routine_name,
                               const libxsmm_gemm_descriptor* i_xgemm_desc,
                               const char*                    i_arch,
                               const char*                    i_file_in,
                               const int                      i_is_csr ) {
  /* CSC/CSR structure; the counts start at zero so they hold defined values
     even if the reader does not assign them */
  unsigned int* l_row_idx = NULL;
  unsigned int* l_column_idx = NULL;
  double* l_values = NULL;
  unsigned int l_row_count = 0;
  unsigned int l_column_count = 0;
  unsigned int l_element_count = 0;

  /* init generated code object */
  libxsmm_generated_code l_generated_code;
  l_generated_code.generated_code = NULL;
  l_generated_code.buffer_size = 0;
  l_generated_code.code_size = 0;
  l_generated_code.code_type = 0;
  l_generated_code.last_error = 0;

  /* add signature to code string */
  if (i_is_csr == 3) {
    libxsmm_mmfunction_signature_asparse_reg( &l_generated_code, i_routine_name, i_xgemm_desc );
  } else {
    libxsmm_mmfunction_signature( &l_generated_code, i_routine_name, i_xgemm_desc );
  }

  /* check if generate to CSC */
  /* @TODO, this i_is_csr is very hacky.... change it in future */
  if ( (i_is_csr == 0) || (i_is_csr > 9) ) {
    /* read CSC file and construct CSC data structure */
    libxsmm_sparse_csc_reader( &l_generated_code, i_file_in, &l_row_idx, &l_column_idx, &l_values, &l_row_count, &l_column_count, &l_element_count );

#if !defined(NDEBUG)
    /* the arrays remain NULL unless the reader assigned them; dumping them in
       that state would dereference a NULL pointer */
    if ( (l_row_idx != NULL) && (l_column_idx != NULL) && (l_values != NULL) ) {
      /* the element count is computed in size_t to avoid wrapping in unsigned int;
         calloc returns zero-filled memory (0.0 for IEEE-754 doubles) */
      double *const l_tmp = (double*)calloc((size_t)l_row_count * l_column_count, sizeof(double));
      unsigned int l_n;
      unsigned int l_m;

      printf("CSC matrix data structure we just read:\n");
      printf("rows: %u, columns: %u, elements: %u\n", l_row_count, l_column_count, l_element_count);

      /* the buffer must be validated before its first use */
      if (l_tmp == NULL) {
        fprintf(stderr, "LIBXSMM fatal error:Could not allocate dense value array to test CSC data structure!\n");
        exit(-1);
      }

      /* the column pointer array is indexed from 0 to l_column_count inclusive
         (see the scatter loop below), hence l_column_count+1 entries */
      for ( l_n = 0; l_n < l_column_count+1; l_n++) {
        printf("%u ", l_column_idx[l_n]);
      }
      printf("\n");
      for ( l_n = 0; l_n < l_element_count; l_n++) {
        printf("%u ", l_row_idx[l_n]);
      }
      printf("\n");
      for ( l_n = 0; l_n < l_element_count; l_n++) {
        printf("%f ", l_values[l_n]);
      }
      printf("\n");

      /* scatter the compressed columns into the dense, row-major array */
      for ( l_n = 0; l_n < l_column_count; l_n++) {
        const unsigned int l_column_elems = l_column_idx[l_n+1] - l_column_idx[l_n];
        assert(l_column_idx[l_n+1] >= l_column_idx[l_n]);

        for ( l_m = 0; l_m < l_column_elems; l_m++) {
          l_tmp[((size_t)l_row_idx[l_column_idx[l_n] + l_m] * l_column_count) + l_n] = l_values[l_column_idx[l_n] + l_m];
        }
      }

      /* print the dense matrix row by row */
      for ( l_n = 0; l_n < l_row_count; l_n++) {
        for ( l_m = 0; l_m < l_column_count; l_m++) {
          printf("%f ", l_tmp[((size_t)l_n * l_column_count) + l_m]);
        }
        printf("\n");
      }

      free( l_tmp );
    }
#endif
    /* generate the actual kernel code for current description depending on the architecture */
    if (i_is_csr == 0) {
      libxsmm_generator_spgemm_csc_kernel( &l_generated_code, i_xgemm_desc, i_arch, l_row_idx, l_column_idx, l_values );
    } else if (i_is_csr == 10) {
      libxsmm_generator_spgemm_csc_soa_kernel( &l_generated_code, i_xgemm_desc, i_arch, l_row_idx, l_column_idx, l_values );
    } else {
      assert(0/*should not happen*/);
    }
  } else {
    /* read CSR file and construct CSR data structure */
    libxsmm_sparse_csr_reader( &l_generated_code, i_file_in, &l_row_idx, &l_column_idx, &l_values, &l_row_count, &l_column_count, &l_element_count );

#if !defined(NDEBUG)
    /* the arrays remain NULL unless the reader assigned them; dumping them in
       that state would dereference a NULL pointer */
    if ( (l_row_idx != NULL) && (l_column_idx != NULL) && (l_values != NULL) ) {
      /* the element count is computed in size_t to avoid wrapping in unsigned int;
         calloc returns zero-filled memory (0.0 for IEEE-754 doubles) */
      double *const l_tmp = (double*)calloc((size_t)l_row_count * l_column_count, sizeof(double));
      unsigned int l_n;
      unsigned int l_m;

      printf("CSR matrix data structure we just read:\n");
      printf("rows: %u, columns: %u, elements: %u\n", l_row_count, l_column_count, l_element_count);

      /* the buffer must be validated before its first use */
      if (l_tmp == NULL) {
        fprintf(stderr, "LIBXSMM fatal error:Could not allocate dense value array to test CSR data structure!\n");
        exit(-1);
      }

      /* the row pointer array is indexed from 0 to l_row_count inclusive */
      for ( l_n = 0; l_n < l_row_count+1; l_n++) {
        printf("%u ", l_row_idx[l_n]);
      }
      printf("\n");
      for ( l_n = 0; l_n < l_element_count; l_n++) {
        printf("%u ", l_column_idx[l_n]);
      }
      printf("\n");
      for ( l_n = 0; l_n < l_element_count; l_n++) {
        printf("%f ", l_values[l_n]);
      }
      printf("\n");

      /* scatter the compressed rows into the dense, row-major array */
      for ( l_n = 0; l_n < l_row_count; l_n++) {
        const unsigned int l_row_elems = l_row_idx[l_n+1] - l_row_idx[l_n];
        assert(l_row_idx[l_n+1] >= l_row_idx[l_n]);

        for ( l_m = 0; l_m < l_row_elems; l_m++) {
          l_tmp[((size_t)l_n * l_column_count) + l_column_idx[l_row_idx[l_n] + l_m]] = l_values[l_row_idx[l_n] + l_m];
        }
      }

      /* print the dense matrix row by row */
      for ( l_n = 0; l_n < l_row_count; l_n++) {
        for ( l_m = 0; l_m < l_column_count; l_m++) {
          printf("%f ", l_tmp[((size_t)l_n * l_column_count) + l_m]);
        }
        printf("\n");
      }

      free( l_tmp );
    }
#endif
    /* generate the actual kernel code for current description depending on the architecture */
    if (i_is_csr == 1) {
      libxsmm_generator_spgemm_csr_kernel( &l_generated_code, i_xgemm_desc, i_arch, l_row_idx, l_column_idx, l_values );
    } else if (i_is_csr == 2) {
      libxsmm_generator_spgemm_csr_soa_kernel( &l_generated_code, i_xgemm_desc, i_arch, l_row_idx, l_column_idx, l_values );
    } else if (i_is_csr == 3) {
      libxsmm_generator_spgemm_csr_reg_kernel( &l_generated_code, i_xgemm_desc, i_arch, l_row_idx, l_column_idx, l_values );
    } else {
      assert(0/*should not happen*/);
    }
  }

  /* close current function */
  libxsmm_close_function( &l_generated_code );

  /* release the sparse arrays; free(NULL) is a no-op, hence no guards */
  free( l_row_idx );
  free( l_column_idx );
  free( l_values );

  /* check for errors during code generation */
  if ( l_generated_code.last_error != 0 ) {
    LIBXSMM_HANDLE_ERROR_VERBOSE( &l_generated_code, l_generated_code.last_error );
    exit(-1);
  }

  /* append code to source file */
  {
    FILE *const l_file_handle = fopen( i_file_out, "a" );
    if ( l_file_handle != NULL ) {
      assert(l_generated_code.generated_code != NULL);
      /* the assert vanishes under NDEBUG, and passing a NULL string to fputs
         is undefined behavior, so the check is repeated at runtime */
      if ( l_generated_code.generated_code != NULL ) {
        fputs( (const char*)l_generated_code.generated_code, l_file_handle );
      }
      fclose( l_file_handle );
    } else {
      fprintf(stderr, "LIBXSMM ERROR: libxsmm_generator_spgemm could not write to into destination source file\n");
      exit(-1);
    }
  }

  /* free code memory */
  free( l_generated_code.generated_code );
}