/*
 * Opens `path` with the given fopen-style `mode`, hands the stream to
 * sync_read together with `buff`, `size` and a trailing 0, and closes
 * the stream again.
 *
 * NOTE(review): presumably sync_read fills `buff` with `size` elements
 * starting at offset 0 of the file - confirm against sync_read.
 */
void load_file( const char *path, const char *mode, double *buff, size_t size )
{
	FILE *stream = fgls_fopen( path, mode );

	sync_read( buff, stream, size, 0 );
	fclose( stream );
}
struct databel_fvi * load_databel_fvi( char *path ) { FILE *f; databel_fvi *fvi; size_t data_size; f = fgls_fopen( path, "r" ); fvi = (databel_fvi*) fgls_malloc( sizeof(databel_fvi) ); // Header fread( &fvi->fvi_header, sizeof(databel_fvi_header), 1, f ); // Labels data_size = (fvi->fvi_header.numVariables +fvi->fvi_header.numObservations ) * fvi->fvi_header.namelength * sizeof(char); fvi->fvi_data = (char *) fgls_malloc ( data_size ); // Load labels fread( fvi->fvi_data, 1, data_size, f ); fclose( f ); return fvi; }
/* * Out-of-core gemms: * - Z' XR * - Z' Y * Z is m x m * The other matrix is m x n */ void ooc_gemm( int m, int n, int ooc_b, double *Z, char *in, char *out, int threshold, const char *obj_type, char *obj_name, int namelength, int nthreads_avg ) { /* Files */ FILE *fp_in = fgls_fopen( in, "rb" ); FILE *fp_out = fgls_fopen( out, "wb" ); /* OOC Problem dimensions */ /*size_t max_elems_per_buffer = 1L << 26; // 64MElems, 512 MBs*/ /*max_elems_per_buffer = max_elems_per_buffer - max_elems_per_buffer % n;*/ /*size_t num_cols_per_buff = max_elems_per_buffer / n;*/ /* Asynchronous IO data structures */ double *in_comp, *out_comp; double_buffering db_in, db_out; // B, C double_buffering_init( &db_in, ooc_b * m * sizeof(double), fp_in, NULL ); // _fp, cf not needed in this case double_buffering_init( &db_out, ooc_b * m * sizeof(double), fp_out, NULL ); // _fp, cf not needed in this case /* BLAS constants */ double ONE = 1.0; double ZERO = 0.0; /* Read first piece of "in" */ double_buffering_read( &db_in, IO_BUFF, MIN( (size_t)ooc_b * m, (size_t)m * n ) * sizeof(double), 0); double_buffering_swap( &db_in ); int cur_n; int i; for ( i = 0; i < n; i += ooc_b ) { /* Read next piece of "in" */ size_t nbytes = i + ooc_b > n ? 1 : MIN( ooc_b * m, ( n - (size_t)( i + ooc_b ) ) * m ) * sizeof(double); off_t offset = i + ooc_b > n ? 
0 : (off_t)(i + ooc_b) * m * sizeof(double); double_buffering_read( &db_in, IO_BUFF, nbytes, offset ); /* Wait for current piece of "in" */ #if VAMPIR VT_USER_START("OOC_GEMM_WAIT"); #endif double_buffering_wait( &db_in, COMP_BUFF ); #if VAMPIR VT_USER_END("OOC_GEMM_WAIT"); #endif /* Compute */ in_comp = double_buffering_get_comp_buffer( &db_in ); out_comp = double_buffering_get_comp_buffer( &db_out ); cur_n = MIN( ooc_b, (n - i) ); /*printf("Compute\n");*/ // Sanity check average( in_comp, m, cur_n, threshold, obj_type, &obj_name[i*namelength], namelength, 1, nthreads_avg ); #if VAMPIR VT_USER_START("OOC_GEMM"); #endif /*printf("\nPRE: "); print_timestamp(); fflush( stdout );*/ dgemm_("T", "N", &m, &cur_n, &m, &ONE, Z, &m, in_comp, &m, &ZERO, out_comp, &m); /*printf("\nPOST: "); print_timestamp(); fflush( stdout );*/ #if VAMPIR VT_USER_END("OOC_GEMM"); #endif /* Wait until previous piece of "out" is written */ if ( i > 0) double_buffering_wait( &db_out, IO_BUFF ); /* Write current piece of "out" */ double_buffering_write( &db_out, COMP_BUFF, MIN( ooc_b * m, (size_t)(n - i) * m ) * sizeof(double), (off_t)i * m * sizeof(double) ); /* Swap buffers */ double_buffering_swap( &db_in ); double_buffering_swap( &db_out ); } /* Wait for the remaining io calls issued */ double_buffering_wait( &db_in, COMP_BUFF ); double_buffering_wait( &db_out, IO_BUFF ); /* Clean-up */ double_buffering_destroy( &db_in ); double_buffering_destroy( &db_out ); fclose( fp_in ); fclose( fp_out ); }
// // GWAS config + data // void initialize_config( FGLS_config_t *cf, char *cov_base, char *phi_base, char *snp_base, char *pheno_base, char *out_base//, // char *var, int num_threads, int xtile, int ytile, int xb, int yb, int write_output ) { load_databel_info( cf ); // Problem dimensions cf->n = cf->XL_fvi->fvi_header.numObservations; cf->p = cf->XL_fvi->fvi_header.numVariables + 1; // Intercept included cf->m = cf->XR_fvi->fvi_header.numVariables; cf->t = cf->Y_fvi->fvi_header.numVariables; // Assuming wXR = 1 cf->wXL = cf->p - 1; cf->wXR = 1; // Algorithm parameters /*cf->x_b = x_b;*/ /*cf->y_b = y_b;*/ /*cf->num_threads = num_threads;*/ get_main_memory_size( &cf->totalMem, &cf->availMem ); estimate_block_sizes( cf, cf->var, cf->x_b == -1 || cf->y_b == -1 ); // if any is -1, estimate them /* if ( !(cf->x_b == -1 || cf->y_b == -1) ) { cf->x_b = xb; cf->y_b = yb; } cf->x_tile = xtile; cf->y_tile = ytile; */ /*cf->x_b = 10;*/ /*cf->y_b = 10;*/ // In/Out Files /* sprintf( cf->Phi_data_path, "%s.fvd", phi_base ); sprintf( cf->Phi_info_path, "%s.fvi", phi_base ); sprintf( cf->XL_data_path, "%s.fvd", cov_base ); sprintf( cf->XL_info_path, "%s.fvi", cov_base ); sprintf( cf->XR_data_path, "%s.fvd", snp_base ); sprintf( cf->XR_info_path, "%s.fvi", snp_base ); sprintf( cf->Y_data_path, "%s.fvd", pheno_base ); sprintf( cf->Y_info_path, "%s.fvi", pheno_base ); if ( write_output ) { sprintf( cf->B_data_path, "%s.out", out_base ); sprintf( cf->B_info_path, "%s.iout", out_base ); } else { sprintf( cf->B_data_path, "/dev/null" ); sprintf( cf->B_info_path, "/dev/null" ); } */ // Temporary files snprintf( cf->ZtXL_path, STR_BUFFER_SIZE, "%s.tmp", cov_base ); snprintf( cf->ZtXR_path, STR_BUFFER_SIZE, "%s.tmp", snp_base ); snprintf( cf->ZtY_path, STR_BUFFER_SIZE, "%s.tmp", pheno_base ); // Allocate memory and load in-core data // NOTE: only data which is input to GWAS // // Intermediate data as ZtXL will be handled on the fly // by the corresponding routines: // * ZtXL, only 
available after REML_eigen (if so) // * Z and W will be allocated and filled up during // the first eigen-decomposition of Phi, (if so) // ests needed in both cases chol and eigen cf->ests = (double *) fgls_malloc ( (3 + cf->wXL) * cf->t * sizeof(double) ); cf->h2 = cf->ests; cf->sigma2 = &cf->ests[ cf->t ]; cf->res_sigma2 = &cf->ests[ 2 * cf->t ]; cf->beta_ests = &cf->ests[ 3 * cf->t ]; cf->Phi = (double *) fgls_malloc ( cf->n * cf->n * sizeof(double) ); load_file ( cf->Phi_data_path, "rb", cf->Phi, cf->n * cf->n ); // Sanity check (Phi) checkNoNans(cf->n * cf->n, cf->Phi, "[ERROR] NaNs not allowed in Phi\n"); cf->XL = (double *) fgls_malloc ( cf->n * cf->wXL * sizeof(double) ); load_file ( cf->XL_data_path, "rb", cf->XL, cf->n * cf->wXL ); // Sanity check (XL) average( cf->XL, cf->n, cf->wXL, cf->threshold, "Covariate", &cf->XL_fvi->fvi_data[cf->n*NAMELENGTH], NAMELENGTH, 1, cf->num_threads ); cf->ZtXL = NULL; cf->Z = NULL; cf->W = NULL; // Open files for out-of-core data // Again, only for data input/output to GWAS. Intermediate, on the fly cf->XR = fgls_fopen( cf->XR_data_path, "rb" ); cf->ZtXR = NULL; cf->Y = fgls_fopen( cf->Y_data_path, "rb" ); cf->ZtY = NULL; cf->B = fgls_fopen( cf->B_data_path, "wb" ); return; }