Example #1
0
/*
 * Issue a write of a rectangular range of V through the double-buffering
 * layer.
 *
 *   b              - double-buffering handle; b->cf supplies the sizes m and p
 *   wh_buff        - buffer selector, forwarded unchanged to
 *                    double_buffering_write()
 *   from_i, to_i   - inclusive index range along i
 *   from_j, to_j   - inclusive index range along j
 *
 * The request covers
 *     (to_i - from_i + 1) * p * p * (to_j - from_j + 1)
 * doubles and starts
 *     from_j * m * p * p  +  from_i * p * p * (to_j - from_j + 1)
 * doubles into the file.
 * NOTE(review): presumably every (i, j) entry is a p x p block of doubles
 * and to_* >= from_* -- confirm against the callers.
 */
void double_buffering_write_V(  double_buffering *b, which_buffer wh_buff,
		size_t from_i, size_t to_i, size_t from_j, size_t to_j )
{
	FGLS_config_t *cf = b->cf;

	// Extent of the (inclusive) ranges
	size_t count_i = to_i - from_i + 1;
	size_t count_j = to_j - from_j + 1;

	// Size of the request, in bytes
	size_t nbytes = (count_i * cf->p * cf->p * count_j) * sizeof(double);

	// Position in the file, in bytes (widened to off_t before multiplying)
	off_t offset = ((off_t)from_j * cf->m * cf->p * cf->p +
	                (off_t)from_i * cf->p * cf->p * count_j) * sizeof(double);

#if DEBUG
	// off_t has no printf conversion of its own: print it as long long
	printf("Writing V: %zu bytes from %lld\n", nbytes, (long long)offset);
	fflush(stdout);
#endif
	double_buffering_write( b, wh_buff, nbytes, offset );
}
Example #2
0
/*
 * Out-of-core gemms:
 *   - Z' XR
 *   - Z' Y
 * Z is m x m
 * The other matrix is m x n
 *
 * The m x n matrix is read from the file "in" in panels of at most ooc_b
 * columns, each panel is multiplied from the left by Z' (dgemm with "T","N"),
 * and the result is written to the file "out" at the same position. Reads
 * and writes go through two double_buffering objects, so the next panel is
 * requested before the current one is processed and the previous result is
 * waited for only right before the next write is issued.
 *
 *   m, n         - dimensions (see above)
 *   ooc_b        - maximum number of columns per panel
 *   Z            - the m x m matrix, passed to dgemm_ with leading dimension m
 *   in, out      - paths of the input ("rb") and output ("wb") files
 *   threshold, obj_type, obj_name, namelength, nthreads_avg
 *                - forwarded to average(), which is run on every panel
 *                  before the product; obj_name is indexed at
 *                  i * namelength for the panel starting at column i
 *                  (presumably fixed-width names, one per column -- confirm)
 *
 * All element counts are computed in size_t: ooc_b * m easily exceeds
 * INT_MAX for realistic panel sizes, and an int product would overflow
 * before being widened.
 */
void ooc_gemm( int m, int n, int ooc_b, double *Z, char *in, char *out, 
		int threshold, const char *obj_type, char *obj_name, int namelength, int nthreads_avg )
{
	/* Files */
	FILE *fp_in  = fgls_fopen( in, "rb" );
	FILE *fp_out = fgls_fopen( out, "wb" );

	/* OOC problem dimensions, widened before multiplying */
	const size_t panel_elems = (size_t)ooc_b * m; /* one full panel       */
	const size_t total_elems = (size_t)m * n;     /* the whole m x n data */
	const size_t panel_bytes = panel_elems * sizeof(double);

	/* Asynchronous IO data structures */
	double *in_comp, *out_comp;
	double_buffering db_in, db_out; // B, C
	double_buffering_init( &db_in,  panel_bytes, fp_in,  NULL ); // _fp, cf not needed in this case
	double_buffering_init( &db_out, panel_bytes, fp_out, NULL ); // _fp, cf not needed in this case

	/* BLAS constants */
	double ONE  = 1.0;
	double ZERO = 0.0;

	/* Read first piece of "in" */
	double_buffering_read( &db_in, IO_BUFF,
	                       MIN( panel_elems, total_elems ) * sizeof(double), 0 );
	double_buffering_swap( &db_in );

	int cur_n;
	int i;
	for ( i = 0; i < n; i += ooc_b )
	{
		/* Read next piece of "in" */
		const size_t next_col = (size_t)i + (size_t)ooc_b;
		size_t nbytes;
		off_t  offset;
		if ( next_col > (size_t)n )
		{
			/*
			 * Nothing left to prefetch: a 1-byte read at offset 0 is
			 * issued instead (presumably so that the wait calls below
			 * always have a pending request -- confirm).
			 */
			nbytes = 1;
			offset = 0;
		}
		else
		{
			/*
			 * NOTE(review): when next_col == n this is a 0-byte request
			 * positioned at the end of the data.
			 */
			nbytes = MIN( panel_elems, ( (size_t)n - next_col ) * m ) * sizeof(double);
			offset = (off_t)next_col * m * sizeof(double);
		}
		double_buffering_read( &db_in, IO_BUFF, nbytes, offset );

		/* Wait for current piece of "in" */
#if VAMPIR
    VT_USER_START("OOC_GEMM_WAIT");
#endif
		double_buffering_wait( &db_in, COMP_BUFF );
#if VAMPIR
    VT_USER_END("OOC_GEMM_WAIT");
#endif

		/* Compute */
		in_comp  = double_buffering_get_comp_buffer( &db_in );
		out_comp = double_buffering_get_comp_buffer( &db_out );
		cur_n = MIN( ooc_b, (n - i) );

		// Sanity check
		average( in_comp, m, cur_n, threshold, obj_type,
		         &obj_name[(size_t)i * namelength], namelength, 1, nthreads_avg );
#if VAMPIR
    VT_USER_START("OOC_GEMM");
#endif
		dgemm_("T", "N", &m, &cur_n, &m, &ONE, Z, &m, in_comp, &m, &ZERO, out_comp, &m);
#if VAMPIR
    VT_USER_END("OOC_GEMM");
#endif

		/* Wait until previous piece of "out" is written */
		if ( i > 0 )
			double_buffering_wait( &db_out, IO_BUFF );

		/* Write current piece of "out": cur_n columns of m doubles */
		double_buffering_write( &db_out, COMP_BUFF,
		                        (size_t)cur_n * m * sizeof(double),
		                        (off_t)i * m * sizeof(double) );

		/* Swap buffers */
		double_buffering_swap( &db_in );
		double_buffering_swap( &db_out );
	}

	/* Wait for the remaining io calls issued */
	double_buffering_wait( &db_in,  COMP_BUFF );
	double_buffering_wait( &db_out, IO_BUFF );

	/* Clean-up */
	double_buffering_destroy( &db_in );
	double_buffering_destroy( &db_out );

	fclose( fp_in );
	fclose( fp_out );
}