Beispiel #1
0
void bli_gemmtrsm_ukr_make_subparts( dim_t  k,
                                     obj_t* a,
                                     obj_t* b,
                                     obj_t* a1x,
                                     obj_t* a11,
                                     obj_t* bx1,
                                     obj_t* b11 )
{
	dim_t mr = bli_obj_length( *a );
	dim_t nr = bli_obj_width( *b );

	dim_t off_a1x, off_a11;
	dim_t off_bx1, off_b11;

	if ( bli_obj_is_lower( *a ) )
	{
		off_a1x = 0;
		off_a11 = k;
		off_bx1 = 0;
		off_b11 = k;
	}
	else
	{
		off_a1x = mr;
		off_a11 = 0;
		off_bx1 = mr;
		off_b11 = 0;
	}

	bli_obj_init_subpart_from( *a, *a1x );
	bli_obj_set_dims( mr, k, *a1x );
	bli_obj_inc_offs( 0, off_a1x, *a1x );

	bli_obj_init_subpart_from( *a, *a11 );
	bli_obj_set_dims( mr, mr, *a11 );
	bli_obj_inc_offs( 0, off_a11, *a11 );

	bli_obj_init_subpart_from( *b, *bx1 );
	bli_obj_set_dims( k, nr, *bx1 );
	bli_obj_inc_offs( off_bx1, 0, *bx1 );

	bli_obj_init_subpart_from( *b, *b11 );
	bli_obj_set_dims( mr, nr, *b11 );
	bli_obj_inc_offs( off_b11, 0, *b11 );

	// Mark a1x as having general structure (which overwrites the triangular
	// property it inherited from a).
	bli_obj_set_struc( BLIS_GENERAL, *a1x );

	// Set the diagonal offset of a11 to 0 (which overwrites the diagonal
	// offset value it inherited from a).
	bli_obj_set_diag_offset( 0, *a11 );
}
Beispiel #2
0
void bli_obj_create_without_buffer( num_t  dt,
                                    dim_t  m,
                                    dim_t  n,
                                    obj_t* obj )
{
	siz_t  elem_size;
	void*  s;

	if ( bli_error_checking_is_enabled() )
		bli_obj_create_without_buffer_check( dt, m, n, obj );

	// Query the size of one element of the object's pre-set datatype.
	elem_size = bli_datatype_size( dt );

	// Set any default properties that are appropriate.
	bli_obj_set_defaults( *obj );

	// Set the object root to itself, since obj is not presumed to be a view
	// into a larger matrix. This is typically the only time this field is
	// ever set; henceforth, subpartitions and aliases to this object will
	// get copies of this field, and thus always have access to its
	// "greatest-grand" parent (ie: the original parent, or "root", object).
	// However, there ARE a few places where it is convenient to reset the
	// root field explicitly via bli_obj_set_as_root(). (We do not list
	// those places here. Just grep for bli_obj_set_as_root within the
	// top-level 'frame' directory to see them.
	bli_obj_set_as_root( *obj );

	// Set individual fields.
	bli_obj_set_buffer( NULL, *obj );
	bli_obj_set_datatype( dt, *obj );
	bli_obj_set_elem_size( elem_size, *obj );
	bli_obj_set_target_datatype( dt, *obj );
	bli_obj_set_execution_datatype( dt, *obj );
	bli_obj_set_dims( m, n, *obj );
	bli_obj_set_offs( 0, 0, *obj );
	bli_obj_set_diag_offset( 0, *obj );

	// Set the internal scalar to 1.0.
	s = bli_obj_internal_scalar_buffer( *obj );

	if      ( bli_is_float( dt )    ) { bli_sset1s( *(( float*    )s) ); }
	else if ( bli_is_double( dt )   ) { bli_dset1s( *(( double*   )s) ); }
	else if ( bli_is_scomplex( dt ) ) { bli_cset1s( *(( scomplex* )s) ); }
	else if ( bli_is_dcomplex( dt ) ) { bli_zset1s( *(( dcomplex* )s) ); }
}
Beispiel #3
0
int main( int argc, char** argv )
{
	bli_init();

#if 0
	obj_t a, b, c;
	obj_t aa, bb, cc;
	dim_t m, n, k;
	num_t dt;
	uplo_t uploa, uplob, uploc;

	{
		dt = BLIS_DOUBLE;

		m = 6;
		k = 6;
		n = 6;

		bli_obj_create( dt, m, k, 0, 0, &a );
		bli_obj_create( dt, k, n, 0, 0, &b );
		bli_obj_create( dt, m, n, 0, 0, &c );

		uploa = BLIS_UPPER;
		uploa = BLIS_LOWER;
		bli_obj_set_struc( BLIS_TRIANGULAR, a );
		bli_obj_set_uplo( uploa, a );
		bli_obj_set_diag_offset( -2, a );

		uplob = BLIS_UPPER;
		uplob = BLIS_LOWER;
		bli_obj_set_struc( BLIS_TRIANGULAR, b );
		bli_obj_set_uplo( uplob, b );
		bli_obj_set_diag_offset( -2, b );

		uploc = BLIS_UPPER;
		//uploc = BLIS_LOWER;
		//uploc = BLIS_ZEROS;
		//uploc = BLIS_DENSE;
		bli_obj_set_struc( BLIS_HERMITIAN, c );
		//bli_obj_set_struc( BLIS_TRIANGULAR, c );
		bli_obj_set_uplo( uploc, c );
		bli_obj_set_diag_offset(  1, c );

		bli_obj_alias_to( a, aa ); (void)aa;
		bli_obj_alias_to( b, bb ); (void)bb;
		bli_obj_alias_to( c, cc ); (void)cc;

		bli_randm( &a );
		bli_randm( &b );
		bli_randm( &c );
		//bli_mkherm( &a );
		//bli_mktrim( &a );

		bli_prune_unref_mparts( &cc, BLIS_M,
		                        &aa, BLIS_N );

		bli_printm( "c orig", &c, "%4.1f", "" );
		bli_printm( "c alias", &cc, "%4.1f", "" );
		bli_printm( "a orig", &a, "%4.1f", "" );
		bli_printm( "a alias", &aa, "%4.1f", "" );
		//bli_obj_print( "a struct", &a );
	}
#endif

	dim_t  p_begin, p_max, p_inc;
	gint_t m_input, n_input;
	char   uploa_ch;
	doff_t diagoffa;
	dim_t  bf;
	dim_t  n_way;
	char   part_dim_ch;
	bool_t go_fwd;
	char   out_ch;

	obj_t  a;

	thrinfo_t thrinfo;
	dim_t  m, n;
	uplo_t uploa;
	bool_t part_m_dim, part_n_dim;
	bool_t go_bwd;
	dim_t  p;
	num_t  dt;
	dim_t  start, end;

	dim_t  width;
	siz_t  area;

	gint_t t_begin, t_stop, t_inc;
	dim_t  t;

	if ( argc == 13 )
	{
		sscanf( argv[1], "%lu", &p_begin );
		sscanf( argv[2], "%lu", &p_max );
		sscanf( argv[3], "%lu", &p_inc );
		sscanf( argv[4], "%ld", &m_input );
		sscanf( argv[5], "%ld", &n_input );
		sscanf( argv[6], "%c",  &uploa_ch );
		sscanf( argv[7], "%ld", &diagoffa );
		sscanf( argv[8], "%lu", &bf );
		sscanf( argv[9], "%lu", &n_way );
		sscanf( argv[10], "%c", &part_dim_ch );
		sscanf( argv[11], "%lu", &go_fwd );
		sscanf( argv[12], "%c", &out_ch );
	}
	else
	{
		printf( "\n" );
		printf( " %s\n", argv[0] );
		printf( "\n" );
		printf( "  Simulate the dimension ranges assigned to threads when\n" );
		printf( "  partitioning a matrix for parallelism in BLIS.\n" );
		printf( "\n" );
		printf( " Usage:\n" );
		printf( "\n" );
		printf( "  %s p_beg p_max p_inc m n uplo doff bf n_way part_dim go_fwd out\n", argv[0] );
		printf( "\n" );
		printf( "  p_beg:    the first problem size p to test.\n" );
		printf( "  p_max:    the maximum problem size p to test.\n" );
		printf( "  p_inc:    the increase in problem size p between tests.\n" );
		printf( "  m:        the m dimension:\n" );
		printf( "  n:        the n dimension:\n" );
		printf( "            if m,n = -1: bind m,n to problem size p.\n" );
		printf( "            if m,n =  0: bind m,n to p_max.\n" );
		printf( "            if m,n >  0: hold m,n = c constant for all p.\n" );
		printf( "  uplo:     the uplo field of the matrix being partitioned:\n" );
		printf( "            'l': lower-stored (BLIS_LOWER)\n" );
		printf( "            'u': upper-stored (BLIS_UPPER)\n" );
		printf( "            'd': densely-stored (BLIS_DENSE)\n" );
		printf( "  doff:     the diagonal offset of the matrix being partitioned.\n" );
		printf( "  bf:       the simulated blocking factor. all thread ranges must\n" );
		printf( "            be a multiple of bf, except for the range that contains\n" );
		printf( "            the edge case (if one exists). the blocking factor\n" );
		printf( "            would typically correspond to a register blocksize.\n" );
		printf( "  n_way:    the number of ways of parallelism for which we are\n" );
		printf( "            partitioning (i.e.: the number of threads, or thread\n" );
		printf( "            groups).\n" );
		printf( "  part_dim: the dimension to partition:\n" );
		printf( "            'm': partition the m dimension.\n" );
		printf( "            'n': partition the n dimension.\n" );
		printf( "  go_fwd:   the direction to partition:\n" );
		printf( "            '1': forward, e.g. left-to-right (part_dim = 'm') or\n" );
		printf( "                 top-to-bottom (part_dim = 'n')\n" );
		printf( "            '0': backward, e.g. right-to-left (part_dim = 'm') or\n" );
		printf( "                 bottom-to-top (part_dim = 'n')\n" );
		printf( "            NOTE: reversing the direction does not change the\n" );
		printf( "            subpartitions' widths, but it does change which end of\n" );
		printf( "            the index range receives the edge case, if it exists.\n" );
		printf( "  out:      the type of output per thread-column:\n" );
		printf( "            'w': the width (and area) of the thread's subpartition\n" );
		printf( "            'r': the actual ranges of the thread's subpartition\n" );
		printf( "                 where the start and end points of each range are\n" );
		printf( "                 inclusive and exclusive, respectively.\n" );
		printf( "\n" );

		exit(1);
	}

	if ( m_input == 0 ) m_input = p_max;
	if ( n_input == 0 ) n_input = p_max;

	if ( part_dim_ch == 'm' ) { part_m_dim = TRUE;  part_n_dim = FALSE; }
	else                      { part_m_dim = FALSE; part_n_dim = TRUE;  }

	go_bwd = !go_fwd;

	if      ( uploa_ch == 'l' ) uploa = BLIS_LOWER;
	else if ( uploa_ch == 'u' ) uploa = BLIS_UPPER;
	else                        uploa = BLIS_DENSE;

	if ( part_n_dim )
	{
		if ( bli_is_upper( uploa ) ) { t_begin = n_way-1; t_stop = -1;    t_inc = -1; }
		else /* if lower or dense */ { t_begin = 0;       t_stop = n_way; t_inc =  1; }
	}
	else // if ( part_m_dim )
	{
		if ( bli_is_lower( uploa ) ) { t_begin = n_way-1; t_stop = -1;    t_inc = -1; }
		else /* if upper or dense */ { t_begin = 0;       t_stop = n_way; t_inc =  1; }
	}

	printf( "\n" );
	printf( "  part: %3s   doff: %3ld   bf: %3ld   output: %s\n",
	        ( part_n_dim ? ( go_fwd ? "l2r" : "r2l" )
	                     : ( go_fwd ? "t2b" : "b2t" ) ),
	        diagoffa, bf,
            ( out_ch == 'w' ? "width(area)" : "ranges" ) );
	printf( "              uplo: %3c   nt: %3ld\n", uploa_ch, n_way );
	printf( "\n" );

	printf( "             " );
	for ( t = t_begin; t != t_stop; t += t_inc )
	{
		if ( part_n_dim )
		{
			if      ( t == t_begin      ) printf( "left...      " );
			else if ( t == t_stop-t_inc ) printf( "     ...right" );
			else                          printf( "             " );
		}
		else // if ( part_m_dim )
		{
			if      ( t == t_begin      ) printf( "top...       " );
			else if ( t == t_stop-t_inc ) printf( "    ...bottom" );
			else                          printf( "             " );
		}
	}
	printf( "\n" );


	printf( "%4c x %4c  ", 'm', 'n' );
	for ( t = t_begin; t != t_stop; t += t_inc )
	{
		printf( "%9s %lu  ", "thread", t );
	}
	printf( "\n" );
	printf( "-------------" );
	for ( t = t_begin; t != t_stop; t += t_inc )
	{
		printf( "-------------" );
	}
	printf( "\n" );


	for ( p = p_begin; p <= p_max; p += p_inc )
	{
		if ( m_input < 0 ) m = ( dim_t )p;
		else               m = ( dim_t )m_input;
		if ( n_input < 0 ) n = ( dim_t )p;
		else               n = ( dim_t )n_input;

		dt = BLIS_DOUBLE;
		
		bli_obj_create( dt, m, n, 0, 0, &a );

		bli_obj_set_struc( BLIS_TRIANGULAR, a );
		bli_obj_set_uplo( uploa, a );
		bli_obj_set_diag_offset( diagoffa, a );

		bli_randm( &a );

		printf( "%4lu x %4lu  ", m, n );

		for ( t = t_begin; t != t_stop; t += t_inc )
		{
			thrinfo.n_way   = n_way;
			thrinfo.work_id = t;

			if      ( part_n_dim && go_fwd )
				area = bli_get_range_weighted_l2r( &thrinfo, &a, bf, &start, &end );
			else if ( part_n_dim && go_bwd )
				area = bli_get_range_weighted_r2l( &thrinfo, &a, bf, &start, &end );
			else if ( part_m_dim && go_fwd )
				area = bli_get_range_weighted_t2b( &thrinfo, &a, bf, &start, &end );
			else // ( part_m_dim && go_bwd )
				area = bli_get_range_weighted_b2t( &thrinfo, &a, bf, &start, &end );

			width = end - start;

			if ( out_ch == 'w' ) printf( "%4lu(%6lu) ", width, area );
			else                 printf( "[%4lu,%4lu)  ", start, end );
		}

		printf( "\n" );

		bli_obj_free( &a );
	}

	bli_finalize();

	return 0;
}