Beispiel #1
0
siz_t bli_thread_get_range_weighted_b2t
     (
       thrinfo_t* thr,
       obj_t*     a,
       blksz_t*   bmult,
       dim_t*     start,
       dim_t*     end
     )
{
	siz_t area;

	// This function assigns area-weighted ranges in the m dimension
	// where the total range spans 0 to m-1 with 0 at the bottom end and
	// m-1 at the top end.

	if ( bli_obj_intersects_diag( *a ) &&
	     bli_obj_is_upper_or_lower( *a ) )
	{
		doff_t diagoff = bli_obj_diag_offset( *a );
		uplo_t uplo    = bli_obj_uplo( *a );
		dim_t  m       = bli_obj_length( *a );
		dim_t  n       = bli_obj_width( *a );
		dim_t  bf      = bli_blksz_get_def_for_obj( a, bmult );

		// Support implicit transposition.
		if ( bli_obj_has_trans( *a ) )
		{
			bli_reflect_about_diag( diagoff, uplo, m, n );
		}

		bli_reflect_about_diag( diagoff, uplo, m, n );

		bli_rotate180_trapezoid( diagoff, uplo );

		area = bli_thread_get_range_weighted_sub
		(
		  thr, diagoff, uplo, m, n, bf,
		  TRUE, start, end
		);
	}
	else // if dense or zeros
	{
		area = bli_thread_get_range_b2t
		(
		  thr, a, bmult,
		  start, end
		);
	}

	return area;
}
Beispiel #2
0
siz_t bli_thread_get_range_weighted
     (
       thrinfo_t* thread,
       doff_t     diagoff,
       uplo_t     uplo,
       dim_t      m,
       dim_t      n,
       dim_t      bf,
       bool_t     handle_edge_low,
       dim_t*     j_start_thr,
       dim_t*     j_end_thr
     )
{
	dim_t      n_way   = thread->n_way;
	dim_t      my_id   = thread->work_id;

	dim_t      bf_left = n % bf;

	dim_t      j;

	dim_t      off_j;
	doff_t     diagoff_j;
	dim_t      n_left;

	dim_t      width_j;

	dim_t      offm_inc, offn_inc;

	double     tri_dim, tri_area;
	double     area_total, area_per_thr;

	siz_t      area = 0;

	// In this function, we assume that the caller has already determined
	// that (a) the diagonal intersects the submatrix, and (b) the submatrix
	// is either lower- or upper-stored.

	if ( bli_is_lower( uplo ) )
	{
		// Prune away the unstored region above the diagonal, if it exists,
		// and then to the right of where the diagonal intersects the bottom,
		// if it exists. (Also, we discard the offset deltas since we don't
		// need to actually index into the subpartition.)
		bli_prune_unstored_region_top_l( diagoff, m, n, offm_inc );
		bli_prune_unstored_region_right_l( diagoff, m, n, offn_inc );

		// We don't need offm_inc, offn_inc here. These statements should
		// prevent compiler warnings.
		( void )offm_inc;
		( void )offn_inc;

		// Now that pruning has taken place, we know that diagoff >= 0.

		// Compute the total area of the submatrix, accounting for the
		// location of the diagonal, and divide it by the number of ways
		// of parallelism.
		tri_dim      = ( double )( n - diagoff - 1 );
		tri_area     = tri_dim * ( tri_dim + 1.0 ) / 2.0;
		area_total   = ( double )m * ( double )n - tri_area;
		area_per_thr = area_total / ( double )n_way;

		// Initialize some variables prior to the loop: the offset to the
		// current subpartition, the remainder of the n dimension, and
		// the diagonal offset of the current subpartition.
		off_j     = 0;
		diagoff_j = diagoff;
		n_left    = n;

		// Iterate over the subpartition indices corresponding to each
		// thread/caucus participating in the n_way parallelism.
		for ( j = 0; j < n_way; ++j )
		{
			// Compute the width of the jth subpartition, taking the
			// current diagonal offset into account, if needed.
			width_j = bli_thread_get_range_width_l( diagoff_j, m, n_left,
			                                        j, n_way,
			                                        bf, bf_left,
			                                        area_per_thr,
			                                        handle_edge_low );

			// If the current thread belongs to caucus j, this is his
			// subpartition. So we compute the implied index range and
			// end our search.
			if ( j == my_id )
			{
				*j_start_thr = off_j;
				*j_end_thr   = off_j + width_j;

				area = bli_find_area_trap_l( m, width_j, diagoff_j );

				break;
			}

			// Shift the current subpartition's starting and diagonal offsets,
			// as well as the remainder of the n dimension, according to the
			// computed width, and then iterate to the next subpartition.
			off_j     += width_j;
			diagoff_j -= width_j;
			n_left    -= width_j;
		}
	}
	else // if ( bli_is_upper( uplo ) )
	{
		// Express the upper-stored case in terms of the lower-stored case.

		// First, we convert the upper-stored trapezoid to an equivalent
		// lower-stored trapezoid by rotating it 180 degrees.
		bli_rotate180_trapezoid( diagoff, uplo );

		// Now that the trapezoid is "flipped" in the n dimension, negate
		// the bool that encodes whether to handle the edge case at the
		// low (or high) end of the index range.
		bli_toggle_bool( handle_edge_low );

		// Compute the appropriate range for the rotated trapezoid.
		area = bli_thread_get_range_weighted( thread, diagoff, uplo, m, n, bf,
		                                      handle_edge_low,
		                                      j_start_thr, j_end_thr );

		// Reverse the indexing basis for the subpartition ranges so that
		// the indices, relative to left-to-right iteration through the
		// unrotated upper-stored trapezoid, map to the correct columns
		// (relative to the diagonal). This amounts to subtracting the
		// range from n.
		bli_reverse_index_direction( *j_start_thr, *j_end_thr, n );
	}

	return area;
}