示例#1
0
/*
 * FLA_Trsm_internal
 *
 * Control-tree driven entry point for the Trsm operation. After optional
 * argument checking, the call is resolved in one of three ways:
 *
 *   1. Recurse:  cntl describes a hierarchical subproblem and the elements
 *                of A are themselves matrices, so the routine calls itself
 *                on the objects referenced by A and B, using
 *                flash_trsm_cntl_mm as the control tree.
 *   2. Enqueue:  cntl is hierarchical, the elements of A are scalars and
 *                the FLASH queue is enabled, so the operation is handed to
 *                ENQUEUE_FLASH_Trsm and FLA_SUCCESS is returned.
 *   3. Dispatch: everything else is forwarded to the FLA_Trsm_<s><u><t>
 *                routine selected by side, uplo and transa. When cntl is
 *                hierarchical, the elements of A are scalars and the queue
 *                is disabled, flash_trsm_cntl_blas replaces cntl first.
 *
 * Parameters:
 *   side, uplo, transa  select the routine that is dispatched to.
 *   diag, alpha, A, B   forwarded unchanged to the selected routine.
 *   cntl                control tree node steering the decisions above.
 *
 * Returns the value produced by the recursive call or by the dispatched
 * routine; FLA_SUCCESS when the operation was enqueued or when no
 * side/uplo/transa combination matched.
 */
FLA_Error FLA_Trsm_internal( FLA_Side side, FLA_Uplo uplo, FLA_Trans transa, FLA_Diag diag, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, fla_trsm_t* cntl )
{
    // Argument validation only runs under full error checking.
    if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
        FLA_Trsm_internal_check( side, uplo, transa, diag, alpha, A, B, cntl );

    // Case 1: hierarchical subproblem over matrix-valued elements -> recurse
    // on the objects that A and B refer to.
    if ( FLA_Cntl_matrix_type( cntl ) == FLA_HIER &&
         FLA_Obj_elemtype( A ) == FLA_MATRIX &&
         FLA_Cntl_variant( cntl ) == FLA_SUBPROBLEM )
    {
        return FLA_Trsm_internal( side, uplo, transa, diag, alpha,
                                  *FLASH_OBJ_PTR_AT( A ),
                                  *FLASH_OBJ_PTR_AT( B ),
                                  flash_trsm_cntl_mm );
    }

    // Case 2: hierarchical leaf with the queue enabled -> enqueue the task.
    if ( FLA_Cntl_matrix_type( cntl ) == FLA_HIER &&
         FLA_Obj_elemtype( A ) == FLA_SCALAR &&
         FLASH_Queue_get_enabled( ) )
    {
        ENQUEUE_FLASH_Trsm( side, uplo, transa, diag, alpha, A, B, cntl );
        return FLA_SUCCESS;
    }

    // Case 3: hierarchical leaf with the queue disabled -> execute the leaf
    // right away, using the BLAS control tree.
    if ( FLA_Cntl_matrix_type( cntl ) == FLA_HIER &&
         FLA_Obj_elemtype( A ) == FLA_SCALAR &&
         !FLASH_Queue_get_enabled( ) )
    {
        cntl = flash_trsm_cntl_blas;
    }

    // Dispatch on the (side, uplo, transa) parameter combination.
    if ( side == FLA_LEFT && uplo == FLA_LOWER_TRIANGULAR )
    {
        switch ( transa )
        {
            case FLA_NO_TRANSPOSE:
                return FLA_Trsm_lln( diag, alpha, A, B, cntl );
            case FLA_TRANSPOSE:
                return FLA_Trsm_llt( diag, alpha, A, B, cntl );
            case FLA_CONJ_NO_TRANSPOSE:
                return FLA_Trsm_llc( diag, alpha, A, B, cntl );
            case FLA_CONJ_TRANSPOSE:
                return FLA_Trsm_llh( diag, alpha, A, B, cntl );
            default:
                break;
        }
    }
    else if ( side == FLA_LEFT && uplo == FLA_UPPER_TRIANGULAR )
    {
        switch ( transa )
        {
            case FLA_NO_TRANSPOSE:
                return FLA_Trsm_lun( diag, alpha, A, B, cntl );
            case FLA_TRANSPOSE:
                return FLA_Trsm_lut( diag, alpha, A, B, cntl );
            case FLA_CONJ_NO_TRANSPOSE:
                return FLA_Trsm_luc( diag, alpha, A, B, cntl );
            case FLA_CONJ_TRANSPOSE:
                return FLA_Trsm_luh( diag, alpha, A, B, cntl );
            default:
                break;
        }
    }
    else if ( side == FLA_RIGHT && uplo == FLA_LOWER_TRIANGULAR )
    {
        switch ( transa )
        {
            case FLA_NO_TRANSPOSE:
                return FLA_Trsm_rln( diag, alpha, A, B, cntl );
            case FLA_TRANSPOSE:
                return FLA_Trsm_rlt( diag, alpha, A, B, cntl );
            case FLA_CONJ_NO_TRANSPOSE:
                return FLA_Trsm_rlc( diag, alpha, A, B, cntl );
            case FLA_CONJ_TRANSPOSE:
                return FLA_Trsm_rlh( diag, alpha, A, B, cntl );
            default:
                break;
        }
    }
    else if ( side == FLA_RIGHT && uplo == FLA_UPPER_TRIANGULAR )
    {
        switch ( transa )
        {
            case FLA_NO_TRANSPOSE:
                return FLA_Trsm_run( diag, alpha, A, B, cntl );
            case FLA_TRANSPOSE:
                return FLA_Trsm_rut( diag, alpha, A, B, cntl );
            case FLA_CONJ_NO_TRANSPOSE:
                return FLA_Trsm_ruc( diag, alpha, A, B, cntl );
            case FLA_CONJ_TRANSPOSE:
                return FLA_Trsm_ruh( diag, alpha, A, B, cntl );
            default:
                break;
        }
    }

    // No parameter combination matched: nothing was executed.
    return FLA_SUCCESS;
}
示例#2
0
/*
 * FLASH_Axpy_hierarchy
 *
 * Recursively performs an axpy between a flat matrix F and a hierarchical
 * matrix *H.
 *
 * Base case: the elements of *H are scalars. FLA_Axpy_external is then
 * called with F as the source and *H as the destination when direction is
 * FLA_FLAT_TO_HIER, or with *H as the source and F as the destination when
 * direction is FLA_HIER_TO_FLAT. Any other direction does nothing. When
 * FLA_ENABLE_SCC is defined, the call is made only if FLA_is_owner() is true.
 *
 * Recursive case: *H is walked one element at a time, column by column and
 * top to bottom within each column. For every element H(i,j) the matching
 * region of F is carved out using the scalar width/length of that element,
 * and the routine recurses on that pair.
 *
 * Parameters:
 *   direction  FLA_FLAT_TO_HIER or FLA_HIER_TO_FLAT.
 *   alpha      scalar forwarded to FLA_Axpy_external.
 *   F          flat matrix (or the current sub-view of it).
 *   H          pointer to the hierarchical matrix (or current sub-object).
 *
 * Always returns FLA_SUCCESS.
 */
FLA_Error FLASH_Axpy_hierarchy( int direction, FLA_Obj alpha, FLA_Obj F, FLA_Obj* H )
{
	// Column-wise views: columns already processed / still to process, and
	// the 1x3 repartitioning (before, current, after).
	FLA_Obj h_done, h_todo, h_c0, h_col, h_c2;
	FLA_Obj f_done, f_todo, f_c0, f_col, f_c2;

	// Row-wise views inside the current column: top / bottom parts, and the
	// 3x1 repartitioning (above, current, below).
	FLA_Obj h_col_top, h_col_bot, h_r0, h_blk, h_r2;
	FLA_Obj f_col_top, f_col_bot, f_r0, f_blk, f_r2;

	dim_t blk_rows;
	dim_t blk_cols;

	// Base case: a submatrix whose elements are scalars is a leaf.
	if ( FLA_Obj_elemtype( *H ) == FLA_SCALAR )
	{
		if ( direction == FLA_FLAT_TO_HIER )
		{
#ifdef FLA_ENABLE_SCC
			if ( !FLA_is_owner() )
				return FLA_SUCCESS;
#endif
			// Flat source data is axpy'ed into the leaf of the hierarchy.
			FLA_Axpy_external( alpha, F, *H );
		}
		else if ( direction == FLA_HIER_TO_FLAT )
		{
#ifdef FLA_ENABLE_SCC
			if ( !FLA_is_owner() )
				return FLA_SUCCESS;
#endif
			// Leaf of the hierarchy is axpy'ed into the flat matrix.
			FLA_Axpy_external( alpha, *H, F );
		}

		return FLA_SUCCESS;
	}

	// Recursive case: start with nothing processed on the left of both
	// matrices.
	FLA_Part_1x2( *H, &h_done, &h_todo, 0, FLA_LEFT );
	FLA_Part_1x2(  F, &f_done, &f_todo, 0, FLA_LEFT );

	while ( FLA_Obj_width( h_done ) < FLA_Obj_width( *H ) )
	{
		// Expose the next single column of elements of H.
		FLA_Repart_1x2_to_1x3( h_done, h_todo,
		                       &h_c0, &h_col, &h_c2,
		                       1, FLA_RIGHT );

		// The scalar width of that column fixes how many columns of F
		// belong to it.
		blk_cols = FLASH_Obj_scalar_width( h_col );

		FLA_Repart_1x2_to_1x3( f_done, f_todo,
		                       &f_c0, &f_col, &f_c2,
		                       blk_cols, FLA_RIGHT );

		// Walk down the exposed column, starting with an empty top part.
		FLA_Part_2x1( h_col, &h_col_top,
		                     &h_col_bot, 0, FLA_TOP );
		FLA_Part_2x1( f_col, &f_col_top,
		                     &f_col_bot, 0, FLA_TOP );

		while ( FLA_Obj_length( h_col_top ) < FLA_Obj_length( h_col ) )
		{
			// Expose the next single element of the H column.
			FLA_Repart_2x1_to_3x1( h_col_top, &h_r0,
			                                  &h_blk,
			                       h_col_bot, &h_r2,
			                       1, FLA_BOTTOM );

			// The scalar length of that element fixes how many rows of F
			// belong to it.
			blk_rows = FLASH_Obj_scalar_length( h_blk );

			FLA_Repart_2x1_to_3x1( f_col_top, &f_r0,
			                                  &f_blk,
			                       f_col_bot, &f_r2,
			                       blk_rows, FLA_BOTTOM );

			// Recurse on the matching flat region and the object that the
			// exposed element of H refers to.
			FLASH_Axpy_hierarchy( direction, alpha, f_blk,
			                      FLASH_OBJ_PTR_AT( h_blk ) );

			// Move the processed element into the top part.
			FLA_Cont_with_3x1_to_2x1( &h_col_top, h_r0,
			                                      h_blk,
			                          &h_col_bot, h_r2,
			                          FLA_TOP );
			FLA_Cont_with_3x1_to_2x1( &f_col_top, f_r0,
			                                      f_blk,
			                          &f_col_bot, f_r2,
			                          FLA_TOP );
		}

		// Move the processed column into the left part.
		FLA_Cont_with_1x3_to_1x2( &h_done, &h_todo,
		                          h_c0, h_col, h_c2,
		                          FLA_LEFT );
		FLA_Cont_with_1x3_to_1x2( &f_done, &f_todo,
		                          f_c0, f_col, f_c2,
		                          FLA_LEFT );
	}

	return FLA_SUCCESS;
}