예제 #1
0
파일: reverse.hpp 프로젝트: barak/cppad
// Reverse mode sweep that differentiates the first q Taylor coefficient
// orders stored in this ADFun object.
//
// q : number of Taylor coefficient orders involved; must be greater than
//     zero and no larger than num_order_taylor_.
// w : weights for the dependent variables, with m = dep_taddr_.size().
//     If w.size() == m, w[i] weights the order q-1 coefficient of
//     dependent variable i. If w.size() == m * q, w[i * q + k] weights
//     the order k coefficient of dependent variable i.
//
// Returns a vector of size n * q, where n = ind_taddr_.size(). For
// independent variable j and k = 0, ..., q-1, element [j * q + k] is the
// partial stored at order q-1-k (when w.size() == m) or at order k
// (when w.size() == m * q) for that variable after the sweep.
//
// Side effect: if q == 1 and more than one forward direction is stored,
// num_order_taylor_ is set to one and capacity_order is called with the
// current capacity and a single direction before the sweep.
BaseVector ADFun<Base,RecBase>::Reverse(size_t q, const BaseVector &w)
{   // used to identify the RecBase type in calls to sweeps
    RecBase not_used_rec_base;

    // constants
    const Base zero(0);

    // temporary indices
    size_t i, j, k;

    // number of independent variables
    size_t n = ind_taddr_.size();

    // number of dependent variables
    size_t m = dep_taddr_.size();

    // check BaseVector is Simple Vector class with Base type elements
    CheckSimpleVector<Base, BaseVector>();

    CPPAD_ASSERT_KNOWN(
        size_t(w.size()) == m || size_t(w.size()) == (m * q),
        "Argument w to Reverse does not have length equal to\n"
        "the dimension of the range or dimension of range times q."
    );
    CPPAD_ASSERT_KNOWN(
        q > 0,
        "The first argument to Reverse must be greater than zero."
    );
    CPPAD_ASSERT_KNOWN(
        num_order_taylor_ >= q,
        "Less than q Taylor coefficients are currently stored"
        " in this ADFun object."
    );
    // special case where multiple forward directions have been computed,
    // but we are only using the one direction zero order results
    if( (q == 1) && (num_direction_taylor_ > 1) )
    {   num_order_taylor_ = 1;        // number of orders to copy
        size_t c = cap_order_taylor_; // keep the same capacity setting
        size_t r = 1;                 // only keep one direction
        capacity_order(c, r);
    }
    CPPAD_ASSERT_KNOWN(
        num_direction_taylor_ == 1,
        "Reverse mode for Forward(q, r, xq) with more than one direction"
        "\n(r > 1) is not yet supported for q > 1."
    );

    // true when w holds one weight per dependent variable
    // (note that this is also the case taken when q == 1)
    const bool one_weight_per_dep = size_t(w.size()) == m;

    // initialize entire Partial matrix to zero
    local::pod_vector_maybe<Base> Partial(num_var_tape_ * q);
    for(i = 0; i < num_var_tape_; i++)
        for(j = 0; j < q; j++)
            Partial[i * q + j] = zero;

    // set the dependent variable direction
    // (use += because two dependent variables can point to same location;
    // Partial starts at zero so this equals assignment when they do not)
    for(i = 0; i < m; i++)
    {   CPPAD_ASSERT_UNKNOWN( dep_taddr_[i] < num_var_tape_  );
        if( one_weight_per_dep )
            Partial[dep_taddr_[i] * q + q - 1] += w[i];
        else
        {   for(k = 0; k < q; k++)
                Partial[ dep_taddr_[i] * q + k ] += w[i * q + k ];
        }
    }

    // evaluate the derivatives
    CPPAD_ASSERT_UNKNOWN( cskip_op_.size() == play_.num_op_rec() );
    CPPAD_ASSERT_UNKNOWN( load_op_.size()  == play_.num_load_op_rec() );
    local::play::const_sequential_iterator play_itr = play_.end();
    local::sweep::reverse(
        q - 1,
        n,
        num_var_tape_,
        &play_,
        cap_order_taylor_,
        taylor_.data(),
        q,
        Partial.data(),
        cskip_op_.data(),
        load_op_,
        play_itr,
        not_used_rec_base
    );

    // return the derivative values
    BaseVector value(n * q);
    for(j = 0; j < n; j++)
    {   CPPAD_ASSERT_UNKNOWN( ind_taddr_[j] < num_var_tape_  );

        // independent variable taddr equals its operator taddr
        CPPAD_ASSERT_UNKNOWN( play_.GetOp( ind_taddr_[j] ) == local::InvOp );

        // by the Reverse Identity Theorem
        // partial of y^{(k)} w.r.t. u^{(0)} is equal to
        // partial of y^{(q-1)} w.r.t. u^{(q - 1 - k)}
        if( one_weight_per_dep )
        {   for(k = 0; k < q; k++)
                value[j * q + k ] =
                    Partial[ind_taddr_[j] * q + q - 1 - k];
        }
        else
        {   for(k = 0; k < q; k++)
                value[j * q + k ] =
                    Partial[ind_taddr_[j] * q + k];
        }
    }
    CPPAD_ASSERT_KNOWN( ! ( hasnan(value) && check_for_nan_ ) ,
        "dw = f.Reverse(q, w): has a nan,\n"
        "but none of its Taylor coefficents are nan."
    );

    return value;
}
예제 #2
0
// Computes the Jacobian and/or Hessian, with respect to x, of a sum
// F(x, y) = S_0 (x, y) + ... + S_{ell-1} (x, y) using the formulas
//	Y^{(1)}(x) = - F_yy (x, y)^{-1} F_yx (x, y)
//	V^{(2)}(x) = F_xx (x, y) + F_xy (x, y)  Y^{(1)}(x)
//
// x   : vector of size n.
// y   : vector of size m.
// fun : object that provides the type Fun::ad_vector, the number of terms
//       fun.ell(), and the k-th term fun.s(k, a_x, a_y).
// jac : size n or zero. If its size is n, it is set to the sum over k of
//       the Jacobian of S_k with respect to x (with y held fixed).
// hes : size n * n or zero. If its size is n * n and F_yy is non-singular,
//       it is set (row-major) to F_xx - F_xy * ( F_yy^{-1} F_yx ).
//
// Returns zero if hes.size() is zero. Otherwise returns the sign of the
// determinant reported by CppAD::LuSolve for F_yy; when that value is zero
// the function returns before writing to hes.
int opt_val_hes(
	const BaseVector&   x     , 
	const BaseVector&   y     , 
	Fun                 fun   , 
	BaseVector&         jac   ,
	BaseVector&         hes   )
{	// determine the base type
	typedef typename BaseVector::value_type Base;

	// check that BaseVector is a SimpleVector class with Base elements
	CheckSimpleVector<Base, BaseVector>();

	// determine the AD vector type
	typedef typename Fun::ad_vector ad_vector;

	// check that ad_vector is a SimpleVector class with AD<Base> elements
	CheckSimpleVector< AD<Base> , ad_vector >();

	// size of the x and y spaces
	size_t n = size_t(x.size());
	size_t m = size_t(y.size());

	// number of terms in the summation
	size_t ell = fun.ell();

	// check size of return values
	CPPAD_ASSERT_KNOWN(
		size_t(jac.size()) == n || jac.size() == 0,
		"opt_val_hes: size of the vector jac is not equal to n or zero"
	);
	CPPAD_ASSERT_KNOWN(
		size_t(hes.size()) == n * n || hes.size() == 0,
		"opt_val_hes: size of the vector hes is not equal to n * n or zero"
	);

	// some temporary indices
	size_t i, j, k;

	// AD version of S_k(x, y)
	ad_vector s_k(1);

	// ADFun version of S_k(x, y)
	ADFun<Base> S_k;

	// AD version of x 
	ad_vector a_x(n);

	// AD version of y (y has m elements, so a_y must as well)
	ad_vector a_y(m);

	if( jac.size() > 0  )
	{	// this is the easy part, computing the V^{(1)} (x) which is equal 
		// to \partial_x F (x, y) (see Theorem 2 of the reference).

		// copy x and y to AD version
		for(j = 0; j < n; j++)
			a_x[j] = x[j];
		for(j = 0; j < m; j++)
			a_y[j] = y[j];

		// initialize summation
		for(j = 0; j < n; j++)
			jac[j] = Base(0.);

		// add in \partial_x S_k (x, y)
		for(k = 0; k < ell; k++)
		{	// start recording (only x is independent; y is held fixed)
			Independent(a_x);
			// record
			s_k[0] = fun.s(k, a_x, a_y);
			// stop recording and store in S_k
			S_k.Dependent(a_x, s_k);
			// compute partial of S_k with respect to x
			BaseVector jac_k = S_k.Jacobian(x);
			// add \partial_x S_k (x, y) to jac
			for(j = 0; j < n; j++)
				jac[j] += jac_k[j];			 
		}
	}
	// check if we are done
	if( hes.size() == 0 )
		return 0;

	/*
	In this case, we need to compute the Hessian. Using Theorem 1 of the
	reference:
		Y^{(1)}(x) = - F_yy (x, y)^{-1} F_yx (x, y)
	Using Theorem 2 of the reference:
		V^{(2)}(x) = F_xx (x, y) + F_xy (x, y)  Y^{(1)}(x) 
	*/
	// Base and AD version of xy (x in the first n slots, y in the last m)
	BaseVector xy(n + m);
	ad_vector a_xy(n + m);
	for(j = 0; j < n; j++)
		a_xy[j] = xy[j] = x[j]; 
	for(j = 0; j < m; j++)
		a_xy[n+j] = xy[n+j] = y[j]; 

	// Initialization summation for Hessian of F
	// (stored row-major as an (n + m) by (n + m) matrix)
	size_t nm_sq = (n + m) * (n + m);
	BaseVector F_hes(nm_sq);
	for(j = 0; j < nm_sq; j++)
		F_hes[j] = Base(0.);
	BaseVector hes_k(nm_sq); 

	// add in Hessian of S_k to F_hes
	for(k = 0; k < ell; k++)
	{	// start recording
		Independent(a_xy);
		// split out x
		for(j = 0; j < n; j++)
			a_x[j] = a_xy[j];
		// split out y
		for(j = 0; j < m; j++)
			a_y[j] = a_xy[n+j];
		// record
		s_k[0] = fun.s(k, a_x, a_y);
		// stop recording and store in S_k
		S_k.Dependent(a_xy, s_k);
		// when computing the Hessian it pays to optimize the tape
		S_k.optimize();
		// compute Hessian of S_k 
		hes_k = S_k.Hessian(xy, 0);
		// add Hessian of S_k (x, y) to F_hes
		for(j = 0; j < nm_sq; j++)
			F_hes[j] += hes_k[j];			 
	}
	// Extract F_yx (m by n block: rows n..n+m-1, columns 0..n-1 of F_hes)
	BaseVector F_yx(m * n);
	for(i = 0; i < m; i++)
	{	for(j = 0; j < n; j++)
			F_yx[i * n + j] = F_hes[ (i+n)*(n+m) + j ];
	} 
	// Extract F_yy (m by m block: rows and columns n..n+m-1 of F_hes)
	BaseVector F_yy(m * m);
	for(i = 0; i < m; i++)
	{	for(j = 0; j < m; j++)
			F_yy[i * m + j] = F_hes[ (i+n)*(n+m) + j + n ];
	} 

	// compute - Y^{(1)}(x) = F_yy (x, y)^{-1} F_yx (x, y)
	BaseVector neg_Y_x(m * n);
	Base logdet;
	int signdet = CppAD::LuSolve(m, n, F_yy, F_yx, neg_Y_x, logdet);
	if( signdet == 0 )
		return signdet;

	// compute hes = F_xx (x, y) + F_xy (x, y)  Y^{(1)}(x) 
	// (subtraction is used because neg_Y_x holds minus Y^{(1)}(x))
	for(i = 0; i < n; i++)
	{	for(j = 0; j < n; j++)
		{	hes[i * n + j] = F_hes[ i*(n+m) + j ];
			for(k = 0; k < m; k++)
				hes[i*n+j] -= F_hes[i*(n+m) + k+n] * neg_Y_x[k*n+j];
		}
	}
	return signdet;
}
예제 #3
0
void ADFun<Base,RecBase>::subgraph_jac_rev(
    const BoolVector&                   select_domain  ,
    const BoolVector&                   select_range   ,
    const BaseVector&                   x              ,
    sparse_rcv<SizeVector, BaseVector>& matrix_out     )
{   // Builds a sparse matrix, one row per selected range component, from
    // the results of subgraph_reverse evaluated at the point x, and
    // assigns that matrix to matrix_out.
    //
    // select_domain : passed to the subgraph_reverse initialization.
    // select_range  : row i is computed only when select_range[i] is true.
    // x             : argument for the zero order forward sweep.
    // matrix_out    : receives the resulting sparse matrix.

    // dimensions of the range and domain spaces
    size_t n_range  = Range();
    size_t n_domain = Domain();

    // zero order forward sweep at x
    Forward(0, x);

    // set up the subgraph reverse computation for select_domain
    subgraph_reverse(select_domain);

    // (row, column, value) triples accumulated over all selected rows
    local::pod_vector<size_t>     row_list;
    local::pod_vector<size_t>     col_list;
    local::pod_vector_maybe<Base> val_list;

    // work vectors that receive the subgraph_reverse results
    BaseVector partial;
    SizeVector col_index;

    // visit each selected dependent variable
    for(size_t i_dep = 0; i_dep < n_range; ++i_dep)
    {   if( ! select_range[i_dep] )
            continue;

        // first order results for this dependent variable
        size_t order = 1;
        subgraph_reverse(order, i_dep, col_index, partial);
        CPPAD_ASSERT_UNKNOWN( size_t( partial.size() ) == n_domain );

        // position where entries for this row start
        size_t start = row_list.size();
        CPPAD_ASSERT_UNKNOWN( col_list.size() == start );
        CPPAD_ASSERT_UNKNOWN( val_list.size() == start );

        // make room for the entries of this row
        size_t n_entry = size_t( col_index.size() );
        row_list.extend( n_entry );
        col_list.extend( n_entry );
        val_list.extend( n_entry );

        // copy the entries of this row
        for(size_t ell = 0; ell < n_entry; ++ell)
        {   size_t dest = start + ell;
            row_list[dest] = i_dep;
            col_list[dest] = col_index[ell];
            val_list[dest] = partial[ col_index[ell] ];
        }
    }

    // sparsity pattern with the accumulated row and column indices
    size_t n_total = row_list.size();
    sparse_rc<SizeVector> pattern(n_range, n_domain, n_total);
    for(size_t ell = 0; ell < n_total; ++ell)
        pattern.set(ell, row_list[ell], col_list[ell]);

    // sparse matrix on that pattern, filled with the accumulated values
    sparse_rcv<SizeVector, BaseVector> matrix(pattern);
    for(size_t ell = 0; ell < n_total; ++ell)
        matrix.set(ell,  val_list[ell]);

    // hand the result back to the caller
    matrix_out = matrix;
    return;
}