template <class Base, class RecBase>
template <class BaseVector>
BaseVector ADFun<Base,RecBase>::Reverse(size_t q, const BaseVector &w)
{  // used to identify the RecBase type in calls to sweeps
   RecBase not_used_rec_base;

   // constants
   const Base zero(0);

   // temporary indices
   size_t i, j, k;

   // number of independent variables
   size_t n = ind_taddr_.size();

   // number of dependent variables
   size_t m = dep_taddr_.size();

   // check BaseVector is Simple Vector class with Base type elements
   CheckSimpleVector<Base, BaseVector>();

   CPPAD_ASSERT_KNOWN(
      size_t(w.size()) == m || size_t(w.size()) == (m * q),
      "Argument w to Reverse does not have length equal to\n"
      "the dimension of the range or dimension of range times q."
   );
   CPPAD_ASSERT_KNOWN(
      q > 0,
      "The first argument to Reverse must be greater than zero."
   );
   CPPAD_ASSERT_KNOWN(
      num_order_taylor_ >= q,
      "Less than q Taylor coefficients are currently stored"
      " in this ADFun object."
   );

   // special case where multiple forward directions have been computed,
   // but we are only using the one direction zero order results
   if( (q == 1) & (num_direction_taylor_ > 1) )
   {  num_order_taylor_ = 1;        // number of orders to copy
      size_t c = cap_order_taylor_; // keep the same capacity setting
      size_t r = 1;                 // only keep one direction
      capacity_order(c, r);
   }
   CPPAD_ASSERT_KNOWN(
      num_direction_taylor_ == 1,
      "Reverse mode for Forward(q, r, xq) with more than one direction"
      "\n(r > 1) is not yet supported for q > 1."
   );

   // initialize entire Partial matrix to zero
   local::pod_vector_maybe<Base> Partial(num_var_tape_ * q);
   for(i = 0; i < num_var_tape_; i++)
      for(j = 0; j < q; j++)
         Partial[i * q + j] = zero;

   // set the dependent variable direction
   // (use += because two dependent variables can point to same location)
   for(i = 0; i < m; i++)
   {  CPPAD_ASSERT_UNKNOWN( dep_taddr_[i] < num_var_tape_ );
      if( size_t(w.size()) == m )
         Partial[dep_taddr_[i] * q + q - 1] += w[i];
      else
      {  for(k = 0; k < q; k++)
            // ? should use += here, first make test to demonstrate bug
            Partial[ dep_taddr_[i] * q + k ] = w[i * q + k ];
      }
   }

   // evaluate the derivatives
   CPPAD_ASSERT_UNKNOWN( cskip_op_.size() == play_.num_op_rec() );
   CPPAD_ASSERT_UNKNOWN( load_op_.size() == play_.num_load_op_rec() );
   local::play::const_sequential_iterator play_itr = play_.end();
   local::sweep::reverse(
      q - 1,
      n,
      num_var_tape_,
      &play_,
      cap_order_taylor_,
      taylor_.data(),
      q,
      Partial.data(),
      cskip_op_.data(),
      load_op_,
      play_itr,
      not_used_rec_base
   );

   // return the derivative values
   BaseVector value(n * q);
   for(j = 0; j < n; j++)
   {  CPPAD_ASSERT_UNKNOWN( ind_taddr_[j] < num_var_tape_ );

      // independent variable taddr equals its operator taddr
      CPPAD_ASSERT_UNKNOWN( play_.GetOp( ind_taddr_[j] ) == local::InvOp );

      // by the Reverse Identity Theorem
      // partial of y^{(k)} w.r.t. u^{(0)} is equal to
      // partial of y^{(q-1)} w.r.t. u^{(q - 1 - k)}
      if( size_t(w.size()) == m )
      {  for(k = 0; k < q; k++)
            value[j * q + k ] = Partial[ind_taddr_[j] * q + q - 1 - k];
      }
      else
      {  for(k = 0; k < q; k++)
            value[j * q + k ] = Partial[ind_taddr_[j] * q + k];
      }
   }
   CPPAD_ASSERT_KNOWN( ! ( hasnan(value) && check_for_nan_ ) ,
      "dw = f.Reverse(q, w): has a nan,\n"
      "but none of its Taylor coefficients are nan."
   );

   return value;
}
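
// Example usage of Reverse (a minimal sketch, separate from the
// implementation above): compute the gradient of f(x) = x[0] * x[1]
// with first order reverse mode. The function f, the weight vector w,
// and the name reverse_one_example are illustrative assumptions,
// not part of the library code.
# include <cppad/cppad.hpp>
bool reverse_one_example(void)
{  using CppAD::AD;
   bool ok = true;

   // record f(x) = x[0] * x[1]
   CPPAD_TESTVECTOR( AD<double> ) ax(2), ay(1);
   ax[0] = 2.0;
   ax[1] = 3.0;
   CppAD::Independent(ax);
   ay[0] = ax[0] * ax[1];
   CppAD::ADFun<double> f(ax, ay);

   // zero order forward mode fixes the argument value x
   CPPAD_TESTVECTOR(double) x(2);
   x[0] = 2.0;
   x[1] = 3.0;
   f.Forward(0, x);

   // first order reverse mode computes dw = w^T * F'(x), here with w = (1)
   CPPAD_TESTVECTOR(double) w(1), dw(2);
   w[0] = 1.0;
   dw   = f.Reverse(1, w);

   // partial of f w.r.t. x[0] is x[1]; partial w.r.t. x[1] is x[0]
   ok &= dw[0] == x[1];
   ok &= dw[1] == x[0];
   return ok;
}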
template <class BaseVector, class Fun>
int opt_val_hes(
   const BaseVector&   x    ,
   const BaseVector&   y    ,
   Fun                 fun  ,
   BaseVector&         jac  ,
   BaseVector&         hes  )
{  // determine the base type
   typedef typename BaseVector::value_type Base;

   // check that BaseVector is a SimpleVector class with Base elements
   CheckSimpleVector<Base, BaseVector>();

   // determine the AD vector type
   typedef typename Fun::ad_vector ad_vector;

   // check that ad_vector is a SimpleVector class with AD<Base> elements
   CheckSimpleVector< AD<Base> , ad_vector >();

   // size of the x and y spaces
   size_t n = size_t(x.size());
   size_t m = size_t(y.size());

   // number of terms in the summation
   size_t ell = fun.ell();

   // check size of return values
   CPPAD_ASSERT_KNOWN(
      size_t(jac.size()) == n || jac.size() == 0,
      "opt_val_hes: size of the vector jac is not equal to n or zero"
   );
   CPPAD_ASSERT_KNOWN(
      size_t(hes.size()) == n * n || hes.size() == 0,
      "opt_val_hes: size of the vector hes is not equal to n * n or zero"
   );

   // some temporary indices
   size_t i, j, k;

   // AD version of S_k(x, y)
   ad_vector s_k(1);

   // ADFun version of S_k(x, y)
   ADFun<Base> S_k;

   // AD version of x
   ad_vector a_x(n);

   // AD version of y (must have m elements; see the copy loops below)
   ad_vector a_y(m);

   if( jac.size() > 0 )
   {  // this is the easy part, computing V^{(1)} (x), which is equal
      // to \partial_x F (x, y) (see Theorem 2 of the reference).

      // copy x and y to AD version
      for(j = 0; j < n; j++)
         a_x[j] = x[j];
      for(j = 0; j < m; j++)
         a_y[j] = y[j];

      // initialize summation
      for(j = 0; j < n; j++)
         jac[j] = Base(0.);

      // add in \partial_x S_k (x, y)
      for(k = 0; k < ell; k++)
      {  // start recording
         Independent(a_x);
         // record
         s_k[0] = fun.s(k, a_x, a_y);
         // stop recording and store in S_k
         S_k.Dependent(a_x, s_k);
         // compute partial of S_k with respect to x
         BaseVector jac_k = S_k.Jacobian(x);
         // add \partial_x S_k (x, y) to jac
         for(j = 0; j < n; j++)
            jac[j] += jac_k[j];
      }
   }
   // check if we are done
   if( hes.size() == 0 )
      return 0;
   /*
   In this case, we need to compute the Hessian.
   Using Theorem 1 of the reference:
      Y^{(1)}(x) = - F_yy (x, y)^{-1} F_yx (x, y)
   Using Theorem 2 of the reference:
      V^{(2)}(x) = F_xx (x, y) + F_xy (x, y) Y^{(1)}(x)
   */

   // Base and AD version of xy
   BaseVector xy(n + m);
   ad_vector a_xy(n + m);
   for(j = 0; j < n; j++)
      a_xy[j] = xy[j] = x[j];
   for(j = 0; j < m; j++)
      a_xy[n+j] = xy[n+j] = y[j];

   // initialize summation for Hessian of F
   size_t nm_sq = (n + m) * (n + m);
   BaseVector F_hes(nm_sq);
   for(j = 0; j < nm_sq; j++)
      F_hes[j] = Base(0.);
   BaseVector hes_k(nm_sq);

   // add in Hessian of S_k for each k
   for(k = 0; k < ell; k++)
   {  // start recording
      Independent(a_xy);
      // split out x
      for(j = 0; j < n; j++)
         a_x[j] = a_xy[j];
      // split out y
      for(j = 0; j < m; j++)
         a_y[j] = a_xy[n+j];
      // record
      s_k[0] = fun.s(k, a_x, a_y);
      // stop recording and store in S_k
      S_k.Dependent(a_xy, s_k);
      // when computing the Hessian, it pays to optimize the tape
      S_k.optimize();
      // compute Hessian of S_k
      hes_k = S_k.Hessian(xy, 0);
      // add Hessian of S_k (x, y) to F_hes
      for(j = 0; j < nm_sq; j++)
         F_hes[j] += hes_k[j];
   }

   // extract F_yx (m by n)
   BaseVector F_yx(m * n);
   for(i = 0; i < m; i++)
   {  for(j = 0; j < n; j++)
         F_yx[i * n + j] = F_hes[ (i+n)*(n+m) + j ];
   }

   // extract F_yy (m by m)
   BaseVector F_yy(m * m);
   for(i = 0; i < m; i++)
   {  for(j = 0; j < m; j++)
         F_yy[i * m + j] = F_hes[ (i+n)*(n+m) + j + n ];
   }

   // compute - Y^{(1)}(x) = F_yy (x, y)^{-1} F_yx (x, y)
   BaseVector neg_Y_x(m * n);
   Base logdet;
   int signdet = CppAD::LuSolve(m, n, F_yy, F_yx, neg_Y_x, logdet);
   if( signdet == 0 )
      return signdet;

   // compute hes = F_xx (x, y) + F_xy (x, y) Y^{(1)}(x)
   for(i = 0; i < n; i++)
   {  for(j = 0; j < n; j++)
      {  hes[i * n + j] = F_hes[ i*(n+m) + j ];
         for(k = 0; k < m; k++)
            hes[i*n+j] -= F_hes[i*(n+m) + k+n] * neg_Y_x[k*n+j];
      }
   }
   return signdet;
}
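
// Example usage of opt_val_hes (a minimal sketch, separate from the
// implementation above): the class call_fun, the specific F below, and
// the name opt_val_hes_example are illustrative assumptions. Here
//    F(x, y) = S_0(x, y) + S_1(x, y) = x * y * y / 2 - y
// so F_y = x * y - 1 = 0 yields Y(x) = 1 / x and V(x) = - 1 / (2 x).
// By Theorem 2 of the reference,
//    V'(x)  = F_x = y * y / 2 = 1 / (2 x^2)
//    V''(x) = - 1 / x^3
# include <cppad/cppad.hpp>
namespace {
   class call_fun {
   public:
      // vector type used by opt_val_hes for AD<double> values
      typedef CPPAD_TESTVECTOR( CppAD::AD<double> ) ad_vector;

      // number of terms in the representation F = sum_k S_k
      size_t ell(void) const
      {  return 2; }

      // the k-th term S_k (x, y)
      CppAD::AD<double> s(
         size_t k, const ad_vector& x, const ad_vector& y) const
      {  if( k == 0 )
            return x[0] * y[0] * y[0] / 2.0;
         return - y[0];
      }
   };
}
bool opt_val_hes_example(void)
{  bool ok = true;
   double eps = 10. * std::numeric_limits<double>::epsilon();

   // x and the corresponding optimal y = Y(x) = 1 / x
   CPPAD_TESTVECTOR(double) x(1), y(1);
   x[0] = 2.0;
   y[0] = 1.0 / x[0];

   // compute Jacobian and Hessian of V(x)
   CPPAD_TESTVECTOR(double) jac(1), hes(1);
   call_fun fun;
   int signdet = CppAD::opt_val_hes(x, y, fun, jac, hes);
   ok &= signdet != 0;

   // check V'(x) = 1 / (2 x^2) and V''(x) = - 1 / x^3
   ok &= CppAD::NearEqual( jac[0], 1. / (2. * x[0] * x[0]), eps, eps );
   ok &= CppAD::NearEqual( hes[0], - 1. / (x[0] * x[0] * x[0]), eps, eps );
   return ok;
}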
template <class Base, class RecBase>
template <class BoolVector, class SizeVector, class BaseVector>
void ADFun<Base,RecBase>::subgraph_jac_rev(
   const BoolVector&                   select_domain  ,
   const BoolVector&                   select_range   ,
   const BaseVector&                   x              ,
   sparse_rcv<SizeVector, BaseVector>& matrix_out     )
{  size_t m = Range();
   size_t n = Domain();
   //
   // point at which we are evaluating the Jacobian
   Forward(0, x);
   //
   // row, column, and value vectors for the subset
   local::pod_vector<size_t>      row_out;
   local::pod_vector<size_t>      col_out;
   local::pod_vector_maybe<Base>  val_out;
   //
   // initialize reverse mode computation on subgraphs
   subgraph_reverse(select_domain);
   //
   // memory used to hold subgraph_reverse results
   BaseVector dw;
   SizeVector col;
   //
   // loop through selected dependent variables
   for(size_t i = 0; i < m; ++i) if( select_range[i] )
   {  // compute Jacobian row and sparsity for this dependent variable
      size_t q = 1;
      subgraph_reverse(q, i, col, dw);
      CPPAD_ASSERT_UNKNOWN( size_t( dw.size() ) == n );
      //
      // offset for this dependent variable
      size_t index = row_out.size();
      CPPAD_ASSERT_UNKNOWN( col_out.size() == index );
      CPPAD_ASSERT_UNKNOWN( val_out.size() == index );
      //
      // extend vectors to hold results for this dependent variable
      size_t col_size = size_t( col.size() );
      row_out.extend( col_size );
      col_out.extend( col_size );
      val_out.extend( col_size );
      //
      // store results for this dependent variable
      for(size_t c = 0; c < col_size; ++c)
      {  row_out[index + c] = i;
         col_out[index + c] = col[c];
         val_out[index + c] = dw[ col[c] ];
      }
   }
   //
   // create sparsity pattern corresponding to row_out, col_out
   size_t nr  = m;
   size_t nc  = n;
   size_t nnz = row_out.size();
   sparse_rc<SizeVector> pattern(nr, nc, nnz);
   for(size_t k = 0; k < nnz; ++k)
      pattern.set(k, row_out[k], col_out[k]);
   //
   // create sparse matrix
   sparse_rcv<SizeVector, BaseVector> matrix(pattern);
   for(size_t k = 0; k < nnz; ++k)
      matrix.set(k, val_out[k]);
   //
   // return matrix
   matrix_out = matrix;
   //
   return;
}
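
// Example usage of subgraph_jac_rev (a minimal sketch, separate from the
// implementation above): the function f(x) = ( x[0] * x[1] , x[2] ), the
// vector typedefs, and the name subgraph_jac_rev_example are illustrative
// assumptions, not part of the library code.
# include <cppad/cppad.hpp>
bool subgraph_jac_rev_example(void)
{  using CppAD::AD;
   bool ok = true;
   typedef CPPAD_TESTVECTOR(size_t) s_vector;
   typedef CPPAD_TESTVECTOR(double) d_vector;
   typedef CPPAD_TESTVECTOR(bool)   b_vector;

   // record f(x) = ( x[0] * x[1] , x[2] )
   size_t n = 3, m = 2;
   CPPAD_TESTVECTOR( AD<double> ) ax(n), ay(m);
   for(size_t j = 0; j < n; ++j)
      ax[j] = AD<double>(j + 1);
   CppAD::Independent(ax);
   ay[0] = ax[0] * ax[1];
   ay[1] = ax[2];
   CppAD::ADFun<double> f(ax, ay);

   // select the entire domain and range
   b_vector select_domain(n), select_range(m);
   for(size_t j = 0; j < n; ++j)
      select_domain[j] = true;
   for(size_t i = 0; i < m; ++i)
      select_range[i] = true;

   // compute the sparse Jacobian at x = (1, 2, 3) using subgraphs
   d_vector x(n);
   for(size_t j = 0; j < n; ++j)
      x[j] = double(j + 1);
   CppAD::sparse_rcv<s_vector, d_vector> matrix_out;
   f.subgraph_jac_rev(select_domain, select_range, x, matrix_out);

   // the non-zeros are J(0,0) = x[1], J(0,1) = x[0], J(1,2) = 1
   ok &= matrix_out.nnz() == 3;
   return ok;
}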