void generate_reference() {
        m_repo.init_field("ccol", -1.0);
        m_repo.init_field("dcol", -1.0);
        m_repo.init_field("datacol", -1.0);

        Real *ccol = m_repo.field_h("ccol");
        Real *dcol = m_repo.field_h("dcol");
        Real *datacol = m_repo.field_h("datacol");

        u_stage_ = m_repo.field_h("u_stage");
        wcon_ = m_repo.field_h("wcon");
        u_pos_ = m_repo.field_h("u_pos");
        utens_ = m_repo.field_h("utens");
        utens_stage_ref_ = m_repo.field_h("utens_stage_ref");

        v_stage_ = m_repo.field_h("v_stage");
        v_pos_ = m_repo.field_h("v_pos");
        vtens_ = m_repo.field_h("vtens");
        vtens_stage_ref_ = m_repo.field_h("vtens_stage_ref");

        w_stage_ = m_repo.field_h("w_stage");
        w_pos_ = m_repo.field_h("w_pos");
        wtens_ = m_repo.field_h("wtens");
        wtens_stage_ref_ = m_repo.field_h("wtens_stage_ref");

        // Generate U
        for (int i = m_halo.m_i; i < m_domain.m_i - m_halo.m_i; ++i) {
            for (int j = m_halo.m_j; j < m_domain.m_j - m_halo.m_j; ++j) {

                forward_sweep(
                    i, j, 0,0, 8,8, 1, 0, ccol, dcol, wcon_, u_stage_, u_pos_, utens_, utens_stage_ref_, m_domain, m_strides);
                backward_sweep(i, j, 0,0,8,8,ccol, dcol, datacol, u_pos_, utens_stage_ref_, m_domain, m_strides);
            }
        }

        // Generate V
        for (int i = m_halo.m_i; i < m_domain.m_i - m_halo.m_i; ++i) {
            for (int j = m_halo.m_j; j < m_domain.m_j - m_halo.m_j; ++j) {

                forward_sweep(
                    i, j, 0,0, 8,8, 1, 0, ccol, dcol, wcon_, v_stage_, v_pos_, vtens_, vtens_stage_ref_, m_domain, m_strides);
                backward_sweep(i, j, 0,0,8,8,ccol, dcol, datacol, v_pos_, vtens_stage_ref_, m_domain, m_strides);
            }
        }

        // Generate W
        for (int i = m_halo.m_i; i < m_domain.m_i - m_halo.m_i; ++i) {
            for (int j = m_halo.m_j; j < m_domain.m_j - m_halo.m_j; ++j) {

                forward_sweep(
                    i, j, 0,0, 8,8, 1, 0, ccol, dcol, wcon_, w_stage_, w_pos_, wtens_, wtens_stage_ref_, m_domain, m_strides);
                backward_sweep(i, j, 0,0,8,8,ccol, dcol, datacol, w_pos_, wtens_stage_ref_, m_domain, m_strides);
            }
        }

    }
int main(int argc, char* argv[]) {

  if(argc != 2) {
    std::cerr << "Usage : " << argv[0] << "<0,1>" <<std::endl;
    std::cerr << "with : " << std::endl;
    std::cerr << "0 : quadratic loss" << std::endl;
    std::cerr << "1 : cross entropy loss" << std::endl;
    return -1;
  }

  bool quadratic_loss = (atoi(argv[1]) == 0);

  srand(time(NULL));

  // We compare our computation of the gradient to 
  // a finite difference approximation
  // The loss is also involved
  std::cout << "---------------------------------" << std::endl;
  std::cout << "Comparing the analytical gradient and numerical approximation " << std::endl;
  auto input = gaml::mlp::input<X>(INPUT_DIM, fillInput);
  auto l1 = gaml::mlp::layer(input, HIDDEN_LAYER_SIZE, gaml::mlp::mlp_sigmoid(), gaml::mlp::mlp_dsigmoid());
  auto l2 = gaml::mlp::layer(l1, HIDDEN_LAYER_SIZE, gaml::mlp::mlp_identity(), gaml::mlp::mlp_didentity());
  auto l3 = gaml::mlp::layer(l2, HIDDEN_LAYER_SIZE, gaml::mlp::mlp_tanh(), gaml::mlp::mlp_dtanh());
  auto l4 = gaml::mlp::layer(l3, OUTPUT_DIM, gaml::mlp::mlp_sigmoid(), gaml::mlp::mlp_dsigmoid());
  auto mlp = gaml::mlp::perceptron(l4, output_of);

  std::cout << "We use the following architecture : " << std::endl;
  std::cout << mlp << std::endl;
  std::cout << "which has a total of " << mlp.psize() << " parameters"<< std::endl;

  gaml::mlp::parameters_type params(mlp.psize());
  gaml::mlp::parameters_type paramsph(mlp.psize());
  gaml::mlp::values_type derivatives(mlp.psize());
  gaml::mlp::values_type forward_sweep(mlp.size());
  X x;

  auto loss_ce = gaml::mlp::loss::CrossEntropy();
  auto loss_quadratic = gaml::mlp::loss::Quadratic();

  auto f = [&mlp, &params] (const typename decltype(mlp)::input_type& x) -> gaml::mlp::values_type {
    auto output = mlp(x, params);
    gaml::mlp::values_type voutput(mlp.output_size());
    fillOutput(voutput.begin(), output);
    return voutput;
  };
  auto df = [&mlp, &forward_sweep, &params] (const typename decltype(mlp)::input_type& x, unsigned int parameter_dim) -> gaml::mlp::values_type {
    return mlp.deriv(x, params, forward_sweep, parameter_dim);
  };

  unsigned int nbtrials = 100;
  unsigned int nbfails = 0;
  std::cout << "I will compare " << nbtrials << " times a numerical approximation and the analytical gradient we compute" << std::endl;


  for(unsigned int t = 0 ; t < nbtrials ; ++t) {
  
    randomize_data(params, -1.0, 1.0);
    randomize_data(x, -1.0, 1.0);

    // Compute the output at params
    auto output = mlp(x, params);
    gaml::mlp::values_type raw_output(OUTPUT_DIM);
    fillOutput(raw_output.begin(), output);
    gaml::mlp::values_type raw_outputph(OUTPUT_DIM);

    // For computing the loss, we need a target
    gaml::mlp::values_type raw_target(OUTPUT_DIM);
    randomize_data(raw_target);

    double norm_dh = 0.0;

    for(unsigned int i = 0 ; i < mlp.psize() ; ++i) {
      // Let us compute params + h*[0 0 0 0 0 0 1 0 0 0 0 0], the 1 at the ith position
      std::copy(params.begin(), params.end(), paramsph.begin());
      double dh = (sqrt(DBL_EPSILON) * paramsph[i]);
      paramsph[i] += dh;
      norm_dh += dh*dh;
      // Compute the output at params + h
      auto outputph = mlp(x, paramsph);
      fillOutput(raw_outputph.begin(), outputph);
      
      // We now compute the approximation of the derivative
      if(quadratic_loss)
	derivatives[i] = (loss_quadratic(raw_target, raw_outputph) - loss_quadratic(raw_target, raw_output))/dh;
      else
	derivatives[i] = (loss_ce(raw_target, raw_outputph) - loss_ce(raw_target, raw_output))/dh;
	
    }
  
    // We now compute the analytical derivatives
    mlp(x, params);
    std::copy(mlp.begin(), mlp.end(), forward_sweep.begin());

    gaml::mlp::values_type our_derivatives(mlp.psize());
    for(unsigned int i = 0 ; i < mlp.psize() ; ++i) {
      if(quadratic_loss)
	our_derivatives[i] = loss_quadratic.deriv(x, raw_target, forward_sweep, f, df, i);
      else
	our_derivatives[i] = loss_ce.deriv(x, raw_target, forward_sweep, f, df, i);
	
    }
  
    // We finally compute the norm of the difference
    double error = 0.0;
    auto diter = derivatives.begin();
    for(auto& ourdi : our_derivatives) {
      error = (ourdi - *diter) * (ourdi - *diter);
      diter++;
    }
    error = sqrt(error);
    std::cout << "Error between the analytical and numerical gradients " << error << " with a step size of " << sqrt(norm_dh) << " in norm" << std::endl;
    if(error > 1e-7) 
      ++nbfails;

    /*
    std::cout << "numerical " << std::endl;
    for(auto & di : derivatives)
      std::cout << di << " ";
    std::cout << std::endl;
    std::cout << "our :" << std::endl;
    for(auto& di : our_derivatives)
      std::cout << di << " ";
    std::cout << std::endl;
    */

  }

  std::cout << nbfails << " / " << nbtrials << " with an error higher than 1e-7" << std::endl;
}
Example #3
0
Vector ADFun<Base>::Forward(
	size_t p                    , 
	const Vector& x_p           , 
	std::ostream& s             )
{	// temporary indices
	size_t i, j;

	// number of independent variables
	size_t n = ind_taddr_.size();

	// number of dependent variables
	size_t m = dep_taddr_.size();

	// check Vector is Simple Vector class with Base type elements
	CheckSimpleVector<Base, Vector>();

	CPPAD_ASSERT_KNOWN(
		size_t(x_p.size()) == n,
		"Second argument to Forward does not have length equal to\n"
		"the dimension of the domain for the corresponding ADFun."
	);
	CPPAD_ASSERT_KNOWN(
		p <= taylor_per_var_,
		"The number of taylor_ coefficient currently stored\n"
		"in this ADFun object is less than p."
	);  

	// check if the taylor_ matrix needs more columns
	if( taylor_col_dim_ <= p )
		capacity_taylor(p + 1);
	CPPAD_ASSERT_UNKNOWN( taylor_col_dim_ > p );

	// set the p-th order taylor_ coefficients for independent variables
	for(j = 0; j < n; j++)
	{	CPPAD_ASSERT_UNKNOWN( ind_taddr_[j] < total_num_var_ );

		// ind_taddr_[j] is operator taddr for j-th independent variable
		CPPAD_ASSERT_UNKNOWN( play_.GetOp( ind_taddr_[j] ) == InvOp );

		// It is also variable taddr for j-th independent variable
		taylor_[ind_taddr_[j] * taylor_col_dim_ + p] = x_p[j];
	}

	// evaluate the derivatives
	if( p == 0 )
	{
# if CPPAD_USE_FORWARD0SWEEP
		compare_change_ = forward0sweep(s, true,
			n, total_num_var_, &play_, taylor_col_dim_, taylor_.data()
		);
# else
		compare_change_ = forward_sweep(s, true,
			p, n, total_num_var_, &play_, taylor_col_dim_, taylor_.data()
		);
# endif
	}
	else forward_sweep(s, false,
		p, n, total_num_var_, &play_, taylor_col_dim_, taylor_.data()
	);

	// return the p-th order taylor_ coefficients for dependent variables
	Vector y_p(m);
	for(i = 0; i < m; i++)
	{	CPPAD_ASSERT_UNKNOWN( dep_taddr_[i] < total_num_var_ );
		y_p[i] = taylor_[dep_taddr_[i] * taylor_col_dim_ + p];
	}
# ifndef NDEBUG
	if( hasnan(y_p) )
	{	if( p == 0 )
		{	CPPAD_ASSERT_KNOWN(false,
				"y = f.Forward(0, x): has a nan in y."
			);  
		}
		else
		{	CPPAD_ASSERT_KNOWN(false,
				"y_p = f.Forward(p, x_p): has a nan in y_p for p > 0, "
				"but not for p = 0."
			);
		}
	}
# endif


	// now we have p + 1  taylor_ coefficients per variable
	taylor_per_var_ = p + 1;

	return y_p;
}