Example #1
/// Perform one iteration of the SGD algorithm with specified gain
void
SvmAsgd::trainOne(const SVector &x, double y, double eta, double mu)
{
  // Renormalize if needed
  if (aDivisor > 1e5 || wDivisor > 1e5) renorm();
  // Forward
  double s = dot(w,x) / wDivisor + wBias;
  // SGD update for regularization term
  wDivisor = wDivisor / (1 - eta * lambda);
  // SGD update for loss term
  double d = LOSS::dloss(s, y);
  double etd = eta * d * wDivisor;
  if (etd != 0)
    w.add(x, etd);
  // Averaging
  if (mu >= 1)
    {
      a.clear();
      aDivisor = wDivisor;
      wFraction = 1;
    }
  else if (mu > 0)
    {
      if (etd != 0)
        a.add(x, - wFraction * etd);
      aDivisor = aDivisor / (1 - mu);
      wFraction = wFraction + mu * aDivisor / wDivisor;
    }
  // same for the bias
#if BIAS
  double etab = eta * 0.01;
#if REGULARIZED_BIAS
  wBias *= (1 - etab * lambda);
#endif
  wBias += etab * d;
  aBias += mu * (wBias - aBias);
#endif
}
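Note that the update above never touches every coordinate of w: the weights are stored implicitly as w / wDivisor and the running average as (a + wFraction * w) / aDivisor, so each step only performs sparse operations. The renorm() routine called at the top folds these scaling factors back into the vectors when they grow too large. It is not shown in this snippet; a minimal sketch, assuming SVector provides scale() and combine() helpers (an assumption, not confirmed by the code above), could look like this:

/// Fold the scaling factors back into the vectors.
/// Sketch only: assumes SVector::scale(double) and
/// SVector::combine(double, const SVector&, double) exist.
void
SvmAsgd::renorm()
{
  if (wDivisor != 1.0 || aDivisor != 1.0 || wFraction != 0)
    {
      // Averaged weights are represented as (a + wFraction * w) / aDivisor.
      a.combine(1 / aDivisor, w, wFraction / aDivisor);
      // Plain weights are represented as w / wDivisor.
      w.scale(1 / wDivisor);
      wDivisor = 1;
      aDivisor = 1;
      wFraction = 0;
    }
}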
Example #2
/// Perform one iteration of the SGD algorithm with specified gain.
/// This is the only function that differentiates the averaged implicit
/// implementation from the averaged (explicit) one: it simply merges the
/// implicit update with the averaging step.
void
SvmAisgd::trainOne(const SVector &x, double y, double eta, double mu)
{
  double etd = 0;
  // HingeLoss case.
  if (LOSS::name().compare("HingeLoss") == 0)
    {
      double ypred = dot(x, w) / wDivisor;
      double implicitFactor = (1 + lambda * eta);
      if (1 - y * ypred / implicitFactor < 0)
        {
          // Margin already satisfied: only shrink,
          // i.e. theta_{t+1} = theta_t / (1 + lambda * eta).
          wDivisor *= implicitFactor;
        }
      else
        {
          // Compute x_t' theta_{t+1}, the prediction after the tentative update.
          double ypredNext = 0;
          for (const SVector::Pair *p = x; p->i >= 0; p++)
            {
              double w_i = w.get(p->i) / wDivisor;
              ypredNext += p->v * (w_i + p->v * eta * y);
            }
          if (1 - y * ypredNext / implicitFactor >= 0)
            {
              // Update is theta_{t+1} = (theta_t + eta * y_t * x_t) / (1 + lambda * eta).
              etd = eta * y * wDivisor;
              w.add(x, etd);
              wDivisor *= implicitFactor;
            }
          else
            {
              // Do nothing (no update of the parameters).
            }
        }
      if (wDivisor > 1e5) renorm();
    }

  else if (LOSS::name().compare("LogLoss") == 0)
    {
      // Need to solve  ksi_t = eta * (y_t - h(theta_t' x_t + ksi_t ||x_t||^2)).
      // Solve approximately with one linearization step:
      //   ksi_t = eta * (y_t - h(theta_t' x_t)) / (1 + eta * ||x_t||^2 * h'(theta_t' x_t))
      // TODO(ptoulis): Use implicit Algorithm 1 of (Toulis et al., ICML 2014).
      double wx = dot(w, x) / wDivisor;
      double ypred = 2 * (exp(wx) / (1 + exp(wx))) - 1;
      double implicitFactor = 1 + eta * dot(x, x) * ypred / (1 + exp(wx));
      double ksi_t = (1 / implicitFactor) * eta * (y - ypred);
      etd = wDivisor * ksi_t;
      w.add(x, etd);
    }

  else
    {
      cout << "#" << LOSS::name() << "# -- loss not found." << endl;
    }
  // Averaging
  if (mu >= 1)
    {
      a.clear();
      aDivisor = wDivisor;
      wFraction = 1;
    }
  else if (mu > 0)
    {
      if (etd != 0)
        a.add(x, - wFraction * etd);
      aDivisor = aDivisor / (1 - mu);
      wFraction = wFraction + mu * aDivisor / wDivisor;
    }
}
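The log-loss branch stops at a single linearization step, as the TODO notes. Below is a minimal sketch of solving the fixed-point equation for ksi_t exactly with a few Newton iterations; solveImplicitKsi is a hypothetical helper written for illustration, not part of the original class:

#include <cmath>

// Sketch only: solve  ksi = eta * (y - h(wx + ksi * xnorm2))  by Newton's
// method, where h(u) = 2 / (1 + exp(-u)) - 1 maps scores to (-1, 1).
// The residual f(ksi) = ksi - eta * (y - h(...)) is monotone increasing
// with f'(ksi) >= 1, so the iteration converges from ksi = 0.
static double
solveImplicitKsi(double wx, double y, double eta, double xnorm2)
{
  double ksi = 0;
  for (int iter = 0; iter < 20; iter++)
    {
      double u = wx + ksi * xnorm2;
      double h = 2 / (1 + exp(-u)) - 1;            // prediction in (-1, 1)
      double hprime = (1 - h * h) / 2;             // h'(u) = 2 sigma(u) (1 - sigma(u))
      double f = ksi - eta * (y - h);              // fixed-point residual
      double fprime = 1 + eta * xnorm2 * hprime;   // always >= 1
      double step = f / fprime;
      ksi -= step;
      if (fabs(step) < 1e-12)
        break;
    }
  return ksi;
}

Inside the LogLoss branch this would replace the one-step approximation: etd = wDivisor * solveImplicitKsi(wx, y, eta, dot(x, x)); followed by the same w.add(x, etd).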