Example #1
/// Perform one iteration of the SAG algorithm with gain eta
/// Argument i is the index of the loss in the saved dloss vector.
void
SvmSag::trainOne(const SVector &x, double y, double eta, int i)
{
  // compute loss
  double s = dot(w,x) * wa + wBias;
  if (wb != 0)
    s += dot(g,x) * wb;
  // compute dloss
  double d = LOSS::dloss(s, y);
  double od = sd[i - sdimin];
  sd[i - sdimin] = d;
  d = d - od;                  // change relative to the dloss previously stored for example i
  // update weights: the score uses  theta = wa*w + wb*g,
  // where g accumulates the saved per-example dloss values times x (the SAG gradient table)
  g.add(x, d);
  w.add(x, - d * wb / wa);     // compensate so that updating g leaves the combined score unchanged here
  double decay = 1 - lambda * eta;
  wa = wa * decay;             // L2 regularization decay folded into the scalar wa
  wb = wb * decay + eta / m;   // SAG step theta += (eta/m)*g, folded into the scalar wb
  if (wa < 1e-5)
    renorm();
  // same for the bias
#if BIAS
  double etab = eta * 0.01;
  gBias += d;
#if REGULARIZED_BIAS
  wBias *= (1 - etab * lambda);
#endif
  wBias += etab * gBias / m;
#endif
}
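A minimal sketch of how trainOne might be driven over one SAG pass. The epoch helper, the "svmsag.h" header name, the public visibility of trainOne, and the xvec_t/yvec_t accessors are assumptions for illustration, not part of the example above; the index i is forwarded so trainOne can refresh the stored dloss slot sd[i - sdimin].

#include <algorithm>
#include <random>
#include <vector>
#include "svmsag.h"   // assumed header declaring SvmSag, SVector, xvec_t, yvec_t

// Hypothetical helper: one SAG pass over the saved-gradient slots [imin, imax],
// visited in random order, with a constant gain eta.
void sagEpoch(SvmSag &svm, const xvec_t &xp, const yvec_t &yp,
              int imin, int imax, double eta)
{
  std::vector<int> order;
  for (int i = imin; i <= imax; i++)
    order.push_back(i);
  std::shuffle(order.begin(), order.end(), std::mt19937(42));
  for (int i : order)
    svm.trainOne(xp.at(i), yp.at(i), eta, i);   // i selects the sd[] slot to refresh
}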
Example #2
/// Perform one iteration of the SGD algorithm with specified gain
void
SvmAsgd::trainOne(const SVector &x, double y, double eta, double mu)
{
  // Renormalize if needed
  if (aDivisor > 1e5 || wDivisor > 1e5) renorm();
  // Forward
  double s = dot(w,x) / wDivisor + wBias;
  // SGD update for regularization term
  wDivisor = wDivisor / (1 - eta * lambda);
  // SGD update for loss term
  double d = LOSS::dloss(s, y);
  double etd = eta * d * wDivisor;
  if (etd != 0)
    w.add(x, etd);
  // Averaging
  if (mu >= 1)
    {
      a.clear();
      aDivisor = wDivisor;
      wFraction = 1;
    }
  else if (mu > 0)
    {
      if (etd != 0)
        a.add(x, - wFraction * etd);
      aDivisor = aDivisor / (1 - mu);
      wFraction = wFraction + mu * aDivisor / wDivisor;
    }
  // same for the bias
#if BIAS
  double etab = eta * 0.01;
#if REGULARIZED_BIAS
  wBias *= (1 - etab * lambda);
#endif
  wBias += etab * d;
  aBias += mu * (wBias - aBias);
#endif
}
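For context, a sketch of how the gain eta and averaging rate mu might be scheduled when calling this trainOne. The t^(-0.75) gain decay and the "start averaging at tstart" rule are the usual ASGD recipe and are assumptions here, as is the "svmasgd.h" header.

#include <cmath>
#include "svmasgd.h"   // assumed header declaring SvmAsgd, SVector, xvec_t, yvec_t

// Hypothetical epoch driver: decaying SGD gain, and an averaging rate mu that
// stays at 1 (a simply tracks w) until tstart, then decays as 1/(t - tstart).
void asgdEpoch(SvmAsgd &svm, const xvec_t &xp, const yvec_t &yp,
               int imin, int imax, double eta0, double lambda,
               double &t, double tstart)
{
  for (int i = imin; i <= imax; i++, t += 1)
    {
      double eta = eta0 / std::pow(1 + lambda * eta0 * t, 0.75);   // assumed gain schedule
      double mu = (t <= tstart) ? 1.0 : 1.0 / (t - tstart);        // assumed averaging schedule
      svm.trainOne(xp.at(i), yp.at(i), eta, mu);
    }
}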
Example #3
/// Winnie: train on samples; because the data has been randomly shuffled,
/// just use the first m instances.
void
SvmSgd::pre_trainOne(const SVector &x, double y, double eta)
{
  double s = dot(w,x) / wDivisor + wBias;
  // update for regularization term
  wDivisor = wDivisor / (1 - eta * lambda);
  if (wDivisor > 1e5) renorm();
  // update for loss term
  double d = LOSS::dloss(s, y);
  if (d != 0)
    w.add(x, eta * d * wDivisor);
  // same for the bias
#if BIAS
  double etab = eta * 0.01;
#if REGULARIZED_BIAS
  wBias *= (1 - etab * lambda);
#endif
  wBias += etab * d;
#endif
}
Example #4
void 
SvmSgd::train(int imin, int imax, 
              const xvec_t &xp, const yvec_t &yp,
              const char *prefix)
{
  cout << prefix << "Training on [" << imin << ", " << imax << "]." << endl;
  assert(imin <= imax);
  count = skip;
  for (int i=imin; i<=imax; i++)
    {
      const SVector &x = xp.at(i);
      double y = yp.at(i);
      double wx = dot(w,x);
      double z = y * (wx + bias);
      double eta = 1.0 / (lambda * t);
#if LOSS < LOGLOSS
      if (z < 1)
#endif
        {
          double etd = eta * dloss(z);
          w.add(x, etd * y);
#if BIAS
#if REGULARIZEBIAS
          bias *= 1 - eta * lambda * bscale;
#endif
          bias += etd * y * bscale;
#endif
        }
      if (--count <= 0)
        {
          // Apply the regularization decay accumulated over the last `skip`
          // examples in a single dense scaling of w.
          double r = 1 - eta * lambda * skip;
          if (r < 0.8)
            // The linear shortcut is too coarse here; use the exact product.
            r = pow(1 - eta * lambda, skip);
          w.scale(r);
          count = skip;
        }
      t += 1;
    }
  cout << prefix << setprecision(6) 
       << "Norm: " << dot(w,w) << ", Bias: " << bias << endl;
}
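A tiny standalone check of the delayed-regularization shortcut used in the skip block above; the eta, lambda and skip values are made up purely for illustration.

#include <cmath>
#include <cstdio>

int main()
{
  double eta = 1e-4, lambda = 1e-5;   // illustrative values only
  int skip = 16;
  double linear = 1 - eta * lambda * skip;          // one-shot first-order decay
  double exact  = std::pow(1 - eta * lambda, skip); // exact product of per-step decays
  std::printf("linear = %.12f  exact = %.12f\n", linear, exact);
  // The two agree closely while eta*lambda*skip is small; the training loop
  // above falls back to the exact pow() form only when the linear value
  // drops below 0.8.
  return 0;
}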
Example #5
/// Perform one SGD iteration (used to determine eta)
void
SvmSag::trainSgdOne(const SVector &x, double y, double eta, int i)
{
  assert(wb == 0);
  double s = dot(w,x) * wa + wBias;
  wa = wa * (1 - eta * lambda);
  if (wa < 1e-5) 
    renorm();
  double d = LOSS::dloss(s, y);
  if (i >= 0)                  // a negative i skips saving the dloss (e.g. when only probing eta)
    sd[i-sdimin] = d;
  if (d != 0)
    w.add(x, eta * d / wa);
#if BIAS
  double etab = eta * 0.01;
#if REGULARIZED_BIAS
  wBias *= (1 - etab * lambda);
#endif
  wBias += etab * d;
#endif
}
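A sketch of how trainSgdOne could be used to probe for a good gain before the SAG phase starts, as its doc comment suggests. pickEta, the candidate grid, the copyability of SvmSag, and the testCost evaluation helper are all assumptions made for illustration, and the model is assumed not to have entered the SAG phase yet (wb == 0). Passing i = -1 uses the branch above that leaves sd[] untouched.

#include "svmsag.h"   // assumed header declaring SvmSag, SVector, xvec_t, yvec_t

// Hypothetical gain search: run a short SGD burst with each candidate eta on a
// copy of the model and keep the eta that gives the lowest cost.  testCost()
// is an assumed evaluation helper, not part of the code above.
double pickEta(const SvmSag &base, const xvec_t &xp, const yvec_t &yp,
               int imin, int imax)
{
  double bestEta = 1, bestCost = 1e30;
  for (double eta = 4; eta >= 1.0 / 64; eta /= 4)
    {
      SvmSag trial = base;                                // requires a copyable model (assumed)
      for (int i = imin; i <= imax; i++)
        trial.trainSgdOne(xp.at(i), yp.at(i), eta, -1);   // i < 0: leave sd[] untouched
      double cost = trial.testCost(xp, yp, imin, imax);   // hypothetical helper
      if (cost < bestCost)
        {
          bestCost = cost;
          bestEta = eta;
        }
    }
  return bestEta;
}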
Example #6
void
SvmSgd::train(int imin, int imax,
              const xvec_t &xp, const yvec_t &yp,
              const char *prefix)
{
    cout << prefix << "Training on [" << imin << ", " << imax << "]." << endl;
    assert(imin <= imax);
    for (int i=imin; i<=imax; i++)
    {
        double eta = 1.0 / (lambda * t);
        double s = 1 - eta * lambda;
        wscale *= s;
        if (wscale < 1e-9)
        {
            w.scale(wscale);
            wscale = 1;
        }
        const SVector &x = xp.at(i);
        double y = yp.at(i);
        double wx = dot(w,x) * wscale;
        double z = y * (wx + bias);
#if LOSS < LOGLOSS
        if (z < 1)
#endif
        {
            double etd = eta * dloss(z);
            w.add(x, etd * y / wscale);
#if BIAS
            // Slower rate on the bias because
            // it learns at each iteration.
            bias += etd * y * 0.01;
#endif
        }
        t += 1;
    }
    double wnorm =  dot(w,w) * wscale * wscale;
    cout << prefix << setprecision(6)
         << "Norm: " << wnorm << ", Bias: " << bias << endl;
}
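For completeness, a minimal sketch of how a score would be read out from the representation used in this train() (unscaled w plus the scalar wscale). The free-function form and the "vectors.h" header are illustrative assumptions, not the layout of the original class.

#include "vectors.h"   // assumed header providing SVector and dot()

// Hypothetical read-out: the stored w is kept unscaled and the accumulated
// regularization decay lives in wscale, so the effective score is
// wscale * <w, x> + bias (the same expression used for wx above).
double score(const SVector &w, double wscale, double bias, const SVector &x)
{
  return dot(w, x) * wscale + bias;
}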
Example #7
/// Perform one iteration of the SGD algorithm with specified gain
/// This is the only function differentiating the averaged implicit from the
/// averaged (explicit) implementation. We simply merge the implicit update
/// with the averaging step.
void
SvmAisgd::trainOne(const SVector &x, double y, double eta, double mu)
{
  double etd = 0;
  // HingeLoss case.
  if (LOSS::name().compare("HingeLoss") == 0)
    {
      double ypred = dot(x, w) / wDivisor;
      double implicitFactor = (1 + lambda * eta);
      if (1 - y * ypred / implicitFactor < 0)
        {
          // Update will be theta_{t+1} = theta_t / (1 + lambda * eta).
          wDivisor *= implicitFactor;
        }
      else
        {
          double ypred = 0;  // computes x_t' theta_{t+1} (next update)
          for (const SVector::Pair *p = x; p->i >= 0; p++)
            {
              double w_i = w.get(p->i) / wDivisor;
              ypred += p->v * (w_i + p->v * eta * y);
            }
          if (1 - y * ypred / implicitFactor >= 0)
            {
              // Update is theta_{t+1} = (theta_t + eta * y_t * x_t) / (1 + lambda * eta).
              etd = eta * y * wDivisor;
              w.add(x, etd);
              wDivisor *= implicitFactor;
            }
          else
            {
              // Do nothing (no update of the parameters).
            }
        }
      if (wDivisor > 1e5) renorm();
    }
  else if (LOSS::name().compare("LogLoss") == 0)
    {
      // Need to solve  ξ_t = a_t (y_t - h(theta_t' x_t + ξ_t ||x_t||^2)).
      // Solve approximately by using
      //   ξ_t = a_t (y_t - h(theta_t' x_t)) / (1 + a_t ||x_t||^2 h'(theta_t' x_t)).
      // TODO(ptoulis): Use implicit Algorithm 1 of (Toulis, et al., ICML 2014).
      double wx = dot(w, x) / wDivisor;
      double ypred = 2 * (exp(wx) / (1 + exp(wx))) - 1;
      double implicitFactor = 1 + eta * dot(x, x) * ypred / (1 + exp(wx));
      double ksi_t = (1 / implicitFactor) * eta * (y - ypred);
      etd = wDivisor * ksi_t;
      w.add(x, etd);
    }
  else
    {
      cout << "#" << LOSS::name() << "# -- loss not found." << endl;
    }
  // Averaging
  if (mu >= 1)
    {
      a.clear();
      aDivisor = wDivisor;
      wFraction = 1;
    }
  else if (mu > 0)
    {
      if (etd != 0)
        a.add(x, - wFraction * etd);
      aDivisor = aDivisor / (1 - mu);
      wFraction = wFraction + mu * aDivisor / wDivisor;
    }
}
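Finally, a sketch of how the averaged parameters could be read out from the stored quantities. The representation abar = (a + wFraction * w) / aDivisor is an inference from the averaging recursion shown in Examples #2 and #7 (it reproduces abar_new = (1 - mu) * abar_old + mu * w_new / wDivisor), not a statement taken from the original code; the free-function form and the "svmaisgd.h" header are likewise assumptions.

#include "svmaisgd.h"   // assumed header providing SVector and dot()

// Hypothetical read-out of the averaged score for an example x, assuming the
// members a, w, aDivisor, wFraction and aBias are accessible.
double averagedScore(const SVector &x, const SVector &a, const SVector &w,
                     double aDivisor, double wFraction, double aBias)
{
  // Averaged weights:  abar = (a + wFraction * w) / aDivisor  (inferred representation).
  return (dot(a, x) + wFraction * dot(w, x)) / aDivisor + aBias;
}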