Example #1
/**  
 *  Compute loss and gradient of Huber hinge loss. 
 *  CAUTION: f is passed by reference and is changed within this
 *  function. This is done for efficiency reasons, otherwise we would
 *  have had to create a new copy of f.
 *   
 *  @param loss [write] loss value computed.
 *  @param f [read/write] prediction vector. 
 *  @param l [write] partial derivative of loss function w.r.t. f
 */
void CHuberHingeLoss::LossAndGrad(double& loss, TheMatrix& f, TheMatrix& l)
{
   f.ElementWiseMult(_data->labels());
   double* yf = f.Data();
   double* Y = _data->labels().Data();
   int len = f.Length();
   loss = 0.0;
   l.Zero();

   for(int i=0; i < len; i++) 
   {
      double v = 1 - yf[i];
      if(v > h)
      {
         // linear region (v > h)
         loss += v;
         l.Set(i, -Y[i]);
      }
      else if(v >= -h)
      {
         // quadratic smoothing region (-h <= v <= h)
         loss += (v+h)*(v+h)/4/h;
         l.Set(i, -Y[i]*(v+h)/2/h);
      }
      // else: flat region (v < -h), zero loss and zero gradient
   }
}
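The three branches implement the smoothed hinge: linear for v > h, quadratic for |v| <= h, and zero for v < -h, where v = 1 - y*f. A minimal standalone sketch of that pointwise rule (huberHingePoint is a hypothetical helper, not part of the library; h is the class's smoothing parameter and labels y are assumed in {-1,+1}):

#include <cstdio>

// Standalone sketch (not library code) of the pointwise Huber hinge rule
// implemented by the loop above; h is the smoothing parameter, y in {-1,+1}.
void huberHingePoint(double y, double f, double h, double& loss, double& grad)
{
   double v = 1.0 - y*f;
   if(v > h)                      // linear region
   {
      loss = v;
      grad = -y;
   }
   else if(v < -h)                // flat region
   {
      loss = 0.0;
      grad = 0.0;
   }
   else                           // quadratic smoothing region
   {
      loss = (v+h)*(v+h)/(4.0*h);
      grad = -y*(v+h)/(2.0*h);
   }
}

int main()
{
   double loss = 0.0, grad = 0.0;
   huberHingePoint(1.0, 0.5, 0.1, loss, grad);    // v = 0.5 > h: linear branch
   std::printf("loss=%g grad=%g\n", loss, grad);  // prints loss=0.5 grad=-1
   return 0;
}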
Example #2
/**  
 *  Compute loss and partial derivative of logistic loss w.r.t. f
 *   
 *  @param loss [write] loss value computed.
 *  @param f [r/w] = X*w
 *  @param l [write] partial derivative of loss w.r.t. f
 */
void CLogisticLoss::LossAndGrad(double& loss, TheMatrix& f, TheMatrix& l)
{
    loss = 0.0;
    l.Zero();  // for gradient computation i.e. grad := l'*X
    f.ElementWiseMult(_data->labels());
    double* f_array = f.Data();  // pointer to memory location of f (faster element access)
    int len = f.Length();
    double exp_yf = 0.0;

    for(int i=0; i < len; i++)
    {
        if(f_array[i] == 0.0)
        {
            loss += LN2;
            l.Set(i,-0.5);
        }
        else if (f_array[i] > 0.0)
        {
            exp_yf = exp(-f_array[i]);
            loss += log(1+exp_yf);
            l.Set(i,-exp_yf/(1+exp_yf));
        }
        else
        {
            exp_yf = exp(f_array[i]);
            loss += log(1+exp_yf) - f_array[i];
            l.Set(i,-1.0/(1+exp_yf));
        }
    }	
    l.ElementWiseMult(_data->labels());
}
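The branch on the sign of f_array[i] keeps the computation numerically stable: exp() is only ever evaluated at a non-positive argument, so it cannot overflow. A standalone sketch of that pointwise rule (logisticPoint is a hypothetical helper; it uses std::log1p where the code above uses log(1+x)):

#include <cmath>
#include <cstdio>

// Pointwise logistic loss log(1 + exp(-t)) with t = y*f, and its derivative
// w.r.t. t; exp() never sees a positive argument, so it cannot overflow.
void logisticPoint(double t, double& loss, double& dloss)
{
    if(t > 0.0)
    {
        double e = std::exp(-t);      // e in (0,1]
        loss  = std::log1p(e);
        dloss = -e/(1.0 + e);
    }
    else
    {
        double e = std::exp(t);       // e in (0,1]
        loss  = std::log1p(e) - t;    // log(1+exp(-t)) = log(1+exp(t)) - t
        dloss = -1.0/(1.0 + e);
    }
}

int main()
{
    double loss = 0.0, dloss = 0.0;
    logisticPoint(0.0, loss, dloss);
    std::printf("loss=%g dloss=%g\n", loss, dloss);  // log(2)=0.693..., -0.5
    return 0;
}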
Example #3
/** The subgradient of the L1-norm regularizer is chosen as sgn(w)
 */
void CL1N1::ComputeRegAndGradient(CModel& model, double& reg, TheMatrix& grad)
{
   reg = 0;
   TheMatrix &w = model.GetW();
   w.Norm1(reg);
   grad.Zero();
   for(int i=0; i<w.Length(); i++)
   {
      double val = 0;
      w.Get(i,val);
      grad.Set(i,SML::sgn(val));
   }
}
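sgn(w) is a valid subgradient of the L1 norm: |w_i| has derivative +1 or -1 away from zero, and 0 lies in the subdifferential [-1,1] at w_i = 0. A self-contained sketch, with sgn as a stand-in for SML::sgn (its value at exactly zero is an assumption); the same rule drives the gradient of |f - y| in CLeastAbsDevLoss below:

#include <cstdio>

// Stand-in for SML::sgn: returns -1, 0, or +1 (0 at w = 0 is assumed).
int sgn(double x) { return (x > 0.0) - (x < 0.0); }

int main()
{
   const double w[] = {-2.0, 0.0, 3.5};
   double reg = 0.0;
   for(int i = 0; i < 3; i++)
   {
      reg += (w[i] < 0.0) ? -w[i] : w[i];            // accumulate |w|_1
      std::printf("grad[%d] = %d\n", i, sgn(w[i]));
   }
   std::printf("reg = %g\n", reg);                   // prints reg = 5.5
   return 0;
}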
Example #4
/**
 *  Compute loss and gradient of Least Absolute Deviation loss w.r.t f
 *
 *  @param loss [write] loss value computed.
 *  @param f [r/w] = X*w
 *  @param l [write] partial derivative of loss w.r.t. f
 */
void CLeastAbsDevLoss::LossAndGrad(double& loss, TheMatrix& f, TheMatrix& l)
{
    loss = 0;
    l.Zero();
    double *Y_array = _data->labels().Data();
    double* f_array = f.Data();
    int len = f.Length();
    for(int i=0; i < len; i++)
    {
        double f_minus_y = f_array[i] - Y_array[i];
        loss += fabs(f_minus_y);
        l.Set(i, SML::sgn(f_minus_y));
    }
}
Example #5
/**  
 *  Compute loss and gradient of novelty detection loss. 
 *  CAUTION: f is passed by reference and is changed within this
 *  function. This is done for efficiency reasons, otherwise we would
 *  have had to create a new copy of f.
 *   
 *  @param loss [write] loss value computed.
 *  @param f [read/write] prediction vector. 
 *  @param l [write] partial derivative of loss function w.r.t. f
 */
void CNoveltyLoss::LossAndGrad(double& loss, TheMatrix& f, TheMatrix& l)
{
   loss = 0.0;
   double* f_array = f.Data();  // pointer to memory location of f (faster element access)
   int len = f.Length();
   l.Zero();  // grad := l'*X
   
   for(int i=0; i < len; i++) 
   {
      if(rho > f_array[i])
      {
         loss += rho - f_array[i];
         l.Set(i, -1.0);
      }
   }
}
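Per element this is the one-class hinge max(0, rho - f): scores below the margin rho contribute linearly with derivative -1, everything else contributes nothing. A standalone sketch (noveltyPoint is a hypothetical helper; rho is a member of the class above):

#include <cstdio>

// Pointwise novelty-detection loss max(0, rho - f) and its derivative.
void noveltyPoint(double f, double rho, double& loss, double& grad)
{
   if(f < rho) { loss = rho - f; grad = -1.0; }
   else        { loss = 0.0;     grad =  0.0; }
}

int main()
{
   double loss = 0.0, grad = 0.0;
   noveltyPoint(0.3, 1.0, loss, grad);
   std::printf("loss=%g grad=%g\n", loss, grad);  // prints loss=0.7 grad=-1
   return 0;
}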
Example #6
/**
 *  Compute loss and partial derivative of NDCGRank loss w.r.t f
 *   
 *  @param loss [write] loss value computed.
 *  @param f [r/w] = X*w
 *  @param l [write] partial derivative of loss w.r.t. f
 */
void CNDCGRankLoss::LossAndGrad(Scalar& loss, TheMatrix& f, TheMatrix& l)
{
  // chteo: here we make use of the subset information
  loss = 0.0;
  l.Zero();
  Scalar* f_array = f.Data();
  for(int q=0; q < _data->NumOfSubset(); q++)
  {
    int offset = _data->subset[q].startIndex;
    int subsetsize = _data->subset[q].size;
    current_ideal_pi = sort_vectors[q];
    vector<double> b = bs[q];

    /* find the best permutation */
    find_permutation(subsetsize, offset, a, b, c, f_array, pi);

    /* compute the loss */
    double value;
    delta(subsetsize, a, b, pi, value);
    loss += value;

    for(int i=0; i < subsetsize; i++)
      loss += c[i]*(get(f_array, offset, pi[i]) - get(f_array, offset, i));

    /* accumulate the gradient w.r.t. f */
    for(int i=0; i < subsetsize; i++)
    {
      add(l, offset, i, -c[i]);
      add(l, offset, pi[i], c[i]);
    }
  }
}
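Once find_permutation has returned the most violating permutation pi for a query, the remaining loss and gradient updates are linear in f. The toy sketch below isolates that arithmetic; f, c, pi, and l are stand-ins for the per-query slices that CNDCGRankLoss accesses through get() and add():

#include <cstdio>
#include <vector>

// Toy sketch of the linear part of the loss above: the score term is
// sum_i c[i] * (f[pi[i]] - f[i]) and its gradient w.r.t. f accumulates
// -c[i] at position i and +c[i] at position pi[i].
int main()
{
   std::vector<double> f  = {0.9, 0.1, 0.5};
   std::vector<double> c  = {1.0, 0.5, 0.25};
   std::vector<int>    pi = {2, 0, 1};           // some permutation
   std::vector<double> l(f.size(), 0.0);         // gradient accumulator

   double term = 0.0;
   for(size_t i = 0; i < f.size(); i++)
   {
      term     += c[i] * (f[pi[i]] - f[i]);
      l[i]     -= c[i];
      l[pi[i]] += c[i];
   }
   std::printf("linear term = %g\n", term);      // prints -0.1
   return 0;
}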
Example #7
/**   Compute loss and gradient of a max-margin (margin-rescaled) structured loss
 */
void CGenericLoss::ComputeLossAndGradient(double& loss, TheMatrix& grad)
{
  loss = 0;
  grad.Zero();
  TheMatrix &w = _model->GetW();
  double* dat = w.Data();
  double* raw_g = grad.Data();

  {
    double* resy;     // joint feature map Phi(x,y) for the true labeling
    double* resybar;  // joint feature map Phi(x,ybar) for the predicted labeling

    map<int,int> ybar;

    resy = new double [data->dim()];
    resybar = new double [data->dim()];

    // loss-augmented inference: fill ybar with the most violating labeling
    minimize(data->nodeFeatures, &(data->nodeLabels), data->edgeFeatures, dat, dat + data->nNodeFeatures, ybar, data->nNodeFeatures, data->nEdgeFeatures, data->lossPositive, data->lossNegative, data->indexEdge, NULL, 1, data->firstOrderResponses);

    Phi(data->nodeFeatures, &(data->nodeLabels), data->edgeFeatures, data->nNodeFeatures, data->nEdgeFeatures, resy,    resy    + data->nNodeFeatures, data->indexEdge);
    Phi(data->nodeFeatures, &ybar,               data->edgeFeatures, data->nNodeFeatures, data->nEdgeFeatures, resybar, resybar + data->nNodeFeatures, data->indexEdge);

    loss += LabelLoss(data->nodeLabels, ybar, data->lossPositive, data->lossNegative, LOSS);

    for (int j = 0; j < (int) data->dim(); j ++)
    {
      loss += dat[j]*(resybar[j]-resy[j]);
      raw_g[j] += (1.0/data->N)*(resybar[j]-resy[j]);
    }

    delete [] resy;
    delete [] resybar;
  }

  loss = loss/data->N;
}
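The loop over j accumulates the margin-rescaled structured hinge for one example, loss = Delta(y, ybar) + <w, Phi(x, ybar) - Phi(x, y)>. A toy sketch of that accumulation with made-up numbers:

#include <cstdio>

// Toy sketch of the structured hinge computed above: delta is the label loss
// Delta(y, ybar); phiY/phiYbar stand in for the joint feature maps resy/resybar.
int main()
{
   const int dim = 3;
   double w[dim]       = {0.2, -0.1, 0.4};
   double phiY[dim]    = {1.0, 0.0, 1.0};   // Phi for the true labeling
   double phiYbar[dim] = {0.0, 1.0, 1.0};   // Phi for the argmax labeling
   double delta = 0.5;                      // label loss Delta(y, ybar)

   double loss = delta;
   for(int j = 0; j < dim; j++)
      loss += w[j] * (phiYbar[j] - phiY[j]);
   std::printf("loss = %g\n", loss);        // prints 0.2
   return 0;
}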
Example #8
/**   Compute loss and gradient
 */
void CSMMMulticlassLoss::ComputeLossAndGradient(double& loss, TheMatrix& grad)
{
   iterNum ++;
   TheMatrix &w = _model->GetW();
   loss = 0;
   grad.Zero();
   TheMatrix g(grad, SML::DENSE);
   
   const vector<CSeqMulticlassLabel::seqlabel_struct> &Y = _data->labels();
   const vector<CSeqMulticlassFeature::seqfeature_struct> &X = _data->features();
   
   unsigned int trainExNum = 0;
   vector<int> cvmark = _data->Getcvmark();
   for(unsigned int i=0; i < m; i++)
   {
      if(cvmark.size()!=0)			
      {
         if(cvmark[i]!=SMM::TRAIN_DATA)
            continue;
      }
      trainExNum++;
      
      vector<unsigned int> ybar(X[i].len,0);
      vector<unsigned int> ybarlabel(X[i].len,0);
      double labelloss = 0;
      double marginloss = 0;
      double w_dot_g = 0.0;
      
      // find the best label y' and return the score w.r.t. y'
      if(verbosity>=2)
      {
         cout << "ex:" << i << endl; fflush(stdout);
      }
      
      if(is_single_action_persequence)
         find_best_label_grammer(Y[i].pos,Y[i].type, X[i], w, ybar, ybarlabel, marginloss, labelloss, 0, _data->getNumOfClass());
      else
         find_best_label(Y[i].pos,Y[i].type, X[i], w, ybar, ybarlabel, marginloss, labelloss, 0, _data->getNumOfClass());
      
      double labelloss_y = 0;
      double marginloss_y = 0;
      double labelloss_ybar = 0;
      double marginloss_ybar = 0;
      
      
      ComputeLoss(Y[i].pos,Y[i].type,ybar,ybarlabel,X[i],w,marginloss_ybar,labelloss_ybar,1);
      if(lossw[0]!=0)
         labelloss+=lossw[0];
      
      if(lastDuration>0)
      {
         marginloss = marginloss_ybar;
         labelloss = labelloss_ybar;
      }
      if(verbosity>=3)
      {					
         ComputeLoss(Y[i].pos,Y[i].type,Y[i].pos,Y[i].type,X[i],w,marginloss_y,labelloss_y,1);
         printf("dp------marginloss:%2.4f---labelloss:%2.4f------\n",marginloss,labelloss);	
         printf("ybar----marginloss:%2.4f---labelloss:%2.4f------\n",marginloss_ybar,labelloss_ybar);
         printf("y-------marginloss:%2.4f---labelloss:%2.4f------\n",marginloss_y,labelloss_y);			
         if(fabs(labelloss_ybar-labelloss)>1e-5)
         {
            printf("labelloss doesn't match!\n");
            //exit(0);
         }
         if(fabs(marginloss_ybar-marginloss)>1e-5)
         {
            printf("marginloss_ybar_dp:%2.4f != marginloss_ybar_computeLoss:%2.4f\n",marginloss,marginloss_ybar);
            printf("marginloss doesn't match!\n");
         }
      }
      
      // construct the gradient vector for the part of true y
      const vector<unsigned int> &y = Y[i].pos;
      const vector<unsigned int> &ylabel = Y[i].type;
      g.Zero();
      
      for(unsigned int j=0; j < y.size(); j++)
      {
         _data->TensorPhi1(X[i].phi_1[y[j]],ylabel[j],0,tphi_1);
         g.Add(*tphi_1);
         if(j > 0)
         {
            _data->TensorPhi2(X[i].phi_2[y[j-1]][y[j]-y[j-1]-1], ylabel[j-1], ylabel[j], 0,0,tphi_2);
            g.Add(*tphi_2);
         }
      }
      if(y.size() > 0)
      {
         _data->TensorPhi2(X[i].phi_2[y[y.size()-1]][X[i].len - y[y.size()-1]-1 ], ylabel[y.size()-1], 0,0,0,tphi_2);
         g.Add(*tphi_2);
      }
      
      // for predicted y'
      for(unsigned int j=0; j < ybar.size(); j++)
      {
         _data->TensorPhi1(X[i].phi_1[ybar[j]],ybarlabel[j],0,tphi_1);
         grad.Add(*tphi_1);
         if(j > 0)
         {
            _data->TensorPhi2(X[i].phi_2[ybar[j-1]][ybar[j]-ybar[j-1]-1], ybarlabel[j-1], ybarlabel[j], 0,0,tphi_2);
            grad.Add(*tphi_2);
         }
      }
      if(ybar.size() > 0)
      {
         _data->TensorPhi2(X[i].phi_2[ybar[ybar.size()-1]][X[i].len - ybar[ybar.size()-1]-1 ], ybarlabel[ybar.size()-1], 0, 0,0,tphi_2);
         grad.Add(*tphi_2);
      }
      grad.Minus(g);
      
      // accumulate the loss
      w.Dot(g, w_dot_g);
      loss = loss - w_dot_g + marginloss + labelloss;
   }
   scalingFactor = 1.0/trainExNum;
   grad.Scale(scalingFactor);
   loss *= scalingFactor;
   
   if(verbosity)
   {
      double gnorm = 0.0;
      grad.Norm2(gnorm);
      cout << "gradient norm=" << gnorm << endl;
   }
   //Evaluate(_model);
}
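The gradient difference grad - g above is built from a decomposable joint feature map: unary features phi_1 for every segment boundary plus pairwise features phi_2 for consecutive boundaries. The toy sketch below shows that assembly for a single sequence; phi1/phi2 are made-up stand-ins for the tensored features produced by TensorPhi1/TensorPhi2:

#include <cstdio>
#include <vector>

// Toy sketch of assembling the feature vector g for one label sequence:
// one unary feature per boundary, one pairwise feature per consecutive pair.
int main()
{
   const int dim = 2;
   std::vector<unsigned> y = {0, 3, 7};       // segment start positions
   double g[dim] = {0.0, 0.0};

   for(size_t j = 0; j < y.size(); j++)
   {
      double phi1[dim] = {1.0, double(y[j])};              // toy unary feature
      for(int d = 0; d < dim; d++) g[d] += phi1[d];
      if(j > 0)
      {
         double phi2[dim] = {1.0, double(y[j] - y[j-1])};  // toy pairwise feature
         for(int d = 0; d < dim; d++) g[d] += phi2[d];
      }
   }
   std::printf("g = (%g, %g)\n", g[0], g[1]);              // prints g = (5, 17)
   return 0;
}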