Example #1
void CBMRM::DisplayAfterTrainingInfo(unsigned int iter, double finalExactObjVal, 
                                      double approxObjVal, double loss, 
                                      TheMatrix& w_best, CTimer& lossAndGradientTime,
                                      CTimer& innerSolverTime, CTimer& totalTime)
{
   // legends
   if(verbosity >= 1) 
   {
      printf("\n[Legends]\n");
      if(verbosity > 1)
         printf("pobj: primal objective function value"
                "\naobj: approximate objective function value\n");

      printf("gam: gamma (approximation error) "
             "\neps: lower bound on gam "
             "\nloss: loss function value "
             "\nreg: regularizer value\n");
   }
   
   double norm1 = 0, norm2 = 0, norminf = 0;
   w_best.Norm1(norm1);
   w_best.Norm2(norm2);
   w_best.NormInf(norminf);
   
   printf("\nNote: the final w is the w_t where J(w_t) is the smallest.\n");
   printf("No. of iterations:  %d\n",iter);
   printf("Primal obj. val.: %.6e\n",finalExactObjVal);
   printf("Approx obj. val.: %.6e\n",approxObjVal);
   printf("Primal - Approx.: %.6e\n",finalExactObjVal-approxObjVal);
   printf("Loss:             %.6e\n",loss);
   printf("|w|_1:            %.6e\n",norm1);
   printf("|w|_2:            %.6e\n",norm2);
   printf("|w|_oo:           %.6e\n",norminf);
   
   
   // display timing profile
   printf("\nCPU seconds in:\n");
   printf("1. loss and gradient: %8.2f\n", lossAndGradientTime.CPUTotal());
   printf("2. solver:            %8.2f\n", innerSolverTime.CPUTotal()); 
   printf("               Total: %8.2f\n", totalTime.CPUTotal());
   printf("Wall-clock total:     %8.2f\n", totalTime.WallclockTotal());
}
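The timing report at the end relies on CTimer, a BMRM utility class not shown here. Below is a minimal, self-contained sketch of the same per-phase CPU accounting and %8.2f-aligned printout, using std::clock as a stand-in for CTimer (an assumption based on CPUTotal() reporting accumulated CPU seconds):

#include <cstdio>
#include <ctime>

// Illustrative stand-in for CTimer-based phase accounting; the two "phases"
// are dummy workloads, not BMRM's loss/gradient and inner-solver steps.
int main()
{
   std::clock_t t0 = std::clock();
   double x = 0;
   for(int i = 0; i < 10000000; i++) x += i * 1e-9;   // phase 1: some work
   double phase1 = double(std::clock() - t0) / CLOCKS_PER_SEC;

   t0 = std::clock();
   for(int i = 0; i < 5000000; i++) x -= i * 1e-9;    // phase 2: more work
   double phase2 = double(std::clock() - t0) / CLOCKS_PER_SEC;

   std::printf("x = %g\n", x);                        // keep the loops live
   std::printf("\nCPU seconds in:\n");
   std::printf("1. phase one: %8.2f\n", phase1);
   std::printf("2. phase two: %8.2f\n", phase2);
   std::printf("       Total: %8.2f\n", phase1 + phase2);
   return 0;
}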
Example #2
/**   Compute loss and gradient
 */
void CSMMMulticlassLoss::ComputeLossAndGradient(double& loss, TheMatrix& grad)
{
   iterNum++;
   TheMatrix &w = _model->GetW();
   loss = 0;
   grad.Zero();
   TheMatrix g(grad, SML::DENSE);   // dense scratch vector for the feature map of the true label sequence
   
   const vector<CSeqMulticlassLabel::seqlabel_struct> &Y = _data->labels();
   const vector<CSeqMulticlassFeature::seqfeature_struct> &X = _data->features();
   
   unsigned int trainExNum = 0;
   vector<int> cvmark = _data->Getcvmark();
   for(unsigned int i=0; i < m; i++)
   {
      // skip examples not marked as training data (cross-validation split)
      if(!cvmark.empty() && cvmark[i] != SMM::TRAIN_DATA)
         continue;
      trainExNum++;
      
      vector<unsigned int> ybar(X[i].len, 0);
      vector<unsigned int> ybarlabel(X[i].len, 0);
      double labelloss = 0;
      double marginloss = 0;
      double w_dot_g = 0.0;
      
      // find the best label sequence y' and return the score w.r.t. y'
      if(verbosity >= 2)
      {
         cout << "ex:" << i << endl;
         fflush(stdout);
      }
      
      if(is_single_action_persequence)
         find_best_label_grammer(Y[i].pos, Y[i].type, X[i], w, ybar, ybarlabel,
                                 marginloss, labelloss, 0, _data->getNumOfClass());
      else
         find_best_label(Y[i].pos, Y[i].type, X[i], w, ybar, ybarlabel,
                         marginloss, labelloss, 0, _data->getNumOfClass());
      
      double labelloss_y = 0;
      double marginloss_y = 0;
      double labelloss_ybar = 0;
      double marginloss_ybar = 0;
      
      
      // recompute the margin and label loss of ybar directly, for verification
      ComputeLoss(Y[i].pos, Y[i].type, ybar, ybarlabel, X[i], w, marginloss_ybar, labelloss_ybar, 1);
      if(lossw[0] != 0)
         labelloss += lossw[0];
      
      if(lastDuration > 0)
      {
         // with a constrained last-segment duration, use the recomputed values
         marginloss = marginloss_ybar;
         labelloss = labelloss_ybar;
      }
      if(verbosity >= 3)
      {
         // cross-check the DP losses against a direct ComputeLoss evaluation
         ComputeLoss(Y[i].pos, Y[i].type, Y[i].pos, Y[i].type, X[i], w, marginloss_y, labelloss_y, 1);
         printf("dp------marginloss:%2.4f---labelloss:%2.4f------\n", marginloss, labelloss);
         printf("ybar----marginloss:%2.4f---labelloss:%2.4f------\n", marginloss_ybar, labelloss_ybar);
         printf("y-------marginloss:%2.4f---labelloss:%2.4f------\n", marginloss_y, labelloss_y);
         if(fabs(labelloss_ybar - labelloss) > 1e-5)
            printf("labelloss doesn't match!\n");
         if(fabs(marginloss_ybar - marginloss) > 1e-5)
         {
            printf("marginloss_ybar_dp:%2.4f != marginloss_ybar_computeLoss:%2.4f\n", marginloss, marginloss_ybar);
            printf("marginloss doesn't match!\n");
         }
      }
      
      // construct the feature vector (gradient part) of the true label sequence y
      const vector<unsigned int> &y = Y[i].pos;
      const vector<unsigned int> &ylabel = Y[i].type;
      g.Zero();
      
      for(unsigned int j=0; j < y.size(); j++)
      {
         // unary feature of the segment starting at y[j], labeled ylabel[j]
         _data->TensorPhi1(X[i].phi_1[y[j]], ylabel[j], 0, tphi_1);
         g.Add(*tphi_1);
         if(j > 0)
         {
            // transition feature between consecutive segments
            _data->TensorPhi2(X[i].phi_2[y[j-1]][y[j]-y[j-1]-1], ylabel[j-1], ylabel[j], 0, 0, tphi_2);
            g.Add(*tphi_2);
         }
      }
      if(y.size() > 0)
      {
         // transition feature of the last segment, running to the end of the sequence
         _data->TensorPhi2(X[i].phi_2[y[y.size()-1]][X[i].len - y[y.size()-1]-1], ylabel[y.size()-1], 0, 0, 0, tphi_2);
         g.Add(*tphi_2);
      }
      
      // ... and the same for the predicted sequence ybar
      for(unsigned int j=0; j < ybar.size(); j++)
      {
         _data->TensorPhi1(X[i].phi_1[ybar[j]], ybarlabel[j], 0, tphi_1);
         grad.Add(*tphi_1);
         if(j > 0)
         {
            _data->TensorPhi2(X[i].phi_2[ybar[j-1]][ybar[j]-ybar[j-1]-1], ybarlabel[j-1], ybarlabel[j], 0, 0, tphi_2);
            grad.Add(*tphi_2);
         }
      }
      if(ybar.size() > 0)
      {
         _data->TensorPhi2(X[i].phi_2[ybar[ybar.size()-1]][X[i].len - ybar[ybar.size()-1]-1], ybarlabel[ybar.size()-1], 0, 0, 0, tphi_2);
         grad.Add(*tphi_2);
      }
      grad.Minus(g);   // grad accumulates phi(x,ybar) - phi(x,y)
      
      
      // accumulate the loss: marginloss is the score of ybar under w,
      // w_dot_g the score of the true y, labelloss the label cost of ybar
      w.Dot(g, w_dot_g);
      loss = loss - w_dot_g + marginloss + labelloss;
      
   }
   // average the loss and gradient over the training examples
   scalingFactor = 1.0/trainExNum;
   grad.Scale(scalingFactor);
   loss *= scalingFactor;
   
   if(verbosity)
   {
      double gnorm = 0.0;
      grad.Norm2(gnorm);
      cout << "gradient norm=" << gnorm << endl;
   }
}
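Reading the accumulation loop together with the final scaling: w_dot_g is the score of the true sequence, and, assuming marginloss returned by find_best_label is the score of the best competing sequence and labelloss its label cost (which is how the two quantities enter the accumulation), the routine computes the margin-rescaled structured hinge loss and its subgradient, averaged over the n training examples:

loss = \frac{1}{n} \sum_{i=1}^{n} \big[ \langle w, \phi(x_i,\bar{y}_i) \rangle + \Delta(y_i,\bar{y}_i) - \langle w, \phi(x_i,y_i) \rangle \big],
\qquad
grad = \frac{1}{n} \sum_{i=1}^{n} \big[ \phi(x_i,\bar{y}_i) - \phi(x_i,y_i) \big],

where \bar{y}_i is the loss-augmented argmax found by find_best_label / find_best_label_grammer. In the code, grad accumulates \phi(x_i,\bar{y}_i) directly and grad.Minus(g) subtracts the true-sequence feature map \phi(x_i,y_i).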