void CBMRM::DisplayAfterTrainingInfo(unsigned int iter, double finalExactObjVal,
                                     double approxObjVal, double loss,
                                     TheMatrix& w_best, CTimer& lossAndGradientTime,
                                     CTimer& innerSolverTime, CTimer& totalTime)
{
    // legends
    if(verbosity >= 1)
    {
        printf("\n[Legends]\n");
        if(verbosity > 1)
            printf("pobj: primal objective function value"
                   "\naobj: approximate objective function value\n");
        printf("gam: gamma (approximation error) "
               "\neps: lower bound on gam "
               "\nloss: loss function value "
               "\nreg: regularizer value\n");
    }

    // norms of the best weight vector found
    double norm1 = 0, norm2 = 0, norminf = 0;
    w_best.Norm1(norm1);
    w_best.Norm2(norm2);
    w_best.NormInf(norminf);

    printf("\nNote: the final w is the w_t where J(w_t) is the smallest.\n");
    printf("No. of iterations: %u\n", iter);   // iter is unsigned, so %u rather than %d
    printf("Primal obj. val.: %.6e\n", finalExactObjVal);
    printf("Approx obj. val.: %.6e\n", approxObjVal);
    printf("Primal - Approx.: %.6e\n", finalExactObjVal - approxObjVal);
    printf("Loss: %.6e\n", loss);
    printf("|w|_1: %.6e\n", norm1);
    printf("|w|_2: %.6e\n", norm2);
    printf("|w|_oo: %.6e\n", norminf);

    // display timing profile
    printf("\nCPU seconds in:\n");
    printf("1. loss and gradient: %8.2f\n", lossAndGradientTime.CPUTotal());
    printf("2. solver:            %8.2f\n", innerSolverTime.CPUTotal());
    printf("   Total:             %8.2f\n", totalTime.CPUTotal());
    printf("Wall-clock total:     %8.2f\n", totalTime.WallclockTotal());
}
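/*
 * "Primal - Approx." above is the bundle gap (the quantity the legend calls
 * gam, the approximation error). The cutting-plane model J_t minimized by the
 * inner solver lower-bounds the true objective J, and w_t minimizes J_t, so
 * J_t(w_t) <= J_t(w*) <= J(w*) and hence
 *
 *     J(w_best) - min_w J(w)  <=  J(w_best) - J_t(w_t)  =  pobj - aobj.
 *
 * A small printed gap therefore certifies that the returned w_best is
 * near-optimal.
 */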
/** Compute the loss and (sub)gradient of the structured hinge loss:
 *
 *    loss = (1/n) sum_i [ Delta(y_i, ybar_i) + <w, phi(x_i, ybar_i)> - <w, phi(x_i, y_i)> ]
 *    grad = (1/n) sum_i [ phi(x_i, ybar_i) - phi(x_i, y_i) ]
 *
 *  where ybar_i is the loss-augmented argmax found by dynamic programming.
 */
void CSMMMulticlassLoss::ComputeLossAndGradient(double& loss, TheMatrix& grad)
{
    iterNum++;
    TheMatrix &w = _model->GetW();
    loss = 0;
    grad.Zero();
    TheMatrix g(grad, SML::DENSE);  // buffer for phi(x_i, y_i)

    const vector<CSeqMulticlassLabel::seqlabel_struct> &Y = _data->labels();
    const vector<CSeqMulticlassFeature::seqfeature_struct> &X = _data->features();
    unsigned int trainExNum = 0;
    vector<int> cvmark = _data->Getcvmark();

    for(unsigned int i=0; i < m; i++)
    {
        // in cross-validation mode, skip examples not marked as training data
        if(cvmark.size() != 0 && cvmark[i] != SMM::TRAIN_DATA)
            continue;
        trainExNum++;

        vector<unsigned int> ybar(X[i].len, 0);
        vector<unsigned int> ybarlabel(X[i].len, 0);
        double labelloss = 0;
        double marginloss = 0;
        double w_dot_g = 0.0;

        if(verbosity >= 2)
        {
            cout << "ex:" << i << endl;
            fflush(stdout);
        }

        // loss-augmented inference: find the best labeling y' and its
        // margin and label-loss scores w.r.t. the true label Y[i]
        if(is_single_action_persequence)
            find_best_label_grammer(Y[i].pos, Y[i].type, X[i], w, ybar, ybarlabel,
                                    marginloss, labelloss, 0, _data->getNumOfClass());
        else
            find_best_label(Y[i].pos, Y[i].type, X[i], w, ybar, ybarlabel,
                            marginloss, labelloss, 0, _data->getNumOfClass());

        double labelloss_y = 0;
        double marginloss_y = 0;
        double labelloss_ybar = 0;
        double marginloss_ybar = 0;

        // recompute the losses of ybar directly, as a consistency check on the DP
        ComputeLoss(Y[i].pos, Y[i].type, ybar, ybarlabel, X[i], w,
                    marginloss_ybar, labelloss_ybar, 1);
        if(lossw[0] != 0)
            labelloss += lossw[0];
        if(lastDuration > 0)
        {
            // with a fixed last-segment duration, use the recomputed losses
            marginloss = marginloss_ybar;
            labelloss = labelloss_ybar;
        }

        if(verbosity >= 3)
        {
            ComputeLoss(Y[i].pos, Y[i].type, Y[i].pos, Y[i].type, X[i], w,
                        marginloss_y, labelloss_y, 1);
            printf("dp------marginloss:%2.4f---labelloss:%2.4f------\n", marginloss, labelloss);
            printf("ybar----marginloss:%2.4f---labelloss:%2.4f------\n", marginloss_ybar, labelloss_ybar);
            printf("y-------marginloss:%2.4f---labelloss:%2.4f------\n", marginloss_y, labelloss_y);
            // fabs, not abs: abs() would truncate these doubles to int
            if(fabs(labelloss_ybar - labelloss) > 1e-5)
                printf("labelloss doesn't match!\n");
            if(fabs(marginloss_ybar - marginloss) > 1e-5)
            {
                printf("marginloss_ybar_dp:%2.4f != marginloss_ybar_computeLoss:%2.4f\n",
                       marginloss, marginloss_ybar);
                printf("marginloss doesn't match!\n");
            }
        }

        // construct phi(x_i, y_i) for the true labeling y: node features phi_1
        // plus transition features phi_2 between consecutive boundaries
        const vector<unsigned int> &y = Y[i].pos;
        const vector<unsigned int> &ylabel = Y[i].type;
        g.Zero();
        for(unsigned int j=0; j < y.size(); j++)
        {
            _data->TensorPhi1(X[i].phi_1[y[j]], ylabel[j], 0, tphi_1);
            g.Add(*tphi_1);
            if(j > 0)
            {
                _data->TensorPhi2(X[i].phi_2[y[j-1]][y[j]-y[j-1]-1],
                                  ylabel[j-1], ylabel[j], 0, 0, tphi_2);
                g.Add(*tphi_2);
            }
        }
        if(y.size() > 0)
        {
            // transition from the last boundary to the end of the sequence
            _data->TensorPhi2(X[i].phi_2[y[y.size()-1]][X[i].len - y[y.size()-1] - 1],
                              ylabel[y.size()-1], 0, 0, 0, tphi_2);
            g.Add(*tphi_2);
        }

        // accumulate phi(x_i, ybar_i) for the predicted labeling y' into grad
        for(unsigned int j=0; j < ybar.size(); j++)
        {
            _data->TensorPhi1(X[i].phi_1[ybar[j]], ybarlabel[j], 0, tphi_1);
            grad.Add(*tphi_1);
            if(j > 0)
            {
                _data->TensorPhi2(X[i].phi_2[ybar[j-1]][ybar[j]-ybar[j-1]-1],
                                  ybarlabel[j-1], ybarlabel[j], 0, 0, tphi_2);
                grad.Add(*tphi_2);
            }
        }
        if(ybar.size() > 0)
        {
            _data->TensorPhi2(X[i].phi_2[ybar[ybar.size()-1]][X[i].len - ybar[ybar.size()-1] - 1],
                              ybarlabel[ybar.size()-1], 0, 0, 0, tphi_2);
            grad.Add(*tphi_2);
        }

        // grad += phi(ybar) - phi(y)
        grad.Minus(g);

        // accumulate the loss: marginloss already holds <w, phi(ybar)>, so
        // loss += Delta(y, ybar) + <w, phi(ybar)> - <w, phi(y)>
        w.Dot(g, w_dot_g);
        loss = loss - w_dot_g + marginloss + labelloss;
    }

    // average over the training examples
    scalingFactor = 1.0/trainExNum;
    grad.Scale(scalingFactor);
    loss *= scalingFactor;

    if(verbosity)
    {
        double gnorm = 0.0;
        grad.Norm2(gnorm);
        cout << "gradient norm=" << gnorm << endl;
    }
}
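/*
 * The loss accumulated above is the margin-rescaled structured hinge used in
 * max-margin sequence labeling. The standalone sketch below illustrates the
 * same two steps (loss-augmented decoding, then the subgradient
 * phi(ybar) - phi(y)) on a toy per-position multiclass problem. It drops the
 * semi-Markov segments and transition features (phi_2) of the real code, and
 * every name in it is illustrative, not part of this codebase.
 */
#include <cstdio>
#include <vector>

int main()
{
    // toy setup: 4 positions, 2 classes, feature map = per-class counts,
    // so <w, phi(y)> = sum_t w[y_t], and Delta = Hamming distance
    std::vector<double> w = {1.2, -0.2};   // one weight per class
    std::vector<int>    y = {0, 1, 1, 0};  // true labeling

    double scoreTrue = 0, scoreAug = 0, labelLoss = 0;
    std::vector<int> ybar(y.size());
    for(std::size_t t = 0; t < y.size(); t++)
    {
        scoreTrue += w[y[t]];
        // loss-augmented decoding: ybar_t = argmax_c  w[c] + [c != y_t]
        int best = 0;
        double bestVal = w[0] + (y[t] != 0 ? 1.0 : 0.0);
        for(int c = 1; c < 2; c++)
        {
            double v = w[c] + (y[t] != c ? 1.0 : 0.0);
            if(v > bestVal) { bestVal = v; best = c; }
        }
        ybar[t] = best;
        scoreAug  += w[best];
        labelLoss += (ybar[t] != y[t]) ? 1.0 : 0.0;
    }

    // hinge loss: [Delta(y,ybar) + <w,phi(ybar)>] - <w,phi(y)>, which mirrors
    // "loss = loss - w_dot_g + marginloss + labelloss" above
    double loss = labelLoss + scoreAug - scoreTrue;

    // subgradient: phi(ybar) - phi(y), here per-class counts, mirroring
    // grad.Add(...) over ybar followed by grad.Minus(g) above
    std::vector<double> grad(2, 0.0);
    for(std::size_t t = 0; t < y.size(); t++)
    {
        grad[ybar[t]] += 1.0;
        grad[y[t]]    -= 1.0;
    }
    std::printf("loss=%.2f grad=(%.1f,%.1f)\n", loss, grad[0], grad[1]);
    return 0;  // prints: loss=4.80 grad=(2.0,-2.0)
}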