/**
 * Compute loss and gradient of Huber hinge loss.
 * CAUTION: f is passed by reference and is changed within this
 * function. This is done for efficiency reasons, otherwise we would
 * have had to create a new copy of f.
 *
 * @param loss [write] loss value computed.
 * @param f [read/write] prediction vector.
 * @param l [write] partial derivative of loss function w.r.t. f
 */
void CHuberHingeLoss::LossAndGrad(double& loss, TheMatrix& f, TheMatrix& l)
{
   f.ElementWiseMult(_data->labels());   // f := y.*f
   double* yf = f.Data();
   double* Y = _data->labels().Data();
   int len = f.Length();
   loss = 0.0;
   l.Zero();

   for(int i = 0; i < len; i++)
   {
      double v = 1 - yf[i];
      if(h < v)
      {
         // linear region (v > h): behaves like the plain hinge
         loss += v;
         l.Set(i, -Y[i]);
      }
      else if(-h > v)
      {
         // well-classified region (v < -h): no loss, no gradient
      }
      else
      {
         // transition region (|v| <= h): quadratic smoothing
         loss += (v+h)*(v+h)/4/h;
         l.Set(i, -Y[i]*(v+h)/2/h);
      }
   }
}
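// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the library): a scalar version of the
// Huber hinge loss used above, written out only to make the piecewise formula
// explicit. The smoothing parameter h is assumed to play the same role as the
// class member h in CHuberHingeLoss::LossAndGrad.
// ---------------------------------------------------------------------------
static double scalarHuberHingeLoss(double y, double f, double h)
{
   double v = 1.0 - y*f;
   if(v > h)        return v;                     // linear part
   else if(v < -h)  return 0.0;                   // no loss when well classified
   else             return (v+h)*(v+h)/(4.0*h);   // quadratic smoothing for |v| <= h
}

static double scalarHuberHingeGrad(double y, double f, double h)   // d loss / d f
{
   double v = 1.0 - y*f;
   if(v > h)        return -y;
   else if(v < -h)  return 0.0;
   else             return -y*(v+h)/(2.0*h);
}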
/**
 * Compute loss and partial derivative of logistic loss w.r.t f
 *
 * @param loss [write] loss value computed.
 * @param f [r/w] = X*w
 * @param l [write] partial derivative of loss w.r.t. f
 */
void CLogisticLoss::LossAndGrad(double& loss, TheMatrix& f, TheMatrix& l)
{
   loss = 0.0;
   l.Zero();                            // for gradient computation i.e. grad := l'*X
   f.ElementWiseMult(_data->labels());  // f := y.*f
   double* f_array = f.Data();          // pointer to memory location of f (faster element access)
   int len = f.Length();
   double exp_yf = 0.0;

   for(int i = 0; i < len; i++)
   {
      if(fabs(f_array[i]) == 0.0)
      {
         // y*f == 0: loss is exactly log(2), derivative is -1/2
         loss += LN2;
         l.Set(i, -0.5);
      }
      else if(f_array[i] > 0.0)
      {
         // y*f > 0: evaluate via exp(-y*f) to avoid overflow
         exp_yf = exp(-f_array[i]);
         loss += log(1+exp_yf);
         l.Set(i, -exp_yf/(1+exp_yf));
      }
      else
      {
         // y*f < 0: use log(1+exp(y*f)) - y*f, the equivalent stable form
         exp_yf = exp(f_array[i]);
         loss += log(1+exp_yf) - f_array[i];
         l.Set(i, -1.0/(1+exp_yf));
      }
   }
   l.ElementWiseMult(_data->labels());  // chain rule: d loss/d f = y * d loss/d(y*f)
}
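// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the library): the branching above is a
// numerically stable way of evaluating log(1 + exp(-y*f)) and its derivative.
// A scalar version, taking yf = y*f, could look like this; the library then
// multiplies by the label to obtain the derivative w.r.t. f.
// ---------------------------------------------------------------------------
#include <cmath>

static double scalarLogisticLoss(double yf)
{
   if(yf > 0.0)
      return log(1.0 + exp(-yf));      // exp(-yf) <= 1: no overflow
   else
      return log(1.0 + exp(yf)) - yf;  // algebraically identical, avoids exp of large positive values
}

static double scalarLogisticGrad(double yf)   // d loss / d(y*f)
{
   if(yf > 0.0)
      return -exp(-yf)/(1.0 + exp(-yf));
   else
      return -1.0/(1.0 + exp(yf));
}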
/**
 * Compute NDCGRank loss. CAUTION: f is passed by reference and is
 * changed within this function. This is done for efficiency reasons,
 * otherwise we would have had to create a new copy of f.
 *
 * @param loss [write] loss value computed.
 * @param f [read/write] prediction vector.
 */
void CNDCGRankLoss::Loss(Scalar& loss, TheMatrix& f)
{
   // chteo: here we make use of the subset information
   loss = 0.0;
   Scalar* f_array = f.Data();

   for(int q = 0; q < _data->NumOfSubset(); q++)
   {
      int offset = _data->subset[q].startIndex;
      int subsetsize = _data->subset[q].size;
      current_ideal_pi = sort_vectors[q];
      vector<double> b = bs[q];
      //compute_coefficients(offset, subsetsize, y_array, current_ideal_pi, a, b);

      /* find the best permutation */
      find_permutation(subsetsize, offset, a, b, c, f_array, pi);

      /* compute the loss */
      double value;
      delta(subsetsize, a, b, pi, value);
      loss += value;

      for(int i = 0; i < subsetsize; i++)
      {
         loss += c[i]*(get(f_array, offset, pi[i]) - get(f_array, offset, i));
      }
      //free(c); //free(a); //free(b); //free(pi);
   }
}
/**
 * Compute loss and gradient of Least Absolute Deviation loss w.r.t f
 *
 * @param loss [write] loss value computed.
 * @param f [r/w] = X*w
 * @param l [write] partial derivative of loss w.r.t. f
 */
void CLeastAbsDevLoss::LossAndGrad(double& loss, TheMatrix& f, TheMatrix& l)
{
   loss = 0;
   l.Zero();
   double* Y_array = _data->labels().Data();
   double* f_array = f.Data();
   int len = f.Length();

   for(int i = 0; i < len; i++)
   {
      double f_minus_y = f_array[i] - Y_array[i];
      loss += fabs(f_minus_y);
      l.Set(i, SML::sgn(f_minus_y));
   }
}
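// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the library): per example, the Least
// Absolute Deviation loss is |f - y| with subgradient sgn(f - y). A scalar
// version, assuming SML::sgn returns -1, 0 or +1:
// ---------------------------------------------------------------------------
#include <cmath>

static double scalarLADLoss(double y, double f)
{
   return fabs(f - y);
}

static double scalarLADGrad(double y, double f)   // a subgradient w.r.t. f
{
   double d = f - y;
   return (d > 0.0) ? 1.0 : ((d < 0.0) ? -1.0 : 0.0);
}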
/**
 * Compute loss and gradient of novelty detection loss.
 * CAUTION: f is passed by reference and is changed within this
 * function. This is done for efficiency reasons, otherwise we would
 * have had to create a new copy of f.
 *
 * @param loss [write] loss value computed.
 * @param f [read/write] prediction vector.
 * @param l [write] partial derivative of loss function w.r.t. f
 */
void CNoveltyLoss::LossAndGrad(double& loss, TheMatrix& f, TheMatrix& l)
{
   double* f_array = f.Data();   // pointer to memory location of f (faster element access)
   int len = f.Length();
   loss = 0.0;
   l.Zero();                     // grad := l'*X

   for(int i = 0; i < len; i++)
   {
      if(rho > f_array[i])
      {
         loss += rho - f_array[i];
         l.Set(i, -1.0);
      }
   }
}
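// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the library): the novelty-detection loss
// above is max(0, rho - f), a one-sided hinge around the margin rho, with
// derivative -1 wherever the loss is active. rho is assumed to be the same
// class member used in CNoveltyLoss::LossAndGrad.
// ---------------------------------------------------------------------------
static double scalarNoveltyLoss(double f, double rho)
{
   return (rho > f) ? (rho - f) : 0.0;
}

static double scalarNoveltyGrad(double f, double rho)   // a subgradient w.r.t. f
{
   return (rho > f) ? -1.0 : 0.0;
}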
/**
 * Compute logistic loss. CAUTION: f is passed by reference and is
 * changed within this function. This is done for efficiency reasons,
 * otherwise we would have had to create a new copy of f.
 *
 * @param loss [write] loss value computed.
 * @param f [read/write] prediction vector.
 */
void CLogisticLoss::Loss(double& loss, TheMatrix& f)
{
   loss = 0;
   f.ElementWiseMult(_data->labels());   // f = y*f
   double* f_array = f.Data();           // pointer to memory location of f (faster element access)
   int len = f.Length();

   for(int i = 0; i < len; i++)
   {
      if(fabs(f_array[i]) == 0.0)
         loss += LN2;
      else if(f_array[i] > 0.0)
         loss += log(1+exp(-f_array[i]));
      else
         loss += log(1+exp(f_array[i])) - f_array[i];
   }
}
/**
 * Compute loss and partial derivative of NDCGRank loss w.r.t f
 *
 * @param loss [write] loss value computed.
 * @param f [r/w] = X*w
 * @param l [write] partial derivative of loss w.r.t. f
 */
void CNDCGRankLoss::LossAndGrad(Scalar& loss, TheMatrix& f, TheMatrix& l)
{
   // chteo: here we make use of the subset information
   loss = 0.0;
   l.Zero();
   Scalar* f_array = f.Data();

   for(int q = 0; q < _data->NumOfSubset(); q++)
   {
      int offset = _data->subset[q].startIndex;
      int subsetsize = _data->subset[q].size;
      current_ideal_pi = sort_vectors[q];
      vector<double> b = bs[q];
      //compute_coefficients(offset, subsetsize, y_array, current_ideal_pi, a, b);

      /* find the best permutation */
      find_permutation(subsetsize, offset, a, b, c, f_array, pi);

      /* compute the loss */
      double value;
      delta(subsetsize, a, b, pi, value);
      loss += value;

      for(int i = 0; i < subsetsize; i++)
      {
         loss += c[i]*(get(f_array, offset, pi[i]) - get(f_array, offset, i));
      }

      /* accumulate the partial derivative w.r.t. f */
      for(int i = 0; i < subsetsize; i++)
      {
         //add(l, offset, i, c[pi[i]] - c[i]);
         add(l, offset, i, -c[i]);
         add(l, offset, pi[i], c[i]);
      }
   }
}
/**
 * Compute loss and gradient of the generic structured loss.
 * The most violating labeling ybar is obtained from minimize
 * (loss-augmented inference); loss and gradient are then the label loss plus
 * the margin term <w, Phi(ybar) - Phi(y)>, and Phi(ybar) - Phi(y),
 * averaged over the data.
 *
 * @param loss [write] loss value computed.
 * @param grad [write] gradient of loss w.r.t. w
 */
void CGenericLoss::ComputeLossAndGradient(double& loss, TheMatrix& grad)
{
   loss = 0;
   grad.Zero();
   TheMatrix& w = _model->GetW();
   double* dat = w.Data();
   double* raw_g = grad.Data();

   {
      double* resy;
      double* resybar;
      map<int,int> ybar;

      resy = new double[data->dim()];
      resybar = new double[data->dim()];

      // loss-augmented inference: find the most violating labeling ybar
      minimize(data->nodeFeatures, &(data->nodeLabels), data->edgeFeatures,
               dat, dat + data->nNodeFeatures, ybar,
               data->nNodeFeatures, data->nEdgeFeatures,
               data->lossPositive, data->lossNegative,
               data->indexEdge, NULL, 1, data->firstOrderResponses);

      // joint feature maps of the true labeling and of ybar
      Phi(data->nodeFeatures, &(data->nodeLabels), data->edgeFeatures,
          data->nNodeFeatures, data->nEdgeFeatures,
          resy, resy + data->nNodeFeatures, data->indexEdge);
      Phi(data->nodeFeatures, &ybar, data->edgeFeatures,
          data->nNodeFeatures, data->nEdgeFeatures,
          resybar, resybar + data->nNodeFeatures, data->indexEdge);

      // label loss plus margin term <w, Phi(ybar) - Phi(y)>
      loss += LabelLoss(data->nodeLabels, ybar, data->lossPositive, data->lossNegative, LOSS);
      for(int j = 0; j < (int) data->dim(); j++)
      {
         loss += dat[j]*(resybar[j] - resy[j]);
         raw_g[j] += (1.0/data->N)*(resybar[j] - resy[j]);
      }

      delete [] resy;
      delete [] resybar;
   }
   loss = loss/data->N;
}
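// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the library): the computation above follows
// the usual margin-rescaled structured hinge loss
//    loss = Delta(y, ybar) + <w, Phi(ybar) - Phi(y)>,   grad = Phi(ybar) - Phi(y),
// where ybar maximises Delta + <w, Phi(.)> over candidate labelings. The toy
// function below (all names hypothetical) spells this out for a single example
// with just two candidate labelings; the true labeling should be among the
// candidates with Delta = 0 so the loss stays non-negative.
// ---------------------------------------------------------------------------
#include <cstddef>

static void toyStructuredLossAndGrad(const double* w,
                                     const double* phi_true,         // Phi(x, y)
                                     const double* const candPhi[2], // Phi(x, cand)
                                     const double candDelta[2],      // Delta(y, cand)
                                     std::size_t dim,
                                     double& loss, double* grad)
{
   // loss-augmented inference over the two candidates
   int best = 0;
   double bestVal = -1e300;
   for(int c = 0; c < 2; c++)
   {
      double val = candDelta[c];
      for(std::size_t j = 0; j < dim; j++)
         val += w[j]*(candPhi[c][j] - phi_true[j]);
      if(val > bestVal) { bestVal = val; best = c; }
   }

   // structured hinge loss and its (sub)gradient at the maximiser
   loss = candDelta[best];
   for(std::size_t j = 0; j < dim; j++)
   {
      loss += w[j]*(candPhi[best][j] - phi_true[j]);
      grad[j] = candPhi[best][j] - phi_true[j];
   }
}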