double
SvmSgd::my_evaluateEta(int imin, int imax, const xvec_t &xp, const yvec_t &yp, double eta00)
{
  SvmSgd clone(*this); // take a copy of the current state
  cout << "[my_evaluateEta] clone.wDivisor: " << setprecision(12) << clone.wDivisor
       << " clone.t: " << clone.t << " clone.eta0: " << clone.eta0 << endl;
  cout << "Trying eta=" << eta00;
  assert(imin <= imax);
  double _t = 0;
  double eta = 0;
  for (int i=imin; i<=imax; i++)
    {
      // decay the rate along the usual SGD schedule while training the clone
      eta = eta00 / (1 + lambda * eta00 * _t);
      //cout << "[my_evaluateEta] Eta: " << eta << endl;
      clone.trainOne(xp.at(i), yp.at(i), eta);
      _t++;
    }
  double loss = 0;
  double cost = 0;
  for (int i=imin; i<=imax; i++)
    clone.testOne(xp.at(i), yp.at(i), &loss, 0);
  loss = loss / (imax - imin + 1);
  cost = loss + 0.5 * lambda * clone.wnorm();
  cout << " yields loss " << loss << endl;
  // cout << "Trying eta=" << eta << " yields cost " << cost << endl;
  return cost;
}
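// A caller can bracket a good starting rate with a factor-of-2 search over
// my_evaluateEta, in the spirit of the determineEta0 routine found elsewhere
// in this codebase. A minimal sketch; my_determineEta0 is a hypothetical
// helper, not part of the original file, and assumes the eta0 member used by
// the training routines below:
void
SvmSgd::my_determineEta0(int imin, int imax, const xvec_t &xp, const yvec_t &yp)
{
  const double factor = 2.0;
  double loEta = 1;
  double loCost = my_evaluateEta(imin, imax, xp, yp, loEta);
  double hiEta = loEta * factor;
  double hiCost = my_evaluateEta(imin, imax, xp, yp, hiEta);
  if (loCost < hiCost)
    while (loCost < hiCost)
      {
        hiEta = loEta;
        hiCost = loCost;
        loEta = hiEta / factor;
        loCost = my_evaluateEta(imin, imax, xp, yp, loEta);
      }
  else if (hiCost < loCost)
    while (hiCost < loCost)
      {
        loEta = hiEta;
        loCost = hiCost;
        hiEta = loEta * factor;
        hiCost = my_evaluateEta(imin, imax, xp, yp, hiEta);
      }
  eta0 = loEta; // keep the cheaper end of the bracket
}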
void
load(const char *fname, xvec_t &xp, yvec_t &yp)
{
  cout << "Loading " << fname << "." << endl;
  igzstream f;
  f.open(fname);
  if (! f.good())
    {
      cerr << "ERROR: cannot open " << fname << "." << endl;
      exit(10);
    }
  int pcount = 0;
  int ncount = 0;
  bool binary;
  string suffix = fname;
  if (suffix.size() >= 7)
    suffix = suffix.substr(suffix.size() - 7);
  if (suffix == ".dat.gz")
    binary = false;
  else if (suffix == ".bin.gz")
    binary = true;
  else
    {
      cerr << "ERROR: filename should end with .bin.gz or .dat.gz" << endl;
      exit(10);
    }
  while (f.good())
    {
      SVector x;
      double y;
      if (binary)
        {
          y = (f.get()) ? +1 : -1;
          x.load(f);
        }
      else
        {
          f >> y >> x;
        }
      if (f.good())
        {
          assert(y == +1 || y == -1);
          xp.push_back(x);
          yp.push_back(y);
          if (y > 0)
            pcount += 1;
          else
            ncount += 1;
          if (x.size() > dim)
            dim = x.size();
        }
      if (trainsize > 0 && xp.size() > (unsigned int)trainsize)
        break;
    }
  cout << "Read " << pcount << "+" << ncount << "=" << pcount + ncount << " examples." << endl;
}
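// A minimal sketch of a driver for load(): read a .dat.gz training set, then
// run a few SGD epochs. The file name, epoch count, and the SvmSgd
// constructor arguments are illustrative assumptions, not from this file:
int main(int, const char**)
{
  xvec_t xtrain;
  yvec_t ytrain;
  load("train.dat.gz", xtrain, ytrain);      // fills the global dim as a side effect
  int imin = 0;
  int imax = (int)xtrain.size() - 1;
  SvmSgd svm(dim, lambda);                   // assumed constructor from this codebase
  for (int epoch = 0; epoch < 5; epoch++)
    svm.train(imin, imax, xtrain, ytrain, "# ");
  svm.test(imin, imax, xtrain, ytrain, "# train: ");
  return 0;
}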
void
SvmSgd::test(int imin, int imax, const xvec_t &xp, const yvec_t &yp, const char *prefix)
{
  cout << prefix << "Testing on [" << imin << ", " << imax << "]." << endl;
  assert(imin <= imax);
  int nerr = 0;
  double cost = 0;
  for (int i=imin; i<=imax; i++)
    {
      const SVector &x = xp.at(i);
      double y = yp.at(i);
      double wx = dot(w,x);
      double z = y * (wx + bias);
      if (z <= 0)
        nerr += 1;
#if LOSS < LOGLOSS
      if (z < 1)
#endif
        cost += loss(z);
    }
  int n = imax - imin + 1;
  double loss = cost / n;
  cost = loss + 0.5 * lambda * dot(w,w);
  cout << prefix << setprecision(4)
       << "Misclassification: " << (double)nerr * 100.0 / n << "%." << endl;
  cout << prefix << setprecision(12) << "Cost: " << cost << "." << endl;
  cout << prefix << setprecision(12) << "Loss: " << loss << "." << endl;
}
double
SvmAisgd::evaluateEta(int imin, int imax, const xvec_t &xp, const yvec_t &yp, double eta)
{
  SvmAisgd clone(*this); // take a copy of the current state
  assert(imin <= imax);
  for (int i=imin; i<=imax; i++)
    clone.trainOne(xp.at(i), yp.at(i), eta, 1.0);
  double loss = 0;
  double cost = 0;
  for (int i=imin; i<=imax; i++)
    clone.testOne(xp.at(i), yp.at(i), &loss, 0);
  loss = loss / (imax - imin + 1);
  cost = loss + 0.5 * lambda * clone.wnorm();
  // cout << "Trying eta=" << eta << " yields cost " << cost << endl;
  return cost;
}
static void
loadmult_datafile_sub(istream &f, bool binary, const char *fname,
                      xvec_t &xp, yvec_t &yp, int &maxdim, int maxrows)
{
  cout << "# Reading file " << fname << endl;
  if (! f.good())
    assertfail("Cannot open " << fname);
  int pcount = 0;
  while (f.good() && maxrows--)
    {
      double y;
      SVector x;
      y = (f.get());
      x.load(f);
      if (f.good())
        {
          xp.push_back(x);
          yp.push_back(y);
          pcount += 1;
          if (x.size() > maxdim)
            maxdim = x.size();
        }
    }
  cout << "# Read " << pcount << " examples." << endl;
}
double
SvmSgdSJE::evaluate_objective(double *xp, const yvec_t yp, double *emb_mat, double *att_mat)
{
  double risk = 0.0;
  cout << "start to evaluate" << endl;
  for(int i = 0; i < nsamples; i++)
    {
      double max_score = 0.0;
      int ni = yp.at(i) - 1;
      double norm_emb = 0.0;
      // only emb_dim entries are needed (the original over-allocated emb_dim*dims)
      double xproj[emb_dim];
      memset(xproj, 0, emb_dim*sizeof(double));
      for(int j = 0; j < emb_dim; j++)
        {
          for(int k = 0; k < dims; k++)
            {
              xproj[j] += xp[dims*i+k] * emb_mat[dims*j+k];
              norm_emb += lambda * emb_mat[dims*j+k] * emb_mat[dims*j+k];
            }
        }
      for(int c = 0; c < nclass; c++)
        {
          double score = 0.0;
          if(c != ni)
            score += 1;   // delta(y_n, y) = 1 when y != y_n
          for(int j = 0; j < emb_dim; j++)
            score += xproj[j] * (att_mat[emb_dim*c+j] - att_mat[emb_dim*ni+j]);
          if(max_score < score)
            max_score = score;
        }
      if(max_score > 0)
        risk += max_score;
      else
        cout << "The risk is below zero" << endl;
      risk += norm_emb;
    }
  return risk;
}
/// Perform a test pass
void
SvmAisgd::test(int imin, int imax, const xvec_t &xp, const yvec_t &yp, const char *prefix)
{
  cout << prefix << "Testing on [" << imin << ", " << imax << "]." << endl;
  assert(imin <= imax);
  double nerr = 0;
  double loss = 0;
  for (int i=imin; i<=imax; i++)
    testOne(xp.at(i), yp.at(i), &loss, &nerr);
  nerr = nerr / (imax - imin + 1);
  loss = loss / (imax - imin + 1);
  double cost = loss + 0.5 * lambda * anorm();
  cout << prefix
       << "Loss=" << setprecision(12) << loss
       << " Cost=" << setprecision(12) << cost
       << " Misclassification=" << setprecision(4) << 100 * nerr << "%."
       << endl;
}
/// Perform a training epoch
void
SvmAisgd::train(int imin, int imax, const xvec_t &xp, const yvec_t &yp, const char *prefix)
{
  cout << prefix << "Training on [" << imin << ", " << imax << "]." << endl;
  assert(imin <= imax);
  assert(eta0 > 0);
  for (int i=imin; i<=imax; i++)
    {
      double eta = eta0 / pow(1 + lambda * eta0 * t, 0.75);
      double mu = (t <= tstart) ? 1.0 : mu0 / (1 + mu0 * (t - tstart));
      trainOne(xp.at(i), yp.at(i), eta, mu);
      t += 1;
    }
  cout << prefix << setprecision(6) << "wNorm=" << wnorm() << " aNorm=" << anorm();
#if BIAS
  cout << " wBias=" << wBias << " aBias=" << aBias;
#endif
  cout << endl;
}
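// The two schedules above are worth seeing side by side: the SGD rate decays
// as eta0/(1 + lambda*eta0*t)^0.75, while averaging only kicks in after
// tstart with weight mu0/(1 + mu0*(t - tstart)). A standalone sketch; the
// constants eta0, lambda, mu0, tstart are illustrative, not tuned values:
#include <cstdio>
#include <cmath>
int main()
{
  double eta0 = 1.0, lambda = 1e-5, mu0 = 1.0;
  double tstart = 100;  // averaging usually starts after a warm-up period
  for (double t = 0; t <= 200; t += 50)
    {
      double eta = eta0 / pow(1 + lambda * eta0 * t, 0.75);
      double mu = (t <= tstart) ? 1.0 : mu0 / (1 + mu0 * (t - tstart));
      printf("t=%3.0f  eta=%.6f  mu=%.6f\n", t, eta, mu);
    }
  return 0;
}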
void
SvmSgd::train(int imin, int imax, const xvec_t &xp, const yvec_t &yp, const char *prefix)
{
  cout << prefix << "Training on [" << imin << ", " << imax << "]." << endl;
  assert(imin <= imax);
  count = skip;
  for (int i=imin; i<=imax; i++)
    {
      const SVector &x = xp.at(i);
      double y = yp.at(i);
      double wx = dot(w,x);
      double z = y * (wx + bias);
      double eta = 1.0 / (lambda * t);
#if LOSS < LOGLOSS
      if (z < 1)
#endif
        {
          double etd = eta * dloss(z);
          w.add(x, etd * y);
#if BIAS
#if REGULARIZEBIAS
          bias *= 1 - eta * lambda * bscale;
#endif
          bias += etd * y * bscale;
#endif
        }
      // apply the regularization shrink only every skip examples
      if (--count <= 0)
        {
          double r = 1 - eta * lambda * skip;
          if (r < 0.8)
            r = pow(1 - eta * lambda, skip);
          w.scale(r);
          count = skip;
        }
      t += 1;
    }
  cout << prefix << setprecision(6) << "Norm: " << dot(w,w) << ", Bias: " << bias << endl;
}
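// The r < 0.8 switch above guards the first-order approximation
// 1 - eta*lambda*skip ≈ (1 - eta*lambda)^skip that makes the skipped
// regularization step cheap. A quick standalone check with illustrative
// values of eta*lambda (el) and skip:
#include <cstdio>
#include <cmath>
int main()
{
  int skip = 16;
  for (double el : {1e-6, 1e-3, 1e-2})   // el = eta*lambda
    {
      double approx = 1 - el * skip;
      double exact = pow(1 - el, skip);
      printf("eta*lambda=%g  approx=%.6f  exact=%.6f\n", el, approx, exact);
    }
  return 0;
}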
/// Perform a SAG training epoch
void
SvmSag::trainSag(int imin, int imax, const xvec_t &xp, const yvec_t &yp, const char *prefix)
{
  cout << prefix << "Training on [" << imin << ", " << imax << "]." << endl;
  assert(imin <= imax);
  assert(imin >= sdimin);
  assert(imax <= sdimax);
  assert(eta > 0);
  uniform_int_generator generator(imin, imax);
  for (int i=imin; i<=imax; i++)
    {
      int ii = generator();
      trainOne(xp.at(ii), yp.at(ii), eta, ii);
      t += 1;
    }
  cout << prefix << setprecision(6) << "wNorm=" << wnorm();
#if BIAS
  cout << " wBias=" << wBias;
#endif
  cout << endl;
}
/// Perform initial training epoch
void
SvmSag::trainInit(int imin, int imax, const xvec_t &xp, const yvec_t &yp, const char *prefix)
{
  cout << prefix << "Training on [" << imin << ", " << imax << "]." << endl;
  assert(imin <= imax);
  assert(eta > 0);
  assert(m == 0);
  sd.resize(imax - imin + 1);
  sdimin = imin;
  sdimax = imax;
  for (int i=imin; i<=imax; i++)
    {
      m += 1;
      trainOne(xp.at(i), yp.at(i), eta, i);
      t += 1;
    }
  cout << prefix << setprecision(6) << "wNorm=" << wnorm();
#if BIAS
  cout << " wBias=" << wBias;
#endif
  cout << endl;
}
void
SvmSgd::train(int imin, int imax, const xvec_t &xp, const yvec_t &yp, const char *prefix)
{
  cout << prefix << "Training on [" << imin << ", " << imax << "]." << endl;
  assert(imin <= imax);
  for (int i=imin; i<=imax; i++)
    {
      double eta = 1.0 / (lambda * t);
      double s = 1 - eta * lambda;
      wscale *= s;
      if (wscale < 1e-9)
        {
          w.scale(wscale);
          wscale = 1;
        }
      const SVector &x = xp.at(i);
      double y = yp.at(i);
      double wx = dot(w,x) * wscale;
      double z = y * (wx + bias);
#if LOSS < LOGLOSS
      if (z < 1)
#endif
        {
          double etd = eta * dloss(z);
          w.add(x, etd * y / wscale);
#if BIAS
          // Slower rate on the bias because
          // it learns at each iteration.
          bias += etd * y * 0.01;
#endif
        }
      t += 1;
    }
  double wnorm = dot(w,w) * wscale * wscale;
  cout << prefix << setprecision(6) << "Norm: " << wnorm << ", Bias: " << bias << endl;
}
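// The wscale representation above keeps the regularization step O(1): the
// true weight vector is wscale*w, so shrinking by s only touches the scalar,
// dot products are corrected by wscale, and updates are divided by it. A toy
// demonstration of the equivalence on a 1-D "vector" (illustrative only):
#include <cstdio>
int main()
{
  double s = 0.999;             // per-step shrink factor 1 - eta*lambda
  double grad = 0.5;            // some update to add after shrinking
  // direct representation
  double w_direct = 2.0;
  w_direct = w_direct * s + grad;
  // lazy representation: stored value plus running scale
  double w_stored = 2.0, wscale = 1.0;
  wscale *= s;
  w_stored += grad / wscale;    // divide the update so wscale*w_stored matches
  printf("direct=%.9f  lazy=%.9f\n", w_direct, wscale * w_stored);
  return 0;
}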
/// Perform a training epoch
void
SvmSgd::train(int imin, int imax, const xvec_t &xp, const yvec_t &yp, const char *prefix)
{
#if VERBOSE
  cout << prefix << "Training on [" << imin << ", " << imax << "]." << endl;
#endif
  assert(imin <= imax);
  assert(eta0 > 0);
  for (int i=imin; i<=imax; i++)
    {
      double eta = eta0 / (1 + lambda * eta0 * t);
      trainOne(xp.at(i), yp.at(i), eta);
      t += 1;
    }
#if VERBOSE
  cout << prefix << setprecision(6) << "wNorm=" << wnorm();
#if BIAS
  cout << " wBias=" << wBias;
#endif
  cout << endl;
#endif
}
/// Perform a training epoch
void
SvmSgd::train(int imin, int imax, const xvec_t &xp, const yvec_t &yp, const char *prefix)
{
  cout << prefix << "Training on [" << imin << ", " << imax << "]." << endl;
  assert(imin <= imax);
  assert(eta0 > 0);
  //cout << "wDivisor: " << wDivisor << " wBias: " << wBias << endl;
  for (int i=imin; i<=imax; i++)
    {
      double eta = eta0 / (1 + lambda * eta0 * t);
      //cout << "Eta: " << eta << endl;
      trainOne(xp.at(i), yp.at(i), eta);
      t += 1;
    }
  //cout << "\nAfter training: \n wDivisor: " << wDivisor << " wBias: " << wBias << endl;
  cout << prefix << setprecision(6) << "wNorm=" << wnorm();
#if BIAS
  cout << " wBias=" << wBias;
#endif
  cout << endl;
}
/// Testing
double
SvmSgdSJE::test(int imin, int imax, double *xp, const yvec_t &yp, int dims,
                double *emb_mat, int emb_dim, double *att_mat, const char* prefix)
{
  cout << prefix << " Testing Multi-class for [" << imin << ", " << imax << "]." << endl;
  assert(imin <= imax);
  int nsamples = imax-imin+1;
  double* scores = new double[nclass*nsamples];
  int* conf_mat = new int[nclass*nclass];
  memset(conf_mat, 0, sizeof(int)*nclass*nclass);
  for(int i = 0; i < nclass*nsamples; i++)
    scores[i] = 0.0;
  double xproj[emb_dim];
  for(int i = 0; i < nsamples; i++)
    {
      // project the test image onto the label embedding space
      for(int iy = 0; iy < emb_dim; iy++)
        xproj[iy] = 0.0;
      double xproj_norm = 0;
      for(int iy = 0; iy < emb_dim; iy++)
        {
          for(int ix = 0; ix < dims; ix++)
            xproj[iy] += xp[dims*i + ix] * emb_mat[dims*iy + ix];
          xproj_norm += xproj[iy] * xproj[iy];
        }
      // normalize the projected vector
      xproj_norm = sqrt(xproj_norm);
      if(xproj_norm != 0)
        {
          for(int iy = 0; iy < emb_dim; iy++)
            xproj[iy] = xproj[iy] / xproj_norm;
        }
      // calculate the scores using dot-product similarity with the classifiers
      for(int j = 0; j < nclass; j++)
        {
          for(int iy = 0; iy < emb_dim; iy++)
            scores[nsamples*j + i] += xproj[iy] * att_mat[emb_dim*j + iy];
        }
    }
  double nerr = 0;
  for(int i = 0; i < nsamples; i++)
    {
      int true_class = int(yp.at(i)-1);
      double max_score = -DBL_MAX; // scores can be negative, so do not start at -1
      int predicted_class = -1;
      for(int c = 0; c < nclass; c++)
        {
          if (scores[nsamples*c+i] > max_score)
            {
              predicted_class = c;
              max_score = scores[nsamples*c+i];
            }
        }
      conf_mat[nclass*true_class+predicted_class]++;
      //cout << true_class << " " << predicted_class << endl;
      if(true_class != predicted_class)
        nerr++;
    }
  nerr = nerr / nsamples;
  cout << " Per image accuracy = " << setprecision(4) << 100-(100 * nerr) << "%." << endl;
  double sum_diag_conf = 0;
  double sum_each_line;
  for(int i = 0; i < nclass; i++)
    {
      sum_each_line = 0;
      for(int j = 0; j < nclass; j++)
        sum_each_line = sum_each_line + conf_mat[i*nclass+j];
      // scale by 100 so the printed value matches the "%" label
      cout << " Class = " << i << " accuracy = " << setprecision(4)
           << 100 * double(conf_mat[i*nclass+i] / sum_each_line) << "%." << endl;
      sum_diag_conf = sum_diag_conf + double(conf_mat[i*nclass+i]) / sum_each_line;
    }
  cout << " Per " << prefix << " class accuracy = " << setprecision(4)
       << 100 * double(sum_diag_conf / nclass) << "%." << endl;
  double acc = double(sum_diag_conf / nclass);
  delete[] scores;
  delete[] conf_mat;
  return acc;
}
/// Training the svms with SJE using ranking objective
void
SvmSgdSJE::train(int imin, int imax, double *xp, const yvec_t &yp, int dims,
                 double *att_mat, int cls_dim, double *emb_mat, int emb_dim,
                 const char *prefix)
{
  cout << prefix << " Training SJE for lbd = " << lambda << ", eta = " << eta
       << " and " << nclass << " classes" << endl;
  assert(imin <= imax);
  double xproj[emb_dim];
  for(int i = imin; i <= imax; i++)
    {
      //i = rand()%imax + imin;
      for(int iy = 0; iy < emb_dim; iy++)
        xproj[iy] = 0.0;
      // project the training image onto the label embedding space
      double xproj_norm = 0;
      for(int iy = 0; iy < emb_dim; iy++)
        {
          for(int ix = 0; ix < dims; ix++)
            xproj[iy] += xp[dims*i + ix] * emb_mat[dims*iy + ix];
          xproj_norm += xproj[iy] * xproj[iy];
        }
      // normalize the projected vector (guard against division by zero)
      xproj_norm = sqrt(xproj_norm);
      if(xproj_norm != 0)
        {
          for(int iy = 0; iy < emb_dim; iy++)
            xproj[iy] = xproj[iy] / xproj_norm;
        }
      // find the highest-scoring class under the margin-rescaled ranking loss
      int best_index = -1;
      double best_score = 0.0;
      for(int j = 0; j < nclass; j++)
        {
          double score = 0.0;
          for(int iy = 0; iy < emb_dim; iy++)
            score += xproj[iy] * att_mat[emb_dim*j + iy];
          // delta(y_n, y) = 1 if y_n != y
          if(j != yp.at(i) - 1)
            score += 1;
          if(score > best_score)
            {
              best_score = score;
              best_index = j;
            }
        }
      // update the embedding matrix when the decision is wrong
      if(best_index != int(yp.at(i)-1) && best_index != -1)
        {
          // memcpy(prev_emb_mat, emb_mat, emb_dim*dims*sizeof(double));
          int ni = int(yp.at(i) - 1);
          for(int iy = 0; iy < emb_dim; iy++)
            {
              for(int ix = 0; ix < dims; ix++)
                {
                  emb_mat[dims*iy + ix] -= eta * (xp[dims*i + ix]
                      * (att_mat[emb_dim*best_index + iy] - att_mat[emb_dim*ni + iy])
                      + lambda * emb_mat[dims*iy + ix]);
                }
            }
        }
      t += 1;
    }
  //cout << "gradient norm=" << gradient_norm(g,dims*emb_dim) << endl;
}
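// A minimal sketch of an epoch loop driving the two SJE routines above.
// run_sje is a hypothetical helper; the matrix setup and epoch count are
// assumptions, not from this file:
void
run_sje(SvmSgdSJE &svm, double *xtrain, const yvec_t &ytrain,
        double *xtest, const yvec_t &ytest, int ntrain, int ntest,
        int dims, int emb_dim, int nclass,
        double *emb_mat, double *att_mat, int epochs)
{
  for (int e = 0; e < epochs; e++)
    {
      svm.train(0, ntrain - 1, xtrain, ytrain, dims, att_mat, nclass,
                emb_mat, emb_dim, "train");
      svm.test(0, ntest - 1, xtest, ytest, dims, emb_mat, emb_dim,
               att_mat, "val");
    }
}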