// Batch gradient descent. X and Y have the same layout as in SGD below;
// Theta is the initial (n x c) parameter matrix.
fmat GD(const fmat& X, const fmat& Y, double alpha, fmat Theta, int iters) {
    int m = X.n_rows;  // Rows = examples
    int n = X.n_cols;  // Columns = features + BIAS
    double lambda = 4.0;
    double loss;
    cout << "Training dimensions: " << m << "x" << n << endl;
    for (int i = 0; i < iters; i++) {
        Theta = gdStep(Theta, X, Y, alpha, lambda);
        cout << "finished iteration " << i;
#ifndef NDEBUG
        if (i % 10 == 0) {
            loss = logloss(predict(X, Theta), Y);
            cout << " logloss " << loss;
        }
#endif
        cout << endl;
    }
    loss = logloss(predict(X, Theta), Y);
    cout << "Final logloss: " << loss << endl;
    return Theta;
}
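// gdStep and predict are defined elsewhere. As orientation, a minimal sketch
// of one possible gdStep for softmax regression with L2 regularization
// follows; it assumes predict(X, Theta) returns an (m x c) matrix of class
// probabilities and that the BIAS coefficients sit in row 0 of Theta.
// Both assumptions are illustrative, not confirmed by this code.
fmat gdStep(fmat Theta, const fmat& X, const fmat& Y,
            double alpha, double lambda) {
    int m = X.n_rows;
    fmat P = predict(X, Theta);            // (m x c) predicted probabilities
    fmat gradient = X.t() * (P - Y) / m;   // (n x c) data term of the gradient
    fmat reg = (lambda / m) * Theta;       // L2 penalty term
    reg.row(0).zeros();                    // assumed: do not regularize the BIAS row
    return Theta - alpha * (gradient + reg);
}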
// X is the data matrix ([m examples] x [n features + BIAS]).
// X is assumed to be already normalized, with the BIAS column appended.
// Y is the response matrix ([m examples] x [c possible categories]).
fmat SGD(const fmat& X, const fmat& Y, double alpha) {
    int m = X.n_rows;  // Rows = examples
    int n = X.n_cols;  // Columns = features + BIAS
    int c = Y.n_cols;  // Possible categories
    double lambda = 4.0;
    fmat Theta(n, c);
    Theta.fill(0.0);
    int its = m / SGD_N;
    fmat subX, subY;
    double loss;
    for (int i = 0; i < GD_IT; i++) {
        // Mini-batch SGD. This could be modularized a bit, perhaps with a #define.
        for (int j = 0; j < its; j++) {
            subX = X.rows(SGD_N * j, SGD_N * (j + 1) - 1);
            subY = Y.rows(SGD_N * j, SGD_N * (j + 1) - 1);
            Theta = gdStep(Theta, subX, subY, alpha, lambda);
        }
        // Take the remaining rows, if any (skipped when SGD_N divides m evenly).
        if (its * SGD_N < m) {
            subX = X.rows(its * SGD_N, m - 1);
            subY = Y.rows(its * SGD_N, m - 1);
            Theta = gdStep(Theta, subX, subY, alpha, lambda);
        }
        cout << "finished iteration " << i;
#ifndef NDEBUG
        if (i % 10 == 0) {
            loss = logloss(predict(X, Theta), Y);
            cout << " logloss " << loss;
        }
#endif
        cout << endl;
    }
    loss = logloss(predict(X, Theta), Y);
    cout << "Final logloss: " << loss << endl;
    return Theta;
}
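// A hypothetical call site for SGD, to show the expected preprocessing.
// The file names and alpha value are illustrative; normalise, join_horiz,
// and ones are the Armadillo helpers assumed here, and Y is taken to be
// one-hot encoded already.
void train_example() {
    fmat X, Y;
    X.load("train_features.csv");                // hypothetical input files
    Y.load("train_labels.csv");
    X = normalise(X);                            // one possible normalization (unit-norm columns)
    X = join_horiz(ones<fmat>(X.n_rows, 1), X);  // prepend the BIAS column
    fmat Theta = SGD(X, Y, 0.1);                 // alpha = 0.1, illustrative only
    Theta.save("theta.bin");
}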
double compute_llhood(std::vector<double>& xx, std::vector<double>& yy,
                      std::vector<sparse_array>& rows) {
    double llhood = 0;
    int rs = logregprob->ny;
    for (int i = 0; i < rs; i++) {
        // Sparse dot product of the weight vector xx with row i: (Ax)_i.
        sparse_array& row = rows[i];
        double Ax = 0;
        for (int j = 0; j < row.length(); j++) {
            Ax += xx[row.idxs[j]] * row.values[j];
        }
        // Accumulate the logistic log-likelihood term for label yy[i].
        llhood -= logloss(-yy[i] * Ax);
    }
    return llhood;
}
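// The scalar logloss helper is not shown here. The usual convention that
// makes the loop above compute the binary logistic log-likelihood for labels
// yy[i] in {-1, +1} is logloss(t) = log(1 + exp(t)), so that
// llhood = -sum_i log(1 + exp(-yy[i] * (Ax)_i)). A numerically stable
// sketch under that assumption:
#include <cmath>

inline double logloss(double t) {
    // log(1 + exp(t)), rewritten as t + log(1 + exp(-t)) for positive t
    // so the exp never overflows.
    if (t > 0)
        return t + std::log1p(std::exp(-t));
    return std::log1p(std::exp(t));
}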
double NeuralNet::trainNet(const std::vector<double>& data,
                           const std::vector<double>& trainingOutput,
                           const unsigned int outType) {
    int outputLayer = m_layers.size();
    std::vector<double> output, error, delta, prevOut;
    double cost = 0.0;

    // Run the net forward and measure the cost on this example.
    output = runNet(data);
    error = computeError(output, trainingOutput);
    switch (outType) {
        case SCALAR:
            cost = computeMSE(error);
            break;
        case PROB:
            cost = logloss(output, trainingOutput);
            break;
    }

    // Propagate the error backward through the layers.
    for (int i = outputLayer; i > 0; i--) {
        // Gradient at this layer: the raw error at the output layer,
        // otherwise deltas back-propagated through the layer above.
        if (i == outputLayer)
            delta = error;
        else
            delta = m_layers[i - 1]->computeDeltas(error, m_layers[i]->retrieveWeights());
        // Inputs to this layer: the previous layer's outputs,
        // or the raw input data for the first layer.
        if (i > 1)
            prevOut = m_layers[i - 2]->retrieveOutputs();
        else
            prevOut = data;
        m_layers[i - 1]->updateWeights(prevOut, delta, m_learningRate, m_momentum, m_weightDecay);
        error = delta;
    }
    return cost;
}
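// A hypothetical driver around trainNet. The container names, the epoch
// count, and the use of PROB are illustrative; only trainNet's signature
// comes from the method above, and NeuralNet construction is omitted.
#include <iostream>
#include <vector>

double trainEpochs(NeuralNet& net,
                   const std::vector<std::vector<double>>& inputs,
                   const std::vector<std::vector<double>>& targets,
                   int epochs) {
    double meanCost = 0.0;
    for (int epoch = 0; epoch < epochs; epoch++) {
        double total = 0.0;
        for (size_t k = 0; k < inputs.size(); k++)
            total += net.trainNet(inputs[k], targets[k], PROB);
        meanCost = total / inputs.size();
        std::cout << "epoch " << epoch << " mean cost " << meanCost << std::endl;
    }
    return meanCost;
}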