int main(int argc, char** argv){ double learning_rate; int batch_size; vector<int> layers; string train_fname; int max_epoch; float valid_ratio=0.1; string output_model; string structure; Net d; Net bestModel; //Set parameters float early_stop_thres = 0.03; //ex: ./run 0.01 5-4-3 300 train_file_name output_model_name model_name if(argc < 7){ printf("Usage:\n"); printf("./train learning_rate(0.01) batch_size(10) structure(5-4-3) max_epoch(100) train_file output_model [load_model]\n"); return 0; }else{ learning_rate = atof(argv[1]); batch_size = atoi(argv[2]); string lyr(argv[3]); structure = lyr.c_str(); vector<string> x = split(lyr,"-"); for(int i=0;i<x.size();i++){ layers.push_back(atoi(x[i].c_str())); } max_epoch = atoi(argv[4]); train_fname.assign(argv[5]); output_model.assign(argv[6]); } //Initialize neural network srand(time(NULL)); if(argc == 7){ d.load_model(layers); }else if(argc == 8){ string m_name(argv[7]); d.load_model(m_name); }else{ printf("wrong parameters\n"); return 0; } d.learning_rate = learning_rate; d.batch_size = batch_size; //Loading data puts("Loading training data..."); d.load_train_data(train_fname,d.data,d.label,d.index); vector<int> valid_index(d.index.begin(), d.index.begin()+d.index.size()*valid_ratio); vector<int> train_index(d.index.begin()+d.index.size()*valid_ratio, d.index.end()); //Pre-training for (int layer = 0; layer < layers.size() - 1; layer++) { d.initDeltaRBM(layer); for (int epoch = 0; epoch<max_epoch; epoch++){ random_shuffle(train_index.begin(), train_index.end()); int j = 0; for (vector<int>::iterator it = train_index.begin(); it != train_index.end(); ++it, ++j){ d.gibbSample(layer, d.data[*it]); if ((j % d.batch_size == 0) && j != 0) d.updateRBM(layer); } } } //Training puts("Start training..."); float best_error_rate = 1; int flat_count = 0; for(int epoch=0;epoch<max_epoch;epoch++){ random_shuffle(train_index.begin(), train_index.end()); int j=0; for(vector<int>::iterator it=train_index.begin();it!=train_index.end();++it,++j){ mat y = zeros<mat>(layers.back(),1); d.feedforward(d.data[*it]); y(d.label[*it],0) = 1; d.backprop(y); if((j % d.batch_size == 0) && j != 0) d.update(); } float train_err = d.report_error_rate(d.data,d.label, train_index); float valid_err = d.report_error_rate(d.data,d.label, valid_index); printf("epoch %d\ttrain err:%f\tvalid err:%f\n", epoch,train_err,valid_err); d.save_model(output_model, structure); if (valid_err < best_error_rate) { best_error_rate = valid_err; bestModel = d; } if (valid_err - best_error_rate > early_stop_thres) { bestModel.save_model(output_model, structure); printf("early stop here, and save the best model\n"); break; } } }