// Feeds the contents of `filename` to `model` and trains it.
//
// Every line of the file is parsed with read_line(); one training sample is
// built with sample() for each token position of the line and added to
// `model`. Reading stops at end of file or once the line counter has passed
// 10000. The model is then trained with an L1 regularizer of 1.0 and a
// held-out setting of 100, and written to the file "model".
//
// Terminates the process with exit(1) if `filename` cannot be opened.
void train(ME_Model & model, const string & filename)
{
  ifstream input(filename.c_str());
  if (!input) {
    cerr << "error: cannot open " << filename << endl;
    exit(1);
  }

  int lines_seen = 0;
  for (string text; getline(input, text); ) {
    vector<Token> tokens = read_line(text);

    int pos = 0;
    while (pos < (int)tokens.size()) {
      ME_Sample current = sample(tokens, pos);
      model.add_training_sample(current);
      pos++;
    }

    // The cap is checked against the count *before* this line is counted.
    if (lines_seen > 10000) break;
    lines_seen++;
  }

  // Alternatives that were left disabled by the author:
  //   model.use_l2_regularizer(1.0);
  //   model.use_SGD();
  model.use_l1_regularizer(1.0);
  model.set_heldout(100);

  model.train();
  model.save_to_file("model");
}
// Train model RcppExport SEXP train_model(double l1=0, double l2=0, bool sgd=FALSE, int sgd_iter=30, double sgd_eta0=1, double sgd_alpha=0.85, int heldout=0) { Rprintf("Training the new model...\n"); if (heldout > 0) model.set_heldout(heldout); if (l1 > 0) model.use_l1_regularizer(l1); else if (l2 > 0) model.use_l2_regularizer(l2); else if (sgd) model.use_SGD(); model.train(); string model_data = model.save_to_string(); vector< vector<string> > weights = export_weights(); List rs = List::create(model_data,weights[0],weights[1],weights[2]); return rs; }
int bidir_train(const vector<Sentence> & vs, int para) { // vme.clear(); // vme.resize(16); for (int t = 0; t < 16; t++) { if (t != 15 && t != 0) continue; // for (int t = 15; t >= 0; t--) { vector<ME_Sample> train; if (para != -1 && t % 4 != para) continue; // if (t % 2 == 1) continue; cerr << "type = " << t << endl; cerr << "extracting features..."; int n = 0; for (vector<Sentence>::const_iterator i = vs.begin(); i != vs.end(); i++) { const Sentence & s = *i; for (int j = 0; j < s.size(); j++) { string pos_left1 = "BOS", pos_left2 = "BOS2"; if (j >= 1) pos_left1 = s[j-1].pos; if (j >= 2) pos_left2 = s[j-2].pos; string pos_right1 = "EOS", pos_right2 = "EOS2"; if (j <= int(s.size()) - 2) pos_right1 = s[j+1].pos; if (j <= int(s.size()) - 3) pos_right2 = s[j+2].pos; if ( (t & 0x8) == 0 ) pos_left2 = ""; if ( (t & 0x4) == 0 ) pos_left1 = ""; if ( (t & 0x2) == 0 ) pos_right1 = ""; if ( (t & 0x1) == 0 ) pos_right2 = ""; train.push_back(mesample(s, j, pos_left2, pos_left1, pos_right1, pos_right2)); } // if (n++ > 1000) break; } cerr << "done" << endl; ME_Model m; // m.set_heldout(1000,0); // m.train(train, 2, 1000, 0); m.train(train, 2, 0, 1); char buf[1000]; sprintf(buf, "model.bidir.%d", t); m.save_to_file(buf); } }
// Trains `model` on the samples listed in the file `input` and saves it.
//
// Each line of `input` is broken up with split(); the resulting strings are
// used to construct an ME_Sample (second constructor argument `true`), which
// is added to `model`. After the whole file has been consumed the model is
// trained and written to `model_path`.
//
// Terminates the process with exit(1) if `input` cannot be opened.
void train(ME_Model & model, const string & input, const string & model_path)
{
  ifstream in(input.c_str());
  if (!in) {
    cerr << "error: cannot open " << input << endl;
    exit(1);
  }

  for (string row; getline(in, row); ) {
    vector<string> fields = split(row);
    ME_Sample example(fields, true);
    model.add_training_sample(example);
  }

  model.train();
  model.save(model_path);
}
// Entry point: reads tab-separated records from `filename`, adds them to an
// ME_Model, trains the model and saves it to the file "model".
//
// The file is read with fgets() into a 300-byte buffer, so a longer input
// line is delivered in several pieces. Each piece is parsed by split() into
// `ncols` columns. Reading stops at end of file or once more than 2000000
// records have been processed.
//
// Returns 0 on success, 1 if `filename` cannot be opened.
//
// NOTE(review): `filename` is not declared inside this function - presumably
// a file-level definition; confirm. `train` and `test` are read from the
// command line but never used afterwards.
int main(int argc, char *argv[])
{
    ME_Model model;
    int ncols = 7;

    // Only touch argv entries that actually exist.
    char *train = (argc > 1) ? argv[1] : NULL;
    char *test = (argc > 2) ? argv[2] : NULL;

    // Fixed-size stack buffer: nothing to allocate, check or free.
    const int buffer = 300;
    char buf[buffer];

    FILE *fp = fopen( filename, "r" );
    if ( fp == NULL ) {
        fprintf( stderr, "Error opening file %s\n", filename );
        return 1;
    }

    int count = 0;
    while ( fgets( buf, buffer, fp ) != NULL ) {
        double *spl = split(buf, '\t', ncols);
        // NOTE(review): the same `spl` is added `ncols` times per record;
        // kept as in the original - confirm this repetition is intended.
        for(unsigned char i=0; i<ncols; i++) {
            add_feat_to_model(model, spl);
        }
        // printf("%d processed\n", count);
        count++;
        free(spl);
        if(count>2000000) {
            break;
        }
    }
    fclose( fp );

    printf("Start training\n");
    model.train();
    model.save_to_file("model");
    return 0;
}