void initCacheLRU( int tailleCache, int ordre, lm_t *lmlocal , dict_t *dict , char const * nomMach) { FILE *f=NULL; char nom1[1024], nomVocabPondere[1024]; lm=lmlocal; if (nomMach !=NULL) { f=fopen(nomMach,"r"); if (f==NULL) { fprintf(stderr,"can't open %s\n",nomMach); exit(1); } fscanf(f,"%s",nomVocabPondere); fscanf(f,"%i",&nbTrainer); //machine number cslm= new TrainerNgramSlist *[nbTrainer]; poids= new double[nbTrainer]; cslm_wlist = new WordList; if (cslm_wlist != NULL) cslm_wlist->Read(nomVocabPondere); /* shortlist length will be set with TrainerNgramSlist class */ for (int i =0; i<nbTrainer; i++) { if (fscanf(f,"%s%lf",nom1,poids+i)!=2) {fprintf(stderr,"error reading machine %i\n",i);exit(1);} std::ifstream ifs; ifs.open(nom1,ios::binary); CHECK_FILE(ifs,nom1); Mach *m = Mach::Read(ifs); ifs.close(); m->Info(); cslm[i] = new TrainerNgramSlist(m, cslm_wlist, (char*)""); } fclose(f); lmMakeIndexCSLM(dict,lm,cslm_wlist); } TAILLEMAX=tailleCache+1; resuCSLM=(float **) malloc(nbTrainer*sizeof(float*)); resuCSLM[0]=(float *) malloc(nbTrainer*sizeof(float)*TAILLEMAX); for (int i=1 ; i<nbTrainer ;i++) resuCSLM[i]=resuCSLM[i-1]+TAILLEMAX; ordreMax=ordre; if (ordre%2 ==1) ordre++; ordreHash=ordre/2; ram=(Gram * )malloc(sizeof( Gram) * TAILLEMAX); ram->w=(unsigned int *) calloc(TAILLEMAX*ordre, sizeof(unsigned int) ); for (int i =1 ;i<TAILLEMAX; i++) ram[i].w=ram[i-1].w+ordre; }
/**
 * Print a description of every machine file named on the command line.
 *
 * Each argument is opened in binary mode, loaded with Mach::Read, described
 * through Mach::Info and then released. GpuUnlock() is called once all the
 * arguments have been handled.
 *
 * @return always 0
 */
int main (int argc, char *argv[])
{
    ifstream ifs;   // one stream object, reopened for each argument

    int arg = 1;
    while (arg < argc) {
        ifs.open(argv[arg],ios::binary);
        CHECK_FILE(ifs,argv[arg]);

        cout << endl;
        cout << "Information on machine: " << argv[arg] << endl;

        Mach *m = Mach::Read(ifs);
        cout << "Using file version " << Mach::GetFileId() << endl;
        m->Info();

        ifs.close();
        delete m;
        arg++;
    }

    GpuUnlock();
    return 0;
}
int main3(int argc, char *argv[]) { MachConfig mach_config(true); string mach_fname, test_fname, dev_fname; int curr_it = 0; Mach *mlp; // select available options mach_config .sel_cmdline_option<std::string>("mach,m" , true ) .sel_cmdline_option<std::string>("test-data,t" , true ) .sel_cmdline_option<std::string>("dev-data,d" , true) ; // parse parameters if (mach_config.parse_options(argc, argv)) { // get parameters mach_fname = mach_config.get_mach(); test_fname = mach_config.get_test_data(); dev_fname = mach_config.get_dev_data(); curr_it = mach_config.get_curr_iter(); //gold file CONF_test_file = test_fname; CONF_gold_file = dev_fname; } else if (mach_config.help_request()) usage2(mach_config); else { if (mach_config.parsing_error()) usage2(mach_config, false); Error(mach_config.get_error_string().c_str()); } // Check if existing machine exists const char *mach_fname_cstr = mach_fname.c_str(); struct stat stat_struct; if (stat(mach_fname_cstr, &stat_struct)==0) { // read existing network ifstream ifs; ifs.open(mach_fname_cstr,ios::binary); CHECK_FILE(ifs,mach_fname_cstr); mlp = Mach::Read(ifs); ifs.close(); cout << "Found existing machine with " << mlp->GetNbBackw() << " backward passes, continuing training at iteration " << curr_it+1 << endl; } else { Error("No such machine for eval."); } //mlp->Info(); //evaluating the test files cout << "Now evaluating the test file..." 
<< endl; CONLLReader* reader = new CONLLReader(); CONLLWriter* writer = new CONLLWriter(); reader->startReading(CONF_test_file.c_str()); writer->startWriting(CONF_output_file.c_str()); //the list HashMap * wl = training_space::load_wordlist(CONF_wl_file.c_str()); //some variables int oov_num = 0; //out of vocabulary int sen_num = 0; //sentence number int token_num = 0; //token number int miss_count = 0; //only work if the testing file already have answers int TIME_start = clock() / 1000; int TIME_start_fine = 0; //calculate DependencyInstance* x = reader->getNext(); while(x != NULL){ if(sen_num%500 == 0){ cout << "Having processed " << sen_num << ";period is "<< (clock()/1000)-TIME_start_fine << "ms."<< endl; TIME_start_fine = clock() / 1000; } sen_num++; int length = x->forms->size(); token_num += length - 1; double *tmp_scores = new double[length*length*2]; //construct scores using nn int num_pair = length*(length-1); //2 * (0+(l-1))*l/2 REAL *mach_x = new REAL[num_pair*CONF_X_dim]; REAL *mach_y = new REAL[num_pair*CONF_Y_dim]; int* word_index = new int[length+2]; //including <s> and </s> for(int i=0;i<length;i++){ HashMap::iterator iter = wl->find(x->forms->at(i)); if(iter == wl->end()){ oov_num++; word_index[i+1] = wl->find(&unknown_token)->second; } else word_index[i+1] = iter->second; } string sen_s = SENTENCE_START; string sen_e = SENTENCE_END; word_index[0] = wl->find(&sen_s)->second; word_index[length+1] = wl->find(&sen_e)->second; int pair_count = 0; REAL* assign_x = mach_x; for(int ii=0;ii<length;ii++){ for(int j=ii+1;j<length;j++){ for(int lr=0;lr<2;lr++){ #ifdef WHICH_TWO //2 word pair if(lr==E_RIGHT){ *assign_x++ = (REAL)word_index[ii+1]; *assign_x++ = (REAL)word_index[j+1]; } else{ *assign_x++ = (REAL)word_index[j+1]; *assign_x++ = (REAL)word_index[ii+1]; } #endif #ifdef WHICH_SIX //build mach_x : 6 words group(h-1 h h+1 m-1 m m+1) if(lr==E_RIGHT){ *assign_x++ = (REAL)word_index[ii]; *assign_x++ = (REAL)word_index[ii+1]; *assign_x++ = 
(REAL)word_index[ii+2]; *assign_x++ = (REAL)word_index[j]; *assign_x++ = (REAL)word_index[j+1]; *assign_x++ = (REAL)word_index[j+2]; } else{ *assign_x++ = (REAL)word_index[j]; *assign_x++ = (REAL)word_index[j+1]; *assign_x++ = (REAL)word_index[j+2]; *assign_x++ = (REAL)word_index[ii]; *assign_x++ = (REAL)word_index[ii+1]; *assign_x++ = (REAL)word_index[ii+2]; } #endif } } } //- give it to nn mlp->evaluate(mach_x,mach_y,num_pair,CONF_X_dim,CONF_Y_dim); REAL* assign_y = mach_y; for(int ii=0;ii<length;ii++){ for(int j=ii+1;j<length;j++){ for(int lr=0;lr<2;lr++){ int index = get_index2(length,ii,j,lr); #ifdef ULTIMATE_DEBUG cout << ii << "to" << j << ((lr==E_RIGHT)?'r':'l') << ":" << *assign_y << endl; #endif tmp_scores[index] = *assign_y++; } } } //- decode and write vector<int> *ret = decodeProjective(length,tmp_scores); for(int i2=1;i2<length;i2++){ //ignore root if((*ret)[i2] != (*(x->heads))[i2]) miss_count ++; } #ifdef ULTIMATE_DEBUG { double score_here=0; double score_right=0; for(int i2=1;i2<length;i2++){ //ignore root int head_here = (*ret)[i2]; int head_right = (*(x->heads))[i2]; score_here += get_score_from_array(length,head_here,i2,tmp_scores); score_right += get_score_from_array(length,head_right,i2,tmp_scores); } cout << "Right:" << score_right << ";Output:" << score_here << endl; } #endif delete x->heads; x->heads = ret; writer->write(x); delete x; delete []tmp_scores; delete []mach_x; delete []mach_y; delete []word_index; x = reader->getNext(); } reader->finishReading(); writer->finishWriting(); delete reader; delete writer; cout << "Finished testing in " << (clock()/1000-TIME_start) << "ms" << endl; //conclude and evaluate cout << "Testing data description:\n" << "Sentences: " << sen_num << '\n' << "Tokens: " << token_num << '\n' << "OOV token: " << oov_num << '\n' << "Miss token: " << miss_count << endl; string t; DependencyEvaluator::evaluate(CONF_gold_file,CONF_output_file,t,false); return 0; }
int main2(int argc, char *argv[]) { MachConfig mach_config(true); string mach_fname, train_fname, dev_fname; int curr_it = 0; Mach *mlp; // select available options mach_config .sel_cmdline_option<std::string>("mach,m" , true ) .sel_cmdline_option<std::string>("train-data,t" , true ) .sel_cmdline_option<std::string>("dev-data,d" , false) .sel_cmdline_option<REAL> ("lrate-beg,L" , false) .sel_cmdline_option<REAL> ("lrate-mult,M" , false) .sel_cmdline_option<REAL> ("weight-decay,W", false) .sel_cmdline_option<int> ("curr-iter,C" , false) .sel_cmdline_option<int> ("last-iter,I" , false) .sel_cmdline_option<int> ("block-size,B" , false) ; // parse parameters if (mach_config.parse_options(argc, argv)) { // get parameters mach_fname = mach_config.get_mach(); train_fname = mach_config.get_train_data(); dev_fname = mach_config.get_dev_data(); curr_it = mach_config.get_curr_iter(); } else if (mach_config.help_request()) usage(mach_config); else { if (mach_config.parsing_error()) usage(mach_config, false); Error(mach_config.get_error_string().c_str()); } // Check if existing machine exists const char *mach_fname_cstr = mach_fname.c_str(); struct stat stat_struct; if (stat(mach_fname_cstr, &stat_struct)==0) { // read existing network ifstream ifs; ifs.open(mach_fname_cstr,ios::binary); CHECK_FILE(ifs,mach_fname_cstr); mlp = Mach::Read(ifs); ifs.close(); cout << "Found existing machine with " << mlp->GetNbBackw() << " backward passes, continuing training at iteration " << curr_it+1 << endl; } else { mlp=mach_config.get_machine(); if (mlp == NULL) Error(mach_config.get_error_string().c_str()); } mlp->Info(); ErrFctMSE errfct(*mlp); //ErrFctCrossEnt errfct(*mlp); Trainer trainer(mlp, &errfct, (char *)train_fname.c_str(), (dev_fname.empty() ? 
NULL : (char *)dev_fname.c_str()), mach_config.get_lrate_beg(), mach_config.get_lrate_mult(), mach_config.get_weight_decay(), mach_config.get_last_iter(), curr_it); cout << "Initial error rate: " << 100.0*trainer.TestDev() << "%" << endl; char sfname[1024], *p; strcpy(sfname, mach_fname_cstr); p=strstr(sfname, ".mach"); if (p) { *p=0; strcat(sfname,".best.mach"); } trainer.TrainAndTest(sfname); // save machine at the end ofstream fs; fs.open(mach_fname_cstr,ios::binary); CHECK_FILE(fs,argv[4]); mlp->Write(fs); fs.close(); GpuUnlock(); if (mlp) delete mlp; return 0; }
void eval() { const char *mach_fname_cstr = CONF_mach_file.c_str(); Mach *mlp; struct stat stat_struct; if (stat(mach_fname_cstr, &stat_struct)==0) { // read existing network ifstream ifs; ifs.open(mach_fname_cstr,ios::binary); CHECK_FILE(ifs,mach_fname_cstr); mlp = Mach::Read(ifs); ifs.close(); cout << "Found existing machine" << endl; } else { Error("No such machine for eval."); } //evaluating the test files cout << "Now evaluating the test file..." << endl; CONLLReader* reader = new CONLLReader(); CONLLWriter* writer = new CONLLWriter(); reader->startReading(CONF_test_file.c_str()); writer->startWriting(CONF_output_file.c_str()); //the list HashMap * wl = load_wordlist(CONF_vocab_file.c_str()); #ifdef INSANE_DEBUG FILE* x_file = fdopen(3,"w"); #endif //some variables int oov_num = 0; //out of vocabulary int sen_num = 0; //sentence number int token_num = 0; //token number int miss_count = 0; //only work if the testing file already have answers int TIME_start = clock() / 1000; int TIME_start_fine = 0; //calculate DependencyInstance* x = reader->getNext(); while(x != NULL){ if(sen_num%1000 == 0){ cout << "Having processed " << sen_num << ";period is "<< (clock()/1000)-TIME_start_fine << "ms."<< endl; TIME_start_fine = clock() / 1000; } sen_num++; int length = x->forms->size(); token_num += length - 1; double *tmp_scores = new double[length*length*2]; //construct scores using nn int num_pair = length*(length-1); //2 * (0+(l-1))*l/2 REAL *mach_x = new REAL[num_pair*IND_CONF_x_dim_final]; REAL *mach_y = new REAL[num_pair*CONF_y_class_size]; int *word_index = get_word_index(length,x,wl,&oov_num); int pair_count = 0; REAL* assign_x = mach_x; for(int ii=0;ii<length;ii++){ for(int j=ii+1;j<length;j++){ for(int lr=0;lr<2;lr++){ //build mach_x if(lr==E_RIGHT) fill_feature(length,ii,j,word_index,assign_x); else fill_feature(length,j,ii,word_index,assign_x); assign_x += IND_CONF_x_dim_final; } } } //- give it to nn 
mlp->evaluate(mach_x,mach_y,num_pair,IND_CONF_x_dim_final,CONF_y_class_size); REAL* assign_y = mach_y; for(int ii=0;ii<length;ii++){ for(int j=ii+1;j<length;j++){ for(int lr=0;lr<2;lr++){ int index = get_index2(length,ii,j,lr); //important ... double temp = 0; if(CONF_if_y_calss){ for(int c=0;c<CONF_y_class_size;c++) temp += (*assign_y++)*c; tmp_scores[index] = temp; } else tmp_scores[index] = *assign_y++; } } } //- decode and write vector<int> *ret = decodeProjective(length,tmp_scores); for(int i2=1;i2<length;i2++){ //ignore root if((*ret)[i2] != (*(x->heads))[i2]) miss_count ++; } delete x->heads; x->heads = ret; writer->write(x); delete x; delete []tmp_scores; #ifdef INSANE_DEBUG fprintf(x_file,"Sentence %d:\n",sen_num); for(int i=0;i<num_pair;i++){ for(int j=0;j<IND_CONF_x_dim_final;j++) fprintf(x_file,"%d ",(int)mach_x[i*IND_CONF_x_dim_final+j]); fprintf(x_file,"\n"); } #endif delete []mach_x; delete []mach_y; delete []word_index; x = reader->getNext(); } #ifdef INSANE_DEBUG fclose(x_file); #endif reader->finishReading(); writer->finishWriting(); delete reader; delete writer; cout << "Finished testing in " << (clock()/1000-TIME_start) << "ms" << endl; //conclude and evaluate cout << "Testing data description:\n" << "Sentences: " << sen_num << '\n' << "Tokens: " << token_num << '\n' << "OOV token: " << oov_num << '\n' << "Miss token: " << miss_count << endl; string t; DependencyEvaluator::evaluate(CONF_gold_file,CONF_output_file,t,false); }