void Method2_pairs::each_prepare_data_oneiter()
{
  delete []data;
  delete []gradient;
  // gradient buffer for the machine's output layer
  gradient = new REAL[mach->GetWidth()*mach->GetOdim()];
  mach->SetGradOut(gradient);

  // prepare all
  // -- first count all pairs
  int num_pairs = 0;
  int sentences = training_corpus->size();
  for(int i=0;i<sentences;i++){
    int length = training_corpus->at(i)->length();
    // duplicate the right (gold) one for each wrong candidate and exclude root as modifier;
    // -- length-2 excludes self and the real head
    num_pairs += (length-2)*(length-1)*2;
  }

  // -- generate all pairs
  int real_num_pairs = 0;
  data = new REAL[num_pairs*mach->GetIdim()];
  REAL* assign_x = data;
  FeatureGenO1* feat_o1 = (FeatureGenO1*)feat_gen;  // force the o1 feature generator
  for(int i=0;i<sentences;i++){
    DependencyInstance* x = training_corpus->at(i);
    int length = x->length();
    for(int mod=1;mod<length;mod++){
      int head = x->heads->at(mod);
      for(int j=0;j<length;j++){  // length-2 candidates after the two skips below
        if(j==head || j==mod)
          continue;
        // always the right one first, then the wrong one
        feat_gen->fill_one(assign_x,x,head,mod);
        assign_x += mach->GetIdim();
        feat_gen->fill_one(assign_x,x,j,mod);
        assign_x += mach->GetIdim();
        real_num_pairs += 2;
      }
    }
  }
  current = 0;
  end = real_num_pairs;

  // Shuffle: a (right, wrong) pair must move together, so shuffle blocks of two
  // samples at a time (reusing the generic shuffle instead of writing a pair-aware one).
  shuffle_data(data,data,2*mach->GetIdim(),2*mach->GetIdim(),
      real_num_pairs*mach->GetIdim(),real_num_pairs*mach->GetIdim(),10);

  // resample
  cout << "--Data for this iter: samples all " << end << " resample: "
       << (int)(end*parameters->CONF_NN_resample) << endl;
  end = (int)(end*parameters->CONF_NN_resample);
}
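// The shuffle call above relies on shuffle_data treating 2*idim REALs as one
// movable unit, so each (right, wrong) pair stays adjacent. A minimal
// standalone sketch of such a block-wise Fisher-Yates shuffle (shuffle_blocks
// is hypothetical, not the project's shuffle_data; REAL is the project's
// float typedef):
#include <algorithm>
#include <random>

void shuffle_blocks(REAL* buf, size_t total_reals, size_t block_reals) {
  size_t n = total_reals / block_reals;  // number of movable blocks
  if (n < 2) return;
  std::mt19937 rng(10);                  // fixed seed, mirroring the call above
  for (size_t i = n - 1; i > 0; --i) {
    std::uniform_int_distribution<size_t> pick(0, i);
    size_t j = pick(rng);
    if (j != i)
      std::swap_ranges(buf + i*block_reals, buf + (i+1)*block_reals,
                       buf + j*block_reals);
  }
}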
void TokenDictionary::InitializeFromDependencyReader(DependencyReader *reader) {
  LOG(INFO) << "Creating token dictionary...";

  int form_cutoff = FLAGS_form_cutoff;
  int form_lower_cutoff = FLAGS_form_cutoff;  // note: reuses the form cutoff flag
  int lemma_cutoff = FLAGS_lemma_cutoff;
  int feats_cutoff = FLAGS_feats_cutoff;
  int pos_cutoff = FLAGS_pos_cutoff;
  int cpos_cutoff = FLAGS_cpos_cutoff;
  int prefix_length = FLAGS_prefix_length;
  int suffix_length = FLAGS_suffix_length;
  bool form_case_sensitive = FLAGS_form_case_sensitive;

  vector<int> form_freqs;
  vector<int> form_lower_freqs;
  vector<int> lemma_freqs;
  vector<int> feats_freqs;
  vector<int> pos_freqs;
  vector<int> cpos_freqs;

  Alphabet form_alphabet;
  Alphabet form_lower_alphabet;
  Alphabet lemma_alphabet;
  Alphabet feats_alphabet;
  Alphabet pos_alphabet;
  Alphabet cpos_alphabet;

  string special_symbols[NUM_SPECIAL_TOKENS];
  special_symbols[TOKEN_UNKNOWN] = kTokenUnknown;
  special_symbols[TOKEN_START] = kTokenStart;
  special_symbols[TOKEN_STOP] = kTokenStop;
  for (int i = 0; i < NUM_SPECIAL_TOKENS; ++i) {
    prefix_alphabet_.Insert(special_symbols[i]);
    suffix_alphabet_.Insert(special_symbols[i]);
    form_alphabet.Insert(special_symbols[i]);
    form_lower_alphabet.Insert(special_symbols[i]);
    lemma_alphabet.Insert(special_symbols[i]);
    feats_alphabet.Insert(special_symbols[i]);
    pos_alphabet.Insert(special_symbols[i]);
    cpos_alphabet.Insert(special_symbols[i]);
    // Counts of special symbols are set to -1:
    form_freqs.push_back(-1);
    form_lower_freqs.push_back(-1);
    lemma_freqs.push_back(-1);
    feats_freqs.push_back(-1);
    pos_freqs.push_back(-1);
    cpos_freqs.push_back(-1);
  }

  // Go through the corpus and build the dictionaries,
  // counting the frequencies.
  reader->Open(pipe_->GetOptions()->GetTrainingFilePath());
  DependencyInstance *instance =
      static_cast<DependencyInstance*>(reader->GetNext());
  while (instance != NULL) {
    int instance_length = instance->size();
    for (int i = 0; i < instance_length; ++i) {
      int id;

      // Add form to alphabet.
      std::string form = instance->GetForm(i);
      std::string form_lower(form);
      transform(form_lower.begin(), form_lower.end(),
                form_lower.begin(), ::tolower);
      if (!form_case_sensitive) form = form_lower;
      id = form_alphabet.Insert(form);
      if (id >= form_freqs.size()) {
        CHECK_EQ(id, form_freqs.size());
        form_freqs.push_back(0);
      }
      ++form_freqs[id];

      // Add lower-case form to alphabet.
      id = form_lower_alphabet.Insert(form_lower);
      if (id >= form_lower_freqs.size()) {
        CHECK_EQ(id, form_lower_freqs.size());
        form_lower_freqs.push_back(0);
      }
      ++form_lower_freqs[id];

      // Add lemma to alphabet.
      id = lemma_alphabet.Insert(instance->GetLemma(i));
      if (id >= lemma_freqs.size()) {
        CHECK_EQ(id, lemma_freqs.size());
        lemma_freqs.push_back(0);
      }
      ++lemma_freqs[id];

      // Add prefix/suffix to alphabet.
      // TODO: add varying lengths.
      string prefix = form.substr(0, prefix_length);
      id = prefix_alphabet_.Insert(prefix);
      int start = form.length() - suffix_length;
      if (start < 0) start = 0;
      string suffix = form.substr(start, suffix_length);
      id = suffix_alphabet_.Insert(suffix);

      // Add POS to alphabet.
      id = pos_alphabet.Insert(instance->GetPosTag(i));
      if (id >= pos_freqs.size()) {
        CHECK_EQ(id, pos_freqs.size());
        pos_freqs.push_back(0);
      }
      ++pos_freqs[id];

      // Add CPOS to alphabet.
      id = cpos_alphabet.Insert(instance->GetCoarsePosTag(i));
      if (id >= cpos_freqs.size()) {
        CHECK_EQ(id, cpos_freqs.size());
        cpos_freqs.push_back(0);
      }
      ++cpos_freqs[id];

      // Add FEATS to alphabet.
      for (int j = 0; j < instance->GetNumMorphFeatures(i); ++j) {
        id = feats_alphabet.Insert(instance->GetMorphFeature(i, j));
        if (id >= feats_freqs.size()) {
          CHECK_EQ(id, feats_freqs.size());
          feats_freqs.push_back(0);
        }
        ++feats_freqs[id];
      }
    }
    delete instance;
    instance = static_cast<DependencyInstance*>(reader->GetNext());
  }
  reader->Close();

  // Now adjust the cutoffs if necessary.
  while (true) {
    form_alphabet_.clear();
    for (int i = 0; i < NUM_SPECIAL_TOKENS; ++i) {
      form_alphabet_.Insert(special_symbols[i]);
    }
    for (Alphabet::iterator iter = form_alphabet.begin();
         iter != form_alphabet.end(); ++iter) {
      if (form_freqs[iter->second] > form_cutoff) {
        form_alphabet_.Insert(iter->first);
      }
    }
    if (form_alphabet_.size() < kMaxFormAlphabetSize) break;
    ++form_cutoff;
    LOG(INFO) << "Incrementing form cutoff to " << form_cutoff << "...";
  }

  while (true) {
    form_lower_alphabet_.clear();
    for (int i = 0; i < NUM_SPECIAL_TOKENS; ++i) {
      form_lower_alphabet_.Insert(special_symbols[i]);
    }
    for (Alphabet::iterator iter = form_lower_alphabet.begin();
         iter != form_lower_alphabet.end(); ++iter) {
      if (form_lower_freqs[iter->second] > form_lower_cutoff) {
        form_lower_alphabet_.Insert(iter->first);
      }
    }
    if (form_lower_alphabet_.size() < kMaxFormAlphabetSize) break;
    ++form_lower_cutoff;
    LOG(INFO) << "Incrementing lower-case form cutoff to "
              << form_lower_cutoff << "...";
  }

  while (true) {
    lemma_alphabet_.clear();
    for (int i = 0; i < NUM_SPECIAL_TOKENS; ++i) {
      lemma_alphabet_.Insert(special_symbols[i]);
    }
    for (Alphabet::iterator iter = lemma_alphabet.begin();
         iter != lemma_alphabet.end(); ++iter) {
      if (lemma_freqs[iter->second] > lemma_cutoff) {
        lemma_alphabet_.Insert(iter->first);
      }
    }
    if (lemma_alphabet_.size() < kMaxLemmaAlphabetSize) break;
    ++lemma_cutoff;
    LOG(INFO) << "Incrementing lemma cutoff to " << lemma_cutoff << "...";
  }

  while (true) {
    pos_alphabet_.clear();
    for (int i = 0; i < NUM_SPECIAL_TOKENS; ++i) {
      pos_alphabet_.Insert(special_symbols[i]);
    }
    for (Alphabet::iterator iter = pos_alphabet.begin();
         iter != pos_alphabet.end(); ++iter) {
      if (pos_freqs[iter->second] > pos_cutoff) {
        pos_alphabet_.Insert(iter->first);
      }
    }
    if (pos_alphabet_.size() < kMaxPosAlphabetSize) break;
    ++pos_cutoff;
    LOG(INFO) << "Incrementing POS cutoff to " << pos_cutoff << "...";
  }

  while (true) {
    cpos_alphabet_.clear();
    for (int i = 0; i < NUM_SPECIAL_TOKENS; ++i) {
      cpos_alphabet_.Insert(special_symbols[i]);
    }
    for (Alphabet::iterator iter = cpos_alphabet.begin();
         iter != cpos_alphabet.end(); ++iter) {
      if (cpos_freqs[iter->second] > cpos_cutoff) {
        cpos_alphabet_.Insert(iter->first);
      }
    }
    if (cpos_alphabet_.size() < kMaxCoarsePosAlphabetSize) break;
    ++cpos_cutoff;
    LOG(INFO) << "Incrementing CPOS cutoff to " << cpos_cutoff << "...";
  }

  while (true) {
    feats_alphabet_.clear();
    for (int i = 0; i < NUM_SPECIAL_TOKENS; ++i) {
      feats_alphabet_.Insert(special_symbols[i]);
    }
    for (Alphabet::iterator iter = feats_alphabet.begin();
         iter != feats_alphabet.end(); ++iter) {
      if (feats_freqs[iter->second] > feats_cutoff) {
        feats_alphabet_.Insert(iter->first);
      }
    }
    if (feats_alphabet_.size() < kMaxFeatsAlphabetSize) break;
    ++feats_cutoff;
    LOG(INFO) << "Incrementing FEATS cutoff to " << feats_cutoff << "...";
  }

  form_alphabet_.StopGrowth();
  form_lower_alphabet_.StopGrowth();
  lemma_alphabet_.StopGrowth();
  prefix_alphabet_.StopGrowth();
  suffix_alphabet_.StopGrowth();
  feats_alphabet_.StopGrowth();
  pos_alphabet_.StopGrowth();
  cpos_alphabet_.StopGrowth();

  LOG(INFO) << "Number of forms: " << form_alphabet_.size() << endl
            << "Number of lower-case forms: " << form_lower_alphabet_.size() << endl
            << "Number of lemmas: " << lemma_alphabet_.size() << endl
            << "Number of prefixes: " << prefix_alphabet_.size() << endl
            << "Number of suffixes: " << suffix_alphabet_.size() << endl
            << "Number of feats: " << feats_alphabet_.size() << endl
            << "Number of pos: " << pos_alphabet_.size() << endl
            << "Number of cpos: " << cpos_alphabet_.size();

  CHECK_LT(form_alphabet_.size(), 0xffff);
  CHECK_LT(form_lower_alphabet_.size(), 0xffff);
  CHECK_LT(lemma_alphabet_.size(), 0xffff);
  CHECK_LT(prefix_alphabet_.size(), 0xffff);
  CHECK_LT(suffix_alphabet_.size(), 0xffff);
  CHECK_LT(feats_alphabet_.size(), 0xffff);
  CHECK_LT(pos_alphabet_.size(), 0xff);
  CHECK_LT(cpos_alphabet_.size(), 0xff);

  // TODO: Remove this (only for debugging purposes).
  BuildNames();
}
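// All six cutoff loops above repeat one pattern: re-insert the special
// symbols, keep entries whose frequency exceeds the cutoff, and raise the
// cutoff until the pruned alphabet fits under its size cap. A sketch of a
// generic helper (hypothetical; the Alphabet interface is assumed from the
// usage above):
int PruneAlphabetToSize(Alphabet& counted, const vector<int>& freqs,
                        const string* specials, int num_specials,
                        int cutoff, int max_size, Alphabet* pruned) {
  while (true) {
    pruned->clear();
    for (int i = 0; i < num_specials; ++i) pruned->Insert(specials[i]);
    for (Alphabet::iterator it = counted.begin(); it != counted.end(); ++it) {
      if (freqs[it->second] > cutoff) pruned->Insert(it->first);
    }
    if (pruned->size() < max_size) return cutoff;  // fits: report final cutoff
    ++cutoff;                                      // too big: prune harder
  }
}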
void M2_p2o2::each_train_one_iter()
{
  static bool** STA_noprobs = 0;  // static: initialized only once
  if(STA_noprobs==0 && !filter_read(STA_noprobs)){
    // init only once
    int all_tokens_train=0, all_token_filter_wrong=0;
    time_t now;
    time(&now);
    cout << "-Preparing no_probs at " << ctime(&now) << endl;
    STA_noprobs = new bool*[training_corpus->size()];
    for(unsigned int i=0;i<training_corpus->size();i++){
      DependencyInstance* x = training_corpus->at(i);
      STA_noprobs[i] = get_cut_o1(x,mfo1,dict,hp->CONF_score_o1filter_cut);
      all_tokens_train += x->length()-1;
      for(int m=1;m<x->length();m++)
        if(STA_noprobs[i][get_index2(x->length(),x->heads->at(m),m)])
          all_token_filter_wrong++;
    }
    cout << "For o1 filter: all " << all_tokens_train << ";filter wrong "
         << all_token_filter_wrong << endl;
    filter_write(STA_noprobs);
  }

  // per-sentence approach
  int num_sentences = training_corpus->size();
  // statistics
  int skip_sent_num = 0;
  int all_forward_instance = 0;
  int all_inst_right = 0;
  int all_inst_wrong = 0;
  // some useful info
  int odim = mach->get_odim();

  // training
  time_t now;
  time(&now);  // ctime is not reentrant! use ctime_r() instead if needed
  cout << "##*** // Start the p2o2 training for iter " << cur_iter
       << " at " << ctime(&now) << "with lrate " << cur_lrate << endl;
  cout << "#Sentences is " << num_sentences << " and resample (about) "
       << num_sentences*hp->CONF_NN_resample << endl;

  for(int i=0;i<num_sentences;){
    // random skip (instead of shuffling every time)
    if(drand48() > hp->CONF_NN_resample
        || training_corpus->at(i)->length() >= hp->CONF_higho_toolong){
      skip_sent_num++;
      i++;
      continue;
    }
    mach->prepare_batch();
    // if nesterov, update before each batch (pre-update)
    if(hp->CONF_NESTEROV_MOMENTUM)
      mach->nesterov_update(hp->CONF_UPDATE_WAY,hp->CONF_MOMENTUM_ALPHA);

    // main batch
    int this_sentence = 0;
    int this_instance = 0;
    for(;;){
      // forward
      DependencyInstance* x = training_corpus->at(i);
      const int length = x->length();
      nn_input* the_inputs;
      REAL *fscores = forward_scores_o2sib(x,mach,&the_inputs,
          dict->get_helper(),0,STA_noprobs[i],hp);
      this_instance += the_inputs->get_numi();
      all_forward_instance += the_inputs->get_numi();
      all_inst_right += the_inputs->inst_good;
      all_inst_wrong += the_inputs->inst_bad;
      this_sentence++;
      i++;

      the_scores::Scores<REAL_SCORES>* rscores =
          get_the_scores(the_inputs,fscores,mach->get_odim(),the_inputs->get_numi());
      REAL_SCORES* tmp_marginals = LencodeMarginals_o2sib(length,*rscores);

      // //two situations
      // int length = x->length();
      // if(!hp->CONF_labeled){
      //   //calculate prob
      //   rscores = rearrange_scores_o2sib(x,mach,the_inputs,fscores,0,0,0,hp);
      //   tmp_marginals = encodeMarginals_o2sib(length,rscores);
      // }
      // else{
      //   //calculate prob
      //   rscores = rearrange_scores_o2sib(x,mach,the_inputs,fscores,0,0,0,hp);
      //   tmp_marginals = LencodeMarginals_o2sib(length,rscores,mach->get_odim());
      // }

      // set gradients
      int HERE_dim = the_inputs->num_width;
      REAL* to_assign = fscores;
      for(int ii=0;ii<the_inputs->num_inst*HERE_dim;ii+=HERE_dim){
        int tmph = the_inputs->inputs->at(ii);
        int tmpm = the_inputs->inputs->at(ii+1);
        int tmps = the_inputs->inputs->at(ii+2);
        if(tmps<0)
          tmps = tmph;
        int tmp_goal = the_inputs->goals->at(ii/HERE_dim);
        REAL_SCORES* from_mar = tmp_marginals+odim*(ii/HERE_dim);
        for(int once=0;once<odim;once++,to_assign++){
          if(tmp_goal == once)
            *to_assign = -1 * (1 - from_mar[once]) + *to_assign * hp->CONF_score_p2reg;
          else
            *to_assign = from_mar[once] + *to_assign * hp->CONF_score_p2reg;
          // the objective is now being maximized
        }
      }
      // backward
      mach->backward(fscores);
      delete the_inputs;
      delete rscores;
      delete []tmp_marginals;

      if(i>=num_sentences)
        break;
      // out of the mini-batch; skip over-long sentences (a compromise), with a
      // bounds check so we never index past the corpus
      while(i<num_sentences
          && training_corpus->at(i)->length() >= hp->CONF_higho_toolong){
        skip_sent_num++;
        i++;
      }
      if(i>=num_sentences)
        break;
      if(hp->CONF_minibatch > 0){
        if(this_sentence >= hp->CONF_minibatch) break;
      }
      else{
        if(this_instance >= -1*hp->CONF_minibatch) break;
      }
    }
    // real update
    mach->update(hp->CONF_UPDATE_WAY,cur_lrate,hp->CONF_NN_WD,
        hp->CONF_MOMENTUM_ALPHA,hp->CONF_RMS_SMOOTH);
  }
  cout << "Iter done, skip " << skip_sent_num << " sentences and f&b "
       << all_forward_instance << ";good/bad: " << all_inst_right
       << "/" << all_inst_wrong << endl;
}
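// The gradient block above implements, per candidate part, the usual
// CRF-style rule "marginal minus gold indicator" (sign flipped because the
// objective here is maximized), plus a score-regularization term. A sketch
// of the per-output rule, restated as a helper under those assumptions:
inline REAL p2_gradient(bool is_gold, REAL marginal, REAL old_score, REAL p2reg) {
  // gold:     -(1 - p)  pushes the gold label's probability toward 1
  // non-gold:  p        pushes competing labels' probabilities toward 0
  REAL g = is_gold ? -(1 - marginal) : marginal;
  return g + old_score * p2reg;
}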
void M4_o2::each_train_one_iter()
{
  // per-sentence approach
  int num_sentences = training_corpus->size();
  // statistics
  int skip_sent_num = 0;
  int all_forward_instance = 0;
  int all_inst_right = 0;
  int all_inst_wrong = 0;
  // some useful info
  int odim = mach->get_odim();

  // training
  time_t now;
  time(&now);  // ctime is not reentrant! use ctime_r() instead if needed
  cout << "##*** //M4O2// Start the training for iter " << cur_iter
       << " at " << ctime(&now) << "with lrate " << cur_lrate << endl;
  cout << "#Sentences is " << num_sentences << " and resample (about) "
       << num_sentences*hp->CONF_NN_resample << endl;

  vector<DependencyInstance*> xs;
  int all_token=0, all_right=0;
  for(int i=0;i<num_sentences;){
    // random skip (instead of shuffling every time)
    if(drand48() > hp->CONF_NN_resample
        || training_corpus->at(i)->length() >= hp->CONF_higho_toolong){
      skip_sent_num++;
      i++;
      continue;
    }
    // main batch
    int this_instance_toupdate = 0;
    int this_tokens = 0;
    for(;;){
      // forward
      DependencyInstance* x = training_corpus->at(i);
      xs.push_back(x);
      Process::parse_o2sib(x,mfo1,0,true);  // possibly with margin added
      // -- statistics
      all_token += x->length()-1;
      for(int i2=1;i2<x->length();i2++){  // ignore root
        if((*(x->predict_heads))[i2] == (*(x->heads))[i2])
          all_right++;
        else
          this_instance_toupdate++;
      }
      // NOTE: this_tokens is never incremented (the line below is left
      // disabled), so CONF_mbatch_way == 1 currently sets a zero batch size.
      // this_tokens += x->length() - 1;
      i++;
      if(i>=num_sentences)
        break;
      // out of the mini-batch; skip over-long sentences (a compromise), with a
      // bounds check so we never index past the corpus
      while(i<num_sentences
          && training_corpus->at(i)->length() >= hp->CONF_higho_toolong){
        skip_sent_num++;
        i++;
      }
      if(i>=num_sentences)
        break;
      if (hp->CONF_minibatch > 0) {
        if (int(xs.size()) >= hp->CONF_minibatch) break;
      } else {
        if (this_instance_toupdate >= -1 * hp->CONF_minibatch) break;
      }
    }
    // backward
    for(unsigned int ii=0;ii<xs.size();ii++){
      DependencyInstance* x = xs[ii];
      nn_input* good;
      nn_input* bad;
      M3_pro2::get_nninput_o2sib(x,&good,&bad,dict);
      MM_margin_backward(mach, good, 1, hp->CONF_score_p2reg);
      MM_margin_backward(mach, bad, -1, hp->CONF_score_p2reg);
      delete good;
      delete bad;
    }
    int this_sentence = xs.size();
    xs.clear();
    // real update
    if (hp->CONF_mbatch_way == 1)
      mach->set_this_mbsize(this_tokens*this_tokens);
    else if (hp->CONF_mbatch_way == 2)
      mach->set_this_mbsize(this_sentence*this_sentence);
    mach->update(hp->CONF_UPDATE_WAY,cur_lrate,hp->CONF_NN_WD,
        hp->CONF_MOMENTUM_ALPHA,hp->CONF_RMS_SMOOTH);
  }
  cout << "Iter done, skip " << skip_sent_num << " sentences."
       << " AND training UAS: " << all_right << "/" << all_token
       << "=" << all_right/(0.0+all_token) << endl;
}
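// MM_margin_backward above receives +1 for gold parts and -1 for predicted
// wrong parts: a structured max-margin update that raises gold scores and
// lowers wrong ones. A sketch of the per-part hinge this corresponds to
// (hypothetical helper; the actual margin handling lives inside
// parse_o2sib / MM_margin_backward):
inline REAL part_hinge_direction(REAL gold_score, REAL wrong_score,
                                 REAL margin, bool is_gold_part) {
  if (gold_score - wrong_score >= margin) return 0;  // no violation, no update
  return is_gold_part ? +1 : -1;                     // direction pushed into the net
}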
void Method9_O3g::each_prepare_data_oneiter()
{
  delete []data;
  delete []target;
  delete []gradient;
  // gradient buffer
  gradient = new REAL[mach->GetWidth()*mach->GetOdim()];
  mach->SetGradOut(gradient);

  int sentences = training_corpus->size();
  int idim = mach->GetIdim();
  int odim = mach->GetOdim();

  // only done one time when o1_filter is on (decoding o1 is quite expensive)
  static REAL* data_right = 0;
  static REAL* data_wrong = 0;
  static int tmpall_right = 0;
  static int tmpall_wrong = 0;
  static int tmpall_bad = 0;

  int whether_o1_filter = 0;
  if(parameters->CONF_NN_highO_o1mach.length() > 0 && parameters->CONF_NN_highO_o1filter)
    whether_o1_filter = 1;
  //************ WE MUST SPECIFY O1_FILTER ****************//
  if(!whether_o1_filter){
    cout << "No o1-filter for o3g, too expensive!!" << endl;
    exit(1);
  }
  //************ WE MUST SPECIFY O1_FILTER ****************//

  if(data_right==0){
    // 1. o1-filter (MUST HAVE)
    FeatureGenO1* feat_temp_o1 = new FeatureGenO1(dict,parameters->CONF_x_window,
        parameters->CONF_add_distance,parameters->CONF_add_pos,
        parameters->CONF_add_distance_parent);
    double** all_scores_o1 = new double*[sentences];
    int all_tokens_train=0, all_token_filter_wrong=0;
    for(int i=0;i<sentences;i++){
      all_scores_o1[i] = 0;
      DependencyInstance* x = training_corpus->at(i);
      all_scores_o1[i] = get_scores_o1(x,parameters,mach_o1,feat_temp_o1);
      double* scores_o1_filter = all_scores_o1[i];
      all_tokens_train += x->length();
      for(int i2=1;i2<x->length();i2++){  // ignore root
        if(score_noprob(scores_o1_filter[get_index2(x->length(),x->heads->at(i2),i2)]))
          all_token_filter_wrong++;
      }
    }
    cout << "For o1 filter: all " << all_tokens_train << ";filter wrong "
         << all_token_filter_wrong << endl;
    time_t now;
    time(&now);
    cout << "#Finish o1-filter at " << ctime(&now) << flush;

    // 2. first pass --- figure out the counts
    int tmp2_right=0, tmp2_wrong=0, tmp2_bad=0;
    int tmp3_right=0, tmp3_wrong=0, tmp3_bad=0;
    for(int i=0;i<sentences;i++){
      DependencyInstance* x = training_corpus->at(i);
      double* scores_o1_filter = all_scores_o1[i];
      int length = x->length();
      for(int m=1;m<length;m++){
        // 2.1 the special case (0,0,c,m) when h==0
        int noprob_0m = score_noprob(scores_o1_filter[get_index2(length,0,m)]);
        int link_0m = (x->heads->at(m)==0);
        int c = -1;
        for(int mid=m-1;mid>0;mid--){
          if(x->heads->at(mid)==0){ c = mid; break; }
        }
        if(link_0m && c<0) tmp2_right++;
        else if(noprob_0m) tmp2_bad++;
        else tmp2_wrong++;
        for(int mid=1;mid<m;mid++){
          if(link_0m && mid==c) tmp3_right++;
          else if(noprob_0m || score_noprob(scores_o1_filter[get_index2(length,0,mid)])) tmp3_bad++;
          else tmp3_wrong++;
        }
        // 2.2 the ordinary ones
        for(int h=1;h<length;h++){  // h>=1
          if(h==m) continue;
          // get information
          int small = GET_MIN_ONE(m,h);
          int large = GET_MAX_ONE(m,h);
          bool link_hm = (x->heads->at(m)==h);
          int noprob_hm = score_noprob(scores_o1_filter[get_index2(length,h,m)]);
          int c=-1;  // inside sibling
          if(link_hm){
            if(h>m){
              for(int ii=m+1;ii<h;ii++)
                if(x->heads->at(ii)==h){ c = ii; break; }
            }
            else{
              for(int ii=m-1;ii>h;ii--)
                if(x->heads->at(ii)==h){ c = ii; break; }
            }
          }
          // for g and c
          for(int g=0;g<length;g++){
            if(g==h || g==m || g==c) continue;
            bool link_gh = (x->heads->at(h)==g);
            int noprob_gh = score_noprob(scores_o1_filter[get_index2(length,g,h)]);
            int nonproj_g = (g>=small && g<=large);
            if(link_hm && link_gh && c<0) tmp2_right++;
            else if(noprob_hm || noprob_gh || nonproj_g) tmp2_bad++;
            else tmp2_wrong++;
            for(int mid=small+1;mid<large;mid++){
              if(link_hm && link_gh && mid==c) tmp3_right++;
              else if(noprob_hm || noprob_gh || nonproj_g
                  || score_noprob(scores_o1_filter[get_index2(length,h,mid)])) tmp3_bad++;
              else tmp3_wrong++;
            }
          }
        }
      }
    }
    tmpall_right = tmp2_right+tmp3_right;
    tmpall_wrong = tmp2_wrong+tmp3_wrong;
    tmpall_bad = tmp2_bad+tmp3_bad;
    printf("--Stat<all,2,3>:right(%d,%d,%d),wrong(%d,%d,%d),bad(%d,%d,%d)\n",
        tmpall_right,tmp2_right,tmp3_right,
        tmpall_wrong,tmp2_wrong,tmp3_wrong,
        tmpall_bad,tmp2_bad,tmp3_bad);

    // 3. sweep a second time and add the examples
    // - allocate
    data_right = new REAL[tmpall_right*idim];
    data_wrong = new REAL[tmpall_wrong*idim];
    REAL* assign_right = data_right;
    REAL* assign_wrong = data_wrong;
    for(int i=0;i<sentences;i++){
      DependencyInstance* x = training_corpus->at(i);
      double* scores_o1_filter = all_scores_o1[i];
      int length = x->length();
      for(int m=1;m<length;m++){
        // 3.1 the special case (0,0,c,m) when h==0
        int noprob_0m = score_noprob(scores_o1_filter[get_index2(length,0,m)]);
        int link_0m = (x->heads->at(m)==0);
        int c = -1;
        for(int mid=m-1;mid>0;mid--){
          if(x->heads->at(mid)==0){ c = mid; break; }
        }
        if(link_0m && c<0){ feat_gen->fill_one(assign_right,x,0,m,-1,0); assign_right += idim; }
        else if(noprob_0m){}
        else{ feat_gen->fill_one(assign_wrong,x,0,m,-1,0); assign_wrong += idim; }
        for(int mid=1;mid<m;mid++){
          if(link_0m && mid==c){ feat_gen->fill_one(assign_right,x,0,m,mid,0); assign_right += idim; }
          else if(noprob_0m || score_noprob(scores_o1_filter[get_index2(length,0,mid)])){}
          else{ feat_gen->fill_one(assign_wrong,x,0,m,mid,0); assign_wrong += idim; }
        }
        // 3.2 the ordinary ones
        for(int h=1;h<length;h++){  // h>=1
          if(h==m) continue;
          // get information
          int small = GET_MIN_ONE(m,h);
          int large = GET_MAX_ONE(m,h);
          bool link_hm = (x->heads->at(m)==h);
          int noprob_hm = score_noprob(scores_o1_filter[get_index2(length,h,m)]);
          int c=-1;  // inside sibling
          if(link_hm){
            if(h>m){
              for(int ii=m+1;ii<h;ii++)
                if(x->heads->at(ii)==h){ c = ii; break; }
            }
            else{
              for(int ii=m-1;ii>h;ii--)
                if(x->heads->at(ii)==h){ c = ii; break; }
            }
          }
          // for g and c
          for(int g=0;g<length;g++){
            if(g==h || g==m || g==c) continue;
            bool link_gh = (x->heads->at(h)==g);
            int noprob_gh = score_noprob(scores_o1_filter[get_index2(length,g,h)]);
            int nonproj_g = (g>=small && g<=large);
            if(link_hm && link_gh && c<0){ feat_gen->fill_one(assign_right,x,h,m,-1,g); assign_right += idim; }
            else if(noprob_hm || noprob_gh || nonproj_g){}
            else{ feat_gen->fill_one(assign_wrong,x,h,m,-1,g); assign_wrong += idim; }
            for(int mid=small+1;mid<large;mid++){
              if(link_hm && link_gh && mid==c){ feat_gen->fill_one(assign_right,x,h,m,mid,g); assign_right += idim; }
              else if(noprob_hm || noprob_gh || nonproj_g
                  || score_noprob(scores_o1_filter[get_index2(length,h,mid)])){}
              else{ feat_gen->fill_one(assign_wrong,x,h,m,mid,g); assign_wrong += idim; }
            }
          }
        }
      }
    }
    for(int i=0;i<sentences;i++){ delete [](all_scores_o1[i]); }
    delete []all_scores_o1;
    time(&now);
    cout << "#Finish data-gen at " << ctime(&now) << flush;
  }

  // then consider CONF_NN_resample and copy the examples to finish the data
  if(parameters->CONF_NN_resample < 1){
    // take part of the wrong ones --- but first shuffle them
    shuffle_data(data_wrong,data_wrong,idim,idim,tmpall_wrong*idim,tmpall_wrong*idim,10);
  }
  int tmp_sumup = tmpall_wrong*parameters->CONF_NN_resample + tmpall_right;
  data = new REAL[tmp_sumup*idim];
  target = new REAL[tmp_sumup];
  memcpy(data,data_right,tmpall_right*idim*sizeof(REAL));
  memcpy(data+tmpall_right*idim,data_wrong,
      tmpall_wrong*parameters->CONF_NN_resample*idim*sizeof(REAL));
  for(int i=0;i<tmp_sumup;i++){
    if(i<tmpall_right) target[i] = 1;
    else target[i] = 0;
  }
  shuffle_data(data,target,idim,1,tmp_sumup*idim,tmp_sumup,10);  // final shuffle
  cout << "--M9, Data for this iter: samples all " << tmpall_right+tmpall_wrong
       << " resample: " << tmp_sumup << endl;
  current = 0;
  end = tmp_sumup;
}
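// get_index2(length, h, m) is used throughout as a flat offset into a
// per-sentence o1 score table. A sketch of the indexing it is assumed to
// perform (row-major over head/modifier; this is an assumption for
// illustration, not the project's actual definition):
inline int get_index2_sketch(int length, int head, int mod) {
  return head * length + mod;
}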
void DependencyDictionary::CreateLabelDictionary(DependencyReader *reader) {
  LOG(INFO) << "Creating label dictionary...";

  vector<int> label_freqs;

  // Go through the corpus and build the label dictionary,
  // counting the frequencies.
  reader->Open(pipe_->GetOptions()->GetTrainingFilePath());
  DependencyInstance *instance = reader->GetNext();
  while (instance != NULL) {
    int instance_length = instance->size();
    for (int i = 1; i < instance_length; ++i) {
      int id;
      // Add dependency label to alphabet.
      id = label_alphabet_.Insert(instance->GetDependencyRelation(i));
      if (id >= label_freqs.size()) {
        CHECK_EQ(id, label_freqs.size());
        label_freqs.push_back(0);
      }
      ++label_freqs[id];
    }
    delete instance;
    instance = reader->GetNext();
  }
  reader->Close();
  label_alphabet_.StopGrowth();

  // Go through the corpus again and build the set of existing labels for each
  // head-modifier POS pair.
  existing_labels_.clear();
  existing_labels_.resize(token_dictionary_->GetNumPosTags(),
                          vector<vector<int> >(
                              token_dictionary_->GetNumPosTags()));

  maximum_left_distances_.clear();
  maximum_left_distances_.resize(token_dictionary_->GetNumPosTags(),
                                 vector<int>(
                                     token_dictionary_->GetNumPosTags(), 0));

  maximum_right_distances_.clear();
  maximum_right_distances_.resize(token_dictionary_->GetNumPosTags(),
                                  vector<int>(
                                      token_dictionary_->GetNumPosTags(), 0));

  reader->Open(pipe_->GetOptions()->GetTrainingFilePath());
  instance = reader->GetNext();
  while (instance != NULL) {
    int instance_length = instance->size();
    for (int i = 1; i < instance_length; ++i) {
      int id;
      int head = instance->GetHead(i);
      const string &modifier_pos = instance->GetPosTag(i);
      const string &head_pos = instance->GetPosTag(head);
      int modifier_pos_id = token_dictionary_->GetPosTagId(modifier_pos);
      int head_pos_id = token_dictionary_->GetPosTagId(head_pos);
      if (modifier_pos_id < 0) modifier_pos_id = TOKEN_UNKNOWN;
      if (head_pos_id < 0) head_pos_id = TOKEN_UNKNOWN;
      //CHECK_GE(modifier_pos_id, 0);
      //CHECK_GE(head_pos_id, 0);

      id = label_alphabet_.Lookup(instance->GetDependencyRelation(i));
      CHECK_GE(id, 0);

      // Insert the label in the set of existing labels, if it is not there
      // already. NOTE: this is inefficient, maybe we should be using a
      // different data structure.
      vector<int> &labels = existing_labels_[modifier_pos_id][head_pos_id];
      int j;
      for (j = 0; j < labels.size(); ++j) {
        if (labels[j] == id) break;
      }
      if (j == labels.size()) labels.push_back(id);

      // Update the maximum distances if necessary.
      if (head != 0) {
        if (head < i) {
          // Right attachment.
          if (i - head > maximum_right_distances_[modifier_pos_id][head_pos_id]) {
            maximum_right_distances_[modifier_pos_id][head_pos_id] = i - head;
          }
        } else {
          // Left attachment.
          if (head - i > maximum_left_distances_[modifier_pos_id][head_pos_id]) {
            maximum_left_distances_[modifier_pos_id][head_pos_id] = head - i;
          }
        }
      }
    }
    delete instance;
    instance = reader->GetNext();
  }
  reader->Close();

  LOG(INFO) << "Number of labels: " << label_alphabet_.size();
}
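// The tables built above are meant to be consulted at parse time:
// existing_labels_[modifier_pos][head_pos] lists every label id seen for that
// POS pair, and maximum_{left,right}_distances_ bound attachment lengths.
// A sketch of the membership test (hypothetical free function, mirroring the
// linear scan used during construction):
#include <vector>

bool SeenLabel(const std::vector<std::vector<std::vector<int> > >& existing,
               int modifier_pos_id, int head_pos_id, int label_id) {
  const std::vector<int>& labels = existing[modifier_pos_id][head_pos_id];
  for (size_t j = 0; j < labels.size(); ++j) {
    if (labels[j] == label_id) return true;
  }
  return false;
}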
DependencyInstance *DependencyReader::GetNext() {
  // Fill all fields for the entire sentence.
  vector<vector<string> > sentence_fields;
  string line;
  if (is_.is_open()) {
    // Read until a blank line (the sentence separator) or EOF. Testing the
    // getline result directly avoids the classic eof-loop pitfall of
    // processing a failed read.
    while (getline(is_, line)) {
      if (line.length() <= 0) break;
      vector<string> fields;
      StringSplit(line, "\t", &fields);
      sentence_fields.push_back(fields);
    }
  }

  // Sentence length.
  int length = sentence_fields.size();

  // Convert to arrays of forms, lemmas, etc.
  // Note: the first token is the root symbol.
  vector<string> forms(length+1);
  vector<string> brownall(length+1);
  vector<string> lemmas(length+1);
  vector<string> cpos(length+1);
  vector<string> brown4(length+1);
  vector<string> brown6(length+1);
  vector<string> pos(length+1);
  vector<vector<string> > feats(length+1);
  vector<string> deprels(length+1);
  vector<int> selects(length+1);
  vector<int> heads(length+1);

  forms[0] = "_root_";
  brownall[0] = "_root_";
  lemmas[0] = "_root_";
  cpos[0] = "_root_";
  brown4[0] = "_root_";
  brown6[0] = "_root_";
  pos[0] = "_root_";
  deprels[0] = "_root_";
  heads[0] = -1;
  feats[0] = vector<string>(1, "_root_");
  // LPK: the root must always be selected, otherwise nothing can be linked to it.
  selects[0] = 1;

  for(int i = 0; i < length; i++) {
    const vector<string> &info = sentence_fields[i];
    // LPK_TODO: to extend the original CoNLL format into an augmented CoNLL
    // format, the extra columns (index > 9) would be read here, and the
    // DependencyInstance format would have to change accordingly.
    forms[i+1] = info[1];
    brownall[i+1] = info[12];
    lemmas[i+1] = info[2];
    cpos[i+1] = info[3];
    brown4[i+1] = info[10];
    brown6[i+1] = info[11];
    pos[i+1] = info[4];

    string feat_seq = info[5];
    if (0 == feat_seq.compare("_")) {
      // LPK: an underscore means this field is empty.
      feats[i+1].clear();
    } else {
      // LPK: each field is a vector of strings, even when it holds a single value.
      StringSplit(feat_seq, "|", &feats[i+1]);
    }

    // LPK: save the dependency relation.
    deprels[i+1] = info[7];

    // LPK: parse the index of the head into the vector heads.
    stringstream ss(info[6]);
    ss >> heads[i+1];
    stringstream sss(info[13]);
    //VLOG(2) << "input into select " << info[13];
    sss >> selects[i+1];
  }

  DependencyInstance *instance = NULL;
  if (length > 0) {
    instance = new DependencyInstance;
    instance->Initialize(forms, brownall, lemmas, cpos, brown4, brown6, pos,
                         feats, deprels, heads, selects);
  }
  return instance;
}
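// The reader above consumes an augmented CoNLL-X line; a sketch of the
// column indices it hard-codes into `info` (hypothetical enum, for reference
// only; the project indexes the fields directly):
enum AugmentedConllColumn {
  COL_ID = 0, COL_FORM = 1, COL_LEMMA = 2, COL_CPOS = 3, COL_POS = 4,
  COL_FEATS = 5,    // "_" or "|"-separated morphological features
  COL_HEAD = 6, COL_DEPREL = 7,
  COL_BROWN4 = 10, COL_BROWN6 = 11, COL_BROWNALL = 12,  // Brown-cluster columns
  COL_SELECT = 13   // selection flag; the root is always selected
};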
double DependencyEvaluator::evaluate(std::string &act_file, std::string &pred_file,
                                     std::string &format, bool labeled)
{
  set<string> punctSet = set<string>();
  punctSet.insert("''");
  punctSet.insert("``");
  punctSet.insert(".");
  punctSet.insert(":");
  punctSet.insert(",");
  punctSet.insert("PU");  // for CTB

  CONLLReader* goldReader = new CONLLReader();
  goldReader->startReading(act_file.c_str());
  CONLLReader* predictedReader = new CONLLReader();
  predictedReader->startReading(pred_file.c_str());

  int total = 0;
  int total_root = 0;
  int total_non_root = 0;
  int corr = 0;
  int corr_root = 0;
  int corr_non_root = 0;
  int corrL = 0;
  int corrL_root = 0;
  int corrL_non_root = 0;
  int numsent = 0;
  int corrsent = 0;
  int corrsentL = 0;
  int totalNoPunc = 0;
  int totalNoPunc_root = 0;
  int totalNoPunc_non_root = 0;
  int corrNoPunc = 0;
  int corrNoPunc_root = 0;
  int corrNoPunc_non_root = 0;
  int corrLNoPunc = 0;
  int corrLNoPunc_root = 0;
  int corrLNoPunc_non_root = 0;
  int corrsentNoPunc = 0;
  int corrsentLNoPunc = 0;

  DependencyInstance* goldInstance = goldReader->getNext();
  DependencyInstance* predInstance = predictedReader->getNext();
  while(goldInstance != NULL){
    int instanceLength = goldInstance->length();
    if(instanceLength != predInstance->length()){
      cout << "Lengths do not match on sentence " << numsent << endl;
    }
    vector<int>* goldHeads = goldInstance->heads;
    vector<string*>* goldLabels = goldInstance->deprels;
    vector<int>* predHeads = predInstance->heads;  // after reading, the predicted heads are stored here
    vector<string*>* predLabels = predInstance->deprels;
    vector<string*>* pos = goldInstance->postags;

    bool whole = true;
    bool wholeL = true;
    bool wholeNP = true;
    bool wholeLNP = true;
    for(int i = 1; i < instanceLength; i++){
      if((*goldHeads)[i] == 0){
        total_root++;
      } else{
        total_non_root++;
      }
      if((*predHeads)[i] == (*goldHeads)[i]){
        corr++;
        if((*goldHeads)[i] == 0){ corr_root++; }
        else{ corr_non_root++; }
        if(labeled){
          if((*(*predLabels)[i]) == (*(*goldLabels)[i])){
            corrL++;
            if((*goldHeads)[i] == 0){ corrL_root++; }
            else{ corrL_non_root++; }
          } else{
            wholeL = false;
          }
        }
      } else{
        whole = false;
        wholeL = false;
      }
      if(punctSet.count(*((*pos)[i])) <= 0){  // not punctuation
        totalNoPunc++;
        if((*goldHeads)[i] == 0){ totalNoPunc_root++; }
        else{ totalNoPunc_non_root++; }
        if((*predHeads)[i] == (*goldHeads)[i]){
          corrNoPunc++;
          if((*goldHeads)[i] == 0){ corrNoPunc_root++; }
          else{ corrNoPunc_non_root++; }
          if(labeled){
            if((*(*predLabels)[i]) == (*(*goldLabels)[i])){
              corrLNoPunc++;
              if((*goldHeads)[i] == 0){ corrLNoPunc_root++; }
              else{ corrLNoPunc_non_root++; }
            } else{
              wholeLNP = false;
            }
          }
        } else{
          wholeNP = false;
          wholeLNP = false;
        }
      }
    }
    total += instanceLength - 1;
    if(whole){ corrsent++; }
    if(wholeL){ corrsentL++; }
    if(wholeNP){ corrsentNoPunc++; }
    if(wholeLNP){ corrsentLNoPunc++; }
    numsent++;

    delete(goldInstance);
    delete(predInstance);
    goldInstance = goldReader->getNext();
    predInstance = predictedReader->getNext();
  }

  printf("Tokens: %d\n", total);
  printf("Correct: %d\n", corr);
  printf("Unlabeled Accuracy: %.2lf%%\n", ((double)corr) * 100 / total);
  printf("Unlabeled Complete Correct: %.2lf%%\n", ((double)corrsent) * 100 / numsent);
  if(labeled){
    printf("Labeled Accuracy: %.2lf%%\n", ((double)corrL) * 100 / total);
    printf("Labeled Complete Correct: %.2lf%%\n", ((double)corrsentL) * 100 / numsent);
  }
  printf("\n");
  printf("Tokens Root: %d\n", total_root);
  printf("Correct Root: %d\n", corr_root);
  printf("Unlabeled Accuracy Root: %.2lf%%\n", ((double)corr_root) * 100 / total_root);
  if(labeled){
    printf("Labeled Accuracy Root: %.2lf%%\n", ((double)corrL_root) * 100 / total_root);
  }
  printf("\n");
  printf("Tokens Non Root: %d\n", total_non_root);
  printf("Correct Non Root: %d\n", corr_non_root);
  printf("Unlabeled Accuracy Non Root: %.2lf%%\n",
         ((double)corr_non_root) * 100 / total_non_root);
  if(labeled){
    printf("Labeled Accuracy Non Root: %.2lf%%\n",
           ((double)corrL_non_root) * 100 / total_non_root);
  }
  printf("\n");
  printf("Tokens No Punc: %d\n", totalNoPunc);
  printf("Correct No Punc: %d\n", corrNoPunc);
  printf("Unlabeled Accuracy No Punc: %.2lf%%\n", ((double)corrNoPunc) * 100 / totalNoPunc);
  printf("Unlabeled Complete Correct No Punc: %.2lf%%\n",
         ((double)corrsentNoPunc) * 100 / numsent);
  if(labeled){
    printf("Labeled Accuracy No Punc: %.2lf%%\n", ((double)corrLNoPunc) * 100 / totalNoPunc);
    printf("Labeled Complete Correct No Punc: %.2lf%%\n",
           ((double)corrsentLNoPunc) * 100 / numsent);
  }
  printf("\n");
  printf("Tokens No Punc Root: %d\n", totalNoPunc_root);
  printf("Correct No Punc Root: %d\n", corrNoPunc_root);
  printf("Unlabeled Accuracy No Punc Root: %.2lf%%\n",
         ((double)corrNoPunc_root) * 100 / totalNoPunc_root);
  if(labeled){
    printf("Labeled Accuracy No Punc Root: %.2lf%%\n",
           ((double)corrLNoPunc_root) * 100 / totalNoPunc_root);
  }
  printf("\n");
  printf("Tokens No Punc Non Root: %d\n", totalNoPunc_non_root);
  printf("Correct No Punc Non Root: %d\n", corrNoPunc_non_root);
  printf("Unlabeled Accuracy No Punc Non Root: %.2lf%%\n",
         ((double)corrNoPunc_non_root) * 100 / totalNoPunc_non_root);
  if(labeled){
    printf("Labeled Accuracy No Punc Non Root: %.2lf%%\n",
           ((double)corrLNoPunc_non_root) * 100 / totalNoPunc_non_root);
  }

  goldReader->finishReading();
  predictedReader->finishReading();
  delete(goldReader);
  delete(predictedReader);
  return ((double)corr) / total;
}
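// All the percentages printed above are plain ratios over token or sentence
// counts; a sketch of the two headline metrics (standard UAS/LAS definitions,
// matching the counters used in the evaluator):
struct AttachmentCounts {
  int total;                    // scored tokens (root token excluded)
  int correct_head;             // predicted head matches gold head
  int correct_head_and_label;   // head and dependency label both match
  double uas() const { return total ? 100.0 * correct_head / total : 0.0; }
  double las() const { return total ? 100.0 * correct_head_and_label / total : 0.0; }
};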
void M2_p2o1::each_train_one_iter()
{
  // per-sentence approach
  int num_sentences = training_corpus->size();
  // statistics
  int skip_sent_num = 0;
  int all_forward_instance = 0;
  int all_inst_right = 0;
  int all_inst_wrong = 0;
  // some useful info
  int odim = mach->get_odim();

  // training
  time_t now;
  time(&now);  // ctime is not reentrant! use ctime_r() instead if needed
  cout << "##*** //p2o1// Start the training for iter " << cur_iter
       << " at " << ctime(&now) << "with lrate " << cur_lrate << endl;
  cout << "#Sentences is " << num_sentences << " and resample (about) "
       << num_sentences*hp->CONF_NN_resample << endl;

  for(int i=0;i<num_sentences;){
    // random skip (instead of shuffling every time)
    if(drand48() > hp->CONF_NN_resample){
      skip_sent_num++;
      i++;
      continue;
    }
    mach->prepare_batch();
    // if nesterov, update before each batch (pre-update)
    if(hp->CONF_NESTEROV_MOMENTUM)
      mach->nesterov_update(hp->CONF_UPDATE_WAY,hp->CONF_MOMENTUM_ALPHA);

    // main batch
    int this_sentence = 0;
    int this_instance = 0;
    int this_tokens = 0;
    for(;;){
      // forward
      DependencyInstance* x = training_corpus->at(i);
      nn_input* the_inputs;
      REAL *fscores = forward_scores_o1(x,mach,&the_inputs,dict->get_helper(),0,hp);
      double* rscores = 0;
      double* tmp_marginals = 0;
      this_instance += the_inputs->get_numi();
      all_forward_instance += the_inputs->get_numi();
      all_inst_right += the_inputs->inst_good;
      all_inst_wrong += the_inputs->inst_bad;
      this_sentence++;
      this_tokens += x->length()-1;
      i++;

      adjust_scores_before(the_inputs, fscores, odim, hp->CONF_margin);
      // two situations
      int length = x->length();
      if(!hp->CONF_labeled){
        // calculate probabilities (unlabeled)
        rscores = rearrange_scores_o1(x,mach,the_inputs,fscores,0,0,hp);
        tmp_marginals = encodeMarginals(length,rscores);
      }
      else{
        // calculate probabilities (labeled)
        rscores = rearrange_scores_o1(x,mach,the_inputs,fscores,0,0,hp);
        tmp_marginals = LencodeMarginals(length,rscores,mach->get_odim());
      }
      adjust_scores_after(the_inputs, fscores, odim, hp->CONF_margin);

      // set gradients
      int HERE_dim = the_inputs->num_width;
      REAL* to_assign = fscores;
      for(int ii=0;ii<the_inputs->num_inst*HERE_dim;ii+=HERE_dim){
        int tmph = the_inputs->inputs->at(ii);
        int tmpm = the_inputs->inputs->at(ii+1);
        int tmp_goal = the_inputs->goals->at(ii/HERE_dim);
        for(int once=0;once<odim;once++,to_assign++){
          if(tmp_goal == once)
            *to_assign = -1 * (1 - tmp_marginals[get_index2(length,tmph,tmpm,once,odim)])
                + *to_assign * hp->CONF_score_p2reg;
          else
            *to_assign = tmp_marginals[get_index2(length,tmph,tmpm,once,odim)]
                + *to_assign * hp->CONF_score_p2reg;
          // the objective is now being maximized
        }
      }
      // backward
      mach->backward(fscores);
      //mach->check_gradients(the_inputs);
      delete the_inputs;
      delete []fscores;
      delete []rscores;
      delete []tmp_marginals;

      // out of the mini-batch
      if(i>=num_sentences) break;
      if(hp->CONF_minibatch > 0){
        if(this_sentence >= hp->CONF_minibatch) break;
      }
      else{
        if(this_instance >= -1*hp->CONF_minibatch) break;
      }
    }
    // real update
    if(hp->CONF_mbatch_way == 1)
      mach->set_this_mbsize(this_tokens*this_tokens);
    else if(hp->CONF_mbatch_way == 2)
      mach->set_this_mbsize(this_sentence*this_sentence);
    mach->update(hp->CONF_UPDATE_WAY,cur_lrate,hp->CONF_NN_WD,
        hp->CONF_MOMENTUM_ALPHA,hp->CONF_RMS_SMOOTH);
  }
  cout << "Iter done, skip " << skip_sent_num << " sentences and f&b "
       << all_forward_instance << ";good/bad: " << all_inst_right
       << "/" << all_inst_wrong << endl;
}
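// adjust_scores_before/adjust_scores_after above bracket the marginal
// computation; the natural reading is a softmax-margin (cost-augmented)
// scheme: add CONF_margin to every non-gold score before computing the
// marginals, then undo it so the stored scores stay clean. A sketch under
// that assumption (not the project's actual implementation):
inline void add_margin_cost(REAL* scores, int odim, int gold, REAL margin) {
  for (int k = 0; k < odim; ++k) {
    if (k != gold) scores[k] += margin;  // cost-augment the non-gold outputs
  }
}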
void Method8_O2g::each_prepare_data_oneiter()
{
  delete []data;
  delete []target;
  delete []gradient;
  // gradient buffer
  gradient = new REAL[mach->GetWidth()*mach->GetOdim()];
  mach->SetGradOut(gradient);
  //FeatureGenO2sib* feat_o2 = (FeatureGenO2sib*)feat_gen;  // force it

  int sentences = training_corpus->size();
  int idim = mach->GetIdim();
  int odim = mach->GetOdim();

  // only done one time when o1_filter is on (decoding o1 is quite expensive)
  static REAL* data_right = 0;
  static REAL* data_wrong = 0;
  static int tmpall_right = 0;
  static int tmpall_wrong = 0;
  static int tmpall_bad = 0;

  int whether_o1_filter = 0;
  if(parameters->CONF_NN_highO_o1mach.length() > 0 && parameters->CONF_NN_highO_o1filter)
    whether_o1_filter = 1;
  //************ WE MUST SPECIFY O1_FILTER ****************//
  if(!whether_o1_filter){
    cout << "No o1-filter for o2g, too expensive!!" << endl;
    exit(1);
  }
  //************ WE MUST SPECIFY O1_FILTER ****************//

  // (the "|| !whether_o1_filter" test below is vacuous after the guard above)
  if(data_right==0 || !whether_o1_filter){
    // sweep everything once and count
    FeatureGenO1* feat_temp_o1 = new FeatureGenO1(dict,parameters->CONF_x_window,
        parameters->CONF_add_distance,parameters->CONF_add_pos,
        parameters->CONF_add_distance_parent);
    double** all_scores_o1 = new double*[sentences];
    int all_tokens_train=0, all_token_filter_wrong=0;
    for(int i=0;i<sentences;i++){
      all_scores_o1[i] = 0;
      if(whether_o1_filter){
        DependencyInstance* x = training_corpus->at(i);
        all_scores_o1[i] = get_scores_o1(x,parameters,mach_o1,feat_temp_o1);
        double* scores_o1_filter = all_scores_o1[i];
        all_tokens_train += x->length();
        for(int i2=1;i2<x->length();i2++){  // ignore root
          if(score_noprob(scores_o1_filter[get_index2(x->length(),x->heads->at(i2),i2)]))
            all_token_filter_wrong++;
        }
      }
    }
    if(whether_o1_filter)
      cout << "For o1 filter: all " << all_tokens_train << ";filter wrong "
           << all_token_filter_wrong << endl;
    time_t now;
    time(&now);
    cout << "#Finish o1-filter at " << ctime(&now) << flush;

    int length_sofar_fordebugging = 0;
    for(int i=0;i<sentences;i++){
      DependencyInstance* x = training_corpus->at(i);
      double* scores_o1_filter = all_scores_o1[i];
      int length = x->length();
      /*
      //------debugging------ ###tmpall_becauseof_unprojective###
      length_sofar_fordebugging += length - 1;
      if(!whether_o1_filter) scores_o1_filter = new double[length*length];
      //------debugging------
      */
      for(int m=1;m<length;m++){
        // first the special case (0,0,m)
        if(x->heads->at(m) == 0) tmpall_right++;
        else if(score_noprob(scores_o1_filter[get_index2(length,0,m)])) tmpall_bad++;
        else tmpall_wrong++;
        // then (g,h,m)
        for(int h=1;h<length;h++){
          if(m==h) continue;
          int nope_hm = score_noprob(scores_o1_filter[get_index2(length,h,m)]);
          int link_hm = (x->heads->at(m)==h);
          int small = GET_MIN_ONE(m,h);
          int large = GET_MAX_ONE(m,h);
          for(int g=0;g<length;g++){
            if(g==h || g==m) continue;
            //if(g>=s && g<=t) continue;  ### would allow non-projective here ###
            int nope_gh = score_noprob(scores_o1_filter[get_index2(length,g,h)]);
            if(link_hm && x->heads->at(h)==g) tmpall_right++;
            else if(nope_hm || nope_gh || (g>=small && g<=large))  // no non-projective
              tmpall_bad++;
            else tmpall_wrong++;
          }
        }
      }
      /*
      //------debugging------
      if(tmpall_right != length_sofar_fordebugging){
        cout << i << ": sth strange happen" << endl;
      }
      if(!whether_o1_filter) delete [] scores_o1_filter;
      //------debugging------
      */
    }
    printf("--Stat:%d,%d,%d\n",tmpall_right,tmpall_wrong,tmpall_bad);

    // sweep a second time and add the examples
    // - allocate
    data_right = new REAL[tmpall_right*idim];
    data_wrong = new REAL[tmpall_wrong*idim];
    REAL* assign_right = data_right;
    REAL* assign_wrong = data_wrong;
    for(int i=0;i<sentences;i++){
      DependencyInstance* x = training_corpus->at(i);
      int length = x->length();
      double* scores_o1_filter = all_scores_o1[i];
      for(int m=1;m<length;m++){
        // first the special case (0,0,m)
        if(x->heads->at(m) == 0){ feat_gen->fill_one(assign_right,x,0,m,0); assign_right += idim; }
        else if(score_noprob(scores_o1_filter[get_index2(length,0,m)])){}
        else{ feat_gen->fill_one(assign_wrong,x,0,m,0); assign_wrong += idim; }
        // then (g,h,m)
        for(int h=1;h<length;h++){
          if(m==h) continue;
          int nope_hm = score_noprob(scores_o1_filter[get_index2(length,h,m)]);
          int link_hm = (x->heads->at(m)==h);
          int small = GET_MIN_ONE(m,h);
          int large = GET_MAX_ONE(m,h);
          for(int g=0;g<length;g++){
            if(g==h || g==m) continue;
            //if(g>=s && g<=t) continue;  ### would allow non-projective here ###
            int nope_gh = score_noprob(scores_o1_filter[get_index2(length,g,h)]);
            if(link_hm && x->heads->at(h)==g){ feat_gen->fill_one(assign_right,x,h,m,g); assign_right += idim; }
            else if(nope_hm || nope_gh || (g>=small && g<=large))  // no non-projective
              {}
            else{ feat_gen->fill_one(assign_wrong,x,h,m,g); assign_wrong += idim; }
          }
        }
      }
    }
    for(int i=0;i<sentences;i++){ delete [](all_scores_o1[i]); }
    delete []all_scores_o1;
    time(&now);
    cout << "#Finish data-gen at " << ctime(&now) << flush;
  }

  // then consider CONF_NN_resample and copy the examples to finish the data
  if(parameters->CONF_NN_resample < 1){
    // take part of the wrong ones --- but first shuffle them
    shuffle_data(data_wrong,data_wrong,idim,idim,tmpall_wrong*idim,tmpall_wrong*idim,10);
  }
  int tmp_sumup = tmpall_wrong*parameters->CONF_NN_resample + tmpall_right;
  data = new REAL[tmp_sumup*idim];
  target = new REAL[tmp_sumup];
  memcpy(data,data_right,tmpall_right*idim*sizeof(REAL));
  memcpy(data+tmpall_right*idim,data_wrong,
      tmpall_wrong*parameters->CONF_NN_resample*idim*sizeof(REAL));
  for(int i=0;i<tmp_sumup;i++){
    if(i<tmpall_right) target[i] = 1;
    else target[i] = 0;
  }
  shuffle_data(data,target,idim,1,tmp_sumup*idim,tmp_sumup,10);  // final shuffle
  cout << "--Data for this iter(M8:o2g): samples all " << tmpall_right+tmpall_wrong
       << " resample: " << tmp_sumup << endl;
  current = 0;
  end = tmp_sumup;
  if(!whether_o1_filter){  // never taken after the guard above
    delete[] data_right;
    delete[] data_wrong;
  }
}
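// The resampling tail shared by the M8/M9 data builders keeps every positive
// example and only a CONF_NN_resample fraction of the (pre-shuffled)
// negatives. A sketch of the resulting sample count computed above:
inline int resampled_count(int n_right, int n_wrong, double resample) {
  // all positives, plus the leading slice of the shuffled negatives
  return n_right + static_cast<int>(n_wrong * resample);
}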