static void trans_o3g(double* s,double* nope,long len,int ln) { { //to log number double* tmp_yes = s; double* tmp_nope = nope; for(long i=0;i<len*len*len*len;i++){ SET_LOG_HERE(tmp_yes,tmp_nope,ln); tmp_yes += ln; tmp_nope += 1; } } //sum for(int m=1;m<len;m++){ double all_nope = 0; //1.calculate nope {//h=0 all_nope += nope[get_index2_o3g(len,0,0,0,m)]; //special one for(int c=m-1;c>0;c--) all_nope += nope[get_index2_o3g(len,0,0,c,m)]; //0,0,c,m } for(int h=1;h<len;h++){ if(h==m) continue; int small = GET_MIN_ONE(h,m); int large = GET_MAX_ONE(h,m); for(int g=0;g<small;g++){ all_nope += nope[get_index2_o3g(len,g,h,h,m)]; //g,h,h,m for(int c=small+1;c<large;c++) all_nope += nope[get_index2_o3g(len,g,h,c,m)]; //g,h,c,m } for(int g=large+1;g<len;g++){ all_nope += nope[get_index2_o3g(len,g,h,h,m)]; //g,h,h,m for(int c=small+1;c<large;c++) all_nope += nope[get_index2_o3g(len,g,h,c,m)]; //g,h,c,m } } //2.then ... { long ind = 0; ind = get_index2_o3g(len,0,0,0,m); //special one for(int la=0;la<ln;la++){ long ind_la = get_index2_o3g(len,0,0,0,m,la,ln); s[ind_la] += all_nope-nope[ind]; } for(int c=m-1;c>0;c--){ ind = get_index2_o3g(len,0,0,c,m); //0,0,c,m for(int la=0;la<ln;la++){ long ind_la = get_index2_o3g(len,0,0,c,m,la,ln); s[ind_la] += all_nope-nope[ind]; } } } for(int h=1;h<len;h++){ long ind = 0; if(h==m) continue; int small = GET_MIN_ONE(h,m); int large = GET_MAX_ONE(h,m); for(int g=0;g<small;g++){ ind = get_index2_o3g(len,g,h,h,m); //g,h,h,m for(int la=0;la<ln;la++){ long ind_la = get_index2_o3g(len,g,h,h,m,la,ln); s[ind_la] += all_nope-nope[ind]; } for(int c=small+1;c<large;c++){ ind = get_index2_o3g(len,g,h,c,m); //g,h,c,m for(int la=0;la<ln;la++){ long ind_la = get_index2_o3g(len,g,h,c,m,la,ln); s[ind_la] += all_nope-nope[ind]; } } } for(int g=large+1;g<len;g++){ ind = get_index2_o3g(len,g,h,h,m); //g,h,h,m for(int la=0;la<ln;la++){ long ind_la = get_index2_o3g(len,g,h,h,m,la,ln); s[ind_la] += all_nope-nope[ind]; } for(int c=small+1;c<large;c++){ ind = get_index2_o3g(len,g,h,c,m); //g,h,c,m for(int la=0;la<ln;la++){ long ind_la = get_index2_o3g(len,g,h,c,m,la,ln); s[ind_la] += all_nope-nope[ind]; } } } } } }
void M2_p2o3::each_train_one_iter() { static bool** STA_noprobs = 0; //static ine, init only once if(STA_noprobs==0 && !filter_read(STA_noprobs)){ //init only once int all_tokens_train=0,all_token_filter_wrong=0; time_t now; time(&now); cout << "-Preparing no_probs at " << ctime(&now) << endl; STA_noprobs = new bool*[training_corpus->size()]; for(unsigned int i=0;i<training_corpus->size();i++){ DependencyInstance* x = training_corpus->at(i); STA_noprobs[i] = get_cut_o1(x,mfo1,dict,hp->CONF_score_o1filter_cut); all_tokens_train += x->length()-1; for(int m=1;m<x->length();m++) if(STA_noprobs[i][get_index2(x->length(),x->heads->at(m),m)]) all_token_filter_wrong ++; } cout << "For o1 filter: all " << all_tokens_train << ";filter wrong " << all_token_filter_wrong << endl; filter_write(STA_noprobs); } //per-sentence approach int num_sentences = training_corpus->size(); //statistics int skip_sent_num = 0; int all_forward_instance = 0; int all_inst_right = 0; int all_inst_wrong = 0; //some useful info int odim = mach->get_odim(); //training time_t now; time(&now); //ctime is not rentrant ! use ctime_r() instead if needed cout << "##*** // Start the p2o3 training for iter " << cur_iter << " at " << ctime(&now) << "with lrate " << cur_lrate << endl; cout << "#Sentences is " << num_sentences << " and resample (about)" << num_sentences*hp->CONF_NN_resample << endl; for(int i=0;i<num_sentences;){ //random skip (instead of shuffling every time) if(drand48() > hp->CONF_NN_resample || training_corpus->at(i)->length() >= hp->CONF_higho_toolong || training_corpus->at(i)->length() <= hp->CONF_higho_tooshort){ skip_sent_num ++; i ++; continue; } mach->prepare_batch(); //if nesterov update before each batch (pre-update) if(hp->CONF_NESTEROV_MOMENTUM) mach->nesterov_update(hp->CONF_UPDATE_WAY,hp->CONF_MOMENTUM_ALPHA); //main batch int this_sentence = 0; int this_instance = 0; for(;;){ //forward DependencyInstance* x = training_corpus->at(i); int length = x->length(); nn_input* the_inputs; REAL *fscores = forward_scores_o3g(x,mach,&the_inputs,dict->get_helper(),0,STA_noprobs[i],hp); this_instance += the_inputs->get_numi(); all_forward_instance += the_inputs->get_numi(); all_inst_right += the_inputs->inst_good; all_inst_wrong += the_inputs->inst_bad; this_sentence ++; i++; the_scores::Scores<REAL_SCORES>* rscores = get_the_scores(the_inputs,fscores,mach->get_odim(),the_inputs->get_numi()); REAL_SCORES* tmp_marginals = LencodeMarginals_o3g(length,*rscores); // //two situations // int length = x->length(); // if(!hp->CONF_labeled){ // //calculate prob // rscores = rearrange_scores_o3g(x,mach,the_inputs,fscores,0,0,0,0,hp); // tmp_marginals = encodeMarginals_o3g(length,rscores); // } // else{ // //calculate prob // rscores = rearrange_scores_o3g(x,mach,the_inputs,fscores,0,0,0,0,hp); // tmp_marginals = LencodeMarginals_o3g(length,rscores,mach->get_odim()); // } //set gradients int HERE_dim = the_inputs->num_width; REAL* to_assign = fscores; for(int ii=0;ii<the_inputs->num_inst*HERE_dim;ii+=HERE_dim){ int tmph = the_inputs->inputs->at(ii); int tmpm = the_inputs->inputs->at(ii+1); int tmps = the_inputs->inputs->at(ii+2); int tmpg = the_inputs->inputs->at(ii+3); if(tmps<0) tmps = tmph; if(tmpg<0) tmpg = 0; int tmp_goal = the_inputs->goals->at(ii/HERE_dim); bool tmp_nonproj = ((tmpg>GET_MIN_ONE(tmph,tmpm)) && (tmpg<GET_MAX_ONE(tmph,tmpm))); //!!maybe non-proj right link REAL_SCORES* from_mar = tmp_marginals+odim*(ii/HERE_dim); for(int once=0;once<odim;once++,to_assign++){ if(tmp_nonproj){ *to_assign = 0; //no gradient continue; } double tmp_one_from_m = from_mar[once]; if(tmp_goal == once) *to_assign = -1 * (1 - tmp_one_from_m) + *to_assign * hp->CONF_score_p2reg; else *to_assign = tmp_one_from_m + *to_assign * hp->CONF_score_p2reg; //now object is maximum } } //backward mach->backward(fscores); delete the_inputs; delete rscores; delete []tmp_marginals; if(i>=num_sentences) break; //out of the mini-batch while(training_corpus->at(i)->length() >= hp->CONF_higho_toolong || training_corpus->at(i)->length() <= hp->CONF_higho_tooshort){ //HAVE to compromise, bad choice skip_sent_num ++; i ++; } if(i>=num_sentences) break; if(hp->CONF_minibatch > 0){ if(this_sentence >= hp->CONF_minibatch) break; } else{ if(this_instance >= -1*hp->CONF_minibatch) break; } } //real update mach->update(hp->CONF_UPDATE_WAY,cur_lrate,hp->CONF_NN_WD,hp->CONF_MOMENTUM_ALPHA,hp->CONF_RMS_SMOOTH); } cout << "Iter done, skip " << skip_sent_num << " sentences and f&b " << all_forward_instance << ";good/bad: " << all_inst_right << "/" << all_inst_wrong << endl; }
void Method9_O3g::each_prepare_data_oneiter() { delete []data; delete []target; delete []gradient; //for gradient gradient = new REAL[mach->GetWidth()*mach->GetOdim()]; mach->SetGradOut(gradient); int sentences = training_corpus->size(); int idim = mach->GetIdim(); int odim = mach->GetOdim(); //only one time when o1_filter(decoding o1 is quite expensive) static REAL* data_right = 0; static REAL* data_wrong = 0; static int tmpall_right=0; static int tmpall_wrong=0; static int tmpall_bad=0; int whether_o1_filter = 0; if(parameters->CONF_NN_highO_o1mach.length() > 0 && parameters->CONF_NN_highO_o1filter) whether_o1_filter = 1; //************WE MUST SPECIFY O1_FILTER****************// if(!whether_o1_filter){ cout << "No o1-filter for o2g, too expensive!!" << endl; exit(1); } //************WE MUST SPECIFY O1_FILTER****************// if(data_right==0){ //1.o1-filter (MUST HAVE) FeatureGenO1* feat_temp_o1 = new FeatureGenO1(dict,parameters->CONF_x_window, parameters->CONF_add_distance,parameters->CONF_add_pos,parameters->CONF_add_distance_parent); double** all_scores_o1 = new double*[sentences]; int all_tokens_train=0,all_token_filter_wrong=0; for(int i=0;i<sentences;i++){ all_scores_o1[i] = 0; DependencyInstance* x = training_corpus->at(i); all_scores_o1[i] = get_scores_o1(x,parameters,mach_o1,feat_temp_o1); double* scores_o1_filter = all_scores_o1[i]; all_tokens_train += x->length(); for(int i2=1;i2<x->length();i2++){ //ignore root if(score_noprob(scores_o1_filter[get_index2(x->length(),x->heads->at(i2),i2)])) all_token_filter_wrong ++; } } cout << "For o1 filter: all " << all_tokens_train << ";filter wrong " << all_token_filter_wrong << endl; time_t now; time(&now);cout << "#Finish o1-filter at " << ctime(&now) << flush; //2.first pass --- figure out the numbers int tmp2_right=0,tmp2_wrong=0,tmp2_bad=0; int tmp3_right=0,tmp3_wrong=0,tmp3_bad=0; for(int i=0;i<sentences;i++){ DependencyInstance* x = training_corpus->at(i); double* scores_o1_filter = all_scores_o1[i]; int length = x->length(); for(int m=1;m<length;m++){ //2.1 special (0,0,c,m) when h==0 int noprob_0m = score_noprob(scores_o1_filter[get_index2(length,0,m)]); int link_0m = (x->heads->at(m)==0); int c = -1; for(int mid=m-1;mid>0;mid--){ if(x->heads->at(mid)==0){ c = mid; break; } } if(link_0m && c<0) tmp2_right++; else if(noprob_0m) tmp2_bad++; else tmp2_wrong++; for(int mid=1;mid<m;mid++){ if(link_0m && mid==c) tmp3_right++; else if(noprob_0m || score_noprob(scores_o1_filter[get_index2(length,0,mid)])) tmp3_bad++; else tmp3_wrong++; } //2.2. ordinary ones for(int h=1;h<length;h++){ //h>=1 if(h==m) continue; //get information int small = GET_MIN_ONE(m,h); int large = GET_MAX_ONE(m,h); bool link_hm = (x->heads->at(m)==h); int noprob_hm = score_noprob(scores_o1_filter[get_index2(length,h,m)]); int c=-1; //inside sibling if(link_hm){ if(h>m){ for(int ii=m+1;ii<h;ii++) if(x->heads->at(ii)==h){ c = ii; break; } } else{ for(int ii=m-1;ii>h;ii--) if(x->heads->at(ii)==h){ c = ii; break; } }} //for g and c for(int g=0;g<length;g++){ if(g==h || g==m || g==c) continue; bool link_gh = (x->heads->at(h)==g); int noprob_gh = score_noprob(scores_o1_filter[get_index2(length,g,h)]); int nonproj_g = (g>=small && g<=large); if(link_hm && link_gh && c<0) tmp2_right++; else if(noprob_hm || noprob_gh || nonproj_g) tmp2_bad++; else tmp2_wrong++; for(int mid=small+1;mid<large;mid++){ if(link_hm && link_gh && mid==c) tmp3_right++; else if(noprob_hm || noprob_gh || nonproj_g || score_noprob(scores_o1_filter[get_index2(length,h,mid)])) tmp3_bad++; else tmp3_wrong++; } } } } } tmpall_right=tmp2_right+tmp3_right; tmpall_wrong=tmp2_wrong+tmp3_wrong; tmpall_bad=tmp2_bad+tmp3_bad; printf("--Stat<all,2,3>:right(%d,%d,%d),wrong(%d,%d,%d),bad(%d,%d,%d)\n",tmpall_right,tmp2_right,tmp3_right, tmpall_wrong,tmp2_wrong,tmp3_wrong,tmpall_bad,tmp2_bad,tmp3_bad); //3.sweep second time and adding them //-allocate data_right = new REAL[tmpall_right*idim]; data_wrong = new REAL[tmpall_wrong*idim]; REAL* assign_right = data_right; REAL* assign_wrong = data_wrong; for(int i=0;i<sentences;i++){ DependencyInstance* x = training_corpus->at(i); double* scores_o1_filter = all_scores_o1[i]; int length = x->length(); for(int m=1;m<length;m++){ //2.1 special (0,0,c,m) when h==0 int noprob_0m = score_noprob(scores_o1_filter[get_index2(length,0,m)]); int link_0m = (x->heads->at(m)==0); int c = -1; for(int mid=m-1;mid>0;mid--){ if(x->heads->at(mid)==0){ c = mid; break; } } if(link_0m && c<0){ feat_gen->fill_one(assign_right,x,0,m,-1,0);assign_right += idim; } else if(noprob_0m){} else{ feat_gen->fill_one(assign_wrong,x,0,m,-1,0);assign_wrong += idim; } for(int mid=1;mid<m;mid++){ if(link_0m && mid==c){ feat_gen->fill_one(assign_right,x,0,m,mid,0);assign_right += idim; } else if(noprob_0m || score_noprob(scores_o1_filter[get_index2(length,0,mid)])){} else{ feat_gen->fill_one(assign_wrong,x,0,m,mid,0);assign_wrong += idim; } } //2.2. ordinary ones for(int h=1;h<length;h++){ //h>=1 if(h==m) continue; //get information int small = GET_MIN_ONE(m,h); int large = GET_MAX_ONE(m,h); bool link_hm = (x->heads->at(m)==h); int noprob_hm = score_noprob(scores_o1_filter[get_index2(length,h,m)]); int c=-1; //inside sibling if(link_hm){ if(h>m){ for(int ii=m+1;ii<h;ii++) if(x->heads->at(ii)==h){ c = ii; break; } } else{ for(int ii=m-1;ii>h;ii--) if(x->heads->at(ii)==h){ c = ii; break; } }} //for g and c for(int g=0;g<length;g++){ if(g==h || g==m || g==c) continue; bool link_gh = (x->heads->at(h)==g); int noprob_gh = score_noprob(scores_o1_filter[get_index2(length,g,h)]); int nonproj_g = (g>=small && g<=large); if(link_hm && link_gh && c<0){ feat_gen->fill_one(assign_right,x,h,m,-1,g);assign_right += idim; } else if(noprob_hm || noprob_gh || nonproj_g){} else{ feat_gen->fill_one(assign_wrong,x,h,m,-1,g);assign_wrong += idim; } for(int mid=small+1;mid<large;mid++){ if(link_hm && link_gh && mid==c){ feat_gen->fill_one(assign_right,x,h,m,mid,g);assign_right += idim; } else if(noprob_hm || noprob_gh || nonproj_g || score_noprob(scores_o1_filter[get_index2(length,h,mid)])){} else{ feat_gen->fill_one(assign_wrong,x,h,m,mid,g);assign_wrong += idim; } } } } } } for(int i=0;i<sentences;i++){ delete [](all_scores_o1[i]); } delete []all_scores_o1; time(&now);cout << "#Finish data-gen at " << ctime(&now) << flush; } //then considering CONF_NN_resample and copy them to finish data if(parameters->CONF_NN_resample < 1){ //get part of the wrong ones --- but first shuffle them shuffle_data(data_wrong,data_wrong,idim,idim,tmpall_wrong*idim,tmpall_wrong*idim,10); } int tmp_sumup = tmpall_wrong*parameters->CONF_NN_resample + tmpall_right; data = new REAL[tmp_sumup*idim]; target = new REAL[tmp_sumup]; memcpy(data,data_right,tmpall_right*idim*sizeof(REAL)); memcpy(data+tmpall_right*idim,data_wrong,tmpall_wrong*parameters->CONF_NN_resample*idim*sizeof(REAL)); for(int i=0;i<tmp_sumup;i++){ if(i<tmpall_right) target[i] = 1; else target[i] = 0; } shuffle_data(data,target,idim,1,tmp_sumup*idim,tmp_sumup,10); //final shuffle cout << "--M9, Data for this iter: samples all " << tmpall_right+tmpall_wrong << " resample: " << tmp_sumup << endl; current = 0; end = tmp_sumup; }
void Method8_O2g::each_prepare_data_oneiter() { delete []data; delete []target; delete []gradient; //for gradient gradient = new REAL[mach->GetWidth()*mach->GetOdim()]; mach->SetGradOut(gradient); //FeatureGenO2sib* feat_o2 = (FeatureGenO2sib*)feat_gen; //force it int sentences = training_corpus->size(); int idim = mach->GetIdim(); int odim = mach->GetOdim(); //only one time when o1_filter(decoding o1 is quite expensive) static REAL* data_right = 0; static REAL* data_wrong = 0; static int tmpall_right=0; static int tmpall_wrong=0; static int tmpall_bad=0; int whether_o1_filter = 0; if(parameters->CONF_NN_highO_o1mach.length() > 0 && parameters->CONF_NN_highO_o1filter) whether_o1_filter = 1; //************WE MUST SPECIFY O1_FILTER****************// if(!whether_o1_filter){ cout << "No o1-filter for o2g, too expensive!!" << endl; exit(1); } //************WE MUST SPECIFY O1_FILTER****************// if(data_right==0 || !whether_o1_filter){ //sweep all once and count FeatureGenO1* feat_temp_o1 = new FeatureGenO1(dict,parameters->CONF_x_window, parameters->CONF_add_distance,parameters->CONF_add_pos,parameters->CONF_add_distance_parent); double** all_scores_o1 = new double*[sentences]; int all_tokens_train=0,all_token_filter_wrong=0; for(int i=0;i<sentences;i++){ all_scores_o1[i] = 0; if(whether_o1_filter){ DependencyInstance* x = training_corpus->at(i); all_scores_o1[i] = get_scores_o1(x,parameters,mach_o1,feat_temp_o1); double* scores_o1_filter = all_scores_o1[i]; all_tokens_train += x->length(); for(int i2=1;i2<x->length();i2++){ //ignore root if(score_noprob(scores_o1_filter[get_index2(x->length(),x->heads->at(i2),i2)])) all_token_filter_wrong ++; } } } if(whether_o1_filter) cout << "For o1 filter: all " << all_tokens_train << ";filter wrong " << all_token_filter_wrong << endl; time_t now; time(&now);cout << "#Finish o1-filter at " << ctime(&now) << flush; int length_sofar_fordebugging = 0; for(int i=0;i<sentences;i++){ DependencyInstance* x = training_corpus->at(i); double* scores_o1_filter = all_scores_o1[i]; int length = x->length(); /* //------debugging------ ###tmpall_becauseof_unprojective### length_sofar_fordebugging += length - 1; if(!whether_o1_filter) scores_o1_filter = new double[length*length]; //------debugging------ */ for(int m=1;m<length;m++){ //first special (0,0,m) if(x->heads->at(m) == 0) tmpall_right++; else if(score_noprob(scores_o1_filter[get_index2(length,0,m)])) tmpall_bad++; else tmpall_wrong++; //then (g,h,m) for(int h=1;h<length;h++){ if(m==h) continue; int nope_hm = score_noprob(scores_o1_filter[get_index2(length,h,m)]); int link_hm = (x->heads->at(m)==h); int small = GET_MIN_ONE(m,h); int large = GET_MAX_ONE(m,h); for(int g=0;g<length;g++){ if(g==h || g==m) continue; //if(g>=s && g<=t)continue; ###allow non-projective here### int nope_gh = score_noprob(scores_o1_filter[get_index2(length,g,h)]); if(link_hm && x->heads->at(h)==g) tmpall_right++; else if(nope_hm || nope_gh || (g>=small && g<=large)) //no non-projective tmpall_bad++; else tmpall_wrong++; } } } /* //------debugging------ if(tmpall_right != length_sofar_fordebugging){ cout << i << ": sth strange happen" << endl; } if(!whether_o1_filter) delete [] scores_o1_filter; //------debugging------ */ } printf("--Stat:%d,%d,%d\n",tmpall_right,tmpall_wrong,tmpall_bad); //sweep second time and adding them //-allocate data_right = new REAL[tmpall_right*idim]; data_wrong = new REAL[tmpall_wrong*idim]; REAL* assign_right = data_right; REAL* assign_wrong = data_wrong; for(int i=0;i<sentences;i++){ DependencyInstance* x = training_corpus->at(i); int length = x->length(); double* scores_o1_filter = all_scores_o1[i]; for(int m=1;m<length;m++){ //first special (0,0,m) if(x->heads->at(m) == 0){ feat_gen->fill_one(assign_right,x,0,m,0);assign_right += idim; } else if(score_noprob(scores_o1_filter[get_index2(length,0,m)])){} else{ feat_gen->fill_one(assign_wrong,x,0,m,0);assign_wrong += idim; } //then (g,h,m) for(int h=1;h<length;h++){ if(m==h) continue; int nope_hm = score_noprob(scores_o1_filter[get_index2(length,h,m)]); int link_hm = (x->heads->at(m)==h); int small = GET_MIN_ONE(m,h); int large = GET_MAX_ONE(m,h); for(int g=0;g<length;g++){ if(g==h || g==m) continue; //if(g>=s && g<=t)continue; ###allow non-projective here### int nope_gh = score_noprob(scores_o1_filter[get_index2(length,g,h)]); if(link_hm && x->heads->at(h)==g){ feat_gen->fill_one(assign_right,x,h,m,g);assign_right += idim; } else if(nope_hm || nope_gh || (g>=small && g<=large)) //no non-projective {} else{ feat_gen->fill_one(assign_wrong,x,h,m,g);assign_wrong += idim; } } } } } for(int i=0;i<sentences;i++){ delete [](all_scores_o1[i]); } delete []all_scores_o1; time(&now);cout << "#Finish data-gen at " << ctime(&now) << flush; } //then considering CONF_NN_resample and copy them to finish data if(parameters->CONF_NN_resample < 1){ //get part of the wrong ones --- but first shuffle them shuffle_data(data_wrong,data_wrong,idim,idim,tmpall_wrong*idim,tmpall_wrong*idim,10); } int tmp_sumup = tmpall_wrong*parameters->CONF_NN_resample + tmpall_right; data = new REAL[tmp_sumup*idim]; target = new REAL[tmp_sumup]; memcpy(data,data_right,tmpall_right*idim*sizeof(REAL)); memcpy(data+tmpall_right*idim,data_wrong,tmpall_wrong*parameters->CONF_NN_resample*idim*sizeof(REAL)); for(int i=0;i<tmp_sumup;i++){ if(i<tmpall_right) target[i] = 1; else target[i] = 0; } shuffle_data(data,target,idim,1,tmp_sumup*idim,tmp_sumup,10); //final shuffle cout << "--Data for this iter(M8:o2g): samples all " << tmpall_right+tmpall_wrong << " resample: " << tmp_sumup << endl; current = 0; end = tmp_sumup; if(!whether_o1_filter){ delete[] data_right; delete[] data_wrong; } }