C++ (Cpp) DependencyInstance::length примеры использования

Язык программирования: C++ (Cpp)

Класс/Тип: DependencyInstance

Метод/Функция: length

Примеров на hotexamples.com: 7

C++ (Cpp) DependencyInstance::length - 7 примеров найдено. Это лучшие примеры C++ (Cpp) кода для DependencyInstance::length, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

length(7)

GetPosTag(2)

size(2)

GetCoarsePosTag(1)

GetDependencyRelation(1)

GetForm(1)

GetHead(1)

GetLemma(1)

GetMorphFeature(1)

GetNumMorphFeatures(1)

Initialize(1)

Пример #1

Показать файл

Файл: Method2_pairs.cpp Проект: liangkai/nngdparser

/**
 * Rebuilds the pairwise training buffer for one iteration.
 *
 * For every sentence and every modifier token (index >= 1, i.e. skipping the
 * root position 0) this emits, for each candidate head j that is neither the
 * gold head nor the modifier itself, two consecutive feature vectors:
 * first the gold (head,mod) vector, then the (j,mod) vector.
 *
 * Side effects: frees and re-allocates `data` and `gradient`, registers the
 * gradient buffer with `mach`, resets `current`, and sets `end` to the
 * resampled number of feature vectors.
 */
void Method2_pairs::each_prepare_data_oneiter()
{
	delete []data;
	delete []gradient;

	//for gradient
	gradient = new REAL[mach->GetWidth()*mach->GetOdim()];
	mach->SetGradOut(gradient);

	//input width of one feature vector; constant for the whole preparation
	const int idim = mach->GetIdim();

	//first pass: upper bound on the number of feature vectors
	int num_pairs = 0;
	int sentences = training_corpus->size();
	for(int i=0;i<sentences;i++){
		int length = training_corpus->at(i)->length();
		//(length-1) modifiers, each with (length-2) wrong heads (self and
		//gold head excluded), and every pairing stores 2 vectors
		num_pairs += (length-2)*(length-1)*2;
	}

	//second pass: generate all vectors
	int real_num_pairs = 0;
	data = new REAL[num_pairs*idim];
	REAL* assign_x = data;
	for(int i=0;i<sentences;i++){
		DependencyInstance* x = training_corpus->at(i);
		int length = x->length();
		for(int mod=1;mod<length;mod++){
			int head = x->heads->at(mod);
			for(int j=0;j<length;j++){
				if(j==head || j==mod)
					continue;
				//always first the right one and then the wrong one
				feat_gen->fill_one(assign_x,x,head,mod);
				assign_x += idim;
				feat_gen->fill_one(assign_x,x,j,mod);
				assign_x += idim;
				real_num_pairs += 2;
			}
		}
	}
	current = 0;
	end = real_num_pairs;
	//shuffle in units of 2*idim so that a right/wrong pair always moves together
	shuffle_data(data,data,2*idim,2*idim,
			real_num_pairs*idim,real_num_pairs*idim,10);
	//sample
	//NOTE(review): the resampled count may be odd, which would cut the last
	//right/wrong pair in half -- confirm how the consumer of `end` treats this.
	const int resampled = (int)(end*parameters->CONF_NN_resample);
	cout << "--Data for this iter: samples all " << end << " resample: " << resampled << endl;
	end = resampled;
}

Пример #2

Показать файл

Файл: M2_p2o2.cpp Проект: zzzsss/parsing3plus

/**
 * Runs one training iteration of the p2o2 (second-order sibling) model.
 *
 * On the first call the per-sentence first-order filter masks are read via
 * filter_read() or, failing that, computed with get_cut_o1() and written out
 * with filter_write(); they are kept in a function-local static afterwards.
 *
 * The corpus is then walked sentence by sentence. A sentence is skipped when
 * a random draw exceeds CONF_NN_resample or when it is at least
 * CONF_higho_toolong tokens long. Remaining sentences are grouped into
 * mini-batches: CONF_minibatch > 0 limits the number of sentences, otherwise
 * -CONF_minibatch limits the number of forwarded instances. For every
 * sentence the scores are forwarded, turned into marginals, converted in
 * place into gradients and back-propagated; the machine is updated once per
 * mini-batch.
 */
void M2_p2o2::each_train_one_iter()
{
	static bool** STA_noprobs = 0;	//static one, initialised only once
	if(STA_noprobs==0 && !filter_read(STA_noprobs)){
		//init only once
		int all_tokens_train=0,all_token_filter_wrong=0;
		time_t now;
		time(&now);
		cout << "-Preparing no_probs at " << ctime(&now) << endl;
		STA_noprobs = new bool*[training_corpus->size()];
		for(unsigned int i=0;i<training_corpus->size();i++){
			DependencyInstance* x = training_corpus->at(i);
			STA_noprobs[i] = get_cut_o1(x,mfo1,dict,hp->CONF_score_o1filter_cut);
			all_tokens_train += x->length()-1;
			//count gold arcs that the filter would wrongly cut
			for(int m=1;m<x->length();m++)
				if(STA_noprobs[i][get_index2(x->length(),x->heads->at(m),m)])
					all_token_filter_wrong ++;
		}
		cout << "For o1 filter: all " << all_tokens_train << ";filter wrong " << all_token_filter_wrong << endl;
		filter_write(STA_noprobs);
	}

	//per-sentence approach
	int num_sentences = training_corpus->size();
	//statistics
	int skip_sent_num = 0;
	int all_forward_instance = 0;
	int all_inst_right = 0;
	int all_inst_wrong = 0;
	//some useful info
	int odim = mach->get_odim();
	//training
	time_t now;
	time(&now); //ctime is not reentrant! use ctime_r() instead if needed
	cout << "##*** // Start the p2o2 training for iter " << cur_iter << " at " << ctime(&now)
			<< "with lrate " << cur_lrate << endl;
	cout << "#Sentences is " << num_sentences << " and resample (about)" << num_sentences*hp->CONF_NN_resample << endl;
	for(int i=0;i<num_sentences;){
		//random skip (instead of shuffling every time)
		if(drand48() > hp->CONF_NN_resample || training_corpus->at(i)->length() >= hp->CONF_higho_toolong){
			skip_sent_num ++;
			i ++;
			continue;
		}

		mach->prepare_batch();
		//if nesterov update before each batch (pre-update)
		if(hp->CONF_NESTEROV_MOMENTUM)
			mach->nesterov_update(hp->CONF_UPDATE_WAY,hp->CONF_MOMENTUM_ALPHA);
		//main batch
		int this_sentence = 0;
		int this_instance = 0;
		for(;;){
			//forward
			DependencyInstance* x = training_corpus->at(i);
			const int length = x->length();
			nn_input* the_inputs;
			//NOTE(review): fscores is never freed in this function --
			//confirm that forward_scores_o2sib() returns memory owned elsewhere.
			REAL *fscores = forward_scores_o2sib(x,mach,&the_inputs,dict->get_helper(),0,STA_noprobs[i],hp);

			this_instance += the_inputs->get_numi();
			all_forward_instance += the_inputs->get_numi();
			all_inst_right += the_inputs->inst_good;
			all_inst_wrong += the_inputs->inst_bad;
			this_sentence ++;
			i++;

			the_scores::Scores<REAL_SCORES>* rscores = get_the_scores(the_inputs,fscores,mach->get_odim(),the_inputs->get_numi());
			REAL_SCORES* tmp_marginals = LencodeMarginals_o2sib(length,*rscores);

			//set gradients: overwrite the forward scores in place, one row of
			//odim values per instance
			int HERE_dim = the_inputs->num_width;
			REAL* to_assign = fscores;
			for(int ii=0;ii<the_inputs->num_inst*HERE_dim;ii+=HERE_dim){
				int tmp_goal = the_inputs->goals->at(ii/HERE_dim);
				REAL_SCORES* from_mar = tmp_marginals+odim*(ii/HERE_dim);
				for(int once=0;once<odim;once++,to_assign++){
					if(tmp_goal == once)
						*to_assign = -1 * (1 - from_mar[once]) + *to_assign * hp->CONF_score_p2reg;
					else
						*to_assign = from_mar[once] + *to_assign * hp->CONF_score_p2reg;	//now object is maximum
				}
			}

			//backward
			mach->backward(fscores);

			delete the_inputs;
			delete rscores;
			delete []tmp_marginals;

			//skip over-long sentences inside the mini-batch; the index must be
			//bounds-checked, otherwise a corpus ending in over-long sentences
			//makes at(i) run past the last sentence
			while(i<num_sentences && training_corpus->at(i)->length() >= hp->CONF_higho_toolong){
				skip_sent_num ++;
				i ++;
			}
			if(i>=num_sentences)
				break;
			//out of the mini-batch
			if(hp->CONF_minibatch > 0){
				if(this_sentence >= hp->CONF_minibatch)
					break;
			}
			else{
				if(this_instance >= -1*hp->CONF_minibatch)
					break;
			}
		}
		//real update
		mach->update(hp->CONF_UPDATE_WAY,cur_lrate,hp->CONF_NN_WD,hp->CONF_MOMENTUM_ALPHA,hp->CONF_RMS_SMOOTH);
	}
	cout << "Iter done, skip " << skip_sent_num << " sentences and f&b " << all_forward_instance
			<< ";good/bad: " << all_inst_right << "/" << all_inst_wrong << endl;
}

Пример #3

Показать файл

Файл: Method9_O3g.cpp Проект: liangkai/nngdparser

/**
 * Prepares the training data of one iteration for the O3g model.
 *
 * The expensive part runs only on the first call and is cached in
 * function-local statics (data_right / data_wrong and their counters):
 *   1. score every sentence with the first-order machine (o1 filter),
 *   2. a counting pass that classifies every candidate as right / wrong / bad,
 *   3. a filling pass that writes feature vectors for the right and wrong
 *      candidates ("bad" candidates are counted but never stored).
 * Both passes must apply exactly the same classification, otherwise the
 * buffers allocated after pass 2 do not match what pass 3 writes.
 *
 * On every call the cached vectors are then combined into `data`/`target`:
 * all right ones plus the first floor(tmpall_wrong*CONF_NN_resample) wrong
 * ones (the wrong ones are shuffled first when the ratio is below 1).
 *
 * Terminates the process with exit(1) when no o1 filter is configured.
 */
void Method9_O3g::each_prepare_data_oneiter()
{
	delete []data;
	delete []target;
	delete []gradient;
	//for gradient
	gradient = new REAL[mach->GetWidth()*mach->GetOdim()];
	mach->SetGradOut(gradient);
	int sentences = training_corpus->size();
	int idim = mach->GetIdim();

	//only one time when o1_filter(decoding o1 is quite expensive)
	static REAL* data_right = 0;
	static REAL* data_wrong = 0;
	static int tmpall_right=0;
	static int tmpall_wrong=0;
	static int tmpall_bad=0;
	int whether_o1_filter = 0;
	if(parameters->CONF_NN_highO_o1mach.length() > 0 && parameters->CONF_NN_highO_o1filter)
		whether_o1_filter = 1;

	//************WE MUST SPECIFY O1_FILTER****************//
	if(!whether_o1_filter){
		cout << "No o1-filter for o2g, too expensive!!" << endl;
		exit(1);
	}
	//************WE MUST SPECIFY O1_FILTER****************//

	if(data_right==0){
		//1.o1-filter (MUST HAVE)
		//NOTE(review): feat_temp_o1 is never deleted in this function -- confirm
		//whether get_scores_o1() keeps it or whether this is a one-time leak.
		FeatureGenO1* feat_temp_o1 = new FeatureGenO1(dict,parameters->CONF_x_window,
						parameters->CONF_add_distance,parameters->CONF_add_pos,parameters->CONF_add_distance_parent);
		double** all_scores_o1 = new double*[sentences];
		int all_tokens_train=0,all_token_filter_wrong=0;
		for(int i=0;i<sentences;i++){
			DependencyInstance* x = training_corpus->at(i);
			all_scores_o1[i] = get_scores_o1(x,parameters,mach_o1,feat_temp_o1);
			double* scores_o1_filter = all_scores_o1[i];
			all_tokens_train += x->length();
			for(int i2=1;i2<x->length();i2++){	//ignore root
				//gold arcs that the filter would wrongly prune
				if(score_noprob(scores_o1_filter[get_index2(x->length(),x->heads->at(i2),i2)]))
					all_token_filter_wrong ++;
			}
		}
		cout << "For o1 filter: all " << all_tokens_train << ";filter wrong " << all_token_filter_wrong << endl;
		time_t now;
		time(&now);cout << "#Finish o1-filter at " << ctime(&now) << flush;

		//2.first pass --- figure out the numbers
		//tmp2_* count candidates without an inside sibling, tmp3_* those with one
		int tmp2_right=0,tmp2_wrong=0,tmp2_bad=0;
		int tmp3_right=0,tmp3_wrong=0,tmp3_bad=0;
		for(int i=0;i<sentences;i++){
			DependencyInstance* x = training_corpus->at(i);
			double* scores_o1_filter = all_scores_o1[i];
			int length = x->length();
			for(int m=1;m<length;m++){
				//2.1 special (0,0,c,m)	when h==0
				int noprob_0m = score_noprob(scores_o1_filter[get_index2(length,0,m)]);
				int link_0m = (x->heads->at(m)==0);
				//nearest token left of m that is also headed by the root
				int c_root = -1;
				for(int mid=m-1;mid>0;mid--){
					if(x->heads->at(mid)==0){
						c_root = mid;
						break;
					}
				}
				if(link_0m && c_root<0)
					tmp2_right++;
				else if(noprob_0m)
					tmp2_bad++;
				else
					tmp2_wrong++;
				for(int mid=1;mid<m;mid++){
					if(link_0m && mid==c_root)
						tmp3_right++;
					else if(noprob_0m || score_noprob(scores_o1_filter[get_index2(length,0,mid)]))
						tmp3_bad++;
					else
						tmp3_wrong++;
				}
				//2.2. ordinary ones
				for(int h=1;h<length;h++){	//h>=1
					if(h==m)
						continue;
					//get information
					int small = GET_MIN_ONE(m,h);
					int large = GET_MAX_ONE(m,h);
					bool link_hm = (x->heads->at(m)==h);
					int noprob_hm = score_noprob(scores_o1_filter[get_index2(length,h,m)]);
					//inside sibling: nearest token to m, between m and h, headed by h
					int c=-1;
					if(link_hm){
						if(h>m){
							for(int ii=m+1;ii<h;ii++)
								if(x->heads->at(ii)==h){
									c = ii;
									break;
								}
						}
						else{
							for(int ii=m-1;ii>h;ii--)
								if(x->heads->at(ii)==h){
									c = ii;
									break;
								}
						}
					}
					//for g and c
					for(int g=0;g<length;g++){
						if(g==h || g==m || g==c)
							continue;
						bool link_gh = (x->heads->at(h)==g);
						int noprob_gh = score_noprob(scores_o1_filter[get_index2(length,g,h)]);
						//a grandparent inside the [small,large] span is rejected
						int nonproj_g = (g>=small && g<=large);
						if(link_hm && link_gh && c<0)
							tmp2_right++;
						else if(noprob_hm || noprob_gh || nonproj_g)
							tmp2_bad++;
						else
							tmp2_wrong++;
						for(int mid=small+1;mid<large;mid++){
							if(link_hm && link_gh && mid==c)
								tmp3_right++;
							else if(noprob_hm || noprob_gh || nonproj_g || score_noprob(scores_o1_filter[get_index2(length,h,mid)]))
								tmp3_bad++;
							else
								tmp3_wrong++;
						}
					}
				}
			}
		}
		tmpall_right=tmp2_right+tmp3_right;
		tmpall_wrong=tmp2_wrong+tmp3_wrong;
		tmpall_bad=tmp2_bad+tmp3_bad;
		printf("--Stat<all,2,3>:right(%d,%d,%d),wrong(%d,%d,%d),bad(%d,%d,%d)\n",tmpall_right,tmp2_right,tmp3_right,
				tmpall_wrong,tmp2_wrong,tmp3_wrong,tmpall_bad,tmp2_bad,tmp3_bad);

		//3.sweep second time and adding them (same classification as pass 2)
		//-allocate
		data_right = new REAL[tmpall_right*idim];
		data_wrong = new REAL[tmpall_wrong*idim];
		REAL* assign_right = data_right;
		REAL* assign_wrong = data_wrong;
		for(int i=0;i<sentences;i++){
			DependencyInstance* x = training_corpus->at(i);
			double* scores_o1_filter = all_scores_o1[i];
			int length = x->length();
			for(int m=1;m<length;m++){
				//3.1 special (0,0,c,m)	when h==0
				int noprob_0m = score_noprob(scores_o1_filter[get_index2(length,0,m)]);
				int link_0m = (x->heads->at(m)==0);
				int c_root = -1;
				for(int mid=m-1;mid>0;mid--){
					if(x->heads->at(mid)==0){
						c_root = mid;
						break;
					}
				}
				if(link_0m && c_root<0){
					feat_gen->fill_one(assign_right,x,0,m,-1,0);assign_right += idim;
				}
				else if(noprob_0m){}
				else{
					feat_gen->fill_one(assign_wrong,x,0,m,-1,0);assign_wrong += idim;
				}
				for(int mid=1;mid<m;mid++){
					if(link_0m && mid==c_root){
						feat_gen->fill_one(assign_right,x,0,m,mid,0);assign_right += idim;
					}
					else if(noprob_0m || score_noprob(scores_o1_filter[get_index2(length,0,mid)])){}
					else{
						feat_gen->fill_one(assign_wrong,x,0,m,mid,0);assign_wrong += idim;
					}
				}
				//3.2. ordinary ones
				for(int h=1;h<length;h++){	//h>=1
					if(h==m)
						continue;
					//get information
					int small = GET_MIN_ONE(m,h);
					int large = GET_MAX_ONE(m,h);
					bool link_hm = (x->heads->at(m)==h);
					int noprob_hm = score_noprob(scores_o1_filter[get_index2(length,h,m)]);
					int c=-1;	//inside sibling
					if(link_hm){
						if(h>m){
							for(int ii=m+1;ii<h;ii++)
								if(x->heads->at(ii)==h){
									c = ii;
									break;
								}
						}
						else{
							for(int ii=m-1;ii>h;ii--)
								if(x->heads->at(ii)==h){
									c = ii;
									break;
								}
						}
					}
					//for g and c
					for(int g=0;g<length;g++){
						if(g==h || g==m || g==c)
							continue;
						bool link_gh = (x->heads->at(h)==g);
						int noprob_gh = score_noprob(scores_o1_filter[get_index2(length,g,h)]);
						int nonproj_g = (g>=small && g<=large);
						if(link_hm && link_gh && c<0){
							feat_gen->fill_one(assign_right,x,h,m,-1,g);assign_right += idim;
						}
						else if(noprob_hm || noprob_gh || nonproj_g){}
						else{
							feat_gen->fill_one(assign_wrong,x,h,m,-1,g);assign_wrong += idim;
						}
						for(int mid=small+1;mid<large;mid++){
							if(link_hm && link_gh && mid==c){
								feat_gen->fill_one(assign_right,x,h,m,mid,g);assign_right += idim;
							}
							else if(noprob_hm || noprob_gh || nonproj_g || score_noprob(scores_o1_filter[get_index2(length,h,mid)])){}
							else{
								feat_gen->fill_one(assign_wrong,x,h,m,mid,g);assign_wrong += idim;
							}
						}
					}
				}
			}
		}
		for(int i=0;i<sentences;i++){
			delete [](all_scores_o1[i]);
		}
		delete []all_scores_o1;
		time(&now);cout << "#Finish data-gen at " << ctime(&now) << flush;
	}

	//then considering CONF_NN_resample and copy them to finish data
	if(parameters->CONF_NN_resample < 1){
		//get part of the wrong ones --- but first shuffle them
		shuffle_data(data_wrong,data_wrong,idim,idim,tmpall_wrong*idim,tmpall_wrong*idim,10);
	}
	//Number of wrong samples actually used. It is truncated to a whole sample
	//BEFORE any byte count is derived from it: sizing the copy from the raw
	//floating-point product can exceed the `data` allocation by up to almost one
	//sample. It is also clamped so a ratio above 1 cannot read past data_wrong.
	int wrong_used = (int)(tmpall_wrong*parameters->CONF_NN_resample);
	if(wrong_used > tmpall_wrong)
		wrong_used = tmpall_wrong;
	if(wrong_used < 0)
		wrong_used = 0;
	int tmp_sumup = wrong_used + tmpall_right;
	data = new REAL[tmp_sumup*idim];
	target = new REAL[tmp_sumup];
	memcpy(data,data_right,(size_t)tmpall_right*idim*sizeof(REAL));
	memcpy(data+tmpall_right*idim,data_wrong,(size_t)wrong_used*idim*sizeof(REAL));
	//right samples come first in `data`, so they get target 1, the rest 0
	for(int i=0;i<tmp_sumup;i++){
		if(i<tmpall_right)
			target[i] = 1;
		else
			target[i] = 0;
	}
	shuffle_data(data,target,idim,1,tmp_sumup*idim,tmp_sumup,10);	//final shuffle
	cout << "--M9, Data for this iter: samples all " << tmpall_right+tmpall_wrong << " resample: " << tmp_sumup << endl;
	current = 0;
	end = tmp_sumup;
}

Пример #4

Показать файл

Файл: M4_mmo2.cpp Проект: zzsfornlp/nnpgdparser

/**
 * Runs one max-margin training iteration of the M4 second-order model.
 *
 * A sentence is skipped when a random draw exceeds CONF_NN_resample or when
 * it is at least CONF_higho_toolong tokens long. The remaining sentences are
 * collected into mini-batches: each one is parsed with Process::parse_o2sib()
 * and its predicted heads are compared with the gold heads. With
 * CONF_minibatch > 0 a batch closes after that many sentences, otherwise
 * after -CONF_minibatch wrongly attached tokens. For every sentence of the
 * batch the "good" and "bad" inputs are back-propagated with opposite signs,
 * then the machine is updated once. The training UAS of the iteration is
 * printed at the end.
 */
void M4_o2::each_train_one_iter()
{
	//per-sentence approach
	int num_sentences = training_corpus->size();
	//statistics
	int skip_sent_num = 0;
	//training
	time_t now;
	time(&now); //ctime is not reentrant! use ctime_r() instead if needed
	cout << "##*** //M4O2// Start the training for iter " << cur_iter << " at " << ctime(&now)
			<< "with lrate " << cur_lrate << endl;
	cout << "#Sentences is " << num_sentences << " and resample (about)" << num_sentences*hp->CONF_NN_resample << endl;

	vector<DependencyInstance*> xs;	//sentences of the current mini-batch
	int all_token=0,all_right=0;
	for(int i=0;i<num_sentences;){
		//random skip (instead of shuffling every time)
		if(drand48() > hp->CONF_NN_resample || training_corpus->at(i)->length() >= hp->CONF_higho_toolong){
			skip_sent_num ++;
			i ++;
			continue;
		}
		//main batch
		int this_instance_toupdate = 0;
		int this_tokens = 0;
		for(;;){
			//forward
			DependencyInstance* x = training_corpus->at(i);
			xs.push_back(x);

			Process::parse_o2sib(x,mfo1,0,true);	//add margin MAYBE
			// -- statistic
			all_token += x->length()-1;
			for(int i2=1;i2<x->length();i2++){	//ignore root
				if((*(x->predict_heads))[i2] == (*(x->heads))[i2])
					all_right ++;
				else
					this_instance_toupdate++;
			}
			//
			this_tokens += x->length() - 1;
			i++;

			//skip over-long sentences inside the mini-batch; the index must be
			//bounds-checked, otherwise a corpus ending in over-long sentences
			//makes at(i) run past the last sentence
			while(i<num_sentences && training_corpus->at(i)->length() >= hp->CONF_higho_toolong){
				skip_sent_num ++;
				i ++;
			}
			if(i>=num_sentences)
				break;
			//out of the mini-batch
			if (hp->CONF_minibatch > 0) {
				if (int(xs.size()) >= hp->CONF_minibatch)
					break;
			}
			else {
				if (this_instance_toupdate >= -1 * hp->CONF_minibatch)
					break;
			}
		}

		//backward
		for(size_t ii=0;ii<xs.size();ii++){
			DependencyInstance* x = xs[ii];
			nn_input* good;
			nn_input* bad;
			M3_pro2::get_nninput_o2sib(x,&good,&bad,dict);
			MM_margin_backward(mach, good, 1, hp->CONF_score_p2reg);
			MM_margin_backward(mach, bad, -1, hp->CONF_score_p2reg);
			delete good;delete bad;
		}
		int this_sentence = xs.size();
		xs.clear();
		//real update
		if (hp->CONF_mbatch_way == 1)
			mach->set_this_mbsize(this_tokens*this_tokens);
		else if (hp->CONF_mbatch_way == 2)
			mach->set_this_mbsize(this_sentence*this_sentence);
		mach->update(hp->CONF_UPDATE_WAY,cur_lrate,hp->CONF_NN_WD,hp->CONF_MOMENTUM_ALPHA,hp->CONF_RMS_SMOOTH);
	}
	cout << "Iter done, skip " << skip_sent_num << " sentences." << "AND training UAS:"
			<< all_right << "/" << all_token << "=" << all_right/(0.0+all_token) << endl;
}

Пример #5

Показать файл

Файл: DependencyEvaluator.cpp Проект: zzsfornlp/nnpgdparser

double DependencyEvaluator::evaluate(std::string &act_file, std::string &pred_file, std::string &format, bool labeled){

	set<string> punctSet = set<string>();
	punctSet.insert("''");
	punctSet.insert("``");
	punctSet.insert(".");
	punctSet.insert(":");
	punctSet.insert(",");
	punctSet.insert("PU");	//for CTB

	CONLLReader* goldReader = new CONLLReader();
	goldReader->startReading(act_file.c_str());

	CONLLReader* predictedReader = new CONLLReader();
	predictedReader->startReading(pred_file.c_str());

	int total = 0;
	int total_root = 0;
	int total_non_root = 0;
	int corr = 0;
	int corr_root = 0;
	int corr_non_root = 0;
	int corrL = 0;
	int corrL_root = 0;
	int corrL_non_root = 0;
	int numsent = 0;
	int corrsent = 0;
	int corrsentL = 0;
	
	int totalNoPunc = 0;
	int totalNoPunc_root = 0;
	int totalNoPunc_non_root = 0;
	int corrNoPunc = 0;
	int corrNoPunc_root = 0;
	int corrNoPunc_non_root = 0;
	int corrLNoPunc = 0;
	int corrLNoPunc_root = 0;
	int corrLNoPunc_non_root = 0;
	int corrsentNoPunc = 0;
	int corrsentLNoPunc = 0;

	DependencyInstance* goldInstance = goldReader->getNext();
	DependencyInstance* predInstance = predictedReader->getNext();


	while(goldInstance != NULL){
		int instanceLength = goldInstance->length();

		if(instanceLength != predInstance->length()){
			cout<<"Lengths do not match on sentence "<<numsent<<endl;
		}

		vector<int>* goldHeads = goldInstance->heads;
		vector<string*>* goldLabels = goldInstance->deprels;
		vector<int>* predHeads = predInstance->heads;		//because after reading, the predict ones goes there
		vector<string*>* predLabels = predInstance->deprels;

		vector<string*>* pos = goldInstance->postags;

		bool whole = true;
		bool wholeL = true;

		bool wholeNP = true;
		bool wholeLNP = true;

		for(int i = 1; i < instanceLength; i++){
			if((*goldHeads)[i] == 0){
				total_root++;
			}
			else{
				total_non_root++;
			}
			if((*predHeads)[i] == (*goldHeads)[i]){
				corr++;
				if((*goldHeads)[i] == 0){
					corr_root++;
				}
				else{
					corr_non_root++;
				}
				if(labeled){
					if((*(*predLabels)[i]) == (*(*goldLabels)[i])){
						corrL++;
						if((*goldHeads)[i] == 0){
							corrL_root++;
						}
						else{
							corrL_non_root++;
						}
					}
					else{
						wholeL = false;
					}
				}
			}
			else{
				whole = false;
				wholeL = false;
			}

			if(punctSet.count(*((*pos)[i])) <= 0){
				totalNoPunc++;
				if((*goldHeads)[i] == 0){
					totalNoPunc_root++;
				}
				else{
					totalNoPunc_non_root++;
				}
				if((*predHeads)[i] == (*goldHeads)[i]){
					corrNoPunc++;
					if((*goldHeads)[i] == 0){
						corrNoPunc_root++;
					}
					else{
						corrNoPunc_non_root++;
					}
					if(labeled){
						if((*(*predLabels)[i]) == (*(*goldLabels)[i])){
							corrLNoPunc++;
							if((*goldHeads)[i] == 0){
								corrLNoPunc_root++;
							}
							else{
								corrLNoPunc_non_root++;
							}
						}
						else{
							wholeLNP = false;
						}
					}
				}
				else{
					wholeNP = false;
					wholeLNP = false;
				}
			}
		}
		total += instanceLength - 1;
		if(whole){
			corrsent++;
		}
		if(wholeL){
			corrsentL++;
		}
		if(wholeNP){
			corrsentNoPunc++;
		}
		if(wholeLNP){
			corrsentLNoPunc++;
		}
		numsent++;

		delete(goldInstance);
		delete(predInstance);
		goldInstance = goldReader->getNext();
		predInstance = predictedReader->getNext();
	}

	printf("Tokens: %d\n", total);
	printf("Correct: %d\n", corr);
	printf("Unlabeled Accuracy: %.2lf%%\n", ((double)corr) * 100 / total);
	printf("Unlabeled Complete Correct: %.2lf%%\n", ((double)corrsent) *100 / numsent);
	if(labeled){
		printf("Labeled Accuracy: %.2lf%%\n", ((double)corrL) * 100 / total);
		printf("Labeled Complete Correct: %.2lf%%\n", ((double)corrsentL) * 100 / numsent);
	}

	printf("\n");

	printf("Tokens Root: %d\n", total_root);
	printf("Correct Root: %d\n", corr_root);
	printf("Unlabeled Accuracy Root: %.2lf%%\n", ((double)corr_root) * 100 / total_root);
	if(labeled){
		printf("Labeled Accuracy Root: %.2lf%%\n", ((double)corrL_root) * 100 / total_root);
	}

	printf("\n");

	printf("Tokens Non Root: %d\n", total_non_root);
	printf("Correct Non Root: %d\n", corr_non_root);
	printf("Unlabeled Accuracy Non Root: %.2lf%%\n", ((double)corr_non_root) * 100 / total_non_root);
	if(labeled){
		printf("Labeled Accuracy Non Root: %.2lf%%\n", ((double)corrL_non_root) * 100 / total_non_root);
	}

	printf("\n");

	printf("Tokens No Punc: %d\n", totalNoPunc);
	printf("Correct No Punc: %d\n", corrNoPunc);
	printf("Unlabeled Accuracy No Punc: %.2lf%%\n", ((double)corrNoPunc) * 100 / totalNoPunc);
	printf("Unlabeled Complete Correct No Punc: %.2lf%%\n", ((double)corrsentNoPunc) *100 / numsent);
	if(labeled){
		printf("Labeled Accuracy No Punc: %.2lf%%\n", ((double)corrLNoPunc) * 100 / totalNoPunc);
		printf("Labeled Complete Correct No Punc: %.2lf%%\n", ((double)corrsentLNoPunc) * 100 / numsent);
	}

	printf("\n");

	printf("Tokens No Punc Root: %d\n", totalNoPunc_root);
	printf("Correct No Punc Root: %d\n", corrNoPunc_root);
	printf("Unlabeled Accuracy No Punc Root: %.2lf%%\n", ((double)corrNoPunc_root) * 100 / totalNoPunc_root);
	if(labeled){
		printf("Labeled Accuracy No Punc Root: %.2lf%%\n", ((double)corrLNoPunc_root) * 100 / totalNoPunc_root);
	}

	printf("\n");

	printf("Tokens No Punc Non Root: %d\n", totalNoPunc_non_root);
	printf("Correct No Punc Non Root: %d\n", corrNoPunc_non_root);
	printf("Unlabeled Accuracy No Punc Non Root: %.2lf%%\n", ((double)corrNoPunc_non_root) * 100 / totalNoPunc_non_root);
	if(labeled){
		printf("Labeled Accuracy No Punc Non Root: %.2lf%%\n", ((double)corrLNoPunc_non_root) * 100 / totalNoPunc_non_root);
	}

	goldReader->finishReading();
	predictedReader->finishReading();
	delete(goldReader);
	delete(predictedReader);
	return ((double)corr) / total;
}

Пример #6

Показать файл

Файл: M2_p2o1.cpp Проект: zzsfornlp/nnpgdparser

void M2_p2o1::each_train_one_iter()
{
	//per-sentence approach
	int num_sentences = training_corpus->size();
	//statistics
	int skip_sent_num = 0;
	int all_forward_instance = 0;
	int all_inst_right = 0;
	int all_inst_wrong = 0;
	//some useful info
	int odim = mach->get_odim();
	//training
	time_t now;
	time(&now); //ctime is not rentrant ! use ctime_r() instead if needed
	cout << "##*** //p2o1// Start the training for iter " << cur_iter << " at " << ctime(&now)
			<< "with lrate " << cur_lrate << endl;
	cout << "#Sentences is " << num_sentences << " and resample (about)" << num_sentences*hp->CONF_NN_resample << endl;
	for(int i=0;i<num_sentences;){
		//random skip (instead of shuffling every time)
		if(drand48() > hp->CONF_NN_resample){
			skip_sent_num ++;
			i ++;
			continue;
		}

		mach->prepare_batch();
		//if nesterov update before each batch (pre-update)
		if(hp->CONF_NESTEROV_MOMENTUM)
			mach->nesterov_update(hp->CONF_UPDATE_WAY,hp->CONF_MOMENTUM_ALPHA);
		//main batch
		int this_sentence = 0;
		int this_instance = 0;
		int this_tokens = 0;
		for(;;){
			//forward
			DependencyInstance* x = training_corpus->at(i);
			nn_input* the_inputs;
			REAL *fscores = forward_scores_o1(x,mach,&the_inputs,dict->get_helper(),0,hp);
			double* rscores = 0;
			double* tmp_marginals = 0;

			this_instance += the_inputs->get_numi();
			all_forward_instance += the_inputs->get_numi();
			all_inst_right += the_inputs->inst_good;
			all_inst_wrong += the_inputs->inst_bad;
			this_sentence ++;
			this_tokens += x->length()-1;
			i++;

			adjust_scores_before(the_inputs, fscores, odim, hp->CONF_margin);
			//two situations
			int length = x->length();
			if(!hp->CONF_labeled){
				//calculate prob
				rscores = rearrange_scores_o1(x,mach,the_inputs,fscores,0,0,hp);
				tmp_marginals = encodeMarginals(length,rscores);
			}
			else{
				//calculate prob
				rscores = rearrange_scores_o1(x,mach,the_inputs,fscores,0,0,hp);
				tmp_marginals = LencodeMarginals(length,rscores,mach->get_odim());
			}
			adjust_scores_after(the_inputs, fscores, odim, hp->CONF_margin);

			//set gradients
			int HERE_dim = the_inputs->num_width;
			REAL* to_assign = fscores;
			for(int ii=0;ii<the_inputs->num_inst*HERE_dim;ii+=HERE_dim){
				int tmph = the_inputs->inputs->at(ii);
				int tmpm = the_inputs->inputs->at(ii+1);
				int tmp_goal = the_inputs->goals->at(ii/HERE_dim);
				for(int once=0;once<odim;once++,to_assign++){
					if(tmp_goal == once)
						*to_assign = -1 * (1 - tmp_marginals[get_index2(length,tmph,tmpm,once,odim)]) + *to_assign * hp->CONF_score_p2reg;
					else
						*to_assign = tmp_marginals[get_index2(length,tmph,tmpm,once,odim)] + *to_assign * hp->CONF_score_p2reg;	//now object is maximum
				}
			}

			//backward
			mach->backward(fscores);

			//mach->check_gradients(the_inputs);

			delete the_inputs;
			delete []fscores;
			delete []rscores;
			delete []tmp_marginals;

			//out of the mini-batch
			if(i>=num_sentences)
				break;
			if(hp->CONF_minibatch > 0){
				if(this_sentence >= hp->CONF_minibatch)
					break;
			}
			else{
				if(this_instance >= -1*hp->CONF_minibatch)
					break;
			}
		}
		//real update
		if(hp->CONF_mbatch_way == 1)
			mach->set_this_mbsize(this_tokens*this_tokens);
		else if(hp->CONF_mbatch_way == 2)
			mach->set_this_mbsize(this_sentence*this_sentence);
		mach->update(hp->CONF_UPDATE_WAY,cur_lrate,hp->CONF_NN_WD,hp->CONF_MOMENTUM_ALPHA,hp->CONF_RMS_SMOOTH);
	}
	cout << "Iter done, skip " << skip_sent_num << " sentences and f&b " << all_forward_instance
			<< ";good/bad: " << all_inst_right << "/" << all_inst_wrong << endl;
}

Пример #7

Показать файл

Файл: Method8_O2g.cpp Проект: liangkai/nngdparser

/**
 * Prepares the training data of one iteration for the O2g model.
 *
 * The expensive part runs only on the first call and is cached in
 * function-local statics (data_right / data_wrong and their counters):
 *   1. score every sentence with the first-order machine (o1 filter),
 *   2. a counting pass that classifies every (g,h,m) candidate as
 *      right / wrong / bad,
 *   3. a filling pass that writes feature vectors for the right and wrong
 *      candidates ("bad" candidates are counted but never stored).
 * Both passes must apply exactly the same classification, otherwise the
 * buffers allocated after pass 2 do not match what pass 3 writes.
 *
 * On every call the cached vectors are then combined into `data`/`target`:
 * all right ones plus the first floor(tmpall_wrong*CONF_NN_resample) wrong
 * ones (the wrong ones are shuffled first when the ratio is below 1).
 *
 * Terminates the process with exit(1) when no o1 filter is configured, so
 * everything after that check can rely on the filter being active.
 */
void Method8_O2g::each_prepare_data_oneiter()
{
	delete []data;
	delete []target;
	delete []gradient;
	//for gradient
	gradient = new REAL[mach->GetWidth()*mach->GetOdim()];
	mach->SetGradOut(gradient);
	int sentences = training_corpus->size();
	int idim = mach->GetIdim();

	//only one time when o1_filter(decoding o1 is quite expensive)
	static REAL* data_right = 0;
	static REAL* data_wrong = 0;
	static int tmpall_right=0;
	static int tmpall_wrong=0;
	static int tmpall_bad=0;
	int whether_o1_filter = 0;
	if(parameters->CONF_NN_highO_o1mach.length() > 0 && parameters->CONF_NN_highO_o1filter)
		whether_o1_filter = 1;

	//************WE MUST SPECIFY O1_FILTER****************//
	if(!whether_o1_filter){
		cout << "No o1-filter for o2g, too expensive!!" << endl;
		exit(1);
	}
	//************WE MUST SPECIFY O1_FILTER****************//

	if(data_right==0){
		//1. o1 filter scores for every sentence
		//NOTE(review): feat_temp_o1 is never deleted in this function -- confirm
		//whether get_scores_o1() keeps it or whether this is a one-time leak.
		FeatureGenO1* feat_temp_o1 = new FeatureGenO1(dict,parameters->CONF_x_window,
						parameters->CONF_add_distance,parameters->CONF_add_pos,parameters->CONF_add_distance_parent);
		double** all_scores_o1 = new double*[sentences];
		int all_tokens_train=0,all_token_filter_wrong=0;
		for(int i=0;i<sentences;i++){
			DependencyInstance* x = training_corpus->at(i);
			all_scores_o1[i] = get_scores_o1(x,parameters,mach_o1,feat_temp_o1);
			double* scores_o1_filter = all_scores_o1[i];
			all_tokens_train += x->length();
			for(int i2=1;i2<x->length();i2++){	//ignore root
				//gold arcs that the filter would wrongly prune
				if(score_noprob(scores_o1_filter[get_index2(x->length(),x->heads->at(i2),i2)]))
					all_token_filter_wrong ++;
			}
		}
		cout << "For o1 filter: all " << all_tokens_train << ";filter wrong " << all_token_filter_wrong << endl;
		time_t now;
		time(&now);cout << "#Finish o1-filter at " << ctime(&now) << flush;

		//2. sweep all once and count
		for(int i=0;i<sentences;i++){
			DependencyInstance* x = training_corpus->at(i);
			double* scores_o1_filter = all_scores_o1[i];
			int length = x->length();
			for(int m=1;m<length;m++){
				//first special (0,0,m)
				if(x->heads->at(m) == 0)
					tmpall_right++;
				else if(score_noprob(scores_o1_filter[get_index2(length,0,m)]))
					tmpall_bad++;
				else
					tmpall_wrong++;
				//then (g,h,m)
				for(int h=1;h<length;h++){
					if(m==h)
						continue;
					int nope_hm = score_noprob(scores_o1_filter[get_index2(length,h,m)]);
					int link_hm = (x->heads->at(m)==h);
					int small = GET_MIN_ONE(m,h);
					int large = GET_MAX_ONE(m,h);
					for(int g=0;g<length;g++){
						if(g==h || g==m)
							continue;
						int nope_gh = score_noprob(scores_o1_filter[get_index2(length,g,h)]);
						//a gold (g,h,m) is kept as right even when g lies inside [small,large]
						if(link_hm && x->heads->at(h)==g)
							tmpall_right++;
						else if(nope_hm || nope_gh || (g>=small && g<=large))	//no non-projective
							tmpall_bad++;
						else
							tmpall_wrong++;
					}
				}
			}
		}
		printf("--Stat:%d,%d,%d\n",tmpall_right,tmpall_wrong,tmpall_bad);

		//3. sweep second time and adding them (same classification as pass 2)
		//-allocate
		data_right = new REAL[tmpall_right*idim];
		data_wrong = new REAL[tmpall_wrong*idim];
		REAL* assign_right = data_right;
		REAL* assign_wrong = data_wrong;
		for(int i=0;i<sentences;i++){
			DependencyInstance* x = training_corpus->at(i);
			int length = x->length();
			double* scores_o1_filter = all_scores_o1[i];
			for(int m=1;m<length;m++){
				//first special (0,0,m)
				if(x->heads->at(m) == 0){
					feat_gen->fill_one(assign_right,x,0,m,0);assign_right += idim;
				}
				else if(score_noprob(scores_o1_filter[get_index2(length,0,m)])){}
				else{
					feat_gen->fill_one(assign_wrong,x,0,m,0);assign_wrong += idim;
				}
				//then (g,h,m)
				for(int h=1;h<length;h++){
					if(m==h)
						continue;
					int nope_hm = score_noprob(scores_o1_filter[get_index2(length,h,m)]);
					int link_hm = (x->heads->at(m)==h);
					int small = GET_MIN_ONE(m,h);
					int large = GET_MAX_ONE(m,h);
					for(int g=0;g<length;g++){
						if(g==h || g==m)
							continue;
						int nope_gh = score_noprob(scores_o1_filter[get_index2(length,g,h)]);
						if(link_hm && x->heads->at(h)==g){
							feat_gen->fill_one(assign_right,x,h,m,g);assign_right += idim;
						}
						else if(nope_hm || nope_gh || (g>=small && g<=large))	//no non-projective
						{}
						else{
							feat_gen->fill_one(assign_wrong,x,h,m,g);assign_wrong += idim;
						}
					}
				}
			}
		}

		for(int i=0;i<sentences;i++){
			delete [](all_scores_o1[i]);
		}
		delete []all_scores_o1;
		time(&now);cout << "#Finish data-gen at " << ctime(&now) << flush;
	}

	//then considering CONF_NN_resample and copy them to finish data
	if(parameters->CONF_NN_resample < 1){
		//get part of the wrong ones --- but first shuffle them
		shuffle_data(data_wrong,data_wrong,idim,idim,tmpall_wrong*idim,tmpall_wrong*idim,10);
	}
	//Number of wrong samples actually used. It is truncated to a whole sample
	//BEFORE any byte count is derived from it: sizing the copy from the raw
	//floating-point product can exceed the `data` allocation by up to almost one
	//sample. It is also clamped so a ratio above 1 cannot read past data_wrong.
	int wrong_used = (int)(tmpall_wrong*parameters->CONF_NN_resample);
	if(wrong_used > tmpall_wrong)
		wrong_used = tmpall_wrong;
	if(wrong_used < 0)
		wrong_used = 0;
	int tmp_sumup = wrong_used + tmpall_right;
	data = new REAL[tmp_sumup*idim];
	target = new REAL[tmp_sumup];
	memcpy(data,data_right,(size_t)tmpall_right*idim*sizeof(REAL));
	memcpy(data+tmpall_right*idim,data_wrong,(size_t)wrong_used*idim*sizeof(REAL));
	//right samples come first in `data`, so they get target 1, the rest 0
	for(int i=0;i<tmp_sumup;i++){
		if(i<tmpall_right)
			target[i] = 1;
		else
			target[i] = 0;
	}
	shuffle_data(data,target,idim,1,tmp_sumup*idim,tmp_sumup,10);	//final shuffle
	cout << "--Data for this iter(M8:o2g): samples all " << tmpall_right+tmpall_wrong << " resample: " << tmp_sumup << endl;
	current = 0;
	end = tmp_sumup;
}