Example #1
0
/**
 * Load the CSLM machines (optional) and allocate the n-gram cache storage.
 *
 * When nomMach is not NULL it names a text file containing, in order:
 * the weighted vocabulary file name, the number of machines, and then one
 * "<machine file> <weight>" pair per machine. Each machine is read with
 * Mach::Read and wrapped in a TrainerNgramSlist; the weights go to poids.
 *
 * Afterwards the function sizes the global buffers resuCSLM and ram from
 * tailleCache, ordre and the current value of nbTrainer.
 *
 * @param tailleCache number of cache entries; TAILLEMAX becomes tailleCache+1
 * @param ordre       n-gram order; stored in ordreMax, then rounded up to an
 *                    even value for the per-entry word storage
 * @param lmlocal     language model, stored in the global lm
 * @param dict        dictionary handed to lmMakeIndexCSLM
 * @param nomMach     machine list file name, or NULL to skip machine loading
 *
 * Any I/O, parse or allocation failure prints a message on stderr and
 * terminates the process with exit(1).
 */
void initCacheLRU( int tailleCache, int ordre, lm_t *lmlocal , dict_t *dict ,  char const  * nomMach) {
  FILE *f=NULL;

  char nom1[1024], nomVocabPondere[1024];

  lm=lmlocal;
  if (nomMach !=NULL) {
    f=fopen(nomMach,"r");
    if (f==NULL) {
      fprintf(stderr,"can't open %s\n",nomMach);
      exit(1);
    }
    // The field width keeps fscanf from writing past the 1024-byte buffer.
    if (fscanf(f,"%1023s",nomVocabPondere)!=1) {
      fprintf(stderr,"error reading vocabulary file name from %s\n",nomMach);
      exit(1);
    }
    // A missing or non-positive count would make every allocation below meaningless.
    if (fscanf(f,"%i",&nbTrainer)!=1 || nbTrainer<1) { //machine number
      fprintf(stderr,"error reading number of machines from %s\n",nomMach);
      exit(1);
    }
    cslm= new TrainerNgramSlist *[nbTrainer];
    poids= new double[nbTrainer];
    cslm_wlist = new WordList;
    if (cslm_wlist != NULL)
      cslm_wlist->Read(nomVocabPondere); /* shortlist length will be set with TrainerNgramSlist class */
    for (int i =0; i<nbTrainer; i++)
      {
	if (fscanf(f,"%1023s%lf",nom1,poids+i)!=2) {fprintf(stderr,"error reading machine %i\n",i);exit(1);}

	std::ifstream ifs;
	ifs.open(nom1,ios::binary);
	CHECK_FILE(ifs,nom1);
	Mach *m = Mach::Read(ifs);
	ifs.close();
	m->Info();
	cslm[i] = new  TrainerNgramSlist(m, cslm_wlist, (char*)"");
      }
    fclose(f);
    lmMakeIndexCSLM(dict,lm,cslm_wlist);

  }

  // A negative size would give TAILLEMAX < 1 and the stores into ram[0]
  // below would land outside the allocation.
  if (tailleCache<0) {
    fprintf(stderr,"invalid cache size %i\n",tailleCache);
    exit(1);
  }
  TAILLEMAX=tailleCache+1;

  // One row of TAILLEMAX floats per machine, carved out of a single block.
  // At least one row is reserved so that the store into resuCSLM[0] is valid
  // even when no machine was loaded.
  const size_t nbLignes = (nbTrainer>0) ? (size_t)nbTrainer : 1;
  resuCSLM=(float **) malloc(nbLignes*sizeof(float*));
  if (resuCSLM==NULL) {
    fprintf(stderr,"can't allocate CSLM result table\n");
    exit(1);
  }
  resuCSLM[0]=(float *) malloc(nbLignes*sizeof(float)*TAILLEMAX);
  if (resuCSLM[0]==NULL) {
    fprintf(stderr,"can't allocate CSLM result table\n");
    exit(1);
  }
  for (int i=1 ; i<nbTrainer ;i++)
    resuCSLM[i]=resuCSLM[i-1]+TAILLEMAX;


  ordreMax=ordre;
  if (ordre%2 ==1) ordre++;   // per-entry word storage uses an even number of slots
  ordreHash=ordre/2;
  ram=(Gram * )malloc(sizeof( Gram) * TAILLEMAX);
  if (ram==NULL) {
    fprintf(stderr,"can't allocate n-gram cache\n");
    exit(1);
  }
  // Single zeroed block holding `ordre` words for each of the TAILLEMAX entries.
  ram->w=(unsigned int *) calloc(TAILLEMAX*ordre, sizeof(unsigned int) );
  if (ram->w==NULL && ordre!=0) {
    fprintf(stderr,"can't allocate n-gram cache\n");
    exit(1);
  }

  // Entry i points at its own slice of the shared word block.
  for (int i =1 ;i<TAILLEMAX; i++) ram[i].w=ram[i-1].w+ordre;
}  
Example #2
0
int main (int argc, char *argv[])
{
  ifstream ifs;
  Mach *m;

  for (int i=1; i<argc; i++) {
    ifs.open(argv[i],ios::binary);
    CHECK_FILE(ifs,argv[i]);
    cout << endl << "Information on machine: " << argv[i] << endl;
    m = Mach::Read(ifs);
    cout << "Using file version " << Mach::GetFileId() << endl;
    m->Info();
    ifs.close();
    delete m;
  }

  GpuUnlock();
  return 0;
}
Example #3
0
/**
 * Evaluate an existing machine on a CoNLL test file.
 *
 * The command line supplies the machine file, the test data and the dev
 * (gold) data. For every sentence read from CONF_test_file the function
 * builds one input row per ordered word pair, lets the machine score all
 * rows in one call, decodes a projective tree from the scores and writes
 * the sentence with the predicted heads to CONF_output_file. Finally it
 * prints counters and runs DependencyEvaluator::evaluate against
 * CONF_gold_file.
 *
 * @return 0 on success; 1 if the machine file does not exist and Error()
 *         returns to the caller
 */
int main3(int argc, char *argv[])
{
  MachConfig mach_config(true);
  string mach_fname, test_fname, dev_fname;
  int curr_it = 0;
  Mach *mlp = NULL;	// stays NULL until a machine has really been loaded

  // select available options
  mach_config
    .sel_cmdline_option<std::string>("mach,m"        , true )
    .sel_cmdline_option<std::string>("test-data,t"  , true )
    .sel_cmdline_option<std::string>("dev-data,d"    , true)
    ;

  // parse parameters
  if (mach_config.parse_options(argc, argv)) {
    // get parameters
    mach_fname  = mach_config.get_mach();
    test_fname = mach_config.get_test_data();
    dev_fname   = mach_config.get_dev_data();
    curr_it     = mach_config.get_curr_iter();

    //gold file
    CONF_test_file = test_fname;
    CONF_gold_file = dev_fname;
  }
  else if (mach_config.help_request())
    usage2(mach_config);
  else {
    if (mach_config.parsing_error())
      usage2(mach_config, false);
    Error(mach_config.get_error_string().c_str());
  }

    // Check if existing machine exists
  const char *mach_fname_cstr = mach_fname.c_str();
  struct stat stat_struct;
  if (stat(mach_fname_cstr, &stat_struct)==0) {
      // read existing network
    ifstream ifs;
    ifs.open(mach_fname_cstr,ios::binary);
    CHECK_FILE(ifs,mach_fname_cstr);
    mlp = Mach::Read(ifs);
    ifs.close();
    cout << "Found existing machine with " << mlp->GetNbBackw()
         << " backward passes, continuing training at iteration " << curr_it+1 << endl;
  }
  else {
    Error("No such machine for eval.");
    return 1;	// never continue without a machine, even if Error() returns
  }
  //mlp->Info();

  	//evaluating the test files
  	cout << "Now evaluating the test file..." << endl;
  	CONLLReader* reader = new CONLLReader();
  	CONLLWriter* writer = new CONLLWriter();
  	reader->startReading(CONF_test_file.c_str());
  	writer->startWriting(CONF_output_file.c_str());
  	//the list
  	HashMap * wl = training_space::load_wordlist(CONF_wl_file.c_str());

  	//some variables
  	int oov_num = 0;	//out of vocabulary
  	int sen_num = 0;	//sentence number
  	int token_num = 0;	//token number
  	int miss_count = 0;	//only work if the testing file already have answers
  	// NOTE: clock()/1000 is only milliseconds when CLOCKS_PER_SEC is 1000000.
  	int TIME_start = clock() / 1000;
  	int TIME_start_fine = 0;
  	//calculate
	DependencyInstance* x = reader->getNext();
	while(x != NULL){
		if(sen_num%500 == 0){
			cout << "Having processed " << sen_num << ";period is "<<
					(clock()/1000)-TIME_start_fine << "ms."<< endl;
			TIME_start_fine = clock() / 1000;
		}
		sen_num++;
		int length = x->forms->size();
		token_num += length - 1;
		double *tmp_scores = new double[length*length*2];

		//construct scores using nn
		// one row per (ii<j) pair and per direction: 2 * length*(length-1)/2
		int num_pair = length*(length-1);	//2 * (0+(l-1))*l/2
		REAL *mach_x = new REAL[num_pair*CONF_X_dim];
		REAL *mach_y = new REAL[num_pair*CONF_Y_dim];
		// slot 0 is <s>, slots 1..length are the words, slot length+1 is </s>
		int* word_index = new int[length+2];	//including <s> and </s>
		for(int i=0;i<length;i++){
			HashMap::iterator iter = wl->find(x->forms->at(i));
			if(iter == wl->end()){
				oov_num++;
				word_index[i+1] = wl->find(&unknown_token)->second;
			}
			else
				word_index[i+1] = iter->second;
		}
		string sen_s = SENTENCE_START;
		string sen_e = SENTENCE_END;
		word_index[0] = wl->find(&sen_s)->second;
		word_index[length+1] = wl->find(&sen_e)->second;

		// fill the input rows in exactly the order the scores are read back below
		REAL* assign_x = mach_x;
		for(int ii=0;ii<length;ii++){
			for(int j=ii+1;j<length;j++){
				for(int lr=0;lr<2;lr++){
#ifdef WHICH_TWO
					//2 word pair
					if(lr==E_RIGHT){
						*assign_x++ = (REAL)word_index[ii+1];
						*assign_x++ = (REAL)word_index[j+1];
					}
					else{
						*assign_x++ = (REAL)word_index[j+1];
						*assign_x++ = (REAL)word_index[ii+1];
					}
#endif
#ifdef WHICH_SIX
					//build mach_x : 6 words group(h-1 h h+1 m-1 m m+1)
					// word k lives at word_index[k+1], so [k],[k+1],[k+2] are k-1,k,k+1
					if(lr==E_RIGHT){
						*assign_x++ = (REAL)word_index[ii];
						*assign_x++ = (REAL)word_index[ii+1];
						*assign_x++ = (REAL)word_index[ii+2];
						*assign_x++ = (REAL)word_index[j];
						*assign_x++ = (REAL)word_index[j+1];
						*assign_x++ = (REAL)word_index[j+2];
					}
					else{
						*assign_x++ = (REAL)word_index[j];
						*assign_x++ = (REAL)word_index[j+1];
						*assign_x++ = (REAL)word_index[j+2];
						*assign_x++ = (REAL)word_index[ii];
						*assign_x++ = (REAL)word_index[ii+1];
						*assign_x++ = (REAL)word_index[ii+2];
					}
#endif
				}
			}
		}
		//- give it to nn
		mlp->evaluate(mach_x,mach_y,num_pair,CONF_X_dim,CONF_Y_dim);
		REAL* assign_y = mach_y;
		for(int ii=0;ii<length;ii++){
			for(int j=ii+1;j<length;j++){
				for(int lr=0;lr<2;lr++){
					int index = get_index2(length,ii,j,lr);
#ifdef ULTIMATE_DEBUG
					cout << ii << "to" << j << ((lr==E_RIGHT)?'r':'l')
							<< ":" << *assign_y << endl;
#endif
					tmp_scores[index] = *assign_y++;
				}
			}
		}
		//- decode and write
		vector<int> *ret = decodeProjective(length,tmp_scores);
		// compare with the heads read from the file before they get replaced
		for(int i2=1;i2<length;i2++){	//ignore root
			if((*ret)[i2] != (*(x->heads))[i2])
				miss_count ++;
		}
#ifdef ULTIMATE_DEBUG
		{
			double score_here=0;
			double score_right=0;
			for(int i2=1;i2<length;i2++){	//ignore root
				int head_here = (*ret)[i2];
				int head_right = (*(x->heads))[i2];
				score_here += get_score_from_array(length,head_here,i2,tmp_scores);
				score_right += get_score_from_array(length,head_right,i2,tmp_scores);
			}
			cout << "Right:" << score_right << ";Output:" << score_here << endl;
		}
#endif
		// swap in the predicted heads so the writer emits them
		delete x->heads;
		x->heads = ret;
		writer->write(x);
		delete x;
		delete []tmp_scores;
		delete []mach_x;
		delete []mach_y;
		delete []word_index;
		x = reader->getNext();
	}
	reader->finishReading();
	writer->finishWriting();
	delete reader;
	delete writer;
	// the machine is not needed for the file-based scoring below
	delete mlp;
	cout << "Finished testing in " << (clock()/1000-TIME_start) << "ms" << endl;

	//conclude and evaluate
	cout << "Testing data description:\n"
			<< "Sentences: " << sen_num << '\n'
			<< "Tokens: " << token_num << '\n'
			<< "OOV token: " << oov_num << '\n'
			<< "Miss token: " << miss_count << endl;
	string t;
	DependencyEvaluator::evaluate(CONF_gold_file,CONF_output_file,t,false);

  return 0;
}
Example #4
0
/**
 * Train a machine and save it.
 *
 * The machine file named on the command line is loaded if it exists;
 * otherwise a new machine is obtained from the configuration. The machine
 * is trained with a mean-squared-error function, TrainAndTest() receives
 * a file name in which ".mach" is replaced by ".best.mach", and the final
 * machine is written back to the machine file.
 *
 * @return always 0
 */
int main2(int argc, char *argv[])
{
  MachConfig mach_config(true);
  string mach_fname, train_fname, dev_fname;
  int curr_it = 0;
  Mach *mlp = NULL;

  // select available options
  mach_config
    .sel_cmdline_option<std::string>("mach,m"        , true )
    .sel_cmdline_option<std::string>("train-data,t"  , true )
    .sel_cmdline_option<std::string>("dev-data,d"    , false)
    .sel_cmdline_option<REAL>       ("lrate-beg,L"   , false)
    .sel_cmdline_option<REAL>       ("lrate-mult,M"  , false)
    .sel_cmdline_option<REAL>       ("weight-decay,W", false)
    .sel_cmdline_option<int>        ("curr-iter,C"   , false)
    .sel_cmdline_option<int>        ("last-iter,I"   , false)
    .sel_cmdline_option<int>        ("block-size,B"  , false)
    ;

  // parse parameters
  if (mach_config.parse_options(argc, argv)) {
    // get parameters
    mach_fname  = mach_config.get_mach();
    train_fname = mach_config.get_train_data();
    dev_fname   = mach_config.get_dev_data();
    curr_it     = mach_config.get_curr_iter();
  }
  else if (mach_config.help_request())
    usage(mach_config);
  else {
    if (mach_config.parsing_error())
      usage(mach_config, false);
    Error(mach_config.get_error_string().c_str());
  }

    // Check if existing machine exists
  const char *mach_fname_cstr = mach_fname.c_str();
  struct stat stat_struct;
  if (stat(mach_fname_cstr, &stat_struct)==0) {
      // read existing network
    ifstream ifs;
    ifs.open(mach_fname_cstr,ios::binary);
    CHECK_FILE(ifs,mach_fname_cstr);
    mlp = Mach::Read(ifs);
    ifs.close();
    cout << "Found existing machine with " << mlp->GetNbBackw()
         << " backward passes, continuing training at iteration " << curr_it+1 << endl;
  }
  else {
    mlp=mach_config.get_machine();
    if (mlp == NULL)
      Error(mach_config.get_error_string().c_str());
  }

  mlp->Info();

  ErrFctMSE errfct(*mlp);
  //ErrFctCrossEnt errfct(*mlp);
  Trainer trainer(mlp, &errfct, (char *)train_fname.c_str(),
      (dev_fname.empty() ? NULL : (char *)dev_fname.c_str()),
      mach_config.get_lrate_beg(), mach_config.get_lrate_mult(),
      mach_config.get_weight_decay(), mach_config.get_last_iter(), curr_it);
  cout << "Initial error rate: " << 100.0*trainer.TestDev() << "%" << endl;

  // Name handed to TrainAndTest(): everything from the first ".mach" on is
  // replaced by ".best.mach"; a name without ".mach" is passed unchanged.
  // A std::string is used so that long paths cannot overflow a fixed buffer.
  string best_fname(mach_fname_cstr);
  const string::size_type ext_pos = best_fname.find(".mach");
  if (ext_pos != string::npos) {
    best_fname.erase(ext_pos);
    best_fname += ".best.mach";
  }
  trainer.TrainAndTest((char *)best_fname.c_str());

    // save machine at the end
  ofstream fs;
  fs.open(mach_fname_cstr,ios::binary);
  // report the file that was actually opened (argv[4] may not even exist)
  CHECK_FILE(fs,mach_fname_cstr);
  mlp->Write(fs);
  fs.close();

  GpuUnlock();
  delete mlp;

  return 0;
}
Example #5
0
void eval()
{
	const char *mach_fname_cstr = CONF_mach_file.c_str();
	Mach *mlp;
	struct stat stat_struct;
	if (stat(mach_fname_cstr, &stat_struct)==0) {
		// read existing network
	    ifstream ifs;
	    ifs.open(mach_fname_cstr,ios::binary);
	    CHECK_FILE(ifs,mach_fname_cstr);
	    mlp = Mach::Read(ifs);
	    ifs.close();
	    cout << "Found existing machine" << endl;
	}
	else {
		Error("No such machine for eval.");
	}

  	//evaluating the test files
  	cout << "Now evaluating the test file..." << endl;
  	CONLLReader* reader = new CONLLReader();
  	CONLLWriter* writer = new CONLLWriter();
  	reader->startReading(CONF_test_file.c_str());
  	writer->startWriting(CONF_output_file.c_str());
  	//the list
  	HashMap * wl = load_wordlist(CONF_vocab_file.c_str());
#ifdef INSANE_DEBUG
  	FILE* x_file = fdopen(3,"w");
#endif

  	//some variables
  	int oov_num = 0;	//out of vocabulary
  	int sen_num = 0;	//sentence number
  	int token_num = 0;	//token number
  	int miss_count = 0;	//only work if the testing file already have answers
  	int TIME_start = clock() / 1000;
  	int TIME_start_fine = 0;
  	//calculate
	DependencyInstance* x = reader->getNext();
	while(x != NULL){
		if(sen_num%1000 == 0){
			cout << "Having processed " << sen_num << ";period is "<<
					(clock()/1000)-TIME_start_fine << "ms."<< endl;
			TIME_start_fine = clock() / 1000;
		}
		sen_num++;
		int length = x->forms->size();
		token_num += length - 1;
		double *tmp_scores = new double[length*length*2];

		//construct scores using nn
		int num_pair = length*(length-1);	//2 * (0+(l-1))*l/2
		REAL *mach_x = new REAL[num_pair*IND_CONF_x_dim_final];
		REAL *mach_y = new REAL[num_pair*CONF_y_class_size];
		int *word_index = get_word_index(length,x,wl,&oov_num);

		int pair_count = 0;
		REAL* assign_x = mach_x;
		for(int ii=0;ii<length;ii++){
			for(int j=ii+1;j<length;j++){
				for(int lr=0;lr<2;lr++){
					//build mach_x
					if(lr==E_RIGHT)
						fill_feature(length,ii,j,word_index,assign_x);
					else
						fill_feature(length,j,ii,word_index,assign_x);
					assign_x += IND_CONF_x_dim_final;
				}
			}
		}
		//- give it to nn
		mlp->evaluate(mach_x,mach_y,num_pair,IND_CONF_x_dim_final,CONF_y_class_size);
		REAL* assign_y = mach_y;
		for(int ii=0;ii<length;ii++){
			for(int j=ii+1;j<length;j++){
				for(int lr=0;lr<2;lr++){
					int index = get_index2(length,ii,j,lr);
					//important ...
					double temp = 0;
					if(CONF_if_y_calss){
						for(int c=0;c<CONF_y_class_size;c++)
							temp += (*assign_y++)*c;
						tmp_scores[index] = temp;
					}
					else
						tmp_scores[index] = *assign_y++;
				}
			}
		}
		//- decode and write
		vector<int> *ret = decodeProjective(length,tmp_scores);
		for(int i2=1;i2<length;i2++){	//ignore root
			if((*ret)[i2] != (*(x->heads))[i2])
				miss_count ++;
		}
		delete x->heads;
		x->heads = ret;
		writer->write(x);
		delete x;
		delete []tmp_scores;
#ifdef INSANE_DEBUG
		fprintf(x_file,"Sentence %d:\n",sen_num);
  		for(int i=0;i<num_pair;i++){
  			for(int j=0;j<IND_CONF_x_dim_final;j++)
  				fprintf(x_file,"%d ",(int)mach_x[i*IND_CONF_x_dim_final+j]);
  			fprintf(x_file,"\n");
  		}
#endif
		delete []mach_x;
		delete []mach_y;
		delete []word_index;
		x = reader->getNext();
	}
#ifdef INSANE_DEBUG
	fclose(x_file);
#endif
	reader->finishReading();
	writer->finishWriting();
	delete reader;
	delete writer;
	cout << "Finished testing in " << (clock()/1000-TIME_start) << "ms" << endl;

	//conclude and evaluate
	cout << "Testing data description:\n"
			<< "Sentences: " << sen_num << '\n'
			<< "Tokens: " << token_num << '\n'
			<< "OOV token: " << oov_num << '\n'
			<< "Miss token: " << miss_count << endl;
	string t;
	DependencyEvaluator::evaluate(CONF_gold_file,CONF_output_file,t,false);
}