예제 #1
파일: main.cpp 프로젝트: hznlp/giza-kn
// StartTraining -- training set-up, as far as this excerpt shows:
//   1. writes the global parameter set to "<Prefix>.gizacfg";
//   2. creates the training sentence handler and, when a test corpus file is
//      named, the test sentence handler, printing corpus statistics;
//   3. computes LAMBDA from the training vocabulary totals;
//   4. creates the Dictionary, the translation table and the
//      model1 / model2 / hmm / model3 objects;
//   5. when ReadTablePrefix is non-empty, builds the "final" table file names,
//      reads p0 and prints IBM-3 / IBM-4 probability lines.
// Parameter: result -- int passed by reference; not touched in the visible lines.
// Returns:   double -- no return statement is visible in this excerpt.
// NOTE(review): the excerpt contains no braces and some statements look
// orphaned (see the notes below), so block boundaries are inferred only from
// indentation -- confirm against the complete file before relying on them.
double StartTraining(int&result)
  double errors=0.0;
  vcbList eTrainVcbList, fTrainVcbList;

  // Dump the current global parameter set to <Prefix>.gizacfg.
  string repFilename = Prefix + ".gizacfg" ;
  ofstream of2(repFilename.c_str());
  writeParameters(of2,getGlobalParSet(),-1) ;

  // NOTE(review): no call that actually reads the vocabulary files is visible
  // between this message and the token counts printed below -- presumably
  // dropped from the excerpt; confirm.
  cout << "reading vocabulary files \n";
  cout << "Source vocabulary list has " << eTrainVcbList.uniqTokens() << " unique tokens \n";
  cout << "Target vocabulary list has " << fTrainVcbList.uniqTokens() << " unique tokens \n";
  // The test vocabulary lists start out as copies of the training lists.
  vcbList eTestVcbList(eTrainVcbList) ;
  vcbList fTestVcbList(fTrainVcbList) ;
  // corpus is not declared in this block; it receives a heap-allocated handler
  // built from the training corpus file and the two training vocabulary lists.
  corpus = new sentenceHandler(CorpusFilename.c_str(), &eTrainVcbList, &fTrainVcbList);

  // The literal "NONE" is treated the same as an empty test-corpus name.
  if (TestCorpusFilename == "NONE")
    TestCorpusFilename = "";

  // Test-corpus handler and statistics, only when a test corpus file is named.
  if (TestCorpusFilename != ""){
    cout << "Test corpus will be read from: " << TestCorpusFilename << '\n';
      testCorpus= new sentenceHandler(TestCorpusFilename.c_str(), 
						       &eTestVcbList, &fTestVcbList);
      cout << " Test total # sentence pairs : " <<(*testCorpus).getTotalNoPairs1()<<" weighted:"<<(*testCorpus).getTotalNoPairs2() <<'\n';

      cout << "Size of the source portion of test corpus: " << eTestVcbList.totalVocab() << " tokens\n";
      cout << "Size of the target portion of test corpus: " << fTestVcbList.totalVocab() << " tokens \n";
      cout << "In source portion of the test corpus, only " << eTestVcbList.uniqTokensInCorpus() << " unique tokens appeared\n";
      cout << "In target portion of the test corpus, only " << fTestVcbList.uniqTokensInCorpus() << " unique tokens appeared\n";
      cout << "ratio (target/source) : " << double(fTestVcbList.totalVocab()) /
	eTestVcbList.totalVocab() << '\n';
  // Training-corpus statistics. The source size is reported as totalVocab()
  // minus getTotalNoPairs2(), the same denominator that LAMBDA uses below.
  cout << " Train total # sentence pairs (weighted): " << corpus->getTotalNoPairs2() << '\n';
  cout << "Size of source portion of the training corpus: " << eTrainVcbList.totalVocab()-corpus->getTotalNoPairs2() << " tokens\n";
  cout << "Size of the target portion of the training corpus: " << fTrainVcbList.totalVocab() << " tokens \n";
  cout << "In source portion of the training corpus, only " << eTrainVcbList.uniqTokensInCorpus() << " unique tokens appeared\n";
  cout << "In target portion of the training corpus, only " << fTrainVcbList.uniqTokensInCorpus() << " unique tokens appeared\n";
  // LAMBDA = target total / (source total - weighted sentence-pair count);
  // the formula is printed first, then its value.
  cout << "lambda for PP calculation in IBM-1,IBM-2,HMM:= " << double(fTrainVcbList.totalVocab()) << "/(" << eTrainVcbList.totalVocab() << "-" << corpus->getTotalNoPairs2() << ")=";
  LAMBDA = double(fTrainVcbList.totalVocab()) / (eTrainVcbList.totalVocab()-corpus->getTotalNoPairs2());
  cout << "= " << LAMBDA << '\n';
  // load dictionary
  // useDict records whether a dictionary file name was configured; when none
  // was, the Dictionary is constructed from an empty file name instead.
  Dictionary *dictionary;  
  useDict = !dictionary_Filename.empty();
  if (useDict) dictionary = new Dictionary(dictionary_Filename.c_str());
  else dictionary = new Dictionary("");
  int minIter=0;
  // NOTE(review): tTable is declared twice in a row below and the statement
  // guarded by the empty-CoocurrenceFile check is not visible; presumably the
  // two declarations belong to alternative (conditionally compiled) branches
  // that were lost from this excerpt -- confirm against the full file.
  if( CoocurrenceFile.length()==0 )
  //ifstream coocs(CoocurrenceFile.c_str());
  tmodel<COUNT, PROB> tTable(CoocurrenceFile);
  tmodel<COUNT, PROB> tTable;

  // The model objects are chained: m2 is built from m1 (plus the two alignment
  // tables), and both h and m3 are built from m2.
  model1 m1(CorpusFilename.c_str(), eTrainVcbList, fTrainVcbList,tTable,trainPerp, 
	    *corpus,&testPerp, testCorpus, 
	    trainViterbiPerp, &testViterbiPerp);
   amodel<PROB>  aTable(false);
   amodel<COUNT> aCountTable(false);
   model2 m2(m1,aTable,aCountTable);
   hmm h(m2);
   model3 m3(m2); 
   // Optional path: table files are named <ReadTablePrefix>.<kind>.final.
   if(ReadTablePrefix.length() )
       string number = "final";
       // NOTE(review): afilennfile is declared but never assigned in the
       // visible lines; it looks like a typo of "afile,nfile" -- confirm.
       string tfile,afilennfile,dfile,d4file,p0file,afile,nfile; //d5file
       tfile = ReadTablePrefix + ".t3." + number ;
       afile = ReadTablePrefix + ".a3." + number ;
       nfile = ReadTablePrefix + ".n3." + number ;
       dfile = ReadTablePrefix + ".d3." + number ;
       d4file = ReadTablePrefix + ".d4." + number ;
       //d5file = ReadTablePrefix + ".d5." + number ;
       p0file = ReadTablePrefix + ".p0_3." + number ;
       sentPair sent ;
       // p0 is read straight from the p0 file; no check of the stream state is
       // visible here, and no reads of the other table files are shown.
       double p0;
       ifstream p0f(p0file.c_str());
       p0f >> p0;
       d4model d4m(MAX_SENTENCE_LENGTH);
       //d5model d5m(d4m);
       // NOTE(review): both "Text corpus exists." and "No corpus exists." are
       // printed in the visible lines, and nothing that fills `sent` is shown;
       // presumably an else branch and a sentence loop were lost -- confirm.
       if( corpus||testCorpus )
	   sentenceHandler *x=corpus;
	   cout << "Text corpus exists.\n";
	     Vector<WordIndex>& es = sent.eSent;
	     Vector<WordIndex>& fs = sent.fSent;
	     // l and m are one less than the source and target sentence sizes.
	     int l=es.size()-1;
	     int m=fs.size()-1;
	     transpair_model4 tm4(es,fs,m1.tTable,m2.aTable,m3.dTable,m3.nTable,1-p0,p0,&d4m);
	     alignment al(l,m);
	     cout << "I use the alignment " << sent.sentenceNo-1 << '\n';
	     transpair_model3 tm3(es,fs,m1.tTable,m2.aTable,m3.dTable,m3.nTable,1-p0,p0,0);
	     // NOTE(review): as shown here, p is computed from tm3 only, so the
	     // IBM-4 line prints the IBM-3 value; tm4 is constructed but never
	     // queried in the visible lines -- confirm against the full file.
	     double p=tm3.prob_of_target_and_alignment_given_source(al,1);
	     cout << "Sentence " << sent.sentenceNo << " has IBM-3 prob " << p << '\n';
	     cout << "Sentence " << sent.sentenceNo << " has IBM-4 prob " << p << '\n';
	     //transpair_model5 tm5(es,fs,m1.tTable,m2.aTable,m3.dTable,m3.nTable,1-p0,p0,&d5m);
	     //cout << "Sentence " << sent.sentenceNo << " has IBM-5 prob " << p << '\n';
	   cout << "No corpus exists.\n";
예제 #2
// main -- declares a fixed catalogue of TuringMachine<5> objects and groups a
// subset of them into a single heap-allocated OrchMach1<5>.
// The group is chosen by the integer value of the SLURM_ARRAY_TASK_ID
// environment variable; when that variable is not set, a default group of five
// machines is used. The OrchMach1 object is deleted at the end of the visible
// code.
// Parameters: argc, argv -- not referenced in the visible lines.
// NOTE(review): the excerpt has no function braces, and the switch cases lack
// their closing ");" and any break statements, so lines appear to be missing --
// confirm against the complete file.
main ( int argc, char *argv[] )
     // Machine catalogue. The trailing comment on each line (a name and/or a
     // number) comes from the original author; the meaning of the constructor
     // arguments is not visible in this excerpt.
     TuringMachine<5> tm1 ( 9, 0, 11, 1, 15, 2, 17, 3, 11, 4, 23, 5, 24, 6, 3,  7, 21, 9, 0 ); // Marxen-Buntrock, 4097
     TuringMachine<5> tm2 ( 9, 0, 11, 1, 18, 2, 15, 3, 23, 4, 3,  5, 15, 7, 29, 8, 5,  9, 8 ); // Marxen-Buntrock, 4096
     TuringMachine<5> tm3 ( 9, 0, 11, 1, 5,  2, 15, 3, 20, 4, 3,  5, 15, 7, 29, 8, 24, 9, 11 ); // Marxen-Buntrock, 4095
     TuringMachine<5> tm4 ( 9, 0, 11, 1, 5,  2, 15, 3, 20, 4, 3,  5, 15, 7, 29, 8, 15, 9, 11 ); // Marxen-Buntrock, 4095
     TuringMachine<5> tm5 ( 9, 0, 11, 1, 5,  2, 15, 3, 9,  4, 5,  5, 21, 6, 5,  7, 27, 9, 12 ); // Marxen-Buntrock, 4097
     TuringMachine<5> tm6 ( 9, 0, 11, 1, 5,  2, 15, 3, 23, 4, 3,  5, 15, 7, 26, 8, 15, 9, 11 ); // Marxen-Buntrock, 4096
     TuringMachine<5> tm7 ( 9, 0, 11, 2, 15, 3, 17, 4, 26, 5, 18, 6, 15, 7, 6,  8, 23, 9, 5 ); // Uhing, 1471
     TuringMachine<5> tm8 ( 9, 0, 11, 1, 15, 2, 0,  3, 18, 4, 3,  6, 9,  7, 29, 8, 20, 9, 8 ); // Uhing, 1915
     TuringMachine<5> tm9 ( 9, 0, 11, 1, 12, 2, 17, 3, 23, 4, 3,  5, 8,  6, 26, 8, 15, 9, 5 ); // Schult, 501
     TuringMachine<5> tm10 ( 9, 0, 9,  1, 12, 2, 15, 3, 21, 4, 29, 5, 1,  7, 24, 8, 2,  9, 27 );// 160
     TuringMachine<5> tm11 ( 9, 0, 21, 1, 9,  2, 24, 3, 6,  4, 3,  5, 20, 6, 17, 7, 0,  9, 15 );// 32
     TuringMachine<5> tm12 ( 9, 0, 9,  1, 11, 2, 17, 3, 21, 4, 19, 5, 29, 6, 5,  7, 6,  8, 8 );// 26
     TuringMachine<5> tm13 ( 9, 0, 9,  1, 11, 2, 15, 3, 20, 4, 21, 5, 27, 6, 4,  7, 2,  8, 12 );// 21
     TuringMachine<5> tm14 ( 9, 0, 9,  1, 11, 2, 26, 3, 23, 4, 27, 5, 2,  7, 17, 8, 5,  9, 13 );// 19
     TuringMachine<5> tm15 ( 9, 0, 9,  1, 11, 2, 5,  3, 20, 4, 17, 5, 24, 7, 29, 8, 15, 9, 1 );// 14
     TuringMachine<5> tm16 ( 9, 0, 9,  1, 11, 2, 5,  3, 20, 4, 15, 5, 0,  6, 26, 7, 17, 8, 27 );// 15
     TuringMachine<5> tm17 ( 9, 0, 9,  1, 11, 2, 5,  3, 20, 4, 27, 5, 29, 7, 17, 8, 8,  9, 0 );// 16
     TuringMachine<5> tm18 ( 9, 0, 9,  1, 11, 2, 12, 3, 23, 4, 18, 5, 15, 6, 29, 7, 5,  8, 2 );// 17
     TuringMachine<5> tm19 ( 9, 0, 9,  1, 11, 2, 5,  3, 23, 4, 27, 6, 17, 7, 20, 8, 0,  9, 13 );// 18
     TuringMachine<5> tm20 ( 9, 0, 9,  1, 11, 2, 5,  3, 20, 4, 15, 5, 0,  6, 27, 7, 17, 9, 1 );// 13; 1 remains from the 14 one (translated from Hungarian -- TODO confirm)
     TuringMachine<5> tm21 ( 9, 0, 9,  1, 11, 2, 5,  3, 20, 4, 15, 5, 0,  7, 17, 8, 15, 9, 1 );// 12
     TuringMachine<5> tm22 ( 9, 0, 9,  1, 14, 2, 5,  3, 21, 4, 8,  5, 1,  6, 3,  8, 15, 9, 1 );// 11
     TuringMachine<5> tm23 ( 9, 0, 6,  1, 17, 2, 15, 3, 3,  4, 5,  5, 20, 6, 3,  8, 15, 9, 1 );// 10
     TuringMachine<5> tm24 ( 9, 0, 9,  1, 17, 2, 18, 3, 1,  4, 5,  6, 15, 7, 29, 8, 15, 9, 1 );// 9
     TuringMachine<5> tm25 ( 9, 0, 9,  1, 14, 2, 18, 3, 3,  4, 5,  6, 15, 7, 29, 8, 15, 9, 1 );// 8
     TuringMachine<5> tm26 ( 9, 0, 25, 1, 11, 2, 23, 3, 27, 4, 1,  5, 21, 6, 5,  7, 16, 8, 21 );// 6
     TuringMachine<5> tm27 ( 9, 0, 25, 1, 11, 2, 23, 4, 23, 5, 27, 6, 5,  7, 17, 8, 9,  9, 29 ); // 5
     // NOTE(review): tm28 is constructed with exactly the same argument list
     // as tm15 -- confirm this duplication is intended.
     TuringMachine<5> tm28 ( 9, 0, 9,  1, 11, 2, 5,  3, 20, 4, 17, 5, 24, 7, 29, 8, 15, 9, 1 );// 4
     TuringMachine<5> tm29 ( 9, 0, 16, 1, 14, 2, 18, 3, 11, 4, 27, 5, 29, 7, 27, 8, 21, 9, 12 ); // 3
     TuringMachine<5> tm30 ( 9, 0, 16, 1, 14, 2, 18, 3, 12, 4, 27, 5, 9,  6, 26, 7, 0,  8, 29 ); // 2
     TuringMachine<5> mbR ( 9, 0, 11, 1, 15, 2, 17, 3, 1,  4, 23, 5, 24, 6, 3,  7, 21, 9, 0 );// Marxen-Buntrock, (recombinated 70740809, 4097)


     // Owning raw pointer; assigned in exactly one branch below and released
     // by the delete at the end.
     // NOTE(review): no default: label is visible in the switch; if there is
     // none, om stays uninitialised for task IDs that are not listed and the
     // final delete would then act on an indeterminate pointer -- confirm.
     OrchMach1<5>* om;

     // The environment variable is looked up twice: once to test for its
     // presence and once to convert its value with std::atoi.
     if ( std::getenv ( "SLURM_ARRAY_TASK_ID" ) ) {

          // Where a pointer list below does not end in a trailing comma
          // (cases 1, 4, 12, 13, 16, 24), the first constructor argument
          // equals the number of machine pointers that follow it; presumably
          // it is the machine count -- TODO confirm.
          // The visible pointer lists of cases 13-24 repeat those of cases
          // 1-12 (13 = 1, 16 = 4, 19 = 7, 20 = 8, 21 = 9, 22 = 10, 24 = 12).
          switch ( std::atoi ( std::getenv ( "SLURM_ARRAY_TASK_ID" ) ) ) {

               // Breeds that were surely divergent were deleted.

          case 1:
               om = new OrchMach1<5> ( 18,
                                       &tm10, &tm11, &tm12, &tm13, &tm14,
                                       &tm15, &tm16, &tm17, &tm18, &tm19,
                                       &tm20, &tm21, &tm22, &tm23, &tm24,
                                       &tm25, &tm29, &tm30

          case 4:
               om = new OrchMach1<5> ( 17,
                                       &tm10, &tm12, &tm13, &tm14, &tm15,
                                       &tm16, &tm17, &tm18, &tm19, &tm20,
                                       &tm21, &tm22, &tm23, &tm24, &tm25,
                                       &tm29, &tm30

          case 7:
               om = new OrchMach1<5> ( 31-7,
                                       &tm8, &tm9, &tm10,
                                       &tm11, &tm12, &tm13, &tm14, &tm15,
                                       &tm16, &tm17, &tm18, &tm19, &tm20,
                                       &tm21, &tm22, &tm23, &tm24, &tm25,
                                       &tm26, &tm27, &tm28, &tm29, &tm30,
          case 8:
               om = new OrchMach1<5> ( 31-8,
                                       &tm9, &tm10,
                                       &tm11, &tm12, &tm13, &tm14, &tm15,
                                       &tm16, &tm17, &tm18, &tm19, &tm20,
                                       &tm21, &tm22, &tm23, &tm24, &tm25,
                                       &tm26, &tm27, &tm28, &tm29, &tm30,

          case 9:
               om = new OrchMach1<5> ( 31-9,
                                       &tm11, &tm12, &tm13, &tm14, &tm15,
                                       &tm16, &tm17, &tm18, &tm19, &tm20,
                                       &tm21, &tm22, &tm23, &tm24, &tm25,
                                       &tm26, &tm27, &tm28, &tm29, &tm30,
          case 10:
               om = new OrchMach1<5> ( 31-10,
                                       &tm11, &tm12, &tm13, &tm14, &tm15,
                                       &tm16, &tm17, &tm18, &tm19, &tm20,
                                       &tm21, &tm22, &tm23, &tm24, &tm25,
                                       &tm26, &tm27, &tm28, &tm29, &tm30,

          case 12:
               om = new OrchMach1<5> ( 15,

                                       &tm11, &tm12, &tm13, &tm14, &tm15,
                                       &tm21, &tm22, &tm23, &tm24, &tm25,
                                       &tm26, &tm27, &tm28, &tm29, &tm30


          case 13:
               om = new OrchMach1<5> ( 18,
                                       &tm10, &tm11, &tm12, &tm13, &tm14,
                                       &tm15, &tm16, &tm17, &tm18, &tm19,
                                       &tm20, &tm21, &tm22, &tm23, &tm24,
                                       &tm25, &tm29, &tm30

          case 16:
               om = new OrchMach1<5> ( 17,
                                       &tm10, &tm12, &tm13, &tm14, &tm15,
                                       &tm16, &tm17, &tm18, &tm19, &tm20,
                                       &tm21, &tm22, &tm23, &tm24, &tm25,
                                       &tm29, &tm30

          case 19:
               om = new OrchMach1<5> ( 31-7,
                                       &tm8, &tm9, &tm10,
                                       &tm11, &tm12, &tm13, &tm14, &tm15,
                                       &tm16, &tm17, &tm18, &tm19, &tm20,
                                       &tm21, &tm22, &tm23, &tm24, &tm25,
                                       &tm26, &tm27, &tm28, &tm29, &tm30,
          case 20:
               om = new OrchMach1<5> ( 31-8,
                                       &tm9, &tm10,
                                       &tm11, &tm12, &tm13, &tm14, &tm15,
                                       &tm16, &tm17, &tm18, &tm19, &tm20,
                                       &tm21, &tm22, &tm23, &tm24, &tm25,
                                       &tm26, &tm27, &tm28, &tm29, &tm30,

          case 21:
               om = new OrchMach1<5> ( 31-9,
                                       &tm11, &tm12, &tm13, &tm14, &tm15,
                                       &tm16, &tm17, &tm18, &tm19, &tm20,
                                       &tm21, &tm22, &tm23, &tm24, &tm25,
                                       &tm26, &tm27, &tm28, &tm29, &tm30,
          case 22:
               om = new OrchMach1<5> ( 31-10,
                                       &tm11, &tm12, &tm13, &tm14, &tm15,
                                       &tm16, &tm17, &tm18, &tm19, &tm20,
                                       &tm21, &tm22, &tm23, &tm24, &tm25,
                                       &tm26, &tm27, &tm28, &tm29, &tm30,

          case 24:
               om = new OrchMach1<5> ( 15,

                                       &tm11, &tm12, &tm13, &tm14, &tm15,
                                       &tm21, &tm22, &tm23, &tm24, &tm25,
                                       &tm26, &tm27, &tm28, &tm29, &tm30



     // Fallback group used when SLURM_ARRAY_TASK_ID is not set.
     } else
          om = new OrchMach1<5> ( 5,  &tm2, &tm12, &tm13, &tm14, &tm15 );


     // Release the machine group allocated above.
     delete om;