Exemplo n.º 1
0
int main()
{
	vector<unsigned int> trans = f.insertSentence("Dies ist ein Test");
	vector<unsigned int> ref = f.insertSentence("ein Test Dies ist");
	cout << trans[1] << trans[2] << trans[3] << endl;
	cout << f.getWord(trans[1]) << f.getWord(trans[2]) << f.getWord(trans[3]) << endl;
	//Bleu *current = new Bleu(trans,ref);

	return 0;
}
Exemplo n.º 2
0
/// Entry point of the phrase-extraction tool.
///
/// Reads a source document and a target document line by line in parallel,
/// asks the Alignment object for every source span whose aligned target span
/// maps back to exactly that source span, counts the resulting phrases in
/// three prefix trees (source phrases, target phrases, phrase pairs) and
/// finally passes the counts to showFreqPhrases().
///
/// @param argc  number of command-line arguments; at least 4 are required
/// @param argv  argv[1] = source document, argv[2] = target document,
///              argv[3] = alignment file
/// @return 0 on success; 1 if arguments are missing or an input cannot be opened
int main(int argc, char** argv)
{
	// Status returned on every error path. The original returned 0 here even
	// though its comments named EXIT_FAILURE, so callers could not detect a
	// failed run. A literal is used so that no additional header is required.
	const int exitFailure = 1;

	if (argc < 4)
	{
		cerr << "ERROR: not enough parameters" << endl << "Usage: " << argv[0] << " source-doc target-doc alignment" << endl;
		return exitFailure;
	}

	// open files
	ifstream src(argv[1]);
	ifstream dest(argv[2]);

	if (!src.good())
	{
		cerr << "ERROR: Opening source-language ("<< argv[1] <<") failed." << endl;
		return exitFailure;
	}

	if (!dest.good())
	{
		cerr << "ERROR: Opening target-language ("<< argv[2] <<") failed." << endl;
		return exitFailure;
	}

	// variables holding all data
	// NOTE(review): the heap objects created below are never deleted; they live
	// until the process exits. Ownership rules of Tree/Node/Alignment are not
	// visible here, so no cleanup is added - confirm before changing.

	/// holds the Alignment and finds all valid phrases etc..
	Alignment* aligObj;
	try
	{
		aligObj = new Alignment(argv[3]);
	}
	catch (bool)
	{
		// The handler expects the Alignment constructor to signal an
		// unopenable file by throwing a bool; the value itself is not needed.
		cerr << "ERROR: Opening alignment ("<< argv[3] <<") failed."  << endl;
		return exitFailure;
	}

	/// src-lang phrase count (prefixtree of words) - initialized with the 0-word as root
	Tree *phrasesF = new Tree(new Node(0));

	/// target-lang phrase count (prefixtree of words) - initialized with the 0-word as root
	Tree *phrasesE = new Tree(new Node(0));

	/// phrasepair count (prefixtree of prefixtrees) - initialized with a Tree as root which is initialized with the 0-word as root
	TreeOfTrees* phrasePairs = new TreeOfTrees(new NodeOfTrees(0, NULL));

	vector<unsigned int> srcWords;
	vector<unsigned int> destWords;
	string srcLine, destLine;

	// get src and dest lines; stops as soon as either document runs out of lines
	while (getline(src,srcLine) && getline(dest,destLine))
	{

		//put all words of the sentence in source language-lexicon and the value of the word into the lang-object
		srcWords = f.insertSentence(srcLine);
		destWords = e.insertSentence(destLine);

		// init the alignment for these sentences
		// NOTE(review): the lengths passed here (and used as loop bounds below)
		// are character counts of the raw lines, not word counts - verify
		// against Alignment::nextSentence whether srcWords.size() /
		// destWords.size() was intended.
		aligObj->nextSentence(srcLine.size(), destLine.size());

		// get phrases: j1 is the first and j2 the last source position of a span
		for (unsigned int j1 = 0; j1<srcLine.size(); j1++)
		{
			// A span beginning at j1 cannot end before j1, so j2 starts at j1.
			// (The original started j2 at a stale target-side index from the
			// previous iteration, which skipped spans and, whenever that index
			// was below j1, made the unsigned difference j2-j1 wrap around so
			// the loop body never ran.)
			// NOTE(review): "j2-j1<=MAX_PHRASE_LENGTH" admits spans covering up
			// to MAX_PHRASE_LENGTH+1 positions - confirm this is intended.
			for (unsigned int j2 = j1; j2<srcLine.size() && j2-j1<=MAX_PHRASE_LENGTH; j2++)
			{
				// target span reported by the alignment for source span [j1, j2]
				unsigned int i1 = aligObj->getMinTargetAlig(j1, j2);
				unsigned int i2 = aligObj->getMaxTargetAlig(j1, j2);

				// accept the pair only if the target span maps back to exactly [j1, j2]
				if (aligObj->getMinSrcAlig(i1, i2) == j1 && aligObj->getMaxSrcAlig(i1, i2) == j2)
				{
					// NOTE(review): p is never released in this function;
					// presumably phrasePairs keeps it - verify ownership.
					PhrasePair* p = aligObj->outputPhrase(j1, j2, i1, i2, srcWords, destWords);

					// put phrase in source-lang-obj
					phrasesF->insert(p->src);

					// put phrase in target-lang-obj
					phrasesE->insert(p->target);

					// count phrasePair
					phrasePairs->insert(p);
				}
			}
		}

		// Disabled progress output / training-size cap. To re-enable, declare
		// "unsigned int lineNr = 1;" before the while loop.
//		if (lineNr/100==(double)lineNr/(double)100) 
//			cout << "line" << lineNr << endl;
//		if (lineNr>TRAINING_LINES) break;
//		lineNr++;
	}	 

	showFreqPhrases(phrasePairs, phrasesF, phrasesE, f, e);

	return 0; //EXIT_SUCCESS;
}