Exemple #1
0
/*
 * Chooses a language for `input` among `candidates` using their tg tables.
 *
 * `candidates` is walked until an entry whose lang is LANG_IGNORE is reached,
 * so the array must end with such an entry; `n_candidates` is not used for
 * that walk and is only forwarded to determineTwoGramLanguage().
 *
 * Returns LANG_IGNORE when
 *   - any candidate's language has no tg_entries,
 *   - findTgTableEntry() finds nothing for a candidate's spec, or
 *   - tg_create() fails to provide a table for `input`.
 * Otherwise returns the value chosen by determineTwoGramLanguage().
 */
static langType getTwoGramLanguage (FILE* input,
				    const parserCandidate  *const candidates, unsigned int n_candidates)
{
	langType result;
	unsigned char* t;
	unsigned int i;

	/* Every candidate must have a table entry, or no comparison is made. */
	for (i = 0; candidates[i].lang != LANG_IGNORE; i++)
	{
		if (LanguageTable [candidates[i].lang]->tg_entries == NULL
		    || findTgTableEntry(LanguageTable [candidates[i].lang], candidates[i].spec) == NULL)
			return LANG_IGNORE;
	}

	t = tg_create();
	if (t == NULL)
	{
		/* Without a table for the input there is nothing to compare;
		 * report "undecided" instead of handing NULL to tg_load(). */
		return LANG_IGNORE;
	}

	tg_load(t, input);

	result = determineTwoGramLanguage(t, candidates, n_candidates);

	/* NOTE(review): assumes determineTwoGramLanguage() always returns a
	 * valid index into LanguageTable -- confirm. */
	verbose("winner of tg tournament: %s\n", LanguageTable[result]->name);

	tg_destroy(t);

	return result;
}
Exemple #2
0
/*
 * Registers `corpus_file` as a tg entry of `language` for `spec`.
 *
 * The corpus file is opened in binary mode, loaded into a table obtained from
 * tg_create() via tg_load(), and closed again. The table is then handed to
 * addTgEntryFull() together with the spec and the corpus file name.
 *
 * When `pattern_p` is set, `spec` is copied as-is with vStringNewInit();
 * otherwise it is passed through ext2ptrnNew().
 *
 * error (FATAL, ...) is raised when the file cannot be opened or when
 * tg_create() returns NULL.
 *
 * Neither the table nor the spec string is released here.
 * NOTE(review): presumably addTgEntryFull() takes ownership of both -- confirm.
 */
extern void addCorpusFile (const langType language,
			   const char* const spec, vString* const corpus_file, boolean pattern_p)
{
	FILE *corpus;
	unsigned char* table;
	vString* pattern;

	corpus = fopen (vStringValue (corpus_file), "rb");
	if (corpus == NULL)
	{
		error (FATAL,
		       "failed in open %s as corpus", vStringValue (corpus_file));
	}

	table = tg_create ();
	if (table == NULL)
	{
		error (FATAL,
		       "failed allocating memory for tg entry");
	}

	tg_load (table, corpus);
	fclose (corpus);

	if (pattern_p)
		pattern = vStringNewInit (spec);
	else
		pattern = ext2ptrnNew (spec);

	addTgEntryFull (language, pattern, table, corpus_file);
}