static langType getTwoGramLanguage (FILE* input, const parserCandidate *const candidates, unsigned int n_candidates) { langType result; unsigned int i; for (result = LANG_AUTO, i = 0; candidates[i].lang != LANG_IGNORE; i++) if (LanguageTable [candidates[i].lang]->tg_entries == NULL || findTgTableEntry(LanguageTable [candidates[i].lang], candidates[i].spec) == NULL) { result = LANG_IGNORE; break; } if (result == LANG_AUTO) { unsigned char* t; t = tg_create(); tg_load(t, input); result = determineTwoGramLanguage(t, candidates, n_candidates); verbose("winner of tg tournament: %s\n", LanguageTable[result]->name); tg_destroy(t); } return result; }
/*
 * Load CORPUS_FILE into a freshly created two-gram table and register it
 * for LANGUAGE through addTgEntryFull().
 *
 * SPEC is the key the table is registered under: when PATTERN_P is true it
 * is copied as-is with vStringNewInit(); otherwise it is passed through
 * ext2ptrnNew() first.
 *
 * Reports a FATAL error when the corpus file cannot be opened or when the
 * table cannot be allocated.
 */
extern void addCorpusFile (const langType language,
			   const char* const spec,
			   vString* const corpus_file,
			   boolean pattern_p)
{
	FILE *corpus = fopen (vStringValue (corpus_file), "rb");
	unsigned char *table;
	vString *key;

	if (corpus == NULL)
		error (FATAL, "failed in open %s as corpus", vStringValue (corpus_file));

	table = tg_create ();
	if (table == NULL)
		error (FATAL, "failed allocating memory for tg entry");

	/* The stream is only needed while the table is being filled. */
	tg_load (table, corpus);
	fclose (corpus);

	if (pattern_p)
		key = vStringNewInit (spec);
	else
		key = ext2ptrnNew (spec);

	addTgEntryFull (language, key, table, corpus_file);
}