/**
 * Exercise Analyzer::languageFromFile() on a single file and print the result.
 * \param analyzer analyzer whose languageFromFile() is called
 * \param fileName name of the file passed to the analyzer
 *
 * If the call evaluates to false, an error message is written to \c cerr and
 * the process terminates through exit(1). Otherwise the language name obtained
 * from Knowledge::getLanguageNameFromID() is printed to \c cout, followed by a
 * blank line.
 */
void testLanguageFromFile(Analyzer& analyzer, const char* fileName)
{
    LanguageID detectedID;
    const bool succeeded = analyzer.languageFromFile(fileName, detectedID);

    if(! succeeded)
    {
        cerr << "error to get language ID from file " << fileName << endl;
        exit(1);
    }

    cout << "primary language: " << Knowledge::getLanguageNameFromID(detectedID) << endl;
    cout << endl;
}
/**
 * Compare the per-line results of Analyzer::languageFromString() against the
 * whole-file result of Analyzer::languageFromFile() for \e fileName.
 * \param analyzer analyzer used for both the file-level and line-level calls
 * \param fileName name of the file to examine
 *
 * The test proceeds as follows:
 * 1. print the primary language of the whole file.
 * 2. read the file line by line, skipping empty lines, and print every line
 *    whose language ID differs from the file's ID (language name, tab, line).
 * 3. print the number of differing lines, the number of non-empty lines, and
 *    the percentage of non-empty lines that matched ("0%" when there are none).
 *
 * Any failure (file-level call, opening the file, or a line-level call) writes
 * a message to \c cerr and terminates the process through exit(1).
 */
void testLanguageFromFileLine(Analyzer& analyzer, const char* fileName)
{
    // Step 1: the language of the whole file is the reference for every line.
    LanguageID referenceID;
    if(! analyzer.languageFromFile(fileName, referenceID))
    {
        cerr << "error to get language ID from file " << fileName << endl;
        exit(1);
    }
    cout << "file primary language: " << Knowledge::getLanguageNameFromID(referenceID) << endl;
    cout << endl;

    ifstream input(fileName);
    if(! input)
    {
        cerr << "error in opening file " << fileName << endl;
        exit(1);
    }

    // Step 2: report each non-empty line whose language differs from the file's.
    cout << "========== the lines with different primary language:" << endl;

    int checkedCount = 0;   // non-empty lines examined
    int mismatchCount = 0;  // lines whose ID differs from referenceID
    LanguageID currentID;
    for(string text; getline(input, text); )
    {
        if(! text.empty())
        {
            ++checkedCount;

            if(! analyzer.languageFromString(text.c_str(), currentID))
            {
                cerr << "error to get language ID from string " << text << endl;
                exit(1);
            }

            if(currentID != referenceID)
            {
                cout << Knowledge::getLanguageNameFromID(currentID) << "\t" << text << endl;
                ++mismatchCount;
            }
        }
    }
    cout << endl;

    // Step 3: summary statistics.
    cout << "========== statistics result:" << endl;
    cout << "diff lines: " << mismatchCount << endl;
    cout << "total lines: " << checkedCount << endl;
    cout << "line correct rate: ";
    if(checkedCount == 0)
    {
        // No non-empty lines were read; avoid dividing by zero.
        cout << "0%" << endl;
    }
    else
    {
        const int matchedCount = checkedCount - mismatchCount;
        cout << static_cast<double>(matchedCount) / checkedCount * 100 << "%" << endl;
    }
}