/**
 * Exercise Analyzer::encodingFromFile() on a single file and print the
 * name of the encoding it reports.
 * \param analyzer analyzer whose encodingFromFile() is called
 * \param fileName file name passed to the analyzer
 *
 * When the analyzer reports failure, an error message is written to stderr
 * and the process exits with status 1.
 */
void testEncodingFromFile(Analyzer& analyzer, const char* fileName)
{
    EncodingID fileEncoding;

    // Success path first: print the encoding name followed by a blank line.
    if(analyzer.encodingFromFile(fileName, fileEncoding))
    {
        cout << "encoding: " << Knowledge::getEncodingNameFromID(fileEncoding) << endl << endl;
        return;
    }

    // The analyzer could not determine an encoding for this file.
    cerr << "error to get encoding ID from file " << fileName << endl;
    exit(1);
}
/**
 * Run Analyzer::encodingFromString() on each line of \e fileName, and print
 * the lines whose result differs from that of Analyzer::encodingFromFile().
 * \param analyzer analyzer
 * \param fileName file name
 *
 * The test proceeds as follows:
 * 1. print the encoding of the whole file.
 * 2. print each non-empty line whose encoding is not the same as the file's,
 *    prefixed by the name of the encoding reported for that line.
 * 3. print the number of differing lines, the number of tested lines, and
 *    the percentage of tested lines that agree with the file ("0%" when no
 *    line was tested).
 *
 * Empty lines are skipped and not counted. A single trailing '\r' (left over
 * from CRLF line endings) is removed before a line is tested.
 *
 * If the analyzer reports failure, or the file cannot be opened, an error
 * message is written to stderr and the process exits with status 1.
 */
void testEncodingFromFileLine(Analyzer& analyzer, const char* fileName)
{
    EncodingID fileID;
    if(! analyzer.encodingFromFile(fileName, fileID))
    {
        cerr << "error to get encoding ID from file " << fileName << endl;
        exit(1);
    }

    cout << "file encoding: " << Knowledge::getEncodingNameFromID(fileID) << endl << endl;

    ifstream ifs(fileName);
    if(! ifs)
    {
        cerr << "error in opening file " << fileName << endl;
        exit(1);
    }

    cout << "========== the lines with different encoding:" << endl;

    string line;
    int totalLine = 0; // number of non-empty lines tested
    int diffLine = 0;  // number of tested lines whose encoding differs from the file's

    while(getline(ifs, line))
    {
        // getline() strips only the '\n' delimiter. For a CRLF file read
        // without newline translation, every line would keep a trailing '\r':
        // blank lines would no longer be empty, and the '\r' would be echoed
        // in the output. Drop it before testing the line.
        if(! line.empty() && line[line.size() - 1] == '\r')
            line.erase(line.size() - 1);

        if(line.empty())
            continue;

        ++totalLine;

        EncodingID lineID;
        if(! analyzer.encodingFromString(line.c_str(), lineID))
        {
            cerr << "error to get encoding ID from string " << line << endl;
            exit(1);
        }

        if(lineID != fileID)
        {
            cout << Knowledge::getEncodingNameFromID(lineID) << "\t" << line << endl;
            ++diffLine;
        }
    }

    cout << endl;
    cout << "========== statistics result:" << endl;
    cout << "diff lines: " << diffLine << endl;
    cout << "total lines: " << totalLine << endl;
    cout << "line correct rate: ";

    // Guard against division by zero when the file had no non-empty line.
    if(totalLine)
        cout << static_cast<double>(totalLine - diffLine) / totalLine * 100 << "%" << endl;
    else
        cout << "0%" << endl;
}