Beispiel #1
0
/**
 * Detect the encoding of a file through Analyzer::encodingFromFile() and print its name.
 * \param analyzer analyzer used for the detection
 * \param fileName name of the file to examine
 *
 * If the detection fails, an error message is written to the standard error
 * stream and the process exits with status 1.
 */
void testEncodingFromFile(Analyzer& analyzer, const char* fileName)
{
    EncodingID detectedID;
    if(analyzer.encodingFromFile(fileName, detectedID))
    {
        // success: print the encoding name, then one blank line
        std::cout << "encoding: " << Knowledge::getEncodingNameFromID(detectedID) << std::endl;
        std::cout << std::endl;
        return;
    }

    // detection failed: report the file and abort
    std::cerr << "error to get encoding ID from file " << fileName << std::endl;
    exit(1);
}
Beispiel #2
0
/**
 * Compare the encoding detected for each line of \e fileName with the encoding
 * detected for the file as a whole.
 * \param analyzer analyzer used for the detection
 * \param fileName name of the file to examine
 *
 * The test proceeds as follows:
 * 1. detect the file encoding with Analyzer::encodingFromFile() and print it.
 * 2. run Analyzer::encodingFromString() on every non-empty line and print each
 *    line whose encoding differs from the file encoding.
 * 3. print the number of differing lines, the number of lines checked and the
 *    percentage of checked lines that matched the file encoding.
 *
 * If a detection call fails or the file cannot be opened, an error message is
 * written to the standard error stream and the process exits with status 1.
 */
void testEncodingFromFileLine(Analyzer& analyzer, const char* fileName)
{
    // step 1: encoding of the file as a whole
    EncodingID wholeFileID;
    if(! analyzer.encodingFromFile(fileName, wholeFileID))
    {
        std::cerr << "error to get encoding ID from file " << fileName << std::endl;
        exit(1);
    }
    std::cout << "file encoding: " << Knowledge::getEncodingNameFromID(wholeFileID) << std::endl;
    std::cout << std::endl;

    std::ifstream input(fileName);
    if(! input)
    {
        std::cerr << "error in opening file " << fileName << std::endl;
        exit(1);
    }

    std::cout << "========== the lines with different encoding:" << std::endl;

    // step 2: encoding of each non-empty line
    int checkedCount = 0;
    int mismatchCount = 0;
    EncodingID currentID;
    std::string text;
    while(std::getline(input, text))
    {
        // empty lines are neither analyzed nor counted
        if(! text.empty())
        {
            ++checkedCount;

            if(! analyzer.encodingFromString(text.c_str(), currentID))
            {
                std::cerr << "error to get encoding ID from string " << text << std::endl;
                exit(1);
            }

            if(currentID != wholeFileID)
            {
                std::cout << Knowledge::getEncodingNameFromID(currentID) << "\t" << text << std::endl;
                ++mismatchCount;
            }
        }
    }

    // step 3: summary
    std::cout << std::endl
              << "========== statistics result:" << std::endl
              << "diff lines: " << mismatchCount << std::endl
              << "total lines: " << checkedCount << std::endl
              << "line correct rate: ";
    if(checkedCount == 0)
    {
        // no line was checked, so avoid dividing by zero
        std::cout << "0%" << std::endl;
    }
    else
    {
        const int matchCount = checkedCount - mismatchCount;
        std::cout << static_cast<double>(matchCount) / checkedCount * 100 << "%" << std::endl;
    }
}