예제 #1
0
// Loads the words file `fileName` into _wordList.
//
// Each line accepted by _ProcessLine() becomes one LC_WordVector: for every
// character that ReadUTF8() extracts from the processed line, a letter set is
// built that contains the character itself plus all of its analogs registered
// in _analogMap.
//
// Returns false if the file cannot be opened or if reading it fails part-way
// (entries appended before the failure stay in _wordList); true otherwise.
bool LexicsCutter::_ReadInnormativeWords(const std::string& fileName)
{
    char szLine[1024];

    FILE* file = fopen(fileName.c_str(), "rb");
    if (!file)
        return false;

    // Drive the loop with the result of fgets() rather than feof(): feof()
    // only turns true after a read has already hit end-of-file, and it never
    // turns true on a read error, which would otherwise loop forever.
    while (fgets(szLine, 1020, file))
    {
        std::string line;
        if (!_ProcessLine(szLine, line))
            continue;

        // Create word vector of letter sets
        LC_WordVector vw;
        std::string ch;
        unsigned int pos = 0;
        while (ReadUTF8(line, ch, pos))
        {
            // Start the letter set with the letter just read
            LC_LetterSet vl;
            vl.insert(ch);

            // Add every known analog of that letter to the same set
            LC_AnalogMap::iterator itr = _analogMap.find(ch);
            if (itr != _analogMap.end())
            {
                for (LC_AnalogVector::iterator itr2 = itr->second.begin(); itr2 != itr->second.end(); ++itr2)
                    vl.insert(*itr2);
            }

            // Add letter set to word vector
            vw.push_back(vl);
        }

        // Push new word to words list
        _wordList.push_back(vw);
    }

    // fgets() returns NULL for both end-of-file and I/O errors; tell them apart.
    const bool readOk = !ferror(file);
    fclose(file);
    return readOk;
}
예제 #2
0
// Loads the words file `FileName` into WordList.
//
// Per line: a leading UTF-8 byte-order mark is removed, lines starting with
// "//" are skipped as comments, and lines that are empty after trimming
// CR/LF/space are skipped. Every remaining line (with only CR/LF trimmed)
// becomes one LC_WordVector: for each character that ReadUTF8() extracts, a
// letter set is built holding the character plus all of its analogs from
// AnalogMap.
//
// Returns false (after logging) if the file cannot be opened, or false if
// reading fails part-way (entries appended before the failure stay in
// WordList); true otherwise.
bool LexicsCutter::Read_Innormative_Words(std::string& FileName)
{
    char line[1024];

    FILE* ma_file = fopen(FileName.c_str(), "rb");
    if (!ma_file)
    {
        sLog.outError("Chat lexics cutter disabled. Reason: LexicsCutterWordsFile file does not exist in the server directory.");
        return false;
    }

    // Drive the loop with the result of fgets() rather than feof(): feof()
    // only turns true after a read has already hit end-of-file, and it never
    // turns true on a read error, which would otherwise loop forever.
    while (fgets(line, 1020, ma_file))
    {
        // Strip the UTF-8 byte-order mark. memmove() is required because the
        // source and destination overlap; the +1 carries the terminating NUL
        // so no stale bytes are left behind at the end of the line.
        const size_t len = strlen(line);
        if (len >= 3 && line[0] == '\xEF' && line[1] == '\xBB' && line[2] == '\xBF')
            memmove(line, line + 3, len - 3 + 1);

        // Skip comment lines. Reading line[1] is safe: if line[0] is '/',
        // line[1] is at worst the terminating NUL.
        if (line[0] == '/' && line[1] == '/')
            continue;

        // Skip lines that contain nothing but CR/LF/space
        std::string line_s = line;
        line_s = trim(line_s, "\x0A\x0D\x20");
        if (line_s.empty())
            continue;

        // Process the line with only CR/LF removed (spaces are kept)
        line_s = line;
        line_s = trim(line_s, "\x0A\x0D");

        // Create word vector of letter sets
        LC_WordVector vw;
        std::string lchar;
        unsigned int pos = 0;
        while (ReadUTF8(line_s, lchar, pos))
        {
            // Start the letter set with the letter just read
            LC_LetterSet vl;
            vl.insert(lchar);

            // Add every known analog of that letter to the same set
            LC_AnalogMap::iterator itr = AnalogMap.find(lchar);
            if (itr != AnalogMap.end())
            {
                for (LC_AnalogVector::iterator itr2 = itr->second.begin(); itr2 != itr->second.end(); ++itr2)
                    vl.insert(*itr2);
            }

            // Add letter set to word vector
            vw.push_back(vl);
        }

        // Push new word to words list
        WordList.push_back(vw);
    }

    // fgets() returns NULL for both end-of-file and I/O errors; tell them apart.
    const bool readOk = !ferror(ma_file);
    fclose(ma_file);
    return readOk;
}
예제 #3
0
// Loads the words file `fileName` into m_WordList.
//
// Per line: a leading UTF-8 byte-order mark is removed, lines starting with
// "//" are skipped as comments, and lines that are empty after trimming
// CR/LF/space are skipped. Every remaining line (with only CR/LF trimmed)
// becomes one LC_WordVector: for each character that ReadUTF8() extracts, a
// letter set is built holding the character plus all of its analogs from
// m_AnalogMap.
//
// Returns false if the file cannot be opened or if reading it fails part-way
// (entries appended before the failure stay in m_WordList); true otherwise.
bool LexicsCutter::ReadInnormativeWords(std::string& fileName)
{
    char line[1024];

    FILE* file = fopen(fileName.c_str(), "rb");
    if (!file)
        return false;

    // Drive the loop with the result of fgets() rather than feof(): feof()
    // only turns true after a read has already hit end-of-file, and it never
    // turns true on a read error, which would otherwise loop forever.
    while (fgets(line, 1020, file))
    {
        // Strip the UTF-8 byte-order mark. memmove() is required because the
        // source and destination overlap; the +1 carries the terminating NUL
        // so no stale bytes are left behind at the end of the line.
        const size_t len = strlen(line);
        if (len >= 3 && line[0] == '\xEF' && line[1] == '\xBB' && line[2] == '\xBF')
            memmove(line, line + 3, len - 3 + 1);

        // Skip comment lines. Reading line[1] is safe: if line[0] is '/',
        // line[1] is at worst the terminating NUL.
        if (line[0] == '/' && line[1] == '/')
            continue;

        // Skip lines that contain nothing but CR/LF/space
        std::string line_s = line;
        line_s = trim(line_s, "\x0A\x0D\x20");
        if (line_s.empty())
            continue;

        // Process the line with only CR/LF removed (spaces are kept)
        line_s = line;
        line_s = trim(line_s, "\x0A\x0D");

        // Create word vector of letter sets
        LC_WordVector vw;
        std::string lchar;
        unsigned int pos = 0;
        while (ReadUTF8(line_s, lchar, pos))
        {
            // Start the letter set with the letter just read
            LC_LetterSet vl;
            vl.insert(lchar);

            // Add every known analog of that letter to the same set
            LC_AnalogMap::iterator itr = m_AnalogMap.find(lchar);
            if (itr != m_AnalogMap.end())
            {
                for (LC_AnalogVector::iterator itr2 = itr->second.begin(); itr2 != itr->second.end(); ++itr2)
                    vl.insert(*itr2);
            }

            // Add letter set to word vector
            vw.push_back(vl);
        }

        // Push new word to words list
        m_WordList.push_back(vw);
    }

    // fgets() returns NULL for both end-of-file and I/O errors; tell them apart.
    const bool readOk = !ferror(file);
    fclose(file);
    return readOk;
}