bool LexicsCutter::_ReadInnormativeWords(const std::string& fileName) { char szLine[1024]; FILE* file = fopen(fileName.c_str(), "rb"); if (!file) return false; while (!feof(file)) { szLine[0] = 0x0; fgets(szLine, 1020, file); std::string line; if (!_ProcessLine(szLine, line)) continue; // Create word vector of vectors LC_WordVector vw; std::string ch; unsigned int pos = 0; while (ReadUTF8(line, ch, pos)) { LC_LetterSet vl; // Initialize letter set with letter read vl.insert(ch); // Find letter analogs and push them onto the vector LC_AnalogMap::iterator itr = _analogMap.find(ch); if (itr != _analogMap.end()) // Analogs present, iterate for (LC_AnalogVector::iterator itr2 = itr->second.begin(); itr2 != itr->second.end(); ++itr2) vl.insert(*itr2); // Add letter vector to word vector vw.push_back(vl); } // Push new word to words list _wordList.push_back(vw); } fclose(file); return true; }
bool LexicsCutter::Compare_Word(std::string& str, unsigned int pos, LC_WordVector word) { std::string lchar_prev; std::string lchar; // read first letter of the word into lchar_prev ReadUTF8(str, lchar, pos); // okay, here we go, comparing word // first letter is already okay, we do begin from second and go on LC_WordVector::iterator i = word.begin(); i++; while (i != word.end()) { // get letter from word, return false if the string is shorter if (!ReadUTF8(str, lchar, pos)) return(false); // check, if the letter is in the set LC_LetterSet ls = *i; if (ls.count(lchar) == 0) { // letter is not in set, but we must check, if it is not space or repeat if ( (!(IgnoreMiddleSpaces && (lchar == " "))) && (!(IgnoreLetterRepeat && (lchar == lchar_prev))) ) { // no checks viable return(false); } } else { // next word letter i++; } // set previous string letter to compare if needed (this check can really conserve time) if (IgnoreLetterRepeat) lchar_prev = lchar; } return(true); }
bool LexicsCutter::_CompareWord(const std::string& str, unsigned int pos, LC_WordVector word) const { std::string chPrev; std::string ch; // Read first letter of the word into lchar_prev ReadUTF8(str, ch, pos); // Compare word // First letter is already okay, we do begin from second and go on LC_WordVector::iterator i = word.begin(); ++i; while (i != word.end()) { // Get letter from word, return false if the string is shorter if (!ReadUTF8(str, ch, pos)) return false; // Check, if the letter is in the set LC_LetterSet ls = *i; if (ls.count(ch) == 0) { // Letter is not in set, but we must check, if it is not space or repeat if ((!(_ignoreMiddleSpaces && (ch == " "))) && (!(_ignoreLetterRepeat && (ch == chPrev)))) // No checks viable return false; } else // Next word letter ++i; // Set previous string letter to compare if needed (this check can really conserve time) if (_ignoreLetterRepeat) chPrev = ch; } return true; }
bool LexicsCutter::Read_Innormative_Words(std::string& FileName) { FILE *ma_file; char line[1024]; unsigned int pos; std::string line_s; std::string lchar; ma_file = fopen(FileName.c_str(), "rb"); if (!ma_file) { sLog.outError("Chat lexics cutter disabled. Reason: LexicsCutterWordsFile file does not exist in the server directory."); return false; } while (!feof(ma_file)) { line[0] = 0x0; fgets(line, 1020, ma_file); // check for UTF8 prefix and comment if (strlen(line) >= 3) { if (line[0] == '\xEF' && line[1] == '\xBB' && line[2] == '\xBF') { strncpy(&line[0], &line[3], strlen(line) - 3); } } if (strlen(line) >= 2) { if (line[0] == '/' && line[1] == '/') continue; } // check for empty string line_s = line; line_s = trim(line_s, "\x0A\x0D\x20"); if (line_s == "") continue; // process line without CR/LF line_s = line; line_s = trim(line_s, "\x0A\x0D"); // create word vector of vectors LC_WordVector vw; pos = 0; while (ReadUTF8(line_s, lchar, pos)) { // create letter set LC_LetterSet vl; // initialize letter set with letter read vl.insert(lchar); // find letter analogs and push them onto the vector LC_AnalogMap::iterator itr = AnalogMap.find(lchar); if (itr != AnalogMap.end()) { // analogs present, iterate for (LC_AnalogVector::iterator itr2 = itr->second.begin(); itr2 != itr->second.end(); itr2++) { vl.insert(*itr2); } } // add letter vector to word vector vw.push_back(vl); } // push new word to words list WordList.push_back(vw); } fclose(ma_file); return true; }
bool LexicsCutter::ReadInnormativeWords(std::string& fileName) { char line[1024]; unsigned int pos; std::string line_s; std::string lchar; FILE* file = fopen(fileName.c_str(), "rb"); if (!file) return false; while (!feof(file)) { line[0] = 0x0; fgets(line, 1020, file); // check for UTF8 prefix and comment if (strlen(line) >= 3) if (line[0] == '\xEF' && line[1] == '\xBB' && line[2] == '\xBF') strncpy(&line[0], &line[3], strlen(line) - 3); if (strlen(line) >= 2) if (line[0] == '/' && line[1] == '/') continue; // check for empty string line_s = line; line_s = trim(line_s, "\x0A\x0D\x20"); if (line_s == "") continue; // process line without CR/LF line_s = line; line_s = trim(line_s, "\x0A\x0D"); // create word vector of vectors LC_WordVector vw; pos = 0; while (ReadUTF8(line_s, lchar, pos)) { // create letter set LC_LetterSet vl; // initialize letter set with letter read vl.insert(lchar); // find letter analogs and push them onto the vector LC_AnalogMap::iterator itr = m_AnalogMap.find(lchar); if (itr != m_AnalogMap.end()) // analogs present, iterate for (LC_AnalogVector::iterator itr2 = itr->second.begin(); itr2 != itr->second.end(); itr2++) vl.insert(*itr2); // add letter vector to word vector vw.push_back(vl); } // push new word to words list m_WordList.push_back(vw); } fclose(file); return true; }