bool LexicsCutter::CheckLexics(std::string& Phrase) { std::string lchar; LC_WordMap::iterator i; std::pair<LC_WordMap::iterator, LC_WordMap::iterator> ii; if (Phrase.size() == 0) return false; // first, convert the string, adding spaces and removing invalid characters // also create fast position vector for the new positions std::string str = " "; unsigned int pos = 0; while (ReadUTF8(Phrase, lchar, pos)) if (m_sInvalidChars.find(lchar) == std::string::npos) str.append(lchar); // string prepared, now parse it and scan for all the words unsigned int pos_prev = 0; pos = 0; while (ReadUTF8(str, lchar, pos)) { // got character, now try to find wordmap for it ii = m_WordMap.equal_range(lchar); // iterate over all found words for (i = ii.first; i != ii.second; i++) // compare word at initial position if (CompareWord(str, pos_prev, m_WordList[i->second])) return true; // set initial position to the current position pos_prev = pos; } return false; }
bool LexicsCutter::_ReadLetterAnalogs(const std::string& fileName) { char szLine[1024]; FILE* file = fopen(fileName.c_str(), "rb"); if (!file) return false; while (!feof(file)) { szLine[0] = 0x0; fgets(szLine, 1020, file); std::string line; if (!_ProcessLine(szLine, line)) continue; std::string ch; unsigned int pos = 0; if (ReadUTF8(line, ch, pos)) { // Create analogs vector std::string analog; LC_AnalogVector av; while (ReadUTF8(line, analog, pos)) av.push_back(analog); // Store vector in hash map _analogMap[ch] = av; } } fclose(file); return true; }
bool LexicsCutter::Read_Letter_Analogs(std::string& FileName) { FILE *ma_file; char line[1024]; unsigned int pos; std::string line_s; std::string lchar; std::string lanalog; ma_file = fopen(FileName.c_str(), "rb"); while (!feof(ma_file)) { line[0] = 0x0; fgets(line, 1020, ma_file); // check for UTF8 prefix and comments if (strlen(line) >= 3) { if (line[0] == '\xEF' && line[1] == '\xBB' && line[2] == '\xBF') { strncpy(&line[0], &line[3], strlen(line) - 3); } } if (strlen(line) >= 2) { if (line[0] == '/' && line[1] == '/') continue; } // check for empty string line_s = line; line_s = trim(line_s, "\x0A\x0D\x20"); if (line_s == "") continue; // process line without CR/LF line_s = line; line_s = trim(line_s, "\x0A\x0D"); pos = 0; if (ReadUTF8(line_s, lchar, pos)) { // create analogs vector LC_AnalogVector av; while (ReadUTF8(line_s, lanalog, pos)) { av.push_back(lanalog); } // store vector in hash map AnalogMap[lchar] = av; } } fclose(ma_file); return true; }
/*
 * Appends Length bytes of UTF-8 text to the display.
 *
 * The text is cut into pieces that fit the columns remaining on the current
 * line (giDisplayCols - giCurrentCol). Each piece is handed to
 * Display_int_PushString; whenever text is left over, the cursor moves to the
 * start of the next line before the next piece is pushed.
 */
void Display_AddText(int Length, const char *UTF8Text)
{
	_SysDebug("%i '%.*s'", Length, Length, UTF8Text);

	while( Length > 0 )
	{
		int	colsLeft = giDisplayCols - giCurrentCol;
		int	used = 0;

		// Consume whole codepoints until the line is full or the text ends.
		// Only codepoints accepted by Unicode_IsPrinting take up a column.
		for( ; colsLeft != 0 && used < Length; )
		{
			uint32_t	codepoint;
			used += ReadUTF8(&UTF8Text[used], &codepoint);
			if( !Unicode_IsPrinting(codepoint) )
				continue;
			colsLeft --;
			giCurrentCol ++;
		}

		Display_int_PushString(used, UTF8Text);
		Length -= used;
		UTF8Text += used;

		if( Length == 0 )
			break;

		// Text remains: continue at the start of the next line
		giCurrentLinePos = 0;
		giCurrentCol = 0;
		giCurrentLine ++;
	}
}
// Return true if message contains bad lexics LexicsCheckResult LexicsCutter::CheckLexics(const std::string& msg) { LC_WordMap::iterator i; std::pair<LC_WordMap::iterator, LC_WordMap::iterator> ii; if (msg.size() == 0) return LCR_GOOD; // Remove links - invalid link is a subject to punish LinkExtractor extractor(msg); if (!extractor.IsValidMessage()) return LCR_INVALID_LINK; // Remove links from message std::string s = extractor.RemoveLinks(); // First, convert the string, adding spaces and removing invalid characters. // Also create fast position vector for the new positions std::string ch; std::string str(" "); unsigned int pos = 0; while (ReadUTF8(s, ch, pos)) if (_invalidChars.find(ch) == std::string::npos) str.append(ch); // String prepared, now parse it and scan for all the words unsigned int prevPos = 0; pos = 0; while (ReadUTF8(str, ch, pos)) { // Got character, now try to find wordmap for it ii = _wordMap.equal_range(ch); // Iterate over all found words for (i = ii.first; i != ii.second; i++) // Compare word at initial position if (_CompareWord(str, prevPos, _wordList[i->second])) return LCR_BAD; // Set initial position to the current position prevPos = pos; } return LCR_GOOD; }
bool LexicsCutter::Compare_Word(std::string& str, unsigned int pos, LC_WordVector word) { std::string lchar_prev; std::string lchar; // read first letter of the word into lchar_prev ReadUTF8(str, lchar, pos); // okay, here we go, comparing word // first letter is already okay, we do begin from second and go on LC_WordVector::iterator i = word.begin(); i++; while (i != word.end()) { // get letter from word, return false if the string is shorter if (!ReadUTF8(str, lchar, pos)) return(false); // check, if the letter is in the set LC_LetterSet ls = *i; if (ls.count(lchar) == 0) { // letter is not in set, but we must check, if it is not space or repeat if ( (!(IgnoreMiddleSpaces && (lchar == " "))) && (!(IgnoreLetterRepeat && (lchar == lchar_prev))) ) { // no checks viable return(false); } } else { // next word letter i++; } // set previous string letter to compare if needed (this check can really conserve time) if (IgnoreLetterRepeat) lchar_prev = lchar; } return(true); }
bool LexicsCutter::_CompareWord(const std::string& str, unsigned int pos, LC_WordVector word) const { std::string chPrev; std::string ch; // Read first letter of the word into lchar_prev ReadUTF8(str, ch, pos); // Compare word // First letter is already okay, we do begin from second and go on LC_WordVector::iterator i = word.begin(); ++i; while (i != word.end()) { // Get letter from word, return false if the string is shorter if (!ReadUTF8(str, ch, pos)) return false; // Check, if the letter is in the set LC_LetterSet ls = *i; if (ls.count(ch) == 0) { // Letter is not in set, but we must check, if it is not space or repeat if ((!(_ignoreMiddleSpaces && (ch == " "))) && (!(_ignoreLetterRepeat && (ch == chPrev)))) // No checks viable return false; } else // Next word letter ++i; // Set previous string letter to compare if needed (this check can really conserve time) if (_ignoreLetterRepeat) chPrev = ch; } return true; }
bool LexicsCutter::_ReadInnormativeWords(const std::string& fileName) { char szLine[1024]; FILE* file = fopen(fileName.c_str(), "rb"); if (!file) return false; while (!feof(file)) { szLine[0] = 0x0; fgets(szLine, 1020, file); std::string line; if (!_ProcessLine(szLine, line)) continue; // Create word vector of vectors LC_WordVector vw; std::string ch; unsigned int pos = 0; while (ReadUTF8(line, ch, pos)) { LC_LetterSet vl; // Initialize letter set with letter read vl.insert(ch); // Find letter analogs and push them onto the vector LC_AnalogMap::iterator itr = _analogMap.find(ch); if (itr != _analogMap.end()) // Analogs present, iterate for (LC_AnalogVector::iterator itr2 = itr->second.begin(); itr2 != itr->second.end(); ++itr2) vl.insert(*itr2); // Add letter vector to word vector vw.push_back(vl); } // Push new word to words list _wordList.push_back(vw); } fclose(file); return true; }
/*
 * Moves the cursor down one line, wrapping giCurrentLine at giDisplayLines.
 * When the cursor line catches up with giFirstLine, giFirstLine is advanced
 * (and wrapped) as well.
 *
 * bCarriageReturn != 0: the cursor also returns to column 0.
 * bCarriageReturn == 0: the column is kept. If the new line has no buffer yet
 *   it is padded with spaces up to the old column; otherwise giCurrentLinePos
 *   is advanced through the existing line text until that many printing
 *   codepoints have been passed.
 */
void Display_Newline(int bCarriageReturn)
{
	// Step down, wrapping at the bottom
	giCurrentLine ++;
	if( giCurrentLine == giDisplayLines )
		giCurrentLine = 0;

	// Push the first line ahead if we ran into it
	if( giCurrentLine == giFirstLine )
	{
		giFirstLine ++;
		if( giFirstLine == giDisplayLines )
			giFirstLine = 0;
	}

	// The byte position restarts at the beginning of the line in every case
	giCurrentLinePos = 0;

	if( bCarriageReturn )
	{
		giCurrentCol = 0;
		return ;
	}

	int	colsToSkip = giCurrentCol;

	if( !gasDisplayLines[giCurrentLine] )
	{
		// No line data yet: rebuild the column by emitting spaces
		giCurrentCol = 0;
		for( ; colsToSkip != 0; colsToSkip -- )
			Display_AddText(1, " ");
		return ;
	}

	// Existing line: walk its text, counting only printing codepoints
	while( colsToSkip != 0 )
	{
		uint32_t	codepoint;
		giCurrentLinePos += ReadUTF8(gasDisplayLines[giCurrentLine] + giCurrentLinePos, &codepoint);
		if( Unicode_IsPrinting(codepoint) )
			colsToSkip --;
	}
}
bool LexicsCutter::Read_Innormative_Words(std::string& FileName) { FILE *ma_file; char line[1024]; unsigned int pos; std::string line_s; std::string lchar; ma_file = fopen(FileName.c_str(), "rb"); if (!ma_file) { sLog.outError("Chat lexics cutter disabled. Reason: LexicsCutterWordsFile file does not exist in the server directory."); return false; } while (!feof(ma_file)) { line[0] = 0x0; fgets(line, 1020, ma_file); // check for UTF8 prefix and comment if (strlen(line) >= 3) { if (line[0] == '\xEF' && line[1] == '\xBB' && line[2] == '\xBF') { strncpy(&line[0], &line[3], strlen(line) - 3); } } if (strlen(line) >= 2) { if (line[0] == '/' && line[1] == '/') continue; } // check for empty string line_s = line; line_s = trim(line_s, "\x0A\x0D\x20"); if (line_s == "") continue; // process line without CR/LF line_s = line; line_s = trim(line_s, "\x0A\x0D"); // create word vector of vectors LC_WordVector vw; pos = 0; while (ReadUTF8(line_s, lchar, pos)) { // create letter set LC_LetterSet vl; // initialize letter set with letter read vl.insert(lchar); // find letter analogs and push them onto the vector LC_AnalogMap::iterator itr = AnalogMap.find(lchar); if (itr != AnalogMap.end()) { // analogs present, iterate for (LC_AnalogVector::iterator itr2 = itr->second.begin(); itr2 != itr->second.end(); itr2++) { vl.insert(*itr2); } } // add letter vector to word vector vw.push_back(vl); } // push new word to words list WordList.push_back(vw); } fclose(ma_file); return true; }
void TtfFont::PlotString(const std::string &str, SBezierList *sbl, Vector origin, Vector u, Vector v) { ssassert(fontFace != NULL, "Expected font face to be loaded"); FT_Pos dx = 0; for(char32_t chr : ReadUTF8(str)) { uint32_t gid = FT_Get_Char_Index(fontFace, chr); if (gid == 0) { dbp("freetype: CID-to-GID mapping for CID 0x%04x failed: %s; using CID as GID", chr, ft_error_string(gid)); } FT_F26Dot6 scale = fontFace->units_per_EM; if(int fterr = FT_Set_Char_Size(fontFace, scale, scale, 72, 72)) { dbp("freetype: cannot set character size: %s", ft_error_string(fterr)); return; } /* * Stupid hacks: * - if we want fake-bold, use FT_Outline_Embolden(). This actually looks * quite good. * - if we want fake-italic, apply a shear transform [1 s s 1 0 0] here using * FT_Set_Transform. This looks decent at small font sizes and bad at larger * ones, antialiasing mitigates this considerably though. */ if(int fterr = FT_Load_Glyph(fontFace, gid, FT_LOAD_NO_BITMAP | FT_LOAD_NO_HINTING)) { dbp("freetype: cannot load glyph (gid %d): %s", gid, ft_error_string(fterr)); return; } /* A point that has x = xMin should be plotted at (dx0 + lsb); fix up * our x-position so that the curve-generating code will put stuff * at the right place. * * There's no point in getting the glyph BBox here - not only can it be * needlessly slow sometimes, but because we're about to render a single glyph, * what we want actually *is* the CBox. * * This is notwithstanding that this makes extremely little sense, this * looks like a workaround for either mishandling the start glyph on a line, * or as a really hacky pseudo-track-kerning (in which case it works better than * one would expect! especially since most fonts don't set track kerning). */ FT_BBox cbox; FT_Outline_Get_CBox(&fontFace->glyph->outline, &cbox); FT_Pos bx = dx - cbox.xMin; // Yes, this is what FreeType calls left-side bearing. // Then interchangeably uses that with "left-side bearing". Sigh. 
bx += fontFace->glyph->metrics.horiBearingX; OutlineData data = {}; data.origin = origin; data.u = u; data.v = v; data.beziers = sbl; data.factor = 1.0f/(float)scale; data.bx = bx; if(int fterr = FT_Outline_Decompose(&fontFace->glyph->outline, &outline_funcs, &data)) { dbp("freetype: bezier decomposition failed (gid %d): %s", gid, ft_error_string(fterr)); } // And we're done, so advance our position by the requested advance // width, plus the user-requested extra advance. dx += fontFace->glyph->advance.x; } }
bool LexicsCutter::ReadInnormativeWords(std::string& fileName) { char line[1024]; unsigned int pos; std::string line_s; std::string lchar; FILE* file = fopen(fileName.c_str(), "rb"); if (!file) return false; while (!feof(file)) { line[0] = 0x0; fgets(line, 1020, file); // check for UTF8 prefix and comment if (strlen(line) >= 3) if (line[0] == '\xEF' && line[1] == '\xBB' && line[2] == '\xBF') strncpy(&line[0], &line[3], strlen(line) - 3); if (strlen(line) >= 2) if (line[0] == '/' && line[1] == '/') continue; // check for empty string line_s = line; line_s = trim(line_s, "\x0A\x0D\x20"); if (line_s == "") continue; // process line without CR/LF line_s = line; line_s = trim(line_s, "\x0A\x0D"); // create word vector of vectors LC_WordVector vw; pos = 0; while (ReadUTF8(line_s, lchar, pos)) { // create letter set LC_LetterSet vl; // initialize letter set with letter read vl.insert(lchar); // find letter analogs and push them onto the vector LC_AnalogMap::iterator itr = m_AnalogMap.find(lchar); if (itr != m_AnalogMap.end()) // analogs present, iterate for (LC_AnalogVector::iterator itr2 = itr->second.begin(); itr2 != itr->second.end(); itr2++) vl.insert(*itr2); // add letter vector to word vector vw.push_back(vl); } // push new word to words list m_WordList.push_back(vw); } fclose(file); return true; }