Exemplo n.º 1
0
bool LexicsCutter::CheckLexics(std::string& Phrase)
{
    std::string lchar;
    LC_WordMap::iterator i;
    std::pair<LC_WordMap::iterator, LC_WordMap::iterator> ii;

    if (Phrase.size() == 0)
        return false;

    // first, convert the string, adding spaces and removing invalid characters
    // also create fast position vector for the new positions
    std::string str = " ";
    unsigned int pos = 0;
    while (ReadUTF8(Phrase, lchar, pos))
        if (m_sInvalidChars.find(lchar) == std::string::npos)
            str.append(lchar);

    // string prepared, now parse it and scan for all the words
    unsigned int pos_prev = 0;
    pos = 0;
    while (ReadUTF8(str, lchar, pos))
    {
        // got character, now try to find wordmap for it
        ii = m_WordMap.equal_range(lchar);
        // iterate over all found words
        for (i = ii.first; i != ii.second; i++)
            // compare word at initial position
            if (CompareWord(str, pos_prev, m_WordList[i->second]))
                return true;
       // set initial position to the current position
       pos_prev = pos;
    }
    return false;
}
Exemplo n.º 2
0
// Loads the letter-analog table from the text file fileName.
//
// Every line accepted by _ProcessLine is split into letters with ReadUTF8:
// the first letter becomes the key and all remaining letters become its
// analog list, stored in _analogMap (replacing any earlier entry for the
// same key).
//
// Returns false when the file cannot be opened or a read error occurs,
// true otherwise.
bool LexicsCutter::_ReadLetterAnalogs(const std::string& fileName)
{
    char szLine[1024];

    FILE* file = fopen(fileName.c_str(), "rb");
    if (!file)
        return false;

    // Loop on the result of fgets() rather than on feof(): a read error never
    // sets the EOF flag, so a feof()-driven loop would spin forever on it.
    // The 1020-byte read limit is kept from the original code.
    while (fgets(szLine, 1020, file))
    {
        std::string line;
        if (!_ProcessLine(szLine, line))
            continue;

        std::string ch;
        unsigned int pos = 0;
        if (ReadUTF8(line, ch, pos))
        {
            // Create analogs vector from the rest of the line
            std::string analog;
            LC_AnalogVector av;
            while (ReadUTF8(line, analog, pos))
                av.push_back(analog);

            // Store vector in hash map
            _analogMap[ch] = av;
        }
    }

    // Distinguish a clean end-of-file from an I/O failure.
    const bool readOk = (ferror(file) == 0);
    fclose(file);
    return readOk;
}
Exemplo n.º 3
0
// Loads the letter-analog table from the text file FileName.
//
// Per line: a leading UTF-8 byte-order mark is removed, lines starting with
// "//" and lines that are empty after trimming CR/LF/space are skipped. On
// the remaining lines (with CR/LF trimmed) the first letter read by ReadUTF8
// becomes the key and every following letter is added to its analog list,
// which is stored in AnalogMap (replacing any earlier entry for that key).
//
// Returns false when the file cannot be opened or a read error occurs,
// true otherwise.
bool LexicsCutter::Read_Letter_Analogs(std::string& FileName)
{
    FILE *ma_file;
    char line[1024];
    unsigned int pos;
    std::string line_s;
    std::string lchar;
    std::string lanalog;

    ma_file = fopen(FileName.c_str(), "rb");
    if (!ma_file)
        return false;

    // Loop on the result of fgets() rather than on feof(): a read error never
    // sets the EOF flag, so a feof()-driven loop would spin forever on it.
    while (fgets(line, 1020, ma_file))
    {
        // check for UTF8 prefix and comments
        if (strlen(line) >= 3)
        {
            if (line[0] == '\xEF' && line[1] == '\xBB' && line[2] == '\xBF')
            {
                // Source and destination overlap, so memmove is required;
                // the +1 also moves the terminator, otherwise the last three
                // bytes of the line would be left duplicated at its end.
                memmove(&line[0], &line[3], strlen(line) - 3 + 1);
            }
        }

        if (strlen(line) >= 2)
        {
            if (line[0] == '/' && line[1] == '/') continue;
        }

        // check for empty string
        line_s = line;
        line_s = trim(line_s, "\x0A\x0D\x20");
        if (line_s == "") continue;

        // process line without CR/LF
        line_s = line;
        line_s = trim(line_s, "\x0A\x0D");

        pos = 0;
        if (ReadUTF8(line_s, lchar, pos))
        {
            // create analogs vector
            LC_AnalogVector av;
            while (ReadUTF8(line_s, lanalog, pos))
            {
                av.push_back(lanalog);
            }

            // store vector in hash map
            AnalogMap[lchar] = av;
        }
    }

    // Distinguish a clean end-of-file from an I/O failure.
    const bool read_ok = (ferror(ma_file) == 0);
    fclose(ma_file);

    return read_ok;
}
Exemplo n.º 4
0
/*
 * Appends Length bytes of UTF8Text to the display, breaking the text into
 * chunks so that each chunk fills at most the columns left on the current
 * line. Only code points for which Unicode_IsPrinting() is true consume a
 * column. After every chunk that leaves text unconsumed, the cursor globals
 * are moved to the start of the following line.
 */
void Display_AddText(int Length, const char *UTF8Text)
{
	_SysDebug("%i '%.*s'", Length, Length, UTF8Text);

	while( Length > 0 )
	{
		 int	colsLeft = giDisplayCols - giCurrentCol;
		 int	consumed = 0;

		// Take whole characters (not bytes) until the line is full or the
		// input runs out
		for( ; colsLeft != 0 && consumed < Length; )
		{
			uint32_t	codepoint;
			consumed += ReadUTF8(UTF8Text + consumed, &codepoint);
			if( !Unicode_IsPrinting(codepoint) )
				continue;
			colsLeft --;
			giCurrentCol ++;
		}

		Display_int_PushString(consumed, UTF8Text);

		UTF8Text += consumed;
		Length -= consumed;
		if( Length == 0 )
			break;

		// More text pending - continue at the start of the next line
		giCurrentLinePos = 0;
		giCurrentCol = 0;
		giCurrentLine ++;
	}
}
Exemplo n.º 5
0
// Classifies a chat message.
//
// Returns LCR_GOOD for an empty message or when no word pattern matches,
// LCR_INVALID_LINK when LinkExtractor rejects the message, and LCR_BAD as
// soon as one pattern from _wordList matches.
//
// Matching runs on a working string that starts with a single space and
// holds the link-stripped message with every letter found in _invalidChars
// removed.
LexicsCheckResult LexicsCutter::CheckLexics(const std::string& msg)
{
    if (msg.empty())
        return LCR_GOOD;

    // A message with an invalid link is reported before any word scanning.
    LinkExtractor extractor(msg);
    if (!extractor.IsValidMessage())
        return LCR_INVALID_LINK;

    // Continue with the message text without its links.
    std::string s = extractor.RemoveLinks();

    // Build the filtered working copy, prefixed with one space.
    std::string letter;
    std::string filtered(" ");
    unsigned int cursor = 0;
    while (ReadUTF8(s, letter, cursor))
    {
        if (_invalidChars.find(letter) != std::string::npos)
            continue;
        filtered += letter;
    }

    // Walk the working copy; letterStart always holds the offset at which
    // the letter just read begins, cursor the offset right after it.
    unsigned int letterStart = 0;
    cursor = 0;
    while (ReadUTF8(filtered, letter, cursor))
    {
        std::pair<LC_WordMap::iterator, LC_WordMap::iterator> candidates =
            _wordMap.equal_range(letter);

        for (LC_WordMap::iterator it = candidates.first; it != candidates.second; ++it)
        {
            if (_CompareWord(filtered, letterStart, _wordList[it->second]))
                return LCR_BAD;
        }

        letterStart = cursor;
    }

    return LCR_GOOD;
}
Exemplo n.º 6
0
bool LexicsCutter::Compare_Word(std::string& str, unsigned int pos, LC_WordVector word)
{
   std::string lchar_prev;
    std::string lchar;

   // read first letter of the word into lchar_prev
   ReadUTF8(str, lchar, pos);

    // okay, here we go, comparing word
    // first letter is already okay, we do begin from second and go on
    LC_WordVector::iterator i = word.begin();
    i++;
    while (i != word.end())
    {
        // get letter from word, return false if the string is shorter
        if (!ReadUTF8(str, lchar, pos)) return(false);
        // check, if the letter is in the set
        LC_LetterSet ls = *i;
        if (ls.count(lchar) == 0)
       {
           // letter is not in set, but we must check, if it is not space or repeat
           if ( (!(IgnoreMiddleSpaces && (lchar == " "))) &&
               (!(IgnoreLetterRepeat && (lchar == lchar_prev))) )
           {
               // no checks viable
               return(false);
           }
       }
       else
       {
           // next word letter
           i++;
       }
       // set previous string letter to compare if needed (this check can really conserve time)
       if (IgnoreLetterRepeat) lchar_prev = lchar;
   }

    return(true);
}
Exemplo n.º 7
0
bool LexicsCutter::_CompareWord(const std::string& str, unsigned int pos, LC_WordVector word) const
{
    std::string chPrev;
    std::string ch;

    // Read first letter of the word into lchar_prev
    ReadUTF8(str, ch, pos);

    // Compare word
    // First letter is already okay, we do begin from second and go on
    LC_WordVector::iterator i = word.begin();
    ++i;
    while (i != word.end())
    {
        // Get letter from word, return false if the string is shorter
        if (!ReadUTF8(str, ch, pos))
            return false;
        // Check, if the letter is in the set
        LC_LetterSet ls = *i;
        if (ls.count(ch) == 0)
        {
            // Letter is not in set, but we must check, if it is not space or repeat
            if ((!(_ignoreMiddleSpaces && (ch == " "))) &&
                (!(_ignoreLetterRepeat && (ch == chPrev))))
                // No checks viable
                return false;
        }
        else
            // Next word letter
            ++i;

        // Set previous string letter to compare if needed (this check can really conserve time)
        if (_ignoreLetterRepeat)
            chPrev = ch;
    }
    return true;
}
Exemplo n.º 8
0
// Loads the word list from the text file fileName.
//
// Every line accepted by _ProcessLine becomes one word pattern: for each
// letter read by ReadUTF8 a letter set is built from the letter itself plus
// all analogs registered for it in _analogMap, and the resulting sequence of
// sets is appended to _wordList.
//
// Returns false when the file cannot be opened or a read error occurs,
// true otherwise.
bool LexicsCutter::_ReadInnormativeWords(const std::string& fileName)
{
    char szLine[1024];

    FILE* file = fopen(fileName.c_str(), "rb");
    if (!file)
        return false;

    // Loop on the result of fgets() rather than on feof(): a read error never
    // sets the EOF flag, so a feof()-driven loop would spin forever on it.
    // The 1020-byte read limit is kept from the original code.
    while (fgets(szLine, 1020, file))
    {
        std::string line;
        if (!_ProcessLine(szLine, line))
            continue;

        // Create word vector of vectors
        LC_WordVector vw;
        std::string ch;
        unsigned int pos = 0;
        while (ReadUTF8(line, ch, pos))
        {
            LC_LetterSet vl;

            // Initialize letter set with letter read
            vl.insert(ch);

            // Find letter analogs and push them onto the vector
            LC_AnalogMap::iterator itr = _analogMap.find(ch);
            if (itr != _analogMap.end())
                // Analogs present, iterate
                for (LC_AnalogVector::iterator itr2 = itr->second.begin(); itr2 != itr->second.end(); ++itr2)
                    vl.insert(*itr2);

            // Add letter vector to word vector
            vw.push_back(vl);
        }

        // Push new word to words list
        _wordList.push_back(vw);
    }

    // Distinguish a clean end-of-file from an I/O failure.
    const bool readOk = (ferror(file) == 0);
    fclose(file);
    return readOk;
}
Exemplo n.º 9
0
/*
 * Moves the cursor down one line.
 *
 * giCurrentLine is advanced and wraps to 0 when it reaches giDisplayLines.
 * If it lands on giFirstLine, giFirstLine is advanced (with the same
 * wrap-around) as well.
 *
 * bCarriageReturn != 0: the cursor also returns to column 0.
 * bCarriageReturn == 0: the column is kept. giCurrentLinePos is recomputed
 * for the new line, either by padding a not-yet-present line with spaces or
 * by stepping over the characters already stored in it.
 */
void Display_Newline(int bCarriageReturn)
{
//	Display_Flush();

	// Going down!
	giCurrentLine ++;
	if( giCurrentLine == giDisplayLines )
		giCurrentLine = 0;
	// Caught up with the first line: push it forward too
	if( giCurrentLine == giFirstLine )
	{
		giFirstLine ++;
		if(giFirstLine == giDisplayLines)
			giFirstLine = 0;
	}
	
	if( bCarriageReturn ) {
		// Start of the new line
		giCurrentLinePos = 0;
		giCurrentCol = 0;
	}
	else {
		giCurrentLinePos = 0;
		// Number of printing characters to step over to reach the old column
		 int	i = giCurrentCol;
		if( !gasDisplayLines[giCurrentLine] )
		{
			// No text stored for this line: restart at column 0 and emit one
			// space per column. Display_AddText advances giCurrentCol itself.
			giCurrentCol = 0;
			while(i--)
				Display_AddText(1, " ");
		}
		else
		{
			// Line already has text: advance giCurrentLinePos over it until i
			// printing characters have been passed.
			// NOTE(review): presumably ReadUTF8 returns the number of bytes
			// consumed - confirm. Nothing here stops at the end of the stored
			// line if it holds fewer than i printing characters.
			while( i -- )
			{
				uint32_t	cp;
				giCurrentLinePos += ReadUTF8(gasDisplayLines[giCurrentLine]+giCurrentLinePos, &cp);
				// Non-printing code points do not count towards the column,
				// so undo the decrement done by the loop condition
				if( !Unicode_IsPrinting(cp) )
					i ++;
			}
		}
	}
}
Exemplo n.º 10
0
// Loads the word list from the text file FileName.
//
// Per line: a leading UTF-8 byte-order mark is removed, lines starting with
// "//" and lines that are empty after trimming CR/LF/space are skipped. Each
// remaining line (with CR/LF trimmed) becomes one word pattern: for every
// letter read by ReadUTF8 a letter set is built from the letter itself plus
// all analogs registered for it in AnalogMap, and the resulting sequence of
// sets is appended to WordList.
//
// Returns false (after logging an error) when the file cannot be opened,
// false when a read error occurs, true otherwise.
bool LexicsCutter::Read_Innormative_Words(std::string& FileName)
{
    FILE *ma_file;
    char line[1024];
    unsigned int pos;
    std::string line_s;
    std::string lchar;

    ma_file = fopen(FileName.c_str(), "rb");

    if (!ma_file)
    {
        sLog.outError("Chat lexics cutter disabled. Reason: LexicsCutterWordsFile file does not exist in the server directory.");
        return false;
    }

    // Loop on the result of fgets() rather than on feof(): a read error never
    // sets the EOF flag, so a feof()-driven loop would spin forever on it.
    while (fgets(line, 1020, ma_file))
    {
        // check for UTF8 prefix and comment
        if (strlen(line) >= 3)
        {
            if (line[0] == '\xEF' && line[1] == '\xBB' && line[2] == '\xBF')
            {
                // Source and destination overlap, so memmove is required;
                // the +1 also moves the terminator, otherwise the last three
                // bytes of the line would be left duplicated at its end.
                memmove(&line[0], &line[3], strlen(line) - 3 + 1);
            }
        }

        if (strlen(line) >= 2)
        {
            if (line[0] == '/' && line[1] == '/') continue;
        }

        // check for empty string
        line_s = line;
        line_s = trim(line_s, "\x0A\x0D\x20");
        if (line_s == "") continue;

        // process line without CR/LF
        line_s = line;
        line_s = trim(line_s, "\x0A\x0D");

        // create word vector of vectors
        LC_WordVector vw;
        pos = 0;
        while (ReadUTF8(line_s, lchar, pos))
        {
            // create letter set
            LC_LetterSet vl;

            // initialize letter set with letter read
            vl.insert(lchar);

            // find letter analogs and push them onto the vector
            LC_AnalogMap::iterator itr = AnalogMap.find(lchar);
            if (itr != AnalogMap.end())
            {
                // analogs present, iterate
                for (LC_AnalogVector::iterator itr2 = itr->second.begin(); itr2 != itr->second.end(); ++itr2)
                {
                    vl.insert(*itr2);
                }
            }

            // add letter vector to word vector
            vw.push_back(vl);
        }

        // push new word to words list
        WordList.push_back(vw);
    }

    // Distinguish a clean end-of-file from an I/O failure.
    const bool read_ok = (ferror(ma_file) == 0);
    fclose(ma_file);

    return read_ok;
}
Exemplo n.º 11
0
// Converts the UTF-8 text `str` into glyph outlines.
//
// For every code point the glyph is loaded unhinted at a character size of
// units_per_EM and its outline is decomposed through `outline_funcs`, which
// receive `origin`, `u`, `v`, the target list `sbl`, a 1/units_per_EM scale
// factor and the current horizontal pen offset. The pen then advances by the
// glyph's advance width.
//
// Requires the font face to be loaded. Stops early, leaving the glyphs
// plotted so far in place, when the character size cannot be set or a glyph
// cannot be loaded; a failed outline decomposition is only logged.
void TtfFont::PlotString(const std::string &str,
                         SBezierList *sbl, Vector origin, Vector u, Vector v)
{
    ssassert(fontFace != NULL, "Expected font face to be loaded");

    FT_Pos dx = 0;
    for(char32_t chr : ReadUTF8(str)) {
        uint32_t gid = FT_Get_Char_Index(fontFace, chr);
        if (gid == 0) {
            dbp("freetype: CID-to-GID mapping for CID 0x%04x failed: %s; using CID as GID",
                chr, ft_error_string(gid));
            // Actually apply the fallback announced in the message above;
            // previously gid stayed 0 and the CID was never used.
            gid = chr;
        }

        FT_F26Dot6 scale = fontFace->units_per_EM;
        if(int fterr = FT_Set_Char_Size(fontFace, scale, scale, 72, 72)) {
            dbp("freetype: cannot set character size: %s",
                ft_error_string(fterr));
            return;
        }

        /*
         * Stupid hacks:
         *  - if we want fake-bold, use FT_Outline_Embolden(). This actually looks
         *    quite good.
         *  - if we want fake-italic, apply a shear transform [1 s s 1 0 0] here using
         *    FT_Set_Transform. This looks decent at small font sizes and bad at larger
         *    ones, antialiasing mitigates this considerably though.
         */
        if(int fterr = FT_Load_Glyph(fontFace, gid, FT_LOAD_NO_BITMAP | FT_LOAD_NO_HINTING)) {
            dbp("freetype: cannot load glyph (gid %d): %s",
                gid, ft_error_string(fterr));
            return;
        }

        /* A point that has x = xMin should be plotted at (dx0 + lsb); fix up
         * our x-position so that the curve-generating code will put stuff
         * at the right place.
         *
         * There's no point in getting the glyph BBox here - not only can it be
         * needlessly slow sometimes, but because we're about to render a single glyph,
         * what we want actually *is* the CBox.
         *
         * This is notwithstanding that this makes extremely little sense, this
         * looks like a workaround for either mishandling the start glyph on a line,
         * or as a really hacky pseudo-track-kerning (in which case it works better than
         * one would expect! especially since most fonts don't set track kerning).
         */
        FT_BBox cbox;
        FT_Outline_Get_CBox(&fontFace->glyph->outline, &cbox);
        FT_Pos bx = dx - cbox.xMin;
        // horiBearingX is what FreeType uses as the left-side bearing.
        bx += fontFace->glyph->metrics.horiBearingX;

        OutlineData data = {};
        data.origin  = origin;
        data.u       = u;
        data.v       = v;
        data.beziers = sbl;
        data.factor  = 1.0f/(float)scale;
        data.bx      = bx;
        if(int fterr = FT_Outline_Decompose(&fontFace->glyph->outline, &outline_funcs, &data)) {
            dbp("freetype: bezier decomposition failed (gid %d): %s",
                gid, ft_error_string(fterr));
        }

        // And we're done, so advance our position by the requested advance
        // width, plus the user-requested extra advance.
        dx += fontFace->glyph->advance.x;
    }
}
Exemplo n.º 12
0
// Loads the word list from the text file fileName.
//
// Per line: a leading UTF-8 byte-order mark is removed, lines starting with
// "//" and lines that are empty after trimming CR/LF/space are skipped. Each
// remaining line (with CR/LF trimmed) becomes one word pattern: for every
// letter read by ReadUTF8 a letter set is built from the letter itself plus
// all analogs registered for it in m_AnalogMap, and the resulting sequence
// of sets is appended to m_WordList.
//
// Returns false when the file cannot be opened or a read error occurs,
// true otherwise.
bool LexicsCutter::ReadInnormativeWords(std::string& fileName)
{
    char line[1024];
    unsigned int pos;
    std::string line_s;
    std::string lchar;

    FILE* file = fopen(fileName.c_str(), "rb");
    if (!file)
        return false;

    // Loop on the result of fgets() rather than on feof(): a read error never
    // sets the EOF flag, so a feof()-driven loop would spin forever on it.
    while (fgets(line, 1020, file))
    {
        // check for UTF8 prefix and comment
        if (strlen(line) >= 3)
            if (line[0] == '\xEF' && line[1] == '\xBB' && line[2] == '\xBF')
                // Source and destination overlap, so memmove is required;
                // the +1 also moves the terminator, otherwise the last three
                // bytes of the line would be left duplicated at its end.
                memmove(&line[0], &line[3], strlen(line) - 3 + 1);

        if (strlen(line) >= 2)
            if (line[0] == '/' && line[1] == '/')
                continue;

        // check for empty string
        line_s = line;
        line_s = trim(line_s, "\x0A\x0D\x20");
        if (line_s == "")
            continue;

        // process line without CR/LF
        line_s = line;
        line_s = trim(line_s, "\x0A\x0D");

        // create word vector of vectors
        LC_WordVector vw;
        pos = 0;
        while (ReadUTF8(line_s, lchar, pos))
        {
            // create letter set
            LC_LetterSet vl;

            // initialize letter set with letter read
            vl.insert(lchar);

            // find letter analogs and push them onto the vector
            LC_AnalogMap::iterator itr = m_AnalogMap.find(lchar);
            if (itr != m_AnalogMap.end())
                // analogs present, iterate
                for (LC_AnalogVector::iterator itr2 = itr->second.begin(); itr2 != itr->second.end(); ++itr2)
                    vl.insert(*itr2);

            // add letter vector to word vector
            vw.push_back(vl);
        }

        // push new word to words list
        m_WordList.push_back(vw);
    }

    // Distinguish a clean end-of-file from an I/O failure.
    const bool readOk = (ferror(file) == 0);
    fclose(file);
    return readOk;
}