Beispiel #1
0
/*
 * Check whether a word can be accepted as a compound of shorter words
 * (for languages like German that form lots of compounds).
 *
 * Every split point that leaves at least m_hashheader.compoundmin
 * characters on each side is tried from left to right.  At the first split
 * whose head is accepted by good() and whose tail is accepted by good() or,
 * recursively, by compoundgood(), the capitalization check decides the
 * result and the search stops.
 *
 * \param word Null-terminated word to test; this function only writes to a
 *             local copy of it
 * \param pfxopts Options to apply to prefixes
 * \return 1 if the word is an acceptable compound, 0 otherwise
 */
int
ISpellChecker::compoundgood (ichar_t *word, int pfxopts)
{
    /*
    ** Note: the locals below were declared "register" originally.  That
    ** storage class was removed in C++17, so it has been dropped.
    */
    ichar_t	newword[INPUTWORDLEN + MAXAFFIXLEN];	/* Scratch copy of word */
    ichar_t *	p;		/* Current split point inside newword */
    ichar_t	savech;		/* Character temporarily replaced by 0 */
    long	secondcap;	/* Capitalization of 2nd half */

    /*
    ** If compoundflag is COMPOUND_NEVER, compound words are never ok.
    */
    if (m_hashheader.compoundflag == COMPOUND_NEVER)
	return 0;

    /*
    ** This is similar to missingspace, except we quit on the first hit,
    ** and we won't allow either member of the compound to be a single
    ** letter.
    **
    ** We don't do words of length less than 2 * compoundmin, since
    ** both halves must have at least compoundmin letters.
    */
    if (icharlen (word) < 2 * m_hashheader.compoundmin)
	return 0;

    icharcpy (newword, word);

    /*
    ** The loop condition looks compoundmin - 1 characters ahead of the
    ** split point, so the tail starting at p always has at least
    ** compoundmin characters.
    */
    for (p = newword + m_hashheader.compoundmin;
      p[m_hashheader.compoundmin - 1] != 0;
      p++)
    {
	/*
	** Cut the buffer at p just long enough to test the head, then
	** put the character back so that p addresses the intact tail.
	*/
	savech = *p;
	*p = 0;
	const int headok = good (newword, 0, 0, pfxopts, FF_COMPOUNDONLY);
	*p = savech;

	if (!headok)
	    continue;

	if (!good (p, 0, 1, FF_COMPOUNDONLY, 0)
	  &&  !compoundgood (p, FF_COMPOUNDONLY))
	    continue;

	secondcap = whatcap (p);
	/*
	** newword has already been restored here, so this classifies the
	** whole word rather than only the head.
	*/
	switch (whatcap (newword))
	{
	case ANYCASE:
	case CAPITALIZED:
	case FOLLOWCASE:	/* Followcase can have l.c. suffix */
	    return secondcap == ANYCASE;
	case ALLCAPS:
	    return secondcap == ALLCAPS;
	}
	/* Any other capitalization class: keep trying later split points. */
    }
    return 0;
}
Beispiel #2
0
/*
 * Check whether a UTF-8 encoded word is accepted, either directly by good()
 * or as a compound by compoundgood().
 *
 * The word is normalized to NFC, converted through m_translate_in into a
 * local 8-bit buffer, converted to ichar_t form and then looked up.
 *
 * \param utf8Word Word to check, UTF-8 encoded
 * \param length Length of utf8Word in bytes; must be non-zero and smaller
 *               than INPUTWORDLEN + MAXAFFIXLEN
 * \return true if the word is accepted; false if it is rejected, if the
 *         checker was not initialized successfully, or if any of the
 *         conversion steps fails
 */
bool
ISpellChecker::checkWord(const char * const utf8Word, size_t length)
{
	ichar_t iWord[INPUTWORDLEN + MAXAFFIXLEN];
	char szWord[INPUTWORDLEN + MAXAFFIXLEN];

	if (!m_bSuccessfulInit)
		return false;

	if (!utf8Word || length >= (INPUTWORDLEN + MAXAFFIXLEN) || length == 0)
		return false;

	if (!g_iconv_is_valid(m_translate_in))
		return false;

	{
		/* convert to 8bit string and null terminate */
		// the 8bit encodings use precomposed forms
		char *normalizedWord = g_utf8_normalize (utf8Word, length, G_NORMALIZE_NFC);

		// g_utf8_normalize returns NULL when the input is not valid UTF-8;
		// reject such input instead of handing NULL to strlen().
		if (!normalizedWord)
			return false;

		char *In = normalizedWord;
		char *Out = szWord;
		size_t len_in = strlen(In);
		// keep one byte in reserve for the terminator written below
		size_t len_out = sizeof( szWord ) - 1;

		const size_t result = g_iconv(m_translate_in, &In, &len_in, &Out, &len_out);
		g_free(normalizedWord);
		if (static_cast<size_t>(-1) == result)
			return false;
		*Out = '\0';
	}

	// a non-zero return from strtoichar is treated as a failed conversion
	if (strtoichar(iWord, szWord, sizeof(iWord), 0))
		return false;

	return good(iWord, 0, 0, 1, 0) == 1 ||
	       compoundgood(iWord, 1) == 1;
}