Esempio n. 1
0
/*!
** Work out which capitalization class a word falls into.
**
** The scan first looks for a lowercase character, then for an uppercase
** character at or after it, and finally uses the case of word[0] and of
** the remaining characters to choose between the four codes.
**
** \param word Zero-terminated ichar_t string to classify
**
** \return ALLCAPS when the word contains no lowercase character (an
**         empty word also lands here); FOLLOWCASE when an uppercase
**         character follows a lowercase one, or when word[0] and some
**         later character are both uppercase; CAPITALIZED when word[0]
**         is the only uppercase character; ANYCASE when word[0] is not
**         uppercase and no uppercase character follows the first
**         lowercase one.
*/
long
ISpellChecker::whatcap (ichar_t *word)
{
    ichar_t *	scan = word;

    /* Step over everything that precedes the first lowercase character. */
    while (*scan != 0  &&  !mylower (*scan))
		scan++;
    if (*scan == 0)
		return ALLCAPS;

    /* Is there an uppercase character after that lowercase one? */
    while (*scan != 0  &&  !myupper (*scan))
		scan++;
    if (*scan != 0)
		return FOLLOWCASE;	/* .../lower/upper */

    /*
    ** Nothing uppercase follows the lowercase part, so the answer now
    ** depends only on the leading characters.
    */
    if (!myupper (word[0]))
		return ANYCASE;

    /* A second capital anywhere after the first makes it followcase. */
    for (scan = word + 1;  *scan != 0;  scan++)
	{
		if (myupper (*scan))
			return FOLLOWCASE;
	}
    return CAPITALIZED;
}
Esempio n. 2
0
/*
 * \param a
 * \param b
 * \param canonical NZ for canonical string chars
 *
 * \return
 */
int
ISpellChecker::casecmp (char *a, char *b, int canonical)
{
    register ichar_t *	ap;
    register ichar_t *	bp;
    ichar_t		inta[INPUTWORDLEN + 4 * MAXAFFIXLEN + 4];
    ichar_t		intb[INPUTWORDLEN + 4 * MAXAFFIXLEN + 4];

    strtoichar (inta, a, sizeof inta, canonical);
    strtoichar (intb, b, sizeof intb, canonical);
    for (ap = inta, bp = intb;  *ap != 0;  ap++, bp++)
	{
		if (*ap != *bp)
	    {
			if (*bp == '\0')
				return m_hashheader.sortorder[*ap];
			else if (mylower (*ap))
			{
				if (mylower (*bp)  ||  mytoupper (*ap) != *bp)
					return static_cast<int>(m_hashheader.sortorder[*ap])
					  - static_cast<int>(m_hashheader.sortorder[*bp]);
			}
			else
			{
				if (myupper (*bp)  ||  mytolower (*ap) != *bp)
					return static_cast<int>(m_hashheader.sortorder[*ap])
					  - static_cast<int>(m_hashheader.sortorder[*bp]);
			}
	    }
	}
    if (*bp != '\0')
		return -static_cast<int>(m_hashheader.sortorder[*bp]);
    for (ap = inta, bp = intb;  *ap;  ap++, bp++)
	{
		if (*ap != *bp)
	    {
			return static_cast<int>(m_hashheader.sortorder[*ap])
			  - static_cast<int>(m_hashheader.sortorder[*bp]);
	    }
	}
    return 0;
}
Esempio n. 3
0
/*!
 * Print a suffix expansion
 *
 * \param croot Char version of rootword
 * \param rootword Root word to expand
 * \param flent Current table entry
 * \param option Option, see expandmode
 * \param extra Extra info to add to line
 *
 * \return
 */
int ISpellChecker::pr_suf_expansion (char *croot, ichar_t *rootword, 
							struct flagent *flent, int option, char *extra)
{
    int				cond;		/* Current condition number */
    register ichar_t *		nextc;		/* Next case choice */
    int				tlen;		/* Length of tword */
    ichar_t			tword[INPUTWORDLEN + MAXAFFIXLEN]; /* Temp */

    tlen = icharlen (rootword);
    cond = flent->numconds;
    if (cond > tlen)
		return 0;
    if (tlen - flent->stripl <= 0)
		return 0;
    for (nextc = rootword + tlen;  --cond >= 0;  )
	{
		if ((flent->conds[mytoupper (*--nextc)] & (1 << cond)) == 0)
			return 0;
	}
    /*
     * The conditions are satisfied.  Copy the word, add the suffix,
     * and make it match the case of the last remaining character of the
     * root.  Again, this code carefully matches ins_cap and cap_ok.
     */
    icharcpy (tword, rootword);
    nextc = tword + tlen - flent->stripl;
    if (flent->affl)
	{
		icharcpy (nextc, flent->affix);
		if (!myupper (nextc[-1]))
			forcelc (nextc, flent->affl);
	}
    else
		*nextc = 0;
    if (option == 3)
		printf ("\n%s", croot);
    if (option != 4)
		printf (" %s%s", ichartosstr (tword, 1), extra);
    return tlen + flent->affl - flent->stripl;
}
Esempio n. 4
0
/*!
 * Store one or more capitalizations of a word in savearea.
 *
 * Each stored variant is a copy of word whose case has been adjusted
 * according to the sample pattern and to the capitalization types
 * recorded in the dictionary entries starting at firstdent.  Every
 * stored variant increments *nsaved; nothing is stored once *nsaved
 * has reached MAX_CAPS.
 *
 * \param word Word to be saved
 * \param pattern Capitalization pattern
 * \param prestrip No. chars stripped from front
 * \param preadd No. chars added to front of root
 * \param sufstrip No. chars stripped from back
 * \param sufadd No. chars added to back of root
 * \param firstdent First dent for root
 * \param pfxent Pfx-flag entry for word
 * \param sufent Sfx-flag entry for word
 * \param savearea Room to save words
 * \param nsaved Number saved so far (updated)
 */
void
ISpellChecker::save_root_cap (ichar_t *word, ichar_t *pattern, 
						  int prestrip, int preadd, int sufstrip, int sufadd,
						  struct dent *firstdent, struct flagent *pfxent, struct flagent *sufent, 
						  ichar_t savearea[MAX_CAPS][INPUTWORDLEN + MAXAFFIXLEN], 
					      int * nsaved)
{
#ifndef NO_CAPITALIZATION_SUPPORT
    register struct dent * dent;
#endif /* NO_CAPITALIZATION_SUPPORT */
    int			firstisupper;
    ichar_t		newword[INPUTWORDLEN + 4 * MAXAFFIXLEN + 4];
#ifndef NO_CAPITALIZATION_SUPPORT
    register ichar_t *	p;
    int			len;
    int			i;
    int			limit;
#endif /* NO_CAPITALIZATION_SUPPORT */

    /* The save area is already full; there is nowhere to put a result. */
    if (*nsaved >= MAX_CAPS)
		return;
    /* All case changes are made on a private copy of the word. */
    icharcpy (newword, word);
    firstisupper = myupper (pattern[0]);
#ifdef NO_CAPITALIZATION_SUPPORT
    /*
    ** Apply the old, simple-minded capitalization rules.
    */
    if (firstisupper)
	{
		if (myupper (pattern[1]))
			upcase (newword);
		else
	    {
			lowcase (newword);
			newword[0] = mytoupper (newword[0]);
	    }
	}
    else
		lowcase (newword);
    icharcpy (savearea[*nsaved], newword);
    (*nsaved)++;
    return;
#else /* NO_CAPITALIZATION_SUPPORT */
/*
** flagsareok(dent) is true when dent's mask carries the flag bit of
** every affix entry that was supplied; a NULL pfxent or sufent imposes
** no requirement.  Note that the macro reads pfxent and sufent from the
** enclosing function.
*/
#define flagsareok(dent)    \
    ((pfxent == NULL \
	||  TSTMASKBIT (dent->mask, pfxent->flagbit)) \
      &&  (sufent == NULL \
	||  TSTMASKBIT (dent->mask, sufent->flagbit)))

    dent = firstdent;
    /*
    ** The comparison includes the MOREVARIANTS bit, so it only succeeds
    ** when that bit is clear as well (presumably ALLCAPS has no bits
    ** outside CAPTYPEMASK -- confirm against the flag definitions).
    */
    if ((dent->flagfield & (CAPTYPEMASK | MOREVARIANTS)) == ALLCAPS)
	{
		upcase (newword);	/* Uppercase required */
		icharcpy (savearea[*nsaved], newword);
		(*nsaved)++;
		return;
	}
    /* Look for a lowercase character anywhere in the sample pattern. */
    for (p = pattern;  *p;  p++)
	{
		if (mylower (*p))
			break;
	}
    if (*p == 0)
	{
		upcase (newword);	/* Pattern was all caps */
		icharcpy (savearea[*nsaved], newword);
		(*nsaved)++;
		return;
	}
    /* Look for an uppercase character after the first position. */
    for (p = pattern + 1;  *p;  p++)
	{
		if (myupper (*p))
			break;
	}
    if (*p == 0)
	{
		/*
		** The pattern was all-lower or capitalized.  If that's
		** legal, insert only that version.
		*/
		if (firstisupper)
		{
			if (captype (dent->flagfield) == CAPITALIZED
			  ||  captype (dent->flagfield) == ANYCASE)
			{
				lowcase (newword);
				newword[0] = mytoupper (newword[0]);
				icharcpy (savearea[*nsaved], newword);
				(*nsaved)++;
				return;
			}
		}
		else
		{
			if (captype (dent->flagfield) == ANYCASE)
			{
				lowcase (newword);
				icharcpy (savearea[*nsaved], newword);
				(*nsaved)++;
				return;
			}
		}
		/*
		** The first entry did not allow the simple form; try the
		** remaining variants, ignoring followcase entries and entries
		** whose flags do not cover the affixes in use.
		*/
		while (dent->flagfield & MOREVARIANTS)
		{
			dent = dent->next;
			if (captype (dent->flagfield) == FOLLOWCASE
			  ||  !flagsareok (dent))
				continue;
			if (firstisupper)
			{
				if (captype (dent->flagfield) == CAPITALIZED)
				{
					lowcase (newword);
					newword[0] = mytoupper (newword[0]);
					icharcpy (savearea[*nsaved], newword);
					(*nsaved)++;
					return;
				}
			}
			else
			{
				if (captype (dent->flagfield) == ANYCASE)
				{
					lowcase (newword);
					icharcpy (savearea[*nsaved], newword);
					(*nsaved)++;
					return;
				}
			}
	    }
	}
    /*
    ** Either the sample had complex capitalization, or the simple
    ** capitalizations (all-lower or capitalized) are illegal.
    ** Insert all legal capitalizations, including those that are
    ** all-lower or capitalized.  If the prototype is capitalized,
    ** capitalized all-lower samples.  Watch out for affixes.
    */
    dent = firstdent;
    p = strtosichar (dent->word, 1);
    /* len is taken from the first entry's word and reused for every variant. */
    len = icharlen (p);
    if (dent->flagfield & MOREVARIANTS)
		dent = dent->next;	/* Skip place-holder entry */
    for (  ;  ;  )
	{
		if (flagsareok (dent))
	    {
			if (captype (dent->flagfield) != FOLLOWCASE)
			{
				lowcase (newword);
				if (firstisupper  ||  captype (dent->flagfield) == CAPITALIZED)
					newword[0] = mytoupper (newword[0]);
				icharcpy (savearea[*nsaved], newword);
				(*nsaved)++;
				if (*nsaved >= MAX_CAPS)
					return;
			}
			else
			{
				/* Followcase is the tough one. */
				p = strtosichar (dent->word, 1);
				/*
				** Overlay the entry's own spelling of the retained
				** part of the root (everything between the stripped
				** prefix and suffix characters) onto newword, just
				** after the added prefix.
				*/
				memmove (
				  reinterpret_cast<char *>(newword + preadd),
				  reinterpret_cast<char *>(p + prestrip),
				  (len - prestrip - sufstrip) * sizeof (ichar_t));
				/* The added prefix takes the case of the first retained root char. */
				if (myupper (p[prestrip]))
				{
					for (i = 0;  i < preadd;  i++)
						newword[i] = mytoupper (newword[i]);
				}
				else
				{
					for (i = 0;  i < preadd;  i++)
						newword[i] = mytolower (newword[i]);
				}
				/*
				** i is the index in newword where the added suffix
				** starts and limit is one past its end; p is moved to
				** the last retained root character, whose case the
				** suffix follows.
				*/
				limit = len + preadd + sufadd - prestrip - sufstrip;
				i = len + preadd - prestrip - sufstrip;
				p += len - sufstrip - 1;
				if (myupper (*p))
				{
					for (p = newword + i;  i < limit;  i++, p++)
						*p = mytoupper (*p);
				}
				else
				{
					for (p = newword + i;  i < limit;  i++, p++)
						*p = mytolower (*p);
				}
				icharcpy (savearea[*nsaved], newword);
				(*nsaved)++;
				if (*nsaved >= MAX_CAPS)
					return;
			}
	    }
		if ((dent->flagfield & MOREVARIANTS) == 0)
			break;		/* End of the line */
		dent = dent->next;
	}
    return;
#endif /* NO_CAPITALIZATION_SUPPORT */
}
Esempio n. 5
0
/*!
 * Decide whether the capitalization of a word is acceptable for the
 * dictionary hit that matched it.
 *
 * The word is classified with whatcap() and then compared against each
 * capitalization variant chained from hit->dictent.  A variant is only
 * accepted when entryhasaffixes() also agrees for it.
 *
 * \param word Word whose capitalization is being checked
 * \param hit Match description; its dictent, prefix and suffix members
 *            are read here
 * \param len Used to size the root part of the followcase comparison
 *            (presumably the length of word in ichar_t units --
 *            confirm against callers)
 *
 * \return 1 if the capitalization is acceptable, 0 otherwise
 */
int ISpellChecker::cap_ok(ichar_t *word, struct success *hit, int len)
{
    ichar_t *dword;
    ichar_t *w;
    struct dent *dent;
    ichar_t dentword[INPUTWORDLEN + MAXAFFIXLEN];
    int preadd = 0;
    int prestrip = 0;
    int sufadd = 0;
    ichar_t *limit;
    long thiscap;
    long dentcap;

    thiscap = whatcap(word);
    /*
    ** All caps is always legal, regardless of affixes.
    */
    if(thiscap == ALLCAPS)
        return 1;
    if(thiscap == FOLLOWCASE)
    {
        /* Affix lengths are only needed by the followcase comparison. */
        if(hit->prefix)
        {
            preadd = hit->prefix->affl;
            prestrip = hit->prefix->stripl;
        }
        if(hit->suffix)
            sufadd = hit->suffix->affl;
    }
    /*
    ** Search the variants for one that matches what we have.  Note
    ** that thiscap can't be ALLCAPS, since we already returned
    ** for that case.
    */
    dent = hit->dictent;
    for(;;)
    {
        dentcap = captype(dent->flagfield);
        if(dentcap != thiscap)
        {
            /* An any-case entry may also be written capitalized. */
            if(dentcap == ANYCASE && thiscap == CAPITALIZED && entryhasaffixes(dent, hit))
                return 1;
        }
        else /* captypes match */
        {
            if(thiscap != FOLLOWCASE)
            {
                if(entryhasaffixes(dent, hit))
                    return 1;
            }
            else
            {
                /*
                ** Make sure followcase matches exactly.
                ** Life is made more difficult by the
                ** possibility of affixes.  Start with
                ** the prefix.
                **
                ** The size passed to strtoichar is the byte size of
                ** the destination, as casecmp does; a smaller limit
                ** could truncate a long dictionary word and make the
                ** root comparison below read past the converted text.
                */
                strtoichar(dentword, dent->word, sizeof dentword, 1);
                dword = dentword;
                limit = word + preadd;
                /* The added prefix must follow the case of the first kept root char. */
                if(myupper(dword[prestrip]))
                {
                    for(w = word; w < limit; w++)
                    {
                        if(mylower(*w))
                            goto doublecontinue;
                    }
                }
                else
                {
                    for(w = word; w < limit; w++)
                    {
                        if(myupper(*w))
                            goto doublecontinue;
                    }
                }
                dword += prestrip;
                /* Do root part of word */
                limit = dword + len - preadd - sufadd;
                while(dword < limit)
                {
                    if(*dword++ != *w++)
                        goto doublecontinue;
                }
                /* Do suffix: it must follow the case of the last root char */
                dword = limit - 1;
                if(myupper(*dword))
                {
                    for(; *w; w++)
                    {
                        if(mylower(*w))
                            goto doublecontinue;
                    }
                }
                else
                {
                    for(; *w; w++)
                    {
                        if(myupper(*w))
                            goto doublecontinue;
                    }
                }
                /*
                ** All failure paths go to "doublecontinue,"
                ** so if we get here it must match.
                */
                if(entryhasaffixes(dent, hit))
                    return 1;
            doublecontinue:;
            }
        }
        if((dent->flagfield & MOREVARIANTS) == 0)
            break;
        dent = dent->next;
    }

    /* No matches found */
    return 0;
}
Esempio n. 6
0
/*!
 * Print a prefix expansion
 *
 * \param croot Char version of rootword
 * \param rootword Root word to expand
 * \param flent Current table entry
 * \param mask Mask bits to expand on
 * \param option Option, see	expandmode
 * \param extra Extra info to add to line
 *
 * \return
 */
int ISpellChecker::pr_pre_expansion ( char *croot, ichar_t *rootword, 
							struct flagent *flent, MASKTYPE mask[], int option, 
							char *extra)
{
    int				cond;		/* Current condition number */
    register ichar_t *		nextc;		/* Next case choice */
    int				tlen;		/* Length of tword */
    ichar_t			tword[INPUTWORDLEN + MAXAFFIXLEN]; /* Temp */

    tlen = icharlen (rootword);
    if (flent->numconds > tlen)
		return 0;
    tlen -= flent->stripl;
    if (tlen <= 0)
		return 0;
    tlen += flent->affl;
    for (cond = 0, nextc = rootword;  cond < flent->numconds;  cond++)
	{
		if ((flent->conds[mytoupper (*nextc++)] & (1 << cond)) == 0)
			return 0;
	}
    /*
     * The conditions are satisfied.  Copy the word, add the prefix,
     * and make it the proper case.   This code is carefully written
     * to match that ins_cap and cap_ok.  Note that the affix, as
     * inserted, is uppercase.
     *
     * There is a tricky bit here:  if the root is capitalized, we
     * want a capitalized result.  If the root is followcase, however,
     * we want to duplicate the case of the first remaining letter
     * of the root.  In other words, "Loved/U" should generate "Unloved",
     * but "LOved/U" should generate "UNLOved" and "lOved/U" should
     * produce "unlOved".
     */
    if (flent->affl)
	{
		icharcpy (tword, flent->affix);
		nextc = tword + flent->affl;
	}
    icharcpy (nextc, rootword + flent->stripl);
    if (myupper (rootword[0]))
	{
		/* We must distinguish followcase from capitalized and all-upper */
		for (nextc = rootword + 1;  *nextc;  nextc++)
		{
			if (!myupper (*nextc))
				break;
		}
		if (*nextc)
		{
			/* It's a followcase or capitalized word.  Figure out which. */
			for (  ;  *nextc;  nextc++)
			{
				if (myupper (*nextc))
					break;
			}
			if (*nextc)
			{
				/* It's followcase. */
				if (!myupper (tword[flent->affl]))
					forcelc (tword, flent->affl);
			}
			else
			{
				/* It's capitalized */
				forcelc (tword + 1, tlen - 1);
			}
		}
	}
    else
	{
		/* Followcase or all-lower, we don't care which */
		if (!myupper (*nextc))
			forcelc (tword, flent->affl);
	}
    if (option == 3)
		printf ("\n%s", croot);
    if (option != 4)
		printf (" %s%s", ichartosstr (tword, 1), extra);
    if (flent->flagflags & FF_CROSSPRODUCT)
		return tlen
		  + expand_suf (croot, tword, mask, FF_CROSSPRODUCT, option, extra);
    else
		return tlen;
}