示例#1
0
/*
 * WordSize
 *
 * Walks the word once with a three-state machine and counts how many
 * times a run of vowels is immediately followed by a non-vowel.
 *
 *   state 0 : nothing read yet
 *   state 1 : last character was a vowel
 *   state 2 : last character was a non-vowel
 *
 * After a non-vowel, the letter 'y' is treated as a vowel in addition
 * to whatever IsVowel() accepts.
 *
 * Returns the number of vowel -> non-vowel transitions.
 */
int WordSize(char *word)  /* in: word having its WordSize taken */
{
	int transitions = 0;   /* vowel -> non-vowel transitions seen so far */
	int state = 0;         /* current state in machine */
	char *cursor;          /* character currently being examined */

	for ( cursor = word; *cursor != '\0'; cursor++ )
	{
		if ( 0 == state )
		{
			state = (IsVowel(*cursor)) ? 1 : 2;
		}
		else if ( 1 == state )
		{
			/* leaving a vowel run is what gets counted */
			if ( !(IsVowel(*cursor)) )
			{
				state = 2;
				transitions++;
			}
		}
		else if ( 2 == state )
		{
			/* 'y' following a non-vowel acts as a vowel */
			state = (IsVowel(*cursor) || ('y' == *cursor)) ? 1 : 2;
		}
	}

	return( transitions );

} /* WordSize */
示例#2
0
// Returns the stem of verb number VerbNum from the "verb_present" dictionary
// file, i.e. the text a suffix such as "ed"/"ing" gets appended to (see the
// examples in the rule comments below).
//
// The first character of the verb's line selects where the stem comes from:
//   '1'  -> segment 2 is returned as stored
//   '0'  -> segment 11 is returned as stored
//   else -> segment 1 is read and the English spelling rules below are applied
//
// Returns "" when the line or the segment cannot be reached (GotoLine /
// GotoSegment report failure with a non-zero result).
// The stream is closed by the std::ifstream destructor on every return path.
std::string en::GetVerbPreAdd(int VerbNum)
{
	std::ifstream is (DICTIONARY EN_EN_FOLDER "verb_present");
	if (GotoLine(is,VerbNum)) return "";
	const int Data = is.get();

	if (Data == '1' || Data == '0')
	{
		if (GotoSegment(is,(Data == '1') ? 2 : 11)) return "";
		std::string Segment = GetSegment(is);
		return Segment;
	}

	if (GotoSegment(is,1)) return "";
	std::string Segment = GetSegment(is);

	//Now follow all the english spelling rules
	//http://www.oxforddictionaries.com/words/verb-tenses-adding-ed-and-ing
	//http://www.grammar.cl/Notes/Spelling_ING.htm
	//
	//Every rule checks the length first: indexing u-1 / u-2 / u-3 on a
	//shorter string would read outside of it (e.g. the two letter verb "go").
	const std::string::size_type u = Segment.length();
	if (u >= 2 && Segment[u-1] == 'e' && Segment[u-2] == 'i')
	{
		//Turn 'ie' into y
		Segment.erase(u-2);
		Segment += "y";
	}
	else if (u >= 2 && Segment[u-1] == 'e' && ( Segment[u-2] != 'e' && Segment[u-2] != 'y' && Segment[u-2] != 'o' ))
	{
		//Drop final e, if e is not before y,e or o
		//make -> mak ( + {ed,ing} )
		Segment.erase(u-1);
	}
	else if (u >= 1 && Segment[u-1] == 'c')
	{
		//Add a k after a c
		//picnic -> picnick ( + {ed,ing} )
		Segment += "k";
	}
	else if (u >= 3 && !IsVowel(Segment[u-3]) && IsVowel(Segment[u-2]) && !IsVowel(Segment[u-1]) && Segment[u-1]!='x' && Segment[u-1]!='y' && Segment[u-1]!='z' && Segment[u-1]!='w')
	{
		//Double consonant after consonant-vowel-consonant cluster.
		//Provided that final consonant is not w,x,y or z
		//Travel -> Travell ( + {ed,ing} )
		const char Last = Segment[u-1];
		Segment += Last;
	}
	return Segment;
}
// Returns false as soon as three neighbouring characters all produce the
// same IsVowel() result (three of the same kind in a row), true otherwise.
// Strings shorter than three characters always pass.
bool NoThreeCons(char* cString)
{
	if(strlen(cString)<3)
		return true;

	// window[0..2] slides over the string until its last slot hits the terminator
	char* window=cString;
	while(window[2]!='\0')
	{
		if(IsVowel(window[0])==IsVowel(window[1]))
		{
			if(IsVowel(window[0])==IsVowel(window[2]))
				return false;
		}
		++window;
	}
	return true;
}
示例#4
0
文件: word.cpp 项目: Vuwij/uSpeech
// Counts the positions where a non-vowel directly follows a vowel.
// Returns that count (0 for an empty or one-character string).
//
// The counter starts at 0 (it was previously left uninitialised) and the
// scan starts at index 1 so that ing[i-1] never refers to a position before
// the start of the string.
int getSyllables(String ing){
	int s = 0;
	for(unsigned int i = 1; i < ing.length(); i++){
		if((!IsVowel(ing[i]))&&IsVowel(ing[i-1])){
			s++;
		}
	}
	return s;
}
 // Returns a copy of s in which the vowels appear in reverse order while
 // every other character keeps its position. Two indices close in from
 // both ends; each stops on the next vowel and the pair is exchanged.
 string reverseVowels(string s)  {
     int left=0;
     int right=static_cast<int>(s.size())-1;
     for (; left<right; ++left, --right) {
         // advance each side to its next vowel (or until the indices meet)
         for (; left<right && !IsVowel(s[left]); ++left) {}
         for (; left<right && !IsVowel(s[right]); --right) {}
         swap(s[left],s[right]);
     }
     return s;
 }
示例#6
0
// Returns true when at least one of this rule's endings matches `string`.
//
// For each ending:
//  - a leading '^' anchors the comparison at the start of `string`
//    (the '^' itself is skipped, the comparison length still includes it);
//  - otherwise the comparison starts strlen(ending) characters before the
//    end of `string`, or at its start when `string` is shorter than that.
// A match is either a literal strncmp match, or the ending starts with 'V'
// / 'C' and the corresponding character of `string` satisfies IsVowel /
// IsConsonant while the remainder matches literally.
bool InflectRule::IsApplicable(const char *string) const {
  for (int i=0; i<nEndings; i++) {
    const char *pattern = Ending(i);
    const int n = strlen(pattern);
    const char *tail = string;

    if (pattern[0] == '^') {
      pattern += 1;
    } else {
      // len - n may be < 0: in that case compare from the start of the string
      int offset = strlen(string) - n;
      if (offset > 0)
        tail = string + offset;
    }

    if (strncmp(tail, pattern, n) == 0)
      return true;

    // 'V' / 'C' act as character-class wildcards for the first position
    if (pattern[0] == 'V') {
      if (IsVowel(tail[0]) && strncmp(tail+1, pattern+1, n-1) == 0)
        return true;
    } else if (pattern[0] == 'C') {
      if (IsConsonant(tail[0]) && strncmp(tail+1, pattern+1, n-1) == 0)
        return true;
    }
  }
  return false;
}
// Returns true if IsVowel() accepts at least one character of the
// NUL-terminated string, false otherwise (including the empty string).
bool ExistVowel(char* cString)
{
	char* cursor=cString;
	while(*cursor!='\0')
	{
		if(IsVowel(*cursor))
			return true;
		++cursor;
	}
	return false;
}
// Reads one line from standard input, filters it and prints the result:
//   1. every alphabetic character that is a vowel, or that has already
//      occurred earlier in the line, is replaced by the DELETE marker;
//   2. scanning from the right, the blanks/DELETE markers directly in front
//      of each surviving punctuation character or blank are marked DELETE;
//   3. trailing and leading blanks/DELETE markers are trimmed;
//   4. the remaining characters that are not DELETE are printed.
//
// Fixes relative to the previous version:
//   - the "already seen" table is indexed by the unsigned character value
//     and has one slot per possible value; indexing a 26-entry table with
//     c-'A' ran past the array for lowercase letters;
//   - characters are converted to unsigned char before being handed to
//     isalpha/ispunct, as <cctype> requires;
//   - main returns int.
int main()
{
	bool bUse[256]={false};
	char cString[LEN];
	int iLength,i,j;
	cin.getline(cString,LEN);
	iLength=strlen(cString);
	for(i=0;i<iLength;i++)
	{
		const unsigned char ucCurrent=static_cast<unsigned char>(cString[i]);
		if(isalpha(ucCurrent))
		{
			if(IsVowel(cString[i]) || bUse[ucCurrent])cString[i]=DELETE;
			else bUse[ucCurrent]=true;
		}
	}
	// Remove the blanks that sit directly in front of punctuation / another blank.
	for(i=iLength-1;i>=0;i--)
		if(cString[i]!=DELETE && (ispunct(static_cast<unsigned char>(cString[i])) || cString[i]==' '))
		{
			for(j=i-1;j>=0 && (cString[j]==' ' || cString[j]==DELETE);j--)
			{
				cString[j]=DELETE;
			}
		}
	// Trim from the right, then skip the leading blanks / markers.
	for(;iLength>0 && (cString[iLength-1]==DELETE || cString[iLength-1]==' ');iLength--);
	for(i=0;i<=iLength && (cString[i]==DELETE || cString[i]==' ');i++);
	for(;i<iLength;i++)
		if(cString[i]!=DELETE)
			cout<<cString[i];
	cout<<endl;
	return 0;
}
示例#9
0
/* Prints the result of IsVowel('O') as a decimal integer. */
int main() {
	char letter = 'O';

	printf("%d", IsVowel(letter));
	return 0;
}
示例#10
0
文件: piglatin.c 项目: guoyu07/TCPL
/*
 * FindFirstVowel
 *
 * Returns the index of the first character of word accepted by IsVowel,
 * or -1 when there is none.
 *
 * The length is taken once before the loop; word is not modified while
 * scanning, so re-measuring it on every iteration was redundant work.
 */
static int FindFirstVowel(string word)
{
    int i, length;

    length = StringLength(word);
    for (i = 0; i < length; i++) {
        if (IsVowel(IthChar(word, i))) return (i);
    }
    return (-1);
}
示例#11
0
/*
 * Prints every uppercase letter 'A'..'Z' that IsVowel accepts, on one line
 * after the label "The vowels are:".
 *
 * Declared as int main(void) with an explicit return value: the implicit-int
 * form is not accepted by C99 and later or by C++, and falling off the end
 * left the exit status unspecified in older C.
 */
int main(void)
{
    char ch;

    printf("The vowels are:");
    for (ch = 'A'; ch <= 'Z'; ch++) {
        if (IsVowel(ch)) printf(" %c", ch);
    }
    printf("\n");
    return 0;
}
示例#12
0
/*
 * ContainsVowel
 *
 * Returns 1 if the word contains a vowel, 0 otherwise. The first character
 * is tested with IsVowel(); every later character counts as a vowel when it
 * is one of "aeiouy". An empty word yields 0.
 */
int ContainsVowel(char *word)   /* in: buffer with word checked */
{

	if ( '\0' == *word )
		return( 0 );

	if ( IsVowel(*word) )
		return( 1 );

	/* 'y' is accepted as a vowel anywhere after the first character */
	return( strpbrk(word+1,"aeiouy") != NULL );

} /* ContainsVowel */
示例#13
0
/*
 * Returns non-zero when word holds a vowel: IsVowel() decides for the first
 * character, membership in "aeiouy" decides for all following characters.
 * Returns FALSE for an empty word.
 */
int     ContainsVowel(char *word)
{
    /* Guard for the empty word; word + 1 below must stay inside the buffer. */
    if ( *word == '\0' )
        return FALSE;

    if ( IsVowel(*word) )
        return 1;

    return (strpbrk(word + 1, "aeiouy") != NULL);
}                               /* ContainsVowel */
示例#14
0
/*
 * Overwrites, in place, every character of s for which IsVowel() returns
 * exactly 1 with the letter 'T'.
 * Returns the number of characters that were replaced.
 */
int removeVowelPointer(char *s)
{
	int replaced = 0;
	char *cursor;

	for (cursor = s; *cursor != '\0'; cursor++) {
		if (IsVowel(*cursor) == 1) {
			*cursor = 'T';
			replaced++;
		}
	}
	return replaced;
}
// Validates the global string Str. Returns true only when all of these hold:
//   - Str is not empty and contains at least one vowel;
//   - no character is immediately repeated, except for "ee" and "oo";
//   - no three consecutive characters are all vowels or all non-vowels.
bool    check ()
{
        const int length = strlen ( Str );
        if ( length == 0 ) return false;

        bool sawVowel = false;
        for ( int pos = 0; pos < length; ++ pos ) {
                const bool current = IsVowel ( Str [pos] );
                if ( current ) sawVowel = true;

                // doubled letter other than 'e' / 'o'
                if ( pos >= 1 && Str [pos] == Str [pos - 1] ) {
                        if ( Str [pos] != 'e' && Str [pos] != 'o' ) return false;
                }

                // three of the same kind (vowel / non-vowel) in a row
                if ( pos >= 2 ) {
                        const bool before = IsVowel ( Str [pos - 1] );
                        const bool twoBefore = IsVowel ( Str [pos - 2] );
                        if ( twoBefore == before && before == current ) return false;
                }
        }
        return  sawVowel;
}
// Ranks a character; a higher number is returned for the more significant
// kinds of character:
//   4  a digit, or a '-' / '.' that is directly followed by a digit
//   3  a letter whose predecessor is not alphanumeric (first letter of a word)
//   2  a consonant (per IsConsonant)
//   1  a vowel (per IsVowel)
//   0  anything else
//
// c    - character being ranked
// prev - character before c
// next - character after c
//
// The <cctype> classifiers are only defined for values representable as
// unsigned char (or EOF), so every char is converted before being passed to
// them; a negative plain char would otherwise be undefined behaviour.
int CStringCruncher::GetCharPriority(char c, char prev, char next)
{
	const unsigned char uc = static_cast<unsigned char>(c);

	if (::isdigit(uc))
		return 4;

	if (('-' == c || '.' == c) && ::isdigit(static_cast<unsigned char>(next)))
		return 4;

	// Is it the first character of a word?
	if (::isalpha(uc) && !::isalnum(static_cast<unsigned char>(prev)))
		return 3;

	if (IsConsonant(c))
		return 2;

	if (IsVowel(c))
		return 1;

	return 0;
}
// TCHAR flavour of the character ranking. Result, highest rank first:
//   4  a digit, or a '-' / '.' immediately followed by a digit
//   3  a letter that follows a non-alphanumeric character (start of a word)
//   2  a consonant (per IsConsonant)
//   1  a vowel (per IsVowel)
//   0  everything else
//
// NOTE(review): the narrow ::isdigit / ::isalpha / ::isalnum classifiers are
// called with TCHAR values - confirm this is safe when TCHAR is a wide type.
int CStringCruncherU::GetCharPriority(TCHAR c, TCHAR prev, TCHAR next)
{
	const bool isSignOrPoint = (_T('-') == c || _T('.') == c);

	// A digit, or the sign / decimal point that introduces one
	if (::isdigit(c) || (isSignOrPoint && ::isdigit(next)))
		return 4;

	// First character of a word
	if (::isalpha(c) && !::isalnum(prev))
		return 3;

	if (IsConsonant(c))
		return 2;

	return IsVowel(c) ? 1 : 0;
}
示例#18
0
// EditDistance - heuristic cost of turning the typed word (inputSet) into the
// dictionary word D. Lower is closer.
//
// Parameters:
//   D          dictionary entry; D->word is lower-cased into a local buffer first.
//   size       length of the dictionary word (bounds every scan over that buffer).
//   inputLen   length of inputSet.
//   inputSet   the typed word, already lower case. The byte in front of
//              inputSet[0] is read when testing for a doubled letter, so the
//              caller must supply a readable (0) byte there.
//   min        cost limit: scanning stops as soon as the running cost exceeds it.
//   realWordLetterCounts  per-letter-index counts compared with the counts of
//              the dictionary word (presumably the histogram of the typed
//              word - confirm against the caller).
//   language   ENGLISH enables the letter-count pre-filters and the first-letter
//              penalties; SPANISH enables the Spanish sound substitutions.
//
// Returns the accumulated cost; the fast-abort paths return 60.
//
// Fixes relative to the previous version:
//   - the UTF-8 accent test compares unsigned bytes. With a signed plain char
//     the bytes 0xc3 / 0xa0.. are negative and never matched the constants,
//     so the branch could not fire;
//   - the "air" / "are" terminal-sound test no longer reads the byte in front
//     of the dictionary buffer when it is evaluated on the first letter.
static int EditDistance(WORDP D, unsigned int size, unsigned int inputLen, char* inputSet, int min,
		unsigned char realWordLetterCounts[LETTERMAX], int language)
{//   dictword has no underscores, inputSet is already lower case
    char dictw[MAX_WORD_SIZE];
    MakeLowerCopy(dictw,D->word);
    char* dictinfo = dictw;
    char* dictstart = dictinfo;
	char* inputstart = inputSet;
    int val = 0; //   a difference in length will manifest as a difference in letter count
    //   how many changes  (change a letter, transpose adj letters, insert letter, drop letter)
    if (size != inputLen) 
	{
		val += (size < inputLen) ? 5 : 2;	// real word is shorter than what they typed, not so likely as longer
		if (size < 7) val += 3;	
	}
    if (val > min) return 60;	// fast abort
	
	// match off how many letter counts are correct between the two, need to be close enough to bother with
	unsigned char dictWordLetterSet[LETTERMAX];
 	memset(dictWordLetterSet,0,LETTERMAX); 
	for (unsigned int  i = 0; i < size; ++i) 
	{
		int index = letterIndexData[(unsigned char)dictinfo[i]];
		++dictWordLetterSet[index]; // compute number of each kind of letter
	}
	unsigned int count = 0;
	for (unsigned int  i = 0; i < LETTERMAX; ++i) // count how many letters are the same in both words
	{
		if (dictWordLetterSet[i]) // revised word has these many
		{
			int diff = dictWordLetterSet[i] - realWordLetterCounts[i]; // how many of ours does real have?
			if (diff < 0) count += dictWordLetterSet[i]; // he has more than we have, he gets credit for ours he does have
			else count += dictWordLetterSet[i] - diff; // he has <= what we have, count them
		}
	}
	unsigned int countVariation = size - ((size > 7) ? 3 : 2); // since size >= 2, this is always >= 0
	if (count < countVariation  && language == ENGLISH)  return 60;	// need most letters be in common
	if (count == size && language == ENGLISH)  // same letters (though he may have excess) --  how many transposes
	{
		unsigned int bad = 0;
		for (unsigned int i = 0; i < size; ++i) if (dictinfo[i] != inputSet[i]) ++bad;
		if (size != inputLen){;}
		else if (bad <= 2) return val + 3; // 1 transpose
		else if (bad <= 4) return val + 9; // 2 transpose
		else return val + 38; // many transpose
    }
	
	// now look at specific letter errors
    char* dictend = dictinfo+size;
    char* inputend = inputSet+inputLen;
	count = 0; // from here on: number of passes through the loop (1 == first letter)
    while (ALWAYS)
    {
		++count;
        if (*dictinfo == *inputSet) // match
        {
            if (inputSet == inputend && dictinfo == dictend) break;    // ended
            ++inputSet;
            ++dictinfo;
            continue;
        }
        if (inputSet == inputend || dictinfo == dictend) // one ending, other has to catch up by adding a letter
        {
            if (inputSet == inputend) ++dictinfo;
            else ++inputSet;
            val += 6;
            continue;
        }

        //   letter match failed
		
        // can we change an accented letter forward to another similar letter without accent
		// (0xc3 is the lead byte of a 2-byte UTF-8 sequence; compare as unsigned bytes so that
		// the test also works where plain char is signed)
		if ((unsigned char)*dictinfo == 0xc3)
		{
			unsigned char trail = (unsigned char)dictinfo[1]; // second byte of the sequence
			bool accent = false;
			if (*inputSet == 'a' && (trail >= 0xa0 && trail <= 0xa5 )) accent = true;
			else if (*inputSet == 'e' && (trail >= 0xa8 && trail <= 0xab )) accent = true;
			else if (*inputSet == 'i' &&  (trail >= 0xac && trail <= 0xaf )) accent = true;
			else if (*inputSet == 'o' && (trail >= 0xb2 && trail <= 0xb6 )) accent = true;
			else if (*inputSet == 'u' && (trail >= 0xb9 && trail <= 0xbc )) accent = true;
			if (accent)
			{
				++dictinfo;
				++dictinfo; // double unicode
				++inputSet;
				continue;
			}
		}
		  //   first and last letter errors are rare, more likely to get them right
		if (dictinfo == dictstart && *dictstart != *inputstart && language == ENGLISH) val += 6; // costs a lot  to change first letter, odds are he types that right 
		if (dictinfo[1] == 0 &&  inputSet[1] == 0 &&  *dictinfo != *inputSet) val += 6; // costs more to change last letter, odds are he types that right or sees its wrong
  
        //   try to resynch series and reduce cost of a transposition of adj letters  
        if (*dictinfo == inputSet[1] && dictinfo[1] == *inputSet) // transpose 
        {
			if (dictinfo[2] == inputSet[2]) // they match after, so transpose is pretty likely
			{
				val += 4;  
				if (dictinfo[2]) // not at end, skip the letter in synch for speed
				{
					++dictinfo;
					++inputSet;
				}
			}
			else val += 8;  // transposed maybe good, assume it is
   			dictinfo += 2;
			inputSet += 2;
		}
        else if (*dictinfo == inputSet[1]) // current dict letter matches his next input letter, so maybe his input inserted a char here and need to delete it 
        {
            char* prior = inputSet-1; // potential extraneous letter
            if (*prior == *inputSet) val += 5; // low cost for dropping an excess repeated letter - start of word is prepadded with 0 for prior char
            else if (*inputSet == '-') val += 3; //   very low cost for removing a hyphen 
            else if (inputSet+1 == inputend && *inputSet == 's') val += 30;    // losing a trailing s is almost not acceptable
            else val += 9; //  high cost removing an extra letter, but not as much as having to change it
            ++inputSet;
		}
        else if (dictinfo[1] == *inputSet) // next dict letter matches current input letter, so maybe his input deleted a char here and needs to insert  it
        {
            char* prior = (dictinfo == dictstart) ? (char*)" " : (dictinfo-1);
            if (*dictinfo == *prior  && !IsVowel(*dictinfo )) val += 5; 
            else if (IsVowel(*dictinfo ))  val += 1; //  low cost for missing a vowel ( already charged for short input), might be a texting abbreviation
            else val += 9; // high cost for deleting a character, but not as much as changing it
            ++dictinfo;
       }
       else //   this has no valid neighbors.  alter it to be the correct, but charge for multiple occurrences
       {
			if (count == 1 && *dictinfo != *inputSet && language == ENGLISH) val += 30; //costs a lot to change the first letter, odds are he types that right or sees its wrong
			//  2 in a row are bad, check for a substituted vowel sound
			bool swap = false;
			int oldval = val; // lets the code below detect whether a multi-letter rule already charged something
			if (dictinfo[1] != inputSet[1]) // do multicharacter transformations
			{
				if (language == SPANISH) // ch-x | qu-k | c-k | do-o | b-v | bue-w | vue-w | z-s | s-c | h- | y-i | y-ll | m-n  1st is valid
				{
					// single letter sound-alikes: consumed without any charge
					if (*inputSet == 'c' && *dictinfo == 'k') 
					{
						dictinfo += 1;
						inputSet += 1;
						continue;
					}
					if (*inputSet == 'b' && *dictinfo == 'v') 
					{
						dictinfo += 1;
						inputSet += 1;
						continue;
					}
					if (*inputSet == 'v' && *dictinfo == 'b') 
					{
						dictinfo += 1;
						inputSet += 1;
						continue;
					}
					if (*inputSet == 'z' && *dictinfo == 's') 
					{
						dictinfo += 1;
						inputSet += 1;
						continue;
					}
					if (*inputSet == 's' && *dictinfo == 'c') 
					{
						dictinfo += 1;
						inputSet += 1;
						continue;
					}
					if (*inputSet == 'y' && *dictinfo == 'i') 
					{
						dictinfo += 1;
						inputSet += 1;
						continue;
					}
					if (*inputSet == 'm' && *dictinfo == 'n') 
					{
						dictinfo += 1;
						inputSet += 1;
						continue;
					}
					if (*inputSet == 'n' && *dictinfo == 'm') 
					{
						dictinfo += 1;
						inputSet += 1;
						continue;
					}
					if (*dictinfo == 'h') 
					{
						dictinfo += 1;
						continue;
					}
					// multi letter sound-alikes: these change the length, so the length
					// penalty charged at the top of the function is taken back (never below 0)
					if (*inputSet == 'x' && !strncmp(dictinfo,(char*)"ch",2)) 
					{
						dictinfo += 2;
						inputSet += 1;
						val -= (size < inputLen) ? 5 : 2;
						if (size < 7) val -= 3;	
						if (val < 0) val = 0;
						continue;
					}
					if (*inputSet == 'k' && !strncmp(dictinfo,(char*)"qu",2)) 
					{
						dictinfo += 2;
						inputSet += 1;
						val -= (size < inputLen) ? 5 : 2;
						if (size < 7) val -= 3;	
						if (val < 0) val = 0;
						continue;
					}
					if (*inputSet == 'o' && !strncmp(dictinfo,(char*)"do",2) && !inputSet[1] && !dictinfo[2]) // at end
					{
						dictinfo += 2;
						inputSet += 1;
						val -= (size < inputLen) ? 5 : 2;
						if (size < 7) val -= 3;	
						if (val < 0) val = 0;
						continue;
					}
					if (*inputSet == 'w' && !strncmp(dictinfo,(char*)"bue",3)) 
					{
						dictinfo += 3;
						inputSet += 1;
						val -= (size < inputLen) ? 5 : 2;
						if (size < 7) val -= 3;	
						if (val < 0) val = 0;
						continue;
					}
					if (*inputSet == 'w' && !strncmp(dictinfo,(char*)"vue",3)) 
					{
						dictinfo += 3;
						inputSet += 1;
						val -= (size < inputLen) ? 5 : 2;
						if (size < 7) val -= 3;	
						if (val < 0) val = 0;
						continue;
					}
					if (!strncmp(inputSet,(char*)"ll",2) && *dictinfo == 'y') 
					{
						inputSet += 2;
						dictinfo += 1;
						val -= (size < inputLen) ? 5 : 2;
						if (size < 7) val -= 3;	
						if (val < 0) val = 0;
						continue;
					}
					if (*inputSet == 'y' && *dictinfo == 'l' && dictinfo[1] == 'l') 
					{
						inputSet += 1;
						dictinfo += 2;
						val -= (size < inputLen) ? 5 : 2;
						if (size < 7) val -= 3;	
						if (val < 0) val = 0;
						continue;
					}
				}

				if (*inputSet == 't' && !strncmp(dictinfo,(char*)"ght",3)) 
				{
                    dictinfo += 3;
                    inputSet += 1;
                    val += 5;  
				}
				else if (!strncmp(inputSet,(char*)"ci",2) && !strncmp(dictinfo,(char*)"cki",3)) 
				{
                    dictinfo += 3;
                    inputSet += 2;
                    val += 5;
				}
				// only look at the previous dictionary letter when there is one
				else if (dictinfo > dictstart && *(dictinfo-1) == 'a' && !strcmp(dictinfo,(char*)"ir") && !strcmp(inputSet,(char*)"re")) // prepair prepare as terminal sound
				{
                    dictinfo += 2;
                    inputSet += 2;
                    val += 3;
				}
				else if (!strncmp(inputSet,(char*)"ous",3) && !strncmp(dictinfo,(char*)"eous",4)) 
				{
                    dictinfo += 4;
                    inputSet += 3;
                    val += 5; 
               }
              else if (!strncmp(inputSet,(char*)"of",2) && !strncmp(dictinfo,(char*)"oph",3)) 
               {
                    dictinfo += 3;
                    inputSet += 2;
                    val += 5; 
               }
			else if (*dictinfo == 'x' && !strncmp(inputSet,(char*)"cks",3)) 
               {
                    dictinfo += 1;
                    inputSet += 3;
                    val += 5; 
               }
               else if (*inputSet == 'k' && !strncmp(dictinfo,(char*)"qu",2)) 
               {
                    dictinfo += 2;
                    inputSet += 1;
                    val += 5;  
               }
			   if (oldval != val){;} // swallowed a multiple letter sound change
               else if (!strncmp(dictinfo,(char*)"able",4) && !strncmp(inputSet,(char*)"ible",4)) swap = true;
               else if (!strncmp(dictinfo,(char*)"ible",4) && !strncmp(inputSet,(char*)"able",4)) swap = true;
               else if (*dictinfo == 'a' && dictinfo[1] == 'y'     && *inputSet == 'e' && inputSet[1] == 'i') swap = true;
               else if (*dictinfo == 'e' && dictinfo[1] == 'a'     && *inputSet == 'e' && inputSet[1] == 'e') swap = true;
               else if (*dictinfo == 'e' && dictinfo[1] == 'e'     && *inputSet == 'e' && inputSet[1] == 'a') swap = true;
               else if (*dictinfo == 'e' && dictinfo[1] == 'e'     && *inputSet == 'i' && inputSet[1] == 'e') swap = true;
               else if (*dictinfo == 'e' && dictinfo[1] == 'i'     && *inputSet == 'a' && inputSet[1] == 'y') swap = true;
               else if (*dictinfo == 'e' && dictinfo[1] == 'u'     && *inputSet == 'o' && inputSet[1] == 'o') swap = true;
               else if (*dictinfo == 'e' && dictinfo[1] == 'u'     && *inputSet == 'o' && inputSet[1] == 'u') swap = true;
               else if (*dictinfo == 'i' && dictinfo[1] == 'e'     && *inputSet == 'e' && inputSet[1] == 'e') swap = true;
               else if (*dictinfo == 'o' && dictinfo[1] == 'o'     && *inputSet == 'e' && inputSet[1] == 'u') swap = true;
               else if (*dictinfo == 'o' && dictinfo[1] == 'o'     && *inputSet == 'o' && inputSet[1] == 'u') swap = true;
               else if (*dictinfo == 'o' && dictinfo[1] == 'o'     && *inputSet == 'u' && inputSet[1] == 'i') swap = true;
               else if (*dictinfo == 'o' && dictinfo[1] == 'u'     && *inputSet == 'e' && inputSet[1] == 'u') swap = true;
               else if (*dictinfo == 'o' && dictinfo[1] == 'u'     && *inputSet == 'o' && inputSet[1] == 'o') swap = true;
               else if (*dictinfo == 'u' && dictinfo[1] == 'i'     && *inputSet == 'o' && inputSet[1] == 'o') swap = true;
               if (swap) // a two letter vowel sound was exchanged for a similar one
               {
                    dictinfo += 2;
                    inputSet += 2;
                    val += 5; 
               }
            } 

            // can we change a letter to another similar letter
            if (oldval == val) 
            {
				bool convert = false;
                if (*dictinfo == 'i' && *inputSet== 'y' && count > 1) convert = true;//   but not as first letter
                else if ((*dictinfo == 's' && *inputSet == 'z') || (*dictinfo == 'z' && *inputSet == 's')) convert = true;
                else if (*dictinfo == 'y' && *inputSet == 'i' && count > 1) convert = true; //   but not as first letter
                else if (*dictinfo == '/' && *inputSet == '-') convert = true;
                else if (inputSet+1 == inputend && *inputSet == 's') val += 30;    //   changing a trailing s is almost not acceptable
                if (convert) val += 5;	// low cost for exchange of similar letter, but dont do it often
                else val += 12;			// changing a letter is expensive, since it destroys the visual image
                ++dictinfo;
                ++inputSet;
            }
       } 
       if (val > min) return val; // too costly, ignore it
    }
    return val;
}
示例#19
0
////////////////////////////////////////////////////////////////////////////////
// main deal
////////////////////////////////////////////////////////////////////////////////
void MString::DoubleMetaphone(CString &metaph, CString &metaph2)
{

        int current = 0;

        length = GetLength();
        if(length < 1)
                return;
        last = length - 1;//zero based index

        alternate = FALSE;

        MakeUpper();

        //pad the original string so that we can index beyond the edge of the world 
        Insert(GetLength(), "     ");
        
        //skip these when at start of word
        if(StringAt(0, 2, "GN", "KN", "PN", "WR", "PS", ""))
                current += 1;

        //Initial 'X' is pronounced 'Z' e.g. 'Xavier'
        if(GetAt(0) == 'X')
        {
                MetaphAdd("S"); //'Z' maps to 'S'
                current += 1;
        }

        ///////////main loop//////////////////////////
        while(TRUE OR (primary.GetLength() < 4) OR (secondary.GetLength() < 4))
        {
                if(current >= length)
                        break;

                switch(GetAt(current))
                {
                        case 'A':
                        case 'E':
                        case 'I':
                        case 'O':
                        case 'U':
                        case 'Y':
                                if(current == 0)
                                        //all init vowels now map to 'A'
                                        MetaphAdd("A"); 
                                current +=1;
                                break;
                        
                        case 'B':

                                //"-mb", e.g", "dumb", already skipped over...
                                MetaphAdd("P");

                                if(GetAt(current + 1) == 'B')
                                        current +=2;
                                else
                                        current +=1;
                                break;
                        
                        case 'Ç':
                                MetaphAdd("S");
                                current += 1;
                                break;

                        case 'C':
                                //various germanic
                                if((current > 1)
                                        AND !IsVowel(current - 2) 
                                                AND StringAt((current - 1), 3, "ACH", "") 
                                                        AND ((GetAt(current + 2) != 'I') AND ((GetAt(current + 2) != 'E') 
                                                                        OR StringAt((current - 2), 6, "BACHER", "MACHER", "")) ))
                                {       
                                        MetaphAdd("K");
                                        current +=2;
                                        break;
                                }

                                //special case 'caesar'
                                if((current == 0) AND StringAt(current, 6, "CAESAR", ""))
                                {
                                        MetaphAdd("S");
                                        current +=2;
                                        break;
                                }

                                //italian 'chianti'
                                if(StringAt(current, 4, "CHIA", ""))
                                {
                                        MetaphAdd("K");
                                        current +=2;
                                        break;
                                }

                                if(StringAt(current, 2, "CH", ""))
                                {       
                                        //find 'michael'
                                        if((current > 0) AND StringAt(current, 4, "CHAE", ""))
                                        {
                                                MetaphAdd("K", "X");
                                                current +=2;
                                                break;
                                        }

                                        //greek roots e.g. 'chemistry', 'chorus'
                                        if((current == 0)
                                                AND (StringAt((current + 1), 5, "HARAC", "HARIS", "") 
                                                        OR StringAt((current + 1), 3, "HOR", "HYM", "HIA", "HEM", "")) 
                                                                AND !StringAt(0, 5, "CHORE", ""))
                                        {
                                                MetaphAdd("K");
                                                current +=2;
                                                break;
                                        }

                                        //germanic, greek, or otherwise 'ch' for 'kh' sound
                                        if((StringAt(0, 4, "VAN ", "VON ", "") OR StringAt(0, 3, "SCH", ""))
                                                // 'architect but not 'arch', 'orchestra', 'orchid'
                                                OR StringAt((current - 2), 6, "ORCHES", "ARCHIT", "ORCHID", "")
                                                        OR StringAt((current + 2), 1, "T", "S", "")
                                                                OR ((StringAt((current - 1), 1, "A", "O", "U", "E", "") OR (current == 0))
                                                                        //e.g., 'wachtler', 'wechsler', but not 'tichner'
                                                                        AND StringAt((current + 2), 1, "L", "R", "N", "M", "B", "H", "F", "V", "W", " ", "")))
                                        {
                                                MetaphAdd("K");
                                        }else{  
                                                if(current > 0)
                                                {
                                                        if(StringAt(0, 2, "MC", ""))
                                                                //e.g., "McHugh"
                                                                MetaphAdd("K");
                                                        else
                                                                MetaphAdd("X", "K");
                                                }else
                                                        MetaphAdd("X");
                                        }
                                        current +=2;
                                        break;
                                }
                                //e.g, 'czerny'
                                if(StringAt(current, 2, "CZ", "") AND !StringAt((current - 2), 4, "WICZ", ""))
                                {
                                        MetaphAdd("S", "X");
                                        current += 2;
                                        break;
                                }

                                //e.g., 'focaccia'
                                if(StringAt((current + 1), 3, "CIA", ""))
                                {
                                        MetaphAdd("X");
                                        current += 3;
                                        break;
                                }

                                //double 'C', but not if e.g. 'McClellan'
                                if(StringAt(current, 2, "CC", "") AND !((current == 1) AND (GetAt(0) == 'M')))
                                        //'bellocchio' but not 'bacchus'
                                        if(StringAt((current + 2), 1, "I", "E", "H", "") AND !StringAt((current + 2), 2, "HU", ""))
                                        {
                                                //'accident', 'accede' 'succeed'
                                                if(((current == 1) AND (GetAt(current - 1) == 'A')) 
                                                                OR StringAt((current - 1), 5, "UCCEE", "UCCES", ""))
                                                        MetaphAdd("KS");
                                                //'bacci', 'bertucci', other italian
                                                else
                                                        MetaphAdd("X");
                                                current += 3;
                                                break;
                                        }else{//Pierce's rule
                                                MetaphAdd("K");
                                                current += 2;
                                                break;
                                        }

                                if(StringAt(current, 2, "CK", "CG", "CQ", ""))
                                {
                                        MetaphAdd("K");
                                        current += 2;
                                        break;
                                }

                                if(StringAt(current, 2, "CI", "CE", "CY", ""))
                                {
                                        //italian vs. english
                                        if(StringAt(current, 3, "CIO", "CIE", "CIA", ""))
                                                MetaphAdd("S", "X");
                                        else
                                                MetaphAdd("S");
                                        current += 2;
                                        break;
                                }

                                //else
                                MetaphAdd("K");
                                
                                //name sent in 'mac caffrey', 'mac gregor
                                if(StringAt((current + 1), 2, " C", " Q", " G", ""))
                                        current += 3;
                                else
                                        if(StringAt((current + 1), 1, "C", "K", "Q", "") 
                                                AND !StringAt((current + 1), 2, "CE", "CI", ""))
                                                current += 2;
                                        else
                                                current += 1;
                                break;

                        case 'D':
                                if(StringAt(current, 2, "DG", ""))
                                        if(StringAt((current + 2), 1, "I", "E", "Y", ""))
                                        {
                                                //e.g. 'edge'
                                                MetaphAdd("J");
                                                current += 3;
                                                break;
                                        }else{
                                                //e.g. 'edgar'
                                                MetaphAdd("TK");
                                                current += 2;
                                                break;
                                        }

                                if(StringAt(current, 2, "DT", "DD", ""))
                                {
                                        MetaphAdd("T");
                                        current += 2;
                                        break;
                                }
                                
                                //else
                                MetaphAdd("T");
                                current += 1;
                                break;

                        case 'F':
                                if(GetAt(current + 1) == 'F')
                                        current += 2;
                                else
                                        current += 1;
                                MetaphAdd("F");
                                break;

                        case 'G':
                                if(GetAt(current + 1) == 'H')
                                {
                                        if((current > 0) AND !IsVowel(current - 1))
                                        {
                                                MetaphAdd("K");
                                                current += 2;
                                                break;
                                        }

                                        if(current < 3)
                                        {
                                                //'ghislane', ghiradelli
                                                if(current == 0)
                                                { 
                                                        if(GetAt(current + 2) == 'I')
                                                                MetaphAdd("J");
                                                        else
                                                                MetaphAdd("K");
                                                        current += 2;
                                                        break;
                                                }
                                        }
                                        //Parker's rule (with some further refinements) - e.g., 'hugh'
                                        if(((current > 1) AND StringAt((current - 2), 1, "B", "H", "D", "") )
                                                //e.g., 'bough'
                                                OR ((current > 2) AND StringAt((current - 3), 1, "B", "H", "D", "") )
                                                //e.g., 'broughton'
                                                OR ((current > 3) AND StringAt((current - 4), 1, "B", "H", "") ) )
                                        {
                                                current += 2;
                                                break;
                                        }else{
                                                //e.g., 'laugh', 'McLaughlin', 'cough', 'gough', 'rough', 'tough'
                                                if((current > 2) 
                                                        AND (GetAt(current - 1) == 'U') 
                                                        AND StringAt((current - 3), 1, "C", "G", "L", "R", "T", "") )
                                                {
                                                        MetaphAdd("F");
                                                }else
                                                        if((current > 0) AND GetAt(current - 1) != 'I')
                                                                MetaphAdd("K");

                                                current += 2;
                                                break;
                                        }
                                }

                                if(GetAt(current + 1) == 'N')
                                {
                                        if((current == 1) AND IsVowel(0) AND !SlavoGermanic())
                                        {
                                                MetaphAdd("KN", "N");
                                        }else
                                                //not e.g. 'cagney'
                                                if(!StringAt((current + 2), 2, "EY", "") 
                                                                AND (GetAt(current + 1) != 'Y') AND !SlavoGermanic())
                                                {
                                                        MetaphAdd("N", "KN");
                                                }else
                                                        MetaphAdd("KN");
                                        current += 2;
                                        break;
                                }

                                //'tagliaro'
                                if(StringAt((current + 1), 2, "LI", "") AND !SlavoGermanic())
                                {
                                        MetaphAdd("KL", "L");
                                        current += 2;
                                        break;
                                }

                                //-ges-,-gep-,-gel-, -gie- at beginning
                                if((current == 0)
                                        AND ((GetAt(current + 1) == 'Y') 
                                                OR StringAt((current + 1), 2, "ES", "EP", "EB", "EL", "EY", "IB", "IL", "IN", "IE", "EI", "ER", "")) )
                                {
                                        MetaphAdd("K", "J");
                                        current += 2;
                                        break;
                                }

                                // -ger-,  -gy-
                                if((StringAt((current + 1), 2, "ER", "") OR (GetAt(current + 1) == 'Y'))
                                                AND !StringAt(0, 6, "DANGER", "RANGER", "MANGER", "")
                                                        AND !StringAt((current - 1), 1, "E", "I", "") 
                                                                AND !StringAt((current - 1), 3, "RGY", "OGY", "") )
                                {
                                        MetaphAdd("K", "J");
                                        current += 2;
                                        break;
                                }

                                // italian e.g, 'biaggi'
                                if(StringAt((current + 1), 1, "E", "I", "Y", "") OR StringAt((current - 1), 4, "AGGI", "OGGI", ""))
                                {
                                        //obvious germanic
                                        if((StringAt(0, 4, "VAN ", "VON ", "") OR StringAt(0, 3, "SCH", ""))
                                                OR StringAt((current + 1), 2, "ET", ""))
                                                MetaphAdd("K");
                                        else
                                                //always soft if french ending
                                                if(StringAt((current + 1), 4, "IER ", ""))
                                                        MetaphAdd("J");
                                                else
                                                        MetaphAdd("J", "K");
                                        current += 2;
                                        break;
                                }

                                if(GetAt(current + 1) == 'G')
                                        current += 2;
                                else
                                        current += 1;
                                MetaphAdd("K");
                                break;

                        case 'H':
                                //only keep if first & before vowel or btw. 2 vowels
                                if(((current == 0) OR IsVowel(current - 1)) 
                                        AND IsVowel(current + 1))
                                {
                                        MetaphAdd("H");
                                        current += 2;
                                }else//also takes care of 'HH'
                                        current += 1;
                                break;

                        case 'J':
                                //obvious spanish, 'jose', 'san jacinto'
                                if(StringAt(current, 4, "JOSE", "") OR StringAt(0, 4, "SAN ", "") )
                                {
                                        if(((current == 0) AND (GetAt(current + 4) == ' ')) OR StringAt(0, 4, "SAN ", "") )
                                                MetaphAdd("H");
                                        else
                                        {
                                                MetaphAdd("J", "H");
                                        }
                                        current +=1;
                                        break;
                                }

                                if((current == 0) AND !StringAt(current, 4, "JOSE", ""))
                                        MetaphAdd("J", "A");//Yankelovich/Jankelowicz
                                else
                                        //spanish pron. of e.g. 'bajador'
                                        if(IsVowel(current - 1) 
                                                AND !SlavoGermanic()
                                                        AND ((GetAt(current + 1) == 'A') OR (GetAt(current + 1) == 'O')))
                                                MetaphAdd("J", "H");
                                        else
                                                if(current == last)
                                                        MetaphAdd("J", " ");
                                                else
                                                        if(!StringAt((current + 1), 1, "L", "T", "K", "S", "N", "M", "B", "Z", "") 
                                                                        AND !StringAt((current - 1), 1, "S", "K", "L", ""))
                                                                MetaphAdd("J");

                                if(GetAt(current + 1) == 'J')//it could happen!
                                        current += 2;
                                else
                                        current += 1;
                                break;

                        case 'K':
                                if(GetAt(current + 1) == 'K')
                                        current += 2;
                                else
                                        current += 1;
                                MetaphAdd("K");
                                break;

                        case 'L':
                                if(GetAt(current + 1) == 'L')
                                {
                                        //spanish e.g. 'cabrillo', 'gallegos'
                                        if(((current == (length - 3)) 
                                                AND StringAt((current - 1), 4, "ILLO", "ILLA", "ALLE", ""))
                                                         OR ((StringAt((last - 1), 2, "AS", "OS", "") OR StringAt(last, 1, "A", "O", "")) 
                                                                AND StringAt((current - 1), 4, "ALLE", "")) )
                                        {
                                                MetaphAdd("L", " ");
                                                current += 2;
                                                break;
                                        }
                                        current += 2;
                                }else
                                        current += 1;
                                MetaphAdd("L");
                                break;

                        case 'M':
                                if((StringAt((current - 1), 3, "UMB", "") 
                                        AND (((current + 1) == last) OR StringAt((current + 2), 2, "ER", "")))
                                                //'dumb','thumb'
                                                OR  (GetAt(current + 1) == 'M') )
                                        current += 2;
                                else
                                        current += 1;
                                MetaphAdd("M");
                                break;

                        case 'N':
                                if(GetAt(current + 1) == 'N')
                                        current += 2;
                                else
                                        current += 1;
                                MetaphAdd("N");
                                break;

                        case 'Ñ':
                                current += 1;
                                MetaphAdd("N");
                                break;

                        case 'P':
                                if(GetAt(current + 1) == 'H')
                                {
                                        MetaphAdd("F");
                                        current += 2;
                                        break;
                                }

                                //also account for "campbell", "raspberry"
                                if(StringAt((current + 1), 1, "P", "B", ""))
                                        current += 2;
                                else
                                        current += 1;
                                        MetaphAdd("P");
                                break;

                        case 'Q':
                                if(GetAt(current + 1) == 'Q')
                                        current += 2;
                                else
                                        current += 1;
                                MetaphAdd("K");
                                break;

                        case 'R':
                                //french e.g. 'rogier', but exclude 'hochmeier'
                                if((current == last)
                                        AND !SlavoGermanic()
                                                AND StringAt((current - 2), 2, "IE", "") 
                                                        AND !StringAt((current - 4), 2, "ME", "MA", ""))
                                        MetaphAdd("", "R");
                                else
                                        MetaphAdd("R");

                                if(GetAt(current + 1) == 'R')
                                        current += 2;
                                else
                                        current += 1;
                                break;

                        case 'S':
                                //special cases 'island', 'isle', 'carlisle', 'carlysle'
                                if(StringAt((current - 1), 3, "ISL", "YSL", ""))
                                {
                                        current += 1;
                                        break;
                                }

                                //special case 'sugar-'
                                if((current == 0) AND StringAt(current, 5, "SUGAR", ""))
                                {
                                        MetaphAdd("X", "S");
                                        current += 1;
                                        break;
                                }

                                if(StringAt(current, 2, "SH", ""))
                                {
                                        //germanic
                                        if(StringAt((current + 1), 4, "HEIM", "HOEK", "HOLM", "HOLZ", ""))
                                                MetaphAdd("S");
                                        else
                                                MetaphAdd("X");
                                        current += 2;
                                        break;
                                }

                                //italian & armenian
                                if(StringAt(current, 3, "SIO", "SIA", "") OR StringAt(current, 4, "SIAN", ""))
                                {
                                        if(!SlavoGermanic())
                                                MetaphAdd("S", "X");
                                        else
                                                MetaphAdd("S");
                                        current += 3;
                                        break;
                                }

                                //german & anglicisations, e.g. 'smith' match 'schmidt', 'snider' match 'schneider'
                                //also, -sz- in slavic language altho in hungarian it is pronounced 's'
                                if(((current == 0) 
                                                AND StringAt((current + 1), 1, "M", "N", "L", "W", ""))
                                                        OR StringAt((current + 1), 1, "Z", ""))
                                {
                                        MetaphAdd("S", "X");
                                        if(StringAt((current + 1), 1, "Z", ""))
                                                current += 2;
                                        else
                                                current += 1;
                                        break;
                                }

                                if(StringAt(current, 2, "SC", ""))
                                {
                                        //Schlesinger's rule
                                        if(GetAt(current + 2) == 'H')
                                                //dutch origin, e.g. 'school', 'schooner'
                                                if(StringAt((current + 3), 2, "OO", "ER", "EN", "UY", "ED", "EM", ""))
                                                {
                                                        //'schermerhorn', 'schenker'
                                                        if(StringAt((current + 3), 2, "ER", "EN", ""))
                                                        {
                                                                MetaphAdd("X", "SK");
                                                        }else
                                                                MetaphAdd("SK");
                                                        current += 3;
                                                        break;
                                                }else{
                                                        if((current == 0) AND !IsVowel(3) AND (GetAt(3) != 'W'))
                                                                MetaphAdd("X", "S");
                                                        else
                                                                MetaphAdd("X");
                                                        current += 3;
                                                        break;
                                                }

                                        if(StringAt((current + 2), 1, "I", "E", "Y", ""))
                                        {
                                                MetaphAdd("S");
                                                current += 3;
                                                break;
                                        }
                                        //else
                                        MetaphAdd("SK");
                                        current += 3;
                                        break;
                                }

                                //french e.g. 'resnais', 'artois'
                                if((current == last) AND StringAt((current - 2), 2, "AI", "OI", ""))
                                        MetaphAdd("", "S");
                                else
                                        MetaphAdd("S");

                                if(StringAt((current + 1), 1, "S", "Z", ""))
                                        current += 2;
                                else
                                        current += 1;
                                break;

                        case 'T':
                                if(StringAt(current, 4, "TION", ""))
                                {
                                        MetaphAdd("X");
                                        current += 3;
                                        break;
                                }

                                if(StringAt(current, 3, "TIA", "TCH", ""))
                                {
                                        MetaphAdd("X");
                                        current += 3;
                                        break;
                                }

                                if(StringAt(current, 2, "TH", "") 
                                        OR StringAt(current, 3, "TTH", ""))
                                {
                                        //special case 'thomas', 'thames' or germanic
                                        if(StringAt((current + 2), 2, "OM", "AM", "") 
                                                OR StringAt(0, 4, "VAN ", "VON ", "") 
                                                        OR StringAt(0, 3, "SCH", ""))
                                        {
                                                MetaphAdd("T");
                                        }else{
                                                MetaphAdd("0", "T");
                                        }
                                        current += 2;
                                        break;
                                }

                                if(StringAt((current + 1), 1, "T", "D", ""))
                                        current += 2;
                                else
                                        current += 1;
                                MetaphAdd("T");
                                break;

                        case 'V':
                                if(GetAt(current + 1) == 'V')
                                        current += 2;
                                else
                                        current += 1;
                                MetaphAdd("F");
                                break;

                        case 'W':
                                //can also be in middle of word
                                if(StringAt(current, 2, "WR", ""))
                                {
                                        MetaphAdd("R");
                                        current += 2;
                                        break;
                                }

                                if((current == 0) 
                                        AND (IsVowel(current + 1) OR StringAt(current, 2, "WH", "")))
                                {
                                        //Wasserman should match Vasserman
                                        if(IsVowel(current + 1))
                                                MetaphAdd("A", "F");
                                        else
                                                //need Uomo to match Womo
                                                MetaphAdd("A");
                                }

                                //Arnow should match Arnoff
                                if(((current == last) AND IsVowel(current - 1)) 
                                        OR StringAt((current - 1), 5, "EWSKI", "EWSKY", "OWSKI", "OWSKY", "") 
                                                        OR StringAt(0, 3, "SCH", ""))
				  {
                                        MetaphAdd("", "F");
                                        current +=1;
                                        break;
                                }

                                //polish e.g. 'filipowicz'
                                if(StringAt(current, 4, "WICZ", "WITZ", ""))
                                {
                                        MetaphAdd("TS", "FX");
                                        current +=4;
                                        break;
                                }

                                //else skip it
                                current +=1;
                                break;

                        case 'X':
                                //french e.g. breaux
                                if(!((current == last) 
                                        AND (StringAt((current - 3), 3, "IAU", "EAU", "") 
                                                        OR StringAt((current - 2), 2, "AU", "OU", ""))) )
                                        MetaphAdd("KS");

                                if(StringAt((current + 1), 1, "C", "X", ""))
                                        current += 2;
                                else
                                        current += 1;
                                break;

                        case 'Z':
                                //chinese pinyin e.g. 'zhao'
                                if(GetAt(current + 1) == 'H')
                                {
                                        MetaphAdd("J");
                                        current += 2;
                                        break;
                                }else
                                        if(StringAt((current + 1), 2, "ZO", "ZI", "ZA", "") 
                                                OR (SlavoGermanic() AND ((current > 0) AND GetAt(current - 1) != 'T')))
                                        {
                                                MetaphAdd("S", "TS");
                                        }
                                        else
                                                MetaphAdd("S");

                                if(GetAt(current + 1) == 'Z')
                                        current += 2;
                                else
                                        current += 1;
                                break;

                        default:
                                current += 1;
                }
        }

        metaph  = primary;
        //only give back 4 char metaph
        //if(metaph.GetLength() > 4)
        //        metaph.SetAt(4,'\0');
        if(alternate)
        {
	        metaph2 = secondary;
                //if(metaph2.GetLength() > 4)
                //        metaph2.SetAt(4,'\0');
        }

}
示例#20
0
//Builds the English text for a noun phrase.
//
//The pieces are emitted in this order, each followed by a single space when non-empty:
//  preposition, article, numeral, adjectives, noun
//optionally followed by a genitive phrase (" <GenitiveMarker> <noun phrase>") and a
//relative clause (" <marker> <sentence>").
//
//NounObj: the noun to render. A null pointer or a noun whose ID is 0 yields "".
//ObjCase: forwarded unchanged to GetNoun for the head noun. The genitive noun is always
//         rendered with ObjCase = true.
//Returns the assembled phrase.
//
//Side effect: when a relative clause is rendered, its IsClause flag is set to true.
std::string en::GetNounString(Noun* NounObj, bool ObjCase)
{
	//Nothing to render. The null check also protects the recursive call made for the genitive noun.
	if (!NounObj || NounObj->ID==0) return "";

	//Build "adjectives + noun" first: the article needs to know the first letter of
	//whatever follows it so that 'a' can become 'an'.
	std::string NounResult;

	//Append every adjective that resolves to a non-empty string, in slot order.
	for (int i = 0; i < 16; ++i)
	{
		const std::string Adjective = GetAdjective(NounObj->Adjectives[i]);
		if (!Adjective.empty())
			NounResult += Adjective + " ";
	}

	//Append the noun itself after the adjectives.
	NounResult += GetNoun(NounObj,ObjCase);

	//Get the article, telling it whether NounResult begins with a vowel.
	const std::string Article = GetArticle(NounObj,IsVowel(NounResult[0]));

	//Get the numeral
	const std::string Numeral = GetNumeral(NounObj,false);

	//Get the preposition
	const std::string Prepos = GetPrepos(NounObj->PreposNum);

	//Assemble the final string: preposition, article, numeral, then adjectives + noun.
	std::string NounString;

	if (!Prepos.empty())
		NounString += Prepos + " ";

	if (!Article.empty())
		NounString += Article + " ";

	//The numeral is not part of NounResult, so it does not influence the a/an choice.
	//That should not matter, as a numeral is not expected together with a/an.
	if (!Numeral.empty())
		NounString += Numeral + " ";

	NounString += NounResult;

	//If there is a genitive object, add it. A missing genitive noun is skipped rather than dereferenced.
	if (NounObj->ShouldUseGenitive && NounObj->GenitiveNoun)
	{
		NounString += " " + GenitiveMarker + " ";
		NounString += GetNounString(NounObj->GenitiveNoun,true);
	}

	//If there is a relative clause, add it. A missing clause object is skipped rather than dereferenced.
	if (NounObj->ShouldUseRelativeClause && NounObj->RelativeClause)
	{
		NounObj->RelativeClause->IsClause = true;

		//Noun types 'm', 'f', 'p' and 'd' take the personal marker.
		const int NounType = GetNounType(NounObj->ID);
		if (NounType == 'm' || NounType == 'f' || NounType == 'p' || NounType == 'd')
		{
			NounString += " " + RClausePersonalMarker;
		}
		//Otherwise an essential clause takes the essential marker...
		else if (NounObj->IsRelativeClauseEssential)
		{
			NounString += " " + RClauseEssentialMarker;
		}
		//...and a non-essential clause takes the non-essential marker.
		else
		{
			NounString += " " + RClauseNonEssentialMarker;
		}

		//Append the clause
		NounString += " " + NounObj->RelativeClause->createSentence();
	}

	//Return the result.
	return NounString;
}
示例#21
0
// Computes a weighted "spelling distance" between a dictionary word and the
// word the user actually typed. Lower is closer.
//
// dictWordData  dictionary candidate; a lower-cased copy of its .word is walked
//               (original note: dictword has no underscores).
// realWordData  the user's word; its .word is walked as-is (original note: the
//               input is already lower case).
// min           best cost found so far by the caller; as soon as the running
//               cost exceeds it the function gives up.
//
// Returns:
//   1000  when the running cost exceeds `min` (candidate is no good),
//   200   when the user's word contains one of ? ! ( ) [ ] { } (bad
//         tokenization - do not spell-correct it away),
//   otherwise the accumulated cost.
//
// Both words are consumed one character at a time through IsUTF8(), which is
// used here as "copy the character at the pointer into the buffer and return
// the pointer to resume from" (presumably one whole UTF-8 sequence per call -
// confirm against IsUTF8's definition). The rules below are tried strictly in
// order; the first one that applies charges its cost, advances the two cursors
// and restarts the loop.
static int EditDistance(WORDINFO& dictWordData, WORDINFO& realWordData,int min)
{
    char dictw[MAX_WORD_SIZE];
    MakeLowerCopy(dictw, dictWordData.word);
    char* dictinfo = dictw;               // cursor in the (lower-cased) dictionary word
    char* realinfo = realWordData.word;   // cursor in the user's word
    int val = 0; //   a difference in length will manifest as a difference in letter count

    // Character buffers: previous, current, next and next-but-one for each word.
    char priorCharDict[10];
    char priorCharReal[10];
    *priorCharDict = *priorCharReal = 0;
    char currentCharReal[10];
    char currentCharDict[10];
    *currentCharReal = *currentCharDict = 0;
    char nextCharReal[10];
    char nextCharDict[10];
    char next1CharReal[10];
    char next1CharDict[10];

    while (ALWAYS)
    {
        if (val > min) return 1000; // no good
        strcpy(priorCharReal, currentCharReal);
        strcpy(priorCharDict, currentCharDict);

        char* resumeReal = IsUTF8(realinfo, currentCharReal);
        char* resumeDict = IsUTF8(dictinfo, currentCharDict);
        if (!*currentCharReal && !*currentCharDict) break; // both at end
        if (!*currentCharReal || !*currentCharDict) // one ending, other has to catch up by adding a letter
        {
            val += 16; // add a letter
            if (*priorCharReal == *currentCharDict) val -= 10; // doubling letter at end
            dictinfo = resumeDict;
            realinfo = resumeReal;
            continue;
        }

        // punctuation in a word is bad tokenization, dont spell check it away
        if (*currentCharReal == '?' || *currentCharReal == '!' || *currentCharReal == '('
            || *currentCharReal == ')' || *currentCharReal == '[' || *currentCharReal == ']'
            || *currentCharReal == '{' || *currentCharReal == '}') return 200; // dont mess with this

        // Look one and two characters ahead in both words.
        char* resumeReal1 = IsUTF8(resumeReal, nextCharReal);
        char* resumeDict1 = IsUTF8(resumeDict, nextCharDict);
        char* resumeReal2 = IsUTF8(resumeReal1, next1CharReal); // 2 char ahead
        char* resumeDict2 = IsUTF8(resumeDict1, next1CharDict);
        const char baseCharReal = UnaccentedChar(currentCharReal);
        const char baseCharDict = UnaccentedChar(currentCharDict);

        if (!stricmp(currentCharReal, currentCharDict)) // match chars
        {
            dictinfo = resumeDict;
            realinfo = resumeReal;
            continue;
        }
        if (baseCharReal && baseCharReal == baseCharDict)
        {
            dictinfo = resumeDict;
            realinfo = resumeReal;
            val += 1; // minimal charge but separate forms of delivre
            continue;
        }

        // treat german double s and ss equivalent
        if (!stricmp(language, "german"))
        {
            // 0xC3 0x9F is the UTF-8 encoding of the sharp s. The bytes must be
            // compared as unsigned char: a plain (possibly signed) char holding
            // 0xC3 promotes to a negative int and would never equal 0xc3.
            const bool realIsSharpS = static_cast<unsigned char>(currentCharReal[0]) == 0xc3
                && static_cast<unsigned char>(currentCharReal[1]) == 0x9f;
            if (realIsSharpS && *currentCharDict == 's' && *nextCharDict == 's')
            {
                dictinfo = resumeDict1; // dictionary consumed "ss"
                realinfo = resumeReal;
                continue;
            }
            const bool dictIsSharpS = static_cast<unsigned char>(currentCharDict[0]) == 0xc3
                && static_cast<unsigned char>(currentCharDict[1]) == 0x9f;
            if (dictIsSharpS && *currentCharReal == 's' && *nextCharReal == 's')
            {
                dictinfo = resumeDict;
                realinfo = resumeReal1; // user consumed "ss"
                continue;
            }
        }

        // spanish alternative spellings (all free of charge)
        if (!stricmp(language, "spanish")) // ch-x | qu-k | c-k | do-o | b-v | bue-w | vue-w | z-s | s-c | h- | y-i | y-ll | m-n  1st is valid
        {
            // one letter standing for one letter
            if ((*currentCharReal == 'c' && *currentCharDict == 'k')
                || (*currentCharReal == 'b' && *currentCharDict == 'v')
                || (*currentCharReal == 'v' && *currentCharDict == 'b')
                || (*currentCharReal == 'z' && *currentCharDict == 's')
                || (*currentCharReal == 's' && *currentCharDict == 'c')
                || (*currentCharReal == 'y' && *currentCharDict == 'i')
                || (*currentCharReal == 'm' && *currentCharDict == 'n')
                || (*currentCharReal == 'n' && *currentCharDict == 'm'))
            {
                dictinfo = resumeDict;
                realinfo = resumeReal;
                continue;
            }
            // an h in the dictionary word is skipped; the user's cursor stays put
            if (*currentCharDict == 'h')
            {
                dictinfo = resumeDict;
                continue;
            }
            // one user letter standing for two dictionary letters
            if ((*currentCharReal == 'x' && *currentCharDict == 'c' && *nextCharDict == 'h')
                || (*currentCharReal == 'k' && *currentCharDict == 'q' && *nextCharDict == 'u')
                || (*currentCharReal == 'o' && *currentCharDict == 'd' && *nextCharDict == 'o'))
            {
                dictinfo = resumeDict1;
                realinfo = resumeReal;
                continue;
            }
            // user "w" standing for dictionary "bue" / "vue"
            if (*currentCharReal == 'w' && (*currentCharDict == 'b' || *currentCharDict == 'v')
                && *nextCharDict == 'u' && *next1CharDict == 'e')
            {
                dictinfo = resumeDict2;
                realinfo = resumeReal;
                continue;
            }
            // user "ll" standing for dictionary "y"
            if (*currentCharReal == 'l' && *nextCharReal == 'l' && *currentCharDict == 'y')
            {
                dictinfo = resumeDict;
                realinfo = resumeReal1;
                continue;
            }
            // user "y" standing for dictionary "ll"
            if (*currentCharReal == 'y' && *currentCharDict == 'l' && *nextCharDict == 'l')
            {
                dictinfo = resumeDict1;
                realinfo = resumeReal;
                continue;
            }
        }

        // french common bad spellings (each carries its own small charge)
        if (!stricmp(language, "french"))
        {
            if (*currentCharReal == 'a' && SameUTF(currentCharDict,"â"))
            {
                val += 1;
                dictinfo = resumeDict;
                realinfo = resumeReal;
                continue;
            }
            if (*currentCharReal == 'e' && SameUTF(currentCharDict,"ê"))
            {
                val += 10;
                dictinfo = resumeDict;
                realinfo = resumeReal;
                continue;
            }
            // 0xC3 0xA8 is the UTF-8 encoding of e-grave; compare as unsigned
            // bytes for the same reason as the sharp s above.
            if (static_cast<unsigned char>(currentCharReal[0]) == 0xc3
                && static_cast<unsigned char>(currentCharReal[1]) == 0xa8
                && SameUTF(currentCharDict,"ê"))
            {
                val += 5;
                dictinfo = resumeDict;
                realinfo = resumeReal;
                continue;
            }
            if (*currentCharReal == 'i' && SameUTF(currentCharDict,"î"))
            {
                val += 1;
                dictinfo = resumeDict;
                realinfo = resumeReal;
                continue;
            }
            if (*currentCharReal == 'o' && SameUTF(currentCharDict, "ô"))
            {
                val += 1;
                dictinfo = resumeDict;
                realinfo = resumeReal;
                continue;
            }
            if (*currentCharReal == 'u' && SameUTF(currentCharDict, "û"))
            {
                val += 5;
                dictinfo = resumeDict;
                realinfo = resumeReal;
                continue;
            }
            if (*currentCharReal == 'y' && *currentCharDict == 'l' && *nextCharDict == 'l')
            {
                val += 10;
                dictinfo = resumeDict1;
                realinfo = resumeReal;
                continue;
            }
            if (*currentCharReal == 'k' && *currentCharDict == 'q' && *nextCharDict == 'u')
            {
                val += 10;
                dictinfo = resumeDict1;
                realinfo = resumeReal;
                continue;
            }
            if (*currentCharReal == 'f' && *currentCharDict == 'p' && *nextCharDict == 'h')
            {
                val += 5;
                dictinfo = resumeDict1;
                realinfo = resumeReal;
                continue;
            }
            if (*currentCharReal == 's' && *currentCharDict == 'c')
            {
                val += 10;
                dictinfo = resumeDict;
                realinfo = resumeReal;
                continue;
            }
            if (*currentCharReal == 's' && SameUTF(currentCharDict, "ç"))
            {
                val += 5;
                dictinfo = resumeDict;
                realinfo = resumeReal;
                continue;
            }
            if (*currentCharReal == 'c' && SameUTF(currentCharDict, "ç"))
            {
                val += 5;
                dictinfo = resumeDict;
                realinfo = resumeReal;
                continue;
            }
        }

        // probable transposition since swapping syncs up
        if (!strcmp(currentCharReal, nextCharDict) && !strcmp(nextCharReal, currentCharDict))
        {
            val += 16; // more expensive if next is not correct after transposition
            dictinfo = resumeDict2; // skip ahead 2
            realinfo = resumeReal2;
            continue;
        }

        // probable mistyped letter since next matches up
        if (!strcmp(nextCharReal, nextCharDict))
        {
            val += 16; // more expensive if 2nd next is not correct after transposition
            if (*currentCharReal == 's' && *currentCharDict == 'z') val -= 5;
            else if (*currentCharReal == 'z' && *currentCharDict == 's') val -= 5;
            else if (IsVowel(*currentCharReal) && IsVowel(*currentCharDict)) val -= 6; //  low cost for switching a vowel

            dictinfo = resumeDict;
            realinfo = resumeReal;
            continue;
        }

        // probable excess letter by user since next matches up to current
        if (!strcmp(nextCharReal, currentCharDict))
        {
            val += 16;  // only delete 1 letter

            if (*priorCharDict == *currentCharReal) val -= 14; // low cost for dropping an excess repeated letter (wherre->where not wherry)
            else if (*currentCharReal == '-') val -= 10; //   very low cost for removing a hyphen

            dictinfo = resumeDict;  // dictionary letter is matched by the user's next letter
            realinfo = resumeReal1; // move on past our junk letter and the matching letter
            continue;
        }

        // probable missing letter by user since current matches up to nextdict
        if (!strcmp(nextCharDict, currentCharReal))
        {
            val += 16; // only add 1 letter
            // better to add repeated letter than to drop a letter
            if (*currentCharDict == *priorCharReal) val -= 6; // low cost for adding a repeated letter
            else if (*currentCharDict == 'e' && *nextCharDict == 'o') val -= 10; // yoman->yeoman
            dictinfo = resumeDict1; // skip the missing letter and the letter the user did type
            realinfo = resumeReal;
            continue;
        }

        // complete mismatch with no understanding of why, just charge and move on
        dictinfo = resumeDict;
        realinfo = resumeReal;
        val += 16;
    }
    return val;
}