Пример #1
0
/*
 * Look up the text between bp and ep with dict().
 *
 * Returns 0 without a lookup when the span is at most one character long,
 * or when *ep is a vowel and monosyl(bp, ep) holds.  If the first lookup
 * fails, *ep is a vowel, the two characters before ep are equal and
 * monosyl(bp, ep - 1) holds, the lookup is retried one character shorter
 * and "+<letter>" is recorded in deriv[lev + 1].
 *
 * When vflag is set and the lookup succeeded, every non-null deriv[]
 * entry from lev down to 1 is appended to affix.  Returns dict()'s result.
 */
int
tryword(char *bp, char *ep, int lev)
{
	char duple[3];
	int found, k;

	if (ep - bp <= 1)
		return (0);
	if (vowel(*ep) && monosyl(bp, ep))
		return (0);

	found = dict(bp, ep);
	if (found == 0 && vowel(*ep) && ep[-1] == ep[-2] &&
	    monosyl(bp, ep - 1)) {
		/* Retry with the doubled letter undone. */
		ep--;
		duple[0] = '+';
		duple[1] = *ep;
		duple[2] = '\0';
		deriv[++lev] = duple;
		found = dict(bp, ep);
	}
	if (found == 0 || vflag == 0)
		return (found);

	/* Also tack on possible derivations. (XXX - warn on truncation?) */
	for (k = lev; k > 0; k--) {
		if (!deriv[k])
			continue;
		strlcat(affix, deriv[k], sizeof(affix));
	}
	return (found);
}
Пример #2
0
/*
 * Scan backwards from s: step over one vowel if s points at one, then
 * step over every following non-vowel.  Characters before `word` are
 * never examined.  Returns the position where the scan stopped, which
 * is word - 1 when no vowel ended it.
 */
char *
skipv(char *s)
{
	char *p = s;

	if (p >= word && vowel(*p))
		p--;
	for (; p >= word && !vowel(*p); p--)
		continue;
	return (p);
}
Пример #3
0
/*
 * Test the characters ending at index z->j of z->b.
 *
 * Returns TRUE when b[j] is not a vowel and either
 *   - j == 1 and b[0] is a vowel, or
 *   - j > 1, b[j-1] is a vowel, b[j-2] satisfies cons(), and b[j] is
 *     none of 'w', 'x', 'Y'.
 * Returns FALSE otherwise.
 */
static int shortv(struct stemmer * z) {
    char * buf = z->b;
    int pos = z->j;
    int last = buf[pos];

    if (vowel(last))
        return FALSE;
    if (pos < 1)
        return FALSE;
    if (pos == 1)
        return vowel(buf[0]) ? TRUE : FALSE;

    /* pos > 1 from here on */
    if (last == 'w' || last == 'x' || last == 'Y')
        return FALSE;
    if (vowel(buf[pos - 1]) && cons(buf[pos - 2]))
        return TRUE;
    return FALSE;
}
Пример #4
0
/*
 * Return 1 when the text from bp up to (not including) ep is at least
 * two characters long, ends in a vowel followed by a single non-vowel
 * other than 'x' or 'w', and contains no other vowel before that pair.
 * Return 0 otherwise.
 */
int
monosyl(char *bp, char *ep)
{
	char *last, *before, *p;

	if (ep < bp + 2)
		return (0);

	last = ep - 1;
	if (vowel(*last))
		return (0);
	before = ep - 2;
	if (!vowel(*before))
		return (0);
	if (*last == 'x' || *last == 'w')
		return (0);

	/* Everything ahead of the vowel must be vowel-free. */
	for (p = before - 1; p >= bp; p--) {
		if (vowel(*p))
			return (0);
	}
	return (1);
}
Пример #5
0
/*
 * Return 1 if s contains three consecutive characters accepted by
 * vowel() (the first must make vowel() return exactly 1), else 0.
 */
int checkv(char s[])
{
    int len = strlen(s);
    int pos = 0;

    while (pos < len)
    {
        if (vowel(s[pos]) == 1 && vowel(s[pos + 1]) && vowel(s[pos + 2]))
            return 1;
        pos++;
    }
    return 0;
}
Пример #6
0
/*
 * Report whether a phoneme counts as "heavy".
 *
 * Returns 1 when phon is one of the listed phoneme strings, or when
 * vowel(phon) is false; returns 0 otherwise.
 *
 * NOTE(review): the original if/else chain compared against "ai" twice,
 * so its second branch could never run.  The duplicate may have been
 * meant for a different phoneme -- that cannot be determined from this
 * file -- so only the dead comparison has been removed here.
 */
int tsFreePhoneImplementation::heavy(char *phon)	/* according to what I understand by heavy  */
{
	static const char *const heavy_phonemes[] = {
		"ai", "ie", "oi", "oa", "ou", "eer", "air", "oor"
	};
	const int count = (int)(sizeof(heavy_phonemes) / sizeof(heavy_phonemes[0]));
	int i;

	for (i = 0; i < count; i++) {
		if (!strcmp(phon, heavy_phonemes[i]))
			return(1);
	}

	/* Anything that is not a vowel is treated as heavy too. */
	return(vowel(phon) ? 0 : 1);
}
Пример #7
0
/*
 * Find the first suftab entry whose pattern matches the characters just
 * before ep (the pattern is read forwards while the word is read
 * backwards) and run the handlers attached to it.
 *
 * Once an entry matches, the function always returns without trying
 * later entries: 0 if no vowel precedes the matched text, 1 if p1
 * succeeds, otherwise p2's result (or 0 when there is no p2).
 * Returns 0 when no entry matches.
 */
int
suffix(char *ep, int lev)
{
	struct suftab *entry;
	char *pat, *scan;
	int matched;

	lev += DLEV;
	deriv[lev - 1] = 0;
	deriv[lev] = 0;
	for (entry = suftab; (pat = entry->suf) != 0; entry++) {
		scan = ep;
		matched = 1;
		while (*pat != '\0') {
			if (*--scan != *pat++) {
				matched = 0;
				break;
			}
		}
		if (!matched)
			continue;

		/* Look for a vowel somewhere ahead of the matched text. */
		pat = scan - 1;
		while (pat >= word && !vowel(*pat))
			pat--;
		if (pat < word)
			return (0);

		if ((*entry->p1)(ep - entry->n1, entry->d1, entry->a1, lev + 1))
			return (1);
		if (entry->p2 == NULL)
			return (0);

		/* Clear the derivation slots before the second attempt. */
		deriv[lev + 1] = 0;
		deriv[lev] = 0;
		return ((*entry->p2)(ep - entry->n2, entry->d2, entry->a2, lev));
	}
	return (0);
}
Пример #8
0
/*
 * Return 0 when the lower-cased character is a vowel, 'h' or 'l';
 * return 1 for anything else.
 */
static int not_usually_after_l(int c0)
{
    int lower = tolower(c0);

    if (vowel(lower))
        return 0;
    if (lower == 'h' || lower == 'l')
        return 0;
    return 1;
}
Пример #9
0
/*
 * Read up to ten characters from stdin, push each onto a list, print the
 * list, pass it to vowel(), and print it again.
 *
 * Reading stops early on EOF or a read error, so an indeterminate
 * character is never pushed.  Returns 0.
 */
int main(void)
{
	struct SLL *h = NULL;
	int i;
	char ch;

	for (i = 0; i < 10; i++) {
		if (scanf("%c", &ch) != 1)
			break;	/* nothing was read: do not push stale data */
		push(&h, ch);
	}
	print_list(h);
	printf("\n\n");

	vowel(h);
	print_list(h);
	printf("\n\n");

	return 0;
}
    // Return a copy of s in which the vowels (a, e, i, o, u in either
    // case) appear in reverse order; all other characters stay in place.
    string reverseVowels(string s) {
        const string vowels("aoeiuAOEIU");

        if (s.size() < 2)
            return s;

        size_t lo = 0;
        size_t hi = s.size() - 1;

        while (lo < hi) {
            if (vowels.find(s[lo]) == string::npos) {
                ++lo;               // left side is not on a vowel yet
                continue;
            }
            if (vowels.find(s[hi]) == string::npos) {
                --hi;               // right side is not on a vowel yet
                continue;
            }
            std::swap(s[lo], s[hi]);
            ++lo;
            --hi;
        }
        return s;
    }
Пример #11
0
/*
 * Possible vowel-consonant-e ending.
 *
 * Returns 0 if the character before ep is 'e'.  If that character is a
 * non-vowel preceded by a vowel, an 'e' is written at ep and the word is
 * retried through trypref() and suffix(); on success 1 is returned and
 * the 'e' is left in place.  Otherwise the overwritten character is
 * restored and strip()'s result is returned.
 */
int
VCe(char *ep, char *d, char *a, int lev)
{
	char prev, saved;

	prev = ep[-1];
	if (prev == 'e')
		return (0);
	if (!vowel(prev) && vowel(ep[-2])) {
		saved = ep[0];
		ep[0] = 'e';
		if (trypref(ep + 1, d, lev) || suffix(ep + 1, lev))
			return (1);
		ep[0] = saved;
	}
	return (strip(ep, d, a, lev));
}
Пример #12
0
/*
 * Possible consonant-consonant-e ending.
 */
int
CCe(char *ep, char *d, char *a, int lev)
{

	switch (ep[-1]) {
	case 'l':
		if (vowel(ep[-2]))
			break;
		switch (ep[-2]) {
		case 'l':
		case 'r':
		case 'w':
			break;
		default:
			return (y_to_e(ep, d, a, lev));
		}
		break;
	case 's':
		if (ep[-2] == 's')
			break;
	case 'c':
	case 'g':
		if (*ep == 'a')
			return (0);
	case 'v':
	case 'z':
		if (vowel(ep[-2]))
			break;
	case 'u':
		if (y_to_e(ep, d, a, lev))
			return (1);
		if (!(ep[-2] == 'n' && ep[-1] == 'g'))
			return (0);
	}
	return (VCe(ep, d, a, lev));
}
Пример #13
0
/*
 * Find the first preftab entry that the word at *wp starts with
 * (the word is compared lower-cased) and that is followed, before ep,
 * by at least one vowel.
 *
 * On success *wp is advanced past the prefix and the matching preftab
 * string is returned; otherwise *wp is left alone and 0 is returned.
 */
char *
lookuppref(char **wp, char *ep)
{
	char **entry;

	for (entry = preftab; *entry; entry++) {
		char *rest = *wp;
		char *p = *entry;

		/* Consume the prefix while it keeps matching. */
		while (*p != '\0' && tolower((unsigned char)*rest) == *p) {
			p++;
			rest++;
		}
		if (*p != '\0')
			continue;	/* mismatch before the prefix ended */

		for (p = rest; p < ep; p++) {
			if (vowel(*p)) {
				*wp = rest;
				return (*entry);
			}
		}
	}
	return (0);
}
Пример #14
0
/*
 * Read text from stdin one character at a time, feed every character to
 * CountWords() and CountSyllables(), then print a legibility index
 * together with the syllable, word and sentence totals.
 *
 * The index is floor(206.835 - 84.6 * syllables/words
 *                    - 1.015 * words/sentences + 0.5).
 * When the input has no words or no sentences the index is reported as
 * 0 rather than dividing by zero.  Returns 0.
 */
int main (void) 
{
	int Alpha=0;		/* letter of alphabet variable 		*/
	int SentChk=0;		/* end of sentence identifier 		*/
	int punct=0;		/* punctuation counter 			*/
	int Words=0;		/* words ended by whitespace counter 	*/
	int totalSents=0;	/* total amount of sentences 		*/
	int onlyVowel_e=0;	/* only the vowel 'e' counter 		*/
	int isVowel=0;		/* vowel identifier 			*/
	int vowelChk=0;		/* number of vowels in a word 		*/
	int syllables=0;	/* number of syllables 			*/
	int Vowel_e=0;		/* number of vowels 'e' 		*/
	int endVowel_e=0;	/* word that ends in the vowel 'e' 	*/
	int totalSylls=0;	/* total number of syllables 		*/
	int totalWords=0;	/* total number of words 		*/
	int index=0;		/* legibility index 			*/
	int ch;			/* getchar() result; int so that EOF stays distinct from data */
	
	/* Read each character from stdin until end of file is reached. */
	while ((ch = getchar()) != EOF)
	{			
		CountWords(ch, &Alpha, &SentChk, &punct, &Words, &totalSents, &onlyVowel_e);
		isVowel=vowel(ch);
		CountSyllables(ch, &isVowel, &vowelChk, &syllables, &Vowel_e, &onlyVowel_e, &endVowel_e);
	}
	
	totalWords=Words+totalSents;
	totalSylls=syllables-endVowel_e;

	/* Both ratios need a non-zero denominator. */
	if (totalWords > 0 && totalSents > 0)
	{
		index= (int)floor(206.835 - 84.6 * ((float)totalSylls/(float)totalWords) - 1.015 * ((float)totalWords/(float)totalSents)+0.5);
	}
	
	/* Output of calculated data */
	printf("\nLegibility Index = %d", index);
	printf("\nSyllable count   = %d", totalSylls);
	printf("\nWord count       = %d", totalWords);
	printf("\nSentence count   = %d\n", totalSents);
	
    return 0;	
}
Пример #15
0
/*
 * Compute the Metaphone code of Word and store it, NUL-terminated, in
 * Metaph.
 *
 * Word        NUL-terminated input.  Non-alphanumeric characters are
 *             dropped and letters are upper-cased first.  If Word starts
 *             with a digit, only the characters before the first letter
 *             are used.
 * Metaph      output buffer.  The main loop writes at most max_phones
 *             characters; if it wrote nothing and the word began with W
 *             or Y, that single letter is written instead.  The caller
 *             must therefore provide room for max_phones + 1 bytes and
 *             never fewer than 2.
 * max_phones  maximum number of code characters.
 *
 * Returns the length of the code.  0 is returned with an empty code when
 * no usable characters remain or the working buffer cannot be allocated.
 *
 * Changes from the earlier revision of this function:
 *  - the working copy now has four leading NUL bytes, because the G rule
 *    looks back as far as n - 4 (a single pad byte let it read in front
 *    of the allocation);
 *  - X inside a word now yields "KS" directly; the old flag-driven step
 *    copied the raw character after the X and swallowed it;
 *  - B is dropped only when it is the last letter and follows M (the old
 *    test `n < n_end` was true for every letter);
 *  - <ctype.h> calls receive unsigned char values and malloc is checked.
 */
int metaphone(const char *Word, char *Metaph, int max_phones) {
    enum { LEAD_PAD = 4, TAIL_PAD = 2 };
    char *n, *n_start, *n_end;    /* Pointers into the working copy   */
    char *metaph_start = Metaph, *metaph_end;
    /* Pointers to metaph         */
    size_t word_len = strlen(Word);
    /* Upper-cased word, NUL padded front and rear */
    char *ntrans = (char *)malloc(word_len + LEAD_PAD + TAIL_PAD);

    /* SDE -- special case: if the word starts with a number, just
     * copy the leading digits and return. This means we don't
     * metaphone cardinal number suffixes (i.e. "st","nd","rd") */
    int leading_digit = isdigit((unsigned char)*Word);
    /* SDE -- check for a leading semivowel. needed because
     * the copy in ntrans gets destroyed by the metaphone process. */
    char leading_semivowel = '\0';

    if (ntrans == NULL) {
        Metaph[0] = '\0';
        return 0;
    }

    /*
     ** Copy word to internal buffer, dropping non-alphanumeric characters
     ** and converting to upper case.
     */
    for (n = ntrans + LEAD_PAD, n_end = n + word_len;
            *Word && n < n_end; ++Word)
    {
        unsigned char c = (unsigned char)*Word;

        /* SDE -- see previous comment */
        if (leading_digit && isalpha(c))
            break;
        /* SDE -- copy numbers as well, for geocoding street names */
        if (isalnum(c))
            *n++ = (char)toupper(c);
    }

    if (n == ntrans + LEAD_PAD) {
        free(ntrans);
        Metaph[0]='\0';
        return 0;           /* Return if zero characters        */
    }
    n_end = n;              /* Set end of string pointer        */

    /*
     ** Pad with '\0's, front and rear
     */
    n[0] = '\0';
    n[1] = '\0';
    memset(ntrans, '\0', LEAD_PAD);
    n = ntrans + LEAD_PAD;

    /* SDE: check for leading semivowel here */
    if (*n == 'W' || *n == 'Y')
        leading_semivowel = *n;

    /*
     ** Check for PN, KN, GN, AE, WR, WH, and X at start
     */

    switch (*n)
    {
        case 'P':
        case 'K':
        case 'G':
            if ('N' == *(n + 1))
                *n++ = '\0';
            break;

        case 'A':
            if ('E' == *(n + 1))
                *n++ = '\0';
            break;

        case 'W':
            if ('R' == *(n + 1))
                *n++ = '\0';
            else if ('H' == *(n + 1))
            {
                *(n + 1) = *n;
                *n++ = '\0';
            }
            break;

        case 'X':
            *n = 'S';
            break;
    }

    /*
     ** Now loop through the string, stopping at the end of the string
     ** or when the computed Metaphone code is max_phones characters long.
     */

    for (metaph_end = Metaph + max_phones, n_start = n;
            n <= n_end && Metaph < metaph_end; ++n)
    {
        /* SDE -- special case: copy numbers verbatim */
        if (isdigit((unsigned char)*n)) {
            *Metaph++ = *n;
            continue;
        }

        /* Drop duplicates except for CC    */
        if (*(n - 1) == *n && *n != 'C')
            continue;

        /* Check for F J L M N R  or first letter vowel */

        if (same(*n) || (n == n_start && vowel(*n)))
            *Metaph++ = *n;
        else switch (*n)
        {
            case 'B':
                /* B unless it ends the word right after an M */
                if (n < n_end - 1 || *(n - 1) != 'M')
                    *Metaph++ = *n;
                break;

            case 'C':
                if (*(n - 1) != 'S' || !frontv(*(n + 1)))
                {
                    if ('I' == *(n + 1) && 'A' == *(n + 2))
                        *Metaph++ = 'X';
                    else if (frontv(*(n + 1)))
                        *Metaph++ = 'S';
                    else if ('H' == *(n + 1))
                        *Metaph++ = ((n == n_start &&
                                    !vowel(*(n + 2))) ||
                                'S' == *(n - 1)) ? 'K' : 'X';
                    else  *Metaph++ = 'K';
                }
                break;

            case 'D':
                *Metaph++ = ('G' == *(n + 1) && frontv(*(n + 2))) ?
                    'J' : 'T';
                break;

            case 'G':
                if ((*(n + 1) != 'H' || vowel(*(n + 2))) &&
                        (*(n + 1) != 'N' || ((n + 1) < n_end &&
                                             (*(n + 2) != 'E' || *(n + 3) != 'D'))) &&
                        (*(n - 1) != 'D' || !frontv(*(n + 1))))
                {
                    *Metaph++ = (frontv(*(n + 1)) &&
                            *(n + 2) != 'G') ? 'J' : 'K';
                }
                /* looks back up to four bytes: covered by LEAD_PAD */
                else if ('H' == *(n + 1) && !noghf(*(n - 3)) &&
                        *(n - 4) != 'H')
                {
                    *Metaph++ = 'F';
                }
                break;

            case 'H':
                if (!varson(*(n - 1)) && (!vowel(*(n - 1)) ||
                            vowel(*(n + 1))))
                {
                    *Metaph++ = 'H';
                }
                break;

            case 'K':
                if (*(n - 1) != 'C')
                    *Metaph++ = 'K';
                break;

            case 'P':
                *Metaph++ = ('H' == *(n + 1)) ? 'F' : 'P';
                break;

            case 'Q':
                *Metaph++ = 'K';
                break;

            case 'S':
                *Metaph++ = ('H' == *(n + 1) || ('I' == *(n + 1) &&
                            ('O' == *(n + 2) || 'A' == *(n + 2)))) ?
                    'X' : 'S';
                break;

            case 'T':
                if ('I' == *(n + 1) && ('O' == *(n + 2) ||
                            'A' == *(n + 2)))
                {
                    *Metaph++ = 'X';
                }
                else if ('H' == *(n + 1))
                    /* SDE: this is the digit zero (for TH), not the letter O */
                    *Metaph++ = '0';
                else if (*(n + 1) != 'C' || *(n + 2) != 'H')
                    *Metaph++ = 'T';
                break;

            case 'V':
                *Metaph++ = 'F';
                break;

            case 'W':
            case 'Y':
                if (vowel(*(n + 1)))
                    *Metaph++ = *n;
                break;

            case 'X':
                if (n == n_start)
                    *Metaph++ = 'S';
                else
                {
                    /* KS; the S is dropped if the code is already full */
                    *Metaph++ = 'K';
                    if (Metaph < metaph_end)
                        *Metaph++ = 'S';
                }
                break;

            case 'Z':
                *Metaph++ = 'S';
                break;
        }
    }

    /* SDE: special case: if word consists solely of W or Y, use that. */
    if (Metaph == metaph_start && leading_semivowel)
        *Metaph++ = leading_semivowel;

    *Metaph = '\0';
    free(ntrans);
    return (int)strlen(metaph_start);
}
Пример #16
0
/*
 * Build a Metaphone-style phonetic key for the word starting at Word.
 *
 * The word (up to the first position for which iswordbreak() is true) is
 * copied into a local work buffer: ASCII letters are upper-cased, other
 * ASCII bytes are dropped, and non-ASCII characters are copied through
 * LDAP_UTF8COPY.  The buffer is padded with four NUL bytes in front and
 * four behind so the rules below can look a few bytes either way.
 *
 * The key holds at most MAXPHONEMELEN characters.  It is returned as a
 * copy made by slapi_ch_strdup(); an empty key is returned when nothing
 * was copied into the work buffer.
 *
 * Note that vowel() and frontv() are called with pointers in this
 * function, not with character values.
 */
char *
phonetic( char *Word )
{
    unsigned char   *n_start, *n, *n_end;        /* pointers to string */
    char            *metaph_end;        /* pointers to metaph */
    unsigned char   ntrans[42];        /* word with uppercase letters */
    int             KSflag;        /* state flag for X -> KS */
    char                buf[MAXPHONEMELEN + 2];
    char                *Metaph;

    /*
     * Copy Word to internal buffer, dropping non-alphabetic characters
     * and converting to upper case
     */
    /*
     * NOTE(review): the loop stops once n reaches ntrans + 35, but a
     * multi-byte copy that starts at ntrans + 34 ends later, and four
     * pad bytes follow.  This stays inside ntrans[42] only if
     * LDAP_UTF8COPY never copies more than 4 bytes -- TODO confirm.
     */
    n = ntrans + 4; n_end = ntrans + 35;
    while (!iswordbreak( Word ) && n < n_end) {
        if (isascii(*Word)) {
            if (isalpha(*Word)) {
                *n++ = TOUPPER(*Word);
            }
            ++Word;
        } else {
            auto const size_t len = LDAP_UTF8COPY((char *)n, Word);
            n += len; Word += len;
        }
    }
    Metaph = buf;
    *Metaph = '\0';
    if (n == ntrans + 4) {
            return( slapi_ch_strdup( buf ) );                /* Return if null */
    }
    n_end = n;                /* Set n_end to end of string */

    /* ntrans[0] will always be == 0 */
    ntrans[0] = '\0';
    ntrans[1] = '\0';
    ntrans[2] = '\0';
    ntrans[3] = '\0';
    *n++ = 0;
    *n++ = 0;
    *n++ = 0;
    *n = 0;                        /* Pad with nulls */
    n = ntrans + 4;                /* Assign pointer to start */

    /* Check for PN, KN, GN, AE, WR, WH, and X at start */
    switch (*n) {
    case 'P':
    case 'K':
    case 'G':
        /* 'PN', 'KN', 'GN' becomes 'N' */
        if (*(n + 1) == 'N')
            *n++ = 0;
        break;
    case 'A':
        /* 'AE' becomes 'E' */
        if (*(n + 1) == 'E')
            *n++ = 0;
        break;
    case 'W':
        /* 'WR' becomes 'R', and 'WH' to 'H' */
        if (*(n + 1) == 'R')
            *n++ = 0;
        else if (*(n + 1) == 'H') {
            *n++ = 0;
        }
        break;
    case 'X':
        /* 'X' becomes 'S' */
        *n = 'S';
        break;
    /*
     * Leading two-byte sequence 0xC3 0x80..0xBF: the lead byte is
     * zeroed and the second byte is replaced by the ASCII character
     * chosen below ('0' stands for TH).  Second bytes without a case
     * (0x97, 0xB7) are left unchanged.
     */
    case 0xC3:
        switch (*(n+1)) {
        case 0x80:
        case 0x81:
        case 0x82:
        case 0x83:
        case 0x84:
        case 0x85:
            *n++ = 0;
            *n = 'A';
            break;
        case 0x87:
            *n++ = 0;
            *n = 'C';
            break;
        case 0x86:
        case 0x88:
        case 0x89:
        case 0x8A:
        case 0x8B:
            *n++ = 0;
            *n = 'E';
            break;
        case 0x8C:
        case 0x8D:
        case 0x8E:
        case 0x8F:
            *n++ = 0;
            *n = 'I';
            break;
        case 0x90:    /* eth: TH */
            *n++ = 0;
            *n = '0';
            break;
        case 0x91:
            *n++ = 0;
            *n = 'N';
            break;
        case 0x92:
        case 0x93:
        case 0x94:
        case 0x95:
        case 0x96:
        case 0x98:
            *n++ = 0;
            *n = 'O';
            break;
        case 0x99:
        case 0x9A:
        case 0x9B:
        case 0x9C:
            *n++ = 0;
            *n = 'U';
            break;
        case 0x9D:
            *n++ = 0;
            *n = 'Y';
            break;
        case 0x9E:
            *n++ = 0;
            *n = '0';    /* thorn: TH */
            break;
        case 0x9F:
            *n++ = 0;
            *n = 's';
            break;
        case 0xA0:
        case 0xA1:
        case 0xA2:
        case 0xA3:
        case 0xA4:
        case 0xA5:
            *n++ = 0;
            *n = 'a';
            break;
        case 0xA6:
            *n++ = 0;
            *n = 'e';
            break;
        case 0xA7:
            *n++ = 0;
            *n = 'c';
            break;
        case 0xA8:
        case 0xA9:
        case 0xAA:
        case 0xAB:
            *n++ = 0;
            *n = 'e';
            break;
        case 0xAC:
        case 0xAD:
        case 0xAE:
        case 0xAF:
            *n++ = 0;
            *n = 'i';
            break;
        case 0xB0:
            *n++ = 0;
            *n = '0';    /* eth: th */
            break;
        case 0xB1:
            *n++ = 0;
            *n = 'n';
            break;
        case 0xB2:
        case 0xB3:
        case 0xB4:
        case 0xB5:
        case 0xB6:
        case 0xB8:
            *n++ = 0;
            *n = 'o';
            break;
        case 0xB9:
        case 0xBA:
        case 0xBB:
        case 0xBC:
            *n++ = 0;
            *n = 'u';
            break;
        case 0xBD:
        case 0xBF:
            *n++ = 0;
            *n = 'y';
            break;
        case 0xBE:
            *n++ = 0;
            *n = '0';    /* thorn: th */
            break;
        }
        break;
    }

    /*
     * Now, loop step through string, stopping at end of string or when
     * the computed 'metaph' is MAXPHONEMELEN characters long
     */

    KSflag = 0;                /* state flag for KS translation */
    for (metaph_end = Metaph + MAXPHONEMELEN, n_start = n;
         n <= n_end && Metaph < metaph_end; n++) {
        /*
         * NOTE(review): the iteration that emits the 'S' of "KS" also
         * advances n, so the character that follows an X is never
         * translated itself.
         */
        if (KSflag) {
            KSflag = 0;
            *Metaph++ = 'S';
        } else if (!isascii(*n)) {
            /*
             * Non-ASCII byte: for a 0xC3 lead byte the second byte is
             * consumed and mapped to S, 0 (TH), N or Y, or skipped;
             * any other non-ASCII byte is copied to the key unchanged.
             */
            switch (*n) {
            case 0xC3:
                if (n+1 <= n_end) {
                    switch (*(++n)) {
                    case 0x87:    /* C with cedilla */
                    case 0x9F:    /* ess-zed */
                    case 0xA7:    /* c with cedilla */
                        *Metaph++ = 'S';
                        break;
                    case 0x90:    /* eth: TH */
                    case 0x9E:    /* thorn: TH */
                    case 0xB0:    /* eth: th */
                    case 0xBE:    /* thorn: th */
                        *Metaph++ = '0';
                        break;
                    case 0x91:
                    case 0xB1:
                        *Metaph++ = 'N';
                        break;
                    case 0x9D:
                    case 0xBD:
                    case 0xBF:
                        *Metaph++ = 'Y';
                        break;
                    default:      /* skipping the rest */
                        break;
                    }
                }
                break;
            default:
                *Metaph++ = *n;
            }
        } else {
            /* Drop duplicates except for CC */
            if (*(n - 1) == *n && *n != 'C')
                continue;
            /* Check for F J L M N R or first letter vowel */
            if (same(*n) || (n == n_start && vowel(n))) {
                *Metaph++ = *n;
            } else {
                switch (*n) {
                case 'B':

                    /*
                     * B unless in -MB
                     */
                    /*
                     * NOTE(review): as written, B is kept only when
                     * it is not the last letter AND does not follow
                     * M, so a word-final B is always dropped.  That
                     * is stricter than the rule stated above.
                     */
                    if (n < (n_end - 1) && *(n - 1) != 'M') {
                        *Metaph++ = *n;
                    }
                    break;
                case 'C':

                    /*
                     * X if in -CIA-, -CH- else S if in
                     * -CI-, -CE-, -CY- else dropped if
                     * in -SCI-, -SCE-, -SCY- else K
                     */
                    if (*(n - 1) != 'S' || !frontv((n + 1))) {
                        if (*(n + 1) == 'I' && *(n + 2) == 'A') {
                            *Metaph++ = 'X';
                        } else if (frontv((n + 1))) {
                            *Metaph++ = 'S';
                        } else if (*(n + 1) == 'H') {
                            *Metaph++ = ((n == n_start && !vowel((n + 2)))
                             || *(n - 1) == 'S')
                                ? (char) 'K' : (char) 'X';
                        } else {
                            *Metaph++ = 'K';
                        }
                    }
                    break;
                case 'D':

                    /*
                     * J if in DGE or DGI or DGY else T
                     */
                    *Metaph++ = (*(n + 1) == 'G' && frontv((n + 2)))
                        ? (char) 'J' : (char) 'T';
                    break;
                case 'G':

                    /*
                     * F if in -GH and not B--GH, D--GH,
                     * -H--GH, -H---GH else dropped if
                     * -GNED, -GN, -DGE-, -DGI-, -DGY-
                     * else J if in -GE-, -GI-, -GY- and
                     * not GG else K
                     */
                    /*
                     * NOTE(review): the code tests for a following
                     * 'J' where the rule above speaks of -GH, and
                     * emits 'G' where the rule says J.  Because the
                     * first condition holds whenever the next
                     * letter is 'H', the 'F' branch below cannot be
                     * reached.  Changing this would alter generated
                     * keys, so it is only flagged here.
                     */
                    if ((*(n + 1) != 'J' || vowel((n + 2))) &&
                        (*(n + 1) != 'N' || ((n + 1) < n_end &&
                                 (*(n + 2) != 'E' || *(n + 3) != 'D'))) &&
                        (*(n - 1) != 'D' || !frontv((n + 1))))
                        *Metaph++ = (frontv((n + 1)) &&
                                 *(n + 2) != 'G') ? (char) 'G' : (char) 'K';
                    else if (*(n + 1) == 'H' && !noghf(*(n - 3)) &&
                         *(n - 4) != 'H')
                        *Metaph++ = 'F';
                    break;
                case 'H':

                    /*
                     * H if before a vowel and not after
                     * C, G, P, S, T else dropped
                     */
                    if (!varson(*(n - 1)) && (!vowel((n - 1)) ||
                               vowel((n + 1))))
                        *Metaph++ = 'H';
                    break;
                case 'K':

                    /*
                     * dropped if after C else K
                     */
                    if (*(n - 1) != 'C')
                        *Metaph++ = 'K';
                    break;
                case 'P':

                    /*
                     * F if before H, else P
                     */
                    *Metaph++ = *(n + 1) == 'H' ?
                        (char) 'F' : (char) 'P';
                    break;
                case 'Q':

                    /*
                     * K
                     */
                    *Metaph++ = 'K';
                    break;
                case 'S':

                    /*
                     * X in -SH-, -SIO- or -SIA- else S
                     */
                    *Metaph++ = (*(n + 1) == 'H' ||
                             (*(n + 1) == 'I' && (*(n + 2) == 'O' ||
                              *(n + 2) == 'A')))
                        ? (char) 'X' : (char) 'S';
                    break;
                case 'T':

                    /*
                     * X in -TIA- or -TIO- else 0 (zero)
                     * before H else dropped if in -TCH-
                     * else T
                     */
                    if (*(n + 1) == 'I' && (*(n + 2) == 'O' ||
                               *(n + 2) == 'A'))
                        *Metaph++ = 'X';
                    else if (*(n + 1) == 'H')
                        *Metaph++ = '0';
                    else if (*(n + 1) != 'C' || *(n + 2) != 'H')
                        *Metaph++ = 'T';
                    break;
                case 'V':

                    /*
                     * F
                     */
                    *Metaph++ = 'F';
                    break;
                case 'W':

                    /*
                     * W after a vowel, else dropped
                     */
                    /* W shares the test below with Y (fall through) */
                case 'Y':

                    /*
                     * Y unless followed by a vowel
                     */
                    /*
                     * NOTE(review): the code keeps W/Y only when the
                     * NEXT character is a vowel, which is not what
                     * either comment above says.
                     */
                    if (vowel((n + 1)))
                        *Metaph++ = *n;
                    break;
                case 'X':

                    /*
                     * KS
                     */
                    if (n == n_start)
                        *Metaph++ = 'S';
                    else {
                        *Metaph++ = 'K';    /* Insert K, then S */
                        KSflag = 1;
                    }
                    break;
                case 'Z':

                    /*
                     * S
                     */
                    *Metaph++ = 'S';
                    break;
                }
            }
        }
    }

    *Metaph = 0;                /* Null terminate */
    return( slapi_ch_strdup( buf ) );
}
Пример #17
0
/*
 * Report whether phoneme curr may directly follow phoneme prev.
 *
 * Returns 1 when either phoneme is a vowel.  For two consonants the
 * answer comes from the table below: 1 if prev is listed as an allowed
 * predecessor of curr, 0 if it is not or if curr has no table entry.
 */
int tsFreePhoneImplementation::seq(char * prev, char * curr)
{
	/* One row per second consonant; predecessors end with a null entry. */
	struct cluster_rule {
		const char *second;
		const char *firsts[10];
	};
	static const cluster_rule rules[] = {
		{ "r", { "p", "t", "k", "b", "d", "g", "th", "sh", "f", 0 } },
		{ "y", { "p", "t", "k", "b", "d", "s", "f", 0 } },
		{ "l", { "p", "k", "b", "g", "s", "f", 0 } },
		{ "w", { "t", "k", "th", 0 } },
		{ "p", { "s", 0 } },
		{ "t", { "s", 0 } },
		{ "k", { "s", 0 } },
		{ "m", { "s", 0 } },
		{ "n", { "s", 0 } }
	};
	const int nrules = (int)(sizeof(rules) / sizeof(rules[0]));
	int r, f;

	if(vowel(prev) || vowel(curr)) {
		return(1);
	}

	for(r = 0; r < nrules; r++) {
		if(strcmp(curr, rules[r].second) != 0)
			continue;
		for(f = 0; rules[r].firsts[f] != 0; f++) {
			if(!strcmp(prev, rules[r].firsts[f]))
				return(1);
		}
		return(0);
	}
	return(0);
}
Пример #18
0
/*
 * Return TRUE if any of b[0] .. b[j] (taken from z) is a vowel,
 * FALSE otherwise.
 */
static int vowelinstem(struct stemmer * z) {
    char * text = z->b;
    int last = z->j;
    int k = 0;

    while (k <= last) {
        if (vowel(text[k]))
            return TRUE;
        k++;
    }
    return FALSE;
}
Пример #19
0
/*
 * Insert stress markers ("* ") in front of selected vowels.
 *
 * param is split into tokens with split() and then freed here, so the
 * caller must not use it afterwards.  "|" tokens separate words.  The
 * tokens are scanned from last to first; within each word the first
 * token seen decides, through heavy(), whether the first vowel found is
 * marked (heavy) or the second one (otherwise, with the first kept in
 * reserve).  A reserve still pending at the end of the scan is marked.
 *
 * Returns a malloc'ed string holding every token followed by a space,
 * with "* " in front of marked tokens.  The caller frees it.  NULL is
 * returned if memory cannot be allocated.
 */
char *tsFreePhoneImplementation::stress(char *param) /* no stress allowed initially  */
{
	char **llist;
	int nll;
	int look;
	int *marker;
	char *stringout;
	char *out;
	size_t outlen;
	int i;
	int hs;
	int reserve;

	llist = split(param);
	free(param);

	/*
	 * Count the tokens and size the result from their real lengths:
	 * token + one space, plus room for a "* " marker on every token.
	 */
	nll=0;
	outlen = 1;	/* terminating NUL */
	while(llist[nll] != NULL) {
		outlen += strlen(llist[nll]) + 1 + 2;
		nll++;
	}

	marker = (int *)calloc(nll > 0 ? nll : 1, sizeof(int));
	stringout = (char *)malloc(outlen);
	if(marker == NULL || stringout == NULL) {
		free(marker);
		free(stringout);
		tidy_split(llist);
		return(NULL);
	}

	look=2;
	reserve = -1;
	hs = 0;
	for(i=nll-1;i>=0;i--) {
		if(!strcmp(llist[i],"|")) {
			reserve = -1;
			look = 2 ;  /* ie just started looking  */
		} else if(look==2) {
			hs = heavy(llist[i]);
			look = 1;
		}

		if(look==1 && hs) {
			if(vowel(llist[i])) {
				marker[i] = 1;
				look=0;
			}
		} else if(look==1) {
			if(vowel(llist[i])) {
				if(reserve == (-1)) {
					reserve = i;	/* hold the first vowel back */
				} else {
					marker[i] = 1;
					reserve = -1;
					look=0;
				}
			}
		}
	}
	if(reserve!=(-1)) {
		marker[reserve] = 1;
	}

	/* Append through a moving pointer instead of repeated strcat(). */
	out = stringout;
	for(i=0;i<nll;i++) {
		size_t len = strlen(llist[i]);

		if(marker[i]) {
			memcpy(out, "* ", 2);
			out += 2;
		}
		memcpy(out, llist[i], len);
		out += len;
		*out++ = ' ';
	}
	*out = '\0';

	tidy_split(llist);
	free(marker);

	return(stringout);
}
Пример #20
0
/*
 * Build a Metaphone-style phonetic key for the word starting at Word.
 *
 * Characters are read until iswordbreak() is true; letters are
 * upper-cased into a local work buffer and everything else is dropped.
 * The buffer is padded with four NUL bytes in front and four behind so
 * the rules below can look a few characters either way.
 *
 * The key holds at most MAXPHONEMELEN characters.  It is returned as a
 * copy made by ch_strdup(); an empty key is returned when the word
 * contains no letters.
 */
char *
phonetic( char *Word )
{
	char           *n, *n_start, *n_end;	/* pointers to string */
	char           *metaph_end;	/* pointers to metaph */
	char            ntrans[40];	/* word with uppercase letters */
	int             KSflag;	/* state flag for X -> KS */
	char		buf[MAXPHONEMELEN + 2];
	char		*Metaph;

	/*
	 * Copy Word to internal buffer, dropping non-alphabetic characters
	 * and converting to upper case
	 */

	/* At most 31 letters are kept (ntrans + 4 up to ntrans + 35). */
	for (n = ntrans + 4, n_end = ntrans + 35; !iswordbreak( *Word ) &&
	    n < n_end; Word++) {
		if (isalpha((unsigned char)*Word))
			*n++ = TOUPPER((unsigned char)*Word);
	}
	Metaph = buf;
	*Metaph = '\0';
	if (n == ntrans + 4) {
		return( ch_strdup( buf ) );		/* Return if null */
	}
	n_end = n;		/* Set n_end to end of string */

	/* ntrans[0] will always be == 0 */
	ntrans[0] = '\0';
	ntrans[1] = '\0';
	ntrans[2] = '\0';
	ntrans[3] = '\0';
	*n++ = 0;
	*n++ = 0;
	*n++ = 0;
	*n = 0;			/* Pad with nulls */
	n = ntrans + 4;		/* Assign pointer to start */

	/* Check for PN, KN, GN, AE, WR, WH, and X at start */
	switch (*n) {
	case 'P':
	case 'K':
	case 'G':
		/* 'PN', 'KN', 'GN' becomes 'N' */
		if (*(n + 1) == 'N')
			*n++ = 0;
		break;
	case 'A':
		/* 'AE' becomes 'E' */
		if (*(n + 1) == 'E')
			*n++ = 0;
		break;
	case 'W':
		/* 'WR' becomes 'R', and 'WH' to 'H' */
		/*
		 * NOTE(review): for "WH" the code copies the W over the H
		 * before zeroing the first byte, so the word continues with
		 * 'W', not 'H' as the comment above says.
		 */
		if (*(n + 1) == 'R')
			*n++ = 0;
		else if (*(n + 1) == 'H') {
			*(n + 1) = *n;
			*n++ = 0;
		}
		break;
	case 'X':
		/* 'X' becomes 'S' */
		*n = 'S';
		break;
	}

	/*
	 * Now, loop step through string, stopping at end of string or when
	 * the computed 'metaph' is MAXPHONEMELEN characters long
	 */

	KSflag = 0;		/* state flag for KS translation */
	for (metaph_end = Metaph + MAXPHONEMELEN, n_start = n;
	     n <= n_end && Metaph < metaph_end; n++) {
		/*
		 * NOTE(review): the iteration that emits the 'S' of "KS"
		 * also advances n, so the character that follows an X is
		 * never translated itself.
		 */
		if (KSflag) {
			KSflag = 0;
			*Metaph++ = 'S';
		} else {
			/* Drop duplicates except for CC */
			if (*(n - 1) == *n && *n != 'C')
				continue;
			/* Check for F J L M N R or first letter vowel */
			if (same(*n) || (n == n_start && vowel(*n)))
				*Metaph++ = *n;
			else
				switch (*n) {
				case 'B':

					/*
					 * B unless in -MB
					 */
					/*
					 * NOTE(review): as written, B is
					 * kept only when it IS the last
					 * letter and does not follow M;
					 * every other B is dropped.  That
					 * does not match the rule above.
					 */
					if (n == (n_end - 1) && *(n - 1) != 'M')
						*Metaph++ = *n;
					break;
				case 'C':

					/*
					 * X if in -CIA-, -CH- else S if in
					 * -CI-, -CE-, -CY- else dropped if
					 * in -SCI-, -SCE-, -SCY- else K
					 */
					if (*(n - 1) != 'S' || !frontv(*(n + 1))) {
						if (*(n + 1) == 'I' && *(n + 2) == 'A')
							*Metaph++ = 'X';
						else if (frontv(*(n + 1)))
							*Metaph++ = 'S';
						else if (*(n + 1) == 'H')
							*Metaph++ = ((n == n_start && !vowel(*(n + 2)))
							 || *(n - 1) == 'S')
							    ? (char) 'K' : (char) 'X';
						else
							*Metaph++ = 'K';
					}
					break;
				case 'D':

					/*
					 * J if in DGE or DGI or DGY else T
					 */
					*Metaph++ = (*(n + 1) == 'G' && frontv(*(n + 2)))
					    ? (char) 'J' : (char) 'T';
					break;
				case 'G':

					/*
					 * F if in -GH and not B--GH, D--GH,
					 * -H--GH, -H---GH else dropped if
					 * -GNED, -GN, -DGE-, -DGI-, -DGY-
					 * else J if in -GE-, -GI-, -GY- and
					 * not GG else K
					 */
					/*
					 * NOTE(review): the code tests for a
					 * following 'J' where the rule above
					 * speaks of -GH, and emits 'G' where
					 * the rule says J.  Because the first
					 * condition holds whenever the next
					 * letter is 'H', the 'F' branch below
					 * cannot be reached.  Changing this
					 * would alter generated keys, so it
					 * is only flagged here.
					 */
					if ((*(n + 1) != 'J' || vowel(*(n + 2))) &&
					    (*(n + 1) != 'N' || ((n + 1) < n_end &&
								 (*(n + 2) != 'E' || *(n + 3) != 'D'))) &&
					    (*(n - 1) != 'D' || !frontv(*(n + 1))))
						*Metaph++ = (frontv(*(n + 1)) &&
							     *(n + 2) != 'G') ? (char) 'G' : (char) 'K';
					else if (*(n + 1) == 'H' && !noghf(*(n - 3)) &&
						 *(n - 4) != 'H')
						*Metaph++ = 'F';
					break;
				case 'H':

					/*
					 * H if before a vowel and not after
					 * C, G, P, S, T else dropped
					 */
					if (!varson(*(n - 1)) && (!vowel(*(n - 1)) ||
							   vowel(*(n + 1))))
						*Metaph++ = 'H';
					break;
				case 'K':

					/*
					 * dropped if after C else K
					 */
					if (*(n - 1) != 'C')
						*Metaph++ = 'K';
					break;
				case 'P':

					/*
					 * F if before H, else P
					 */
					*Metaph++ = *(n + 1) == 'H' ?
					    (char) 'F' : (char) 'P';
					break;
				case 'Q':

					/*
					 * K
					 */
					*Metaph++ = 'K';
					break;
				case 'S':

					/*
					 * X in -SH-, -SIO- or -SIA- else S
					 */
					*Metaph++ = (*(n + 1) == 'H' ||
						     (*(n + 1) == 'I' && (*(n + 2) == 'O' ||
							  *(n + 2) == 'A')))
					    ? (char) 'X' : (char) 'S';
					break;
				case 'T':

					/*
					 * X in -TIA- or -TIO- else 0 (zero)
					 * before H else dropped if in -TCH-
					 * else T
					 */
					if (*(n + 1) == 'I' && (*(n + 2) == 'O' ||
							   *(n + 2) == 'A'))
						*Metaph++ = 'X';
					else if (*(n + 1) == 'H')
						*Metaph++ = '0';
					else if (*(n + 1) != 'C' || *(n + 2) != 'H')
						*Metaph++ = 'T';
					break;
				case 'V':

					/*
					 * F
					 */
					*Metaph++ = 'F';
					break;
				case 'W':

					/*
					 * W after a vowel, else dropped
					 */
					/* W shares the test below with Y (fall through) */
				case 'Y':

					/*
					 * Y unless followed by a vowel
					 */
					/*
					 * NOTE(review): the code keeps W/Y
					 * only when the NEXT character is a
					 * vowel, which is not what either
					 * comment above says.
					 */
					if (vowel(*(n + 1)))
						*Metaph++ = *n;
					break;
				case 'X':

					/*
					 * KS
					 */
					if (n == n_start)
						*Metaph++ = 'S';
					else {
						*Metaph++ = 'K';	/* Insert K, then S */
						KSflag = 1;
					}
					break;
				case 'Z':

					/*
					 * S
					 */
					*Metaph++ = 'S';
					break;
				}
		}
	}

	*Metaph = 0;		/* Null terminate */
	return( ch_strdup( buf ) );
}