/*
 * Look up the candidate delimited by bp and ep with dict(); if that
 * fails and the candidate ends in a doubled letter in front of a vowel,
 * retry with one letter dropped.  Returns whatever dict() returned for
 * the last lookup, or 0 when the candidate is rejected up front.
 *
 * When vflag is set and the lookup succeeded, every non-null deriv[]
 * entry from lev down to 1 is appended to affix.
 *
 * NOTE(review): on the doubled-letter path deriv[lev + 1] is left
 * pointing at the local array `duple` after this function returns --
 * confirm no caller reads that slot afterwards.
 */
int
tryword(char *bp, char *ep, int lev)
{
	char duple[3];
	int found;
	int k;

	/* Candidates of at most one character are never accepted. */
	if (ep - bp <= 1)
		return (0);
	if (vowel(*ep) && monosyl(bp, ep))
		return (0);

	found = dict(bp, ep);
	if (found == 0 && vowel(*ep) && ep[-1] == ep[-2] &&
	    monosyl(bp, ep - 1)) {
		/* Undo the doubling and record it as "+<letter>". */
		ep--;
		duple[0] = '+';
		duple[1] = *ep;
		duple[2] = '\0';
		deriv[++lev] = duple;
		found = dict(bp, ep);
	}

	if (found != 0 && vflag != 0) {
		/* Also tack on possible derivations. (XXX - warn on truncation?) */
		for (k = lev; k > 0; k--) {
			if (deriv[k])
				strlcat(affix, deriv[k], sizeof(affix));
		}
	}
	return (found);
}
/*
 * Walk backwards from s: step over one vowel if s is on one, then keep
 * stepping back over non-vowels.  The walk never examines a character
 * below `word`; the returned pointer is either on a vowel or one
 * position before `word`.
 */
char *
skipv(char *s)
{
	char *p = s;

	if (p >= word && vowel(*p))
		p--;
	for (; p >= word; p--) {
		if (vowel(*p))
			break;
	}
	return (p);
}
/*
 * Test the character at index z->j of z->b.
 *
 * Returns TRUE when that character is not a vowel and either
 *   - it is at index 1 and the character at index 0 is a vowel, or
 *   - it is at index 2 or later, is preceded by a vowel which is itself
 *     preceded by a character accepted by cons(), and is not one of
 *     'w', 'x' or 'Y'.
 * Returns FALSE otherwise.
 */
static int shortv(struct stemmer * z)
{
    char * text = z->b;
    int pos = z->j;
    int last = text[pos];

    if (vowel(last))
        return FALSE;

    if (pos == 1)
        return vowel(text[0]) ? TRUE : FALSE;

    if (pos > 1) {
        if (vowel(text[pos - 1]) && cons(text[pos - 2]) &&
            last != 'w' && last != 'x' && last != 'Y')
            return TRUE;
    }
    return FALSE;
}
/*
 * Returns 1 when the span from bp up to (not including) ep has the
 * shape <non-vowels> vowel non-vowel, where the final non-vowel is
 * neither 'x' nor 'w'; returns 0 otherwise, including for spans shorter
 * than two characters.
 */
int
monosyl(char *bp, char *ep)
{
	char *p;

	if (ep < bp + 2)
		return (0);

	/* Last character must not be a vowel ... */
	if (vowel(ep[-1]))
		return (0);
	/* ... the one before it must be ... */
	if (!vowel(ep[-2]))
		return (0);
	/* ... and a trailing 'x' or 'w' disqualifies the span. */
	if (ep[-1] == 'x' || ep[-1] == 'w')
		return (0);

	/* Everything in front of that vowel has to be a non-vowel. */
	for (p = ep - 3; p >= bp; p--) {
		if (vowel(*p))
			return (0);
	}
	return (1);
}
/*
 * Scan the NUL-terminated string s for a position where vowel()
 * returns exactly 1 and vowel() is non-zero for each of the next two
 * characters.  Returns 1 on the first such position, 0 if none exists.
 */
int checkv(char s[])
{
    const char *p;

    for (p = s; *p != '\0'; p++) {
        if (vowel(p[0]) != 1)
            continue;
        /* && short-circuits: p[2] is read only when p[1] is a vowel. */
        if (vowel(p[1]) && vowel(p[2]))
            return 1;
    }
    return 0;
}
/*
 * "Heavy" test for a phoneme string (according to what the original
 * author understood by heavy).
 *
 * Returns 1 when phon is one of the listed strings, or when vowel(phon)
 * is false; returns 0 for every other phoneme.
 *
 * NOTE(review): the original if/else chain tested "ai" twice; the second
 * test could never fire.  Possibly another diphthong was intended --
 * confirm against the phoneme inventory.
 */
int tsFreePhoneImplementation::heavy(char *phon)
{
    static const char *const listed[] = {
        "ai", "ie", "oi", "oa", "ou", "eer", "air", "oor"
    };
    const int count = (int)(sizeof(listed) / sizeof(listed[0]));

    for (int k = 0; k < count; k++) {
        if (strcmp(phon, listed[k]) == 0)
            return(1);
    }

    /* Anything vowel() rejects also counts as heavy. */
    if (!vowel(phon))
        return(1);
    return(0);
}
/*
 * Find the first suftab entry whose `suf` string matches the text
 * ending at ep (compared backwards from ep), then hand off to that
 * entry's handler functions.
 *
 * For the first matching entry:
 *   - return 0 if no vowel is found between `word` and the start of the
 *     matched suffix;
 *   - return 1 if p1 succeeds;
 *   - otherwise return p2's result, or 0 when the entry has no p2.
 * Returns 0 when no entry matches.
 */
int
suffix(char *ep, int lev)
{
	struct suftab *entry;
	char *pat, *tail, *scan;
	int matched;

	lev += DLEV;
	deriv[lev - 1] = NULL;
	deriv[lev] = NULL;

	for (entry = suftab; (pat = entry->suf) != NULL; entry++) {
		/* Compare the pattern against the text, walking back from ep. */
		tail = ep;
		matched = 1;
		while (*pat != '\0') {
			if (*--tail != *pat++) {
				matched = 0;
				break;
			}
		}
		if (!matched)
			continue;

		/* Look for a vowel in front of the matched suffix. */
		scan = tail;
		do {
			--scan;
		} while (scan >= word && !vowel(*scan));
		if (scan < word)
			return (0);

		if ((*entry->p1)(ep - entry->n1, entry->d1, entry->a1, lev + 1))
			return (1);
		if (entry->p2 == NULL)
			return (0);

		/* Clear whatever p1 recorded before trying the alternative. */
		deriv[lev] = deriv[lev + 1] = NULL;
		return ((*entry->p2)(ep - entry->n2, entry->d2, entry->a2, lev));
	}
	return (0);
}
/*
 * Returns 1 when the lower-cased form of c0 is neither a vowel nor 'h'
 * nor 'l'; returns 0 otherwise.
 */
static int not_usually_after_l(int c0)
{
    int lower = tolower(c0);

    if (vowel(lower))
        return 0;
    if (lower == 'h' || lower == 'l')
        return 0;
    return 1;
}
/*
 * Read up to ten characters from standard input, push each onto a list,
 * print the list, run vowel() over it and print the list again.
 *
 * Reading stops early if scanf() cannot deliver a character (end of
 * input or read error), so nothing indeterminate is ever pushed.
 */
int main(void)
{
    struct SLL *h = NULL;
    int i;
    char ch;

    for (i = 0; i < 10; i++) {
        /* scanf returns the number of items converted; anything but 1 means no char. */
        if (scanf("%c", &ch) != 1)
            break;
        push(&h, ch);
    }

    print_list(h);
    printf("\n\n");

    vowel(h);

    print_list(h);
    printf("\n\n");

    return 0;
}
// Return a copy of s in which the vowels (a, e, i, o, u in either case)
// appear in reverse order while every other character stays in place.
string reverseVowels(string s) {
    auto isVowel = [](char c) {
        switch (c) {
        case 'a': case 'e': case 'i': case 'o': case 'u':
        case 'A': case 'E': case 'I': case 'O': case 'U':
            return true;
        default:
            return false;
        }
    };

    if (s.size() < 2)
        return s;

    // Two cursors closing in from both ends; each stops on a vowel.
    size_t lo = 0;
    size_t hi = s.size() - 1;
    while (lo < hi) {
        if (!isVowel(s[lo])) {
            ++lo;
            continue;
        }
        if (!isVowel(s[hi])) {
            --hi;
            continue;
        }
        std::swap(s[lo], s[hi]);
        ++lo;
        --hi;
    }
    return s;
}
/*
 * Possible consonant-vowel-consonant-e ending.
 *
 * If the text in front of ep ends in vowel + non-vowel (and not in 'e'),
 * temporarily write an 'e' at *ep and try trypref()/suffix() on the
 * lengthened word.  On success return 1, leaving the 'e' in place;
 * otherwise restore the overwritten character and fall back to strip().
 * Returns 0 immediately when the character before ep is 'e'.
 */
int
VCe(char *ep, char *d, char *a, int lev)
{
	char saved;

	if (ep[-1] == 'e')
		return (0);

	if (!vowel(ep[-1]) && vowel(ep[-2])) {
		saved = *ep;
		*ep = 'e';
		if (trypref(ep + 1, d, lev) || suffix(ep + 1, lev))
			return (1);
		*ep = saved;
	}
	return (strip(ep, d, a, lev));
}
/* * Possible consonant-consonant-e ending. */ int CCe(char *ep, char *d, char *a, int lev) { switch (ep[-1]) { case 'l': if (vowel(ep[-2])) break; switch (ep[-2]) { case 'l': case 'r': case 'w': break; default: return (y_to_e(ep, d, a, lev)); } break; case 's': if (ep[-2] == 's') break; case 'c': case 'g': if (*ep == 'a') return (0); case 'v': case 'z': if (vowel(ep[-2])) break; case 'u': if (y_to_e(ep, d, a, lev)) return (1); if (!(ep[-2] == 'n' && ep[-1] == 'g')) return (0); } return (VCe(ep, d, a, lev)); }
/*
 * Search preftab for the first entry that is a case-insensitive prefix
 * of *wp (the table strings are compared against the lower-cased word)
 * and is followed by at least one vowel before ep.
 *
 * On success *wp is advanced past the prefix and the table entry is
 * returned; otherwise *wp is untouched and 0 is returned.
 */
char *
lookuppref(char **wp, char *ep)
{
	char **entry;
	char *w, *p;
	int matched;

	for (entry = preftab; *entry; entry++) {
		w = *wp;
		matched = 1;
		for (p = *entry; *p != '\0'; p++, w++) {
			if (tolower((unsigned char)*w) != *p) {
				matched = 0;
				break;
			}
		}
		if (!matched)
			continue;

		/* The remainder must contain a vowel for the prefix to count. */
		for (p = w; p < ep; p++) {
			if (vowel(*p)) {
				*wp = w;
				return (*entry);
			}
		}
	}
	return (0);
}
/*
 * Read text from stdin one character at a time, feed each character to
 * CountWords() and CountSyllables(), then print a legibility index
 * together with the syllable, word and sentence totals.
 *
 * The index is floor(206.835 - 84.6 * syllables/words
 *                    - 1.015 * words/sentences + 0.5).
 * When no words or no sentences were counted the ratios are undefined,
 * so the index is reported as 0 instead of converting inf/NaN to int.
 */
int main (void)
{
    int Alpha=0;        /* letter of alphabet variable */
    int SentChk=0;      /* end of sentence identifier */
    int punct=0;        /* punctuation counter */
    int Words=0;        /* words ended by whitespace counter */
    int totalSents=0;   /* total amount of sentences */
    int onlyVowel_e=0;  /* only the vowel 'e' counter */
    int isVowel=0;      /* vowel identifier */
    int vowelChk=0;     /* number of vowels in a word */
    int syllables=0;    /* number of syllables */
    int Vowel_e=0;      /* number of vowels 'e' */
    int endVowel_e=0;   /* word that ends in the vowel 'e' */
    int totalSylls=0;   /* total number of syllables */
    int totalWords=0;   /* total number of words */
    int index;          /* legibility index */
    int c;              /* getchar() result: must be int so EOF stays distinct from every byte */

    /* Read each character from stdin until end of file is reached. */
    while ((c = getchar()) != EOF)
    {
        char ch = (char)c;  /* the helpers were handed a char originally */

        /* Count words and sentences. */
        CountWords(ch, &Alpha, &SentChk, &punct, &Words, &totalSents, &onlyVowel_e);
        /* Check whether the character is a vowel. */
        isVowel=vowel(ch);
        /* Count syllables. */
        CountSyllables(ch, &isVowel, &vowelChk, &syllables, &Vowel_e, &onlyVowel_e, &endVowel_e);
    }

    totalWords=Words+totalSents;        /* Calculates total words */
    totalSylls=syllables-endVowel_e;    /* Calculates total syllables */

    /* Calculates legibility index; guard the two divisions. */
    if (totalWords > 0 && totalSents > 0)
        index= floor(206.835 - 84.6 * ((float)totalSylls/(float)totalWords) - 1.015 * ((float)totalWords/(float)totalSents)+0.5);
    else
        index= 0;

    /* Output of calculated data */
    printf("\nLegibility Index = %d", index);
    printf("\nSyllable count = %d", totalSylls);
    printf("\nWord count = %d", totalWords);
    printf("\nSentence count = %d\n", totalSents);
    return 0;
}
/*
 * Compute a Metaphone-style phonetic code for Word.
 *
 * Word        NUL-terminated input; only alphanumeric characters are
 *             used, letters are upper-cased.  If Word starts with a
 *             digit, only the leading run of digits is encoded.
 * Metaph      output buffer; receives at most max_phones code characters
 *             plus a terminating NUL.  Must hold at least
 *             max_phones + 1 bytes (and at least 2 bytes).
 * max_phones  maximum number of code characters to produce.
 *
 * Returns the length of the code written to Metaph; 0 (with an empty
 * Metaph) when Word has no usable characters or the working buffer
 * cannot be allocated.
 *
 * The working copy of the word is surrounded by NUL padding on both
 * sides so that the rules below can look up to four characters behind
 * and three ahead without leaving the buffer.
 */
int metaphone(const char *Word, char *Metaph, int max_phones)
{
    enum { FRONT_PAD = 4, REAR_PAD = 4 };

    char *n, *n_start, *n_end;                  /* Pointers to string */
    char *metaph_start = Metaph, *metaph_end;   /* Pointers to metaph */
    size_t ntrans_len = strlen(Word) + FRONT_PAD + REAR_PAD;
    char *ntrans;                               /* Word with uppercase letters */

    /* SDE -- special case: if the word starts with a number, just
     * copy the leading digits and return.  This means we don't
     * metaphone cardinal number suffixes (i.e. "st","nd","rd") */
    int leading_digit = isdigit((unsigned char)*Word);

    /* SDE -- check for a leading semivowel.  Needed because
     * the copy in ntrans gets destroyed by the metaphone process. */
    char leading_semivowel = '\0';

    /* calloc zero-fills, which provides the front and rear NUL padding. */
    ntrans = (char *)calloc(ntrans_len, sizeof(char));
    if (ntrans == NULL) {
        Metaph[0] = '\0';
        return 0;
    }

    /*
    ** Copy word to internal buffer, dropping non-alphanumeric characters
    ** and converting to upper case.
    */
    for (n = ntrans + FRONT_PAD, n_end = ntrans + ntrans_len - REAR_PAD;
         *Word && n < n_end; ++Word) {
        /* SDE -- see previous comment */
        if (leading_digit && isalpha((unsigned char)*Word))
            break;
        /* SDE -- copy numbers as well, for geocoding street names */
        if (isalnum((unsigned char)*Word))
            *n++ = (char)toupper((unsigned char)*Word);
    }

    if (n == ntrans + FRONT_PAD) {
        free(ntrans);
        Metaph[0] = '\0';
        return 0;               /* Return if zero characters */
    }
    n_end = n;                  /* Points at the NUL right after the word */

    n = ntrans + FRONT_PAD;     /* Back to the first character */

    /* SDE: check for leading semivowel here */
    if (*n == 'W' || *n == 'Y')
        leading_semivowel = *n;

    /*
    ** Check for PN, KN, GN, AE, WR, WH, and X at start
    */
    switch (*n) {
    case 'P':
    case 'K':
    case 'G':
        if ('N' == *(n + 1))
            *n++ = '\0';
        break;
    case 'A':
        if ('E' == *(n + 1))
            *n++ = '\0';
        break;
    case 'W':
        if ('R' == *(n + 1))
            *n++ = '\0';
        else if ('H' == *(n + 1)) {
            /* "WH" is treated as "W": overwrite the H, skip the first W. */
            *(n + 1) = *n;
            *n++ = '\0';
        }
        break;
    case 'X':
        *n = 'S';
        break;
    }

    /*
    ** Now loop through the string, stopping at the end of the string
    ** or when the computed Metaphone code is max_phones characters long.
    */
    for (metaph_end = Metaph + max_phones, n_start = n;
         n <= n_end && Metaph < metaph_end; ++n) {

        /* SDE -- special case: copy numbers verbatim */
        if (isdigit((unsigned char)*n)) {
            *Metaph++ = *n;
            continue;
        }

        /* Drop duplicates except for CC */
        if (*(n - 1) == *n && *n != 'C')
            continue;

        /* Check for F J L M N R or first letter vowel */
        if (same(*n) || (n == n_start && vowel(*n))) {
            *Metaph++ = *n;
            continue;
        }

        switch (*n) {
        case 'B':
            /* NOTE(review): n < n_end holds for every letter, so this
             * always emits B; the classic rule drops a final B after M.
             * Left unchanged -- confirm intent before altering codes. */
            if (n < n_end || *(n - 1) != 'M')
                *Metaph++ = *n;
            break;
        case 'C':
            /* Dropped in -SCI-, -SCE-, -SCY-; X in -CIA- and -CH-;
             * S before a front vowel; K otherwise. */
            if (*(n - 1) != 'S' || !frontv(*(n + 1))) {
                if ('I' == *(n + 1) && 'A' == *(n + 2))
                    *Metaph++ = 'X';
                else if (frontv(*(n + 1)))
                    *Metaph++ = 'S';
                else if ('H' == *(n + 1))
                    *Metaph++ = ((n == n_start && !vowel(*(n + 2)))
                                 || 'S' == *(n - 1)) ? 'K' : 'X';
                else
                    *Metaph++ = 'K';
            }
            break;
        case 'D':
            /* J in -DGE-, -DGI-, -DGY-; T otherwise. */
            *Metaph++ = ('G' == *(n + 1) && frontv(*(n + 2))) ? 'J' : 'T';
            break;
        case 'G':
            if ((*(n + 1) != 'H' || vowel(*(n + 2)))
                && (*(n + 1) != 'N'
                    || ((n + 1) < n_end
                        && (*(n + 2) != 'E' || *(n + 3) != 'D')))
                && (*(n - 1) != 'D' || !frontv(*(n + 1)))) {
                *Metaph++ = (frontv(*(n + 1)) && *(n + 2) != 'G') ? 'J' : 'K';
            } else if ('H' == *(n + 1) && !noghf(*(n - 3))
                       && *(n - 4) != 'H') {
                /* The look-behind reads land in the front padding
                 * when G is near the start of the word. */
                *Metaph++ = 'F';
            }
            break;
        case 'H':
            if (!varson(*(n - 1))
                && (!vowel(*(n - 1)) || vowel(*(n + 1)))) {
                *Metaph++ = 'H';
            }
            break;
        case 'K':
            if (*(n - 1) != 'C')
                *Metaph++ = 'K';
            break;
        case 'P':
            *Metaph++ = ('H' == *(n + 1)) ? 'F' : 'P';
            break;
        case 'Q':
            *Metaph++ = 'K';
            break;
        case 'S':
            *Metaph++ = ('H' == *(n + 1)
                         || ('I' == *(n + 1)
                             && ('O' == *(n + 2) || 'A' == *(n + 2))))
                        ? 'X' : 'S';
            break;
        case 'T':
            if ('I' == *(n + 1)
                && ('O' == *(n + 2) || 'A' == *(n + 2))) {
                *Metaph++ = 'X';
            } else if ('H' == *(n + 1)) {
                /* SDE: "TH" is coded as the digit zero, not the letter O. */
                *Metaph++ = '0';
            } else if (*(n + 1) != 'C' || *(n + 2) != 'H') {
                *Metaph++ = 'T';
            }
            break;
        case 'V':
            *Metaph++ = 'F';
            break;
        case 'W':
        case 'Y':
            /* Kept only when followed by a vowel. */
            if (vowel(*(n + 1)))
                *Metaph++ = *n;
            break;
        case 'X':
            if (n == n_start) {
                *Metaph++ = 'S';
            } else {
                /* X -> KS.  Emit both codes here (room permitting) so
                 * the following input letter is still processed by the
                 * normal rules on the next iteration. */
                *Metaph++ = 'K';
                if (Metaph < metaph_end)
                    *Metaph++ = 'S';
            }
            break;
        case 'Z':
            *Metaph++ = 'S';
            break;
        }
    }

    /* SDE: special case: if word consists solely of W or Y, use that. */
    if (Metaph == metaph_start && leading_semivowel)
        *Metaph++ = leading_semivowel;

    *Metaph = '\0';
    free(ntrans);
    return (int)strlen(metaph_start);
}
/*
 * Build a metaphone-style phonetic code for the word at Word and return
 * slapi_ch_strdup() of it (presumably a heap copy the caller owns --
 * confirm against slapi_ch_strdup's contract).  An empty code is
 * returned when no characters were copied from Word.
 *
 * ASCII letters are upper-cased into a local buffer; non-ASCII input is
 * copied through LDAP_UTF8COPY and the 0xC3-prefixed two-byte sequences
 * are mapped to codes by the switches below.  At most MAXPHONEMELEN code
 * characters are produced.
 */
char *
phonetic( char *Word )
{
    unsigned char *n_start, *n, *n_end;	/* pointers to string */
    char *metaph_end;	/* pointers to metaph */
    unsigned char ntrans[42];	/* word with uppercase letters */
    int KSflag;	/* state flag for X -> KS */
    char buf[MAXPHONEMELEN + 2];
    char *Metaph;

    /*
     * Copy Word to internal buffer, dropping non-alphabetic characters
     * and converting to upper case.  The word starts at ntrans + 4 so
     * that four bytes of padding sit in front of it.
     */
    n = ntrans + 4;
    n_end = ntrans + 35;
    while (!iswordbreak( Word ) && n < n_end) {
	if (isascii(*Word)) {
	    if (isalpha(*Word)) {
		*n++ = TOUPPER(*Word);
	    }
	    ++Word;
	} else {
	    /*
	     * NOTE(review): n can be as high as ntrans + 34 here, and four
	     * NULs are written after the word below; that stays inside
	     * ntrans[42] only if LDAP_UTF8COPY copies at most 4 bytes --
	     * TODO confirm.
	     */
	    auto const size_t len = LDAP_UTF8COPY((char *)n, Word);
	    n += len; Word += len;
	}
    }
    Metaph = buf;
    *Metaph = '\0';
    if (n == ntrans + 4) {
	return( slapi_ch_strdup( buf ) );	/* Return if null */
    }
    n_end = n;	/* Set n_end to end of string */

    /* ntrans[0] will always be == 0 */
    ntrans[0] = '\0';
    ntrans[1] = '\0';
    ntrans[2] = '\0';
    ntrans[3] = '\0';
    *n++ = 0;
    *n++ = 0;
    *n++ = 0;
    *n = 0;	/* Pad with nulls */
    n = ntrans + 4;	/* Assign pointer to start */

    /* Check for PN, KN, GN, AE, WR, WH, and X at start */
    switch (*n) {
    case 'P':
    case 'K':
    case 'G':
	/* 'PN', 'KN', 'GN' becomes 'N' */
	if (*(n + 1) == 'N')
	    *n++ = 0;
	break;
    case 'A':
	/* 'AE' becomes 'E' */
	if (*(n + 1) == 'E')
	    *n++ = 0;
	break;
    case 'W':
	/* 'WR' becomes 'R', and 'WH' to 'H' */
	if (*(n + 1) == 'R')
	    *n++ = 0;
	else if (*(n + 1) == 'H') {
	    *n++ = 0;
	}
	break;
    case 'X':
	/* 'X' becomes 'S' */
	*n = 'S';
	break;
    case 0xC3:
	/*
	 * Leading two-byte sequence starting with 0xC3: blank the lead
	 * byte, advance, and replace the second byte with an ASCII
	 * stand-in selected by its value.
	 */
	switch (*(n+1)) {
	case 0x80:
	case 0x81:
	case 0x82:
	case 0x83:
	case 0x84:
	case 0x85:
	    *n++ = 0;
	    *n = 'A';
	    break;
	case 0x87:
	    *n++ = 0;
	    *n = 'C';
	    break;
	case 0x86:
	case 0x88:
	case 0x89:
	case 0x8A:
	case 0x8B:
	    *n++ = 0;
	    *n = 'E';
	    break;
	case 0x8C:
	case 0x8D:
	case 0x8E:
	case 0x8F:
	    *n++ = 0;
	    *n = 'I';
	    break;
	case 0x90:	/* eth: TH */
	    *n++ = 0;
	    *n = '0';
	    break;
	case 0x91:
	    *n++ = 0;
	    *n = 'N';
	    break;
	case 0x92:
	case 0x93:
	case 0x94:
	case 0x95:
	case 0x96:
	case 0x98:
	    *n++ = 0;
	    *n = 'O';
	    break;
	case 0x99:
	case 0x9A:
	case 0x9B:
	case 0x9C:
	    *n++ = 0;
	    *n = 'U';
	    break;
	case 0x9D:
	    *n++ = 0;
	    *n = 'Y';
	    break;
	case 0x9E:
	    *n++ = 0;
	    *n = '0';	/* thorn: TH */
	    break;
	case 0x9F:
	    *n++ = 0;
	    *n = 's';
	    break;
	case 0xA0:
	case 0xA1:
	case 0xA2:
	case 0xA3:
	case 0xA4:
	case 0xA5:
	    *n++ = 0;
	    *n = 'a';
	    break;
	case 0xA6:
	    *n++ = 0;
	    *n = 'e';
	    break;
	case 0xA7:
	    *n++ = 0;
	    *n = 'c';
	    break;
	case 0xA8:
	case 0xA9:
	case 0xAA:
	case 0xAB:
	    *n++ = 0;
	    *n = 'e';
	    break;
	case 0xAC:
	case 0xAD:
	case 0xAE:
	case 0xAF:
	    *n++ = 0;
	    *n = 'i';
	    break;
	case 0xB0:
	    *n++ = 0;
	    *n = '0';	/* eth: th */
	    break;
	case 0xB1:
	    *n++ = 0;
	    *n = 'n';
	    break;
	case 0xB2:
	case 0xB3:
	case 0xB4:
	case 0xB5:
	case 0xB6:
	case 0xB8:
	    *n++ = 0;
	    *n = 'o';
	    break;
	case 0xB9:
	case 0xBA:
	case 0xBB:
	case 0xBC:
	    *n++ = 0;
	    *n = 'u';
	    break;
	case 0xBD:
	case 0xBF:
	    *n++ = 0;
	    *n = 'y';
	    break;
	case 0xBE:
	    *n++ = 0;
	    *n = '0';	/* thorn: th */
	    break;
	}
	break;
    }

    /*
     * Now, loop step through string, stopping at end of string or when
     * the computed 'metaph' is MAXPHONEMELEN characters long
     */

    KSflag = 0;	/* state flag for KS translation */
    for (metaph_end = Metaph + MAXPHONEMELEN, n_start = n;
	 n <= n_end && Metaph < metaph_end; n++) {
	if (KSflag) {
	    /*
	     * Second half of X -> KS.  NOTE(review): the character at n
	     * in this iteration is not examined, so the letter following
	     * a non-initial X never reaches the rules below -- confirm
	     * this is intended.
	     */
	    KSflag = 0;
	    *Metaph++ = 'S';
	} else if (!isascii(*n)) {
	    switch (*n) {
	    case 0xC3:
		/* Two-byte sequence: the second byte selects the code. */
		if (n+1 <= n_end) {
		    switch (*(++n)) {
		    case 0x87:	/* C with cedilla */
		    case 0x9F:	/* ess-zed */
		    case 0xA7:	/* c with cedilla */
			*Metaph++ = 'S';
			break;
		    case 0x90:	/* eth: TH */
		    case 0x9E:	/* thorn: TH */
		    case 0xB0:	/* eth: th */
		    case 0xBE:	/* thorn: th */
			*Metaph++ = '0';
			break;
		    case 0x91:
		    case 0xB1:
			*Metaph++ = 'N';
			break;
		    case 0x9D:
		    case 0xBD:
		    case 0xBF:
			*Metaph++ = 'Y';
			break;
		    default:	/* skipping the rest */
			break;
		    }
		}
		break;
	    default:
		/* Any other non-ASCII byte is copied to the code as is. */
		*Metaph++ = *n;
	    }
	} else {
	    /* Drop duplicates except for CC */
	    if (*(n - 1) == *n && *n != 'C')
		continue;
	    /* Check for F J L M N R or first letter vowel */
	    if (same(*n) || (n == n_start && vowel(n))) {
		*Metaph++ = *n;
	    } else {
		switch (*n) {
		case 'B':

		    /*
		     * B unless in -MB
		     *
		     * NOTE(review): as written, B is emitted only when it
		     * is not the last letter AND not preceded by M, so any
		     * word-final B is dropped -- confirm.
		     */
		    if (n < (n_end - 1) && *(n - 1) != 'M') {
			*Metaph++ = *n;
		    }
		    break;
		case 'C':

		    /*
		     * X if in -CIA-, -CH- else S if in
		     * -CI-, -CE-, -CY- else dropped if
		     * in -SCI-, -SCE-, -SCY- else K
		     */
		    if (*(n - 1) != 'S' || !frontv((n + 1))) {
			if (*(n + 1) == 'I' && *(n + 2) == 'A') {
			    *Metaph++ = 'X';
			} else if (frontv((n + 1))) {
			    *Metaph++ = 'S';
			} else if (*(n + 1) == 'H') {
			    *Metaph++ = ((n == n_start && !vowel((n + 2)))
					 || *(n - 1) == 'S')
				? (char) 'K' : (char) 'X';
			} else {
			    *Metaph++ = 'K';
			}
		    }
		    break;
		case 'D':

		    /*
		     * J if in DGE or DGI or DGY else T
		     */
		    *Metaph++ = (*(n + 1) == 'G' && frontv((n + 2)))
			? (char) 'J' : (char) 'T';
		    break;
		case 'G':

		    /*
		     * F if in -GH and not B--GH, D--GH,
		     * -H--GH, -H---GH else dropped if
		     * -GNED, -GN, -DGE-, -DGI-, -DGY-
		     * else J if in -GE-, -GI-, -GY- and
		     * not GG else K
		     *
		     * NOTE(review): the first test below compares the
		     * next letter with 'J' (not 'H') and the front-vowel
		     * branch emits 'G' (not 'J'), which differs from the
		     * rule text above -- confirm before changing, since
		     * it alters the generated codes.
		     */
		    if ((*(n + 1) != 'J' || vowel((n + 2))) &&
			(*(n + 1) != 'N' || ((n + 1) < n_end &&
					     (*(n + 2) != 'E' || *(n + 3) != 'D'))) &&
			(*(n - 1) != 'D' || !frontv((n + 1))))
			*Metaph++ = (frontv((n + 1)) &&
				     *(n + 2) != 'G') ? (char) 'G' : (char) 'K';
		    else if (*(n + 1) == 'H' && !noghf(*(n - 3)) &&
			     *(n - 4) != 'H')
			*Metaph++ = 'F';
		    break;
		case 'H':

		    /*
		     * H if before a vowel and not after
		     * C, G, P, S, T else dropped
		     */
		    if (!varson(*(n - 1)) && (!vowel((n - 1)) ||
					      vowel((n + 1))))
			*Metaph++ = 'H';
		    break;
		case 'K':

		    /*
		     * dropped if after C else K
		     */
		    if (*(n - 1) != 'C')
			*Metaph++ = 'K';
		    break;
		case 'P':

		    /*
		     * F if before H, else P
		     */
		    *Metaph++ = *(n + 1) == 'H' ?
			(char) 'F' : (char) 'P';
		    break;
		case 'Q':

		    /*
		     * K
		     */
		    *Metaph++ = 'K';
		    break;
		case 'S':

		    /*
		     * X in -SH-, -SIO- or -SIA- else S
		     */
		    *Metaph++ = (*(n + 1) == 'H' ||
				 (*(n + 1) == 'I' && (*(n + 2) == 'O' ||
						      *(n + 2) == 'A')))
			? (char) 'X' : (char) 'S';
		    break;
		case 'T':

		    /*
		     * X in -TIA- or -TIO- else 0 (zero)
		     * before H else dropped if in -TCH-
		     * else T
		     */
		    if (*(n + 1) == 'I' && (*(n + 2) == 'O' ||
					    *(n + 2) == 'A'))
			*Metaph++ = 'X';
		    else if (*(n + 1) == 'H')
			*Metaph++ = '0';
		    else if (*(n + 1) != 'C' || *(n + 2) != 'H')
			*Metaph++ = 'T';
		    break;
		case 'V':

		    /*
		     * F
		     */
		    *Metaph++ = 'F';
		    break;
		case 'W':
		case 'Y':

		    /*
		     * W or Y is kept only when the following position
		     * is accepted by vowel(); otherwise it is dropped.
		     */
		    if (vowel((n + 1)))
			*Metaph++ = *n;
		    break;
		case 'X':

		    /*
		     * KS
		     */
		    if (n == n_start)
			*Metaph++ = 'S';
		    else {
			*Metaph++ = 'K';	/* Insert K, then S */
			KSflag = 1;
		    }
		    break;
		case 'Z':

		    /*
		     * S
		     */
		    *Metaph++ = 'S';
		    break;
		}
	    }
	}
    }

    *Metaph = 0;	/* Null terminate */
    return( slapi_ch_strdup( buf ) );
}
/*
 * Decide whether phoneme `prev` may directly precede phoneme `curr`.
 *
 * Any pair that involves a vowel is accepted.  For two consonants the
 * answer comes from a fixed list of what can precede what; a `curr`
 * that is not in the list is rejected.
 *
 * Returns 1 when the sequence is allowed, 0 otherwise.
 */
int tsFreePhoneImplementation::seq(char * prev, char * curr)
{
    /* One row per second consonant, with the first consonants it accepts. */
    struct Rule {
        const char *second;
        const char *firsts[10];     /* terminated by a null pointer */
    };
    static const Rule rules[] = {
        { "r", { "p", "t", "k", "b", "d", "g", "th", "sh", "f", 0 } },
        { "y", { "p", "t", "k", "b", "d", "s", "f", 0 } },
        { "l", { "p", "k", "b", "g", "s", "f", 0 } },
        { "w", { "t", "k", "th", 0 } },
        { "p", { "s", 0 } },
        { "t", { "s", 0 } },
        { "k", { "s", 0 } },
        { "m", { "s", 0 } },
        { "n", { "s", 0 } },
    };
    const int ruleCount = (int)(sizeof(rules) / sizeof(rules[0]));

    if (vowel(prev) || vowel(curr)) {
        return(1);
    }

    for (int r = 0; r < ruleCount; r++) {
        if (strcmp(curr, rules[r].second) != 0)
            continue;
        for (int f = 0; rules[r].firsts[f] != 0; f++) {
            if (strcmp(prev, rules[r].firsts[f]) == 0)
                return(1);
        }
        return(0);
    }
    return(0);
}
/*
 * Returns TRUE if any character of z->b at index 0 through z->j
 * (inclusive) is a vowel, FALSE otherwise.
 */
static int vowelinstem(struct stemmer * z)
{
    char * text = z->b;
    int limit = z->j;
    int pos = 0;

    while (pos <= limit) {
        if (vowel(text[pos]))
            return TRUE;
        pos++;
    }
    return FALSE;
}
char *tsFreePhoneImplementation::stress(char *param) /* no stress allowed initially */ { /* plan -- allow 2 spaces more in result */ char **llist; int nll; int words; int look; int *marker; char *stringout; int i; int hs; int reserve; llist = split(param); free(param); nll=0; words=0; while(llist[nll] != NULL) { if(!strcmp(llist[nll],"|")) words++; nll++; } marker = (int *)malloc(sizeof(int)*nll); for(i=0;i<nll;i++) marker[i] = 0; stringout = (char *)malloc(sizeof(char *)*(nll+words*2+2)); stringout[0] = '\0'; look=2; reserve = -1; hs = 0; for(i=nll-1;i>=0;i--) { if(!strcmp(llist[i],"|")) { reserve = -1; look = 2 ; /* ie just started looking */ } else if(look==2) { hs = heavy(llist[i]); look = 1; } else if(look==0) { ; } if(look==1 && hs) { if(vowel(llist[i])) { marker[i] = 1; look=0; } } else if(look==1) { if(vowel(llist[i]) && reserve == (-1)) { reserve = i; } else if(vowel(llist[i])) { marker[i] = 1; reserve = -1; look=0; } } } if(reserve!=(-1)) { marker[reserve] = 1; } for(i=0;i<nll;i++) { if(marker[i]) strcat(stringout,"* "); strcat(stringout,llist[i]); strcat(stringout," "); } tidy_split(llist); free(marker); return(stringout); }
/*
 * Build a metaphone-style phonetic code for the word at Word and return
 * ch_strdup() of it (presumably a heap copy the caller owns -- confirm
 * against ch_strdup's contract).  An empty code is returned when Word
 * contributes no alphabetic characters.
 *
 * Only alphabetic characters are used; they are upper-cased into a
 * local buffer.  At most MAXPHONEMELEN code characters are produced.
 */
char *
phonetic( char *Word )
{
	char           *n, *n_start, *n_end;	/* pointers to string */
	char           *metaph_end;	/* pointers to metaph */
	char            ntrans[40];	/* word with uppercase letters */
	int             KSflag;	/* state flag for X -> KS */
	char		buf[MAXPHONEMELEN + 2];
	char		*Metaph;

	/*
	 * Copy Word to internal buffer, dropping non-alphabetic characters
	 * and converting to upper case.  The word starts at ntrans + 4 so
	 * that four bytes of padding sit in front of it, and at most 31
	 * letters are kept.
	 */

	for (n = ntrans + 4, n_end = ntrans + 35; !iswordbreak( *Word ) &&
	    n < n_end; Word++) {
		if (isalpha((unsigned char)*Word))
			*n++ = TOUPPER((unsigned char)*Word);
	}
	Metaph = buf;
	*Metaph = '\0';
	if (n == ntrans + 4) {
		return( ch_strdup( buf ) );		/* Return if null */
	}
	n_end = n;		/* Set n_end to end of string */

	/* ntrans[0] will always be == 0 */
	ntrans[0] = '\0';
	ntrans[1] = '\0';
	ntrans[2] = '\0';
	ntrans[3] = '\0';
	*n++ = 0;
	*n++ = 0;
	*n++ = 0;
	*n = 0;			/* Pad with nulls */
	n = ntrans + 4;		/* Assign pointer to start */

	/* Check for PN, KN, GN, AE, WR, WH, and X at start */
	switch (*n) {
	case 'P':
	case 'K':
	case 'G':
		/* 'PN', 'KN', 'GN' becomes 'N' */
		if (*(n + 1) == 'N')
			*n++ = 0;
		break;
	case 'A':
		/* 'AE' becomes 'E' */
		if (*(n + 1) == 'E')
			*n++ = 0;
		break;
	case 'W':
		/*
		 * 'WR' becomes 'R'; for 'WH' the H is overwritten with the
		 * W and the first W is blanked, so the pair is read as 'W'.
		 */
		if (*(n + 1) == 'R')
			*n++ = 0;
		else if (*(n + 1) == 'H') {
			*(n + 1) = *n;
			*n++ = 0;
		}
		break;
	case 'X':
		/* 'X' becomes 'S' */
		*n = 'S';
		break;
	}

	/*
	 * Now, loop step through string, stopping at end of string or when
	 * the computed 'metaph' is MAXPHONEMELEN characters long
	 */

	KSflag = 0;		/* state flag for KS translation */
	for (metaph_end = Metaph + MAXPHONEMELEN, n_start = n;
	     n <= n_end && Metaph < metaph_end; n++) {
		if (KSflag) {
			/*
			 * Second half of X -> KS.  NOTE(review): the
			 * character at n in this iteration is not examined,
			 * so the letter following a non-initial X never
			 * reaches the rules below -- confirm this is
			 * intended.
			 */
			KSflag = 0;
			*Metaph++ = 'S';
		} else {
			/* Drop duplicates except for CC */
			if (*(n - 1) == *n && *n != 'C')
				continue;
			/* Check for F J L M N R or first letter vowel */
			if (same(*n) || (n == n_start && vowel(*n)))
				*Metaph++ = *n;
			else
				switch (*n) {
				case 'B':

					/*
					 * B unless in -MB
					 *
					 * NOTE(review): as written, B is
					 * emitted only when it IS the last
					 * letter and is not preceded by M;
					 * every non-final B is dropped --
					 * confirm.
					 */
					if (n == (n_end - 1) && *(n - 1) != 'M')
						*Metaph++ = *n;
					break;
				case 'C':

					/*
					 * X if in -CIA-, -CH- else S if in
					 * -CI-, -CE-, -CY- else dropped if
					 * in -SCI-, -SCE-, -SCY- else K
					 */
					if (*(n - 1) != 'S' || !frontv(*(n + 1))) {
						if (*(n + 1) == 'I' && *(n + 2) == 'A')
							*Metaph++ = 'X';
						else if (frontv(*(n + 1)))
							*Metaph++ = 'S';
						else if (*(n + 1) == 'H')
							*Metaph++ = ((n == n_start && !vowel(*(n + 2)))
							 || *(n - 1) == 'S')
							    ? (char) 'K' : (char) 'X';
						else
							*Metaph++ = 'K';
					}
					break;
				case 'D':

					/*
					 * J if in DGE or DGI or DGY else T
					 */
					*Metaph++ = (*(n + 1) == 'G' && frontv(*(n + 2)))
					    ? (char) 'J' : (char) 'T';
					break;
				case 'G':

					/*
					 * F if in -GH and not B--GH, D--GH,
					 * -H--GH, -H---GH else dropped if
					 * -GNED, -GN, -DGE-, -DGI-, -DGY-
					 * else J if in -GE-, -GI-, -GY- and
					 * not GG else K
					 *
					 * NOTE(review): the first test below
					 * compares the next letter with 'J'
					 * (not 'H') and the front-vowel
					 * branch emits 'G' (not 'J'), which
					 * differs from the rule text above
					 * -- confirm before changing, since
					 * it alters the generated codes.
					 */
					if ((*(n + 1) != 'J' || vowel(*(n + 2))) &&
					    (*(n + 1) != 'N' || ((n + 1) < n_end &&
								 (*(n + 2) != 'E' || *(n + 3) != 'D'))) &&
					    (*(n - 1) != 'D' || !frontv(*(n + 1))))
						*Metaph++ = (frontv(*(n + 1)) &&
							     *(n + 2) != 'G') ? (char) 'G' : (char) 'K';
					else if (*(n + 1) == 'H' && !noghf(*(n - 3)) &&
						 *(n - 4) != 'H')
						*Metaph++ = 'F';
					break;
				case 'H':

					/*
					 * H if before a vowel and not after
					 * C, G, P, S, T else dropped
					 */
					if (!varson(*(n - 1)) && (!vowel(*(n - 1)) ||
							   vowel(*(n + 1))))
						*Metaph++ = 'H';
					break;
				case 'K':

					/*
					 * dropped if after C else K
					 */
					if (*(n - 1) != 'C')
						*Metaph++ = 'K';
					break;
				case 'P':

					/*
					 * F if before H, else P
					 */
					*Metaph++ = *(n + 1) == 'H' ?
					    (char) 'F' : (char) 'P';
					break;
				case 'Q':

					/*
					 * K
					 */
					*Metaph++ = 'K';
					break;
				case 'S':

					/*
					 * X in -SH-, -SIO- or -SIA- else S
					 */
					*Metaph++ = (*(n + 1) == 'H' ||
						     (*(n + 1) == 'I' && (*(n + 2) == 'O' ||
							  *(n + 2) == 'A')))
					    ? (char) 'X' : (char) 'S';
					break;
				case 'T':

					/*
					 * X in -TIA- or -TIO- else 0 (zero)
					 * before H else dropped if in -TCH-
					 * else T
					 */
					if (*(n + 1) == 'I' && (*(n + 2) == 'O' ||
							   *(n + 2) == 'A'))
						*Metaph++ = 'X';
					else if (*(n + 1) == 'H')
						*Metaph++ = '0';
					else if (*(n + 1) != 'C' || *(n + 2) != 'H')
						*Metaph++ = 'T';
					break;
				case 'V':

					/*
					 * F
					 */
					*Metaph++ = 'F';
					break;
				case 'W':
				case 'Y':

					/*
					 * W or Y is kept only when the next
					 * character is accepted by vowel();
					 * otherwise it is dropped.
					 */
					if (vowel(*(n + 1)))
						*Metaph++ = *n;
					break;
				case 'X':

					/*
					 * KS
					 */
					if (n == n_start)
						*Metaph++ = 'S';
					else {
						*Metaph++ = 'K';	/* Insert K, then S */
						KSflag = 1;
					}
					break;
				case 'Z':

					/*
					 * S
					 */
					*Metaph++ = 'S';
					break;
				}
		}
	}

	*Metaph = 0;		/* Null terminate */
	return( ch_strdup( buf ) );
}