/*
 * do_tempaug -- expand a word into its variants from the TempAugments table.
 *
 * The working word of gkform[0] is copied, stripped with striphyph(), and
 * compared against the `noaug` prefix of every TempAugments entry (the
 * table ends at the first entry whose `noaug` is empty).  For each entry
 * that matches and whose dialect can be combined with the word's dialect
 * (AndDialect() result not negative), the next free slot of the gkform
 * array receives a copy of the original word whose working word has the
 * `noaug` prefix replaced by `withaug`, and whose aug1 field records the
 * substitution as "noaug>withaug".
 *
 * gkform  - in: the word in slot 0; out: the generated variants, written
 *           from slot 0 upwards.
 * maxaugs - number of variants after which generation stops (a diagnostic
 *           is printed on stderr when that count is reached).
 *
 * Returns the number of variants written.
 */
do_tempaug(gk_word *gkform, int maxaugs)
{
    gk_word original;
    char stem[MAXWORDSIZE];
    char augmented[MAXWORDSIZE];
    Dialect common;
    int found = 0;
    int skip;
    int idx;

    /* Slot 0 is overwritten below, so keep a pristine copy of the input. */
    original = *gkform;
    Xstrncpy(stem, workword_of(gkform), MAXWORDSIZE);
    striphyph(stem);

    for (idx = 0; TempAugments[idx].noaug[0]; idx++) {
        gk_word *slot;

        skip = Xstrlen(TempAugments[idx].noaug);
        if (Xstrncmp(TempAugments[idx].noaug, stem, skip) != 0)
            continue;

        slot = gkform + found;
        *slot = original;

        /* A negative result means the dialects cannot be combined. */
        common = AndDialect(dialect_of(slot), TempAugments[idx].augdial);
        if (common < 0)
            continue;
        if (common)
            set_dialect(slot, common);

        /* New working word: augmented prefix + remainder of the stem. */
        Xstrncpy(augmented, TempAugments[idx].withaug, MAXWORDSIZE);
        if (stem[skip] == HARDSHORT)
            skip++;             /* drop the marker that followed the prefix */
        Xstrncat(augmented, stem + skip, MAXWORDSIZE);

        sprintf(aug1_of(slot), "%s>%s",
                TempAugments[idx].noaug, TempAugments[idx].withaug);
        set_dialect(aug1_gstr_of(slot), TempAugments[idx].augdial);
        set_workword(slot, augmented);

        found++;
        if (found >= maxaugs) {
            fprintf(stderr, "temp: got naugs %d with max %d\n",
                    found, maxaugs);
            break;
        }
        if (TempAugments[idx].uniqueflag)
            break;
    }

    return (found);
}
/*
 * do_syllaug -- expand a word into its variants from the SyllAugments table.
 *
 * The working word of gkform[0] is copied, stripped with striphyph(), and
 * compared against the `noaug` prefix of every SyllAugments entry (the
 * table ends at the first entry whose `noaug` is empty).  For each entry
 * that matches and whose dialect can be combined with the word's dialect
 * (AndDialect() result not negative), the next free slot of the gkform
 * array receives a copy of the original word in which
 *   - the working word has `noaug` replaced by `withaug`,
 *   - aug1 records the substitution as "noaug>withaug",
 *   - SYLL_AUG is set on the aug1 morph flags and cleared on the stem's.
 *
 * gkform  - in: the word in slot 0; out: the generated variants, written
 *           from slot 0 upwards.
 * maxaugs - number of variants after which generation stops (a diagnostic
 *           is printed on stderr when that count is reached).
 *
 * Returns the number of variants written.
 */
do_syllaug(gk_word *gkform, int maxaugs)
{
    int i;
    int naugs = 0;
    int wstart;
    Dialect d;
    char tmpstem[MAXWORDSIZE];
    char tmp[MAXWORDSIZE];
    gk_word TmpGkword;

    /* Slot 0 is overwritten below, so keep a pristine copy of the input. */
    TmpGkword = *gkform;
    Xstrncpy(tmpstem, workword_of(gkform), MAXWORDSIZE);
    striphyph(tmpstem);

    for (i = 0; SyllAugments[i].noaug[0]; i++) {
        gk_word *cur;

        wstart = Xstrlen(SyllAugments[i].noaug);
        if (Xstrncmp(SyllAugments[i].noaug, tmpstem, wstart))
            continue;

        cur = gkform + naugs;
        *cur = TmpGkword;

        /* A negative result means the dialects cannot be combined. */
        if ((d = AndDialect(dialect_of(cur), SyllAugments[i].augdial)) < 0)
            continue;
        if (d)
            set_dialect(cur, d);

        /* New working word: augmented prefix + remainder of the stem. */
        Xstrncpy(tmp, SyllAugments[i].withaug, MAXWORDSIZE);
        if (tmpstem[wstart] == HARDSHORT)
            wstart++;           /* drop the marker that followed the prefix */
        Xstrncat(tmp, tmpstem + wstart, MAXWORDSIZE);

        sprintf(aug1_of(cur), "%s>%s",
                SyllAugments[i].noaug, SyllAugments[i].withaug);
        set_dialect(aug1_gstr_of(cur), SyllAugments[i].augdial);
        add_morphflag(morphflags_of(aug1_gstr_of(cur)), SYLL_AUG);
        zap_morphflag(morphflags_of(stem_gstr_of(cur)), SYLL_AUG);
        set_workword(cur, tmp);

        naugs++;
        if (naugs >= maxaugs) {
            /*
             * The prefix used to read "temp:", copied verbatim from
             * do_tempaug(), which made the two diagnostics
             * indistinguishable on stderr.
             */
            fprintf(stderr, "syll: got naugs %d with max %d\n",
                    naugs, maxaugs);
            break;
        }
        if (SyllAugments[i].uniqueflag)
            break;
    }

    return (naugs);
}
/*
 * endstrcmp -- compare a wanted ending against the ending actually found.
 *
 * When the global `start_match` is zero this is simply
 * dictstrcmp(wendstr, haveendstr).
 *
 * When `start_match` is non-zero, only a leading portion of haveendstr is
 * compared: characters are collected from haveendstr until as many have
 * been kept as wendstr is long.  A character is kept when it is alphabetic
 * or when it equals the character of wendstr at the position being filled;
 * any other character is skipped.
 *
 * wendstr     - the ending being looked for.
 * haveendstr  - the ending present in the word; not modified.
 *
 * Returns whatever dictstrcmp() returns for the two strings compared.
 */
endstrcmp(char *wendstr, char *haveendstr)
{
    char tmp[MAXWORDSIZE];
    char *hp, *sp;
    int i = 0;
    int wlen;

    if (start_match) {
        wlen = Xstrlen(wendstr);
        hp = haveendstr;
        sp = tmp;

        /*
         * Stop at the end of haveendstr as well as after wlen characters:
         * without the NUL test a short haveendstr sent hp marching past
         * its terminator.  The last test keeps the copy inside tmp.
         */
        while (i < wlen && *hp != '\0' && sp < tmp + MAXWORDSIZE - 1) {
            if (!isalpha((unsigned char) *hp) && *(wendstr + i) != *hp) {
                hp++;
                continue;
            }
            *sp++ = *hp++;
            i++;
        }
        *sp = 0;

        haveendstr = tmp;
    }

    return (dictstrcmp(wendstr, haveendstr));
}
/*
 * is_substring -- locate the first occurrence of s1 inside s2.
 *
 * s1 - the string to look for.
 * s2 - the string to search.
 *
 * Returns a pointer into s2 at the start of the first match, or NULL when
 * s1 is empty, longer than s2, or does not occur in s2.
 */
char *
is_substring(char *s1, char *s2)
{
    register char *cursor;
    int needle_len;

    needle_len = Xstrlen(s1);
    if (needle_len == 0)
        return (NULL);
    if (needle_len > Xstrlen(s2))
        return (NULL);

    for (cursor = s2; *cursor; cursor++) {
        /* Only positions that start with s1's first character can match. */
        if (*cursor != *s1)
            continue;
        if (Xstrncmp(cursor, s1, needle_len) == 0)
            return (cursor);
    }

    return (NULL);
}
unaugfromlemma(char *stem, char *lemma) { char tmp[MAXWORDSIZE]; int i, withlen, noauglen; int compval; /* * normal augment on consonantal stem, e.g. "e)ball" and "ba/llw" */ if( !Xstrncmp(stem,"e)",2) ) { /* * if you have "e)" as an augment, but your lemma does not * start with a consonant, you got something funny ... * * yeah--you might have e)ws- as in w)qe/w, let it ride * grc 2/18/87 */ if( Is_cons(*lemma) ) { Xstrncpy(tmp,stem+2,MAXWORDSIZE); Xstrncpy(stem,tmp,MAXWORDSIZE); return(TEMP_AUGMENT); } /* grc 2/18/87 return(-1); */ } for(i=0;TempAugments[i].noaug[0];i++) { withlen = Xstrlen(TempAugments[i].withaug); if( !Xstrncmp( TempAugments[i].withaug , stem , withlen)) { noauglen = Xstrlen(TempAugments[i].noaug); /* * ok, so the you have a match between the augmented stem, and the * unaugmented lemma. make the current stem start the same way as * the lemma */ if(!Xstrncmp( TempAugments[i].noaug, lemma ,noauglen)){ Xstrncpy(tmp,TempAugments[i].noaug,MAXWORDSIZE); Xstrncat(tmp,stem+Xstrlen(TempAugments[i].withaug) ,MAXWORDSIZE); Xstrncpy(stem,tmp,MAXWORDSIZE); return(TEMP_AUGMENT); } } } for(i=0;SyllAugments[i].noaug[0];i++) { withlen = Xstrlen(SyllAugments[i].withaug); compval = Xstrncmp( SyllAugments[i].withaug , stem , withlen); if( ! compval ) { noauglen = Xstrlen(SyllAugments[i].noaug); /* * ok, so the you have a match between the augmented stem, and the * unaugmented lemma. make the current stem start the same way as * the lemma */ if(!Xstrncmp( SyllAugments[i].noaug, lemma ,noauglen)){ Xstrncpy(tmp,SyllAugments[i].noaug,MAXWORDSIZE); Xstrncat(tmp,stem+Xstrlen(SyllAugments[i].withaug),MAXWORDSIZE ); Xstrncpy(stem,tmp,MAXWORDSIZE); return(SYLL_AUGMENT); } } } return(-1); }
/*
 * unaugment -- propose the unaugmented stems an augmented string may
 * come from.
 *
 * this has to do a better job with dialects at some point
 *
 * grc 3/21/91
 *
 * The caller says, through `wantredupl`, whether a reduplication is being
 * undone.  If not, every stem produced here is restricted to the
 * indicative mood.
 *
 * Candidates are tried in this order; the first three return at once:
 *   "e)rr..."           -> "r(" + the rest after "e)rr"
 *   "e)r..."            -> "r(" + the rest after "e)r", flagged RAW_SONANT
 *   "e)" + consonant    -> the rest after "e)"; when that consonant is
 *                          doubled (e)/llabe, e)ssei/onto), a second
 *                          candidate with one consonant dropped, flagged
 *                          SYLL_AUGMENT
 *   TempAugments table  -> `withaug` (after stripquant()) replaced by
 *                          `noaug`
 *   SyllAugments table  -> likewise, only when `wantsyllaugs` is non-zero;
 *                          candidates are flagged SYLL_AUGMENT
 *
 * s            - the augmented string.
 * possibs      - out: candidate stems; filled from index 0.
 * qpossibs     - out: for a TempAugments match whose `withaug` was changed
 *                by stripquant(), qpossibs[k] receives the string rebuilt
 *                with the unstripped `withaug`; other slots are untouched.
 * maxstems     - capacity of possibs/qpossibs.
 * dial         - not used by this function.
 * wantsyllaugs - non-zero to search SyllAugments as well.
 * wantredupl   - non-zero when undoing a reduplication (see above).
 *
 * Returns the number of candidates stored in possibs.
 */
unaugment(char *s, gk_string *possibs[], gk_string *qpossibs[], int maxstems, Dialect dial, int wantsyllaugs, int wantredupl)
{
    int rval = 0;
    int compval;
    int i;
    char augnoquant[MAXWORDSIZE];
    /*
     * Scratch buffer.  It used to be char[128] while every copy into it
     * was bounded by MAXWORDSIZE; size it like the other scratch buffers.
     */
    char tmp[MAXWORDSIZE];

    if (!Xstrncmp(s, "e)rr", 4)) {
        rval = 1;
        Xstrncpy(gkstring_of(possibs[0]), "r(", MAXWORDSIZE);
        Xstrncat(gkstring_of(possibs[0]), s + 4, MAXWORDSIZE);
        if (!wantredupl) {
            set_mood(forminfo_of(possibs[0]), INDICATIVE);
        }
        return (rval);
    }

    if (!Xstrncmp(s, "e)r", 3)) {
        rval = 1;
        Xstrncpy(gkstring_of(possibs[0]), "r(", MAXWORDSIZE);
        Xstrncat(gkstring_of(possibs[0]), s + 3, MAXWORDSIZE);
        add_morphflag(morphflags_of(possibs[0]), RAW_SONANT);
        if (!wantredupl) {
            set_mood(forminfo_of(possibs[0]), INDICATIVE);
        }
        return (rval);
    }

    if (!Xstrncmp(s, "e)", 2) && Is_cons(*(s + 2))) {
        rval = 1;
        Xstrncpy(gkstring_of(possibs[0]), s + 2, MAXWORDSIZE);
        /*
         * e)/llabe, e)ssei/onto 429 a. D
         *
         * The second candidate needs a second slot; the table loops below
         * treat maxstems as the capacity of possibs, so honour it here too.
         */
        if (*(s + 2) == *(s + 3) && maxstems > 1) {
            rval = 2;
            Xstrncpy(gkstring_of(possibs[1]), s + 3, MAXWORDSIZE);
            add_morphflag(morphflags_of(possibs[1]), SYLL_AUGMENT);
        }
        if (!wantredupl) {
            set_mood(forminfo_of(possibs[0]), INDICATIVE);
        }
        return (rval);
    }

    for (i = 0; TempAugments[i].noaug[0] && rval < maxstems; i++) {
        Xstrncpy(augnoquant, TempAugments[i].withaug, MAXWORDSIZE);
        stripquant(augnoquant);
        compval = morphstrncmp(augnoquant, s, strlen(augnoquant));
        if (compval)
            continue;

        Xstrncpy(tmp, TempAugments[i].noaug, MAXWORDSIZE);
        Xstrncat(tmp, s + Xstrlen(augnoquant), MAXWORDSIZE);
        Xstrncpy(gkstring_of(possibs[rval]), tmp, MAXWORDSIZE);

        /* stripquant() changed the augment: also keep the marked form. */
        if (strcmp(augnoquant, TempAugments[i].withaug)) {
            Xstrncpy(tmp, TempAugments[i].withaug, MAXWORDSIZE);
            Xstrncat(tmp, s + Xstrlen(augnoquant), MAXWORDSIZE);
            Xstrncpy(gkstring_of(qpossibs[rval]), tmp, MAXWORDSIZE);
            set_dialect(possibs[rval], TempAugments[i].augdial);
        }
        if (!wantredupl) {
            set_mood(forminfo_of(possibs[rval]), INDICATIVE);
        }
        rval++;
    }

    if (!wantsyllaugs)
        return (rval);

    for (i = 0; SyllAugments[i].noaug[0] && rval < maxstems; i++) {
        Xstrncpy(augnoquant, SyllAugments[i].withaug, MAXWORDSIZE);
        stripquant(augnoquant);
        compval = morphstrncmp(augnoquant, s, strlen(augnoquant));
        if (compval)
            continue;

        Xstrncpy(tmp, SyllAugments[i].noaug, MAXWORDSIZE);
        Xstrncat(tmp, s + Xstrlen(augnoquant), MAXWORDSIZE);
        Xstrncpy(gkstring_of(possibs[rval]), tmp, MAXWORDSIZE);
        set_dialect(possibs[rval], SyllAugments[i].augdial);
        add_morphflag(morphflags_of(possibs[rval]), SYLL_AUGMENT);
        if (!wantredupl) {
            set_mood(forminfo_of(possibs[rval]), INDICATIVE);
        }
        rval++;
    }

    return (rval);
}
beta2mac(char *source, char *res, int xlit) { char * sp; /*unsigned*/ char * rp; int acc; int saw_isub = 0; int long_vowel = 0; if( ! gkinit ) init_gktab(); sp = source; rp = res; while(*sp) { if( *sp == '$' ) { if( charstyle_flag ) { if( rp == res ) { END_CHARSTYLE(rp); /* *rp++ = 0253;*/ } else { if( rp > res) rp--; /* grc 6/26/89 while(*rp == ' ' && rp > res ) rp--; */ if( ispunct(*rp) ) { *(rp+1) = *rp; END_CHARSTYLE(rp); /* *rp = 0253; rp++; */ rp++; } else { rp++; END_CHARSTYLE(rp); /* *rp++ = 0253;*/ } /* *rp++ = ' '; */ } charstyle_flag = 0; } sp = greekfont(sp); if( xlit == SMK ) { strcpy(rp,GKFONT); rp += Xstrlen(GKFONT); } continue; } else if( *sp == '&' ) { if( charstyle_flag && ! IS_CHARSTYLE(sp)) { END_CHARSTYLE(rp); charstyle_flag = 0; } /* } else if( ! charstyle_flag && IS_CHARSTYLE(sp) ) { */ if( IS_CHARSTYLE(sp) ) { if( (*(sp+1) == '3' && charstyle_flag == BOLD ) && (*(sp+1) == '1' && charstyle_flag == ITALIC ) ) { END_CHARSTYLE(rp); charstyle_flag = 0; } if( *(sp+1) == '3' ) charstyle_flag = ITALIC; else if( *(sp+1) == '1' ) charstyle_flag = BOLD; /* *rp++ = ' '; */ if( xlit == SMARTA ) *rp++ = 0137; else { if( charstyle_flag == ITALIC ) { strcpy(rp,ITALICFONT); rp += Xstrlen(ITALICFONT); } else { strcpy(rp,BOLDFONT); rp += Xstrlen(BOLDFONT); } } sp += 2; while(isspace(*sp)) sp++; } sp = romanfont(sp); if( xlit == SMK && ! 
charstyle_flag && *(rp-1) != '}' ) { strcpy(rp,ROMANFONT); rp += Xstrlen(ROMANFONT); } continue; } if( *sp == '%' ) { int n; char numbuf[8]; char * np; np = numbuf; n = atoi(++sp); while(isdigit(*sp)) *np++ = *sp++; *np = 0; switch(n) { case 1: *rp++ = '?'; break; case 2: *rp++ = '*'; break; case 4: *rp++ = '!'; break; case 6: if( xlit == SMARTA ) *rp++ = EQUALS; else { if( cur_font == GK ) { strcpy(rp,ROMANFONT); rp += Xstrlen(ROMANFONT); strcpy(rp,"=}{"); rp += 3; strcpy(rp,GKFONT); rp += Xstrlen(GKFONT); } else *rp++ = '='; } break; case 10: if( xlit == SMARTA ) *rp++ = '`'; else *rp++ = ':'; break; case 40: if( xlit == SMK ) { *rp++ = ' '; *rp++ = SMK_SHORTMARK; } else if( xlit == SMARTA ) { *rp++ = SMARTA_SHORTMARK; break; } case 41: if( xlit == SMK ) { *rp++ = ' '; *rp++ = SMK_LONGMARK; break; } else if( xlit == SMARTA ) { *rp++ = SMARTA_LONGMARK; } default: np = numbuf; *rp ++ = '%'; while(*np) *rp++ = *np++; break; } continue; } if( *sp == '*' && cur_font == ROMAN ) { if( xlit == SMARTA ) { *rp++ = UCASEMARKER; } else if (xlit == SMK ) { sp++; strcpy(rp,sp); if( islower(*rp) ) *rp = toupper(*rp); rp++; } sp++; continue; } if( cur_font == ROMAN &&isalpha(*sp) ) { if( isupper(*sp) ) { *rp++ = UCASEMARKER; *rp++ = *sp++; continue; } else { if( xlit == SMARTA ) *rp++ = toupper(*sp++); else *rp++ = *sp++; } continue; } if( *sp == '[' && *(sp+1) == '1' && *(sp+2) != '.' ) { /* * grc 6/5/88 * * note that we want to accept things like "[1.]" which show up * all the time in the Greek English Lexicon. 
*/ *rp++ = '('; sp += 2; continue; } if( *sp == ']' && *(sp+1) == '1') { *rp++ = ')'; sp += 2; continue; } if(isalpha(*sp) || *sp == '*') { acc = 0; if( xlit == SMK && *sp == '*' ) { if( Is_accflag(*(sp+1)) ) { char * t = sp; *sp = ' '; while(*t&&!isalpha(*t)) t++; if(isalpha(*t) && islower(*t) ) *t = toupper(*t); } else { strcpy(sp,sp+1); if(islower(*sp)) *sp = toupper(*sp); } } *rp = *sp++; if( isupper(*rp) && xlit == SMARTA ) { *(rp+1) = tolower(*rp); *rp++ = UCASEMARKER; } /* if( *rp == '*' ) { if( xlit == SMARTA ) *rp = UCASEMARKER; *sp = smk_char_xlit(*sp,sp+1); } else */ *rp = smk_char_xlit(*rp,sp,xlit); /* if( *rp == 's' && !isalpha(*sp) && *sp != '\'' && *sp != '-' ) *rp = TERMINAL_SIGMA; else if( *rp == 'w' ) *rp = 'v'; else if( *rp == 'q' ) *rp = 'y'; else if( *rp == 'Q' ) *rp = 'Y'; else if( *rp == 'c' ) *rp = 'j'; else if( *rp == 'C' ) *rp = 'J'; else if (*rp == 'y' ) *rp = 'c'; else if (*rp == 'W' ) *rp = 'V'; else if (*rp == 'V' ) *rp = 'C'; else if( *rp == 'v' ) /* digamma * *rp = 'W'; else if( *rp == '*' && xlit == SMARTA ) { *rp = UCASEMARKER; } /*else if ( xlit == SMK && ) { unsigned char * t = sp; if( Is_accflag(*sp)) { *rp = ' '; while(*t&&!isalpha(*t)) t++; if(isalpha(*t)&&islower(*t)) *t = toupper(*t); } else { *rp = toupper(*sp++); } } }*/ while( Is_accflag(*sp) ) { if( *sp == HARDLONG ) { long_vowel++; sp++; } else if( *sp == '|' ) { saw_isub++; sp++; } else if (*sp == HARDSHORT) sp++; /* * don't count the hard short marker (no way to print it for now) */ else acc += accenttab[*sp++]; } /* printf("got [%o] ", acc ); */ /* if( *sp == '|' ) { saw_isub++; sp++; } */ /* * if you see a capital letter marker * then keep that and find the letter that that * capital would cover. */ if( acc && *rp == UCASEMARKER && xlit == SMARTA ) { if( isalpha(*sp ) ) { *++rp = *sp++; *rp = smk_char_xlit(*rp,sp,xlit); } } /* * don't bother showing the long mark if the letter is * accented with a circumflex */ if( long_vowel ) { if( ! 
(acc & CIRCUMFLAG ) ) { if(xlit == SMARTA ) { if( *rp == 'a' ) *rp = 046; else if( *rp == 'i' ) { *rp = 0372; *rp &= 0377; } else if( *rp == 'u' ) { *rp = 0304; *rp &= 0377; } } else if( xlit == SMK ) { *(rp+1) = *rp; *rp++ = '*'; } } long_vowel = 0; } if( saw_isub && xlit == SMK ) { switch(*rp) { case 'a': *rp = AISUB; break; case 'h': *rp = HISUB; *rp &= 0377; break; case 'v': *rp = WISUB; *rp &= 0377; break; default: break; } saw_isub = 0; } if( acc ) { if(*rp == 'r' && acc == ROUGHFLAG ) { if( xlit == SMK ) *rp = SMK_ROUGH_RHO; else *rp = SMARTA_ROUGH_RHO; } else if( acc == DIAERFLAG && (*rp == 'i' || *rp == 'u') ) { if( *rp == 'i' ) *rp = 0363; else *rp = 043; } else if( acc == (DIAERFLAG|ACUTEFLAG) && (*rp == 'i' || *rp == 'u') ) { if( *rp == 'i' ) *rp = 0375; else *rp = 0100; }else if( acc == (DIAERFLAG|GRAVEFLAG) && (*rp == 'i' || *rp == 'u') ) { if( *rp == 'i' ) *rp = 0376; else *rp = 0243; } else if( !gktab[*rp] ) { *(rp+1) = *rp; *rp = '?'; rp += 2; *rp = '?'; } else *rp = (unsigned char) (gktab[*rp] + accnum(acc)); /* if(1) { int n; n = *rp; n &= 0377; printf(" *rp [%o] n [%o] ", *rp , n ); } */ } if( saw_isub && xlit == SMARTA ) { saw_isub = 0; *++rp = 'i'; } } else if( *sp == '_' ) { *rp = '-'; sp++; } else *rp = *sp++; rp++; } *rp = 0; }