Пример #1
0
/*
 * do_tempaug: generate augmented forms of a word from the TempAugments table.
 *
 * The working word of gkform[0], with hyphens stripped by striphyph(), is
 * compared against the `noaug` prefix of each TempAugments entry (the table
 * ends at the first entry whose `noaug` is empty).  Entries for which
 * AndDialect() returns a negative value are skipped.  For every other
 * matching entry, slot gkform[naugs] receives a copy of the original word
 * in which
 *   - the working word is `withaug` followed by the rest of the stem
 *     (a HARDSHORT mark directly after the matched prefix is dropped),
 *   - the augment string is "noaug>withaug",
 *   - the augment's dialect is the table entry's `augdial`.
 * Scanning stops after an entry whose `uniqueflag` is set.
 *
 * gkform   in/out: gkform[0] is the input word; results are written to
 *          gkform[0], gkform[1], ...  The caller is expected to supply
 *          room for maxaugs entries.
 * maxaugs  stop once this many results exist (a diagnostic goes to stderr).
 *
 * Returns the number of results written.
 */
int
do_tempaug(gk_word *gkform, int maxaugs)
{
	int i, wstart;
	int naugs = 0;
	char tmpstem[MAXWORDSIZE];
	Dialect d;
	gk_word TmpGkword;

	/* keep a pristine copy: gkform[0] itself is overwritten by the first result */
	TmpGkword = *gkform;

	Xstrncpy(tmpstem,workword_of(gkform),MAXWORDSIZE);
	striphyph(tmpstem);

	for(i=0;TempAugments[i].noaug[0];i++) {
		char tmp[MAXWORDSIZE];

		/* only entries whose unaugmented prefix starts the stem are of interest */
		if( Xstrncmp( TempAugments[i].noaug , tmpstem , Xstrlen(TempAugments[i].noaug )) )
			continue;

		*(gkform+naugs) = TmpGkword;

		if((d=AndDialect(dialect_of(gkform+naugs),TempAugments[i].augdial))<0) {
			continue;
		}

		if( d ) set_dialect((gkform+naugs),d);

		/* new working word = augmented prefix + remainder of the stem */
		Xstrncpy(tmp,TempAugments[i].withaug,MAXWORDSIZE);

		wstart = Xstrlen(TempAugments[i].noaug);
		if( tmpstem[wstart] == HARDSHORT ) wstart++;
		Xstrncat(tmp,tmpstem+wstart ,MAXWORDSIZE);

		/*
		 * NOTE(review): sprintf() is unbounded; the size of the aug1_of()
		 * buffer is not visible here -- confirm it can hold both strings.
		 */
		sprintf(aug1_of(gkform+naugs),"%s>%s", TempAugments[i].noaug,TempAugments[i].withaug);

		/* the augment always carries the table's dialect, not the intersection d */
		set_dialect(aug1_gstr_of((gkform+naugs)),TempAugments[i].augdial);

		set_workword((gkform+naugs),tmp);
		naugs++;
		if( naugs >= maxaugs ) {
			fprintf(stderr,"temp: got naugs %d with max %d\n", naugs , maxaugs );
			break;
		}
		if( TempAugments[i].uniqueflag ) break;
	}
	return(naugs);
}
Пример #2
0
/*
 * do_syllaug: generate augmented forms of a word from the SyllAugments table.
 *
 * The working word of gkform[0], with hyphens stripped by striphyph(), is
 * compared against the `noaug` prefix of each SyllAugments entry (the table
 * ends at the first entry whose `noaug` is empty).  Entries for which
 * AndDialect() returns a negative value are skipped.  For every other
 * matching entry, slot gkform[naugs] receives a copy of the original word
 * in which
 *   - the working word is `withaug` followed by the rest of the stem
 *     (a HARDSHORT mark directly after the matched prefix is dropped),
 *   - the augment string is "noaug>withaug",
 *   - the augment's dialect is the table entry's `augdial`,
 *   - SYLL_AUG is set on the augment's morph flags and cleared on the
 *     stem's morph flags.
 * Scanning stops after an entry whose `uniqueflag` is set.
 *
 * gkform   in/out: gkform[0] is the input word; results are written to
 *          gkform[0], gkform[1], ...  The caller is expected to supply
 *          room for maxaugs entries.
 * maxaugs  stop once this many results exist (a diagnostic goes to stderr).
 *
 * Returns the number of results written.
 */
int
do_syllaug(gk_word *gkform, int maxaugs)
{
	int i;
	int naugs = 0;
	int wstart;
	Dialect d;
	char tmpstem[MAXWORDSIZE];
	gk_word TmpGkword;

	/* keep a pristine copy: gkform[0] itself is overwritten by the first result */
	TmpGkword = *gkform;

	Xstrncpy(tmpstem,workword_of(gkform),MAXWORDSIZE);
	striphyph(tmpstem);

	for(i=0;SyllAugments[i].noaug[0];i++) {
		char tmp[MAXWORDSIZE];

		/* only entries whose unaugmented prefix starts the stem are of interest */
		if( Xstrncmp( SyllAugments[i].noaug , tmpstem , Xstrlen(SyllAugments[i].noaug )) )
			continue;

		*(gkform+naugs) = TmpGkword;

		if((d=AndDialect(dialect_of(gkform+naugs),SyllAugments[i].augdial))<0) {
			continue;
		}
		if( d ) set_dialect((gkform+naugs),d);

		/* new working word = augmented prefix + remainder of the stem */
		Xstrncpy(tmp,SyllAugments[i].withaug,MAXWORDSIZE);
		wstart = Xstrlen(SyllAugments[i].noaug);
		if( tmpstem[wstart] == HARDSHORT ) wstart++;
		Xstrncat(tmp,tmpstem+wstart ,MAXWORDSIZE);

		/*
		 * NOTE(review): sprintf() is unbounded; the size of the aug1_of()
		 * buffer is not visible here -- confirm it can hold both strings.
		 */
		sprintf(aug1_of(gkform+naugs),"%s>%s", SyllAugments[i].noaug,SyllAugments[i].withaug);

		/* the augment always carries the table's dialect, not the intersection d */
		set_dialect(aug1_gstr_of((gkform+naugs)),SyllAugments[i].augdial);
		add_morphflag(morphflags_of(aug1_gstr_of(gkform+naugs)),SYLL_AUG);
		zap_morphflag(morphflags_of(stem_gstr_of(gkform+naugs)),SYLL_AUG);

		set_workword((gkform+naugs),tmp);
		naugs++;
		if( naugs >= maxaugs ) {
			/* prefix names this routine so the message can be told apart from do_tempaug's */
			fprintf(stderr,"syll: got naugs %d with max %d\n", naugs , maxaugs );
			break;
		}

		if( SyllAugments[i].uniqueflag ) break;
	}
	return(naugs);
}
Пример #3
0
/*
 * endstrcmp: compare a wanted ending with the ending actually found.
 *
 * When the global start_match is zero this is simply
 * dictstrcmp(wendstr, haveendstr).
 *
 * When start_match is set, haveendstr is first reduced to a prefix holding
 * at most as many characters as wendstr has.  While walking haveendstr, a
 * character is skipped if it is not alphabetic and differs from the wendstr
 * character at the current position; every other character is kept.  The
 * reduced copy is then handed to dictstrcmp() in place of haveendstr.
 *
 * The walk stops at the end of haveendstr and never stores more than
 * MAXWORDSIZE-1 characters, so a short haveendstr or an over-long wendstr
 * cannot run off either buffer.
 *
 * Returns whatever dictstrcmp() returns.
 */
int
endstrcmp(char *wendstr, char *haveendstr)
{
	char tmp[MAXWORDSIZE];

	if( start_match ) {
		int wlen = Xstrlen(wendstr);
		char *hp = haveendstr;
		char *sp = tmp;
		int i = 0;

		/* i counts the characters kept so far, i.e. i == sp - tmp */
		while( i < wlen && i < MAXWORDSIZE-1 && *hp ) {
			/* <ctype.h> functions need a value representable as unsigned char */
			if( ! isalpha((unsigned char)*hp) && *(wendstr+i) != *hp ) {
				hp++;
				continue;
			}
			*sp++ = *hp++;
			i++;
		}
		*sp = 0;
		haveendstr = tmp;
	}
	return(dictstrcmp(wendstr,haveendstr));
}
Пример #4
0
/*
 * is_substring: find the first occurrence of s1 inside s2.
 *
 * Returns a pointer into s2 at the start of the match, or NULL when s1 is
 * empty, when s1 is longer than s2, or when s1 does not occur in s2.
 */
char * 
is_substring(char *s1, char *s2)
{
	char *cursor;
	int needle_len = Xstrlen(s1);

	if( needle_len == 0 || needle_len > Xstrlen(s2) )
		return(NULL);

	for( cursor = s2; *cursor; cursor++ ) {
		/* cheap first-character test before the full comparison */
		if( *cursor != *s1 )
			continue;
		if( Xstrncmp(cursor,s1,needle_len) == 0 )
			return(cursor);
	}

	return(NULL);
}
Пример #5
0
/*
 * unaugfromlemma: remove the augment from `stem`, using `lemma` to decide
 * which unaugmented beginning is the right one.
 *
 * stem   in/out: augmented stem; rewritten in place when an augment is
 *        recognised.  Copies back into it are bounded by MAXWORDSIZE.
 * lemma  dictionary form the stem belongs to (read only).
 *
 * Three cases are tried in order:
 *   1. stem starts with "e)" and the lemma starts with a consonant
 *      (e.g. "e)ball" / "ba/llw"): the "e)" is dropped.
 *   2. stem starts with the `withaug` of a TempAugments entry and the lemma
 *      starts with that entry's `noaug`: `withaug` is replaced by `noaug`.
 *   3. the same, using the SyllAugments table.
 *
 * Returns TEMP_AUGMENT for cases 1 and 2, SYLL_AUGMENT for case 3, and -1
 * when nothing matched (stem is then left untouched).
 */
int
unaugfromlemma(char *stem, char *lemma)
{
	char tmp[MAXWORDSIZE];
	int i, withlen, noauglen;

	/*
	 * normal augment on consonantal stem, e.g. "e)ball" and "ba/llw"
	 *
	 * if the lemma does not start with a consonant we do not give up:
	 * the stem might be something like e)ws- as in w)qe/w, so fall
	 * through to the tables (grc 2/18/87)
	 *
	 * NOTE(review): this case reports TEMP_AUGMENT although the "e)" is
	 * simply stripped -- kept as is, callers may depend on it; confirm.
	 */
	if( !Xstrncmp(stem,"e)",2) && Is_cons(*lemma) ) {
		Xstrncpy(tmp,stem+2,MAXWORDSIZE);
		Xstrncpy(stem,tmp,MAXWORDSIZE);
		return(TEMP_AUGMENT);
	}

	for(i=0;TempAugments[i].noaug[0];i++) {
		withlen = Xstrlen(TempAugments[i].withaug);
		if( Xstrncmp( TempAugments[i].withaug , stem , withlen) )
			continue;

		noauglen = Xstrlen(TempAugments[i].noaug);
		if( Xstrncmp( TempAugments[i].noaug, lemma ,noauglen) )
			continue;

		/*
		 * the stem starts with the augmented form and the lemma with
		 * the unaugmented one: make the stem start the way the lemma does
		 */
		Xstrncpy(tmp,TempAugments[i].noaug,MAXWORDSIZE);
		Xstrncat(tmp,stem+withlen ,MAXWORDSIZE);
		Xstrncpy(stem,tmp,MAXWORDSIZE);
		return(TEMP_AUGMENT);
	}

	for(i=0;SyllAugments[i].noaug[0];i++) {
		withlen = Xstrlen(SyllAugments[i].withaug);
		if( Xstrncmp( SyllAugments[i].withaug , stem , withlen) )
			continue;

		noauglen = Xstrlen(SyllAugments[i].noaug);
		if( Xstrncmp( SyllAugments[i].noaug, lemma ,noauglen) )
			continue;

		/* same rewrite as above, for the syllabic augment table */
		Xstrncpy(tmp,SyllAugments[i].noaug,MAXWORDSIZE);
		Xstrncat(tmp,stem+withlen,MAXWORDSIZE );
		Xstrncpy(stem,tmp,MAXWORDSIZE);
		return(SYLL_AUGMENT);
	}
	return(-1);
}
Пример #6
0
/*
 * unaugment: propose unaugmented stems for the (possibly augmented) string s.
 *
 * this has to do a better job with dialects at some point
 *
 * grc 3/21/91
 *
 * this thing checks for whether you are undoing a reduplication
 *
 * if not, then these stems should only be attached to indicatives
 *
 * s             string to analyse (read only).
 * possibs       output: candidate unaugmented stems, filled from index 0.
 * qpossibs      output: for TempAugments entries whose `withaug` changes
 *               under stripquant(), qpossibs[k] receives the table's
 *               unstripped `withaug` followed by the remainder of s.
 * maxstems      capacity of possibs/qpossibs; the caller must supply at
 *               least one slot.
 * dial          not used by this routine.
 * wantsyllaugs  when zero, the SyllAugments table is not consulted.
 * wantredupl    when zero, every candidate written to possibs by the table
 *               loops, and possibs[0] in the special cases, is restricted
 *               to INDICATIVE mood.
 *
 * Special cases handled first, each returning immediately:
 *   "e)rr..."          -> "r(" + rest after "e)rr"
 *   "e)r..."           -> "r(" + rest after "e)r", flagged RAW_SONANT
 *   "e)" + consonant   -> rest after "e)"; if that consonant is doubled,
 *                         a second candidate without the first copy is
 *                         added and flagged SYLL_AUGMENT
 * Otherwise TempAugments and (optionally) SyllAugments are scanned and one
 * candidate is produced per matching entry.
 *
 * Returns the number of candidates stored in possibs.
 */
int
unaugment(char *s, gk_string *possibs[], gk_string *qpossibs[], int maxstems, Dialect dial, int wantsyllaugs, int wantredupl)
{
	int rval = 0;
	int compval;
	int i;
	char augnoquant[MAXWORDSIZE];


	if( ! Xstrncmp(s,"e)rr",4) ) {
		rval = 1;
		Xstrncpy(gkstring_of(possibs[0]),"r(",MAXWORDSIZE);
		Xstrncat(gkstring_of(possibs[0]),s+4,MAXWORDSIZE);
		if( ! wantredupl ) {
			set_mood(forminfo_of(possibs[0]),INDICATIVE);
		}
		return(rval);
	}

	if( ! Xstrncmp(s,"e)r",3) ) {
		rval = 1;
		Xstrncpy(gkstring_of(possibs[0]),"r(",MAXWORDSIZE);
		Xstrncat(gkstring_of(possibs[0]),s+3,MAXWORDSIZE);
		add_morphflag(morphflags_of(possibs[0]),RAW_SONANT);
		if( ! wantredupl ) {
			set_mood(forminfo_of(possibs[0]),INDICATIVE);
		}
		return(rval);
	}

	if( ! Xstrncmp(s,"e)",2) && Is_cons(*(s+2)) ) {
		rval = 1;
		Xstrncpy(gkstring_of(possibs[0]),s+2,MAXWORDSIZE);
		/*
		 * doubled consonant: e)/llabe, e)ssei/onto 429 a. D
		 * only use the second slot if the caller provided one
		 */
		if( *(s+2) == *(s+3) && maxstems > 1 ) {
			rval = 2;
			Xstrncpy(gkstring_of(possibs[1]),s+3,MAXWORDSIZE);
			add_morphflag(morphflags_of(possibs[1]),SYLL_AUGMENT);
		}
		/*
		 * NOTE(review): only possibs[0] is restricted to the indicative;
		 * possibs[1] is left as is -- confirm this is intended.
		 */
		if( ! wantredupl ) {
			set_mood(forminfo_of(possibs[0]),INDICATIVE);
		}
		return(rval);
	}

	for(i=0;TempAugments[i].noaug[0] && rval<maxstems;i++) {
		/* match against the augmented form as transformed by stripquant() */
		Xstrncpy(augnoquant,TempAugments[i].withaug,MAXWORDSIZE);
		stripquant(augnoquant);
		compval = morphstrncmp( augnoquant , s , strlen(augnoquant) );
		if( ! compval ) {
			/* sized like every other MAXWORDSIZE-bounded buffer here */
			char tmp[MAXWORDSIZE];

			Xstrncpy(tmp,TempAugments[i].noaug,MAXWORDSIZE);
			Xstrncat(tmp,s+Xstrlen(augnoquant) ,MAXWORDSIZE);
			Xstrncpy(gkstring_of(possibs[rval]),tmp,MAXWORDSIZE);

			/*
			 * stripquant() changed the table's augmented form: also
			 * record the variant built from the unstripped form
			 */
			if( strcmp(augnoquant,TempAugments[i].withaug) ) {
				Xstrncpy(tmp,TempAugments[i].withaug,MAXWORDSIZE);
				Xstrncat(tmp,s+Xstrlen(augnoquant) ,MAXWORDSIZE);
				Xstrncpy(gkstring_of(qpossibs[rval]),tmp,MAXWORDSIZE);
				set_dialect(possibs[rval],TempAugments[i].augdial );
			}
			if( ! wantredupl ) {
				set_mood(forminfo_of(possibs[rval]),INDICATIVE);
			}
			rval++;
		}
	}

	if( ! wantsyllaugs ) return(rval);

	for(i=0;SyllAugments[i].noaug[0] && rval<maxstems;i++) {
		Xstrncpy(augnoquant,SyllAugments[i].withaug,MAXWORDSIZE);
		stripquant(augnoquant);
		compval = morphstrncmp( augnoquant , s , strlen(augnoquant) );

		if( ! compval ) {
			char tmp[MAXWORDSIZE];

			Xstrncpy(tmp,SyllAugments[i].noaug,MAXWORDSIZE);
			Xstrncat(tmp,s+Xstrlen(augnoquant) ,MAXWORDSIZE);
			Xstrncpy(gkstring_of(possibs[rval]),tmp,MAXWORDSIZE);
			set_dialect(possibs[rval],SyllAugments[i].augdial);
			add_morphflag(morphflags_of(possibs[rval]),SYLL_AUGMENT);
			if( ! wantredupl ) {
				set_mood(forminfo_of(possibs[rval]),INDICATIVE);
			}
			rval++;
		}
	}
	return(rval);
}
Пример #7
0
/*
 * beta2mac: convert the string `source` into the output buffer `res`.
 * (Presumably Beta Code -> Macintosh Greek font encodings, going by the
 * name -- confirm against callers.)
 *
 * source  NUL-terminated input.  NOTE: it is modified in place when
 *         xlit == SMK and a '*' reaches the letter branch (see below).
 * res     output buffer; receives a NUL-terminated result.  No size is
 *         passed in and no bounds are checked, so the caller must make
 *         it large enough.
 * xlit    output flavour; the code distinguishes SMK and SMARTA.
 *
 * Input markers handled by the main loop:
 *   '$'        close any open character style, then call greekfont()
 *   '&'        handle character-style codes, then call romanfont()
 *   '%'<num>   punctuation / special marks selected by <num>
 *   '*'        upper-case marker
 *   "[1" "]1"  become '(' and ')'   ("[1." is not converted)
 *   letters    passed through smk_char_xlit(), then combined with any
 *              following accent / quantity flags
 *   '_'        becomes '-'
 * Anything else is copied unchanged.
 *
 * Uses the globals gkinit, charstyle_flag, cur_font, accenttab and gktab.
 * The function has no declared return type (implicit int) and never
 * returns a value, so callers must not use its result.
 */
beta2mac(char *source, char *res, int xlit)
{
	 char * sp;
	/*unsigned*/ char * rp;
	int acc;
	int saw_isub = 0;
	int long_vowel = 0;
	
	/* one-time initialisation, guarded by the global gkinit */
	if( ! gkinit ) init_gktab();
	
	/* sp walks the input, rp is the write position in the output */
	sp = source; rp = res;
	
	while(*sp) {
		/* '$': switch to Greek; an open character style is closed first */
		if( *sp == '$' ) {
			if( charstyle_flag ) {
				if( rp == res ) {
					END_CHARSTYLE(rp);
/*					*rp++ = 0253;*/
				} else {
					/* look at the last character written */
					if( rp > res) rp--;
/* grc 6/26/89
					while(*rp == ' ' && rp > res ) rp--;
*/
					/*
					 * trailing punctuation is copied one position forward
					 * before END_CHARSTYLE is applied at its old position
					 */
					if( ispunct(*rp) ) {
						*(rp+1) = *rp;
						END_CHARSTYLE(rp);
/*						*rp = 0253;
						rp++; 
*/
						rp++;
					} else {
						rp++;
						END_CHARSTYLE(rp);
/*						*rp++ = 0253;*/
					}
/*
					*rp++ = ' ';
*/
				}
				charstyle_flag = 0;
			}
			/* greekfont() consumes the font code and returns the new read position */
			sp = greekfont(sp);
			if( xlit == SMK ) {
				strcpy(rp,GKFONT); 
				rp += Xstrlen(GKFONT);
			}
			continue;
		} else if( *sp == '&' ) {
			/* '&': switch to Roman, optionally with a character style */

			if( charstyle_flag  && ! IS_CHARSTYLE(sp)) {
				END_CHARSTYLE(rp);
				charstyle_flag = 0;
			}
/*
			} else if( ! charstyle_flag && IS_CHARSTYLE(sp)  ) {
*/
			if( IS_CHARSTYLE(sp) ) {
				/*
				 * NOTE(review): the two halves joined by && test *(sp+1)
				 * against '3' and against '1', so this condition can never
				 * be true; `||` was probably intended.  The pairing
				 * ('3' with BOLD, '1' with ITALIC) is also the opposite of
				 * the assignments just below -- confirm.
				 */
				if( (*(sp+1) == '3' && charstyle_flag == BOLD ) &&
					(*(sp+1) == '1' && charstyle_flag == ITALIC ) ) {
						END_CHARSTYLE(rp);
						charstyle_flag = 0;
				}
				/* '3' selects italic, '1' selects bold */
				if( *(sp+1) == '3' )
					charstyle_flag = ITALIC;
				else if( *(sp+1) == '1' )
					charstyle_flag = BOLD;
/*
				*rp++ = ' ';
*/
				if( xlit == SMARTA ) 
					*rp++ = 0137;
				else {
					if( charstyle_flag == ITALIC ) {
						strcpy(rp,ITALICFONT); rp += Xstrlen(ITALICFONT);
					} else {
						strcpy(rp,BOLDFONT); rp += Xstrlen(BOLDFONT);
					}
				}
				/* skip the two-character style code and following white space */
				sp += 2;
				while(isspace(*sp)) sp++;
			}
			sp = romanfont(sp);
			/*
			 * NOTE(review): *(rp-1) reads before the start of res when
			 * nothing has been written yet (rp == res).
			 */
			if( xlit == SMK && ! charstyle_flag && *(rp-1) != '}' ) {
				strcpy(rp,ROMANFONT);  rp += Xstrlen(ROMANFONT);
			}
			continue;
		}
			
		/* '%'<number>: special marks */
		if( *sp == '%' ) {
			int n;
			char numbuf[8];
			char * np;
			
			np = numbuf;
			n = atoi(++sp);
			/*
			 * keep the digits so the default case can echo them back.
			 * NOTE(review): no length check -- more than 7 digits
			 * overflow numbuf.
			 */
			while(isdigit(*sp)) *np++ = *sp++;
			*np = 0;
			
			switch(n) {
				case 1:
					*rp++ = '?';
					break;
				case 2:
					*rp++ = '*';
					break;
				case 4:
					*rp++ = '!';
					break;
				case 6:
					if( xlit == SMARTA ) 
						*rp++ = EQUALS;
					else {
						/* in Greek font the '=' is emitted via a temporary switch to Roman */
						if( cur_font == GK ) {
							strcpy(rp,ROMANFONT);
							rp += Xstrlen(ROMANFONT);
							strcpy(rp,"=}{");
							rp += 3;
							strcpy(rp,GKFONT);
							rp += Xstrlen(GKFONT);
						} else
							*rp++ = '=';
					}
					break;
				case 10:
					if( xlit == SMARTA )
						*rp++ = '`';
					else
						*rp++ = ':';
					break;
				/*
				 * NOTE(review): in case 40 only the SMARTA path ends in
				 * break; the SMK path (and any other xlit) falls through
				 * into case 41, so SMK output gets the short mark AND the
				 * long mark.  Looks like a misplaced break -- confirm.
				 */
				case 40:
					if( xlit == SMK ) {
						*rp++ = ' ';
						*rp++ = SMK_SHORTMARK;
					} else if( xlit == SMARTA ) {
						*rp++ = SMARTA_SHORTMARK;
						break;
					}
				/*
				 * NOTE(review): in case 41 the SMARTA path has no break and
				 * falls through into default, which additionally emits '%'
				 * followed by the digits.  Looks like a missing break --
				 * confirm.
				 */
				case 41:
					if( xlit == SMK ) {
						*rp++ = ' ';
						*rp++ = SMK_LONGMARK;
						break;
					} else if( xlit == SMARTA ) {
						*rp++ = SMARTA_LONGMARK;
					}
					
				/* unknown code: echo "%<digits>" unchanged */
				default:
					np = numbuf;
					*rp ++ = '%';
					while(*np) *rp++ = *np++;
					break;
				}
			continue;
		}
		/* '*' while in Roman font: upper-case marker */
		if( *sp == '*' && cur_font == ROMAN ) {
			if( xlit == SMARTA ) {
				*rp++ = UCASEMARKER;
			} else if (xlit == SMK ) {
				sp++;
				/*
				 * NOTE(review): this copies the whole remainder of the
				 * input into the output just to upper-case its first
				 * character; everything after that character is
				 * overwritten by later iterations, but res must have
				 * room for it.
				 */
				strcpy(rp,sp);
				if( islower(*rp) ) *rp = toupper(*rp);
				rp++;
			}
			sp++;
			continue;
		}
		/* letters while in Roman font */
		if( cur_font == ROMAN &&isalpha(*sp) ) {
			if( isupper(*sp) ) {
				*rp++ = UCASEMARKER;
				*rp++ = *sp++;
				continue;
			} else {
				if( xlit == SMARTA ) 
					*rp++ = toupper(*sp++);
				else
					*rp++ = *sp++;
			}
			continue;
		}
		if( *sp == '[' && *(sp+1) == '1' && *(sp+2) != '.' ) {
				/* 
				 * grc 6/5/88
				 *
				 * note that we want to accept things like "[1.]" which show up
				 * all the time in the Greek English Lexicon.
				 */
				*rp++ = '(';
				sp += 2;
				continue;
		}
		
		if( *sp == ']' && *(sp+1) == '1') {
				*rp++ = ')';
				sp += 2;
				continue;
		}
		
		/* a letter (or '*' outside Roman font) plus its trailing accent flags */
		if(isalpha(*sp) || *sp == '*') {
			acc = 0;
			
			
			/*
			 * SMK upper case: the INPUT string is edited in place.
			 * If accent flags follow the '*', the '*' becomes a blank and
			 * the next letter is upper-cased; otherwise the '*' is removed
			 * by shifting the rest of the string left.
			 * NOTE(review): strcpy() with overlapping source and
			 * destination is undefined behaviour in ISO C; memmove() is
			 * the defined way to do this.
			 */
			if( xlit == SMK && *sp == '*' ) {
				if( Is_accflag(*(sp+1)) ) {
					 char * t = sp;
					*sp = ' ';
					while(*t&&!isalpha(*t)) t++;
					if(isalpha(*t) && islower(*t) ) *t = toupper(*t);
				} else {
					strcpy(sp,sp+1);
					if(islower(*sp)) *sp = toupper(*sp);
				}
			} 
			

			*rp = *sp++;
			
			
			/* SMARTA upper case: emit UCASEMARKER followed by the lower-case letter */
			if( isupper(*rp) && xlit == SMARTA ) {
					*(rp+1) = tolower(*rp);
					*rp++ = UCASEMARKER;
			}

/*			if( *rp == '*' ) {
				if( xlit == SMARTA )
					*rp = UCASEMARKER;
				*sp = smk_char_xlit(*sp,sp+1);
			} else 
*/
				*rp = smk_char_xlit(*rp,sp,xlit);
/*			
			if( *rp == 's' && !isalpha(*sp) && *sp != '\'' && *sp != '-' )
				*rp = TERMINAL_SIGMA;
			else if( *rp == 'w' )
				*rp = 'v';
			else if( *rp == 'q' )
				*rp = 'y';
			else if( *rp == 'Q' )
				*rp = 'Y';
			else if( *rp == 'c' )
				*rp = 'j';
			else if( *rp == 'C' )
				*rp = 'J';
			else if (*rp == 'y' )
				*rp = 'c';
			else if (*rp == 'W' )
				*rp = 'V';
			else if (*rp == 'V' )
				*rp = 'C';
			else if( *rp == 'v' ) /* digamma *
				*rp = 'W';
			else if( *rp == '*' && xlit == SMARTA ) {
					*rp = UCASEMARKER;
			} /*else if ( xlit == SMK && ) {
					unsigned char * t = sp;
					if( Is_accflag(*sp)) {
							*rp = ' ';
						while(*t&&!isalpha(*t)) t++;
						if(isalpha(*t)&&islower(*t)) *t = toupper(*t);
					} else {
						*rp = toupper(*sp++);
					}
				}
					
			}*/
			
			/*
			 * consume the flags that follow the letter: quantity and iota
			 * subscript are remembered separately, everything else is
			 * summed into acc via accenttab.
			 * NOTE(review): accenttab is indexed with a plain char, which
			 * is negative for bytes >= 0x80 where char is signed.
			 */
			while( Is_accflag(*sp) ) {

				if( *sp == HARDLONG ) {
					long_vowel++;
					sp++;
				} else if( *sp == '|' )  {
					saw_isub++;
					sp++;
				} else if (*sp == HARDSHORT)
					sp++;
				/*
				 * don't count the hard short marker (no way to print it for now)
				 */
				 else		
					acc += accenttab[*sp++];
			}
/*
printf("got [%o] ", acc );
*/
/*
			if( *sp == '|' ) {
				saw_isub++;
				sp++;
			}
*/
			/*
			 * if you see a capital letter marker 
			 * then keep that and find the letter that that
			 * capital would cover.
			 */
			if( acc && *rp == UCASEMARKER && xlit == SMARTA ) {
				if( isalpha(*sp ) ) {
					*++rp = *sp++;
					*rp = smk_char_xlit(*rp,sp,xlit);
				}
			} 
			
			/*
			 * don't bother showing the long mark if the letter is
			 * accented with a circumflex
			 */
			if( long_vowel )  {
				if( ! (acc & CIRCUMFLAG ) ) {
					if(xlit == SMARTA ) {
						/* SMARTA: a, i and u are replaced by other character codes */
						if( *rp == 'a' )
							*rp = 046;
						else if( *rp == 'i' ) {
							*rp = 0372;
							*rp &= 0377;
						} else if( *rp == 'u' ) {
							*rp = 0304;
							*rp &= 0377;
						}
					} else if( xlit == SMK ) {
						/* SMK: a '*' is inserted in front of the letter */
						*(rp+1) = *rp;
						*rp++ = '*';
					}
				} 
				long_vowel = 0;
			}
			/* SMK: iota subscript is folded into the vowel's character code */
			if( saw_isub && xlit == SMK ) {
				switch(*rp) {
					case 'a':
						*rp = AISUB;
						break;
					case 'h':
						*rp = HISUB;
						*rp &= 0377;
						break;
					case 'v':
						*rp = WISUB;
						*rp &= 0377;
						break;
					default:
						break;
				}
				saw_isub = 0;
			} 
				
			
			/*
			 * apply the collected accent flags: rough rho and the
			 * diaeresis combinations on i / u are special-cased, all
			 * other letters go through gktab + accnum().
			 * NOTE(review): gktab is indexed with a plain char; *rp may
			 * hold a value >= 0x80 at this point (e.g. after the iota
			 * subscript mapping), which is negative where char is signed.
			 */
			if( acc ) {
				if(*rp == 'r' && acc == ROUGHFLAG ) {
					if( xlit == SMK ) 
						*rp = SMK_ROUGH_RHO;
					else
						*rp = SMARTA_ROUGH_RHO;
				} else if( acc == DIAERFLAG  &&
							(*rp == 'i' || *rp == 'u') ) {
					if( *rp == 'i' ) 
						*rp = 0363;
					else 
						*rp =  043;
				} else if( acc == (DIAERFLAG|ACUTEFLAG)  &&
							(*rp == 'i' || *rp == 'u') ) {
					if( *rp == 'i' ) 
						*rp = 0375;
					else 
						*rp =  0100;
				}else if( acc == (DIAERFLAG|GRAVEFLAG)  &&
							(*rp == 'i' || *rp == 'u') ) {
					if( *rp == 'i' ) 
						*rp = 0376;
					else 
						*rp =  0243;
				} else if( !gktab[*rp] ) {
					/* no table entry for this letter: it is emitted as "?<letter>?" */
					*(rp+1) = *rp;
					*rp = '?';
					rp += 2;
					*rp = '?';
				} else
					*rp = (unsigned char) (gktab[*rp] + accnum(acc));
/*
if(1) {
int n;
n = *rp;
n &= 0377;
printf(" *rp [%o] n [%o] ", *rp , n  );
}
*/
			}
			/* SMARTA: iota subscript is written out as a following 'i' */
			if( saw_isub && xlit == SMARTA ) {
				saw_isub = 0;
				*++rp = 'i';
			}
		}  else if( *sp == '_' ) {
			*rp = '-';
			sp++;
		} else
			*rp = *sp++;
		/* every branch that reaches this point left its last character at *rp */
		rp++;
	}
	*rp = 0;
}