/* Sanity-checks the placement of FT_SUBCAT and F_OPTIONAL characters in a
 * feature string, reporting violations through Dbg(). Nothing is modified.
 *
 * Pass 1: every FT_SUBCAT character must be preceded by an FT_OBJLOC
 *         character, optionally with one F_OPTIONAL in between.
 * Pass 2: every F_OPTIONAL character must be immediately preceded by an
 *         FT_OBJLOC character.
 *
 * features: TERM-terminated feature string to check.
 */
void ThetaRoleCheckSubcat(char *features)
{
  char *p, *prev;
  int misplaced;

  for (p = features; *p; p++) {
    if (!StringIn(*p, FT_SUBCAT)) continue;
    /* Look backwards only when there is room to do so. An FT_SUBCAT at
     * position 0 or 1 is always reported; checking the position first
     * avoids reading (or even forming a pointer to) memory before the
     * start of the string.
     * NOTE(review): as in the previous code, position 0 is never accepted
     * as the FT_OBJLOC position here (prev <= features is reported) --
     * confirm that this is intended.
     */
    misplaced = 1;
    if ((p - features) >= 2) {
      prev = p - 1;
      if (*prev == F_OPTIONAL) prev--;
      misplaced = (prev <= features) || !StringIn(*prev, FT_OBJLOC);
    }
    if (misplaced) {
      Dbg(DBGLEX, DBGBAD,
          "<%s> FT_SUBCAT doesn't follow FT_OBJLOC [F_OPTIONAL]", features);
    }
  }

  for (p = features; *p; p++) {
    if (*p != F_OPTIONAL) continue;
    /* The position test comes first so that *(p-1) is only read when it
     * lies inside the string.
     */
    if (p <= features || !StringIn(*(p-1), FT_OBJLOC)) {
      Dbg(DBGLEX, DBGBAD, "<%s> F_OPTIONAL doesn't follow FT_OBJLOC",
          features);
    }
  }
}
/* Tests a single character against a single phone-pattern character.
 *
 * ptn: pattern character. 'X' accepts any of "0123456789", 'N' accepts any
 *      of "23456789", '=' accepts any character in PHONE_WHITESPACE; every
 *      other pattern character must equal c.
 * c:   character to test.
 *
 * Returns the result of the corresponding membership/equality test.
 */
Bool CharPhoneMatch(int ptn, int c)
{
  if (ptn == 'X') {
    return StringIn(c, "0123456789");
  }
  if (ptn == 'N') {
    return StringIn(c, "23456789");
  }
  if (ptn == '=') {
    return StringIn(c, PHONE_WHITESPACE);
  }
  return ptn == c;
}
/* Decides whether a feature string is acceptable for a given task.
 *
 * For the "parse" and "generate" tasks the features are acceptable only if
 * they do not contain F_TRANS_ONLY. For "translate" and for any other task
 * the features are always acceptable.
 */
Bool FeatureTaskOK(char *features, Obj *task)
{
  if (task == N("parse") || task == N("generate")) {
    return !StringIn(F_TRANS_ONLY, features);
  }
  if (task == N("translate")) {
    return 1;
  }
  return 1;
}
/* Parse filter for combining two Z constituents into a Z.
 * Returns 1.0 to accept the combination and 0.0 to reject it.
 * The lang argument is not consulted.
 */
Float Syn_ParseFilterZZ_Z(PNode *z1, PNode *z2, int lang)
{
  PNode *lead;

  /* z1 consisting solely of an interjection:
   * "What, are you thinking?" ACCEPT THIS PARSE vs.
   * "What are you thinking"   REJECT THIS PARSE
   * i.e. accept exactly when z1 ends in a comma.
   */
  if (z1->pn1 && z1->pn1->feature == F_INTERJECTION && z1->pn2 == NULL) {
    return StringIn(',', PNodeGetEndPunc(z1)) ? 1.0 : 0.0;
  }

  /* z1 starts with a conjunction: it must pass the conjunction check and
   * must not be a lexical entry whose concepts are all F_CLAUSE2_ONLY.
   */
  lead = PNodeLeftmost(z1);
  if (lead && lead->feature == F_CONJUNCTION) {
    if (!Syn_ParseZZ_ZConjunctionOK(lead)) {
      return 0.0;
    }
    if (lead->lexitem && lead->lexitem->le &&
        LexEntryAllConceptsFeat(F_CLAUSE2_ONLY, lead->lexitem->le)) {
      return 0.0;
    }
    return 1.0;
  }

  /* z2 starts with a conjunction: only the conjunction check applies. */
  lead = PNodeLeftmost(z2);
  if (lead && lead->feature == F_CONJUNCTION) {
    return Syn_ParseZZ_ZConjunctionOK(lead) ? 1.0 : 0.0;
  }

  /* No conjunction: allow if z1 ends in a comma. In French these are
   * frequent. In English these are frowned upon as "run-on sentences":
   *   "The air was cold and humid, I hate that."
   * However if there is an ellipsis, this is not considered a run-on:
   *   "The air was cold and humid, the sky overcast."
   * Still disallow when too lopsided (z1 of 5+ words, z2 of at most 1):
   *   "The American who is Jim Garnier's sister, ate."
   * todoSCORE
   */
  if (StringIn(',', PNodeGetEndPunc(z1)) &&
      (PNodeNumberOfWords(z1) < 5 || PNodeNumberOfWords(z2) > 1)) {
    return 1.0;
  }

  /* Disallow. */
  return 0.0;
}
/* Adds a (lexical entry, features) pair to a set of parallel result arrays.
 *
 * le:       lexical entry to add.
 * features: feature string associated with le.
 * maxlen:   capacity of le_le / le_feat.
 * le_le, le_feat, le_len (RESULTS): parallel arrays and their current
 *           length; *le_len is incremented when a new slot is used.
 *
 * Unless features contains F_VERB, an existing slot holding the same le is
 * collapsed: its feature string is replaced by the intersection of the
 * stored features and the new ones. Verbs are never collapsed: collapsing
 * would reduce the number of parses, but tense (and the resulting aspect)
 * information is significant. Not collapsing also allows
 * "Qui a peint Le Déjeuner sur l herbe?" to parse.
 *
 * When the arrays are full the pair is dropped and a Dbg message is issued.
 */
void TA_LELAdd(LexEntry *le, char *features, int maxlen,
               /* RESULTS */ LexEntry **le_le, char **le_feat, int *le_len)
{
  char merged[FEATLEN];
  int slot, used;

  used = *le_len;

  if (!StringIn(F_VERB, features)) {
    for (slot = 0; slot < used; slot++) {
      if (le_le[slot] != le) continue;
      StringIntersect(le_feat[slot], features, merged);
      StringCpy(le_feat[slot], merged, FEATLEN);
      return;
    }
  }

  if (used >= maxlen) {
    Dbg(DBGGEN, DBGBAD, "TA_LELAdd: increase maxlen <%s.%s>",
        le->srcphrase, le->features);
    return;
  }

  le_le[used] = le;
  le_feat[used] = MemAlloc(FEATLEN, "char TA_LELAdd");
  StringCpy(le_feat[used], features, FEATLEN);
  *le_len = used + 1;
}
/* Prints one report field into text.
 *
 * text:     destination.
 * s:        field contents.
 * fieldlen: width of the field in characters.
 * just:     RPT_JUST_RIGHT, RPT_JUST_CENTER, or anything else for left
 *           justification.
 *
 * A string containing NEWLINE is emitted through TextPutsIndented() and is
 * not padded. A string longer than fieldlen is truncated to fieldlen
 * characters. Otherwise the string is padded with spaces to exactly
 * fieldlen characters according to just.
 */
void RptTextPrint1(Text *text, char *s, int fieldlen, int just)
{
  int col, width, lead;

  /* Multi-line contents: hand over to the indenting printer.
   * (A disabled alternative used to strip the newlines from s in place.)
   */
  if (StringIn(NEWLINE, s)) {
    TextPutsIndented(s, text, " ");
    return;
  }

  width = strlen(s);
  if (width > fieldlen) {
    /* Too long: print only the part that fits. */
    for (col = 0; col < fieldlen; col++) {
      TextPutc(s[col], text);
    }
    return;
  }

  /* Leading padding; none is printed for left justification. */
  lead = 0;
  if (just == RPT_JUST_RIGHT) {
    lead = fieldlen - width;
    TextPrintSpaces(text, lead);
  } else if (just == RPT_JUST_CENTER) {
    lead = (fieldlen - width)/2;
    TextPrintSpaces(text, lead);
  }

  TextPuts(s, text);

  /* Trailing padding fills whatever remains of the field. */
  TextPrintSpaces(text, fieldlen - (lead + width));
}
/* Replaces, in place, every character of to that is a member of
 * LE_WHITESPACE with '-'.
 */
void StringMapLeWhitespaceToDash(char *to)
{
  for (; *to; to++) {
    if (StringIn(*to, LE_WHITESPACE)) {
      *to = '-';
    }
  }
}
/* Copies the leading run of characters of in that are members of seps into
 * out, stopping at the first character that is not in seps or at TERM.
 *
 * in:        input string.
 * including: if nonzero and the scan did not stop on TERM, the character
 *            the scan stopped on is skipped as well.
 * out:       destination buffer; always TERM-terminated on return.
 * maxlen:    size of out in bytes, including the terminator.
 * seps:      set of characters to copy.
 *
 * When TREE_ESCAPE is encountered (it must itself be in seps to be seen),
 * both it and the character following it are copied, whatever that
 * following character is. An escape at the very end of the input is
 * reported through Dbg and ends the scan.
 *
 * If out fills up, the output is truncated, a Dbg message is issued and
 * the current input position is returned.
 *
 * Returns a pointer to the first unread input character.
 */
char *StringReadToNons(char *in, int including, char *out, int maxlen,
                       char *seps)
{
  char *p;

  p = out;
  maxlen--;	/* From here on: index of the last byte of out. */
  while (*in && StringIn(*in, seps)) {
    if (*in == TREE_ESCAPE) {
      *p = *in;
      p++;
      in++;
      if (in[0] == TERM) {
        Dbg(DBGGEN, DBGBAD, "StringReadToNons: empty escape");
        break;
      }
    }
    if ((p - out) >= maxlen) {
      out[maxlen] = TERM;
      Dbg(DBGGEN, DBGBAD,
          "StringReadToNons: overflow reading up to <%s>: <%s>", seps, out);
      return(in);
    }
    *p = *in;
    p++;
    in++;
  }
  /* The escape character above is stored before the overflow test, so on
   * leaving the loop via "empty escape" p may point one past the last byte
   * of out. Clamp so that the terminator always lands inside the buffer.
   */
  if ((p - out) > maxlen) {
    p = out + maxlen;
  }
  *p = TERM;
  if (including && (*in != TERM)) in++;
  return(in);
}
/* Replaces, in place, every character of to that is a member of
 * LE_WHITESPACE with SPACE.
 */
void StringMapLeWhitespaceToSpace(char *to)
{
  for (; *to; to++) {
    if (StringIn(*to, LE_WHITESPACE)) {
      *to = SPACE;
    }
  }
}
/* Generation transformation for French preposition + definite article
 * contractions.
 *
 * Applies when pn->pn1 is a preposition and the leftmost word of pn->pn2
 * is a determiner whose lexical entry descends from "definite-article".
 *
 *   à le  -> au      à les  -> aux
 *   de le -> du      de les -> des
 * and, when the noun concept of pn->pn2 is a "polity":
 *   en la            -> en
 *   en le + vowel    -> en
 *   en le + nonvowel -> au
 *   en les           -> aux
 *
 * On a contraction the determiner's lexitem is turned into a trace, the
 * preposition node is replaced where needed, and *change is set to 1.
 * *change is left untouched when nothing applies. Always returns pn.
 *
 * NOTE(review): pn->pn1->lexitem is dereferenced without a NULL check,
 * unlike the determiner's lexitem -- confirm preposition nodes always
 * carry a lexitem here.
 */
PNode *TransformGenFrenchContractions(PNode *pn, Discourse *dc,
                                      /* RESULTS */ int *change)
{
  int det_gender, det_number;
  PNode *det;
  Obj *noun_con;

  det = PNodeLeftmost(pn->pn2);

  /* Applicability guards, in the same order as the conditions they test. */
  if (pn->pn1 == NULL || pn->pn1->feature != F_PREPOSITION) {
    return pn;
  }
  if (det == NULL || det->feature != F_DETERMINER || det->lexitem == NULL) {
    return pn;
  }
  if (!LexEntryConceptIsAncestor(N("definite-article"), det->lexitem->le)) {
    return pn;
  }

  det_number = FeatureGet(det->lexitem->features, FT_NUMBER);
  det_gender = FeatureGet(det->lexitem->features, FT_GENDER);

  /* Non-elided plural or masculine article: à/de contractions. */
  if (!StringIn(F_ELISION, det->lexitem->features) &&
      (det_number == F_PLURAL || det_gender == F_MASCULINE)) {
    if (LexEntryConceptIsAncestor(N("prep-to"), pn->pn1->lexitem->le)) {
      /* à le -> au, à les -> aux */
      pn->pn1 = GenMakePrep(N("prep-au"), det_gender, det_number, dc);
      LexitemMakeTrace(det->lexitem);
      *change = 1;
      return pn;
    }
    if (LexEntryConceptIsAncestor(N("prep-of"), pn->pn1->lexitem->le)) {
      /* de le -> du, de les -> des */
      pn->pn1 = GenMakePrep(N("prep-du"), det_gender, det_number, dc);
      LexitemMakeTrace(det->lexitem);
      *change = 1;
      return pn;
    }
  }

  /* "en" + article before a polity. */
  noun_con = PNodeNounConcept(pn->pn2);
  if (noun_con && ISA(N("polity"), noun_con) &&
      LexEntryConceptIsAncestor(N("prep-en"), pn->pn1->lexitem->le)) {
    if (!(det_gender == F_FEMININE ||
          (F_VOCALIC ==
           LexitemInitialSound(PNodeLeftmostLexitem(pn->pn2), dc)))) {
      /* en le + nonvowel -> au, en les -> aux */
      pn->pn1 = GenMakePrep(N("prep-au"), det_gender, det_number, dc);
    }
    /* In every polity case the article itself disappears. */
    LexitemMakeTrace(det->lexitem);
    *change = 1;
    return pn;
  }

  return pn;
}
/* Returns 1 if every character of s is either a digit (per Char_isdigit)
 * or a member of set, 0 otherwise. An empty s yields 1.
 */
Bool StringIsDigitOr(char *s, char *set)
{
  for (; *s; s++) {
    if (Char_isdigit(*s) || StringIn(*s, set)) continue;
    return 0;
  }
  return 1;
}
/* Returns 1 if every character of s1 occurs in s2, 0 otherwise.
 * An empty s1 yields 1.
 */
Bool StringAllIn(char *s1, char *s2)
{
  for (; *s1; s1++) {
    if (!StringIn(*s1, s2)) {
      return 0;
    }
  }
  return 1;
}
/* Returns 1 if every character of s is either alphabetic (per CharIsAlpha)
 * or a member of set, 0 otherwise. An empty s yields 1.
 */
Bool StringIsAlphaOr(char *s, char *set)
{
  for (; *s; s++) {
    if (CharIsAlpha(*s) || StringIn(*s, set)) continue;
    return 0;
  }
  return 1;
}
/* Used during parsing when parent is not available.
 * Rejects (returns 0) an E-max whose leftmost lexical entry carries
 * F_CONJUNCTION in its features; accepts (returns 1) everything else,
 * including a node with no leftmost lexical entry.
 */
Bool XBarValidE_MAX(PNode *e)
{
  LexEntry *head;

  head = PNodeLeftmostLexEntry(e);
  if (head && StringIn(F_CONJUNCTION, head->features)) {
    return 0;
  }
  return 1;
}
/* Writes to out the set union of the characters of in1 and in2.
 *
 * in1, in2: TERM-terminated input strings.
 * maxlen:   size of out in bytes, including the terminator.
 * out (RESULT): each character code from 1 to 255 that occurs in in1 or
 *           in2, once, in ascending code order, TERM-terminated. If the
 *           result does not fit it is truncated to maxlen-1 characters.
 */
void StringUnion(char *in1, char *in2, int maxlen, /* RESULTS */ char *out)
{
  int i;
  char *orig_out;

  /* Remember the start of the buffer so that the fill level can be
   * measured. (This used to be written as a second declarator, which
   * redeclared orig_out with a conflicting type and did not compile.)
   */
  orig_out = out;
  maxlen--;	/* Reserve room for the terminator. */
  for (i = 1; i < 256; i++) {
    if (StringIn(i, in1) || StringIn(i, in2)) {
      if ((out - orig_out) >= maxlen) {
        orig_out[maxlen] = TERM;
        return;
      }
      *out = i;
      out++;
    }
  }
  *out = TERM;
}
/* Returns 1 if at least one character of s1 occurs in s2, 0 otherwise.
 * An empty s1 is treated as a match and yields 1.
 */
Bool StringAnyIn(char *s1, char *s2)
{
  if (s1[0] == TERM) {
    return 1;
  }
  for (; *s1; s1++) {
    if (StringIn(*s1, s2)) {
      return 1;
    }
  }
  return 0;
}
/* Appends the character in to the string out unless out already contains
 * it.
 *
 * in:     character to append.
 * maxlen: size of out in bytes, including the terminator.
 * out (RESULT): string to extend. If there is no room for another
 *         character, out is (re)terminated at its last byte and left
 *         otherwise unchanged.
 */
void StringAppendIfNotAlreadyIn(int in, int maxlen, /* RESULTS */ char *out)
{
  char *end;
  int last;

  last = maxlen - 1;
  end = StringEndOf(out);
  if (StringIn(in, out)) {
    return;
  }
  if ((end - out) >= last) {
    out[last] = TERM;
    return;
  }
  end[0] = in;
  end[1] = TERM;
}
/* Writes to out the characters of in1 that also occur in in2, in the order
 * (and with the multiplicity) in which they appear in in1, followed by
 * TERM. The caller must provide an out buffer at least as large as in1
 * including its terminator; no bound is checked here.
 */
void StringIntersect(char *in1, char *in2, /* RESULTS */ char *out)
{
  for (; *in1; in1++) {
    if (!StringIn(*in1, in2)) continue;
    *out++ = *in1;
  }
  *out = TERM;
}
void Load() { Ts ts1, ts2; Dur d; TsSetNow(&ts1); if (StringIn(F_FRENCH, StdDiscourse->langs)) { LexEntryReadInflFile("db/frinfl.txt"); } if (StringIn(F_ENGLISH, StdDiscourse->langs)) { LexEntryReadInflFile("db/eninfl.txt"); } DbFileRead("db/name.txt", DBFILETYPE_ISA); DbFileRead("db/fooddrug.txt", DBFILETYPE_ISA); DbFileRead("db/geog.txt", DBFILETYPE_POLITY); DbFileRead("db/absobj.txt", DBFILETYPE_ISA); DbFileRead("db/street.txt", DBFILETYPE_ISA); DbFileRead("db/grid.txt", DBFILETYPE_ISA); DbFileRead("db/all.txt", DBFILETYPE_ISA); DbFileRead("db/physics.txt", DBFILETYPE_ISA); DbFileRead("db/chem.txt", DBFILETYPE_ISA); DbFileRead("db/trans.txt", DBFILETYPE_ISA); DbFileRead("db/celest.txt", DBFILETYPE_ISA); DbFileRead("db/physobj.txt", DBFILETYPE_ISA); DbFileRead("db/ling.txt", DBFILETYPE_ISA); /* FeatPrintUnused(stdout); */ DbFileRead("db/relation.txt", DBFILETYPE_ISA); DbFileRead("db/action.txt", DBFILETYPE_ISA); DbFileRead("db/attr.txt", DBFILETYPE_ISA); DbFileRead("db/enum.txt", DBFILETYPE_ISA); DbFileRead("db/living.txt", DBFILETYPE_ISA); DbFileRead("db/human.txt", DBFILETYPE_ISA); DbFileRead("db/clothing.txt", DBFILETYPE_ISA); DbFileRead("db/mediaobj.txt", DBFILETYPE_ISA); DbFileRead("db/music.txt", DBFILETYPE_ISA); DbFileRead("db/tv.txt", DBFILETYPE_ISA); DbFileRead("db/company.txt", DBFILETYPE_ISA); DbFileRead("db/elec.txt", DBFILETYPE_ISA); TsSetNow(&ts2); d = TsMinus(&ts2, &ts1); Dbg(DBGGEN, DBGBAD, "Load time = %.2ld:%.2ld.", d/60, d%60); }
/* Matches the start of s against the phone pattern ptn, character by
 * character, using CharPhoneMatch.
 *
 * ptn:    pattern string.
 * s:      text to match.
 * digits (RESULT): receives the characters of "0123456789" found in the
 *         matched part of s. TERM-terminated only when the match succeeds.
 * nextp (RESULT): on success, set to the first character of s after the
 *         match. Not written on failure.
 *
 * Returns 1 if the whole pattern matched, 0 at the first mismatch.
 */
Bool StringPhoneMatch(char *ptn, char *s,
                      /* RESULTS */ char *digits, char **nextp)
{
  for (; *ptn; ptn++, s++) {
    if (!CharPhoneMatch(*ptn, *s)) {
      return 0;
    }
    if (StringIn(*s, "0123456789")) {
      *digits = *s;
      digits++;
    }
  }
  *digits = TERM;
  *nextp = s;
  return 1;
}
/* Copies the feature string in to out, replacing every character that is a
 * member of FT_POS with newpos. out is TERM-terminated and must be at
 * least as large as in including its terminator.
 */
void FeatSubstPOS(char *in, int newpos, /* RESULTS */ char *out)
{
  for (; *in; in++, out++) {
    *out = StringIn(*in, FT_POS) ? newpos : *in;
  }
  *out = TERM;
}
/* Tests whether the line starting at s consists solely of characters from
 * set.
 *
 * Returns 1 and sets *next_line to the character after the NEWLINE when
 * the line qualifies. Reaching the end of the string before a NEWLINE is
 * also accepted, with *next_line pointing at the terminator. Returns 0,
 * leaving *next_line untouched, at the first character not in set.
 */
Bool StringLineIsAll(char *s, char *set, /* RESULTS */ char **next_line)
{
  for (; *s; s++) {
    if (*s == NEWLINE) {
      *next_line = s + 1;
      return 1;
    }
    if (!StringIn(*s, set)) {
      return 0;
    }
  }
  /* Hit EOF; we'll accept. */
  *next_line = s;
  return 1;
}
/* Used during parsing when parent is not available.
 * Returns 0 to reject an X-max, 1 to accept it.
 *
 * Rejected are:
 *  - an X-max whose leftmost lexical entry carries F_CONJUNCTION;
 *  - an X-max consisting solely of a pronoun whose leftmost lexical entry
 *    does not descend from "noun-phrase-pronoun", e.g.
 *    [Z [X [H <à peine de quoi.Hy>]] [W [W [V <remplir.fVy>]]]]
 */
Bool XBarValidX_MAX(PNode *x)
{
  LexEntry *head;

  head = PNodeLeftmostLexEntry(x);
  if (head && StringIn(F_CONJUNCTION, head->features)) {
    return 0;
  }

  /* Anything other than a lone pronoun is fine at this point. */
  if (x->pn1 == NULL || x->pn1->feature != F_PRONOUN || x->pn2 != NULL) {
    return 1;
  }

  if (LexEntryConceptIsAncestor(N("noun-phrase-pronoun"),
                                PNodeLeftmostLexEntry(x))) {
    return 1;
  }
  return 0;
}
/* Removes, in place, every character of to that is a member of set.
 *
 * to:   string to filter; compacted and TERM-terminated.
 * set:  characters to eliminate.
 * mods (RESULT, may be NULL): set to 1 if at least one character was
 *       removed, 0 otherwise.
 */
void StringElims(char *to, char *set, /* RESULTS */ int *mods)
{
  char *rd;

  if (mods) *mods = 0;
  for (rd = to; *rd; rd++) {
    if (StringIn(*rd, set)) {
      if (mods) *mods = 1;
      continue;
    }
    *to = *rd;
    to++;
  }
  *to = TERM;
}
/* Returns pointer to first non separator character.
 * Skips over the leading characters of in that are members of seps. When
 * TREE_ESCAPE is met (it must be in seps to be seen), the character that
 * follows it is skipped too, whatever it is; an escape at the very end of
 * the input is reported through Dbg and the terminator is returned.
 */
char *StringSkipToNons(char *in, char *seps)
{
  for (; *in && StringIn(*in, seps); in++) {
    if (*in == TREE_ESCAPE) {
      in++;
      if (*in == TERM) {
        Dbg(DBGGEN, DBGBAD, "StringSkipNons");
        return in;
      }
    }
    /* Defensive check; as the loop is written, *in cannot be TERM here. */
    if (*in == TERM) {
      Dbg(DBGGEN, DBGBAD, "StringSkipNons: hit end skipping to <%s>", seps);
      return in;
    }
  }
  return in;
}
/* Appends to out each character of in that out does not already contain
 * (including characters appended earlier in the same call).
 *
 * in:     characters to add.
 * maxlen: size of out in bytes, including the terminator.
 * out (RESULT): string to extend; kept TERM-terminated. When out is full
 *         the remaining input is ignored.
 */
void StringAppendIfNotAlreadyIns(char *in, int maxlen,
                                 /* RESULTS */ char *out)
{
  char *end;
  int last;

  last = maxlen - 1;
  end = StringEndOf(out);
  for (; *in; in++) {
    if (StringIn(*in, out)) continue;
    if ((end - out) >= last) {
      out[last] = TERM;
      return;
    }
    *end = *in;
    end++;
    /* Terminate right away so the next membership test sees this char. */
    *end = TERM;
  }
  *end = TERM;
}
/* Translates the single word node pn into the target language.
 *
 * For every non-list concept returned by TranslateGetAllcons() for which
 * TranslateGetOles() finds a target ObjToLexEntry and for which an
 * inflection exists, a new word node is built with PNodeWord(). The
 * punctuation of pn is carried over through TranslatePunc() when present.
 * The new nodes are chained through next_altern, most recently built
 * first.
 *
 * If the target entry has F_COMMON_INFL, its own FT_NUMBER feature is used
 * in place of the number argument.
 *
 * Returns the head of the chain of alternatives, or pn itself when no
 * translation could be built. The pnp and max arguments are not consulted.
 *
 * NOTE(review): the TranslateGetAllcons() call tolerates pn->lexitem ==
 * NULL, but the TranslateGetOles() call dereferences pn->lexitem
 * unconditionally -- confirm that no concepts are returned for a node
 * without a lexitem.
 */
PNode *TranslateAWord(PNode *pn, PNode *pnp, Obj *max, int pos, int tense,
                      int gender, int number, int person, int degree,
                      int srclang, int tgtlang, Discourse *dc)
{
  int use_number;
  ObjList *con;
  ObjToLexEntry *ole_src, *ole_tgt;
  Word *infl;
  PNode *alts, *fresh;

  alts = NULL;
  for (con = TranslateGetAllcons(pn, pn->lexitem ? pn->lexitem->le : NULL);
       con != NULL;
       con = con->next) {
    /* todo: Put FT_FILTER checks here. */
    if (ObjIsList(con->obj) ||
        !TranslateGetOles(pos, con->obj, pn->lexitem->le, NULL, tgtlang, dc,
                          &ole_src, &ole_tgt)) {
      continue;
    }

    use_number = StringIn(F_COMMON_INFL, ole_tgt->features)
                   ? FeatureGet(ole_tgt->features, FT_NUMBER)
                   : number;

    infl = LexEntryGetInflection(ole_tgt->le, tense, gender, use_number,
                                 person, F_NULL, degree, 1, dc);
    if (!infl) {
      continue;
    }

    fresh = PNodeWord(pos, infl->word, infl->features, ole_tgt->le,
                      con->obj);
    if (pn->punc[0]) {
      TranslatePunc(fresh->punc, pn->punc, srclang, tgtlang);
    }
    fresh->ole = ole_tgt;
    fresh->obj = con->obj;

    /* Push onto the front of the list of alternatives. */
    fresh->next_altern = alts;
    alts = fresh;
  }

  return (alts == NULL) ? pn : alts;
}
/* Looks up the subcategorization information that follows character c in
 * the feature string s.
 *
 * c: character to look for (compared against s as unsigned chars).
 * s: feature string.
 * isoptional (RESULT): always initialized to 0; set to 1 if the first
 *        occurrence of c is directly followed by F_OPTIONAL.
 * subcat (RESULT): when c is found, the FT_SUBCAT character that follows
 *        (after the optional marker, if any), or F_NULL if none follows.
 *        Not written when c is not found.
 *
 * Returns 1 if c occurs in s, 0 otherwise.
 */
Bool ThetaRoleGetSubcat(int c, char *s,
                        /* RESULTS */ int *isoptional, int *subcat)
{
  *isoptional = 0;
  for (; *s; s++) {
    if (c != *((uc *)s)) continue;

    /* Found c: inspect what follows it. */
    s++;
    if (*s == F_OPTIONAL) {
      *isoptional = 1;
      s++;
    }
    *subcat = StringIn(*((uc *)s), FT_SUBCAT) ? *((uc *)s) : F_NULL;
    return 1;
  }
  return 0;
}
/* Parse filter for combining a Y constituent with a Z into a Z.
 * Returns 1.0 to accept the combination and 0.0 to reject it.
 */
Float Syn_ParseFilterYZ_Z(PNode *y, PNode *z, int lang)
{
  if (Syn_ParseFilter_IsXE(z)) {
    return 0.0;
  }

  /* Accepted outright:
   *   y:[avec qui] z:[vous parlez francais]   (preposition + relative)
   *   "par précaution,"                        (y ends in a comma)
   */
  if (Syn_ParseFilter_IsPrepRel(y) || StringIn(',', PNodeGetEndPunc(y))) {
    return 1.0;
  }

  /* Interrogative y in front of a complete sentence:
   *   "De quel instrument jouait Paganini?"
   */
  if ((PNodeClassIn(y, N("interrogative-pronoun")) ||
       PNodeClassIn(y, N("interrogative-determiner"))) &&
      Syn_ParseIsCompleteSentence(z, lang)) {
    return 1.0;
  }

  return 0.0;
}
/* Returns whether the link feature string linkfeat contains F_INFREQUENT,
 * i.e. the result of that membership test.
 */
Bool WordFormSkipLink(char *linkfeat)
{
  Bool infrequent;

  infrequent = StringIn(F_INFREQUENT, linkfeat);
  return infrequent;
}