// Convert the list of words in iwords to the same capitalization aWord and // return them in owords. NS_IMETHODIMP mozEnglishWordUtils::FromRootForm(const char16_t *aWord, const char16_t **iwords, uint32_t icount, char16_t ***owords, uint32_t *ocount) { nsAutoString word(aWord); nsresult rv = NS_OK; int32_t length; char16_t **tmpPtr = (char16_t **)moz_xmalloc(sizeof(char16_t *)*icount); if (!tmpPtr) return NS_ERROR_OUT_OF_MEMORY; mozEnglishWordUtils::myspCapitalization ct = captype(word); for(uint32_t i = 0; i < icount; ++i) { length = NS_strlen(iwords[i]); tmpPtr[i] = (char16_t *) moz_xmalloc(sizeof(char16_t) * (length + 1)); if (MOZ_UNLIKELY(!tmpPtr[i])) { NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(i, tmpPtr); return NS_ERROR_OUT_OF_MEMORY; } memcpy(tmpPtr[i], iwords[i], (length + 1) * sizeof(char16_t)); nsAutoString capTest(tmpPtr[i]); mozEnglishWordUtils::myspCapitalization newCt=captype(capTest); if(newCt == NoCap){ switch(ct) { case HuhCap: case NoCap: break; case AllCap: ToUpperCase(tmpPtr[i],tmpPtr[i],length); rv = NS_OK; break; case InitCap: ToUpperCase(tmpPtr[i],tmpPtr[i],1); rv = NS_OK; break; default: rv = NS_ERROR_FAILURE; // should never get here; break; } } } if (NS_SUCCEEDED(rv)){ *owords = tmpPtr; *ocount = icount; } return rv; }
/*
** Invokes the function attached to a runtime capture. 'close' is the
** entry being turned into the closing capture of the group and 's' is
** the current subject position. Stores in '*rem' how many old dynamic
** captures were taken off the Lua stack and returns the number of
** capture entries (of all kinds, the opening Cgroup included) that the
** call consumed. The values produced by the call stay on the Lua stack.
*/
int runtimecap (CapState *cs, Capture *close, const char *s, int *rem) {
  lua_State *L = cs->L;
  const int oldtop = lua_gettop(L);
  Capture *open = findopen(close);
  int firstdyn;  /* stack index of first dynamic capture argument */
  int nested;  /* number of nested values pushed */
  assert(captype(open) == Cgroup);
  firstdyn = finddyncap(open, close);
  /* turn 'close' into the group terminator at the current position */
  close->kind = Cclose;
  close->s = s;
  /* prepare capture state so that evaluation starts at the group */
  cs->cap = open;
  cs->valuecached = 0;
  luaL_checkstack(L, 4, "too many runtime captures");
  pushluaval(cs);  /* function to be called */
  lua_pushvalue(L, SUBJIDX);  /* original subject */
  lua_pushinteger(L, s - cs->s + 1);  /* current position */
  nested = pushnestedvalues(cs, 0);  /* nested captures */
  lua_call(L, nested + 2, LUA_MULTRET);  /* call dynamic function */
  if (firstdyn <= 0)
    *rem = 0;  /* no dynamic captures removed */
  else {
    const int stale = oldtop - firstdyn + 1;  /* old dynamic captures */
    int left;
    for (left = stale; left > 0; left--)
      lua_remove(L, firstdyn);  /* each removal shifts the next one down */
    *rem = stale;
  }
  return (int)(close - open);  /* number of captures of all kinds removed */
}
/*! ** Add a variant-capitalization header to a word. This routine may be ** called even for a followcase word that doesn't yet have a header. ** ** \param dp Entry to update ** ** \return 0 if all was ok, -1 if allocation error. */ int ISpellChecker::addvheader ( struct dent *dp) { register struct dent * tdent; /* Copy of entry */ /* ** Add a second entry with the correct capitalization, and then make ** dp into a special dummy entry. */ tdent = static_cast<struct dent *>(malloc(sizeof (struct dent))); if (tdent == NULL) { fprintf (stderr, MAKEDENT_C_NO_WORD_SPACE, dp->word); return -1; } *tdent = *dp; if (captype (tdent->flagfield) != FOLLOWCASE) tdent->word = NULL; else { /* Followcase words need a copy of the capitalization */ tdent->word = static_cast<char *>(malloc (static_cast<unsigned int>(strlen(tdent->word)) + 1)); if (tdent->word == NULL) { fprintf (stderr, MAKEDENT_C_NO_WORD_SPACE, dp->word); free (reinterpret_cast<char *>(tdent)); return -1; } strcpy (tdent->word, dp->word); } chupcase (dp->word); dp->next = tdent; dp->flagfield &= ~CAPTYPEMASK; dp->flagfield |= (ALLCAPS | MOREVARIANTS); return 0; }
// Convert the list of words in iwords to the same capitalization aWord and // return them in owords. NS_IMETHODIMP mozEnglishWordUtils::FromRootForm(const PRUnichar *aWord, const PRUnichar **iwords, PRUint32 icount, PRUnichar ***owords, PRUint32 *ocount) { nsAutoString word(aWord); nsresult rv = NS_OK; PRInt32 length; PRUnichar **tmpPtr = (PRUnichar **)nsMemory::Alloc(sizeof(PRUnichar *)*icount); if (!tmpPtr) return NS_ERROR_OUT_OF_MEMORY; mozEnglishWordUtils::myspCapitalization ct = captype(word); for(PRUint32 i = 0; i < icount; ++i) { length = nsCRT::strlen(iwords[i]); tmpPtr[i] = (PRUnichar *) nsMemory::Alloc(sizeof(PRUnichar) * (length + 1)); if (NS_UNLIKELY(!tmpPtr[i])) { NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(i, tmpPtr); return NS_ERROR_OUT_OF_MEMORY; } memcpy(tmpPtr[i], iwords[i], (length + 1) * sizeof(PRUnichar)); nsAutoString capTest(tmpPtr[i]); mozEnglishWordUtils::myspCapitalization newCt=captype(capTest); if(newCt == NoCap){ switch(ct) { case HuhCap: case NoCap: break; case AllCap: rv = mCaseConv->ToUpper(tmpPtr[i],tmpPtr[i],length); break; case InitCap: rv = mCaseConv->ToUpper(tmpPtr[i],tmpPtr[i],1); break; default: rv = NS_ERROR_FAILURE; // should never get here; break; } } } if (NS_SUCCEEDED(rv)){ *owords = tmpPtr; *ocount = icount; } return rv; }
/*
** Searches backward from 'cap' for a named group capture whose name
** equals the value at the top of the Lua stack. On success pops that
** reference name and returns the group capture; otherwise raises a Lua
** error.
*/
static Capture *findback (CapState *cs, Capture *cap) {
  lua_State *L = cs->L;
  while (cap > cs->ocap) {  /* until the start of the capture list */
    cap--;
    if (isclosecap(cap))
      cap = findopen(cap);  /* jump over the nested captures */
    else if (!isfullcap(cap))
      continue;  /* opening of an enclosing capture: look further back */
    if (captype(cap) != Cgroup)
      continue;  /* only groups can be referenced */
    getfromktable(cs, cap->idx);  /* push this group's name */
    if (lua_equal(L, -2, -1)) {  /* same name as the reference? */
      lua_pop(L, 2);  /* drop reference name and group name */
      return cap;
    }
    lua_pop(L, 1);  /* drop group name, keep searching */
  }
  luaL_error(L, "back reference '%s' not found", lua_tostring(L, -1));
  return NULL;  /* to avoid warnings */
}
/*
** Evaluates the current capture and appends its first value to buffer
** 'b'. 'what' names the kind of value in the error message raised when
** that first value is not a string. Returns whether there was a value
** (1 for string/substitution captures, otherwise the number of values
** the capture produced).
*/
static int addonestring (luaL_Buffer *b, CapState *cs, const char *what) {
  const int kind = captype(cs->cap);
  if (kind == Cstring) {
    stringcap(b, cs);  /* written straight into the buffer */
    return 1;
  }
  if (kind == Csubst) {
    substcap(b, cs);  /* written straight into the buffer */
    return 1;
  }
  {
    lua_State *L = cs->L;
    const int produced = pushcapture(cs);
    if (produced <= 0)
      return produced;  /* nothing to add */
    if (produced > 1)
      lua_pop(L, produced - 1);  /* keep only the first result */
    if (!lua_isstring(L, -1))
      luaL_error(L, "invalid %s value (a %s)", what, luaL_typename(L, -1));
    luaL_addvalue(b);
    return produced;
  }
}
/*
** Fills array 'cps', starting at index 'n', with the values of the
** current capture, which must be Cstring (first call) or Csimple
** (recursive calls). The entry for the capture itself records the text
** of the whole match (this is %0 in the first call); nested simple
** captures are expanded recursively and any other nested capture is
** stored unevaluated. Once the array holds MAXSTRCAPS entries the
** remaining nested captures are skipped.
** Returns the number of elements in the array that were filled.
*/
static int getstrcaps (CapState *cs, StrAux *cps, int n) {
  const int self = n;  /* slot describing this capture */
  Capture *head = cs->cap;
  n++;
  cps[self].isstring = 1;  /* get string value */
  cps[self].u.s.s = head->s;  /* match starts here */
  cs->cap++;  /* step past the opening entry */
  if (!isfullcap(head)) {  /* are there nested captures? */
    for (; !isclosecap(cs->cap); ) {
      if (n >= MAXSTRCAPS)
        nextcap(cs);  /* array is full: extra captures are not needed */
      else if (captype(cs->cap) == Csimple)
        n = getstrcaps(cs, cps, n);  /* nested string: recurse */
      else {
        cps[n].isstring = 0;  /* not a string */
        cps[n].u.cp = cs->cap;  /* keep original capture for later */
        nextcap(cs);
        n++;
      }
    }
    cs->cap++;  /* skip close */
  }
  cps[self].u.s.e = closeaddr(cs->cap - 1);  /* match ends here */
  return n;
}
/*
** Table capture: pushes a new table and fills it with the values of
** the nested captures. Values of named groups are stored under the
** group name; all other values are stored at consecutive integer keys
** starting at 1. Always returns 1 (only the table is pushed).
*/
static int tablecap (CapState *cs) {
  lua_State *L = cs->L;
  Capture *head;
  int stored = 0;  /* values placed in the array part so far */
  lua_newtable(L);
  head = cs->cap++;
  if (isfullcap(head))
    return 1;  /* no nested captures: table is empty */
  for (; !isclosecap(cs->cap); ) {
    const int named = (captype(cs->cap) == Cgroup && cs->cap->idx != 0);
    if (named) {
      pushluaval(cs);  /* key: group name */
      pushonenestedvalue(cs);  /* value */
      lua_settable(L, -3);
    }
    else {
      const int pushed = pushcapture(cs);
      int j;
      /* the top of the stack is the last value; store from last to first */
      for (j = pushed; j > 0; j--)
        lua_rawseti(L, -(j + 1), stored + j);
      stored += pushed;
    }
  }
  cs->cap++;  /* skip close entry */
  return 1;  /* number of values pushed (only the table) */
}
// return the possible root forms of aWord. NS_IMETHODIMP mozEnglishWordUtils::GetRootForm(const char16_t *aWord, uint32_t type, char16_t ***words, uint32_t *count) { nsAutoString word(aWord); char16_t **tmpPtr; int32_t length = word.Length(); *count = 0; mozEnglishWordUtils::myspCapitalization ct = captype(word); switch (ct) { case HuhCap: case NoCap: tmpPtr = (char16_t **)moz_xmalloc(sizeof(char16_t *)); if (!tmpPtr) return NS_ERROR_OUT_OF_MEMORY; tmpPtr[0] = ToNewUnicode(word); if (!tmpPtr[0]) { NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(0, tmpPtr); return NS_ERROR_OUT_OF_MEMORY; } *words = tmpPtr; *count = 1; break; case AllCap: tmpPtr = (char16_t **)moz_xmalloc(sizeof(char16_t *) * 3); if (!tmpPtr) return NS_ERROR_OUT_OF_MEMORY; tmpPtr[0] = ToNewUnicode(word); if (!tmpPtr[0]) { NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(0, tmpPtr); return NS_ERROR_OUT_OF_MEMORY; } ToLowerCase(tmpPtr[0], tmpPtr[0], length); tmpPtr[1] = ToNewUnicode(word); if (!tmpPtr[1]) { NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(1, tmpPtr); return NS_ERROR_OUT_OF_MEMORY; } ToLowerCase(tmpPtr[1], tmpPtr[1], length); ToUpperCase(tmpPtr[1], tmpPtr[1], 1); tmpPtr[2] = ToNewUnicode(word); if (!tmpPtr[2]) { NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(2, tmpPtr); return NS_ERROR_OUT_OF_MEMORY; } *words = tmpPtr; *count = 3; break; case InitCap: tmpPtr = (char16_t **)moz_xmalloc(sizeof(char16_t *) * 2); if (!tmpPtr) return NS_ERROR_OUT_OF_MEMORY; tmpPtr[0] = ToNewUnicode(word); if (!tmpPtr[0]) { NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(0, tmpPtr); return NS_ERROR_OUT_OF_MEMORY; } ToLowerCase(tmpPtr[0], tmpPtr[0], length); tmpPtr[1] = ToNewUnicode(word); if (!tmpPtr[1]) { NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(1, tmpPtr); return NS_ERROR_OUT_OF_MEMORY; } *words = tmpPtr; *count = 2; break; default: return NS_ERROR_FAILURE; // should never get here; } return NS_OK; }
/*!
 * Store the capitalization(s) of a word that are legal for a dictionary
 * root into savearea, advancing *nsaved for each one stored.  Nothing is
 * stored once *nsaved has reached MAX_CAPS.
 *
 * \param word     Word to be saved
 * \param pattern  Capitalization pattern
 * \param prestrip No. chars stripped from front
 * \param preadd   No. chars added to front of root
 * \param sufstrip No. chars stripped from back
 * \param sufadd   No. chars added to back of root
 * \param firstdent First dent for root
 * \param pfxent   Pfx-flag entry for word
 * \param sufent   Sfx-flag entry for word
 * \param savearea Room to save words
 * \param nsaved   Number saved so far (updated)
 */
void ISpellChecker::save_root_cap (ichar_t *word, ichar_t *pattern,
                                   int prestrip, int preadd, int sufstrip, int sufadd,
                                   struct dent *firstdent, struct flagent *pfxent, struct flagent *sufent,
                                   ichar_t savearea[MAX_CAPS][INPUTWORDLEN + MAXAFFIXLEN],
                                   int * nsaved)
{
#ifndef NO_CAPITALIZATION_SUPPORT
    register struct dent * dent;
#endif /* NO_CAPITALIZATION_SUPPORT */
    int firstisupper;
    ichar_t newword[INPUTWORDLEN + 4 * MAXAFFIXLEN + 4];
#ifndef NO_CAPITALIZATION_SUPPORT
    register ichar_t * p;
    int len;
    int i;
    int limit;
#endif /* NO_CAPITALIZATION_SUPPORT */

    /* No room left for another capitalization */
    if (*nsaved >= MAX_CAPS)
        return;
    /* Work on a private copy; word itself is never modified */
    icharcpy (newword, word);
    firstisupper = myupper (pattern[0]);
#ifdef NO_CAPITALIZATION_SUPPORT
    /*
    ** Apply the old, simple-minded capitalization rules.
    */
    if (firstisupper)
    {
        if (myupper (pattern[1]))
            upcase (newword);
        else
        {
            lowcase (newword);
            newword[0] = mytoupper (newword[0]);
        }
    }
    else
        lowcase (newword);
    icharcpy (savearea[*nsaved], newword);
    (*nsaved)++;
    return;
#else /* NO_CAPITALIZATION_SUPPORT */
/*
** True when the entry's mask carries the flag bit of every affix that
** was applied (a NULL affix entry means no affix of that kind).
*/
#define flagsareok(dent) \
    ((pfxent == NULL \
        || TSTMASKBIT (dent->mask, pfxent->flagbit)) \
      && (sufent == NULL \
        || TSTMASKBIT (dent->mask, sufent->flagbit)))

    dent = firstdent;
    if ((dent->flagfield & (CAPTYPEMASK | MOREVARIANTS)) == ALLCAPS)
    {
        upcase (newword); /* Uppercase required */
        icharcpy (savearea[*nsaved], newword);
        (*nsaved)++;
        return;
    }
    /* Look for the first lowercase character in the pattern */
    for (p = pattern; *p; p++)
    {
        if (mylower (*p))
            break;
    }
    if (*p == 0)
    {
        upcase (newword); /* Pattern was all caps */
        icharcpy (savearea[*nsaved], newword);
        (*nsaved)++;
        return;
    }
    /* Look for an uppercase character after the first position */
    for (p = pattern + 1; *p; p++)
    {
        if (myupper (*p))
            break;
    }
    if (*p == 0)
    {
        /*
        ** The pattern was all-lower or capitalized.  If that's
        ** legal, insert only that version.
        */
        if (firstisupper)
        {
            if (captype (dent->flagfield) == CAPITALIZED
              || captype (dent->flagfield) == ANYCASE)
            {
                lowcase (newword);
                newword[0] = mytoupper (newword[0]);
                icharcpy (savearea[*nsaved], newword);
                (*nsaved)++;
                return;
            }
        }
        else
        {
            if (captype (dent->flagfield) == ANYCASE)
            {
                lowcase (newword);
                icharcpy (savearea[*nsaved], newword);
                (*nsaved)++;
                return;
            }
        }
        /*
        ** The first entry did not allow it; try the remaining variants,
        ** ignoring followcase ones and ones lacking the affix flags.
        */
        while (dent->flagfield & MOREVARIANTS)
        {
            dent = dent->next;
            if (captype (dent->flagfield) == FOLLOWCASE
              || !flagsareok (dent))
                continue;
            if (firstisupper)
            {
                if (captype (dent->flagfield) == CAPITALIZED)
                {
                    lowcase (newword);
                    newword[0] = mytoupper (newword[0]);
                    icharcpy (savearea[*nsaved], newword);
                    (*nsaved)++;
                    return;
                }
            }
            else
            {
                if (captype (dent->flagfield) == ANYCASE)
                {
                    lowcase (newword);
                    icharcpy (savearea[*nsaved], newword);
                    (*nsaved)++;
                    return;
                }
            }
        }
    }
    /*
    ** Either the sample had complex capitalization, or the simple
    ** capitalizations (all-lower or capitalized) are illegal.
    ** Insert all legal capitalizations, including those that are
    ** all-lower or capitalized.  If the prototype is capitalized,
    ** capitalize all-lower samples.  Watch out for affixes.
    */
    dent = firstdent;
    p = strtosichar (dent->word, 1);
    len = icharlen (p);
    if (dent->flagfield & MOREVARIANTS)
        dent = dent->next; /* Skip place-holder entry */
    for ( ; ; )
    {
        if (flagsareok (dent))
        {
            if (captype (dent->flagfield) != FOLLOWCASE)
            {
                lowcase (newword);
                if (firstisupper
                  || captype (dent->flagfield) == CAPITALIZED)
                    newword[0] = mytoupper (newword[0]);
                icharcpy (savearea[*nsaved], newword);
                (*nsaved)++;
                if (*nsaved >= MAX_CAPS)
                    return;
            }
            else
            {
                /* Followcase is the tough one. */
                p = strtosichar (dent->word, 1);
                /* Copy the root, minus its stripped ends, after the added prefix */
                memmove (
                  reinterpret_cast<char *>(newword + preadd),
                  reinterpret_cast<char *>(p + prestrip),
                  (len - prestrip - sufstrip) * sizeof (ichar_t));
                /* The added prefix takes the case of the first kept root character */
                if (myupper (p[prestrip]))
                {
                    for (i = 0; i < preadd; i++)
                        newword[i] = mytoupper (newword[i]);
                }
                else
                {
                    for (i = 0; i < preadd; i++)
                        newword[i] = mytolower (newword[i]);
                }
                /* [i, limit) is the range of newword holding the added suffix */
                limit = len + preadd + sufadd - prestrip - sufstrip;
                i = len + preadd - prestrip - sufstrip;
                /* The added suffix takes the case of the last kept root character */
                p += len - sufstrip - 1;
                if (myupper (*p))
                {
                    for (p = newword + i; i < limit; i++, p++)
                        *p = mytoupper (*p);
                }
                else
                {
                    for (p = newword + i; i < limit; i++, p++)
                        *p = mytolower (*p);
                }
                icharcpy (savearea[*nsaved], newword);
                (*nsaved)++;
                if (*nsaved >= MAX_CAPS)
                    return;
            }
        }
        if ((dent->flagfield & MOREVARIANTS) == 0)
            break; /* End of the line */
        dent = dent->next;
    }
    return;
#endif /* NO_CAPITALIZATION_SUPPORT */
}
/*
** Evaluates the current capture, leaving all of its values on the Lua
** stack and advancing the capture state past it. Returns the number of
** values pushed.
*/
static int pushcapture (CapState *cs) {
  lua_State *L = cs->L;
  luaL_checkstack(L, 4, "too many captures");
  switch (captype(cs->cap)) {
    case Cposition: {  /* 1-based position in the subject */
      Capture *here = cs->cap++;
      lua_pushinteger(L, here->s - cs->s + 1);
      return 1;
    }
    case Cconst: {
      pushluaval(cs);
      cs->cap++;
      return 1;
    }
    case Carg: {  /* extra argument given to the match call */
      Capture *here = cs->cap++;
      int arg = here->idx;
      if (arg + FIXEDARGS > cs->ptop)
        return luaL_error(L, "reference to absent extra argument #%d", arg);
      lua_pushvalue(L, arg + FIXEDARGS);
      return 1;
    }
    case Csimple: {
      int pushed = pushnestedvalues(cs, 1);
      lua_insert(L, -pushed);  /* make whole match be first result */
      return pushed;
    }
    case Cruntime: {
      Capture *here = cs->cap++;
      lua_pushvalue(L, here->idx);  /* value is in the stack */
      return 1;
    }
    case Cstring:
    case Csubst: {  /* both build their single result in a buffer */
      luaL_Buffer b;
      luaL_buffinit(L, &b);
      if (captype(cs->cap) == Cstring)
        stringcap(&b, cs);
      else
        substcap(&b, cs);
      luaL_pushresult(&b);
      return 1;
    }
    case Cgroup: {
      if (cs->cap->idx != 0) {  /* named group: add no values */
        nextcap(cs);  /* skip capture */
        return 0;
      }
      return pushnestedvalues(cs, 0);  /* anonymous: add all nested values */
    }
    case Cbackref: return backrefcap(cs);
    case Ctable: return tablecap(cs);
    case Cfunction: return functioncap(cs);
    case Cnum: return numcap(cs);
    case Cquery: return querycap(cs);
    case Cfold: return foldcap(cs);
    default: assert(0); return 0;
  }
}
// return the possible root forms of aWord. NS_IMETHODIMP mozEnglishWordUtils::GetRootForm(const PRUnichar *aWord, PRUint32 type, PRUnichar ***words, PRUint32 *count) { nsAutoString word(aWord); PRUnichar **tmpPtr; PRInt32 length = word.Length(); *count = 0; if (!mCaseConv) { mCaseConv = do_GetService(kUnicharUtilCID); if (!mCaseConv) return NS_ERROR_FAILURE; } mozEnglishWordUtils::myspCapitalization ct = captype(word); switch (ct) { case HuhCap: case NoCap: tmpPtr = (PRUnichar **)nsMemory::Alloc(sizeof(PRUnichar *)); if (!tmpPtr) return NS_ERROR_OUT_OF_MEMORY; tmpPtr[0] = ToNewUnicode(word); if (!tmpPtr[0]) { NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(0, tmpPtr); return NS_ERROR_OUT_OF_MEMORY; } *words = tmpPtr; *count = 1; break; case AllCap: tmpPtr = (PRUnichar **)nsMemory::Alloc(sizeof(PRUnichar *) * 3); if (!tmpPtr) return NS_ERROR_OUT_OF_MEMORY; tmpPtr[0] = ToNewUnicode(word); if (!tmpPtr[0]) { NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(0, tmpPtr); return NS_ERROR_OUT_OF_MEMORY; } mCaseConv->ToLower(tmpPtr[0], tmpPtr[0], length); tmpPtr[1] = ToNewUnicode(word); if (!tmpPtr[1]) { NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(1, tmpPtr); return NS_ERROR_OUT_OF_MEMORY; } mCaseConv->ToLower(tmpPtr[1], tmpPtr[1], length); mCaseConv->ToUpper(tmpPtr[1], tmpPtr[1], 1); tmpPtr[2] = ToNewUnicode(word); if (!tmpPtr[2]) { NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(2, tmpPtr); return NS_ERROR_OUT_OF_MEMORY; } *words = tmpPtr; *count = 3; break; case InitCap: tmpPtr = (PRUnichar **)nsMemory::Alloc(sizeof(PRUnichar *) * 2); if (!tmpPtr) return NS_ERROR_OUT_OF_MEMORY; tmpPtr[0] = ToNewUnicode(word); if (!tmpPtr[0]) { NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(0, tmpPtr); return NS_ERROR_OUT_OF_MEMORY; } mCaseConv->ToLower(tmpPtr[0], tmpPtr[0], length); tmpPtr[1] = ToNewUnicode(word); if (!tmpPtr[1]) { NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(1, tmpPtr); return NS_ERROR_OUT_OF_MEMORY; } *words = tmpPtr; *count = 2; break; default: return NS_ERROR_FAILURE; // should never get here; } return NS_OK; }
/*
 * Decide whether the capitalization of a word is acceptable for the
 * dictionary entry (and its capitalization variants) it was matched
 * against.
 *
 * \param word Word whose capitalization is being checked
 * \param hit  Match record; its dictent, prefix and suffix members are
 *             consulted
 * \param len  Used to bound the root comparison for followcase words;
 *             presumably the length of word -- confirm against callers
 *
 * \return 1 if the capitalization is acceptable, 0 if no variant matches
 */
int
ISpellChecker::cap_ok(ichar_t *word, struct success *hit, int len)
{
    register ichar_t *dword;
    register ichar_t *w;
    register struct dent *dent;
    ichar_t dentword[INPUTWORDLEN + MAXAFFIXLEN];
    int preadd;
    int prestrip;
    int sufadd;
    ichar_t *limit;
    long thiscap;
    long dentcap;

    thiscap = whatcap(word);
    /*
    ** All caps is always legal, regardless of affixes.
    */
    preadd = prestrip = sufadd = 0;
    if(thiscap == ALLCAPS)
        return 1;
    else if(thiscap == FOLLOWCASE)
    {
        /* Set up some constants for the while(1) loop below */
        if(hit->prefix)
        {
            preadd = hit->prefix->affl;
            prestrip = hit->prefix->stripl;
        }
        else
            preadd = prestrip = 0;
        sufadd = hit->suffix ? hit->suffix->affl : 0;
    }
    /*
    ** Search the variants for one that matches what we have.  Note
    ** that thiscap can't be ALLCAPS, since we already returned
    ** for that case.
    */
    dent = hit->dictent;
    for(;;)
    {
        dentcap = captype(dent->flagfield);
        if(dentcap != thiscap)
        {
            /* A capitalized word is also accepted by an any-case entry */
            if(dentcap == ANYCASE && thiscap == CAPITALIZED
                && entryhasaffixes(dent, hit))
                return 1;
        }
        else /* captypes match */
        {
            if(thiscap != FOLLOWCASE)
            {
                if(entryhasaffixes(dent, hit))
                    return 1;
            }
            else
            {
                /*
                ** Make sure followcase matches exactly.
                ** Life is made more difficult by the
                ** possibility of affixes.  Start with
                ** the prefix.
                */
                strtoichar(dentword, dent->word, INPUTWORDLEN, 1);
                dword = dentword;
                limit = word + preadd;
                /*
                ** Every added prefix character must have the same
                ** case as the first root character that was kept.
                */
                if(myupper(dword[prestrip]))
                {
                    for(w = word; w < limit; w++)
                    {
                        if(mylower(*w))
                            goto doublecontinue;
                    }
                }
                else
                {
                    for(w = word; w < limit; w++)
                    {
                        if(myupper(*w))
                            goto doublecontinue;
                    }
                }
                dword += prestrip;
                /* Do root part of word */
                limit = dword + len - preadd - sufadd;
                while(dword < limit)
                {
                    if(*dword++ != *w++)
                        goto doublecontinue;
                }
                /* Do suffix */
                /*
                ** Every remaining character of the word must have the
                ** same case as the last root character compared.
                */
                dword = limit - 1;
                if(myupper(*dword))
                {
                    for(; *w; w++)
                    {
                        if(mylower(*w))
                            goto doublecontinue;
                    }
                }
                else
                {
                    for(; *w; w++)
                    {
                        if(myupper(*w))
                            goto doublecontinue;
                    }
                }
                /*
                ** All failure paths go to "doublecontinue,"
                ** so if we get here it must match.
                */
                if(entryhasaffixes(dent, hit))
                    return 1;
                /* Mismatch: fall out to the variant-advance code below */
doublecontinue:;
            }
        }
        if((dent->flagfield & MOREVARIANTS) == 0)
            break;
        dent = dent->next;
    }

    /* No matches found */
    return 0;
}