bool dictionary::findwordSub(const char * word,tcount & Pos,int & Nmbr) { int kar = UTF8char(word,staticUTF8); const char * w = word; int nmbr = NODES.ntoplevel; tcount pos = 0; while(nmbr > 0) { int kar2 = NODES.initialchars[pos]; if(kar2 < kar) { ++pos; --nmbr; } else if(kar2 == kar) { if(kar) { ptrdiff_t p,q; char * s = NODES.strings[pos]; strcmpN(s,w,p,q); if(s[p]) return false; w += q; } nmbr = NODES.numberOfChildren[pos]; pos = NODES.pos[pos]; if(pos < 0) // not a leaf, descend further { pos = -pos; // Make it a valid index. kar = UTF8char(w,staticUTF8); } else if(*w && *++w) { return false; } else // This is a leaf. Do the baseform and type stuff. { Pos = pos; Nmbr = nmbr; return true; } } else // Initial character alphabetically greater than any of the // available candidates. { return false; } } return false; }
bool dictionary::readNodes(FILE * fp) { tcount nodeBufLen; if(fread(&nodeBufLen,sizeof(nodeBufLen),1,fp) == 1) { NODES.nnodes = nodeBufLen; NODES.initialchars = new int[nodeBufLen]; NODES.strings = new char * [nodeBufLen]; NODES.numberOfChildren = new tchildren[nodeBufLen]; NODES.pos = new tindex[nodeBufLen]; tchildren length; if(fread(&length,sizeof(length),1,fp) == 1) { NODES.ntoplevel = length; readStretch(NODES.ntoplevel,0,fp); for(tcount i = 0;i < nodeBufLen;++i) { NODES.initialchars[i] = UTF8char(NODES.strings[i],staticUTF8); } } return true; } return false; }
bool dictionary::findwordSub(const char * word, const char * tag, tcount & Pos,int & Nmbr) { int kar = UTF8char(word,staticUTF8); const char * w = word; int nmbr = NODES.ntoplevel; tcount pos = 0; while(nmbr > 0) { int kar2 = NODES.initialchars[pos]; if(kar2 < kar) { ++pos; --nmbr; } else if(kar2 == kar) { if(kar) { ptrdiff_t p,q; char * s = NODES.strings[pos]; strcmpN(s,w,p,q); if(s[p]) return false; w += q; } nmbr = NODES.numberOfChildren[pos]; pos = NODES.pos[pos]; if(pos < 0) // not a leaf, descend further { pos = -pos; // Make it a valid index. kar = UTF8char(w,staticUTF8); } else if(*w && *++w) { return false; } else // This is a leaf. Do the baseform and type stuff. { if (tag) { lext * plext; const char * Tp = Lemmatiser::translate(tag); // tag as found in the text // See whether the word's tag can be found in the // dictionary's lexical information. plext = LEXT + pos; int m; const char * baseTp = LemmaTag(Tp); unsigned int maxFreq = Word::maxFrequency(LEXT, nmbr, baseTp, m); for (int n = nmbr; n; --n, ++plext) { if (plext->S.frequency >= maxFreq) { if (!strcmp(Tp, (plext->Type))) // Word is in dictionary, { Pos = pos; Nmbr = nmbr; return true; } } } } else { Pos = pos; Nmbr = nmbr; return true; } } } else // Initial character alphabetically greater than any of the // available candidates. { return false; } } return false; }
// Reports whether the character at the start of `s`, as decoded by
// UTF8char, is identical to what upperEquivalent returns for it.
bool isUpperUTF8(const char * s)
{
    int codePoint = UTF8char(s,UTF8);
    if(upperEquivalent(codePoint) != static_cast<unsigned int>(codePoint))
        return false;
    return true;
}