bool dictionary::findwordSub(const char * word,tcount & Pos,int & Nmbr)
    {
    int kar = UTF8char(word,staticUTF8);
    const char * w = word;
    int nmbr = NODES.ntoplevel;
    tcount pos = 0;
    while(nmbr > 0)
        {
        int kar2 = NODES.initialchars[pos];
        if(kar2 < kar)
            {
            ++pos;
            --nmbr;
            }
        else if(kar2 == kar)
            {
            if(kar)
                {
                ptrdiff_t p,q;
                char * s = NODES.strings[pos];
                strcmpN(s,w,p,q);
                if(s[p])
                    return false;
                w += q;
                }
            nmbr = NODES.numberOfChildren[pos];
            pos = NODES.pos[pos];
            if(pos < 0) // not a leaf, descend further
                {
                pos = -pos; // Make it a valid index.
                kar = UTF8char(w,staticUTF8);
                }
            else if(*w && *++w)
                {
                return false;
                }
            else // This is a leaf. Do the baseform and type stuff.
                {
                Pos = pos;
                Nmbr = nmbr;
                return true;
                }
            }
        else // Initial character alphabetically greater than any of the
             // available candidates.
            {
            return false;
            }
        }
    return false;
    }
Beispiel #2
0
// Reads the node section of a dictionary file into NODES.
//
// The section starts with two header fields, read in this order: the total
// number of nodes (a tcount) and the number of top-level nodes (a tchildren).
// The node data proper is read by readStretch(), after which the initial
// character of every node string is cached in NODES.initialchars.
//
// fp  Stream positioned at the start of the node section.
//
// Returns true if both header fields could be read, false otherwise. On
// failure NODES is left untouched, so a truncated file is never reported as
// a successfully loaded (but uninitialised) node table.
bool dictionary::readNodes(FILE * fp)
    {
    tcount nodeBufLen;
    tchildren length;
    // Read the complete header before allocating anything. The second read
    // only takes place if the first one succeeded.
    if(  fread(&nodeBufLen,sizeof(nodeBufLen),1,fp) != 1
      || fread(&length,sizeof(length),1,fp) != 1
      )
        {
        return false;
        }

    NODES.nnodes = nodeBufLen;
    NODES.initialchars = new int[nodeBufLen];
    NODES.strings = new char * [nodeBufLen];
    NODES.numberOfChildren = new tchildren[nodeBufLen];
    NODES.pos = new tindex[nodeBufLen];

    NODES.ntoplevel = length;
    readStretch(NODES.ntoplevel,0,fp);
    // NOTE(review): this loop relies on readStretch() having set
    // NODES.strings[i] for every i below nodeBufLen - confirm.
    for(tcount i = 0;i < nodeBufLen;++i)
        {
        NODES.initialchars[i] = UTF8char(NODES.strings[i],staticUTF8);
        }
    return true;
    }
Beispiel #3
0
// Looks `word` up in the node tree stored in NODES and, if `tag` is given,
// additionally requires that the word is listed with that tag.
//
// The tree walk: starting with the NODES.ntoplevel top-level nodes, the
// current run of sibling nodes is scanned for one whose initial character
// equals the next unmatched character of the word. On a match the node's
// string is compared with the unmatched tail of the word and the search
// continues with the node's children, until a leaf is reached.
//
// word  Word to search for.
// tag   Tag of the word as found in the text, or a null pointer to accept
//       the word regardless of its tag.
// Pos   Set, on success only, to the leaf's NODES.pos value, which is used
//       here as the index in LEXT of the word's first lexical entry.
// Nmbr  Set, on success only, to the leaf's NODES.numberOfChildren value,
//       which is used here as the number of lexical entries of the word.
//
// Returns true if the word was found (with a matching tag, if one was
// given), false otherwise.
bool dictionary::findwordSub(const char * word, const char * tag, tcount & Pos,int & Nmbr)
    {
    int kar = UTF8char(word,staticUTF8); // character to look for among the siblings
    const char * w = word;               // part of the word not matched yet
    int nmbr = NODES.ntoplevel;          // siblings not inspected yet
    tcount pos = 0;                      // index of the sibling under inspection
    while(nmbr > 0)
        {
        int kar2 = NODES.initialchars[pos];
        if(kar2 < kar)
            {
            // Not there yet, try the next sibling.
            ++pos;
            --nmbr;
            }
        else if(kar2 == kar)
            {
            if(kar)
                {
                ptrdiff_t p,q;
                char * s = NODES.strings[pos];
                // Presumably strcmpN reports in p and q how far the
                // comparison got in s and w respectively - TODO confirm.
                strcmpN(s,w,p,q);
                if(s[p])
                    return false; // The node's string was not matched to its end.
                w += q;
                }
            // The child count has to be fetched before pos is overwritten.
            nmbr = NODES.numberOfChildren[pos];
            pos = NODES.pos[pos];
            if(pos < 0) // not a leaf, descend further
                {
                pos = -pos; // Make it a valid index.
                kar = UTF8char(w,staticUTF8);
                }
            else if(*w && *++w)
                {
                // A leaf, but two or more characters are still unmatched.
                return false;
                }
            else // This is a leaf. Do the baseform and type stuff.
                {
                if (tag)
                    {
                    // See whether the word's tag can be found in the
                    // dictionary's lexical information.
                    const char * Tp = Lemmatiser::translate(tag); // tag as found in the text
                    lext * plext = LEXT + pos;
                    int m; // output argument of maxFrequency, not used here

                    const char * baseTp = LemmaTag(Tp);

                    // NOTE(review): maxFrequency is handed LEXT (the start of
                    // the table) rather than plext (this word's first entry)
                    // - confirm that this is intended.
                    unsigned int maxFreq = Word::maxFrequency(LEXT, nmbr, baseTp, m);

                    // Only entries with a frequency of at least maxFreq are
                    // eligible.
                    for (int n = nmbr; n; --n, ++plext)
                        {
                        if (plext->S.frequency >= maxFreq)
                            {
                            if (!strcmp(Tp, (plext->Type))) // Word is in dictionary,
                                {
                                Pos = pos;
                                Nmbr = nmbr;
                                return true;
                                }
                            }
                        }
                    // No eligible entry carries the requested tag. Stop here:
                    // pos and nmbr now describe lexical entries, not tree
                    // nodes, so going round the while loop once more would
                    // misinterpret them as node index and sibling count.
                    return false;
                    }
                else
                    {
                    Pos = pos;
                    Nmbr = nmbr;
                    return true;
                    }
                }
            }
        else // Initial character alphabetically greater than any of the
             // available candidates.
            {
            return false;
            }
        }
    return false; // Ran out of siblings without finding the character.
    }
Beispiel #4
0
// Tells whether the first character of `s` is its own upper case equivalent.
//
// The character is obtained with UTF8char() and compared with the result of
// upperEquivalent(). Presumably this also holds for characters that have no
// case distinction at all - verify against upperEquivalent().
//
// s  String whose first character is examined.
//
// Returns true if upperEquivalent() maps the character to itself.
bool isUpperUTF8(const char * s)
    {
    int first = UTF8char(s,UTF8);
    const unsigned int firstAsUnsigned = static_cast<unsigned int>(first);
    return firstAsUnsigned == upperEquivalent(first);
    }