Пример #1
0
/*
 * Implementation notes: readBinaryFile
 * ------------------------------------
 * The binary lexicon file format must follow this pattern:
 * DAWG:<startnode index>:<num bytes>:<num bytes block of edge data>
 *
 * The header is validated before any memory is allocated:
 *   - the first four characters must be "DAWG";
 *   - <startnode index> and <num bytes> must be non-negative;
 *   - <num bytes> must be a whole multiple of sizeof(Edge), otherwise the
 *     bulk read below would write past the end of the edge array;
 *   - <startnode index> must refer to an edge inside the array, otherwise
 *     the start pointer (and the word count traversal) would be out of bounds.
 * Any violation is reported through error().
 */
void DawgLexicon::readBinaryFile(std::istream& input) {
    long startIndex = 0;
    long numBytes = 0;
    char firstFour[4], expected[] = "DAWG";
    const long edgeSize = static_cast<long>(sizeof(Edge));

    if (input.fail()) {
        error("DawgLexicon::addWordsFromFile: Couldn't read input");
    }

    // Header: 4-character tag, then two integers, each followed by a
    // single separator character that is consumed and discarded.
    input.read(firstFour, 4);
    input.get();
    input >> startIndex;
    input.get();
    input >> numBytes;
    input.get();

    // The stream state is tested first so the parsed values are only
    // inspected when the extraction actually succeeded.
    if (input.fail() || strncmp(firstFour, expected, 4) != 0
            || startIndex < 0 || numBytes < 0
            || numBytes % edgeSize != 0
            || startIndex >= numBytes / edgeSize) {
        error("DawgLexicon::addWordsFromFile: Improperly formed lexicon file");
    }

    numEdges = numBytes / edgeSize;
    edges = new Edge[numEdges];
    start = &edges[startIndex];
    input.read(reinterpret_cast<char*>(edges), numBytes);
    // NOTE(review): a short read that ends at end-of-file is tolerated here,
    // exactly as before; confirm whether truncated files should be rejected.
    if (input.fail() && !input.eof()) {
        error("DawgLexicon::addWordsFromFile: Improperly formed lexicon file");
    }

#if defined(BYTE_ORDER) && BYTE_ORDER == LITTLE_ENDIAN
    // Convert one 32-bit word per edge with my_ntohl on little-endian hosts.
    // Assumes each Edge occupies exactly 32 bits -- TODO confirm.
    uint32_t *cur = reinterpret_cast<uint32_t *>(edges);
    for (int i = 0; i < numEdges; i++, cur++) {
        *cur = my_ntohl(*cur);
    }
#endif

    numDawgWords = countDawgWords(start);
}
/*
 * Implementation notes: countDawgWords
 * ------------------------------------
 * Walks the run of consecutive edges beginning at ep, stopping after the
 * edge whose lastEdge flag is set.  Every edge marked accept adds one to
 * the total, and every edge with a nonzero children index adds the total
 * obtained by recursing on the edge run starting at edges[children].
 */
int DawgLexicon::countDawgWords(Edge* ep) const {
    int total = 0;
    Edge* cur = ep;
    for (;;) {
        if (cur->accept) {
            total++;
        }
        if (cur->children != 0) {
            total += countDawgWords(&edges[cur->children]);
        }
        if (cur->lastEdge) {
            return total;
        }
        ++cur;
    }
}
/*
 * Implementation notes: readBinaryFile
 * ------------------------------------
 * Opens the named file in binary mode and loads it as a binary lexicon.
 * The binary lexicon file format must follow this pattern:
 * DAWG:<startnode index>:<num bytes>:<num bytes block of edge data>
 *
 * The header is validated before any memory is allocated:
 *   - the first four characters must be "DAWG";
 *   - <startnode index> and <num bytes> must be non-negative;
 *   - <num bytes> must be a whole multiple of sizeof(Edge), otherwise the
 *     bulk read below would write past the end of the edge array;
 *   - <startnode index> must refer to an edge inside the array, otherwise
 *     the start pointer (and the word count traversal) would be out of bounds.
 * Any violation is reported through error(), with the filename appended.
 */
void DawgLexicon::readBinaryFile(const std::string& filename) {
    long startIndex = 0;
    long numBytes = 0;
    char firstFour[4], expected[] = "DAWG";
    const long edgeSize = static_cast<long>(sizeof(Edge));
#ifdef _foreachpatch_h
    std::ifstream istr(filename.c_str(), __IOS_IN__ | __IOS_BINARY__);
#else
    std::ifstream istr(filename.c_str(), std::ios::in | std::ios::binary);
#endif // _foreachpatch_h

    if (istr.fail()) {
        error("DawgLexicon::addWordsFromFile: Couldn't open lexicon file " + filename);
    }

    // Header: 4-character tag, then two integers, each followed by a
    // single separator character that is consumed and discarded.
    istr.read(firstFour, 4);
    istr.get();
    istr >> startIndex;
    istr.get();
    istr >> numBytes;
    istr.get();

    // The stream state is tested first so the parsed values are only
    // inspected when the extraction actually succeeded.
    if (istr.fail() || strncmp(firstFour, expected, 4) != 0
            || startIndex < 0 || numBytes < 0
            || numBytes % edgeSize != 0
            || startIndex >= numBytes / edgeSize) {
        error("DawgLexicon::addWordsFromFile: Improperly formed lexicon file " + filename);
    }

    numEdges = numBytes / edgeSize;
    edges = new Edge[numEdges];
    start = &edges[startIndex];
    istr.read(reinterpret_cast<char*>(edges), numBytes);
    // NOTE(review): a short read that ends at end-of-file is tolerated here,
    // exactly as before; confirm whether truncated files should be rejected.
    if (istr.fail() && !istr.eof()) {
        error("DawgLexicon::addWordsFromFile: Improperly formed lexicon file " + filename);
    }

#if defined(BYTE_ORDER) && BYTE_ORDER == LITTLE_ENDIAN
    // Convert one 32-bit word per edge with my_ntohl on little-endian hosts.
    // Assumes each Edge occupies exactly 32 bits -- TODO confirm.
    uint32_t *cur = reinterpret_cast<uint32_t *>(edges);
    for (int i = 0; i < numEdges; i++, cur++) {
        *cur = my_ntohl(*cur);
    }
#endif

    istr.close();
    numDawgWords = countDawgWords(start);
}