/* * Implementation notes: readBinaryFile * ------------------------------------ * The binary lexicon file format must follow this pattern: * DAWG:<startnode index>:<num bytes>:<num bytes block of edge data> */ void DawgLexicon::readBinaryFile(std::istream& input) { long startIndex, numBytes; char firstFour[4], expected[] = "DAWG"; if (input.fail()) { error("DawgLexicon::addWordsFromFile: Couldn't read input"); } input.read(firstFour, 4); input.get(); input >> startIndex; input.get(); input >> numBytes; input.get(); if (input.fail() || strncmp(firstFour, expected, 4) != 0 || startIndex < 0 || numBytes < 0) { error("DawgLexicon::addWordsFromFile: Improperly formed lexicon file"); } numEdges = numBytes / sizeof(Edge); edges = new Edge[numEdges]; start = &edges[startIndex]; input.read((char*) edges, numBytes); if (input.fail() && !input.eof()) { error("DawgLexicon::addWordsFromFile: Improperly formed lexicon file"); } #if defined(BYTE_ORDER) && BYTE_ORDER == LITTLE_ENDIAN uint32_t *cur = (uint32_t *) edges; for (int i = 0; i < numEdges; i++, cur++) { *cur = my_ntohl(*cur); } #endif numDawgWords = countDawgWords(start); }
/*
 * Implementation notes: countDawgWords
 * ------------------------------------
 * Returns the number of accepting edges reachable from the run of sibling
 * edges that begins at ep.  The run is scanned edge by edge and ends at the
 * first edge whose lastEdge flag is set.  For each edge with a nonzero
 * children index, the run starting at edges[children] is counted
 * recursively; a children index of 0 means the edge has no children.
 * No record of visited edges is kept, so a child run that is shared by
 * several parents contributes once for every path that reaches it.
 */
int DawgLexicon::countDawgWords(Edge* ep) const {
    int total = 0;
    for (Edge* cur = ep; ; ++cur) {
        if (cur->accept) {
            ++total;
        }
        if (cur->children != 0) {
            total += countDawgWords(&edges[cur->children]);
        }
        if (cur->lastEdge) {
            break;   // final sibling of this run
        }
    }
    return total;
}
/* * Implementation notes: readBinaryFile * ------------------------------------ * The binary lexicon file format must follow this pattern: * DAWG:<startnode index>:<num bytes>:<num bytes block of edge data> */ void DawgLexicon::readBinaryFile(const std::string& filename) { long startIndex, numBytes; char firstFour[4], expected[] = "DAWG"; #ifdef _foreachpatch_h std::ifstream istr(filename.c_str(), __IOS_IN__ | __IOS_BINARY__); #else std::ifstream istr(filename.c_str(), std::ios::in | std::ios::binary); #endif // _foreachpatch_h if (istr.fail()) { error("DawgLexicon::addWordsFromFile: Couldn't open lexicon file " + filename); } istr.read(firstFour, 4); istr.get(); istr >> startIndex; istr.get(); istr >> numBytes; istr.get(); if (istr.fail() || strncmp(firstFour, expected, 4) != 0 || startIndex < 0 || numBytes < 0) { error("DawgLexicon::addWordsFromFile: Improperly formed lexicon file " + filename); } numEdges = numBytes / sizeof(Edge); edges = new Edge[numEdges]; start = &edges[startIndex]; istr.read((char*) edges, numBytes); if (istr.fail() && !istr.eof()) { error("DawgLexicon::addWordsFromFile: Improperly formed lexicon file " + filename); } #if defined(BYTE_ORDER) && BYTE_ORDER == LITTLE_ENDIAN uint32_t *cur = (uint32_t *) edges; for (int i = 0; i < numEdges; i++, cur++) { *cur = my_ntohl(*cur); } #endif istr.close(); numDawgWords = countDawgWords(start); }