/**
 * Constructs a factory with zeroed word counters, an initialised root node
 * and a cleared hash.
 *
 * @param alphabetFile  Path of an alphabet file. When non-empty, a
 *                      QuackleIO::FlexibleAlphabetParameters is allocated,
 *                      loaded from this path and stored in m_alphas. When
 *                      empty, m_alphas stays NULL.
 */
GaddagFactory::GaddagFactory(const UVString &alphabetFile)
	: m_encodableWords(0), m_unencodableWords(0), m_alphas(NULL)
{
	const bool haveAlphabetFile = !alphabetFile.empty();
	if (haveAlphabetFile)
	{
		QuackleIO::FlexibleAlphabetParameters *loadedAlphabet =
			new QuackleIO::FlexibleAlphabetParameters;
		loadedAlphabet->load(QuackleIO::Util::uvStringToQString(alphabetFile));
		m_alphas = loadedAlphabet;
	}

	// Root node setup. The root carries QUACKLE_NULL_MARK ("_"); per the
	// original author's note this is so the separator is sorted to last.
	m_root.c = QUACKLE_NULL_MARK;
	m_root.t = false;
	m_root.lastchild = true;
	m_root.pointer = 0;

	// Clear all four 32-bit words of the hash.
	for (int word = 0; word < 4; ++word)
		m_hash.int32ptr[word] = 0;
}
int main(int argc, char **argv) { QCoreApplication a(argc, argv); GetOpt opts; QString alphabet; opts.addOption('a', "alphabet", &alphabet); if (!opts.parse()) return 1; if (alphabet.isNull()) alphabet = "english"; Quackle::AlphabetParameters *alphas = 0; QString alphabetFile = QString("../data/alphabets/%1.quackle_alphabet").arg(alphabet); UVcout << "Using alphabet file: " << QuackleIO::Util::qstringToString(alphabetFile) << endl; QuackleIO::FlexibleAlphabetParameters *flexure = new QuackleIO::FlexibleAlphabetParameters; flexure->load(alphabetFile); alphas = flexure; QString leavesFilename = "superleaves.raw"; QFile file(leavesFilename); if (!file.exists()) { UVcout << "leaves file does not exist: " << QuackleIO::Util::qstringToString(leavesFilename) << endl; return false; } if (!file.open(QIODevice::ReadOnly | QIODevice::Text)) { UVcout << "Could not open " << QuackleIO::Util::qstringToString(leavesFilename) << endl; return false; } QTextStream stream(&file); stream.setCodec(QTextCodec::codecForName("UTF-8")); ofstream out("encoded"); int encodableLeaves = 0; int unencodableLeaves = 0; while (!stream.atEnd()) { QString leaveQString; stream >> leaveQString; double value; stream >> value; //UVcout << "value: " << value << endl; UVString leaveString = QuackleIO::Util::qstringToString(leaveQString); if (stream.atEnd()) break; //UVcout << "read original string: " << originalString << endl; UVString leftover; Quackle::LetterString encodedLeave = alphas->encode(leaveString, &leftover); if (leftover.empty()) { unsigned char leavelength = encodedLeave.length(); out.write((char*)(&leavelength), 1); out.write(encodedLeave.begin(), encodedLeave.length()); unsigned short int intvalue = (value + 128) * 256; //UVcout << "intvalue: " << intvalue << endl; out.write((char*)(&intvalue), 2); ++encodableLeaves; } else { //UVcout << "not encodable without leftover: " << originalString << endl; ++unencodableLeaves; } } file.close(); delete alphas; UVcout << "encodable leaves: " << 
encodableLeaves << ", unencodable leaves: " << unencodableLeaves << endl; }
int MiniDawgMaker::executeFromArguments() { GetOpt opts; QString alphabet; opts.addOption('a', "alphabet", &alphabet); if (!opts.parse()) return 1; if (alphabet.isNull()) alphabet = "english"; Quackle::AlphabetParameters *alphas = 0; QString alphabetFile = QString("../data/alphabets/%1.quackle_alphabet").arg(alphabet); UVcout << "Using alphabet file: " << QuackleIO::Util::qstringToString(alphabetFile) << endl; QuackleIO::FlexibleAlphabetParameters *flexure = new QuackleIO::FlexibleAlphabetParameters; flexure->load(alphabetFile); alphas = flexure; root.t = false; root.insmallerdict = false; root.playability = 0; root.c = QUACKLE_BLANK_MARK; root.pointer = 0; root.lastchild = true; QString smallerDictFilename = "smaller.raw"; QFile smallerDict(smallerDictFilename); if (!smallerDict.exists()) { UVcout << "smaller dictionary does not exist: " << QuackleIO::Util::qstringToString(smallerDictFilename) << endl; return false; } if (!smallerDict.open(QIODevice::ReadOnly | QIODevice::Text)) { UVcout << "Could not open " << QuackleIO::Util::qstringToString(smallerDictFilename) << endl; return false; } QTextStream smallerStream(&smallerDict); smallerStream.setCodec(QTextCodec::codecForName("UTF-8")); while (!smallerStream.atEnd()) { QString originalQString; smallerStream >> originalQString; //UVcout << "this word is in the smaller dictionary: " << QuackleIO::Util::qstringToString(originalQString) << endl; smallerMap[originalQString] = true; } QString playabilityFilename = "playabilities.raw"; QFile playability(playabilityFilename); if (!playability.exists()) { UVcout << "playability does not exist: " << QuackleIO::Util::qstringToString(playabilityFilename) << endl; return false; } if (!playability.open(QIODevice::ReadOnly | QIODevice::Text)) { UVcout << "Could not open " << QuackleIO::Util::qstringToString(playabilityFilename) << endl; return false; } QTextStream playabilityStream(&playability); playabilityStream.setCodec(QTextCodec::codecForName("UTF-8")); while 
(!playabilityStream.atEnd()) { int pb; playabilityStream >> pb; QString originalQString; playabilityStream >> originalQString; //UVcout << "playability: " << QuackleIO::Util::qstringToString(originalQString) << " " << pb << endl; playabilityMap[originalQString] = pb; } QString dawgFilename = "dawginput.raw"; QFile file(dawgFilename); if (!file.exists()) { UVcout << "dawg does not exist: " << QuackleIO::Util::qstringToString(dawgFilename) << endl; return false; } if (!file.open(QIODevice::ReadOnly | QIODevice::Text)) { UVcout << "Could not open " << QuackleIO::Util::qstringToString(dawgFilename) << endl; return false; } QTextStream stream(&file); stream.setCodec(QTextCodec::codecForName("UTF-8")); int encodableWords = 0; int unencodableWords = 0; while (!stream.atEnd()) { QString originalQString; stream >> originalQString; bool inSmaller = smallerMap[originalQString]; int pb = playabilityMap[originalQString]; if (stream.atEnd()) break; UVString originalString = QuackleIO::Util::qstringToString(originalQString); //UVcout << "read original string: " << originalString; //if (!inSmaller) UVcout << "#"; //UVcout << endl; UVString leftover; Quackle::LetterString encodedWord = alphas->encode(originalString, &leftover); if (leftover.empty()) { //for (Quackle::LetterString::iterator it = encodedWord.begin(); it != encodedWord.end(); ++it) //UVcout << "got encoded letter: " << (int)(*it) << endl; root.pushword(encodedWord, inSmaller, pb); ++encodableWords; } else { UVcout << "not encodable without leftover: " << originalString << endl; ++unencodableWords; } } file.close(); delete alphas; UVcout << "encodable words: " << encodableWords << ", unencodable words: " << unencodableWords << endl; nodelist.push_back(&root); root.print(""); UVcout << "nodelist.size(): " << nodelist.size() << endl; minimize(); ofstream out("output.dawg", ios::out | ios::binary); for (unsigned int i = 0; i < nodelist.size(); i++) { //cout << nodelist[i]->c << " " << nodelist[i]->pointer << " " << 
nodelist[i]->t << " " << nodelist[i]->lastchild << endl; Node* n = nodelist[i]; unsigned int p; if (nodelist[i]->deleted) { p = (unsigned int)(nodelist[i]->cloneof->pointer); // n = nodelist[i]->cloneof; } else p = (unsigned int)(nodelist[i]->pointer); char bytes[7]; unsigned char n1 = (p & 0x00FF0000) >> 16; unsigned char n2 = (p & 0x0000FF00) >> 8; unsigned char n3 = (p & 0x000000FF); unsigned char n4 = n->c - QUACKLE_FIRST_LETTER; unsigned int pb = n->playability; unsigned char n5 = (pb & 0x00FF0000) >> 16; unsigned char n6 = (pb & 0x0000FF00) >> 8; unsigned char n7 = (pb & 0x000000FF); if (n->t) { n4 |= 32; } if (n->lastchild) { n4 |= 64; } if (n->insmallerdict) { n4 |= 128; } bytes[0] = n1; bytes[1] = n2; bytes[2] = n3; bytes[3] = n4; bytes[4] = n5; bytes[5] = n6; bytes[6] = n7; out.write(bytes, 7); } }