Пример #1
0
/**
 * Constructs a factory with zeroed word counters, an initialised root node
 * and a cleared hash.
 *
 * @param alphabetFile  Path of an alphabet definition to load. When it is
 *                      empty no alphabet is loaded and m_alphas stays NULL.
 */
GaddagFactory::GaddagFactory(const UVString &alphabetFile)
	: m_encodableWords(0), m_unencodableWords(0), m_alphas(NULL)
{
	const bool alphabetRequested = !alphabetFile.empty();
	if (alphabetRequested)
	{
		// Load the alphabet into a concrete FlexibleAlphabetParameters and
		// keep it in m_alphas.
		// NOTE(review): the matching delete is not visible here - confirm
		// that the destructor releases m_alphas.
		QuackleIO::FlexibleAlphabetParameters *loadedAlphabet = new QuackleIO::FlexibleAlphabetParameters;
		loadedAlphabet->load(QuackleIO::Util::uvStringToQString(alphabetFile));
		m_alphas = loadedAlphabet;
	}

	// Root node: carries the null mark ("_" per the original note), is not a
	// word terminator, has no child pointer yet and is flagged as last child.
	// Original author's note: "So the separator is sorted to last."
	// (presumably the reason QUACKLE_NULL_MARK is used here - TODO confirm).
	m_root.c = QUACKLE_NULL_MARK;
	m_root.t = false;
	m_root.lastchild = true;
	m_root.pointer = 0;

	// Clear all four 32-bit words of the hash.
	for (int word = 0; word < 4; ++word)
		m_hash.int32ptr[word] = 0;
}
Пример #2
0
int main(int argc, char **argv) {
	QCoreApplication a(argc, argv);

	GetOpt opts;
	QString alphabet;
	opts.addOption('a', "alphabet", &alphabet);
	if (!opts.parse())
		return 1;

	if (alphabet.isNull())
		alphabet = "english";

	Quackle::AlphabetParameters *alphas = 0;
	QString alphabetFile = QString("../data/alphabets/%1.quackle_alphabet").arg(alphabet);
	UVcout << "Using alphabet file: " << QuackleIO::Util::qstringToString(alphabetFile) << endl;
	QuackleIO::FlexibleAlphabetParameters *flexure = new QuackleIO::FlexibleAlphabetParameters;
	flexure->load(alphabetFile);
	alphas = flexure;

	QString leavesFilename = "superleaves.raw";
	QFile file(leavesFilename);
	if (!file.exists())
	{
		UVcout << "leaves file does not exist: " << QuackleIO::Util::qstringToString(leavesFilename) << endl;
		return false;
	}

	if (!file.open(QIODevice::ReadOnly | QIODevice::Text))
	{
		UVcout << "Could not open " << QuackleIO::Util::qstringToString(leavesFilename) << endl;
		return false;
	}

	QTextStream stream(&file);
	stream.setCodec(QTextCodec::codecForName("UTF-8"));

	ofstream out("encoded");

	int encodableLeaves = 0;
	int unencodableLeaves = 0;

  while (!stream.atEnd()) {
		QString leaveQString;
		stream >> leaveQString;
		double value;
		stream >> value;
		//UVcout << "value: " << value << endl;

		UVString leaveString = QuackleIO::Util::qstringToString(leaveQString);

		if (stream.atEnd())
			break;

		//UVcout << "read original string: " << originalString << endl;
		UVString leftover;
    Quackle::LetterString encodedLeave = alphas->encode(leaveString, &leftover);
		if (leftover.empty())
		{
			unsigned char leavelength = encodedLeave.length();
			out.write((char*)(&leavelength), 1);
			out.write(encodedLeave.begin(), encodedLeave.length());
			unsigned short int intvalue = (value + 128) * 256;
			//UVcout << "intvalue: " << intvalue << endl;
			out.write((char*)(&intvalue), 2);
			++encodableLeaves;
		}
		else
		{
			//UVcout << "not encodable without leftover: " << originalString << endl;
			++unencodableLeaves;
		}
    }

	file.close();
	delete alphas;

	UVcout << "encodable leaves: " << encodableLeaves << ", unencodable leaves: " << unencodableLeaves << endl;

}
Пример #3
0
int MiniDawgMaker::executeFromArguments()
{
	GetOpt opts;
	QString alphabet;
	opts.addOption('a', "alphabet", &alphabet);
	if (!opts.parse())
		return 1;

	if (alphabet.isNull())
		alphabet = "english";

	Quackle::AlphabetParameters *alphas = 0;
	QString alphabetFile = QString("../data/alphabets/%1.quackle_alphabet").arg(alphabet);
	UVcout << "Using alphabet file: " << QuackleIO::Util::qstringToString(alphabetFile) << endl;
	QuackleIO::FlexibleAlphabetParameters *flexure = new QuackleIO::FlexibleAlphabetParameters;
	flexure->load(alphabetFile);
	alphas = flexure;

    root.t = false;
		root.insmallerdict = false;
		root.playability = 0;
    root.c = QUACKLE_BLANK_MARK;
    root.pointer = 0;
    root.lastchild = true;

	QString smallerDictFilename = "smaller.raw";
	QFile smallerDict(smallerDictFilename);
	if (!smallerDict.exists())
	{
		UVcout << "smaller dictionary does not exist: " << QuackleIO::Util::qstringToString(smallerDictFilename) << endl;
		return false;
	}

	if (!smallerDict.open(QIODevice::ReadOnly | QIODevice::Text))
	{
		UVcout << "Could not open " << QuackleIO::Util::qstringToString(smallerDictFilename) << endl;
		return false;
	}

	QTextStream smallerStream(&smallerDict);
	smallerStream.setCodec(QTextCodec::codecForName("UTF-8"));
	
	while (!smallerStream.atEnd())
	{
		QString originalQString;
		smallerStream >> originalQString;
		//UVcout << "this word is in the smaller dictionary: " << QuackleIO::Util::qstringToString(originalQString) << endl;
		smallerMap[originalQString] = true;
	}

	QString playabilityFilename = "playabilities.raw";
	QFile playability(playabilityFilename);
	if (!playability.exists())
	{
		UVcout << "playability does not exist: " << QuackleIO::Util::qstringToString(playabilityFilename) << endl;
		return false;
	}

	if (!playability.open(QIODevice::ReadOnly | QIODevice::Text))
	{
		UVcout << "Could not open " << QuackleIO::Util::qstringToString(playabilityFilename) << endl;
		return false;
	}

	QTextStream playabilityStream(&playability);
	playabilityStream.setCodec(QTextCodec::codecForName("UTF-8"));
	
	while (!playabilityStream.atEnd())
	{
		int pb;
		playabilityStream >> pb;
		QString originalQString;
		playabilityStream >> originalQString;
		//UVcout << "playability: " << QuackleIO::Util::qstringToString(originalQString) << " " << pb << endl;
		playabilityMap[originalQString] = pb;
	}

	QString dawgFilename = "dawginput.raw";
	QFile file(dawgFilename);
	if (!file.exists())
	{
		UVcout << "dawg does not exist: " << QuackleIO::Util::qstringToString(dawgFilename) << endl;
		return false;
	}

	if (!file.open(QIODevice::ReadOnly | QIODevice::Text))
	{
		UVcout << "Could not open " << QuackleIO::Util::qstringToString(dawgFilename) << endl;
		return false;
	}

	QTextStream stream(&file);
	stream.setCodec(QTextCodec::codecForName("UTF-8"));

	int encodableWords = 0;
	int unencodableWords = 0;

    while (!stream.atEnd())
	{
		QString originalQString;
    stream >> originalQString;

		bool inSmaller = smallerMap[originalQString];
		int pb = playabilityMap[originalQString];

		if (stream.atEnd())
			break;

		UVString originalString = QuackleIO::Util::qstringToString(originalQString);

		//UVcout << "read original string: " << originalString;
		//if (!inSmaller) UVcout << "#";
		//UVcout << endl;

		UVString leftover;
        Quackle::LetterString encodedWord = alphas->encode(originalString, &leftover);
		if (leftover.empty())
		{
			//for (Quackle::LetterString::iterator it = encodedWord.begin(); it != encodedWord.end(); ++it)
				//UVcout << "got encoded letter: " << (int)(*it) << endl;

			root.pushword(encodedWord, inSmaller, pb);
			++encodableWords;
		}
		else
		{
			UVcout << "not encodable without leftover: " << originalString << endl;
			++unencodableWords;
		}
    }

	file.close();
	delete alphas;

	UVcout << "encodable words: " << encodableWords << ", unencodable words: " << unencodableWords << endl;

    nodelist.push_back(&root);
    root.print("");    
	UVcout << "nodelist.size(): " << nodelist.size() << endl;

	minimize();

	ofstream out("output.dawg", ios::out | ios::binary);

    for (unsigned int i = 0; i < nodelist.size(); i++) {
        //cout << nodelist[i]->c << " " << nodelist[i]->pointer << " " << nodelist[i]->t << " " << nodelist[i]->lastchild << endl;
		Node* n = nodelist[i];
		unsigned int p;
		if (nodelist[i]->deleted)
		{
			p = (unsigned int)(nodelist[i]->cloneof->pointer);
			// n = nodelist[i]->cloneof;
		}
		else
			p = (unsigned int)(nodelist[i]->pointer);
        
        char bytes[7];
        unsigned char n1 = (p & 0x00FF0000) >> 16;
        unsigned char n2 = (p & 0x0000FF00) >>  8;
        unsigned char n3 = (p & 0x000000FF);
        unsigned char n4 = n->c - QUACKLE_FIRST_LETTER;
				
				unsigned int pb = n->playability;
				unsigned char n5 = (pb & 0x00FF0000) >> 16;
				unsigned char n6 = (pb & 0x0000FF00) >>  8;
				unsigned char n7 = (pb & 0x000000FF);

        if (n->t) {
            n4 |= 32;
        }
        if (n->lastchild) {
            n4 |= 64;
        }
				if (n->insmallerdict) {
						n4 |= 128;
				}

        bytes[0] = n1; bytes[1] = n2; bytes[2] = n3; bytes[3] = n4;
				bytes[4] = n5; bytes[5] = n6; bytes[6] = n7;
        out.write(bytes, 7);
    }
}