/*
 * Save the current passcode counter and last-generated-card value to their
 * state files.  If the PPP_FLAGS_PRESENT flag is not yet set, it is set and
 * the key file is rewritten first (a failure to open the key file is ignored).
 *
 * Returns 1 when both state files were opened and written, 0 when the key
 * file directory does not exist or either state file could not be opened.
 */
int writeState() {
    FILE *cntFile;
    FILE *genFile;

    if ( ! _dir_exists(_key_file_dir()) )
        return 0;

    if ( ! pppCheckFlags(PPP_FLAGS_PRESENT) ) {
        /* Update the key file to include flags */
        FILE *keyFile;

        pppSetFlags(PPP_FLAGS_PRESENT);
        keyFile = fopen(_key_file_name(), "w");
        if (keyFile != NULL) {
            _write_data(seqKey(), keyFile);
            fclose(keyFile);
        }
    }

    /* Both files are opened before anything is written to either of them. */
    cntFile = fopen(_cnt_file_name(), "w");
    genFile = fopen(_gen_file_name(), "w");

    if (cntFile == NULL || genFile == NULL) {
        /* Partial failure: release whichever handle did open. */
        if (cntFile != NULL)
            fclose(cntFile);
        if (genFile != NULL)
            fclose(genFile);
        return 0;
    }

    _write_data(currPasscodeNum(), cntFile);
    fclose(cntFile);

    _write_data(lastCardGenerated(), genFile);
    fclose(genFile);

    return 1;
}
// Turns a key press into a textual shortcut description and stores it via
// setText(). The event is always accepted. The standard "Delete" shortcut
// clears the text; a press of a bare modifier key leaves the text untouched.
void FQTermShortcutTableWidget::keyPressEvent(QKeyEvent * event) {
  if (!event) {
    return;
  }

  event->accept();

  if (event->matches(QKeySequence::Delete)) {
    setText("");
    return;
  }

  //Thank you hooey.
  const int pressedKey = event->key();
  const Qt::KeyboardModifiers modifiers = event->modifiers();

  // A modifier pressed on its own is not a complete shortcut yet.
  switch (pressedKey) {
    case Qt::Key_Shift:
    case Qt::Key_Control:
    case Qt::Key_Meta:
    case Qt::Key_Alt:
    case Qt::Key_AltGr:
      return;
    default:
      break;
  }

  // Modifier prefix (if any) followed by the key itself, in native text form.
  QString shortcutText = "";
  if (modifiers != Qt::NoModifier) {
    QKeySequence modifierSequence(modifiers);
    shortcutText = modifierSequence.toString(QKeySequence::NativeText);
  }

  QKeySequence keySequence(pressedKey);
  shortcutText += keySequence.toString(QKeySequence::NativeText);

  setText(shortcutText);
}
// Generates ARPA (or Doug Paul) format. // LM must contain </s>, otherwise Sphinx emits error on LM loading // ERROR: "ngram_search.c", line 221: Language model/set does not contain </s>, recognition will fail // LM must contain <s>, otherwise Sphinx emits error on samples decoding // ERROR: "ngram_search.c", line 1157 : Couldn't find <s> in first frame void ArpaLanguageModel::generate(wv::slice<const WordPart*> seedUnigrams, const UkrainianPhoneticSplitter& phoneticSplitter) { const WordsUsageInfo& wordUsage = phoneticSplitter.wordUsage(); long unigramTotalUsageCounter = 0; int unusedUnigramsCount = 0; for (const WordPart* wordPart : seedUnigrams) { WordSeqKey seqKey({ wordPart->id() }); long seqUsage = wordUsage.getWordSequenceUsage(seqKey); unigramTotalUsageCounter += seqUsage; if (seqUsage == 0) unusedUnigramsCount++; } // assume that processed text covers P portion of the whole language (0..1) static const float TextStatisticCover = 0.8; //static const float TextStatisticCover = 1; static const float NewWordProb = 0.1; static const float FillerInhaleProb = 0.1; // create unigrams unigrams_.reserve(seedUnigrams.size()); int unigramOrder = 0; for (const WordPart* wordPart : seedUnigrams) { auto wordPartProbFun = [&wordUsage, unigramTotalUsageCounter](const WordPart* wordPart) -> float { if (wordPart->partText().compare(fillerInhale().data()) == 0) return FillerInhaleProb; WordSeqKey seqKey({ wordPart->id() }); long seqUsage = wordUsage.getWordSequenceUsage(seqKey); if (seqUsage == 0) { // assign it some minimal usage seqUsage = 1; } PG_Assert(seqUsage > 0); double prob = seqUsage / (double)unigramTotalUsageCounter; return prob; }; double prob = wordPartProbFun(wordPart); PG_Assert(prob > 0 && prob <= 1); double logProb = std::log10(prob); // attempt to block isolated right parts // bad, sentences become shorter even more //if (wordPart->partSide() == WordPartSide::RightPart) // logProb = LogProbMinusInf; auto gram = std::make_unique<NGramRow>(); gram->Order = 
unigramOrder++; gram->WordParts.ActualSize = 1; gram->WordParts.Array[0] = wordPart; gram->LogProb = logProb; wordPartIdToUnigram_.insert({ wordPart->id(), gram.get() }); // assert gram != null unigrams_.push_back(std::move(gram)); } if (gramMaxDimensions_ == 1) return; // TODO: merge this two routines //buildBigramsWordPartsAware(phoneticSplitter); buildBigramsWholeWordsOnly(phoneticSplitter); // set backoff probabilities for (std::unique_ptr<NGramRow>& uniPtr : unigrams_) { NGramRow& uni = *uniPtr; PG_Assert(uni.WordParts.ActualSize == 1); // unigram (<s>) if (uni.WordParts.Array[0] == phoneticSplitter.sentStartWordPart()) { //uni.LogProb = LogProbMinusInf; // start the new sentence the latest (the lowest priority) //double backOff = 0; // the first word of a sentence may be any word, not just specified one // double backOff = 1.4124; // from CMU LM for English double backOff = std::log10(1 - TextStatisticCover); uni.BackOffLogProb = backOff; continue; } // unigram (</s>) //if (uni.WordParts.Array[0] == phoneticSplitter.sentEndWordPart()) //{ // // this back off value is ignored if there is no (</s>, *) bigram // uni.BackOffLogProb = 0; // all prob are in this case // //uni.BackOffLogProb = LogProbMinusInf; // all prob are described in bigrams // continue; //} if (uni.WordParts.Array[0]->partSide() == WordPartSide::LeftPart) { // only right part can go after left part; other cases must be prohibited assertLeftBackOffImpossible(); uni.BackOffLogProb = LogProbMinusInf; } else if (uni.WordParts.Array[0]->partSide() == WordPartSide::RightPart || uni.WordParts.Array[0]->partSide() == WordPartSide::WholeWord) { // allow any word composition if text statistic is not available for some word sequence //uni.BackOffLogProb = 0; // prob=100% uni.BackOffLogProb = std::log10(1 - TextStatisticCover); } else { PG_Assert(false); if (uni.BackOffLogProb != nullptr) { // back off > 0 means eg. 
L~ ~R which do not match //PG_Assert(gram.BackOffCounter == 0); } else { PG_Assert(uni.BackOffCounter > 0); // TODO: int unigramTotalBackOffCounter = 10; double prob = uni.BackOffCounter / (double)unigramTotalBackOffCounter; prob *= (1 - TextStatisticCover); PG_Assert(prob > 0); double logProb = std::log10(prob); uni.BackOffLogProb = logProb; } } } for (NGramRow& gram : bigrams_) { PG_Assert(gram.WordParts.ActualSize == 2); if (gram.LogProb == nullptr) { PG_Assert(gram.LowOrderNGram != nullptr); double prob = gram.UsageCounter / (double)gram.LowOrderNGram->UsageCounter; if (gram.WordParts.Array[0]->partSide() == WordPartSide::LeftPart) { // Everything is concentrated in the (L~,~R) pairs; backOff(L~)=-99 // This way we prohibit (L~,~R) words, which are not enumerated - which is bad. // But we also prohibit (L~,L~) or (L~,W) - which is good. // Do not change probability! } else if (gram.WordParts.Array[0]->partSide() == WordPartSide::RightPart || gram.WordParts.Array[0]->partSide() == WordPartSide::WholeWord) { // backOff(uni1)=log(1-TextStatisticCover), so here we should reduce max probability to TextStatisticCover prob *= TextStatisticCover; } else if (gram.WordParts.Array[0] == phoneticSplitter.sentEndWordPart() || gram.WordParts.Array[0] == phoneticSplitter.sentStartWordPart()) { } PG_Assert(prob >= 0 && prob <= 1); double logProb = std::log10(prob); gram.LogProb = logProb; } } }
void ArpaLanguageModel::buildBigramsWordPartsAware(const UkrainianPhoneticSplitter& phoneticSplitter) { // note, the order of bigrams must be the same as for unigrams const WordsUsageInfo& wordUsage = phoneticSplitter.wordUsage(); bigrams_.reserve(unigrams_.size()); size_t progressBigramStepSize = unigrams_.size() / 10; for (size_t uniIndex1 = 0; uniIndex1 < unigrams_.size(); ++uniIndex1) { if (uniIndex1 % progressBigramStepSize == 0) std::cout << "progress bigram uniIndex1=" << uniIndex1 / unigrams_.size() << std::endl; NGramRow& uni1 = *unigrams_[uniIndex1]; for (size_t uniIndex2 = 0; uniIndex2 < unigrams_.size(); ++uniIndex2) { // assert: case uniIndex1 == uniIndex2 is allowed // (word1,word1) combinations is necessary to exclude impossible combination of word parts, eg (~R,~R) const NGramRow& uni2 = *unigrams_[uniIndex2]; // bigram (<s>,~Right) is impossible; handled via back off if (uni1.WordParts.Array[0] == phoneticSplitter.sentStartWordPart() && uni2.WordParts.Array[0]->partSide() == WordPartSide::RightPart) { // as <s> is ordinary word, all possible bigrams must be discarded by enumeration NGramRow bigram; bigram.LowOrderNGram = &uni1; bigram.WordParts.ActualSize = 2; bigram.WordParts.Array[0] = uni1.WordParts.Array[0]; bigram.WordParts.Array[1] = uni2.WordParts.Array[0]; bigram.LogProb = LogProbMinusInf; bigrams_.push_back(bigram); continue; } // bigram (Left~,</s>) is impossible; handled via back off if (uni1.WordParts.Array[0]->partSide() == WordPartSide::LeftPart && uni2.WordParts.Array[0] == phoneticSplitter.sentEndWordPart()) { assertLeftBackOffImpossible(); continue; } // specific case to discard (</s>,*) // (</s>,<s>) is ok if (uni1.WordParts.Array[0] == phoneticSplitter.sentEndWordPart()) { if (uni2.WordParts.Array[0] == phoneticSplitter.sentStartWordPart()) { WordSeqKey seqKey({ uni1.WordParts.Array[0]->id(), uni2.WordParts.Array[0]->id() }); long seqUsage = wordUsage.getWordSequenceUsage(seqKey); uni1.UsageCounter += seqUsage; NGramRow bigram; 
bigram.LowOrderNGram = &uni1; bigram.WordParts.ActualSize = 2; bigram.WordParts.Array[0] = uni1.WordParts.Array[0]; bigram.WordParts.Array[1] = uni2.WordParts.Array[0]; bigram.UsageCounter = seqUsage; bigrams_.push_back(bigram); } // bigram (</s>,*) is not possible continue; } // if (uni1.WordParts.Array[0]->partSide() == WordPartSide::LeftPart) { // 2nd: Left or Whole are handled via back off probability if (uni2.WordParts.Array[0]->partSide() == WordPartSide::RightPart) { NGramRow bigram; bigram.LowOrderNGram = &uni1; bigram.WordParts.ActualSize = 2; bigram.WordParts.Array[0] = uni1.WordParts.Array[0]; bigram.WordParts.Array[1] = uni2.WordParts.Array[0]; WordSeqKey seqKey({ uni1.WordParts.Array[0]->id(), uni2.WordParts.Array[0]->id() }); long seqUsage = wordUsage.getWordSequenceUsage(seqKey); if (seqUsage > 0) { uni1.UsageCounter += seqUsage; bigram.UsageCounter = seqUsage; } else { // prohibit impossible bigram (L~,~R) by enumeration bigram.LogProb = LogProbMinusInf; } bigrams_.push_back(bigram); } } else if (uni1.WordParts.Array[0]->partSide() == WordPartSide::RightPart || uni1.WordParts.Array[0]->partSide() == WordPartSide::WholeWord) { if (uni2.WordParts.Array[0]->partSide() == WordPartSide::RightPart) { // prohibit such case NGramRow bigram; bigram.LowOrderNGram = &uni1; bigram.WordParts.ActualSize = 2; bigram.WordParts.Array[0] = uni1.WordParts.Array[0]; bigram.WordParts.Array[1] = uni2.WordParts.Array[0]; bigram.LogProb = LogProbMinusInf; bigrams_.push_back(bigram); } else if (uni2.WordParts.Array[0]->partSide() == WordPartSide::LeftPart || uni2.WordParts.Array[0]->partSide() == WordPartSide::WholeWord) { WordSeqKey seqKey({ uni1.WordParts.Array[0]->id(), uni2.WordParts.Array[0]->id() }); long seqUsage = wordUsage.getWordSequenceUsage(seqKey); if (seqUsage > 0) { uni1.UsageCounter += seqUsage; NGramRow bigram; bigram.LowOrderNGram = &uni1; bigram.WordParts.ActualSize = 2; bigram.WordParts.Array[0] = uni1.WordParts.Array[0]; bigram.WordParts.Array[1] = 
uni2.WordParts.Array[0]; bigram.UsageCounter = seqUsage; bigrams_.push_back(bigram); } } } } } }
/*
 * Write the sequence key, the current passcode number and the last generated
 * card value to the key, counter and generation files.
 *
 * The key file directory is created if it does not exist.  If a key file
 * already exists, a warning is printed to stderr and the user is asked to
 * confirm before anything is overwritten.
 *
 * Returns 1 when all three files were opened and written.  Returns 0 when the
 * user declined, or when any of the three files could not be opened (in which
 * case nothing is written via _write_data, but files that did open have
 * already been truncated by fopen).
 */
int writeKeyFile() {
    FILE *fp[3];
    int proceed = 1;
    int i;

    /* create ~/.pppauth if necessary */
    if ( ! _dir_exists(_key_file_dir()) ) {
        mkdir(_key_file_dir(), S_IRWXU);
    }

    /* warn about overwriting an existing key */
    if ( _file_exists(_key_file_name()) ) {
        fprintf(stderr,
            "\n"
            "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n"
            "@ WARNING: YOU ARE ABOUT TO OVERWRITE YOUR KEY FILE! @\n"
            "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n"
            "\n"
            "IF YOU PROCEED, YOUR EXISTING PASSCARDS WILL BECOME USELESS!\n"
            "\n"
            "If this is not what you intended to do, type `NO' below.\n"
            "\n"
            "By typing `yes' below, a new sequence key will be generated\n"
            "and you will no longer be able to log in using your existing\n"
            "passcards. New passcards must be printed.\n"
            "\n"
        );

        proceed = confirm("Are you sure you want to proceed (yes/no)? ");
    }

    if (proceed) {
        _enforce_permissions();

        /* files created below get no group/other permission bits */
        umask(S_IRWXG|S_IRWXO);

        fp[0] = fopen(_key_file_name(), "w");
        fp[1] = fopen(_cnt_file_name(), "w");
        fp[2] = fopen(_gen_file_name(), "w");

        if (fp[0] && fp[1] && fp[2]) {
            _write_data(seqKey(), fp[0]);
            fclose(fp[0]);
            _write_data(currPasscodeNum(), fp[1]);
            fclose(fp[1]);
            _write_data(lastCardGenerated(), fp[2]);
            fclose(fp[2]);

            fprintf(stderr,
                "\n"
                "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n"
                " A new sequence key has been generated and saved. It is\n"
                " HIGHLY RECOMMENDED that you IMMEDIATELY print new pass-\n"
                " cards in order to access your account in the future.\n"
                "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n"
                "\n"
            );
            return 1;
        }

        /* At least one file failed to open: close the ones that did open
         * so their handles are not leaked (same policy as writeState). */
        for (i = 0; i < 3; i++) {
            if (fp[i])
                fclose(fp[i]);
        }
    } else {
        fprintf(stderr,
            "\n"
            "===========================================================\n"
            " A new sequence key WAS NOT generated and your old key\n"
            " remains intact. As a result, you can continue to use\n"
            " your existing passcards to log into your account.\n"
            "===========================================================\n"
            "\n"
        );
    }

    return 0;
}