char *GetTimeonStringsTrans( short lp ) { strcpy( TimeonStringsTrans[lp], TimeonStrings[lp] ); long len = strlen( TimeonStringsTrans[lp] ); if ( lp == 0 ) // For 1 min mystrncpy( &TimeonStringsTrans[lp][len], TranslateID( TIME_MIN ), BUF16-len ); else // For >1 min mystrncpy( &TimeonStringsTrans[lp][len], TranslateID( TIME_MINS ), BUF16-len ); TimeonStringsTrans[lp][BUF16-1] = 0; return TimeonStringsTrans[lp]; }
// Score a rule's target phrase in isolation (no surrounding sentence context).
// Output parameters:
//   fullScore  - total LM score for all terminals in the phrase.
//   ngramScore - the part of fullScore accumulated after the first Order-1
//                positions (words whose context is complete within the phrase).
//   oovCount   - number of terminals that map to vocabulary index 0.
void KENLM<Model>::CalcScore(const Phrase<SCFG::Word> &phrase, float &fullScore,
                             float &ngramScore, std::size_t &oovCount) const
{
  fullScore = 0;
  ngramScore = 0;
  oovCount = 0;

  if (!phrase.GetSize()) return;

  // With no outer context available the chart state built here cannot be
  // reused; it is computed only because RuleScore requires one.
  lm::ngram::ChartState discarded_sadly;
  lm::ngram::RuleScore<Model> scorer(*m_ngram, discarded_sadly);

  size_t position;
  if (m_bos == phrase[0][m_factorType]) {
    // Phrase begins with <s>: anchor scoring at sentence start and skip it.
    scorer.BeginSentence();
    position = 1;
  } else {
    position = 0;
  }

  // The first Order-1 words lack a full left context; their contribution is
  // recorded separately (before_boundary) so it can be excluded below.
  size_t ngramBoundary = m_ngram->Order() - 1;
  size_t end_loop = std::min(ngramBoundary, phrase.GetSize());
  for (; position < end_loop; ++position) {
    const SCFG::Word &word = phrase[position];
    if (word.isNonTerminal) {
      // Non-terminal gap: flush the scorer and restart after the gap.
      fullScore += scorer.Finish();
      scorer.Reset();
    } else {
      lm::WordIndex index = TranslateID(word);
      scorer.Terminal(index);
      if (!index) ++oovCount;  // index 0 is counted as out-of-vocabulary
    }
  }
  // Snapshot of everything scored with incomplete context so far.
  float before_boundary = fullScore + scorer.Finish();

  // Remaining words have a full Order-1 context within the phrase.
  for (; position < phrase.GetSize(); ++position) {
    const SCFG::Word &word = phrase[position];
    if (word.isNonTerminal) {
      fullScore += scorer.Finish();
      scorer.Reset();
    } else {
      lm::WordIndex index = TranslateID(word);
      scorer.Terminal(index);
      if (!index) ++oovCount;
    }
  }
  fullScore += scorer.Finish();

  // Transform both totals into the decoder's score domain.
  ngramScore = TransformLMScore(fullScore - before_boundary);
  fullScore = TransformLMScore(fullScore);
}
/*
 * LookupDomain - translate a domain suffix (e.g. "de", "com") into its
 * localized country/region name via the regionPtr lookup table.
 *
 * Two-letter codes index by both letters (0..25 each); codes of three or
 * more letters share the overflow slots 26 (three letters) and 27 (four or
 * more) in the second dimension.
 *
 * Returns the translated name, or NULL when the code is missing, malformed,
 * or has no table entry.
 */
char *LookupDomain( char *code )
{
    DomainRegionMapPtr ptr;
    short index1, index2;

    if ( !code )
        return 0;
    /* Need at least two characters; also guards the code[1] read below
       against an empty string. */
    if ( !code[0] || !code[1] )
        return 0;

    index1 = tolower( code[0] ) - 'a';
    index2 = tolower( code[1] ) - 'a';

    /* Longer codes collapse into the shared overflow slots. */
    if ( code[2] != 0 ) {
        index2 = 26;
        if ( code[3] != 0 )
            index2 = 27;
    }

    /* Letters map to 0..25 only.  The old check used "> 26", which let
       index1 == 26 through -- one slot past the 26-letter first dimension. */
    if ( index1 < 0 || index1 > 25 )
        return ((char *)0);
    if ( index2 < 0 || index2 > 27 )
        return ((char *)0);

    ptr = regionPtr[index1][index2];
    if ( ptr != NULL )
        return TranslateID( ptr->fCountryId );

    return ((char *)0);
}
// Compute the LM contribution of applying this rule to a chart hypothesis:
// terminals are scored directly, non-terminals splice in the chart state of
// the corresponding sub-hypothesis.  The resulting chart state is written
// into `state` for reuse by parent hypotheses.
void KENLM<Model>::EvaluateWhenApplied(const SCFG::Manager &mgr, const SCFG::Hypothesis &hypo, int featureID,
                                       Scores &scores, FFState &state) const
{
  LanguageModelChartStateKenLM &newState = static_cast<LanguageModelChartStateKenLM&>(state);
  lm::ngram::RuleScore<Model> ruleScore(*m_ngram, newState.GetChartState());
  const SCFG::TargetPhraseImpl &target = hypo.GetTargetPhrase();
  // Maps a position in the target phrase to the index of the sub-hypothesis
  // covering that non-terminal.
  const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
    target.GetAlignNonTerm().GetNonTermIndexMap();

  const size_t size = target.GetSize();
  size_t phrasePos = 0;

  // Special cases for first word.
  if (size) {
    const SCFG::Word &word = target[0];
    if (word[m_factorType] == m_bos) {
      // Begin of sentence
      ruleScore.BeginSentence();
      phrasePos++;
    } else if (word.isNonTerminal) {
      // Non-terminal is first so we can copy instead of rescoring.
      const SCFG::Hypothesis *prevHypo = hypo.GetPrevHypo(nonTermIndexMap[phrasePos]);
      const lm::ngram::ChartState &prevState =
        static_cast<const LanguageModelChartStateKenLM*>(prevHypo->GetState(featureID))->GetChartState();
      ruleScore.BeginNonTerminal(prevState);
      phrasePos++;
    }
  }

  for (; phrasePos < size; phrasePos++) {
    const SCFG::Word &word = target[phrasePos];
    if (word.isNonTerminal) {
      // Stitch in the already-scored state of the sub-derivation.
      const SCFG::Hypothesis *prevHypo = hypo.GetPrevHypo(nonTermIndexMap[phrasePos]);
      const lm::ngram::ChartState &prevState =
        static_cast<const LanguageModelChartStateKenLM*>(prevHypo->GetState(featureID))->GetChartState();
      ruleScore.NonTerminal(prevState);
    } else {
      ruleScore.Terminal(TranslateID(word));
    }
  }

  float score = ruleScore.Finish();
  score = TransformLMScore(score);

  // take out score from loading. This needs reworking
  //score -= target.GetScores().GetScores(*this)[0];

  // NOTE(review): the OOV feature is hard-wired off here; the two-component
  // branch is kept for when it is enabled -- confirm this is intentional.
  bool OOVFeatureEnabled = false;
  if (OOVFeatureEnabled) {
    std::vector<float> scoresVec(2);
    scoresVec[0] = score;
    scoresVec[1] = 0.0;
    scores.PlusEquals(mgr.system, *this, scoresVec);
  } else {
    scores.PlusEquals(mgr.system, *this, score);
  }
}
void InitLangList() { char colTitles[LANG_LINE_SIZE]; char *catStr; char lineStr[LANG_LINE_SIZE]; short i; short category = -1; changedSomeStrings = 0; alreadySaving = 0; // If we have already displayed (created) the list, // then week need to delete the column headings... this seems to do it... if ( hWndLangBuilderListView ) { ListView_DeleteColumn( hWndLangBuilderListView, EDIT_LANGUAGE_LISTVIEW_POS ); ListView_DeleteColumn( hWndLangBuilderListView, DEFAULT_ENGLISH_LISTVIEW_POS ); ListView_DeleteColumn( hWndLangBuilderListView, CATEGORY_NAME_LISTVIEW_POS ); } // Initialise the Language list, means creating the column titles and adding them, // then making sure there is no data in the list mystrncpyNull( colTitles, ReturnString( IDS_LANGBUILDER_LISTTITLES ), LANG_LINE_SIZE ); mystrncatNull( colTitles, MyPrefStruct.language, LANG_LINE_SIZE ); hWndLangBuilderListView = InitGenericListView( hDlg, IDC_LANGTOKEN_LIST, 0, 0, colTitles ); ListView_DeleteAllItems( hWndLangBuilderListView ); ListView_SetColumnWidth( hWndLangBuilderListView, CATEGORY_NAME_LISTVIEW_POS, 100 ); ListView_SetColumnWidth( hWndLangBuilderListView, DEFAULT_ENGLISH_LISTVIEW_POS, 200 ); ListView_SetColumnWidth( hWndLangBuilderListView, EDIT_LANGUAGE_LISTVIEW_POS, 200 ); // Add the Language strings to the list for ( i = SUMM_BEGIN; i < END_OF_STRINGS; i++ ) { catStr = GetLangSectionName(i); if ( !catStr ) continue; mystrncpyNull( lineStr, catStr, LANG_LINE_SIZE ); mystrncatNull( lineStr, LANG_LIST_SEP, LANG_LINE_SIZE ); mystrncatNull( lineStr, DefaultEnglishStr(i), LANG_LINE_SIZE ); mystrncatNull( lineStr, LANG_LIST_SEP, LANG_LINE_SIZE ); mystrncatNull( lineStr, TranslateID(i), LANG_LINE_SIZE ); AddItemToListView( hWndLangBuilderListView, ListView_GetItemCount(hWndLangBuilderListView), 3, lineStr, LANG_LIST_SEP ); } ListView_GetItemText( hWndLangBuilderListView, 0, EDIT_LANGUAGE_LISTVIEW_POS, lineStr, LANG_LINE_SIZE ); SetWindowText( GetDlgItem( hDlg, IDC_LANGTOKEN_TEXT), lineStr ); SetFocus( 
GetDlgItem( hDlg, IDC_LANGTOKEN_LIST ) ); int state = LVIS_SELECTED|LVIS_FOCUSED; if ( hWndLangBuilderListView ) ListView_SetItemState( hWndLangBuilderListView, 0, state, state ); }
// Fill `indices` with the vocabulary ids of up to Order-1 words, walking
// backwards from the last word covered by the hypothesis.  If the start of
// the sentence is reached first, a <s> id is written and filling stops.
// Returns a pointer one past the last id written.
lm::WordIndex *KENLM<Model>::LastIDs(const Hypothesis &hypo, lm::WordIndex *indices) const
{
  lm::WordIndex *out = indices;
  lm::WordIndex *const limit = indices + m_ngram->Order() - 1;
  int pos = hypo.GetCurrTargetWordsRange().GetEndPos();

  while (out != limit) {
    if (pos == -1) {
      // Ran out of words: terminate the context with the sentence-begin id.
      *out = m_ngram->GetVocabulary().BeginSentence();
      return out + 1;
    }
    *out = TranslateID(hypo.GetWord(pos));
    ++out;
    --pos;
  }
  // Collected a full Order-1 words of context.
  return out;
}
/**
 * Pre-calculate the n-gram probabilities for the words in the specified phrase.
 *
 * Note that when this method is called, we do not have access to the context
 * in which this phrase will eventually be applied.
 *
 * In other words, we know what words are in this phrase,
 * but we do not know what words will come before or after this phrase.
 *
 * The parameters fullScore, ngramScore, and oovCount are all output parameters.
 *
 * The value stored in oovCount is the number of words in the phrase
 * that are not in the language model's vocabulary.
 *
 * The sum of the ngram scores for all words in this phrase are stored in fullScore.
 *
 * The value stored in ngramScore is similar, but excludes everything scored
 * from the boundary position (Order-1 words from the front of the phrase)
 * through the end of the phrase.
 *
 * This is best shown by example:
 *
 * Assume a trigram backward language model and a phrase "a b c d e f g"
 *
 * fullScore would represent the sum of the logprob scores for the following values:
 *
 * p(g)
 * p(f | g)
 * p(e | g f)
 * p(d | f e)
 * p(c | e d)
 * p(b | d c)
 * p(a | c b)
 *
 * ngramScore would represent the sum of the logprob scores for only the
 * words before the boundary position:
 *
 * p(b | d c)
 * p(a | c b)
 *
 * (NOTE(review): this list previously duplicated the fullScore list, which
 * contradicted the code below; the subtraction at the bottom of the function
 * removes everything accumulated up to and including the boundary word.
 * Confirm this matches the intended "full-order n-grams only" semantics.)
 */
template <class Model>
void BackwardLanguageModel<Model>::CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const
{
  fullScore = 0;
  ngramScore = 0;
  oovCount = 0;

  if (!phrase.GetSize()) return;

  // The chart state is required by RuleScore but cannot be reused here.
  lm::ngram::ChartState discarded_sadly;
  lm::ngram::RuleScore<Model> scorer(*m_ngram, discarded_sadly);

  UTIL_THROW_IF(
    (m_beginSentenceFactor == phrase.GetWord(0).GetFactor(m_factorType)),
    util::Exception,
    "BackwardLanguageModel does not currently support rules that include <s>"
  );

  float before_boundary = 0.0f;

  int lastWord = phrase.GetSize() - 1;
  int ngramBoundary = m_ngram->Order() - 1;
  // For phrases shorter than the n-gram order the boundary collapses to
  // position 0 (everything is subtracted; ngramScore covers nothing).
  int boundary = ( lastWord < ngramBoundary ) ? 0 : ngramBoundary;

  int position;
  // Backward LM: feed words to the scorer from the end of the phrase to
  // the front, so each word is conditioned on the words that follow it.
  for (position = lastWord; position >= 0; position-=1) {
    const Word &word = phrase.GetWord(position);
    UTIL_THROW_IF(
      (word.IsNonTerminal()),
      util::Exception,
      "BackwardLanguageModel does not currently support rules that include non-terminals "
    );

    lm::WordIndex index = TranslateID(word);
    scorer.Terminal(index);
    if (!index) ++oovCount;  // index 0 is counted as out-of-vocabulary

    // Snapshot the running total once the boundary word has been scored;
    // everything accumulated so far is excluded from ngramScore.
    if (position==boundary) {
      before_boundary = scorer.Finish();
    }
  }

  fullScore = scorer.Finish();

  // Transform both totals into the decoder's score domain.
  ngramScore = TransformLMScore(fullScore - before_boundary);
  fullScore = TransformLMScore(fullScore);
}
/**
 * Score the given phrase against the backward LM state of the previous
 * phrase: words at the start of this phrase (up to Order-1 of them) are
 * rescored as new left context for the words that ended the previous phrase.
 *
 * Returns a newly allocated BackwardLMState (ownership passes to the caller)
 * holding the chart state after this phrase; the score is written to
 * returnedScore (untransformed KenLM units).
 */
template <class Model>
FFState *BackwardLanguageModel<Model>::Evaluate(const Phrase &phrase, const FFState *ps, float &returnedScore) const
{
  returnedScore = 0.0f;

  const lm::ngram::ChartState &previous = static_cast<const BackwardLMState&>(*ps).state;

  // unique_ptr keeps the new state exception-safe until ownership is
  // released to the caller.  (std::auto_ptr, used previously, was
  // deprecated in C++11 and removed in C++17.)
  std::unique_ptr<BackwardLMState> ret(new BackwardLMState());

  lm::ngram::RuleScore<Model> scorer(*m_ngram, ret->state);

  int ngramBoundary = m_ngram->Order() - 1;
  int lastWord = phrase.GetSize() - 1;

  // Get scores for words at the end of the previous phrase
  // that are now adjacent to words at the the beginning of this phrase.
  // Fed back-to-front because this is a backward LM.
  for (int position=std::min( lastWord, ngramBoundary - 1); position >= 0; position-=1) {
    const Word &word = phrase.GetWord(position);
    UTIL_THROW_IF(
      (word.IsNonTerminal()),
      util::Exception,
      "BackwardLanguageModel does not currently support rules that include non-terminals "
    );

    lm::WordIndex index = TranslateID(word);
    scorer.Terminal(index);
  }
  scorer.NonTerminal(previous);
  returnedScore = scorer.Finish();
  /*
  out->PlusEquals(this, score);

    UTIL_THROW_IF(
       (1==1),
       util::Exception,
       "This method (BackwardLanguageModel<Model>::Evaluate) is not yet fully implemented"
       );
  */
  return ret.release();
}
/* DisplayRealtimeSummary - print summary of total day statistics */
/* Builds a text summary of the day's traffic counters for the given virtual
   domain into `out`.  NOTE: the DISPLAY_SUMMARYNUM macro (defined elsewhere)
   implicitly uses the locals `p` (output cursor) and `number` (scratch
   buffer), so those declarations and the `p = out` assignment are load-
   bearing even though they look unused here. */
extern "C" void GetRealtimeSummary( VDinfoP VDptr, char *out )
{
    char number[32], *p;

    /* Default to the first virtual domain when none is supplied. */
    if ( !VDptr )
        VDptr = VD[0];
    if ( VDptr ){
        p = out;
        DISPLAY_SUMMARYNUM( VDptr->totalRequests, TranslateID(SUMM_TOTALREQS) );
        DISPLAY_SUMMARYNUM( VDptr->totalCachedHits, TranslateID(SUMM_TOTALCACHEDREQS) );
        DISPLAY_SUMMARYNUM( VDptr->totalFailedRequests, TranslateID(SUMM_TOTALFAILREQS) );
        DISPLAY_SUMMARYNUM( VDptr->badones, TranslateID(SUMM_INVALLOGENTRIES) );
        /* Session/page totals come from the per-client stat list when present. */
        if ( VDptr->byClient ){
            DISPLAY_SUMMARYNUM( VDptr->byClient->GetStatListTotalVisits(), TranslateID(SUMM_TOTALSESSIONS) );
            DISPLAY_SUMMARYNUM( VDptr->byClient->GetStatListTotalCounters4(), TranslateID(SUMM_TOTALPAGES) );  // includes wrong urls/non existent pages attepted
        }
        /* Download counters are only shown when at least one download occurred. */
        if ( VDptr->byDownload ){
            if ( VDptr->byDownload->GetStatListTotalRequests() ){
                DISPLAY_SUMMARYNUM( VDptr->byDownload->GetStatListTotalRequests(), TranslateID(SUMM_TOTALDLOADFILES) );
                //DISPLAY_SUMMARYFLOAT( (double)VDptr->byDownload->GetStatListTotalBytesIn()/(ONEMEGFLOAT), TranslateID(SUMM_TOTALDOWNLOADMB) );
            }
        }
        if ( VDptr->byClient ){
            DISPLAY_SUMMARYNUM( VDptr->byClient->GetStatListNum(), TranslateID(SUMM_TOTALUNIQUEVISITORS) );
            DISPLAY_SUMMARYNUM( CountRepeatVisits( VDptr->byClient ), TranslateID(SUMM_TOTALRPTVISITORS) );
        }
        /* DNS-resolver queue statistics (labels are intentionally untranslated). */
        DISPLAY_SUMMARYNUM( GetUnique(), "Unique DNRs" );
        DISPLAY_SUMMARYNUM( GetReturnedDNR(), "Returned DNRs" );
        DISPLAY_SUMMARYNUM( GetWaitingDNR(), "Waiting DNRs" );
    }
}
/*
 * FindReportTitleStr - return the display title for report l_id.
 *
 * Prefers the translated title for the report's string ID; falls back to the
 * user-configured title (ConfigFindTitle) when the report has no string ID
 * or the translation lookup returns nothing.
 *
 * Returns NULL when no report record exists for l_id; otherwise returns
 * whatever the lookup chain produced (which may itself be NULL).
 */
char *FindReportTitleStr( long l_id )
{
    ReportTypesP pReportRec = FindReportTypeData( l_id );
    const char *pTitleStr = NULL;

    if ( !pReportRec )
        return NULL;

    if ( pReportRec->titleStringID )
        pTitleStr = TranslateID( pReportRec->titleStringID );

    /* Fall back to the configured title when there is no string ID or the
       translation lookup failed. */
    if ( !pTitleStr )
        pTitleStr = ConfigFindTitle( &MyPrefStruct, l_id );

    return (char*)pTitleStr;
}
// Phrase-based LM scoring: extend the previous hypothesis' LM state with the
// words of the newly applied target phrase, add the (transformed) score to
// `scores`, and leave the resulting LM state in `state` for the next
// extension.  Only the first Order-1 words of a long phrase need full
// scoring through the state machinery; the rest use FullScoreForgotState /
// GetState shortcuts.
void KENLM<Model>::EvaluateWhenApplied(const ManagerBase &mgr, const Hypothesis &hypo, const FFState &prevState,
                                       Scores &scores, FFState &state) const
{
  KenLMState &stateCast = static_cast<KenLMState&>(state);

  const System &system = mgr.system;

  const lm::ngram::State &in_state = static_cast<const KenLMState&>(prevState).state;

  // Empty target phrase: LM state is unchanged, nothing to score.
  if (!hypo.GetTargetPhrase().GetSize()) {
    stateCast.state = in_state;
    return;
  }

  const std::size_t begin = hypo.GetCurrTargetWordsRange().GetStartPos();
  //[begin, end) in STL-like fashion.
  const std::size_t end = hypo.GetCurrTargetWordsRange().GetEndPos() + 1;
  // Beyond Order-1 words, earlier words can no longer affect the LM state.
  const std::size_t adjust_end = std::min(end, begin + m_ngram->Order() - 1);

  std::size_t position = begin;
  typename Model::State aux_state;
  // Ping-pong between the output state and a scratch state so each Score()
  // call reads the previous state and writes the next.
  typename Model::State *state0 = &stateCast.state, *state1 = &aux_state;

  float score = m_ngram->Score(in_state, TranslateID(hypo.GetWord(position)), *state0);
  ++position;
  for (; position < adjust_end; ++position) {
    score += m_ngram->Score(*state0, TranslateID(hypo.GetWord(position)), *state1);
    std::swap(state0, state1);
  }

  if (hypo.GetBitmap().IsComplete()) {
    // Score end of sentence.
    std::vector<lm::WordIndex> indices(m_ngram->Order() - 1);
    const lm::WordIndex *last = LastIDs(hypo, &indices.front());
    score += m_ngram->FullScoreForgotState(&indices.front(), last, m_ngram->GetVocabulary().EndSentence(), stateCast.state).prob;
  } else if (adjust_end < end) {
    // Get state after adding a long phrase.
    std::vector<lm::WordIndex> indices(m_ngram->Order() - 1);
    const lm::WordIndex *last = LastIDs(hypo, &indices.front());
    m_ngram->GetState(&indices.front(), last, stateCast.state);
  } else if (state0 != &stateCast.state) {
    // Short enough phrase that we can just reuse the state.
    stateCast.state = *state0;
  }

  score = TransformLMScore(score);

  // NOTE(review): OOV feature is hard-wired off; the two-component branch is
  // retained for when it is enabled -- confirm intended.
  bool OOVFeatureEnabled = false;
  if (OOVFeatureEnabled) {
    std::vector<float> scoresVec(2);
    scoresVec[0] = score;
    scoresVec[1] = 0.0;
    scores.PlusEquals(system, *this, scoresVec);
  } else {
    scores.PlusEquals(system, *this, score);
  }
}