Пример #1
0
// GetTimeonStringsTrans - build the translated label for time-on slot `lp`.
//
// Copies TimeonStrings[lp] into TimeonStringsTrans[lp], appends the translated
// minute unit (TIME_MIN for slot 0, TIME_MINS for every other slot) and
// returns a pointer to the TimeonStringsTrans[lp] buffer.
char *GetTimeonStringsTrans( short lp )
{
	char *label = TimeonStringsTrans[lp];

	strcpy( label, TimeonStrings[lp] );
	long used = strlen( label );

	// Slot 0 is the "1 min" entry; all other slots take the plural unit.
	if ( lp != 0 )
		mystrncpy( label + used, TranslateID( TIME_MINS ), BUF16-used );
	else
		mystrncpy( label + used, TranslateID( TIME_MIN ), BUF16-used );

	// Always terminate at index BUF16-1, whatever mystrncpy wrote.
	label[BUF16-1] = 0;
	return label;
}
Пример #2
0
// Scores a phrase on its own, without any surrounding context.
//
// Outputs:
//   fullScore  - transformed sum of everything the rule scorer accumulated.
//   ngramScore - transformed part of that sum accumulated after the first
//                (Order() - 1) positions of the phrase.
//   oovCount   - number of terminals whose TranslateID() result is 0.
// All three are zeroed first and stay zero for an empty phrase.
void KENLM<Model>::CalcScore(const Phrase<SCFG::Word> &phrase, float &fullScore,
                             float &ngramScore, std::size_t &oovCount) const
{
  fullScore = 0;
  ngramScore = 0;
  oovCount = 0;

  const size_t phraseLen = phrase.GetSize();
  if (phraseLen == 0) return;

  // The chart state produced by the scorer is not needed here.
  lm::ngram::ChartState unusedState;
  lm::ngram::RuleScore<Model> scorer(*m_ngram, unusedState);

  // A leading begin-of-sentence marker is consumed by BeginSentence().
  size_t pos = 0;
  if (m_bos == phrase[0][m_factorType]) {
    scorer.BeginSentence();
    pos = 1;
  }

  const size_t contextLen = m_ngram->Order() - 1;
  const size_t boundary = std::min(contextLen, phraseLen);

  // Positions before the boundary.
  while (pos < boundary) {
    const SCFG::Word &word = phrase[pos];
    if (!word.isNonTerminal) {
      const lm::WordIndex id = TranslateID(word);
      scorer.Terminal(id);
      if (id == 0) ++oovCount;
    } else {
      // A non-terminal closes the current run; bank it and start afresh.
      fullScore += scorer.Finish();
      scorer.Reset();
    }
    ++pos;
  }

  const float beforeBoundary = fullScore + scorer.Finish();

  // Remaining positions.
  while (pos < phraseLen) {
    const SCFG::Word &word = phrase[pos];
    if (!word.isNonTerminal) {
      const lm::WordIndex id = TranslateID(word);
      scorer.Terminal(id);
      if (id == 0) ++oovCount;
    } else {
      fullScore += scorer.Finish();
      scorer.Reset();
    }
    ++pos;
  }
  fullScore += scorer.Finish();

  ngramScore = TransformLMScore(fullScore - beforeBoundary);
  fullScore = TransformLMScore(fullScore);
}
Пример #3
0
// LookupDomain - look up the translated name for a domain code.
//
// code: NUL-terminated domain code; may be NULL.
//
// The first character selects the row of regionPtr. The column is taken from
// the second character for two-character codes, is 26 for three-character
// codes and 27 for codes of four or more characters.
//
// Returns TranslateID() of the matching entry's fCountryId, or NULL when
// `code` is NULL, shorter than two characters, maps outside the accepted
// index range, or has no table entry.
char *LookupDomain(char *code)
{
	DomainRegionMapPtr	ptr;
	short index1,index2;

	// At least two characters are required. code[0] is tested before
	// code[1] so an empty string never has the byte past its terminator read.
	if ( !code || !code[0] || !code[1] )
		return NULL;

	// tolower() is only defined for values representable as unsigned char
	// (or EOF), so plain char must not be passed directly.
	index1 = tolower( static_cast<unsigned char>(code[0]) ) - 'a';
	index2 = tolower( static_cast<unsigned char>(code[1]) ) - 'a';

	// Longer codes use the two extra columns instead of the second letter.
	if ( code[2] != 0 )
		index2 = ( code[3] != 0 ) ? 27 : 26;

	// NOTE(review): index1 == 26 is accepted here although a letter can only
	// produce 0..25 -- confirm against the declared dimensions of regionPtr.
	if ( index1 < 0 || index1 > 26 )
		return NULL;
	if ( index2 < 0 || index2 > 27 )
		return NULL;

	ptr = regionPtr[index1][index2];
	if ( ptr == NULL )
		return NULL;

	return TranslateID( ptr->fCountryId );
}
Пример #4
0
// Scores the target side of a chart hypothesis and adds the transformed
// result to `scores`. The resulting chart state is written into `state`,
// which is cast to LanguageModelChartStateKenLM.
//
// Terminals are fed to the rule scorer via TranslateID(); for non-terminals
// the chart state stored on the corresponding previous hypothesis (looked up
// through the non-terminal index map) is fed instead.
void KENLM<Model>::EvaluateWhenApplied(const SCFG::Manager &mgr,
                                       const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
                                       FFState &state) const
{
  LanguageModelChartStateKenLM &outState = static_cast<LanguageModelChartStateKenLM&>(state);
  lm::ngram::RuleScore<Model> scorer(*m_ngram, outState.GetChartState());
  const SCFG::TargetPhraseImpl &target = hypo.GetTargetPhrase();
  const AlignmentInfo::NonTermIndexMap &ntIndexMap =
    target.GetAlignNonTerm().GetNonTermIndexMap();

  const size_t numWords = target.GetSize();
  size_t pos = 0;

  // The first word gets special treatment.
  if (numWords != 0) {
    const SCFG::Word &first = target[0];
    if (first[m_factorType] == m_bos) {
      // Begin-of-sentence marker.
      scorer.BeginSentence();
      ++pos;
    } else if (first.isNonTerminal) {
      // Leading non-terminal: its state can be copied instead of rescored.
      const SCFG::Hypothesis *child = hypo.GetPrevHypo(ntIndexMap[pos]);
      const LanguageModelChartStateKenLM *childLM =
        static_cast<const LanguageModelChartStateKenLM*>(child->GetState(featureID));
      scorer.BeginNonTerminal(childLM->GetChartState());
      ++pos;
    }
  }

  // Everything after the (possibly consumed) first word.
  while (pos < numWords) {
    const SCFG::Word &word = target[pos];
    if (!word.isNonTerminal) {
      scorer.Terminal(TranslateID(word));
    } else {
      const SCFG::Hypothesis *child = hypo.GetPrevHypo(ntIndexMap[pos]);
      const LanguageModelChartStateKenLM *childLM =
        static_cast<const LanguageModelChartStateKenLM*>(child->GetState(featureID));
      scorer.NonTerminal(childLM->GetChartState());
    }
    ++pos;
  }

  const float score = TransformLMScore(scorer.Finish());

  // take out score from loading. This needs reworking
  //score -= target.GetScores().GetScores(*this)[0];

  // The OOV feature is hard-wired off, so only the single-score path runs.
  const bool OOVFeatureEnabled = false;
  if (!OOVFeatureEnabled) {
    scores.PlusEquals(mgr.system, *this, score);
  } else {
    std::vector<float> scoresVec(2, 0.0);
    scoresVec[0] = score;
    scores.PlusEquals(mgr.system, *this, scoresVec);
  }
}
Пример #5
0
void InitLangList()
{
	char colTitles[LANG_LINE_SIZE];
	char *catStr;
	char lineStr[LANG_LINE_SIZE];
	short i;
	short category = -1;
	changedSomeStrings = 0;
	alreadySaving = 0;

	// If we have already displayed (created) the list,
	// then week need to delete the column headings... this seems to do it...
	if ( hWndLangBuilderListView )
	{
		ListView_DeleteColumn( hWndLangBuilderListView, EDIT_LANGUAGE_LISTVIEW_POS );
		ListView_DeleteColumn( hWndLangBuilderListView, DEFAULT_ENGLISH_LISTVIEW_POS );
		ListView_DeleteColumn( hWndLangBuilderListView, CATEGORY_NAME_LISTVIEW_POS );
	}
	
	// Initialise the Language list, means creating the column titles and adding them,
	// then making sure there is no data in the list
	mystrncpyNull( colTitles, ReturnString( IDS_LANGBUILDER_LISTTITLES ), LANG_LINE_SIZE );
	mystrncatNull( colTitles, MyPrefStruct.language, LANG_LINE_SIZE );
	hWndLangBuilderListView = InitGenericListView( hDlg, IDC_LANGTOKEN_LIST, 0, 0, colTitles );
	ListView_DeleteAllItems( hWndLangBuilderListView );		

	ListView_SetColumnWidth( hWndLangBuilderListView, CATEGORY_NAME_LISTVIEW_POS, 100 );
	ListView_SetColumnWidth( hWndLangBuilderListView, DEFAULT_ENGLISH_LISTVIEW_POS, 200 );
	ListView_SetColumnWidth( hWndLangBuilderListView, EDIT_LANGUAGE_LISTVIEW_POS, 200 );

	// Add the Language strings to the list
	for ( i = SUMM_BEGIN; i < END_OF_STRINGS; i++ )
	{
		catStr = GetLangSectionName(i);
		if ( !catStr )
			continue;
		mystrncpyNull( lineStr, catStr, LANG_LINE_SIZE ); 
		mystrncatNull( lineStr, LANG_LIST_SEP, LANG_LINE_SIZE ); 
		mystrncatNull( lineStr, DefaultEnglishStr(i), LANG_LINE_SIZE ); 
		mystrncatNull( lineStr, LANG_LIST_SEP, LANG_LINE_SIZE ); 
		mystrncatNull( lineStr, TranslateID(i), LANG_LINE_SIZE ); 
		AddItemToListView( hWndLangBuilderListView, ListView_GetItemCount(hWndLangBuilderListView), 3, lineStr, LANG_LIST_SEP );
	}
	ListView_GetItemText( hWndLangBuilderListView, 0, EDIT_LANGUAGE_LISTVIEW_POS, lineStr, LANG_LINE_SIZE );
	SetWindowText( GetDlgItem( hDlg, IDC_LANGTOKEN_TEXT), lineStr );
	SetFocus( GetDlgItem( hDlg, IDC_LANGTOKEN_LIST ) );
	int state = LVIS_SELECTED|LVIS_FOCUSED;
	if ( hWndLangBuilderListView )
		ListView_SetItemState( hWndLangBuilderListView, 0, state, state );
}
Пример #6
0
// Writes the vocabulary ids of the hypothesis' most recent words into
// `indices`, newest first, walking backwards from the end position of the
// current target range. At most (Order() - 1) ids are written. If position
// -1 is reached first, the begin-of-sentence id is written and the walk stops.
//
// Returns a pointer one past the last id written.
lm::WordIndex *KENLM<Model>::LastIDs(const Hypothesis &hypo,
                                     lm::WordIndex *indices) const
{
  lm::WordIndex *out = indices;
  lm::WordIndex *const limit = indices + m_ngram->Order() - 1;
  int pos = hypo.GetCurrTargetWordsRange().GetEndPos();

  while (out != limit) {
    if (pos == -1) {
      // Ran off the front of the sentence: close with <s>.
      *out = m_ngram->GetVocabulary().BeginSentence();
      return out + 1;
    }
    *out = TranslateID(hypo.GetWord(pos));
    ++out;
    --pos;
  }
  return out;
}
Пример #7
0
/**
 * Pre-calculate the n-gram probabilities for the words in the specified phrase.
 *
 * Note that when this method is called, we do not have access to the context
 * in which this phrase will eventually be applied.
 *
 * In other words, we know what words are in this phrase,
 * but we do not know what words will come before or after this phrase.
 *
 * The parameters fullScore, ngramScore, and oovCount are all output parameters.
 *
 * The value stored in oovCount is the number of words in the phrase
 * that are not in the language model's vocabulary.
 *
 * The sum of the ngram scores for all words in this phrase are stored in fullScore.
 *
 * The value stored in ngramScore is similar, but only full-order ngram scores are included.
 *
 * This is best shown by example:
 *
 * Assume a trigram backward language model and a phrase "a b c d e f g"
 *
 * fullScore would represent the sum of the logprob scores for the following values:
 *
 * p(g)
 * p(f | g)
 * p(e | g f)
 * p(d | f e)
 * p(c | e d)
 * p(b | d c)
 * p(a | c b)
 *
 * ngramScore would represent the sum of the logprob scores for the following values:
 *
 * p(g)
 * p(f | g)
 * p(e | g f)
 * p(d | f e)
 * p(c | e d)
 * p(b | d c)
 * p(a | c b)
 */
template <class Model> void BackwardLanguageModel<Model>::CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const
{
  // All outputs start at zero and stay there for an empty phrase.
  fullScore = 0;
  ngramScore = 0;
  oovCount = 0;

  if (phrase.GetSize() == 0) return;

  // The chart state produced by the scorer is not needed here.
  lm::ngram::ChartState unusedState;
  lm::ngram::RuleScore<Model> scorer(*m_ngram, unusedState);

  UTIL_THROW_IF(
    (m_beginSentenceFactor == phrase.GetWord(0).GetFactor(m_factorType)),
    util::Exception,
    "BackwardLanguageModel does not currently support rules that include <s>"
  );

  const int lastIdx = phrase.GetSize() - 1;
  const int contextLen = m_ngram->Order() - 1;

  // Position at which the running score is snapshotted: 0 when the phrase
  // is shorter than the context length, otherwise the context length itself.
  int boundary = contextLen;
  if (lastIdx < contextLen) boundary = 0;

  float scoreAtBoundary = 0.0f;

  // Words are fed to the scorer from the last one back to the first.
  int pos = lastIdx;
  while (pos >= 0) {
    const Word &word = phrase.GetWord(pos);
    UTIL_THROW_IF(
      (word.IsNonTerminal()),
      util::Exception,
      "BackwardLanguageModel does not currently support rules that include non-terminals "
    );

    const lm::WordIndex id = TranslateID(word);
    scorer.Terminal(id);
    if (id == 0) ++oovCount;  // id 0 is counted as out-of-vocabulary

    if (pos == boundary) {
      scoreAtBoundary = scorer.Finish();
    }

    --pos;
  }

  fullScore = scorer.Finish();

  ngramScore = TransformLMScore(fullScore - scoreAtBoundary);
  fullScore = TransformLMScore(fullScore);
}
Пример #8
0
// Scores the leading words of `phrase` against the state carried in `ps`.
//
// phrase        - phrase being scored; must not contain non-terminals.
// ps            - previous state; cast to BackwardLMState and dereferenced
//                 without a null check.
// returnedScore - receives the rule scorer's result (reset to 0 first).
//
// Returns a newly allocated BackwardLMState; ownership passes to the caller.
//
// NOTE: a disabled block in the original source marked this method as
// "not yet fully implemented".
template <class Model> FFState *BackwardLanguageModel<Model>::Evaluate(const Phrase &phrase, const FFState *ps, float &returnedScore) const
{
  returnedScore = 0.0f;

  const lm::ngram::ChartState &prevChart = static_cast<const BackwardLMState&>(*ps).state;

  // Held in a smart pointer so it is freed if anything below throws.
  std::auto_ptr<BackwardLMState> result(new BackwardLMState());

  lm::ngram::RuleScore<Model> scorer(*m_ngram, result->state);

  const int contextLen = m_ngram->Order() - 1;
  const int lastIdx = phrase.GetSize() - 1;

  // Get scores for words at the end of the previous phrase
  // that are now adjacent to words at the beginning of this phrase.
  // Only the first (contextLen) words take part, fed last-to-first.
  int pos = std::min(lastIdx, contextLen - 1);
  while (pos >= 0) {
    const Word &word = phrase.GetWord(pos);
    UTIL_THROW_IF(
      (word.IsNonTerminal()),
      util::Exception,
      "BackwardLanguageModel does not currently support rules that include non-terminals "
    );

    const lm::WordIndex id = TranslateID(word);
    scorer.Terminal(id);
    --pos;
  }

  scorer.NonTerminal(prevChart);
  returnedScore = scorer.Finish();

  return result.release();
}
Пример #9
0
/*
 * GetRealtimeSummary - emit the summary statistics of a virtual-domain record.
 *
 * VDptr: record to summarise; when NULL, VD[0] is used instead.
 * out:   caller-supplied destination; only its start address is captured here.
 *
 * Nothing is emitted when no record is available. Each statistic goes through
 * the DISPLAY_SUMMARYNUM macro, whose definition is not visible here.
 * NOTE(review): the macro presumably formats into `number` and appends to `p`;
 * confirm against its definition before renaming or reordering anything.
 */
extern "C" void GetRealtimeSummary( VDinfoP VDptr, char *out )
{
	// Not referenced directly below; presumably consumed by DISPLAY_SUMMARYNUM.
	char number[32], *p;

	// Fall back to the first virtual domain when none was supplied.
	if ( !VDptr ) 
		VDptr = VD[0];

	if ( VDptr ){
		p = out;

		// Request totals.
		DISPLAY_SUMMARYNUM( VDptr->totalRequests, TranslateID(SUMM_TOTALREQS) );
		DISPLAY_SUMMARYNUM( VDptr->totalCachedHits, TranslateID(SUMM_TOTALCACHEDREQS) );
		DISPLAY_SUMMARYNUM( VDptr->totalFailedRequests, TranslateID(SUMM_TOTALFAILREQS) );
		DISPLAY_SUMMARYNUM( VDptr->badones, TranslateID(SUMM_INVALLOGENTRIES) );

		// Session and page totals need the per-client statistics list.
		if ( VDptr->byClient ){
			DISPLAY_SUMMARYNUM( VDptr->byClient->GetStatListTotalVisits(), TranslateID(SUMM_TOTALSESSIONS) );
			DISPLAY_SUMMARYNUM( VDptr->byClient->GetStatListTotalCounters4(), TranslateID(SUMM_TOTALPAGES) );		// includes wrong urls/non-existent pages attempted
		}

		// Download totals are shown only when at least one download request was recorded.
		if ( VDptr->byDownload ){
			if ( VDptr->byDownload->GetStatListTotalRequests() ){
				DISPLAY_SUMMARYNUM( VDptr->byDownload->GetStatListTotalRequests(), TranslateID(SUMM_TOTALDLOADFILES) );
				//DISPLAY_SUMMARYFLOAT( (double)VDptr->byDownload->GetStatListTotalBytesIn()/(ONEMEGFLOAT), TranslateID(SUMM_TOTALDOWNLOADMB) );
			}
		}

		// Visitor counts; kept as a second byClient block so they are emitted after the download line.
		if ( VDptr->byClient ){
			DISPLAY_SUMMARYNUM( VDptr->byClient->GetStatListNum(), TranslateID(SUMM_TOTALUNIQUEVISITORS) );
			DISPLAY_SUMMARYNUM( CountRepeatVisits( VDptr->byClient ), TranslateID(SUMM_TOTALRPTVISITORS) );
		}

		// DNR counters use fixed English labels rather than TranslateID().
		DISPLAY_SUMMARYNUM( GetUnique(), "Unique DNRs" );
		DISPLAY_SUMMARYNUM( GetReturnedDNR(), "Returned DNRs" );
		DISPLAY_SUMMARYNUM( GetWaitingDNR(), "Waiting DNRs" );
	}
}
Пример #10
0
// FindReportTitleStr - resolve the title string for report `l_id`.
//
// Returns NULL when FindReportTypeData() has no record for the id.
// Otherwise the record's titleStringID (if non-zero) is translated via
// TranslateID(); when there is no ID, or the translation is NULL, the
// title falls back to ConfigFindTitle() on MyPrefStruct.
char *FindReportTitleStr( long l_id )
{
	ReportTypesP pReportRec = FindReportTypeData( l_id );
	if ( !pReportRec )
		return NULL;

	const char *pTitleStr = NULL;
	if ( pReportRec->titleStringID )
		pTitleStr = TranslateID( pReportRec->titleStringID );

	// Covers both "no string ID" and "translation lookup came back empty".
	if ( !pTitleStr )
		pTitleStr = ConfigFindTitle( &MyPrefStruct, l_id );

	return const_cast<char*>( pTitleStr );
}
Пример #11
0
// Scores the words this hypothesis adds, starting from the LM state in
// `prevState`, adds the transformed score to `scores`, and stores the
// resulting LM state in `state` (cast to KenLMState).
//
// Only the first (Order() - 1) new words are scored one by one from the
// incoming state. A hypothesis with an empty target phrase just copies the
// incoming state and contributes no score.
void KENLM<Model>::EvaluateWhenApplied(const ManagerBase &mgr,
                                       const Hypothesis &hypo, const FFState &prevState, Scores &scores,
                                       FFState &state) const
{
  KenLMState &outState = static_cast<KenLMState&>(state);
  const System &system = mgr.system;
  const lm::ngram::State &inState =
    static_cast<const KenLMState&>(prevState).state;

  if (hypo.GetTargetPhrase().GetSize() == 0) {
    outState.state = inState;
    return;
  }

  // [first, pastLast) in STL-like fashion.
  const std::size_t first = hypo.GetCurrTargetWordsRange().GetStartPos();
  const std::size_t pastLast = hypo.GetCurrTargetWordsRange().GetEndPos() + 1;
  const std::size_t scoredEnd = std::min(pastLast, first + m_ngram->Order() - 1);

  // Two state buffers are ping-ponged: `cur` always holds the newest state.
  typename Model::State scratch;
  typename Model::State *cur = &outState.state;
  typename Model::State *spare = &scratch;

  std::size_t pos = first;
  float score = m_ngram->Score(inState, TranslateID(hypo.GetWord(pos)), *cur);
  for (++pos; pos < scoredEnd; ++pos) {
    score += m_ngram->Score(*cur, TranslateID(hypo.GetWord(pos)), *spare);
    std::swap(cur, spare);
  }

  if (hypo.GetBitmap().IsComplete()) {
    // Score end of sentence.
    std::vector<lm::WordIndex> context(m_ngram->Order() - 1);
    const lm::WordIndex *contextEnd = LastIDs(hypo, &context.front());
    score += m_ngram->FullScoreForgotState(&context.front(), contextEnd,
                                           m_ngram->GetVocabulary().EndSentence(), outState.state).prob;
  } else if (scoredEnd < pastLast) {
    // Get state after adding a long phrase.
    std::vector<lm::WordIndex> context(m_ngram->Order() - 1);
    const lm::WordIndex *contextEnd = LastIDs(hypo, &context.front());
    m_ngram->GetState(&context.front(), contextEnd, outState.state);
  } else if (cur != &outState.state) {
    // Short enough phrase that the newest state can simply be copied out.
    outState.state = *cur;
  }

  score = TransformLMScore(score);

  // The OOV feature is hard-wired off, so only the single-score path runs.
  const bool OOVFeatureEnabled = false;
  if (!OOVFeatureEnabled) {
    scores.PlusEquals(system, *this, score);
  } else {
    std::vector<float> scoresVec(2, 0.0);
    scoresVec[0] = score;
    scores.PlusEquals(system, *this, scoresVec);
  }
}