コード例 #1
0
ファイル: KategProblemTest.cpp プロジェクト: 51303014/giza-pp
Array<KategProblem *> &_izrOptimization(Array<KategProblem *> &probs,
int anzprob,double timeForOneRed,double maxClock,Array<Kategory> &katOfWord,
int anzIter,int verfahren)
{
  massert(anzprob>1);
  massert(probs[0]->wordFreq.mindestAnzahl<=1);
  KategProblem *p0=probs[0];

  int nWords=p0->wordFreq.nWords;
  int nKats=p0->katFreq.nKats;
  int minimumNumberOfWords = max(1,int(nWords*0.95));

  int indexOfDurchschnitt;
  Array<int> newWords(nWords);
  int useAnzprob=anzprob;
  do
    {
      int w,k;
      indexOfDurchschnitt=0;
      for(w=0;w<nWords;w++)
	newWords[w]=-1;
      for(k=0;k<useAnzprob;k++)
	{
	  massert(probs[k]->wordFreq.nWords==nWords);
	  probs[k]->makeKats();
	}
      
      for(w=0;w<nWords;w++)
	{
	  if( newWords[w]==-1 )
	    {
	      
	      
	      
	      leda_set<int> durchschnitt=(*p0->kats)[p0->katOfWord(w)];
	      for(k=1;k<useAnzprob;k++)
	      durchschnitt = durchschnitt & (*probs[k]->kats)[probs[k]->katOfWord(w)];
	      
	      
	      int _anzInDurchschnitt=0;
	      int nr=0;
	      forall_set(leda_set<int>,nr,durchschnitt)
		{
		  _anzInDurchschnitt++;
		  newWords[nr]=indexOfDurchschnitt;
		}
	      if( verboseMode && _anzInDurchschnitt>1 && anzIter==0 )
		{
		  cout << "- (";
		    forall_set(leda_set<int>,nr,durchschnitt)
		    {
		      cout << p0->getString(nr);
		      if( p0->wordFreq.n1(nr)==1 )
			cout << "* ";
		      else
			cout << " ";
		    }
		  cout << ")\n";
		}
コード例 #2
0
// Sort elements and return sort index mapping.
bool NgramVector::Sort(const VocabVector &vocabMap,
                       const IndexVector &boNgramMap,
                       IndexVector &ngramMap) {
    // Update word and hist indices.
    for (size_t i = 0; i < size(); ++i) {
        _words[i] = vocabMap[_words[i]];
        _hists[i] = boNgramMap[_hists[i]];
    }

    // Sort indices.
    NgramIndexCompare compare(*this);
    IndexVector       sortIndices = Range(0, size());
    if (!sortIndices.sort(compare)) {
        ngramMap = Range(size());
        return false;
    }

    // Apply ordered indices to values.
    // Build sort mapping that maps old to new indices.
    VocabVector newWords(_words.length());
    IndexVector newHists(_hists.length());
    ngramMap.reset(size());
    for (NgramIndex i = 0; i < (NgramIndex)size(); i++) {
        newWords[i] = _words[sortIndices[i]];
        newHists[i] = _hists[sortIndices[i]];
        ngramMap[sortIndices[i]] = i;
    }
    _words.swap(newWords);
    _hists.swap(newHists);

    // Rebuild index map.
    _Reindex(_indices.length());

    // Build truncated view into words and hists.
    Range r(_length);
    _wordsView.attach(_words[r]);
    _histsView.attach(_hists[r]);

    return true;
}