/**
 * Fires phrase-boundary features for the phrase applied by cur_hypo.
 *
 * The previous state carries the last source and target word of the
 * preceding phrase; each is paired with the first word of the current
 * phrase on the same side. Once the source is completely covered, the last
 * words of the current phrase are additionally paired with NULL.
 *
 * @param cur_hypo    hypothesis whose current phrase pair is scored
 * @param prev_state  state produced for the preceding hypothesis
 * @param scores      collection handed on to AddFeatures
 * @return newly allocated state holding the last source/target word of the
 *         current phrase, or a copy of the previous state when the target
 *         phrase is empty
 *
 * NOTE(review): the dynamic_cast result is used unchecked, so prev_state is
 * assumed to always be a PhraseBoundaryState - confirm against callers.
 */
FFState* PhraseBoundaryFeature::Evaluate
(const Hypothesis& cur_hypo, const FFState* prev_state,
 ScoreComponentCollection* scores) const
{
  const PhraseBoundaryState* previous = dynamic_cast<const PhraseBoundaryState*>(prev_state);
  const Phrase& target = cur_hypo.GetCurrTargetPhrase();

  // Nothing was produced on the target side: hand the old boundary on unchanged.
  if (target.GetSize() == 0) {
    return new PhraseBoundaryState(*previous);
  }

  // Boundary between the previous phrase and this one, target side first.
  AddFeatures(previous->GetTargetWord(), &(target.GetWord(0)),
              m_targetFactors, "tgt", scores);

  // Same boundary on the source side.
  const Phrase& source = cur_hypo.GetTranslationOption().GetInputPath().GetPhrase();
  AddFeatures(previous->GetSourceWord(), &(source.GetWord(0)),
              m_sourceFactors, "src", scores);

  // Last word on each side becomes the left context of the next boundary.
  const Word* lastSource = &(source.GetWord(source.GetSize() - 1));
  const Word* lastTarget = &(target.GetWord(target.GetSize() - 1));

  // Whole source covered: pair the closing words with NULL (end of sentence).
  if (cur_hypo.IsSourceCompleted()) {
    AddFeatures(lastSource, NULL, m_sourceFactors, "src", scores);
    AddFeatures(lastTarget, NULL, m_targetFactors, "tgt", scores);
  }

  return new PhraseBoundaryState(lastSource, lastTarget);
}
예제 #2
0
void Data::loadNBest(const string &file)
{
  TRACE_ERR("loading nbest from " << file << endl);
  inputfilestream inp(file); // matches a stream with a file. Opens the file
  if (!inp.good())
    throw runtime_error("Unable to open: " + file);

  ScoreStats scoreentry;
  string line, sentence_index, sentence, feature_str;

  while (getline(inp, line, '\n')) {
    if (line.empty()) continue;
    // adding statistics for error measures
    scoreentry.clear();

    getNextPound(line, sentence_index, "|||"); // first field
    getNextPound(line, sentence, "|||");       // second field
    getNextPound(line, feature_str, "|||");    // third field

    m_scorer->prepareStats(sentence_index, sentence, scoreentry);
    m_score_data->add(scoreentry, sentence_index);

    // examine first line for name of features
    if (!existsFeatureNames()) {
      InitFeatureMap(feature_str);
    }
    AddFeatures(feature_str, sentence_index);
  }
  inp.close();
}
예제 #3
0
void Data::loadNBest(const string &file, bool oneBest)
{
  TRACE_ERR("loading nbest from " << file << endl);
  util::FilePiece in(file.c_str());

  ScoreStats scoreentry;
  string sentence, feature_str, alignment;
  int sentence_index;

  while (true) {
    try {
      StringPiece line = in.ReadLine();
      if (line.empty()) continue;
      // adding statistics for error measures
      scoreentry.clear();

      util::TokenIter<util::MultiCharacter> it(line, util::MultiCharacter("|||"));

      sentence_index = ParseInt(*it);
      if (oneBest && m_score_data->exists(sentence_index)) continue;
      ++it;
      sentence = it->as_string();
      ++it;
      feature_str = it->as_string();
      ++it;

      if (it) {
        ++it;                             // skip model score.

        if (it) {
          alignment = it->as_string(); //fifth field (if present) is either phrase or word alignment
          ++it;
          if (it) {
            alignment = it->as_string(); //sixth field (if present) is word alignment
          }
        }
      }
      //TODO check alignment exists if scorers need it

      if (m_scorer->useAlignment()) {
        sentence += "|||";
        sentence += alignment;
      }
      m_scorer->prepareStats(sentence_index, sentence, scoreentry);

      m_score_data->add(scoreentry, sentence_index);

      // examine first line for name of features
      if (!existsFeatureNames()) {
        InitFeatureMap(feature_str);
      }
      AddFeatures(feature_str, sentence_index);
    } catch (util::EndOfFileException &e) {
      PrintUserTime("Loaded N-best lists");
      break;
    }
  }
}
예제 #4
0
/**
 * Runs the Vamp plug-in over every wave track (or linked stereo pair) and
 * stores the returned features in one new label track per input.
 *
 * For each input the plug-in is initialised (or reset when the channel
 * count matches the previous input), fed block-sized chunks advancing by
 * the step size, and finally asked for its remaining features.
 *
 * @return false when no plug-in is loaded, when plug-in initialisation
 *         fails, or when a progress call reports cancellation; true
 *         otherwise (including when there is no track to process).
 *
 * NOTE(review): `count` is never advanced in this function, so every
 * progress call reports index 0 - confirm whether that is intended.
 */
bool VampEffect::Process()
{
   if (!mPlugin) return false;

   TrackListIterator iter(mWaveTracks);

   int count = 0;

   WaveTrack *left = (WaveTrack *)iter.First();

   // No track at all: nothing to analyse, and the look-ahead below would
   // otherwise dereference a null pointer.
   if (!left) return true;

   bool multiple = false;
   int prevTrackChannels = 0;

   TrackListIterator scooter(iter);
   if (left->GetLinked()) scooter.Next();
   if (scooter.Next()) {
      // if there is another track beyond this one and any linked one,
      // then we're processing more than one track.  That means we
      // should use the originating track name in each new label
      // track's name, to make clear which is which
      multiple = true;
   }

   while (left) {

      // rstart is only filled in for a linked pair; starting it at zero
      // keeps the copy into rs below from reading an indeterminate value.
      sampleCount lstart = 0, rstart = 0;
      sampleCount len = 0;
      GetSamples(left, &lstart, &len);

      WaveTrack *right = NULL;
      int channels = 1;

      if (left->GetLinked()) {
         right = (WaveTrack *)iter.Next();
         channels = 2;
         GetSamples(right, &rstart, &len);
      }

      size_t step = mPlugin->getPreferredStepSize();
      size_t block = mPlugin->getPreferredBlockSize();

      bool initialiseRequired = true;

      // A preferred size of zero means "no preference": fall back to the
      // other value, or to 1024 when neither is given.
      if (block == 0) {
         if (step != 0) block = step;
         else block = 1024;
      }
      if (step == 0) {
         step = block;
      }

      if (prevTrackChannels > 0) {
         // Plugin has already been initialised, so if the number of
         // channels remains the same, we only need to do a reset.
         // Otherwise we need to re-construct the whole plugin,
         // because a Vamp plugin can't be re-initialised.
         if (prevTrackChannels == channels) {
            mPlugin->reset();
            initialiseRequired = false;
         } else {
            //!!! todo: retain parameters previously set
            Init();
         }
      }

      if (initialiseRequired) {
         if (!mPlugin->initialise(channels, step, block)) {
            wxMessageBox(_("Sorry, Vamp Plug-in failed to initialize."));
            return false;
         }
      }

      LabelTrack *ltrack = mFactory->NewLabelTrack();

      if (!multiple) {
         ltrack->SetName(GetEffectName());
      } else {
         ltrack->SetName(wxString::Format(wxT("%s: %s"),
                                          left->GetName().c_str(),
                                          GetEffectName().c_str()));
      }

      mTracks->Add(ltrack);

      // One block-sized buffer per channel; released below on every path
      // that leaves the sample loop normally or through cancellation.
      float **data = new float*[channels];
      for (int c = 0; c < channels; ++c) data[c] = new float[block];

      sampleCount originalLen = len;
      sampleCount ls = lstart;
      sampleCount rs = rstart;

      bool cancelled = false;

      while (len) {

         int request = block;
         if (request > len) request = len;

         if (left) left->Get((samplePtr)data[0], floatSample, ls, request);
         if (right) right->Get((samplePtr)data[1], floatSample, rs, request);

         // Short final read: zero-pad so the plug-in always sees a full block.
         if (request < (int)block) {
            for (int c = 0; c < channels; ++c) {
               for (int i = request; i < (int)block; ++i) {
                  data[c][i] = 0.f;
               }
            }
         }

         Vamp::RealTime timestamp = Vamp::RealTime::frame2RealTime
            (ls, (int)(mRate + 0.5));

         Vamp::Plugin::FeatureSet features = mPlugin->process(data, timestamp);
         AddFeatures(ltrack, features);

         if (len > (int)step) len -= step;
         else len = 0;

         ls += step;
         rs += step;

         // Record cancellation instead of returning here so the buffers
         // are still released.
         if (channels > 1) {
            if (TrackGroupProgress(count, (ls - lstart) / double(originalLen)))
               cancelled = true;
         } else {
            if (TrackProgress(count, (ls - lstart) / double(originalLen)))
               cancelled = true;
         }
         if (cancelled) break;
      }

      if (!cancelled) {
         Vamp::Plugin::FeatureSet features = mPlugin->getRemainingFeatures();
         AddFeatures(ltrack, features);
      }

      for (int c = 0; c < channels; ++c) delete[] data[c];
      delete[] data;

      if (cancelled) return false;

      prevTrackChannels = channels;

      left = (WaveTrack *)iter.Next();
   }

   return true;
}
예제 #5
0
/**
 * Adds the sparse reordering features for one translation option.
 *
 * Two groups of features are produced:
 *  - "between" features, one per source word in the gap between the
 *    previous and the current option (or between sentence start and the
 *    current option when there is no previous one). These fire only when
 *    m_useBetween is set, the direction is Backward and the orientation is
 *    one of D, DL or DR.
 *  - first/last word features of the current source and target phrase,
 *    typed Stack for the Forward direction and Phrase for Backward, each
 *    gated by m_useStack / m_usePhrase.
 *
 * @param currentOpt   option being applied
 * @param previousOpt  option applied before it, or NULL
 * @param input        input; treated as a Sentence when gap words are read
 * @param reoType      orientation passed through to AddFeatures
 * @param direction    Forward or Backward; anything else trips an assert
 * @param scores       collection handed on to AddFeatures
 */
void SparseReordering::CopyScores(
               const TranslationOption& currentOpt,
               const TranslationOption* previousOpt,
               const InputType& input,
               LexicalReorderingState::ReorderingType reoType,
               LexicalReorderingConfiguration::Direction direction,
               ScoreComponentCollection* scores) const 
{
  const bool discontinuous = reoType == LexicalReorderingState::D
                          || reoType == LexicalReorderingState::DL
                          || reoType == LexicalReorderingState::DR;

  if (m_useBetween && direction == LexicalReorderingConfiguration::Backward && discontinuous) {
    //NB: static cast chosen for speed; unsafe if the input is not a Sentence
    const Sentence& sentence = static_cast<const Sentence&>(input);
    const WordsRange& currentRange = currentOpt.GetSourceWordsRange();

    // Default: no previous option, so the gap runs from the sentence start
    // up to the current option.
    size_t gapStart = 0;
    size_t gapEnd = currentRange.GetStartPos();
    if (previousOpt) {
      const WordsRange& previousRange = previousOpt->GetSourceWordsRange();
      if (previousRange < currentRange) {
        // previous option lies to the left: gap ends where the current starts
        gapStart = previousRange.GetEndPos() + 1;
      } else {
        // previous option lies to the right: gap sits after the current one
        gapStart = currentRange.GetEndPos() + 1;
        gapEnd = previousRange.GetStartPos();
      }
    }
    assert(gapStart < gapEnd);

    for (size_t pos = gapStart; pos != gapEnd; ++pos) {
      AddFeatures(SparseReorderingFeatureKey::Between,
                  SparseReorderingFeatureKey::Source, sentence.GetWord(pos),
                  SparseReorderingFeatureKey::First, reoType, scores);
    }
  }

  // Backward direction scores the phrase, forward direction scores the stack.
  SparseReorderingFeatureKey::Type type;
  switch (direction) {
  case LexicalReorderingConfiguration::Forward:
    if (!m_useStack) return;
    type = SparseReorderingFeatureKey::Stack;
    break;
  case LexicalReorderingConfiguration::Backward:
    if (!m_usePhrase) return;
    type = SparseReorderingFeatureKey::Phrase;
    break;
  default:
    // Not expected for the bidirectional setting; the assignment only
    // keeps the compiler from warning about an unset variable.
    type = SparseReorderingFeatureKey::Phrase;
    assert(!"Shouldn't call CopyScores() with bidirectional direction");
    break;
  }

  // First and last word of the source phrase ...
  const Phrase& sourcePhrase = currentOpt.GetInputPath().GetPhrase();
  const size_t lastSource = sourcePhrase.GetSize() - 1;
  AddFeatures(type, SparseReorderingFeatureKey::Source, sourcePhrase.GetWord(0),
              SparseReorderingFeatureKey::First, reoType, scores);
  AddFeatures(type, SparseReorderingFeatureKey::Source, sourcePhrase.GetWord(lastSource),
              SparseReorderingFeatureKey::Last, reoType, scores);

  // ... then first and last word of the target phrase.
  const Phrase& targetPhrase = currentOpt.GetTargetPhrase();
  const size_t lastTarget = targetPhrase.GetSize() - 1;
  AddFeatures(type, SparseReorderingFeatureKey::Target, targetPhrase.GetWord(0),
              SparseReorderingFeatureKey::First, reoType, scores);
  AddFeatures(type, SparseReorderingFeatureKey::Target, targetPhrase.GetWord(lastTarget),
              SparseReorderingFeatureKey::Last, reoType, scores);
}
예제 #6
0
/**
 * Runs the Vamp plug-in over every wave-track group of the input and
 * collects the returned features in one added analysis (label) track per
 * group. The added tracks are committed only after all groups finished
 * without cancellation.
 *
 * @return false when no plug-in is loaded, when plug-in initialisation
 *         fails, or when a progress call reports cancellation; true
 *         otherwise.
 *
 * NOTE(review): `count` is never advanced in this function, so every
 * progress call reports index 0 - confirm whether that is intended.
 */
bool VampEffect::Process()
{
   if (!mPlugin)
   {
      return false;
   }

   int count = 0;
   unsigned prevTrackChannels = 0;

   // With more than one wave group in play, each NEW label track is named
   // after its originating track so the results can be told apart.
   const bool multiple = GetNumWaveGroups() > 1;

   std::vector<std::shared_ptr<Effect::AddedAnalysisTrack>> addedTracks;

   for (auto leader : inputTracks()->Leaders<const WaveTrack>())
   {
      auto channelGroup = TrackList::Channels(leader);
      auto left = *channelGroup.first++;

      sampleCount lstart, rstart = 0;
      sampleCount len;
      GetSamples(left, &lstart, &len);

      // channelGroup now holds everything after the first channel
      const WaveTrack *right =
         channelGroup.size() ? *channelGroup.first++ : nullptr;
      unsigned channels = right ? 2u : 1u;
      if (right)
      {
         GetSamples(right, &rstart, &len);
      }

      // TODO: more-than-two-channels

      size_t step = mPlugin->getPreferredStepSize();
      size_t block = mPlugin->getPreferredBlockSize();

      // A preferred size of zero is replaced by the other value, or by
      // 1024 when both are zero.
      if (block == 0)
      {
         block = (step != 0) ? step : 1024;
      }
      if (step == 0)
      {
         step = block;
      }

      // A plug-in that already ran can be reset when the channel count is
      // unchanged. Otherwise it must be rebuilt from scratch, because a
      // Vamp plugin can't be re-initialised.
      const bool usedBefore = prevTrackChannels > 0;
      const bool sameLayout = usedBefore && prevTrackChannels == channels;
      if (sameLayout)
      {
         mPlugin->reset();
      }
      else
      {
         if (usedBefore)
         {
            //!!! todo: retain parameters previously set
            Init();
         }

         if (!mPlugin->initialise(channels, step, block))
         {
            Effect::MessageBox(_("Sorry, Vamp Plug-in failed to initialize."));
            return false;
         }
      }

      const auto effectName = GetSymbol().Translation();
      addedTracks.push_back(AddAnalysisTrack(
         multiple
         ? wxString::Format( _("%s: %s"), left->GetName(), effectName )
         : effectName
      ));
      LabelTrack *ltrack = addedTracks.back()->get();

      FloatBuffers data{ channels, block };

      const auto originalLen = len;
      auto ls = lstart;
      auto rs = rstart;

      while (len != 0)
      {
         const auto request = limitSampleBufferSize( block, len );

         if (left)
         {
            left->Get((samplePtr)data[0].get(), floatSample, ls, request);
         }
         if (right)
         {
            right->Get((samplePtr)data[1].get(), floatSample, rs, request);
         }

         // Short final read: zero-pad so the plug-in sees a full block.
         if (request < block)
         {
            for (unsigned int c = 0; c < channels; ++c)
            {
               float *const samples = data[c].get();
               for (decltype(block) i = request; i < block; ++i)
               {
                  samples[i] = 0.f;
               }
            }
         }

         // UNSAFE_SAMPLE_COUNT_TRUNCATION
         // Truncation in case of very long tracks!
         const long frame = long( ls.as_long_long() );
         Vamp::RealTime timestamp =
            Vamp::RealTime::frame2RealTime( frame, (int)(mRate + 0.5) );

         Vamp::Plugin::FeatureSet features = mPlugin->process(
            reinterpret_cast< float** >( data.get() ), timestamp);
         AddFeatures(ltrack, features);

         // Consume one step, clamping at zero on the last chunk.
         if (len > (int)step)
            len -= step;
         else
            len = 0;

         ls += step;
         rs += step;

         // Report progress; a true result means the user cancelled.
         const double fraction =
            (ls - lstart).as_double() / originalLen.as_double();
         const bool cancelled = (channels > 1)
            ? TrackGroupProgress(count, fraction)
            : TrackProgress(count, fraction);
         if (cancelled)
         {
            return false;
         }
      }

      Vamp::Plugin::FeatureSet remaining = mPlugin->getRemainingFeatures();
      AddFeatures(ltrack, remaining);

      prevTrackChannels = channels;
   }

   // Everything ran to completion, so the added tracks can be kept.
   for (auto &addedTrack : addedTracks)
   {
      addedTrack->Commit();
   }

   return true;
}
예제 #7
0
void DataAsiya::loadNBest(const string &file)
{
  TRACE_ERR("loading nbest from DataAsiya " << file << endl);
  inputfilestream inp(file); // matches a stream with a file. Opens the file
  if (!inp.good())
    throw runtime_error("Unable to open: " + file);

  ScoreStats scoreentry;
  string line, sentence_index, sentence, feature_str, alignment;

  AsiyaScorer* a_scorer = dynamic_cast<AsiyaScorer*>(m_scorer);
  
  /*todo. change this loop. instead of obtaining the score for each sentence, obtain all the scores at once!*/
  while (getline(inp, line, '\n')) {
    if (line.empty()) continue;
    // adding statistics for error measures
    scoreentry.clear();

    getNextPound(line, sentence_index, "|||"); // first field
    getNextPound(line, sentence, "|||");       // second field
    getNextPound(line, feature_str, "|||");    // third field

    if (line.length() > 0) {
      string temp;
      getNextPound(line, temp, "|||"); //fourth field sentence score
      if (line.length() > 0) {
        getNextPound(line, alignment, "|||"); //fourth field only there if alignment scorer
      }
    }
    //TODO check alignment exists if scorers need it
    if (a_scorer->useAlignment()) {
      sentence += "|||";
      sentence += alignment;
    }
    // prepare stats gets all the scores for sentence_i of sentence_index
//    a_scorer->addCandidateSentence(sentence_index, sentence);
    a_scorer->prepareStats(atoi(sentence_index.c_str()), sentence, scoreentry);

    // examine first line for name of features
    if (!existsFeatureNames()) {
      InitFeatureMap(feature_str);
    }
    AddFeatures(feature_str, atoi(sentence_index.c_str()));
  }

  a_scorer->doScoring();
//  TRACE_ERR("before getAllScoreStats" << endl);

  std::vector<std::vector <ScoreStats> > allScoreStats = a_scorer->getAllScoreStats();
  for (int i = 0; i < allScoreStats.size(); ++i)
      for(int j = 0; j < allScoreStats[i].size(); ++j)
      {
          stringstream ss;
          ss << i;
          m_score_data->add(allScoreStats[i][j], atoi(ss.str().c_str()));
//          TRACE_ERR("allScoreStats[" << i << "].size() " << allScoreStats[i].size() << " " << allScoreStats[i][j] << endl);
      }


  inp.close();
//  a_scorer->doScoring( m_score_data );

  //score each sentence
  //a_scorer->prepareStats(sentence_index, sentence, scoreentry);
  // save the score for previous sentence. Do it aling with previous function
  //m_score_data->add(scoreentry, sentence_index);  
  
}