/**
 * Fire phrase-boundary features for the phrase pair applied by cur_hypo.
 *
 * Pairs the boundary words stored in the previous state with the first
 * target/source word of the current phrase pair, and, once the source is
 * completely covered, pairs the last words with NULL (end of sentence).
 *
 * @param cur_hypo   hypothesis whose current phrase pair is being scored
 * @param prev_state state of the preceding hypothesis; it is downcast to
 *                   PhraseBoundaryState and dereferenced without a check
 * @param scores     collection handed through to AddFeatures()
 * @return a newly allocated PhraseBoundaryState: a copy of the previous state
 *         when the target phrase is empty, otherwise one holding the last
 *         source and target words of the current phrase pair
 */
FFState* PhraseBoundaryFeature::Evaluate(const Hypothesis& cur_hypo,
                                         const FFState* prev_state,
                                         ScoreComponentCollection* scores) const
{
  const PhraseBoundaryState* previous =
    dynamic_cast<const PhraseBoundaryState*>(prev_state);

  const Phrase& target = cur_hypo.GetCurrTargetPhrase();
  if (target.GetSize() == 0) {
    // Nothing was produced on the target side: carry the boundary words over.
    return new PhraseBoundaryState(*previous);
  }

  // Boundary between the previous phrase and this one, target side first.
  AddFeatures(previous->GetTargetWord(), &(target.GetWord(0)),
              m_targetFactors, "tgt", scores);

  const Phrase& source =
    cur_hypo.GetTranslationOption().GetInputPath().GetPhrase();
  AddFeatures(previous->GetSourceWord(), &(source.GetWord(0)),
              m_sourceFactors, "src", scores);

  // The last words of this phrase pair become the next state's boundary words.
  const Word* lastSource = &(source.GetWord(source.GetSize() - 1));
  const Word* lastTarget = &(target.GetWord(target.GetSize() - 1));

  // If the whole source is covered, add the end-of-sentence boundary too.
  if (cur_hypo.IsSourceCompleted()) {
    AddFeatures(lastSource, NULL, m_sourceFactors, "src", scores);
    AddFeatures(lastTarget, NULL, m_targetFactors, "tgt", scores);
  }

  return new PhraseBoundaryState(lastSource, lastTarget);
}
void Data::loadNBest(const string &file) { TRACE_ERR("loading nbest from " << file << endl); inputfilestream inp(file); // matches a stream with a file. Opens the file if (!inp.good()) throw runtime_error("Unable to open: " + file); ScoreStats scoreentry; string line, sentence_index, sentence, feature_str; while (getline(inp, line, '\n')) { if (line.empty()) continue; // adding statistics for error measures scoreentry.clear(); getNextPound(line, sentence_index, "|||"); // first field getNextPound(line, sentence, "|||"); // second field getNextPound(line, feature_str, "|||"); // third field m_scorer->prepareStats(sentence_index, sentence, scoreentry); m_score_data->add(scoreentry, sentence_index); // examine first line for name of features if (!existsFeatureNames()) { InitFeatureMap(feature_str); } AddFeatures(feature_str, sentence_index); } inp.close(); }
void Data::loadNBest(const string &file, bool oneBest) { TRACE_ERR("loading nbest from " << file << endl); util::FilePiece in(file.c_str()); ScoreStats scoreentry; string sentence, feature_str, alignment; int sentence_index; while (true) { try { StringPiece line = in.ReadLine(); if (line.empty()) continue; // adding statistics for error measures scoreentry.clear(); util::TokenIter<util::MultiCharacter> it(line, util::MultiCharacter("|||")); sentence_index = ParseInt(*it); if (oneBest && m_score_data->exists(sentence_index)) continue; ++it; sentence = it->as_string(); ++it; feature_str = it->as_string(); ++it; if (it) { ++it; // skip model score. if (it) { alignment = it->as_string(); //fifth field (if present) is either phrase or word alignment ++it; if (it) { alignment = it->as_string(); //sixth field (if present) is word alignment } } } //TODO check alignment exists if scorers need it if (m_scorer->useAlignment()) { sentence += "|||"; sentence += alignment; } m_scorer->prepareStats(sentence_index, sentence, scoreentry); m_score_data->add(scoreentry, sentence_index); // examine first line for name of features if (!existsFeatureNames()) { InitFeatureMap(feature_str); } AddFeatures(feature_str, sentence_index); } catch (util::EndOfFileException &e) { PrintUserTime("Loaded N-best lists"); break; } } }
bool VampEffect::Process() { if (!mPlugin) return false; TrackListIterator iter(mWaveTracks); int count = 0; WaveTrack *left = (WaveTrack *)iter.First(); bool multiple = false; int prevTrackChannels = 0; TrackListIterator scooter(iter); if (left->GetLinked()) scooter.Next(); if (scooter.Next()) { // if there is another track beyond this one and any linked one, // then we're processing more than one track. That means we // should use the originating track name in each new label // track's name, to make clear which is which multiple = true; } while (left) { sampleCount lstart, rstart; sampleCount len; GetSamples(left, &lstart, &len); WaveTrack *right = NULL; int channels = 1; if (left->GetLinked()) { right = (WaveTrack *)iter.Next(); channels = 2; GetSamples(right, &rstart, &len); } size_t step = mPlugin->getPreferredStepSize(); size_t block = mPlugin->getPreferredBlockSize(); bool initialiseRequired = true; if (block == 0) { if (step != 0) block = step; else block = 1024; } if (step == 0) { step = block; } if (prevTrackChannels > 0) { // Plugin has already been initialised, so if the number of // channels remains the same, we only need to do a reset. // Otherwise we need to re-construct the whole plugin, // because a Vamp plugin can't be re-initialised. if (prevTrackChannels == channels) { mPlugin->reset(); initialiseRequired = false; } else { //!!! 
todo: retain parameters previously set Init(); } } if (initialiseRequired) { if (!mPlugin->initialise(channels, step, block)) { wxMessageBox(_("Sorry, Vamp Plug-in failed to initialize.")); return false; } } LabelTrack *ltrack = mFactory->NewLabelTrack(); if (!multiple) { ltrack->SetName(GetEffectName()); } else { ltrack->SetName(wxString::Format(wxT("%s: %s"), left->GetName().c_str(), GetEffectName().c_str())); } mTracks->Add(ltrack); float **data = new float*[channels]; for (int c = 0; c < channels; ++c) data[c] = new float[block]; sampleCount originalLen = len; sampleCount ls = lstart; sampleCount rs = rstart; while (len) { int request = block; if (request > len) request = len; if (left) left->Get((samplePtr)data[0], floatSample, ls, request); if (right) right->Get((samplePtr)data[1], floatSample, rs, request); if (request < (int)block) { for (int c = 0; c < channels; ++c) { for (int i = request; i < (int)block; ++i) { data[c][i] = 0.f; } } } Vamp::RealTime timestamp = Vamp::RealTime::frame2RealTime (ls, (int)(mRate + 0.5)); Vamp::Plugin::FeatureSet features = mPlugin->process(data, timestamp); AddFeatures(ltrack, features); if (len > (int)step) len -= step; else len = 0; ls += step; rs += step; if (channels > 1) { if (TrackGroupProgress(count, (ls - lstart) / double(originalLen))) return false; } else { if (TrackProgress(count, (ls - lstart) / double(originalLen))) return false; } } Vamp::Plugin::FeatureSet features = mPlugin->getRemainingFeatures(); AddFeatures(ltrack, features); prevTrackChannels = channels; left = (WaveTrack *)iter.Next(); } return true; }
void SparseReordering::CopyScores( const TranslationOption& currentOpt, const TranslationOption* previousOpt, const InputType& input, LexicalReorderingState::ReorderingType reoType, LexicalReorderingConfiguration::Direction direction, ScoreComponentCollection* scores) const { if (m_useBetween && direction == LexicalReorderingConfiguration::Backward && (reoType == LexicalReorderingState::D || reoType == LexicalReorderingState::DL || reoType == LexicalReorderingState::DR)) { size_t gapStart, gapEnd; //NB: Using a static cast for speed, but could be nasty if //using non-sentence input const Sentence& sentence = static_cast<const Sentence&>(input); const WordsRange& currentRange = currentOpt.GetSourceWordsRange(); if (previousOpt) { const WordsRange& previousRange = previousOpt->GetSourceWordsRange(); if (previousRange < currentRange) { gapStart = previousRange.GetEndPos() + 1; gapEnd = currentRange.GetStartPos(); } else { gapStart = currentRange.GetEndPos() + 1; gapEnd = previousRange.GetStartPos(); } } else { //start of sentence gapStart = 0; gapEnd = currentRange.GetStartPos(); } assert(gapStart < gapEnd); for (size_t i = gapStart; i < gapEnd; ++i) { AddFeatures(SparseReorderingFeatureKey::Between, SparseReorderingFeatureKey::Source, sentence.GetWord(i), SparseReorderingFeatureKey::First, reoType, scores); } } //std::cerr << "SR " << topt << " " << reoType << " " << direction << std::endl; //phrase (backward) //stack (forward) SparseReorderingFeatureKey::Type type; if (direction == LexicalReorderingConfiguration::Forward) { if (!m_useStack) return; type = SparseReorderingFeatureKey::Stack; } else if (direction == LexicalReorderingConfiguration::Backward) { if (!m_usePhrase) return; type = SparseReorderingFeatureKey::Phrase; } else { //Shouldn't be called for bidirectional //keep compiler happy type = SparseReorderingFeatureKey::Phrase; assert(!"Shouldn't call CopyScores() with bidirectional direction"); } const Phrase& sourcePhrase = 
currentOpt.GetInputPath().GetPhrase(); AddFeatures(type, SparseReorderingFeatureKey::Source, sourcePhrase.GetWord(0), SparseReorderingFeatureKey::First, reoType, scores); AddFeatures(type, SparseReorderingFeatureKey::Source, sourcePhrase.GetWord(sourcePhrase.GetSize()-1), SparseReorderingFeatureKey::Last, reoType, scores); const Phrase& targetPhrase = currentOpt.GetTargetPhrase(); AddFeatures(type, SparseReorderingFeatureKey::Target, targetPhrase.GetWord(0), SparseReorderingFeatureKey::First, reoType, scores); AddFeatures(type, SparseReorderingFeatureKey::Target, targetPhrase.GetWord(targetPhrase.GetSize()-1), SparseReorderingFeatureKey::Last, reoType, scores); }
bool VampEffect::Process() { if (!mPlugin) { return false; } int count = 0; bool multiple = false; unsigned prevTrackChannels = 0; if (GetNumWaveGroups() > 1) { // if there is another track beyond this one and any linked one, // then we're processing more than one track. That means we // should use the originating track name in each NEW label // track's name, to make clear which is which multiple = true; } std::vector<std::shared_ptr<Effect::AddedAnalysisTrack>> addedTracks; for (auto leader : inputTracks()->Leaders<const WaveTrack>()) { auto channelGroup = TrackList::Channels(leader); auto left = *channelGroup.first++; sampleCount lstart, rstart = 0; sampleCount len; GetSamples(left, &lstart, &len); unsigned channels = 1; // channelGroup now contains all but the first channel const WaveTrack *right = channelGroup.size() ? *channelGroup.first++ : nullptr; if (right) { channels = 2; GetSamples(right, &rstart, &len); } // TODO: more-than-two-channels size_t step = mPlugin->getPreferredStepSize(); size_t block = mPlugin->getPreferredBlockSize(); bool initialiseRequired = true; if (block == 0) { if (step != 0) { block = step; } else { block = 1024; } } if (step == 0) { step = block; } if (prevTrackChannels > 0) { // Plugin has already been initialised, so if the number of // channels remains the same, we only need to do a reset. // Otherwise we need to re-construct the whole plugin, // because a Vamp plugin can't be re-initialised. if (prevTrackChannels == channels) { mPlugin->reset(); initialiseRequired = false; } else { //!!! todo: retain parameters previously set Init(); } } if (initialiseRequired) { if (!mPlugin->initialise(channels, step, block)) { Effect::MessageBox(_("Sorry, Vamp Plug-in failed to initialize.")); return false; } } const auto effectName = GetSymbol().Translation(); addedTracks.push_back(AddAnalysisTrack( multiple ? 
wxString::Format( _("%s: %s"), left->GetName(), effectName ) : effectName )); LabelTrack *ltrack = addedTracks.back()->get(); FloatBuffers data{ channels, block }; auto originalLen = len; auto ls = lstart; auto rs = rstart; while (len != 0) { const auto request = limitSampleBufferSize( block, len ); if (left) { left->Get((samplePtr)data[0].get(), floatSample, ls, request); } if (right) { right->Get((samplePtr)data[1].get(), floatSample, rs, request); } if (request < block) { for (unsigned int c = 0; c < channels; ++c) { for (decltype(block) i = request; i < block; ++i) { data[c][i] = 0.f; } } } // UNSAFE_SAMPLE_COUNT_TRUNCATION // Truncation in case of very long tracks! Vamp::RealTime timestamp = Vamp::RealTime::frame2RealTime( long( ls.as_long_long() ), (int)(mRate + 0.5) ); Vamp::Plugin::FeatureSet features = mPlugin->process( reinterpret_cast< float** >( data.get() ), timestamp); AddFeatures(ltrack, features); if (len > (int)step) { len -= step; } else { len = 0; } ls += step; rs += step; if (channels > 1) { if (TrackGroupProgress(count, (ls - lstart).as_double() / originalLen.as_double() )) { return false; } } else { if (TrackProgress(count, (ls - lstart).as_double() / originalLen.as_double() )) { return false; } } } Vamp::Plugin::FeatureSet features = mPlugin->getRemainingFeatures(); AddFeatures(ltrack, features); prevTrackChannels = channels; } // All completed without cancellation, so commit the addition of tracks now for (auto &addedTrack : addedTracks) addedTrack->Commit(); return true; }
void DataAsiya::loadNBest(const string &file) { TRACE_ERR("loading nbest from DataAsiya " << file << endl); inputfilestream inp(file); // matches a stream with a file. Opens the file if (!inp.good()) throw runtime_error("Unable to open: " + file); ScoreStats scoreentry; string line, sentence_index, sentence, feature_str, alignment; AsiyaScorer* a_scorer = dynamic_cast<AsiyaScorer*>(m_scorer); /*todo. change this loop. instead of obtaining the score for each sentence, obtain all the scores at once!*/ while (getline(inp, line, '\n')) { if (line.empty()) continue; // adding statistics for error measures scoreentry.clear(); getNextPound(line, sentence_index, "|||"); // first field getNextPound(line, sentence, "|||"); // second field getNextPound(line, feature_str, "|||"); // third field if (line.length() > 0) { string temp; getNextPound(line, temp, "|||"); //fourth field sentence score if (line.length() > 0) { getNextPound(line, alignment, "|||"); //fourth field only there if alignment scorer } } //TODO check alignment exists if scorers need it if (a_scorer->useAlignment()) { sentence += "|||"; sentence += alignment; } // prepare stats gets all the scores for sentence_i of sentence_index // a_scorer->addCandidateSentence(sentence_index, sentence); a_scorer->prepareStats(atoi(sentence_index.c_str()), sentence, scoreentry); // examine first line for name of features if (!existsFeatureNames()) { InitFeatureMap(feature_str); } AddFeatures(feature_str, atoi(sentence_index.c_str())); } a_scorer->doScoring(); // TRACE_ERR("before getAllScoreStats" << endl); std::vector<std::vector <ScoreStats> > allScoreStats = a_scorer->getAllScoreStats(); for (int i = 0; i < allScoreStats.size(); ++i) for(int j = 0; j < allScoreStats[i].size(); ++j) { stringstream ss; ss << i; m_score_data->add(allScoreStats[i][j], atoi(ss.str().c_str())); // TRACE_ERR("allScoreStats[" << i << "].size() " << allScoreStats[i].size() << " " << allScoreStats[i][j] << endl); } inp.close(); // 
a_scorer->doScoring( m_score_data ); //score each sentence //a_scorer->prepareStats(sentence_index, sentence, scoreentry); // save the score for previous sentence. Do it aling with previous function //m_score_data->add(scoreentry, sentence_index); }