//TODO this should be a factory function!
TranslationOption::TranslationOption(const WordsRange &wordsRange
    , const TargetPhrase &targetPhrase
    , const InputType &inputType
    , int /*whatever*/)
  : m_targetPhrase(targetPhrase)
  , m_sourceWordsRange(wordsRange)
  , m_futureScore(0)
{
  const UnknownWordPenaltyProducer *up = StaticData::Instance().GetUnknownWordPenaltyProducer();
  if (up) {
    const ScoreProducer *scoreProducer = (const ScoreProducer *)up; // not sure why none of the c++ cast works
    vector<float> score(1);
    score[0] = FloorScore(-numeric_limits<float>::infinity());
    m_scoreBreakdown.Assign(scoreProducer, score);
  }

  if (inputType.GetType() == SentenceInput) {
    Phrase phrase = inputType.GetSubString(wordsRange);
    m_sourcePhrase = new Phrase(phrase);
  } else {
    // TODO lex reordering with confusion network
    m_sourcePhrase = new Phrase(*targetPhrase.GetSourcePhrase());
    // the target phrase from a confusion network/lattice has input scores that we want to keep
    m_scoreBreakdown.PlusEquals(targetPhrase.GetScoreBreakdown());
  }
}
int operator()(const InputType &x, ValueType &fvec) const
{
  m_decoder.Decode(m_rots, x);
  Vector3 v = sik.endPosition(m_rots);
  v -= m_goal;

  fvec.setZero();
  fvec.head<3>() = Eigen::Vector3f::Map(&v.x);

  // limit-exceed penalty
  auto limpanl = fvec.tail(x.size());
  for (int i = 0; i < x.size(); i++) {
    if (x[i] < m_min[i])
      limpanl[i] = m_limitPanalty * (x[i] - m_min[i]) * (x[i] - m_min[i]);
    else if (x[i] > m_max[i])
      limpanl[i] = m_limitPanalty * (x[i] - m_max[i]) * (x[i] - m_max[i]);
  }

  if (m_useRef) {
    limpanl += m_refWeights * (x - m_ref);
  }

  return 0;
}
void ChartParser::CreateInputPaths(const InputType &input)
{
  size_t size = input.GetSize();
  m_inputPathMatrix.resize(size);

  UTIL_THROW_IF2(input.GetType() != SentenceInput && input.GetType() != TreeInputType,
                 "Input must be a sentence or a tree, not lattice or confusion networks");

  for (size_t phaseSize = 1; phaseSize <= size; ++phaseSize) {
    for (size_t startPos = 0; startPos < size - phaseSize + 1; ++startPos) {
      size_t endPos = startPos + phaseSize - 1;
      vector<InputPath*> &vec = m_inputPathMatrix[startPos];

      WordsRange range(startPos, endPos);
      Phrase subphrase(input.GetSubString(WordsRange(startPos, endPos)));
      const NonTerminalSet &labels = input.GetLabelSet(startPos, endPos);

      InputPath *node;
      if (range.GetNumWordsCovered() == 1) {
        node = new InputPath(subphrase, labels, range, NULL, NULL);
      } else {
        const InputPath &prevNode = GetInputPath(startPos, endPos - 1);
        node = new InputPath(subphrase, labels, range, &prevNode, NULL);
      }
      vec.push_back(node);

      //m_inputPathQueue.push_back(node);
    }
  }
}
TEST(FileInputTypeTest, ignoreDroppedNonNativeFiles)
{
  Document* document = Document::create();
  HTMLInputElement* input = HTMLInputElement::create(*document, nullptr, false);
  InputType* fileInput = FileInputType::create(*input);

  DataObject* nativeFileRawDragData = DataObject::create();
  const DragData nativeFileDragData(nativeFileRawDragData, IntPoint(), IntPoint(), DragOperationCopy);
  nativeFileDragData.platformData()->add(File::create("/native/path"));
  nativeFileDragData.platformData()->setFilesystemId("fileSystemId");
  fileInput->receiveDroppedFiles(&nativeFileDragData);
  EXPECT_EQ("fileSystemId", fileInput->droppedFileSystemId());
  ASSERT_EQ(1u, fileInput->files()->length());
  EXPECT_EQ(String("/native/path"), fileInput->files()->item(0)->path());

  DataObject* nonNativeFileRawDragData = DataObject::create();
  const DragData nonNativeFileDragData(nonNativeFileRawDragData, IntPoint(), IntPoint(), DragOperationCopy);
  FileMetadata metadata;
  metadata.length = 1234;
  const KURL url(ParsedURLStringTag(), "filesystem:http://example.com/isolated/hash/non-native-file");
  nonNativeFileDragData.platformData()->add(File::createForFileSystemFile(url, metadata, File::IsUserVisible));
  nonNativeFileDragData.platformData()->setFilesystemId("fileSystemId");
  fileInput->receiveDroppedFiles(&nonNativeFileDragData);
  // Dropping non-native files should not change the existing files.
  EXPECT_EQ("fileSystemId", fileInput->droppedFileSystemId());
  ASSERT_EQ(1u, fileInput->files()->length());
  EXPECT_EQ(String("/native/path"), fileInput->files()->item(0)->path());
}
typename std::enable_if<NetworkTraits<ModelType>::IsSAE, void>::type
Train(InputType& data, OutputType& /* unused */)
{
  // Reset the training error.
  trainingError = 0;

  arma::uvec indices(batchSize);

  if (index.n_elem > batchSize) {
    for (size_t i = 0; i < index.n_elem; i += batchSize) {
      for (size_t j = 0; j < batchSize; j++)
        indices(j) = index(j + i);

      MatType input = data.rows(indices);
      net.FeedForward(input, input, error);
      trainingError += net.Error();
      net.FeedBackward(input, error);
      net.ApplyGradients();
    }

    trainingError /= (index.n_elem / batchSize);
  } else {
    net.FeedForward(data, data, error);
    trainingError += net.Error();
    net.FeedBackward(data, error);
    net.ApplyGradients();
  }
}
ChartRuleLookupManagerOnDisk::ChartRuleLookupManagerOnDisk(
  const InputType &sentence,
  const ChartCellCollectionBase &cellColl,
  const PhraseDictionaryOnDisk &dictionary,
  OnDiskPt::OnDiskWrapper &dbWrapper,
  const std::vector<FactorType> &inputFactorsVec,
  const std::vector<FactorType> &outputFactorsVec,
  const std::string &filePath)
  : ChartRuleLookupManagerCYKPlus(sentence, cellColl)
  , m_dictionary(dictionary)
  , m_dbWrapper(dbWrapper)
  , m_inputFactorsVec(inputFactorsVec)
  , m_outputFactorsVec(outputFactorsVec)
  , m_filePath(filePath)
{
  CHECK(m_expandableDottedRuleListVec.size() == 0);
  size_t sourceSize = sentence.GetSize();
  m_expandableDottedRuleListVec.resize(sourceSize);

  for (size_t ind = 0; ind < m_expandableDottedRuleListVec.size(); ++ind) {
    DottedRuleOnDisk *initDottedRule = new DottedRuleOnDisk(m_dbWrapper.GetRootSourceNode());
    DottedRuleStackOnDisk *processedStack = new DottedRuleStackOnDisk(sourceSize - ind + 1);
    processedStack->Add(0, initDottedRule); // init rule. stores the top node in tree
    m_expandableDottedRuleListVec[ind] = processedStack;
  }
}
void Gradient(const InputType& input, const arma::Mat<eT>& d, GradientDataType& g)
{
  g = d * input.t() / static_cast<typename InputType::value_type>(input.n_cols)
      + lambda * weights;
}
ChartRuleLookupManagerOnDisk::ChartRuleLookupManagerOnDisk(
  const InputType &sentence,
  const ChartCellCollection &cellColl,
  const PhraseDictionaryOnDisk &dictionary,
  OnDiskPt::OnDiskWrapper &dbWrapper,
  const LMList *languageModels,
  const WordPenaltyProducer *wpProducer,
  const std::vector<FactorType> &inputFactorsVec,
  const std::vector<FactorType> &outputFactorsVec,
  const std::vector<float> &weight,
  const std::string &filePath)
  : ChartRuleLookupManager(sentence, cellColl)
  , m_dictionary(dictionary)
  , m_dbWrapper(dbWrapper)
  , m_languageModels(languageModels)
  , m_wpProducer(wpProducer)
  , m_inputFactorsVec(inputFactorsVec)
  , m_outputFactorsVec(outputFactorsVec)
  , m_weight(weight)
  , m_filePath(filePath)
{
  assert(m_expandableDottedRuleListVec.size() == 0);
  size_t sourceSize = sentence.GetSize();
  m_expandableDottedRuleListVec.resize(sourceSize);

  for (size_t ind = 0; ind < m_expandableDottedRuleListVec.size(); ++ind) {
    DottedRuleOnDisk *initDottedRule = new DottedRuleOnDisk(m_dbWrapper.GetRootSourceNode());
    DottedRuleStackOnDisk *processedStack = new DottedRuleStackOnDisk(sourceSize - ind + 1);
    processedStack->Add(0, initDottedRule); // init rule. stores the top node in tree
    m_expandableDottedRuleListVec[ind] = processedStack;
  }
}
int df(const InputType &x, JacobianType &fjac)
{
  m_decoder.Decode(m_rots, x);
  fjac.setZero();

  m_jacb.resize(3, 3 * n);
  sik.endPositionJaccobiRespectEuler(m_rots,
      array_view<Vector3>(reinterpret_cast<Vector3*>(m_jacb.data()), 3 * n));
  m_decoder.EncodeJacobi(m_rots, m_jacb);
  fjac.topRows<3>() = m_jacb; //Eigen::Matrix3Xf::Map(&m_jac[0].x, 3, 3 * n);

  // limit-exceed penalty
  for (int i = 0; i < x.size(); i++) {
    if (x[i] < m_min[i])
      fjac(3 + i, i) = m_limitPanalty * (x[i] - m_min[i]);
    else if (x[i] > m_max[i])
      fjac(3 + i, i) = m_limitPanalty * (x[i] - m_max[i]);

    if (m_useRef) {
      fjac(3 + i, i) += m_refWeights;
    }
  }

  return 0;
}
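// Aside (not part of the original sources): an operator()/df functor pair like
// the IK cost above matches, roughly, the interface expected by Eigen's
// unsupported Levenberg-Marquardt solver. A minimal self-contained sketch of
// that wiring, with a toy residual whose minimum is at (3, -1):
#include <unsupported/Eigen/NonLinearOptimization>
#include <Eigen/Dense>
#include <iostream>

struct ToyFunctor {
  typedef float Scalar;
  typedef Eigen::VectorXf InputType;
  typedef Eigen::VectorXf ValueType;
  typedef Eigen::MatrixXf JacobianType;

  int operator()(const InputType& x, ValueType& fvec) const {
    fvec(0) = x(0) - 3.0f; // residuals; zero at (3, -1)
    fvec(1) = x(1) + 1.0f;
    return 0;
  }
  int df(const InputType& /*x*/, JacobianType& fjac) const {
    fjac.setIdentity();    // analytic Jacobian of the residuals
    return 0;
  }
  int inputs() const { return 2; }
  int values() const { return 2; }
};

int main()
{
  ToyFunctor functor;
  Eigen::LevenbergMarquardt<ToyFunctor, float> lm(functor);
  Eigen::VectorXf x(2);
  x << 0.0f, 0.0f;
  lm.minimize(x);          // drives operator() and df, as in the IK code above
  std::cout << x.transpose() << std::endl; // ~ (3, -1)
}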
ChartRuleLookupManagerMemory::ChartRuleLookupManagerMemory(
  const InputType &src,
  const ChartCellCollectionBase &cellColl,
  const PhraseDictionarySCFG &ruleTable)
  : ChartRuleLookupManagerCYKPlus(src, cellColl)
  , m_ruleTable(ruleTable)
{
  CHECK(m_dottedRuleColls.size() == 0);

  size_t sourceSize = src.GetSize();
  m_dottedRuleColls.resize(sourceSize);

  const PhraseDictionaryNodeSCFG &rootNode = m_ruleTable.GetRootNode();

  for (size_t ind = 0; ind < m_dottedRuleColls.size(); ++ind) {
#ifdef USE_BOOST_POOL
    DottedRuleInMemory *initDottedRule = m_dottedRulePool.malloc();
    new (initDottedRule) DottedRuleInMemory(rootNode);
#else
    DottedRuleInMemory *initDottedRule = new DottedRuleInMemory(rootNode);
#endif
    DottedRuleColl *dottedRuleColl = new DottedRuleColl(sourceSize - ind + 1);
    dottedRuleColl->Add(0, initDottedRule); // init rule. stores the top node in tree
    m_dottedRuleColls[ind] = dottedRuleColl;
  }
}
typename MRFIsingSmoothnessTerm<TInputValueType, TOutputValueType>::OutputType
MRFIsingSmoothnessTerm<TInputValueType, TOutputValueType>
::Evaluate(const InputType &input) const
{
  if (input.Size() != Superclass::m_NumberOfParameters) {
    itkExceptionMacro(<< "Not the expected number of parameters");
  }
void BaseClickableWithKeyInputType::handleKeyupEvent(InputType& inputType, KeyboardEvent* event)
{
  const String& key = event->keyIdentifier();
  if (key != "U+0020")
    return;
  // Simulate mouse click for spacebar for button types.
  inputType.dispatchSimulatedClickIfActive(event);
}
//TODO this should be a factory function!
TranslationOption::TranslationOption(const WordsRange &wordsRange
    , const TargetPhrase &targetPhrase
    , const InputType &inputType)
  : m_targetPhrase(targetPhrase)
  , m_sourceWordsRange(wordsRange)
{
  // set score
  m_scoreBreakdown.PlusEquals(targetPhrase.GetScoreBreakdown());

  if (inputType.GetType() == SentenceInput) {
    Phrase phrase = inputType.GetSubString(wordsRange);
    m_sourcePhrase = new Phrase(phrase);
  } else {
    // TODO lex reordering with confusion network
    m_sourcePhrase = new Phrase(*targetPhrase.GetSourcePhrase());
  }
}
Search::Search(Manager& manager, const InputType &source)
  : m_manager(manager)
  , m_source(source)
  , m_inputPath()
  , m_initialTransOpt()
  , m_options(manager.options())
  , interrupted_flag(0)
  , m_bitmaps(source.GetSize(), source.m_sourceCompleted)
{
  m_initialTransOpt.SetInputPath(m_inputPath);
}
typename Tokenizer<TokenEnumType, InputType>::ResultType
Tokenizer<TokenEnumType, InputType>::Tokenize(const InputType &input) const
{
  typename ResultType::TokenList tokens;

  auto iterator = input.begin();

  while (iterator < input.end()) {
    TokenType token;
    readToken(iterator, input.end(), token);

    iterator += token.value.size();

    if (token.value.empty())
      ++iterator;
    else
      tokens.push_back(token);
  }

  return ResultType(tokens);
}
void PhraseDictionaryALSuffixArray::InitializeForInput(InputType const& source)
{
  // populate with rules for this sentence
  long translationId = source.GetTranslationId();
  string grammarFile = GetFilePath() + "/grammar." + SPrint(translationId) + ".gz";

  std::auto_ptr<RuleTableLoader> loader = RuleTableLoaderFactory::Create(grammarFile);
  bool ret = loader->Load(m_input, m_output, grammarFile, m_tableLimit, *this);
  CHECK(ret);
}
void PhraseDictionaryNewFormat::InitializeForInput(const InputType& input)
{
  assert(m_runningNodesVec.size() == 0);
  size_t sourceSize = input.GetSize();
  m_runningNodesVec.resize(sourceSize);

  for (size_t ind = 0; ind < m_runningNodesVec.size(); ++ind) {
    ProcessedRule *initProcessedRule = new ProcessedRule(m_collection);
    ProcessedRuleStack *processedStack = new ProcessedRuleStack(sourceSize - ind + 1);
    processedStack->Add(0, initProcessedRule); // init rule. stores the top node in tree
    m_runningNodesVec[ind] = processedStack;
  }
}
void InputFeature::EvaluateWithSourceContext(const InputType &input
    , const InputPath &inputPath
    , const TargetPhrase &targetPhrase
    , const StackVec *stackVec
    , ScoreComponentCollection &scoreBreakdown
    , ScoreComponentCollection *estimatedScores) const
{
  if (m_legacy) {
    // binary phrase-table does input feature itself
    return;
  } else if (input.GetType() == WordLatticeInput) {
    const ScorePair *scores = inputPath.GetInputScore();
    if (scores) {
      scoreBreakdown.PlusEquals(this, *scores);
    }
  }
}
void Gradient(const InputType& input, const arma::Mat<eT>& d, GradientDataType& g)
{
  if (uselayer) {
    baseLayer.Gradient(input, d, g);

    // Denoise the weights.
    baseLayer.Weights() = denoise;
  } else {
    g = d * input.t();

    // Denoise the weights.
    weights = denoise;
  }
}
// Template header assumed; the original snippet does not show it.
template <typename OutputType, typename InputType, typename MatType>
void num_diff(const boost::function<OutputType (const InputType&)>& f
              , const InputType& cur  // current point
              , int m                 // output dimension
              , double epsilon
              , MatType* out)
{
  assert(out != nullptr);
  int n = cur.size();
  InputType x = cur;
  out->resize(m, n);

  // Central difference: column i approximates df/dx_i at cur.
  for (int i = 0; i < n; ++i) {
    x(i) = cur(i) + epsilon;
    OutputType fplus = f(x);
    x(i) = cur(i) - epsilon;
    OutputType fminus = f(x);
    out->col(i) = (fplus - fminus) / (2 * epsilon);
    x(i) = cur(i);
  }
}
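// A minimal usage sketch for num_diff (not part of the original sources). It
// assumes the templated num_diff above is in scope and instantiated with Eigen
// types; the test function and tolerance below are purely illustrative.
#include <cassert>
#include <cmath>
#include <iostream>
#include <boost/function.hpp>
#include <Eigen/Dense>

static Eigen::VectorXd testFn(const Eigen::VectorXd& x)
{
  Eigen::VectorXd y(2);
  y << x(0) * x(1), std::sin(x(0)); // f : R^2 -> R^2
  return y;
}

int main()
{
  Eigen::VectorXd x0(2);
  x0 << 1.0, 2.0;

  boost::function<Eigen::VectorXd (const Eigen::VectorXd&)> f = &testFn;

  Eigen::MatrixXd J; // receives the 2x2 finite-difference Jacobian
  num_diff(f, x0, 2, 1e-6, &J);

  // Analytic Jacobian at (1, 2): [[x1, x0], [cos(x0), 0]] = [[2, 1], [cos 1, 0]].
  Eigen::MatrixXd expected =
      (Eigen::MatrixXd(2, 2) << 2.0, 1.0, std::cos(1.0), 0.0).finished();
  assert((J - expected).norm() < 1e-5);
  std::cout << J << std::endl;
}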
void PhraseDictionaryALSuffixArray::InitializeForInput(InputType const& source)
{
  // clear out rules for previous sentence
  m_collection.Clear();

  // populate with rules for this sentence
  long translationId = source.GetTranslationId();
  string grammarFile = GetFilePath() + "/grammar.out." + SPrint(translationId);

  // data from file
  InputFileStream inFile(grammarFile);

  std::auto_ptr<RuleTableLoader> loader = RuleTableLoaderFactory::Create(grammarFile);
  bool ret = loader->Load(*m_input, *m_output, inFile, *m_weight, m_tableLimit,
                          *m_languageModels, m_wpProducer, *this);
  CHECK(ret);
}
void CoveredReferenceFeature::EvaluateWithSourceContext(const InputType &input
    , const InputPath &inputPath
    , const TargetPhrase &targetPhrase
    , const StackVec *stackVec
    , ScoreComponentCollection &scoreBreakdown
    , ScoreComponentCollection *estimatedFutureScore) const
{
  long id = input.GetTranslationId();
  boost::unordered_map<long, std::multiset<string> >::const_iterator refIt = m_refs.find(id);

  multiset<string> wordsInPhrase = GetWordsInPhrase(targetPhrase);
  multiset<string> covered;
  set_intersection(wordsInPhrase.begin(), wordsInPhrase.end(),
                   refIt->second.begin(), refIt->second.end(),
                   inserter(covered, covered.begin()));
  vector<float> scores;
  scores.push_back(covered.size());

  scoreBreakdown.Assign(this, scores);
  estimatedFutureScore->Assign(this, scores);
}
ChartRuleLookupManagerMemory::ChartRuleLookupManagerMemory(
  const InputType &src,
  const CellCollection &cellColl,
  const PhraseDictionarySCFG &ruleTable)
  : ChartRuleLookupManager(src, cellColl)
  , m_ruleTable(ruleTable)
{
  assert(m_processedRuleColls.size() == 0);
  size_t sourceSize = src.GetSize();
  m_processedRuleColls.resize(sourceSize);

  const PhraseDictionaryNodeSCFG &rootNode = m_ruleTable.GetRootNode();

  for (size_t ind = 0; ind < m_processedRuleColls.size(); ++ind) {
    ProcessedRule *initProcessedRule = new ProcessedRule(rootNode);
    ProcessedRuleColl *processedRuleColl = new ProcessedRuleColl(sourceSize - ind + 1);
    processedRuleColl->Add(0, initProcessedRule); // init rule. stores the top node in tree
    m_processedRuleColls[ind] = processedRuleColl;
  }
}
int main(int argc, char* argv[])
{
  cerr << "Lattice MBR Grid search" << endl;

  Grid grid;
  grid.addParam(lmbr_p, "-lmbr-p", 0.5);
  grid.addParam(lmbr_r, "-lmbr-r", 0.5);
  grid.addParam(lmbr_prune, "-lmbr-pruning-factor", 30.0);
  grid.addParam(lmbr_scale, "-mbr-scale", 1.0);
  grid.parseArgs(argc, argv);

  Parameter* params = new Parameter();
  if (!params->LoadParam(argc, argv)) {
    params->Explain();
    exit(1);
  }
  if (!StaticData::LoadDataStatic(params, argv[0])) {
    exit(1);
  }

  StaticData& staticData = const_cast<StaticData&>(StaticData::Instance());
  staticData.SetUseLatticeMBR(true);

  IOWrapper* ioWrapper = IOWrapper::GetIOWrapper(staticData);
  if (!ioWrapper) {
    throw runtime_error("Failed to initialise IOWrapper");
  }

  size_t nBestSize = staticData.GetMBRSize();
  if (nBestSize <= 0) {
    // throw by value, not `throw new`: a pointer would escape catch(exception&)
    throw runtime_error("Non-positive size specified for n-best list");
  }

  size_t lineCount = 0;
  InputType* source = NULL;

  const vector<float>& pgrid = grid.getGrid(lmbr_p);
  const vector<float>& rgrid = grid.getGrid(lmbr_r);
  const vector<float>& prune_grid = grid.getGrid(lmbr_prune);
  const vector<float>& scale_grid = grid.getGrid(lmbr_scale);

  while (ioWrapper->ReadInput(staticData.GetInputType(), source)) {
    ++lineCount;
    source->SetTranslationId(lineCount);

    Manager manager(*source, staticData.GetSearchAlgorithm());
    manager.ProcessSentence();
    TrellisPathList nBestList;
    manager.CalcNBest(nBestSize, nBestList, true);

    // grid search
    for (vector<float>::const_iterator pi = pgrid.begin(); pi != pgrid.end(); ++pi) {
      float p = *pi;
      staticData.SetLatticeMBRPrecision(p);
      for (vector<float>::const_iterator ri = rgrid.begin(); ri != rgrid.end(); ++ri) {
        float r = *ri;
        staticData.SetLatticeMBRPRatio(r);
        for (vector<float>::const_iterator prune_i = prune_grid.begin(); prune_i != prune_grid.end(); ++prune_i) {
          size_t prune = (size_t)(*prune_i);
          staticData.SetLatticeMBRPruningFactor(prune);
          for (vector<float>::const_iterator scale_i = scale_grid.begin(); scale_i != scale_grid.end(); ++scale_i) {
            float scale = *scale_i;
            staticData.SetMBRScale(scale);
            cout << lineCount << " ||| " << p << " " << r << " " << prune << " " << scale << " ||| ";
            vector<Word> mbrBestHypo = doLatticeMBR(manager, nBestList);
            ioWrapper->OutputBestHypo(mbrBestHypo, lineCount,
                                      staticData.GetReportSegmentation(),
                                      staticData.GetReportAllFactors(), cout);
          }
        }
      }
    }
  }
}
void mitk::GroupDiffusionHeadersFilter::Update()
{
  InputType input = static_cast<InputType>(this->GetInput());
  this->SetNthOutput(0, input);

  InputType dwi;
  InputType zerodwi;
  InputType other;

  bool foundDWI = false;

  // check each series' first image
  unsigned int size = input.size();
  HeaderPointer header;
  HeaderPointer dwiHeader;

  for (unsigned int i = 0; i < size; ++i) {
    header = input[i]; // list of files
    if (header->bValue > 0) {
      header->headerGroup = DHG_NonZeroDiffusionWeighted;
      if (!foundDWI)
        dwiHeader = header;
      foundDWI = true;
    } else {
      header->headerGroup = DHG_ZeroDiffusionWeighted;
    }
  }

  if (foundDWI) {
    for (unsigned int i = 0; i < size; ++i) {
      header = input[i]; // list of files
      if (!header->isIdentical(dwiHeader)) {
        header->headerGroup = DHG_Other;
      }
    }
  } else {
    for (unsigned int i = 0; i < size; ++i) {
      header = input[i];
      header->headerGroup = DHG_Other;
    }
  }

  for (unsigned int i = 0; i < size; ++i) {
    header = input[i];
    switch (header->headerGroup) {
    case DHG_Other:
      other.push_back(header);
      break;
    case DHG_ZeroDiffusionWeighted:
      zerodwi.push_back(header);
      break;
    case DHG_NonZeroDiffusionWeighted:
      dwi.push_back(header);
      break;
    case DHG_NotYetGrouped:
      break;
    }
  }

  this->SetNthOutput(1, dwi);
  this->SetNthOutput(2, zerodwi);
  this->SetNthOutput(3, other);
}
void WordTranslationFeature::EvaluateWithSourceContext(const InputType &input
    , const InputPath &inputPath
    , const TargetPhrase &targetPhrase
    , const StackVec *stackVec
    , ScoreComponentCollection &scoreBreakdown
    , ScoreComponentCollection *estimatedScores) const
{
  const Sentence& sentence = static_cast<const Sentence&>(input);
  const AlignmentInfo &alignment = targetPhrase.GetAlignTerm();

  // process aligned words
  for (AlignmentInfo::const_iterator alignmentPoint = alignment.begin();
       alignmentPoint != alignment.end(); alignmentPoint++) {
    const Phrase& sourcePhrase = inputPath.GetPhrase();
    int sourceIndex = alignmentPoint->first;
    int targetIndex = alignmentPoint->second;
    Word ws = sourcePhrase.GetWord(sourceIndex);
    if (m_factorTypeSource == 0 && ws.IsNonTerminal()) continue;
    Word wt = targetPhrase.GetWord(targetIndex);
    if (m_factorTypeSource == 0 && wt.IsNonTerminal()) continue;
    StringPiece sourceWord = ws.GetFactor(m_factorTypeSource)->GetString();
    StringPiece targetWord = wt.GetFactor(m_factorTypeTarget)->GetString();

    if (m_ignorePunctuation) {
      // check if source or target are punctuation
      char firstChar = sourceWord[0];
      CharHash::const_iterator charIterator = m_punctuationHash.find(firstChar);
      if (charIterator != m_punctuationHash.end())
        continue;
      firstChar = targetWord[0];
      charIterator = m_punctuationHash.find(firstChar);
      if (charIterator != m_punctuationHash.end())
        continue;
    }

    if (!m_unrestricted) {
      if (FindStringPiece(m_vocabSource, sourceWord) == m_vocabSource.end())
        sourceWord = "OTHER";
      if (FindStringPiece(m_vocabTarget, targetWord) == m_vocabTarget.end())
        targetWord = "OTHER";
    }

    if (m_simple) {
      // construct feature name
      util::StringStream featureName;
      featureName << m_description << "_" << sourceWord << "~" << targetWord;
      scoreBreakdown.SparsePlusEquals(featureName.str(), 1);
    }

    if (m_domainTrigger && !m_sourceContext) {
      const bool use_topicid = sentence.GetUseTopicId();
      const bool use_topicid_prob = sentence.GetUseTopicIdAndProb();
      if (use_topicid || use_topicid_prob) {
        if (use_topicid) {
          // use topicid as trigger
          const long topicid = sentence.GetTopicId();
          util::StringStream feature;
          feature << m_description << "_";
          if (topicid == -1)
            feature << "unk";
          else
            feature << topicid;
          feature << "_" << sourceWord << "~" << targetWord;
          scoreBreakdown.SparsePlusEquals(feature.str(), 1);
        } else {
          // use topic probabilities
          const vector<string> &topicid_prob = *(input.GetTopicIdAndProb());
          if (atol(topicid_prob[0].c_str()) == -1) {
            util::StringStream feature;
            feature << m_description << "_unk_" << sourceWord << "~" << targetWord;
            scoreBreakdown.SparsePlusEquals(feature.str(), 1);
          } else {
            for (size_t i = 0; i + 1 < topicid_prob.size(); i += 2) {
              util::StringStream feature;
              feature << m_description << "_" << topicid_prob[i] << "_"
                      << sourceWord << "~" << targetWord;
              scoreBreakdown.SparsePlusEquals(feature.str(), atof((topicid_prob[i + 1]).c_str()));
            }
          }
        }
      } else {
        // range over domain trigger words (keywords)
        const long docid = input.GetDocumentId();
        for (boost::unordered_set<std::string>::const_iterator p = m_vocabDomain[docid].begin();
             p != m_vocabDomain[docid].end(); ++p) {
          string sourceTrigger = *p;
          util::StringStream feature;
          feature << m_description << "_" << sourceTrigger << "_"
                  << sourceWord << "~" << targetWord;
          scoreBreakdown.SparsePlusEquals(feature.str(), 1);
        }
      }
    }

    if (m_sourceContext) {
      size_t globalSourceIndex = inputPath.GetWordsRange().GetStartPos() + sourceIndex;
      if (!m_domainTrigger && globalSourceIndex == 0) {
        // add <s> trigger feature for source
        util::StringStream feature;
        feature << m_description << "_<s>," << sourceWord << "~" << targetWord;
        scoreBreakdown.SparsePlusEquals(feature.str(), 1);
      }

      // range over source words to get context
      for (size_t contextIndex = 0; contextIndex < input.GetSize(); contextIndex++) {
        if (contextIndex == globalSourceIndex) continue;
        StringPiece sourceTrigger = input.GetWord(contextIndex).GetFactor(m_factorTypeSource)->GetString();
        if (m_ignorePunctuation) {
          // check if trigger is punctuation
          char firstChar = sourceTrigger[0];
          CharHash::const_iterator charIterator = m_punctuationHash.find(firstChar);
          if (charIterator != m_punctuationHash.end())
            continue;
        }

        const long docid = input.GetDocumentId();
        bool sourceTriggerExists = false;
        if (m_domainTrigger)
          sourceTriggerExists = FindStringPiece(m_vocabDomain[docid], sourceTrigger) != m_vocabDomain[docid].end();
        else if (!m_unrestricted)
          sourceTriggerExists = FindStringPiece(m_vocabSource, sourceTrigger) != m_vocabSource.end();

        if (m_domainTrigger) {
          if (sourceTriggerExists) {
            util::StringStream feature;
            feature << m_description << "_" << sourceTrigger << "_"
                    << sourceWord << "~" << targetWord;
            scoreBreakdown.SparsePlusEquals(feature.str(), 1);
          }
        } else if (m_unrestricted || sourceTriggerExists) {
          util::StringStream feature;
          feature << m_description << "_";
          if (contextIndex < globalSourceIndex)
            feature << sourceTrigger << "," << sourceWord;
          else
            feature << sourceWord << "," << sourceTrigger;
          feature << "~" << targetWord;
          scoreBreakdown.SparsePlusEquals(feature.str(), 1);
        }
      }
    }

    if (m_targetContext) {
      throw runtime_error("Can't use target words outside current translation option in a stateless feature");
      /*
      size_t globalTargetIndex = cur_hypo.GetCurrTargetWordsRange().GetStartPos() + targetIndex;
      if (globalTargetIndex == 0) {
        // add <s> trigger feature for source
        stringstream feature;
        feature << "wt_" << sourceWord << "~" << "<s>," << targetWord;
        accumulator->SparsePlusEquals(feature.str(), 1);
      }

      // range over target words (up to current position) to get context
      for (size_t contextIndex = 0; contextIndex < globalTargetIndex; contextIndex++) {
        string targetTrigger = cur_hypo.GetWord(contextIndex).GetFactor(m_factorTypeTarget)->GetString();
        if (m_ignorePunctuation) {
          // check if trigger is punctuation
          char firstChar = targetTrigger.at(0);
          CharHash::const_iterator charIterator = m_punctuationHash.find(firstChar);
          if (charIterator != m_punctuationHash.end())
            continue;
        }

        bool targetTriggerExists = false;
        if (!m_unrestricted)
          targetTriggerExists = m_vocabTarget.find(targetTrigger) != m_vocabTarget.end();

        if (m_unrestricted || targetTriggerExists) {
          stringstream feature;
          feature << "wt_" << sourceWord << "~" << targetTrigger << "," << targetWord;
          accumulator->SparsePlusEquals(feature.str(), 1);
        }
      }
      */
    }
  }
}
NumericType TransformLinear(InputType x)
{
  return TransformLinearNumeric(x.numeric_value());
}
void PhraseDictionaryFuzzyMatch::CleanUpAfterSentenceProcessing(const InputType &source)
{
  m_collection.erase(source.GetTranslationId());
}
void PhraseDictionaryFuzzyMatch::InitializeForInput(InputType const& inputSentence)
{
  char dirName[] = "/tmp/moses.XXXXXX";
  char *temp = mkdtemp(dirName);
  UTIL_THROW_IF2(temp == NULL, "Couldn't create temporary directory " << dirName);

  string dirNameStr(dirName);
  string inFileName(dirNameStr + "/in");

  ofstream inFile(inFileName.c_str());
  for (size_t i = 1; i < inputSentence.GetSize() - 1; ++i) {
    inFile << inputSentence.GetWord(i);
  }
  inFile << endl;
  inFile.close();

  long translationId = inputSentence.GetTranslationId();
  string ptFileName = m_FuzzyMatchWrapper->Extract(translationId, dirNameStr);

  // populate with rules for this sentence
  PhraseDictionaryNodeMemory &rootNode = m_collection[translationId];
  FormatType format = MosesFormat;

  // data from file
  InputFileStream inStream(ptFileName);

  // copied from class LoaderStandard
  PrintUserTime("Start loading fuzzy-match phrase model");

  const StaticData &staticData = StaticData::Instance();
  const std::string& factorDelimiter = staticData.GetFactorDelimiter();

  string lineOrig;
  size_t count = 0;

  while (getline(inStream, lineOrig)) {
    const string *line;
    if (format == HieroFormat) { // reformat line
      UTIL_THROW(util::Exception, "Cannot be Hiero format");
      //line = ReformatHieroRule(lineOrig);
    } else {
      // do nothing to format of line
      line = &lineOrig;
    }

    vector<string> tokens;
    vector<float> scoreVector;

    TokenizeMultiCharSeparator(tokens, *line, "|||");

    if (tokens.size() != 4 && tokens.size() != 5) {
      stringstream strme;
      strme << "Syntax error at " << ptFileName << ":" << count;
      UserMessage::Add(strme.str());
      abort();
    }

    const string &sourcePhraseString = tokens[0]
                 , &targetPhraseString = tokens[1]
                 , &scoreString = tokens[2]
                 , &alignString = tokens[3];

    bool isLHSEmpty = (sourcePhraseString.find_first_not_of(" \t", 0) == string::npos);
    if (isLHSEmpty && !staticData.IsWordDeletionEnabled()) {
      TRACE_ERR(ptFileName << ":" << count << ": pt entry contains empty target, skipping\n");
      continue;
    }

    Tokenize<float>(scoreVector, scoreString);
    const size_t numScoreComponents = GetNumScoreComponents();
    if (scoreVector.size() != numScoreComponents) {
      stringstream strme;
      strme << "Size of scoreVector != number (" << scoreVector.size() << "!="
            << numScoreComponents << ") of score components on line " << count;
      UserMessage::Add(strme.str());
      abort();
    }

    UTIL_THROW_IF2(scoreVector.size() != numScoreComponents,
                   "Number of scores incorrectly specified");

    // parse source & find pt node

    // constituent labels
    Word *sourceLHS;
    Word *targetLHS;

    // source
    Phrase sourcePhrase(0);
    sourcePhrase.CreateFromString(Input, m_input, sourcePhraseString, factorDelimiter, &sourceLHS);

    // create target phrase obj
    TargetPhrase *targetPhrase = new TargetPhrase();
    targetPhrase->CreateFromString(Output, m_output, targetPhraseString, factorDelimiter, &targetLHS);

    // rest of target phrase
    targetPhrase->SetAlignmentInfo(alignString);
    targetPhrase->SetTargetLHS(targetLHS);
    //targetPhrase->SetDebugOutput(string("New Format pt ") + line);

    // component score, for n-best output
    std::transform(scoreVector.begin(), scoreVector.end(), scoreVector.begin(), TransformScore);
    std::transform(scoreVector.begin(), scoreVector.end(), scoreVector.begin(), FloorScore);

    targetPhrase->GetScoreBreakdown().Assign(this, scoreVector);
    targetPhrase->Evaluate(sourcePhrase, GetFeaturesToApply());

    TargetPhraseCollection &phraseColl = GetOrCreateTargetPhraseCollection(rootNode, sourcePhrase, *targetPhrase, sourceLHS);
    phraseColl.Add(targetPhrase);

    count++;

    if (format == HieroFormat) { // reformat line
      delete line;
    } else {
      // do nothing
    }
  }

  // sort and prune each target phrase collection
  SortAndPrune(rootNode);

  //removedirectoryrecursively(dirName);
}
std::vector<std::vector<int>> GetCombinationsIterative(const std::vector<int>& input)
{
  typedef std::vector<std::vector<int>> ResultType;
  typedef std::vector<int> InputType;
  typedef std::vector<std::tuple<int, InputType>> ProblemType;
  typedef std::map<int, ProblemType> ProblemListType;
  typedef std::queue<InputType> LevelResultsType;

  ResultType result;
  ProblemListType problemTree;

  int currentLevel = input.size();
  int startLevel = currentLevel;
  problemTree[currentLevel] = ProblemType{ std::tuple<int, InputType>{ 0, input } };
  InputType temp = input;

  int nodeNr = 1;

  // Expand the problem tree: each node (chosenElement, remainingSet) spawns one
  // child per element of its remaining set, down to sets of size 2.
  while (currentLevel > 2) {
    problemTree[currentLevel - 1] = ProblemType{};
    const auto& nodes = problemTree[currentLevel];
    for (const auto& node : nodes) {
      temp = std::get<1>(node);
      for (const auto& element : temp) {
        std::vector<int> diffSet = temp;
        diffSet.erase(std::find(diffSet.begin(), diffSet.end(), element));
        problemTree[currentLevel - 1].push_back(std::make_tuple(element, diffSet));
      }
    }
    currentLevel = currentLevel - 1;
  }

  LevelResultsType resultsCurrentLevel;
  LevelResultsType resultsPreviousLevel;

  // Collapse the tree bottom-up: prepend each node's chosen element to the
  // orderings produced by its children.
  while (currentLevel < startLevel) {
    resultsCurrentLevel = {};
    const auto& nodes = problemTree[currentLevel];
    int partitionSize = currentLevel == 2 ? 2 : resultsPreviousLevel.size() / nodes.size();
    for (const auto& node : nodes) {
      temp = std::get<1>(node);
      if (temp.size() == 2) {
        resultsPreviousLevel.push({ temp[0], temp[1] });
        resultsPreviousLevel.push({ temp[1], temp[0] });
      }
      for (int i = 0; i < partitionSize; ++i) {
        auto& previousLevelResult = resultsPreviousLevel.front();
        previousLevelResult.insert(previousLevelResult.begin(), std::get<0>(node));
        resultsCurrentLevel.push(resultsPreviousLevel.front());
        resultsPreviousLevel.pop();
      }
    }
    resultsPreviousLevel = resultsCurrentLevel;
    ++currentLevel;
  }

  while (!resultsCurrentLevel.empty()) {
    result.push_back(resultsCurrentLevel.front());
    resultsCurrentLevel.pop();
  }

  return result;
}
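// A quick usage check (not part of the original sources), assuming
// GetCombinationsIterative above is in scope. Despite its name, the routine
// enumerates permutations; as written it yields an empty result for inputs
// with fewer than three elements, so a 3-element input is used here.
#include <iostream>
#include <vector>

int main()
{
  std::vector<int> input = { 1, 2, 3 };

  // Expect 3! = 6 orderings of {1, 2, 3}, one per line.
  for (const auto& perm : GetCombinationsIterative(input)) {
    for (int v : perm)
      std::cout << v << ' ';
    std::cout << '\n';
  }
}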