Ejemplo n.º 1
0
//TODO this should be a factory function!
// Builds a translation option for the given source span: keeps the target
// phrase, remembers the covered source range, and initialises the score
// breakdown.  The trailing int parameter is unused (overload disambiguator).
TranslationOption::TranslationOption(const WordsRange &wordsRange
																		 , const TargetPhrase &targetPhrase
																		 , const InputType &inputType
																		 , int /*whatever*/)
: m_targetPhrase(targetPhrase)
, m_sourceWordsRange	(wordsRange)
, m_futureScore(0)
{
	// If an unknown-word penalty producer is registered, pre-assign it the
	// floored -infinity score (the lowest representable score).
	const UnknownWordPenaltyProducer *up = StaticData::Instance().GetUnknownWordPenaltyProducer();
  if (up) {
		const ScoreProducer *scoreProducer = (const ScoreProducer *)up; // not sure why none of the c++ cast works
		vector<float> score(1);
		score[0] = FloorScore(-numeric_limits<float>::infinity());
		m_scoreBreakdown.Assign(scoreProducer, score);
	}

	if (inputType.GetType() == SentenceInput)
	{
		// Plain sentence: the source phrase is simply the covered substring.
		Phrase phrase = inputType.GetSubString(wordsRange);
		m_sourcePhrase = new Phrase(phrase);
	}
	else
	{ // TODO lex reordering with confusion network
		m_sourcePhrase = new Phrase(*targetPhrase.GetSourcePhrase());
		//the target phrase from a confusion network/lattice has input scores that we want to keep
		m_scoreBreakdown.PlusEquals(targetPhrase.GetScoreBreakdown());

	}
}
Ejemplo n.º 2
0
	// Residual functor for a least-squares IK solve: writes the end-effector
	// position error into the first three entries of fvec, and per-joint
	// limit / reference penalties into the tail.  Returns 0 (the convention
	// Eigen's nonlinear solvers use for "evaluation succeeded").
	int operator()(const InputType &x, ValueType& fvec) const {
		// Decode the parameter vector into joint rotations.
		m_decoder.Decode(m_rots, x);

		// Positional error of the end effector w.r.t. the goal.
		Vector3 v = sik.endPosition(m_rots);
		v -= m_goal;
		fvec.setZero();
		fvec.head<3>() = Eigen::Vector3f::Map(&v.x);

		// limit-exceed panelaty
		// Quadratic penalty, applied only when a parameter leaves [m_min, m_max].
		auto limpanl = fvec.tail(x.size());
		for (int i = 0; i < x.size(); i++)
		{
			if (x[i] < m_min[i])
				limpanl[i] = m_limitPanalty*(x[i] - m_min[i])*(x[i] - m_min[i]);
			else if (x[i] > m_max[i])
				limpanl[i] = m_limitPanalty*(x[i] - m_max[i])*(x[i] - m_max[i]);
		}

		if (m_useRef)
		{
			// Soft pull towards the reference pose, weighted per joint.
			limpanl += m_refWeights *(x - m_ref);
		}

		return 0;
	}
Ejemplo n.º 3
0
// Builds the triangular matrix of input paths, one per source span.
// Spans are enumerated shortest-first so that the path covering
// [startPos, endPos-1] already exists when [startPos, endPos] is created.
void ChartParser::CreateInputPaths(const InputType &input)
{
  const size_t size = input.GetSize();
  m_inputPathMatrix.resize(size);

  UTIL_THROW_IF2(input.GetType() != SentenceInput && input.GetType() != TreeInputType,
		  "Input must be a sentence or a tree, not lattice or confusion networks");

  for (size_t phaseSize = 1; phaseSize <= size; ++phaseSize) {
    for (size_t startPos = 0; startPos + phaseSize <= size; ++startPos) {
      const size_t endPos = startPos + phaseSize - 1;

      WordsRange range(startPos, endPos);
      Phrase subphrase(input.GetSubString(range));
      const NonTerminalSet &labels = input.GetLabelSet(startPos, endPos);

      // Single-word spans have no predecessor; longer spans link to the
      // path covering one word less.
      const InputPath *prev = (range.GetNumWordsCovered() == 1)
                              ? NULL
                              : &GetInputPath(startPos, endPos - 1);
      InputPath *node = new InputPath(subphrase, labels, range, prev, NULL);
      m_inputPathMatrix[startPos].push_back(node);

      //m_inputPathQueue.push_back(node);
    }
  }
}
// Verifies that dropping a non-native (filesystem-URL-backed) file onto a
// file input does not replace the native file that was already accepted.
TEST(FileInputTypeTest, ignoreDroppedNonNativeFiles)
{
    Document* document = Document::create();
    HTMLInputElement* input =
        HTMLInputElement::create(*document, nullptr, false);
    InputType* fileInput = FileInputType::create(*input);

    // First drop a real (native-path) file; it should be accepted.
    DataObject* nativeFileRawDragData = DataObject::create();
    const DragData nativeFileDragData(nativeFileRawDragData, IntPoint(), IntPoint(), DragOperationCopy);
    nativeFileDragData.platformData()->add(File::create("/native/path"));
    nativeFileDragData.platformData()->setFilesystemId("fileSystemId");
    fileInput->receiveDroppedFiles(&nativeFileDragData);
    EXPECT_EQ("fileSystemId", fileInput->droppedFileSystemId());
    ASSERT_EQ(1u, fileInput->files()->length());
    EXPECT_EQ(String("/native/path"), fileInput->files()->item(0)->path());

    // Then drop a file backed only by an isolated filesystem URL.
    DataObject* nonNativeFileRawDragData = DataObject::create();
    const DragData nonNativeFileDragData(nonNativeFileRawDragData, IntPoint(), IntPoint(), DragOperationCopy);
    FileMetadata metadata;
    metadata.length = 1234;
    const KURL url(ParsedURLStringTag(), "filesystem:http://example.com/isolated/hash/non-native-file");
    nonNativeFileDragData.platformData()->add(File::createForFileSystemFile(url, metadata, File::IsUserVisible));
    nonNativeFileDragData.platformData()->setFilesystemId("fileSystemId");
    fileInput->receiveDroppedFiles(&nonNativeFileDragData);
    // Dropping non-native files should not change the existing files.
    EXPECT_EQ("fileSystemId", fileInput->droppedFileSystemId());
    ASSERT_EQ(1u, fileInput->files()->length());
    EXPECT_EQ(String("/native/path"), fileInput->files()->item(0)->path());
}
Ejemplo n.º 5
0
    // Trains a sparse autoencoder (enabled via SFINAE only when the network
    // traits mark the model as an SAE): each mini-batch is fed forward with
    // the input as its own target, the reconstruction error is accumulated,
    // and gradients are applied after every batch.  OutputType is unused.
    typename std::enable_if<NetworkTraits<ModelType>::IsSAE, void>::type
    Train(InputType& data, OutputType& /* unused */)
    {
      // Reset the training error.
      trainingError = 0;

      arma::uvec indices(batchSize);

      if (index.n_elem > batchSize)
      {
        // Mini-batch training.
        // NOTE(review): assumes index.n_elem is a multiple of batchSize;
        // otherwise `index(j + i)` reads past the end — confirm upstream.
        for (size_t i = 0; i < index.n_elem; i += batchSize)
        {
          for (size_t j = 0; j < batchSize; j++)
            indices(j) = index(j + i);

          MatType input = data.rows(indices);
          net.FeedForward(input, input, error);

          trainingError += net.Error();
          net.FeedBackward(input, error);
          net.ApplyGradients();
        }

        // Report the mean error over all processed batches.
        trainingError /= (index.n_elem / batchSize);
      }
      else
      {
        // Fewer samples than one batch: train on the whole set at once.
        net.FeedForward(data, data, error);
        trainingError += net.Error();
        net.FeedBackward(data, error);
        net.ApplyGradients();
      }
    }
// Sets up one expandable dotted-rule stack per source position, each seeded
// with an initial rule pointing at the root node of the on-disk rule trie.
ChartRuleLookupManagerOnDisk::ChartRuleLookupManagerOnDisk(
  const InputType &sentence,
  const ChartCellCollectionBase &cellColl,
  const PhraseDictionaryOnDisk &dictionary,
  OnDiskPt::OnDiskWrapper &dbWrapper,
  const std::vector<FactorType> &inputFactorsVec,
  const std::vector<FactorType> &outputFactorsVec,
  const std::string &filePath)
  : ChartRuleLookupManagerCYKPlus(sentence, cellColl)
  , m_dictionary(dictionary)
  , m_dbWrapper(dbWrapper)
  , m_inputFactorsVec(inputFactorsVec)
  , m_outputFactorsVec(outputFactorsVec)
  , m_filePath(filePath)
{
  CHECK(m_expandableDottedRuleListVec.size() == 0);
  size_t sourceSize = sentence.GetSize();
  m_expandableDottedRuleListVec.resize(sourceSize);

  for (size_t ind = 0; ind < m_expandableDottedRuleListVec.size(); ++ind) {
    DottedRuleOnDisk *initDottedRule = new DottedRuleOnDisk(m_dbWrapper.GetRootSourceNode());

    // Stack sized to the number of spans that can start at this position.
    DottedRuleStackOnDisk *processedStack = new DottedRuleStackOnDisk(sourceSize - ind + 1);
    processedStack->Add(0, initDottedRule); // init rule. stores the top node in tree

    m_expandableDottedRuleListVec[ind] = processedStack;
  }
}
Ejemplo n.º 7
0
 // Gradient of a regularized linear layer: the batch-averaged outer
 // product of the deltas with the inputs, plus the L2 weight-decay term.
 void Gradient(const InputType& input,
               const arma::Mat<eT>& d,
               GradientDataType& g)
 {
   const typename InputType::value_type batch =
       static_cast<typename InputType::value_type>(input.n_cols);
   g = (d * input.t()) / batch + lambda * weights;
 }
// Legacy variant of the on-disk chart-rule lookup manager: additionally
// carries language models, word-penalty producer and weights used when
// scoring rules loaded from disk.  Seeds one dotted-rule stack per source
// position with the root of the on-disk rule trie.
ChartRuleLookupManagerOnDisk::ChartRuleLookupManagerOnDisk(
  const InputType &sentence,
  const ChartCellCollection &cellColl,
  const PhraseDictionaryOnDisk &dictionary,
  OnDiskPt::OnDiskWrapper &dbWrapper,
  const LMList *languageModels,
  const WordPenaltyProducer *wpProducer,
  const std::vector<FactorType> &inputFactorsVec,
  const std::vector<FactorType> &outputFactorsVec,
  const std::vector<float> &weight,
  const std::string &filePath)
  : ChartRuleLookupManager(sentence, cellColl)
  , m_dictionary(dictionary)
  , m_dbWrapper(dbWrapper)
  , m_languageModels(languageModels)
  , m_wpProducer(wpProducer)
  , m_inputFactorsVec(inputFactorsVec)
  , m_outputFactorsVec(outputFactorsVec)
  , m_weight(weight)
  , m_filePath(filePath)
{
  assert(m_expandableDottedRuleListVec.size() == 0);
  size_t sourceSize = sentence.GetSize();
  m_expandableDottedRuleListVec.resize(sourceSize);

  for (size_t ind = 0; ind < m_expandableDottedRuleListVec.size(); ++ind) {
    DottedRuleOnDisk *initDottedRule = new DottedRuleOnDisk(m_dbWrapper.GetRootSourceNode());

    // Stack sized to the number of spans that can start at this position.
    DottedRuleStackOnDisk *processedStack = new DottedRuleStackOnDisk(sourceSize - ind + 1);
    processedStack->Add(0, initDottedRule); // init rule. stores the top node in tree

    m_expandableDottedRuleListVec[ind] = processedStack;
  }
}
Ejemplo n.º 9
0
	// Analytic Jacobian for the IK residual: the top three rows hold the
	// end-effector position Jacobian w.r.t. the encoded parameters; the
	// remaining (diagonal) entries are the derivatives of the per-joint
	// limit and reference penalties.  Returns 0 (Eigen success convention).
	int df(const InputType &x, JacobianType& fjac) {
		m_decoder.Decode(m_rots, x);

		fjac.setZero();

		// Jacobian of the end-effector position w.r.t. Euler angles.
		m_jacb.resize(3, 3 * n);
		sik.endPositionJaccobiRespectEuler(m_rots, 
			array_view<Vector3>(reinterpret_cast<Vector3*>(m_jacb.data()),3*n));

		// Map the Euler-angle Jacobian back into encoded parameter space.
		m_decoder.EncodeJacobi(m_rots, m_jacb);

		fjac.topRows<3>() = m_jacb;//Eigen::Matrix3Xf::Map(&m_jac[0].x, 3, 3 * n);

		// limit-exceed panelaty
		// NOTE(review): the residual uses w*(x-bound)^2, whose derivative is
		// 2*w*(x-bound); the factor 2 is omitted here — confirm intentional.
		for (int i = 0; i < x.size(); i++)
		{
			if (x[i] < m_min[i])
				fjac(3 + i, i) = m_limitPanalty * (x[i] - m_min[i]);
			else if (x[i] > m_max[i])
				fjac(3 + i, i) = m_limitPanalty * (x[i] - m_max[i]);

			if (m_useRef)
			{
				fjac(3 + i, i) += m_refWeights;
			}
		}

		return 0;
	}
// Builds one dotted-rule collection per source position, each seeded with
// the root of the in-memory SCFG rule trie.  When USE_BOOST_POOL is defined
// the initial dotted rules are allocated from an object pool (placement
// new) instead of the heap.
ChartRuleLookupManagerMemory::ChartRuleLookupManagerMemory(
  const InputType &src,
  const ChartCellCollectionBase &cellColl,
  const PhraseDictionarySCFG &ruleTable)
  : ChartRuleLookupManagerCYKPlus(src, cellColl)
  , m_ruleTable(ruleTable)
{
  CHECK(m_dottedRuleColls.size() == 0);
  size_t sourceSize = src.GetSize();
  m_dottedRuleColls.resize(sourceSize);

  const PhraseDictionaryNodeSCFG &rootNode = m_ruleTable.GetRootNode();

  for (size_t ind = 0; ind < m_dottedRuleColls.size(); ++ind) {
#ifdef USE_BOOST_POOL
    DottedRuleInMemory *initDottedRule = m_dottedRulePool.malloc();
    new (initDottedRule) DottedRuleInMemory(rootNode);
#else
    DottedRuleInMemory *initDottedRule = new DottedRuleInMemory(rootNode);
#endif

    // Collection sized to the number of spans starting at this position.
    DottedRuleColl *dottedRuleColl = new DottedRuleColl(sourceSize - ind + 1);
    dottedRuleColl->Add(0, initDottedRule); // init rule. stores the top node in tree

    m_dottedRuleColls[ind] = dottedRuleColl;
  }
}
// Parameter-count sanity check at the start of the Ising smoothness-term
// evaluation.
// NOTE(review): this definition is truncated in this extract — the body
// continues past the validation check but is not visible here.
typename MRFIsingSmoothnessTerm<TInputValueType, TOutputValueType>::OutputType
MRFIsingSmoothnessTerm<TInputValueType, TOutputValueType>::
Evaluate(const InputType &input) const
{
	if(input.Size() != Superclass::m_NumberOfParameters)
	{
		itkExceptionMacro(<< "Not the expected number of paramters");
	}
// Releasing the spacebar ("U+0020") over a button-like control simulates a
// mouse click; all other keys are ignored.
void BaseClickableWithKeyInputType::handleKeyupEvent(InputType& inputType, KeyboardEvent* event)
{
    if (event->keyIdentifier() == "U+0020")
        inputType.dispatchSimulatedClickIfActive(event);
}
Ejemplo n.º 13
0
//TODO this should be a factory function!
// Builds a translation option: stores the target phrase and the covered
// source range, copies the target phrase's score breakdown, and materialises
// the source phrase (the covered substring for sentence input, or the
// rule's source side for confusion-network/lattice input).
TranslationOption::TranslationOption(const WordsRange &wordsRange
																		, const TargetPhrase &targetPhrase
																		, const InputType &inputType)
: m_targetPhrase(targetPhrase)
, m_sourceWordsRange(wordsRange)
{
	// set score
	m_scoreBreakdown.PlusEquals(targetPhrase.GetScoreBreakdown());

	if (inputType.GetType() == SentenceInput)
	{
		Phrase phrase = inputType.GetSubString(wordsRange);
		m_sourcePhrase = new Phrase(phrase);
	}
	else
	{ // TODO lex reordering with confusion network
		m_sourcePhrase = new Phrase(*targetPhrase.GetSourcePhrase());
	}
}
Ejemplo n.º 14
0
// Base-class setup shared by all search algorithms: caches the manager,
// source sentence and decoding options, sizes the coverage-bitmap
// collection to the input, and wires the initial (empty) translation
// option to the initial input path.
Search::Search(Manager& manager, const InputType &source)
  : m_manager(manager)
  , m_source(source)
  , m_inputPath()
  , m_initialTransOpt()
  , m_options(manager.options())
  , interrupted_flag(0)
  , m_bitmaps(source.GetSize(), source.m_sourceCompleted)
{
  m_initialTransOpt.SetInputPath(m_inputPath);
}
Ejemplo n.º 15
0
// Splits `input` into a token list: repeatedly reads the token at the
// cursor, advancing past it, and skipping a single character whenever
// nothing was recognised.
typename Tokenizer<TokenEnumType, InputType>::ResultType Tokenizer<TokenEnumType, InputType>::Tokenize(const InputType &input) const
{
    typename ResultType::TokenList collected;

    for (auto cursor = input.begin( ); cursor < input.end( );)
    {
        TokenType token;

        readToken( cursor, input.end( ), token );

        // Move past the recognised text.
        cursor += token.value.size( );

        if (token.value.empty( ))
            ++cursor;                  // nothing matched: skip one character
        else
            collected.push_back( token );
    }

    return ResultType( collected );
}
Ejemplo n.º 16
0
// Loads the per-sentence grammar file ("grammar.<id>.gz") generated for
// this input and populates the rule table from it.
void PhraseDictionaryALSuffixArray::InitializeForInput(InputType const& source)
{
    // populate with rules for this sentence
    long translationId = source.GetTranslationId();

    string grammarFile = GetFilePath() + "/grammar." + SPrint(translationId) + ".gz";

    // NOTE(review): std::auto_ptr is deprecated (removed in C++17);
    // std::unique_ptr would be the drop-in replacement here.
    std::auto_ptr<RuleTableLoader> loader =
        RuleTableLoaderFactory::Create(grammarFile);
    bool ret = loader->Load(m_input, m_output, grammarFile, m_tableLimit,
                            *this);

    CHECK(ret);
}
void PhraseDictionaryNewFormat::InitializeForInput(const InputType& input)
{
	assert(m_runningNodesVec.size() == 0);
	size_t sourceSize = input.GetSize();
	m_runningNodesVec.resize(sourceSize);
	
	for (size_t ind = 0; ind < m_runningNodesVec.size(); ++ind)
	{
		ProcessedRule *initProcessedRule = new ProcessedRule(m_collection);
		
		ProcessedRuleStack *processedStack = new ProcessedRuleStack(sourceSize - ind + 1);
		processedStack->Add(0, initProcessedRule); // init rule. stores the top node in tree
		
		m_runningNodesVec[ind] = processedStack;
	}
}
Ejemplo n.º 18
0
void InputFeature::EvaluateWithSourceContext(const InputType &input
    , const InputPath &inputPath
    , const TargetPhrase &targetPhrase
    , const StackVec *stackVec
    , ScoreComponentCollection &scoreBreakdown
    , ScoreComponentCollection *estimatedScores) const
{
  if (m_legacy) {
    //binary phrase-table does input feature itself
    return;
  } else if (input.GetType() == WordLatticeInput) {
    const ScorePair *scores = inputPath.GetInputScore();
    if (scores) {
      scoreBreakdown.PlusEquals(this, *scores);
    }
  }
}
Ejemplo n.º 19
0
  // Computes this layer's gradient.  When `uselayer` is set the computation
  // is delegated to the wrapped base layer; otherwise the plain outer
  // product of deltas and inputs is used.  In both cases the de-noised
  // weights are restored afterwards.
  void Gradient(const InputType& input,
                const arma::Mat<eT>& d,
                GradientDataType& g)
  {
    if (!uselayer)
    {
      g = d * input.t();

      // Denoise the weights.
      weights = denoise;
      return;
    }

    baseLayer.Gradient(input, d, g);

    // Denoise the weights.
    baseLayer.Weights() = denoise;
  }
Ejemplo n.º 20
0
 // Central-difference numerical Jacobian of f at `cur`:
 //   out->col(i) = (f(cur + eps*e_i) - f(cur - eps*e_i)) / (2*eps)
 // `m` is the output dimension; `out` must be non-null and is resized here.
 void num_diff(const boost::function<OutputType (const InputType& )>& f
             , const InputType& cur // current point
             , int m // output dimension
             , double epsilon
             , MatType* out
              ) {
   assert (out != nullptr);
   const int n = cur.size();
   out->resize(m, n);
   InputType probe = cur;
   for (int i = 0; i < n; ++i) {
     probe(i) = cur(i) + epsilon;
     const OutputType forward = f(probe);
     probe(i) = cur(i) - epsilon;
     const OutputType backward = f(probe);
     out->col(i) = (forward - backward) / (2 * epsilon);
     probe(i) = cur(i); // restore before perturbing the next coordinate
   }
 }
// Variant that clears the rules of the previous sentence, then loads this
// sentence's grammar file ("grammar.out.<id>") through an input stream.
void PhraseDictionaryALSuffixArray::InitializeForInput(InputType const& source)
{
  // clear out rules for previous sentence
  m_collection.Clear();
  
  // populate with rules for this sentence
  long translationId = source.GetTranslationId();
  
  string grammarFile = GetFilePath() + "/grammar.out." + SPrint(translationId);
  
  // data from file
  InputFileStream inFile(grammarFile);

  // NOTE(review): std::auto_ptr is deprecated (removed in C++17);
  // std::unique_ptr would be the drop-in replacement here.
  std::auto_ptr<RuleTableLoader> loader =
  RuleTableLoaderFactory::Create(grammarFile);
  bool ret = loader->Load(*m_input, *m_output, inFile, *m_weight, m_tableLimit,
                          *m_languageModels, m_wpProducer, *this);
  
  CHECK(ret);
}
void CoveredReferenceFeature::EvaluateWithSourceContext(const InputType &input
    , const InputPath &inputPath
    , const TargetPhrase &targetPhrase
    , const StackVec *stackVec
    , ScoreComponentCollection &scoreBreakdown
    , ScoreComponentCollection *estimatedFutureScore) const
{
  long id = input.GetTranslationId();
  boost::unordered_map<long, std::multiset<string> >::const_iterator refIt = m_refs.find(id);
  multiset<string> wordsInPhrase = GetWordsInPhrase(targetPhrase);
  multiset<string> covered;
  set_intersection(wordsInPhrase.begin(), wordsInPhrase.end(),
                   refIt->second.begin(), refIt->second.end(),
                   inserter(covered, covered.begin()));
  vector<float> scores;
  scores.push_back(covered.size());

  scoreBreakdown.Assign(this, scores);
  estimatedFutureScore->Assign(this, scores);
}
// Legacy in-memory variant: builds one processed-rule collection per source
// position, each seeded with the root node of the SCFG rule trie.
ChartRuleLookupManagerMemory::ChartRuleLookupManagerMemory(
    const InputType &src,
    const CellCollection &cellColl,
    const PhraseDictionarySCFG &ruleTable)
  : ChartRuleLookupManager(src, cellColl)
  , m_ruleTable(ruleTable)
{
  assert(m_processedRuleColls.size() == 0);
  size_t sourceSize = src.GetSize();
  m_processedRuleColls.resize(sourceSize);

  const PhraseDictionaryNodeSCFG &rootNode = m_ruleTable.GetRootNode();

  for (size_t ind = 0; ind < m_processedRuleColls.size(); ++ind)
  {
    ProcessedRule *initProcessedRule = new ProcessedRule(rootNode);

    // Collection sized to the number of spans starting at this position.
    ProcessedRuleColl *processedRuleColl = new ProcessedRuleColl(sourceSize - ind + 1);
    processedRuleColl->Add(0, initProcessedRule); // init rule. stores the top node in tree

    m_processedRuleColls[ind] = processedRuleColl;
  }
}
Ejemplo n.º 24
0
// Grid search over lattice-MBR hyper-parameters (precision p, ratio r,
// pruning factor, scale): decodes each input sentence once, then re-runs
// lattice MBR for every grid point and prints the best hypothesis per
// point as "<line> ||| p r prune scale ||| <hypothesis>".
int main(int argc, char* argv[])
{
  cerr << "Lattice MBR Grid search" << endl;

  Grid grid;
  grid.addParam(lmbr_p, "-lmbr-p", 0.5);
  grid.addParam(lmbr_r, "-lmbr-r", 0.5);
  grid.addParam(lmbr_prune, "-lmbr-pruning-factor",30.0);
  grid.addParam(lmbr_scale, "-mbr-scale",1.0);

  grid.parseArgs(argc,argv);

  Parameter* params = new Parameter();
  if (!params->LoadParam(argc,argv)) {
    params->Explain();
    exit(1);
  }
  if (!StaticData::LoadDataStatic(params, argv[0])) {
    exit(1);
  }

  StaticData& staticData = const_cast<StaticData&>(StaticData::Instance());
  staticData.SetUseLatticeMBR(true);
  IOWrapper* ioWrapper = IOWrapper::GetIOWrapper(staticData);

  if (!ioWrapper) {
    throw runtime_error("Failed to initialise IOWrapper");
  }
  size_t nBestSize = staticData.GetMBRSize();

  // BUG FIX: was `nBestSize <= 0` (meaningless on an unsigned type) and
  // `throw new runtime_error(...)`, which throws a *pointer* — a
  // `catch (const std::exception&)` handler would never catch it and the
  // object would leak.  Throw by value instead.
  if (nBestSize == 0) {
    throw runtime_error("Non-positive size specified for n-best list");
  }

  size_t lineCount = 0;
  // NOTE(review): `source`, `params` and `ioWrapper` are never freed —
  // acceptable for a short-lived tool, but worth confirming ReadInput's
  // ownership contract for `source`.
  InputType* source = NULL;

  const vector<float>& pgrid = grid.getGrid(lmbr_p);
  const vector<float>& rgrid = grid.getGrid(lmbr_r);
  const vector<float>& prune_grid = grid.getGrid(lmbr_prune);
  const vector<float>& scale_grid = grid.getGrid(lmbr_scale);

  while(ioWrapper->ReadInput(staticData.GetInputType(),source)) {
    ++lineCount;
    source->SetTranslationId(lineCount);

    // Decode once per sentence; the n-best list is reused for every grid point.
    Manager manager(*source, staticData.GetSearchAlgorithm());
    manager.ProcessSentence();
    TrellisPathList nBestList;
    manager.CalcNBest(nBestSize, nBestList,true);
    //grid search
    for (vector<float>::const_iterator pi = pgrid.begin(); pi != pgrid.end(); ++pi) {
      float p = *pi;
      staticData.SetLatticeMBRPrecision(p);
      for (vector<float>::const_iterator ri = rgrid.begin(); ri != rgrid.end(); ++ri) {
        float r = *ri;
        staticData.SetLatticeMBRPRatio(r);
        for (vector<float>::const_iterator prune_i = prune_grid.begin(); prune_i != prune_grid.end(); ++prune_i) {
          size_t prune = (size_t)(*prune_i);
          staticData.SetLatticeMBRPruningFactor(prune);
          for (vector<float>::const_iterator scale_i = scale_grid.begin(); scale_i != scale_grid.end(); ++scale_i) {
            float scale = *scale_i;
            staticData.SetMBRScale(scale);
            cout << lineCount << " ||| " << p << " " << r << " " << prune << " " << scale << " ||| ";
            vector<Word> mbrBestHypo = doLatticeMBR(manager,nBestList);
            ioWrapper->OutputBestHypo(mbrBestHypo, lineCount, staticData.GetReportSegmentation(),
                           staticData.GetReportAllFactors(),cout);
          }
        }

      }
    }


  }

}
// Groups the input diffusion-image headers into three outputs by b-value
// and geometry: output 1 = non-zero-b (DWI) headers matching the first DWI
// found, output 2 = zero-b (baseline) headers, output 3 = everything else.
// Output 0 echoes the ungrouped input.
void mitk::GroupDiffusionHeadersFilter::Update()
{
  // vector of input headers; passed through unchanged as output 0
  InputType input =  static_cast<InputType>( this->GetInput( ) );
  this->SetNthOutput(0, input);

  InputType dwi;
  InputType zerodwi;
  InputType other;

  bool foundDWI = false;

  // check each series' first image
  unsigned int size = input.size();
  HeaderPointer header;
  HeaderPointer dwiHeader;   // first non-zero-b header; used as the reference
  for ( unsigned int i = 0 ; i < size ; ++i )
  {
    header = input[i];

    // list of files
    if( header->bValue > 0)
    {
      header->headerGroup = DHG_NonZeroDiffusionWeighted;
      if(!foundDWI)
        dwiHeader = header;
      foundDWI = true;
    }
    else
    {
      header->headerGroup = DHG_ZeroDiffusionWeighted;
    }
  }

  if(foundDWI)
  {
    // demote any header that does not match the reference DWI header
    for ( unsigned int i = 0 ; i < size ; ++i )
    {
      header = input[i];

      // list of files
      if( !header->isIdentical(dwiHeader))
      {
        header->headerGroup = DHG_Other;
      }
    }
  }
  else
  {
    // no DWI present at all: everything is "other"
    for ( unsigned int i = 0 ; i < size ; ++i )
    {
      header = input[i];
      header->headerGroup = DHG_Other;
    }
  }

  // distribute the headers into the per-group output vectors
  for ( unsigned int i = 0 ; i < size ; ++i )
  {
    header = input[i];
    
    switch (header->headerGroup)
    {
    case DHG_Other:
      other.push_back(header);
      break;
    case DHG_ZeroDiffusionWeighted:
      zerodwi.push_back(header);
      break;
    case DHG_NonZeroDiffusionWeighted:
      dwi.push_back(header);
      break;
    case DHG_NotYetGrouped:
      break;
    }
  }

  this->SetNthOutput(1, dwi);
  this->SetNthOutput(2, zerodwi);
  this->SetNthOutput(3, other);

}
// Fires sparse word-translation features for every aligned source/target
// word pair of the phrase.  Depending on configuration this produces:
// plain source~target pairs (m_simple), topic- or document-conditioned
// pairs (m_domainTrigger), and pairs conditioned on other source words in
// the sentence (m_sourceContext).  Target-side context is unsupported in
// this stateless feature and throws.
void WordTranslationFeature::EvaluateWithSourceContext(const InputType &input
    , const InputPath &inputPath
    , const TargetPhrase &targetPhrase
    , const StackVec *stackVec
    , ScoreComponentCollection &scoreBreakdown
    , ScoreComponentCollection *estimatedScores) const
{
  const Sentence& sentence = static_cast<const Sentence&>(input);
  const AlignmentInfo &alignment = targetPhrase.GetAlignTerm();

  // process aligned words
  for (AlignmentInfo::const_iterator alignmentPoint = alignment.begin(); alignmentPoint != alignment.end(); alignmentPoint++) {
    const Phrase& sourcePhrase = inputPath.GetPhrase();
    int sourceIndex = alignmentPoint->first;
    int targetIndex = alignmentPoint->second;
    // Skip non-terminals when operating on surface forms (factor 0).
    Word ws = sourcePhrase.GetWord(sourceIndex);
    if (m_factorTypeSource == 0 && ws.IsNonTerminal()) continue;
    Word wt = targetPhrase.GetWord(targetIndex);
    if (m_factorTypeSource == 0 && wt.IsNonTerminal()) continue;
    StringPiece sourceWord = ws.GetFactor(m_factorTypeSource)->GetString();
    StringPiece targetWord = wt.GetFactor(m_factorTypeTarget)->GetString();
    if (m_ignorePunctuation) {
      // check if source or target are punctuation
      char firstChar = sourceWord[0];
      CharHash::const_iterator charIterator = m_punctuationHash.find( firstChar );
      if(charIterator != m_punctuationHash.end())
        continue;
      firstChar = targetWord[0];
      charIterator = m_punctuationHash.find( firstChar );
      if(charIterator != m_punctuationHash.end())
        continue;
    }

    // Map out-of-vocabulary words to a single OTHER token.
    if (!m_unrestricted) {
      if (FindStringPiece(m_vocabSource, sourceWord) == m_vocabSource.end())
        sourceWord = "OTHER";
      if (FindStringPiece(m_vocabTarget, targetWord) == m_vocabTarget.end())
        targetWord = "OTHER";
    }

    if (m_simple) {
      // construct feature name
      util::StringStream featureName;
      featureName << m_description << "_";
      featureName << sourceWord;
      featureName << "~";
      featureName << targetWord;
      scoreBreakdown.SparsePlusEquals(featureName.str(), 1);
    }
    if (m_domainTrigger && !m_sourceContext) {
      const bool use_topicid = sentence.GetUseTopicId();
      const bool use_topicid_prob = sentence.GetUseTopicIdAndProb();
      if (use_topicid || use_topicid_prob) {
        if(use_topicid) {
          // use topicid as trigger
          const long topicid = sentence.GetTopicId();
          util::StringStream feature;
          feature << m_description << "_";
          if (topicid == -1)
            feature << "unk";
          else
            feature << topicid;

          feature << "_";
          feature << sourceWord;
          feature << "~";
          feature << targetWord;
          scoreBreakdown.SparsePlusEquals(feature.str(), 1);
        } else {
          // use topic probabilities
          const vector<string> &topicid_prob = *(input.GetTopicIdAndProb());
          if (atol(topicid_prob[0].c_str()) == -1) {
            util::StringStream feature;
            feature << m_description << "_unk_";
            feature << sourceWord;
            feature << "~";
            feature << targetWord;
            scoreBreakdown.SparsePlusEquals(feature.str(), 1);
          } else {
            // One feature per (topic, pair), weighted by the topic probability.
            for (size_t i=0; i+1 < topicid_prob.size(); i+=2) {
              util::StringStream feature;
              feature << m_description << "_";
              feature << topicid_prob[i];
              feature << "_";
              feature << sourceWord;
              feature << "~";
              feature << targetWord;
              scoreBreakdown.SparsePlusEquals(feature.str(), atof((topicid_prob[i+1]).c_str()));
            }
          }
        }
      } else {
        // range over domain trigger words (keywords)
        const long docid = input.GetDocumentId();
        for (boost::unordered_set<std::string>::const_iterator p = m_vocabDomain[docid].begin(); p != m_vocabDomain[docid].end(); ++p) {
          string sourceTrigger = *p;
          util::StringStream feature;
          feature << m_description << "_";
          feature << sourceTrigger;
          feature << "_";
          feature << sourceWord;
          feature << "~";
          feature << targetWord;
          scoreBreakdown.SparsePlusEquals(feature.str(), 1);
        }
      }
    }
    if (m_sourceContext) {
      size_t globalSourceIndex = inputPath.GetWordsRange().GetStartPos() + sourceIndex;
      if (!m_domainTrigger && globalSourceIndex == 0) {
        // add <s> trigger feature for source
        util::StringStream feature;
        feature << m_description << "_";
        feature << "<s>,";
        feature << sourceWord;
        feature << "~";
        feature << targetWord;
        scoreBreakdown.SparsePlusEquals(feature.str(), 1);
      }

      // range over source words to get context
      for(size_t contextIndex = 0; contextIndex < input.GetSize(); contextIndex++ ) {
        if (contextIndex == globalSourceIndex) continue;
        StringPiece sourceTrigger = input.GetWord(contextIndex).GetFactor(m_factorTypeSource)->GetString();
        if (m_ignorePunctuation) {
          // check if trigger is punctuation
          char firstChar = sourceTrigger[0];
          CharHash::const_iterator charIterator = m_punctuationHash.find( firstChar );
          if(charIterator != m_punctuationHash.end())
            continue;
        }

        const long docid = input.GetDocumentId();
        bool sourceTriggerExists = false;
        if (m_domainTrigger)
          sourceTriggerExists = FindStringPiece(m_vocabDomain[docid], sourceTrigger ) != m_vocabDomain[docid].end();
        else if (!m_unrestricted)
          sourceTriggerExists = FindStringPiece(m_vocabSource, sourceTrigger ) != m_vocabSource.end();

        if (m_domainTrigger) {
          if (sourceTriggerExists) {
            util::StringStream feature;
            feature << m_description << "_";
            feature << sourceTrigger;
            feature << "_";
            feature << sourceWord;
            feature << "~";
            feature << targetWord;
            scoreBreakdown.SparsePlusEquals(feature.str(), 1);
          }
        } else if (m_unrestricted || sourceTriggerExists) {
          // Order trigger and word by surface position in the feature name.
          util::StringStream feature;
          feature << m_description << "_";
          if (contextIndex < globalSourceIndex) {
            feature << sourceTrigger;
            feature << ",";
            feature << sourceWord;
          } else {
            feature << sourceWord;
            feature << ",";
            feature << sourceTrigger;
          }
          feature << "~";
          feature << targetWord;
          scoreBreakdown.SparsePlusEquals(feature.str(), 1);
        }
      }
    }
    if (m_targetContext) {
      throw runtime_error("Can't use target words outside current translation option in a stateless feature");
      /*
      size_t globalTargetIndex = cur_hypo.GetCurrTargetWordsRange().GetStartPos() + targetIndex;
      if (globalTargetIndex == 0) {
      	// add <s> trigger feature for source
      	stringstream feature;
      	feature << "wt_";
      	feature << sourceWord;
      	feature << "~";
      	feature << "<s>,";
      	feature << targetWord;
      	accumulator->SparsePlusEquals(feature.str(), 1);
      }

      // range over target words (up to current position) to get context
      for(size_t contextIndex = 0; contextIndex < globalTargetIndex; contextIndex++ ) {
      	string targetTrigger = cur_hypo.GetWord(contextIndex).GetFactor(m_factorTypeTarget)->GetString();
      	if (m_ignorePunctuation) {
      		// check if trigger is punctuation
      		char firstChar = targetTrigger.at(0);
      		CharHash::const_iterator charIterator = m_punctuationHash.find( firstChar );
      		if(charIterator != m_punctuationHash.end())
      			continue;
      	}

      	bool targetTriggerExists = false;
      	if (!m_unrestricted)
      		targetTriggerExists = m_vocabTarget.find( targetTrigger ) != m_vocabTarget.end();

      	if (m_unrestricted || targetTriggerExists) {
      		stringstream feature;
      		feature << "wt_";
      		feature << sourceWord;
      		feature << "~";
      		feature << targetTrigger;
      		feature << ",";
      		feature << targetWord;
      		accumulator->SparsePlusEquals(feature.str(), 1);
      	}
      }*/
    }
  }
}
Ejemplo n.º 27
0
// Applies the linear transform to a wrapped value by forwarding its
// numeric payload to the numeric overload.
NumericType TransformLinear(InputType x){
	const auto raw = x.numeric_value();
	return TransformLinearNumeric(raw);
}
// Discards the sentence-specific rule table that InitializeForInput()
// built for this input.
void PhraseDictionaryFuzzyMatch::CleanUpAfterSentenceProcessing(const InputType &source)
{
  const long translationId = source.GetTranslationId();
  m_collection.erase(translationId);
}
// Builds a per-sentence rule table via fuzzy matching: writes the input
// sentence to a temp directory, runs the fuzzy-match extractor on it, then
// parses the resulting phrase-table file (Moses "|||" format) into the
// trie rooted at m_collection[translationId], and finally sorts/prunes it.
void PhraseDictionaryFuzzyMatch::InitializeForInput(InputType const& inputSentence)
{
  char dirName[] = "/tmp/moses.XXXXXX";
  char *temp = mkdtemp(dirName);
  UTIL_THROW_IF2(temp == NULL,
		  "Couldn't create temporary directory " << dirName);

  string dirNameStr(dirName);

  string inFileName(dirNameStr + "/in");

  ofstream inFile(inFileName.c_str());

  // NOTE(review): positions 0 and GetSize()-1 are skipped — presumably the
  // <s> and </s> sentence markers; confirm against the input format.
  for (size_t i = 1; i < inputSentence.GetSize() - 1; ++i) {
    inFile << inputSentence.GetWord(i);
  }
  inFile << endl;
  inFile.close();

  long translationId = inputSentence.GetTranslationId();
  string ptFileName = m_FuzzyMatchWrapper->Extract(translationId, dirNameStr);

  // populate with rules for this sentence
  PhraseDictionaryNodeMemory &rootNode = m_collection[translationId];
  FormatType format = MosesFormat;

  // data from file
  InputFileStream inStream(ptFileName);

  // copied from class LoaderStandard
  PrintUserTime("Start loading fuzzy-match phrase model");

  const StaticData &staticData = StaticData::Instance();
  const std::string& factorDelimiter = staticData.GetFactorDelimiter();


  string lineOrig;
  size_t count = 0;

  while(getline(inStream, lineOrig)) {
    const string *line;
    if (format == HieroFormat) { // reformat line
      UTIL_THROW(util::Exception, "Cannot be Hiero format");
      //line = ReformatHieroRule(lineOrig);
    } else {
      // do nothing to format of line
      line = &lineOrig;
    }

    // Expected fields: source ||| target ||| scores ||| alignment [||| counts]
    vector<string> tokens;
    vector<float> scoreVector;

    TokenizeMultiCharSeparator(tokens, *line , "|||" );

    if (tokens.size() != 4 && tokens.size() != 5) {
      stringstream strme;
      strme << "Syntax error at " << ptFileName << ":" << count;
      UserMessage::Add(strme.str());
      abort();
    }

    const string &sourcePhraseString = tokens[0]
                                       , &targetPhraseString = tokens[1]
                                           , &scoreString        = tokens[2]
                                               , &alignString        = tokens[3];

    bool isLHSEmpty = (sourcePhraseString.find_first_not_of(" \t", 0) == string::npos);
    if (isLHSEmpty && !staticData.IsWordDeletionEnabled()) {
      TRACE_ERR( ptFileName << ":" << count << ": pt entry contains empty target, skipping\n");
      continue;
    }

    Tokenize<float>(scoreVector, scoreString);
    const size_t numScoreComponents = GetNumScoreComponents();
    if (scoreVector.size() != numScoreComponents) {
      stringstream strme;
      strme << "Size of scoreVector != number (" << scoreVector.size() << "!="
            << numScoreComponents << ") of score components on line " << count;
      UserMessage::Add(strme.str());
      abort();
    }

    UTIL_THROW_IF2(scoreVector.size() != numScoreComponents,
    		"Number of scores incorrectly specified");

    // parse source & find pt node

    // constituent labels
    Word *sourceLHS;
    Word *targetLHS;

    // source
    Phrase sourcePhrase( 0);
    sourcePhrase.CreateFromString(Input, m_input, sourcePhraseString, factorDelimiter, &sourceLHS);

    // create target phrase obj
    TargetPhrase *targetPhrase = new TargetPhrase();
    targetPhrase->CreateFromString(Output, m_output, targetPhraseString, factorDelimiter, &targetLHS);

    // rest of target phrase
    targetPhrase->SetAlignmentInfo(alignString);
    targetPhrase->SetTargetLHS(targetLHS);
    //targetPhrase->SetDebugOutput(string("New Format pt ") + line);

    // component score, for n-best output
    // Convert probabilities to (floored) log scores.
    std::transform(scoreVector.begin(),scoreVector.end(),scoreVector.begin(),TransformScore);
    std::transform(scoreVector.begin(),scoreVector.end(),scoreVector.begin(),FloorScore);

    targetPhrase->GetScoreBreakdown().Assign(this, scoreVector);
    targetPhrase->Evaluate(sourcePhrase, GetFeaturesToApply());

    TargetPhraseCollection &phraseColl = GetOrCreateTargetPhraseCollection(rootNode, sourcePhrase, *targetPhrase, sourceLHS);
    phraseColl.Add(targetPhrase);

    count++;

    if (format == HieroFormat) { // reformat line
      delete line;
    } else {
      // do nothing
    }

  }

  // sort and prune each target phrase collection
  SortAndPrune(rootNode);

  // NOTE(review): the temp directory is intentionally left in place (the
  // cleanup call below is commented out) — confirm whether that is desired.
  //removedirectoryrecursively(dirName);
}
// Returns every ordering (i.e. permutation, despite the historical name)
// of the elements of `input`, built iteratively: a "problem tree" is first
// expanded downwards (each node fixes one leading element and keeps the
// remaining set), then the orderings are reassembled bottom-up with a
// queue per level.
//
// BUG FIX: the original returned an empty result for inputs of size 1 and
// 2, because the bottom-up reassembly loop only runs when the tree has at
// least one level below the start level.  Small sizes are now handled
// explicitly; the n >= 3 algorithm is unchanged.
std::vector<std::vector<int>> GetCombinationsIterative(const std::vector<int>& input)
{
	typedef std::vector<std::vector<int>> ResultType;
	typedef std::vector<int> InputType;
	typedef std::vector<std::tuple<int, InputType>> ProblemType;
	typedef std::map<int, ProblemType> ProblemListType;
	typedef std::queue<InputType> LevelResultsType;

	// Inputs smaller than 3 fall outside the tree construction below.
	if (input.empty())
		return {};
	if (input.size() == 1)
		return { input };
	if (input.size() == 2)
		return { { input[0], input[1] }, { input[1], input[0] } };

	ResultType result;
	
	ProblemListType problemTree;

	// Level k holds sub-problems whose remaining set has k elements.
	int currentLevel = input.size();
	int startLevel = currentLevel;
	problemTree[currentLevel] = ProblemType
	{ 
		std::tuple<int, InputType> {0, input}
	};
	InputType temp = input;
	// Expand: each node (tag, set) spawns one child per element, fixing that
	// element as the tag and removing it from the set.  Stop at sets of 2.
	while (currentLevel > 2)
	{
		problemTree[currentLevel - 1] = ProblemType{};
		const auto& nodes = problemTree[currentLevel];
		for (const auto& node : nodes)
		{
			temp = std::get<1>(node);
			for (const auto& element : temp)
			{
				std::vector<int> diffSet = temp;
				diffSet.erase(std::find(diffSet.begin(), diffSet.end(), element));
				problemTree[currentLevel - 1].push_back(std::make_tuple(element, diffSet));
			}
		}
		currentLevel = currentLevel - 1;
	}

	LevelResultsType resultsCurrentLevel;
	LevelResultsType resultsPreviousLevel;
	
	// Reassemble bottom-up: prepend each node's fixed element to the
	// orderings produced for its remaining set.
	while (currentLevel < startLevel)
	{
		resultsCurrentLevel = {};
		const auto& nodes = problemTree[currentLevel];
		// Each node consumes an equal share of the previous level's results.
		int partitionSize = currentLevel == 2 ? 2 : resultsPreviousLevel.size() / nodes.size();
		for (const auto& node : nodes)
		{
			temp = std::get<1>(node);
			if (temp.size() == 2)
			{
				// Base case: a 2-element set has exactly two orderings.
				resultsPreviousLevel.push({ temp[0], temp[1] });
				resultsPreviousLevel.push({ temp[1], temp[0] });
			}
			for (int i = 0; i < partitionSize ; ++i)
			{
				auto& previousLevelResult = resultsPreviousLevel.front();
				previousLevelResult.insert(previousLevelResult.begin(), std::get<0>(node));
				resultsCurrentLevel.push(resultsPreviousLevel.front());
				resultsPreviousLevel.pop();
			}
		}
		resultsPreviousLevel = resultsCurrentLevel;
		++currentLevel;
	}

	// Drain the top-level queue into the result vector.
	while (!resultsCurrentLevel.empty())
	{
		result.push_back(resultsCurrentLevel.front());
		resultsCurrentLevel.pop();
	}

	return result;
}