// Constructs the creator and immediately runs the whole build pipeline:
// PrintInfo(), open the output file, m_hash.BeginSave(), EncodeScores(),
// CalcHuffmanCodes(), CompressScores(), Save(), close the output file.
// Progress messages are written to std::cerr.
//
// Parameters:
//   inPath, outPath    stored in m_inPath / m_outPath; outPath is opened for
//                      writing here.
//   tempfilePath       when non-empty, m_encodedScores and m_compressedScores
//                      use an MmapAllocator built on
//                      util::FMakeTemp(tempfilePath); when empty they are
//                      default-constructed.
//   orderBits, fingerPrintBits
//                      stored in the matching members and passed to m_hash.
//   multipleScoreTrees, quantize, threads (WITH_THREADS only)
//                      stored in the matching members.
//
// Aborts the process with a message on std::cerr if the output file cannot
// be opened.
LexicalReorderingTableCreator::LexicalReorderingTableCreator(
  std::string inPath, std::string outPath, std::string tempfilePath,
  size_t orderBits, size_t fingerPrintBits, bool multipleScoreTrees,
  size_t quantize
#ifdef WITH_THREADS
  , size_t threads
#endif
  )
  : m_inPath(inPath), m_outPath(outPath), m_tempfilePath(tempfilePath),
  m_orderBits(orderBits), m_fingerPrintBits(fingerPrintBits),
  m_numScoreComponent(0), m_multipleScoreTrees(multipleScoreTrees),
  m_quantize(quantize), m_separator(" ||| "),
  // Initialise the hash from the constructor arguments, not from sibling
  // members: members are initialised in declaration order, so reading
  // m_orderBits here is only valid if it is declared before m_hash.
  m_hash(orderBits, fingerPrintBits), m_lastFlushedLine(-1)
#ifdef WITH_THREADS
  , m_threads(threads)
#endif
{
  PrintInfo();

  // Binary mode: the file receives the hash index and compressed score data,
  // so no newline translation may be applied (no-op on POSIX systems).
  m_outFile = std::fopen(m_outPath.c_str(), "wb");
  if(!m_outFile) {
    // Fail loudly instead of handing a null FILE* to the writers below.
    std::cerr << "Error: Could not open " << m_outPath
              << " for writing" << std::endl;
    abort();
  }

  std::cerr << "Pass 1/2: Creating phrase index + Counting scores" << std::endl;
  m_hash.BeginSave(m_outFile);

  // Storage for the encoded scores filled in by pass 1.
  if(!tempfilePath.empty()) {
    MmapAllocator<unsigned char> allocEncoded(util::FMakeTemp(tempfilePath));
    m_encodedScores = new StringVector<unsigned char, unsigned long, MmapAllocator>(allocEncoded);
  }
  else {
    m_encodedScores = new StringVector<unsigned char, unsigned long, MmapAllocator>();
  }

  EncodeScores();

  std::cerr << "Intermezzo: Calculating Huffman code sets" << std::endl;
  CalcHuffmanCodes();

  std::cerr << "Pass 2/2: Compressing scores" << std::endl;

  // Storage for the compressed scores produced by pass 2.
  if(!tempfilePath.empty()) {
    MmapAllocator<unsigned char> allocCompressed(util::FMakeTemp(tempfilePath));
    m_compressedScores = new StringVector<unsigned char, unsigned long, MmapAllocator>(allocCompressed);
  }
  else {
    m_compressedScores = new StringVector<unsigned char, unsigned long, MmapAllocator>();
  }
  CompressScores();

  std::cerr << "Saving to " << m_outPath << std::endl;
  Save();
  std::cerr << "Done" << std::endl;
  std::fclose(m_outFile);
}
// Encodes one phrase-table entry into a single string.
//
// Parameters:
//   tokens   fields of one input line: [0] source phrase, [1] target phrase,
//            [2] scores, and optionally [3] alignment.
//   ownRank  forwarded to EncodeTargetPhrasePREnc; only used when
//            m_coding == PREnc.
//
// Returns the contents of the stream after writing, in this order, the
// encoded target phrase, the encoded scores and (if m_useAlignmentInfo is
// set) the encoded alignment.
//
// Side effects: raises m_maxPhraseLength if this source phrase is longer.
// Aborts the process if fewer than three fields are supplied or if the
// number of scores differs from m_numScoreComponent.
std::string PhraseTableCreator::EncodeLine(std::vector<std::string>& tokens, size_t ownRank)
{
  // The three mandatory fields are indexed unconditionally below.
  if(tokens.size() < 3) {
    std::cerr << "Error: Expected at least 3 fields per line, found "
              << tokens.size() << std::endl;
    abort();
  }

  // Bind by reference; the fields are only read here.
  const std::string& sourcePhraseStr = tokens[0];
  const std::string& targetPhraseStr = tokens[1];
  const std::string& scoresStr = tokens[2];

  // The alignment field is optional; treat a missing one as empty.
  std::string alignmentStr;
  if(tokens.size() > 3)
    alignmentStr = tokens[3];

  std::vector<std::string> s = Tokenize(sourcePhraseStr);

  size_t phraseLength = s.size();
  if(m_maxPhraseLength < phraseLength)
    m_maxPhraseLength = phraseLength;

  std::vector<std::string> t = Tokenize(targetPhraseStr);
  std::vector<float> scores = Tokenize<float>(scoresStr);

  if(scores.size() != m_numScoreComponent) {
    std::cerr << "Error: Wrong number of scores detected ("
              << scores.size() << " != " << m_numScoreComponent << ") :" << std::endl;
    // Show the offending scores field (tokens[2]); tokens[3] is the optional
    // alignment field and may not exist.
    std::cerr << "Line: " << sourcePhraseStr << " ||| " << targetPhraseStr
              << " ||| " << scoresStr << " ..." << std::endl;
    abort();
  }

  std::set<AlignPoint> a;
  if(m_coding != None || m_useAlignmentInfo) {
    // Alignment positions are split on space, tab and '-', then consumed in
    // pairs. A trailing unpaired position is ignored rather than read past
    // the end of the vector.
    std::vector<size_t> positions = Tokenize<size_t>(alignmentStr, " \t-");
    for(size_t i = 0; i + 1 < positions.size(); i += 2) {
      a.insert(AlignPoint(positions[i], positions[i+1]));
    }
  }

  std::stringstream encodedTargetPhrase;

  // Select the target-phrase encoder according to the configured coding.
  if(m_coding == PREnc) {
    EncodeTargetPhrasePREnc(s, t, a, ownRank, encodedTargetPhrase);
  } else if(m_coding == REnc) {
    EncodeTargetPhraseREnc(s, t, a, encodedTargetPhrase);
  } else {
    EncodeTargetPhraseNone(t, encodedTargetPhrase);
  }

  EncodeScores(scores, encodedTargetPhrase);

  if(m_useAlignmentInfo)
    EncodeAlignment(a, encodedTargetPhrase);

  return encodedTargetPhrase.str();
}
// Constructs the creator and immediately runs the whole build pipeline:
// PrintInfo(), open the output file, m_hash.BeginSave(), EncodeScores(),
// CalcHuffmanCodes(), CompressScores(), Save(), close the output file.
// Progress messages are written to std::cerr.
//
// Parameters:
//   inPath, outPath    stored in m_inPath / m_outPath; outPath is opened for
//                      writing here.
//   orderBits, fingerPrintBits
//                      stored in the matching members and passed to m_hash.
//   multipleScoreTrees, quantize, threads (WITH_THREADS only)
//                      stored in the matching members.
//
// Aborts the process with a message on std::cerr if the output file cannot
// be opened.
LexicalReorderingTableCreator::LexicalReorderingTableCreator(
  std::string inPath, std::string outPath,
  size_t orderBits, size_t fingerPrintBits, bool multipleScoreTrees,
  size_t quantize
#ifdef WITH_THREADS
  , size_t threads
#endif
  )
  : m_inPath(inPath), m_outPath(outPath), m_orderBits(orderBits),
  m_fingerPrintBits(fingerPrintBits), m_numScoreComponent(0),
  m_multipleScoreTrees(multipleScoreTrees), m_quantize(quantize),
  // Initialise the hash from the constructor arguments, not from sibling
  // members: members are initialised in declaration order, so reading
  // m_orderBits here is only valid if it is declared before m_hash.
  m_separator(" ||| "), m_hash(orderBits, fingerPrintBits),
  m_lastFlushedLine(-1)
#ifdef WITH_THREADS
  , m_threads(threads)
#endif
{
  PrintInfo();

  // Binary mode: the file receives the hash index and compressed score data,
  // so no newline translation may be applied (no-op on POSIX systems).
  m_outFile = std::fopen(m_outPath.c_str(), "wb");
  if(!m_outFile) {
    // Fail loudly instead of handing a null FILE* to the writers below.
    std::cerr << "Error: Could not open " << m_outPath
              << " for writing" << std::endl;
    abort();
  }

  std::cerr << "Pass 1/2: Creating phrase index + Counting scores" << std::endl;
  m_hash.BeginSave(m_outFile);
  EncodeScores();

  std::cerr << "Intermezzo: Calculating Huffman code sets" << std::endl;
  CalcHuffmanCodes();

  std::cerr << "Pass 2/2: Compressing scores" << std::endl;
  CompressScores();

  std::cerr << "Saving to " << m_outPath << std::endl;
  Save();
  std::cerr << "Done" << std::endl;
  std::fclose(m_outFile);
}