Пример #1
0
void Data::loadNBest(const string &file)
{
  TRACE_ERR("loading nbest from " << file << endl);
  inputfilestream inp(file); // matches a stream with a file. Opens the file
  if (!inp.good())
    throw runtime_error("Unable to open: " + file);

  ScoreStats scoreentry;
  string line, sentence_index, sentence, feature_str;

  while (getline(inp, line, '\n')) {
    if (line.empty()) continue;
    // adding statistics for error measures
    scoreentry.clear();

    getNextPound(line, sentence_index, "|||"); // first field
    getNextPound(line, sentence, "|||");       // second field
    getNextPound(line, feature_str, "|||");    // third field

    m_scorer->prepareStats(sentence_index, sentence, scoreentry);
    m_score_data->add(scoreentry, sentence_index);

    // examine first line for name of features
    if (!existsFeatureNames()) {
      InitFeatureMap(feature_str);
    }
    AddFeatures(feature_str, sentence_index);
  }
  inp.close();
}
Пример #2
0
void Data::loadNBest(const string &file, bool oneBest)
{
  TRACE_ERR("loading nbest from " << file << endl);
  util::FilePiece in(file.c_str());

  ScoreStats scoreentry;
  string sentence, feature_str, alignment;
  int sentence_index;

  while (true) {
    try {
      StringPiece line = in.ReadLine();
      if (line.empty()) continue;
      // adding statistics for error measures
      scoreentry.clear();

      util::TokenIter<util::MultiCharacter> it(line, util::MultiCharacter("|||"));

      sentence_index = ParseInt(*it);
      if (oneBest && m_score_data->exists(sentence_index)) continue;
      ++it;
      sentence = it->as_string();
      ++it;
      feature_str = it->as_string();
      ++it;

      if (it) {
        ++it;                             // skip model score.

        if (it) {
          alignment = it->as_string(); //fifth field (if present) is either phrase or word alignment
          ++it;
          if (it) {
            alignment = it->as_string(); //sixth field (if present) is word alignment
          }
        }
      }
      //TODO check alignment exists if scorers need it

      if (m_scorer->useAlignment()) {
        sentence += "|||";
        sentence += alignment;
      }
      m_scorer->prepareStats(sentence_index, sentence, scoreentry);

      m_score_data->add(scoreentry, sentence_index);

      // examine first line for name of features
      if (!existsFeatureNames()) {
        InitFeatureMap(feature_str);
      }
      AddFeatures(feature_str, sentence_index);
    } catch (util::EndOfFileException &e) {
      PrintUserTime("Loaded N-best lists");
      break;
    }
  }
}
Пример #3
0
void Data::loadnbest(const std::string &file)
{
  TRACE_ERR("loading nbest from " << file << std::endl);

  FeatureStats featentry;
  ScoreStats scoreentry;
  std::string sentence_index;

  inputfilestream inp(file); // matches a stream with a file. Opens the file

  if (!inp.good())
    throw runtime_error("Unable to open: " + file);

  std::string substring, subsubstring, stringBuf;
  std::string theSentence;
  std::string::size_type loc;

  while (getline(inp,stringBuf,'\n')) {
    if (stringBuf.empty()) continue;

//		TRACE_ERR("stringBuf: " << stringBuf << std::endl);

    getNextPound(stringBuf, substring, "|||"); //first field
    sentence_index = substring;

    getNextPound(stringBuf, substring, "|||"); //second field
    theSentence = substring;

// adding statistics for error measures
    featentry.reset();
    scoreentry.clear();

    theScorer->prepareStats(sentence_index, theSentence, scoreentry);

    scoredata->add(scoreentry, sentence_index);

    getNextPound(stringBuf, substring, "|||"); //third field

    // examine first line for name of features
    if (!existsFeatureNames()) {
      std::string stringsupport=substring;
      std::string features="";
      std::string tmpname="";

      size_t tmpidx=0;
      while (!stringsupport.empty()) {
        //			TRACE_ERR("Decompounding: " << substring << std::endl);
        getNextPound(stringsupport, subsubstring);

        // string ending with ":" are skipped, because they are the names of the features
        if ((loc = subsubstring.find_last_of(":")) != subsubstring.length()-1) {
          features+=tmpname+"_"+stringify(tmpidx)+" ";
          tmpidx++;
        }
        // ignore sparse feature name
        else if (subsubstring.find("_") != string::npos) {
          // also ignore its value
          getNextPound(stringsupport, subsubstring);
        }
        // update current feature name
        else {
          tmpidx=0;
          tmpname=subsubstring.substr(0,subsubstring.size() - 1);
        }
      }

      featdata->setFeatureMap(features);
    }

    // adding features
    while (!substring.empty()) {
//			TRACE_ERR("Decompounding: " << substring << std::endl);
      getNextPound(substring, subsubstring);

      // no ':' -> feature value that needs to be stored
      if ((loc = subsubstring.find_last_of(":")) != subsubstring.length()-1) {
        featentry.add(ATOFST(subsubstring.c_str()));
      }
      // sparse feature name? store as well
      else if (subsubstring.find("_") != string::npos) {
        std::string name = subsubstring;
        getNextPound(substring, subsubstring);
        featentry.addSparse( name, atof(subsubstring.c_str()) );
        _sparse_flag = true;
      }
    }
    //cerr << "number of sparse features: " << featentry.getSparse().size() << endl;
    featdata->add(featentry,sentence_index);
  }

  inp.close();
}
Пример #4
0
void DataAsiya::loadNBest(const string &file)
{
  TRACE_ERR("loading nbest from DataAsiya " << file << endl);
  inputfilestream inp(file); // matches a stream with a file. Opens the file
  if (!inp.good())
    throw runtime_error("Unable to open: " + file);

  ScoreStats scoreentry;
  string line, sentence_index, sentence, feature_str, alignment;

  AsiyaScorer* a_scorer = dynamic_cast<AsiyaScorer*>(m_scorer);
  
  /*todo. change this loop. instead of obtaining the score for each sentence, obtain all the scores at once!*/
  while (getline(inp, line, '\n')) {
    if (line.empty()) continue;
    // adding statistics for error measures
    scoreentry.clear();

    getNextPound(line, sentence_index, "|||"); // first field
    getNextPound(line, sentence, "|||");       // second field
    getNextPound(line, feature_str, "|||");    // third field

    if (line.length() > 0) {
      string temp;
      getNextPound(line, temp, "|||"); //fourth field sentence score
      if (line.length() > 0) {
        getNextPound(line, alignment, "|||"); //fourth field only there if alignment scorer
      }
    }
    //TODO check alignment exists if scorers need it
    if (a_scorer->useAlignment()) {
      sentence += "|||";
      sentence += alignment;
    }
    // prepare stats gets all the scores for sentence_i of sentence_index
//    a_scorer->addCandidateSentence(sentence_index, sentence);
    a_scorer->prepareStats(atoi(sentence_index.c_str()), sentence, scoreentry);

    // examine first line for name of features
    if (!existsFeatureNames()) {
      InitFeatureMap(feature_str);
    }
    AddFeatures(feature_str, atoi(sentence_index.c_str()));
  }

  a_scorer->doScoring();
//  TRACE_ERR("before getAllScoreStats" << endl);

  std::vector<std::vector <ScoreStats> > allScoreStats = a_scorer->getAllScoreStats();
  for (int i = 0; i < allScoreStats.size(); ++i)
      for(int j = 0; j < allScoreStats[i].size(); ++j)
      {
          stringstream ss;
          ss << i;
          m_score_data->add(allScoreStats[i][j], atoi(ss.str().c_str()));
//          TRACE_ERR("allScoreStats[" << i << "].size() " << allScoreStats[i].size() << " " << allScoreStats[i][j] << endl);
      }


  inp.close();
//  a_scorer->doScoring( m_score_data );

  //score each sentence
  //a_scorer->prepareStats(sentence_index, sentence, scoreentry);
  // save the score for previous sentence. Do it aling with previous function
  //m_score_data->add(scoreentry, sentence_index);  
  
}