コード例 #1
0
ファイル: io.cpp プロジェクト: 153370771/ltp
// Read one line from the input stream and turn it into a newly allocated
// Instance.
//
// Returns 0 when:
//   - the stream is already at end-of-file,
//   - the line is empty after trim(), or
//   - with_tag is set and some token does not break into exactly two parts
//     on `delimiter` (the offending token is written to std::cerr).
// Otherwise returns the new Instance; nothing in this function frees it on
// the success path, so the caller is responsible for it.
//
// `cursor` is advanced on every call that gets past the end-of-file check.
Instance* PostaggerReader::next() {
  if (is.eof()) {
    return 0;
  }

  ++cursor;
  // Progress message every `interval` instances; the format string appends a
  // literal "0%" after the quotient cursor / interval.
  const bool report_progress = trace && (cursor % interval == 0);
  if (report_progress) {
    INFO_LOG("reading: read %d0%% instances.", (cursor / interval));
  }

  Instance* result = new Instance;
  std::string text;

  std::getline(is, text);
  trim(text);

  if (text.empty()) {
    delete result;
    return 0;
  }

  std::vector<std::string> tokens = split(text);
  for (size_t idx = 0; idx < tokens.size(); ++idx) {
    std::string& token = tokens[idx];

    if (!with_tag) {
      // Untagged input: the token itself is the form.
      result->raw_forms.push_back(token);
      result->forms.push_back(sbc2dbc_x(token));
      continue;
    }

    // Tagged input: expect exactly "<form><delimiter><tag>".
    std::vector<std::string> parts = rsplit_by_sep(token, delimiter, 1);
    if (parts.size() != 2) {
      std::cerr << token << std::endl;
      delete result;
      return 0;
    }

    result->raw_forms.push_back(parts[0]);
    result->forms.push_back(sbc2dbc_x(parts[0]));
    result->tags.push_back(parts[1]);
  }

  return result;
}
コード例 #2
0
ファイル: io.cpp プロジェクト: HITalexwang/ltpsdp
// Read the next sentence (a run of non-empty lines) from the input stream
// and return it as a newly allocated Instance.
//
// Index 0 of every per-token vector holds a pseudo ROOT entry
// (SpecialOption::ROOT, head -1); real tokens follow in input order.
// Reading stops at the first line that is empty after trim() or when the
// stream can no longer deliver a line.
//
// Returns NULL when:
//   - the stream is already at end-of-file,
//   - no token line was read (only the ROOT entry is present), or
//   - a line has fewer than 8 columns; a warning is logged and the partially
//     built instance is discarded instead of indexing past the end of the
//     column vector.
// Otherwise returns the new Instance; nothing in this function frees it on
// the success path, so the caller is responsible for it.
Instance* CoNLLReader::next() {
  if (is.eof()) { return NULL;}

  Instance* inst = new Instance;
  std::string line;

  // Pseudo ROOT token occupying index 0.
  inst->raw_forms.push_back( SpecialOption::ROOT );
  inst->forms.push_back( SpecialOption::ROOT );
  inst->lemmas.push_back( SpecialOption::ROOT );
  inst->postags.push_back( SpecialOption::ROOT );
  inst->heads.push_back( -1 );
  inst->deprels.push_back( SpecialOption::ROOT );

  // Loop on the result of getline rather than on eof(): if the stream enters
  // a failed state without eofbit, getline leaves `line` untouched and an
  // eof()-only test would spin forever re-adding the previous row.
  while (getline(is, line)) {
    trim(line);

    if (line.size() == 0) { break; }
    std::vector<std::string> items = split(line);

    if (items.size() < 8) {
      // Columns up to index 7 are read below; a shorter row cannot be used.
      WARNING_LOG("Unknown conll format file");
      delete inst;
      return NULL;
    }

    inst->raw_forms.push_back( items[1] );  // items[1]: form
    inst->forms.push_back( sbc2dbc(items[1]) );
    inst->lemmas.push_back( items[2] );     // items[2]: lemma
    // NOTE(review): the postag is taken from items[3]; an earlier comment
    // here said items[4]. Confirm which column is intended.
    inst->postags.push_back( items[3] );    // items[3]: postag
    inst->heads.push_back( to_int(items[6]) );  // items[6]: head
    inst->deprels.push_back( items[7] );        // items[7]: deprel
  }

  // Only the ROOT entry is present: nothing was read.
  if (inst->forms.size() == 1) {
    delete inst;
    inst = NULL;
  }

  return inst;
}
コード例 #3
0
ファイル: io.cpp プロジェクト: 153370771/ltp
// Read one line from the input stream and turn it into a newly allocated
// Instance.
//
// Unsegmented input: the whole line is handed to preprocessor.preprocess()
// together with the instance's raw_forms / forms / chartypes.
//
// Segmented input: the line is split into words (stored in inst->words) and
// each word is preprocessed on its own. For every unit the preprocessor
// reports for a word, one tag is appended to inst->tags:
//   __s__  the word has a single unit,
//   __b__  first unit of a longer word,
//   __e__  last unit of a longer word,
//   __i__  any unit in between.
//
// Returns 0 when the stream is already at end-of-file, when the line is
// empty after trim(), or when preprocess() returns a negative value.
// Otherwise returns the new Instance; nothing in this function frees it on
// the success path, so the caller is responsible for it.
//
// `cursor` is advanced on every call that gets past the end-of-file check.
Instance* SegmentReader::next() {
  if (is.eof()) {
    return 0;
  }

  ++cursor;
  // Progress message every `interval` instances; the format string appends a
  // literal "0%" after the quotient cursor / interval.
  const bool report_progress = trace && (cursor % interval == 0);
  if (report_progress) {
    INFO_LOG("reading: read %d0%% instances.", (cursor / interval));
  }

  Instance* result = new Instance;
  std::string text;

  std::getline(is, text);
  trim(text);

  if (text.empty()) {
    delete result;
    return 0;
  }

  if (!segmented) {
    const int status = preprocessor.preprocess(text, result->raw_forms,
        result->forms, result->chartypes);
    if (status < 0) {
      delete result;
      return 0;
    }
    return result;
  }

  std::vector<std::string> tokens = strutils::split(text);
  result->words = tokens;

  for (size_t w = 0; w < tokens.size(); ++w) {
    const int num_units = preprocessor.preprocess(tokens[w], result->raw_forms,
        result->forms, result->chartypes);
    if (num_units < 0) {
      delete result;
      return 0;
    }

    // Non-negative from here on, so the conversion is value-preserving.
    const size_t count = static_cast<size_t>(num_units);
    for (size_t pos = 0; pos < count; ++pos) {
      if (count == 1) {
        result->tags.push_back( __s__ );
      } else if (pos == 0) {
        result->tags.push_back( __b__ );
      } else if (pos + 1 == count) {
        result->tags.push_back( __e__ );
      } else {
        result->tags.push_back( __i__ );
      }
    }
  }

  return result;
}