Exemple #1
0
// integrate postagger into LTP
int LTP::postag(XML4NLP & xml) {
  if ( xml.QueryNote(NOTE_POS) ) {
    return 0;
  }

  // dependency
  int ret = wordseg(xml);
  if (0 != ret) {
    ERROR_LOG("in LTP::postag, failed to perform word segment preprocess");
    return ret;
  }

  void * postagger = _resource.GetPostagger();
  if (0 == postagger) {
    ERROR_LOG("in LTP::postag, failed to init a postagger");
    return kPostagError;
  }

  int stnsNum = xml.CountSentenceInDocument();

  if (0 == stnsNum) {
    ERROR_LOG("in LTP::postag, number of sentence equals 0");
    return kEmptyStringError;
  }

  for (int i = 0; i < stnsNum; ++i) {
    vector<string> vecWord;
    vector<string> vecPOS;

    xml.GetWordsFromSentence(vecWord, i);

    if (0 == vecWord.size()) {
      ERROR_LOG("Input sentence is empty.");
      return kEmptyStringError;
    }

    if (vecWord.size() > MAX_WORDS_NUM) {
      ERROR_LOG("Input sentence is too long.");
      return kSentenceTooLongError;
    }

    if (0 == postagger_postag(postagger, vecWord, vecPOS)) {
      ERROR_LOG("in LTP::postag, failed to perform postag on sent. #%d", i+1);
      return kPostagError;
    }

    if (xml.SetPOSsToSentence(vecPOS, i) != 0) {
      ERROR_LOG("in LTP::postag, failed to write postag result to xml");
      return kWriteXmlError;
    }
  }

  xml.SetNote(NOTE_POS);

  return 0;
}
Exemple #2
0
int LTP::srl(XML4NLP & xml) {
  if ( xml.QueryNote(NOTE_SRL) ) return 0;

  // dependency
  int ret = ner(xml);
  if (0 != ret) {
    ERROR_LOG("in LTP::srl, failed to perform ner preprocess");
    return ret;
  }

  ret = parser(xml);
  if (0 != ret) {
    ERROR_LOG("in LTP::srl, failed to perform parsing preprocess");
    return ret;
  }

  int stnsNum = xml.CountSentenceInDocument();
  if (stnsNum == 0) {
    ERROR_LOG("in LTP::srl, number of sentence equals 0");
    return kEmptyStringError;
  }

  for (int i = 0; i < stnsNum; ++i) {
    vector<string>              vecWord;
    vector<string>              vecPOS;
    vector<string>              vecNE;
    vector< pair<int, string> > vecParse;
    vector< pair<int, vector< pair<string, pair< int, int > > > > > vecSRLResult;

    if (xml.GetWordsFromSentence(vecWord, i) != 0) {
      ERROR_LOG("in LTP::ner, failed to get words from xml");
      return kReadXmlError;
    }

    if (xml.GetPOSsFromSentence(vecPOS, i) != 0) {
      ERROR_LOG("in LTP::ner, failed to get postags from xml");
      return kReadXmlError;
    }

    if (xml.GetNEsFromSentence(vecNE, i) != 0) {
      ERROR_LOG("in LTP::ner, failed to get ner result from xml");
      return kReadXmlError;
    }

    if (xml.GetParsesFromSentence(vecParse, i) != 0) {
      ERROR_LOG("in LTP::ner, failed to get parsing result from xml");
      return kReadXmlError;
    }

    if (0 != srl_dosrl(vecWord, vecPOS, vecParse, vecSRLResult)) {
      ERROR_LOG("in LTP::srl, failed to perform srl on sent. #%d", i+1);
      return kSRLError;
    }

    int j = 0;
    for (; j < vecSRLResult.size(); ++j) {
      vector<string>        vecType;
      vector< pair<int, int> >  vecBegEnd;
      int k = 0;

      for (; k < vecSRLResult[j].second.size(); ++k) {
        vecType.push_back(vecSRLResult[j].second[k].first);
        vecBegEnd.push_back(vecSRLResult[j].second[k].second);
      }

      if (0 != xml.SetPredArgToWord(i, vecSRLResult[j].first, vecType, vecBegEnd)) {
        return kWriteXmlError;
      }
    }
  }

  xml.SetNote(NOTE_SRL);
  return 0;
}
Exemple #3
0
int LTP::parser(XML4NLP & xml) {
  if ( xml.QueryNote(NOTE_PARSER) ) return 0;

  int ret = postag(xml);
  if (0 != ret) {
    ERROR_LOG("in LTP::parser, failed to perform postag preprocessing");
    return ret;
  }

  void * parser = _resource.GetParser();

  if (parser == NULL) {
    ERROR_LOG("in LTP::parser, failed to init a parser");
    return kParserError;
  }

  int stnsNum = xml.CountSentenceInDocument();
  if (stnsNum == 0) {
    ERROR_LOG("in LTP::parser, number of sentences equals 0");
    return kEmptyStringError;
  }

  for (int i = 0; i < stnsNum; ++i) {
    std::vector<std::string>  vecWord;
    std::vector<std::string>  vecPOS;
    std::vector<int>          vecHead;
    std::vector<std::string>  vecRel;

    if (xml.GetWordsFromSentence(vecWord, i) != 0) {
      ERROR_LOG("in LTP::parser, failed to get words from xml");
      return kReadXmlError;
    }

    if (xml.GetPOSsFromSentence(vecPOS, i) != 0) {
      ERROR_LOG("in LTP::parser, failed to get postags from xml");
      return kReadXmlError;
    }

    if (0 == vecWord.size()) {
      ERROR_LOG("Input sentence is empty.");
      return kEmptyStringError;
    }

    if (vecWord.size() > MAX_WORDS_NUM) {
      ERROR_LOG("Input sentence is too long.");
      return kSentenceTooLongError;
    }

    if (-1 == parser_parse(parser, vecWord, vecPOS, vecHead, vecRel)) {
      ERROR_LOG("in LTP::parser, failed to perform parse on sent. #%d", i+1);
      return kParserError;
    }

    if (0 != xml.SetParsesToSentence(vecHead, vecRel, i)) {
      ERROR_LOG("in LTP::parser, failed to write parse result to xml");
      return kWriteXmlError;
    }
  }

  xml.SetNote(NOTE_PARSER);

  return 0;
}
Exemple #4
0
// perform ner over xml
int LTP::ner(XML4NLP & xml) {
  if ( xml.QueryNote(NOTE_NE) ) {
    return 0;
  }

  // dependency
  int ret = postag(xml);
  if (0 != ret) {
    ERROR_LOG("in LTP::ner, failed to perform postag preprocess");
    return ret;
  }

  void * ner = _resource.GetNER();

  if (NULL == ner) {
    ERROR_LOG("in LTP::ner, failed to init a ner.");
    return kNERError;
  }

  int stnsNum = xml.CountSentenceInDocument();

  if (stnsNum == 0) {
    ERROR_LOG("in LTP::ner, number of sentence equals 0");
    return kEmptyStringError;
  }

  for (int i = 0; i < stnsNum; ++ i) {
    vector<string> vecWord;
    vector<string> vecPOS;
    vector<string> vecNETag;

    if (xml.GetWordsFromSentence(vecWord, i) != 0) {
      ERROR_LOG("in LTP::ner, failed to get words from xml");
      return kReadXmlError;
    }

    if (xml.GetPOSsFromSentence(vecPOS, i) != 0) {
      ERROR_LOG("in LTP::ner, failed to get postags from xml");
      return kNERError;
    }

    if (0 == vecWord.size()) {
      ERROR_LOG("Input sentence is empty.");
      return kEmptyStringError;
    }

    if (vecWord.size() > MAX_WORDS_NUM) {
      ERROR_LOG("Input sentence is too long.");
      return kSentenceTooLongError;
    }

    if (0 == ner_recognize(ner, vecWord, vecPOS, vecNETag)) {
      ERROR_LOG("in LTP::ner, failed to perform ner on sent. #%d", i+1);
      return kNERError;
    }

    xml.SetNEsToSentence(vecNETag, i);
  }

  xml.SetNote(NOTE_NE);
  return 0;
}
Exemple #5
0
static std::string xml2jsonstr(const XML4NLP & xml, std::string str_type) {
  Json::Value root;

  int paragraphNum = xml.CountParagraphInDocument();

  for (int pid = 0; pid < paragraphNum; ++ pid) {
    Json::Value paragraph;

    int stnsNum = xml.CountSentenceInParagraph(pid);
    for (int sid = 0; sid < stnsNum; ++sid) {
      Json::Value sentence;

      std::vector<std::string> vecWord;
      std::vector<std::string> vecPOS;
      std::vector<std::string> vecNETag;
      std::vector<std::pair<int, std::string>> vecParse;
      //std::vector<std::vector<std::string>> vecSemResult;
      std::vector<std::vector<std::pair<int, std::string>>> vecSemResult;
      std::vector<std::pair<int, std::vector<std::pair<const char *, std::pair< int, int > > > > > vecSRLResult;

      // seg
      xml.GetWordsFromSentence(vecWord, pid, sid);

      // postag
      if (str_type == LTP_SERVICE_NAME_POSTAG
          || str_type == LTP_SERVICE_NAME_NER
          || str_type == LTP_SERVICE_NAME_DEPPARSE
          || str_type == LTP_SERVICE_NAME_SRL
          || str_type == LTP_SERVICE_NAME_ALL) {
        xml.GetPOSsFromSentence(vecPOS, pid, sid);
      }

      // ner
      if (str_type == LTP_SERVICE_NAME_NER
          || str_type == LTP_SERVICE_NAME_SRL
          || str_type == LTP_SERVICE_NAME_ALL) {
        xml.GetNEsFromSentence(vecNETag, pid, sid);
      }

      // dp
      if (str_type == LTP_SERVICE_NAME_DEPPARSE
          || str_type == LTP_SERVICE_NAME_SRL
          || str_type == LTP_SERVICE_NAME_ALL) {
        xml.GetParsesFromSentence(vecParse, pid, sid);
      }

      // srl
      if (str_type == LTP_SERVICE_NAME_SRL
          || str_type == LTP_SERVICE_NAME_ALL) {
        // get by word
      }

      for (int wid = 0; wid < vecWord.size(); ++wid) {
        Json::Value word;
        word["id"] = wid;
        word["cont"] = vecWord[wid];

        // postag
        if (str_type == LTP_SERVICE_NAME_POSTAG
            || str_type == LTP_SERVICE_NAME_NER
            || str_type == LTP_SERVICE_NAME_DEPPARSE
            || str_type == LTP_SERVICE_NAME_SRL
            || str_type == LTP_SERVICE_NAME_ALL) {
          word["pos"] = vecPOS[wid];

        }

        // ner
        if (str_type == LTP_SERVICE_NAME_NER
            || str_type == LTP_SERVICE_NAME_SRL
            || str_type == LTP_SERVICE_NAME_ALL) {
          word["ne"] = vecNETag[wid];
        }

        // dp
        if (str_type == LTP_SERVICE_NAME_DEPPARSE
            || str_type == LTP_SERVICE_NAME_SRL
            || str_type == LTP_SERVICE_NAME_ALL) {
          word["parent"] = vecParse[wid].first;
          word["relate"] = vecParse[wid].second;
        }

        // srl
        if (str_type == LTP_SERVICE_NAME_SRL
            || str_type == LTP_SERVICE_NAME_ALL) {
          Json::Value args;
          std::vector<std::string> vecType;
          std::vector<std::pair<int, int>> vecBegEnd;
          xml.GetPredArgToWord(pid, sid, wid, vecType, vecBegEnd);
          if (vecType.size() != 0) {
            for (int arg_id = 0; arg_id < vecType.size(); ++arg_id) {
              Json::Value arg;
              arg["id"] = arg_id;
              arg["type"] = vecType[arg_id];
              arg["beg"] = vecBegEnd[arg_id].first;
              arg["end"] = vecBegEnd[arg_id].second;
              args.append(arg);
            }
          } else {
            args.resize(0);
          }
          word["arg"] = args;
        }

        sentence.append(word);
      }

      paragraph.append(sentence);
    } // sentence
    root.append(paragraph);
  } // paragraph
  return root.toStyledString();
}