Beispiel #1
0
// Fallback sentence splitter. When the caller has not split sentences
// explicitly, modules that depend on sentence boundaries call this first.
//
// Returns 0 on success, or immediately when NOTE_SENT is already set on the
// document. Otherwise returns kEmptyStringError (no paragraphs),
// kSplitSentenceError (a paragraph produced no sentences) or kWriteXmlError
// (sentences could not be stored back into the document).
int LTP::splitSentence_dummy(XML4NLP & xml) {
  // Already split: nothing to do.
  if (xml.QueryNote(NOTE_SENT)) {
    return 0;
  }

  const int paragraphCount = xml.CountParagraphInDocument();
  if (0 == paragraphCount) {
    ERROR_LOG("in LTP::splitsent, There is no paragraph in doc,");
    ERROR_LOG("you may have loaded a blank file or have not loaded a file yet.");
    return kEmptyStringError;
  }

  int pid = 0;
  while (pid < paragraphCount) {
    string paragraphText;
    xml.GetParagraph(pid, paragraphText);

    // A zero result from SplitSentence is treated as failure.
    vector<string> sentences;
    if (SplitSentence(paragraphText, sentences) == 0) {
      ERROR_LOG("in LTP::splitsent, failed to split sentence");
      return kSplitSentenceError;
    }

    if (xml.SetSentencesToParagraph(sentences, pid) != 0) {
      ERROR_LOG("in LTP::splitsent, failed to write sentence to xml");
      return kWriteXmlError;
    }

    ++pid;
  }

  // Mark the document so later calls short-circuit.
  xml.SetNote(NOTE_SENT);
  return 0;
}
Beispiel #2
0
int main(int argc, char *argv[])
{
	if (argc != 4)
	{
		cerr << "Usage: ./ltp_test <type> <test_xml_file> <result_file>" << endl;
		exit(1);
	}

	string type(argv[1]);

	xml4nlp.LoadXMLFromFile(argv[2]);
	if(type == "ws"){
		ltp.crfWordSeg();
	} else if(type == "pos"){
		ltp.postag();
	} else if(type == "ner"){
		ltp.ner();
	} else if(type == "dp"){
		ltp.gparser();
	} else if(type == "srl"){
		ltp.srl();
	} else {
		ltp.srl();
	}

	xml4nlp.SaveDOM(argv[3]);

	xml4nlp.ClearDOM();

	return 0;
}
Beispiel #3
0
// Part-of-speech tagging stage of the LTP pipeline.
//
// Runs word segmentation first, then tags every sentence of `xml` and writes
// the tags back into it. Returns 0 on success or when NOTE_POS is already
// set; otherwise the non-zero code from wordseg(), or one of kPostagError,
// kEmptyStringError, kSentenceTooLongError, kWriteXmlError.
int LTP::postag(XML4NLP & xml) {
  if (xml.QueryNote(NOTE_POS)) {
    return 0;
  }

  // Prerequisite stage: word segmentation.
  const int segStatus = wordseg(xml);
  if (segStatus != 0) {
    ERROR_LOG("in LTP::postag, failed to perform word segment preprocess");
    return segStatus;
  }

  void * tagger = _resource.GetPostagger();
  if (tagger == 0) {
    ERROR_LOG("in LTP::postag, failed to init a postagger");
    return kPostagError;
  }

  const int sentenceCount = xml.CountSentenceInDocument();
  if (sentenceCount == 0) {
    ERROR_LOG("in LTP::postag, number of sentence equals 0");
    return kEmptyStringError;
  }

  int sid = 0;
  while (sid < sentenceCount) {
    vector<string> words;
    vector<string> tags;

    xml.GetWordsFromSentence(words, sid);

    if (words.empty()) {
      ERROR_LOG("Input sentence is empty.");
      return kEmptyStringError;
    }

    if (words.size() > MAX_WORDS_NUM) {
      ERROR_LOG("Input sentence is too long.");
      return kSentenceTooLongError;
    }

    // A zero result from the tagger is treated as failure.
    if (postagger_postag(tagger, words, tags) == 0) {
      ERROR_LOG("in LTP::postag, failed to perform postag on sent. #%d", sid+1);
      return kPostagError;
    }

    if (0 != xml.SetPOSsToSentence(tags, sid)) {
      ERROR_LOG("in LTP::postag, failed to write postag result to xml");
      return kWriteXmlError;
    }

    ++sid;
  }

  // Mark the document so later calls short-circuit.
  xml.SetNote(NOTE_POS);
  return 0;
}
Beispiel #4
0
int main(int argc, char *argv[])
{
    if (argc != 3)
    {
        cerr << "Usage: ./ltp_test <type> <test_file>" << endl;
        exit(1);
    }

    cout << "Begin ..." << endl;
    string sentence;
    string type(argv[1]);
    ifstream in(argv[2]);
    ofstream log_file("test.log");

    if (!in.is_open())
    {
        cerr << "Cann't open file!" << endl;
        exit(1);
    }

    while(in >> sentence){
        cout << "Input sentence is: " << sentence << endl;

        xml4nlp.CreateDOMFromString(sentence);
        if(type == "ws"){
            ltp.crfWordSeg();
            int wordNum = xml4nlp.CountWordInDocument();
            for (int i = 0; i < wordNum; ++i)
            {
                const char* word = xml4nlp.GetWord(i);
                if (word != NULL)
                {
                    log_file << word << " ";
                }
            }
        } else if(type == "pos"){
            ltp.postag();
        } else if(type == "ner"){
            ltp.ner();
        } else if(type == "dp"){
            ltp.gparser();
        } else if(type == "srl"){
            ltp.srl();
        } else {
            ltp.srl();
        }

        string result;
        xml4nlp.SaveDOM(result);

        cout << "Result is: " << result << endl;
        xml4nlp.ClearDOM();
    }

    return 0;
}
Beispiel #5
0
// Word segmentation stage of the LTP pipeline.
//
// Ensures the document is split into sentences, then segments each sentence
// into words and writes them back into `xml`. Returns 0 on success or when
// NOTE_WORD is already set; otherwise the non-zero code from
// splitSentence_dummy(), or one of kWordsegError, kEmptyStringError,
// kSentenceTooLongError, kWriteXmlError.
int LTP::wordseg(XML4NLP & xml) {
  if (xml.QueryNote(NOTE_WORD)) {
    return 0;
  }

  // Prerequisite stage: sentence splitting.
  const int splitStatus = splitSentence_dummy(xml);
  if (splitStatus != 0) {
    ERROR_LOG("in LTP::wordseg, failed to perform split sentence preprocess.");
    return splitStatus;
  }

  void * seg = _resource.GetSegmentor();
  if (seg == 0) {
    ERROR_LOG("in LTP::wordseg, failed to init a segmentor");
    return kWordsegError;
  }

  const int sentenceCount = xml.CountSentenceInDocument();
  if (sentenceCount == 0) {
    ERROR_LOG("in LTP::wordseg, number of sentence equals 0");
    return kEmptyStringError;
  }

  int sid = 0;
  while (sid < sentenceCount) {
    std::string text = xml.GetSentence(sid);
    std::vector<std::string> words;

    // Length is measured by the codecs helper rather than by byte count.
    if (ltp::strutils::codecs::length(text) > MAX_SENTENCE_LEN) {
      ERROR_LOG("in LTP::wordseg, input sentence is too long");
      return kSentenceTooLongError;
    }

    // A zero result from the segmentor is treated as failure.
    if (segmentor_segment(seg, text, words) == 0) {
      ERROR_LOG("in LTP::wordseg, failed to perform word segment on \"%s\"",
          text.c_str());
      return kWordsegError;
    }

    if (xml.SetWordsToSentence(words, sid) != 0) {
      ERROR_LOG("in LTP::wordseg, failed to write segment result to xml");
      return kWriteXmlError;
    }

    ++sid;
  }

  // Mark the document so later calls short-circuit.
  xml.SetNote(NOTE_WORD);
  return 0;
}
Beispiel #6
0
// Command-line driver: builds a DOM from <test_file>, runs the requested
// analysis stage on it and writes the serialized DOM to <result_file>.
//
// Usage: ./ltp_test <type> <test_file> <result_file>
// <type> is one of "ws", "pos", "ner", "dp", "srl"; anything else runs srl.
//
// Returns 0 on success and 1 when the result file cannot be opened or
// written; exits with status 1 on a wrong argument count.
int main(int argc, char *argv[])
{
    if (argc != 4)
    {
        cerr << "Usage: ./ltp_test <type> <test_file> <result_file>" << endl;
        exit(1);
    }

    string type(argv[1]);
    string in_file(argv[2]);
    string res_file(argv[3]);

    // NOTE(review): the result of CreateDOMFromFile and of the analysis calls
    // below is ignored; their return conventions are not visible here.
    xml4nlp.CreateDOMFromFile(in_file.c_str());

    if (type == "ws") {
        ltp.crfWordSeg(xml4nlp);
    } else if(type == "pos"){
        ltp.postag(xml4nlp);
    } else if(type == "ner"){
        ltp.ner(xml4nlp);
    } else if(type == "dp"){
        ltp.gparser(xml4nlp);
    } else {
        // "srl" and every unrecognised type run semantic role labelling.
        ltp.srl(xml4nlp);
    }

    string result;
    xml4nlp.SaveDOM(result);

    // Do not claim success unless the file was opened and written.
    ofstream out(res_file.c_str());
    if (!out) {
        cerr << "Failed to open result file " << res_file << endl;
        xml4nlp.ClearDOM();
        return 1;
    }

    // endl flushes, so a write failure is visible in the stream state.
    out << result << endl;
    if (!out) {
        cerr << "Failed to write results to " << res_file << endl;
        xml4nlp.ClearDOM();
        return 1;
    }

    cerr << "Results saved to " << res_file << endl;

    xml4nlp.ClearDOM();

    return 0;
}
Beispiel #7
0
// Semantic role labelling stage of the LTP pipeline.
//
// Runs NER and dependency parsing first, then labels every sentence of `xml`
// and writes the predicate/argument structures back into it.
//
// Returns 0 on success or when NOTE_SRL is already set; otherwise the
// non-zero code from ner() or parser(), or one of kEmptyStringError,
// kReadXmlError, kSRLError, kWriteXmlError.
int LTP::srl(XML4NLP & xml) {
  if ( xml.QueryNote(NOTE_SRL) ) return 0;

  // Prerequisite stages.
  int ret = ner(xml);
  if (0 != ret) {
    ERROR_LOG("in LTP::srl, failed to perform ner preprocess");
    return ret;
  }

  ret = parser(xml);
  if (0 != ret) {
    ERROR_LOG("in LTP::srl, failed to perform parsing preprocess");
    return ret;
  }

  int stnsNum = xml.CountSentenceInDocument();
  if (stnsNum == 0) {
    ERROR_LOG("in LTP::srl, number of sentence equals 0");
    return kEmptyStringError;
  }

  for (int i = 0; i < stnsNum; ++i) {
    vector<string>              vecWord;
    vector<string>              vecPOS;
    vector<string>              vecNE;
    vector< pair<int, string> > vecParse;
    // One entry per predicate: (predicate word index, list of
    // (argument type, (begin, end))), as unpacked below.
    vector< pair<int, vector< pair<string, pair< int, int > > > > > vecSRLResult;

    if (xml.GetWordsFromSentence(vecWord, i) != 0) {
      ERROR_LOG("in LTP::srl, failed to get words from xml");
      return kReadXmlError;
    }

    if (xml.GetPOSsFromSentence(vecPOS, i) != 0) {
      ERROR_LOG("in LTP::srl, failed to get postags from xml");
      return kReadXmlError;
    }

    // The NE layer is not passed to srl_dosrl, but a failed read still
    // aborts the stage.
    if (xml.GetNEsFromSentence(vecNE, i) != 0) {
      ERROR_LOG("in LTP::srl, failed to get ner result from xml");
      return kReadXmlError;
    }

    if (xml.GetParsesFromSentence(vecParse, i) != 0) {
      ERROR_LOG("in LTP::srl, failed to get parsing result from xml");
      return kReadXmlError;
    }

    if (0 != srl_dosrl(vecWord, vecPOS, vecParse, vecSRLResult)) {
      ERROR_LOG("in LTP::srl, failed to perform srl on sent. #%d", i+1);
      return kSRLError;
    }

    for (size_t j = 0; j < vecSRLResult.size(); ++j) {
      vector<string>            vecType;
      vector< pair<int, int> >  vecBegEnd;

      // Split the (type, range) pairs into the two parallel vectors that
      // SetPredArgToWord takes.
      for (size_t k = 0; k < vecSRLResult[j].second.size(); ++k) {
        vecType.push_back(vecSRLResult[j].second[k].first);
        vecBegEnd.push_back(vecSRLResult[j].second[k].second);
      }

      if (0 != xml.SetPredArgToWord(i, vecSRLResult[j].first, vecType, vecBegEnd)) {
        ERROR_LOG("in LTP::srl, failed to write srl result to xml");
        return kWriteXmlError;
      }
    }
  }

  // Mark the document so later calls short-circuit.
  xml.SetNote(NOTE_SRL);
  return 0;
}
Beispiel #8
0
// Dependency parsing stage of the LTP pipeline.
//
// Runs part-of-speech tagging first, then parses every sentence of `xml` and
// writes heads and relations back into it. Returns 0 on success or when
// NOTE_PARSER is already set; otherwise the non-zero code from postag(), or
// one of kParserError, kEmptyStringError, kReadXmlError,
// kSentenceTooLongError, kWriteXmlError.
int LTP::parser(XML4NLP & xml) {
  if (xml.QueryNote(NOTE_PARSER)) {
    return 0;
  }

  // Prerequisite stage: part-of-speech tagging.
  const int tagStatus = postag(xml);
  if (tagStatus != 0) {
    ERROR_LOG("in LTP::parser, failed to perform postag preprocessing");
    return tagStatus;
  }

  void * engine = _resource.GetParser();
  if (NULL == engine) {
    ERROR_LOG("in LTP::parser, failed to init a parser");
    return kParserError;
  }

  const int sentenceCount = xml.CountSentenceInDocument();
  if (0 == sentenceCount) {
    ERROR_LOG("in LTP::parser, number of sentences equals 0");
    return kEmptyStringError;
  }

  int sid = 0;
  while (sid < sentenceCount) {
    std::vector<std::string>  words;
    std::vector<std::string>  tags;
    std::vector<int>          heads;
    std::vector<std::string>  relations;

    if (0 != xml.GetWordsFromSentence(words, sid)) {
      ERROR_LOG("in LTP::parser, failed to get words from xml");
      return kReadXmlError;
    }

    if (0 != xml.GetPOSsFromSentence(tags, sid)) {
      ERROR_LOG("in LTP::parser, failed to get postags from xml");
      return kReadXmlError;
    }

    if (words.empty()) {
      ERROR_LOG("Input sentence is empty.");
      return kEmptyStringError;
    }

    if (words.size() > MAX_WORDS_NUM) {
      ERROR_LOG("Input sentence is too long.");
      return kSentenceTooLongError;
    }

    // Unlike the other stages, failure here is signalled by -1.
    if (parser_parse(engine, words, tags, heads, relations) == -1) {
      ERROR_LOG("in LTP::parser, failed to perform parse on sent. #%d", sid+1);
      return kParserError;
    }

    if (xml.SetParsesToSentence(heads, relations, sid) != 0) {
      ERROR_LOG("in LTP::parser, failed to write parse result to xml");
      return kWriteXmlError;
    }

    ++sid;
  }

  // Mark the document so later calls short-circuit.
  xml.SetNote(NOTE_PARSER);
  return 0;
}
Beispiel #9
0
// Named entity recognition stage of the LTP pipeline.
//
// Runs part-of-speech tagging first, then recognizes entities in every
// sentence of `xml` and writes the NE tags back into it.
//
// Returns 0 on success or when NOTE_NE is already set; otherwise the
// non-zero code from postag(), or one of kNERError, kEmptyStringError,
// kReadXmlError, kSentenceTooLongError.
int LTP::ner(XML4NLP & xml) {
  if ( xml.QueryNote(NOTE_NE) ) {
    return 0;
  }

  // Prerequisite stage: part-of-speech tagging.
  int ret = postag(xml);
  if (0 != ret) {
    ERROR_LOG("in LTP::ner, failed to perform postag preprocess");
    return ret;
  }

  void * ner = _resource.GetNER();

  if (NULL == ner) {
    ERROR_LOG("in LTP::ner, failed to init a ner.");
    return kNERError;
  }

  int stnsNum = xml.CountSentenceInDocument();

  if (stnsNum == 0) {
    ERROR_LOG("in LTP::ner, number of sentence equals 0");
    return kEmptyStringError;
  }

  for (int i = 0; i < stnsNum; ++ i) {
    vector<string> vecWord;
    vector<string> vecPOS;
    vector<string> vecNETag;

    if (xml.GetWordsFromSentence(vecWord, i) != 0) {
      ERROR_LOG("in LTP::ner, failed to get words from xml");
      return kReadXmlError;
    }

    // A failed read is reported as a read error, matching the word read
    // above, rather than as a recognizer failure.
    if (xml.GetPOSsFromSentence(vecPOS, i) != 0) {
      ERROR_LOG("in LTP::ner, failed to get postags from xml");
      return kReadXmlError;
    }

    if (0 == vecWord.size()) {
      ERROR_LOG("Input sentence is empty.");
      return kEmptyStringError;
    }

    if (vecWord.size() > MAX_WORDS_NUM) {
      ERROR_LOG("Input sentence is too long.");
      return kSentenceTooLongError;
    }

    // A zero result from the recognizer is treated as failure.
    if (0 == ner_recognize(ner, vecWord, vecPOS, vecNETag)) {
      ERROR_LOG("in LTP::ner, failed to perform ner on sent. #%d", i+1);
      return kNERError;
    }

    // NOTE(review): the result of SetNEsToSentence is not checked, unlike
    // the other Set*ToSentence calls in this class — confirm whether a
    // write failure should abort with a write error here.
    xml.SetNEsToSentence(vecNETag, i);
  }

  // Mark the document so later calls short-circuit.
  xml.SetNote(NOTE_NE);
  return 0;
}
Beispiel #10
0
static int Service(struct mg_connection *conn) {
    char *sentence;
    char type[10];
    char xml[10];
    char buffer[POST_LEN];

    string str_post_data;
    string str_type;
    string str_xml;

    const struct mg_request_info *ri = mg_get_request_info(conn);

    if (!strcmp(ri->uri, "/ltp")) {
        int len;
        while((len = mg_read(conn, buffer, sizeof(buffer) - 1)) > 0){
            buffer[len] = 0;
            str_post_data += buffer;
        }

        TRACE_LOG("CDATA: %s", str_post_data.c_str());
        TRACE_LOG("CDATA length: %d", str_post_data.size());

        sentence = new char[str_post_data.size() + 1];

        mg_get_var(str_post_data.c_str(), 
                str_post_data.size(), 
                "s",
                sentence,
                str_post_data.size());

        mg_get_var(str_post_data.c_str(), 
                str_post_data.size(), 
                "t",
                type,
                sizeof(type) - 1);

        mg_get_var(str_post_data.c_str(), 
                str_post_data.size(), 
                "x",
                xml,
                sizeof(xml) - 1);

        // std::cerr << "sentence: " << sentence << std::endl;
        // std::cerr << "type    : " << type << std::endl;
        // std::cerr << "xml     : " << xml << std::endl;
        // std::cerr << "validation check" << std::endl;

        string strSentence = sentence;

        /*
         * validation check
         */
        if (strlen(sentence) == 0 || !isclear(strSentence)) {
            // std::cerr << "Failed validation check" << std::endl;
            WARNING_LOG("Failed string validation check");
            return 0;
        }

        if(strlen(type) == 0) {
            str_type = "";
        } else {
            str_type = type;
        }

        if(strlen(xml) == 0) {
            str_xml = "";
        } else {
            str_xml = xml;
        }

        delete []sentence;

        TRACE_LOG("Input sentence is: %s", strSentence.c_str());

        if(str_xml == "y"){
            xml4nlp.LoadXMLFromString(strSentence);
        } else {
            xml4nlp.CreateDOMFromString(strSentence);
        }

        if(str_type == "ws"){
            engine.wordseg();
        } else if(str_type == "pos"){
            engine.postag();
        } else if(str_type == "ner"){
            engine.ner();
        } else if(str_type == "dp"){
            engine.parser();
        } else if(str_type == "srl"){
            engine.srl();
        } else {
            engine.srl();
        }

        string strResult;
        xml4nlp.SaveDOM(strResult);

        strResult = "HTTP/1.1 200 OK\r\n\r\n" + strResult;

        // cout << "Result is: " << strResult << endl;
        mg_printf(conn, "%s", strResult.c_str());

        xml4nlp.ClearDOM();
    }
    return 1;
}
Beispiel #11
0
static std::string xml2jsonstr(const XML4NLP & xml, std::string str_type) {
  Json::Value root;

  int paragraphNum = xml.CountParagraphInDocument();

  for (int pid = 0; pid < paragraphNum; ++ pid) {
    Json::Value paragraph;

    int stnsNum = xml.CountSentenceInParagraph(pid);
    for (int sid = 0; sid < stnsNum; ++sid) {
      Json::Value sentence;

      std::vector<std::string> vecWord;
      std::vector<std::string> vecPOS;
      std::vector<std::string> vecNETag;
      std::vector<std::pair<int, std::string>> vecParse;
      //std::vector<std::vector<std::string>> vecSemResult;
      std::vector<std::vector<std::pair<int, std::string>>> vecSemResult;
      std::vector<std::pair<int, std::vector<std::pair<const char *, std::pair< int, int > > > > > vecSRLResult;

      // seg
      xml.GetWordsFromSentence(vecWord, pid, sid);

      // postag
      if (str_type == LTP_SERVICE_NAME_POSTAG
          || str_type == LTP_SERVICE_NAME_NER
          || str_type == LTP_SERVICE_NAME_DEPPARSE
          || str_type == LTP_SERVICE_NAME_SRL
          || str_type == LTP_SERVICE_NAME_ALL) {
        xml.GetPOSsFromSentence(vecPOS, pid, sid);
      }

      // ner
      if (str_type == LTP_SERVICE_NAME_NER
          || str_type == LTP_SERVICE_NAME_SRL
          || str_type == LTP_SERVICE_NAME_ALL) {
        xml.GetNEsFromSentence(vecNETag, pid, sid);
      }

      // dp
      if (str_type == LTP_SERVICE_NAME_DEPPARSE
          || str_type == LTP_SERVICE_NAME_SRL
          || str_type == LTP_SERVICE_NAME_ALL) {
        xml.GetParsesFromSentence(vecParse, pid, sid);
      }

      // srl
      if (str_type == LTP_SERVICE_NAME_SRL
          || str_type == LTP_SERVICE_NAME_ALL) {
        // get by word
      }

      for (int wid = 0; wid < vecWord.size(); ++wid) {
        Json::Value word;
        word["id"] = wid;
        word["cont"] = vecWord[wid];

        // postag
        if (str_type == LTP_SERVICE_NAME_POSTAG
            || str_type == LTP_SERVICE_NAME_NER
            || str_type == LTP_SERVICE_NAME_DEPPARSE
            || str_type == LTP_SERVICE_NAME_SRL
            || str_type == LTP_SERVICE_NAME_ALL) {
          word["pos"] = vecPOS[wid];

        }

        // ner
        if (str_type == LTP_SERVICE_NAME_NER
            || str_type == LTP_SERVICE_NAME_SRL
            || str_type == LTP_SERVICE_NAME_ALL) {
          word["ne"] = vecNETag[wid];
        }

        // dp
        if (str_type == LTP_SERVICE_NAME_DEPPARSE
            || str_type == LTP_SERVICE_NAME_SRL
            || str_type == LTP_SERVICE_NAME_ALL) {
          word["parent"] = vecParse[wid].first;
          word["relate"] = vecParse[wid].second;
        }

        // srl
        if (str_type == LTP_SERVICE_NAME_SRL
            || str_type == LTP_SERVICE_NAME_ALL) {
          Json::Value args;
          std::vector<std::string> vecType;
          std::vector<std::pair<int, int>> vecBegEnd;
          xml.GetPredArgToWord(pid, sid, wid, vecType, vecBegEnd);
          if (vecType.size() != 0) {
            for (int arg_id = 0; arg_id < vecType.size(); ++arg_id) {
              Json::Value arg;
              arg["id"] = arg_id;
              arg["type"] = vecType[arg_id];
              arg["beg"] = vecBegEnd[arg_id].first;
              arg["end"] = vecBegEnd[arg_id].second;
              args.append(arg);
            }
          } else {
            args.resize(0);
          }
          word["arg"] = args;
        }

        sentence.append(word);
      }

      paragraph.append(sentence);
    } // sentence
    root.append(paragraph);
  } // paragraph
  return root.toStyledString();
}