// If you do NOT split sentence explicitly, // this will be called according to dependencies among modules int LTP::splitSentence_dummy(XML4NLP & xml) { if ( xml.QueryNote(NOTE_SENT) ) { return 0; } int paraNum = xml.CountParagraphInDocument(); if (paraNum == 0) { ERROR_LOG("in LTP::splitsent, There is no paragraph in doc,"); ERROR_LOG("you may have loaded a blank file or have not loaded a file yet."); return kEmptyStringError; } for (int i = 0; i < paraNum; ++i) { vector<string> vecSentences; string para; xml.GetParagraph(i, para); if (0 == SplitSentence( para, vecSentences )) { ERROR_LOG("in LTP::splitsent, failed to split sentence"); return kSplitSentenceError; } // dummy // vecSentences.push_back(para); if (0 != xml.SetSentencesToParagraph(vecSentences, i)) { ERROR_LOG("in LTP::splitsent, failed to write sentence to xml"); return kWriteXmlError; } } xml.SetNote(NOTE_SENT); return 0; }
int main(int argc, char *argv[]) { if (argc != 4) { cerr << "Usage: ./ltp_test <type> <test_xml_file> <result_file>" << endl; exit(1); } string type(argv[1]); xml4nlp.LoadXMLFromFile(argv[2]); if(type == "ws"){ ltp.crfWordSeg(); } else if(type == "pos"){ ltp.postag(); } else if(type == "ner"){ ltp.ner(); } else if(type == "dp"){ ltp.gparser(); } else if(type == "srl"){ ltp.srl(); } else { ltp.srl(); } xml4nlp.SaveDOM(argv[3]); xml4nlp.ClearDOM(); return 0; }
// integrate postagger into LTP int LTP::postag(XML4NLP & xml) { if ( xml.QueryNote(NOTE_POS) ) { return 0; } // dependency int ret = wordseg(xml); if (0 != ret) { ERROR_LOG("in LTP::postag, failed to perform word segment preprocess"); return ret; } void * postagger = _resource.GetPostagger(); if (0 == postagger) { ERROR_LOG("in LTP::postag, failed to init a postagger"); return kPostagError; } int stnsNum = xml.CountSentenceInDocument(); if (0 == stnsNum) { ERROR_LOG("in LTP::postag, number of sentence equals 0"); return kEmptyStringError; } for (int i = 0; i < stnsNum; ++i) { vector<string> vecWord; vector<string> vecPOS; xml.GetWordsFromSentence(vecWord, i); if (0 == vecWord.size()) { ERROR_LOG("Input sentence is empty."); return kEmptyStringError; } if (vecWord.size() > MAX_WORDS_NUM) { ERROR_LOG("Input sentence is too long."); return kSentenceTooLongError; } if (0 == postagger_postag(postagger, vecWord, vecPOS)) { ERROR_LOG("in LTP::postag, failed to perform postag on sent. #%d", i+1); return kPostagError; } if (xml.SetPOSsToSentence(vecPOS, i) != 0) { ERROR_LOG("in LTP::postag, failed to write postag result to xml"); return kWriteXmlError; } } xml.SetNote(NOTE_POS); return 0; }
int main(int argc, char *argv[]) { if (argc != 3) { cerr << "Usage: ./ltp_test <type> <test_file>" << endl; exit(1); } cout << "Begin ..." << endl; string sentence; string type(argv[1]); ifstream in(argv[2]); ofstream log_file("test.log"); if (!in.is_open()) { cerr << "Cann't open file!" << endl; exit(1); } while(in >> sentence){ cout << "Input sentence is: " << sentence << endl; xml4nlp.CreateDOMFromString(sentence); if(type == "ws"){ ltp.crfWordSeg(); int wordNum = xml4nlp.CountWordInDocument(); for (int i = 0; i < wordNum; ++i) { const char* word = xml4nlp.GetWord(i); if (word != NULL) { log_file << word << " "; } } } else if(type == "pos"){ ltp.postag(); } else if(type == "ner"){ ltp.ner(); } else if(type == "dp"){ ltp.gparser(); } else if(type == "srl"){ ltp.srl(); } else { ltp.srl(); } string result; xml4nlp.SaveDOM(result); cout << "Result is: " << result << endl; xml4nlp.ClearDOM(); } return 0; }
// integrate word segmentor into LTP int LTP::wordseg(XML4NLP & xml) { if (xml.QueryNote(NOTE_WORD)) { return 0; } // int ret = splitSentence_dummy(xml); if (0 != ret) { ERROR_LOG("in LTP::wordseg, failed to perform split sentence preprocess."); return ret; } // get the segmentor pointer void * segmentor = _resource.GetSegmentor(); if (0 == segmentor) { ERROR_LOG("in LTP::wordseg, failed to init a segmentor"); return kWordsegError; } int stnsNum = xml.CountSentenceInDocument(); if (0 == stnsNum) { ERROR_LOG("in LTP::wordseg, number of sentence equals 0"); return kEmptyStringError; } for (int i = 0; i < stnsNum; ++ i) { std::string strStn = xml.GetSentence(i); std::vector<std::string> vctWords; if (ltp::strutils::codecs::length(strStn) > MAX_SENTENCE_LEN) { ERROR_LOG("in LTP::wordseg, input sentence is too long"); return kSentenceTooLongError; } if (0 == segmentor_segment(segmentor, strStn, vctWords)) { ERROR_LOG("in LTP::wordseg, failed to perform word segment on \"%s\"", strStn.c_str()); return kWordsegError; } if (0 != xml.SetWordsToSentence(vctWords, i)) { ERROR_LOG("in LTP::wordseg, failed to write segment result to xml"); return kWriteXmlError; } } xml.SetNote(NOTE_WORD); return 0; }
int main(int argc, char *argv[]) { if (argc != 4) { cerr << "Usage: ./ltp_test <type> <test_file> <result_file>" << endl; exit(1); } string type(argv[1]); string in_file(argv[2]); string res_file(argv[3]); xml4nlp.CreateDOMFromFile(in_file.c_str()); if (type == "ws") { ltp.crfWordSeg(xml4nlp); } else if(type == "pos"){ ltp.postag(xml4nlp); } else if(type == "ner"){ ltp.ner(xml4nlp); } else if(type == "dp"){ ltp.gparser(xml4nlp); } else if(type == "srl"){ ltp.srl(xml4nlp); } else { ltp.srl(xml4nlp); } string result; xml4nlp.SaveDOM(result); ofstream out(res_file.c_str()); out << result << endl; cerr << "Results saved to " << res_file << endl; xml4nlp.ClearDOM(); return 0; }
// Semantic role labeling step of the LTP pipeline.
//
// Runs ner and parser first (its dependencies), then for every sentence reads
// words, postags, NE tags and parse results from the xml, calls srl_dosrl and
// writes each predicate's arguments back to the xml.
//
// Returns 0 when NOTE_SRL is already set or on success; otherwise the error
// code from ner/parser, or kEmptyStringError / kReadXmlError / kSRLError /
// kWriteXmlError.
int LTP::srl(XML4NLP & xml) {
  if (xml.QueryNote(NOTE_SRL)) {
    return 0;
  }

  // dependency
  int ret = ner(xml);
  if (0 != ret) {
    ERROR_LOG("in LTP::srl, failed to perform ner preprocess");
    return ret;
  }

  ret = parser(xml);
  if (0 != ret) {
    ERROR_LOG("in LTP::srl, failed to perform parsing preprocess");
    return ret;
  }

  int stnsNum = xml.CountSentenceInDocument();
  if (stnsNum == 0) {
    ERROR_LOG("in LTP::srl, number of sentence equals 0");
    return kEmptyStringError;
  }

  for (int i = 0; i < stnsNum; ++i) {
    vector<string> vecWord;
    vector<string> vecPOS;
    vector<string> vecNE;
    vector< pair<int, string> > vecParse;
    // One entry per predicate: (predicate word index, list of
    // (argument type, (begin, end))).
    vector< pair<int, vector< pair<string, pair< int, int > > > > > vecSRLResult;

    if (xml.GetWordsFromSentence(vecWord, i) != 0) {
      ERROR_LOG("in LTP::srl, failed to get words from xml");
      return kReadXmlError;
    }

    if (xml.GetPOSsFromSentence(vecPOS, i) != 0) {
      ERROR_LOG("in LTP::srl, failed to get postags from xml");
      return kReadXmlError;
    }

    // The NE tags are not passed to srl_dosrl below; the read is kept so a
    // missing NE layer is still reported as a read error.
    if (xml.GetNEsFromSentence(vecNE, i) != 0) {
      ERROR_LOG("in LTP::srl, failed to get ner result from xml");
      return kReadXmlError;
    }

    if (xml.GetParsesFromSentence(vecParse, i) != 0) {
      ERROR_LOG("in LTP::srl, failed to get parsing result from xml");
      return kReadXmlError;
    }

    if (0 != srl_dosrl(vecWord, vecPOS, vecParse, vecSRLResult)) {
      ERROR_LOG("in LTP::srl, failed to perform srl on sent. #%d", i+1);
      return kSRLError;
    }

    for (size_t j = 0; j < vecSRLResult.size(); ++j) {
      const vector< pair<string, pair<int, int> > > & vecArgs = vecSRLResult[j].second;

      // Split the (type, range) pairs into the two parallel vectors that
      // SetPredArgToWord takes.
      vector<string> vecType;
      vector< pair<int, int> > vecBegEnd;
      vecType.reserve(vecArgs.size());
      vecBegEnd.reserve(vecArgs.size());

      for (size_t k = 0; k < vecArgs.size(); ++k) {
        vecType.push_back(vecArgs[k].first);
        vecBegEnd.push_back(vecArgs[k].second);
      }

      if (0 != xml.SetPredArgToWord(i, vecSRLResult[j].first, vecType, vecBegEnd)) {
        ERROR_LOG("in LTP::srl, failed to write srl result to xml");
        return kWriteXmlError;
      }
    }
  }

  xml.SetNote(NOTE_SRL);
  return 0;
}
// Dependency parsing step of the LTP pipeline.
//
// Runs postag first (its dependency), then parses every sentence in the
// document and stores heads and relations in the xml.
//
// Returns 0 when NOTE_PARSER is already set or on success; otherwise the
// error code from postag, or kParserError / kEmptyStringError /
// kReadXmlError / kSentenceTooLongError / kWriteXmlError.
int LTP::parser(XML4NLP & xml) {
  if (xml.QueryNote(NOTE_PARSER)) {
    return 0;
  }

  // dependency
  int status = postag(xml);
  if (status != 0) {
    ERROR_LOG("in LTP::parser, failed to perform postag preprocessing");
    return status;
  }

  void * handle = _resource.GetParser();
  if (NULL == handle) {
    ERROR_LOG("in LTP::parser, failed to init a parser");
    return kParserError;
  }

  int numSentences = xml.CountSentenceInDocument();
  if (0 == numSentences) {
    ERROR_LOG("in LTP::parser, number of sentences equals 0");
    return kEmptyStringError;
  }

  int sid = 0;
  while (sid < numSentences) {
    std::vector<std::string> words;
    std::vector<std::string> tags;
    std::vector<int>         heads;
    std::vector<std::string> relations;

    if (0 != xml.GetWordsFromSentence(words, sid)) {
      ERROR_LOG("in LTP::parser, failed to get words from xml");
      return kReadXmlError;
    }

    if (0 != xml.GetPOSsFromSentence(tags, sid)) {
      ERROR_LOG("in LTP::parser, failed to get postags from xml");
      return kReadXmlError;
    }

    if (words.empty()) {
      ERROR_LOG("Input sentence is empty.");
      return kEmptyStringError;
    }

    if (words.size() > MAX_WORDS_NUM) {
      ERROR_LOG("Input sentence is too long.");
      return kSentenceTooLongError;
    }

    // parser_parse reports failure with -1.
    if (parser_parse(handle, words, tags, heads, relations) == -1) {
      ERROR_LOG("in LTP::parser, failed to perform parse on sent. #%d", sid + 1);
      return kParserError;
    }

    if (xml.SetParsesToSentence(heads, relations, sid) != 0) {
      ERROR_LOG("in LTP::parser, failed to write parse result to xml");
      return kWriteXmlError;
    }

    ++sid;
  }

  xml.SetNote(NOTE_PARSER);
  return 0;
}
// perform ner over xml int LTP::ner(XML4NLP & xml) { if ( xml.QueryNote(NOTE_NE) ) { return 0; } // dependency int ret = postag(xml); if (0 != ret) { ERROR_LOG("in LTP::ner, failed to perform postag preprocess"); return ret; } void * ner = _resource.GetNER(); if (NULL == ner) { ERROR_LOG("in LTP::ner, failed to init a ner."); return kNERError; } int stnsNum = xml.CountSentenceInDocument(); if (stnsNum == 0) { ERROR_LOG("in LTP::ner, number of sentence equals 0"); return kEmptyStringError; } for (int i = 0; i < stnsNum; ++ i) { vector<string> vecWord; vector<string> vecPOS; vector<string> vecNETag; if (xml.GetWordsFromSentence(vecWord, i) != 0) { ERROR_LOG("in LTP::ner, failed to get words from xml"); return kReadXmlError; } if (xml.GetPOSsFromSentence(vecPOS, i) != 0) { ERROR_LOG("in LTP::ner, failed to get postags from xml"); return kNERError; } if (0 == vecWord.size()) { ERROR_LOG("Input sentence is empty."); return kEmptyStringError; } if (vecWord.size() > MAX_WORDS_NUM) { ERROR_LOG("Input sentence is too long."); return kSentenceTooLongError; } if (0 == ner_recognize(ner, vecWord, vecPOS, vecNETag)) { ERROR_LOG("in LTP::ner, failed to perform ner on sent. #%d", i+1); return kNERError; } xml.SetNEsToSentence(vecNETag, i); } xml.SetNote(NOTE_NE); return 0; }
static int Service(struct mg_connection *conn) { char *sentence; char type[10]; char xml[10]; char buffer[POST_LEN]; string str_post_data; string str_type; string str_xml; const struct mg_request_info *ri = mg_get_request_info(conn); if (!strcmp(ri->uri, "/ltp")) { int len; while((len = mg_read(conn, buffer, sizeof(buffer) - 1)) > 0){ buffer[len] = 0; str_post_data += buffer; } TRACE_LOG("CDATA: %s", str_post_data.c_str()); TRACE_LOG("CDATA length: %d", str_post_data.size()); sentence = new char[str_post_data.size() + 1]; mg_get_var(str_post_data.c_str(), str_post_data.size(), "s", sentence, str_post_data.size()); mg_get_var(str_post_data.c_str(), str_post_data.size(), "t", type, sizeof(type) - 1); mg_get_var(str_post_data.c_str(), str_post_data.size(), "x", xml, sizeof(xml) - 1); // std::cerr << "sentence: " << sentence << std::endl; // std::cerr << "type : " << type << std::endl; // std::cerr << "xml : " << xml << std::endl; // std::cerr << "validation check" << std::endl; string strSentence = sentence; /* * validation check */ if (strlen(sentence) == 0 || !isclear(strSentence)) { // std::cerr << "Failed validation check" << std::endl; WARNING_LOG("Failed string validation check"); return 0; } if(strlen(type) == 0) { str_type = ""; } else { str_type = type; } if(strlen(xml) == 0) { str_xml = ""; } else { str_xml = xml; } delete []sentence; TRACE_LOG("Input sentence is: %s", strSentence.c_str()); if(str_xml == "y"){ xml4nlp.LoadXMLFromString(strSentence); } else { xml4nlp.CreateDOMFromString(strSentence); } if(str_type == "ws"){ engine.wordseg(); } else if(str_type == "pos"){ engine.postag(); } else if(str_type == "ner"){ engine.ner(); } else if(str_type == "dp"){ engine.parser(); } else if(str_type == "srl"){ engine.srl(); } else { engine.srl(); } string strResult; xml4nlp.SaveDOM(strResult); strResult = "HTTP/1.1 200 OK\r\n\r\n" + strResult; // cout << "Result is: " << strResult << endl; mg_printf(conn, "%s", strResult.c_str()); xml4nlp.ClearDOM(); } return 1; }
static std::string xml2jsonstr(const XML4NLP & xml, std::string str_type) { Json::Value root; int paragraphNum = xml.CountParagraphInDocument(); for (int pid = 0; pid < paragraphNum; ++ pid) { Json::Value paragraph; int stnsNum = xml.CountSentenceInParagraph(pid); for (int sid = 0; sid < stnsNum; ++sid) { Json::Value sentence; std::vector<std::string> vecWord; std::vector<std::string> vecPOS; std::vector<std::string> vecNETag; std::vector<std::pair<int, std::string>> vecParse; //std::vector<std::vector<std::string>> vecSemResult; std::vector<std::vector<std::pair<int, std::string>>> vecSemResult; std::vector<std::pair<int, std::vector<std::pair<const char *, std::pair< int, int > > > > > vecSRLResult; // seg xml.GetWordsFromSentence(vecWord, pid, sid); // postag if (str_type == LTP_SERVICE_NAME_POSTAG || str_type == LTP_SERVICE_NAME_NER || str_type == LTP_SERVICE_NAME_DEPPARSE || str_type == LTP_SERVICE_NAME_SRL || str_type == LTP_SERVICE_NAME_ALL) { xml.GetPOSsFromSentence(vecPOS, pid, sid); } // ner if (str_type == LTP_SERVICE_NAME_NER || str_type == LTP_SERVICE_NAME_SRL || str_type == LTP_SERVICE_NAME_ALL) { xml.GetNEsFromSentence(vecNETag, pid, sid); } // dp if (str_type == LTP_SERVICE_NAME_DEPPARSE || str_type == LTP_SERVICE_NAME_SRL || str_type == LTP_SERVICE_NAME_ALL) { xml.GetParsesFromSentence(vecParse, pid, sid); } // srl if (str_type == LTP_SERVICE_NAME_SRL || str_type == LTP_SERVICE_NAME_ALL) { // get by word } for (int wid = 0; wid < vecWord.size(); ++wid) { Json::Value word; word["id"] = wid; word["cont"] = vecWord[wid]; // postag if (str_type == LTP_SERVICE_NAME_POSTAG || str_type == LTP_SERVICE_NAME_NER || str_type == LTP_SERVICE_NAME_DEPPARSE || str_type == LTP_SERVICE_NAME_SRL || str_type == LTP_SERVICE_NAME_ALL) { word["pos"] = vecPOS[wid]; } // ner if (str_type == LTP_SERVICE_NAME_NER || str_type == LTP_SERVICE_NAME_SRL || str_type == LTP_SERVICE_NAME_ALL) { word["ne"] = vecNETag[wid]; } // dp if (str_type == LTP_SERVICE_NAME_DEPPARSE || 
str_type == LTP_SERVICE_NAME_SRL || str_type == LTP_SERVICE_NAME_ALL) { word["parent"] = vecParse[wid].first; word["relate"] = vecParse[wid].second; } // srl if (str_type == LTP_SERVICE_NAME_SRL || str_type == LTP_SERVICE_NAME_ALL) { Json::Value args; std::vector<std::string> vecType; std::vector<std::pair<int, int>> vecBegEnd; xml.GetPredArgToWord(pid, sid, wid, vecType, vecBegEnd); if (vecType.size() != 0) { for (int arg_id = 0; arg_id < vecType.size(); ++arg_id) { Json::Value arg; arg["id"] = arg_id; arg["type"] = vecType[arg_id]; arg["beg"] = vecBegEnd[arg_id].first; arg["end"] = vecBegEnd[arg_id].second; args.append(arg); } } else { args.resize(0); } word["arg"] = args; } sentence.append(word); } paragraph.append(sentence); } // sentence root.append(paragraph); } // paragraph return root.toStyledString(); }