// If you do NOT split sentence explicitly, // this will be called according to dependencies among modules int LTP::splitSentence_dummy(XML4NLP & xml) { if ( xml.QueryNote(NOTE_SENT) ) { return 0; } int paraNum = xml.CountParagraphInDocument(); if (paraNum == 0) { ERROR_LOG("in LTP::splitsent, There is no paragraph in doc,"); ERROR_LOG("you may have loaded a blank file or have not loaded a file yet."); return kEmptyStringError; } for (int i = 0; i < paraNum; ++i) { vector<string> vecSentences; string para; xml.GetParagraph(i, para); if (0 == SplitSentence( para, vecSentences )) { ERROR_LOG("in LTP::splitsent, failed to split sentence"); return kSplitSentenceError; } // dummy // vecSentences.push_back(para); if (0 != xml.SetSentencesToParagraph(vecSentences, i)) { ERROR_LOG("in LTP::splitsent, failed to write sentence to xml"); return kWriteXmlError; } } xml.SetNote(NOTE_SENT); return 0; }
static std::string xml2jsonstr(const XML4NLP & xml, std::string str_type) { Json::Value root; int paragraphNum = xml.CountParagraphInDocument(); for (int pid = 0; pid < paragraphNum; ++ pid) { Json::Value paragraph; int stnsNum = xml.CountSentenceInParagraph(pid); for (int sid = 0; sid < stnsNum; ++sid) { Json::Value sentence; std::vector<std::string> vecWord; std::vector<std::string> vecPOS; std::vector<std::string> vecNETag; std::vector<std::pair<int, std::string>> vecParse; //std::vector<std::vector<std::string>> vecSemResult; std::vector<std::vector<std::pair<int, std::string>>> vecSemResult; std::vector<std::pair<int, std::vector<std::pair<const char *, std::pair< int, int > > > > > vecSRLResult; // seg xml.GetWordsFromSentence(vecWord, pid, sid); // postag if (str_type == LTP_SERVICE_NAME_POSTAG || str_type == LTP_SERVICE_NAME_NER || str_type == LTP_SERVICE_NAME_DEPPARSE || str_type == LTP_SERVICE_NAME_SRL || str_type == LTP_SERVICE_NAME_ALL) { xml.GetPOSsFromSentence(vecPOS, pid, sid); } // ner if (str_type == LTP_SERVICE_NAME_NER || str_type == LTP_SERVICE_NAME_SRL || str_type == LTP_SERVICE_NAME_ALL) { xml.GetNEsFromSentence(vecNETag, pid, sid); } // dp if (str_type == LTP_SERVICE_NAME_DEPPARSE || str_type == LTP_SERVICE_NAME_SRL || str_type == LTP_SERVICE_NAME_ALL) { xml.GetParsesFromSentence(vecParse, pid, sid); } // srl if (str_type == LTP_SERVICE_NAME_SRL || str_type == LTP_SERVICE_NAME_ALL) { // get by word } for (int wid = 0; wid < vecWord.size(); ++wid) { Json::Value word; word["id"] = wid; word["cont"] = vecWord[wid]; // postag if (str_type == LTP_SERVICE_NAME_POSTAG || str_type == LTP_SERVICE_NAME_NER || str_type == LTP_SERVICE_NAME_DEPPARSE || str_type == LTP_SERVICE_NAME_SRL || str_type == LTP_SERVICE_NAME_ALL) { word["pos"] = vecPOS[wid]; } // ner if (str_type == LTP_SERVICE_NAME_NER || str_type == LTP_SERVICE_NAME_SRL || str_type == LTP_SERVICE_NAME_ALL) { word["ne"] = vecNETag[wid]; } // dp if (str_type == LTP_SERVICE_NAME_DEPPARSE || str_type == LTP_SERVICE_NAME_SRL || str_type == LTP_SERVICE_NAME_ALL) { word["parent"] = vecParse[wid].first; word["relate"] = vecParse[wid].second; } // srl if (str_type == LTP_SERVICE_NAME_SRL || str_type == LTP_SERVICE_NAME_ALL) { Json::Value args; std::vector<std::string> vecType; std::vector<std::pair<int, int>> vecBegEnd; xml.GetPredArgToWord(pid, sid, wid, vecType, vecBegEnd); if (vecType.size() != 0) { for (int arg_id = 0; arg_id < vecType.size(); ++arg_id) { Json::Value arg; arg["id"] = arg_id; arg["type"] = vecType[arg_id]; arg["beg"] = vecBegEnd[arg_id].first; arg["end"] = vecBegEnd[arg_id].second; args.append(arg); } } else { args.resize(0); } word["arg"] = args; } sentence.append(word); } paragraph.append(sentence); } // sentence root.append(paragraph); } // paragraph return root.toStyledString(); }