/**
 * Build an AnnotatedParseTree from one line of parenthesised text.
 *
 * Grammar as implemented here:
 *   - '(' opens a node; the single character immediately after it is stored
 *     as the node's label (ch - '0'). No check is made that it is a digit.
 *   - ')' closes the current node. Any characters collected since the last
 *     word was flushed become the node's `sentence` (with '\xa0' mapped to a
 *     plain space) and the node's `udepth` is set to 1.
 *   - ' ' is skipped; every other character is appended to the pending word.
 *
 * Every node opened below the root is appended to its parent's `children`
 * and is additionally registered on the root via `add_general_child`.
 * When a node is closed, its parent's `udepth` is raised to at least the
 * child's `udepth + 1`.
 *
 * @param line  text to parse.
 * @return the root node, or nullptr when `line` contains no '('.
 * @throws std::invalid_argument when the parentheses are unbalanced: either a
 *         ')' appears with no open node, or the line ends with nodes still open.
 */
AnnotatedParseTree::shared_tree create_tree_from_string(const string& line) {
    const char left_parenthesis  = '(';
    const char right_parenthesis = ')';
    const char space             = ' ';

    int  depth        = 0;
    bool awaiting_num = false;
    std::vector<char> current_word;
    AnnotatedParseTree::shared_tree root = nullptr;
    auto current_node = root;

    // Walk the characters of `line` directly: unlike a `while (stream)` /
    // `get()` loop, this never feeds an end-of-file sentinel to the parser.
    for (const char ch : line) {
        if (awaiting_num) {
            // The character right after '(' is the label of the node just opened.
            current_node->label = static_cast<uint>(ch - '0');
            awaiting_num = false;
            continue;
        }

        if (ch == left_parenthesis) {
            if (++depth > 1) {
                // Nested node: attach it to the current node and descend into it.
                current_node->children.emplace_back(
                    make_shared<AnnotatedParseTree>(depth, current_node));
                current_node = current_node->children.back();
                root->add_general_child(current_node);
            } else {
                // Outermost '(': start a fresh tree.
                root = make_shared<AnnotatedParseTree>(depth);
                current_node = root;
            }
            awaiting_num = true;
        } else if (ch == right_parenthesis) {
            // A ')' with no open node (before any '(' or after the root was
            // closed) used to dereference a null pointer; report it instead.
            if (!current_node) {
                throw std::invalid_argument(
                    "ParseError: Closing parenthesis without a matching opening parenthesis");
            }

            // Flush the pending word into the node being closed.
            if (!current_word.empty()) {
                replace_char_by_char(current_word, '\xa0', space);
                current_node->sentence = string(current_word.begin(), current_word.end());
                current_node->udepth = 1;
                current_word.clear();
            }

            // Go up one level, propagating the depth to the parent.
            depth--;
            if (current_node->has_parent) {
                const uint child_udepth = current_node->udepth;
                current_node = current_node->parent.lock();
                current_node->udepth = std::max(child_udepth + 1, current_node->udepth);
            } else {
                current_node = nullptr;
            }
        } else if (ch == space) {
            // Spacing carries no information.
            continue;
        } else {
            // Part of the word currently being read.
            current_word.emplace_back(ch);
        }
    }

    if (depth != 0)
        throw std::invalid_argument("ParseError: Not an equal amount of closing and opening parentheses");
    return root;
}
///////////////////////////////////////////////////////////////////////////////////// /// read raw text from a string and create a initial DOM tree. /// the paragraphs are separated by CR ("\r\n") ///////////////////////////////////////////////////////////////////////////////////// int XML4NLP::CreateDOMFromString(const string & str) { ClearDOM(); if (0 != BuildDOMFrame()) return -1; string strTmp = str; replace_char_by_char(strTmp, '\r', '\n'); // std::cout << strTmp << std::endl; istringstream in(strTmp); // How to use istringstream? int i = 0; while (getline(in, strTmp)) { clean_str(strTmp); if (strTmp.empty()) { continue; } if (0 != BuildParagraph(strTmp, i++)) { return -1; } } return 0; }