Пример #1
0
    /**
     * Parse one line of parenthesised, labelled tree notation into an
     * AnnotatedParseTree, e.g. "(3 (2 good) (4 movie))".
     *
     * Rules applied while scanning `line` character by character:
     *  - '(' opens a node. The first one creates the root; deeper ones are
     *    appended to the current node's children and also registered on the
     *    root through add_general_child.
     *  - The single character that follows '(' is stored as the node label
     *    (as `ch - '0'`); only one character is consumed for it.
     *  - ')' closes the current node. Any characters collected since the last
     *    word was flushed become that node's `sentence` and its `udepth` is
     *    set to 1; the parent's `udepth` is then raised to at least the
     *    child's `udepth + 1`.
     *  - ' ' is skipped; every other character is appended to the pending word.
     *
     * @param line  Text to parse.
     * @return      The root node, or nullptr when `line` holds no '('.
     * @throws std::invalid_argument when parentheses are unbalanced, including
     *         a ')' that has no matching '('.
     */
    AnnotatedParseTree::shared_tree create_tree_from_string(const string& line) {
        const char left_parenthesis  = '(';
        const char right_parenthesis = ')';
        const char space             = ' ';

        int depth = 0;
        bool awaiting_num = false;
        std::vector<char> current_word;
        AnnotatedParseTree::shared_tree root = nullptr;
        auto current_node = root;

        // Walk the string directly: reading through a stream with
        // `while (ss) ss.get()` would also hand the end-of-file sentinel to
        // the parser as if it were a real character.
        for (const char ch : line) {
            if (awaiting_num) {
                // The character right after '(' is the node's label.
                current_node->label = static_cast<uint>(ch - '0');
                awaiting_num = false;
                continue;
            }

            if (ch == left_parenthesis) {
                if (++depth > 1) {
                    // Descend: the new node becomes the current head node.
                    current_node->children.emplace_back(make_shared<AnnotatedParseTree>(depth, current_node));
                    current_node = current_node->children.back();
                    root->add_general_child(current_node);
                } else {
                    root = make_shared<AnnotatedParseTree>(depth);
                    current_node = root;
                }
                awaiting_num = true;
            } else if (ch == right_parenthesis) {
                // A ')' with no open node would otherwise dereference a null
                // pointer below; report it as the imbalance it is.
                if (current_node == nullptr)
                    throw std::invalid_argument("ParseError: Not an equal amount of closing and opening parentheses");

                // Flush the pending word into the node being closed.
                if (!current_word.empty()) {
                    // NOTE(review): presumably swaps '\xa0' for a plain space
                    // in place; helper is defined outside this block.
                    replace_char_by_char(current_word, '\xa0', space);
                    current_node->sentence = string(current_word.begin(), current_word.end());
                    current_node->udepth   = 1;
                    current_word.clear();
                }

                // Go up a level.
                depth--;
                if (current_node->has_parent) {
                    const uint child_udepth = current_node->udepth;
                    current_node = current_node->parent.lock();
                    current_node->udepth = std::max(child_udepth + 1, current_node->udepth);
                } else {
                    current_node = nullptr;
                }
            } else if (ch != space) {
                // Part of the word currently being read (spaces are ignored).
                current_word.emplace_back(ch);
            }
        }

        if (depth != 0)
            throw std::invalid_argument("ParseError: Not an equal amount of closing and opening parentheses");
        return root;
    }
Пример #2
0
/////////////////////////////////////////////////////////////////////////////////////
/// read raw text from a string and create a initial DOM tree.
/// the paragraphs are separated by CR ("\r\n")
/////////////////////////////////////////////////////////////////////////////////////
int XML4NLP::CreateDOMFromString(const string & str) {
  ClearDOM();

  if (0 != BuildDOMFrame()) return -1;

  string strTmp = str;
  replace_char_by_char(strTmp, '\r', '\n');

  // std::cout << strTmp << std::endl;
  istringstream in(strTmp);  // How to use istringstream?
  int i = 0;
  while (getline(in, strTmp)) {
    clean_str(strTmp);

    if (strTmp.empty()) {
      continue;
    }

    if (0 != BuildParagraph(strTmp, i++)) {
      return -1;
    }
  }

  return 0;
}