// Fills rawMessage (author, content and, when available, date) from a single
// post node.
//
// Returns false when either the author element or the message body element
// cannot be found under `post`; returns true otherwise. The date is optional:
// a missing or too-short ".postdetails" subtree does not cause failure.
//
// Note: rawMessage.author is assigned before the message body is looked up,
// so it may already be overwritten when false is returned for a missing body.
bool MessageParser::parse(CNode& post)
{
    // Author: first match of "span.name > strong".
    CSelection authorSel = post.find("span.name > strong");
    if (authorSel.nodeNum() < 1)
    {
        return false;
    }
    rawMessage.author = authorSel.nodeAt(0).text();

    // Message text: first match of ".postbody > div".
    CSelection bodySel = post.find(".postbody > div");
    if (bodySel.nodeNum() < 1)
    {
        return false;
    }
    rawMessage.content = bodySel.nodeAt(0).text();

    // Date (optional).
    // TODO(arthur)
    CSelection detailsSel = post.find(".postdetails");

    // The date is read from the 4th child of the 2nd ".postdetails" match;
    // bail out (successfully) when either level is not present.
    if (detailsSel.nodeNum() < 2)
    {
        return true;
    }
    if (detailsSel.nodeAt(1).childNum() < 4)
    {
        return true;
    }

    rawMessage.date.fromString(detailsSel.nodeAt(1).childAt(3).text());
    return true;
}
// Rebuilds EDGAR_filing_tables_ from every <table> element found in
// EDGAR_filing_content_.
//
// Tables for which TableHasMarkup() is false are stored verbatim (including
// the enclosing table tags). Tables with markup are passed through
// ConvertMarkupToText():
//   - RunMode::do_sync : converted inline, result stored immediately.
//   - any other mode   : converted on a separate thread via std::async and
//                        collected afterwards.
//
// Notes on existing behaviour (unchanged here):
//   - In the async path, converted tables are appended as they complete, after
//     all verbatim tables, so the final order can differ from document order.
//   - In the async path, a conversion that throws is logged and skipped, and
//     an empty conversion result is dropped. The sync path stores whatever
//     ConvertMarkupToText() returns and lets exceptions propagate.
//     NOTE(review): confirm whether this sync/async difference is intended.
void EDGAR_FilingFile::FindEDGAR_Tables(RunMode run_mode)
{
    // always start fresh
    EDGAR_filing_tables_.clear();

    CDocument the_filing;
    the_filing.parse(EDGAR_filing_content_.c_str());
    CSelection c = the_filing.find("table");

    // ConvertMarkupToText is a time consuming function -- it actually calls out to
    // an external process right now -- so let's do some async'ing !!

    // keep track of our async processes here.
    std::vector<std::future<std::string>> tasks;

    for (int indx = 0; indx < c.nodeNum(); ++indx)
    {
        CNode pNode = c.nodeAt(indx);

        // use the 'Outer' functions to include the table tags in the extracted content.
        std::string content = EDGAR_filing_content_.substr(pNode.startPosOuter(),
                pNode.endPosOuter() - pNode.startPosOuter());

        if (! TableHasMarkup(content))
        {
            EDGAR_filing_tables_.push_back(content);
        }
        else if (run_mode == RunMode::do_sync)
        {
            // push the returned temporary directly; no intermediate copy needed.
            EDGAR_filing_tables_.push_back(ConvertMarkupToText(content));
        }
        else
        {
            // Request std::launch::async explicitly. With the default policy the
            // implementation is allowed to defer the task until get()/wait() is
            // called; a deferred future never reports 'ready' from wait_for(), so
            // a polling helper (which wait_for_any presumably is -- verify) could
            // not make progress on it.
            tasks.push_back(std::async(std::launch::async,
                        &EDGAR_FilingFile::ConvertMarkupToText, this, content));
        }
    }

    // now, let's go look for our output...
    // One iteration per outstanding task; wait_for_any's return value is used
    // as the index of the task whose result is fetched.
    for (auto remaining = tasks.size(); remaining != 0; --remaining)
    {
        int i = wait_for_any(tasks, std::chrono::microseconds{100});

        std::string converted_text;
        try
        {
            converted_text = tasks[i].get();
        }
        catch (...)
        {
            // deliberate best-effort: a failed conversion is logged and that
            // table is skipped rather than aborting the whole filing.
            poco_error(the_logger_, "Some problem with an async process");
            continue;
        }

        if (! converted_text.empty())
        {
            EDGAR_filing_tables_.push_back(converted_text);
        }
    }
}		// -----  end of method EDGAR_FilingFile::FindEDGAR_Tables  -----