void jsWindow::load( const string& url ) { string to_load = url; string refer = location->url.tostring(); int relocs = 0; HttpResponse* hresp; i_response_ptr resp; while (true) { HttpRequest* hreq = new HttpRequest(to_load); hreq->Method(HttpRequest::wemGet); hreq->depth(relocs); hreq->SetReferer(refer); i_request_ptr req(hreq); resp = browser->http_request(req); hresp = (HttpResponse*)resp.get(); if (hresp->HttpCode() < 300 || hresp->HttpCode() >= 400) { break; } // process redirects if (relocs < 5) { relocs++; refer = to_load; string to_load = hresp->Headers().find_first("Location"); if (to_load.empty()) { break; } // delete response resp.reset(); } // if relocs < 5 // delete request req.reset(); } // while relocations if (hresp->HttpCode() < 300) { boost::shared_ptr<html_document> doc(new html_document()); if(doc) { doc->ParseData(resp); // execute scripts #ifdef V8_DOMSHELL string source; assign_document( doc ); size_t i = 0; for(v8_wrapper::iterator_dfs it = document->begin_dfs(); it != document->end_dfs(); ++it) { if((*it)->m_tag == HTML_TAG_script && (*it)->m_entity) { document->v8_wrapper::Registrator<v8_wrapper::jsDocument>::m_data.m_execution_point = *it; source = (*it)->m_entity->attr("#code"); std::string src_url = (*it)->m_entity->attr("src"); if(src_url != "" && source == "") { if(src_url.find(':') == -1) { src_url = refer.substr(0, refer.rfind('/') + 1) + src_url; } HttpResponse* hresp_; i_response_ptr resp_; while (true) { HttpRequest* hreq_ = new HttpRequest(src_url); hreq_->Method(HttpRequest::wemGet); hreq_->depth(relocs); hreq_->SetReferer(refer); i_request_ptr req_(hreq_); resp_ = browser->http_request(req_); hresp_ = (HttpResponse*)resp_.get(); if (hresp_->HttpCode() < 300 || hresp_->HttpCode() >= 400) { break; } } if (hresp_->HttpCode() < 300) { source.assign((const char*)&resp_->Data()[0], resp_->Data().size()); (*it)->m_entity->attr("#code", source); } } #ifdef _DEBUG LOG4CXX_TRACE(webEngine::iLogger::GetLogger(), 
_T("audit_jscript::parse_scripts execute script #") << i++ << "; Source:\n" << source); #endif browser->execute_string(source, "", true, true); } } process_events(browser, doc, false); #endif } } else { LOG4CXX_WARN(webEngine::iLogger::GetLogger(), _T("jsWindow::load the ") << hresp->RealUrl().tostring() << _T(" failed! HTTP code=") << hresp->HttpCode()); } }
/// @brief Builds an HTML document from an HTTP response.
///
/// The response body is parsed only if the HTTP code is in (0, 300) or the
/// body is not empty, and only if the content type is accepted by the
/// opt_ctype_method option:
///   - 0: any content type
///   - 1: empty or "text/*"
///   - 2: only "text/*"
///   - 3: empty or "text/html"
///   - 4: only "text/html"
///
/// @param input response to parse; expected to be an HttpResponse.
/// @return the document object; it is reset (empty pointer) when ParseData
///         reports a failure. When the response is skipped or is not an
///         HttpResponse, the freshly created document is returned without
///         any data parsed into it.
i_document_ptr html_parser::parse(boost::shared_ptr<i_response> input)
{
    html_document_ptr parser(new html_document);

    if (!input) {
        // nothing to inspect; avoid dereferencing an empty pointer
        LOG4CXX_ERROR(logger, _T("html_parser::parse: no response given"));
        return parser;
    }

    try {
        // Checked downcast: throws bad_cast when input is not an HttpResponse,
        // which is what the handler below expects.
        HttpResponse& htResp = dynamic_cast<HttpResponse&>(*input);

        if ((htResp.HttpCode() > 0 && htResp.HttpCode() < 300) || htResp.Data().size() > 0) {
            string cType = htResp.ContentType();
            boost::trim(cType);
            // An empty content type is intentionally kept empty here: whether it
            // is accepted is decided by opt_ctype_method (modes 1 and 3) below.
            LOG4CXX_TRACE(logger, _T("HttpInventory::process: content-type analyze method = ") << opt_ctype_method);

            bool cTypeProcess = false;
            switch (opt_ctype_method) {
            case 0: // any content-type
                cTypeProcess = true;
                break;
            case 1: // empty and "text/*"
                if (cType == "" || starts_with(cType, "text/")) {
                    cTypeProcess = true;
                }
                break;
            case 2: // only "text/*"
                if (starts_with(cType, "text/")) {
                    cTypeProcess = true;
                }
                break;
            case 3: // empty and "text/html"
                if (cType == "" || starts_with(cType, "text/html")) {
                    cTypeProcess = true;
                }
                break;
            case 4: // only "text/html"
                if (starts_with(cType, "text/html")) {
                    cTypeProcess = true;
                }
                break;
            default:
                cTypeProcess = false;
                LOG4CXX_WARN(logger, _T("HttpInventory::process: unknown content-type analyze method = ") << opt_ctype_method);
                break;
            }

            if (cTypeProcess) {
#ifdef DEBUG
                boost::posix_time::ptime pretm = boost::posix_time::microsec_clock::local_time();
#endif
                bool pres = parser->ParseData(input);
#ifdef DEBUG
                boost::posix_time::ptime postm = boost::posix_time::microsec_clock::local_time();
                boost::posix_time::time_period duration(pretm, postm);
                LOG4CXX_DEBUG(logger, _T("HttpInventory::process ") << htResp.Data().size() << _T(" bytes parsed at ") << duration.length().total_milliseconds() << _T(" milliseconds"));
#endif
                if ( !pres ) {
                    // parsing failed: hand back an empty pointer
                    parser.reset();
                }
            } // if need to process
        } // if HTTP code valid
    }
    catch (const bad_cast&) {
        LOG4CXX_ERROR(logger, _T("HttpInventory::process: The response from ") << input->BaseUrl().tostring() << _T(" isn't the HttpResponse!"));
    }

    return parser;
}