示例#1
0
void jsWindow::load( const string& url )
{
    string to_load = url;
    string refer = location->url.tostring();
    int relocs = 0;

    HttpResponse* hresp;
    i_response_ptr resp;
    while (true) {
        HttpRequest* hreq = new HttpRequest(to_load);
        hreq->Method(HttpRequest::wemGet);
        hreq->depth(relocs);
        hreq->SetReferer(refer);
        i_request_ptr req(hreq);
        resp = browser->http_request(req);
        hresp = (HttpResponse*)resp.get();
        if (hresp->HttpCode() < 300 || hresp->HttpCode() >= 400) {
            break;
        }
        // process redirects
        if (relocs < 5) {
            relocs++;
            refer = to_load;
            string to_load = hresp->Headers().find_first("Location");
            if (to_load.empty()) {
                break;
            }
            // delete response
            resp.reset();
        } // if relocs < 5
        // delete request
        req.reset();
    } // while relocations
    if (hresp->HttpCode() < 300) {
        boost::shared_ptr<html_document> doc(new html_document());
        if(doc) {
            doc->ParseData(resp);
        // execute scripts
#ifdef V8_DOMSHELL
        string source;
            assign_document( doc );

            size_t i = 0;
            for(v8_wrapper::iterator_dfs it = document->begin_dfs(); it != document->end_dfs(); ++it) {
                if((*it)->m_tag == HTML_TAG_script && (*it)->m_entity) {
                    document->v8_wrapper::Registrator<v8_wrapper::jsDocument>::m_data.m_execution_point = *it;
                    source = (*it)->m_entity->attr("#code");
                    std::string src_url = (*it)->m_entity->attr("src");
                    if(src_url != "" && source == "") {
                        if(src_url.find(':') == -1) {
                            src_url = refer.substr(0, refer.rfind('/') + 1) + src_url;
                        }
                        HttpResponse* hresp_;
                        i_response_ptr resp_;
                        while (true) {
                            HttpRequest* hreq_ = new HttpRequest(src_url);
                            hreq_->Method(HttpRequest::wemGet);
                            hreq_->depth(relocs);
                            hreq_->SetReferer(refer);
                            i_request_ptr req_(hreq_);
                            resp_ = browser->http_request(req_);
                            hresp_ = (HttpResponse*)resp_.get();
                            if (hresp_->HttpCode() < 300 || hresp_->HttpCode() >= 400) {
                                break;
                            }
                        }
                        if (hresp_->HttpCode() < 300) {
                            source.assign((const char*)&resp_->Data()[0], resp_->Data().size());
                            (*it)->m_entity->attr("#code", source);
                        }
                    }
#ifdef _DEBUG
                    LOG4CXX_TRACE(webEngine::iLogger::GetLogger(), _T("audit_jscript::parse_scripts execute script #") << i++ << "; Source:\n" << source);
#endif
                browser->execute_string(source, "", true, true);
            }
        }

            process_events(browser, doc, false);
#endif
        }
    } else {
        LOG4CXX_WARN(webEngine::iLogger::GetLogger(), _T("jsWindow::load the ") << hresp->RealUrl().tostring() << _T(" failed! HTTP code=") << hresp->HttpCode());
    }
}
/// Parses an HTTP response into an html_document, subject to the configured
/// content-type policy (`opt_ctype_method`).
///
/// The body is handed to html_document::ParseData() when the response has a
/// code in (0, 300) or carries any data, and its trimmed content type is
/// accepted by the policy:
///   0 - any content type
///   1 - empty or "text/*"
///   2 - only "text/*"
///   3 - empty or "text/html"
///   4 - only "text/html"
///
/// @param input response to parse; expected to be an HttpResponse
/// @return an empty pointer when ParseData() reports failure. In every other
///         case the html_document constructed at the top is returned — still
///         unparsed if the input was missing, of the wrong type, or rejected
///         by the checks above.
i_document_ptr html_parser::parse(boost::shared_ptr<i_response> input)
{
    html_document_ptr parser(new html_document);

    if (!input) {
        LOG4CXX_ERROR(logger, _T("html_parser::parse: no response to parse"));
        return parser;
    }

    try {
        // The reference form of dynamic_cast throws bad_cast on a type
        // mismatch, which is what the handler below is written for.
        HttpResponse* htResp = &dynamic_cast<HttpResponse&>(*input);

        if ((htResp->HttpCode() > 0 && htResp->HttpCode() < 300) || htResp->Data().size() > 0) {
            string cType = htResp->ContentType();
            boost::trim(cType);
            // An empty content type is intentionally left empty here: methods
            // 1 and 3 accept it explicitly while 2 and 4 reject it, so
            // defaulting it to "text/html" would erase that distinction.
            LOG4CXX_TRACE(logger, _T("HttpInventory::process: content-type analyze method = ") << opt_ctype_method);
            bool cTypeProcess = false;
            switch(opt_ctype_method) {
            case 0: // any content-type
                cTypeProcess = true;
                break;
            case 1: // empty and "text/*"
                if (cType == "" || starts_with(cType, "text/")) {
                    cTypeProcess = true;
                }
                break;
            case 2: // only "text/*"
                if (starts_with(cType, "text/")) {
                    cTypeProcess = true;
                }
                break;
            case 3: // empty and "text/html"
                if (cType == "" || starts_with(cType, "text/html")) {
                    cTypeProcess = true;
                }
                break;
            case 4: // only "text/html"
                if (starts_with(cType, "text/html")) {
                    cTypeProcess = true;
                }
                break;
            default:
                cTypeProcess = false;
                LOG4CXX_WARN(logger, _T("HttpInventory::process: unknown content-type analyze method = ") << opt_ctype_method);
                break;
            }

            if (cTypeProcess) {
#ifdef DEBUG
                boost::posix_time::ptime pretm = boost::posix_time::microsec_clock::local_time();
#endif
                bool pres = parser->ParseData(input);
#ifdef DEBUG
                boost::posix_time::ptime postm = boost::posix_time::microsec_clock::local_time();
                boost::posix_time::time_period duration(pretm, postm);
                LOG4CXX_DEBUG(logger, _T("HttpInventory::process ") << htResp->Data().size() << _T(" bytes parsed at ") << duration.length().total_milliseconds() << _T(" milliseconds"));
#endif
                if ( !pres ) {
                    // Parsing failed: hand back an empty pointer.
                    parser.reset();
                }
            } // if need to process
        } // if HTTP code valid
    } catch (const bad_cast&) {
        LOG4CXX_ERROR(logger, _T("HttpInventory::process: The response from ") << input->BaseUrl().tostring() << _T(" isn't the HttpResponse!"));
    }

    return parser;
}