void TWebPgFetchEvent::OnFetchEnd(const PHttpResp& HttpResp){ EAssert(HttpResp->IsOk()); EndMSecs=TTm::GetCurUniMSecs(); int StatusCd=HttpResp->GetStatusCd(); if (StatusCd/100==2){ // codes 2XX - ok ChangeLastUrlToLc(HttpResp); PWebPg WebPg=TWebPg::New(UrlStrV, IpNumV, HttpResp); WebPg->PutFetchMSecs(GetFetchMSecs()); CloseConn(); Fetch->DisconnUrl(FId); Fetch->OnFetch(FId, WebPg); } else if (StatusCd/100==3){ // codes 3XX - redirection ChangeLastUrlToLc(HttpResp); if (UrlStrV.Len()<5){ TStr RedirUrlStr=HttpResp->GetFldVal("Location"); PUrl RedirUrl=TUrl::New(RedirUrlStr, CurUrl->GetUrlStr()); if (RedirUrl->IsOk(usHttp)){ TStrQuV CookieQuV; HttpResp->GetCookieKeyValDmPathQuV(CookieQuV); AddCookieKeyValDmPathQuV(CookieQuV); CurUrl=RedirUrl; Retries=0; CloseConn(); TSockHost::GetAsyncSockHost(CurUrl->GetIpNumOrHostNm(), this); } else { TStr MsgStr=TStr("Invalid Redirection URL (")+RedirUrlStr+")"; OnFetchError(MsgStr); } } else { TStr MsgStr=TStr("Cycling Redirection [")+TopUrl->GetUrlStr()+"]"; OnFetchError(MsgStr); } } else { // all other codes - error TStr MsgStr=TStr("Http Error (")+ TInt::GetStr(StatusCd)+"/"+HttpResp->GetReasonPhrase()+")"; OnFetchError(MsgStr); } }
int TWebPgFetch::FetchUrl(const PUrl& Url, const bool& QueueAtEnd){ int FId=-1; if (Url->IsOk(usHttp) && Url->IsPortOk()){ FId=GetNextFId(); ConnUrl(FId, Url, QueueAtEnd); } else { TStr MsgStr=TStr("Invalid URL [")+Url->GetUrlStr()+"]"; OnError(FId, MsgStr); } return FId; }
///////////////////////////////////////////////// // Web-Fetch-Blocking void TWebFetchBlocking::GetWebPg(const PUrl& Url, bool& Ok, TStr& MsgStr, PWebPg& WebPg, const PNotify& Notify, const TStr& ProxyStr, const TStr& UserAgentStr){ TNotify::OnNotify(Notify, ntInfo, TStr("Fetching: ")+Url->GetUrlStr()); TWebFetchBlocking WebFetch; if (!UserAgentStr.Empty()) { WebFetch.PutUserAgentStr(UserAgentStr); } WebFetch.PutProxyStr(ProxyStr); WebFetch.FetchUrl(Url); TLoop::Ref(); TLoop::Run(); Ok=WebFetch.Ok; MsgStr=WebFetch.MsgStr; WebPg=WebFetch.WebPg; if (!Ok){ TNotify::OnNotify(Notify, ntInfo, TStr("Fetching Error: ["+MsgStr+"]")); } }
void TWebPgFetchEvent::OnGetHost(const PSockHost& SockHost) { if (SockHost->IsOk()){ UrlStrV.Add(CurUrl->GetUrlStr()); OppSockClosed=false; SockMem.Clr(); Sock=TSock::New(this); int PortN; if (ProxyStr.Empty()){ PortN=CurUrl->GetPortN(); } else { TStr ProxyHostNm; TStr PortNStr; ProxyStr.SplitOnCh(ProxyHostNm, ':', PortNStr); PortN=PortNStr.GetInt(80); } Sock->PutTimeOut(TimeOutMSecs); Sock->Connect(SockHost, PortN); } else { OnFetchError("Invalid Host"); } }
void TWebPgFetchEvent::OnFetchError(const TStr& MsgStr){ Fetch->OnError(FId, MsgStr+" ["+CurUrl->GetUrlStr()+"]"); CloseConn(); Fetch->DisconnUrl(FId); }
bool IsCTxtHttpResp(const PUrl& Url, const PHttpResp& HttpResp, const int& MnCTxtToks){ if (HttpResp->IsStatusCd_Ok()){ PWebPg WebPg=TWebPg::New(Url->GetUrlStr(), HttpResp); if (HttpResp->IsContType(THttp::TextHtmlFldVal)){ TMem BodyMem=HttpResp->GetBodyAsMem(); PSIn BodyMemIn=TMemIn::New(BodyMem); // prepare html-tokens PHtmlDoc HtmlDoc=THtmlDoc::New(BodyMemIn, hdtAll, false); int Toks=HtmlDoc->GetToks(); THtmlLxSym TokSym; TStr TokStr; // prepare continuous-text indicators int CTxtToks=0; TChA CTxtChA; bool CTxtP=false; // prepare script & style flag bool InScript=false; bool InStyle=false; // traverse tokens for (int TokN=0; TokN<Toks; TokN++){ // get token data HtmlDoc->GetTok(TokN, TokSym, TokStr); switch (TokSym){ case hsyStr: case hsyNum: case hsySSym: if (!InScript&&!InStyle){ // text token CTxtToks++; CTxtChA+=TokStr; CTxtChA+=' '; } break; case hsyBTag: if (!InScript&&!InStyle){ if (TokStr=="<SCRIPT>"){ // start of script InScript=true; CTxtToks=0; CTxtChA.Clr(); } else if (TokStr=="<STYLE>"){ // start of style InStyle=true; CTxtToks=0; CTxtChA.Clr(); } else { if ((TokStr=="<P>")||(TokStr=="<B>")||(TokStr=="<I>")){ // skip in-text-tags } else { // non-text-tags - break continuous-text CTxtToks=0; CTxtChA.Clr(); } } } break; case hsyETag: if (InScript||InStyle){ if (TokStr=="<SCRIPT>"){ // end of script InScript=false; } else if (TokStr=="<STYLE>"){ // end of style InStyle=false; } } break; default: // non-text-token - break continuous-text CTxtToks=0; CTxtChA.Clr(); break; } // stop if enough continuous-text if (CTxtToks>MnCTxtToks){ CTxtP=true; break; } } if (CTxtP){ printf("%s\n", Url->GetUrlStr().CStr()); } return CTxtP; } } return false; }
void TMongSrv::OnHttpRq(const int& SockId, const PHttpRq& HttpRq) { // check http-request correctness - return if error if (!HttpRq->IsOk()) { TNotify::OnNotify(Notify, ntInfo, "Web-Server: Bad Http Request."); return; } // check url correctness - return if error PUrl RqUrl = HttpRq->GetUrl(); if (!RqUrl->IsOk()) { TNotify::OnNotify(Notify, ntInfo, "Web-Server: Bad Url Requested."); return; } // construct http-response PHttpResp HttpResp; if (!RqUrl->GetPathStr().Empty()) { // get request-file-name TStr ExeFPath = TSysProc::GetExeFNm().GetFPath(); TStr RqFNm = RqUrl->GetPathStr(); if (RqFNm.LastCh() == '/') { RqFNm = RqFNm + "default.htm"; } if ((RqFNm[0] == '/') || (RqFNm[0] == '\\')) { RqFNm.DelSubStr(0, 0); } RqFNm = ExeFPath + RqFNm; // open file bool RqFOpened = false; PSIn RqSIn = TFIn::New(RqFNm, RqFOpened); if (!RqFOpened) { // prepare default html with time TChA HtmlChA; HtmlChA += "<html><title>Error - Not Found</title><body>"; HtmlChA += "File: "; HtmlChA += RqUrl->GetPathStr(); HtmlChA += " not found."; HtmlChA += "</body></html>"; PSIn BodySIn = TMIn::New(HtmlChA); HttpResp = PHttpResp( new THttpResp(THttp::ErrNotFoundStatusCd, THttp::TextHtmlFldVal, false, BodySIn, "")); } else { // file successfully opened PSIn BodySIn = RqSIn; if (THttp::IsHtmlFExt(RqFNm.GetFExt())) { // send text/html mime type if Html filemg_callback_t HttpResp = PHttpResp( new THttpResp(THttp::OkStatusCd, THttp::TextHtmlFldVal, false, BodySIn, "")); } else if (THttp::IsGifFExt(RqFNm.GetFExt())) { // send image/gif mime type if Gif file HttpResp = PHttpResp( new THttpResp(THttp::OkStatusCd, THttp::ImageGifFldVal, false, BodySIn, "")); } else { // send application/octet mime type HttpResp = PHttpResp( new THttpResp(THttp::OkStatusCd, THttp::AppOctetFldVal, false, BodySIn, "")); } } } else { // prepare default html with time TChA HtmlChA; HtmlChA += "<html><title>Welcome to TWebSrv (powered by mongoose 3.1)</title><body>"; HtmlChA += TSecTm::GetCurTm().GetStr(); HtmlChA += "</body></html>"; PSIn BodySIn = TMIn::New(HtmlChA); HttpResp = THttpResp::New(THttp::OkStatusCd, THttp::TextHtmlFldVal, false, BodySIn); } // construct & send response SendHttpResp(SockId, HttpResp); // notify if (RqUrl->IsOk()) { TChA MsgChA; MsgChA += "Web-Server: Request for '"; MsgChA += RqUrl->GetUrlStr(); MsgChA += "'."; TNotify::OnNotify(Notify, ntInfo, MsgChA); } }