// space seprated sequence of words (includes all non-blank characters, i.e., punctuations) TChA TStrUtil::GetCleanStr(const TChA& ChA) { char *b = (char *) ChA.CStr(); while (*b && ! TCh::IsAlNum(*b)) { b++; } if (*b == 0) { return TChA(); } TChA OutChA(ChA.Len()); char *e = b; bool ws=false; while (*e) { while (*e && TCh::IsWs(*e)) { e++; ws=true; } if (! *e) { break; } if (ws) { OutChA.AddCh(' '); ws=false; } OutChA.AddCh(*e); e++; } //OutChA.ToLc(); return OutChA; }
// space separated sequence of words, remove all punctuations, etc. TChA TStrUtil::GetCleanWrdStr(const TChA& ChA) { char *b = (char *) ChA.CStr(); while (*b && ! TCh::IsAlNum(*b)) { b++; } if (*b == 0) { return TChA(); } TChA OutChA(ChA.Len()); char *e = b, tmp; while (*e) { b = e; while (*e && (TCh::IsAlNum(*e) || ((*e=='\'' || *e=='-') && TCh::IsAlNum(*(e+1))))) { e++; } if (b < e) { tmp = *e; *e=0; OutChA += b; OutChA.AddCh(' '); *e = tmp; } while (*e && ! TCh::IsAlNum(*e)) { e++; } if (! *e) { break; } } OutChA.DelLastCh(); OutChA.ToLc(); return OutChA; }
void TWebTxtBsSrv::OnHttpRq(const int& SockId, const PHttpRq& HttpRq){ // request parameters TStr RqContTypeStr=THttp::TextHtmlFldVal; PUrlEnv UrlEnv; TStr QueryStr; TStr EQueryStr; TStr HitSetStr; TStr AcceptStr; // prepare & extract search-environment if (HttpRq->IsOk()){ // prepare search-environment PUrl Url=HttpRq->GetUrl(); UrlEnv=HttpRq->GetUrlEnv(); // if empty search-environment and url-path is not empty if (UrlEnv->Empty()&& (Url->GetPathSegs()>0)&&(!Url->GetPathSeg(0).Empty())){ // get document name TStr DocNm=Url->GetPathSeg(Url->GetPathSegs()-1); if (WebTxtBs->GetTxtBs()->IsDoc(DocNm)){ // document exists in text-base TStr DocStr=WebTxtBs->GetTxtBs()->GetDocStr(DocNm); PSIn HttpBodySIn=TMIn::New(DocStr); PHttpResp HttpResp= THttpResp::New(THttp::OkStatusCd, RqContTypeStr, false, HttpBodySIn); SendHttpResp(SockId, HttpResp); } else { // ordinary http request TWebSrv::OnHttpRq(SockId, HttpRq); } // end if no search request return; } // extract fields from search-environment QueryStr=UrlEnv->GetVal(QueryUrlFldNm).GetTrunc(); EQueryStr=THtmlLx::GetEscapedStr(QueryStr); HitSetStr=UrlEnv->GetVal(HitSetUrlFldNm).GetTrunc(); AcceptStr=UrlEnv->GetVal(AcceptUrlFldNm).GetTrunc(); if (AcceptStr.Empty()){RqContTypeStr=GetRqContType(HttpRq);} else {RqContTypeStr=AcceptStr;} } // hit-set int HitSetN=1; HitSetStr.IsInt(true, 1, TInt::Mx, HitSetN); int HitSetDocs=GetVarVal(RqContTypeStr, "HitSetDocs").GetInt(); int StrHitSets=GetVarVal(RqContTypeStr, "StrHitSets").GetInt(); // output buffer TChA OutChA(10000); // header TStr HdTpl=GetTplVal(RqContTypeStr, "Header"); HdTpl.ChangeStrAll(QueryMacro, EQueryStr); OutChA+=HdTpl; // html body if (HttpRq->IsOk()){ if (!QueryStr.Empty()){ // execute query PTxtBsRes TxtBsRes=WebTxtBs->Search(QueryStr); TStr EWixExpStr=THtmlLx::GetEscapedStr(TxtBsRes->GetWixExpStr()); // log string TChA QueryInfoChA; QueryInfoChA+="Query: "+QueryStr; //QueryInfoChA+=" ["+GetPeerNm(SockId)+"]"; QueryInfoChA+=" ["+TSecTm::GetCurTm().GetStr()+"]"; TNotify::OnNotify(Notify, ntInfo, QueryInfoChA); SLog->PutStr(QueryInfoChA); SLog->PutLn(); SLog->Flush(); // query-results processing if (TxtBsRes->IsOk()){ // result header TStr ResultHdTpl=GetTplVal(RqContTypeStr, "ResultHd"); ResultHdTpl.ChangeStrAll(QueryMacro, EWixExpStr); ResultHdTpl.ChangeStrAll(HitsMacro, TInt::GetStr(TxtBsRes->GetDocs())); OutChA+=ResultHdTpl; // result records int MnDocN; int MxDocN; TxtBsRes->GetHitSetMnMxDocN(HitSetN, HitSetDocs, MnDocN, MxDocN); for (int DocN=MnDocN; DocN<=MxDocN; DocN++){ // get result document data int MxDocTitleLen=GetVarVal(RqContTypeStr, "MxDocTitleLen").GetInt(); int MxDocCtxLen=GetVarVal(RqContTypeStr, "MxDocCtxLen").GetInt(); TStr DocNm; TStr DocTitleStr; TStr DocStr; TStr DocCtxStr; TxtBsRes->GetDocInfo(DocN, MxDocTitleLen, MxDocCtxLen, DocNm, DocTitleStr, DocStr, DocCtxStr); if (DocTitleStr.Empty()){DocTitleStr=DocNm;} // result record TStr ResultRecTpl=GetTplVal(RqContTypeStr, "ResultRec"); ResultRecTpl.ChangeStrAll(HitNumMacro, TInt::GetStr(DocN+1)); ResultRecTpl.ChangeStrAll(DocAddrMacro, DocNm); ResultRecTpl.ChangeStrAll(DocTitleMacro, DocTitleStr); ResultRecTpl.ChangeStrAll(DocCtxMacro, DocCtxStr); OutChA+=ResultRecTpl; } // result footer TStr ResultFtTpl=GetTplVal(RqContTypeStr, "ResultFt"); OutChA+=ResultFtTpl; // hit-set AddHitSetChA(TxtBsRes, RqContTypeStr, HitSetN, HitSetDocs, StrHitSets, UrlEnv, OutChA); } else { // bad query TStr BadQueryTpl=GetTplVal(RqContTypeStr, "BadQuery"); BadQueryTpl.ChangeStrAll(QueryMacro, EWixExpStr); OutChA+=BadQueryTpl; } } } else { // bad http-request TStr BadHttpRqTpl=GetTplVal(RqContTypeStr, "BadHttpRq"); OutChA+=BadHttpRqTpl; } // footer TStr FtTpl=GetTplVal(RqContTypeStr, "Footer"); FtTpl.ChangeStrAll(QueryMacro, EQueryStr); OutChA+=FtTpl; // construct & send response PSIn HttpBodySIn=TMIn::New(OutChA); PHttpResp HttpResp= THttpResp::New(THttp::OkStatusCd, RqContTypeStr, false, HttpBodySIn); SendHttpResp(SockId, HttpResp); }