/*
 * Read the reply stream of a searchd query and store what arrives in Res.
 *
 * Packets are read from cl->searchd one at a time: a
 * DPS_SEARCHD_PACKET_HEADER followed by hdr.len payload bytes.  The loop
 * ends when a WORDS packet, an ERROR packet or an unknown command arrives,
 * or when a payload buffer cannot be allocated.
 *
 *   query - agent; its Conf->errstr receives error text and its Vars
 *           receive the "q" variable from a QLC packet.
 *   Res   - result being filled (total_found, grand_total, num_rows,
 *           Suggest, PerSite, WWList and CoordList are written here).
 *   cl    - database handle whose `searchd` member is passed to DpsRecvall.
 *
 * Returns DPS_OK, or DPS_ERROR on an incomplete header, a searchd error
 * packet or an unknown command.  Allocation failures end the loop but
 * leave the return code unchanged, as before.
 *
 * The value 360 is handed to DpsRecvall as its last argument for every
 * read (presumably a timeout in seconds - confirm against DpsRecvall).
 */
int DpsSearchdGetWordResponse(DPS_AGENT *query, DPS_RESULT *Res, DPS_DB *cl) {
  DPS_URL_CRD_DB *wrd = NULL;
  DPS_URLDATA *udt = NULL;
#ifdef WITH_REL_TRACK
  DPS_URLTRACK *trk = NULL;
#endif
  DPS_SEARCHD_PACKET_HEADER hdr;
  ssize_t nrecv;
  char *msg;
  char *total, *grand;
  int done = 0, rc = DPS_OK;
  char *wbuf, *p;
  DPS_WIDEWORDLIST_EX *wwl;
  DPS_WIDEWORD ww;
  size_t i;

  TRACE_IN(query, "DpsSearchdGetWordResponse");

  Res->total_found = 0;

  while (!done) {
    nrecv = DpsRecvall(cl->searchd, &hdr, sizeof(hdr), 360);
    if (nrecv != sizeof(hdr)) {
      sprintf(query->Conf->errstr, "Received incomplete header from searchd (%d bytes,errno:%d)", (int)nrecv, errno);
      /* Payloads collected so far are not handed to Res on this path, so release them. */
      DPS_FREE(wrd);
      DPS_FREE(udt);
#ifdef WITH_REL_TRACK
      DPS_FREE(trk);
#endif
      TRACE_OUT(query);
      return DPS_ERROR;
    }
#ifdef DEBUG_SDP
    DpsLog(query, DPS_LOG_ERROR, "Received header cmd=%d len=%d\n", hdr.cmd, hdr.len);
#endif

    switch (hdr.cmd) {

    case DPS_SEARCHD_CMD_ERROR:
      msg = (char*)DpsMalloc(hdr.len + 1);
      if (msg == NULL) {
        done = 1;
        break;
      }
      nrecv = DpsRecvall(cl->searchd, msg, hdr.len, 360);
      if (nrecv >= 0) {
        msg[nrecv] = '\0';
        /* NOTE(review): msg comes from the network and is copied unbounded into
           errstr; bound this once the size of errstr is confirmed. */
        sprintf(query->Conf->errstr, "Searchd error: '%s',received:%d", msg, (int)nrecv);
      }
      rc = DPS_ERROR;
      DPS_FREE(msg);
      done = 1;
      break;

    case DPS_SEARCHD_CMD_MESSAGE:
      msg = (char*)DpsMalloc(hdr.len + 1);
      if (msg == NULL) {
        done = 1;
        break;
      }
      nrecv = DpsRecvall(cl->searchd, msg, hdr.len, 360);
      msg[(nrecv >= 0) ? nrecv : 0] = '\0';
      /* Expected shape: "Total_found" + one separator char + <total> + ' ' + <grand total>.
         msg[11] must exist before msg + 12 may be read, and the second
         number is optional: without the space grand_total is left as is. */
      if (strncmp(msg, "Total_found", 11) == 0 && msg[11] != '\0') {
        total = msg + 12;
        grand = strchr(total, (int)' ');
        Res->total_found = (size_t)DPS_ATOI(total);
        if (grand != NULL) {
          grand++;
          Res->grand_total = (size_t)DPS_ATOI(grand);
        }
      }
#ifdef DEBUG_SDP
      DpsLog(query, DPS_LOG_ERROR, "Message from searchd: '%s'\n", msg);
#endif
      DPS_FREE(msg);
      break;

    case DPS_SEARCHD_CMD_WORDS:
      DPS_FREE(wrd);
      wrd = (DPS_URL_CRD_DB*)DpsMalloc(hdr.len + 1);
      if (wrd == NULL) {
        done = 1;
        break;
      }
      nrecv = DpsRecvall(cl->searchd, wrd, hdr.len, 360);
      /* Row count is derived from the bytes actually received. */
      Res->num_rows = (nrecv >= 0) ? (size_t)nrecv / sizeof(*wrd) : 0;
#ifdef DEBUG_SDP
      DpsLog(query, DPS_LOG_ERROR, "Received words size=%d nwrd=%d\n", hdr.len, Res->num_rows /*Res->total_found*/);
#endif
      done = 1;   /* the word list is the last packet of a reply */
      break;

    case DPS_SEARCHD_CMD_SUGGEST:
      DPS_FREE(Res->Suggest);
      Res->Suggest = (char*)DpsMalloc(hdr.len + 1);
      if (Res->Suggest == NULL) {
        done = 1;
        break;
      }
      nrecv = DpsRecvall(cl->searchd, Res->Suggest, hdr.len, 360);
      Res->Suggest[(nrecv >= 0) ? nrecv : 0] = '\0';
#ifdef DEBUG_SDP
      DpsLog(query, DPS_LOG_ERROR, "Received Suggest size=%d\n", hdr.len);
#endif
      break;

    case DPS_SEARCHD_CMD_PERSITE:
      Res->PerSite = (size_t*)DpsMalloc(hdr.len + 1);
      if (Res->PerSite == NULL) {
        done = 1;
        break;
      }
      nrecv = DpsRecvall(cl->searchd, Res->PerSite, hdr.len, 360);
#ifdef DEBUG_SDP
      DpsLog(query, DPS_LOG_ERROR, "Received PerSite size=%d nwrd=%d\n", nrecv, Res->num_rows/*Res->total_found*/);
#endif
      break;

    case DPS_SEARCHD_CMD_DATA:
      DPS_FREE(udt);   /* a repeated DATA packet replaces the previous one */
      udt = (DPS_URLDATA*)DpsMalloc(hdr.len + 1);
      if (udt == NULL) {
        done = 1;
        break;
      }
      nrecv = DpsRecvall(cl->searchd, udt, hdr.len, 360);
#ifdef DEBUG_SDP
      DpsLog(query, DPS_LOG_ERROR, "Received URLDATA size=%d nwrd=%d\n", nrecv, Res->num_rows);
#endif
      break;

#ifdef WITH_REL_TRACK
    case DPS_SEARCHD_CMD_TRACKDATA:
      DPS_FREE(trk);   /* a repeated TRACKDATA packet replaces the previous one */
      trk = (DPS_URLTRACK*)DpsMalloc(hdr.len + 1);
      if (trk == NULL) {
        done = 1;
        break;
      }
      nrecv = DpsRecvall(cl->searchd, trk, hdr.len, 360);
#ifdef DEBUG_SDP
      DpsLog(query, DPS_LOG_ERROR, "Received TRACKDATA size=%d nwrd=%d\n", nrecv, Res->num_rows);
#endif
      break;
#endif

    case DPS_SEARCHD_CMD_WITHOFFSET:
      /* Nothing to read or record for this command. */
      break;

    case DPS_SEARCHD_CMD_QLC:
      if ((p = (char *)DpsXmalloc(hdr.len + 1)) != NULL) {
        nrecv = DpsRecvall(cl->searchd, p, hdr.len, 360);
        /* Only a positive byte count is a payload; a negative value is an error. */
        if (nrecv > 0) {
          p[nrecv] = '\0';
          DpsVarListReplaceStr(&query->Vars, "q", p);
        }
      }
      DPS_FREE(p);
      break;

    case DPS_SEARCHD_CMD_WWL:
      Res->PerSite = NULL;
      if ((wbuf = p = (char *)DpsXmalloc(hdr.len + 1)) != NULL) {
        nrecv = DpsRecvall(cl->searchd, wbuf, hdr.len, 360);
        /* The list header must have arrived completely before it is read. */
        if (nrecv >= (ssize_t)sizeof(DPS_WIDEWORDLIST_EX)) {
          wwl = (DPS_WIDEWORDLIST_EX *)p;
          p += sizeof(DPS_WIDEWORDLIST_EX);
#ifdef DEBUG_SDP
          DpsLog(query, DPS_LOG_ERROR, "wbuf :%x, wwl: %x, p: %x hdr.len:%d\n", wbuf, wwl, p, hdr.len);
          DpsLog(query, DPS_LOG_ERROR, "Received WWL nwords=%d nuniq=%d\n", wwl->nwords, wwl->nuniq);
#endif
          /* Each record: a DPS_WIDEWORD_EX, the word (len + 1 bytes), padding up
             to the next dpsunicode_t boundary, then the wide word (ulen + 1 units).
             NOTE(review): record sizes come from the peer and are not checked
             against the number of bytes received. */
          for (i = 0; i < wwl->nwords; i++) {
            dps_memcpy((char*)&ww, p, sizeof(DPS_WIDEWORD_EX));
            p += sizeof(DPS_WIDEWORD_EX);
            ww.word = p;
#ifdef DEBUG_SDP
            DpsLog(query, DPS_LOG_ERROR, "Word {%d}: %s\n", ww.len+1, ww.word);
#endif
            p += ww.len + 1;
            p += sizeof(dpsunicode_t) - ((SDPALIGN)p % sizeof(dpsunicode_t));
            ww.uword = (dpsunicode_t*)p;
            p += sizeof(dpsunicode_t) * (ww.ulen + 1);
            DpsWideWordListAdd(&Res->WWList, &ww, DPS_WWL_STRICT);
          }
          Res->WWList.nuniq = wwl->nuniq;
        }
        /* Released on every path, including a failed or short read. */
        DPS_FREE(wbuf);
      }
      break;

    default:
      sprintf(query->Conf->errstr, "Unknown searchd response: cmd=%d len=%d", hdr.cmd, hdr.len);
      rc = DPS_ERROR;
      done = 1;
      break;
    }
  }

  /* Hand the received buffers over to the result. */
  Res->CoordList.Coords = wrd;
  Res->CoordList.Data = udt;
#ifdef WITH_REL_TRACK
  Res->CoordList.Track = trk;
#endif
  TRACE_OUT(query);
  return rc;
}
/*
 * Process one HTTP response header held in header->data.
 *
 * The text is split in place at the first ':' into a name and a value
 * (the value is trimmed of spaces, tabs and colons).
 *   - Content-Type and Content-Encoding values are lower-cased.
 *   - When Doc->Spider.use_cookies is set, a Set-Cookie header is parsed and
 *     passed to DpsCookiesAdd(); nothing is stored in Doc->Sections for it.
 *   - Every other header is stored in Doc->Sections under its own name and,
 *     if a section "header.<name>" exists, also added to Doc->TextList.
 *
 * A header without ':' is stored with the value "<NULL>".
 */
static void DpsParseHTTPHeader(DPS_AGENT *Indexer, DPS_DOCUMENT *Doc, DPS_DSTR *header) {
  char *val, *header_name;
  char secname[128], savec;
  DPS_VAR *Sec;
  DPS_TEXTITEM Item;

  if ((val = strchr(header_name = header->data, ':'))) {
    *val++ = '\0';
    val = DpsTrim(val, " \t:");

    if (!strcasecmp(header_name, "Content-Type") || !strcasecmp(header_name, "Content-Encoding")) {
      char *v;
      for (v = val; *v; v++) {
        *v = dps_tolower(*v);
      }
    } else if (Doc->Spider.use_cookies && !strcasecmp(header_name, "Set-Cookie")) {
      char *part, *lpart;
      char *name = NULL;
      char *value = NULL;
      const char *domain = NULL;
      const char *path = NULL;
      dps_uint4 expire = 0;
      char secure = 'n';

      for (part = dps_strtok_r(val, ";", &lpart, &savec); part; part = dps_strtok_r(NULL, ";", &lpart, &savec)) {
        char *arg;

        part = DpsTrim(part, " ");
        if ((arg = strchr(part, '='))) {
          *arg++ = '\0';
          if (!name) {
            /* The first name=value pair is the cookie itself. */
            name = part;
            value = arg;
          } else if (!strcasecmp(part, "path")) {
            path = arg;
          } else if (!strcasecmp(part, "domain")) {
            domain = arg;
          } else if (!strcasecmp(part, "secure")) {
            secure = 'y';
          } else if (!strcasecmp(part, "expires")) {
            expire = (dps_uint4)DpsHttpDate2Time_t(arg);
          }
        } else if (name && !strcasecmp(part, "secure")) {
          /* "Secure" is normally sent as a bare attribute without '='. */
          secure = 'y';
        }
      }

      if (name && value) {
        if (domain && domain[0] == '.') {
          domain++;
        } else {
          /* No Domain attribute, or one without a leading dot: use the current host. */
          domain = Doc->CurURL.hostname ? Doc->CurURL.hostname : "localhost";
        }
        if (!path) {
          path = Doc->CurURL.path ? Doc->CurURL.path : "/";
        }
        DpsCookiesAdd(Indexer, domain, path, name, value, secure, expire, 1);
      }
      return;
    }
  }

  DpsVarListReplaceStr(&Doc->Sections, header_name, val ? val : "<NULL>");

  dps_snprintf(secname, sizeof(secname), "header.%s", header_name);
  secname[sizeof(secname) - 1] = '\0';
  if ((Sec = DpsVarListFind(&Doc->Sections, secname)) && val) {
    Item.href = NULL;
    Item.str = val;
    Item.section = Sec->section;
    Item.section_name = secname;
    Item.len = 0;
    DpsTextListAdd(&Doc->TextList, &Item);
  }
}
int main(int argc, char ** argv, char **envp) { const char *env, *bcharset, *lcharset, *conf_dir; char template_name[PATH_MAX+6]=""; char *template_filename = NULL; char *query_string = NULL; char self[1024]=""; char *url = NULL; const char *ResultContentType; int res,httpd=0; size_t catcolumns = 0; int page_size,page_number; DPS_ENV *Env; DPS_AGENT *Agent; DPS_VARLIST query_vars; /* Output Content-type if under HTTPD */ /* Some servers do not pass QUERY_STRING */ /* if the query was empty, so check */ /* REQUEST_METHOD too to be safe */ httpd=(getenv("QUERY_STRING")||getenv("REQUEST_METHOD")); if (!(conf_dir=getenv("DPS_ETC_DIR"))) conf_dir=DPS_CONF_DIR; DpsInit(argc, argv, envp); Env=DpsEnvInit(NULL); if (Env == NULL) { if(httpd){ printf("Content-Type: text/plain\r\n\r\n"); } printf("Can't alloc Env\n"); exit(0); } DpsVarListInit(&query_vars); Agent = DpsAgentInit(NULL, Env, 0); if (Agent == NULL) { if(httpd){ printf("Content-Type: text/plain\r\n\r\n"); } printf("Can't alloc Agent\n"); exit(0); } DpsVarListAddEnviron(&Env->Vars,"ENV"); /* Detect self and template name */ if((env = getenv("DPSEARCH_TEMPLATE"))) dps_strncpy(template_name, env, sizeof(template_name) - 1); else if((env = getenv("PATH_INFO")) && env[0]) dps_strncpy(template_name, env + 1, sizeof(template_name) - 1); if((env=getenv("DPSEARCH_SELF"))) dps_strncpy(self,env,sizeof(self)-1); if((env=getenv("QUERY_STRING"))){ query_string = (char*)DpsRealloc(query_string, dps_strlen(env) + 2); if (query_string == NULL) { if(httpd){ printf("Content-Type: text/plain\r\n\r\n"); } printf("Can't alloc query_string\n"); exit(0); } dps_strncpy(query_string, env, dps_strlen(env) + 1); /* Hack for Russian Apache from apache.lexa.ru */ /* QUERY_STRING is already converted to server */ /* character set. We must print original query */ /* string instead however. Under usual apache */ /* we'll use QUERY_STRING. 
Note that query_vars */ /* list will contain not unescaped values, so */ /* we don't have to escape them when displaying */ env = getenv("CHARSET_SAVED_QUERY_STRING"); DpsParseQStringUnescaped(&query_vars,env?env:query_string); /* Unescape and save variables from QUERY_STRING */ /* Env->Vars will have unescaped values however */ DpsParseQueryString(Agent,&Env->Vars,query_string); template_filename = (char*)DpsStrdup(DpsVarListFindStr(&Env->Vars, "tmplt", "")); if((env=getenv("REDIRECT_STATUS"))){ /* Check Apache internal redirect */ /* via "AddHandler" and "Action" */ if(!self[0]){ dps_strncpy(self,(env=getenv("REDIRECT_URL"))?env:"filler.cgi",sizeof(self)-1); } if(!template_name[0]){ dps_strncpy(template_name,(env=getenv("PATH_TRANSLATED"))?env:"",sizeof(template_name)-1); } if (*template_filename == '\0') { DPS_FREE(template_filename); template_filename = (char*)DpsStrdup("filler.htm"); } }else{ /* CGI executed without Apache internal redirect */ /* Detect $Self variable with OS independant SLASHES */ if(!self[0]){ dps_strncpy(self,(env=getenv("SCRIPT_NAME"))?env:"filler.cgi",sizeof(self)-1); } if(!template_name[0]){ char *s,*e; /*This is with OS specific SLASHES */ env=((env=getenv("SCRIPT_FILENAME"))?env:"filler.cgi"); if(strcmp(conf_dir,".")){ /* Take from the config directory */ dps_snprintf(template_name, sizeof(template_name)-1, "%s/%s", conf_dir,(s=strrchr(env,DPSSLASH))?(s+1):(self)); }else{ /* Take from the current directory */ dps_strncpy(template_name,env,sizeof(template_name)-1); } /* Find right slash if it presents */ s=((s=strrchr(template_name,DPSSLASH))?s:template_name); if (*template_filename == '\0') { /* Find .cgi substring */ if ((e = strstr(s, ".cgi")) != NULL) { /* Replace ".cgi" with ".htm" */ e[1]='h';e[2]='t';e[3]='m'; } else { dps_strcat(s, ".htm"); } e = strrchr(s, '/'); DPS_FREE(template_filename); template_filename = (char*)DpsStrdup(e + 1); } else { dps_strncpy(s + 1, template_filename, sizeof(template_name) - (s - template_name) - 
2); } } } }else{ /* Executed from command line */ /* or under server which does not */ /* pass an empty QUERY_STRING var */ if(argv[1]) { query_string = (char*)DpsRealloc(query_string, dps_strlen(argv[1]) + 10); if (query_string == NULL) { if(httpd){ printf("Content-Type: text/plain\r\n\r\n"); } printf("Can't realloc query_string\n"); exit(0); } sprintf(query_string, "q=%s", argv[1]); } else { query_string = (char*)DpsRealloc(query_string, 1024); if (query_string == NULL) { if(httpd){ printf("Content-Type: text/plain\r\n\r\n"); } printf("Can't realloc query_string\n"); exit(0); } sprintf(query_string, "q="); } /* Hack for Russian Apache from apache.lexa.ru */ /* QUERY_STRING is already converted to server */ /* character set. We must print original query */ /* string instead however. Under usual apache */ /* we'll use QUERY_STRING. Note that query_vars */ /* list will contain not unescaped values, so */ /* we don't have to escape them when displaying */ env = getenv("CHARSET_SAVED_QUERY_STRING"); DpsParseQStringUnescaped(&query_vars,env?env:query_string); /* Unescape and save variables from QUERY_STRING */ /* Env->Vars will have unescaped values however */ DpsParseQueryString(Agent,&Env->Vars,query_string); DPS_FREE(template_filename); template_filename = (char*)DpsStrdup(DpsVarListFindStr(&Env->Vars, "tmplt", "")); if (*template_filename == '\0') { DPS_FREE(template_filename); template_filename = (char*)DpsStrdup("filler.htm"); } /*// Get template name from command line variable &tmplt */ if(!template_name[0]) dps_snprintf(template_name,sizeof(template_name),"%s/%s", conf_dir, template_filename); } DpsVarListReplaceStr(&Agent->Conf->Vars, "tmplt", template_filename); DPS_FREE(template_filename); Agent->tmpl.Env_Vars = &Env->Vars; DpsURLNormalizePath(template_name); if (strncmp(template_name, conf_dir, dps_strlen(conf_dir)) || (res = DpsTemplateLoad(Agent, Env, &Agent->tmpl, template_name))) { if (strcmp(template_name, "filler.htm")) { /* trying load default 
template */ fprintf(stderr, "Can't load template: '%s' %s\n", template_name, Env->errstr); DPS_FREE(template_filename); template_filename = (char*)DpsStrdup("filler.htm"); dps_snprintf(template_name, sizeof(template_name), "%s/%s", conf_dir, template_filename); if ((res = DpsTemplateLoad(Agent, Env, &Agent->tmpl, template_name))) { if(httpd)printf("Content-Type: text/plain\r\n\r\n"); printf("%s\n",Env->errstr); DpsVarListFree(&query_vars); DpsEnvFree(Env); DPS_FREE(query_string); DpsAgentFree(Agent); return(0); } } else { if(httpd)printf("Content-Type: text/plain\r\n\r\n"); printf("%s\n",Env->errstr); DpsVarListFree(&query_vars); DpsEnvFree(Env); DPS_FREE(query_string); DpsAgentFree(Agent); return(0); } } /* set locale if specified */ if ((url = DpsVarListFindStr(&Env->Vars, "Locale", NULL)) != NULL) { setlocale(LC_ALL, url); /*#ifdef HAVE_ASPELL*/ { char *p; if ((p = strchr(url, '.')) != NULL) { *p = '\0'; DpsVarListReplaceStr(&Env->Vars, "g-lc", url); *p = '.'; } } /*#endif*/ url = NULL; } /* Call again to load search Limits if need */ DpsParseQueryString(Agent, &Env->Vars, query_string); Agent->Flags = Env->Flags; Agent->flags |= DPS_FLAG_UNOCON; Env->flags |= DPS_FLAG_UNOCON; DpsSetLogLevel(NULL, DpsVarListFindInt(&Env->Vars, "LogLevel", 0)); DpsOpenLog("filler.cgi", Env, !strcasecmp(DpsVarListFindStr(&Env->Vars, "Log2stderr", (!httpd) ? 
"yes" : "no"), "yes")); DpsLog(Agent,DPS_LOG_ERROR,"filler.cgi started with '%s'",template_name); DpsLog(Agent, DPS_LOG_DEBUG, "VarDir: '%s'", DpsVarListFindStr(&Agent->Conf->Vars, "VarDir", DPS_VAR_DIR)); DpsLog(Agent, DPS_LOG_DEBUG, "Affixes: %d, Spells: %d, Synonyms: %d, Acronyms: %d, Stopwords: %d", Env->Affixes.naffixes,Env->Spells.nspell, Env->Synonyms.nsynonyms, Env->Acronyms.nacronyms, Env->StopWords.nstopwords); DpsLog(Agent, DPS_LOG_DEBUG, "Chinese dictionary with %d entries", Env->Chi.nwords); DpsLog(Agent, DPS_LOG_DEBUG, "Korean dictionary with %d entries", Env->Korean.nwords); DpsLog(Agent, DPS_LOG_DEBUG, "Thai dictionary with %d entries", Env->Thai.nwords); DpsVarListAddLst(&Agent->Vars, &Env->Vars, NULL, "*"); Agent->tmpl.Env_Vars = &Agent->Vars; /* DpsVarListAddEnviron(&Agent->Vars, "ENV");*/ /****************************************************************************************************************************************/ /* This is for query tracking */ DpsVarListAddStr(&Agent->Vars, "QUERY_STRING", query_string); DpsVarListAddStr(&Agent->Vars, "self", self); env = getenv("HTTP_X_FORWARDER_FOR"); if (env) { DpsVarListAddStr(&Agent->Vars, "IP", env); } else { env = getenv("REMOTE_ADDR"); DpsVarListAddStr(&Agent->Vars, "IP", env ? 
env : "localhost"); } bcharset = DpsVarListFindStr(&Agent->Vars, "BrowserCharset", "iso-8859-1"); Env->bcs=DpsGetCharSet(bcharset); lcharset = DpsVarListFindStr(&Agent->Vars, "LocalCharset", "iso-8859-1"); Env->lcs=DpsGetCharSet(lcharset); ResultContentType = DpsVarListFindStr(&Agent->Vars, "ResultContentType", "text/html"); if(httpd){ if(!Env->bcs){ printf("Content-Type: text/plain\r\n\r\n"); printf("Unknown BrowserCharset '%s' in template '%s'\n",bcharset,template_name); exit(0); }else if(!Env->lcs){ printf("Content-Type: text/plain\r\n\r\n"); printf("Unknown LocalCharset '%s' in template '%s'\n",lcharset,template_name); exit(0); }else{ printf("Content-type: %s; charset=%s\r\n\r\n", ResultContentType, bcharset); } }else{ if(!Env->bcs){ printf("Unknown BrowserCharset '%s' in template '%s'\n",bcharset,template_name); exit(0); } if(!Env->lcs){ printf("Unknown LocalCharset '%s' in template '%s'\n",lcharset,template_name); exit(0); } } /* These parameters taken from "variable section of template"*/ res = DpsVarListFindInt(&Agent->Vars, "ps", DPS_DEFAULT_PS); page_size = dps_min(res, MAX_PS); page_number = DpsVarListFindInt(&Agent->Vars, "p", 0); if (page_number == 0) { page_number = DpsVarListFindInt(&Agent->Vars, "np", 0); DpsVarListReplaceInt(&Agent->Vars, "p", page_number + 1); } else page_number--; res = DpsVarListFindInt(&Agent->Vars, "np", 0) * page_size; DpsVarListAddInt(&Agent->Vars, "pn", res); catcolumns = (size_t)atoi(DpsVarListFindStr(&Agent->Vars, "CatColumns", "")); DpsTemplatePrint(Agent, (DPS_OUTPUTFUNCTION)&fprintf, stdout, NULL, 0, &Agent->tmpl, "top"); DpsTemplatePrint(Agent, (DPS_OUTPUTFUNCTION)&fprintf, stdout, NULL, 0, &Agent->tmpl, "restop"); DpsTemplatePrint(Agent, (DPS_OUTPUTFUNCTION)&fprintf, stdout, NULL, 0, &Agent->tmpl, "res"); DpsTemplatePrint(Agent, (DPS_OUTPUTFUNCTION)&fprintf, stdout, NULL, 0, &Agent->tmpl, "resbot"); DpsTemplatePrint(Agent, (DPS_OUTPUTFUNCTION)&fprintf, stdout, NULL, 0, &Agent->tmpl, "bottom"); 
DpsVarListFree(&query_vars); DpsAgentFree(Agent); DpsEnvFree(Env); DPS_FREE(query_string); DPS_FREE(url); if (httpd) fflush(NULL); else fclose(stdout); #ifdef EFENCE fprintf(stderr, "Memory leaks checking\n"); DpsEfenceCheckLeaks(); #endif #ifdef FILENCE fprintf(stderr, "FD leaks checking\n"); DpsFilenceCheckLeaks(NULL); #endif return DPS_OK; }
/*
 * Split the raw HTTP response in Doc->Buf into headers and content and
 * record the headers in Doc->Sections.
 *
 *   - "ResponseSize" is set to Doc->Buf.size; "Content-Length" and
 *     "Last-Modified" are removed before parsing.
 *   - The first blank line ("\r\n\r\n" or "\n\n") is overwritten with a NUL
 *     and Doc->Buf.content is pointed just past it.
 *   - The status line sets "ResponseLine" and "Status" (the larger of the
 *     previously stored status and the parsed one).
 *   - Every following header is passed to DpsParseHTTPHeader(); lines
 *     without ':' are appended to the header collected before them.
 *   - If content was found, "Content-Length" is set to the number of bytes
 *     after the header plus any Content-Length value the headers supplied.
 *
 * Returns early, leaving Doc->Buf.content as computed so far, when the
 * buffer is missing or the first line does not start with "HTTP/".
 */
void DpsParseHTTPResponse(DPS_AGENT *Indexer, DPS_DOCUMENT *Doc) {
  char *token, *last, *headers, savec;
  int oldstatus;
  DPS_DSTR header;

  Doc->Buf.content = NULL;
  oldstatus = DpsVarListFindInt(&Doc->Sections, "Status", 0);
  DpsVarListReplaceInt(&Doc->Sections, "ResponseSize", (int)Doc->Buf.size);
  DpsVarListDel(&Doc->Sections, "Content-Length");
  DpsVarListDel(&Doc->Sections, "Last-Modified");

  if (Doc->Buf.buf == NULL) return;

  /* Cut the HTTP response header first. */
  for (token = Doc->Buf.buf; *token; token++) {
    if (!strncmp(token, "\r\n\r\n", 4)) {
      if (token <= Doc->Buf.buf + Doc->Buf.size - 4) {
        *token = '\0';
        Doc->Buf.content = token + 4;
      }
      break;
    } else if (!strncmp(token, "\n\n", 2)) {
      if (token <= Doc->Buf.buf + Doc->Buf.size - 2) {
        *token = '\0';
        Doc->Buf.content = token + 2;
      }
      break;
    }
  }

  /* No separator was cut above.  If the scan stopped at least four bytes
     before the end of the buffer, content is taken to start two or four
     bytes further on (presumably a buffer whose separator was already
     replaced by NUL on an earlier call - confirm). */
  if (!Doc->Buf.content) {
    if (token <= Doc->Buf.buf + Doc->Buf.size - 4) {
      if (token[2] == CR_CHAR) Doc->Buf.content = token + 4;
      else Doc->Buf.content = token + 2;
    }
  }

  /* Parse a copy so that the header text in the buffer stays intact. */
  headers = (char*)DpsStrdup(Doc->Buf.buf);
  if (headers == NULL) return;

  token = dps_strtok_r(headers, "\r\n", &last, &savec);
  if (!token) {
    DpsFree(headers);
    return;
  }

  if (!strncmp(token, "HTTP/", 5)) {
    /* The status code follows the first space of the status line; looking
       for that space works for any version string and never reads past
       the end of a short line. */
    const char *sp = strchr(token, ' ');
    int status = (sp != NULL) ? atoi(sp + 1) : 0;

    DpsVarListReplaceStr(&Doc->Sections, "ResponseLine", token);
    DpsVarListReplaceInt(&Doc->Sections, "Status", (oldstatus > status) ? oldstatus : status);
  } else {
    DpsFree(headers);
    return;
  }

  token = dps_strtok_r(NULL, "\r\n", &last, &savec);
  DpsDSTRInit(&header, 128);
  while (token) {
    if (strchr(token, ':')) {
      /* A new header starts: flush the one collected so far. */
      if (header.data_size) {
        DpsParseHTTPHeader(Indexer, Doc, &header);
        DpsDSTRFree(&header);
        DpsDSTRInit(&header, 128);
      }
    }
    DpsDSTRAppendStr(&header, token);
    token = dps_strtok_r(NULL, "\r\n", &last, &savec);
  }
  if (header.data_size) {
    DpsParseHTTPHeader(Indexer, Doc, &header);
  }
  DpsDSTRFree(&header);
  DPS_FREE(headers);

  /* Bad response: nothing more to record. */
  if (!Doc->Buf.content) {
    return;
  }

  DpsVarListReplaceInt(&Doc->Sections, "Content-Length",
                       Doc->Buf.buf - Doc->Buf.content + (int)Doc->Buf.size + DpsVarListFindInt(&Doc->Sections, "Content-Length", 0));
}
/*
 * Build the "Cookie" request header of Doc for the host `hostinfo`.
 *
 * Only does work when HAVE_SQL is defined.
 *
 * 1. Cookies already held in Indexer->Cookies are matched against the
 *    domain suffix of hostinfo, the path prefix of Doc->CurURL.path and,
 *    for secure cookies, the "https" schema.
 * 2. While no non-config entry matched (and Doc's "have_no_cookies" section
 *    is not 0):
 *      - if Server has an "AuthPing" variable, a HEAD request to the site
 *        root and then the configured GET/POST ping are issued so that
 *        their response headers can be processed;
 *      - the `cookies` SQL table is queried for hostinfo and each of its
 *        parent domains; rows are added to the in-memory list and, when
 *        they match, to the header.  A domain with no rows gets an empty
 *        placeholder entry.
 * 3. A non-empty result is stored as Doc->RequestHeaders "Cookie".
 *
 * hostinfo must not be NULL (its length is taken on entry).
 */
void DpsCookiesFind(DPS_AGENT *Indexer, DPS_SERVER *Server, DPS_DOCUMENT *Doc, const char *hostinfo) {
#ifdef HAVE_SQL
  DPS_DSTR cookie;
  DPS_COOKIES *Cookies = &Indexer->Cookies;
  DPS_COOKIE *Coo;
  size_t i, blen = dps_strlen(hostinfo), slen;
  int have_no_cookies = DpsVarListFindInt(&Doc->Sections, "have_no_cookies", 1);
#ifdef WITH_PARANOIA
  void *paran = DpsViolationEnter(paran);
#endif

  TRACE_IN(Indexer, "DpsCookiesFind");

  DpsDSTRInit(&cookie, 1024);

  for (i = 0; i < Cookies->ncookies; i++) {
    Coo = &Cookies->Cookie[i];
    slen = dps_strlen(Coo->domain);
    if (slen > blen) continue;
    if (Coo->secure == 'y' && strcasecmp(DPS_NULL2EMPTY(Doc->CurURL.schema), "https")) continue;
    if (strncasecmp(Coo->path, DPS_NULL2EMPTY(Doc->CurURL.path), dps_strlen(Coo->path))) continue;
    /* The cookie domain must equal the tail of hostinfo. */
    if (strcasecmp(Coo->domain, hostinfo + (blen - slen))) continue;
    if (Coo->from_config != 1) have_no_cookies = 0;
    /* Entries with an empty name and value are placeholders, not cookies. */
    if (Coo->name[0] == '\0' && Coo->value[0] == '\0') continue;
    if (cookie.data_size) DpsDSTRAppend(&cookie, "; ", 2);
    DpsDSTRAppendStr(&cookie, Coo->name);
    DpsDSTRAppend(&cookie, "=", 1);
    DpsDSTRAppendStr(&cookie, Coo->value);
  }

  if (have_no_cookies) {
    char buf[2*PATH_MAX];
    dpshash32_t url_id;
    DPS_DB *db;
    DPS_SQLRES Res;
    size_t rows;
    int rc;

    if (Server != NULL) {
      char *PingData = DpsVarListFindStr(&Server->Vars, "AuthPing", NULL);

      if (PingData != NULL) {
        /* The duplicate only provides a buffer for the base64-decoded text. */
        char *AuthPing = DpsStrdup(DpsTrim(PingData, " \t\r\n"));
        int method = DPS_METHOD_GET;

        if (AuthPing == NULL) {
          PingData = NULL;
        } else {
          dps_base64_decode(AuthPing, PingData, dps_strlen(PingData));
          if (!strncasecmp(AuthPing, "GET", 3)) {
            method = DPS_METHOD_GET;
            PingData = DpsTrim(AuthPing + 3, " \t\r\n");
          } else if (!strncasecmp(AuthPing, "POST", 4)) {
            method = DPS_METHOD_POST;
            PingData = DpsTrim(AuthPing + 4, " \t\r\n");
          } else {
            DpsLog(Indexer, DPS_LOG_ERROR, "AuthPing should be GET or POST: %s", AuthPing);
            PingData = NULL;
          }
        }

        if (PingData != NULL) {
          size_t size = dps_strlen(PingData);
          {
            /* PingURL: a token of PingData plus '?' plus NUL.
               PingBody: at least one byte even when PingData is empty. */
            char PingURL[size + 2];
            char PingBody[size + 1];
            DPS_DOCUMENT *rDoc;

            PingURL[0] = '\0';
            PingBody[0] = '\0';

            rDoc = DpsDocInit(NULL);
            if (rDoc == NULL) {
              DpsFree(AuthPing);
              DpsDSTRFree(&cookie);
              TRACE_OUT(Indexer);
              return;
            }
            DpsSpiderParamInit(&rDoc->Spider);
            DpsVarList2Doc(rDoc, Server);
            rDoc->Buf.max_size = (size_t)DpsVarListFindInt(&Indexer->Vars, "MaxDocSize", DPS_MAXDOCSIZE);
            rDoc->Buf.allocated_size = DPS_NET_BUF_SIZE;
            if ((rDoc->Buf.buf = (char*)DpsMalloc(rDoc->Buf.allocated_size + 1)) == NULL) {
              DpsDocFree(rDoc);
              DpsFree(AuthPing);
              DpsDSTRFree(&cookie);
              TRACE_OUT(Indexer);
              return;
            }
            rDoc->Buf.buf[0] = '\0';
            rDoc->subdoc = Indexer->Flags.SubDocLevel + 1;

#if 1
            /* First request: HEAD on the site root, only its headers matter. */
            dps_snprintf(buf, sizeof(buf), "%s://%s/", DPS_NULL2EMPTY(Doc->CurURL.schema), DPS_NULL2EMPTY(Doc->CurURL.hostinfo));
            DpsVarListReplaceStr(&rDoc->Sections, "URL", buf);
            DpsURLParse(&rDoc->CurURL, buf);
            DpsLog(Indexer, DPS_LOG_INFO, "HOME: %s", buf);
            rDoc->method = DPS_METHOD_HEAD;
            if (Doc != NULL) {
              DpsVarListReplaceLst(&rDoc->RequestHeaders, &Doc->RequestHeaders, NULL, "*");
            }
            DpsVarListReplaceStr(&rDoc->Sections, "have_no_cookies", "0");
            DpsDocAddDocExtraHeaders(Indexer, Server, rDoc);
            DpsDocAddConfExtraHeaders(Indexer->Conf, rDoc);
            DpsVarListReplaceLst(&rDoc->Sections, &Server->Vars, NULL, "*");
            DpsDocAddServExtraHeaders(Server, rDoc);
            DpsVarListLog(Indexer, &rDoc->RequestHeaders, DPS_LOG_DEBUG, "HOME.Request");
            if (Doc == NULL || Indexer->Flags.cmd == DPS_IND_FILTER) {
              DpsDocLookupConn(Indexer, rDoc);
            } else {
              /* Borrow the connection of Doc; it is detached again before rDoc is freed. */
              DPS_FREE(rDoc->connp.connp);
              rDoc->connp = Doc->connp;
            }
            (void)DpsGetURL(Indexer, rDoc, NULL); /* only the Cookies of the home page are needed */
            DpsDocProcessResponseHeaders(Indexer, rDoc);
            DpsVarListLog(Indexer, &rDoc->Sections, DPS_LOG_DEBUG, "HOME.Response");
#endif

            /* PingData is "<url> <body>"; tokens that are missing stay empty. */
            (void)sscanf(PingData, "%s %s", PingURL, PingBody);
            /* The configured ping method decides where the body goes.
               rDoc->method cannot be used for this: it still holds the
               method of the request above. */
            if (method == DPS_METHOD_GET) {
              dps_strcat(PingURL, "?");
              dps_strcat(PingURL, PingBody);
            } else {
              DpsVarListReplaceStr(&rDoc->Sections, "body", PingBody);
            }
            DpsVarListReplaceStr(&rDoc->Sections, "URL", PingURL);
            DpsURLParse(&rDoc->CurURL, PingURL);
            DpsLog(Indexer, DPS_LOG_INFO, "AUTH.PING: %s", PingURL);
            rDoc->method = method;
            DpsVarListFree(&rDoc->RequestHeaders);
            DpsVarListReplaceStr(&rDoc->Sections, "have_no_cookies", "0");
            DpsDocAddDocExtraHeaders(Indexer, Server, rDoc);
            DpsDocAddConfExtraHeaders(Indexer->Conf, rDoc);
            DpsVarListReplaceLst(&rDoc->Sections, &Server->Vars, NULL, "*");
            DpsDocAddServExtraHeaders(Server, rDoc);
            if (method == DPS_METHOD_POST) {
              dps_snprintf(buf, sizeof(buf), "application/x-www-form-urlencoded; charset=%s", DpsVarListFindStr(&Indexer->Conf->Vars, "LocalCharset", "iso-8859-1"));
              DpsVarListReplaceStr(&rDoc->RequestHeaders, "Content-Type", buf);
              dps_snprintf(buf, sizeof(buf), "%d", (int)dps_strlen(PingBody));
              DpsVarListReplaceStr(&rDoc->RequestHeaders, "Content-Length", buf);
            }
            DpsVarListLog(Indexer, &rDoc->RequestHeaders, DPS_LOG_DEBUG, "AUTHPING.Request");
#if 0
            if (Doc == NULL || Indexer->Flags.cmd == DPS_IND_FILTER) {
              DpsDocLookupConn(Indexer, rDoc);
            } else {
              DPS_FREE(rDoc->connp.connp);
              rDoc->connp = Doc->connp;
            }
#endif
            (void)DpsGetURL(Indexer, rDoc, NULL); /* only the Cookies from the headers are needed */
            DpsDocProcessResponseHeaders(Indexer, rDoc);
            DpsVarListDel(&rDoc->Sections, "body");
            DpsVarListLog(Indexer, &rDoc->Sections, DPS_LOG_DEBUG, "AUTHPING.Response");
            /* Detach the connection before freeing rDoc. */
            if (Doc != NULL) bzero(&rDoc->connp, sizeof(rDoc->connp));
            DpsDocFree(rDoc);
          }
        }
        DpsFree(AuthPing);
      }
    }

    /* Query hostinfo, then each parent domain (the text after the next '.'). */
    while (hostinfo != NULL) {
      url_id = DpsStrHash32(hostinfo);
      DpsSQLResInit(&Res);
      /* NOTE(review): hostinfo is inserted into the statement unescaped;
         confirm that callers can never pass a quote character here. */
      dps_snprintf(buf, sizeof(buf), "SELECT name,value,path,secure FROM cookies WHERE domain='%s'", hostinfo);
      if (Indexer->flags & DPS_FLAG_UNOCON) {
        DPS_GETLOCK(Indexer, DPS_LOCK_DB);
        db = Indexer->Conf->dbl.db[url_id % Indexer->Conf->dbl.nitems];
      } else {
        db = Indexer->dbl.db[url_id % Indexer->dbl.nitems];
      }
      if (DPS_OK == (rc = DpsSQLQuery(db, &Res, buf))) {
        rows = DpsSQLNumRows(&Res);
        for (i = 0; i < rows; i++) {
          DpsCookiesAdd(Indexer, hostinfo, DpsSQLValue(&Res, i, 2), DpsSQLValue(&Res, i, 0), DpsSQLValue(&Res, i, 1), *DpsSQLValue(&Res, i, 3), 0, 0, 0);
          if (*DpsSQLValue(&Res, i, 3) == 'y' && strcasecmp(DPS_NULL2EMPTY(Doc->CurURL.schema), "https")) continue;
          if (strncasecmp(DpsSQLValue(&Res, i, 2), DPS_NULL2EMPTY(Doc->CurURL.path), dps_strlen(DpsSQLValue(&Res, i, 2)))) continue;
          if (cookie.data_size) DpsDSTRAppend(&cookie, "; ", 2);
          DpsDSTRAppendStr(&cookie, DpsSQLValue(&Res, i, 0));
          DpsDSTRAppend(&cookie, "=", 1);
          DpsDSTRAppendStr(&cookie, DpsSQLValue(&Res, i, 1));
        }
        if (rows == 0) {
          /* Empty placeholder entry for a domain without stored cookies. */
          DpsCookiesAdd(Indexer, hostinfo, "/", "", "", 'n', 0, 0, 0);
        }
      }
      DpsSQLFree(&Res);
      if (Indexer->flags & DPS_FLAG_UNOCON) {
        DPS_RELEASELOCK(Indexer, DPS_LOCK_DB);
      }
      hostinfo = strchr(hostinfo, '.');
      if (hostinfo != NULL) hostinfo++;
    }
  }

  if (cookie.data_size) {
    DpsVarListReplaceStr(&Doc->RequestHeaders, "Cookie", cookie.data);
  }
  DpsDSTRFree(&cookie);
#endif
  TRACE_OUT(Indexer);
  return;
}