static int DoStore(DPS_AGENT *Agent, urlid_t rec_id, Byte *Doc, size_t DocSize, char *Client) { z_stream zstream; DPS_BASE_PARAM P; int rc = DPS_OK; Byte *CDoc = NULL; size_t dbnum = ((size_t)rec_id) % ((Agent->flags & DPS_FLAG_UNOCON) ? Agent->Conf->dbl.nitems : Agent->dbl.nitems); DPS_DB *db = (Agent->flags & DPS_FLAG_UNOCON) ? &Agent->Conf->dbl.db[dbnum] : &Agent->dbl.db[dbnum]; zstream.zalloc = Z_NULL; zstream.zfree = Z_NULL; zstream.opaque = Z_NULL; zstream.next_in = Doc; if (deflateInit2(&zstream, 9, Z_DEFLATED, 15, 9, Z_DEFAULT_STRATEGY) == Z_OK) { zstream.avail_in = DocSize; zstream.avail_out = 2 * DocSize; CDoc = zstream.next_out = (Byte *) DpsMalloc(2 * DocSize + 1); if (zstream.next_out == NULL) { return DPS_ERROR; } deflate(&zstream, Z_FINISH); deflateEnd(&zstream); /* store operations */ bzero(&P, sizeof(P)); P.subdir = "store"; P.basename = "doc"; P.indname = "doc"; P.rec_id = rec_id; P.mode = DPS_WRITE_LOCK; P.NFiles = (db->StoredFiles) ? db->StoredFiles : DpsVarListFindInt(&Agent->Vars, "StoredFiles", 0x100); P.vardir = (db->vardir) ? db->vardir : DpsVarListFindStr(&Agent->Vars, "VarDir", DPS_VAR_DIR); P.A = Agent; if (DpsBaseWrite(&P, CDoc, zstream.total_out) != DPS_OK) { DpsLog(Agent, DPS_LOG_ERROR, "store/doc write error: %s", strerror(errno)); rc = DPS_ERROR; } DpsBaseClose(&P); if (rc == DPS_OK) DpsLog(Agent, DPS_LOG_EXTRA, "[%s] Stored rec_id: %x Size: %d Ratio: %5.2f%%", Client, rec_id, DocSize, 100.0 * zstream.total_out / DocSize); if (Agent->Flags.OptimizeAtUpdate) { DpsBaseOptimize(&P, ((int)rec_id) >> DPS_BASE_BITS); }
/*
 * Queue a URL in the shared target list Indexer->Conf->Targets.
 *
 * Indexer - agent whose configuration holds the target list
 * url     - URL to queue; compared case-insensitively against queued targets
 * lang    - Accept-Language value paired with the URL; an empty string means
 *           no Accept-Language request header is attached
 * hops    - value stored in the new document's "Hops" section
 * parent  - value stored in the new document's "Referrer-ID" section
 *
 * Nothing is queued when the same (url, lang) pair is already present or when
 * the target array cannot be grown.  The function holds DPS_LOCK_THREAD for
 * its whole duration and DPS_LOCK_CONF while the target array is touched.
 */
void DpsAppendTarget(DPS_AGENT *Indexer, const char *url, const char *lang, const int hops, int parent) {
  DPS_DOCUMENT *Doc, *Save;
  size_t i;

  TRACE_IN(Indexer, "AppendTarget");

  DPS_GETLOCK(Indexer, DPS_LOCK_THREAD);
  DPS_GETLOCK(Indexer, DPS_LOCK_CONF);

  /*
   * Scan the queued targets newest-first for a duplicate.  `i` is unsigned,
   * so the countdown is written as `i-- > 0`: this visits every index
   * including 0 (the previous `i > 0` condition never looked at the first
   * target) and does nothing when the list is empty.
   */
  for (i = Indexer->Conf->Targets.num_rows; i-- > 0; ) {
    Doc = &Indexer->Conf->Targets.Doc[i];
    if ((strcasecmp(DpsVarListFindStr(&Doc->Sections, "URL", ""), url) == 0)
        && (strcmp(DpsVarListFindStr(&Doc->RequestHeaders, "Accept-Language", ""), lang) == 0)) {
      DPS_RELEASELOCK(Indexer, DPS_LOCK_CONF);
      DPS_RELEASELOCK(Indexer, DPS_LOCK_THREAD);
      TRACE_OUT(Indexer);
      return;
    }
  }

  /* Grow the array by one slot; on failure keep using the old array. */
  if ((Indexer->Conf->Targets.Doc =
       DpsRealloc(Save = Indexer->Conf->Targets.Doc,
                  (Indexer->Conf->Targets.num_rows + 1) * sizeof(DPS_DOCUMENT))) == NULL) {
    Indexer->Conf->Targets.Doc = Save;
    DPS_RELEASELOCK(Indexer, DPS_LOCK_CONF);
    DPS_RELEASELOCK(Indexer, DPS_LOCK_THREAD);
    TRACE_OUT(Indexer);
    return;
  }

  /* Fill in the new slot; it only becomes part of the list once num_rows is bumped. */
  Doc = &Indexer->Conf->Targets.Doc[Indexer->Conf->Targets.num_rows];
  DpsDocInit(Doc);
  DpsVarListAddStr(&Doc->Sections, "URL", url);
  DpsVarListAddInt(&Doc->Sections, "Hops", hops);
  DpsVarListDel(&Doc->Sections, "URL_ID");
  DpsVarListReplaceInt(&Doc->Sections, "Referrer-ID", parent);
  if (*lang != '\0') DpsVarListAddStr(&Doc->RequestHeaders, "Accept-Language", lang);

  /* Keep the slot only when the lookup filled in a non-zero "DP_ID". */
  if (DPS_OK == DpsURLAction(Indexer, Doc, DPS_URL_ACTION_FINDBYURL)) {
    urlid_t url_id = DpsVarListFindInt(&Doc->Sections, "DP_ID", 0);
    if (url_id != 0) Indexer->Conf->Targets.num_rows++;
    else DpsDocFree(Doc);
  }

  DPS_RELEASELOCK(Indexer, DPS_LOCK_CONF);
  /*
   * NOTE(review): Doc may already have been passed to DpsDocFree() above and
   * still points into the shared array after DPS_LOCK_CONF was released.
   * Statement order is kept as in the original; confirm that
   * DPS_URL_ACTION_ADD is meant to run on the document in that state.
   */
  DpsURLAction(Indexer, Doc, DPS_URL_ACTION_ADD);
  DPS_RELEASELOCK(Indexer, DPS_LOCK_THREAD);
  TRACE_OUT(Indexer);
  return;
}
/*
 * Split a raw HTTP response held in Doc->Buf into headers and body and
 * record the header values in Doc->Sections.
 *
 * Indexer - agent passed through to DpsParseHTTPHeader()
 * Doc     - document whose Buf.buf holds the response; on success Buf.content
 *           points at the first body byte and the header part of Buf.buf is
 *           NUL-terminated in place
 *
 * Sections written: "ResponseSize", "ResponseLine", "Status" (the larger of
 * the previous and the parsed status) and "Content-Length" (number of body
 * bytes actually present in the buffer).  "Content-Length" and
 * "Last-Modified" are removed before parsing.  The function returns early,
 * without a "Content-Length", when the buffer is missing, no header/body
 * separator can be located, or the first line does not start with "HTTP/".
 */
void DpsParseHTTPResponse(DPS_AGENT *Indexer, DPS_DOCUMENT *Doc) {
  char *token, *lt, *headers;
  int oldstatus;
  DPS_DSTR header;

  Doc->Buf.content = NULL;
  oldstatus = DpsVarListFindInt(&Doc->Sections, "Status", 0);
  DpsVarListReplaceInt(&Doc->Sections, "ResponseSize", (int)Doc->Buf.size);
  DpsVarListDel(&Doc->Sections, "Content-Length");
  DpsVarListDel(&Doc->Sections, "Last-Modified");

  if (Doc->Buf.buf == NULL) return;

  /* Cut HTTP response header first */
  for (token = Doc->Buf.buf; *token; token++) {
    if (!strncmp(token, "\r\n\r\n", 4)) {
      *token = '\0';
      Doc->Buf.content = token + 4;
      break;
    } else if (!strncmp(token, "\n\n", 2)) {
      *token = '\0';
      Doc->Buf.content = token + 2;
      break;
    }
  }

  /*
   * No separator before the first NUL byte.  If that NUL lies well inside
   * the buffer, treat it as the place where the separator was and pick the
   * body start from the byte two positions further; otherwise give up.
   */
  if (!Doc->Buf.content) {
    if (token < Doc->Buf.buf + Doc->Buf.size - 4) {
      if (token[2] == '\r') Doc->Buf.content = token + 4;
      else Doc->Buf.content = token + 2;
    } else {
      return;
    }
  }

  /* Copy headers not to break them */
  headers = (char*)DpsStrdup(Doc->Buf.buf);
  if (headers == NULL) return;   /* out of memory: nothing to tokenize */

  /* Now lets parse response header lines */
  token = dps_strtok_r(headers, "\r\n", &lt);
  if (!token) {
    DpsFree(headers);            /* was leaked on this path */
    return;
  }

  if (!strncmp(token, "HTTP/", 5)) {
    /*
     * The status code follows the 8-character "HTTP/x.y" prefix.  Only read
     * it when the line really is that long; a shorter line yields status 0
     * instead of reading past the end of the string.
     */
    int status = (token[5] && token[6] && token[7]) ? atoi(token + 8) : 0;
    DpsVarListReplaceStr(&Doc->Sections, "ResponseLine", token);
    DpsVarListReplaceInt(&Doc->Sections, "Status", (oldstatus > status) ? oldstatus : status);
  } else {
    DpsFree(headers);
    return;
  }

  /*
   * A line containing ':' starts a new header; a line without one is a
   * continuation and is appended to the header collected so far.
   */
  token = dps_strtok_r(NULL, "\r\n", &lt);
  DpsDSTRInit(&header, 128);
  while (token) {
    if (strchr(token, ':')) {
      if (header.data_size) {
        DpsParseHTTPHeader(Indexer, Doc, &header);
        DpsDSTRFree(&header);
        DpsDSTRInit(&header, 128);
      }
    }
    DpsDSTRAppendStr(&header, token);
    token = dps_strtok_r(NULL, "\r\n", &lt);
  }
  if (header.data_size) {
    DpsParseHTTPHeader(Indexer, Doc, &header);
  }
  DpsDSTRFree(&header);
  DPS_FREE(headers);

  /* Body length = total buffer size minus the offset of the body start. */
  DpsVarListInsInt(&Doc->Sections, "Content-Length", Doc->Buf.buf - Doc->Buf.content + (int)Doc->Buf.size);
}
int main(int argc,char **argv, char **envp) { int ch, sleeps = 1, optimize = 0, obi = 0; unsigned int from = 0, to = 0xFFF, p_to = 0; DPS_ENV * Env; const char * config_name = DPS_CONF_DIR "/cached.conf"; DpsInit(argc, argv, envp); /* Initialize library */ DpsInitMutexes(); Env=DpsEnvInit(NULL); if (Env == NULL) exit(1); DpsSetLockProc(Env, DpsLockProc); /*#ifndef HAVE_SETPROCTITLE*/ ARGV = argv; ARGC = argc; /*#endif*/ while ((ch = getopt(argc, argv, "blt:f:op:w:v:h?")) != -1){ switch (ch) { case 'f': sscanf(optarg, "%x", &from); break; case 't': sscanf(optarg, "%x", &p_to); break; case 'w': DpsVarListReplaceStr(&Env->Vars, "VarDir", optarg); break; case 'v': DpsSetLogLevel(NULL, atoi(optarg)); break; case 'b': obi++; break; case 'o': optimize++; break; case 'p': sleeps = atoi(optarg); break; case 'h': case '?': default: usage(); DpsEnvFree(Env); DpsDeInit(); DpsDestroyMutexes(); return 1; break; } } argc -= optind; argv += optind; if(argc > 1) { usage(); DpsEnvFree(Env); DpsDeInit(); DpsDestroyMutexes(); return 1; } else if (argc == 1) { config_name = argv[0]; } { DPS_LOGDEL *del_buf=NULL; size_t del_count = 0, log, bytes, n = 0; int dd, log_fd; struct stat sb; char dname[PATH_MAX] = ""; DPS_BASE_PARAM P; DPS_LOGWORD *log_buf = NULL; DPS_AGENT *Indexer = DpsAgentInit(NULL, Env, 0); log2stderr = 1; if (Indexer == NULL) { fprintf(stderr, "Can't alloc Agent at %s:%d\n", __FILE__, __LINE__); exit(DPS_ERROR); } if(DPS_OK != DpsEnvLoad(Indexer, config_name, (dps_uint8)0)){ fprintf(stderr, "%s\n", DpsEnvErrMsg(Env)); DpsEnvFree(Env); DpsDeInit(); DpsDestroyMutexes(); return DPS_ERROR; } DpsOpenLog("splitter", Env, log2stderr); Indexer->flags = Env->flags = DPS_FLAG_UNOCON; DpsVarListAddLst(&Indexer->Vars, &Env->Vars, NULL, "*"); bzero(&P, sizeof(P)); P.subdir = DPS_TREEDIR; P.basename = "wrd"; P.indname = "wrd"; P.mode = DPS_WRITE_LOCK; P.NFiles = DpsVarListFindInt(&Indexer->Conf->Vars, "WrdFiles", 0x300); P.vardir = DpsStrdup(DpsVarListFindStr(&Indexer->Conf->Vars, 
"VarDir", DPS_VAR_DIR)); P.A = Indexer; if (p_to != 0) to = p_to; else to = P.NFiles - 1; #ifdef HAVE_ZLIB P.zlib_method = Z_DEFLATED; P.zlib_level = 9; P.zlib_windowBits = DPS_BASE_WRD_WINDOWBITS; P.zlib_memLevel = 9; P.zlib_strategy = DPS_BASE_WRD_STRATEGY; #endif /* Open del log file */ dps_snprintf(dname,sizeof(dname),"%s%c%s%cdel-split.log", P.vardir, DPSSLASH, DPS_SPLDIR, DPSSLASH); if((dd = DpsOpen2(dname, O_RDONLY | DPS_BINARY)) < 0) { dps_strerror(NULL, 0, "Can't open del log '%s'", dname); exit(DPS_ERROR); } DpsLog(Indexer, DPS_LOG_DEBUG, "VarDir: %s, WrdFiles: %d [%x]", P.vardir, P.NFiles, P.NFiles); /* Allocate del buffer */ fstat(dd, &sb); if (sb.st_size != 0) { del_buf=(DPS_LOGDEL*)DpsMalloc((size_t)sb.st_size + 1); if (del_buf == NULL) { fprintf(stderr, "Can't alloc %d bytes at %s:%d\n", (int)sb.st_size, __FILE__, __LINE__); exit(0); } del_count=read(dd,del_buf,(size_t)sb.st_size)/sizeof(DPS_LOGDEL); } DpsClose(dd); /* Remove duplicates URLs in DEL log */ /* Keep only oldest records for each URL */ if (del_count > 0) { DpsLog(Indexer, DPS_LOG_DEBUG, "Sorting del_buf: %d items", del_count); if (del_count > 1) DpsSort(del_buf, (size_t)del_count, sizeof(DPS_LOGDEL), DpsCmpurldellog); DpsLog(Indexer, DPS_LOG_DEBUG, "Removing DelLogDups"); del_count = DpsRemoveDelLogDups(del_buf, del_count); } DpsLog(Indexer, DPS_LOG_DEBUG, "Processing Bufs from %d [%x] to %d [%x]", from, from, to, to); for(log = from; log <= to; log++) { /* Open log file */ dps_snprintf(dname, sizeof(dname), "%s%c%s%c%03X-split.log", P.vardir, DPSSLASH, DPS_SPLDIR, DPSSLASH, log); if((log_fd = DpsOpen2(dname, O_RDWR|DPS_BINARY)) < 0){ if (errno == ENOENT) { dps_strerror(Indexer, DPS_LOG_DEBUG, "Can't open '%s'", dname); n = 0; /* continue;*/ } else { dps_strerror(Indexer, DPS_LOG_ERROR, "Can't open '%s'", dname); continue; } } else { DpsWriteLock(log_fd); DpsLog(Indexer, DPS_LOG_DEBUG, "Processing Log: %x", log); fstat(log_fd, &sb); log_buf = (sb.st_size > 0) ? 
(DPS_LOGWORD*)DpsMalloc((size_t)sb.st_size + 1) : NULL; if (log_buf != NULL) { unlink(dname); bytes = read(log_fd,log_buf,(size_t)sb.st_size); (void)ftruncate(log_fd, (off_t)0); DpsUnLock(log_fd); DpsClose(log_fd); n = bytes / sizeof(DPS_LOGWORD); DpsLog(Indexer, DPS_LOG_DEBUG, "Sorting log_buf: %d items", n); if (n > 1) DpsSort(log_buf, n, sizeof(DPS_LOGWORD), (qsort_cmp)DpsCmplog); DpsLog(Indexer, DPS_LOG_DEBUG, "Removing OldWords"); n = DpsRemoveOldWords(log_buf, n, del_buf, del_count); if (n > 1) DpsSort(log_buf, n, sizeof(DPS_LOGWORD), (qsort_cmp)DpsCmplog_wrd); } else { n = 0; DpsUnLock(log_fd); DpsClose(log_fd); } } DpsLog(Indexer, DPS_LOG_DEBUG, "Processing Buf, optimize: %d", optimize); if (obi) DpsBaseOptimize(&P, log); DpsProcessBuf(Indexer, &P, log, log_buf, n, del_buf, del_count); if (optimize) DpsBaseOptimize(&P, log); DpsBaseClose(&P); DPS_FREE(log_buf); DpsLog(Indexer, DPS_LOG_DEBUG, "pas done: %d from %d to %d", log, from, to); DPSSLEEP(sleeps); } DPS_FREE(del_buf); DpsAgentFree(Indexer); DPS_FREE(P.vardir); } fprintf(stderr, "Splitting done.\n"); DpsEnvFree(Env); DpsDeInit(); DpsDestroyMutexes(); #ifdef EFENCE fprintf(stderr, "Memory leaks checking\n"); DpsEfenceCheckLeaks(); #endif #ifdef FILENCE fprintf(stderr, "FD leaks checking\n"); DpsFilenceCheckLeaks(NULL); #endif return 0; }
/*
 * Ask a searchd server for document details and merge them into Res.
 *
 * query - agent used for logging; error text goes to query->Conf->errstr
 * cl    - database descriptor; cl->searchd is the connection that is used
 * Res   - result set; every Res->Doc[i] is serialised and sent, and each
 *         returned record is merged into the document with the same "DP_ID"
 * clnum - not used by this function
 *
 * Returns DPS_OK when there is nothing to send or the exchange completed,
 * DPS_ERROR when a document cannot be serialised, memory runs out while
 * building the request, the reply header is incomplete, or the server
 * answers with an error or an unknown command.
 *
 * The request/reply buffer `dinfo` is owned by this function and released on
 * every exit path.
 */
int __DPSCALL DpsResAddDocInfoSearchd(DPS_AGENT * query,DPS_DB *cl,DPS_RESULT * Res,size_t clnum){
  DPS_SEARCHD_PACKET_HEADER hdr;
  char * msg=NULL;
  size_t i; /* num=0,curnum=0;*/
  int done = 0;
  ssize_t nsent,nrecv;
  char * dinfo=NULL;
  char * grown;
  int rc=DPS_OK;
  char *textbuf;
  size_t dlen = 0;

  TRACE_IN(query, "DpsResAddDocInfoSearchd");

  if(!Res->num_rows) {
    TRACE_OUT(query);
    return(DPS_OK);
  }

  /* Build the request: one serialised document per line, lines end with CRLF. */
  for(i=0;i<Res->num_rows;i++){
    size_t ulen;
    size_t olen;
    size_t nsec, r;
    DPS_DOCUMENT *D=&Res->Doc[i];

    /* Set .section = 1 on every "Score" variable in the 's' bucket of the section list. */
    r = (size_t) 's';
    for(nsec = 0; nsec < D->Sections.Root[r].nvars; nsec++)
      if (strcasecmp(D->Sections.Root[r].Var[nsec].name, "Score") == 0) D->Sections.Root[r].Var[nsec].section = 1;

#ifdef WITH_MULTIDBADDR
    if (D->dbnum != cl->dbnum) continue;
#endif

    textbuf = DpsDocToTextBuf(D, 1, 0);
    if (textbuf == NULL) {
      DPS_FREE(dinfo);           /* lines collected so far were leaked here */
      TRACE_OUT(query);
      return DPS_ERROR;
    }
    ulen = dps_strlen(textbuf)+2;
    olen = dlen;
    dlen = dlen + ulen;
    /* Grow through a temporary so the old buffer can still be freed on failure. */
    grown = (char*)DpsRealloc(dinfo, dlen + 1);
    if (grown == NULL) {
      DpsFree(textbuf);
      DPS_FREE(dinfo);
      TRACE_OUT(query);
      return DPS_ERROR;
    }
    dinfo = grown;
    dinfo[olen] = '\0';
    sprintf(dinfo + olen, "%s\r\n", textbuf);
    DpsFree(textbuf);
  }

  if (dinfo == NULL) {
    TRACE_OUT(query);
    return DPS_OK;
  }

  hdr.cmd=DPS_SEARCHD_CMD_DOCINFO;
  hdr.len = dps_strlen(dinfo);

  nsent = DpsSearchdSendPacket(cl->searchd, &hdr, dinfo);
#ifdef DEBUG_SDP
  DpsLog(query, DPS_LOG_ERROR, "Sent DOCINFO size=%d buf=%s\n", hdr.len, dinfo);
#endif

  /* Read packets until a terminal one (ERROR, DOCINFO or unknown) arrives. */
  while(!done){
    char * tok, * lt;

    nrecv = DpsRecvall(cl->searchd, &hdr, sizeof(hdr), 360);
    if(nrecv!=sizeof(hdr)){
      DpsLog(query, DPS_LOG_ERROR, "Received incomplete header from searchd (%d bytes, errno:%d)", (int)nrecv, errno);
      DPS_FREE(dinfo);           /* request buffer was leaked here */
      TRACE_OUT(query);
      return(DPS_ERROR);
    }else{
#ifdef DEBUG_SDP
      DpsLog(query, DPS_LOG_ERROR, "Received header cmd=%d len=%d\n",hdr.cmd,hdr.len);
#endif
    }

    switch(hdr.cmd){
      case DPS_SEARCHD_CMD_ERROR:
        msg=(char*)DpsMalloc(hdr.len+1);
        if (msg == NULL) {
          done = 1;
          break;
        }
        nrecv = DpsRecvall(cl->searchd, msg, hdr.len, 360);
        msg[(nrecv >= 0) ? nrecv : 0]='\0';
        /*
         * NOTE(review): msg comes from the network and its length is not
         * checked against the capacity of errstr; confirm the size of
         * query->Conf->errstr and bound this write.
         */
        sprintf(query->Conf->errstr,"Searchd error: '%s'",msg);
        rc=DPS_ERROR;
        DPS_FREE(msg);
        done=1;
        break;

      case DPS_SEARCHD_CMD_MESSAGE:
        /* Informational packet: consume it and keep waiting. */
        msg=(char*)DpsMalloc(hdr.len+1);
        if (msg == NULL) {
          done = 1;
          break;
        }
        nrecv = DpsRecvall(cl->searchd, msg, hdr.len, 360);
        msg[(nrecv >= 0) ? nrecv : 0]='\0';
#ifdef DEBUG_SDP
        DpsLog(query, DPS_LOG_ERROR, "Message from searchd: '%s'\n",msg);
#endif
        DPS_FREE(msg);
        break;

      case DPS_SEARCHD_CMD_DOCINFO:
        /* Reuse the request buffer for the reply. */
        grown = (char*)DpsRealloc(dinfo, hdr.len + 1);
        if (grown == NULL) {
          done=1;
          break;
        }
        dinfo = grown;
        nrecv = DpsRecvall(cl->searchd, dinfo, hdr.len, 360);
        dinfo[(nrecv > 0) ? nrecv : 0] = '\0';
#ifdef DEBUG_SDP
        DpsLog(query, DPS_LOG_ERROR, "Received DOCINFO size=%d buf=%s\n",hdr.len,dinfo);
#endif
        tok = dps_strtok_r(dinfo, "\r\n", &lt, NULL);

        while(tok){
          urlid_t Doc_url_id, Res_Doc_url_id;
          DPS_DOCUMENT Doc;

          /* Parse the line once into a scratch document just to learn its DP_ID. */
          DpsDocInit(&Doc);
          DpsDocFromTextBuf(&Doc,tok);
          Doc_url_id = (urlid_t)DpsVarListFindInt(&Doc.Sections, "DP_ID", 0);

          for(i=0;i<Res->num_rows;i++){
#ifdef WITH_MULTIDBADDR
            if (Res->Doc[i].dbnum != cl->dbnum) continue;
#endif
            Res_Doc_url_id = (urlid_t)DpsVarListFindInt(&Res->Doc[i].Sections, "DP_ID", 0);
            if (Res_Doc_url_id == Doc_url_id) {
              DpsDocFromTextBuf(&Res->Doc[i], tok);
              break;
            }
          }
          tok = dps_strtok_r(NULL, "\r\n", &lt, NULL);
          DpsDocFree(&Doc);
        }
        done=1;
        break;

      default:
        sprintf(query->Conf->errstr,"Unknown searchd response: cmd=%d len=%d",hdr.cmd,hdr.len);
        rc=DPS_ERROR;
        done=1;
        break;
    }
  }

  /* Single release point for the request/reply buffer. */
  DPS_FREE(dinfo);
  TRACE_OUT(query);
  return rc;
}
/*
 * Split a raw HTTP response held in Doc->Buf into headers and body and
 * record the header values in Doc->Sections.
 *
 * Indexer - agent passed to DpsParseHTTPHeader(); Indexer->now is the
 *           reference time for the Last-Modified sanity check
 * Doc     - document whose Buf.buf holds the response; when a header/body
 *           separator is found, Buf.content points at the first body byte
 *           and the header part of Buf.buf is NUL-terminated in place
 *
 * Sections written: "ResponseSize", "ResponseLine", "Status" (the larger of
 * the previous and the parsed status) and "Content-Length".  A
 * "Last-Modified" value more than four hours ahead of Indexer->now is
 * dropped.  Headers are parsed even when no body start could be located; in
 * that case "Content-Length" is left as the headers set it.
 */
void DpsParseHTTPResponse(DPS_AGENT *Indexer, DPS_DOCUMENT *Doc) {
  char *token, *lt, *headers, savec;
  int status, oldstatus;
  DPS_DSTR header;
  time_t now, last_mod_time;

  Doc->Buf.content = NULL;
  oldstatus = DpsVarListFindInt(&Doc->Sections, "Status", 0);
  DpsVarListReplaceInt(&Doc->Sections, "ResponseSize", (int)Doc->Buf.size);
  DpsVarListDel(&Doc->Sections, "Content-Length");
  /* "Last-Modified" is deliberately kept: if it is not deleted, it equals */
  /* the value from the document's first appearance in the database.       */

  if (Doc->Buf.buf == NULL) return;

  /* Cut HTTP response header first; the separator must lie inside the buffer. */
  for (token = Doc->Buf.buf; *token; token++) {
    if (!strncmp(token, "\r\n\r\n", 4)) {
      if (token <= Doc->Buf.buf + Doc->Buf.size - 4) {
        *token = '\0';
        Doc->Buf.content = token + 4;
      }
      break;
    } else if (!strncmp(token, "\n\n", 2)) {
      if (token <= Doc->Buf.buf + Doc->Buf.size - 2) {
        *token = '\0';
        Doc->Buf.content = token + 2;
      }
      break;
    }
  }

  /*
   * No usable separator.  If the scan stopped well inside the buffer, pick
   * the body start from the byte two positions further.
   */
  if (!Doc->Buf.content) {
    if (token <= Doc->Buf.buf + Doc->Buf.size - 4) {
      if (token[2] == CR_CHAR) Doc->Buf.content = token + 4;
      else Doc->Buf.content = token + 2;
    }
  }

  /* Copy headers not to break them */
  headers = (char*)DpsStrdup(Doc->Buf.buf);
  if (headers == NULL) return;   /* out of memory: nothing to tokenize */

  /* Now lets parse response header lines */
  token = dps_strtok_r(headers, "\r\n", &lt, &savec);
  if (!token) {
    DpsFree(headers);
    return;
  }

  if (!strncmp(token, "HTTP/", 5)) {
    /*
     * The status code follows the 8-character "HTTP/x.y" prefix.  Only read
     * it when the line really is that long; a shorter line yields status 0
     * instead of reading past the end of the string.
     */
    status = (token[5] && token[6] && token[7]) ? atoi(token + 8) : 0;
    DpsVarListReplaceStr(&Doc->Sections, "ResponseLine", token);
    DpsVarListReplaceInt(&Doc->Sections, "Status", (oldstatus > status) ? oldstatus : status);
  } else {
    DpsFree(headers);
    return;
  }

  /*
   * A line containing ':' starts a new header; a line without one is a
   * continuation and is appended to the header collected so far.
   */
  token = dps_strtok_r(NULL, "\r\n", &lt, &savec);
  DpsDSTRInit(&header, 128);
  while (token) {
    if (strchr(token, ':')) {
      if (header.data_size) {
        DpsParseHTTPHeader(Indexer, Doc, &header);
        DpsDSTRFree(&header);
        DpsDSTRInit(&header, 128);
      }
    }
    DpsDSTRAppendStr(&header, token);
    token = dps_strtok_r(NULL, "\r\n", &lt, &savec);
  }
  if (header.data_size) {
    DpsParseHTTPHeader(Indexer, Doc, &header);
  }
  DpsDSTRFree(&header);
  DPS_FREE(headers);

  {
    now = Indexer->now;
    last_mod_time = DpsHttpDate2Time_t(DpsVarListFindStr(&Doc->Sections, "Last-Modified", ""));
    if (last_mod_time > now + 3600 * 4) {
      /* we have a document with Last-Modified time in the future */
      /* time_t is not int: print both values as long to match the format. */
      DpsLog(Indexer, DPS_LOG_EXTRA, "Last-Modified date is deep in future (%ld>%ld), dropping it.",
             (long)last_mod_time, (long)now);
      DpsVarListDel(&Doc->Sections, "Last-Modified");
    }
  }

  /* Bad response, return */
  if (!Doc->Buf.content) {
    return;
  }

  /* Bytes of body present in the buffer, added to whatever "Content-Length" currently holds. */
  DpsVarListReplaceInt(&Doc->Sections, "Content-Length",
                       Doc->Buf.buf - Doc->Buf.content + (int)Doc->Buf.size
                       + DpsVarListFindInt(&Doc->Sections, "Content-Length", 0));
}
int main(int argc, char ** argv, char **envp) { const char *env, *bcharset, *lcharset, *conf_dir; char template_name[PATH_MAX+6]=""; char *template_filename = NULL; char *query_string = NULL; char self[1024]=""; char *url = NULL; const char *ResultContentType; int res,httpd=0; size_t catcolumns = 0; int page_size,page_number; DPS_ENV *Env; DPS_AGENT *Agent; DPS_VARLIST query_vars; /* Output Content-type if under HTTPD */ /* Some servers do not pass QUERY_STRING */ /* if the query was empty, so check */ /* REQUEST_METHOD too to be safe */ httpd=(getenv("QUERY_STRING")||getenv("REQUEST_METHOD")); if (!(conf_dir=getenv("DPS_ETC_DIR"))) conf_dir=DPS_CONF_DIR; DpsInit(argc, argv, envp); Env=DpsEnvInit(NULL); if (Env == NULL) { if(httpd){ printf("Content-Type: text/plain\r\n\r\n"); } printf("Can't alloc Env\n"); exit(0); } DpsVarListInit(&query_vars); Agent = DpsAgentInit(NULL, Env, 0); if (Agent == NULL) { if(httpd){ printf("Content-Type: text/plain\r\n\r\n"); } printf("Can't alloc Agent\n"); exit(0); } DpsVarListAddEnviron(&Env->Vars,"ENV"); /* Detect self and template name */ if((env = getenv("DPSEARCH_TEMPLATE"))) dps_strncpy(template_name, env, sizeof(template_name) - 1); else if((env = getenv("PATH_INFO")) && env[0]) dps_strncpy(template_name, env + 1, sizeof(template_name) - 1); if((env=getenv("DPSEARCH_SELF"))) dps_strncpy(self,env,sizeof(self)-1); if((env=getenv("QUERY_STRING"))){ query_string = (char*)DpsRealloc(query_string, dps_strlen(env) + 2); if (query_string == NULL) { if(httpd){ printf("Content-Type: text/plain\r\n\r\n"); } printf("Can't alloc query_string\n"); exit(0); } dps_strncpy(query_string, env, dps_strlen(env) + 1); /* Hack for Russian Apache from apache.lexa.ru */ /* QUERY_STRING is already converted to server */ /* character set. We must print original query */ /* string instead however. Under usual apache */ /* we'll use QUERY_STRING. 
Note that query_vars */ /* list will contain not unescaped values, so */ /* we don't have to escape them when displaying */ env = getenv("CHARSET_SAVED_QUERY_STRING"); DpsParseQStringUnescaped(&query_vars,env?env:query_string); /* Unescape and save variables from QUERY_STRING */ /* Env->Vars will have unescaped values however */ DpsParseQueryString(Agent,&Env->Vars,query_string); template_filename = (char*)DpsStrdup(DpsVarListFindStr(&Env->Vars, "tmplt", "")); if((env=getenv("REDIRECT_STATUS"))){ /* Check Apache internal redirect */ /* via "AddHandler" and "Action" */ if(!self[0]){ dps_strncpy(self,(env=getenv("REDIRECT_URL"))?env:"filler.cgi",sizeof(self)-1); } if(!template_name[0]){ dps_strncpy(template_name,(env=getenv("PATH_TRANSLATED"))?env:"",sizeof(template_name)-1); } if (*template_filename == '\0') { DPS_FREE(template_filename); template_filename = (char*)DpsStrdup("filler.htm"); } }else{ /* CGI executed without Apache internal redirect */ /* Detect $Self variable with OS independant SLASHES */ if(!self[0]){ dps_strncpy(self,(env=getenv("SCRIPT_NAME"))?env:"filler.cgi",sizeof(self)-1); } if(!template_name[0]){ char *s,*e; /*This is with OS specific SLASHES */ env=((env=getenv("SCRIPT_FILENAME"))?env:"filler.cgi"); if(strcmp(conf_dir,".")){ /* Take from the config directory */ dps_snprintf(template_name, sizeof(template_name)-1, "%s/%s", conf_dir,(s=strrchr(env,DPSSLASH))?(s+1):(self)); }else{ /* Take from the current directory */ dps_strncpy(template_name,env,sizeof(template_name)-1); } /* Find right slash if it presents */ s=((s=strrchr(template_name,DPSSLASH))?s:template_name); if (*template_filename == '\0') { /* Find .cgi substring */ if ((e = strstr(s, ".cgi")) != NULL) { /* Replace ".cgi" with ".htm" */ e[1]='h';e[2]='t';e[3]='m'; } else { dps_strcat(s, ".htm"); } e = strrchr(s, '/'); DPS_FREE(template_filename); template_filename = (char*)DpsStrdup(e + 1); } else { dps_strncpy(s + 1, template_filename, sizeof(template_name) - (s - template_name) - 
2); } } } }else{ /* Executed from command line */ /* or under server which does not */ /* pass an empty QUERY_STRING var */ if(argv[1]) { query_string = (char*)DpsRealloc(query_string, dps_strlen(argv[1]) + 10); if (query_string == NULL) { if(httpd){ printf("Content-Type: text/plain\r\n\r\n"); } printf("Can't realloc query_string\n"); exit(0); } sprintf(query_string, "q=%s", argv[1]); } else { query_string = (char*)DpsRealloc(query_string, 1024); if (query_string == NULL) { if(httpd){ printf("Content-Type: text/plain\r\n\r\n"); } printf("Can't realloc query_string\n"); exit(0); } sprintf(query_string, "q="); } /* Hack for Russian Apache from apache.lexa.ru */ /* QUERY_STRING is already converted to server */ /* character set. We must print original query */ /* string instead however. Under usual apache */ /* we'll use QUERY_STRING. Note that query_vars */ /* list will contain not unescaped values, so */ /* we don't have to escape them when displaying */ env = getenv("CHARSET_SAVED_QUERY_STRING"); DpsParseQStringUnescaped(&query_vars,env?env:query_string); /* Unescape and save variables from QUERY_STRING */ /* Env->Vars will have unescaped values however */ DpsParseQueryString(Agent,&Env->Vars,query_string); DPS_FREE(template_filename); template_filename = (char*)DpsStrdup(DpsVarListFindStr(&Env->Vars, "tmplt", "")); if (*template_filename == '\0') { DPS_FREE(template_filename); template_filename = (char*)DpsStrdup("filler.htm"); } /*// Get template name from command line variable &tmplt */ if(!template_name[0]) dps_snprintf(template_name,sizeof(template_name),"%s/%s", conf_dir, template_filename); } DpsVarListReplaceStr(&Agent->Conf->Vars, "tmplt", template_filename); DPS_FREE(template_filename); Agent->tmpl.Env_Vars = &Env->Vars; DpsURLNormalizePath(template_name); if (strncmp(template_name, conf_dir, dps_strlen(conf_dir)) || (res = DpsTemplateLoad(Agent, Env, &Agent->tmpl, template_name))) { if (strcmp(template_name, "filler.htm")) { /* trying load default 
template */ fprintf(stderr, "Can't load template: '%s' %s\n", template_name, Env->errstr); DPS_FREE(template_filename); template_filename = (char*)DpsStrdup("filler.htm"); dps_snprintf(template_name, sizeof(template_name), "%s/%s", conf_dir, template_filename); if ((res = DpsTemplateLoad(Agent, Env, &Agent->tmpl, template_name))) { if(httpd)printf("Content-Type: text/plain\r\n\r\n"); printf("%s\n",Env->errstr); DpsVarListFree(&query_vars); DpsEnvFree(Env); DPS_FREE(query_string); DpsAgentFree(Agent); return(0); } } else { if(httpd)printf("Content-Type: text/plain\r\n\r\n"); printf("%s\n",Env->errstr); DpsVarListFree(&query_vars); DpsEnvFree(Env); DPS_FREE(query_string); DpsAgentFree(Agent); return(0); } } /* set locale if specified */ if ((url = DpsVarListFindStr(&Env->Vars, "Locale", NULL)) != NULL) { setlocale(LC_ALL, url); /*#ifdef HAVE_ASPELL*/ { char *p; if ((p = strchr(url, '.')) != NULL) { *p = '\0'; DpsVarListReplaceStr(&Env->Vars, "g-lc", url); *p = '.'; } } /*#endif*/ url = NULL; } /* Call again to load search Limits if need */ DpsParseQueryString(Agent, &Env->Vars, query_string); Agent->Flags = Env->Flags; Agent->flags |= DPS_FLAG_UNOCON; Env->flags |= DPS_FLAG_UNOCON; DpsSetLogLevel(NULL, DpsVarListFindInt(&Env->Vars, "LogLevel", 0)); DpsOpenLog("filler.cgi", Env, !strcasecmp(DpsVarListFindStr(&Env->Vars, "Log2stderr", (!httpd) ? 
"yes" : "no"), "yes")); DpsLog(Agent,DPS_LOG_ERROR,"filler.cgi started with '%s'",template_name); DpsLog(Agent, DPS_LOG_DEBUG, "VarDir: '%s'", DpsVarListFindStr(&Agent->Conf->Vars, "VarDir", DPS_VAR_DIR)); DpsLog(Agent, DPS_LOG_DEBUG, "Affixes: %d, Spells: %d, Synonyms: %d, Acronyms: %d, Stopwords: %d", Env->Affixes.naffixes,Env->Spells.nspell, Env->Synonyms.nsynonyms, Env->Acronyms.nacronyms, Env->StopWords.nstopwords); DpsLog(Agent, DPS_LOG_DEBUG, "Chinese dictionary with %d entries", Env->Chi.nwords); DpsLog(Agent, DPS_LOG_DEBUG, "Korean dictionary with %d entries", Env->Korean.nwords); DpsLog(Agent, DPS_LOG_DEBUG, "Thai dictionary with %d entries", Env->Thai.nwords); DpsVarListAddLst(&Agent->Vars, &Env->Vars, NULL, "*"); Agent->tmpl.Env_Vars = &Agent->Vars; /* DpsVarListAddEnviron(&Agent->Vars, "ENV");*/ /****************************************************************************************************************************************/ /* This is for query tracking */ DpsVarListAddStr(&Agent->Vars, "QUERY_STRING", query_string); DpsVarListAddStr(&Agent->Vars, "self", self); env = getenv("HTTP_X_FORWARDER_FOR"); if (env) { DpsVarListAddStr(&Agent->Vars, "IP", env); } else { env = getenv("REMOTE_ADDR"); DpsVarListAddStr(&Agent->Vars, "IP", env ? 
env : "localhost"); } bcharset = DpsVarListFindStr(&Agent->Vars, "BrowserCharset", "iso-8859-1"); Env->bcs=DpsGetCharSet(bcharset); lcharset = DpsVarListFindStr(&Agent->Vars, "LocalCharset", "iso-8859-1"); Env->lcs=DpsGetCharSet(lcharset); ResultContentType = DpsVarListFindStr(&Agent->Vars, "ResultContentType", "text/html"); if(httpd){ if(!Env->bcs){ printf("Content-Type: text/plain\r\n\r\n"); printf("Unknown BrowserCharset '%s' in template '%s'\n",bcharset,template_name); exit(0); }else if(!Env->lcs){ printf("Content-Type: text/plain\r\n\r\n"); printf("Unknown LocalCharset '%s' in template '%s'\n",lcharset,template_name); exit(0); }else{ printf("Content-type: %s; charset=%s\r\n\r\n", ResultContentType, bcharset); } }else{ if(!Env->bcs){ printf("Unknown BrowserCharset '%s' in template '%s'\n",bcharset,template_name); exit(0); } if(!Env->lcs){ printf("Unknown LocalCharset '%s' in template '%s'\n",lcharset,template_name); exit(0); } } /* These parameters taken from "variable section of template"*/ res = DpsVarListFindInt(&Agent->Vars, "ps", DPS_DEFAULT_PS); page_size = dps_min(res, MAX_PS); page_number = DpsVarListFindInt(&Agent->Vars, "p", 0); if (page_number == 0) { page_number = DpsVarListFindInt(&Agent->Vars, "np", 0); DpsVarListReplaceInt(&Agent->Vars, "p", page_number + 1); } else page_number--; res = DpsVarListFindInt(&Agent->Vars, "np", 0) * page_size; DpsVarListAddInt(&Agent->Vars, "pn", res); catcolumns = (size_t)atoi(DpsVarListFindStr(&Agent->Vars, "CatColumns", "")); DpsTemplatePrint(Agent, (DPS_OUTPUTFUNCTION)&fprintf, stdout, NULL, 0, &Agent->tmpl, "top"); DpsTemplatePrint(Agent, (DPS_OUTPUTFUNCTION)&fprintf, stdout, NULL, 0, &Agent->tmpl, "restop"); DpsTemplatePrint(Agent, (DPS_OUTPUTFUNCTION)&fprintf, stdout, NULL, 0, &Agent->tmpl, "res"); DpsTemplatePrint(Agent, (DPS_OUTPUTFUNCTION)&fprintf, stdout, NULL, 0, &Agent->tmpl, "resbot"); DpsTemplatePrint(Agent, (DPS_OUTPUTFUNCTION)&fprintf, stdout, NULL, 0, &Agent->tmpl, "bottom"); 
DpsVarListFree(&query_vars); DpsAgentFree(Agent); DpsEnvFree(Env); DPS_FREE(query_string); DPS_FREE(url); if (httpd) fflush(NULL); else fclose(stdout); #ifdef EFENCE fprintf(stderr, "Memory leaks checking\n"); DpsEfenceCheckLeaks(); #endif #ifdef FILENCE fprintf(stderr, "FD leaks checking\n"); DpsFilenceCheckLeaks(NULL); #endif return DPS_OK; }
/*
 * Defragment one base file pair (index file P->Ifd, data file P->Sfd), or
 * every base when sbase is -1.
 *
 * P     - base descriptor; P->A supplies the "OptimizeRatio" variable
 *         (default 15) and is used for logging
 * sbase - base number to process, or -1 for all bases 0 .. P->NFiles-1
 *
 * For each base the index is scanned to measure how much of the data file
 * is referenced by valid records.  When the unreferenced share reaches
 * OptimizeRatio percent (or the data file is non-empty but holds no valid
 * data), the records are sorted, moved towards the start of the data file,
 * the data file is truncated, and the index file is rebuilt from scratch.
 *
 * Returns DPS_OK, DPS_ERROR on I/O or allocation failure, or the result of
 * DpsBaseSeek() when that call fails.
 */
extern __C_LINK int __DPSCALL DpsBaseOptimize(DPS_BASE_PARAM *P, int sbase) {
  struct stat sb;
  urlid_t base, base_from, base_to;
  long unsigned ActualSize, OriginalSize, i, nitems;
  off_t pos, posold, NewItemPos, SSize;
  dps_uint8 diff, gain;
  double dr = 0.0, cr = 0.0;
  ssize_t nread;
  size_t rsize;
  ssize_t wr;
  int OptimizeRatio, res, error_cnt;
  char buffer[BUFSIZ];
  DPS_BASEITEM *hTable;
  DPS_SORTBASEITEM *si = NULL;

  OptimizeRatio = DpsVarListFindInt(&P->A->Vars, "OptimizeRatio", 15);
  P->mode = DPS_WRITE_LOCK;

  /* sbase == -1 selects every base, otherwise just the one requested. */
  if (sbase == -1) { base_from = 0; base_to = (urlid_t)P->NFiles; }
  else { base_from = sbase; base_to = sbase + 1; }

  for (base = base_from; base < base_to; base++) {
    error_cnt = 0;
    gain = (dps_uint8)0;

    /* Build a rec_id from the base number and open the base it selects. */
    P->rec_id = ((base & DPS_BASE_MASK) << DPS_BASE_BITS);
    if (DpsBaseOpen(P, DPS_WRITE_LOCK) != DPS_OK) {
      DpsLog(P->A, DPS_LOG_ERROR, "Can't open base %s/%s {%s:%d}", P->subdir, P->basename, __FILE__, __LINE__);
      DpsBaseClose(P);
      return DPS_ERROR;
    }
    if (lseek(P->Ifd, (off_t)0, SEEK_SET) == (off_t)-1) {
      DpsLog(P->A, DPS_LOG_ERROR, "Can't seek %s {%s:%d}", P->Ifilename, __FILE__, __LINE__);
      DpsBaseClose(P);
      return DPS_ERROR;
    }

    /* SSize = current size of the data file (fstat, with lseek as fallback). */
    if (fstat(P->Sfd, &sb) == 0) { SSize = sb.st_size; }
    else {
      if ((SSize = (off_t)lseek(P->Sfd, (off_t)0, SEEK_END)) == (off_t)-1) {
        DpsLog(P->A, DPS_LOG_ERROR, "Can't seek %s {%s:%d}", P->Sfilename, __FILE__, __LINE__);
        DpsBaseClose(P);
        return DPS_ERROR;
      }
    }

    /*
     * Pass 1: count all index records and sum the stored and original sizes
     * of the valid ones (non-zero rec_id, offset inside the data file,
     * non-zero size).
     */
    nitems = 0; ActualSize = 0; OriginalSize = 0;
    while(read(P->Ifd, &P->Item, sizeof(DPS_BASEITEM)) == sizeof(DPS_BASEITEM)) {
      nitems++;
      if ((P->Item.rec_id != 0) && ((dps_uint8)P->Item.offset < (dps_uint8)SSize) && (P->Item.size > 0)) {
        ActualSize += (long unsigned)P->Item.size;
        OriginalSize += (long unsigned)(P->Item.orig_size ? P->Item.orig_size : P->Item.size);
      }
    }
    /* Cut off any partial record left at the end of the index file. */
    if (ftruncate(P->Ifd, (off_t)(nitems * sizeof(DPS_BASEITEM))) != 0) {
      dps_strerror(P->A, DPS_LOG_EXTRA, "ftruncate error (pos:%ld) [%s:%d]", (off_t)(nitems * sizeof(DPS_BASEITEM)), __FILE__, __LINE__);
    }

    /* dr: percent of the data file not covered by valid records; cr: stored/original size in percent. */
    dr = (nitems) ? fabs(100.0 * ((long unsigned)SSize - ActualSize) / ((double)SSize + 1.0)) : 0.0;
    cr = (nitems) ? fabs(100.0 * ActualSize / (OriginalSize + 1)) : 0.0;
    DpsLog(P->A, DPS_LOG_EXTRA, "Optimize: %s/%s base 0x%X, %ld recs defrag: %.2f%% Ratio: %.2f%% Data: %ld File: %ld", P->subdir, P->basename, P->FileNo, nitems, dr, cr, ActualSize, (long)SSize);

    if ((dr >= (double)OptimizeRatio) || (ActualSize == 0 && SSize != 0)) {

      si = (DPS_SORTBASEITEM*)DpsMalloc((nitems + 1) * sizeof(DPS_SORTBASEITEM));
      if (si == NULL) {
        DpsLog(P->A, DPS_LOG_ERROR, "Can't alloc si (%d bytes) at {%s:%d}", (nitems + 1) * sizeof(DPS_SORTBASEITEM), __FILE__, __LINE__);
        DpsBaseClose(P);
        return DPS_ERROR;
      }
      if (lseek(P->Ifd, (off_t)0, SEEK_SET) == (off_t)-1) {
        DpsLog(P->A, DPS_LOG_ERROR, "Can't seek %s {%s:%d}", P->Ifilename, __FILE__, __LINE__);
        DpsBaseClose(P);
        DPS_FREE(si);
        return DPS_ERROR;
      }

      /*
       * Pass 2: load the index into si[].  i only advances for valid
       * records, so an invalid record is overwritten by the next read and
       * si[0..i-1] ends up holding the valid records only.
       */
      for (i = 0; (i < nitems) && (read(P->Ifd, &si[i].Item, sizeof(DPS_BASEITEM)) == sizeof(DPS_BASEITEM)); ) {
        if(si[i].Item.rec_id != 0 && ((dps_uint8)si[i].Item.offset < (dps_uint8)SSize) && (si[i].Item.size > 0) && (si[i].Item.size < ActualSize) ) {
          i++;
        }
      }
      if (i < nitems) nitems = i;

      /* NOTE(review): cmpsi presumably orders records by data-file offset; the compaction below relies on that. Confirm. */
      if (nitems > 1) DpsSort((void*)si, (size_t)nitems, sizeof(DPS_SORTBASEITEM), cmpsi);

      gain = (dps_uint8)0;
      pos = (off_t)0;
      posold = (off_t)0;

      /* First record: note where it currently starts, or blank it if its offset is out of range. */
      if (nitems > 0) {
        if ((long unsigned)si[0].Item.offset < (long unsigned)SSize) { posold = (off_t)si[0].Item.offset; }
        else { si[0].Item.offset = (off_t)0; si[0].Item.size = 0; }
      }
      /* A record may not run into the next one: clamp its size to the gap between the two offsets. */
      if (nitems > 1) {
        if (si[0].Item.size > (rsize = (size_t)(si[1].Item.offset - si[0].Item.offset))) {
          DpsLog(P->A, DPS_LOG_ERROR, "si[0] size adjusted by offset: %ld -> %ld", (long)si[0].Item.size, (long)rsize);
          si[0].Item.size = rsize;
          error_cnt++;
        }
      }
      /*
       * Move the first record down to offset 0 when it does not start
       * there.  Each iteration reads one chunk at posold and writes it at
       * pos; the for-increment seeks back to the read position.
       */
      if ((diff = (dps_uint8)posold) > 0) {
        for( lseek(P->Sfd, posold, SEEK_SET), rsize = 0;
             (rsize < si[0].Item.size) && ((nread = read(P->Sfd, buffer, (rsize + BUFSIZ < si[0].Item.size) ? BUFSIZ : (si[0].Item.size - rsize) )) > 0);
             lseek(P->Sfd, posold, SEEK_SET) ) {
          lseek(P->Sfd, pos, SEEK_SET);
          (void)write(P->Sfd, buffer, (size_t)nread);
          rsize += (size_t)nread;
          posold += (off_t)nread;
          pos += (off_t)nread;
        }
        si[0].Item.offset = 0;
        /* A short copy means the record was shorter than the index claimed. */
        if (rsize != si[0].Item.size) {
          DpsLog(P->A, DPS_LOG_ERROR, "si[0] size adjusted by size: %ld -> %ld", (long)si[0].Item.size, (long)rsize);
          si[0].Item.size = rsize;
          error_cnt++;
        }
        gain += diff;
      }

      /* Remaining records: place record i+1 directly behind record i. */
      if (nitems > 0) for (i = 0; i < nitems - 1; i++) {
        if ((long unsigned)si[i + 1].Item.offset > (long unsigned)SSize) {
          /* Offset beyond the data file: keep the slot but make it empty. */
          DpsLog(P->A, DPS_LOG_ERROR, "si[%ld] too long offset: %ld > %ld, removing", i , (long)si[i + 1].Item.offset, (long)SSize);
          si[i + 1].Item.size = 0;
          si[i + 1].Item.offset = si[i].Item.offset + si[i].Item.size;
          error_cnt++;
        } else {
          /* pos = end of record i (write position), posold = current start of record i+1 (read position). */
          pos = (off_t)(si[i].Item.offset + si[i].Item.size);
          posold = (off_t)si[i + 1].Item.offset;
          if (i < nitems - 2) {
            if (si[i + 1].Item.size > (rsize = (size_t)(si[i + 2].Item.offset - si[i + 1].Item.offset))) {
              DpsLog(P->A, DPS_LOG_ERROR, "si[%ld] size adjusted by offset: %ld -> %ld", i + 1, (long)si[i + 1].Item.size, (long)rsize );
              si[i + 1].Item.size = rsize;
              error_cnt++;
            }
          }
          /* Only copy when there is a gap between the two records. */
          if ((diff = (dps_uint8)posold - (dps_uint8)pos) > 0) {
            for( lseek(P->Sfd, posold, SEEK_SET), rsize = 0;
                 (rsize < si[i + 1].Item.size) && ((nread = read(P->Sfd, buffer, (rsize + BUFSIZ < si[i + 1].Item.size) ? BUFSIZ : (si[i + 1].Item.size - rsize) )) > 0);
                 lseek(P->Sfd, posold, SEEK_SET) ) {
              lseek(P->Sfd, pos, SEEK_SET);
              (void)write(P->Sfd, buffer, (size_t)nread);
              rsize += (size_t)nread;
              posold += (off_t)nread;
              pos += (off_t)nread;
            }
            if (rsize != si[i + 1].Item.size) {
              DpsLog(P->A, DPS_LOG_ERROR, "si[%ld] size adjusted by size: %ld -> %ld", i + 1, (long)si[i + 1].Item.size, (long)rsize);
              si[i + 1].Item.size = rsize;
              error_cnt++;
            }
            si[i + 1].Item.offset = si[i].Item.offset + si[i].Item.size;
            gain += diff;
          }
        }
      }

      /* Truncate the data file right after the last record and count the freed tail. */
      posold = SSize;
      pos = (nitems) ? (off_t)(si[nitems - 1].Item.offset + si[nitems - 1].Item.size) : (off_t)0;
      if (ftruncate(P->Sfd, (off_t)(pos)) != 0) {
        dps_strerror(P->A, DPS_LOG_ERROR, "ftruncate error (pos:%ld) [%s:%d]", pos, __FILE__, __LINE__);
      }
      SSize = pos;
      if (posold > pos) {
        gain += ((dps_uint8)posold - (dps_uint8)pos);
      }

      /* The index rebuild below is unconditional; the former guard is kept commented out. */
      /*if (gain != 0 || OptimizeRatio == 0 || error_cnt > 0)*/ {

        /* Remember the old index size, then empty the index file. */
        posold = lseek(P->Ifd, (off_t)0, SEEK_END);
        (void)ftruncate(P->Ifd, (off_t)0);
        lseek(P->Ifd, (off_t)0, SEEK_SET);

        /*
         * Write a fresh table of DPS_HASH_PRIME index slots.
         * NOTE(review): presumably DpsXmalloc returns zero-filled memory so
         * the slots start out empty; confirm.
         */
        if ((hTable = (DPS_BASEITEM *)DpsXmalloc(sizeof(DPS_BASEITEM) * DPS_HASH_PRIME)) == NULL) {
          DpsLog(P->A, DPS_LOG_ERROR, "Memory alloc error hTable: %d bytes", sizeof(DPS_BASEITEM) * DPS_HASH_PRIME);
          DpsBaseClose(P);
          DPS_FREE(si);
          return DPS_ERROR;
        }
        if ( (wr = write(P->Ifd, hTable, sizeof(DPS_BASEITEM) * DPS_HASH_PRIME)) != sizeof(DPS_BASEITEM) * DPS_HASH_PRIME) {
          dps_strerror(P->A, DPS_LOG_ERROR, "[%s:%d] Can't set new index for file %s\nwritten %d bytes of %d", __FILE__, __LINE__, P->Ifilename, wr, sizeof(DPS_BASEITEM) * DPS_HASH_PRIME);
          DPS_FREE(hTable);
          DpsBaseClose(P);
          DPS_FREE(si);
          return DPS_ERROR;
        }
        DPS_FREE(hTable);

        /* Re-insert every surviving record into the new index. */
        for (i = 0; i < nitems; i++) {
          if (si[i].Item.rec_id == 0 || si[i].Item.size == 0) continue;
          if ((long)si[i].Item.offset > (long)SSize) {
            DpsLog(P->A, DPS_LOG_ERROR, "si[%ld] too long offset: %ld > %ld, removing", i , (long)si[i].Item.offset, (long)SSize);
            error_cnt++;
            continue;
          }
          P->rec_id = si[i].Item.rec_id;
          if ((res = DpsBaseSeek(P, DPS_WRITE_LOCK)) != DPS_OK) {
            DpsBaseClose(P);
            DPS_FREE(si);
            return res;
          }
          /*
           * The slot found holds a different record: link that record's
           * `next` to a new slot at the end of the index file, write it
           * back, and continue with the new slot as the current position.
           */
          if (P->Item.rec_id != P->rec_id) {
            if (P->mishash && P->Item.rec_id != 0) {
              if ((P->Item.next = (dps_uint8)(NewItemPos = lseek(P->Ifd, (off_t)0, SEEK_END))) == (dps_uint8)-1) {
                DpsBaseClose(P);
                DPS_FREE(si);
                return DPS_ERROR;
              }
              if (lseek(P->Ifd, (off_t)P->CurrentItemPos, SEEK_SET) == (off_t)-1) {
                DpsBaseClose(P);
                DPS_FREE(si);
                return DPS_ERROR;
              }
              if (write(P->Ifd, &P->Item, sizeof(DPS_BASEITEM)) != sizeof(DPS_BASEITEM)) {
                DpsBaseClose(P);
                DPS_FREE(si);
                return DPS_ERROR;
              }
              P->CurrentItemPos = (dps_uint8)NewItemPos;
            }
          }
          /* Store the record, with its updated offset and size, at the current slot. */
          P->Item = si[i].Item;
          P->Item.next = (off_t)0;
          if (lseek(P->Ifd, (off_t)P->CurrentItemPos, SEEK_SET) == (off_t)-1) {
            DpsLog(P->A, DPS_LOG_ERROR, "Can't seek %s {%s:%d}", P->Ifilename, __FILE__, __LINE__);
            DpsBaseClose(P);
            DPS_FREE(si);
            return DPS_ERROR;
          }
          if (write(P->Ifd, &P->Item, sizeof(DPS_BASEITEM)) != sizeof(DPS_BASEITEM)) {
            DpsLog(P->A, DPS_LOG_ERROR, "Can't write index for file %s {%s:%d}", P->Ifilename, __FILE__, __LINE__);
            DpsBaseClose(P);
            DPS_FREE(si);
            return DPS_ERROR;
          }
        }
        /* Add the change in index file size to the total gain. */
        pos = lseek(P->Ifd, (off_t)0, SEEK_END);
        gain += ((dps_uint8)posold - (dps_uint8)pos);
        DpsLog(P->A, DPS_LOG_DEBUG, "Optimize: %s/%s base 0x%X cleaned, %ld bytes freed", P->subdir, P->basename, base, gain);
      }
      DPS_FREE(si);
    }

    /* Inconsistencies were repaired on this pass: step back so the loop's base++ processes the same base again. */
    if (error_cnt) base--;
    DpsBaseClose(P);
  }
  return DPS_OK;
}
/*
 * DpsBaseRelocate - move every record of an on-disk base from the "old"
 * file layout to the "new" one after the configured number of files changed.
 *
 * Agent     - agent whose Vars supply VarDir and the old/new file counts.
 * base_type - 0: stored documents ("OldStoredFiles" -> "StoredFiles"),
 *             1: URL data        ("OldURLDataFiles" -> "URLDataFiles"),
 *             2: word tree       ("OldWrdFiles" -> "WrdFiles").
 *             Any other value is a no-op.
 *
 * For each old file the index is scanned for non-zero rec_ids; each record is
 * then read from the old layout, deleted there and written to the new layout.
 * Finally, files numbered [New->NFiles, Old->NFiles) are unlinked.
 *
 * Returns DPS_OK on success, for an unknown base_type, or when interrupted by
 * SIGTERM/SIGINT/SIGALRM; DPS_ERROR on allocation or seek failure.
 */
extern __C_LINK int __DPSCALL DpsBaseRelocate(DPS_AGENT *Agent, int base_type) {
  DPS_BASE_PARAM O, N;
  DPS_BASE_PARAM *Old = &O, *New = &N;
  size_t base, i, ndel, mdel = 128, data_len;
  urlid_t *todel = (urlid_t*)DpsMalloc(128 * sizeof(urlid_t));
  void *data;

  /* Without this check the first todel[ndel++] below would dereference NULL. */
  if (todel == NULL) {
    DpsLog(Agent, DPS_LOG_ERROR, "Can't alloc %d bytes %s:%d", (int)(mdel * sizeof(urlid_t)), __FILE__, __LINE__);
    return DPS_ERROR;
  }

  bzero(Old, sizeof(O));
  bzero(New, sizeof(N));

  switch(base_type) {
  case 0: /* stored */
    Old->subdir = "store";
    Old->basename = "doc";
    Old->indname = "doc";
    Old->mode = DPS_WRITE_LOCK;
    Old->NFiles = (size_t)DpsVarListFindInt(&Agent->Vars, "OldStoredFiles", 0x100);
    Old->vardir = DpsVarListFindStr(&Agent->Vars, "VarDir", DPS_VAR_DIR);
    Old->A = Agent;
    New->subdir = "store";
    New->basename = "doc";
    New->indname = "doc";
    New->mode = DPS_WRITE_LOCK;
    New->NFiles = (size_t)DpsVarListFindInt(&Agent->Vars, "StoredFiles", 0x100);
    New->vardir = DpsVarListFindStr(&Agent->Vars, "VarDir", DPS_VAR_DIR);
    New->A = Agent;
    DpsLog(Agent, DPS_LOG_INFO, "Relocating stored database");
    break;
  case 1: /* URL data */
    Old->subdir = DPS_URLDIR;
    Old->basename = "info";
    Old->indname = "info";
    Old->mode = DPS_WRITE_LOCK;
    Old->NFiles = (size_t)DpsVarListFindInt(&Agent->Vars, "OldURLDataFiles", 0x300);
    Old->vardir = DpsVarListFindStr(&Agent->Vars, "VarDir", DPS_VAR_DIR);
    Old->A = Agent;
#ifdef HAVE_ZLIB
    O.zlib_method = Z_DEFLATED;
    O.zlib_level = 9;
    O.zlib_windowBits = DPS_BASE_INFO_WINDOWBITS;
    O.zlib_memLevel = 9;
    O.zlib_strategy = DPS_BASE_INFO_STRATEGY;
#endif
    New->subdir = DPS_URLDIR;
    New->basename = "info";
    New->indname = "info";
    New->mode = DPS_WRITE_LOCK;
    New->NFiles = (size_t)DpsVarListFindInt(&Agent->Vars, "URLDataFiles", 0x300);
    New->vardir = DpsVarListFindStr(&Agent->Vars, "VarDir", DPS_VAR_DIR);
    New->A = Agent;
#ifdef HAVE_ZLIB
    N.zlib_method = Z_DEFLATED;
    N.zlib_level = 9;
    N.zlib_windowBits = DPS_BASE_INFO_WINDOWBITS;
    N.zlib_memLevel = 9;
    N.zlib_strategy = DPS_BASE_INFO_STRATEGY;
#endif
    DpsLog(Agent, DPS_LOG_INFO, "Relocating URLData database");
    break;
  case 2: /* tree wrd */
    Old->subdir = DPS_TREEDIR;
    Old->basename = "wrd";
    Old->indname = "wrd";
    Old->mode = DPS_WRITE_LOCK;
    Old->NFiles = (size_t)DpsVarListFindInt(&Agent->Vars, "OldWrdFiles", 0x300);
    Old->vardir = DpsVarListFindStr(&Agent->Vars, "VarDir", DPS_VAR_DIR);
    Old->A = Agent;
#ifdef HAVE_ZLIB
    O.zlib_method = Z_DEFLATED;
    O.zlib_level = 9;
    O.zlib_windowBits = DPS_BASE_WRD_WINDOWBITS;
    O.zlib_memLevel = 9;
    O.zlib_strategy = DPS_BASE_WRD_STRATEGY;
#endif
    New->subdir = DPS_TREEDIR;
    New->basename = "wrd";
    New->indname = "wrd";
    New->mode = DPS_WRITE_LOCK;
    New->NFiles = (size_t)DpsVarListFindInt(&Agent->Vars, "WrdFiles", 0x300);
    New->vardir = DpsVarListFindStr(&Agent->Vars, "VarDir", DPS_VAR_DIR);
    New->A = Agent;
#ifdef HAVE_ZLIB
    N.zlib_method = Z_DEFLATED;
    N.zlib_level = 9;
    N.zlib_windowBits = DPS_BASE_WRD_WINDOWBITS;
    N.zlib_memLevel = 9;
    N.zlib_strategy = DPS_BASE_WRD_STRATEGY;
#endif
    DpsLog(Agent, DPS_LOG_INFO, "Relocating Wrd database");
    break;
  default:
    DPS_FREE(todel);
    return DPS_OK;
  }

  for (base = 0; base < O.NFiles; base++) {
    ndel = 0;

    /* Stop between files when a termination signal has been flagged. */
    if (have_sigterm || have_sigint || have_sigalrm) {
      DpsLog(Agent, DPS_LOG_EXTRA, "%s signal received. Exiting chackup", (have_sigterm) ? "SIGTERM" : (have_sigint) ? "SIGINT" : "SIGALRM");
      DpsBaseClose(Old);
      DpsBaseClose(New);
      DPS_FREE(todel);
      return DPS_OK;
    }

    Old->rec_id = (urlid_t)(base << DPS_BASE_BITS);
    if (DpsBaseOpen(Old, DPS_READ_LOCK) != DPS_OK) {
      DpsBaseClose(Old);
      DpsBaseClose(New);
      continue;
    }
    if (lseek(O.Ifd, (off_t)0, SEEK_SET) == (off_t)-1) {
      DpsLog(Agent, DPS_LOG_ERROR, "Can't seeek for file %s", Old->Ifilename);
      DpsBaseClose(Old);
      DpsBaseClose(New);
      DPS_FREE(todel);
      return DPS_ERROR;
    }

    /* Pass 1: collect every live rec_id found in this file's index. */
    while (read(Old->Ifd, &Old->Item, sizeof(DPS_BASEITEM)) == sizeof(DPS_BASEITEM)) {
      if (Old->Item.rec_id != 0) {
        if (ndel >= mdel) {
          mdel += 128;
          /* NOTE(review): if DpsRealloc keeps the old buffer alive on failure,
             that buffer is leaked on the error path below - confirm its contract. */
          todel = (urlid_t*)DpsRealloc(todel, mdel * sizeof(urlid_t));
          if (todel == NULL) {
            DpsBaseClose(Old);
            DpsBaseClose(New);
            DpsLog(Agent, DPS_LOG_ERROR, "Can't realloc %d bytes %s:%d", (int)(mdel * sizeof(urlid_t)), __FILE__, __LINE__);
            DPS_FREE(todel);
            return DPS_ERROR;
          }
        }
        todel[ndel++] = Old->Item.rec_id;
      }
    }
    DpsBaseClose(Old);

    /* Pass 2: move each record.  The delete deliberately precedes the write:
       Old and New share subdir/basename, so a record may map to the same
       file in both layouts. */
    for (i = 0; i < ndel; i++) {
      Old->rec_id = todel[i];
      data = DpsBaseARead(Old, &data_len);
      if (data == NULL) continue;
      DpsBaseDelete(Old);
      DpsBaseClose(Old);
      New->rec_id = todel[i];
      if (DpsBaseWrite(New, data, data_len) != DPS_OK) {
        /* The record is already gone from the old layout; at least report it. */
        DpsLog(Agent, DPS_LOG_ERROR, "Relocate: can't write rec_id: %x {%s:%d}", todel[i], __FILE__, __LINE__);
      }
      DpsBaseClose(New);
      DPS_FREE(data);
    }
    /* size_t arguments are narrowed explicitly to match the %d / %x conversions. */
    DpsLog(Agent, DPS_LOG_EXTRA, "\tbase: %d [0x%x], %d records relocated", (int)base, (unsigned int)base, (int)ndel);
  }
  DPS_FREE(todel);

  /* Remove the files that exist only in the old (larger) layout. */
  for (base = N.NFiles; base < O.NFiles; base++) {
    Old->rec_id = (urlid_t)(base << DPS_BASE_BITS);
    if (DpsBaseOpen(Old, DPS_READ_LOCK) != DPS_OK) {
      DpsBaseClose(Old);
      continue;
    }
    unlink(O.Ifilename);
    unlink(O.Sfilename);
    DpsBaseClose(Old);
  }

  return DPS_OK;
}
/*
 * DpsCookiesFind - build the "Cookie" request header for Doc.
 *
 * Indexer  - agent holding the in-memory cookie list (Indexer->Cookies).
 * Server   - server entry; when non-NULL its "AuthPing" variable, if set,
 *            triggers an authentication ping before the SQL lookup.
 * Doc      - document being fetched; its CurURL selects matching cookies and
 *            its RequestHeaders receives the resulting "Cookie" value.
 * hostinfo - host part of the URL, used for domain matching.
 *
 * Steps (only compiled with HAVE_SQL):
 *  1. Collect cookies from the in-memory list whose domain, path and secure
 *     flag match Doc.
 *  2. If "have_no_cookies" is still set (no matching cookie that did not
 *     come from the config was seen): optionally issue a HEAD request for
 *     the site home page followed by the AuthPing request, then load cookies
 *     from the "cookies" SQL table for hostinfo and each of its parent
 *     domains, adding them to the in-memory list.
 *  3. Store the assembled value as the "Cookie" request header.
 */
void DpsCookiesFind(DPS_AGENT *Indexer, DPS_SERVER *Server, DPS_DOCUMENT *Doc, const char *hostinfo) {
#ifdef HAVE_SQL
  DPS_DSTR cookie;
  DPS_COOKIES *Cookies = &Indexer->Cookies;
  DPS_COOKIE *Coo;
  size_t i, blen = dps_strlen(hostinfo), slen;
  int have_no_cookies = DpsVarListFindInt(&Doc->Sections, "have_no_cookies", 1);
#ifdef WITH_PARANOIA
  void *paran = DpsViolationEnter(paran);
#endif

  TRACE_IN(Indexer, "DpsCookiesFind");

  DpsDSTRInit(&cookie, 1024);

  for(i = 0; i < Cookies->ncookies; i++) {
    Coo = &Cookies->Cookie[i];
    slen = dps_strlen(Coo->domain);
    if (slen > blen) continue;
    if (Coo->secure == 'y' && strcasecmp(Doc->CurURL.schema, "https")) continue;
    if (strncasecmp(Coo->path, Doc->CurURL.path, dps_strlen(Coo->path))) continue;
    /* NOTE(review): plain suffix comparison with no '.' boundary check, so a
       cookie for "ample.com" also matches host "example.com" - confirm intended. */
    if (strcasecmp(Coo->domain, hostinfo + (blen - slen))) continue;
    if (Coo->from_config != 1) have_no_cookies = 0;
    /* An entry with empty name and value contributes nothing to the header. */
    if (Coo->name[0] == '\0' && Coo->value[0] == '\0') continue;
    if (cookie.data_size) DpsDSTRAppend(&cookie, "; ", 2);
    DpsDSTRAppendStr(&cookie, Coo->name);
    DpsDSTRAppend(&cookie, "=", 1);
    DpsDSTRAppendStr(&cookie, Coo->value);
  }

  if (have_no_cookies) {
    char buf[2*PATH_MAX];
    dpshash32_t url_id;
    DPS_DB *db;
    DPS_SQLRES Res;
    size_t rows;
    int rc;

    if (Server != NULL) {
      char *PingData = DpsVarListFindStr(&Server->Vars, "AuthPing", NULL);
      if (PingData != NULL) {
        char *AuthPing = DpsStrdup(DpsTrim(PingData, " \t\r\n"));
        int method = DPS_METHOD_GET;

        /* AuthPing is base64 encoded: "<GET|POST> <url> [<body>]". */
        dps_base64_decode(AuthPing, PingData, dps_strlen(PingData));
        if (!strncasecmp(AuthPing, "GET", 3)) {
          method = DPS_METHOD_GET;
          PingData = DpsTrim(AuthPing + 3, " \t\r\n");
        } else if (!strncasecmp(AuthPing, "POST", 4)) {
          method = DPS_METHOD_POST;
          PingData = DpsTrim(AuthPing + 4, " \t\r\n");
        } else {
          DpsLog(Indexer, DPS_LOG_ERROR, "AuthPing should be GET or POST: %s", AuthPing);
          PingData = NULL;
        }

        if (PingData != NULL) {
          size_t size = dps_strlen(PingData);
          {
            /* PingURL: one token of at most `size` chars, plus "?" and NUL.
               PingBody: size + 1 so the array is never zero-length. */
            char PingURL[size + 2];
            char PingBody[size + 1];
            DPS_DOCUMENT *rDoc;
            int result;

            /* sscanf() may convert fewer than two tokens; keep both strings defined. */
            PingURL[0] = '\0';
            PingBody[0] = '\0';

            rDoc = DpsDocInit(NULL);
            DpsSpiderParamInit(&rDoc->Spider);
            DpsVarList2Doc(rDoc, Server);
            rDoc->Buf.max_size = (size_t)DpsVarListFindInt(&Indexer->Vars, "MaxDocSize", DPS_MAXDOCSIZE);
            rDoc->Buf.allocated_size = DPS_NET_BUF_SIZE;
            if ((rDoc->Buf.buf = (char*)DpsMalloc(rDoc->Buf.allocated_size + 1)) == NULL) {
              /* Release everything acquired so far before bailing out. */
              DpsDocFree(rDoc);
              DpsFree(AuthPing);
              DpsDSTRFree(&cookie);
              TRACE_OUT(Indexer);
              return;
            }
            rDoc->Buf.buf[0]='\0';
            rDoc->subdoc = Indexer->Flags.SubDocLevel + 1;

#if 1
            dps_snprintf(buf, sizeof(buf), "%s://%s/", DPS_NULL2EMPTY(Doc->CurURL.schema), DPS_NULL2EMPTY(Doc->CurURL.hostinfo));
            DpsVarListReplaceStr(&rDoc->Sections, "URL", buf);
            DpsURLParse(&rDoc->CurURL, buf);
            DpsLog(Indexer, DPS_LOG_INFO, "HOME: %s", buf);
            rDoc->method = DPS_METHOD_HEAD;
            /* DpsVarListFree(&rDoc->RequestHeaders);*/
            if (Doc != NULL) {
              DpsVarListReplaceLst(&rDoc->RequestHeaders, &Doc->RequestHeaders, NULL, "*");
            }
            DpsVarListReplaceStr(&rDoc->Sections, "have_no_cookies", "0");
            DpsDocAddDocExtraHeaders(Indexer, Server, rDoc);
            DpsDocAddConfExtraHeaders(Indexer->Conf, rDoc);
            DpsVarListReplaceLst(&rDoc->Sections, &Server->Vars, NULL, "*");
            DpsDocAddServExtraHeaders(Server, rDoc);
            DpsVarListLog(Indexer, &rDoc->RequestHeaders, DPS_LOG_DEBUG, "HOME.Request");
            if (Doc == NULL || Indexer->Flags.cmd == DPS_IND_FILTER) {
              DpsDocLookupConn(Indexer, rDoc);
            } else {
              /* Borrow Doc's connection; it is detached again before DpsDocFree(). */
              DPS_FREE(rDoc->connp.connp);
              rDoc->connp = Doc->connp;
            }
            result = DpsGetURL(Indexer, rDoc, NULL); /* Just get headers from the home as we need only Cookies from it */
            DpsDocProcessResponseHeaders(Indexer, rDoc);
            DpsVarListLog(Indexer, &rDoc->Sections, DPS_LOG_DEBUG, "HOME.Response");
#endif

            sscanf(PingData, "%s %s", PingURL, PingBody);
            /* Test the parsed ping method, not rDoc->method: the latter is
               DPS_METHOD_HEAD at this point, which made the GET branch dead. */
            if (method == DPS_METHOD_GET) {
              dps_strcat(PingURL, "?");
              dps_strcat(PingURL, PingBody);
            } else {
              DpsVarListReplaceStr(&rDoc->Sections, "body", PingBody);
            }
            DpsVarListReplaceStr(&rDoc->Sections, "URL", PingURL);
            DpsURLParse(&rDoc->CurURL, PingURL);
            DpsLog(Indexer, DPS_LOG_INFO, "AUTH.PING: %s", PingURL);
            rDoc->method = method;
            DpsVarListFree(&rDoc->RequestHeaders);
            DpsVarListReplaceStr(&rDoc->Sections, "have_no_cookies", "0");
            DpsDocAddDocExtraHeaders(Indexer, Server, rDoc);
            DpsDocAddConfExtraHeaders(Indexer->Conf, rDoc);
            DpsVarListReplaceLst(&rDoc->Sections, &Server->Vars, NULL, "*");
            DpsDocAddServExtraHeaders(Server, rDoc);
            if (method == DPS_METHOD_POST) {
              dps_snprintf(buf, sizeof(buf), "application/x-www-form-urlencoded; charset=%s", DpsVarListFindStr(&Indexer->Conf->Vars, "LocalCharset", "iso-8859-1"));
              DpsVarListReplaceStr(&rDoc->RequestHeaders, "Content-Type", buf);
              /* dps_strlen() yields size_t; narrow it to match %d. */
              dps_snprintf(buf, sizeof(buf), "%d", (int)dps_strlen(PingBody));
              DpsVarListReplaceStr(&rDoc->RequestHeaders, "Content-Length", buf);
            }
            DpsVarListLog(Indexer, &rDoc->RequestHeaders, DPS_LOG_DEBUG, "AUTHPING.Request");
#if 0
            if (Doc == NULL || Indexer->Flags.cmd == DPS_IND_FILTER) {
              DpsDocLookupConn(Indexer, rDoc);
            } else {
              DPS_FREE(rDoc->connp.connp);
              rDoc->connp = Doc->connp;
            }
#endif
            result = DpsGetURL(Indexer, rDoc, NULL); /* Just get it as we need only Cookies from the headers */
            DpsDocProcessResponseHeaders(Indexer, rDoc);
            DpsVarListDel(&rDoc->Sections, "body");
            DpsVarListLog(Indexer, &rDoc->Sections, DPS_LOG_DEBUG, "AUTHPING.Response");
            /* Detach the (possibly borrowed) connection so DpsDocFree() leaves it alone. */
            if (Doc != NULL) bzero(&rDoc->connp, sizeof(rDoc->connp));
            DpsDocFree(rDoc);
          }
        }
        DpsFree(AuthPing);
      }
    }

    /* Query the cookie table for hostinfo, then for each parent domain
       obtained by stripping the leading label. */
    while(hostinfo != NULL) {
      url_id = DpsStrHash32(hostinfo);
      DpsSQLResInit(&Res);
      /* NOTE(review): hostinfo is interpolated into the SQL text unescaped;
         it originates from a URL, so it should be escaped or bound - confirm
         which escaping helper this code base provides. */
      dps_snprintf(buf, sizeof(buf), "SELECT name,value,path,secure FROM cookies WHERE domain='%s'", hostinfo);
      if (Indexer->flags & DPS_FLAG_UNOCON) {
        DPS_GETLOCK(Indexer, DPS_LOCK_DB);
        db = Indexer->Conf->dbl.db[url_id % Indexer->Conf->dbl.nitems];
      } else {
        db = Indexer->dbl.db[url_id % Indexer->dbl.nitems];
      }
      if(DPS_OK == (rc = DpsSQLQuery(db, &Res, buf))) {
        rows = DpsSQLNumRows(&Res);
        for(i = 0; i < rows; i++) {
          /* Columns: 0 = name, 1 = value, 2 = path, 3 = secure. */
          DpsCookiesAdd(Indexer, hostinfo, DpsSQLValue(&Res, i, 2), DpsSQLValue(&Res, i, 0), DpsSQLValue(&Res, i, 1), *DpsSQLValue(&Res, i, 3), 0, 0, 0);
          if (*DpsSQLValue(&Res, i, 3) == 'y' && strcasecmp(Doc->CurURL.schema, "https")) continue;
          if (strncasecmp(DpsSQLValue(&Res, i, 2), Doc->CurURL.path, dps_strlen(DpsSQLValue(&Res, i, 2)))) continue;
          if (cookie.data_size) DpsDSTRAppend(&cookie, "; ", 2);
          DpsDSTRAppendStr(&cookie, DpsSQLValue(&Res, i, 0));
          DpsDSTRAppend(&cookie, "=", 1);
          DpsDSTRAppendStr(&cookie, DpsSQLValue(&Res, i, 1));
        }
        if (rows == 0) {
          /* Record an empty entry for this domain in the in-memory list. */
          DpsCookiesAdd(Indexer, hostinfo, "/", "", "", 'n', 0, 0, 0);
        }
      }
      DpsSQLFree(&Res);
      if (Indexer->flags & DPS_FLAG_UNOCON) {
        DPS_RELEASELOCK(Indexer, DPS_LOCK_DB);
      }
      hostinfo = strchr(hostinfo, '.');
      if (hostinfo != NULL) hostinfo++;
    }
  }

  if (cookie.data_size) {
    DpsVarListReplaceStr(&Doc->RequestHeaders, "Cookie", cookie.data);
  }
  DpsDSTRFree(&cookie);
#endif
  TRACE_OUT(Indexer);
  return;
}