/*
 * Send one command over the FTP control connection and fetch the reply.
 *
 * "\r\n" is appended to cmd before it is written with socket_write().
 * connp->err is reset to 0 first.
 *
 * Returns -1 when the line buffer cannot be allocated, when the write
 * fails, or when Dps_ftp_read_line() reports a failure; otherwise returns
 * the value produced by Dps_ftp_get_reply().
 */
int Dps_ftp_send_cmd(DPS_CONN *connp, const char *cmd) {
  char *line;
  size_t line_len;

  connp->err = 0;

  /* command text followed by CR LF; one more byte is added for the NUL */
  line_len = dps_strlen(cmd) + 2;
  line = DpsXmalloc(line_len + 1);
  if (line == NULL) {
    return -1;
  }
  dps_snprintf(line, line_len + 1, "%s\r\n", cmd);

  socket_buf_clear(connp);
  if (socket_write(connp, line) != 0) {
    DPS_FREE(line);
    return -1;
  }

#ifdef DEBUG_FTP
  fprintf(stderr, "ftp://%s (cmd) : %s", connp->hostname, line);
#endif
  DPS_FREE(line);

  if (Dps_ftp_read_line(connp) != 0) {
    return -1;
  }

#ifdef DEBUG_FTP
  fprintf(stderr, "ftp://%s (reply): %s", connp->hostname, connp->buf);
#endif
  return Dps_ftp_get_reply(connp);
}
/*
 * Open an FTP session on connp: record port, timeout and host name, open
 * the control port, log in and switch to binary mode.
 *
 * An already connected connp is closed first. When port is 0 the control
 * port is set to 21 and the port of the paired connection (connp->connp)
 * to 20; otherwise they are port and port - 1.
 *
 * Returns 0 on success and -1 on a NULL connp or hostname, on allocation
 * failure, or when opening the control port or logging in fails. The
 * result of Dps_ftp_set_binary() is not checked.
 */
int Dps_ftp_connect(DPS_AGENT *Agent, DPS_CONN *connp, char *hostname, int port, char *user, char *passwd, int timeout) {
  size_t name_len;

  if (connp == NULL) {
    return -1;
  }

  if (connp->connected == DPS_NET_CONNECTED) {
    Dps_ftp_close(connp);
  }
  connp->connected = DPS_NET_NOTCONNECTED;

  if (port != 0) {
    connp->port = port;
    connp->connp->port = port - 1;
  } else {
    connp->port = 21;
    connp->connp->port = 20;
  }
  connp->timeout = timeout;

  if (hostname == NULL) {
    return -1;
  }

  /* keep a private copy of the host name */
  name_len = dps_strlen(hostname);
  connp->hostname = DpsXrealloc(connp->hostname, name_len + 1);
  if (connp->hostname == NULL) {
    return -1;
  }
  dps_snprintf(connp->hostname, name_len + 1, "%s", hostname);

  if (Dps_ftp_open_control_port(Agent, connp) != 0) {
    return -1;
  }
  if (Dps_ftp_login(connp, user, passwd) != 0) {
    return -1;
  }
  Dps_ftp_set_binary(connp);

  connp->connected = DPS_NET_CONNECTED;
  return 0;
}
/*
 * Delete expired rows from the "cookies" table of every configured database.
 *
 * Does nothing when A->Flags.robots_period is 0. The DELETE statement is
 * issued through DpsSQLAsyncQuery() (only when built with HAVE_SQL); the
 * loop stops at the first database that reports a failure, after logging
 * that database's error string.
 */
void DpsCookiesClean(DPS_AGENT *A) {
  char buf[256];
  DPS_DB *db;
  size_t i, dbfrom = 0, dbto;
  /* Must start as DPS_OK: without HAVE_SQL nothing else assigns it. */
  int res = DPS_OK;

  if (A->Flags.robots_period == 0) return;

  dps_snprintf(buf, sizeof(buf), "DELETE FROM cookies WHERE expires < %d", A->now);

  if (A->flags & DPS_FLAG_UNOCON) DPS_GETLOCK(A, DPS_LOCK_CONF);
  dbto = DPS_DBL_TO(A);
  if (A->flags & DPS_FLAG_UNOCON) DPS_RELEASELOCK(A, DPS_LOCK_CONF);

  for (i = dbfrom; i < dbto; i++) {
    db = DPS_DBL_DB(A, i);

    if (A->flags & DPS_FLAG_UNOCON) DPS_GETLOCK(A, DPS_LOCK_DB);
#ifdef HAVE_SQL
    res = DpsSQLAsyncQuery(db, NULL, buf);
#endif
    if (res != DPS_OK) {
      /* Never use the database error text as the format string itself. */
      DpsLog(A, DPS_LOG_ERROR, "%s", db->errstr);
    }
    if (A->flags & DPS_FLAG_UNOCON) DPS_RELEASELOCK(A, DPS_LOCK_DB);

    if (res != DPS_OK) break;
  }
}
/*
 * Build the environment error message.
 *
 * For every database in Conf->dbl whose errcode is non-zero, Conf->errstr
 * is rewritten as "DB err: <db error> - <previous errstr>" (limited to 2048
 * bytes). Returns Conf->errstr.
 */
char * DpsEnvErrMsg(DPS_ENV * Conf) {
  size_t i;

  for (i = 0; i < Conf->dbl.nitems; i++) {
    DPS_DB *db = &Conf->dbl.db[i];
    char *prev;

    if (!db->errcode) continue;

    /* errstr is both source and destination, so work from a copy */
    prev = (char*)DpsStrdup(Conf->errstr);
    /* If the copy could not be made, fall back to an empty tail instead of
       handing a NULL pointer to %s. */
    dps_snprintf(Conf->errstr, 2048, "DB err: %s - %s", db->errstr, prev ? prev : "");
    DPS_FREE(prev);
  }
  return(Conf->errstr);
}
/*
 * Send "REST <offset>" on the control connection.
 *
 * Returns 0 when Dps_ftp_send_cmd() yields a reply code of 3 or less,
 * -1 when the command could not be sent, and -1 with c->err set to the
 * reply code when that code is greater than 3.
 */
int Dps_ftp_rest(DPS_CONN *c, size_t rest) {
  char cmd[64];
  int code;

  /* "%u" consumes an unsigned int, so convert explicitly rather than pass
     a size_t through varargs. NOTE(review): offsets above UINT_MAX are
     truncated here; widening needs a length modifier that dps_snprintf
     is known to support. */
  dps_snprintf(cmd, sizeof(cmd), "REST %u", (unsigned int)rest);

  code = Dps_ftp_send_cmd(c, cmd);
  if (code == -1) {
    return -1;
  } else if (code > 3) {
    c->err = code;
    return -1;
  }
  return 0;
}
/*
 * Retrieve a file by sending "RETR <path>" through Dps_ftp_send_data_cmd().
 *
 * Returns -1 for a NULL path, an allocation failure, or a failed data
 * command whose d->err is anything other than DPS_NET_FILE_TL; a failure
 * with d->err == DPS_NET_FILE_TL is still reported as 0.
 */
int Dps_ftp_get(DPS_CONN *c, DPS_CONN *d, char *path, size_t max_doc_size){
  char *retr_cmd;
  size_t cmd_len;
  int failed;

  if (path == NULL) {
    return -1;
  }

  cmd_len = dps_strlen(path) + 16;
  retr_cmd = DpsXmalloc(cmd_len + 1);
  if (retr_cmd == NULL) {
    return -1;
  }
  dps_snprintf(retr_cmd, cmd_len + 1, "RETR %s", path);

  failed = (Dps_ftp_send_data_cmd(c, d, retr_cmd, max_doc_size) == -1);
  DPS_FREE(retr_cmd);

  if (failed && d->err != DPS_NET_FILE_TL) {
    return -1;
  }
  return 0;
}
/*
 * Put the data connection d into listening state and announce it to the
 * server with a PORT command sent on the control connection c.
 *
 * The local address is taken from c via socket_getname(); when d->port is
 * non-zero it overrides the port. The PORT arguments are the raw bytes of
 * d->sin.sin_addr and d->sin.sin_port after the socket is listening.
 * On success d->user and d->pass are set to the same pointers as c's.
 *
 * Returns 0 on success, -1 on a NULL d, any socket failure, a failed send
 * or a reply that does not start with "200 ".
 */
int Dps_ftp_open_data_port( DPS_CONN *c, DPS_CONN *d){
  char port_cmd[64];
  unsigned char *addr_bytes, *port_bytes;
  int reply;

  if (d == NULL) {
    return -1;
  }

  if (socket_getname(c, &d->sin) == -1) {
    return -1;
  }
  if (d->port != 0) {
    d->sin.sin_port = htons(d->port);
  }

  if (socket_open(d) != 0) {
    return -1;
  }
  if (socket_listen(d) != 0) {
    return -1;
  }
  if (socket_getname(d, &d->sin) == -1) {
    return -1;
  }

  addr_bytes = (unsigned char *)&d->sin.sin_addr;
  port_bytes = (unsigned char *)&d->sin.sin_port;
  dps_snprintf(port_cmd, 64, "PORT %d,%d,%d,%d,%d,%d",
               addr_bytes[0], addr_bytes[1], addr_bytes[2], addr_bytes[3],
               port_bytes[0], port_bytes[1]);

  reply = Dps_ftp_send_cmd(c, port_cmd);
  if (reply < 0) {
    return -1;
  }
  if (strncasecmp(c->buf, "200 ", 4) != 0) {
    return -1;
  }

  d->user = c->user;
  d->pass = c->pass;
  return 0;
}
/*
 * Ask the server for the modification time of path ("MDTM <path>").
 *
 * Returns -1 for a NULL path, an allocation failure or a failed send;
 * returns -1 with c->err set to the reply code when that code is greater
 * than 3. Otherwise returns DpsFTPDate2Time_t() applied to the reply in
 * c->buf.
 */
int Dps_ftp_mdtm(DPS_CONN *c, char *path){
  char *mdtm_cmd;
  size_t cmd_len;
  int reply;

  if (path == NULL) {
    return -1;
  }

  cmd_len = dps_strlen(path) + 16;
  mdtm_cmd = DpsXmalloc(cmd_len + 1);
  if (mdtm_cmd == NULL) {
    return -1;
  }
  dps_snprintf(mdtm_cmd, cmd_len + 1, "MDTM %s", path);

  reply = Dps_ftp_send_cmd(c, mdtm_cmd);
  DPS_FREE(mdtm_cmd);

  if (reply == -1) {
    return -1;
  }
  if (reply > 3) {
    c->err = reply;
    return -1;
  }
  return (DpsFTPDate2Time_t(c->buf));
}
/*
 * Run "LIST" (or "LIST <filename>" when filename is given) over the data
 * connection and hand the result to ftp_parse_list().
 *
 * Returns -1 on allocation failure or when Dps_ftp_send_data_cmd()
 * returns -1; otherwise returns the result of ftp_parse_list(d, path).
 */
int Dps_ftp_list(DPS_CONN *c, DPS_CONN *d, char *path, char *filename, size_t max_doc_size){
  char *list_cmd;
  size_t cmd_len;
  int failed;

  if (filename != NULL) {
    cmd_len = dps_strlen(filename) + 16;
    list_cmd = DpsXmalloc(cmd_len + 1);
    if (list_cmd == NULL) {
      return -1;
    }
    dps_snprintf(list_cmd, cmd_len + 1, "LIST %s", filename);
  } else {
    list_cmd = DpsXmalloc(16);
    if (list_cmd == NULL) {
      return -1;
    }
    sprintf(list_cmd, "LIST");
  }

  failed = (Dps_ftp_send_data_cmd(c, d, list_cmd, max_doc_size) == -1);
  DPS_FREE(list_cmd);
  if (failed) {
    return -1;
  }

  return ftp_parse_list(d, path);
}
/*
 * Ask the server for the size of path ("SIZE <path>").
 *
 * Returns the number parsed from a "213 <size>" reply in c->buf.
 * Returns -1 for a NULL path, an allocation failure, a failed send, a
 * reply code greater than 3 (c->err is then set to that code), or a reply
 * that does not match "213 <number>".
 */
ssize_t Dps_ftp_size(DPS_CONN *c, char *path) {
  char *cmd;
  int code;
  size_t len;
  unsigned long size = 0;

  if (!path) return -1;

  len = dps_strlen(path) + 16;
  cmd = DpsXmalloc(len + 1);
  if (cmd == NULL) return -1;
  dps_snprintf(cmd, len + 1, "SIZE %s", path);

  code = Dps_ftp_send_cmd(c, cmd);
  DPS_FREE(cmd);

  if (code == -1) {
    return -1;
  } else if (code > 3) {
    c->err = code;
    return -1;
  }

  /* Parse into a variable whose type matches the conversion, and treat an
     unparsable reply as an error instead of returning a stale value. */
  if (sscanf(c->buf, "213 %lu", &size) != 1) {
    return -1;
  }
  return (ssize_t)size;
}
/*
 * Change the remote working directory ("CWD <path>").
 *
 * An empty path is a no-op that returns 0. Returns -1 for a NULL path,
 * an allocation failure or a failed send; returns -1 with c->err set to
 * the reply code when that code is greater than 3; 0 otherwise.
 */
int Dps_ftp_cwd(DPS_CONN *c, char *path){
  char *cwd_cmd;
  size_t cmd_len;
  int reply;

  if (path == NULL) {
    return -1;
  }
  if (path[0] == '\0') {
    return 0;
  }

  cmd_len = dps_strlen(path) + 16;
  cwd_cmd = DpsXmalloc(cmd_len + 1);
  if (cwd_cmd == NULL) {
    return -1;
  }
  dps_snprintf(cwd_cmd, cmd_len + 1, "CWD %s", path);

  reply = Dps_ftp_send_cmd(c, cwd_cmd);
  DPS_FREE(cwd_cmd);

  if (reply == -1) {
    return -1;
  }
  if (reply > 3) {
    c->err = reply;
    return -1;
  }
  return 0;
}
/*
 * Log in on the FTP control connection with USER and, if needed, PASS.
 *
 * connp->user / connp->pass are freed and replaced by copies of the given
 * credentials (they stay cleared for the anonymous defaults). A NULL user
 * logs in as "anonymous"; a NULL passwd uses the built-in default
 * password.
 *
 * Returns 0 on success (including a USER reply code of 2, which means no
 * password is required), -1 on allocation failure, when a command cannot
 * be sent, or when the PASS reply code is greater than 3.
 */
int Dps_ftp_login(DPS_CONN *connp, char *user, char *passwd){
  char *buf;
  char passwd_tmp[64];
  const char *login_user, *login_pass;
  int code;
  size_t len;

  DPS_FREE(connp->user);
  DPS_FREE(connp->pass);

  /* Use the caller's strings directly so long user names and passwords
     are not silently truncated by a fixed-size scratch buffer. */
  if (!user) {
    login_user = "anonymous";
  } else {
    login_user = user;
    connp->user = (char*)DpsStrdup(user);
  }

  if (!passwd) {
    /* NOTE(review): this literal contains no conversion specifiers, so
       PACKAGE and VERSION are not used by it -- confirm the intended
       format of the default password. */
    dps_snprintf(passwd_tmp, sizeof(passwd_tmp), "*****@*****.**", PACKAGE, VERSION);
    login_pass = passwd_tmp;
  } else {
    login_pass = passwd;
    connp->pass = (char*)DpsStrdup(passwd);
  }

  len = dps_strlen(login_user) + 5 /*dps_strlen("USER ")*/;
  buf = (char *) DpsXmalloc(len + 1);
  if (buf == NULL) return -1;
  dps_snprintf(buf, len + 1, "USER %s", login_user);
  code = Dps_ftp_send_cmd(connp, buf);
  DPS_FREE(buf);
  if (code == -1) return -1;
  else if (code == 2) /* Don't need password */
    return 0;

  len = dps_strlen(login_pass) + 5 /*dps_strlen("PASS ")*/;
  buf = (char *) DpsXmalloc(len + 1);
  if (buf == NULL) return -1;
  dps_snprintf(buf, len + 1, "PASS %s", login_pass);
  code = Dps_ftp_send_cmd(connp, buf);
  DPS_FREE(buf);

  /* A failed send (-1) must not be mistaken for a successful login. */
  if (code == -1 || code > 3) return -1;

  return 0;
}
/*
 * Write the nested (hi/lo keyed) limit index for the list L.
 *
 * The list is sorted with cmp_ind8, then two files are written below
 * <vardir>/<DPS_TREEDIR>/:
 *   <lim_name>.dat - the url_id of every item, in sorted order;
 *   <lim_name>.ind - one DPS_UINT8_POS_LEN record per run of items with
 *                    equal (hi, lo), holding the byte position and byte
 *                    length of that run inside the .dat file.
 * vardir is db->vardir or, when that is unset, the "VarDir" variable.
 * L is cleared with ClearIndex8() in all cases except the early return
 * for an empty list.
 *
 * Returns 0 on success and 1 on any failure or when L->Item is NULL.
 */
static int MakeNestedIndex(DPS_AGENT *Indexer, DPS_UINT8URLIDLIST *L, const char *lim_name, DPS_DB *db) {
  DPS_ENV *Conf = Indexer->Conf;
  size_t k, prev;
  urlid_t *data = NULL;
  DPS_UINT8_POS_LEN *ind = NULL;
  size_t mind = 1000, nind = 0, ndata;
  char fname[PATH_MAX];
  /* -1 means "not open", so the error path never closes a descriptor that
     was never opened or has already been closed. */
  int dat_fd = -1, ind_fd = -1;
  const char *vardir = (db->vardir) ? db->vardir : DpsVarListFindStr(&Conf->Vars, "VarDir", DPS_VAR_DIR);

  if (!L->Item) return(1);

  if (L->nitems > 1) DpsSort(L->Item, L->nitems, sizeof(DPS_UINT8URLID), (qsort_cmp)cmp_ind8);

  data = (urlid_t*)DpsMalloc((L->nitems + 1) * sizeof(urlid_t));
  if (!data) {
    DpsLog(Indexer, DPS_LOG_ERROR, "Can't alloc %d bytes [%s:%d]", (int)((L->nitems + 1) * sizeof(urlid_t)), __FILE__, __LINE__);
    goto err1;
  }
  ind = (DPS_UINT8_POS_LEN*)DpsMalloc(mind * sizeof(DPS_UINT8_POS_LEN));
  if (!ind) {
    DpsLog(Indexer, DPS_LOG_ERROR, "Can't alloc %d bytes [%s:%d]", (int)(mind * sizeof(DPS_UINT8_POS_LEN)), __FILE__, __LINE__);
    goto err1;
  }

  /* Walk the sorted items; a run [prev, k) is closed whenever the key
     changes, and once more at k == nitems for the final run. Closing the
     final run separately keeps a last item with its own key from being
     merged into the preceding run. */
  prev = 0;
  for (k = 0; k <= L->nitems; k++) {
    int run_ends;

    if (k < L->nitems) {
      data[k] = L->Item[k].url_id;
      run_ends = (L->Item[k].hi != L->Item[prev].hi) || (L->Item[k].lo != L->Item[prev].lo);
    } else {
      run_ends = (L->nitems > 0);
    }
    if (!run_ends) continue;

    if (nind == mind) {
      mind += 1000;
      ind = (DPS_UINT8_POS_LEN*)DpsRealloc(ind, mind * sizeof(DPS_UINT8_POS_LEN));
      if (!ind) {
        DpsLog(Indexer, DPS_LOG_ERROR, "Can't alloc %d bytes [%s:%d]", (int)(mind * sizeof(DPS_UINT8_POS_LEN)), __FILE__, __LINE__);
        goto err1;
      }
    }
    /* Fill index */
    ind[nind].hi = L->Item[prev].hi;
    ind[nind].lo = L->Item[prev].lo;
    ind[nind].pos = prev * sizeof(*data);
    ind[nind].len = (k - prev) * sizeof(*data);
    DpsLog(Indexer, DPS_LOG_DEBUG, "%08X%08X - %d %d\n", ind[nind].hi, ind[nind].lo, (int)ind[nind].pos, ind[nind].len);
    nind++;
    prev = k;
  }

  ndata = L->nitems;
  ClearIndex8(L);

  dps_snprintf(fname, sizeof(fname) - 1, "%s%c%s%c%s.dat", vardir, DPSSLASH, DPS_TREEDIR, DPSSLASH, lim_name);
  if ((dat_fd = DpsOpen3(fname, O_CREAT | O_WRONLY | O_TRUNC | DPS_BINARY, DPS_IWRITE)) < 0) {
    DpsLog(Indexer, DPS_LOG_ERROR, "Can't open '%s': %s [%s:%d]", fname, strerror(errno), __FILE__, __LINE__);
    goto err1;
  }
  DpsWriteLock(dat_fd);
  if ((ndata * sizeof(*data)) != (size_t)write(dat_fd, data, ndata * sizeof(*data))) {
    DpsLog(Indexer, DPS_LOG_ERROR, "Can't write '%s': %s [%s:%d]", fname, strerror(errno), __FILE__, __LINE__);
    goto err1;
  }
  DpsUnLock(dat_fd);
  DpsClose(dat_fd);
  dat_fd = -1;
  DPS_FREE(data);

  dps_snprintf(fname, sizeof(fname) - 1, "%s%c%s%c%s.ind", vardir, DPSSLASH, DPS_TREEDIR, DPSSLASH, lim_name);
  if ((ind_fd = DpsOpen3(fname, O_CREAT | O_WRONLY | O_TRUNC | DPS_BINARY, DPS_IWRITE)) < 0) {
    DpsLog(Indexer, DPS_LOG_ERROR, "Can't open '%s': %s [%s:%d]", fname, strerror(errno), __FILE__, __LINE__);
    goto err1;
  }
  DpsWriteLock(ind_fd);
  if ((nind * sizeof(DPS_UINT8_POS_LEN)) != (size_t)write(ind_fd, ind, nind * sizeof(DPS_UINT8_POS_LEN))) {
    DpsLog(Indexer, DPS_LOG_ERROR, "Can't write '%s': %s [%s:%d]", fname, strerror(errno), __FILE__, __LINE__);
    goto err1;
  }
  DpsUnLock(ind_fd);
  DpsClose(ind_fd);
  ind_fd = -1;
  DPS_FREE(ind);

  return(0);

err1:
  ClearIndex8(L);
  DPS_FREE(data);
  DPS_FREE(ind);
  if (dat_fd >= 0) DpsClose(dat_fd);
  if (ind_fd >= 0) DpsClose(ind_fd);
  return(1);
}
/*
 * Build the limit index files for every "Limit-<name>" variable found in
 * the configuration.
 *
 * Only the variables stored under Root['l'] are scanned. The variable's
 * value selects the kind of index: one of the built-in kinds (category,
 * tag, link, time, hostname, language, content, siteid), or a custom
 * limit that is driven by the SQL request in "Req-<name>" and optionally
 * a separate database address in "dbaddr-<name>".
 *
 * Returns DPS_OK, or DPS_ERROR when the scratch buffer for a custom limit
 * cannot be allocated. Failures of the individual index builders are not
 * propagated.
 */
__C_LINK int __DPSCALL DpsCacheMakeIndexes(DPS_AGENT *Indexer, DPS_DB *db) {
  DPS_UINT8URLIDLIST L8;
  DPS_UINT4URLIDLIST L4;
  DPS_VARLIST *v = &Indexer->Conf->Vars;
  size_t i, r;
  char *ind, *nm, *lfname;

  bzero(&L4, sizeof(DPS_UINT4URLIDLIST));
  bzero(&L8, sizeof(DPS_UINT8URLIDLIST));

  r = (size_t) 'l';
  for (i = 0; i < v->Root[r].nvars; i++) {
    if (!strncasecmp("Limit-", v->Root[r].Var[i].name, 6)) {
      ind = v->Root[r].Var[i].val;
      lfname = v->Root[r].Var[i].name;
      nm = lfname + 6;

      /* The dps_setproctitle() calls make the current step visible in
         "ps" output on xBSD. */
      if (!strcasecmp(ind, "category")) {
        dps_setproctitle("[%d] Category index creation", Indexer->handle);
        DpsLog(Indexer, DPS_LOG_EXTRA, "Creating category index");
        if (DPS_OK == DpsLimit8(Indexer, &L8, "Category", DPS_IFIELD_TYPE_HEX8STR, db)) {
          MakeNestedIndex(Indexer, &L8, DPS_LIMFNAME_CAT, db);
        }
      } else if (!strcasecmp(ind, "tag")) {
        dps_setproctitle("[%d] Tag index creation", Indexer->handle);
        DpsLog(Indexer, DPS_LOG_EXTRA, "Creating tag index");
        if (DPS_OK == DpsLimit4(Indexer, &L4, "Tag", DPS_IFIELD_TYPE_STRCRC32, db)) {
          MakeLinearIndex(Indexer, &L4, DPS_LIMFNAME_TAG, db);
        }
      } else if (!strcasecmp(ind, "link")) {
        dps_setproctitle("[%d] Link index creation", Indexer->handle);
        DpsLog(Indexer, DPS_LOG_EXTRA, "Creating link index");
        if (DPS_OK == DpsLimit4(Indexer, &L4, "link", DPS_IFIELD_TYPE_INT, db)) {
          MakeLinearIndex(Indexer, &L4, DPS_LIMFNAME_LINK, db);
        }
      } else if (!strcasecmp(ind, "time")) {
        dps_setproctitle("[%d] Time index creation", Indexer->handle);
        DpsLog(Indexer, DPS_LOG_EXTRA, "Creating time index");
        if (DPS_OK == DpsLimit4(Indexer, &L4, "last_mod_time", DPS_IFIELD_TYPE_HOUR, db)) {
          MakeLinearIndex(Indexer, &L4, DPS_LIMFNAME_TIME, db);
        }
      } else if (!strcasecmp(ind, "hostname")) {
        dps_setproctitle("[%d] Hostname index creation", Indexer->handle);
        DpsLog(Indexer, DPS_LOG_EXTRA, "Creating hostname index");
        if (DPS_OK == DpsLimit4(Indexer, &L4, "url", DPS_IFIELD_TYPE_HOSTNAME, db)) {
          MakeLinearIndex(Indexer, &L4, DPS_LIMFNAME_HOST, db);
        }
      } else if (!strcasecmp(ind, "language")) {
        dps_setproctitle("[%d] Language index creation", Indexer->handle);
        DpsLog(Indexer, DPS_LOG_EXTRA, "Creating language index");
        if (DPS_OK == DpsLimit4(Indexer, &L4, "Content-Language", DPS_IFIELD_TYPE_STR2CRC32, db)) {
          MakeLinearIndex(Indexer, &L4, DPS_LIMFNAME_LANG, db);
        }
      } else if (!strcasecmp(ind, "content")) {
        dps_setproctitle("[%d] Content-Type index creation", Indexer->handle);
        DpsLog(Indexer, DPS_LOG_EXTRA, "Creating Content-Type index");
        if (DPS_OK == DpsLimit4(Indexer, &L4, "Content-Type", DPS_IFIELD_TYPE_STRCRC32, db)) {
          MakeLinearIndex(Indexer, &L4, DPS_LIMFNAME_CTYPE, db);
        }
      } else if (!strcasecmp(ind, "siteid")) {
        dps_setproctitle("[%d] Site_id index creation", Indexer->handle);
        DpsLog(Indexer, DPS_LOG_EXTRA, "Creating Site_id index");
        if (DPS_OK == DpsLimit4(Indexer, &L4, "site_id", DPS_IFIELD_TYPE_INT, db)) {
          MakeLinearIndex(Indexer, &L4, DPS_LIMFNAME_SITE, db);
        }
      } else {
        /* Custom limit: the item list comes from a user-supplied request. */
        char *buf, *req, *dbaddr;
        DPS_DB ldb, *pdb = &ldb;
        size_t buf_len = dps_strlen(nm) + 16;

        if ((buf = (char*) DpsMalloc(buf_len * sizeof(char))) == NULL) {
          DpsLog(Indexer, DPS_LOG_ERROR, "Can't alloc %d chars at %s:%d", (int)buf_len, __FILE__, __LINE__);
          return DPS_ERROR;
        }

        dps_setproctitle("[%d] %s index creation", Indexer->handle, nm);
        DpsLog(Indexer, DPS_LOG_EXTRA, "Creating %s index", nm);

        dps_snprintf(buf, buf_len, "Req-%s", nm);
        req = DpsVarListFindStr(&Indexer->Conf->Vars, buf, NULL);

        if (req != NULL) {
          dps_snprintf(buf, buf_len, "dbaddr-%s", nm);
          dbaddr = DpsVarListFindStr(&Indexer->Conf->Vars, buf, NULL);
          if (dbaddr != NULL) {
            /* NOTE(review): ldb is handed to DpsDBSetAddr() without prior
               initialisation and is never released here -- confirm that
               this is what DpsDBSetAddr() expects. */
            DpsDBSetAddr(pdb, dbaddr, DPS_OPEN_MODE_READ);
          } else {
            pdb = db;
          }

          /* "nex8str" is the historical spelling and is kept for existing
             configurations; "hex8str" is accepted as well because the
             field type is DPS_IFIELD_TYPE_HEX8STR. */
          if (!strcasecmp(ind, "nex8str") || !strcasecmp(ind, "hex8str")) {
            if (DPS_OK == DpsSQLLimit8(Indexer, &L8, req, DPS_IFIELD_TYPE_HEX8STR, pdb)) {
              MakeNestedIndex(Indexer, &L8, lfname, pdb);
            }
          } else {
            int field_type = DPS_IFIELD_TYPE_INT;

            if (!strcasecmp(ind, "strcrc32")) field_type = DPS_IFIELD_TYPE_STRCRC32;
            else if (!strcasecmp(ind, "hour")) field_type = DPS_IFIELD_TYPE_HOUR;
            else if (!strcasecmp(ind, "hostname")) field_type = DPS_IFIELD_TYPE_HOSTNAME;
            else if (!strcasecmp(ind, "char2")) field_type = DPS_IFIELD_TYPE_STR2CRC32;
            else if (!strcasecmp(ind, "int")) field_type = DPS_IFIELD_TYPE_INT;

            /* NOTE(review): this branch queries db, not pdb, even when a
               dbaddr-<name> was configured -- confirm this is intended. */
            if (DPS_OK == DpsSQLLimit4(Indexer, &L4, req, field_type, db)) {
              MakeLinearIndex(Indexer, &L4, lfname, db);
            }
          }
        }

        /* The scratch buffer is only needed for the variable look-ups. */
        DPS_FREE(buf);
      }

      dps_setproctitle("[%d] Indexes done.", Indexer->handle);
      DpsLog(Indexer, DPS_LOG_EXTRA, "Done");
    }
  }
  return DPS_OK;
}
/*
 * Parse one HTTP response header line held in header->data.
 *
 * The line is split in place at the first ':' into name and value.
 *  - Content-Type / Content-Encoding values are lower-cased.
 *  - Set-Cookie (only when Doc->Spider.use_cookies is set) is parsed into
 *    name, value, path, domain, secure flag and expiry and passed to
 *    DpsCookiesAdd(); nothing is stored in Doc->Sections for it.
 * Every other header (and the two lower-cased ones) is stored in
 * Doc->Sections under its name. If a "header.<name>" section exists and
 * the header has a value, the value is also added to Doc->TextList.
 */
static void DpsParseHTTPHeader(DPS_AGENT *Indexer, DPS_DOCUMENT *Doc, DPS_DSTR *header) {
  char *val, *header_name;
  char secname[128];
  DPS_VAR *Sec;
  DPS_TEXTITEM Item;

  if ((val = strchr(header_name = header->data, ':'))) {
    *val++ = '\0';
    val = DpsTrim(val, " \t:");

    if (!strcasecmp(header_name, "Content-Type") || !strcasecmp(header_name, "Content-Encoding")) {
      char *v;
      for (v = val; *v; v++) *v = dps_tolower(*v);
    } else if (Doc->Spider.use_cookies && !strcasecmp(header_name, "Set-Cookie")) {
      char *part, *lpart;
      char *name = NULL;
      char *value = NULL;
      const char *domain = NULL;
      const char *path = NULL;
      dps_uint4 expire = 0;
      char secure = 'n';

      for (part = dps_strtok_r(val, ";", &lpart); part; part = dps_strtok_r(NULL, ";", &lpart)) {
        char *arg;

        part = DpsTrim(part, " ");
        if ((arg = strchr(part, '='))) {
          *arg++ = '\0';
          if (!name) {
            /* the first name=value pair is the cookie itself */
            name = part;
            value = arg;
          } else if (!strcasecmp(part, "path")) {
            path = arg;
          } else if (!strcasecmp(part, "domain")) {
            domain = arg;
          } else if (!strcasecmp(part, "secure")) {
            secure = 'y';
          } else if (!strcasecmp(part, "expires")) {
            expire = (dps_uint4)DpsHttpDate2Time_t(arg);
          }
        } else if (name && !strcasecmp(part, "secure")) {
          /* "Secure" is normally sent as a bare attribute without '=' */
          secure = 'y';
        }
      }

      if (name && value) {
        if (domain && domain[0] == '.') {
          domain++;
        } else {
          domain = Doc->CurURL.hostname ? Doc->CurURL.hostname : "localhost";
        }
        if (!path) {
          path = Doc->CurURL.path ? Doc->CurURL.path : "/";
        }
        DpsCookiesAdd(Indexer, domain, path, name, value, secure, expire, 1);
      }
      return;
    }
  }

  DpsVarListReplaceStr(&Doc->Sections, header_name, val ? val : "<NULL>");

  dps_snprintf(secname, sizeof(secname), "header.%s", header_name);
  secname[sizeof(secname) - 1] = '\0';
  if ((Sec = DpsVarListFind(&Doc->Sections, secname)) && val) {
    /* Start from an all-zero item so that no field is handed over
       uninitialised; only the fields below carry data. */
    bzero(&Item, sizeof(Item));
    Item.href = NULL;
    Item.str = val;
    Item.section = Sec->section;
    Item.section_name = secname;
    Item.len = 0;
    DpsTextListAdd(&Doc->TextList, &Item);
  }
}
static int MakeLinearIndex(DPS_AGENT *Indexer, const char *field, const char *lim_name, int type, DPS_DB *db) { DPS_ENV *Conf = Indexer->Conf; DPS_UINT4URLIDLIST L; size_t k,prev; urlid_t *data = NULL; DPS_UINT4_POS_LEN *ind=NULL; size_t mind=1000,nind=0; char fname[PATH_MAX]; int dat_fd=0, ind_fd=0, rc; const char *vardir = (db->vardir) ? db->vardir : DpsVarListFindStr(&Conf->Vars, "VarDir", DPS_VAR_DIR); bzero(&L, sizeof(DPS_UINT4URLIDLIST)); rc = DpsLimit4(Indexer, &L, field, type, db); if(rc != DPS_OK) { DpsLog(Indexer, DPS_LOG_ERROR, "Error: %s [%s:%d]", DpsEnvErrMsg(Conf), __FILE__, __LINE__); goto err1; } if(!L.Item)return(1); if (L.nitems > 1) DpsSort(L.Item, L.nitems, sizeof(DPS_UINT4URLID), (qsort_cmp)cmp_ind4); data = (urlid_t*)DpsMalloc((L.nitems + 1) * sizeof(*data)); if(!data) { fprintf(stderr,"Error1: %s\n",strerror(errno)); goto err1; } ind=(DPS_UINT4_POS_LEN*)DpsMalloc(mind*sizeof(DPS_UINT4_POS_LEN)); if(!ind) { fprintf(stderr,"Error2: %s\n",strerror(errno)); goto err1; } prev=0; for(k=0; k<L.nitems; k++) { data[k]=L.Item[k].url_id; if((k==L.nitems-1) || (L.Item[k].val!=L.Item[prev].val)) { if(nind==mind) { mind+=1000; ind=(DPS_UINT4_POS_LEN*)DpsRealloc(ind,mind*sizeof(DPS_UINT4_POS_LEN)); if(!ind) { fprintf(stderr,"Error3: %s\n",strerror(errno)); goto err1; } } /* Fill index */ ind[nind].val=L.Item[prev].val; ind[nind].pos = prev * sizeof(*data); if (k == L.nitems - 1) ind[nind].len = (k - prev + 1) * sizeof(*data); else ind[nind].len = (k - prev) * sizeof(*data); DpsLog(Indexer, DPS_LOG_DEBUG, "%d - pos:%x len:%d\n", ind[nind].val, (int)ind[nind].pos, ind[nind].len); nind++; prev=k; } } if (L.mapped) { #ifdef HAVE_SYS_MMAN_H if (munmap(L.Item, (L.nitems + 1) * sizeof(DPS_UINT4URLID))) { fprintf(stderr, "Can't shmdt '%s': %s\n", L.shm_name, strerror(errno)); } #elif defined(HAVE_SYS_SHM_H) if (shmdt(L.Item)) { fprintf(stderr, "Can't shmdt '%s': %s\n", L.shm_name, strerror(errno)); } #endif unlink(L.shm_name); } else { DPS_FREE(L.Item); } 
dps_snprintf(fname,sizeof(fname),"%s%c%s%c%s.dat", vardir,DPSSLASH, DPS_TREEDIR, DPSSLASH, lim_name); if((dat_fd = DpsOpen3(fname, O_CREAT | O_WRONLY | O_TRUNC | DPS_BINARY, DPS_IWRITE)) < 0) { fprintf(stderr,"Can't open '%s': %s\n",fname,strerror(errno)); goto err1; } DpsWriteLock(dat_fd); if((L.nitems * sizeof(*data)) != (size_t)write(dat_fd, data, L.nitems * sizeof(*data))) { fprintf(stderr,"Can't write '%s': %s\n",fname,strerror(errno)); goto err1; } DpsUnLock(dat_fd); DpsClose(dat_fd); DPS_FREE(data); dps_snprintf(fname,sizeof(fname),"%s%c%s%c%s.ind", vardir,DPSSLASH, DPS_TREEDIR, DPSSLASH, lim_name); if((ind_fd = DpsOpen3(fname, O_CREAT | O_WRONLY | O_TRUNC | DPS_BINARY, DPS_IWRITE)) < 0) { fprintf(stderr,"Can't open '%s': %s\n",fname,strerror(errno)); goto err1; } DpsWriteLock(ind_fd); if((nind*sizeof(DPS_UINT4_POS_LEN)) != (size_t)write(ind_fd,ind,nind*sizeof(DPS_UINT4_POS_LEN))) { fprintf(stderr,"Can't write '%s': %s\n",fname,strerror(errno)); goto err1; } DpsUnLock(ind_fd); DpsClose(ind_fd); DPS_FREE(ind); return(0); err1: if (L.mapped) { #ifdef HAVE_SYS_MMAN_H if (munmap(L.Item, (L.nitems + 1) * sizeof(DPS_UINT4URLID))) { fprintf(stderr, "Can't shmdt '%s': %s\n", L.shm_name, strerror(errno)); } #elif defined(HAVE_SYS_SHM_H) if (shmdt(L.Item)) { fprintf(stderr, "Can't shmdt '%s': %s\n", L.shm_name, strerror(errno)); } #endif unlink(L.shm_name); } else { DPS_FREE(L.Item); } DPS_FREE(data); DPS_FREE(ind); if(dat_fd) DpsClose(dat_fd); if(ind_fd) DpsClose(ind_fd); return(1); }
/*
 * Load a word-frequency dictionary file into List.
 *
 * fname is taken relative to the "EtcDir" variable unless it starts with
 * '/'. The whole file is read into memory and processed line by line:
 * empty lines and lines starting with '#' are skipped, every other line
 * must hold "<frequency> <word>". Each word is converted from charset to
 * the internal charset and added with DpsChineseListAdd(). Afterwards the
 * list is sorted and entries that compare equal under cmpchinese() are
 * merged by summing their frequencies.
 *
 * Errors are reported through DpsLog() when the log is open, otherwise on
 * stderr. Returns DPS_OK on success and DPS_ERROR when the charset is
 * unknown or the file cannot be stat'ed, opened, buffered or read.
 */
int DpsChineseListLoad(DPS_AGENT *Agent, DPS_CHINALIST *List, const char *charset, const char *fname) {
  struct stat sb;
  char *str, *data = NULL, *cur_n = NULL;
  DPS_CHINAWORD chinaword;
  char word[PATH_MAX];
  dpsunicode_t uword[256];
  DPS_CHARSET *sys_int, *fcs;
  DPS_CONV to_uni;
  int fd;
  char savebyte = '\0';

  sys_int = DpsGetCharSet("sys-int");
  if (!(fcs = DpsGetCharSet(charset))) {
    if (Agent->Conf->is_log_open) DpsLog(Agent, DPS_LOG_ERROR, "Charset '%s' not found or not supported", charset);
    else fprintf(stderr, "Charset '%s' not found or not supported", charset);
    return DPS_ERROR;
  }
  DpsConvInit(&to_uni, fcs, sys_int, Agent->Conf->CharsToEscape, DPS_RECODE_HTML);

  if (*fname != '/') {
    dps_snprintf(word, sizeof(word), "%s/%s", DpsVarListFindStr(&Agent->Conf->Vars, "EtcDir", DPS_CONF_DIR), fname);
    fname = word;
  }

  if (stat(fname, &sb)) {
    if (Agent->Conf->is_log_open) DpsLog(Agent, DPS_LOG_ERROR, "Unable to stat FreqDic file '%s': %s", fname, strerror(errno));
    else fprintf(stderr, "Unable to stat FreqDic file '%s': %s", fname, strerror(errno));
    return DPS_ERROR;
  }
  /* open() reports failure with a negative value; 0 is a valid descriptor */
  if ((fd = open(fname, O_RDONLY)) < 0) {
    if (Agent->Conf->is_log_open) DpsLog(Agent, DPS_LOG_ERROR, "Unable to open FreqDic file '%s': %s", fname, strerror(errno));
    else fprintf(stderr, "Unable to open FreqDic file '%s': %s", fname, strerror(errno));
    return DPS_ERROR;
  }
  if ((data = (char*)DpsMalloc(sb.st_size + 1)) == NULL) {
    if (Agent->Conf->is_log_open) DpsLog(Agent, DPS_LOG_ERROR, "Unable to alloc %d bytes", (int)sb.st_size);
    else fprintf(stderr, "Unable to alloc %ld bytes", (long)sb.st_size);
    close(fd);
    return DPS_ERROR;
  }
  if (read(fd, data, sb.st_size) != (ssize_t)sb.st_size) {
    if (Agent->Conf->is_log_open) DpsLog(Agent, DPS_LOG_ERROR, "Unable to read FreqDic file '%s': %s", fname, strerror(errno));
    else fprintf(stderr, "Unable to read FreqDic file '%s': %s", fname, strerror(errno));
    DPS_FREE(data);
    close(fd);
    return DPS_ERROR;
  }
  data[sb.st_size] = '\0';
  close(fd);

  /* From here on "word" is reused as the scratch buffer for parsed words;
     fname (which may point into it) is not used any more. */

  /* Lines are isolated in place: the byte after each newline is saved and
     temporarily replaced by NUL, then restored when moving on. */
  str = data;
  cur_n = strchr(str, NL_INT);
  if (cur_n != NULL) {
    cur_n++;
    savebyte = *cur_n;
    *cur_n = '\0';
  }

  bzero((void*)&chinaword, sizeof(chinaword));
  chinaword.word = uword;

  while (str != NULL) {
    if (str[0] != '\0' && str[0] != '#') {
      /* Only add the entry when both fields were parsed; otherwise the
         word left over from an earlier line would be added again. */
      if (sscanf(str, "%d %63s ", &chinaword.freq, word) == 2) {
        DpsConv(&to_uni, (char*)uword, sizeof(uword), word, sizeof(word));
        DpsChineseListAdd(List, &chinaword);
      }
    }

    str = cur_n;
    if (str != NULL) {
      *str = savebyte;
      cur_n = strchr(str, NL_INT);
      if (cur_n != NULL) {
        cur_n++;
        savebyte = *cur_n;
        *cur_n = '\0';
      }
    }
  }
  DPS_FREE(data);

  DpsChineseListSort(List);

  /* Merge equal neighbours. j is the last slot of the compacted prefix;
     a differing entry is moved down into slot j + 1 so no word is lost. */
  if (List->nwords > 0) {
    register size_t i, j = 0;

    for (i = 1; i < List->nwords; i++) {
      if (cmpchinese(&List->ChiWord[j], &List->ChiWord[i]) == 0) {
        List->ChiWord[j].freq += List->ChiWord[i].freq;
      } else {
        j++;
        if (j != i) {
          /* slot j holds an already merged duplicate: drop its word and
             take over entry i, leaving slot i without an owner pointer */
          DPS_FREE(List->ChiWord[j].word);
          List->ChiWord[j] = List->ChiWord[i];
          List->ChiWord[i].word = NULL;
        }
      }
    }
    for (i = j + 1; i < List->nwords; i++) {
      DPS_FREE(List->ChiWord[i].word);
    }
    List->nwords = j + 1;
  }

  return DPS_OK;
}
int main(int argc,char **argv, char **envp) { int ch, sleeps = 1, optimize = 0, obi = 0; unsigned int from = 0, to = 0xFFF, p_to = 0; DPS_ENV * Env; const char * config_name = DPS_CONF_DIR "/cached.conf"; DpsInit(argc, argv, envp); /* Initialize library */ DpsInitMutexes(); Env=DpsEnvInit(NULL); if (Env == NULL) exit(1); DpsSetLockProc(Env, DpsLockProc); /*#ifndef HAVE_SETPROCTITLE*/ ARGV = argv; ARGC = argc; /*#endif*/ while ((ch = getopt(argc, argv, "blt:f:op:w:v:h?")) != -1){ switch (ch) { case 'f': sscanf(optarg, "%x", &from); break; case 't': sscanf(optarg, "%x", &p_to); break; case 'w': DpsVarListReplaceStr(&Env->Vars, "VarDir", optarg); break; case 'v': DpsSetLogLevel(NULL, atoi(optarg)); break; case 'b': obi++; break; case 'o': optimize++; break; case 'p': sleeps = atoi(optarg); break; case 'h': case '?': default: usage(); DpsEnvFree(Env); DpsDeInit(); DpsDestroyMutexes(); return 1; break; } } argc -= optind; argv += optind; if(argc > 1) { usage(); DpsEnvFree(Env); DpsDeInit(); DpsDestroyMutexes(); return 1; } else if (argc == 1) { config_name = argv[0]; } { DPS_LOGDEL *del_buf=NULL; size_t del_count = 0, log, bytes, n = 0; int dd, log_fd; struct stat sb; char dname[PATH_MAX] = ""; DPS_BASE_PARAM P; DPS_LOGWORD *log_buf = NULL; DPS_AGENT *Indexer = DpsAgentInit(NULL, Env, 0); log2stderr = 1; if (Indexer == NULL) { fprintf(stderr, "Can't alloc Agent at %s:%d\n", __FILE__, __LINE__); exit(DPS_ERROR); } if(DPS_OK != DpsEnvLoad(Indexer, config_name, (dps_uint8)0)){ fprintf(stderr, "%s\n", DpsEnvErrMsg(Env)); DpsEnvFree(Env); DpsDeInit(); DpsDestroyMutexes(); return DPS_ERROR; } DpsOpenLog("splitter", Env, log2stderr); Indexer->flags = Env->flags = DPS_FLAG_UNOCON; DpsVarListAddLst(&Indexer->Vars, &Env->Vars, NULL, "*"); bzero(&P, sizeof(P)); P.subdir = DPS_TREEDIR; P.basename = "wrd"; P.indname = "wrd"; P.mode = DPS_WRITE_LOCK; P.NFiles = DpsVarListFindInt(&Indexer->Conf->Vars, "WrdFiles", 0x300); P.vardir = DpsStrdup(DpsVarListFindStr(&Indexer->Conf->Vars, 
"VarDir", DPS_VAR_DIR)); P.A = Indexer; if (p_to != 0) to = p_to; else to = P.NFiles - 1; #ifdef HAVE_ZLIB P.zlib_method = Z_DEFLATED; P.zlib_level = 9; P.zlib_windowBits = DPS_BASE_WRD_WINDOWBITS; P.zlib_memLevel = 9; P.zlib_strategy = DPS_BASE_WRD_STRATEGY; #endif /* Open del log file */ dps_snprintf(dname,sizeof(dname),"%s%c%s%cdel-split.log", P.vardir, DPSSLASH, DPS_SPLDIR, DPSSLASH); if((dd = DpsOpen2(dname, O_RDONLY | DPS_BINARY)) < 0) { dps_strerror(NULL, 0, "Can't open del log '%s'", dname); exit(DPS_ERROR); } DpsLog(Indexer, DPS_LOG_DEBUG, "VarDir: %s, WrdFiles: %d [%x]", P.vardir, P.NFiles, P.NFiles); /* Allocate del buffer */ fstat(dd, &sb); if (sb.st_size != 0) { del_buf=(DPS_LOGDEL*)DpsMalloc((size_t)sb.st_size + 1); if (del_buf == NULL) { fprintf(stderr, "Can't alloc %d bytes at %s:%d\n", (int)sb.st_size, __FILE__, __LINE__); exit(0); } del_count=read(dd,del_buf,(size_t)sb.st_size)/sizeof(DPS_LOGDEL); } DpsClose(dd); /* Remove duplicates URLs in DEL log */ /* Keep only oldest records for each URL */ if (del_count > 0) { DpsLog(Indexer, DPS_LOG_DEBUG, "Sorting del_buf: %d items", del_count); if (del_count > 1) DpsSort(del_buf, (size_t)del_count, sizeof(DPS_LOGDEL), DpsCmpurldellog); DpsLog(Indexer, DPS_LOG_DEBUG, "Removing DelLogDups"); del_count = DpsRemoveDelLogDups(del_buf, del_count); } DpsLog(Indexer, DPS_LOG_DEBUG, "Processing Bufs from %d [%x] to %d [%x]", from, from, to, to); for(log = from; log <= to; log++) { /* Open log file */ dps_snprintf(dname, sizeof(dname), "%s%c%s%c%03X-split.log", P.vardir, DPSSLASH, DPS_SPLDIR, DPSSLASH, log); if((log_fd = DpsOpen2(dname, O_RDWR|DPS_BINARY)) < 0){ if (errno == ENOENT) { dps_strerror(Indexer, DPS_LOG_DEBUG, "Can't open '%s'", dname); n = 0; /* continue;*/ } else { dps_strerror(Indexer, DPS_LOG_ERROR, "Can't open '%s'", dname); continue; } } else { DpsWriteLock(log_fd); DpsLog(Indexer, DPS_LOG_DEBUG, "Processing Log: %x", log); fstat(log_fd, &sb); log_buf = (sb.st_size > 0) ? 
(DPS_LOGWORD*)DpsMalloc((size_t)sb.st_size + 1) : NULL; if (log_buf != NULL) { unlink(dname); bytes = read(log_fd,log_buf,(size_t)sb.st_size); (void)ftruncate(log_fd, (off_t)0); DpsUnLock(log_fd); DpsClose(log_fd); n = bytes / sizeof(DPS_LOGWORD); DpsLog(Indexer, DPS_LOG_DEBUG, "Sorting log_buf: %d items", n); if (n > 1) DpsSort(log_buf, n, sizeof(DPS_LOGWORD), (qsort_cmp)DpsCmplog); DpsLog(Indexer, DPS_LOG_DEBUG, "Removing OldWords"); n = DpsRemoveOldWords(log_buf, n, del_buf, del_count); if (n > 1) DpsSort(log_buf, n, sizeof(DPS_LOGWORD), (qsort_cmp)DpsCmplog_wrd); } else { n = 0; DpsUnLock(log_fd); DpsClose(log_fd); } } DpsLog(Indexer, DPS_LOG_DEBUG, "Processing Buf, optimize: %d", optimize); if (obi) DpsBaseOptimize(&P, log); DpsProcessBuf(Indexer, &P, log, log_buf, n, del_buf, del_count); if (optimize) DpsBaseOptimize(&P, log); DpsBaseClose(&P); DPS_FREE(log_buf); DpsLog(Indexer, DPS_LOG_DEBUG, "pas done: %d from %d to %d", log, from, to); DPSSLEEP(sleeps); } DPS_FREE(del_buf); DpsAgentFree(Indexer); DPS_FREE(P.vardir); } fprintf(stderr, "Splitting done.\n"); DpsEnvFree(Env); DpsDeInit(); DpsDestroyMutexes(); #ifdef EFENCE fprintf(stderr, "Memory leaks checking\n"); DpsEfenceCheckLeaks(); #endif #ifdef FILENCE fprintf(stderr, "FD leaks checking\n"); DpsFilenceCheckLeaks(NULL); #endif return 0; }
/*
 * Ask searchd for the clones of Doc and append them to Res.
 *
 * The document's "DP_ID" section value is sent in a
 * DPS_SEARCHD_CMD_CLONES packet; one reply packet is then read:
 *  - DPS_SEARCHD_CMD_ERROR:   the message is stored in Conf->errstr;
 *  - DPS_SEARCHD_CMD_DOCINFO: unless the payload is "nocloneinfo", every
 *    line becomes one new document at the end of Res->Doc;
 *  - anything else is reported as an unknown response.
 *
 * Returns DPS_OK on success and DPS_ERROR on an incomplete header, an
 * allocation failure, a searchd error or an unknown response.
 */
int DpsCloneListSearchd(DPS_AGENT *Indexer, DPS_DOCUMENT *Doc, DPS_RESULT *Res, DPS_DB *db) {
  DPS_SEARCHD_PACKET_HEADER hdr;
  ssize_t nrecv;
  char *msg = NULL, *dinfo = NULL;
  char *tok, *lt;
  char buf[128];
  int done = 0;
  int rc = DPS_OK;

  TRACE_IN(Indexer, "DpsCloneListSearchd");

  dps_snprintf(buf, 128, "%s", DpsVarListFindStr(&Doc->Sections, "DP_ID", "0"));
  hdr.cmd = DPS_SEARCHD_CMD_CLONES;
  hdr.len = dps_strlen(buf);
  /* NOTE(review): the send result is not checked; a failed send shows up
     as an incomplete header below. */
  DpsSearchdSendPacket(db->searchd, &hdr, buf);

  while (!done) {
    nrecv = DpsRecvall(db->searchd, &hdr, sizeof(hdr), 360);
    if (nrecv != sizeof(hdr)) {
      DpsLog(Indexer, DPS_LOG_ERROR, "Received incomplete header from searchd (%d bytes)", (int)nrecv);
      TRACE_OUT(Indexer);
      return(DPS_ERROR);
    } else {
#ifdef DEBUG_SDP
      DpsLog(Indexer, DPS_LOG_DEBUG, "Received header cmd=%d len=%d\n", hdr.cmd, hdr.len);
#endif
    }

    switch (hdr.cmd) {
      case DPS_SEARCHD_CMD_ERROR:
        msg = (char*)DpsMalloc(hdr.len + 1);
        if (msg == NULL) {
          rc = DPS_ERROR;
          done = 1;
          break;
        }
        nrecv = DpsRecvall(db->searchd, msg, hdr.len, 360);
        msg[(nrecv >= 0) ? nrecv : 0] = '\0';
        /* The text comes from the network and may be arbitrarily long, so
           bound the copy; 2048 is the errstr limit DpsEnvErrMsg() uses. */
        dps_snprintf(Indexer->Conf->errstr, 2048, "Searchd error: '%s'", msg);
        rc = DPS_ERROR;
        DPS_FREE(msg);
        done = 1;
        break;

      case DPS_SEARCHD_CMD_DOCINFO:
        dinfo = (char*)DpsMalloc(hdr.len + 1);
        if (dinfo == NULL) {
          rc = DPS_ERROR;
          done = 1;
          break;
        }
        nrecv = DpsRecvall(db->searchd, dinfo, hdr.len, 360);
        dinfo[(nrecv >= 0) ? nrecv : 0] = '\0';
#ifdef DEBUG_SDP
        DpsLog(Indexer, DPS_LOG_DEBUG, "Received DOCINFO size=%d buf=%s\n", hdr.len, dinfo);
#endif
        if (strcasecmp(dinfo, "nocloneinfo") != 0) {
          tok = dps_strtok_r(dinfo, "\r\n", &lt, NULL);
          while (tok) {
            DPS_DOCUMENT *D;
            size_t nd = Res->num_rows++;

            Res->Doc = (DPS_DOCUMENT*)DpsRealloc(Res->Doc, (Res->num_rows + 1) * sizeof(DPS_DOCUMENT));
            if (Res->Doc == NULL) {
              /* the array is gone, so the row count must not claim rows */
              Res->num_rows = 0;
              sprintf(Indexer->Conf->errstr, "Realloc error");
              rc = DPS_ERROR;
              break;
            }
            D = &Res->Doc[nd];
            DpsDocInit(D);
            DpsDocFromTextBuf(D, tok);
            tok = dps_strtok_r(NULL, "\r\n", &lt, NULL);
          }
        }
        DPS_FREE(dinfo);
        done = 1;
        break;

      default:
        sprintf(Indexer->Conf->errstr, "Unknown searchd response: cmd=%d len=%d", hdr.cmd, hdr.len);
        rc = DPS_ERROR;
        done = 1;
        break;
    }
  }

  TRACE_OUT(Indexer);
  return rc;
}
/*
 * Build the word-search request string from the variables of `query` and send
 * it to `searchd` with DpsSearchdSendWordRequest().
 *
 * The request starts with QUERY_STRING and appends BrowserCharset, IP, g-lc,
 * ExcerptSize, ExcerptPadding, DoExcerpt, tmplt, the optional URL-escaped
 * DateFormat and empty values, the optional searchd label, and sp/sy/s.
 * The response is not read here (it is fetched later by the caller); Res is
 * therefore not touched.
 *
 * Returns the result of DpsSearchdSendWordRequest(), or DPS_ERROR when a
 * buffer cannot be allocated (query->Conf->errstr is set in that case).
 */
int __DPSCALL DpsFindWordsSearchd(DPS_AGENT *query, DPS_RESULT *Res, DPS_DB *searchd) {
  size_t maxlen = 1024;
  char *request = NULL, *edf = NULL, *e_empty = NULL;
  const char *df = DpsVarListFindStr(&query->Vars, "DateFormat", NULL);
  const char *empty = DpsVarListFindStr(&query->Vars, "empty", NULL);
  const char *qs = DpsVarListFindStr(&query->Vars, "QUERY_STRING", "");
  const char *tmplt = DpsVarListFindStr(&query->Vars, "tmplt", "");
  int res = DPS_ERROR;

  TRACE_IN(query, "DpsFindWordsSearchd");

  if (df) {
    /* Escaped output is sized at 10 bytes per input byte, as before. */
    edf = (char*)DpsMalloc(dps_strlen(df) * 10 + 1);
    if (edf == NULL) goto nomem;
    DpsEscapeURL(edf, df);
    maxlen += dps_strlen(edf);
  }

  if (empty) {
    e_empty = (char*)DpsMalloc(dps_strlen(empty) * 10 + 1);
    if (e_empty == NULL) goto nomem;
    DpsEscapeURL(e_empty, empty);
    maxlen += dps_strlen(e_empty);
  }

  maxlen += dps_strlen(qs) + dps_strlen(tmplt) + 64;
  /* Reserve room for the label too so a long label does not truncate the request. */
  if (searchd->label) maxlen += dps_strlen(searchd->label);

  request = (char*)DpsMalloc(maxlen);
  if (request == NULL) goto nomem;

  dps_snprintf(request, maxlen,
               "%s&BrowserCharset=%s&IP=%s&g-lc=%s&ExcerptSize=%s&ExcerptPadding=%s&DoExcerpt=%s&tmplt=%s%s%s%s%s%s%s&sp=%s&sy=%s&s=%s",
               qs,
               DpsVarListFindStr(&query->Vars, "BrowserCharset", "iso-8859-1"),
               DpsVarListFindStr(&query->Vars, "IP", "localhost"),
               DpsVarListFindStr(&query->Vars, "g-lc", "en"),
               DpsVarListFindStr(&query->Vars, "ExcerptSize", "256"),
               DpsVarListFindStr(&query->Vars, "ExcerptPadding", "40"),
               (query->Flags.do_excerpt) ? "yes" : "no",
               tmplt,
               (edf) ? "&DateFormat=" : "", (edf) ? edf : "",
               (e_empty) ? "&empty=" : "", (e_empty) ? e_empty : "",
               (searchd->label) ? "&label=" : "", (searchd->label) ? searchd->label : "",
               DpsVarListFindStr(&query->Vars, "sp", "1"),
               DpsVarListFindStr(&query->Vars, "sy", "1"),
               DpsVarListFindStr(&query->Vars, "s", "RP"));
  request[maxlen - 1] = '\0';

  res = DpsSearchdSendWordRequest(query, searchd, request);
  /* The word response is read later by the caller, not here. */
  goto done;

nomem:
  sprintf(query->Conf->errstr, "Can't allocate memory");
  res = DPS_ERROR;

done:
  /* Single exit: every buffer allocated above is released on all paths. */
  DPS_FREE(request);
  DPS_FREE(edf);
  DPS_FREE(e_empty);
  TRACE_OUT(query);
  return res;
}
/*
 * Process one HTTP response header line held in `header` ("Name: value").
 *
 * The line is split in place at the first ':'; the value is trimmed of
 * blanks, tabs and colons. Special handling:
 *   - Content-Type / Content-Encoding: the value is lower-cased in place;
 *   - X-Robots-Tag (only when Doc->Spider.use_robots): each directive updates
 *     Doc->Spider.index / Doc->Spider.follow, NOARCHIVE clears section "Z";
 *   - Set-Cookie (only when Doc->Spider.use_cookies): handed to
 *     DpsCookiesAddStr() and nothing else is recorded for this header.
 * Every other header (and the first two groups above) is stored in
 * Doc->Sections under its name and, when a "header.<Name>" section is
 * defined, also added to Doc->TextList.
 */
static void DpsParseHTTPHeader(DPS_AGENT *Indexer, DPS_DOCUMENT *Doc, DPS_DSTR *header) {
  /* Directives may be separated by commas and/or whitespace. */
  static const char robots_delim[] = " ,\r\n\t";
  char *val, *header_name;
  char secname[128], savec;
  DPS_VAR *Sec;
  DPS_TEXTITEM Item;

  header_name = header->data;
  val = strchr(header_name, ':');

  if (val != NULL) {
    *val++ = '\0';
    val = DpsTrim(val, " \t:");

    if (!strcasecmp(header_name, "Content-Type") || !strcasecmp(header_name, "Content-Encoding")) {
      char *v;
      for (v = val; *v; v++) *v = (char)dps_tolower((int)*v);

    } else if (Doc->Spider.use_robots && !strcasecmp(header_name, "X-Robots-Tag")) {
      char *lt;
      char *rtok;

      /* The same delimiter set is used for every call: the original dropped
         ',' after the first token, so "noindex, nofollow" produced the token
         "nofollow," and the directive was silently ignored. */
      for (rtok = dps_strtok_r(val, robots_delim, &lt, &savec);
           rtok != NULL;
           rtok = dps_strtok_r(NULL, robots_delim, &lt, &savec)) {

        if (!strcasecmp(rtok, "ALL")) {
          /* Server parameters are left unchanged */
        } else if (!strcasecmp(rtok, "NONE")) {
          Doc->Spider.follow = DPS_FOLLOW_NO;
          Doc->Spider.index = 0;
          if (DpsNeedLog(DPS_LOG_DEBUG)) {
            DpsVarListReplaceInt(&Doc->Sections, "Index", 0);
            DpsVarListReplaceInt(&Doc->Sections, "Follow", DPS_FOLLOW_NO);
          }
        } else if (!strcasecmp(rtok, "NOINDEX")) {
          Doc->Spider.index = 0;
          if (DpsNeedLog(DPS_LOG_DEBUG)) DpsVarListReplaceInt(&Doc->Sections, "Index", 0);
        } else if (!strcasecmp(rtok, "NOFOLLOW")) {
          Doc->Spider.follow = DPS_FOLLOW_NO;
          if (DpsNeedLog(DPS_LOG_DEBUG)) DpsVarListReplaceInt(&Doc->Sections, "Follow", DPS_FOLLOW_NO);
        } else if (!strcasecmp(rtok, "NOARCHIVE")) {
          DpsVarListReplaceStr(&Doc->Sections, "Z", "");
        } else if (!strcasecmp(rtok, "INDEX")) {
          /* Server value is left unchanged */
          if (DpsNeedLog(DPS_LOG_DEBUG)) DpsVarListReplaceInt(&Doc->Sections, "Index", Doc->Spider.index);
        } else if (!strcasecmp(rtok, "FOLLOW")) {
          /* Server value is left unchanged */
          if (DpsNeedLog(DPS_LOG_DEBUG)) DpsVarListReplaceInt(&Doc->Sections, "Follow", Doc->Spider.follow);
        }
      }

    } else if (Doc->Spider.use_cookies && !strcasecmp(header_name, "Set-Cookie")) {
      DpsCookiesAddStr(Indexer, &Doc->CurURL, val, 1);
      return;
    }
  }

  /* A line without ':' is stored under its full text with a "<NULL>" value. */
  DpsVarListReplaceStr(&Doc->Sections, header_name, val ? val : "<NULL>");

  dps_snprintf(secname, sizeof(secname), "header.%s", header_name);
  secname[sizeof(secname) - 1] = '\0';

  if ((Sec = DpsVarListFind(&Doc->Sections, secname)) && val) {
    bzero((void*)&Item, sizeof(Item));
    Item.href = NULL;
    Item.str = val;
    Item.section = Sec->section;
    Item.section_name = secname;
    Item.strict = Sec->strict;
    Item.len = 0;
    (void)DpsTextListAdd(&Doc->TextList, &Item);
  }
}
#ifdef HAVE_SQL
/*
 * Return a newly allocated, SQL-escaped copy of `src` (NULL is treated as ""),
 * or NULL when the allocation fails. The buffer is sized 2*len+1, the same
 * ratio DpsCookiesAdd() uses for its path buffer. Caller frees with DPS_FREE.
 */
static char *DpsCookieSQLEscape(DPS_DB *db, const char *src) {
  const char *s = DPS_NULL2EMPTY(src);
  size_t len = dps_strlen(s);
  char *esc = (char*)DpsMalloc(2 * len + 1);

  if (esc == NULL) return NULL;
  (void)DpsDBEscStr(db, esc, s, len);
  return esc;
}
#endif

/*
 * Add a cookie to the agent's in-memory list or update the value of an
 * existing one (matched case-insensitively on domain, path and name, and on
 * the `secure` flag). When insert_flag is non-zero the change is also written
 * to the `cookies` SQL table of the database selected by the domain hash.
 *
 * All strings placed into SQL text are escaped with DpsDBEscStr(): cookie
 * names and values originate from remote servers.
 *
 * Returns DPS_OK on success (also when no database is configured or the
 * build has no SQL support), DPS_ERROR on allocation failure.
 */
int DpsCookiesAdd(DPS_AGENT *Indexer, const char *domain, const char * path, const char *name, const char *value, const char secure, dps_uint4 expires, const char from_config, int insert_flag) {
#ifdef HAVE_SQL
  char buf[3*PATH_MAX];
  char path_esc[2*PATH_MAX+1];
  char *domain_esc = NULL, *name_esc = NULL, *value_esc = NULL;
  DPS_COOKIES *Cookies = &Indexer->Cookies;
  DPS_COOKIE *Coo;
  DPS_DB *db;
  dpshash32_t url_id = DpsStrHash32(domain);
  size_t i;
  int rc = DPS_OK;
#ifdef WITH_PARANOIA
  void *paran = DpsViolationEnter(paran);
#endif

  if (Indexer->flags & DPS_FLAG_UNOCON) {
    if (Indexer->Conf->dbl.nitems == 0) {
#ifdef WITH_PARANOIA
      DpsViolationExit(Indexer->handle, paran);
#endif
      return DPS_OK;
    }
    DPS_GETLOCK(Indexer, DPS_LOCK_DB);
    db = Indexer->Conf->dbl.db[url_id % Indexer->Conf->dbl.nitems];
  } else {
    if (Indexer->dbl.nitems == 0) {
#ifdef WITH_PARANOIA
      DpsViolationExit(Indexer->handle, paran);
#endif
      return DPS_OK;
    }
    db = Indexer->dbl.db[url_id % Indexer->dbl.nitems];
  }

  if (insert_flag) {
    /* Escaped copies are only needed when SQL statements will be built. */
    (void)DpsDBEscStr(db, path_esc, DPS_NULL2EMPTY(path), dps_min(PATH_MAX, dps_strlen(DPS_NULL2EMPTY(path))));
    domain_esc = DpsCookieSQLEscape(db, domain);
    name_esc = DpsCookieSQLEscape(db, name);
    value_esc = DpsCookieSQLEscape(db, value);
    if (domain_esc == NULL || name_esc == NULL || value_esc == NULL) {
      rc = DPS_ERROR;
      goto done;
    }
  }

  /* Update in place when the cookie is already known. */
  for (i = 0; i < Cookies->ncookies; i++) {
    Coo = &Cookies->Cookie[i];
    if (!strcasecmp(Coo->domain, domain)
        && !strcasecmp(Coo->path, DPS_NULL2EMPTY(path))
        && !strcasecmp(Coo->name, name)
        && (Coo->secure == secure)) {
      DPS_FREE(Coo->value);
      Coo->value = DpsStrdup(value);
      if (insert_flag) {
        dps_snprintf(buf, sizeof(buf),
                     "UPDATE cookies SET value='%s',expires=%d WHERE domain='%s' AND path='%s' AND name='%s' AND secure='%c'",
                     value_esc, expires, domain_esc, path_esc, name_esc, secure);
        DpsSQLAsyncQuery(db, NULL, buf);
      }
      goto done;
    }
  }

  /* Otherwise append a new entry. */
  Cookies->Cookie = (DPS_COOKIE*)DpsRealloc(Cookies->Cookie, (Cookies->ncookies + 1) * sizeof(DPS_COOKIE));
  if (Cookies->Cookie == NULL) {
    Cookies->ncookies = 0;
    rc = DPS_ERROR;
    goto done;
  }

  Coo = &Cookies->Cookie[Cookies->ncookies];
  Coo->secure = secure;
  Coo->from_config = from_config;
  Coo->domain = DpsStrdup(domain);
  /* path is nullable everywhere else in this function; never store NULL,
     the lookup loop above compares Coo->path with strcasecmp(). */
  Coo->path = DpsStrdup(DPS_NULL2EMPTY(path));
  Coo->name = DpsStrdup(name);
  Coo->value = DpsStrdup(value);

  if (insert_flag) {
    if (Indexer->Flags.CheckInsertSQL) {
      dps_snprintf(buf, sizeof(buf),
                   "DELETE FROM cookies WHERE domain='%s' AND path='%s' AND name='%s' AND secure='%c'",
                   domain_esc, path_esc, name_esc, secure);
      DpsSQLAsyncQuery(db, NULL, buf);
    }
    dps_snprintf(buf, sizeof(buf),
                 "INSERT INTO cookies(expires,secure,domain,path,name,value)VALUES(%d,'%c','%s','%s','%s','%s')",
                 expires, secure, domain_esc, path_esc, name_esc, value_esc);
    DpsSQLAsyncQuery(db, NULL, buf);
  }
  Cookies->ncookies++;

done:
  DPS_FREE(domain_esc);
  DPS_FREE(name_esc);
  DPS_FREE(value_esc);
  if (Indexer->flags & DPS_FLAG_UNOCON) DPS_RELEASELOCK(Indexer, DPS_LOCK_DB);
#ifdef WITH_PARANOIA
  DpsViolationExit(Indexer->handle, paran);
#endif
  return rc;
#else
  return DPS_OK;
#endif /*HAVE_SQL*/
}
/*
 * Load a synonyms file into Env->Synonyms.
 *
 * The file is read fully into memory and processed line by line:
 *   - lines starting with '#', blank, tab, CR or LF are skipped;
 *   - "Charset: <name>"   selects the file charset (required before words);
 *   - "Language: <lang>"  sets the language (required before words);
 *   - "Thesaurus: yes"    makes following groups carry a non-zero group key;
 *   - any other line is a list of words; every pair of words in it is added
 *     to Env->Synonyms in both directions, after conversion with the file
 *     charset converter, lower-casing and NFC normalisation.
 *
 * Returns DPS_OK on success. On failure returns DPS_ERROR; Env->errstr is
 * filled for I/O, charset and missing-command errors.
 */
__C_LINK int __DPSCALL DpsSynonymListLoad(DPS_ENV * Env,const char * filename){
  struct stat sb;
  char *str, *data = NULL, *cur_n = NULL;
  char lang[64] = "";
  DPS_CHARSET *cs = NULL;
  DPS_CHARSET *sys_int = DpsGetCharSet("sys-int");
  DPS_CONV file_uni;
  DPS_WIDEWORD *ww = NULL;
  size_t nww = 0;            /* number of ww[] entries that own a uword buffer */
  size_t key = 1;
  size_t k;
  int flag_th = 0;
  int fd;
  int rc = DPS_ERROR;
  char savebyte = '\0';

  if (stat(filename, &sb)) {
    dps_snprintf(Env->errstr, sizeof(Env->errstr)-1, "Unable to stat synonyms file '%s': %s", filename, strerror(errno));
    return DPS_ERROR;
  }
  if ((fd = DpsOpen2(filename, O_RDONLY)) <= 0) {
    dps_snprintf(Env->errstr, sizeof(Env->errstr)-1, "Unable to open synonyms file '%s': %s", filename, strerror(errno));
    return DPS_ERROR;
  }
  if ((data = (char*)DpsMalloc(sb.st_size + 1)) == NULL) {
    dps_snprintf(Env->errstr, sizeof(Env->errstr)-1, "Unable to alloc %ld bytes", (long)sb.st_size);
    DpsClose(fd);
    return DPS_ERROR;
  }
  if (read(fd, data, sb.st_size) != (ssize_t)sb.st_size) {
    dps_snprintf(Env->errstr, sizeof(Env->errstr)-1, "Unable to read synonym file '%s': %s", filename, strerror(errno));
    DPS_FREE(data);
    DpsClose(fd);
    return DPS_ERROR;
  }
  /* The whole file is in memory now; the descriptor is no longer needed. */
  DpsClose(fd);
  data[sb.st_size] = '\0';

  /* Lines are isolated by temporarily writing '\0' after each '\n';
     the overwritten byte is kept in savebyte and restored when advancing. */
  str = data;
  cur_n = strchr(str, '\n');
  if (cur_n != NULL) {
    cur_n++;
    savebyte = *cur_n;
    *cur_n = '\0';
  }

  while (str != NULL) {
    if (str[0] == '#' || str[0] == ' ' || str[0] == '\t' || str[0] == '\r' || str[0] == '\n') goto next_line;

    if (!strncasecmp(str, "Charset:", 8)) {
      char *lasttok;
      char *charset;
      if ((charset = dps_strtok_r(str + 8, " \t\n\r", &lasttok))) {
        cs = DpsGetCharSet(charset);
        if (!cs) {
          dps_snprintf(Env->errstr, sizeof(Env->errstr), "Unknown charset '%s' in synonyms file '%s'", charset, filename);
          goto done;
        }
        DpsConvInit(&file_uni, cs, sys_int, Env->CharsToEscape, 0);
      }

    } else if (!strncasecmp(str, "Language:", 9)) {
      char *lasttok;
      char *l;
      if ((l = dps_strtok_r(str + 9, " \t\n\r", &lasttok))) {
        dps_strncpy(lang, l, sizeof(lang)-1);
      }

    } else if (!strncasecmp(str, "Thesaurus:", 10)) {
      char *lasttok;
      char *tok = dps_strtok_r(str + 10, " \t\n\r", &lasttok);
      /* A bare "Thesaurus:" line has no token; treat it as "no". */
      flag_th = (tok != NULL && strncasecmp(tok, "yes", 3) == 0) ? 1 : 0;

    } else {
      char *av[255];
      size_t ac, i, j;
      dpsunicode_t *t;

      if (!cs) {
        dps_snprintf(Env->errstr, sizeof(Env->errstr)-1, "No Charset command in synonyms file '%s'", filename);
        goto done;
      }
      if (!lang[0]) {
        dps_snprintf(Env->errstr, sizeof(Env->errstr)-1, "No Language command in synonyms file '%s'", filename);
        goto done;
      }

      ac = DpsGetArgs(str, av, 255);
      if (ac < 2) goto next_line;

      if ((ww = (DPS_WIDEWORD*)DpsRealloc(ww, ac * sizeof(DPS_WIDEWORD))) == NULL) goto done;

      for (i = 0; i < ac; i++) {
        ww[i].word = av[i];
        ww[i].len = dps_strlen(av[i]);
        ww[i].uword = t = (dpsunicode_t*)DpsMalloc((3 * ww[i].len + 1) * sizeof(dpsunicode_t));
        if (ww[i].uword == NULL) goto done;   /* ww[0..nww-1] are released at done */
        DpsConv(&file_uni, (char*)ww[i].uword, sizeof(dpsunicode_t) * (3 * ww[i].len + 1), av[i], ww[i].len + 1);
        DpsUniStrToLower(ww[i].uword);
        /* The normalised string replaces the conversion buffer, which is freed. */
        ww[i].uword = DpsUniNormalizeNFC(NULL, ww[i].uword);
        DPS_FREE(t);
        nww = i + 1;
      }

      for (i = 0; i < ac - 1; i++) {
        for (j = i + 1; j < ac; j++) {
          DPS_SYNONYM *syn;

          /* Two entries are appended per pair, hence the "+ 1". */
          if ((Env->Synonyms.nsynonyms + 1) >= Env->Synonyms.msynonyms) {
            Env->Synonyms.msynonyms += 64;
            Env->Synonyms.Synonym = (DPS_SYNONYM*)DpsRealloc(Env->Synonyms.Synonym, sizeof(DPS_SYNONYM) * Env->Synonyms.msynonyms);
            if (Env->Synonyms.Synonym == NULL) {
              Env->Synonyms.msynonyms = Env->Synonyms.nsynonyms = 0;
              goto done;
            }
          }

          /* Direct order */
          syn = &Env->Synonyms.Synonym[Env->Synonyms.nsynonyms];
          bzero((void*)syn, sizeof(DPS_SYNONYM));
          syn->p.uword = DpsUniDup(ww[i].uword);
          syn->s.uword = DpsUniDup(ww[j].uword);
          syn->p.count = syn->s.count = (size_t)((flag_th) ? key : 0);
          Env->Synonyms.nsynonyms++;

          /* Reverse order */
          syn = &Env->Synonyms.Synonym[Env->Synonyms.nsynonyms];
          bzero((void*)syn, sizeof(DPS_SYNONYM));
          syn->p.uword = DpsUniDup(ww[j].uword);
          syn->s.uword = DpsUniDup(ww[i].uword);
          syn->p.count = syn->s.count = (size_t)((flag_th) ? key : 0);
          Env->Synonyms.nsynonyms++;
        }
      }

      for (i = 0; i < ac; i++) {
        DPS_FREE(ww[i].uword);
      }
      nww = 0;

      /* Advance the group key, skipping 0 on wrap-around (0 means "no group"). */
      do { key++; } while (key == 0);
    }

next_line:
    str = cur_n;
    if (str != NULL) {
      *str = savebyte;
      cur_n = strchr(str, '\n');
      if (cur_n != NULL) {
        cur_n++;
        savebyte = *cur_n;
        *cur_n = '\0';
      }
    }
  }

  rc = DPS_OK;

done:
  /* Common cleanup for success and for every parse-time failure. */
  if (ww != NULL) {
    for (k = 0; k < nww; k++) {
      DPS_FREE(ww[k].uword);
    }
  }
  DPS_FREE(ww);
  DPS_FREE(data);
  return rc;
}
#ifdef HAVE_SQL
/*
 * Send the two requests of an AuthPing sequence for `Server`:
 *   1. a HEAD request to "<schema>://<hostinfo>/" of Doc's current URL;
 *   2. the ping itself: `PingData` is "<url> <body>", sent with `method`.
 * The response headers of both are processed with
 * DpsDocProcessResponseHeaders(); the responses are not otherwise used here.
 *
 * Returns DPS_OK, or DPS_ERROR when the temporary document cannot be set up.
 */
static int DpsCookiesSendAuthPing(DPS_AGENT *Indexer, DPS_SERVER *Server, DPS_DOCUMENT *Doc, char *PingData, int method) {
  char buf[2*PATH_MAX];
  size_t size = dps_strlen(PingData);
  char PingURL[size + 2];    /* URL plus room for "?" when a body is appended */
  char PingBody[size + 1];   /* never zero-sized, always terminated */
  DPS_DOCUMENT *rDoc;

  /* sscanf() may match fewer than two tokens; start from empty strings. */
  PingURL[0] = '\0';
  PingBody[0] = '\0';

  rDoc = DpsDocInit(NULL);
  if (rDoc == NULL) return DPS_ERROR;
  DpsSpiderParamInit(&rDoc->Spider);
  DpsVarList2Doc(rDoc, Server);
  rDoc->Buf.max_size = (size_t)DpsVarListFindInt(&Indexer->Vars, "MaxDocSize", DPS_MAXDOCSIZE);
  rDoc->Buf.allocated_size = DPS_NET_BUF_SIZE;
  if ((rDoc->Buf.buf = (char*)DpsMalloc(rDoc->Buf.allocated_size + 1)) == NULL) {
    DpsDocFree(rDoc);
    return DPS_ERROR;
  }
  rDoc->Buf.buf[0] = '\0';
  rDoc->subdoc = Indexer->Flags.SubDocLevel + 1;

  /* --- 1. HEAD request to the site root --- */
  dps_snprintf(buf, sizeof(buf), "%s://%s/", DPS_NULL2EMPTY(Doc->CurURL.schema), DPS_NULL2EMPTY(Doc->CurURL.hostinfo));
  DpsVarListReplaceStr(&rDoc->Sections, "URL", buf);
  DpsURLParse(&rDoc->CurURL, buf);
  DpsLog(Indexer, DPS_LOG_INFO, "HOME: %s", buf);
  rDoc->method = DPS_METHOD_HEAD;
  if (Doc != NULL) {
    DpsVarListReplaceLst(&rDoc->RequestHeaders, &Doc->RequestHeaders, NULL, "*");
  }
  DpsVarListReplaceStr(&rDoc->Sections, "have_no_cookies", "0");
  DpsDocAddDocExtraHeaders(Indexer, Server, rDoc);
  DpsDocAddConfExtraHeaders(Indexer->Conf, rDoc);
  DpsVarListReplaceLst(&rDoc->Sections, &Server->Vars, NULL, "*");
  DpsDocAddServExtraHeaders(Server, rDoc);
  DpsVarListLog(Indexer, &rDoc->RequestHeaders, DPS_LOG_DEBUG, "HOME.Request");
  if (Doc == NULL || Indexer->Flags.cmd == DPS_IND_FILTER) {
    DpsDocLookupConn(Indexer, rDoc);
  } else {
    /* Borrow Doc's connection structure (copied by value). */
    DPS_FREE(rDoc->connp.connp);
    rDoc->connp = Doc->connp;
  }
  (void)DpsGetURL(Indexer, rDoc, NULL);
  /* Only the headers of the home page are of interest (for cookies). */
  DpsDocProcessResponseHeaders(Indexer, rDoc);
  DpsVarListLog(Indexer, &rDoc->Sections, DPS_LOG_DEBUG, "HOME.Response");

  /* --- 2. the ping request --- */
  sscanf(PingData, "%s %s", PingURL, PingBody);
  /* NOTE(review): rDoc->method is still DPS_METHOD_HEAD at this point, so the
     GET branch is not taken; possibly `method` was meant - confirm intent. */
  if (rDoc->method == DPS_METHOD_GET) {
    dps_strcat(PingURL, "?");
    dps_strcat(PingURL, PingBody);
  } else {
    DpsVarListReplaceStr(&rDoc->Sections, "body", PingBody);
  }
  DpsVarListReplaceStr(&rDoc->Sections, "URL", PingURL);
  DpsURLParse(&rDoc->CurURL, PingURL);
  DpsLog(Indexer, DPS_LOG_INFO, "AUTH.PING: %s", PingURL);
  rDoc->method = method;
  DpsVarListFree(&rDoc->RequestHeaders);
  DpsVarListReplaceStr(&rDoc->Sections, "have_no_cookies", "0");
  DpsDocAddDocExtraHeaders(Indexer, Server, rDoc);
  DpsDocAddConfExtraHeaders(Indexer->Conf, rDoc);
  DpsVarListReplaceLst(&rDoc->Sections, &Server->Vars, NULL, "*");
  DpsDocAddServExtraHeaders(Server, rDoc);
  if (method == DPS_METHOD_POST) {
    dps_snprintf(buf, sizeof(buf), "application/x-www-form-urlencoded; charset=%s",
                 DpsVarListFindStr(&Indexer->Conf->Vars, "LocalCharset", "iso-8859-1"));
    DpsVarListReplaceStr(&rDoc->RequestHeaders, "Content-Type", buf);
    dps_snprintf(buf, sizeof(buf), "%d", (int)dps_strlen(PingBody));
    DpsVarListReplaceStr(&rDoc->RequestHeaders, "Content-Length", buf);
  }
  DpsVarListLog(Indexer, &rDoc->RequestHeaders, DPS_LOG_DEBUG, "AUTHPING.Request");
  (void)DpsGetURL(Indexer, rDoc, NULL);
  /* Only the cookies carried by the response headers are of interest. */
  DpsDocProcessResponseHeaders(Indexer, rDoc);
  DpsVarListDel(&rDoc->Sections, "body");
  DpsVarListLog(Indexer, &rDoc->Sections, DPS_LOG_DEBUG, "AUTHPING.Response");

  /* Clear the connection copy before DpsDocFree() is called on rDoc. */
  if (Doc != NULL) bzero(&rDoc->connp, sizeof(rDoc->connp));
  DpsDocFree(rDoc);
  return DPS_OK;
}

/*
 * Run the optional "AuthPing" configured on `Server`. The variable holds a
 * base64-encoded string "GET <url> <body>" or "POST <url> <body>".
 * Returns DPS_OK when there is nothing to do or the ping was sent (or the
 * method was invalid, which is only logged); DPS_ERROR on allocation failure.
 */
static int DpsCookiesAuthPing(DPS_AGENT *Indexer, DPS_SERVER *Server, DPS_DOCUMENT *Doc) {
  char *PingData = DpsVarListFindStr(&Server->Vars, "AuthPing", NULL);
  char *AuthPing;
  int method = DPS_METHOD_GET;
  int rc = DPS_OK;

  if (PingData == NULL) return DPS_OK;

  /* The duplicate only provides a buffer large enough for the decoded text. */
  AuthPing = DpsStrdup(DpsTrim(PingData, " \t\r\n"));
  if (AuthPing == NULL) return DPS_ERROR;
  dps_base64_decode(AuthPing, PingData, dps_strlen(PingData));

  if (!strncasecmp(AuthPing, "GET", 3)) {
    method = DPS_METHOD_GET;
    PingData = DpsTrim(AuthPing + 3, " \t\r\n");
  } else if (!strncasecmp(AuthPing, "POST", 4)) {
    method = DPS_METHOD_POST;
    PingData = DpsTrim(AuthPing + 4, " \t\r\n");
  } else {
    DpsLog(Indexer, DPS_LOG_ERROR, "AuthPing should be GET or POST: %s", AuthPing);
    PingData = NULL;
  }

  if (PingData != NULL) {
    rc = DpsCookiesSendAuthPing(Indexer, Server, Doc, PingData, method);
  }
  DpsFree(AuthPing);
  return rc;
}
#endif /* HAVE_SQL */

/*
 * Collect the cookies applicable to Doc's current URL on host `hostinfo` and
 * store them as the "Cookie" request header of Doc.
 *
 * First the agent's in-memory cookie list is scanned (domain suffix match,
 * path prefix match, secure cookies only for "https"). If no matching cookie
 * that did not come from the configuration was seen (and the document's
 * "have_no_cookies" section, default 1, is non-zero), the optional AuthPing of
 * `Server` is performed and the `cookies` SQL table is queried for `hostinfo`
 * and each of its parent domains; rows found are cached via DpsCookiesAdd().
 * Without SQL support the function does nothing.
 */
void DpsCookiesFind(DPS_AGENT *Indexer, DPS_SERVER *Server, DPS_DOCUMENT *Doc, const char *hostinfo) {
#ifdef HAVE_SQL
  DPS_DSTR cookie;
  DPS_COOKIES *Cookies = &Indexer->Cookies;
  DPS_COOKIE *Coo;
  const char *schema = DPS_NULL2EMPTY(Doc->CurURL.schema);
  const char *urlpath = DPS_NULL2EMPTY(Doc->CurURL.path);
  size_t i, blen = dps_strlen(hostinfo), slen;
  int have_no_cookies = DpsVarListFindInt(&Doc->Sections, "have_no_cookies", 1);
#ifdef WITH_PARANOIA
  void *paran = DpsViolationEnter(paran);
#endif

  TRACE_IN(Indexer, "DpsCookiesFind");

  DpsDSTRInit(&cookie, 1024);

  /* Pass 1: cookies already held in memory. */
  for (i = 0; i < Cookies->ncookies; i++) {
    Coo = &Cookies->Cookie[i];
    slen = dps_strlen(Coo->domain);
    if (slen > blen) continue;
    if (Coo->secure == 'y' && strcasecmp(schema, "https")) continue;
    if (strncasecmp(Coo->path, urlpath, dps_strlen(Coo->path))) continue;
    if (strcasecmp(Coo->domain, hostinfo + (blen - slen))) continue;
    if (Coo->from_config != 1) have_no_cookies = 0;
    /* An entry with empty name and value only marks "already looked up". */
    if (Coo->name[0] == '\0' && Coo->value[0] == '\0') continue;
    if (cookie.data_size) DpsDSTRAppend(&cookie, "; ", 2);
    DpsDSTRAppendStr(&cookie, Coo->name);
    DpsDSTRAppend(&cookie, "=", 1);
    DpsDSTRAppendStr(&cookie, Coo->value);
  }

  /* Pass 2: optional AuthPing, then the cookies table. */
  if (have_no_cookies) {
    char buf[2*PATH_MAX];
    char host_esc[2*PATH_MAX+1];
    dpshash32_t url_id;
    DPS_DB *db;
    DPS_SQLRES Res;
    size_t rows, ndb;
    const int unocon = (Indexer->flags & DPS_FLAG_UNOCON) != 0;

    if (Server != NULL && DpsCookiesAuthPing(Indexer, Server, Doc) != DPS_OK) {
      /* As before, an allocation failure abandons the lookup entirely. */
      goto done;
    }

    while (hostinfo != NULL) {
      url_id = DpsStrHash32(hostinfo);

      if (unocon) {
        DPS_GETLOCK(Indexer, DPS_LOCK_DB);
        ndb = Indexer->Conf->dbl.nitems;
        db = (ndb != 0) ? Indexer->Conf->dbl.db[url_id % ndb] : NULL;
      } else {
        ndb = Indexer->dbl.nitems;
        db = (ndb != 0) ? Indexer->dbl.db[url_id % ndb] : NULL;
      }
      if (db == NULL) {
        /* No database configured: nothing to query (avoids a modulo by zero). */
        if (unocon) DPS_RELEASELOCK(Indexer, DPS_LOCK_DB);
        break;
      }

      DpsSQLResInit(&Res);
      (void)DpsDBEscStr(db, host_esc, hostinfo, dps_min(PATH_MAX, dps_strlen(hostinfo)));
      dps_snprintf(buf, sizeof(buf), "SELECT name,value,path,secure FROM cookies WHERE domain='%s'", host_esc);

      if (DPS_OK == DpsSQLQuery(db, &Res, buf)) {
        rows = DpsSQLNumRows(&Res);
        for (i = 0; i < rows; i++) {
          DpsCookiesAdd(Indexer, hostinfo, DpsSQLValue(&Res, i, 2), DpsSQLValue(&Res, i, 0), DpsSQLValue(&Res, i, 1), *DpsSQLValue(&Res, i, 3), 0, 0, 0);
          if (*DpsSQLValue(&Res, i, 3) == 'y' && strcasecmp(schema, "https")) continue;
          if (strncasecmp(DpsSQLValue(&Res, i, 2), urlpath, dps_strlen(DpsSQLValue(&Res, i, 2)))) continue;
          if (cookie.data_size) DpsDSTRAppend(&cookie, "; ", 2);
          DpsDSTRAppendStr(&cookie, DpsSQLValue(&Res, i, 0));
          DpsDSTRAppend(&cookie, "=", 1);
          DpsDSTRAppendStr(&cookie, DpsSQLValue(&Res, i, 1));
        }
        if (rows == 0) {
          /* Cache an empty marker entry for this domain. */
          DpsCookiesAdd(Indexer, hostinfo, "/", "", "", 'n', 0, 0, 0);
        }
      }
      DpsSQLFree(&Res);
      if (unocon) {
        DPS_RELEASELOCK(Indexer, DPS_LOCK_DB);
      }

      /* Continue with the parent domain: "a.b.c" -> "b.c" -> "c". */
      hostinfo = strchr(hostinfo, '.');
      if (hostinfo != NULL) hostinfo++;
    }
  }

  if (cookie.data_size) {
    DpsVarListReplaceStr(&Doc->RequestHeaders, "Cookie", cookie.data);
  }

done:
  DpsDSTRFree(&cookie);
#ifdef WITH_PARANOIA
  DpsViolationExit(Indexer->handle, paran);
#endif
  TRACE_OUT(Indexer);
#endif /* HAVE_SQL */
  return;
}
int main(int argc, char ** argv, char **envp) { const char *env, *bcharset, *lcharset, *conf_dir; char template_name[PATH_MAX+6]=""; char *template_filename = NULL; char *query_string = NULL; char self[1024]=""; char *url = NULL; const char *ResultContentType; int res,httpd=0; size_t catcolumns = 0; int page_size,page_number; DPS_ENV *Env; DPS_AGENT *Agent; DPS_VARLIST query_vars; /* Output Content-type if under HTTPD */ /* Some servers do not pass QUERY_STRING */ /* if the query was empty, so check */ /* REQUEST_METHOD too to be safe */ httpd=(getenv("QUERY_STRING")||getenv("REQUEST_METHOD")); if (!(conf_dir=getenv("DPS_ETC_DIR"))) conf_dir=DPS_CONF_DIR; DpsInit(argc, argv, envp); Env=DpsEnvInit(NULL); if (Env == NULL) { if(httpd){ printf("Content-Type: text/plain\r\n\r\n"); } printf("Can't alloc Env\n"); exit(0); } DpsVarListInit(&query_vars); Agent = DpsAgentInit(NULL, Env, 0); if (Agent == NULL) { if(httpd){ printf("Content-Type: text/plain\r\n\r\n"); } printf("Can't alloc Agent\n"); exit(0); } DpsVarListAddEnviron(&Env->Vars,"ENV"); /* Detect self and template name */ if((env = getenv("DPSEARCH_TEMPLATE"))) dps_strncpy(template_name, env, sizeof(template_name) - 1); else if((env = getenv("PATH_INFO")) && env[0]) dps_strncpy(template_name, env + 1, sizeof(template_name) - 1); if((env=getenv("DPSEARCH_SELF"))) dps_strncpy(self,env,sizeof(self)-1); if((env=getenv("QUERY_STRING"))){ query_string = (char*)DpsRealloc(query_string, dps_strlen(env) + 2); if (query_string == NULL) { if(httpd){ printf("Content-Type: text/plain\r\n\r\n"); } printf("Can't alloc query_string\n"); exit(0); } dps_strncpy(query_string, env, dps_strlen(env) + 1); /* Hack for Russian Apache from apache.lexa.ru */ /* QUERY_STRING is already converted to server */ /* character set. We must print original query */ /* string instead however. Under usual apache */ /* we'll use QUERY_STRING. 
Note that query_vars */ /* list will contain not unescaped values, so */ /* we don't have to escape them when displaying */ env = getenv("CHARSET_SAVED_QUERY_STRING"); DpsParseQStringUnescaped(&query_vars,env?env:query_string); /* Unescape and save variables from QUERY_STRING */ /* Env->Vars will have unescaped values however */ DpsParseQueryString(Agent,&Env->Vars,query_string); template_filename = (char*)DpsStrdup(DpsVarListFindStr(&Env->Vars, "tmplt", "")); if((env=getenv("REDIRECT_STATUS"))){ /* Check Apache internal redirect */ /* via "AddHandler" and "Action" */ if(!self[0]){ dps_strncpy(self,(env=getenv("REDIRECT_URL"))?env:"filler.cgi",sizeof(self)-1); } if(!template_name[0]){ dps_strncpy(template_name,(env=getenv("PATH_TRANSLATED"))?env:"",sizeof(template_name)-1); } if (*template_filename == '\0') { DPS_FREE(template_filename); template_filename = (char*)DpsStrdup("filler.htm"); } }else{ /* CGI executed without Apache internal redirect */ /* Detect $Self variable with OS independant SLASHES */ if(!self[0]){ dps_strncpy(self,(env=getenv("SCRIPT_NAME"))?env:"filler.cgi",sizeof(self)-1); } if(!template_name[0]){ char *s,*e; /*This is with OS specific SLASHES */ env=((env=getenv("SCRIPT_FILENAME"))?env:"filler.cgi"); if(strcmp(conf_dir,".")){ /* Take from the config directory */ dps_snprintf(template_name, sizeof(template_name)-1, "%s/%s", conf_dir,(s=strrchr(env,DPSSLASH))?(s+1):(self)); }else{ /* Take from the current directory */ dps_strncpy(template_name,env,sizeof(template_name)-1); } /* Find right slash if it presents */ s=((s=strrchr(template_name,DPSSLASH))?s:template_name); if (*template_filename == '\0') { /* Find .cgi substring */ if ((e = strstr(s, ".cgi")) != NULL) { /* Replace ".cgi" with ".htm" */ e[1]='h';e[2]='t';e[3]='m'; } else { dps_strcat(s, ".htm"); } e = strrchr(s, '/'); DPS_FREE(template_filename); template_filename = (char*)DpsStrdup(e + 1); } else { dps_strncpy(s + 1, template_filename, sizeof(template_name) - (s - template_name) - 
2); } } } }else{ /* Executed from command line */ /* or under server which does not */ /* pass an empty QUERY_STRING var */ if(argv[1]) { query_string = (char*)DpsRealloc(query_string, dps_strlen(argv[1]) + 10); if (query_string == NULL) { if(httpd){ printf("Content-Type: text/plain\r\n\r\n"); } printf("Can't realloc query_string\n"); exit(0); } sprintf(query_string, "q=%s", argv[1]); } else { query_string = (char*)DpsRealloc(query_string, 1024); if (query_string == NULL) { if(httpd){ printf("Content-Type: text/plain\r\n\r\n"); } printf("Can't realloc query_string\n"); exit(0); } sprintf(query_string, "q="); } /* Hack for Russian Apache from apache.lexa.ru */ /* QUERY_STRING is already converted to server */ /* character set. We must print original query */ /* string instead however. Under usual apache */ /* we'll use QUERY_STRING. Note that query_vars */ /* list will contain not unescaped values, so */ /* we don't have to escape them when displaying */ env = getenv("CHARSET_SAVED_QUERY_STRING"); DpsParseQStringUnescaped(&query_vars,env?env:query_string); /* Unescape and save variables from QUERY_STRING */ /* Env->Vars will have unescaped values however */ DpsParseQueryString(Agent,&Env->Vars,query_string); DPS_FREE(template_filename); template_filename = (char*)DpsStrdup(DpsVarListFindStr(&Env->Vars, "tmplt", "")); if (*template_filename == '\0') { DPS_FREE(template_filename); template_filename = (char*)DpsStrdup("filler.htm"); } /*// Get template name from command line variable &tmplt */ if(!template_name[0]) dps_snprintf(template_name,sizeof(template_name),"%s/%s", conf_dir, template_filename); } DpsVarListReplaceStr(&Agent->Conf->Vars, "tmplt", template_filename); DPS_FREE(template_filename); Agent->tmpl.Env_Vars = &Env->Vars; DpsURLNormalizePath(template_name); if (strncmp(template_name, conf_dir, dps_strlen(conf_dir)) || (res = DpsTemplateLoad(Agent, Env, &Agent->tmpl, template_name))) { if (strcmp(template_name, "filler.htm")) { /* trying load default 
template */ fprintf(stderr, "Can't load template: '%s' %s\n", template_name, Env->errstr); DPS_FREE(template_filename); template_filename = (char*)DpsStrdup("filler.htm"); dps_snprintf(template_name, sizeof(template_name), "%s/%s", conf_dir, template_filename); if ((res = DpsTemplateLoad(Agent, Env, &Agent->tmpl, template_name))) { if(httpd)printf("Content-Type: text/plain\r\n\r\n"); printf("%s\n",Env->errstr); DpsVarListFree(&query_vars); DpsEnvFree(Env); DPS_FREE(query_string); DpsAgentFree(Agent); return(0); } } else { if(httpd)printf("Content-Type: text/plain\r\n\r\n"); printf("%s\n",Env->errstr); DpsVarListFree(&query_vars); DpsEnvFree(Env); DPS_FREE(query_string); DpsAgentFree(Agent); return(0); } } /* set locale if specified */ if ((url = DpsVarListFindStr(&Env->Vars, "Locale", NULL)) != NULL) { setlocale(LC_ALL, url); /*#ifdef HAVE_ASPELL*/ { char *p; if ((p = strchr(url, '.')) != NULL) { *p = '\0'; DpsVarListReplaceStr(&Env->Vars, "g-lc", url); *p = '.'; } } /*#endif*/ url = NULL; } /* Call again to load search Limits if need */ DpsParseQueryString(Agent, &Env->Vars, query_string); Agent->Flags = Env->Flags; Agent->flags |= DPS_FLAG_UNOCON; Env->flags |= DPS_FLAG_UNOCON; DpsSetLogLevel(NULL, DpsVarListFindInt(&Env->Vars, "LogLevel", 0)); DpsOpenLog("filler.cgi", Env, !strcasecmp(DpsVarListFindStr(&Env->Vars, "Log2stderr", (!httpd) ? 
"yes" : "no"), "yes")); DpsLog(Agent,DPS_LOG_ERROR,"filler.cgi started with '%s'",template_name); DpsLog(Agent, DPS_LOG_DEBUG, "VarDir: '%s'", DpsVarListFindStr(&Agent->Conf->Vars, "VarDir", DPS_VAR_DIR)); DpsLog(Agent, DPS_LOG_DEBUG, "Affixes: %d, Spells: %d, Synonyms: %d, Acronyms: %d, Stopwords: %d", Env->Affixes.naffixes,Env->Spells.nspell, Env->Synonyms.nsynonyms, Env->Acronyms.nacronyms, Env->StopWords.nstopwords); DpsLog(Agent, DPS_LOG_DEBUG, "Chinese dictionary with %d entries", Env->Chi.nwords); DpsLog(Agent, DPS_LOG_DEBUG, "Korean dictionary with %d entries", Env->Korean.nwords); DpsLog(Agent, DPS_LOG_DEBUG, "Thai dictionary with %d entries", Env->Thai.nwords); DpsVarListAddLst(&Agent->Vars, &Env->Vars, NULL, "*"); Agent->tmpl.Env_Vars = &Agent->Vars; /* DpsVarListAddEnviron(&Agent->Vars, "ENV");*/ /****************************************************************************************************************************************/ /* This is for query tracking */ DpsVarListAddStr(&Agent->Vars, "QUERY_STRING", query_string); DpsVarListAddStr(&Agent->Vars, "self", self); env = getenv("HTTP_X_FORWARDER_FOR"); if (env) { DpsVarListAddStr(&Agent->Vars, "IP", env); } else { env = getenv("REMOTE_ADDR"); DpsVarListAddStr(&Agent->Vars, "IP", env ? 
env : "localhost"); } bcharset = DpsVarListFindStr(&Agent->Vars, "BrowserCharset", "iso-8859-1"); Env->bcs=DpsGetCharSet(bcharset); lcharset = DpsVarListFindStr(&Agent->Vars, "LocalCharset", "iso-8859-1"); Env->lcs=DpsGetCharSet(lcharset); ResultContentType = DpsVarListFindStr(&Agent->Vars, "ResultContentType", "text/html"); if(httpd){ if(!Env->bcs){ printf("Content-Type: text/plain\r\n\r\n"); printf("Unknown BrowserCharset '%s' in template '%s'\n",bcharset,template_name); exit(0); }else if(!Env->lcs){ printf("Content-Type: text/plain\r\n\r\n"); printf("Unknown LocalCharset '%s' in template '%s'\n",lcharset,template_name); exit(0); }else{ printf("Content-type: %s; charset=%s\r\n\r\n", ResultContentType, bcharset); } }else{ if(!Env->bcs){ printf("Unknown BrowserCharset '%s' in template '%s'\n",bcharset,template_name); exit(0); } if(!Env->lcs){ printf("Unknown LocalCharset '%s' in template '%s'\n",lcharset,template_name); exit(0); } } /* These parameters taken from "variable section of template"*/ res = DpsVarListFindInt(&Agent->Vars, "ps", DPS_DEFAULT_PS); page_size = dps_min(res, MAX_PS); page_number = DpsVarListFindInt(&Agent->Vars, "p", 0); if (page_number == 0) { page_number = DpsVarListFindInt(&Agent->Vars, "np", 0); DpsVarListReplaceInt(&Agent->Vars, "p", page_number + 1); } else page_number--; res = DpsVarListFindInt(&Agent->Vars, "np", 0) * page_size; DpsVarListAddInt(&Agent->Vars, "pn", res); catcolumns = (size_t)atoi(DpsVarListFindStr(&Agent->Vars, "CatColumns", "")); DpsTemplatePrint(Agent, (DPS_OUTPUTFUNCTION)&fprintf, stdout, NULL, 0, &Agent->tmpl, "top"); DpsTemplatePrint(Agent, (DPS_OUTPUTFUNCTION)&fprintf, stdout, NULL, 0, &Agent->tmpl, "restop"); DpsTemplatePrint(Agent, (DPS_OUTPUTFUNCTION)&fprintf, stdout, NULL, 0, &Agent->tmpl, "res"); DpsTemplatePrint(Agent, (DPS_OUTPUTFUNCTION)&fprintf, stdout, NULL, 0, &Agent->tmpl, "resbot"); DpsTemplatePrint(Agent, (DPS_OUTPUTFUNCTION)&fprintf, stdout, NULL, 0, &Agent->tmpl, "bottom"); 
DpsVarListFree(&query_vars); DpsAgentFree(Agent); DpsEnvFree(Env); DPS_FREE(query_string); DPS_FREE(url); if (httpd) fflush(NULL); else fclose(stdout); #ifdef EFENCE fprintf(stderr, "Memory leaks checking\n"); DpsEfenceCheckLeaks(); #endif #ifdef FILENCE fprintf(stderr, "FD leaks checking\n"); DpsFilenceCheckLeaks(NULL); #endif return DPS_OK; }
/*
 * Convert a Unix-style FTP LIST response held in connp->buf into a list of
 * HTML anchors, one per line, and store the result back into connp->buf
 * (growing it and connp->buf_len_total when needed).
 *
 * Each input line is expected as
 *   drwxrwxrwx x user group size month date time file_name
 * Directories ('d', except "." and "..") and symlinks ('l', name taken before
 * " -> ") produce  <a href="ftp://[user:pass@]host/<path><name>/"></a>
 * regular files ('-') produce the same link without the trailing slash.
 * Other lines are ignored.
 *
 * Returns 0 on success (also when the buffer is empty), -1 on allocation
 * failure.
 */
static int ftp_parse_list(DPS_CONN *connp, char *path){
  /* Fixed characters of one entry: `<a href="ftp://` (15), ':' and '@' (2),
     '/' after the host (1), and at most `/"></a>` plus newline (8). */
  enum { FTP_LINK_OVERHEAD = 26 };
  const char *user, *pass, *cred_sep, *host_sep;
  char *line, *tok, savec;
  char *buf_out;
  size_t len_h, len_p, buf_len, cur_len = 0;

  if (!connp->buf || !connp->buf_len) return 0;

  user = (connp->user) ? connp->user : "";
  pass = (connp->pass) ? connp->pass : "";
  cred_sep = (connp->user) ? ":" : "";
  host_sep = (connp->user || connp->pass) ? "@" : "";

  len_h = dps_strlen(connp->hostname) + dps_strlen(user) + dps_strlen(pass) + FTP_LINK_OVERHEAD;
  len_p = dps_strlen(path);

  buf_len = connp->buf_len;
  buf_out = DpsXmalloc(buf_len + 1);
  if (buf_out == NULL) return -1;
  buf_out[0] = '\0';

  for (line = dps_strtok_r(connp->buf, "\r\n", &tok, &savec);
       line != NULL;
       line = dps_strtok_r(NULL, "\r\n", &tok, &savec)) {
    char *field_state = NULL;
    char *fname, *arrow;
    const char *trailer;
    size_t need;
    int i;

    /* Skip the first eight space-separated fields; the rest is the name.
       strtok_r keeps the field state local to this line. */
    if (!(fname = strtok_r(line, " ", &field_state))) continue;
    for (i = 0; i < 7; i++) {
      if (!(fname = strtok_r(NULL, " ", &field_state))) break;
    }
    if (fname == NULL) continue;
    if (!(fname = strtok_r(NULL, "", &field_state))) continue;

    switch (line[0]) {
      case 'd':
        if (!strcmp(fname, ".") || !strcmp(fname, "..")) continue;
        trailer = "/";
        break;
      case 'l':
        /* "name -> target": keep only the link name. */
        arrow = strstr(fname, " -> ");
        if (!arrow) continue;
        *arrow = '\0';
        trailer = "/";
        break;
      case '-':
        trailer = "";
        break;
      default:
        continue;
    }

    /* Upper bound for this entry; grow the output until it fits. */
    need = len_h + len_p + dps_strlen(fname);
    if ((cur_len + need) >= buf_len) {
      while ((cur_len + need) >= buf_len) buf_len += DPS_NET_BUF_SIZE;
      buf_out = DpsXrealloc(buf_out, buf_len + 1);
      if (buf_out == NULL) return -1;
    }

    dps_snprintf(buf_out + cur_len, need + 1,
                 "<a href=\"ftp://%s%s%s%s%s/%s%s%s\"></a>\n",
                 user, cred_sep, pass, host_sep,
                 connp->hostname, path, fname, trailer);
    /* Track the bytes actually written, not the upper bound. */
    cur_len += dps_strlen(buf_out + cur_len);
  }

  if (cur_len + 1 > connp->buf_len_total) {
    connp->buf_len_total = cur_len;
    connp->buf = DpsXrealloc(connp->buf, (size_t)connp->buf_len_total + 1);
    if (connp->buf == NULL) {
      DPS_FREE(buf_out);
      return -1;
    }
  }
  bzero(connp->buf, ((size_t)connp->buf_len_total + 1));
  dps_memmove(connp->buf, buf_out, cur_len);
  DPS_FREE(buf_out);
  return 0;
}