Пример #1
0
int Dps_ftp_send_cmd(DPS_CONN *connp, const char *cmd) {
        char *buf;
	size_t len;

    	connp->err = 0;
	len = dps_strlen(cmd)+2;    
        buf = DpsXmalloc(len+1);
	if (buf == NULL) return -1;
	dps_snprintf(buf, len+1, "%s\r\n", cmd);
	socket_buf_clear(connp);
	if (socket_write(connp, buf)){
		DPS_FREE( buf);
		return -1;
	}
#ifdef DEBUG_FTP
	fprintf(stderr, "ftp://%s (cmd) : %s", connp->hostname, buf);
	/*DpsLog(connp->indexer, DPS_LOG_DEBUG, "ftp://%s (cmd) : %s", connp->hostname, buf);*/
#endif
	DPS_FREE(buf);
	if (Dps_ftp_read_line(connp))
		return -1;
#ifdef DEBUG_FTP
	fprintf(stderr, "ftp://%s (reply): %s", connp->hostname, connp->buf);
	/*DpsLog(connp->indexer, DPS_LOG_DEBUG, "ftp://%s (reply): %s", connp->hostname, connp->buf);*/
#endif
    	return(Dps_ftp_get_reply(connp));
}
Пример #2
0
int Dps_ftp_connect(DPS_AGENT *Agent, DPS_CONN *connp, char *hostname, int port, char *user, char *passwd, int timeout) {
	size_t len;
	
	if (!connp)
		return -1;

	if (connp->connected == DPS_NET_CONNECTED)
		Dps_ftp_close(connp);	
	connp->connected = DPS_NET_NOTCONNECTED;

	if (!port) {
		connp->port = 21;
		connp->connp->port = 20;
	} else {
		connp->port = port;
		connp->connp->port = port - 1;
	}

	connp->timeout = timeout;
	
	if (!hostname)
		return -1;
	len = dps_strlen(hostname);
	connp->hostname = DpsXrealloc(connp->hostname, len+1);
	if (connp->hostname == NULL) return -1;
	dps_snprintf(connp->hostname, len+1, "%s", hostname);
	
	if (Dps_ftp_open_control_port(Agent, connp))
		return -1;
	if (Dps_ftp_login(connp, user, passwd))
		return -1;
	Dps_ftp_set_binary(connp);
	connp->connected = DPS_NET_CONNECTED;
	return 0;
}
Пример #3
0
void DpsCookiesClean(DPS_AGENT *A) {
    char buf[256];
    DPS_DB	*db;
    size_t i, dbfrom = 0, dbto;
    int res;

    if (A->Flags.robots_period == 0) return;

    dps_snprintf(buf, sizeof(buf), "DELETE FROM cookies WHERE expires < %d", A->now);

    if (A->flags & DPS_FLAG_UNOCON) DPS_GETLOCK(A, DPS_LOCK_CONF);
    dbto = DPS_DBL_TO(A);
    if (A->flags & DPS_FLAG_UNOCON) DPS_RELEASELOCK(A, DPS_LOCK_CONF);

    for (i = dbfrom; i < dbto; i++) {
	db = DPS_DBL_DB(A, i);
	if (A->flags & DPS_FLAG_UNOCON) DPS_GETLOCK(A, DPS_LOCK_DB);
#ifdef HAVE_SQL
	res = DpsSQLAsyncQuery(db, NULL, buf);
#endif
	if (res != DPS_OK) {
	    DpsLog(A, DPS_LOG_ERROR, db->errstr);
	}
	if (A->flags & DPS_FLAG_UNOCON) DPS_RELEASELOCK(A, DPS_LOCK_DB);
	if (res != DPS_OK) break;
    }
}
Пример #4
0
char * DpsEnvErrMsg(DPS_ENV * Conf) {
  size_t	i;
  DPS_DB *db;

  for(i = 0; i<Conf->dbl.nitems; i++){
    db = &Conf->dbl.db[i];
    if (db->errcode) {
      char *oe = (char*)DpsStrdup(Conf->errstr);
      dps_snprintf(Conf->errstr, 2048, "DB err: %s - %s", db->errstr, oe);
      DPS_FREE(oe);
    }
  }
  return(Conf->errstr);
}
Пример #5
0
int Dps_ftp_rest(DPS_CONN *c, size_t rest) {
	char cmd[64];
	int code;
	
	dps_snprintf(cmd, 63, "REST %u", rest);

	code = Dps_ftp_send_cmd(c, cmd);
	if (code == -1){
                return -1;
	}else if ( code >3 ){
		c->err = code;
		return -1;
	}
	return 0;
}
Пример #6
0
int Dps_ftp_get(DPS_CONN *c, DPS_CONN *d, char *path, size_t max_doc_size){
	char *cmd;
	size_t len;
	
        if (!path)
		return -1;

        len = dps_strlen(path) + 16;
	cmd = DpsXmalloc(len+1);
	if (cmd == NULL) return -1;
        dps_snprintf(cmd, len+1, "RETR %s", path);

	if (Dps_ftp_send_data_cmd(c, d, cmd, max_doc_size) == -1 && d->err != DPS_NET_FILE_TL) {
		DPS_FREE(cmd);
		return -1;
	}
	DPS_FREE(cmd);
	return 0;
}
Пример #7
0
int Dps_ftp_open_data_port( DPS_CONN *c, DPS_CONN *d){
	char buf[64];	
	unsigned char *a, *p;
	
	int code;
	
	if (!d)
		return -1;
	if (socket_getname(c, &d->sin) == -1)
		return -1;

	if (d->port) {
	  d->sin.sin_port = htons(d->port);
	}

	if (socket_open(d))
		return -1;

	if (socket_listen(d)){
		return -1;
	}
	if (socket_getname(d, &d->sin) == -1){
		return -1;
	}

        a = (unsigned char *)&d->sin.sin_addr;
        p = (unsigned char *)&d->sin.sin_port;

	dps_snprintf(buf, 64, "PORT %d,%d,%d,%d,%d,%d",
                a[0], a[1], a[2], a[3], p[0], p[1]);
	code = Dps_ftp_send_cmd(c, buf);
	if ((code < 0) || strncasecmp(c->buf, "200 ", 4)){
		return -1;
	}
	d->user = c->user;
	d->pass = c->pass;
	return 0;
}
Пример #8
0
int Dps_ftp_mdtm(DPS_CONN *c, char *path){
	char *cmd;
	int code;
	size_t len; 
	
	if (!path)
		return -1;
		
    	len = dps_strlen(path) + 16;
	cmd = DpsXmalloc(len+1);
	if (cmd == NULL) return -1;
	dps_snprintf(cmd, len+1, "MDTM %s", path);

	code = Dps_ftp_send_cmd(c, cmd);
	DPS_FREE(cmd);
	if (code == -1){
                return -1;
	}else if ( code >3 ){
		c->err = code;
		return -1;
	}
	return (DpsFTPDate2Time_t(c->buf));
}
Пример #9
0
int Dps_ftp_list(DPS_CONN *c, DPS_CONN *d, char *path, char *filename, size_t max_doc_size){
	char *cmd;
	size_t len;
	
	if (!filename){
		cmd = DpsXmalloc(16);
		if (cmd == NULL) return -1;
		sprintf(cmd, "LIST");
	}else{
    		len = dps_strlen(filename) + 16;
		cmd = DpsXmalloc(len+1);
		if (cmd == NULL) return -1;
    		dps_snprintf(cmd, len+1, "LIST %s", filename);
	}

	if (Dps_ftp_send_data_cmd(c, d, cmd, max_doc_size)== -1) {
		DPS_FREE(cmd);
		/*DpsLog(c->indexer, DPS_LOG_DEBUG, "(ftp_list-err)->%s", d->buf);*/
		return -1;
	}
	DPS_FREE(cmd);
	return ftp_parse_list(d, path);
}
Пример #10
0
ssize_t Dps_ftp_size(DPS_CONN *c, char *path) {
	char *cmd;
	int code;
	size_t len; 
	
	if (!path)
		return -1;
		
    	len = dps_strlen(path) + 16;
	cmd = DpsXmalloc(len + 1);
	if (cmd == NULL) return -1;
	dps_snprintf(cmd, len + 1, "SIZE %s", path);

	code = Dps_ftp_send_cmd(c, cmd);
	DPS_FREE(cmd);
	if (code == -1){
                return -1;
	}else if ( code >3 ){
		c->err = code;
		return -1;
	}
	sscanf(c->buf, "213 %u", &len);
	return len;
}
Пример #11
0
int Dps_ftp_cwd(DPS_CONN *c, char *path){
	char *cmd;
	int code;
	size_t len;
	
	if (!path)
		return -1;
	if (*path == '\0') return 0;
		
	len = dps_strlen(path) + 16;
	cmd = DpsXmalloc(len+1);
	if (cmd == NULL) return -1;
	dps_snprintf(cmd, len+1, "CWD %s", path);

	code = Dps_ftp_send_cmd(c, cmd);
	DPS_FREE(cmd);
	if (code == -1){
                return -1;
	}else if ( code >3 ){
		c->err = code;
		return -1;
	}
	return 0;
}
Пример #12
0
int Dps_ftp_login(DPS_CONN *connp, char *user, char *passwd){
	char *buf;
	char user_tmp[32], passwd_tmp[64];
	int code;
	size_t len;
	
	DPS_FREE(connp->user);
	DPS_FREE(connp->pass);
	if (!user)
		dps_snprintf(user_tmp, 32, "anonymous");
	else {
		dps_snprintf(user_tmp, 32, "%s", user);
		connp->user = (char*)DpsStrdup(user);
	}
	    
	if (!passwd)
		dps_snprintf(passwd_tmp, 64, "*****@*****.**", PACKAGE, VERSION);
	else {
		dps_snprintf(passwd_tmp, 32, "%s", passwd);
		connp->pass = (char*)DpsStrdup(passwd);
	}
	
	len = dps_strlen(user_tmp) + 5 /*dps_strlen("USER ")*/;
	buf = (char *) DpsXmalloc(len+1);
	if (buf == NULL) return -1;
	dps_snprintf(buf, len+1, "USER %s", user_tmp);
	code = Dps_ftp_send_cmd(connp, buf);
	DPS_FREE(buf);
	if (code == -1)
		return -1;
	else if (code == 2) /* Don't need password */
		return 0;
		
	len = dps_strlen(passwd_tmp) + 5 /*dps_strlen("PASS ")*/;
	buf = (char *) DpsXmalloc(len+1);
	if (buf == NULL) return -1;
	dps_snprintf(buf, len+1, "PASS %s", passwd_tmp);
	code = Dps_ftp_send_cmd( connp, buf);
	DPS_FREE(buf);
	if (code > 3)
		return -1;
	return 0;
}
Пример #13
0
static int MakeNestedIndex(DPS_AGENT *Indexer, DPS_UINT8URLIDLIST *L, const char *lim_name, DPS_DB *db) {
     DPS_ENV   *Conf = Indexer->Conf;
     size_t    k, prev;
     urlid_t   *data=NULL;
     DPS_UINT8_POS_LEN *ind=NULL;
     size_t    mind=1000, nind=0, ndata;
     char fname[PATH_MAX];
     int  dat_fd=0, ind_fd=0;
     int  rc=DPS_OK;
     const char	*vardir = (db->vardir) ? db->vardir : DpsVarListFindStr(&Conf->Vars, "VarDir", DPS_VAR_DIR);
     
     if(!L->Item)return(1);
     
     if (L->nitems > 1) DpsSort(L->Item, L->nitems, sizeof(DPS_UINT8URLID), (qsort_cmp)cmp_ind8);
     
     data = (urlid_t*)DpsMalloc((L->nitems + 1) * sizeof(urlid_t));
     if(!data){
       DpsLog(Indexer, DPS_LOG_ERROR, "Can't alloc %d bytes [%s:%d]", (L->nitems + 1) * sizeof(urlid_t), __FILE__, __LINE__);
       goto err1;
     }
     ind=(DPS_UINT8_POS_LEN*)DpsMalloc(mind*sizeof(DPS_UINT8_POS_LEN));
     if(!ind){
       DpsLog(Indexer, DPS_LOG_ERROR, "Can't alloc %d bytes [%s:%d]", mind * sizeof(DPS_UINT8_POS_LEN), __FILE__, __LINE__);
       goto err1;
     }
     prev=0;
     for(k=0; k < L->nitems; k++) {
          data[k] = L->Item[k].url_id;
          if((k == L->nitems-1) || (L->Item[k].hi != L->Item[prev].hi) || (L->Item[k].lo != L->Item[prev].lo)) {
               if(nind==mind){
                    mind+=1000;
                    ind=(DPS_UINT8_POS_LEN*)DpsRealloc(ind,mind*sizeof(DPS_UINT8_POS_LEN));
                    if(!ind) {
		      DpsLog(Indexer, DPS_LOG_ERROR, "Can't alloc %d bytes [%s:%d]", mind * sizeof(DPS_UINT8_POS_LEN), __FILE__, __LINE__);
		      goto err1;
                    }
               }
               /* Fill index */
               ind[nind].hi = L->Item[prev].hi;
               ind[nind].lo = L->Item[prev].lo;
               ind[nind].pos = prev * sizeof(*data);
               if (k == L->nitems - 1) ind[nind].len = (k - prev + 1) * sizeof(*data);
               else ind[nind].len = (k - prev) * sizeof(*data);
               DpsLog(Indexer, DPS_LOG_DEBUG, "%08X%08X - %d %d\n", ind[nind].hi, ind[nind].lo, (int)ind[nind].pos, ind[nind].len);
               nind++;
               
               prev=k;
          }
     }
     ndata = L->nitems;
     ClearIndex8(L);
     
     dps_snprintf(fname,sizeof(fname)-1,"%s%c%s%c%s.dat", vardir,DPSSLASH, DPS_TREEDIR,DPSSLASH, lim_name);
     if((dat_fd = DpsOpen3(fname, O_CREAT | O_WRONLY | O_TRUNC | DPS_BINARY, DPS_IWRITE)) < 0) {
       DpsLog(Indexer, DPS_LOG_ERROR, "Can't open '%s': %s [%s:%d]", fname, strerror(errno), __FILE__, __LINE__);
       goto err1;
     }
     DpsWriteLock(dat_fd);
     if((ndata * sizeof(*data)) != (size_t)write(dat_fd, data, ndata * sizeof(*data))) {
       DpsLog(Indexer, DPS_LOG_ERROR, "Can't write '%s': %s [%s:%d]", fname, strerror(errno), __FILE__, __LINE__);
       goto err1;
     }
     DpsUnLock(dat_fd);
     DpsClose(dat_fd);
     DPS_FREE(data);

     dps_snprintf(fname,sizeof(fname)-1,"%s%c%s%c%s.ind", vardir, DPSSLASH,DPS_TREEDIR, DPSSLASH, lim_name);
     if((ind_fd = DpsOpen3(fname, O_CREAT | O_WRONLY | O_TRUNC | DPS_BINARY, DPS_IWRITE)) < 0) {
       DpsLog(Indexer, DPS_LOG_ERROR, "Can't open '%s': %s [%s:%d]", fname, strerror(errno), __FILE__, __LINE__);
       goto err1;
     }
     DpsWriteLock(ind_fd);
     if((nind*sizeof(DPS_UINT8_POS_LEN)) != (size_t)write(ind_fd,ind,nind*sizeof(DPS_UINT8_POS_LEN))){
       DpsLog(Indexer, DPS_LOG_ERROR, "Can't write '%s': %s [%s:%d]", fname, strerror(errno), __FILE__, __LINE__);
          goto err1;
     }
     DpsUnLock(ind_fd);
     DpsClose(ind_fd);
     DPS_FREE(ind);
     
     return(0);
     
err1:
     ClearIndex8(L);
     DPS_FREE(data);
     DPS_FREE(ind);
     if(dat_fd) DpsClose(dat_fd);
     if(ind_fd) DpsClose(ind_fd);
     return(1);
}
Пример #14
0
__C_LINK int __DPSCALL DpsCacheMakeIndexes(DPS_AGENT *Indexer, DPS_DB *db) {
  DPS_UINT8URLIDLIST  L8;
  DPS_UINT4URLIDLIST  L4;
  DPS_VARLIST *v = &Indexer->Conf->Vars;
  size_t i, r;
  char *ind, *nm, *lfname;

  bzero(&L4, sizeof(DPS_UINT4URLIDLIST));
  bzero(&L8, sizeof(DPS_UINT8URLIDLIST));
  
  r = (size_t) 'l';
  for (i = 0; i < v->Root[r].nvars; i++) {
    if (!strncasecmp("Limit-", v->Root[r].Var[i].name, 6)) {
      ind = v->Root[r].Var[i].val;
      lfname = v->Root[r].Var[i].name;
      nm = lfname + 6;
      if (!strcasecmp(ind, "category")) {

	/* To see the URL being indexed in "ps" output on xBSD */
	dps_setproctitle("[%d] Category index creation", Indexer->handle);
	DpsLog(Indexer, DPS_LOG_EXTRA, "Creating category index");
	if (DPS_OK == DpsLimit8(Indexer, &L8, "Category",  DPS_IFIELD_TYPE_HEX8STR, db)) {
	  MakeNestedIndex(Indexer, &L8, DPS_LIMFNAME_CAT, db);
	}

      } else if (!strcasecmp(ind, "tag")) {

	/* To see the URL being indexed in "ps" output on xBSD */
	dps_setproctitle("[%d] Tag index creation", Indexer->handle);
	DpsLog(Indexer, DPS_LOG_EXTRA, "Creating tag index");
	if (DPS_OK == DpsLimit4(Indexer, &L4, "Tag",  DPS_IFIELD_TYPE_STRCRC32, db)) {
	  MakeLinearIndex(Indexer, &L4, DPS_LIMFNAME_TAG, db);
	}

      } else if (!strcasecmp(ind, "link")) {

	/* To see the URL being indexed in "ps" output on xBSD */
	dps_setproctitle("[%d] Link index creation", Indexer->handle);
	DpsLog(Indexer, DPS_LOG_EXTRA, "Creating link index");
	if (DPS_OK == DpsLimit4(Indexer, &L4, "link",  DPS_IFIELD_TYPE_INT, db)) {
	  MakeLinearIndex(Indexer, &L4, DPS_LIMFNAME_LINK, db);
	}

      } else if (!strcasecmp(ind, "time")) {

	/* To see the URL being indexed in "ps" output on xBSD */
	dps_setproctitle("[%d] Time index creation", Indexer->handle);
	DpsLog(Indexer, DPS_LOG_EXTRA, "Creating time index");
	if (DPS_OK == DpsLimit4(Indexer, &L4, "last_mod_time",  DPS_IFIELD_TYPE_HOUR, db)) {
	  MakeLinearIndex(Indexer, &L4, DPS_LIMFNAME_TIME, db);
	}

      } else if (!strcasecmp(ind, "hostname")) {

	/* To see the URL being indexed in "ps" output on xBSD */
	dps_setproctitle("[%d] Hostname index creation", Indexer->handle);
	DpsLog(Indexer, DPS_LOG_EXTRA, "Creating hostname index");
	if (DPS_OK == DpsLimit4(Indexer, &L4, "url",  DPS_IFIELD_TYPE_HOSTNAME, db)) {
	  MakeLinearIndex(Indexer, &L4, DPS_LIMFNAME_HOST, db);
	}

      } else if (!strcasecmp(ind, "language")) {

	/* To see the URL being indexed in "ps" output on xBSD */
	dps_setproctitle("[%d] Language index creation", Indexer->handle);
	DpsLog(Indexer, DPS_LOG_EXTRA, "Creating language index");
	if (DPS_OK == DpsLimit4(Indexer, &L4, "Content-Language",  DPS_IFIELD_TYPE_STR2CRC32, db)) {
	  MakeLinearIndex(Indexer, &L4, DPS_LIMFNAME_LANG, db);
	}

      } else if (!strcasecmp(ind, "content")) {

	/* To see the URL being indexed in "ps" output on xBSD */
	dps_setproctitle("[%d] Content-Type index creation", Indexer->handle);
	DpsLog(Indexer, DPS_LOG_EXTRA, "Creating Content-Type index");
	if (DPS_OK == DpsLimit4(Indexer, &L4, "Content-Type",  DPS_IFIELD_TYPE_STRCRC32, db)) {
	  MakeLinearIndex(Indexer, &L4, DPS_LIMFNAME_CTYPE, db);
	}

      } else if (!strcasecmp(ind, "siteid")) {

	/* To see the URL being indexed in "ps" output on xBSD */
	dps_setproctitle("[%d] Site_id index creation", Indexer->handle);
	DpsLog(Indexer, DPS_LOG_EXTRA, "Creating Site_id index");
	if (DPS_OK == DpsLimit4(Indexer, &L4, "site_id",  DPS_IFIELD_TYPE_INT, db)) {
	  MakeLinearIndex(Indexer, &L4, DPS_LIMFNAME_SITE, db);
	}

      } else {
	char *buf, *req, *dbaddr;
	DPS_DB ldb, *pdb = &ldb;
	size_t buf_len = dps_strlen(nm) + 16;
	if ((buf = (char*) DpsMalloc(buf_len * sizeof(char))) == NULL) {
	  DpsLog(Indexer, DPS_LOG_ERROR, "Can't alloc %d chars at %s:%d", buf_len, __FILE__, __LINE__);
	  return DPS_ERROR;
	}
	dps_setproctitle("[%d] %s index creation", Indexer->handle, nm);
	DpsLog(Indexer, DPS_LOG_EXTRA, "Creating %s index", nm);
	dps_snprintf(buf, buf_len, "Req-%s", nm);
	req = DpsVarListFindStr(&Indexer->Conf->Vars, buf, NULL);
	if (req != NULL) {
	  dps_snprintf(buf, buf_len, "dbaddr-%s", nm);
	  dbaddr = DpsVarListFindStr(&Indexer->Conf->Vars, buf, NULL);
	  if (dbaddr != NULL) {
	    DpsDBSetAddr(pdb, dbaddr, DPS_OPEN_MODE_READ);
	  } else {
	    pdb = db;
	  }
	  if (!strcasecmp(ind, "nex8str")) {
	    if (DPS_OK == DpsSQLLimit8(Indexer, &L8, req, DPS_IFIELD_TYPE_HEX8STR, pdb)) {
	      MakeNestedIndex(Indexer, &L8, lfname, pdb);
	    }
	  } else {
	    int field_type = DPS_IFIELD_TYPE_INT;
	    if (!strcasecmp(ind, "strcrc32")) field_type = DPS_IFIELD_TYPE_STRCRC32;
	    else if (!strcasecmp(ind, "hour")) field_type = DPS_IFIELD_TYPE_HOUR;
	    else if (!strcasecmp(ind, "hostname")) field_type = DPS_IFIELD_TYPE_HOSTNAME;
	    else if (!strcasecmp(ind, "char2")) field_type = DPS_IFIELD_TYPE_STR2CRC32;
	    else if (!strcasecmp(ind, "int")) field_type = DPS_IFIELD_TYPE_INT;
	    if (DPS_OK == DpsSQLLimit4(Indexer, &L4, req,  field_type, db)) {
	      MakeLinearIndex(Indexer, &L4, lfname, db);
	    }
	  }
	}

      }
      /* To see the URL being indexed in "ps" output on xBSD */
      dps_setproctitle("[%d] Indexes done.", Indexer->handle);
      DpsLog(Indexer, DPS_LOG_EXTRA, "Done");
    }
  }
  return DPS_OK;
}
Пример #15
0
static void DpsParseHTTPHeader(DPS_AGENT *Indexer, DPS_DOCUMENT *Doc, DPS_DSTR *header) {
  char *val, *header_name;
  char	secname[128];
  DPS_VAR	*Sec;
  DPS_TEXTITEM Item;

  if ((val = strchr(header_name = header->data, ':'))) {
/*
  fprintf(stderr, "HEADER: %s\n", header_name);
*/
    *val++='\0';
    val = DpsTrim(val," \t:");
			
    if (!strcasecmp(header_name, "Content-Type") || !strcasecmp(header_name, "Content-Encoding")) {
      char *v;
      for(v=val ; *v ; v++) 
	*v = dps_tolower(*v);
    } else if (Doc->Spider.use_cookies && !strcasecmp(header_name, "Set-Cookie")) {
      char *part, *lpart;
      char *name = NULL;
      char *value = NULL;
      const char *domain = NULL;
      const char *path = NULL;
      dps_uint4 expire = 0;
      char secure = 'n';
      for (part = dps_strtok_r(val, ";" , &lpart) ; part;
	   part = dps_strtok_r(NULL, ";", &lpart)) {
	char *arg;
	part = DpsTrim(part, " ");
	if ((arg = strchr(part, '='))) {
	  *arg++ = '\0';
	  if (!name) {
	    name = part;
	    value = arg;
	  } else 
	    if (!strcasecmp(part, "path")) {
	      path = arg;
	    } else
	      if (!strcasecmp(part, "domain")) {
		domain = arg;
	      } else
		if (!strcasecmp(part, "secure")) {
		  secure = 'y';
		} else
		  if (!strcasecmp(part, "expires")) {
		    expire = (dps_uint4)DpsHttpDate2Time_t(arg);
		  }
	}
      }
      if (name && value) {
	if (domain && domain[0] == '.') {
	  domain++;
	} else {
	  domain = Doc->CurURL.hostname ? Doc->CurURL.hostname : "localhost";
	}
	if (!path) {
	  path = Doc->CurURL.path ? Doc->CurURL.path : "/";
	}
	DpsCookiesAdd(Indexer, domain, path, name, value, secure, expire, 1);
      }
/*			  token = dps_strtok_r(NULL,"\r\n",&lt);
			  continue;*/
      return;
    }
  }

  DpsVarListReplaceStr(&Doc->Sections, header_name, val ? val : "<NULL>");

  dps_snprintf(secname,sizeof(secname),"header.%s", header_name);
  secname[sizeof(secname)-1]='\0';
  if((Sec = DpsVarListFind(&Doc->Sections, secname)) && val ) {
    Item.href = NULL;
    Item.str = val;
    Item.section = Sec->section;
    Item.section_name = secname;
    Item.len = 0;
    DpsTextListAdd(&Doc->TextList, &Item);
  }
}
Пример #16
0
static int MakeLinearIndex(DPS_AGENT *Indexer, const char *field, const char *lim_name, int type, DPS_DB *db) {
    DPS_ENV *Conf = Indexer->Conf;
    DPS_UINT4URLIDLIST  L;
    size_t    k,prev;
    urlid_t   *data = NULL;
    DPS_UINT4_POS_LEN *ind=NULL;
    size_t    mind=1000,nind=0;
    char fname[PATH_MAX];
    int  dat_fd=0, ind_fd=0, rc;
    const char	*vardir = (db->vardir) ? db->vardir : DpsVarListFindStr(&Conf->Vars, "VarDir", DPS_VAR_DIR);

    bzero(&L, sizeof(DPS_UINT4URLIDLIST));

    rc = DpsLimit4(Indexer, &L, field, type, db);

    if(rc != DPS_OK) {
        DpsLog(Indexer, DPS_LOG_ERROR, "Error: %s [%s:%d]", DpsEnvErrMsg(Conf), __FILE__, __LINE__);
        goto err1;
    }

    if(!L.Item)return(1);

    if (L.nitems > 1) DpsSort(L.Item, L.nitems, sizeof(DPS_UINT4URLID), (qsort_cmp)cmp_ind4);

    data = (urlid_t*)DpsMalloc((L.nitems + 1) * sizeof(*data));
    if(!data) {
        fprintf(stderr,"Error1: %s\n",strerror(errno));
        goto err1;
    }
    ind=(DPS_UINT4_POS_LEN*)DpsMalloc(mind*sizeof(DPS_UINT4_POS_LEN));
    if(!ind) {
        fprintf(stderr,"Error2: %s\n",strerror(errno));
        goto err1;
    }
    prev=0;
    for(k=0; k<L.nitems; k++) {
        data[k]=L.Item[k].url_id;
        if((k==L.nitems-1) || (L.Item[k].val!=L.Item[prev].val)) {
            if(nind==mind) {
                mind+=1000;
                ind=(DPS_UINT4_POS_LEN*)DpsRealloc(ind,mind*sizeof(DPS_UINT4_POS_LEN));
                if(!ind) {
                    fprintf(stderr,"Error3: %s\n",strerror(errno));
                    goto err1;
                }
            }
            /* Fill index */
            ind[nind].val=L.Item[prev].val;
            ind[nind].pos = prev * sizeof(*data);
            if (k == L.nitems - 1) ind[nind].len = (k - prev + 1) * sizeof(*data);
            else ind[nind].len = (k - prev) * sizeof(*data);
            DpsLog(Indexer, DPS_LOG_DEBUG, "%d - pos:%x len:%d\n", ind[nind].val, (int)ind[nind].pos, ind[nind].len);
            nind++;

            prev=k;
        }
    }
    if (L.mapped) {
#ifdef HAVE_SYS_MMAN_H
        if (munmap(L.Item, (L.nitems + 1) * sizeof(DPS_UINT4URLID))) {
            fprintf(stderr, "Can't shmdt '%s': %s\n", L.shm_name, strerror(errno));
        }
#elif defined(HAVE_SYS_SHM_H)
        if (shmdt(L.Item)) {
            fprintf(stderr, "Can't shmdt '%s': %s\n", L.shm_name, strerror(errno));
        }
#endif
        unlink(L.shm_name);
    } else {
        DPS_FREE(L.Item);
    }

    dps_snprintf(fname,sizeof(fname),"%s%c%s%c%s.dat", vardir,DPSSLASH, DPS_TREEDIR, DPSSLASH, lim_name);
    if((dat_fd = DpsOpen3(fname, O_CREAT | O_WRONLY | O_TRUNC | DPS_BINARY, DPS_IWRITE)) < 0) {
        fprintf(stderr,"Can't open '%s': %s\n",fname,strerror(errno));
        goto err1;
    }
    DpsWriteLock(dat_fd);
    if((L.nitems * sizeof(*data)) != (size_t)write(dat_fd, data, L.nitems * sizeof(*data))) {
        fprintf(stderr,"Can't write '%s': %s\n",fname,strerror(errno));
        goto err1;
    }
    DpsUnLock(dat_fd);
    DpsClose(dat_fd);
    DPS_FREE(data);

    dps_snprintf(fname,sizeof(fname),"%s%c%s%c%s.ind", vardir,DPSSLASH, DPS_TREEDIR, DPSSLASH, lim_name);
    if((ind_fd = DpsOpen3(fname, O_CREAT | O_WRONLY | O_TRUNC | DPS_BINARY, DPS_IWRITE)) < 0) {
        fprintf(stderr,"Can't open '%s': %s\n",fname,strerror(errno));
        goto err1;
    }
    DpsWriteLock(ind_fd);
    if((nind*sizeof(DPS_UINT4_POS_LEN)) != (size_t)write(ind_fd,ind,nind*sizeof(DPS_UINT4_POS_LEN))) {
        fprintf(stderr,"Can't write '%s': %s\n",fname,strerror(errno));
        goto err1;
    }
    DpsUnLock(ind_fd);
    DpsClose(ind_fd);
    DPS_FREE(ind);

    return(0);

err1:
    if (L.mapped) {
#ifdef HAVE_SYS_MMAN_H
        if (munmap(L.Item, (L.nitems + 1) * sizeof(DPS_UINT4URLID))) {
            fprintf(stderr, "Can't shmdt '%s': %s\n", L.shm_name, strerror(errno));
        }
#elif defined(HAVE_SYS_SHM_H)
        if (shmdt(L.Item)) {
            fprintf(stderr, "Can't shmdt '%s': %s\n", L.shm_name, strerror(errno));
        }
#endif
        unlink(L.shm_name);
    } else {
        DPS_FREE(L.Item);
    }
    DPS_FREE(data);
    DPS_FREE(ind);
    if(dat_fd) DpsClose(dat_fd);
    if(ind_fd) DpsClose(ind_fd);
    return(1);
}
Пример #17
0
int DpsChineseListLoad(DPS_AGENT *Agent, DPS_CHINALIST *List, const char *charset, const char *fname) {
     struct stat     sb;
     char *str, *data = NULL, *cur_n = NULL;
     DPS_CHINAWORD chinaword;
     char word[PATH_MAX];
     dpsunicode_t uword[256];
     DPS_CHARSET *sys_int, *fcs;
     DPS_CONV to_uni;
     int             fd;
     char            savebyte;

     sys_int = DpsGetCharSet("sys-int");
     if (!(fcs = DpsGetCharSet(charset))) {
       if (Agent->Conf->is_log_open) DpsLog(Agent, DPS_LOG_ERROR, "Charset '%s' not found or not supported", charset);
       else fprintf(stderr, "Charset '%s' not found or not supported", charset);
       return DPS_ERROR;
     }
     DpsConvInit(&to_uni, fcs, sys_int, Agent->Conf->CharsToEscape, DPS_RECODE_HTML);

     if (*fname != '/') {
       dps_snprintf(word, sizeof(word), "%s/%s", DpsVarListFindStr(&Agent->Conf->Vars, "EtcDir", DPS_CONF_DIR), fname);
       fname = word;
     }

     if (stat(fname, &sb)) {
       if (Agent->Conf->is_log_open) 
	    DpsLog(Agent, DPS_LOG_ERROR, "Unable to stat FreqDic file '%s': %s", fname, strerror(errno));
       else fprintf(stderr, "Unable to stat FrecDic file '%s': %s", fname, strerror(errno));
       return DPS_ERROR;
     }
     if ((fd = open(fname, O_RDONLY)) <= 0) {
       if (Agent->Conf->is_log_open) 
	    DpsLog(Agent, DPS_LOG_ERROR, "Unable to open FreqDic file '%s': %s", fname, strerror(errno));
       else fprintf(stderr, "Unable to open FreqDic file '%s': %s", fname, strerror(errno));
       return DPS_ERROR;
     }
     if ((data = (char*)DpsMalloc(sb.st_size + 1)) == NULL) {
       if (Agent->Conf->is_log_open) 
	 DpsLog(Agent, DPS_LOG_ERROR, "Unable to alloc %d bytes", sb.st_size);
       else fprintf(stderr, "Unable to alloc %ld bytes", (long)sb.st_size);
       close(fd);
       return DPS_ERROR;
     }
     if (read(fd, data, sb.st_size) != (ssize_t)sb.st_size) {
       if (Agent->Conf->is_log_open) 
	 DpsLog(Agent, DPS_LOG_ERROR, "Unable to read FreqDic file '%s': %s", fname, strerror(errno));
       else fprintf(stderr, "Unable to read FreqDic file '%s': %s", fname, strerror(errno));
       DPS_FREE(data);
       close(fd);
       return DPS_ERROR;
     }
     data[sb.st_size] = '\0';
     str = data;
     cur_n = strchr(str, NL_INT);
     if (cur_n != NULL) {
       cur_n++;
       savebyte = *cur_n;
       *cur_n = '\0';
     }
     close(fd);

     bzero((void*)&chinaword, sizeof(chinaword));
     chinaword.word = uword;
     while(str != NULL) {
          if(!str[0]) goto loop_continue;
          if(str[0]=='#') goto loop_continue;
          sscanf(str, "%d %63s ", &chinaword.freq, word );
	  DpsConv(&to_uni, (char*)uword, sizeof(uword), word, sizeof(word));
          DpsChineseListAdd(List, &chinaword);
     loop_continue:
	  str = cur_n;
	  if (str != NULL) {
	    *str = savebyte;
	    cur_n = strchr(str, NL_INT);
	    if (cur_n != NULL) {
	      cur_n++;
	      savebyte = *cur_n;
	      *cur_n = '\0';
	    }
	  }
     }
     DPS_FREE(data);
     DpsChineseListSort(List);
     { register size_t i, j = 0;
       for (i = 1; i < List->nwords; i++) {
	 if (cmpchinese(&List->ChiWord[j], &List->ChiWord[i]) == 0) {
	   List->ChiWord[j].freq += List->ChiWord[i].freq;
	 } else { j++;
	 }
       }
       for (i = j + 1; i < List->nwords; i++) {
	 DPS_FREE(List->ChiWord[i].word);
       }
       List->nwords = j + 1;
     }
     return DPS_OK;
}
Пример #18
0
int main(int argc,char **argv, char **envp) {
  int ch, sleeps = 1, optimize = 0, obi = 0;
  unsigned int from = 0, to = 0xFFF, p_to = 0;
	DPS_ENV * Env;
	const char * config_name = DPS_CONF_DIR "/cached.conf";

	DpsInit(argc, argv, envp); /* Initialize library */
	
	DpsInitMutexes();
	Env=DpsEnvInit(NULL);
	if (Env == NULL) exit(1);
	DpsSetLockProc(Env, DpsLockProc);

/*#ifndef HAVE_SETPROCTITLE*/
	ARGV = argv;
	ARGC = argc;
/*#endif*/
	while ((ch = getopt(argc, argv, "blt:f:op:w:v:h?")) != -1){
		switch (ch) {
			case 'f':
				sscanf(optarg, "%x", &from);
				break;	
			case 't': 
				sscanf(optarg, "%x", &p_to);
				break;
			case 'w':
			        DpsVarListReplaceStr(&Env->Vars, "VarDir", optarg);
				break;
                        case 'v': DpsSetLogLevel(NULL, atoi(optarg)); break;
                        case 'b': obi++; break;
                        case 'o': optimize++; break;
                        case 'p': sleeps = atoi(optarg); break;
			case 'h':
			case '?':
			default:
			  usage();
			  DpsEnvFree(Env);
			  DpsDeInit();
			  DpsDestroyMutexes();
				return 1;
				break;
		}
	}
	argc -= optind;
	argv += optind;

	if(argc > 1) {
		usage();
		DpsEnvFree(Env);
		DpsDeInit();
		DpsDestroyMutexes();
		return 1;
	} else if (argc == 1) {
	        config_name = argv[0];
	}
	{
		DPS_LOGDEL *del_buf=NULL;
		size_t del_count = 0, log, bytes, n = 0;
		int dd, log_fd;
		struct stat sb;
		char dname[PATH_MAX] = "";
		DPS_BASE_PARAM P;
		DPS_LOGWORD *log_buf = NULL;
		DPS_AGENT *Indexer = DpsAgentInit(NULL, Env, 0);

		log2stderr = 1;
		if (Indexer == NULL) {
		  fprintf(stderr, "Can't alloc Agent at %s:%d\n", __FILE__, __LINE__);
		  exit(DPS_ERROR);
		}
		
		if(DPS_OK != DpsEnvLoad(Indexer, config_name, (dps_uint8)0)){
		  fprintf(stderr, "%s\n", DpsEnvErrMsg(Env));
		  DpsEnvFree(Env);
		  DpsDeInit();
		  DpsDestroyMutexes();
		  return DPS_ERROR;
		}
		DpsOpenLog("splitter", Env, log2stderr);
		Indexer->flags = Env->flags = DPS_FLAG_UNOCON;
		DpsVarListAddLst(&Indexer->Vars, &Env->Vars, NULL, "*");

		bzero(&P, sizeof(P));
		P.subdir = DPS_TREEDIR;
		P.basename = "wrd";
		P.indname = "wrd";
		P.mode = DPS_WRITE_LOCK;
		P.NFiles = DpsVarListFindInt(&Indexer->Conf->Vars, "WrdFiles", 0x300);
		P.vardir = DpsStrdup(DpsVarListFindStr(&Indexer->Conf->Vars, "VarDir", DPS_VAR_DIR));
		P.A = Indexer;
		if (p_to != 0) to = p_to;
		else to = P.NFiles - 1;
#ifdef HAVE_ZLIB
		P.zlib_method = Z_DEFLATED;
		P.zlib_level = 9;
		P.zlib_windowBits = DPS_BASE_WRD_WINDOWBITS;
		P.zlib_memLevel = 9;
		P.zlib_strategy = DPS_BASE_WRD_STRATEGY;
#endif

		/* Open del log file */
		dps_snprintf(dname,sizeof(dname),"%s%c%s%cdel-split.log", P.vardir, DPSSLASH, DPS_SPLDIR, DPSSLASH);
		if((dd = DpsOpen2(dname, O_RDONLY | DPS_BINARY)) < 0) {
		  dps_strerror(NULL, 0, "Can't open del log '%s'", dname);
		  exit(DPS_ERROR);
		}

		DpsLog(Indexer, DPS_LOG_DEBUG, "VarDir: %s, WrdFiles: %d [%x]", P.vardir, P.NFiles, P.NFiles);

		/* Allocate del buffer */
		fstat(dd, &sb);
		if (sb.st_size != 0) {
		  del_buf=(DPS_LOGDEL*)DpsMalloc((size_t)sb.st_size + 1);
		  if (del_buf == NULL) {
		    fprintf(stderr, "Can't alloc %d bytes at %s:%d\n", (int)sb.st_size, __FILE__, __LINE__);
		    exit(0);
		  }
		  del_count=read(dd,del_buf,(size_t)sb.st_size)/sizeof(DPS_LOGDEL);
		}
		DpsClose(dd);

		/* Remove duplicates URLs in DEL log     */
		/* Keep only oldest records for each URL */
		if (del_count > 0) {
		  DpsLog(Indexer, DPS_LOG_DEBUG, "Sorting del_buf: %d items", del_count);
		  if (del_count > 1) DpsSort(del_buf, (size_t)del_count, sizeof(DPS_LOGDEL), DpsCmpurldellog);
		    DpsLog(Indexer, DPS_LOG_DEBUG, "Removing DelLogDups");
		  del_count = DpsRemoveDelLogDups(del_buf, del_count);
		}

		DpsLog(Indexer, DPS_LOG_DEBUG, "Processing Bufs from %d [%x] to %d [%x]", from, from, to, to);

		for(log = from; log <= to; log++) {

		  /* Open log file */
		  dps_snprintf(dname, sizeof(dname), "%s%c%s%c%03X-split.log", P.vardir, DPSSLASH, DPS_SPLDIR, DPSSLASH, log);
		  if((log_fd = DpsOpen2(dname, O_RDWR|DPS_BINARY)) < 0){
		    if (errno == ENOENT) {
		      dps_strerror(Indexer, DPS_LOG_DEBUG, "Can't open '%s'", dname);
		      n = 0;
/*		      continue;*/
		    } else {
		      dps_strerror(Indexer, DPS_LOG_ERROR, "Can't open '%s'", dname);
		      continue;
		    }
		  } else {
		    DpsWriteLock(log_fd); 
		    DpsLog(Indexer, DPS_LOG_DEBUG, "Processing Log: %x", log);
		    fstat(log_fd, &sb);
		    log_buf = (sb.st_size > 0) ? (DPS_LOGWORD*)DpsMalloc((size_t)sb.st_size + 1) : NULL;
		    if (log_buf != NULL) {
		      unlink(dname);
		      bytes = read(log_fd,log_buf,(size_t)sb.st_size);
		      (void)ftruncate(log_fd, (off_t)0);
		      DpsUnLock(log_fd);
		      DpsClose(log_fd);
		      
		      n = bytes / sizeof(DPS_LOGWORD);
		      DpsLog(Indexer, DPS_LOG_DEBUG, "Sorting log_buf: %d items", n);
		      if (n > 1) DpsSort(log_buf, n, sizeof(DPS_LOGWORD), (qsort_cmp)DpsCmplog);
		      DpsLog(Indexer, DPS_LOG_DEBUG, "Removing OldWords");
		      n = DpsRemoveOldWords(log_buf, n, del_buf, del_count);
		      if (n > 1) DpsSort(log_buf, n, sizeof(DPS_LOGWORD), (qsort_cmp)DpsCmplog_wrd);
		      
		    } else {
		      n = 0;
		      DpsUnLock(log_fd);
		      DpsClose(log_fd);
		    }
		  }

		  DpsLog(Indexer, DPS_LOG_DEBUG, "Processing Buf, optimize: %d", optimize);
		  if (obi) DpsBaseOptimize(&P, log);
		  DpsProcessBuf(Indexer, &P, log, log_buf, n, del_buf, del_count);
		  if (optimize) DpsBaseOptimize(&P, log);
		  DpsBaseClose(&P);
		  DPS_FREE(log_buf);

		  DpsLog(Indexer, DPS_LOG_DEBUG, "pas done: %d from %d to %d", log, from, to);
		  DPSSLEEP(sleeps);
		}
		DPS_FREE(del_buf);
		DpsAgentFree(Indexer);
		DPS_FREE(P.vardir);
	}

	fprintf(stderr, "Splitting done.\n");
	
	DpsEnvFree(Env);
	DpsDeInit();
	DpsDestroyMutexes();

#ifdef EFENCE
	fprintf(stderr, "Memory leaks checking\n");
	DpsEfenceCheckLeaks();
#endif
#ifdef FILENCE
	fprintf(stderr, "FD leaks checking\n");
	DpsFilenceCheckLeaks(NULL);
#endif
	return 0;
}
Пример #19
0
int DpsCloneListSearchd(DPS_AGENT *Indexer, DPS_DOCUMENT *Doc, DPS_RESULT *Res, DPS_DB *db) {
	DPS_SEARCHD_PACKET_HEADER hdr;
	ssize_t	nsent,nrecv;
	char *msg = NULL, *dinfo = NULL;
	char *tok, *lt;
	char buf[128];
	int done = 0;
	int	rc = DPS_OK;

	TRACE_IN(Indexer, "DpsCloneListSearchd");
	
	dps_snprintf(buf, 128, "%s", DpsVarListFindStr(&Doc->Sections, "DP_ID", "0"));
	hdr.cmd = DPS_SEARCHD_CMD_CLONES;
	hdr.len = dps_strlen(buf);
	nsent = DpsSearchdSendPacket(db->searchd, &hdr, buf);
	while(!done){
	  nrecv = DpsRecvall(db->searchd, &hdr, sizeof(hdr), 360);
		
		if(nrecv != sizeof(hdr)){
			DpsLog(Indexer, DPS_LOG_ERROR, "Received incomplete header from searchd (%d bytes)", (int)nrecv);
			TRACE_OUT(Indexer);
			return(DPS_ERROR);
		}else{
#ifdef DEBUG_SDP
			DpsLog(Indexer, DPS_LOG_DEBUG, "Received header cmd=%d len=%d\n", hdr.cmd, hdr.len);
#endif
		}
		switch(hdr.cmd){
			case DPS_SEARCHD_CMD_ERROR:
				msg = (char*)DpsMalloc(hdr.len + 1); 
				if (msg == NULL) {
				  done=1;
				  break;
				}
				nrecv = DpsRecvall(db->searchd, msg, hdr.len, 360);
				msg[(nrecv >= 0) ? nrecv : 0] = '\0';
				sprintf(Indexer->Conf->errstr, "Searchd error: '%s'", msg);
				rc = DPS_ERROR;
				DPS_FREE(msg);
				done = 1;
				break;
			case DPS_SEARCHD_CMD_DOCINFO:
				dinfo = (char*)DpsMalloc(hdr.len + 1);
				if (dinfo == NULL) {
				  done=1;
				  break;
				}
				nrecv = DpsRecvall(db->searchd, dinfo, hdr.len, 360);
				dinfo[(nrecv >= 0) ? nrecv : 0] = '\0';
#ifdef DEBUG_SDP
				DpsLog(Indexer, DPS_LOG_DEBUG, "Received DOCINFO size=%d buf=%s\n", hdr.len, dinfo);
#endif				
				if (strcasecmp(dinfo, "nocloneinfo") != 0) {

				  tok = dps_strtok_r(dinfo, "\r\n", &lt, NULL);
				
				  while(tok){
					DPS_DOCUMENT *D;
					size_t nd = Res->num_rows++;

					Res->Doc = (DPS_DOCUMENT*)DpsRealloc(Res->Doc, (Res->num_rows + 1) * sizeof(DPS_DOCUMENT));
					if (Res->Doc == NULL) {
					  sprintf(Indexer->Conf->errstr, "Realloc error");
					  rc = DPS_ERROR;
					  break;
					}
					D = &Res->Doc[nd];
					DpsDocInit(D);
					DpsDocFromTextBuf(D, tok);
					tok = dps_strtok_r(NULL, "\r\n", &lt, NULL);
				  }
				}
				DPS_FREE(dinfo);
				done = 1;
				break;
			default:
				sprintf(Indexer->Conf->errstr, "Unknown searchd response: cmd=%d len=%d", hdr.cmd, hdr.len);
				rc = DPS_ERROR;
				done = 1;
				break;
		}
	}
	TRACE_OUT(Indexer);
	return rc;
}
Пример #20
0
int __DPSCALL DpsFindWordsSearchd(DPS_AGENT *query, DPS_RESULT *Res, DPS_DB *searchd) {
	size_t		maxlen = 1024;
	char		*request, *edf = NULL, *e_empty = NULL;
	const char *df = DpsVarListFindStr(&query->Vars, "DateFormat", NULL);
	const char *empty = DpsVarListFindStr(&query->Vars, "empty", NULL);
	const char *qs = DpsVarListFindStr(&query->Vars, "QUERY_STRING", "");
	const char *tmplt = DpsVarListFindStr(&query->Vars, "tmplt", "");
	int		res=DPS_OK;

	TRACE_IN(query, "DpsFindWordsSearchd");

	if (df) {
	  edf = (char*)DpsMalloc(dps_strlen(df) * 10 + 1);
	  if (edf == NULL) {
		sprintf(query->Conf->errstr,"Can't allocate memory");
		TRACE_OUT(query);
		return DPS_ERROR;
	  }
	  DpsEscapeURL(edf, df);
	  maxlen += dps_strlen(edf);
	}
	if (empty) {
	  e_empty = (char*)DpsMalloc(dps_strlen(empty) * 10 + 1);
	  if (e_empty == NULL) {
		sprintf(query->Conf->errstr, "Can't allocate memory");
		TRACE_OUT(query);
		return DPS_ERROR;
	  }
	  DpsEscapeURL(e_empty, empty);
	  maxlen += dps_strlen(e_empty);
	}

	maxlen += dps_strlen(qs) + dps_strlen(tmplt) + 64;

	if (NULL==(request=(char*)DpsMalloc(maxlen))) {
		sprintf(query->Conf->errstr,"Can't allocate memory");
		DPS_FREE(edf);
		TRACE_OUT(query);
		return DPS_ERROR;
	}
	
     dps_snprintf(request, maxlen, "%s&BrowserCharset=%s&IP=%s&g-lc=%s&ExcerptSize=%s&ExcerptPadding=%s&DoExcerpt=%s&tmplt=%s%s%s%s%s%s%s&sp=%s&sy=%s&s=%s",
		  qs,
		  DpsVarListFindStr(&query->Vars, "BrowserCharset", "iso-8859-1"),
		  DpsVarListFindStr(&query->Vars, "IP", "localhost"),
		  DpsVarListFindStr(&query->Vars, "g-lc", "en"),
		  DpsVarListFindStr(&query->Vars, "ExcerptSize", "256"),
		  DpsVarListFindStr(&query->Vars, "ExcerptPadding", "40"),
		  (query->Flags.do_excerpt) ? "yes" : "no",
		  tmplt,
		  (edf) ? "&DateFormat=" : "", (edf) ? edf : "",
		  (e_empty) ? "&empty=" : "", (e_empty) ? e_empty : "",
		  (searchd->label) ? "&label=" : "", (searchd->label) ? searchd->label : "",
		  DpsVarListFindStr(&query->Vars, "sp", "1"),
		  DpsVarListFindStr(&query->Vars, "sy", "1"),
		  DpsVarListFindStr(&query->Vars, "s", "RP")
		  );
	DPS_FREE(edf);
	DPS_FREE(e_empty);

	request[maxlen-1]='\0';
	res = DpsSearchdSendWordRequest(query, searchd, request);
	DPS_FREE(request);
	if (DPS_OK != res) {
	  TRACE_OUT(query);
	  return res;
	}

/*	res = DpsSearchdGetWordResponse(query, Res, searchd);   called later from DpsFind */
	
	TRACE_OUT(query);
	return res;
}
Пример #21
0
static void DpsParseHTTPHeader(DPS_AGENT *Indexer, DPS_DOCUMENT *Doc, DPS_DSTR *header) {
  char *val, *header_name;
  char	secname[128], savec;
  DPS_VAR	*Sec;
  DPS_TEXTITEM Item;

  if ((val = strchr(header_name = header->data, ':'))) {
/*
  fprintf(stderr, "HEADER: %s\n", header_name);
*/
    *val++='\0';
    val = DpsTrim(val," \t:");
			
    if (!strcasecmp(header_name, "Content-Type") || !strcasecmp(header_name, "Content-Encoding")) {
      register char *v;
      for(v=val ; *v ; v++) 
	*v = (char)dps_tolower((int)*v);
    } else if (Doc->Spider.use_robots && !strcasecmp(header_name, "X-Robots-Tag")) {
        char * lt;
	char * rtok;
					
	rtok = dps_strtok_r(val, " ,\r\n\t", &lt, &savec);
	while(rtok){
	  if(!strcasecmp(rtok, "ALL")){
	    /* Left Server parameters unchanged */
	  }else if(!strcasecmp(rtok, "NONE")){
	    Doc->Spider.follow = DPS_FOLLOW_NO;
	    Doc->Spider.index = 0;
	    if (DpsNeedLog(DPS_LOG_DEBUG)) {
	      DpsVarListReplaceInt(&Doc->Sections, "Index", 0);
	      DpsVarListReplaceInt(&Doc->Sections, "Follow", DPS_FOLLOW_NO);
	    }
	  }else if(!strcasecmp(rtok, "NOINDEX")) {
	    Doc->Spider.index = 0;
/*          Doc->method = DPS_METHOD_DISALLOW;*/
	    if (DpsNeedLog(DPS_LOG_DEBUG)) DpsVarListReplaceInt(&Doc->Sections, "Index", 0);
	  }else if(!strcasecmp(rtok, "NOFOLLOW")) {
	    Doc->Spider.follow = DPS_FOLLOW_NO;
	    if (DpsNeedLog(DPS_LOG_DEBUG)) DpsVarListReplaceInt(&Doc->Sections, "Follow", DPS_FOLLOW_NO);
	  }else if(!strcasecmp(rtok, "NOARCHIVE")) {
	    DpsVarListReplaceStr(&Doc->Sections, "Z", "");
	  }else if(!strcasecmp(rtok, "INDEX")) {
            /* left server value unchanged */ 
	    if (DpsNeedLog(DPS_LOG_DEBUG)) DpsVarListReplaceInt(&Doc->Sections, "Index", Doc->Spider.index);
	  }else if(!strcasecmp(rtok, "FOLLOW")) {
            /* left server value unchanged */ 
	    if (DpsNeedLog(DPS_LOG_DEBUG)) DpsVarListReplaceInt(&Doc->Sections, "Follow", Doc->Spider.follow);
	  }
	  rtok = dps_strtok_r(NULL, " \r\n\t", &lt, &savec);
	}
      
    } else if (Doc->Spider.use_cookies && !strcasecmp(header_name, "Set-Cookie")) {


      DpsCookiesAddStr(Indexer, &Doc->CurURL, val, 1);

      return;
    }
  }

  DpsVarListReplaceStr(&Doc->Sections, header_name, val ? val : "<NULL>");

  dps_snprintf(secname,sizeof(secname),"header.%s", header_name);
  secname[sizeof(secname)-1]='\0';
  if((Sec = DpsVarListFind(&Doc->Sections, secname)) && val ) {
    bzero((void*)&Item, sizeof(Item));
    Item.href = NULL;
    Item.str = val;
    Item.section = Sec->section;
    Item.section_name = secname;
    Item.strict = Sec->strict;
    Item.len = 0;
    (void)DpsTextListAdd(&Doc->TextList, &Item);
  }
}
Пример #22
0
int DpsCookiesAdd(DPS_AGENT *Indexer, const char *domain, const char * path, const char *name, const char *value, const char secure,
		  dps_uint4 expires, const char from_config, int insert_flag) {
#ifdef HAVE_SQL

  char buf[3*PATH_MAX];
  char path_esc[2*PATH_MAX+1];
  DPS_COOKIES *Cookies = &Indexer->Cookies;
  DPS_COOKIE *Coo;
  DPS_DB *db;
  dpshash32_t url_id = DpsStrHash32(domain);
  size_t i;
#ifdef WITH_PARANOIA
  void *paran = DpsViolationEnter(paran);
#endif

  if (Indexer->flags & DPS_FLAG_UNOCON) {
    if (Indexer->Conf->dbl.nitems == 0) return DPS_OK;
    DPS_GETLOCK(Indexer, DPS_LOCK_DB);
    db = Indexer->Conf->dbl.db[url_id % Indexer->Conf->dbl.nitems];
  } else {
    if (Indexer->dbl.nitems == 0) return DPS_OK;
    db = Indexer->dbl.db[url_id % Indexer->dbl.nitems];
  }
  (void)DpsDBEscStr(db, path_esc, DPS_NULL2EMPTY(path), dps_min(PATH_MAX,dps_strlen(DPS_NULL2EMPTY(path))));

  for (i = 0; i < Cookies->ncookies; i++) {
    Coo = &Cookies->Cookie[i];
    if (!strcasecmp(Coo->domain, domain) && !strcasecmp(Coo->path, DPS_NULL2EMPTY(path)) && !strcasecmp(Coo->name, name) && (Coo->secure == secure)/* && (Coo->from_config == from_config)*/ ) {
      DPS_FREE(Coo->value);
      Coo->value = DpsStrdup(value);
/*      Coo->expires = expires;*/
      if (insert_flag) {
	dps_snprintf(buf, sizeof(buf), "UPDATE cookies SET value='%s',expires=%d WHERE domain='%s' AND path='%s' AND name='%s' AND secure='%c'",
		     value, expires, domain, path_esc, name, secure);
	DpsSQLAsyncQuery(db, NULL, buf);
      }
      if (Indexer->flags & DPS_FLAG_UNOCON) DPS_RELEASELOCK(Indexer, DPS_LOCK_DB);
#ifdef WITH_PARANOIA
      DpsViolationExit(Indexer->handle, paran);
#endif
      return DPS_OK;
    }
  }

  Cookies->Cookie = (DPS_COOKIE*)DpsRealloc(Cookies->Cookie, (Cookies->ncookies + 1) * sizeof(DPS_COOKIE));
  if(Cookies->Cookie == NULL) {
    Cookies->ncookies = 0;
    if (Indexer->flags & DPS_FLAG_UNOCON) DPS_RELEASELOCK(Indexer, DPS_LOCK_DB);
#ifdef WITH_PARANOIA
    DpsViolationExit(Indexer->handle, paran);
#endif
    return DPS_ERROR;
  }
  Coo = &Cookies->Cookie[Cookies->ncookies];
/*  Coo->expires = expires;*/
  Coo->secure = secure;
  Coo->from_config = from_config;
  Coo->domain = DpsStrdup(domain);
  Coo->path = DpsStrdup(path);
  Coo->name = DpsStrdup(name);
  Coo->value = DpsStrdup(value);
  if (insert_flag) {
    if (Indexer->Flags.CheckInsertSQL) {
      dps_snprintf(buf, sizeof(buf), "DELETE FROM cookies WHERE domain='%s' AND path='%s' AND name='%s' AND secure='%c'",
		   domain, path_esc, name, secure);
      DpsSQLAsyncQuery(db, NULL, buf);
    }
    dps_snprintf(buf, sizeof(buf), "INSERT INTO cookies(expires,secure,domain,path,name,value)VALUES(%d,'%c','%s','%s','%s','%s')",
		 expires, secure, domain, path_esc, name, value);
    DpsSQLAsyncQuery(db, NULL, buf);
  }
  Cookies->ncookies++;
  if (Indexer->flags & DPS_FLAG_UNOCON) DPS_RELEASELOCK(Indexer, DPS_LOCK_DB);
#ifdef WITH_PARANOIA
  DpsViolationExit(Indexer->handle, paran);
#endif

#endif /*HAVE_SQL*/
  return DPS_OK;
}
Пример #23
0
__C_LINK int __DPSCALL DpsSynonymListLoad(DPS_ENV * Env,const char * filename){
     struct stat     sb;
     char      *str, *data = NULL, *cur_n = NULL;
     char      lang[64]="";
     DPS_CHARSET    *cs=NULL;
     DPS_CHARSET    *sys_int=DpsGetCharSet("sys-int");
     DPS_CONV  file_uni;
     DPS_WIDEWORD    *ww = NULL;
     size_t key = 1;
     int flag_th = 0;
     int             fd;
     char            savebyte;
     
     if (stat(filename, &sb)) {
       fprintf(stderr, "Unable to stat synonyms file '%s': %s", filename, strerror(errno));
       return DPS_ERROR;
     }
     if ((fd = DpsOpen2(filename, O_RDONLY)) <= 0) {
       dps_snprintf(Env->errstr,sizeof(Env->errstr)-1, "Unable to open synonyms file '%s': %s", filename, strerror(errno));
       return DPS_ERROR;
     }
     if ((data = (char*)DpsMalloc(sb.st_size + 1)) == NULL) {
       dps_snprintf(Env->errstr,sizeof(Env->errstr)-1, "Unable to alloc %d bytes", sb.st_size);
       DpsClose(fd);
       return DPS_ERROR;
     }
     if (read(fd, data, sb.st_size) != (ssize_t)sb.st_size) {
       dps_snprintf(Env->errstr,sizeof(Env->errstr)-1, "Unable to read synonym file '%s': %s", filename, strerror(errno));
       DPS_FREE(data);
       DpsClose(fd);
       return DPS_ERROR;
     }
     data[sb.st_size] = '\0';
     str = data;
     cur_n = strchr(str, '\n');
     if (cur_n != NULL) {
       cur_n++;
       savebyte = *cur_n;
       *cur_n = '\0';
     }

     while(str != NULL) {
          if(str[0]=='#'||str[0]==' '||str[0]=='\t'||str[0]=='\r'||str[0]=='\n') goto loop_continue;
          
          if(!strncasecmp(str,"Charset:",8)){
               char * lasttok;
               char * charset;
               if((charset = dps_strtok_r(str + 8, " \t\n\r", &lasttok))) {
                    cs=DpsGetCharSet(charset);
                    if(!cs){
                         dps_snprintf(Env->errstr, sizeof(Env->errstr), "Unknown charset '%s' in synonyms file '%s'",
                                   charset, filename);
                         DPS_FREE(data);
			 DpsClose(fd);
                         return DPS_ERROR;
                    }
                    DpsConvInit(&file_uni, cs, sys_int, Env->CharsToEscape, 0);
               }
          }else
          if(!strncasecmp(str,"Language:",9)){
               char * lasttok;
               char * l;
               if((l = dps_strtok_r(str + 9, " \t\n\r", &lasttok))) {
                    dps_strncpy(lang, l, sizeof(lang)-1);
               }
          }else
          if(!strncasecmp(str, "Thesaurus:", 10)) {
               char * lasttok;
	       char *tok = dps_strtok_r(str + 10, " \t\n\r", &lasttok);
	       flag_th = (strncasecmp(tok, "yes", 3) == 0) ? 1 : 0;
          }else{
               char      *av[255];
               size_t         ac, i, j;
	       dpsunicode_t *t;

               if(!cs){
                    dps_snprintf(Env->errstr,sizeof(Env->errstr)-1,"No Charset command in synonyms file '%s'",filename);
                    DpsClose(fd); DPS_FREE(data);
                    return DPS_ERROR;
               }
               if(!lang[0]){
                    dps_snprintf(Env->errstr,sizeof(Env->errstr)-1,"No Language command in synonyms file '%s'",filename);
                    DpsClose(fd); DPS_FREE(data);
                    return DPS_ERROR;
               }

               ac = DpsGetArgs(str, av, 255);
               if (ac < 2) goto loop_continue;

               if ((ww = (DPS_WIDEWORD*)DpsRealloc(ww, ac * sizeof(DPS_WIDEWORD))) == NULL) return DPS_ERROR;

               for (i = 0; i < ac; i++) {
                 ww[i].word = av[i];
                 ww[i].len = dps_strlen(av[i]);
                 ww[i].uword = t = (dpsunicode_t*)DpsMalloc((3 * ww[i].len + 1) * sizeof(dpsunicode_t));
		 if (ww[i].uword == NULL) return DPS_ERROR;
                 DpsConv(&file_uni, (char*)ww[i].uword, sizeof(dpsunicode_t) * (3 * ww[i].len + 1), av[i], ww[i].len + 1);
                 DpsUniStrToLower(ww[i].uword);
		 ww[i].uword = DpsUniNormalizeNFC(NULL, ww[i].uword);
		 DPS_FREE(t);
               }

               for (i = 0; i < ac - 1; i++) {
                 for (j = i + 1; j < ac; j++) {

                   if((Env->Synonyms.nsynonyms + 1) >= Env->Synonyms.msynonyms){
                    Env->Synonyms.msynonyms += 64;
                    Env->Synonyms.Synonym = (DPS_SYNONYM*)DpsRealloc(Env->Synonyms.Synonym, 
                                                   sizeof(DPS_SYNONYM)*Env->Synonyms.msynonyms);
		    if (Env->Synonyms.Synonym == NULL) {
		      Env->Synonyms.msynonyms = Env->Synonyms.nsynonyms = 0;
		      return DPS_ERROR;
  		    }
                   }
               
                   bzero((void*)&Env->Synonyms.Synonym[Env->Synonyms.nsynonyms], sizeof(DPS_SYNONYM));
               
                   /* Add direct order */
                   Env->Synonyms.Synonym[Env->Synonyms.nsynonyms].p.uword = DpsUniDup(ww[i].uword);
                   Env->Synonyms.Synonym[Env->Synonyms.nsynonyms].s.uword = DpsUniDup(ww[j].uword);
		   Env->Synonyms.Synonym[Env->Synonyms.nsynonyms].p.count = 
		     Env->Synonyms.Synonym[Env->Synonyms.nsynonyms].s.count = (size_t)((flag_th) ? key : 0);
                   Env->Synonyms.nsynonyms++;
               
                   bzero((void*)&Env->Synonyms.Synonym[Env->Synonyms.nsynonyms], sizeof(DPS_SYNONYM));
               
                   /* Add reverse order */
                   Env->Synonyms.Synonym[Env->Synonyms.nsynonyms].p.uword = DpsUniDup(ww[j].uword);
                   Env->Synonyms.Synonym[Env->Synonyms.nsynonyms].s.uword = DpsUniDup(ww[i].uword);
		   Env->Synonyms.Synonym[Env->Synonyms.nsynonyms].p.count = 
		     Env->Synonyms.Synonym[Env->Synonyms.nsynonyms].s.count = (size_t)((flag_th) ? key : 0);
                   Env->Synonyms.nsynonyms++;
                 }
               }

               for (i = 0; i < ac; i++) {
                 DPS_FREE(ww[i].uword);
               }
               do { key++; } while (key == 0);
          }
     loop_continue:
	  str = cur_n;
	  if (str != NULL) {
	    *str = savebyte;
	    cur_n = strchr(str, '\n');
	    if (cur_n != NULL) {
	      cur_n++;
	      savebyte = *cur_n;
	      *cur_n = '\0';
	    }
	  }
     }
     DPS_FREE(data);
     DPS_FREE(ww);
     DpsClose(fd);
     return DPS_OK;
}
Пример #24
0
void DpsCookiesFind(DPS_AGENT *Indexer, DPS_SERVER *Server, DPS_DOCUMENT *Doc, const char *hostinfo) {
#ifdef HAVE_SQL
  DPS_DSTR cookie;
  DPS_COOKIES *Cookies = &Indexer->Cookies;
  DPS_COOKIE *Coo;
  size_t i, blen = dps_strlen(hostinfo), slen;
  int have_no_cookies = DpsVarListFindInt(&Doc->Sections, "have_no_cookies", 1);
#ifdef WITH_PARANOIA
  void *paran = DpsViolationEnter(paran);
#endif
  TRACE_IN(Indexer, "DpsCookiesFind");

  DpsDSTRInit(&cookie, 1024);
  for(i = 0; i < Cookies->ncookies; i++) {
    Coo = &Cookies->Cookie[i];
    slen = dps_strlen(Coo->domain);
    if (slen > blen) continue;
    if (Coo->secure == 'y' && strcasecmp(Doc->CurURL.schema, "https")) continue;
    if (strncasecmp(Coo->path, Doc->CurURL.path, dps_strlen(Coo->path))) continue;
    if (strcasecmp(Coo->domain, hostinfo + (blen - slen))) continue;
    if (Coo->from_config != 1) have_no_cookies = 0;
    if (Coo->name[0] == '\0' && Coo->value[0] == '\0') continue;
    if (cookie.data_size)
      DpsDSTRAppend(&cookie, "; ", 2);
    DpsDSTRAppendStr(&cookie, Coo->name);
    DpsDSTRAppend(&cookie, "=", 1);
    DpsDSTRAppendStr(&cookie, Coo->value);
  }
  if (have_no_cookies) {
    char buf[2*PATH_MAX];
    dpshash32_t url_id;
    DPS_DB *db;
    DPS_SQLRES Res;
    size_t rows;
    int rc;


	    if (Server != NULL) {
	      char *PingData = DpsVarListFindStr(&Server->Vars, "AuthPing", NULL);
	      if (PingData != NULL) {
		char *AuthPing = DpsStrdup(DpsTrim(PingData, " \t\r\n"));
		int method = DPS_METHOD_GET;
		dps_base64_decode(AuthPing, PingData, dps_strlen(PingData));
		if (!strncasecmp(AuthPing, "GET", 3)) {
		  method = DPS_METHOD_GET;
		  PingData = DpsTrim(AuthPing + 3, " \t\r\n");
		} else if (!strncasecmp(AuthPing, "POST", 4)) {
		  method = DPS_METHOD_POST;
		  PingData = DpsTrim(AuthPing + 4, " \t\r\n");
		} else {
		  DpsLog(Indexer, DPS_LOG_ERROR, "AuthPing should be GET or POST: %s", AuthPing);
		  PingData = NULL;
		}
		if (PingData != NULL) {
		  size_t size = dps_strlen(PingData);
		  {
		    char PingURL[size + 2];
		    char PingBody[size];
		    DPS_DOCUMENT *rDoc;
		    int result;

		    rDoc = DpsDocInit(NULL);
		    DpsSpiderParamInit(&rDoc->Spider);
		    DpsVarList2Doc(rDoc, Server);
		    rDoc->Buf.max_size = (size_t)DpsVarListFindInt(&Indexer->Vars, "MaxDocSize", DPS_MAXDOCSIZE);
		    rDoc->Buf.allocated_size = DPS_NET_BUF_SIZE;
		    if ((rDoc->Buf.buf = (char*)DpsMalloc(rDoc->Buf.allocated_size + 1)) == NULL) {
		      DpsDocFree(rDoc);
		      TRACE_OUT(Indexer);
		      return;
		    }
		    rDoc->Buf.buf[0]='\0';
		    rDoc->subdoc = Indexer->Flags.SubDocLevel + 1;

#if 1
		    dps_snprintf(buf, sizeof(buf), "%s://%s/", DPS_NULL2EMPTY(Doc->CurURL.schema), DPS_NULL2EMPTY(Doc->CurURL.hostinfo));
		    DpsVarListReplaceStr(&rDoc->Sections, "URL", buf);
		    DpsURLParse(&rDoc->CurURL, buf);
		    DpsLog(Indexer, DPS_LOG_INFO, "HOME: %s", buf);
		    rDoc->method = DPS_METHOD_HEAD;
		    /*		    DpsVarListFree(&rDoc->RequestHeaders);*/
		    if (Doc != NULL) {
		      DpsVarListReplaceLst(&rDoc->RequestHeaders, &Doc->RequestHeaders, NULL, "*"); 
		    }

		    DpsVarListReplaceStr(&rDoc->Sections, "have_no_cookies", "0");
		    DpsDocAddDocExtraHeaders(Indexer, Server, rDoc);
		    DpsDocAddConfExtraHeaders(Indexer->Conf, rDoc);
		    DpsVarListReplaceLst(&rDoc->Sections, &Server->Vars, NULL, "*");
		    DpsDocAddServExtraHeaders(Server, rDoc);
		    DpsVarListLog(Indexer, &rDoc->RequestHeaders, DPS_LOG_DEBUG, "HOME.Request");
		    if (Doc == NULL || Indexer->Flags.cmd == DPS_IND_FILTER) {
		      DpsDocLookupConn(Indexer, rDoc);
		    } else {
		      DPS_FREE(rDoc->connp.connp);
		      rDoc->connp = Doc->connp;
		    }
		    result = DpsGetURL(Indexer, rDoc, NULL); /* Just get headers from the home as we need only Cookies from it */
		    DpsDocProcessResponseHeaders(Indexer, rDoc);
		    DpsVarListLog(Indexer, &rDoc->Sections, DPS_LOG_DEBUG, "HOME.Response");
#endif

		    sscanf(PingData, "%s %s", PingURL, PingBody);
		    if (rDoc->method == DPS_METHOD_GET) {
		      dps_strcat(PingURL, "?");
		      dps_strcat(PingURL, PingBody);
		    } else {
		      DpsVarListReplaceStr(&rDoc->Sections, "body", PingBody);
		    }
		    DpsVarListReplaceStr(&rDoc->Sections, "URL", PingURL);
		    DpsURLParse(&rDoc->CurURL, PingURL);
		    DpsLog(Indexer, DPS_LOG_INFO, "AUTH.PING: %s", PingURL);
		  
		    rDoc->method = method;
		    DpsVarListFree(&rDoc->RequestHeaders);
		    DpsVarListReplaceStr(&rDoc->Sections, "have_no_cookies", "0");
		    DpsDocAddDocExtraHeaders(Indexer, Server, rDoc);
		    DpsDocAddConfExtraHeaders(Indexer->Conf, rDoc);
		    DpsVarListReplaceLst(&rDoc->Sections, &Server->Vars, NULL, "*");
		    DpsDocAddServExtraHeaders(Server, rDoc);
		    if (method == DPS_METHOD_POST) {
		      dps_snprintf(buf, sizeof(buf), "application/x-www-form-urlencoded; charset=%s", DpsVarListFindStr(&Indexer->Conf->Vars, "LocalCharset", "iso-8859-1"));
		      DpsVarListReplaceStr(&rDoc->RequestHeaders, "Content-Type", buf);
		      dps_snprintf(buf, sizeof(buf), "%d", dps_strlen(PingBody));
		      DpsVarListReplaceStr(&rDoc->RequestHeaders, "Content-Length", buf);
		    }
		  
		    DpsVarListLog(Indexer, &rDoc->RequestHeaders, DPS_LOG_DEBUG, "AUTHPING.Request");
#if 0
		    if (Doc == NULL || Indexer->Flags.cmd == DPS_IND_FILTER) {
		      DpsDocLookupConn(Indexer, rDoc);
		    } else {
		      DPS_FREE(rDoc->connp.connp);
		      rDoc->connp = Doc->connp;
		    }
#endif

		    result = DpsGetURL(Indexer, rDoc, NULL); /* Just get it as we need only Cookies from the headers */
		    DpsDocProcessResponseHeaders(Indexer, rDoc);
		    DpsVarListDel(&rDoc->Sections, "body");
		    DpsVarListLog(Indexer, &rDoc->Sections, DPS_LOG_DEBUG, "AUTHPING.Response");
		    if (Doc != NULL) bzero(&rDoc->connp, sizeof(rDoc->connp));
		    DpsDocFree(rDoc);
		  }
		}
		DpsFree(AuthPing);
	      }
	    }





    while(hostinfo != NULL) {
      url_id = DpsStrHash32(hostinfo);
      DpsSQLResInit(&Res);
      dps_snprintf(buf, sizeof(buf), "SELECT name,value,path,secure FROM cookies WHERE domain='%s'", hostinfo);
      if (Indexer->flags & DPS_FLAG_UNOCON) {
	DPS_GETLOCK(Indexer, DPS_LOCK_DB);
	db = Indexer->Conf->dbl.db[url_id % Indexer->Conf->dbl.nitems];
      } else {
	db = Indexer->dbl.db[url_id % Indexer->dbl.nitems];
      }
      if(DPS_OK == (rc = DpsSQLQuery(db, &Res, buf))) {
	rows = DpsSQLNumRows(&Res);
	for(i = 0; i < rows; i++) {
	  DpsCookiesAdd(Indexer, hostinfo, DpsSQLValue(&Res, i, 2), DpsSQLValue(&Res, i, 0), DpsSQLValue(&Res, i, 1), 
			*DpsSQLValue(&Res, i, 3), 0, 0, 0);
	  if (*DpsSQLValue(&Res, i, 3) == 'y' && strcasecmp(Doc->CurURL.schema, "https")) continue;
	  if (strncasecmp(DpsSQLValue(&Res, i, 2), Doc->CurURL.path, dps_strlen(DpsSQLValue(&Res, i, 2)))) continue;
	  if (cookie.data_size)
	    DpsDSTRAppend(&cookie, "; ", 2);
	  DpsDSTRAppendStr(&cookie, DpsSQLValue(&Res, i, 0));
	  DpsDSTRAppend(&cookie, "=", 1);
	  DpsDSTRAppendStr(&cookie, DpsSQLValue(&Res, i, 1));
	}
	if (rows == 0) {
	  DpsCookiesAdd(Indexer, hostinfo, "/", "", "", 'n', 0, 0, 0);
	}
      }
      DpsSQLFree(&Res);
      if (Indexer->flags & DPS_FLAG_UNOCON) {
	DPS_RELEASELOCK(Indexer, DPS_LOCK_DB);
      }	  
      hostinfo = strchr(hostinfo, '.');
      if (hostinfo != NULL) hostinfo++;
    }
  }
  if (cookie.data_size) {
    DpsVarListReplaceStr(&Doc->RequestHeaders, "Cookie", cookie.data);
  }
  DpsDSTRFree(&cookie);
#endif
  TRACE_OUT(Indexer);
  return;
}
Пример #25
0
int main(int argc, char ** argv, char **envp) {
	const char	*env, *bcharset, *lcharset, *conf_dir;
	char		template_name[PATH_MAX+6]="";
	char            *template_filename = NULL;
	char		*query_string = NULL;
	char		self[1024]="";
	char		*url = NULL;
	const char      *ResultContentType;
	int		res,httpd=0;
	size_t          catcolumns = 0;
	int		page_size,page_number;
	DPS_ENV		*Env;
	DPS_AGENT	*Agent;
	DPS_VARLIST	query_vars;
	
	/* Output Content-type if under HTTPD	 */
	/* Some servers do not pass QUERY_STRING */
	/* if the query was empty, so check	 */
	/* REQUEST_METHOD too     to be safe     */
	
	httpd=(getenv("QUERY_STRING")||getenv("REQUEST_METHOD"));
	if (!(conf_dir=getenv("DPS_ETC_DIR")))
		conf_dir=DPS_CONF_DIR;
	
	
	DpsInit(argc, argv, envp);
	Env=DpsEnvInit(NULL);
	if (Env == NULL) {
	  if(httpd){
	    printf("Content-Type: text/plain\r\n\r\n");
	  }
	  printf("Can't alloc Env\n");
	  exit(0);
	}
	DpsVarListInit(&query_vars);
	Agent = DpsAgentInit(NULL, Env, 0);
	if (Agent == NULL) {
	  if(httpd){
	    printf("Content-Type: text/plain\r\n\r\n");
	  }
	  printf("Can't alloc Agent\n");
	  exit(0);
	}
	DpsVarListAddEnviron(&Env->Vars,"ENV");
	
	/* Detect self and template name */
	if((env = getenv("DPSEARCH_TEMPLATE")))
		dps_strncpy(template_name, env, sizeof(template_name) - 1);
	else if((env = getenv("PATH_INFO")) && env[0])
		dps_strncpy(template_name, env + 1, sizeof(template_name) - 1);
	
	if((env=getenv("DPSEARCH_SELF")))
		dps_strncpy(self,env,sizeof(self)-1);
	
	if((env=getenv("QUERY_STRING"))){
	        query_string = (char*)DpsRealloc(query_string, dps_strlen(env) + 2);
		if (query_string == NULL) {
		  if(httpd){
		    printf("Content-Type: text/plain\r\n\r\n");
		  }
		  printf("Can't alloc query_string\n");
		  exit(0);
		}
		dps_strncpy(query_string, env, dps_strlen(env) + 1);

		/* Hack for Russian Apache from apache.lexa.ru  */
		/* QUERY_STRING is already converted to server  */
		/* character set. We must print original query  */
		/* string instead however. Under usual apache   */ 
		/* we'll use QUERY_STRING. Note that query_vars */
		/* list will contain not unescaped values, so   */
		/* we don't have to escape them when displaying */
		env = getenv("CHARSET_SAVED_QUERY_STRING");
		DpsParseQStringUnescaped(&query_vars,env?env:query_string);
	
		/* Unescape and save variables from QUERY_STRING */
		/* Env->Vars will have unescaped values however  */
		DpsParseQueryString(Agent,&Env->Vars,query_string);
	
		template_filename = (char*)DpsStrdup(DpsVarListFindStr(&Env->Vars, "tmplt", ""));
	
		if((env=getenv("REDIRECT_STATUS"))){

			/* Check Apache internal redirect  */
			/* via   "AddHandler" and "Action" */
			if(!self[0]){
				dps_strncpy(self,(env=getenv("REDIRECT_URL"))?env:"filler.cgi",sizeof(self)-1);
			}
			if(!template_name[0]){
				dps_strncpy(template_name,(env=getenv("PATH_TRANSLATED"))?env:"",sizeof(template_name)-1);
			}
			if (*template_filename == '\0') { 
			  DPS_FREE(template_filename); 
			  template_filename = (char*)DpsStrdup("filler.htm"); 
			}
		}else{
			/* CGI executed without Apache internal redirect */

			/* Detect $Self variable with OS independant SLASHES */
			if(!self[0]){
				dps_strncpy(self,(env=getenv("SCRIPT_NAME"))?env:"filler.cgi",sizeof(self)-1);
			}

			if(!template_name[0]){
				char *s,*e;
				
				/*This is with OS specific SLASHES */
				env=((env=getenv("SCRIPT_FILENAME"))?env:"filler.cgi");

				if(strcmp(conf_dir,".")){
					/* Take from the config directory */
					dps_snprintf(template_name, sizeof(template_name)-1, "%s/%s", 
						     conf_dir,(s=strrchr(env,DPSSLASH))?(s+1):(self));
				}else{
					/* Take from the current directory */
					dps_strncpy(template_name,env,sizeof(template_name)-1);
				}

				/* Find right slash if it presents */
				s=((s=strrchr(template_name,DPSSLASH))?s:template_name);

				if (*template_filename == '\0') {

				  /* Find .cgi substring */
				  if ((e = strstr(s, ".cgi")) != NULL) {
					/* Replace ".cgi" with ".htm" */
					e[1]='h';e[2]='t';e[3]='m';
				  } else {
				        dps_strcat(s, ".htm");
				  }
				  e = strrchr(s, '/');
				  DPS_FREE(template_filename);
				  template_filename = (char*)DpsStrdup(e + 1);
				} else {
				  dps_strncpy(s + 1, template_filename, sizeof(template_name) - (s - template_name) - 2);
				}
			}
		}
	}else{
		/* Executed from command line     */
		/* or under server which does not */
		/* pass an empty QUERY_STRING var */
		if(argv[1]) {
		  query_string = (char*)DpsRealloc(query_string, dps_strlen(argv[1]) + 10);
		  if (query_string == NULL) {
		    if(httpd){
		      printf("Content-Type: text/plain\r\n\r\n");
		    }
		    printf("Can't realloc query_string\n");
		    exit(0);
		  }
		  sprintf(query_string, "q=%s", argv[1]);
		} else {
		  query_string = (char*)DpsRealloc(query_string, 1024);
		  if (query_string == NULL) {
		    if(httpd){
		      printf("Content-Type: text/plain\r\n\r\n");
		    }
		    printf("Can't realloc query_string\n");
		    exit(0);
		  }
		  sprintf(query_string, "q=");
		}

		/* Hack for Russian Apache from apache.lexa.ru  */
		/* QUERY_STRING is already converted to server  */
		/* character set. We must print original query  */
		/* string instead however. Under usual apache   */ 
		/* we'll use QUERY_STRING. Note that query_vars */
		/* list will contain not unescaped values, so   */
		/* we don't have to escape them when displaying */
		env = getenv("CHARSET_SAVED_QUERY_STRING");
		DpsParseQStringUnescaped(&query_vars,env?env:query_string);
	
		/* Unescape and save variables from QUERY_STRING */
		/* Env->Vars will have unescaped values however  */
		DpsParseQueryString(Agent,&Env->Vars,query_string);

		DPS_FREE(template_filename);
		template_filename = (char*)DpsStrdup(DpsVarListFindStr(&Env->Vars, "tmplt", ""));
		if (*template_filename == '\0') { 
		  DPS_FREE(template_filename); template_filename = (char*)DpsStrdup("filler.htm"); 
		}
	
		/*// Get template name from command line variable &tmplt */
		if(!template_name[0])
			dps_snprintf(template_name,sizeof(template_name),"%s/%s", conf_dir, template_filename);
	}
	
	DpsVarListReplaceStr(&Agent->Conf->Vars, "tmplt", template_filename);
	DPS_FREE(template_filename);

	Agent->tmpl.Env_Vars = &Env->Vars;
	
	DpsURLNormalizePath(template_name);
	
	if (strncmp(template_name, conf_dir, dps_strlen(conf_dir)) 
	    || (res = DpsTemplateLoad(Agent, Env, &Agent->tmpl, template_name))) {
	  if (strcmp(template_name, "filler.htm")) { /* trying load default template */
	    fprintf(stderr, "Can't load template: '%s' %s\n", template_name, Env->errstr);
	    DPS_FREE(template_filename);
	    template_filename = (char*)DpsStrdup("filler.htm");
	    dps_snprintf(template_name, sizeof(template_name), "%s/%s", conf_dir, template_filename);

	    if ((res = DpsTemplateLoad(Agent, Env, &Agent->tmpl, template_name))) {

		if(httpd)printf("Content-Type: text/plain\r\n\r\n");
		printf("%s\n",Env->errstr);
		DpsVarListFree(&query_vars);
		DpsEnvFree(Env);
		DPS_FREE(query_string);
		DpsAgentFree(Agent);
		return(0);
	    }
	  } else {
		if(httpd)printf("Content-Type: text/plain\r\n\r\n");
		printf("%s\n",Env->errstr);
		DpsVarListFree(&query_vars);
		DpsEnvFree(Env);
		DPS_FREE(query_string);
		DpsAgentFree(Agent);
		return(0);
	  }
	}

	/* set locale if specified */
	if ((url = DpsVarListFindStr(&Env->Vars, "Locale", NULL)) != NULL) {
	  setlocale(LC_ALL, url);
/*#ifdef HAVE_ASPELL*/
	  { char *p;
	    if ((p = strchr(url, '.')) != NULL) {
	      *p = '\0';
	      DpsVarListReplaceStr(&Env->Vars, "g-lc", url);
	      *p = '.';
	    }
	  }
/*#endif*/
	  url = NULL;
	}
	
	/* Call again to load search Limits if need */
	DpsParseQueryString(Agent, &Env->Vars, query_string);
	Agent->Flags = Env->Flags;
	Agent->flags |= DPS_FLAG_UNOCON;
	Env->flags |= DPS_FLAG_UNOCON;
	DpsSetLogLevel(NULL, DpsVarListFindInt(&Env->Vars, "LogLevel", 0));
	DpsOpenLog("filler.cgi", Env, !strcasecmp(DpsVarListFindStr(&Env->Vars, "Log2stderr", (!httpd) ? "yes" : "no"), "yes"));
	DpsLog(Agent,DPS_LOG_ERROR,"filler.cgi started with '%s'",template_name);
		DpsLog(Agent, DPS_LOG_DEBUG, "VarDir: '%s'", DpsVarListFindStr(&Agent->Conf->Vars, "VarDir", DPS_VAR_DIR));
		DpsLog(Agent, DPS_LOG_DEBUG, "Affixes: %d, Spells: %d, Synonyms: %d, Acronyms: %d, Stopwords: %d",
		       Env->Affixes.naffixes,Env->Spells.nspell,
		       Env->Synonyms.nsynonyms,
		       Env->Acronyms.nacronyms,
		       Env->StopWords.nstopwords);
		DpsLog(Agent, DPS_LOG_DEBUG, "Chinese dictionary with %d entries", Env->Chi.nwords);
		DpsLog(Agent, DPS_LOG_DEBUG, "Korean dictionary with %d entries", Env->Korean.nwords);
		DpsLog(Agent, DPS_LOG_DEBUG, "Thai dictionary with %d entries", Env->Thai.nwords);
	DpsVarListAddLst(&Agent->Vars, &Env->Vars, NULL, "*");
	Agent->tmpl.Env_Vars = &Agent->Vars;
/*	DpsVarListAddEnviron(&Agent->Vars, "ENV");*/
/****************************************************************************************************************************************/
	/* This is for query tracking */
	DpsVarListAddStr(&Agent->Vars, "QUERY_STRING", query_string);
	DpsVarListAddStr(&Agent->Vars, "self", self);
	env = getenv("HTTP_X_FORWARDER_FOR");
	if (env) {
	  DpsVarListAddStr(&Agent->Vars, "IP", env);
	} else {
	  env = getenv("REMOTE_ADDR");
	  DpsVarListAddStr(&Agent->Vars, "IP", env ? env : "localhost");
	}
	
	bcharset = DpsVarListFindStr(&Agent->Vars, "BrowserCharset", "iso-8859-1");
	Env->bcs=DpsGetCharSet(bcharset);
	lcharset = DpsVarListFindStr(&Agent->Vars, "LocalCharset", "iso-8859-1");
	Env->lcs=DpsGetCharSet(lcharset);

	ResultContentType = DpsVarListFindStr(&Agent->Vars, "ResultContentType", "text/html");
	
	if(httpd){
		if(!Env->bcs){
			printf("Content-Type: text/plain\r\n\r\n");
			printf("Unknown BrowserCharset '%s' in template '%s'\n",bcharset,template_name);
			exit(0);
		}else if(!Env->lcs){
			printf("Content-Type: text/plain\r\n\r\n");
			printf("Unknown LocalCharset '%s' in template '%s'\n",lcharset,template_name);
			exit(0);
		}else{
		  printf("Content-type: %s; charset=%s\r\n\r\n", ResultContentType, bcharset);
		}
	}else{
		if(!Env->bcs){
			printf("Unknown BrowserCharset '%s' in template '%s'\n",bcharset,template_name);
			exit(0);
		}
		if(!Env->lcs){
			printf("Unknown LocalCharset '%s' in template '%s'\n",lcharset,template_name);
			exit(0);
		}
	}
	
	/* These parameters taken from "variable section of template"*/
	
	res         = DpsVarListFindInt(&Agent->Vars, "ps", DPS_DEFAULT_PS);
	page_size   = dps_min(res, MAX_PS);
	page_number = DpsVarListFindInt(&Agent->Vars, "p", 0);
	if (page_number == 0) {
	  page_number = DpsVarListFindInt(&Agent->Vars, "np", 0);
	  DpsVarListReplaceInt(&Agent->Vars, "p", page_number + 1);
	} else page_number--;
	
	res = DpsVarListFindInt(&Agent->Vars, "np", 0) * page_size;
	DpsVarListAddInt(&Agent->Vars, "pn", res);
	
	catcolumns = (size_t)atoi(DpsVarListFindStr(&Agent->Vars, "CatColumns", ""));


	DpsTemplatePrint(Agent, (DPS_OUTPUTFUNCTION)&fprintf, stdout, NULL, 0, &Agent->tmpl, "top");

	DpsTemplatePrint(Agent, (DPS_OUTPUTFUNCTION)&fprintf, stdout, NULL, 0, &Agent->tmpl, "restop");
	
	DpsTemplatePrint(Agent, (DPS_OUTPUTFUNCTION)&fprintf, stdout, NULL, 0, &Agent->tmpl, "res");

	DpsTemplatePrint(Agent, (DPS_OUTPUTFUNCTION)&fprintf, stdout, NULL, 0, &Agent->tmpl, "resbot");
	
	DpsTemplatePrint(Agent, (DPS_OUTPUTFUNCTION)&fprintf, stdout, NULL, 0, &Agent->tmpl, "bottom");
	
	DpsVarListFree(&query_vars);
	DpsAgentFree(Agent);
	DpsEnvFree(Env);
	DPS_FREE(query_string);
	DPS_FREE(url);
	if (httpd) fflush(NULL); else fclose(stdout);
	
#ifdef EFENCE
	fprintf(stderr, "Memory leaks checking\n");
	DpsEfenceCheckLeaks();
#endif
#ifdef FILENCE
	fprintf(stderr, "FD leaks checking\n");
	DpsFilenceCheckLeaks(NULL);
#endif

	return DPS_OK;
}
Пример #26
0
static int ftp_parse_list(DPS_CONN *connp, char *path){
	char *line, *buf_in, *ch, *buf_out, *tok, *fname;
        int len_h, len_f,len_p, i;
	char *dir, savec;
	size_t len,buf_len,cur_len;
	
	if (!connp->buf || !connp->buf_len)
		return 0;
        buf_in = connp->buf;
	/* 22 = dps_strlen(<a href=\"ftp://%s%s%s/\"></a>)*/
        len_h = dps_strlen(connp->hostname) + ((connp->user) ? dps_strlen(connp->user) : 0) + ((connp->pass) ? dps_strlen(connp->pass) : 0) + 2 + 22;
        len_p = dps_strlen(path);
        cur_len = 0;
        buf_len = connp->buf_len;
        buf_out = DpsXmalloc(buf_len + 1);
	if (buf_out == NULL) return -1;
	buf_out[0] = '\0';
	line = dps_strtok_r(buf_in, "\r\n", &tok, &savec);
        do{
    		if (!(fname = strtok(line, " ")))
			continue;
		/* drwxrwxrwx x user group size month date time file_name */
		for(i=0; i<7; i++)
            		if (!(fname = strtok(NULL, " ")))
				break;
		if (!(fname = strtok(NULL, "")))
			continue;
		len = 0 ;
		len_f = len_h + len_p + dps_strlen(fname);
	        if ((cur_len+len_f) >= buf_len){
			buf_len += DPS_NET_BUF_SIZE;
			buf_out = DpsXrealloc(buf_out, buf_len + 1);
		}
					
		switch (line[0]){
			case 'd':
				if (!fname || !strcmp(fname, ".") || !strcmp(fname, ".."))
				        break;
				len = len_f;
    				dps_snprintf(DPS_STREND(buf_out) /*buf_out+cur_len*/, len+1, "<a href=\"ftp://%s%s%s%s%s/%s%s/\"></a>\n",
					 (connp->user) ? connp->user : "", (connp->user) ? ":" : "",
					 (connp->pass) ? connp->pass : "", (connp->user || connp->pass) ? "@" : "",
					    connp->hostname, path, fname);
				break;
    	    	        case 'l':
				ch = strstr (fname, " -> ");
				if (!ch)
				    break;
				len = ch - fname;
				dir = DpsMalloc(len+1);
				if (dir == NULL) return -1;
				dps_snprintf(dir, len+1, "%s", fname);
				len = len_h + len_p + dps_strlen(dir);
				dps_snprintf(DPS_STREND(buf_out)/*buf_out+cur_len*/, len+1, "<a href=\"ftp://%s%s%s%s%s/%s%s/\"></a>\n", 
					 (connp->user) ? connp->user : "", (connp->user) ? ":" : "",
					 (connp->pass) ? connp->pass : "", (connp->user || connp->pass) ? "@" : "",
					    connp->hostname, path, dir);
				DPS_FREE(dir);
				/*ch +=4;*/
				/* Check if it is absolute link */
/*				if ((ch[0] == '/') || (ch[0] == '\\') ||
					 ((isalpha(ch[0]) && (ch[1]==':')))){
					len = len_h+dps_strlen(ch);
					dps_snprintf(buf_out+cur_len, len+1, "<a href=\"ftp://%s%s/\"></a>", 
						    connp->hostname, ch);
				}else{
					len = len_h+len_p+dps_strlen(ch);
	    				dps_snprintf(buf_out+cur_len, len+1, "<a href=\"ftp://%s%s%s/\"></a>", 
						    connp->hostname, path, ch);
				}
*/
				break;
    	    	        case '-':
				len =  len_f; 
		    	        dps_snprintf(DPS_STREND(buf_out)/*buf_out+cur_len*/, len+1, "<a  href=\"ftp://%s%s%s%s%s/%s%s\"></a>\n", 
					 (connp->user) ? connp->user : "", (connp->user) ? ":" : "",
					 (connp->pass) ? connp->pass : "", (connp->user || connp->pass) ? "@" : "",
					    connp->hostname, path, fname);

				break;
		}
		cur_len += len;
		
	}while( (line = dps_strtok_r(NULL, "\r\n", &tok, &savec)));

	if (cur_len+1 > connp->buf_len_total){
		connp->buf_len_total = cur_len;  
		connp->buf = DpsXrealloc(connp->buf, (size_t)connp->buf_len_total+1);
		if (connp->buf == NULL) return -1;
	}
	bzero(connp->buf, ((size_t)connp->buf_len_total+1));
	dps_memmove(connp->buf, buf_out, cur_len);
	DPS_FREE(buf_out);
	return 0;
}