Пример #1
0
/*
 * Parse a cookie string of the form "name=value; attr=value; ..." and hand
 * the resulting cookie to DpsCookiesAdd().
 *
 *   Indexer     - agent passed through to DpsCookiesAdd().
 *   CurURL      - URL the cookie belongs to; may be NULL. Its hostname and
 *                 path are used when the cookie carries no usable domain/path.
 *   cookie_str  - cookie text, tokenized on ';'. The '=' that separates each
 *                 attribute name from its value is overwritten with a NUL.
 *   insert_flag - not referenced by this function.
 *
 * The first "x=y" token supplies the cookie name and value. A domain is only
 * honoured when it starts with '.'; the leading dot is then skipped.
 * Otherwise the hostname of CurURL (or "localhost") is used.
 *
 * Always returns DPS_OK. Nothing is added when no name/value pair was found.
 */
int DpsCookiesAddStr(DPS_AGENT *Indexer, DPS_URL *CurURL, const char *cookie_str, int insert_flag) {
  char *part, *lpart;
  char *name = NULL;
  char *value = NULL;
  char *domain = NULL, *orig_domain = NULL;
  char *path = NULL;
  dps_uint4 expire = 0, need_free_domain = 1, need_free_path = 1;
  char secure = 'n', savec;

  for (part = dps_strtok_r(cookie_str, ";" , &lpart, &savec) ; part;
       part = dps_strtok_r(NULL, ";", &lpart, &savec)) {
    char *arg;

    part = DpsTrim(part, " ");
    if ((arg = strchr(part, '='))) {
      *arg++ = '\0';
      if (!name) {
        /* First pair is the cookie itself; name keeps pointing into the token. */
        name = part;
        DpsFree(value);
        value = DpsStrdup(arg);
      } else if (!strcasecmp(part, "path")) {
        DpsFree(path);
        path = DpsStrdup(arg);
      } else if (!strcasecmp(part, "domain")) {
        DpsFree(orig_domain);
        orig_domain = domain = DpsStrdup(arg);
      } else if (!strcasecmp(part, "secure")) {
        secure = 'y';
      } else if (!strcasecmp(part, "expires")) {
        expire = (dps_uint4)DpsHttpDate2Time_t(arg);
      }
    } else if (name && !strcasecmp(part, "secure")) {
      /* "Secure" is normally sent as a bare attribute without '='. */
      secure = 'y';
    }
  }

  if (name && value) {
    if (domain && domain[0] == '.') {
      domain++;
    } else {
      /* Domain missing or not dot-prefixed: fall back to the current host. */
      if (domain) DpsFree(orig_domain);
      domain = (CurURL && CurURL->hostname) ? CurURL->hostname : "localhost";
      need_free_domain = 0;
    }
    if (!path) {
      path = (CurURL && CurURL->path) ? CurURL->path : "/";
      need_free_path = 0;
    }

    DpsCookiesAdd(Indexer, domain, path, name, value, secure, expire, (CurURL==NULL)?1:0, 1);

  }

  /* Only release what this function duplicated itself. */
  DpsFree(value);
  if (need_free_path) DpsFree(path);
  if (need_free_domain) DpsFree(orig_domain);

  return DPS_OK;
}
Пример #2
0
/*
 * Process one HTTP response header held in header->data ("Name: value").
 *
 * The buffer is split in place at the first ':'.
 *   - Content-Type / Content-Encoding values are lower-cased.
 *   - Set-Cookie (when Doc->Spider.use_cookies is set) is parsed and passed
 *     to DpsCookiesAdd(); such a header is not stored in Doc->Sections.
 * Every other header is stored in Doc->Sections under its own name. If a
 * section named "header.<Name>" exists, the value is also appended to
 * Doc->TextList.
 */
static void DpsParseHTTPHeader(DPS_AGENT *Indexer, DPS_DOCUMENT *Doc, DPS_DSTR *header) {
  char *val, *header_name;
  char secname[128];
  DPS_VAR *Sec;
  DPS_TEXTITEM Item;

  if ((val = strchr(header_name = header->data, ':'))) {
    *val++ = '\0';
    val = DpsTrim(val, " \t:");

    if (!strcasecmp(header_name, "Content-Type") || !strcasecmp(header_name, "Content-Encoding")) {
      char *v;
      for (v = val; *v; v++)
        *v = dps_tolower(*v);
    } else if (Doc->Spider.use_cookies && !strcasecmp(header_name, "Set-Cookie")) {
      char *part, *lpart;
      char *name = NULL;
      char *value = NULL;
      const char *domain = NULL;
      const char *path = NULL;
      dps_uint4 expire = 0;
      char secure = 'n';

      for (part = dps_strtok_r(val, ";" , &lpart) ; part;
           part = dps_strtok_r(NULL, ";", &lpart)) {
        char *arg;
        part = DpsTrim(part, " ");
        if ((arg = strchr(part, '='))) {
          *arg++ = '\0';
          if (!name) {
            /* First pair is the cookie name and value. */
            name = part;
            value = arg;
          } else if (!strcasecmp(part, "path")) {
            path = arg;
          } else if (!strcasecmp(part, "domain")) {
            domain = arg;
          } else if (!strcasecmp(part, "secure")) {
            secure = 'y';
          } else if (!strcasecmp(part, "expires")) {
            expire = (dps_uint4)DpsHttpDate2Time_t(arg);
          }
        } else if (name && !strcasecmp(part, "secure")) {
          /* "Secure" is normally sent as a bare attribute without '='. */
          secure = 'y';
        }
      }
      if (name && value) {
        if (domain && domain[0] == '.') {
          domain++;
        } else {
          /* Domain missing or not dot-prefixed: use the current host. */
          domain = Doc->CurURL.hostname ? Doc->CurURL.hostname : "localhost";
        }
        if (!path) {
          path = Doc->CurURL.path ? Doc->CurURL.path : "/";
        }
        DpsCookiesAdd(Indexer, domain, path, name, value, secure, expire, 1);
      }
      return;
    }
  }

  DpsVarListReplaceStr(&Doc->Sections, header_name, val ? val : "<NULL>");

  dps_snprintf(secname, sizeof(secname), "header.%s", header_name);
  secname[sizeof(secname)-1] = '\0';
  if ((Sec = DpsVarListFind(&Doc->Sections, secname)) && val) {
    /* Zero the item so members not set below are not left indeterminate. */
    memset(&Item, 0, sizeof(Item));
    Item.href = NULL;
    Item.str = val;
    Item.section = Sec->section;
    Item.section_name = secname;
    Item.len = 0;
    DpsTextListAdd(&Doc->TextList, &Item);
  }
}
Пример #3
0
/*
 * Split the raw HTTP response in Doc->Buf into headers and body and record
 * the header values in Doc->Sections.
 *
 * Side effects visible here:
 *   - Doc->Buf.content is set to the first byte after the blank line that
 *     ends the headers; that blank line's first byte is overwritten with NUL.
 *   - "ResponseSize", "ResponseLine", "Status" and "Content-Length" are
 *     written to Doc->Sections. "Status" keeps the larger of the previous and
 *     the newly parsed value.
 *   - Each header line (with continuation lines appended) is passed to
 *     DpsParseHTTPHeader().
 *
 * Returns silently when the buffer is empty, no header/body boundary can be
 * established, or the first line does not start with "HTTP/".
 */
void DpsParseHTTPResponse(DPS_AGENT *Indexer, DPS_DOCUMENT *Doc) {
  char *token, *lt, *headers;
  int oldstatus;
  DPS_DSTR header;

  Doc->Buf.content = NULL;
  oldstatus = DpsVarListFindInt(&Doc->Sections, "Status", 0);
  DpsVarListReplaceInt(&Doc->Sections, "ResponseSize", (int)Doc->Buf.size);
  DpsVarListDel(&Doc->Sections, "Content-Length");
  DpsVarListDel(&Doc->Sections, "Last-Modified");

  if (Doc->Buf.buf == NULL) return;

  /* Cut HTTP response header first */
  for (token = Doc->Buf.buf; *token; token++) {
    if (!strncmp(token, "\r\n\r\n", 4)) {
      *token = '\0';
      Doc->Buf.content = token + 4;
      break;
    } else if (!strncmp(token, "\n\n", 2)) {
      *token = '\0';
      Doc->Buf.content = token + 2;
      break;
    }
  }

  /* No separator found before the first NUL: bad response unless data follows */
  if (!Doc->Buf.content) {
    if (token < Doc->Buf.buf + Doc->Buf.size - 4) {
      if (token[2] == '\r') Doc->Buf.content = token + 4;
      else Doc->Buf.content = token + 2;
    } else {
      return;
    }
  }

  /* Copy headers not to break them */
  headers = (char*)DpsStrdup(Doc->Buf.buf);
  if (headers == NULL) return;

  /* Now lets parse response header lines */
  token = dps_strtok_r(headers, "\r\n", &lt);

  if (!token) {
    /* Empty header area: release the copy before leaving. */
    DpsFree(headers);
    return;
  }

  if (!strncmp(token, "HTTP/", 5)) {
    int status = atoi(token + 8);
    DpsVarListReplaceStr(&Doc->Sections, "ResponseLine", token);
    DpsVarListReplaceInt(&Doc->Sections, "Status", (oldstatus > status) ? oldstatus : status);
  } else {
    DpsFree(headers);
    return;
  }

  token = dps_strtok_r(NULL, "\r\n", &lt);
  DpsDSTRInit(&header, 128);

  while (token) {
    /* A line containing ':' starts a new header; flush the one collected so far. */
    if (strchr(token, ':')) {
      if (header.data_size) {
        DpsParseHTTPHeader(Indexer, Doc, &header);
        DpsDSTRFree(&header);
        DpsDSTRInit(&header, 128);
      }
    }
    DpsDSTRAppendStr(&header, token);

    token = dps_strtok_r(NULL, "\r\n", &lt);
  }
  if (header.data_size) {
    DpsParseHTTPHeader(Indexer, Doc, &header);
  }
  DpsDSTRFree(&header);
  DPS_FREE(headers);

  /* Body length = total size minus the offset of the body in the buffer. */
  DpsVarListInsInt(&Doc->Sections, "Content-Length", Doc->Buf.buf-Doc->Buf.content+(int)Doc->Buf.size);
}
Пример #4
0
/*
 * Ask searchd for the clones of Doc and append them to Res.
 *
 * The document's "DP_ID" section value is sent in a DPS_SEARCHD_CMD_CLONES
 * packet. The reply is then read:
 *   - DPS_SEARCHD_CMD_ERROR:   the message is stored in Indexer->Conf->errstr
 *                              and DPS_ERROR is returned.
 *   - DPS_SEARCHD_CMD_DOCINFO: unless the payload is "nocloneinfo", each
 *                              CR/LF separated line becomes a new entry at
 *                              the end of Res->Doc (Res->num_rows grows).
 *   - anything else:           reported in errstr, DPS_ERROR is returned.
 *
 * Returns DPS_OK or DPS_ERROR.
 */
int DpsCloneListSearchd(DPS_AGENT *Indexer, DPS_DOCUMENT *Doc, DPS_RESULT *Res, DPS_DB *db) {
  DPS_SEARCHD_PACKET_HEADER hdr;
  ssize_t nsent, nrecv;
  char *msg = NULL, *dinfo = NULL;
  char *tok, *lt;
  char buf[128];
  int done = 0;
  int rc = DPS_OK;

  TRACE_IN(Indexer, "DpsCloneListSearchd");

  dps_snprintf(buf, 128, "%s", DpsVarListFindStr(&Doc->Sections, "DP_ID", "0"));
  hdr.cmd = DPS_SEARCHD_CMD_CLONES;
  hdr.len = dps_strlen(buf);
  nsent = DpsSearchdSendPacket(db->searchd, &hdr, buf);
  while (!done) {
    nrecv = DpsRecvall(db->searchd, &hdr, sizeof(hdr), 360);

    if (nrecv != sizeof(hdr)) {
      DpsLog(Indexer, DPS_LOG_ERROR, "Received incomplete header from searchd (%d bytes)", (int)nrecv);
      TRACE_OUT(Indexer);
      return(DPS_ERROR);
    } else {
#ifdef DEBUG_SDP
      DpsLog(Indexer, DPS_LOG_DEBUG, "Received header cmd=%d len=%d\n", hdr.cmd, hdr.len);
#endif
    }
    switch (hdr.cmd) {
      case DPS_SEARCHD_CMD_ERROR:
        msg = (char*)DpsMalloc(hdr.len + 1);
        if (msg == NULL) {
          done = 1;
          break;
        }
        nrecv = DpsRecvall(db->searchd, msg, hdr.len, 360);
        msg[(nrecv >= 0) ? nrecv : 0] = '\0';
        /* The message length comes from the peer, so bound the copy.
           NOTE(review): assumes Conf->errstr is a char array, not a pointer - confirm. */
        dps_snprintf(Indexer->Conf->errstr, sizeof(Indexer->Conf->errstr), "Searchd error: '%s'", msg);
        rc = DPS_ERROR;
        DPS_FREE(msg);
        done = 1;
        break;
      case DPS_SEARCHD_CMD_DOCINFO:
        dinfo = (char*)DpsMalloc(hdr.len + 1);
        if (dinfo == NULL) {
          done = 1;
          break;
        }
        nrecv = DpsRecvall(db->searchd, dinfo, hdr.len, 360);
        dinfo[(nrecv >= 0) ? nrecv : 0] = '\0';
#ifdef DEBUG_SDP
        DpsLog(Indexer, DPS_LOG_DEBUG, "Received DOCINFO size=%d buf=%s\n", hdr.len, dinfo);
#endif
        if (strcasecmp(dinfo, "nocloneinfo") != 0) {

          tok = dps_strtok_r(dinfo, "\r\n", &lt, NULL);

          while (tok) {
            DPS_DOCUMENT *D;
            size_t nd = Res->num_rows++;

            Res->Doc = (DPS_DOCUMENT*)DpsRealloc(Res->Doc, (Res->num_rows + 1) * sizeof(DPS_DOCUMENT));
            if (Res->Doc == NULL) {
              dps_snprintf(Indexer->Conf->errstr, sizeof(Indexer->Conf->errstr), "Realloc error");
              rc = DPS_ERROR;
              break;
            }
            /* One text line describes one clone document. */
            D = &Res->Doc[nd];
            DpsDocInit(D);
            DpsDocFromTextBuf(D, tok);
            tok = dps_strtok_r(NULL, "\r\n", &lt, NULL);
          }
        }
        DPS_FREE(dinfo);
        done = 1;
        break;
      default:
        dps_snprintf(Indexer->Conf->errstr, sizeof(Indexer->Conf->errstr), "Unknown searchd response: cmd=%d len=%d", hdr.cmd, hdr.len);
        rc = DPS_ERROR;
        done = 1;
        break;
    }
  }
  TRACE_OUT(Indexer);
  return rc;
}
Пример #5
0
/*
 * Send a category request to searchd and load the answer into C.
 *
 * The request payload is the integer cmd followed by the NUL-terminated
 * C->addr, sent as a DPS_SEARCHD_CMD_CATINFO packet. Replies are read until
 * one of these arrives:
 *   - DPS_SEARCHD_CMD_ERROR:   message stored in A->Conf->errstr, DPS_ERROR.
 *   - DPS_SEARCHD_CMD_CATINFO: C->ncategories is reset and every CR/LF
 *                              separated line is fed to DpsCatFromTextBuf().
 *   - unknown command:         reported in errstr, DPS_ERROR.
 * DPS_SEARCHD_CMD_MESSAGE packets are read and discarded (logged only when
 * DEBUG_SDP is defined), after which reading continues.
 *
 * db is used as a DPS_DB pointer. Returns DPS_OK or DPS_ERROR.
 */
int __DPSCALL DpsSearchdCatAction(DPS_AGENT *A, DPS_CATEGORY *C, int cmd, void *db) {
  DPS_DB *searchd = db;
  DPS_SEARCHD_PACKET_HEADER hdr;
  char *buf;
  ssize_t nsent, nrecv;
  int done = 0;
  int rc = DPS_OK;
  char *msg = NULL;
  char *dinfo = NULL;

  TRACE_IN(A, "DpsSearchdCatAction");

  hdr.cmd = DPS_SEARCHD_CMD_CATINFO;
  hdr.len = sizeof(int) + dps_strlen(C->addr) + 1;

  if ((buf = (char*)DpsMalloc(hdr.len + 1)) == NULL) {
    DpsLog(A, DPS_LOG_ERROR, "Out of memory");
    TRACE_OUT(A);
    return DPS_ERROR;
  }

  /* Payload layout: [int cmd][address string incl. terminating NUL] */
  *((int*)buf) = cmd;
  dps_strcpy(buf + sizeof(int), C->addr);

  nsent = DpsSearchdSendPacket(searchd->searchd, &hdr, buf);

  DPS_FREE(buf);

  while (!done) {
    char *tok, *lt;
    nrecv = DpsRecvall(searchd->searchd, &hdr, sizeof(hdr), 360);

    if (nrecv != sizeof(hdr)) {
      DpsLog(A, DPS_LOG_ERROR, "Received incomplete header from searchd (%d bytes)", (int)nrecv);
      TRACE_OUT(A);
      return(DPS_ERROR);
    } else {
#ifdef DEBUG_SDP
      DpsLog(A, DPS_LOG_ERROR, "Received header cmd=%d len=%d\n", hdr.cmd, hdr.len);
#endif
    }
    switch (hdr.cmd) {
      case DPS_SEARCHD_CMD_ERROR:
        msg = (char*)DpsMalloc(hdr.len + 1);
        if (msg == NULL) {
          done = 1;
          break;
        }
        nrecv = DpsRecvall(searchd->searchd, msg, hdr.len, 360);
        msg[(nrecv >= 0) ? nrecv : 0 ] = '\0';
        /* The message length comes from the peer, so bound the copy.
           NOTE(review): assumes Conf->errstr is a char array, not a pointer - confirm. */
        snprintf(A->Conf->errstr, sizeof(A->Conf->errstr), "Searchd error: '%s'", msg);
        rc = DPS_ERROR;
        DPS_FREE(msg);
        done = 1;
        break;
      case DPS_SEARCHD_CMD_MESSAGE:
        msg = (char*)DpsMalloc(hdr.len+1);
        if (msg == NULL) {
          done = 1;
          break;
        }
        nrecv = DpsRecvall(searchd->searchd, msg, hdr.len, 360);
        msg[(nrecv >= 0) ? nrecv : 0] = '\0';
#ifdef DEBUG_SDP
        DpsLog(A, DPS_LOG_ERROR, "Message from searchd: '%s'\n",msg);
#endif
        DPS_FREE(msg);
        break;
      case DPS_SEARCHD_CMD_CATINFO:
        dinfo = (char*)DpsMalloc(hdr.len+1);
        if (dinfo == NULL) {
          done = 1;
          break;
        }
        nrecv = DpsRecvall(searchd->searchd, dinfo, hdr.len, 360);
        dinfo[(nrecv >= 0) ? nrecv : 0] = '\0';
#ifdef DEBUG_SDP
        DpsLog(A, DPS_LOG_ERROR, "Received CATINFO size=%d buf=%s\n",hdr.len,dinfo);
#endif

        C->ncategories = 0;
        tok = dps_strtok_r(dinfo, "\r\n", &lt, NULL);

        while (tok) {
          DpsCatFromTextBuf(C, tok);

          tok = dps_strtok_r(NULL, "\r\n", &lt, NULL);
        }
        DPS_FREE(dinfo);
        done = 1;
        break;
      default:
        snprintf(A->Conf->errstr, sizeof(A->Conf->errstr), "Unknown searchd response: cmd=%d len=%d", hdr.cmd, hdr.len);
        rc = DPS_ERROR;
        done = 1;
        break;
    }
  }
  TRACE_OUT(A);
  return rc;
}
Пример #6
0
/*
 * Fetch full document information from searchd for every row of Res.
 *
 * Each document in Res->Doc is serialised with DpsDocToTextBuf() and the
 * resulting lines (CR/LF terminated) are sent in a single
 * DPS_SEARCHD_CMD_DOCINFO packet. In the DOCINFO reply every line is parsed
 * into a temporary document; the row of Res whose "DP_ID" matches is then
 * updated from that line.
 *
 * Before serialising, any variable named "Score" in Sections.Root['s'] gets
 * its section set to 1. With WITH_MULTIDBADDR defined, only documents whose
 * dbnum equals cl->dbnum are sent and updated.
 *
 * clnum is not referenced by this function.
 *
 * Returns DPS_OK or DPS_ERROR; on a searchd error or unknown reply the text
 * is stored in query->Conf->errstr.
 */
int __DPSCALL DpsResAddDocInfoSearchd(DPS_AGENT * query,DPS_DB *cl,DPS_RESULT * Res,size_t clnum){
  DPS_SEARCHD_PACKET_HEADER hdr;
  char *msg = NULL;
  size_t i;
  int done = 0;
  ssize_t nsent, nrecv;
  char *dinfo = NULL;
  int rc = DPS_OK;
  char *textbuf;
  size_t dlen = 0;

  TRACE_IN(query, "DpsResAddDocInfoSearchd");

  if (!Res->num_rows) { TRACE_OUT(query); return(DPS_OK); }

  for (i = 0; i < Res->num_rows; i++) {
    size_t ulen;
    size_t olen;
    size_t nsec, r;
    DPS_DOCUMENT *D = &Res->Doc[i];

    r = (size_t) 's';
    for (nsec = 0; nsec < D->Sections.Root[r].nvars; nsec++)
      if (strcasecmp(D->Sections.Root[r].Var[nsec].name, "Score") == 0) D->Sections.Root[r].Var[nsec].section = 1;

#ifdef WITH_MULTIDBADDR
    if (D->dbnum != cl->dbnum) continue;
#endif

    textbuf = DpsDocToTextBuf(D, 1, 0);
    if (textbuf == NULL) {
      /* Do not lose what has been accumulated for earlier rows. */
      DPS_FREE(dinfo);
      TRACE_OUT(query);
      return DPS_ERROR;
    }

    /* +2 for the "\r\n" appended after each document. */
    ulen = dps_strlen(textbuf)+2;
    olen = dlen;
    dlen = dlen + ulen;
    dinfo = (char*)DpsRealloc(dinfo, dlen + 1);
    if (dinfo == NULL) {
      DpsFree(textbuf);
      TRACE_OUT(query);
      return DPS_ERROR;
    }
    dinfo[olen] = '\0';
    sprintf(dinfo + olen, "%s\r\n", textbuf);
    DpsFree(textbuf);
  }

  if (dinfo == NULL) {
    TRACE_OUT(query);
    return DPS_OK;
  }

  hdr.cmd = DPS_SEARCHD_CMD_DOCINFO;
  hdr.len = dps_strlen(dinfo);

  nsent = DpsSearchdSendPacket(cl->searchd, &hdr, dinfo);
#ifdef DEBUG_SDP
  DpsLog(query, DPS_LOG_ERROR, "Sent DOCINFO size=%d buf=%s\n", hdr.len, dinfo);
#endif

  while (!done) {
    char *tok, *lt;
    nrecv = DpsRecvall(cl->searchd, &hdr, sizeof(hdr), 360);

    if (nrecv != sizeof(hdr)) {
      DpsLog(query, DPS_LOG_ERROR, "Received incomplete header from searchd (%d bytes, errno:%d)", (int)nrecv, errno);
      DPS_FREE(dinfo);
      TRACE_OUT(query);
      return(DPS_ERROR);
    } else {
#ifdef DEBUG_SDP
      DpsLog(query, DPS_LOG_ERROR, "Received header cmd=%d len=%d\n",hdr.cmd,hdr.len);
#endif
    }
    switch (hdr.cmd) {
      case DPS_SEARCHD_CMD_ERROR:
        msg = (char*)DpsMalloc(hdr.len+1);
        if (msg == NULL) {
          done = 1;
          break;
        }
        nrecv = DpsRecvall(cl->searchd, msg, hdr.len, 360);
        msg[(nrecv >= 0) ? nrecv : 0] = '\0';
        /* The message length comes from the peer, so bound the copy.
           NOTE(review): assumes Conf->errstr is a char array, not a pointer - confirm. */
        snprintf(query->Conf->errstr, sizeof(query->Conf->errstr), "Searchd error: '%s'",msg);
        rc = DPS_ERROR;
        DPS_FREE(msg);
        done = 1;
        break;
      case DPS_SEARCHD_CMD_MESSAGE:
        msg = (char*)DpsMalloc(hdr.len+1);
        if (msg == NULL) {
          done = 1;
          break;
        }
        nrecv = DpsRecvall(cl->searchd, msg, hdr.len, 360);
        msg[(nrecv >= 0) ? nrecv : 0] = '\0';
#ifdef DEBUG_SDP
        DpsLog(query, DPS_LOG_ERROR, "Message from searchd: '%s'\n",msg);
#endif
        DPS_FREE(msg);
        break;
      case DPS_SEARCHD_CMD_DOCINFO:
        /* The request buffer is reused to hold the reply. */
        dinfo = (char*)DpsRealloc(dinfo, hdr.len + 1);
        if (dinfo == NULL) {
          done = 1;
          break;
        }
        nrecv = DpsRecvall(cl->searchd, dinfo, hdr.len, 360);
        dinfo[(nrecv > 0) ? nrecv : 0] = '\0';
#ifdef DEBUG_SDP
        DpsLog(query, DPS_LOG_ERROR, "Received DOCINFO size=%d buf=%s\n",hdr.len,dinfo);
#endif
        tok = dps_strtok_r(dinfo, "\r\n", &lt, NULL);

        while (tok) {
          urlid_t Doc_url_id, Res_Doc_url_id;
          DPS_DOCUMENT Doc;

          /* Parse once into a scratch document just to learn the DP_ID. */
          DpsDocInit(&Doc);
          DpsDocFromTextBuf(&Doc,tok);
          Doc_url_id = (urlid_t)DpsVarListFindInt(&Doc.Sections, "DP_ID", 0);

          for (i = 0; i < Res->num_rows; i++) {
#ifdef WITH_MULTIDBADDR
            if (Res->Doc[i].dbnum != cl->dbnum) continue;
#endif
            Res_Doc_url_id = (urlid_t)DpsVarListFindInt(&Res->Doc[i].Sections, "DP_ID", 0);
            if (Res_Doc_url_id == Doc_url_id) {
              DpsDocFromTextBuf(&Res->Doc[i], tok);
              break;
            }
          }
          tok = dps_strtok_r(NULL, "\r\n", &lt, NULL);
          DpsDocFree(&Doc);
        }
        done = 1;
        break;
      default:
        snprintf(query->Conf->errstr, sizeof(query->Conf->errstr), "Unknown searchd response: cmd=%d len=%d",hdr.cmd,hdr.len);
        rc = DPS_ERROR;
        done = 1;
        break;
    }
  }
  /* Single release point for the request/reply buffer, whatever ended the loop. */
  DPS_FREE(dinfo);
  TRACE_OUT(query);
  return rc;
}
Пример #7
0
/*
 * Process one HTTP response header held in header->data ("Name: value").
 *
 * The buffer is split in place at the first ':'.
 *   - Content-Type / Content-Encoding values are lower-cased.
 *   - X-Robots-Tag (when Doc->Spider.use_robots is set) adjusts
 *     Doc->Spider.index / Doc->Spider.follow according to its directives.
 *   - Set-Cookie (when Doc->Spider.use_cookies is set) is forwarded to
 *     DpsCookiesAddStr(); such a header is not stored in Doc->Sections.
 * Every other header, including X-Robots-Tag, is stored in Doc->Sections
 * under its own name. If a section named "header.<Name>" exists, the value
 * is also appended to Doc->TextList.
 */
static void DpsParseHTTPHeader(DPS_AGENT *Indexer, DPS_DOCUMENT *Doc, DPS_DSTR *header) {
  char *val, *header_name;
  char secname[128], savec;
  DPS_VAR *Sec;
  DPS_TEXTITEM Item;

  if ((val = strchr(header_name = header->data, ':'))) {
    *val++ = '\0';
    val = DpsTrim(val, " \t:");

    if (!strcasecmp(header_name, "Content-Type") || !strcasecmp(header_name, "Content-Encoding")) {
      register char *v;
      for (v = val; *v; v++)
        *v = (char)dps_tolower((int)*v);
    } else if (Doc->Spider.use_robots && !strcasecmp(header_name, "X-Robots-Tag")) {
      char *lt;
      char *rtok;

      rtok = dps_strtok_r(val, " ,\r\n\t", &lt, &savec);
      while (rtok) {
        if (!strcasecmp(rtok, "ALL")) {
          /* Left Server parameters unchanged */
        } else if (!strcasecmp(rtok, "NONE")) {
          Doc->Spider.follow = DPS_FOLLOW_NO;
          Doc->Spider.index = 0;
          if (DpsNeedLog(DPS_LOG_DEBUG)) {
            DpsVarListReplaceInt(&Doc->Sections, "Index", 0);
            DpsVarListReplaceInt(&Doc->Sections, "Follow", DPS_FOLLOW_NO);
          }
        } else if (!strcasecmp(rtok, "NOINDEX")) {
          Doc->Spider.index = 0;
          if (DpsNeedLog(DPS_LOG_DEBUG)) DpsVarListReplaceInt(&Doc->Sections, "Index", 0);
        } else if (!strcasecmp(rtok, "NOFOLLOW")) {
          Doc->Spider.follow = DPS_FOLLOW_NO;
          if (DpsNeedLog(DPS_LOG_DEBUG)) DpsVarListReplaceInt(&Doc->Sections, "Follow", DPS_FOLLOW_NO);
        } else if (!strcasecmp(rtok, "NOARCHIVE")) {
          DpsVarListReplaceStr(&Doc->Sections, "Z", "");
        } else if (!strcasecmp(rtok, "INDEX")) {
          /* left server value unchanged */
          if (DpsNeedLog(DPS_LOG_DEBUG)) DpsVarListReplaceInt(&Doc->Sections, "Index", Doc->Spider.index);
        } else if (!strcasecmp(rtok, "FOLLOW")) {
          /* left server value unchanged */
          if (DpsNeedLog(DPS_LOG_DEBUG)) DpsVarListReplaceInt(&Doc->Sections, "Follow", Doc->Spider.follow);
        }
        /* Same delimiter set as the first call, so that a directive followed
           by ", " is not returned with a trailing comma. */
        rtok = dps_strtok_r(NULL, " ,\r\n\t", &lt, &savec);
      }

    } else if (Doc->Spider.use_cookies && !strcasecmp(header_name, "Set-Cookie")) {

      DpsCookiesAddStr(Indexer, &Doc->CurURL, val, 1);

      return;
    }
  }

  DpsVarListReplaceStr(&Doc->Sections, header_name, val ? val : "<NULL>");

  dps_snprintf(secname, sizeof(secname), "header.%s", header_name);
  secname[sizeof(secname)-1] = '\0';
  if ((Sec = DpsVarListFind(&Doc->Sections, secname)) && val) {
    bzero((void*)&Item, sizeof(Item));
    Item.href = NULL;
    Item.str = val;
    Item.section = Sec->section;
    Item.section_name = secname;
    Item.strict = Sec->strict;
    Item.len = 0;
    (void)DpsTextListAdd(&Doc->TextList, &Item);
  }
}
Пример #8
0
/*
 * Split the raw HTTP response in Doc->Buf into headers and body and record
 * the header values in Doc->Sections.
 *
 * Side effects visible here:
 *   - Doc->Buf.content is set to the first byte after the blank line that
 *     ends the headers (when one is found inside the buffer); the first byte
 *     of that blank line is overwritten with NUL.
 *   - "ResponseSize", "ResponseLine" and "Status" are written to
 *     Doc->Sections. "Status" keeps the larger of the previous and the newly
 *     parsed value.
 *   - Each header line (with continuation lines appended) is passed to
 *     DpsParseHTTPHeader().
 *   - A "Last-Modified" more than four hours past Indexer->now is dropped.
 *   - "Content-Length" becomes the body size present in the buffer plus
 *     whatever "Content-Length" value is in Doc->Sections at that point.
 *
 * Returns silently when the buffer is empty, the first line does not start
 * with "HTTP/", or no body start could be determined.
 */
void DpsParseHTTPResponse(DPS_AGENT *Indexer, DPS_DOCUMENT *Doc) {
  char *token, *lt, *headers, savec;
  int status, oldstatus;
  DPS_DSTR header;
  time_t now, last_mod_time;

  Doc->Buf.content = NULL;
  oldstatus = DpsVarListFindInt(&Doc->Sections, "Status", 0);
  DpsVarListReplaceInt(&Doc->Sections, "ResponseSize", (int)Doc->Buf.size);
  DpsVarListDel(&Doc->Sections, "Content-Length");
  /* "Last-Modified" is intentionally kept here: deleting it made the stored
     value equal to the first appearance in the db. */

  if (Doc->Buf.buf == NULL) return;

  /* Cut HTTP response header first */
  for (token = Doc->Buf.buf; *token; token++) {
    if (!strncmp(token, "\r\n\r\n", 4)) {
      if (token <= Doc->Buf.buf + Doc->Buf.size - 4) {
        *token = '\0';
        Doc->Buf.content = token + 4;
      }
      break;
    } else if (!strncmp(token, "\n\n", 2)) {
      if (token <= Doc->Buf.buf + Doc->Buf.size - 2) {
        *token = '\0';
        Doc->Buf.content = token + 2;
      }
      break;
    }
  }

  /* No separator accepted above: try to place the body after the stop point */
  if (!Doc->Buf.content) {
    if (token <= Doc->Buf.buf + Doc->Buf.size - 4) {
      if (token[2] == CR_CHAR) Doc->Buf.content = token + 4;
      else Doc->Buf.content = token + 2;
    }
  }

  /* Copy headers not to break them */
  headers = (char*)DpsStrdup(Doc->Buf.buf);
  if (headers == NULL) return;

  /* Now lets parse response header lines */
  token = dps_strtok_r(headers, "\r\n", &lt, &savec);

  if (!token) {
    DpsFree(headers);
    return;
  }

  if (!strncmp(token, "HTTP/", 5)) {
    status = atoi(token + 8);
    DpsVarListReplaceStr(&Doc->Sections, "ResponseLine", token);
    DpsVarListReplaceInt(&Doc->Sections, "Status", (oldstatus > status) ? oldstatus : status);
  } else {
    DpsFree(headers);
    return;
  }

  token = dps_strtok_r(NULL, "\r\n", &lt, &savec);
  DpsDSTRInit(&header, 128);

  while (token) {
    /* A line containing ':' starts a new header; flush the one collected so far. */
    if (strchr(token, ':')) {
      if (header.data_size) {
        DpsParseHTTPHeader(Indexer, Doc, &header);
        DpsDSTRFree(&header);
        DpsDSTRInit(&header, 128);
      }
    }
    DpsDSTRAppendStr(&header, token);

    token = dps_strtok_r(NULL, "\r\n", &lt, &savec);
  }
  if (header.data_size) {
    DpsParseHTTPHeader(Indexer, Doc, &header);
  }
  DpsDSTRFree(&header);
  DPS_FREE(headers);

  now = Indexer->now;
  last_mod_time = DpsHttpDate2Time_t(DpsVarListFindStr(&Doc->Sections, "Last-Modified", ""));
  if (last_mod_time > now + 3600 * 4) { /* we have a document with Last-Modified time in the future */
    /* time_t is not guaranteed to be int-sized; print it as long. */
    DpsLog(Indexer, DPS_LOG_EXTRA, "Last-Modified date is deep in future (%ld>%ld), dropping it.", (long)last_mod_time, (long)now);
    DpsVarListDel(&Doc->Sections, "Last-Modified");
  }

  /* Bad response, return */
  if (!Doc->Buf.content) {
    return;
  }
  DpsVarListReplaceInt(&Doc->Sections,"Content-Length", Doc->Buf.buf-Doc->Buf.content+(int)Doc->Buf.size + DpsVarListFindInt(&Doc->Sections,"Content-Length", 0));
}
Пример #9
0
/*
 * Convert an FTP directory listing held in connp->buf into a sequence of
 * HTML anchors ("<a href=\"ftp://...\"></a>\n"), one per entry, and store the
 * result back into connp->buf.
 *
 * Listing lines are expected in the form
 *     drwxrwxrwx x user group size month date time file_name
 * The first character of the line selects the handling:
 *     'd'  directory - anchor ends with '/'; "." and ".." are skipped
 *     'l'  symlink   - the name before " -> " is used, anchor ends with '/'
 *     '-'  file      - anchor without trailing '/'
 * Other line types are ignored.
 *
 * path is inserted between the host part and the entry name.
 *
 * Returns 0 on success (also when there is no input), -1 on allocation
 * failure.
 */
static int ftp_parse_list(DPS_CONN *connp, char *path){
  char *line, *buf_in, *ch, *buf_out, *tok, *fname;
  int len_h, len_f, len_p, i;
  char *dir, savec;
  size_t len, buf_len, cur_len;

  if (!connp->buf || !connp->buf_len)
    return 0;
  buf_in = connp->buf;
  /* 22 = dps_strlen(<a href=\"ftp://%s%s%s/\"></a>)*/
  len_h = dps_strlen(connp->hostname) + ((connp->user) ? dps_strlen(connp->user) : 0) + ((connp->pass) ? dps_strlen(connp->pass) : 0) + 2 + 22;
  len_p = dps_strlen(path);
  cur_len = 0;
  buf_len = connp->buf_len;
  buf_out = DpsXmalloc(buf_len + 1);
  if (buf_out == NULL) return -1;
  buf_out[0] = '\0';

  /* Iterating with a for-loop also covers a listing with no lines at all,
     where strtok() must not be called without a string to start from. */
  for (line = dps_strtok_r(buf_in, "\r\n", &tok, &savec); line != NULL;
       line = dps_strtok_r(NULL, "\r\n", &tok, &savec)) {

    if (!(fname = strtok(line, " ")))
      continue;
    /* drwxrwxrwx x user group size month date time file_name */
    for (i = 0; i < 7; i++)
      if (!(fname = strtok(NULL, " ")))
        break;
    /* Whatever remains of the line is the file name (may contain spaces). */
    if (!(fname = strtok(NULL, "")))
      continue;
    len = 0;
    len_f = len_h + len_p + dps_strlen(fname);
    if ((cur_len + len_f) >= buf_len) {
      buf_len += DPS_NET_BUF_SIZE;
      buf_out = DpsXrealloc(buf_out, buf_len + 1);
      if (buf_out == NULL) return -1;
    }

    switch (line[0]) {
      case 'd':
        if (!fname || !strcmp(fname, ".") || !strcmp(fname, ".."))
          break;
        len = len_f;
        dps_snprintf(DPS_STREND(buf_out), len+1, "<a href=\"ftp://%s%s%s%s%s/%s%s/\"></a>\n",
                     (connp->user) ? connp->user : "", (connp->user) ? ":" : "",
                     (connp->pass) ? connp->pass : "", (connp->user || connp->pass) ? "@" : "",
                     connp->hostname, path, fname);
        break;
      case 'l':
        ch = strstr (fname, " -> ");
        if (!ch)
          break;
        /* Keep only the link name, i.e. the text before " -> ". */
        len = ch - fname;
        dir = DpsMalloc(len+1);
        if (dir == NULL) {
          DPS_FREE(buf_out);
          return -1;
        }
        dps_snprintf(dir, len+1, "%s", fname);
        len = len_h + len_p + dps_strlen(dir);
        dps_snprintf(DPS_STREND(buf_out), len+1, "<a href=\"ftp://%s%s%s%s%s/%s%s/\"></a>\n", 
                     (connp->user) ? connp->user : "", (connp->user) ? ":" : "",
                     (connp->pass) ? connp->pass : "", (connp->user || connp->pass) ? "@" : "",
                     connp->hostname, path, dir);
        DPS_FREE(dir);
        break;
      case '-':
        len = len_f;
        dps_snprintf(DPS_STREND(buf_out), len+1, "<a  href=\"ftp://%s%s%s%s%s/%s%s\"></a>\n", 
                     (connp->user) ? connp->user : "", (connp->user) ? ":" : "",
                     (connp->pass) ? connp->pass : "", (connp->user || connp->pass) ? "@" : "",
                     connp->hostname, path, fname);
        break;
    }
    cur_len += len;
  }

  if (cur_len+1 > connp->buf_len_total){
    connp->buf_len_total = cur_len;
    connp->buf = DpsXrealloc(connp->buf, (size_t)connp->buf_len_total+1);
    if (connp->buf == NULL) {
      DPS_FREE(buf_out);
      return -1;
    }
  }
  bzero(connp->buf, ((size_t)connp->buf_len_total+1));
  dps_memmove(connp->buf, buf_out, cur_len);
  DPS_FREE(buf_out);
  return 0;
}
Пример #10
0
/*
 * Load a synonyms file into Env->Synonyms.
 *
 * The file is read completely and then processed line by line:
 *   - lines starting with '#', blank, tab, CR or LF are skipped;
 *   - "Charset: <name>"    selects the file charset (must precede word lines);
 *   - "Language: <lang>"   sets the language (must precede word lines);
 *   - "Thesaurus: yes|..." switches thesaurus mode on or off;
 *   - any other line is split with DpsGetArgs(); lines with fewer than two
 *     words are ignored. Each word is converted, lower-cased and NFC
 *     normalised, and for every pair of words on the line two entries
 *     (direct and reverse order) are appended to Env->Synonyms.Synonym.
 *     In thesaurus mode both entries carry the current line key as count,
 *     otherwise 0.
 *
 * Returns DPS_OK on success, DPS_ERROR on failure. Except for the stat()
 * failure, which is reported on stderr, and out-of-memory conditions while
 * building entries, the reason is stored in Env->errstr.
 */
__C_LINK int __DPSCALL DpsSynonymListLoad(DPS_ENV * Env,const char * filename){
     struct stat     sb;
     char      *str, *data = NULL, *cur_n = NULL;
     char      lang[64]="";
     DPS_CHARSET    *cs=NULL;
     DPS_CHARSET    *sys_int=DpsGetCharSet("sys-int");
     DPS_CONV  file_uni;
     DPS_WIDEWORD    *ww = NULL;
     size_t key = 1;
     int flag_th = 0;
     int             fd;
     int             rc = DPS_OK;
     char            savebyte;
     
     if (stat(filename, &sb)) {
       fprintf(stderr, "Unable to stat synonyms file '%s': %s", filename, strerror(errno));
       return DPS_ERROR;
     }
     if ((fd = DpsOpen2(filename, O_RDONLY)) <= 0) {
       dps_snprintf(Env->errstr,sizeof(Env->errstr)-1, "Unable to open synonyms file '%s': %s", filename, strerror(errno));
       return DPS_ERROR;
     }
     if ((data = (char*)DpsMalloc(sb.st_size + 1)) == NULL) {
       /* st_size is off_t; cast so that it matches the %d conversion. */
       dps_snprintf(Env->errstr,sizeof(Env->errstr)-1, "Unable to alloc %d bytes", (int)sb.st_size);
       DpsClose(fd);
       return DPS_ERROR;
     }
     if (read(fd, data, sb.st_size) != (ssize_t)sb.st_size) {
       dps_snprintf(Env->errstr,sizeof(Env->errstr)-1, "Unable to read synonym file '%s': %s", filename, strerror(errno));
       DPS_FREE(data);
       DpsClose(fd);
       return DPS_ERROR;
     }
     data[sb.st_size] = '\0';

     /* Lines are isolated in place: the byte following each '\n' is saved in
        savebyte, replaced with NUL, and restored when moving to that line. */
     str = data;
     cur_n = strchr(str, '\n');
     if (cur_n != NULL) {
       cur_n++;
       savebyte = *cur_n;
       *cur_n = '\0';
     }

     while(str != NULL) {
          if(str[0]=='#'||str[0]==' '||str[0]=='\t'||str[0]=='\r'||str[0]=='\n') goto loop_continue;
          
          if(!strncasecmp(str,"Charset:",8)){
               char * lasttok;
               char * charset;
               if((charset = dps_strtok_r(str + 8, " \t\n\r", &lasttok))) {
                    cs=DpsGetCharSet(charset);
                    if(!cs){
                         dps_snprintf(Env->errstr, sizeof(Env->errstr), "Unknown charset '%s' in synonyms file '%s'",
                                   charset, filename);
                         rc = DPS_ERROR;
                         goto done;
                    }
                    DpsConvInit(&file_uni, cs, sys_int, Env->CharsToEscape, 0);
               }
          }else
          if(!strncasecmp(str,"Language:",9)){
               char * lasttok;
               char * l;
               if((l = dps_strtok_r(str + 9, " \t\n\r", &lasttok))) {
                    dps_strncpy(lang, l, sizeof(lang)-1);
               }
          }else
          if(!strncasecmp(str, "Thesaurus:", 10)) {
               char * lasttok;
               char *tok = dps_strtok_r(str + 10, " \t\n\r", &lasttok);
               /* "Thesaurus:" with no value yields no token; treat it as "off". */
               flag_th = (tok != NULL && strncasecmp(tok, "yes", 3) == 0) ? 1 : 0;
          }else{
               char      *av[255];
               size_t         ac, i, j;
               dpsunicode_t *t;

               if(!cs){
                    dps_snprintf(Env->errstr,sizeof(Env->errstr)-1,"No Charset command in synonyms file '%s'",filename);
                    rc = DPS_ERROR;
                    goto done;
               }
               if(!lang[0]){
                    dps_snprintf(Env->errstr,sizeof(Env->errstr)-1,"No Language command in synonyms file '%s'",filename);
                    rc = DPS_ERROR;
                    goto done;
               }

               ac = DpsGetArgs(str, av, 255);
               if (ac < 2) goto loop_continue;

               if ((ww = (DPS_WIDEWORD*)DpsRealloc(ww, ac * sizeof(DPS_WIDEWORD))) == NULL) {
                 rc = DPS_ERROR;
                 goto done;
               }

               for (i = 0; i < ac; i++) {
                 ww[i].word = av[i];
                 ww[i].len = dps_strlen(av[i]);
                 ww[i].uword = t = (dpsunicode_t*)DpsMalloc((3 * ww[i].len + 1) * sizeof(dpsunicode_t));
                 if (ww[i].uword == NULL) {
                   /* Release the words already converted on this line. */
                   while (i > 0) {
                     i--;
                     DPS_FREE(ww[i].uword);
                   }
                   rc = DPS_ERROR;
                   goto done;
                 }
                 DpsConv(&file_uni, (char*)ww[i].uword, sizeof(dpsunicode_t) * (3 * ww[i].len + 1), av[i], ww[i].len + 1);
                 DpsUniStrToLower(ww[i].uword);
                 /* The normalised string replaces the conversion buffer t. */
                 ww[i].uword = DpsUniNormalizeNFC(NULL, ww[i].uword);
                 DPS_FREE(t);
               }

               for (i = 0; i < ac - 1; i++) {
                 for (j = i + 1; j < ac; j++) {

                   /* Two entries are appended per pair, hence the "+ 1". */
                   if((Env->Synonyms.nsynonyms + 1) >= Env->Synonyms.msynonyms){
                    Env->Synonyms.msynonyms += 64;
                    Env->Synonyms.Synonym = (DPS_SYNONYM*)DpsRealloc(Env->Synonyms.Synonym, 
                                                   sizeof(DPS_SYNONYM)*Env->Synonyms.msynonyms);
                    if (Env->Synonyms.Synonym == NULL) {
                      size_t k;
                      Env->Synonyms.msynonyms = Env->Synonyms.nsynonyms = 0;
                      for (k = 0; k < ac; k++) {
                        DPS_FREE(ww[k].uword);
                      }
                      rc = DPS_ERROR;
                      goto done;
                    }
                   }
               
                   bzero((void*)&Env->Synonyms.Synonym[Env->Synonyms.nsynonyms], sizeof(DPS_SYNONYM));
               
                   /* Add direct order */
                   Env->Synonyms.Synonym[Env->Synonyms.nsynonyms].p.uword = DpsUniDup(ww[i].uword);
                   Env->Synonyms.Synonym[Env->Synonyms.nsynonyms].s.uword = DpsUniDup(ww[j].uword);
                   Env->Synonyms.Synonym[Env->Synonyms.nsynonyms].p.count = 
                     Env->Synonyms.Synonym[Env->Synonyms.nsynonyms].s.count = (size_t)((flag_th) ? key : 0);
                   Env->Synonyms.nsynonyms++;
               
                   bzero((void*)&Env->Synonyms.Synonym[Env->Synonyms.nsynonyms], sizeof(DPS_SYNONYM));
               
                   /* Add reverse order */
                   Env->Synonyms.Synonym[Env->Synonyms.nsynonyms].p.uword = DpsUniDup(ww[j].uword);
                   Env->Synonyms.Synonym[Env->Synonyms.nsynonyms].s.uword = DpsUniDup(ww[i].uword);
                   Env->Synonyms.Synonym[Env->Synonyms.nsynonyms].p.count = 
                     Env->Synonyms.Synonym[Env->Synonyms.nsynonyms].s.count = (size_t)((flag_th) ? key : 0);
                   Env->Synonyms.nsynonyms++;
                 }
               }

               for (i = 0; i < ac; i++) {
                 DPS_FREE(ww[i].uword);
               }
               /* Advance the line key, skipping 0 on wrap-around. */
               do { key++; } while (key == 0);
          }
     loop_continue:
          str = cur_n;
          if (str != NULL) {
            *str = savebyte;
            cur_n = strchr(str, '\n');
            if (cur_n != NULL) {
              cur_n++;
              savebyte = *cur_n;
              *cur_n = '\0';
            }
          }
     }
done:
     /* Common exit: release the file image, the word array and the descriptor. */
     DPS_FREE(data);
     DPS_FREE(ww);
     DpsClose(fd);
     return rc;
}