Пример #1
0
static void DpsParseHTTPHeader(DPS_AGENT *Indexer, DPS_DOCUMENT *Doc, DPS_DSTR *header) {
  char *val, *header_name;
  char	secname[128];
  DPS_VAR	*Sec;
  DPS_TEXTITEM Item;

  if ((val = strchr(header_name = header->data, ':'))) {
/*
  fprintf(stderr, "HEADER: %s\n", header_name);
*/
    *val++='\0';
    val = DpsTrim(val," \t:");
			
    if (!strcasecmp(header_name, "Content-Type") || !strcasecmp(header_name, "Content-Encoding")) {
      char *v;
      for(v=val ; *v ; v++) 
	*v = dps_tolower(*v);
    } else if (Doc->Spider.use_cookies && !strcasecmp(header_name, "Set-Cookie")) {
      char *part, *lpart;
      char *name = NULL;
      char *value = NULL;
      const char *domain = NULL;
      const char *path = NULL;
      dps_uint4 expire = 0;
      char secure = 'n';
      for (part = dps_strtok_r(val, ";" , &lpart) ; part;
	   part = dps_strtok_r(NULL, ";", &lpart)) {
	char *arg;
	part = DpsTrim(part, " ");
	if ((arg = strchr(part, '='))) {
	  *arg++ = '\0';
	  if (!name) {
	    name = part;
	    value = arg;
	  } else 
	    if (!strcasecmp(part, "path")) {
	      path = arg;
	    } else
	      if (!strcasecmp(part, "domain")) {
		domain = arg;
	      } else
		if (!strcasecmp(part, "secure")) {
		  secure = 'y';
		} else
		  if (!strcasecmp(part, "expires")) {
		    expire = (dps_uint4)DpsHttpDate2Time_t(arg);
		  }
	}
      }
      if (name && value) {
	if (domain && domain[0] == '.') {
	  domain++;
	} else {
	  domain = Doc->CurURL.hostname ? Doc->CurURL.hostname : "localhost";
	}
	if (!path) {
	  path = Doc->CurURL.path ? Doc->CurURL.path : "/";
	}
	DpsCookiesAdd(Indexer, domain, path, name, value, secure, expire, 1);
      }
/*			  token = dps_strtok_r(NULL,"\r\n",&lt);
			  continue;*/
      return;
    }
  }

  DpsVarListReplaceStr(&Doc->Sections, header_name, val ? val : "<NULL>");

  dps_snprintf(secname,sizeof(secname),"header.%s", header_name);
  secname[sizeof(secname)-1]='\0';
  if((Sec = DpsVarListFind(&Doc->Sections, secname)) && val ) {
    Item.href = NULL;
    Item.str = val;
    Item.section = Sec->section;
    Item.section_name = secname;
    Item.len = 0;
    DpsTextListAdd(&Doc->TextList, &Item);
  }
}
Пример #2
0
static void DpsParseHTTPHeader(DPS_AGENT *Indexer, DPS_DOCUMENT *Doc, DPS_DSTR *header) {
  char *val, *header_name;
  char	secname[128], savec;
  DPS_VAR	*Sec;
  DPS_TEXTITEM Item;

  if ((val = strchr(header_name = header->data, ':'))) {
/*
  fprintf(stderr, "HEADER: %s\n", header_name);
*/
    *val++='\0';
    val = DpsTrim(val," \t:");
			
    if (!strcasecmp(header_name, "Content-Type") || !strcasecmp(header_name, "Content-Encoding")) {
      register char *v;
      for(v=val ; *v ; v++) 
	*v = (char)dps_tolower((int)*v);
    } else if (Doc->Spider.use_robots && !strcasecmp(header_name, "X-Robots-Tag")) {
        char * lt;
	char * rtok;
					
	rtok = dps_strtok_r(val, " ,\r\n\t", &lt, &savec);
	while(rtok){
	  if(!strcasecmp(rtok, "ALL")){
	    /* Left Server parameters unchanged */
	  }else if(!strcasecmp(rtok, "NONE")){
	    Doc->Spider.follow = DPS_FOLLOW_NO;
	    Doc->Spider.index = 0;
	    if (DpsNeedLog(DPS_LOG_DEBUG)) {
	      DpsVarListReplaceInt(&Doc->Sections, "Index", 0);
	      DpsVarListReplaceInt(&Doc->Sections, "Follow", DPS_FOLLOW_NO);
	    }
	  }else if(!strcasecmp(rtok, "NOINDEX")) {
	    Doc->Spider.index = 0;
/*          Doc->method = DPS_METHOD_DISALLOW;*/
	    if (DpsNeedLog(DPS_LOG_DEBUG)) DpsVarListReplaceInt(&Doc->Sections, "Index", 0);
	  }else if(!strcasecmp(rtok, "NOFOLLOW")) {
	    Doc->Spider.follow = DPS_FOLLOW_NO;
	    if (DpsNeedLog(DPS_LOG_DEBUG)) DpsVarListReplaceInt(&Doc->Sections, "Follow", DPS_FOLLOW_NO);
	  }else if(!strcasecmp(rtok, "NOARCHIVE")) {
	    DpsVarListReplaceStr(&Doc->Sections, "Z", "");
	  }else if(!strcasecmp(rtok, "INDEX")) {
            /* left server value unchanged */ 
	    if (DpsNeedLog(DPS_LOG_DEBUG)) DpsVarListReplaceInt(&Doc->Sections, "Index", Doc->Spider.index);
	  }else if(!strcasecmp(rtok, "FOLLOW")) {
            /* left server value unchanged */ 
	    if (DpsNeedLog(DPS_LOG_DEBUG)) DpsVarListReplaceInt(&Doc->Sections, "Follow", Doc->Spider.follow);
	  }
	  rtok = dps_strtok_r(NULL, " \r\n\t", &lt, &savec);
	}
      
    } else if (Doc->Spider.use_cookies && !strcasecmp(header_name, "Set-Cookie")) {


      DpsCookiesAddStr(Indexer, &Doc->CurURL, val, 1);

      return;
    }
  }

  DpsVarListReplaceStr(&Doc->Sections, header_name, val ? val : "<NULL>");

  dps_snprintf(secname,sizeof(secname),"header.%s", header_name);
  secname[sizeof(secname)-1]='\0';
  if((Sec = DpsVarListFind(&Doc->Sections, secname)) && val ) {
    bzero((void*)&Item, sizeof(Item));
    Item.href = NULL;
    Item.str = val;
    Item.section = Sec->section;
    Item.section_name = secname;
    Item.strict = Sec->strict;
    Item.len = 0;
    (void)DpsTextListAdd(&Doc->TextList, &Item);
  }
}