/*
 * Parse one HTTP response header line of the form "Name: value" and record
 * it on the document.
 *
 * Indexer - agent context; only passed through to DpsCookiesAdd().
 * Doc     - document being processed; Doc->Sections and Doc->TextList are
 *           updated, Doc->Spider.use_cookies and Doc->CurURL are read.
 * header  - buffer holding the header line. It is modified in place: the
 *           ':' separator is overwritten with '\0', the Set-Cookie value is
 *           tokenized, and Content-Type / Content-Encoding values are
 *           lower-cased.
 *
 * Behaviour:
 *  - Set-Cookie (only when Doc->Spider.use_cookies is set) is split into
 *    its ';'-separated parts and handed to DpsCookiesAdd(); such a header
 *    is not stored in Doc->Sections.
 *  - Every other header is stored in Doc->Sections under its own name. A
 *    line without ':' is stored under the whole line as the name, with the
 *    value "<NULL>".
 *  - If a section called "header.<Name>" is found in Doc->Sections and the
 *    header has a value, the value is also appended to Doc->TextList.
 */
static void DpsParseHTTPHeader(DPS_AGENT *Indexer, DPS_DOCUMENT *Doc, DPS_DSTR *header) {
  char *val, *header_name;
  char secname[128];
  DPS_VAR *Sec;
  DPS_TEXTITEM Item;

  /* Nothing to parse; calling strchr() on a NULL buffer would be undefined. */
  if (header == NULL || header->data == NULL) {
    return;
  }

  header_name = header->data;
  val = strchr(header_name, ':');

  if (val != NULL) {
    /* Split "Name: value" into two C strings and strip padding from the value. */
    *val++ = '\0';
    val = DpsTrim(val, " \t:");

    if (!strcasecmp(header_name, "Content-Type") || !strcasecmp(header_name, "Content-Encoding")) {
      char *v;

      /* These values are stored lower-cased. */
      for (v = val; *v; v++) {
        *v = dps_tolower(*v);
      }
    } else if (Doc->Spider.use_cookies && !strcasecmp(header_name, "Set-Cookie")) {
      char *part, *lpart;
      char *name = NULL;
      char *value = NULL;
      const char *domain = NULL;
      const char *path = NULL;
      dps_uint4 expire = 0;
      char secure = 'n';

      for (part = dps_strtok_r(val, ";", &lpart); part; part = dps_strtok_r(NULL, ";", &lpart)) {
        char *arg;

        part = DpsTrim(part, " ");
        arg = strchr(part, '=');

        if (arg == NULL) {
          /*
           * Attribute without a value. "Secure" is normally sent in this
           * bare form, so it has to be recognised here; it only counts once
           * the leading name=value pair has been seen.
           */
          if (name && !strcasecmp(part, "secure")) {
            secure = 'y';
          }
          continue;
        }

        *arg++ = '\0';
        if (!name) {
          /* The first name=value pair is the cookie itself. */
          name = part;
          value = arg;
        } else if (!strcasecmp(part, "path")) {
          path = arg;
        } else if (!strcasecmp(part, "domain")) {
          domain = arg;
        } else if (!strcasecmp(part, "secure")) {
          /* Tolerate the non-standard "secure=..." spelling as well. */
          secure = 'y';
        } else if (!strcasecmp(part, "expires")) {
          expire = (dps_uint4)DpsHttpDate2Time_t(arg);
        }
      }

      if (name && value) {
        if (domain && domain[0] == '.') {
          /* Drop the leading dot of a domain-wide cookie. */
          domain++;
        } else {
          /* Any other (or missing) Domain falls back to the current host. */
          domain = Doc->CurURL.hostname ? Doc->CurURL.hostname : "localhost";
        }
        if (!path) {
          path = Doc->CurURL.path ? Doc->CurURL.path : "/";
        }
        DpsCookiesAdd(Indexer, domain, path, name, value, secure, expire, 1);
      }
      /* Cookies are not recorded as document sections. */
      return;
    }
  }

  DpsVarListReplaceStr(&Doc->Sections, header_name, val ? val : "<NULL>");

  dps_snprintf(secname, sizeof(secname), "header.%s", header_name);
  secname[sizeof(secname) - 1] = '\0';

  Sec = DpsVarListFind(&Doc->Sections, secname);
  if (Sec != NULL && val != NULL) {
    /* Zero the item first so no field is handed over uninitialized. */
    memset(&Item, 0, sizeof(Item));
    Item.href = NULL;
    Item.str = val;
    Item.section = Sec->section;
    Item.section_name = secname;
    Item.len = 0;
    DpsTextListAdd(&Doc->TextList, &Item);
  }
}
/*
 * Parse one HTTP response header line of the form "Name: value" and record
 * it on the document.
 *
 * Indexer - agent context; only passed through to DpsCookiesAddStr().
 * Doc     - document being processed; Doc->Sections, Doc->TextList and, for
 *           X-Robots-Tag, Doc->Spider.index / Doc->Spider.follow are updated.
 * header  - buffer holding the header line. It is modified in place: the
 *           ':' separator is overwritten with '\0', the X-Robots-Tag value
 *           is tokenized, and Content-Type / Content-Encoding values are
 *           lower-cased.
 *
 * Behaviour:
 *  - X-Robots-Tag (only when Doc->Spider.use_robots is set) is split into
 *    directives which adjust the document's index/follow settings; the
 *    header is then stored like any other.
 *  - Set-Cookie (only when Doc->Spider.use_cookies is set) is handed to
 *    DpsCookiesAddStr() and is not stored in Doc->Sections.
 *  - Every other header is stored in Doc->Sections under its own name. A
 *    line without ':' is stored under the whole line as the name, with the
 *    value "<NULL>".
 *  - If a section called "header.<Name>" is found in Doc->Sections and the
 *    header has a value, the value is also appended to Doc->TextList.
 */
static void DpsParseHTTPHeader(DPS_AGENT *Indexer, DPS_DOCUMENT *Doc, DPS_DSTR *header) {
  /*
   * One delimiter set for every tokenizer call: directives may be separated
   * by commas and/or whitespace ("noindex,nofollow" or "noindex, nofollow").
   */
  static const char robots_delim[] = " ,\r\n\t";
  char *val, *header_name;
  char secname[128];
  /* Handed to dps_strtok_r() by address, so give it a defined start value. */
  char savec = '\0';
  DPS_VAR *Sec;
  DPS_TEXTITEM Item;

  /* Nothing to parse; calling strchr() on a NULL buffer would be undefined. */
  if (header == NULL || header->data == NULL) {
    return;
  }

  header_name = header->data;
  val = strchr(header_name, ':');

  if (val != NULL) {
    /* Split "Name: value" into two C strings and strip padding from the value. */
    *val++ = '\0';
    val = DpsTrim(val, " \t:");

    if (!strcasecmp(header_name, "Content-Type") || !strcasecmp(header_name, "Content-Encoding")) {
      char *v;

      /* These values are stored lower-cased. */
      for (v = val; *v; v++) {
        *v = (char)dps_tolower((int)*v);
      }
    } else if (Doc->Spider.use_robots && !strcasecmp(header_name, "X-Robots-Tag")) {
      char *rest;   /* tokenizer position, owned by dps_strtok_r() */
      char *rtok;   /* current directive */

      for (rtok = dps_strtok_r(val, robots_delim, &rest, &savec);
           rtok != NULL;
           rtok = dps_strtok_r(NULL, robots_delim, &rest, &savec)) {

        if (!strcasecmp(rtok, "ALL")) {
          /* Leave the server parameters unchanged. */
        } else if (!strcasecmp(rtok, "NONE")) {
          Doc->Spider.follow = DPS_FOLLOW_NO;
          Doc->Spider.index = 0;
          /* The Index/Follow sections are only written when debug logging is on. */
          if (DpsNeedLog(DPS_LOG_DEBUG)) {
            DpsVarListReplaceInt(&Doc->Sections, "Index", 0);
            DpsVarListReplaceInt(&Doc->Sections, "Follow", DPS_FOLLOW_NO);
          }
        } else if (!strcasecmp(rtok, "NOINDEX")) {
          Doc->Spider.index = 0;
          if (DpsNeedLog(DPS_LOG_DEBUG)) {
            DpsVarListReplaceInt(&Doc->Sections, "Index", 0);
          }
        } else if (!strcasecmp(rtok, "NOFOLLOW")) {
          Doc->Spider.follow = DPS_FOLLOW_NO;
          if (DpsNeedLog(DPS_LOG_DEBUG)) {
            DpsVarListReplaceInt(&Doc->Sections, "Follow", DPS_FOLLOW_NO);
          }
        } else if (!strcasecmp(rtok, "NOARCHIVE")) {
          /* Replaces the "Z" section with an empty string. */
          DpsVarListReplaceStr(&Doc->Sections, "Z", "");
        } else if (!strcasecmp(rtok, "INDEX")) {
          /* Server value stays as it is; only mirrored for debugging. */
          if (DpsNeedLog(DPS_LOG_DEBUG)) {
            DpsVarListReplaceInt(&Doc->Sections, "Index", Doc->Spider.index);
          }
        } else if (!strcasecmp(rtok, "FOLLOW")) {
          /* Server value stays as it is; only mirrored for debugging. */
          if (DpsNeedLog(DPS_LOG_DEBUG)) {
            DpsVarListReplaceInt(&Doc->Sections, "Follow", Doc->Spider.follow);
          }
        }
        /* Unknown directives are ignored. */
      }
    } else if (Doc->Spider.use_cookies && !strcasecmp(header_name, "Set-Cookie")) {
      DpsCookiesAddStr(Indexer, &Doc->CurURL, val, 1);
      /* Cookies are not recorded as document sections. */
      return;
    }
  }

  DpsVarListReplaceStr(&Doc->Sections, header_name, val ? val : "<NULL>");

  dps_snprintf(secname, sizeof(secname), "header.%s", header_name);
  secname[sizeof(secname) - 1] = '\0';

  Sec = DpsVarListFind(&Doc->Sections, secname);
  if (Sec != NULL && val != NULL) {
    /* Zero the item first so fields not set below are well defined. */
    bzero((void*)&Item, sizeof(Item));
    Item.href = NULL;
    Item.str = val;
    Item.section = Sec->section;
    Item.section_name = secname;
    Item.strict = Sec->strict;
    Item.len = 0;
    (void)DpsTextListAdd(&Doc->TextList, &Item);
  }
}