/*
 * Parse the raw HTTP response stored in Doc->Buf.
 *
 * Indexer - agent handle, passed through to DpsParseHTTPHeader().
 * Doc     - document whose Buf.buf holds the NUL-terminated response; its
 *           Sections list receives the parsed values.
 *
 * Effects visible in this function:
 *   - "ResponseSize" is set to Doc->Buf.size; "Content-Length" and
 *     "Last-Modified" are removed before parsing.
 *   - The first byte of the header/body separator is overwritten with '\0'
 *     and Doc->Buf.content is pointed just past the separator.
 *   - "ResponseLine" receives the status line and "Status" the larger of the
 *     previously stored status and the one parsed from the status line.
 *   - Every header is handed to DpsParseHTTPHeader(); a line without ':' is
 *     appended to the header that precedes it.
 *   - "Content-Length" is inserted as the number of bytes after the separator.
 *
 * Returns early (nothing more is stored) when the buffer is NULL, when no
 * header/body boundary can be determined, or when the first line does not
 * start with "HTTP/".
 */
void DpsParseHTTPResponse(DPS_AGENT *Indexer, DPS_DOCUMENT *Doc) {
  char *token, *lt, *headers;
  int oldstatus;
  DPS_DSTR header;

  Doc->Buf.content = NULL;
  oldstatus = DpsVarListFindInt(&Doc->Sections, "Status", 0);
  DpsVarListReplaceInt(&Doc->Sections, "ResponseSize", (int)Doc->Buf.size);
  DpsVarListDel(&Doc->Sections, "Content-Length");
  DpsVarListDel(&Doc->Sections, "Last-Modified");

  if (Doc->Buf.buf == NULL) return;

  /* Cut the HTTP response header first: terminate it in place at the blank line */
  for (token = Doc->Buf.buf; *token; token++) {
    if (!strncmp(token, "\r\n\r\n", 4)) {
      *token = '\0';
      Doc->Buf.content = token + 4;
      break;
    } else if (!strncmp(token, "\n\n", 2)) {
      *token = '\0';
      Doc->Buf.content = token + 2;
      break;
    }
  }

  if (!Doc->Buf.content) {
    /*
     * The scan stopped on a NUL before any separator was seen. If that NUL is
     * not at the end of the data, treat it as a separator that was already cut
     * (the loop above leaves "\0\n\r\n" or "\0\n" behind), presumably by an
     * earlier pass over the same buffer.
     * Offsets are compared instead of pointers so that a buffer shorter than
     * four bytes never forms a pointer before its start.
     */
    size_t offset = (size_t)(token - Doc->Buf.buf);

    if (offset + 4 < (size_t)Doc->Buf.size) {
      if (token[2] == '\r') Doc->Buf.content = token + 4;
      else Doc->Buf.content = token + 2;
    } else {
      /* Bad response, return */
      return;
    }
  }

  /* Copy headers not to break them: the tokenizer writes into its input */
  headers = (char*)DpsStrdup(Doc->Buf.buf);
  if (headers == NULL) return; /* presumably an allocation failure; nothing to parse */

  /* Now let's parse the response header lines */
  token = dps_strtok_r(headers, "\r\n", &lt);
  if (!token) {
    DpsFree(headers); /* this path used to leak the copy */
    return;
  }

  if (strncmp(token, "HTTP/", 5)) {
    DpsFree(headers);
    return;
  }

  {
    /*
     * The status code is read from just past the 8-character "HTTP/x.y"
     * prefix. A shorter status line yields status 0 instead of reading
     * beyond the end of the token.
     */
    int status = (strlen(token) > 8) ? atoi(token + 8) : 0;

    DpsVarListReplaceStr(&Doc->Sections, "ResponseLine", token);
    DpsVarListReplaceInt(&Doc->Sections, "Status", (oldstatus > status) ? oldstatus : status);
  }

  token = dps_strtok_r(NULL, "\r\n", &lt);
  DpsDSTRInit(&header, 128);
  while (token) {
    /* A line containing ':' starts a new header, so flush the pending one */
    if (strchr(token, ':')) {
      if (header.data_size) {
        DpsParseHTTPHeader(Indexer, Doc, &header);
        DpsDSTRFree(&header);
        DpsDSTRInit(&header, 128);
      }
    }
    DpsDSTRAppendStr(&header, token);
    token = dps_strtok_r(NULL, "\r\n", &lt);
  }
  if (header.data_size) {
    DpsParseHTTPHeader(Indexer, Doc, &header);
  }
  DpsDSTRFree(&header);
  DPS_FREE(headers);

  /* Body length = total size minus the offset of the body within the buffer */
  DpsVarListInsInt(&Doc->Sections, "Content-Length",
                   (int)(Doc->Buf.buf - Doc->Buf.content) + (int)Doc->Buf.size);
}
/*
 * Parse the raw HTTP response stored in Doc->Buf.
 *
 * Indexer - agent handle; supplies the current time (Indexer->now) and is
 *           passed through to DpsParseHTTPHeader() and DpsLog().
 * Doc     - document whose Buf.buf holds the NUL-terminated response; its
 *           Sections list receives the parsed values.
 *
 * Effects visible in this function:
 *   - "ResponseSize" is set to Doc->Buf.size and "Content-Length" is removed
 *     before parsing.
 *   - The first byte of the header/body separator is overwritten with '\0'
 *     and Doc->Buf.content is pointed just past the separator.
 *   - "ResponseLine" receives the status line and "Status" the larger of the
 *     previously stored status and the one parsed from the status line.
 *   - Every header is handed to DpsParseHTTPHeader(); a line without ':' is
 *     appended to the header that precedes it.
 *   - A "Last-Modified" value more than four hours ahead of Indexer->now is
 *     dropped.
 *   - If a body start was found, "Content-Length" is replaced by the number
 *     of bytes after the separator plus whatever "Content-Length" value is
 *     stored in Sections at that point (0 when absent).
 *
 * Headers are still parsed when no header/body boundary was found; only the
 * final "Content-Length" update is skipped in that case. The function returns
 * early when the buffer is NULL or the first line does not start with "HTTP/".
 */
void DpsParseHTTPResponse(DPS_AGENT *Indexer, DPS_DOCUMENT *Doc) {
  char *token, *lt, *headers, savec;
  int status, oldstatus;
  DPS_DSTR header;
  time_t now, last_mod_time;
  size_t offset, size;

  Doc->Buf.content = NULL;
  oldstatus = DpsVarListFindInt(&Doc->Sections, "Status", 0);
  DpsVarListReplaceInt(&Doc->Sections, "ResponseSize", (int)Doc->Buf.size);
  DpsVarListDel(&Doc->Sections, "Content-Length");
  /*
   * "Last-Modified" is intentionally not deleted here: if it is not deleted,
   * Last-Modified equals its first appearance in the db.
   */

  if (Doc->Buf.buf == NULL) return;
  size = (size_t)Doc->Buf.size;

  /*
   * Cut the HTTP response header first: terminate it in place at the blank
   * line. Offsets are compared instead of pointers so that a very short
   * buffer never forms a pointer before its start.
   */
  for (token = Doc->Buf.buf; *token; token++) {
    if (!strncmp(token, "\r\n\r\n", 4)) {
      offset = (size_t)(token - Doc->Buf.buf);
      if (offset + 4 <= size) {
        *token = '\0';
        Doc->Buf.content = token + 4;
      }
      break;
    } else if (!strncmp(token, "\n\n", 2)) {
      offset = (size_t)(token - Doc->Buf.buf);
      if (offset + 2 <= size) {
        *token = '\0';
        Doc->Buf.content = token + 2;
      }
      break;
    }
  }

  if (!Doc->Buf.content) {
    /*
     * No separator was cut. If the scan stopped with at least four bytes
     * left, treat the stop position as a separator that was already cut
     * (the loop above leaves "\0\n\r\n" or "\0\n" behind), presumably by an
     * earlier pass over the same buffer.
     */
    offset = (size_t)(token - Doc->Buf.buf);
    if (offset + 4 <= size) {
      if (token[2] == CR_CHAR) Doc->Buf.content = token + 4;
      else Doc->Buf.content = token + 2;
    }
  }

  /* Copy headers not to break them: the tokenizer writes into its input */
  headers = (char*)DpsStrdup(Doc->Buf.buf);
  if (headers == NULL) return; /* presumably an allocation failure; nothing to parse */

  /* Now let's parse the response header lines */
  token = dps_strtok_r(headers, "\r\n", &lt, &savec);
  if (!token) {
    DpsFree(headers);
    return;
  }

  if (strncmp(token, "HTTP/", 5)) {
    DpsFree(headers);
    return;
  }

  /*
   * The status code is read from just past the 8-character "HTTP/x.y" prefix.
   * A shorter status line yields status 0 instead of reading beyond the end
   * of the token.
   */
  status = (strlen(token) > 8) ? atoi(token + 8) : 0;
  DpsVarListReplaceStr(&Doc->Sections, "ResponseLine", token);
  DpsVarListReplaceInt(&Doc->Sections, "Status", (oldstatus > status) ? oldstatus : status);

  token = dps_strtok_r(NULL, "\r\n", &lt, &savec);
  DpsDSTRInit(&header, 128);
  while (token) {
    /* A line containing ':' starts a new header, so flush the pending one */
    if (strchr(token, ':')) {
      if (header.data_size) {
        DpsParseHTTPHeader(Indexer, Doc, &header);
        DpsDSTRFree(&header);
        DpsDSTRInit(&header, 128);
      }
    }
    DpsDSTRAppendStr(&header, token);
    token = dps_strtok_r(NULL, "\r\n", &lt, &savec);
  }
  if (header.data_size) {
    DpsParseHTTPHeader(Indexer, Doc, &header);
  }
  DpsDSTRFree(&header);
  DPS_FREE(headers);

  now = Indexer->now;
  last_mod_time = DpsHttpDate2Time_t(DpsVarListFindStr(&Doc->Sections, "Last-Modified", ""));
  if (last_mod_time > now + 3600 * 4) {
    /*
     * We have a document with Last-Modified time in the future.
     * time_t is cast to long and printed with %ld: passing it to %d is a
     * mismatched specifier wherever time_t is wider than int.
     */
    DpsLog(Indexer, DPS_LOG_EXTRA, "Last-Modified date is deep in future (%ld>%ld), dropping it.",
           (long)last_mod_time, (long)now);
    DpsVarListDel(&Doc->Sections, "Last-Modified");
  }

  /* Bad response, return */
  if (!Doc->Buf.content) {
    return;
  }

  /* Body length = total size minus the offset of the body within the buffer */
  DpsVarListReplaceInt(&Doc->Sections, "Content-Length",
                       (int)(Doc->Buf.buf - Doc->Buf.content) + (int)Doc->Buf.size
                       + DpsVarListFindInt(&Doc->Sections, "Content-Length", 0));
}