/*
 * http_datahandler() - run newly received data through the HTTP response parser.
 *
 * While rec->httpdatastate is HTTPDATA_HEADERS, the data is accumulated in
 * rec->httpheaders until an end-of-headers delimiter shows up. The headers are
 * then scanned for "Transfer-encoding:" / "Content-Length:", the state is
 * switched to HTTPDATA_BODY, and the function calls itself to handle any body
 * data that arrived in the same read. In HTTPDATA_BODY state the data is
 * de-chunked (when the transfer is chunked) and appended to rec->httpbody.
 *
 * Parameters:
 *   rec         - connection record holding the read buffer, the header and
 *                 body buffers, and the parser state.
 *   iobytes     - amount of data in rec->readbuf.
 *                 NOTE(review): the header state copies (iobytes - startoffset)
 *                 bytes, while the body state processes iobytes bytes starting
 *                 at startoffset. The two readings only agree when startoffset
 *                 is 0 - confirm what the callers pass.
 *   startoffset - offset into rec->readbuf where the data to process begins.
 *   advancestep - output; cleared on entry, set to 1 once the body is
 *                 considered complete.
 *
 * Returns iobytes (the value passed in) on every path.
 */
static int http_datahandler(myconn_t *rec, int iobytes, int startoffset, int *advancestep)
{
	char *endofhdrs;
	int httpmajorver, httpminorver;
	char *xferencoding;
	int len = iobytes;
	char *bol, *buf;
	int hdrbytes, bodybytes = 0, bodyoffset, initialhdrbuflen, n;

	*advancestep = 0;

	switch (rec->httpdatastate) {
	  case HTTPDATA_HEADERS:
		/* Remember how much header data we had before this read; needed below to locate the body in rec->readbuf */
		initialhdrbuflen = STRBUFLEN(rec->httpheaders);
		addtobufferraw(rec->httpheaders, rec->readbuf+startoffset, (iobytes - startoffset));

check_for_endofheaders:
		/* 
		 * Now see if we have the end-of-headers delimiter.
		 * This SHOULD be <cr><lf><cr><lf>, but RFC 2616 says
		 * you SHOULD recognize just plain <lf><lf>.
		 * So try the second form, if the first one is not there.
		 * On a match, endofhdrs is moved past the delimiter.
		 */
		endofhdrs = strstr(STRBUF(rec->httpheaders), "\r\n\r\n");
		if (endofhdrs) {
			endofhdrs += 4;
		}
		else {
			endofhdrs = strstr(STRBUF(rec->httpheaders), "\n\n");
			if (endofhdrs) {
				endofhdrs += 2;
			}
		}

		if (!endofhdrs) {
			/* No more to do for now, but pass the databyte-count back to the caller for further processing. */
			return iobytes;
		}
		else {
			/*
			 * Chop the non-header section of data from the headers.
			 * NOTE(review): endofhdrs keeps pointing into the buffer at the cut;
			 * presumably strbufferchop() writes a NUL there - confirm.
			 */
			strbufferchop(rec->httpheaders, strlen(endofhdrs));
		}

		/*
		 * We have an end-of-header delimiter, but it could be just a "100 Continue" response.
		 * NOTE(review): the sscanf() result is not checked, so rec->httpstatus keeps
		 * its previous value when the status line does not match. The version numbers
		 * are parsed but not used anywhere else in this function.
		 */
		sscanf(STRBUF(rec->httpheaders), "HTTP/%d.%d %d", &httpmajorver, &httpminorver, &rec->httpstatus);
		if (rec->httpstatus == 100) {
			/* 
			 * It's a "100" continue-status.
			 * Just drop this set of headers, and re-do the end-of-headers check.
			 *
			 * NOTE(review): endofhdrs is used after the chop above; if the chop
			 * NUL-terminated the buffer at that position, nothing is carried over
			 * into the new header buffer - verify against strbufferchop().
			 * Also, initialhdrbuflen still describes the old buffer after the restart.
			 */
			strbuffer_t *newhdrbuf = newstrbuffer(0);
			addtobuffer(newhdrbuf, endofhdrs);
			freestrbuffer(rec->httpheaders);
			rec->httpheaders = newhdrbuf;
			goto check_for_endofheaders;
		}

		/* Have all the http headers now */
		rec->httpdatastate = HTTPDATA_BODY;

		/* 
		 * Find the "Transfer-encoding: " header (if there is one) to see if the transfer uses chunks,
		 * and grab "Content-Length:" to get the length of the body.
		 * The scan walks the headers line by line and stops at the first of the two
		 * headers that yields a value (a Content-Length of 0 does not stop it).
		 * If rec->httpcontentleft is already non-zero on entry, the scan is skipped.
		 */
		xferencoding = NULL;
		bol = STRBUF(rec->httpheaders);
		while (bol && !xferencoding && !rec->httpcontentleft) {
			if (strncasecmp(bol, "Transfer-encoding:", 18) == 0) {
				bol += 18;
				bol += strspn(bol, " ");
				xferencoding = bol;
			}
			else if (strncasecmp(bol, "Content-Length:", 15) == 0) {
				bol += 15;
				bol += strspn(bol, " ");
				rec->httpcontentleft = atoi(bol);
			}
			else {
				/* Not a header we care about - move to the start of the next line */
				bol = strchr(bol, '\n'); if (bol) bol++;
			}
		}

		/* Pick the body-handling mode: chunked, fixed length, or "read until the peer stops sending" */
		if (xferencoding && (strncasecmp(xferencoding, "chunked", 7) == 0)) 
			rec->httpchunkstate = HTTP_CHUNK_INIT;
		else {
			rec->httpchunkstate = (rec->httpcontentleft > 0) ? HTTP_CHUNK_NOTCHUNKED : HTTP_CHUNK_NOTCHUNKED_NOCLEN;
		}

		/* Done with all the http header processing. Call ourselves to handle any remaining data we got after the headers */
		/* 
		 * To figure out how this works, here is the layout of rec->httpheaders. The first
		 * (initialhdrbuflen) part is what we had before this call to http_datahandler, the
		 * last (iobytes) part has been copied over from the current rec->readbuf.
		 * endofhdrs points into rec->httpheaders. bodyoffset and bodybytes are relative,
		 * so even though the body data is in rec->readbuf and NOT in rec->httpheaders, we can
		 * calculate the offset and length of the body data.
		 *
		 *                                   endofhdrs
		 *                                        !
		 *   !-----------------------------!----------------------------!
		 *
		 *   <......initialhdrbuflen.......>
		 *                                 <.........iobytes............>
		 *   <...............hdrbytes.....................>
		 *                                 <..bodyoffset..>
		 *                                                <..bodybytes..>
		 */
		hdrbytes = (endofhdrs - STRBUF(rec->httpheaders));
		bodyoffset = hdrbytes - initialhdrbuflen;
		bodybytes = iobytes - bodyoffset;
		/* State is now HTTPDATA_BODY, so this call runs the body branch; it also sets *advancestep */
		http_datahandler(rec, bodybytes, bodyoffset, advancestep);
		break;


	  case HTTPDATA_BODY:
		buf = rec->readbuf+startoffset;
		/*
		 * Each pass either consumes framing bytes (chunk length, CR/LF), changes the
		 * chunk state, or sets bodybytes to the number of payload bytes at buf.
		 */
		while (len > 0) {
			bodybytes = 0;

			switch (rec->httpchunkstate) {
			  case HTTP_CHUNK_NOTCHUNKED:
			  case HTTP_CHUNK_NOTCHUNKED_NOCLEN:
				/* No framing - everything we got is payload */
				bodybytes = len;
				break;

			  case HTTP_CHUNK_INIT:
				/* We're about to pick up a chunk length */
				rec->httpleftinchunk = 0;
				rec->httpchunkstate = HTTP_CHUNK_GETLEN;
				break;

			  case HTTP_CHUNK_GETLEN:
				/* We are collecting the length of the chunk, one hex digit per pass */
				n = hexvalue(*buf);
				if (n == -1) {
					/* Not a hex digit: the length is complete. The byte is left for the next state. */
					rec->httpchunkstate = HTTP_CHUNK_SKIPLENCR;
				}
				else {
					rec->httpleftinchunk = rec->httpleftinchunk*16 + n;
					buf++; len--;
				}
				break;

			  case HTTP_CHUNK_SKIPLENCR:
				/* We've got the length, now skip to the next LF. A zero length means this was the last chunk. */
				if (*buf == '\n') {
					buf++; len--; 
					rec->httpchunkstate = ((rec->httpleftinchunk > 0) ? HTTP_CHUNK_DATA : HTTP_CHUNK_NOMORE);
				}
				else if ((*buf == '\r') || (*buf == ' ')) {
					buf++; len--;
				}
				else {
					/* Unexpected byte: report it, then skip it and keep looking for the LF */
					errprintf("Yikes - strange data following chunk len. Saw a '%c'\n", *buf);
					buf++; len--;
				}
				break;

			  case HTTP_CHUNK_DATA:
				/* Passing off the data - at most what is left of the current chunk */
				bodybytes = (len > rec->httpleftinchunk) ? rec->httpleftinchunk : len;
				rec->httpleftinchunk -= bodybytes;
				if (rec->httpleftinchunk == 0) rec->httpchunkstate = HTTP_CHUNK_SKIPENDCR;
				break;

			  case HTTP_CHUNK_SKIPENDCR:
				/* Skip CR/LF after a chunk */
				if (*buf == '\n') {
					buf++; len--; rec->httpchunkstate = HTTP_CHUNK_DONE;
				}
				else if (*buf == '\r') {
					buf++; len--;
				}
				else {
					/* Unexpected byte: report it, then skip it and keep looking for the LF */
					errprintf("Yikes - strange data following chunk data. Saw a '%c'\n", *buf);
					buf++; len--;
				}
				break;

			  case HTTP_CHUNK_DONE:
				/*
				 * One chunk is done, continue with the next.
				 * NOTE(review): this goes to GETLEN rather than INIT, so httpleftinchunk is
				 * not explicitly reset here; it is 0 at this point only because the DATA
				 * state runs it down to 0 before leaving.
				 */
				rec->httpchunkstate = HTTP_CHUNK_GETLEN;
				break;

			  case HTTP_CHUNK_NOMORE:
				/* All chunks done. Skip the rest (trailers) */
				len = 0;
				break;
			}

			/* bodybytes holds the number of bytes data from buf that should go to userspace */
			if (bodybytes > 0) {
				addtobufferraw(rec->httpbody, buf, bodybytes);
				buf += bodybytes;
				len -= bodybytes;
				/*
				 * Count down the outstanding content length. The counter is left
				 * untouched when more data arrives than it says is outstanding.
				 */
				if ((rec->httpcontentleft > 0) && (rec->httpcontentleft >= bodybytes)) rec->httpcontentleft -= bodybytes;
				dbgprintf("HTTP bodybytes %d, %d bytes left\n", bodybytes, rec->httpcontentleft);
			}
		}

		/* Done processing body content. Now see if we have all of it - if we do, then proceed to next step. */
		dbgprintf("http chunkstate: %d\n",rec->httpchunkstate);
		switch (rec->httpchunkstate) {
		  case HTTP_CHUNK_NOTCHUNKED:
			/* Fixed length body: complete once the content-length countdown has reached zero */
			if (rec->httpcontentleft <= 0) *advancestep = 1;
			break;

		  case HTTP_CHUNK_NOTCHUNKED_NOCLEN:
			/*
			 * We have no content-length: header, so keep going until we do two NULL-reads.
			 * bodybytes is still 0 here only when this call had no data to process.
			 */
			if ((rec->httplastbodyread == 0) && (bodybytes == 0))
				*advancestep = 1;
			else
				rec->httplastbodyread = bodybytes;
			break;

		  case HTTP_CHUNK_NOMORE:
			/* Chunked body: the zero-length chunk has been seen */
			*advancestep = 1;
			break;

		  default:
			/* Still in the middle of a chunked transfer - wait for more data */
			break;
		}

		break;
	}

	return iobytes;
}
/*
 * unlimfgets() - read one logical line of arbitrary length from an input file.
 *
 * The file must be one that is registered in the fgetshead list; data is read
 * through that entry's fixed-size input buffer and assembled in "buffer".
 *
 * A line normally ends at a newline, which is kept in the returned text.
 * If the newline is preceded by a backslash (optionally with whitespace between
 * the backslash and the newline), the backslash, that whitespace and the
 * newline are dropped and the following line is appended to the same result.
 *
 * Parameters:
 *   buffer - output buffer; cleared before a line is assembled.
 *   fd     - the input stream to read from.
 *
 * Returns STRBUF(buffer) when a line (or a final unterminated fragment) was
 * read. Returns NULL when fd is not in the fgetshead list, or when the input
 * is exhausted; in the latter case the list entry for fd is unlinked and freed.
 */
char *unlimfgets(strbuffer_t *buffer, FILE *fd)
{
	fgetsbuf_t *fg;
	size_t n;
	char *eoln = NULL;

	for (fg = fgetshead; (fg && (fg->fd != fd)); fg = fg->next) ;
	if (!fg) {
		errprintf("umlimfgets() called with bad input FD\n"); 
		return NULL;
	}

	/* End of file ? */
	if (!(fg->moretoread) && (*(fg->inbufp) == '\0')) {
		if (fg == fgetshead) {
			fgetshead = fgetshead->next;
			free(fg);
		}
		else {
			fgetsbuf_t *prev;

			for (prev = fgetshead; (prev->next != fg); prev = prev->next) ;
			prev->next = fg->next;
			free(fg);
		}
		return NULL;
	}

	/* Make sure the output buffer is empty */
	clearstrbuffer(buffer);

	while (!eoln && (fg->moretoread || *(fg->inbufp))) {
		int continued = 0;

		if (*(fg->inbufp)) {
			/* Have some data in the buffer */
			eoln = strchr(fg->inbufp, '\n');
			if (eoln) { 
				/*
				 * See if there's a continuation character just before the eoln.
				 * Only look when there IS something before the newline: if the
				 * newline is the first pending byte, there is nothing to inspect
				 * and peeking at inbufp[-1] would read outside the pending data
				 * (and outside the buffer itself when inbufp == inbuf).
				 */
				char *contchar = NULL;

				if (eoln > fg->inbufp) {
					contchar = eoln - 1;
					/* The cast keeps isspace() defined for bytes >= 0x80 on signed-char platforms */
					while ((contchar > fg->inbufp) && isspace((unsigned char)*contchar)) contchar--;
					continued = (*contchar == '\\');
				}

				if (continued) {
					/* Drop the backslash and everything up to and including the newline, then keep reading */
					*contchar = '\0';
					addtobuffer(buffer, fg->inbufp);
					fg->inbufp = eoln+1; 
					eoln = NULL;
				}
				else {
					/* Temporarily terminate the string right after the newline so the newline is copied too */
					char savech = *(eoln+1); 
					*(eoln+1) = '\0';
					addtobuffer(buffer, fg->inbufp);
					*(eoln+1) = savech; 
					fg->inbufp = eoln+1; 
				}
			}
			else {
				/* No newline in buffer, so add all of it to the output buffer */
				addtobuffer(buffer, fg->inbufp);

				/* Input buffer is now empty */
				*(fg->inbuf) = '\0';
				fg->inbufp = fg->inbuf;
			}
		}

		if (!eoln && !continued) {
			/* Get data for the input buffer */
			char *inpos = fg->inbuf;
			size_t insize = sizeof(fg->inbuf);

			/*
			 * If the data collected so far ends in a continuation char (possibly
			 * followed by whitespace), we cannot tell yet whether it really is a
			 * continuation: that depends on whether a newline comes next. So move
			 * the backslash and any whitespace after it back into the front of the
			 * input buffer and let the normal scan above decide once more data is in.
			 *
			 * This is only done
			 *  - when the output buffer is non-empty (a newline sitting exactly at a
			 *    refill boundary leaves it empty, and there is then nothing to check);
			 *  - when more input may follow. At end of input no newline can arrive,
			 *    and moving the backslash back and forth between the two buffers
			 *    would never terminate;
			 *  - when the moved tail takes up less than half the input buffer, so
			 *    every refill still has room to make progress. A longer tail is left
			 *    in the output as ordinary data.
			 */
			if (fg->moretoread && STRBUF(buffer) && *STRBUF(buffer)) {
				char *bufend = STRBUF(buffer) + STRBUFLEN(buffer);
				char *contchar = bufend - 1;

				while ((contchar > STRBUF(buffer)) && isspace((unsigned char)*contchar)) contchar--;

				if (*contchar == '\\') {
					size_t taillen = (size_t)(bufend - contchar);

					if (taillen < (insize / 2)) {
						/* Copy first: the chop terminates the output buffer at contchar */
						memcpy(inpos, contchar, taillen);
						strbufferchop(buffer, (int)taillen);
						inpos += taillen;
						insize -= taillen;
					}
				}
			}

			/* Leave room for the terminator; a short read means there is nothing more to fetch */
			n = fread(inpos, 1, insize-1, fd);
			*(inpos + n) = '\0';
			fg->inbufp = fg->inbuf;
			if (n < insize-1) fg->moretoread = 0;
		}
	}

	return STRBUF(buffer);
}
/*
 * sanitize_input() - clean up one input line in place.
 *
 * Steps, in this order:
 *   1. Cut the line at the first '#' that is outside quotes. Note that this
 *      step runs when EITHER stripcomment or unescape is set. Both '"' and '\''
 *      toggle the quoted state; with unescape set, a backslash protects the
 *      character that follows it from being seen as a quote or a '#'.
 *   2. Cut the line at the first CR or LF.
 *   3. Remove trailing whitespace.
 *   4. Remove leading spaces and tabs.
 *   5. If unescape is set, remove each escaping backslash and keep the
 *      character after it literally (so "\\\\" becomes a single backslash).
 *
 * Parameters:
 *   l            - the line to sanitize; modified in place.
 *   stripcomment - non-zero to strip comments.
 *   unescape     - non-zero to process backslash escapes.
 */
void sanitize_input(strbuffer_t *l, int stripcomment, int unescape)
{
	int i;

	/* Kill comments */
	if (stripcomment || unescape) {
		char *p, *commentstart = NULL;
		/* Characters that need attention outside / inside a quoted section */
		const char *noquotemarkers = (unescape ? "\"'#\\" : "\"'#");
		const char *inquotemarkers = (unescape ? "\"'\\" : "\"'");
		int inquote = 0;

		p = STRBUF(l) + strcspn(STRBUF(l), noquotemarkers);
		while (*p && (commentstart == NULL)) {
			switch (*p) {
			  case '\\':
				if (*(p+1) == '\0') {
					/*
					 * A backslash as the very last character escapes nothing.
					 * Step onto the terminator; skipping two characters here
					 * would continue the scan beyond the end of the string.
					 */
					p++;
				}
				else if (inquote)
					p += 2+strcspn(p+2, inquotemarkers);
				else
					p += 2+strcspn(p+2, noquotemarkers);
				break;

			  case '"': 
			  case '\'':
				inquote = (1 - inquote);
				if (inquote)
					p += 1+strcspn(p+1, inquotemarkers);
				else
					p += 1+strcspn(p+1, noquotemarkers);
				break;

			  case '#':
				if (!inquote) 
					commentstart = p;
				else
					/* Not expected (the in-quote scan does not stop at '#'), but never stand still */
					p += 1+strcspn(p+1, inquotemarkers);
				break;

			  default:
				/* Not expected either: p only ever stops on a marker. Guarantee progress. */
				p++;
				break;
			}
		}

		if (commentstart) strbufferchop(l, STRBUFLEN(l) - (commentstart - STRBUF(l)));
	}

	/* Kill a trailing CR/NL */
	i = strcspn(STRBUF(l), "\r\n");
	if (i != STRBUFLEN(l)) strbufferchop(l, STRBUFLEN(l)-i);

	/* Kill trailing whitespace. The cast keeps isspace() defined for bytes >= 0x80 on signed-char platforms. */
	i = STRBUFLEN(l);
	while ((i > 0) && isspace((unsigned char)(*(STRBUF(l)+i-1)))) i--;
	if (i != STRBUFLEN(l)) strbufferchop(l, STRBUFLEN(l)-i);

	/* Kill leading whitespace */
	i = strspn(STRBUF(l), " \t");
	if (i > 0) {
		/* Shift the text down, then let the chop shorten the buffer and re-terminate it */
		memmove(STRBUF(l), STRBUF(l)+i, STRBUFLEN(l)-i);
		strbufferchop(l, i);
	}

	if (unescape) {
		char *p;

		p = STRBUF(l) + strcspn(STRBUF(l), "\\");
		while (*p) {
			/* Remove the backslash: pull the rest of the string, terminator included, one position down */
			memmove(p, p+1, STRBUFLEN(l)-(p-STRBUF(l)));
			strbufferchop(l, 1);

			/* The backslash was the last character - nothing left to look at */
			if (*p == '\0') break;

			/* p now sits on the escaped character; keep it as-is and look for the next backslash after it */
			p = p + 1 + strcspn(p+1, "\\");
		}
	}
}