static int cs_complete_http(const char *buf, int len, int head_only) { /* deal with HTTP request/response */ int i, content_len = 0, chunked = 0; /* need at least one line followed by \n or \r .. */ for (i = 0; ; i++) if (i == len) return 0; /* incomplete */ else if (buf[i] == '\n' || buf[i] == '\r') break; /* check to see if it's a response with content */ if (!head_only && !memcmp(buf, "HTTP/", 5)) { int j; for (j = 5; j < i; j++) if (buf[j] == ' ') { ++j; if (buf[j] == '1') /* 1XX */ ; else if (!memcmp(buf + j, "204", 3)) ; else if (!memcmp(buf + j, "304", 3)) ; else content_len = -1; break; } } #if 0 printf("len = %d\n", len); fwrite (buf, 1, len, stdout); printf("----------\n"); #endif for (i = 2; i <= len-2; ) { if (i > 8192) { return i; /* do not allow more than 8K HTTP header */ } if (skip_crlf(buf, len, &i)) { if (skip_crlf(buf, len, &i)) { /* inside content */ if (chunked) return cs_read_chunk(buf, i, len); else { /* not chunked ; inside body */ if (content_len == -1) return 0; /* no content length */ else if (len >= i + content_len) { return i + content_len; } } break; } else if (i < len - 20 && !yaz_strncasecmp((const char *) buf+i, "Transfer-Encoding:", 18)) { i+=18; while (buf[i] == ' ') i++; if (i < len - 8) if (!yaz_strncasecmp((const char *) buf+i, "chunked", 7)) chunked = 1; } else if (i < len - 17 && !yaz_strncasecmp((const char *)buf+i, "Content-Length:", 15)) { i+= 15; while (buf[i] == ' ') i++; content_len = 0; while (i <= len-4 && yaz_isdigit(buf[i])) content_len = content_len*10 + (buf[i++] - '0'); if (content_len < 0) /* prevent negative offsets */ content_len = 0; } else i++; } else i++; } return 0; }
/*
 * Walk the NUL-terminated string cp and report what is found to event.
 *
 * Text between pieces of markup is passed to tagText().  After a '<' one of
 * four constructs is recognised:
 *   "<!"   - declaration or "<!-- ... -->" comment  (only while nest is set)
 *   "<?"   - processing instruction                 (only while nest is set)
 *   "</x"  - closing tag, x alphabetic
 *   "<x"   - opening tag, x alphabetic              (only while nest is set)
 * Any other '<' is left as part of the surrounding text.
 *
 * The nest flag (declared outside this function) is cleared after an opening
 * "script" tag and set again once "</script", optional space and '>' is seen.
 * While it is clear, only closing tags are examined, so everything else
 * stays in the pending text run.
 */
void mp::HTMLParser::Rep::parse_str(HTMLParserEvent &event, const char *cp)
{
    /* first character of the text run not yet handed to tagText() */
    const char *pending = cp;

    while (*cp)
    {
        if (*cp != '<')
        {
            ++cp;
            continue;
        }
        ++cp; /* cp is now the character right after '<' */

        if (nest && *cp == '!')
        {
            int span;

            tagText(event, pending, cp - 1);
            if (cp[1] == '-' && cp[2] == '-')
            {
                /* comment: look for the closing "-->".  If the input ends
                   first, openTagStart() is not called at all. */
                span = 3;
                while (cp[span])
                {
                    if (cp[span] == '-' && cp[span + 1] == '-'
                        && cp[span + 2] == '>')
                    {
                        span += 2; /* leave span on the '>' */
                        event.openTagStart(cp, span);
                        break;
                    }
                    ++span;
                }
            }
            else
            {
                /* other declaration: runs up to '>' or end of input */
                span = 1;
                while (cp[span] && cp[span] != '>')
                    ++span;
                event.openTagStart(cp, span);
            }
            if (m_verbose)
                printf("------ dtd %.*s\n", span, cp);
            span += tagEnd(event, cp, span, cp + span);
            cp += span;
            pending = cp;
        }
        else if (nest && *cp == '?')
        {
            int span = 1;

            tagText(event, pending, cp - 1);
            /* processing instruction: runs up to '>' or end of input */
            while (cp[span] && cp[span] != '>')
                ++span;
            event.openTagStart(cp, span);
            if (m_verbose)
                printf("------ pi %.*s\n", span, cp);
            span += tagEnd(event, cp, span, cp + span);
            cp += span;
            pending = cp;
        }
        else if (*cp == '/' && isAlpha(cp[1]))
        {
            int name_len;

            ++cp; /* step over '/', cp is now the tag name */
            name_len = skipName(cp);
            if (!nest)
            {
                /* only a properly terminated </script> switches markup
                   recognition back on */
                if (name_len == 6 && !yaz_strncasecmp(cp, "script", name_len))
                {
                    int ws = skipSpace(cp + 6);
                    if (cp[6 + ws] == '>')
                        nest = true; /* really terminated */
                }
                if (!nest)
                    continue; /* still not nesting: keep it as text */
            }
            /* cp - 2 is the '<' that opened this closing tag */
            tagText(event, pending, cp - 2);
            event.closeTag(cp, name_len);
            if (m_verbose)
                printf("------ tag close %.*s\n", name_len, cp);
            name_len += tagEnd(event, cp, name_len, cp + name_len);
            cp += name_len;
            pending = cp;
        }
        else if (nest && isAlpha(*cp))
        {
            int name_len, rest;

            tagText(event, pending, cp - 1);
            name_len = skipName(cp);
            event.openTagStart(cp, name_len);
            if (m_verbose)
                printf("------ tag open %.*s\n", name_len, cp);
            /* rest = characters consumed after the name by the attribute
               and tag-end handlers */
            rest = tagAttrs(event, cp, name_len, cp + name_len);
            rest += tagEnd(event, cp, name_len, cp + name_len + rest);
            if (name_len == 6 && !yaz_strncasecmp(cp, "script", name_len))
                nest = false; /* stop recognising markup until </script> */
            cp += name_len + rest;
            pending = cp;
        }
    }
    /* whatever is left after the last piece of markup */
    tagText(event, pending, cp);
}