static warc_string_t _warc_rduri(const char *buf, size_t bsz) { static const char _key[] = "\r\nWARC-Target-URI:"; const char *const eob = buf + bsz; const char *val; const char *uri; const char *eol; warc_string_t res = {0U, NULL}; if ((val = xmemmem(buf, bsz, _key, sizeof(_key) - 1U)) == NULL) { /* no bother */ return res; } /* overread whitespace */ for (val += sizeof(_key) - 1U; val < eob && isspace(*val); val++); /* overread URL designators */ if ((uri = xmemmem(val, eob - val, "://", 3U)) == NULL) { /* not touching that! */ return res; } else if ((eol = memchr(uri, '\n', eob - uri)) == NULL) { /* no end of line? :O */ return res; } /* massage uri to point to after :// */ uri += 3U; /* also massage eol to point to the first whitespace * after the last non-whitespace character before * the end of the line */ for (; eol > uri && isspace(eol[-1]); eol--); /* now then, inspect the URI */ if (memcmp(val, "file", 4U) == 0) { /* perfect, nothing left to do here */ } else if (memcmp(val, "http", 4U) == 0 || memcmp(val, "ftp", 3U) == 0) { /* overread domain, and the first / */ while (uri < eol && *uri++ != '/'); } else { /* not sure what to do? best to bugger off */ return res; } res.str = uri; res.len = eol - uri; return res; }
static unsigned int _warc_rdtyp(const char *buf, size_t bsz) { static const char _key[] = "\r\nWARC-Type:"; const char *const eob = buf + bsz; const char *val; if ((val = xmemmem(buf, bsz, _key, sizeof(_key) - 1U)) == NULL) { /* no bother */ return WT_NONE; } /* overread whitespace */ for (val += sizeof(_key) - 1U; val < eob && isspace(*val); val++); if (val + 8U > eob) { ; } else if (memcmp(val, "resource", 8U) == 0) { return WT_RSRC; } else if (memcmp(val, "warcinfo", 8U) == 0) { return WT_INFO; } else if (memcmp(val, "metadata", 8U) == 0) { return WT_META; } else if (memcmp(val, "request", 7U) == 0) { return WT_REQ; } else if (memcmp(val, "response", 8U) == 0) { return WT_RSP; } else if (memcmp(val, "conversi", 8U) == 0) { return WT_CONV; } else if (memcmp(val, "continua", 8U) == 0) { return WT_CONT; } return WT_NONE; }
static int deinfix1(const char *s, size_t z) { if (xmemmem(pool, npool, s, z)) { return -1; } if (npool + z >= zpool) { /* resize */ zpool = zpool * 2U ?: 4096U; pool = realloc(pool, zpool); }
/**
 * Locate the end of the header block in the first BSZ bytes of BUF.
 *
 * The header block ends with an empty line, i.e. the byte sequence
 * CR LF CR LF.  Returns a pointer to the first byte after that
 * sequence, or NULL if the sequence does not occur. */
static const char*
_warc_find_eoh(const char *buf, size_t bsz)
{
	static const char eoh[] = "\r\n\r\n";
	const size_t eoh_len = sizeof(eoh) - 1U;
	const char *pos = xmemmem(buf, bsz, eoh, eoh_len);

	if (pos == NULL) {
		/* no blank line in sight */
		return NULL;
	}
	/* step over the terminator itself */
	return pos + eoh_len;
}
/**
 * Fetch the next line from CTX's reader and parse it into MSG.
 *
 * A line consists of a receive time stamp (handled by
 * parse_rcv_stmp()) followed by the message proper.  Only PRIVMSG
 * lines of the form "PRIVMSG <chan> :<payload>" are accepted; for
 * those MSG->chan is pointed at the NUL-terminated channel name and
 * MSG->json/MSG->msglen at the payload.  The line buffer is modified
 * in place to terminate both strings.
 *
 * The code relies on the line being NUL-terminated (it is handed to
 * strchr() and fputs()).
 *
 * Returns true if a channel message was parsed, false otherwise.  A
 * line whose time stamp cannot be parsed is additionally reported on
 * stderr. */
static bool
read_line(mux_ctx_t ctx, ff_msg_t msg)
{
	static const char cmd_pmsg[] = "PRIVMSG";
	static const size_t cmd_pmsg_sz = sizeof(cmd_pmsg) - 1;
	char *line;
	size_t llen;
	const char *cursor;
	char *p;

	llen = prchunk_getline(ctx->rdr, &line);

	/* we parse the line in 3 steps, receive time stamp, symbol, values */
	cursor = line;
	msg->chan = NULL;
	msg->json = NULL;
	msg->msglen = 0UL;

	/* receive time stamp, always first on line */
	if (UNLIKELY(parse_rcv_stmp(msg, &cursor) < 0)) {
		goto bugger;
	}

	/* parse the rest, llen is now the length from cursor onwards */
	llen -= cursor - line;

	/* message types */
	if ((p = xmemmem(cursor, llen, cmd_pmsg, cmd_pmsg_sz)) == NULL) {
		return false;
	}
	msg->ty = FF_MSG_JSON;

	/* the channel starts one separator byte after the command, make
	 * sure that position is still within the line (it may coincide
	 * with the terminator) before handing it to strchr() */
	if ((size_t)(p - cursor) + cmd_pmsg_sz + 1U > llen) {
		return false;
	}
	msg->chan = p + cmd_pmsg_sz + 1;

	if ((p = strchr(msg->chan, ' ')) == NULL) {
		return false;
	}
	/* terminate the channel name */
	*p++ = '\0';
	if (*p++ != ':') {
		return false;
	}
	/* ah a normal channel message, everything up to the end of
	 * the line is payload */
	llen -= p - cursor;
	msg->json = p;
	msg->msglen = llen;
	p[llen] = '\0';
	return true;

bugger:
	/* declare the line f****d */
	fputs("line b0rked\n> ", stderr);
	fputs(line, stderr);
	fputc('\n', stderr);
	return false;
}
/**
 * Read the modification time from the Last-Modified header.
 *
 * Searches the first BSZ bytes of BUF for "\r\nLast-Modified:" and
 * passes the text after the colon to xstrpisotime().  The result is
 * only trusted if the parser stopped at a whitespace character.
 *
 * Returns the parsed time, or (time_t)-1 if the header is absent or
 * the value is not followed by whitespace. */
static time_t
_warc_rdmtm(const char *buf, size_t bsz)
{
	static const char _key[] = "\r\nLast-Modified:";
	const char *val;
	char *on = NULL;
	time_t res;

	if ((val = xmemmem(buf, bsz, _key, sizeof(_key) - 1U)) == NULL) {
		/* no bother */
		return (time_t)-1;
	}
	/* xstrpisotime() kindly overreads whitespace for us, so use that */
	val += sizeof(_key) - 1U;
	res = xstrpisotime(val, &on);
	/* the cast keeps isspace() well-defined for bytes with the
	 * high bit set */
	if (on == NULL || !isspace((unsigned char)*on)) {
		/* hm, can we trust that number?  Best not. */
		return (time_t)-1;
	}
	return res;
}
/**
 * Read the payload length from the Content-Length header.
 *
 * Searches the first BSZ bytes of BUF for "\r\nContent-Length:" and
 * parses the decimal number after the colon with strtol().  The
 * number is only trusted if at least one digit was consumed, it is
 * not negative and it is followed by whitespace.
 *
 * NOTE(review): strtol() is not bounded by BSZ, this presumably
 * relies on BUF being NUL-terminated or on the header block ending
 * in non-digit bytes -- confirm against the callers.
 *
 * Returns the length, or -1 if the header is absent or malformed. */
static ssize_t
_warc_rdlen(const char *buf, size_t bsz)
{
	static const char _key[] = "\r\nContent-Length:";
	const char *val;
	char *on = NULL;
	long int len;

	if ((val = xmemmem(buf, bsz, _key, sizeof(_key) - 1U)) == NULL) {
		/* no bother */
		return -1;
	}
	/* strtol kindly overreads whitespace for us, so use that */
	val += sizeof(_key) - 1U;
	len = strtol(val, &on, 10);
	if (on == NULL || on == val) {
		/* no digits at all, that's not a length */
		return -1;
	} else if (!isspace((unsigned char)*on)) {
		/* hm, can we trust that number?  Best not. */
		return -1;
	} else if (len < 0) {
		/* negative lengths would be mistaken for other errors */
		return -1;
	}
	return (ssize_t)len;
}