/* Extract the path-like tail of the WARC-Target-URI header value.
 *
 * BUF/BSZ delimit the header block to search.  The key is matched as
 * "\r\nWARC-Target-URI:", i.e. including the preceding line break, so
 * a header on the very first line of BUF is not found.
 *
 * Only values beginning with "file", "http" or "ftp" are accepted
 * (prefix match, so e.g. "https" passes too):
 * - "file...": the result starts right after "://";
 * - "http..."/"ftp...": the result starts after the first '/' that
 *   follows "://" (or is empty if there is none on the line).
 * Trailing whitespace before the line break is not part of the result.
 *
 * Returns a view into BUF (res.str/res.len, not NUL-terminated), or
 * the zero-initialised result (len 0, str NULL) if the header is
 * missing, has no "://" on its line, has no terminating '\n', or uses
 * another scheme. */
static warc_string_t
_warc_rduri(const char *buf, size_t bsz)
{
	static const char _key[] = "\r\nWARC-Target-URI:";
	const char *const eob = buf + bsz;
	const char *val;
	const char *uri;
	const char *eol;
	warc_string_t res = {0U, NULL};

	if ((val = xmemmem(buf, bsz, _key, sizeof(_key) - 1U)) == NULL) {
		/* no bother */
		return res;
	}
	/* overread whitespace; the cast keeps isspace() well-defined
	 * for bytes that are negative as plain char */
	for (val += sizeof(_key) - 1U;
	     val < eob && isspace((unsigned char)*val); val++);

	/* pin down the end of the value's line first, so that the
	 * search for :// below cannot wander into subsequent headers */
	if (val >= eob || (eol = memchr(val, '\n', eob - val)) == NULL) {
		/* no end of line? :O */
		return res;
	}
	/* overread URL designators, within this line only */
	if ((uri = xmemmem(val, eol - val, "://", 3U)) == NULL) {
		/* not touching that! */
		return res;
	}

	/* massage uri to point to after ://
	 * this cannot pass eol as :// was found in front of it */
	uri += 3U;
	/* also massage eol to point to the first whitespace
	 * after the last non-whitespace character before
	 * the end of the line */
	for (; eol > uri && isspace((unsigned char)eol[-1]); eol--);

	/* now then, inspect the URI's scheme */
	if (memcmp(val, "file", 4U) == 0) {
		/* perfect, nothing left to do here */

	} else if (memcmp(val, "http", 4U) == 0 ||
		   memcmp(val, "ftp", 3U) == 0) {
		/* overread domain, and the first / */
		while (uri < eol && *uri++ != '/');
	} else {
		/* not sure what to do? best to bugger off */
		return res;
	}
	res.str = uri;
	res.len = eol - uri;
	return res;
}
/* Map the value of the WARC-Type header onto one of the WT_* codes.
 *
 * BUF/BSZ delimit the header block to search.  The key is matched as
 * "\r\nWARC-Type:", i.e. including the preceding line break.
 * Only the leading bytes of the value are compared, so "conversi"
 * and "continua" match any value starting with those 8 characters
 * and "request" any value starting with those 7.
 *
 * Returns WT_NONE if the header is absent, the value is not one of
 * the recognised keywords, or the buffer ends before the keyword
 * is complete. */
static unsigned int
_warc_rdtyp(const char *buf, size_t bsz)
{
	static const char _key[] = "\r\nWARC-Type:";
	const char *const eob = buf + bsz;
	const char *val;
	size_t left;

	if ((val = xmemmem(buf, bsz, _key, sizeof(_key) - 1U)) == NULL) {
		/* no bother */
		return WT_NONE;
	}
	/* overread whitespace; the cast keeps isspace() well-defined
	 * for bytes that are negative as plain char */
	for (val += sizeof(_key) - 1U;
	     val < eob && isspace((unsigned char)*val); val++);

	/* bytes available for the keyword; every comparison checks
	 * its own length so the 7-byte "request" is still recognised
	 * when it is the very last thing in the buffer */
	left = (size_t)(eob - val);

	if (left >= 8U && memcmp(val, "resource", 8U) == 0) {
		return WT_RSRC;
	} else if (left >= 8U && memcmp(val, "warcinfo", 8U) == 0) {
		return WT_INFO;
	} else if (left >= 8U && memcmp(val, "metadata", 8U) == 0) {
		return WT_META;
	} else if (left >= 7U && memcmp(val, "request", 7U) == 0) {
		return WT_REQ;
	} else if (left >= 8U && memcmp(val, "response", 8U) == 0) {
		return WT_RSP;
	} else if (left >= 8U && memcmp(val, "conversi", 8U) == 0) {
		return WT_CONV;
	} else if (left >= 8U && memcmp(val, "continua", 8U) == 0) {
		return WT_CONT;
	}
	return WT_NONE;
}
Exemple #3
0
static int
deinfix1(const char *s, size_t z)
{
	if (xmemmem(pool, npool, s, z)) {
		return -1;
	}
	if (npool + z >= zpool) {
		/* resize */
		zpool = zpool * 2U ?: 4096U;
		pool = realloc(pool, zpool);
	}
/* Locate the first blank line ("\r\n\r\n") within BUF of size BSZ.
 *
 * Returns a pointer to the byte right behind that 4-byte sequence,
 * or NULL if the sequence does not occur in the buffer. */
static const char*
_warc_find_eoh(const char *buf, size_t bsz)
{
	static const char crlf2[] = "\r\n\r\n";
	const size_t ncrlf2 = sizeof(crlf2) - 1U;
	const char *pos;

	pos = xmemmem(buf, bsz, crlf2, ncrlf2);
	/* step over the marker itself when we found one */
	return pos != NULL ? pos + ncrlf2 : NULL;
}
Exemple #5
0
/* Fetch one line from CTX's reader and dissect it into MSG.
 *
 * The line is expected to start with a receive time stamp, which is
 * handed to parse_rcv_stmp(), followed somewhere by
 * "PRIVMSG <chan> :<payload>".  On success MSG->chan points to the
 * channel name, MSG->json to the payload and MSG->msglen holds the
 * payload length; both strings live inside the line buffer, which
 * is modified in place to NUL-terminate them.
 *
 * Returns true only for such a PRIVMSG line.  Returns false if the
 * time stamp cannot be parsed (the line is then echoed to stderr) or
 * if the line is not of that shape; in the latter case MSG->ty and
 * MSG->chan may already have been set and the blank after the
 * channel may already have been overwritten. */
static bool
read_line(mux_ctx_t ctx, ff_msg_t msg)
{
	static const char cmd_pmsg[] = "PRIVMSG";
	static const size_t cmd_pmsg_sz = sizeof(cmd_pmsg) - 1;
	char *line;
	size_t nleft;
	const char *cursor;
	char *ptr;

	nleft = prchunk_getline(ctx->rdr, &line);

	/* start out with an empty message */
	msg->chan = NULL;
	msg->json = NULL;
	msg->msglen = 0UL;

	/* the receive time stamp always comes first on the line */
	cursor = line;
	if (UNLIKELY(parse_rcv_stmp(msg, &cursor) < 0)) {
		/* declare the line unusable and show it */
		fputs("line b0rked\n> ", stderr);
		fputs(line, stderr);
		fputc('\n', stderr);
		return false;
	}
	/* whatever the time stamp parser consumed is gone */
	nleft -= cursor - line;

	/* the only message type we care about */
	ptr = xmemmem(cursor, nleft, cmd_pmsg, cmd_pmsg_sz);
	if (!ptr) {
		return false;
	}
	msg->ty = FF_MSG_JSON;
	/* channel name begins behind the command and one separator */
	msg->chan = ptr + cmd_pmsg_sz + 1;

	ptr = strchr(msg->chan, ' ');
	if (!ptr) {
		return false;
	}
	/* finalise the channel name */
	*ptr++ = '\0';

	if (*ptr++ != ':') {
		return false;
	}
	/* a normal channel message, PTR sits on the payload now */
	nleft -= ptr - cursor;
	msg->json = ptr;
	msg->msglen = nleft;
	ptr[nleft] = '\0';
	return true;
}
/* Read the time stamp of the Last-Modified header.
 *
 * BUF/BSZ delimit the header block to search.  The key is matched as
 * "\r\nLast-Modified:", i.e. including the preceding line break.
 * The value is handed to xstrpisotime() and only trusted if the
 * parser's end pointer is set and rests on a whitespace character.
 *
 * Returns the parsed time, or (time_t)-1 if the header is absent or
 * the value could not be parsed cleanly.
 *
 * NOTE(review): the value is parsed without regard to BSZ, so this
 * presumably relies on readable bytes following the value in BUF
 * -- confirm against callers. */
static time_t
_warc_rdmtm(const char *buf, size_t bsz)
{
	static const char _key[] = "\r\nLast-Modified:";
	const char *val;
	char *on = NULL;
	time_t res;

	if ((val = xmemmem(buf, bsz, _key, sizeof(_key) - 1U)) == NULL) {
		/* no bother */
		return (time_t)-1;
	}

	/* xstrpisotime() kindly overreads whitespace for us, so use that */
	val += sizeof(_key) - 1U;
	res = xstrpisotime(val, &on);
	/* the cast keeps isspace() well-defined should the byte behind
	 * the time stamp be negative as plain char */
	if (on == NULL || !isspace((unsigned char)*on)) {
		/* hm, can we trust that number?  Best not. */
		return (time_t)-1;
	}
	return res;
}
/* Read the numeric value of the Content-Length header.
 *
 * BUF/BSZ delimit the header block to search.  The key is matched as
 * "\r\nContent-Length:", i.e. including the preceding line break.
 *
 * Returns the length as a non-negative number, or -1 if the header
 * is absent, its value contains no digits, is negative, or is not
 * followed by whitespace.
 *
 * NOTE(review): strtol() is not bounded by BSZ, so this presumably
 * relies on a non-digit byte following the value in BUF -- confirm
 * against callers. */
static ssize_t
_warc_rdlen(const char *buf, size_t bsz)
{
	static const char _key[] = "\r\nContent-Length:";
	const char *val;
	char *on;
	long int len;

	if ((val = xmemmem(buf, bsz, _key, sizeof(_key) - 1U)) == NULL) {
		/* no bother */
		return -1;
	}

	/* strtol kindly overreads whitespace for us, so use that */
	val += sizeof(_key) - 1U;
	len = strtol(val, &on, 10);
	if (on == val) {
		/* no digits were converted at all; without this check
		 * a blank right after the key would pass as length 0 */
		return -1;
	} else if (!isspace((unsigned char)*on)) {
		/* hm, can we trust that number?  Best not. */
		return -1;
	} else if (len < 0) {
		/* negative lengths make no sense, and -1 is our
		 * error indicator anyway */
		return -1;
	}
	return (ssize_t)len;
}