示例#1
0
文件: html.c 项目: mruettgers/esniper
/*
 * Get the text of the next tag, i.e. what lies between '<' and '>'.
 *
 * Input is discarded up to and including the next '<'.  Then:
 *  - "<>" yields the empty string "".
 *  - A tag starting with "!--" is read as a comment: it ends at the first
 *    '>' that directly follows "--" in the collected text.  A whitespace
 *    character is dropped only when the previously stored character is a
 *    space; everything else is stored unchanged.
 *  - In any other tag, '"' toggles "inside a quoted string".  Inside a
 *    string, '>' and whitespace are stored as-is.  Outside a string, '>'
 *    ends the tag and a run of whitespace is stored as one space.  A
 *    backslash is stored together with the character that follows it,
 *    without interpreting that character.
 *
 * The very first character after '<' is stored without any whitespace
 * handling, and whitespace right before the closing '>' is kept.
 *
 * Returns NULL if the input ends before a tag starts or right after '<'.
 * Otherwise returns the collected text; except for the "" case this is a
 * function-static buffer that the next call reuses.
 */
const char *
getTag(memBuf_t *mp)
{
	static char *buf = NULL;
	static size_t bufsize = 0;
	size_t count = 0;
	int quoted = 0, isComment = 0;
	int ch, next;

	if (memEof(mp)) {
		log(("getTag(): returning NULL\n"));
		return NULL;
	}

	/* discard input up to and including the opening '<' */
	do {
		ch = memGetc(mp);
	} while (ch != EOF && ch != '<');
	if (ch == EOF) {
		log(("getTag(): returning NULL\n"));
		return NULL;
	}

	/* first character: end of input, empty tag, or start of the text */
	ch = memGetc(mp);
	if (ch == EOF) {
		log(("getTag(): returning NULL\n"));
		return NULL;
	}
	if (ch == '>') {
		log(("getTag(): returning empty tag\n"));
		return "";
	}
	addchar(buf, bufsize, count, (char)ch);

	/* "!--" introduces a comment; "!" or "!-" may also end right here */
	if (ch == '!') {
		next = memGetc(mp);
		if (next == '>' || next == EOF)
			goto done;
		addchar(buf, bufsize, count, (char)next);
		if (next == '-') {
			next = memGetc(mp);
			if (next == '>' || next == EOF)
				goto done;
			addchar(buf, bufsize, count, (char)next);
			isComment = 1;
		}
	}

	if (isComment) {
		/* count >= 3 here ("!--"), so the look-behind below is safe */
		for (;;) {
			ch = memGetc(mp);
			if (ch == EOF)
				break;
			if (ch == '>' && buf[count-1] == '-' && buf[count-2] == '-')
				goto done;
			if (!(isspace(ch) && buf[count-1] == ' ')) {
				addchar(buf, bufsize, count, (char)ch);
			}
		}
	} else {
		for (;;) {
			ch = memGetc(mp);
			if (ch == EOF)
				break;
			if (ch == '\\') {
				/* keep the backslash and whatever follows it */
				addchar(buf, bufsize, count, (char)ch);
				ch = memGetc(mp);
				if (ch == EOF)
					goto done;
				addchar(buf, bufsize, count, (char)ch);
			} else if (ch == '>') {
				if (!quoted)
					goto done;
				addchar(buf, bufsize, count, (char)ch);
			} else if (ch == ' ' || ch == '\n' || ch == '\r' ||
				   ch == '\t' || ch == '\v') {
				if (quoted) {
					addchar(buf, bufsize, count, (char)ch);
				} else if (count > 0 && buf[count-1] != ' ') {
					addchar(buf, bufsize, count, ' ');
				}
			} else {
				if (ch == '"')
					quoted = !quoted;
				addchar(buf, bufsize, count, (char)ch);
			}
		}
	}

	/* input ended before the tag was closed */
	term(buf, bufsize, count);
	log(("getTag(): returning %s\n", count ? buf : "NULL"));
	return count ? buf : NULL;

done:
	/* shared exit for every path that returns the collected text */
	term(buf, bufsize, count);
	log(("getTag(): returning %s\n", buf));
	return buf;
}
示例#2
0
文件: http.c 项目: zuloo/esniper
/*
 * Get META refresh URL (if any).
 *
 * Repeatedly looks for "<meta" with memStr(), copies the characters that
 * memGetc() then delivers up to (not including) the next '>' or EOF into
 * a buffer, and checks that text for
 *     http-equiv="Refresh"   (value compared case-insensitively)
 *     content="<delay>; url = <target>"
 * The attribute names "http-equiv=" and "content=\"" are matched
 * case-sensitively; "url" is matched case-insensitively and may have
 * whitespace around the '='.
 *
 * Returns a pointer to the target URL, cut off at the next '"', or NULL
 * if no such tag is found.  The URL lives in a function-static buffer
 * that is overwritten by the next call.  memReset(mp) is called before
 * returning in either case.
 */
char *
memGetMetaRefresh(memBuf_t *mp)
{
	static char *buf = NULL;
	static size_t bufsize = 0;
	char *metaRefresh = NULL;

	if (!buf) {
		bufsize = 1024;
		buf = myMalloc(bufsize);
	}

	/* look for all "meta" tags until Refresh found */
	while (!metaRefresh && memStr(mp, "<meta") != NULL) {
		size_t len = 0;
		char *cp;
		char *url;
		int c;

		/*
		 * Copy whole tag to buffer for processing.  The write position
		 * is kept as an index rather than a pointer: myRealloc() may
		 * move the buffer, which would leave a pointer into the old
		 * block dangling.
		 */
		for (c = memGetc(mp); c != EOF && c != '>'; c = memGetc(mp)) {
			/* grow before writing; always keep room for the '\0' */
			if (len + 1 >= bufsize) {
				bufsize += 1024;
				buf = myRealloc(buf, bufsize);
			}
			buf[len++] = (char)c;
		}

		/* terminate string */
		buf[len] = '\0';
		log(("found META tag: %s", buf));

		cp = strstr(buf, "http-equiv=");
		if (!cp) {
			log(("no http-equiv, looking for next"));
			continue;
		}
		cp += 11;	/* strlen("http-equiv=") */

		if (strncasecmp(cp, "\"Refresh\"", 9)) {
			log(("no Refresh, looking for next"));
			continue;
		}

		cp = strstr(buf, "content=\"");
		if (!cp) {
			log(("no content, looking for next"));
			continue;
		}
		cp += 9;	/* strlen("content=\"") */

		/* skip delay value (everything until ';') */
		while (*cp && *cp != ';')
			cp++;
		/* if not end of string skip ';' */
		if (*cp)
			cp++;
		/* and skip whitespace (cast: isspace() needs an unsigned char value) */
		while (*cp && isspace((unsigned char)*cp))
			cp++;

		/* now there should be "url=" with optional whitespace around '=' */
		if (strncasecmp(cp, "url", 3)) {
			log(("no url key, looking for next"));
			continue;
		}
		cp += 3;

		while (*cp && isspace((unsigned char)*cp))
			cp++;
		if (*cp != '=') {
			log(("no = after url, looking for next"));
			continue;
		}
		cp++;
		while (*cp && isspace((unsigned char)*cp))
			cp++;

		/* this is the beginning of the redirection URL */
		url = cp;
		cp = strchr(url, '"');
		if (!cp) {
			log(("no closing \", looking for next"));
			continue;
		}
		/* cut off terminating '"' and other trailing garbage */
		*cp = '\0';
		metaRefresh = url;
	}

	if (metaRefresh)
		log(("found redirection"));
	else
		log(("no redirection found"));

	memReset(mp);

	return metaRefresh;
}
示例#3
0
文件: html.c 项目: mruettgers/esniper
/*
 * Get next non-tag text, eliminating leading and trailing whitespace
 * and leaving only a single space for all internal whitespace.
 *
 * Characters are collected until a '<' is read.  If text has been
 * collected by then, memUngetc() is called for the '<' and the text is
 * returned; if not, getTag() is called to consume that tag and
 * collection continues after it.
 *
 * Besides ASCII whitespace, the bytes 0x82, 0xC2, 0xC3 and 0xA0 are
 * treated as whitespace.
 *
 * Character references of the form "&name;" / "&#number;" are decoded
 * when the terminating ';' is read:
 *   &amp; -> '&'    &lt; -> '<'    &gt; -> '>'    &quot; -> '"'
 *   &#N; / &#xH; -> the single byte with that value, for 1..255 only
 *   &nbsp; -> removed (see the note in the code)
 * Any other reference, including numeric ones outside 1..255 or ones that
 * do not parse, is left in the text unchanged.
 *
 * Returns NULL if there is no more text.  Otherwise returns the text in
 * a function-static buffer that the next call reuses.
 */
char *
getNonTag(memBuf_t *mp)
{
	static char *buf = NULL;
	static size_t bufsize = 0;
	size_t count = 0, amp = 0;	/* amp: index just past a pending '&', 0 if none */
	int c;

	if (memEof(mp)) {
		log(("getNonTag(): returning NULL\n"));
		return NULL;
	}
	while ((c = memGetc(mp)) != EOF) {
		switch (c) {
		case '<':
			memUngetc(mp);
			if (count) {
				if (buf[count-1] == ' ')
					--count;
				term(buf, bufsize, count);
				log(("getNonTag(): returning %s\n", buf));
				return buf;
			} else
				(void)getTag(mp);
			break;
		case ' ':
		case '\n':
		case '\r':
		case '\t':
		case '\v':
		case 0x82: /* UTF-8 */
		case 0xC2: /* UTF-8 */
		case 0xC3: /* UTF-8 */
		case 0xA0: /* iso-8859-1 nbsp */
			/* never store leading or repeated spaces */
			if (count && buf[count-1] != ' ')
				addchar(buf, bufsize, count, ' ');
			break;
		case ';':
			if (amp > 0) {
				/* text between the pending '&' and this ';' */
				char *cp = &buf[amp];

				/* terminate so cp can be used as a C string */
				term(buf, bufsize, count);
				if (*cp == '#') {
					const char *digits = cp + 1;
					char *end;
					int base = 10;
					long code;

					if (*digits == 'x' || *digits == 'X') {
						base = 16;
						++digits;
					}
					code = strtol(digits, &end, base);
					/*
					 * Substitute only values that fit in one
					 * non-NUL byte.  A NUL would silently cut
					 * the returned string short; larger values
					 * cannot be represented in a single char.
					 */
					if (end != digits && *end == '\0' &&
					    code > 0 && code <= 0xFF) {
						buf[amp-1] = (char)code;
						count = amp;
					} else
						addchar(buf, bufsize, count, (char)c);
				} else if (!strcmp(cp, "amp")) {
					/* buf[amp-1] already holds the '&' */
					count = amp;
				} else if (!strcmp(cp, "gt")) {
					buf[amp-1] = '>';
					count = amp;
				} else if (!strcmp(cp, "lt")) {
					buf[amp-1] = '<';
					count = amp;
				} else if (!strcmp(cp, "nbsp")) {
					/*
					 * The entity is removed without leaving
					 * a space behind.
					 * NOTE(review): the previous code wrote a
					 * ' ' and then unconditionally dropped it
					 * again; collapsing with a preceding space
					 * may have been the intent.  Behaviour is
					 * kept as it was -- confirm with callers
					 * before changing it.
					 */
					count = amp - 1;
				} else if (!strcmp(cp, "quot")) {
					buf[amp-1] = '"';
					count = amp;
				} else
					addchar(buf, bufsize, count, (char)c);
				amp = 0;
			} else
				addchar(buf, bufsize, count, (char)c);
			break;
		case '&':
			amp = count + 1;
			/* fall through */
		default:
			addchar(buf, bufsize, count, (char)c);
		}
	}
	/* end of input: drop a trailing space, if any */
	if (count && buf[count-1] == ' ')
		--count;
	term(buf, bufsize, count);
	log(("getNonTag(): returning %s\n", count ? buf : "NULL"));
	return count ? buf : NULL;
} /* getNonTag() */