Beispiel #1
0
// Callback that collects URL candidates into ctx->urls.
//
// A value is stored when it is either
//  - an attribute value (XML_FLG_ATTRIBUTE) of one of the attributes
//    url, href, src, domain, xmlns or xmlns:*, or
//  - element content (XML_FLG_CONTENT) whose innermost element name (the
//    part of 'dir' behind the last '/') is guid, link, comments or docs.
//
// Leading and trailing whitespace is stripped. The stored wget_string_t
// points into 'val'; the text itself is not duplicated here.
static void _rss_get_url(void *context, int flags, const char *dir, const char *attr, const char *val, size_t len, size_t pos G_GNUC_WGET_UNUSED)
{
	struct rss_context *ctx = context;
	int is_url = 0;

	// absent or empty values can't hold a URL
	if (!val || !len)
		return;

	if (flags & XML_FLG_ATTRIBUTE) {
		is_url = !wget_strcasecmp_ascii(attr, "url")
			|| !wget_strcasecmp_ascii(attr, "href")
			|| !wget_strcasecmp_ascii(attr, "src")
			|| !wget_strcasecmp_ascii(attr, "domain")
			|| !wget_strcasecmp_ascii(attr, "xmlns")
			|| !wget_strncasecmp_ascii(attr, "xmlns:", 6);
	} else if (flags & XML_FLG_CONTENT) {
		const char *slash = strrchr(dir, '/');

		if (slash) {
			const char *elem = slash + 1;

			is_url = !wget_strcasecmp_ascii(elem, "guid")
				|| !wget_strcasecmp_ascii(elem, "link")
				|| !wget_strcasecmp_ascii(elem, "comments")
				|| !wget_strcasecmp_ascii(elem, "docs");
		}
	}

	if (!is_url)
		return;

	// strip leading whitespace
	while (len && c_isspace(*val)) {
		val++;
		len--;
	}

	// strip trailing whitespace
	while (len && c_isspace(val[len - 1]))
		len--;

	wget_string_t url;

	url.p = val;
	url.len = len;

	// the vector is created lazily on the first hit
	if (!ctx->urls)
		ctx->urls = wget_vector_create(32, -2, NULL);

	wget_vector_add(ctx->urls, &url, sizeof(url));
}
Beispiel #2
0
// Hashes a region of the file behind 'fd' (offset and length are passed
// through to wget_hash_file_fd()) with the algorithm hash->type and compares
// the hex digest case-insensitively against hash->hash_hex.
//
// Returns 1 if the digests match, 0 if they differ and -1 if hashing failed.
static int check_piece_hash(wget_metalink_hash_t *hash, int fd, off_t offset, size_t length)
{
	char digest_hex[128 + 1]; // large enough for sha-512 hex

	if (wget_hash_file_fd(hash->type, fd, digest_hex, sizeof(digest_hex), offset, length) == -1)
		return -1;

	return wget_strcasecmp_ascii(digest_hex, hash->hash_hex) == 0;
}
Beispiel #3
0
// Hashes the file behind 'fd' with the algorithm hash->type and compares the
// hex digest case-insensitively against hash->hash_hex.
// Offset and length are both passed as 0 to wget_hash_file_fd()
// (presumably meaning "the whole file" - confirm against its documentation).
//
// Returns 1 if the digests match, 0 if they differ and -1 if hashing failed.
static int _check_file_fd(wget_metalink_hash_t *hash, int fd)
{
	char digest_hex[128 + 1]; // large enough for sha-512 hex

	if (wget_hash_file_fd(hash->type, fd, digest_hex, sizeof(digest_hex), 0, 0) == -1)
		return -1;

	return wget_strcasecmp_ascii(digest_hex, hash->hash_hex) == 0;
}
Beispiel #4
0
/**
 * \param[in] src NUL-terminated input string
 * \param[in] src_encoding Character set of \p src, NULL means "iso-8859-1"
 * \param[in] dst_encoding Character set to convert to, NULL means "iso-8859-1"
 * \return Newly allocated string, or NULL if \p src is NULL or the conversion failed
 *
 * Convert \p src from \p src_encoding into \p dst_encoding.
 *
 * If both encodings have the same name (ASCII case-insensitive), or if the
 * code was built without HAVE_ICONV, a plain copy of \p src is returned.
 */
char *wget_charset_transcode(const char *src, const char *src_encoding, const char *dst_encoding)
{
	if (!src)
		return NULL;

#ifdef HAVE_ICONV
	// default character-set for most browsers
	const char *from = src_encoding ? src_encoding : "iso-8859-1";
	const char *to = dst_encoding ? dst_encoding : "iso-8859-1";

	if (wget_strcasecmp_ascii(from, to) != 0) {
		iconv_t cd = iconv_open(to, from);

		if (cd == (iconv_t)-1) {
			error_printf(_("Failed to prepare encoding '%s' into '%s' (%d)\n"), from, to, errno);
			return NULL;
		}

		char *in = (char *) src; // iconv advances 'in', it doesn't modify what src points to
		size_t in_left = strlen(src);
		size_t capacity = in_left * 6; // output buffer is sized at 6 bytes per input byte
		size_t out_left = capacity;
		char *buf = xmalloc(capacity + 1);
		char *out = buf;
		char *result = NULL;

		// first call converts, second call (NULL input) flushes the conversion state
		if (iconv(cd, &in, &in_left, &out, &out_left) == (size_t)-1
			|| iconv(cd, NULL, NULL, &out, &out_left) == (size_t)-1)
		{
			error_printf(_("Failed to convert '%s' string into '%s' (%d)\n"), from, to, errno);
		} else {
			result = wget_strmemdup(buf, capacity - out_left);
			debug_printf("converted '%s' (%s) -> '%s' (%s)\n", src, from, result, to);
		}

		xfree(buf);
		iconv_close(cd);

		return result;
	}
#endif

	// nothing to convert
	return strdup(src);
}
Beispiel #5
0
// Runs the XML parser over every '*.xml' file found in SRCDIR "/files" and
// prints how many files were parsed.
//
// Entries starting with '.' and entries without an extension are skipped.
// A file whose full path does not fit into the path buffer is skipped with a
// message instead of being parsed under a truncated (wrong) name.
//
// The parse results are discarded; the test is meant to be run under
// valgrind to detect memory faults.
static void test_parser(void)
{
	DIR *dirp;
	struct dirent *dp;
	const char *ext;
	char fname[1024];
	int xml = 0, html = 0, css = 0;

	// test the XML / HTML parser, you should start the test with valgrind
	// to detect memory faults
	if ((dirp = opendir(SRCDIR "/files")) != NULL) {
		while ((dp = readdir(dirp)) != NULL) {
			if (*dp->d_name == '.') continue;
			if ((ext = strrchr(dp->d_name, '.'))) {
				int n = snprintf(fname, sizeof(fname), SRCDIR "/files/%s", dp->d_name);

				// snprintf() returns the length it wanted to write; anything
				// >= the buffer size means the path was cut off
				if (n < 0 || (size_t) n >= sizeof(fname)) {
					info_printf("skipping %s: path too long\n", dp->d_name);
					continue;
				}

				if (!wget_strcasecmp_ascii(ext, ".xml")) {
					info_printf("parsing %s\n", fname);
					wget_xml_parse_file(fname, NULL, NULL, 0);
					xml++;
				}
/*				else if (!wget_strcasecmp_ascii(ext, ".html")) {
					info_printf("parsing %s\n", fname);
					wget_html_parse_file(fname, NULL, NULL, 0);
					html++;
				}
				else if (!wget_strcasecmp_ascii(ext, ".css")) {
					info_printf("parsing %s\n", fname);
					wget_css_parse_file(fname, _css_dump_uri, _css_dump_charset, NULL);
					css++;
				} */
			}
		}
		closedir(dirp);
	}

	// html and css stay 0 as long as the branches above are disabled
	info_printf("%d XML, %d HTML and %d CSS files parsed\n", xml, html, css);
}
Beispiel #6
0
/**
 * \param[in] hashname Name of the hashing algorithm (see table below), may be NULL
 * \return A constant to be used by libwget hashing functions
 *
 * Get the hashing algorithms list item that corresponds to the named hashing algorithm.
 *
 * This function returns a constant that uniquely identifies a known supported hashing algorithm
 * within libwget. All the supported algorithms are listed in the ::wget_digest_algorithm_t enum.
 *
 * The name is compared ASCII case-insensitively. If \p hashname is NULL or not
 * listed below, an error message is printed and WGET_DIGTYPE_UNKNOWN is returned.
 *
 * Algorithm name | Constant
 * -------------- | --------
 * sha1 or sha-1|WGET_DIGTYPE_SHA1
 * sha256 or sha-256|WGET_DIGTYPE_SHA256
 * sha512 or sha-512|WGET_DIGTYPE_SHA512
 * sha224 or sha-224|WGET_DIGTYPE_SHA224
 * sha384 or sha-384|WGET_DIGTYPE_SHA384
 * md5|WGET_DIGTYPE_MD5
 * md2|WGET_DIGTYPE_MD2
 * rmd160|WGET_DIGTYPE_RMD160
 */
wget_digest_algorithm_t wget_hash_get_algorithm(const char *hashname)
{
	static const struct {
		const char *name;
		wget_digest_algorithm_t type;
	} algorithms[] = {
		{ "sha-1", WGET_DIGTYPE_SHA1 },
		{ "sha1", WGET_DIGTYPE_SHA1 },
		{ "sha-256", WGET_DIGTYPE_SHA256 },
		{ "sha256", WGET_DIGTYPE_SHA256 },
		{ "sha-512", WGET_DIGTYPE_SHA512 },
		{ "sha512", WGET_DIGTYPE_SHA512 },
		{ "sha-224", WGET_DIGTYPE_SHA224 },
		{ "sha224", WGET_DIGTYPE_SHA224 },
		{ "sha-384", WGET_DIGTYPE_SHA384 },
		{ "sha384", WGET_DIGTYPE_SHA384 },
		{ "md5", WGET_DIGTYPE_MD5 },
		{ "md2", WGET_DIGTYPE_MD2 },
		{ "rmd160", WGET_DIGTYPE_RMD160 },
	};

	if (hashname) {
		for (unsigned it = 0; it < sizeof(algorithms) / sizeof(algorithms[0]); it++) {
			if (!wget_strcasecmp_ascii(hashname, algorithms[it].name))
				return algorithms[it].type;
		}
	}

	// passing NULL for a %s conversion is undefined behavior, so substitute a literal
	error_printf(_("Unknown hash type '%s'\n"), hashname ? hashname : "(null)");
	return WGET_DIGTYPE_UNKNOWN;
}
Beispiel #7
0
// Reads the local file 'fname', extracts the URLs found in its HTML content
// and prints them to stdout.
//
// A leading byte order mark (UTF-16BE, UTF-16LE or UTF-8) determines the
// encoding and is skipped. UTF-16 content is converted to UTF-8 before
// parsing; if that conversion fails, the file is not parsed.
//
// Nothing is printed and nothing happens if the file can't be read.
static void html_parse_localfile(const char *fname)
{
	char *buf;  // start of the allocation, this is what has to be freed
	char *data; // start of the content to parse (may point behind a BOM)
	const char *encoding = NULL;
	size_t len;

	if (!(buf = wget_read_file(fname, &len)))
		return;

	data = buf;

	if (len >= 2 && (unsigned char)data[0] == 0xFE && (unsigned char)data[1] == 0xFF) {
		// Big-endian UTF-16
		encoding = "UTF-16BE";

		// adjust behind BOM
		data += 2;
		len -= 2;
	} else if (len >= 2 && (unsigned char)data[0] == 0xFF && (unsigned char)data[1] == 0xFE) {
		// Little-endian UTF-16
		encoding = "UTF-16LE";

		// adjust behind BOM
		data += 2;
		len -= 2;
	} else if (len >= 3 && (unsigned char)data[0] == 0xEF && (unsigned char)data[1] == 0xBB && (unsigned char)data[2] == 0xBF) {
		// UTF-8
		encoding = "UTF-8";

		// adjust behind BOM
		data += 3;
		len -= 3;
	}

	if (encoding)
		printf("URI encoding '%s' set by BOM\n", encoding);

	if (!wget_strncasecmp_ascii(encoding, "UTF-16", 6)) {
		size_t n;
		char *utf8;

		len -= len & 1; // ignore single trailing byte, else charset conversion fails

		if (wget_memiconv(encoding, data, len, "UTF-8", &utf8, &n) == 0) {
			printf("Convert non-ASCII encoding '%s' to UTF-8\n", encoding);

			// the raw file content is no longer needed, continue with the converted copy
			wget_xfree(buf);
			buf = data = utf8;
		} else {
			printf("Failed to convert non-ASCII encoding '%s' to UTF-8, skip parsing\n", encoding);
			wget_xfree(buf);
			return;
		}
	}

	WGET_HTML_PARSED_RESULT *res = wget_html_get_urls_inline(data, NULL, NULL);

	if (encoding) {
		// the BOM wins over an encoding declared inside the document
		if (res->encoding && wget_strcasecmp_ascii(encoding, res->encoding))
			printf("Encoding '%s' as stated in document has been ignored\n", res->encoding);
	}

	for (int it = 0; it < wget_vector_size(res->uris); it++) {
		WGET_HTML_PARSED_URL *html_url = wget_vector_get(res->uris, it);
		wget_string_t *url = &html_url->url;

		printf("  %s.%s '%.*s'\n", html_url->dir, html_url->attr, (int) url->len, url->p);
	}

	// free the allocation itself, not the BOM-adjusted 'data' pointer
	wget_xfree(buf);
	wget_html_free_urls_inline(&res);
}
Beispiel #8
0
// Callback that fills the metalink context from attribute values and element
// contents found below "/metalink/file" (metalink 4) or "/metalink/files/file"
// (metalink 3). Everything else, including comments, is ignored.
//
// Both formats carry the same information under different element paths, so
// the format is detected once and the paths to compare against are chosen
// accordingly.
//
// 'val' / 'len' describe the value, which is not NUL-terminated; a terminated
// copy is made for the functions that need one.
static void _metalink_parse(void *context, int flags, const char *dir, const char *attr, const char *val, size_t len, size_t pos G_GNUC_WGET_UNUSED)
{
	_metalink_context_t *ctx = context;
	const char *pieces_path, *piece_hash_path, *hash_path, *url_path;
	int is_v3;

	if (!(flags & (XML_FLG_CONTENT | XML_FLG_ATTRIBUTE)))
		return; // ignore comments

	if (wget_strncasecmp_ascii(dir, "/metalink/file", 14) != 0)
		return;

	dir += 14;

	// NOTE(review): this is a VLA sized by the parser-supplied length - confirm
	// that the caller bounds 'len'
	char value[len + 1];

	memcpy(value, val, len);
	value[len] = 0;

	is_v3 = !wget_strncasecmp_ascii(dir, "s/file", 6);

	if (is_v3) {
		// metalink 3 XML format
		dir += 6;
		pieces_path = "/verification/pieces";
		piece_hash_path = "/verification/pieces/hash";
		hash_path = "/verification/hash";
		url_path = "/resources/url";
	} else {
		// metalink 4 XML format
		pieces_path = "/pieces";
		piece_hash_path = "/pieces/hash";
		hash_path = "/hash";
		url_path = "/url";
	}

	if (!attr) {
		// element content
		if (!wget_strcasecmp_ascii(dir, piece_hash_path))
			_add_piece(ctx, value);
		else if (!wget_strcasecmp_ascii(dir, hash_path))
			_add_file_hash(ctx, value);
		else if (!wget_strcasecmp_ascii(dir, "/size"))
			ctx->metalink->size = atoll(value);
		else if (!wget_strcasecmp_ascii(dir, url_path))
			_add_mirror(ctx, value);

		return;
	}

	// attribute value
	if (*dir == 0) {
		// attribute of the file element itself
		if (!wget_strcasecmp_ascii(attr, "name"))
			ctx->metalink->name = wget_strmemdup(val, len);
	} else if (!wget_strcasecmp_ascii(dir, pieces_path)) {
		if (!wget_strcasecmp_ascii(attr, "type"))
			sscanf(value, "%15s", ctx->hash_type);
		else if (!wget_strcasecmp_ascii(attr, "length"))
			ctx->length = atoll(value);
	} else if (!wget_strcasecmp_ascii(dir, hash_path)) {
		if (!wget_strcasecmp_ascii(attr, "type"))
			sscanf(value, "%15s", ctx->hash_type);
	} else if (!wget_strcasecmp_ascii(dir, url_path)) {
		if (!wget_strcasecmp_ascii(attr, "location")) {
			sscanf(value, " %2[a-zA-Z]", ctx->location); // ISO 3166-1 alpha-2 two letter country code
		} else if (!wget_strcasecmp_ascii(attr, "preference")
			|| (!is_v3 && !wget_strcasecmp_ascii(attr, "priority")))
		{
			// "priority" is only recognized in the metalink 4 format
			sscanf(value, " %6d", &ctx->priority);

			// out-of-range values are set to 999999
			if (ctx->priority < 1 || ctx->priority > 999999)
				ctx->priority = 999999;
		}
	}
}
Beispiel #9
0
// Comparison function for two entries: compares their 'txt' members with
// wget_strcasecmp_ascii() (ASCII case-insensitive) and returns its result.
static int compare_txt(struct ENTRY *a1, struct ENTRY *a2)
{
	const char *lhs = a1->txt;
	const char *rhs = a2->txt;

	return wget_strcasecmp_ascii(lhs, rhs);
}
Beispiel #10
0
// Unit test for wget_strcasecmp_ascii() and wget_strncasecmp_ascii().
//
// Runs both functions against tables of inputs with their expected results
// (including NULL and empty strings), then checks that each lowercase ASCII
// letter compares equal to its uppercase counterpart.
// Every check increments either the 'ok' or the 'failed' counter.
static void test_strcasecmp_ascii(void)
{
	static const struct test_data {
		const char *s1;
		const char *s2;
		int result;
	} test_data[] = {
		{ NULL, NULL, 0 },
		{ NULL, "x", -1 },
		{ "x", NULL, 1 },
		{ "Abc", "abc", 0 },
		{ "abc", "abc", 0 },
		{ "abc", "ab", 'c' },
		{ "ab", "abc", -'c' },
		{ "abc", "", 'a' },
		{ "", "abc", -'a' },
	};
	static const struct test_data2 {
		const char *s1;
		const char *s2;
		size_t n;
		int result;
	} test_data2[] = {
		{ NULL, NULL, 1, 0 },
		{ NULL, "x", 1, -1 },
		{ "x", NULL, 1, 1 },
		{ "Abc", "abc", 2, 0 },
		{ "abc", "abc", 3, 0 },
		{ "abc", "ab", 2, 0 },
		{ "abc", "ab", 3, 'c' },
		{ "ab", "abc", 2, 0 },
		{ "ab", "abc", 3, -'c' },
		{ "abc", "", 1, 'a' },
		{ "", "abc", 1, -'a' },
		{ "", "abc", 0, 0 },
	};

	// unbounded comparison
	for (unsigned idx = 0; idx < countof(test_data); idx++) {
		const struct test_data *tc = test_data + idx;
		int rc = wget_strcasecmp_ascii(tc->s1, tc->s2);

		if (rc != tc->result) {
			failed++;
			info_printf("Failed [%u]: wget_strcasecmp_ascii(%s,%s) -> %d (expected %d)\n", idx, tc->s1, tc->s2, rc, tc->result);
			continue;
		}

		ok++;
	}

	// length-limited comparison
	for (unsigned idx = 0; idx < countof(test_data2); idx++) {
		const struct test_data2 *tc = test_data2 + idx;
		int rc = wget_strncasecmp_ascii(tc->s1, tc->s2, tc->n);

		if (rc != tc->result) {
			failed++;
			info_printf("Failed [%u]: wget_strncasecmp_ascii(%s,%s,%zu) -> %d (expected %d)\n", idx, tc->s1, tc->s2, tc->n, rc, tc->result);
			continue;
		}

		ok++;
	}

	// each of the 26 letters must match itself regardless of case
	for (unsigned letter = 0; letter < 26; letter++) {
		char lower[8], upper[8];

		lower[0] = 'a' + letter;
		lower[1] = 0;
		upper[0] = 'A' + letter;
		upper[1] = 0;

		if (wget_strcasecmp_ascii(lower, upper) != 0) {
			failed++;
			info_printf("Failed: wget_strcasecmp_ascii(%s,%s) != 0\n", lower, upper);
		} else {
			ok++;
		}

		if (wget_strncasecmp_ascii(lower, upper, 1) != 0) {
			failed++;
			info_printf("Failed: wget_strncasecmp_ascii(%s,%s) != 0\n", lower, upper);
		} else {
			ok++;
		}
	}
}
Beispiel #11
0
// Unit test for wget_http_parse_challenges().
//
// Each test case is an authentication challenge header value together with
// the NULL-terminated list of auth schemes expected to be parsed from it.
// For every expected scheme the parsed challenge at the same position must
// exist and match (ASCII case-insensitive). At the position of the
// terminating NULL no further challenge may exist, i.e. the parser must not
// return more challenges than expected.
// Results are counted in the 'ok' and 'failed' counters.
static void test_parse_challenge(void)
{
	static const struct test_data {
		const char *
			input;
		const char *
			scheme[3];
	} test_data[] = {
		{	// simplebasic
			"Basic realm=\"foo\"",
			{ "Basic", NULL }
		},
		{	// simplebasicucase
			"BASIC REALM=\"foo\"",
			{ "Basic", NULL }
		},
		{	// simplebasicucase
			"Basic , realm=\"foo\"",
			{ "Basic", NULL }
		},
		{	//
			"Basic realm=\"test realm\"",
			{ "Basic", NULL }
		},
		{	//
			"Basic realm=\"test-äöÜ\"",
			{ "Basic", NULL }
		},
		{	//
			"Basic realm=\"basic\", Newauth realm=\"newauth\"",
			{ "Basic", "Newauth", NULL }
		},
	};

	wget_vector_t *challenges;
	wget_http_challenge_t *challenge;

	// Testcases found here http://greenbytes.de/tech/tc/httpauth/
	challenges = wget_vector_create(2, 2, NULL);
	wget_vector_set_destructor(challenges, (void(*)(void *))wget_http_free_challenge);

	for (unsigned it = 0; it < countof(test_data); it++) {
		const struct test_data *t = &test_data[it];

		wget_http_parse_challenges(t->input, challenges);

		// The loop must also visit the terminating NULL entry of the scheme
		// list, else the check for surplus challenges below is never reached.
		for (unsigned nchal = 0; nchal < countof(test_data[0].scheme); nchal++) {
			challenge = wget_vector_get(challenges, nchal);

			if (!t->scheme[nchal]) {
				// end of the expected list: there must be no further challenge
				if (challenge) {
					failed++;
					info_printf("Failed [%u]: wget_http_parse_challenges(%s) found %d challenges (expected %u)\n", it, t->input, wget_vector_size(challenges), nchal);
				}
				break;
			}

			if (!challenge) {
				failed++;
				info_printf("Failed [%u]: wget_http_parse_challenges(%s) did not find enough challenges\n", it, t->input);
				break;
			}

			if (!wget_strcasecmp_ascii(challenge->auth_scheme, t->scheme[nchal])) {
				ok++;
			} else {
				failed++;
				info_printf("Failed [%u]: wget_http_parse_challenges(%s) -> '%s' (expected '%s')\n", it, t->input, challenge->auth_scheme, t->scheme[nchal]);
			}
		}

		// reuse the same vector for the next test case
		wget_vector_clear(challenges);
	}

	wget_http_free_challenges(&challenges);
}