Exemple #1
0
// CSS parser callback, invoked for every @charset rule that is found.
// Only the first rule is recorded; once an encoding has been stored,
// later invocations leave it untouched.
static void _css_get_encoding(void *context, const char *encoding, size_t len)
{
	_CSS_CONTEXT *css_ctx = context;

	// an earlier @charset rule already provided the encoding
	if (*css_ctx->encoding)
		return;

	*css_ctx->encoding = wget_strmemdup(encoding, len);
	debug_printf(_("URI content encoding = '%s'\n"), *css_ctx->encoding);
}
Exemple #2
0
// Converts the NUL-terminated string 'src' from 'src_encoding' into 'dst_encoding'.
//
// src:          string to convert; NULL makes the function return NULL
// src_encoding: encoding of 'src'; NULL is treated as "iso-8859-1"
// dst_encoding: wanted encoding; NULL is treated as "iso-8859-1"
//
// Returns a newly allocated string.
// With HAVE_ICONV and two different encoding names, this is the converted
// string, or NULL if the conversion could not be set up or failed.
// In all other cases (no iconv support, or equal encoding names) an
// unconverted copy of 'src' is returned.
char *wget_charset_transcode(const char *src, const char *src_encoding, const char *dst_encoding)
{
	if (!src)
		return NULL;

#ifdef HAVE_ICONV
	if (!src_encoding)
		src_encoding = "iso-8859-1"; // default character-set for most browsers
	if (!dst_encoding)
		dst_encoding = "iso-8859-1"; // default character-set for most browsers

	if (wget_strcasecmp_ascii(src_encoding, dst_encoding)) {
		char *ret = NULL;

		iconv_t cd = iconv_open(dst_encoding, src_encoding);

		if (cd != (iconv_t)-1) {
			char *tmp = (char *) src; // iconv won't change where src points to, but changes tmp itself
			size_t tmp_len = strlen(src);

			// The output buffer is sized with 6 bytes per input byte plus one extra byte.
			// Refuse inputs for which that size calculation would wrap around,
			// else iconv() would be handed a length larger than the allocation.
			if (tmp_len <= ((size_t)-1 - 1) / 6) {
				size_t dst_len = tmp_len * 6, dst_len_tmp = dst_len;
				char *dst = xmalloc(dst_len + 1), *dst_tmp = dst;

				// the second iconv() call flushes any pending shift sequence into dst
				if (iconv(cd, &tmp, &tmp_len, &dst_tmp, &dst_len_tmp) != (size_t)-1
					&& iconv(cd, NULL, NULL, &dst_tmp, &dst_len_tmp) != (size_t)-1)
				{
					// dst_len - dst_len_tmp is the number of bytes written to dst
					ret = wget_strmemdup(dst, dst_len - dst_len_tmp);
					debug_printf("converted '%s' (%s) -> '%s' (%s)\n", src, src_encoding, ret, dst_encoding);
				} else
					error_printf(_("Failed to convert '%s' string into '%s' (%d)\n"), src_encoding, dst_encoding, errno);

				xfree(dst);
			} else
				error_printf(_("Failed to convert '%s' string into '%s' (%d)\n"), src_encoding, dst_encoding, ENOMEM);

			iconv_close(cd);
		} else
			error_printf(_("Failed to prepare encoding '%s' into '%s' (%d)\n"), src_encoding, dst_encoding, errno);

		return ret;
	}
#endif

	return strdup(src);
}
Exemple #3
0
// CSS parser callback, invoked for every URI that is found.
// A copy of the URI text is stored together with its length and position
// in the context's URI list. The list is created on first use.
static void _css_get_url(void *context, const char *url, size_t len, size_t pos)
{
	_CSS_CONTEXT *css_ctx = context;
	WGET_PARSED_URL entry = {
		.url = wget_strmemdup(url, len),
		.abs_url = NULL, // filled in later, when the URI gets resolved against a base
		.len = len,
		.pos = pos
	};

	if (css_ctx->uris == NULL) {
		// first URI: set up the list and let it release its entries via _free_url
		css_ctx->uris = wget_vector_create(16, -2, NULL);
		wget_vector_set_destructor(css_ctx->uris, (wget_vector_destructor_t)_free_url);
	}

	wget_vector_add(css_ctx->uris, &entry, sizeof(entry));
}

// Resolves each parsed URL in 'urls' against 'base' and stores the result
// in the entry's abs_url member. Entries that cannot be resolved are
// reported and left unchanged. Does nothing if 'urls' or 'base' is NULL.
static void _urls_to_absolute(wget_vector_t *urls, wget_iri_t *base)
{
	wget_buffer_t scratch;
	int idx = 0;

	if (!base || !urls)
		return;

	wget_buffer_init(&scratch, NULL, 1024);

	while (idx < wget_vector_size(urls)) {
		WGET_PARSED_URL *entry = wget_vector_get(urls, idx++);

		if (!wget_iri_relative_to_abs(base, entry->url, entry->len, &scratch)) {
			error_printf("Cannot resolve relative URI '%s'\n", entry->url);
			continue;
		}

		// keep a private copy, the scratch buffer is reused for the next entry
		entry->abs_url = wget_strmemdup(scratch.data, scratch.length);
	}

	wget_buffer_deinit(&scratch);
}

// Parses 'len' bytes of CSS at 'css' and collects the URIs found in it.
//
// base:     if not NULL, each collected URI is additionally resolved against it
// encoding: if not NULL, the @charset callback is registered so that the
//           first @charset rule can be stored through this pointer
//
// Returns the list of collected URIs, or NULL if no URI was found.
wget_vector_t *wget_css_get_urls(const char *css, size_t len, wget_iri_t *base, const char **encoding)
{
	_CSS_CONTEXT context = { .encoding = encoding };
	wget_vector_t *found;

	// the caller is only interested in @charset rules if it passed 'encoding'
	if (encoding)
		wget_css_parse_buffer(css, len, _css_get_url, _css_get_encoding, &context);
	else
		wget_css_parse_buffer(css, len, _css_get_url, NULL, &context);

	found = context.uris;
	_urls_to_absolute(found, base);

	return found;
}
Exemple #4
0
// Converts 'src' into its ASCII (IDNA) representation, using libidn2
// (optionally with libunistring for lowercasing) or libidn, depending on
// the build configuration.
//
// Returns 'src' itself if wget_str_needs_encoding() reports that nothing has
// to be done, if no IDN library is compiled in, or if the conversion fails.
// Otherwise the string allocated by the IDN library is returned, which is a
// pointer different from 'src'.
const char *wget_str_to_ascii(const char *src)
{
	if (!wget_str_needs_encoding(src))
		return src;

#ifdef WITH_LIBIDN2
	char *ascii = NULL;
	int status;
#ifdef WITH_LIBUNISTRING
	uint8_t stackbuf[256];
	size_t lowered_len = sizeof(stackbuf) - 1; // leave space for additional \0 byte

	// we need a conversion to lowercase
	uint8_t *lowered = u8_tolower((uint8_t *)src, u8_strlen((uint8_t *)src), 0, UNINORM_NFKC, stackbuf, &lowered_len);

	if (!lowered) {
		error_printf("u8_tolower(%s) failed (%d)\n", src, errno);
		return src;
	}

	// u8_tolower() does not terminate the result string
	if (lowered != stackbuf) {
		// result did not fit into stackbuf: swap it for a terminated copy
		uint8_t *unterminated = lowered;

		lowered = (uint8_t *)wget_strmemdup((char *)unterminated, lowered_len);
		xfree(unterminated);
	} else {
		lowered[lowered_len] = 0;
	}

	status = idn2_lookup_u8(lowered, (uint8_t **)&ascii, 0);
	if (status != IDN2_OK) {
		error_printf(_("toASCII(%s) failed (%d): %s\n"), lowered, status, idn2_strerror(status));
	} else {
		debug_printf("idn2 '%s' -> '%s'\n", src, ascii);
		src = ascii;
	}

	if (lowered != stackbuf)
		xfree(lowered);
#else
	status = idn2_lookup_u8((uint8_t *)src, (uint8_t **)&ascii, 0);
	if (status != IDN2_OK) {
		error_printf(_("toASCII(%s) failed (%d): %s\n"), src, status, idn2_strerror(status));
	} else {
		debug_printf("idn2 '%s' -> '%s'\n", src, ascii);
		src = ascii;
	}
#endif
#elif WITH_LIBIDN
	char *ascii = NULL;
	int status;

	if (!_utf8_is_valid(src)) {
		error_printf(_("Invalid UTF-8 sequence not converted: '%s'\n"), src);
		return src;
	}

	// idna_to_ascii_8z() automatically converts UTF-8 to lowercase
	status = idna_to_ascii_8z(src, &ascii, IDNA_USE_STD3_ASCII_RULES);
	if (status != IDNA_SUCCESS)
		error_printf(_("toASCII failed (%d): %s\n"), status, idna_strerror(status));
	else
		src = ascii;
#else
	error_printf(_("toASCII not available: '%s'\n"), src);
#endif

	return src;
}
Exemple #5
0
// Loads the entries of the .netrc style file 'fname' into 'netrc_db'.
//
// The file is processed line by line. Each line starts with a keyword that
// may be followed by one value; anything after that value is ignored:
//   machine <name>    starts a new entry for <name>
//   default           starts a new entry stored under the key "default"
//   login <user>      login of the current entry (first occurrence wins)
//   password <pass>   password of the current entry (first occurrence wins)
//   macdef ...        starts a macro; lines are skipped up to the next empty line
// Lines starting with '#' are comments. Keywords outside of a machine/default
// entry are ignored.
//
// Returns -1 if 'netrc_db' is NULL or 'fname' is NULL or empty.
// Otherwise returns the size of the database's machine map after loading,
// or 0 if the file could not be opened (an error is printed unless the file
// simply does not exist).
int wget_netrc_db_load(wget_netrc_db_t *netrc_db, const char *fname)
{
	wget_netrc_t netrc;
	FILE *fp;
	char *buf = NULL, *linep, *p, *key = NULL;
	size_t bufsize = 0;
	ssize_t buflen;
	int nentries = 0, in_macdef = 0, in_machine = 0;

	if (!netrc_db || !fname || !*fname)
		return -1;

	if ((fp = fopen(fname, "r"))) {
		while ((buflen = wget_getline(&buf, &bufsize, fp)) >= 0) {
			// strip off \r\n
			// buf[buflen] is the terminating 0 byte, the last character is at buflen - 1
			while (buflen > 0 && (buf[buflen - 1] == '\n' || buf[buflen - 1] == '\r'))
				buf[--buflen] = 0;

			linep = buf;

			// the casts avoid undefined behavior of isspace() for negative char values
			while (isspace((unsigned char)*linep)) linep++; // ignore leading whitespace

			if (*linep == '#')
				continue; // skip comments

			if (!*linep) {
				// empty lines reset macro processing
				in_macdef = 0;
				continue;
			} else if (in_macdef)
				continue; // still processing 'macdef' macro

			// now we expect key value pairs, e.g.: machine example.com
			xfree(key);
			for (p = linep; *linep && !isspace((unsigned char)*linep);) linep++;
			key = wget_strmemdup(p, linep - p);

			if (!strcmp(key, "machine") || !strcmp(key, "default")) {
				// a new entry begins: store the one collected so far
				if (in_machine)
					wget_netrc_db_add(netrc_db, wget_memdup(&netrc, sizeof(netrc)));

				wget_netrc_init(&netrc);
				in_machine = 1;

				if (!strcmp(key, "default")) {
					netrc.key = wget_strdup("default");
					continue;
				}
			} else if (!in_machine)
				continue; // token outside of machine or default

			// isolate the value that follows the keyword: [p, linep)
			while (isspace((unsigned char)*linep)) linep++;
			for (p = linep; *linep && !isspace((unsigned char)*linep);) linep++;

			if (!strcmp(key, "machine")) {
				// the machine name identifies the entry, like "default" does above
				if (!netrc.key)
					netrc.key = wget_strmemdup(p, linep - p);
			} else if (!strcmp(key, "login")) {
				if (!netrc.login)
					netrc.login = wget_strmemdup(p, linep - p);
			} else if (!strcmp(key, "password")) {
				if (!netrc.password)
					netrc.password = wget_strmemdup(p, linep - p);
			} else if (!strcmp(key, "macdef")) {
				in_macdef = 1; // the above code skips until next empty line
			}
		}

		// store the last entry of the file
		if (in_machine)
			wget_netrc_db_add(netrc_db, wget_memdup(&netrc, sizeof(netrc)));

		xfree(key);
		xfree(buf);
		fclose(fp);

		nentries = wget_hashmap_size(netrc_db->machines);

		debug_printf("loaded %d .netrc %s\n", nentries, nentries != 1 ? "entries" : "entry");
	} else if (errno != ENOENT)
		error_printf(_("Failed to open .netrc file '%s' (%d)\n"), fname, errno);

	return nentries;
}
Exemple #6
0
// Reads OCSP entries from the stream 'fp' and adds them to 'ocsp_db'.
//
// Every line that is neither empty nor a comment ('#') consists of
// whitespace separated fields:
//   <key> <maxage> [<mtime> [<valid>]]
// Lines without a <maxage> field are reported as parse errors.
// Entries whose <maxage> lies before the current time are silently dropped.
//
// load_hosts: if non-zero, entries are added with wget_ocsp_db_add_host(),
//             else with wget_ocsp_db_add_fingerprint()
//
// Returns -1 if the error indicator of 'fp' is set after reading, else 0.
static int _ocsp_db_load(wget_ocsp_db_t *ocsp_db, FILE *fp, int load_hosts)
{
	wget_ocsp_t ocsp;
	char *buf = NULL, *linep, *p;
	size_t bufsize = 0;
	ssize_t buflen;
	time_t now = time(NULL);
	int ok;

	while ((buflen = wget_getline(&buf, &bufsize, fp)) >= 0) {
		// strip off \r\n
		// buf[buflen] is the terminating 0 byte, the last character is at buflen - 1
		while (buflen > 0 && (buf[buflen - 1] == '\n' || buf[buflen - 1] == '\r'))
			buf[--buflen] = 0;

		linep = buf;

		// the casts avoid undefined behavior of isspace() for negative char values
		while (isspace((unsigned char)*linep)) linep++; // ignore leading whitespace
		if (!*linep) continue; // skip empty lines

		if (*linep == '#')
			continue; // skip comments

		wget_ocsp_init(&ocsp);
		ok = 0;

		// parse key (the cert's sha-256 checksum; presumably a host name if load_hosts is set)
		for (p = linep; *linep && !isspace((unsigned char)*linep);) linep++;
		ocsp.key = wget_strmemdup(p, linep - p);

		// parse max age
		// skipping all whitespace keeps the fields aligned if they are separated by more than one character
		while (isspace((unsigned char)*linep)) linep++;
		if (*linep) {
			for (p = linep; *linep && !isspace((unsigned char)*linep);) linep++;
			ocsp.maxage = atol(p);
			if (ocsp.maxage < now) {
				// drop expired entry
				wget_ocsp_deinit(&ocsp);
				continue;
			}
			ok = 1; // key and max age are the minimum for a usable entry
		}

		// parse mtime (age of this entry)
		while (isspace((unsigned char)*linep)) linep++;
		if (*linep) {
			for (p = linep; *linep && !isspace((unsigned char)*linep);) linep++;
			ocsp.mtime = atol(p);
		}

		// parse valid flag
		while (isspace((unsigned char)*linep)) linep++;
		if (*linep) {
			for (p = linep; *linep && !isspace((unsigned char)*linep);) linep++;
			ocsp.valid = atoi(p);
		}

		if (ok) {
			// the database receives a heap copy of the entry
			if (load_hosts)
				wget_ocsp_db_add_host(ocsp_db, wget_memdup(&ocsp, sizeof(ocsp)));
			else
				wget_ocsp_db_add_fingerprint(ocsp_db, wget_memdup(&ocsp, sizeof(ocsp)));
		} else {
			wget_ocsp_deinit(&ocsp);
			error_printf(_("Failed to parse OCSP line: '%s'\n"), buf);
		}
	}

	xfree(buf);

	if (ferror(fp))
		return -1;

	return 0;
}
Exemple #7
0
// XML parser callback that collects metalink data into the context.
//
// context: the _metalink_context_t to fill
// flags:   XML_FLG_* bits; only content and attribute events are processed
// dir:     path of the current XML element, e.g. "/metalink/file/url"
// attr:    attribute name for attribute events, NULL for element content
// val/len: attribute value or element content (not NUL-terminated)
//
// Both the metalink 3 layout (below "/metalink/files/file") and the
// metalink 4 layout (below "/metalink/file") are handled.
static void _metalink_parse(void *context, int flags, const char *dir, const char *attr, const char *val, size_t len, size_t pos G_GNUC_WGET_UNUSED)
{
	_metalink_context_t *ctx = context;
	// 'len' stems from the parsed document, so it must not size a stack array.
	// Short values are copied into valuebuf, longer ones onto the heap.
	char valuebuf[1024];
	char *value = valuebuf;

	if (!(flags & (XML_FLG_CONTENT | XML_FLG_ATTRIBUTE))) return; // ignore comments

	if (wget_strncasecmp_ascii(dir, "/metalink/file", 14)) return;

	dir += 14;

	// create a NUL-terminated copy of val for the string functions below
	if (len < sizeof(valuebuf)) {
		memcpy(value, val, len);
		value[len] = 0;
	} else
		value = wget_strmemdup(val, len);

	// NOTE(review): the sscanf() widths below assume that ctx->hash_type holds at
	// least 16 bytes and ctx->location at least 3 bytes - confirm against the struct.
	// NOTE(review): assigning ctx->metalink->name does not release a previously
	// stored name - confirm whether several 'name' attributes can occur.

	if (!wget_strncasecmp_ascii(dir, "s/file", 6)) {
		// metalink 3 XML format
		dir += 6;

		if (attr) {
			if (*dir == 0) { // /metalink/files/file
				if (!wget_strcasecmp_ascii(attr, "name")) {
					ctx->metalink->name = wget_strmemdup(val, len);
				}
			} else if (!wget_strcasecmp_ascii(dir, "/verification/pieces")) {
				if (!wget_strcasecmp_ascii(attr, "type")) {
					sscanf(value, "%15s", ctx->hash_type);
				} else if (!wget_strcasecmp_ascii(attr, "length")) {
					ctx->length = atoll(value);
				}
			} else if (!wget_strcasecmp_ascii(dir, "/verification/hash")) {
				if (!wget_strcasecmp_ascii(attr, "type")) {
					sscanf(value, "%15s", ctx->hash_type);
				}
			} else if (!wget_strcasecmp_ascii(dir, "/resources/url")) {
				if (!wget_strcasecmp_ascii(attr, "location")) {
					sscanf(value, " %2[a-zA-Z]", ctx->location); // ISO 3166-1 alpha-2 two letter country code
				} else if (!wget_strcasecmp_ascii(attr, "preference")) {
					sscanf(value, " %6d", &ctx->priority);
					// out-of-range values are mapped to 999999
					if (ctx->priority < 1 || ctx->priority > 999999)
						ctx->priority = 999999;
				}
			}
		} else {
			if (!wget_strcasecmp_ascii(dir, "/verification/pieces/hash")) {
				_add_piece(ctx, value);
			} else if (!wget_strcasecmp_ascii(dir, "/verification/hash")) {
				_add_file_hash(ctx, value);
			} else if (!wget_strcasecmp_ascii(dir, "/size")) {
				ctx->metalink->size = atoll(value);
			} else if (!wget_strcasecmp_ascii(dir, "/resources/url")) {
				_add_mirror(ctx, value);
			}
		}
	} else {
		// metalink 4 XML format
		if (attr) {
			if (*dir == 0) { // /metalink/file
				if (!wget_strcasecmp_ascii(attr, "name")) {
					ctx->metalink->name = wget_strmemdup(val, len);
				}
			} else if (!wget_strcasecmp_ascii(dir, "/pieces")) {
				if (!wget_strcasecmp_ascii(attr, "type")) {
					sscanf(value, "%15s", ctx->hash_type);
				} else if (!wget_strcasecmp_ascii(attr, "length")) {
					ctx->length = atoll(value);
				}
			} else if (!wget_strcasecmp_ascii(dir, "/hash")) {
				if (!wget_strcasecmp_ascii(attr, "type")) {
					sscanf(value, "%15s", ctx->hash_type);
				}
			} else if (!wget_strcasecmp_ascii(dir, "/url")) {
				if (!wget_strcasecmp_ascii(attr, "location")) {
					sscanf(value, " %2[a-zA-Z]", ctx->location); // ISO 3166-1 alpha-2 two letter country code
				} else if (!wget_strcasecmp_ascii(attr, "priority") || !wget_strcasecmp_ascii(attr, "preference")) {
					sscanf(value, " %6d", &ctx->priority);
					// out-of-range values are mapped to 999999
					if (ctx->priority < 1 || ctx->priority > 999999)
						ctx->priority = 999999;
				}
			}
		} else {
			if (!wget_strcasecmp_ascii(dir, "/pieces/hash")) {
				_add_piece(ctx, value);
			} else if (!wget_strcasecmp_ascii(dir, "/hash")) {
				_add_file_hash(ctx, value);
			} else if (!wget_strcasecmp_ascii(dir, "/size")) {
				ctx->metalink->size = atoll(value);
			} else if (!wget_strcasecmp_ascii(dir, "/url")) {
				_add_mirror(ctx, value);
			}
		}
	}

	// release the heap copy, if one had to be made
	if (value != valuebuf)
		xfree(value);
}