/*
 * uri_normalized_copy:
 * @part: start of the URI component to normalize
 * @length: number of bytes of @part to copy (handed to g_strndup())
 * @unescape_extra: optional set of extra characters whose percent-encoded
 *   form should be decoded, or NULL for none
 *
 * Builds a normalized copy of a URI component:
 *  - "%XX" triplets that decode to an unreserved character (as decided by
 *    soup_char_is_uri_unreserved()) or to a character listed in
 *    @unescape_extra are replaced by the decoded character;
 *  - all other triplets, and a '%' that is not followed by two hex digits,
 *    are copied through unchanged;
 *  - any remaining byte that is not a graphic ASCII character is
 *    percent-encoded as "%XX" (uppercase hex).
 *
 * Returns: a newly allocated string; release it with g_free().
 */
static char *
uri_normalized_copy (const char *part, int length,
		     const char *unescape_extra)
{
	unsigned char *s, *d, c;
	char *normalized = g_strndup (part, length);
	gboolean need_fixup = FALSE;

	/* Decode in place: the output never grows, so d never overtakes s. */
	s = d = (unsigned char *)normalized;
	do {
		if (*s == '%') {
			/* The short-circuit keeps s[2] from being read when
			 * s[1] is already the terminator.
			 */
			if (!g_ascii_isxdigit (s[1]) ||
			    !g_ascii_isxdigit (s[2])) {
				*d++ = *s;
				continue;
			}

			/* NOTE(review): HEXCHAR is defined elsewhere; presumably it
			 * decodes the two hex digits following s — confirm.
			 */
			c = HEXCHAR (s);
			/* strchr() also matches the terminating NUL of the set,
			 * so "%00" must be excluded explicitly; otherwise it
			 * would be decoded to a raw NUL and truncate the result.
			 */
			if (soup_char_is_uri_unreserved (c) ||
			    (unescape_extra && c != '\0' &&
			     strchr (unescape_extra, c))) {
				*d++ = c;
				s += 2;
			} else {
				/* We leave it unchanged. We used to uppercase percent-encoded
				 * triplets but we do not do it any more as RFC3986 Section 6.2.2.1
				 * says that they only SHOULD be case normalized.
				 */
				*d++ = *s++;
				*d++ = *s++;
				*d++ = *s;
			}
		} else {
			/* The loop also copies the terminating NUL; it must not
			 * count as a character that needs escaping, or the fixup
			 * pass below would run on every call.
			 */
			if (*s != '\0' && !g_ascii_isgraph (*s))
				need_fixup = TRUE;
			*d++ = *s;
		}
	} while (*s++);

	if (need_fixup) {
		GString *fixed;

		/* Second pass: percent-encode every non-graphic byte. */
		fixed = g_string_new (NULL);
		s = (guchar *)normalized;
		while (*s) {
			if (g_ascii_isgraph (*s))
				g_string_append_c (fixed, *s);
			else
				g_string_append_printf (fixed, "%%%02X", (int)*s);
			s++;
		}
		g_free (normalized);
		normalized = g_string_free (fixed, FALSE);
	}

	return normalized;
}
Ejemplo n.º 2
0
/*
 * Parses zero or more " keyword[=value]" parameters starting at
 * argument[index].
 *
 * Each parameter is introduced by a single space.  A keyword starts with an
 * ASCII alphanumeric character and continues with alphanumerics or '-'.
 * An optional value follows '=' and extends over graphic ASCII characters
 * up to (not including) the next '='.
 *
 * When `parameters` is non-NULL every parsed pair is stored in *parameters
 * (the table is created on demand; a keyword without value maps to NULL).
 * On success *parsed_position receives the number of characters consumed,
 * counted from `index`, and TRUE is returned.  Malformed keywords are
 * reported through RETURN_ERROR_WITH_POSITION (defined elsewhere;
 * presumably it sets `error` and returns FALSE — confirm).
 */
static gboolean
parse_parameters (const gchar *argument, gint index, gint *parsed_position,
                  GHashTable **parameters, GError **error)
{
    gint consumed = 0;

    while (argument[index + consumed] == ' ') {
        /* Skip the separating space. */
        gint keyword_offset = index + consumed + 1;
        const gchar *keyword = argument + keyword_offset;
        const gchar *value = NULL;
        gint keyword_length = 1;
        gint value_length = 0;
        gint token_length;

        if (keyword[0] == '\0') {
            RETURN_ERROR_WITH_POSITION("parameter keyword is missing",
                                       argument, keyword_offset);
        }
        if (!g_ascii_isalnum(keyword[0])) {
            RETURN_ERROR_WITH_POSITION("parameter keyword should start with "
                                       "alphabet or digit",
                                       argument, keyword_offset);
        }

        /* The first character is already validated; extend over the rest. */
        while (g_ascii_isalnum(keyword[keyword_length]) ||
               keyword[keyword_length] == '-') {
            keyword_length++;
        }
        token_length = keyword_length;

        if (keyword[keyword_length] == '=') {
            value = keyword + keyword_length + 1;
            while (g_ascii_isgraph(value[value_length]) &&
                   value[value_length] != '=') {
                value_length++;
            }
            /* Account for the '=' and the value itself. */
            token_length += 1 + value_length;
        }

        if (parameters) {
            gchar *stored_keyword;
            gchar *stored_value = NULL;

            if (*parameters == NULL) {
                *parameters = g_hash_table_new_full(g_str_hash,
                                                    g_str_equal,
                                                    g_free,
                                                    g_free);
            }

            stored_keyword = g_strndup(keyword, keyword_length);
            if (value)
                stored_value = g_strndup(value, value_length);
            g_hash_table_insert(*parameters, stored_keyword, stored_value);
        }

        /* One for the leading space plus the whole "keyword[=value]". */
        consumed += token_length + 1;
    }

    *parsed_position = consumed;
    return TRUE;
}
Ejemplo n.º 3
0
static gboolean
isvalid_rfc2428_delimiter(const guchar c)
{
    /* RFC2428 sect. 2 states rules for a valid delimiter */
    const gchar *forbidden = "0123456789abcdef.:";
    if (!g_ascii_isgraph(c))
        return FALSE;
    if (strchr(forbidden, g_ascii_tolower(c)))
        return FALSE;
    return TRUE;
}
Ejemplo n.º 4
0
/*
 * Parses the domain that starts at argument[index].
 *
 * Two forms are accepted:
 *  - a bracketed literal "[...]", in which a backslash quotes the next
 *    character (which must satisfy IS_TEXT) and which must be closed by ']';
 *  - a dotted name whose first character is an ASCII alphanumeric and whose
 *    remaining characters are alphanumerics, '-' or '.' separators.
 *
 * On success *parsed_position receives the number of characters consumed,
 * counted from `index`, and TRUE is returned.  Errors are reported through
 * RETURN_ERROR_WITH_POSITION (defined elsewhere; presumably it sets `error`
 * and returns FALSE — confirm).
 */
static gboolean
parse_domain (const gchar *argument, gint index, gint *parsed_position,
              GError **error)
{
    gint i;
    const gchar *domain;

    domain = argument + index;
    if (domain[0] == '[') {
        i = 1;
        while (TRUE) {
            if (domain[i] == '\0') {
                /* End of input.  This must be tested explicitly because
                 * g_ascii_iscntrl('\0') is true: without it the scan would
                 * step over the terminator and read past the string. */
                break;
            } else if (domain[i] == '[' || domain[i] == ']') {
                break;
            } else if (domain[i] == '\\') {
                i++;
                /* A backslash at the very end quotes nothing; never step
                 * past the terminator whatever IS_TEXT accepts. */
                if (domain[i] != '\0' && IS_TEXT(domain[i])) {
                    i++;
                } else {
                    RETURN_ERROR_WITH_POSITION("invalid quoted character "
                                               "in domain",
                                               argument, index + i);
                }
            } else if (g_ascii_isspace(domain[i])) {
                break;
            } else if (g_ascii_iscntrl(domain[i]) ||
                       g_ascii_isgraph(domain[i])) {
                i++;
            } else {
                break;
            }
        }
        /* Whatever stopped the scan, only ']' closes the literal. */
        if (domain[i] != ']')
            RETURN_ERROR_WITH_POSITION("terminate ']' is missing in domain",
                                       argument, index + i);
        i++;
    } else {
        i = 0;
        if (!g_ascii_isalnum(domain[i]))
            RETURN_ERROR_WITH_POSITION("domain should start with "
                                       "alphabet or digit",
                                       argument, index + i);
        /* Consume one label per iteration; a '.' continues with the next. */
        do {
            i++;
            while (g_ascii_isalnum(domain[i]) || domain[i] == '-') {
                i++;
            }
        } while (domain[i] == '.');
    }

    *parsed_position = i;
    return TRUE;
}
Ejemplo n.º 5
0
/*
 * Parses the local part that starts at argument[index].
 *
 * Two forms are accepted:
 *  - a quoted string "...", in which a backslash quotes the next character
 *    (which must satisfy IS_TEXT) and which must be closed by '"';
 *  - a sequence of runs of IS_ATOM_TEXT characters separated by '.'.
 *
 * On success *parsed_position receives the number of characters consumed,
 * counted from `index`, and TRUE is returned.  Errors are reported through
 * RETURN_ERROR_WITH_POSITION (defined elsewhere; presumably it sets `error`
 * and returns FALSE — confirm).
 */
static gboolean
parse_local_part (const gchar *argument, gint index, gint *parsed_position,
                  GError **error)
{
    gint i;
    const gchar *local_part;

    local_part = argument + index;
    if (local_part[0] == '"') {
        i = 1;
        while (TRUE) {
            if (local_part[i] == '\0') {
                /* End of input.  This must be tested explicitly because
                 * g_ascii_iscntrl('\0') is true: without it the scan would
                 * step over the terminator and read past the string. */
                break;
            } else if (local_part[i] == '\\') {
                i++;
                /* A backslash at the very end quotes nothing; never step
                 * past the terminator whatever IS_TEXT accepts. */
                if (local_part[i] != '\0' && IS_TEXT(local_part[i])) {
                    i++;
                } else {
                    RETURN_ERROR_WITH_POSITION("invalid quoted character "
                                               "in local part",
                                               argument, index + i);
                }
            } else if (local_part[i] == '"') {
                break;
            } else if (g_ascii_isspace(local_part[i])) {
                break;
            } else if (g_ascii_iscntrl(local_part[i]) ||
                       g_ascii_isgraph(local_part[i])) {
                i++;
            } else {
                break;
            }
        }
        /* Whatever stopped the scan, only '"' closes the quoted string. */
        if (local_part[i] != '"')
            RETURN_ERROR_WITH_POSITION("end quote for local part is missing",
                                       argument, index + i);
        i++;
    } else {
        /* Start at -1 so the first i++ lands on index 0; each iteration
         * then consumes one run of atom text, and a '.' continues with
         * the next run. */
        i = -1;
        do {
            i++;
            while (IS_ATOM_TEXT(local_part[i])) {
                i++;
            }
        } while (local_part[i] == '.');
    }

    *parsed_position = i;
    return TRUE;
}
Ejemplo n.º 6
0
/*
 * file_utils_filename_is_uri:
 * @filename: the string to inspect; must not be NULL
 * @error:    return location for an error, or NULL
 *
 * Decides whether @filename looks like a URI, i.e. contains "://" preceded
 * by a non-empty scheme made only of ASCII letters, digits, '+', '-' and
 * '.', and is valid UTF-8 as a whole.
 *
 * Returns: TRUE if @filename is such a URI.  FALSE without setting @error
 * when it contains no "://" at all; FALSE with @error set when the scheme
 * is invalid or the string is not valid UTF-8.
 */
gboolean
file_utils_filename_is_uri (const gchar  *filename,
                            GError      **error)
{
    const gchar *separator;
    gchar       *scheme;
    gchar       *canon;
    gboolean     scheme_ok;

    g_return_val_if_fail (filename != NULL, FALSE);
    g_return_val_if_fail (error == NULL || *error == NULL, FALSE);

    separator = strstr (filename, "://");

    if (separator == NULL)
        return FALSE;

    scheme = g_strndup (filename, separator - filename);
    canon  = g_strdup (scheme);

    /* Any character outside the allowed set is replaced, so the copy only
     * stays equal to the scheme when every character was allowed.
     */
    g_strcanon (canon, G_CSET_A_2_Z G_CSET_a_2_z G_CSET_DIGITS "+-.", '-');

    /* The first-character test rejects an empty scheme. */
    scheme_ok = (strcmp (scheme, canon) == 0 && g_ascii_isgraph (canon[0]));

    if (! scheme_ok)
    {
        g_set_error (error, G_FILE_ERROR, 0,
                     _("'%s:' is not a valid URI scheme"), scheme);
    }

    g_free (scheme);
    g_free (canon);

    if (! scheme_ok)
        return FALSE;

    if (g_utf8_validate (filename, -1, NULL))
        return TRUE;

    g_set_error_literal (error,
                         G_CONVERT_ERROR,
                         G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
                         _("Invalid character sequence in URI"));

    return FALSE;
}
Ejemplo n.º 7
0
struct rspamd_url *
rspamd_html_process_url (rspamd_mempool_t *pool, const gchar *start, guint len,
		struct html_tag_component *comp)
{
	struct rspamd_url *url;
	gchar *decoded;
	gint rc;
	gsize decoded_len;
	const gchar *p, *s;
	gchar *d;
	guint i, dlen;
	gboolean has_bad_chars = FALSE;
	static const gchar hexdigests[16] = "0123456789abcdef";

	p = start;

	/* Strip spaces from the url */
	/* Head spaces */
	while (g_ascii_isspace (*p) && p < start + len) {
		p ++;
		start ++;
		len --;
	}

	if (comp) {
		comp->start = p;
		comp->len = len;
	}

	/* Trailing spaces */
	p = start + len - 1;

	while (g_ascii_isspace (*p) && p >= start) {
		p --;
		len --;

		if (comp) {
			comp->len --;
		}
	}

	s = start;
	dlen = 0;

	for (i = 0; i < len; i ++) {
		if (G_UNLIKELY (((guint)s[i]) < 0x80 && !g_ascii_isgraph (s[i]))) {
			dlen += 3;
		}
		else {
			dlen ++;
		}
	}

	decoded = rspamd_mempool_alloc (pool, dlen + 1);
	d = decoded;

	/* We also need to remove all internal newlines and encode unsafe characters */
	for (i = 0; i < len; i ++) {
		if (G_UNLIKELY (s[i] == '\r' || s[i] == '\n')) {
			continue;
		}
		else if (G_UNLIKELY (((guint)s[i]) < 0x80 && !g_ascii_isgraph (s[i]))) {
			/* URL encode */
			*d++ = '%';
			*d++ = hexdigests[(s[i] >> 4) & 0xf];
			*d++ = hexdigests[s[i] & 0xf];
			has_bad_chars = TRUE;
		}
		else {