Esempio n. 1
0
static int url_get_host(const struct phishcheck* pchk, struct url_check* url,struct url_check* host_url,int isReal,int* phishy)
{
	const char *start, *end;
	struct string* host = isReal ? &host_url->realLink : &host_url->displayLink;
	const char* URL = isReal ? url->realLink.data : url->displayLink.data;
	int rc;
	if ((rc = get_host(pchk, URL, isReal, phishy, &start, &end))) {
		return rc;
	}
	if(!start || !end) {
		string_assign_null(host);
	}
	else {
		if(( rc = string_assign_dup(host,start,end) ))
			return rc;
	}
	cli_dbgmsg("Phishcheck:host:%s\n", host->data);
	if(!isReal) {
		url->pre_fixup.host_start = start - URL;
		url->pre_fixup.host_end = end - URL;
	}
	if(!host->data)
		return CL_PHISH_CLEANUP_OK;
	if(*phishy&REAL_IS_MAILTO)
		return CL_PHISH_MAILTO_OK;
	if(strchr(host->data,' ')) {
		string_free(host);
		return CL_PHISH_TEXTURL;
	}
	if(url->flags&CHECK_CLOAKING && !cli_regexec(&pchk->preg_hexurl,host->data,0,NULL,0)) {
		/* uses a regex here, so that we don't accidentally block 0xacab.net style hosts */
		string_free(host);
		return CL_PHISH_HEX_URL;
	}
	if(isReal && host->data[0]=='\0')
		return CL_PHISH_CLEAN;/* link without domain, such as: href="/isapi.dll?... */
	if(isNumeric(host->data)) {
		*phishy |= PHISHY_NUMERIC_IP;
	}
	return CL_PHISH_NODECISION;
}
Esempio n. 2
0
/* allocates memory */
static int
cleanupURL(struct string *URL,struct string *pre_URL, int isReal)
{
	char *begin = URL->data;
	const char *end;
	size_t len;
	
	clear_msb(begin);
	/*if(begin == NULL)
		return;*/
	/*TODO: handle hex-encoded IPs*/
	while(isspace(*begin))
		begin++;

	len = strlen(begin);
	if(len == 0) {
		string_assign_null(URL);
		string_assign_null(pre_URL);
		return 0;
	}

	end = begin + len - 1;
	/*cli_dbgmsg("%d %d\n", end-begin, len);*/
	if(begin >= end) {
		string_assign_null(URL);
		string_assign_null(pre_URL);
		return 0;
	}
	while(isspace(*end))
		end--;
	/*TODO: convert \ to /, and stuff like that*/
	/* From mailscanner, my comments enclosed in {} */
	if(!strncmp(begin,dotnet,dotnet_len) || !strncmp(begin,adonet,adonet_len) || !strncmp(begin,aspnet,aspnet_len)) {
		string_assign_null(URL);
		string_assign_null(pre_URL);
	}
	else {
		size_t host_len;
		char* host_begin;
		int rc;

		str_replace(begin,end,'\\','/');
		/* some broken MUAs put > in the href, and then
		 * we get a false positive, so remove them */
		str_replace(begin,end,'<',' ');
		str_replace(begin,end,'>',' ');
		str_replace(begin,end,'\"',' ');
		str_replace(begin,end,';',' ');
		str_strip(&begin,&end,lt,lt_len);
		str_strip(&begin,&end,gt,gt_len);
		/* convert hostname to lowercase, but only hostname! */
		host_begin = strchr(begin,':');
		while(host_begin && host_begin[1]=='/') host_begin++;
		if(!host_begin) host_begin=begin;
		else host_begin++;
		host_len = strcspn(host_begin,"/?");
		str_make_lowercase(host_begin,host_len);
		/* convert %xx to real value */
		str_hex_to_char(&begin,&end);
		if(isReal) {
			/* htmlnorm converts \n to space, so we have to strip spaces */
			str_strip(&begin, &end, " ", 1);
		}
		else {
			/* trim space */
			while((begin <= end) && (begin[0]==' '))  begin++;
			while((begin <= end) && (end[0]==' ')) end--;
		}
		if (( rc = string_assign_dup(isReal ? URL : pre_URL,begin,end+1) )) {
			string_assign_null(URL);
			return rc;
		}
		if(!isReal) {
			str_fixup_spaces(&begin,&end);
			if (( rc = string_assign_dup(URL,begin,end+1) )) {
				return rc;
			}
		}
		/*cli_dbgmsg("%p::%s\n",URL->data,URL->data);*/
	}
	return 0;
}
Esempio n. 3
0
/* allocates memory */
static int
cleanupURL(struct string *URL,struct string *pre_URL, int isReal)
{
	char *begin = URL->data;
	const char *end;
	size_t len;

	clear_msb(begin);
	/*if(begin == NULL)
		return;*/
	/*TODO: handle hex-encoded IPs*/
	while(isspace(*begin))
		begin++;

	len = strlen(begin);
	if(len == 0) {
		string_assign_null(URL);
		string_assign_null(pre_URL);
		return 0;
	}

	end = begin + len - 1;
	/*cli_dbgmsg("%d %d\n", end-begin, len);*/
	if(begin >= end) {
		string_assign_null(URL);
		string_assign_null(pre_URL);
		return 0;
	}
	while(isspace(*end))
		end--;
	/* From mailscanner, my comments enclosed in {} */
	if(!strncmp(begin,dotnet,dotnet_len) || !strncmp(begin,adonet,adonet_len) || !strncmp(begin,aspnet,aspnet_len)) {
		string_assign_null(URL);
		string_assign_null(pre_URL);
	}
	else {
		size_t host_len;
		char* host_begin;
		int rc;

		str_replace(begin,end,'\\','/');
		/* find beginning of hostname, because:
		 * - we want to keep only protocol, host, and 
		 *  strip path & query parameter(s) 
		 * - we want to make hostname lowercase*/
		host_begin = strchr(begin,':');
		while(host_begin && (host_begin < end) && (host_begin[1] == '/'))  host_begin++;
		if(!host_begin) host_begin=begin;
		else host_begin++;
		host_len = strcspn(host_begin,":/?");
	        if(host_begin + host_len > end + 1) {
			/* prevent hostname extending beyond end, it can happen
			 * if we have spaces at the end, we don't want those part of 
			 * the hostname */
			host_len = end - host_begin + 1;
		} else {
			/* cut the URL after the hostname */
			/* @end points to last character we want to be part of the URL */
			end = host_begin + host_len - 1;
		}
		host_begin[host_len] = '\0';
		/* convert hostname to lowercase, but only hostname! */
		str_make_lowercase(host_begin, host_len);
		/* some broken MUAs put > in the href, and then
		 * we get a false positive, so remove them */
		str_replace(begin,end,'<',' ');
		str_replace(begin,end,'>',' ');
		str_replace(begin,end,'\"',' ');
		str_replace(begin,end,';',' ');
		str_strip(&begin,&end,lt,lt_len);
		str_strip(&begin,&end,gt,gt_len);
		/* convert %xx to real value */
		str_hex_to_char(&begin,&end);
		if(isReal) {
			/* htmlnorm converts \n to space, so we have to strip spaces */
			str_strip(&begin, &end, " ", 1);
		}
		else {
			/* trim space */
			while((begin <= end) && (begin[0]==' '))  begin++;
			while((begin <= end) && (end[0]==' ')) end--;
		}
		if (( rc = string_assign_dup(isReal ? URL : pre_URL,begin,end+1) )) {
			string_assign_null(URL);
			return rc;
		}
		if(!isReal) {
			str_fixup_spaces(&begin,&end);
			if (( rc = string_assign_dup(URL, begin, end+1) )) {
				return rc;
			}
		}
	}
	return 0;
}