/* urls can't contain null pointer, caller must ensure this */ static enum phish_status phishingCheck(const struct cl_engine* engine,struct url_check* urls) { struct url_check host_url; enum phish_status rc=CL_PHISH_NODECISION; int phishy=0; const struct phishcheck* pchk = (const struct phishcheck*) engine->phishcheck; if(!urls->realLink.data) return CL_PHISH_CLEAN; cli_dbgmsg("Phishcheck:Checking url %s->%s\n", urls->realLink.data, urls->displayLink.data); if(!strcmp(urls->realLink.data,urls->displayLink.data)) return CL_PHISH_CLEAN;/* displayed and real URL are identical -> clean */ if((rc = cleanupURLs(urls))) { if(isPhishing(rc))/* not allowed to decide this is phishing */ return CL_PHISH_CLEAN; return rc;/* URLs identical after cleanup */ } if(whitelist_check(engine,urls,0)) return CL_PHISH_WHITELISTED;/* if url is whitelist don't perform further checks */ if((!isURL(pchk, urls->displayLink.data) || !isRealURL(pchk, urls->realLink.data) )&& ( (phishy&PHISHY_NUMERIC_IP && !isNumericURL(pchk, urls->displayLink.data)) || !(phishy&PHISHY_NUMERIC_IP))) { cli_dbgmsg("Displayed 'url' is not url:%s\n",urls->displayLink.data); return CL_PHISH_TEXTURL; } if(urls->flags&DOMAINLIST_REQUIRED && domainlist_match(engine,urls->realLink.data,urls->displayLink.data,NULL,0,&urls->flags)) phishy |= DOMAIN_LISTED; else { /* although entire url is not listed, the host might be, * so defer phishing decisions till we know if host is listed*/ } url_check_init(&host_url); if((rc = url_get_host(pchk, urls,&host_url,DOMAIN_DISPLAY,&phishy))) { free_if_needed(&host_url); if(isPhishing(rc)) return CL_PHISH_CLEAN; return rc; } if(urls->flags&DOMAINLIST_REQUIRED) { if(!(phishy&DOMAIN_LISTED)) { if(domainlist_match(engine,host_url.displayLink.data,host_url.realLink.data,&urls->pre_fixup,1,&urls->flags)) phishy |= DOMAIN_LISTED; else { } } } /* link type filtering must occur after last domainlist_match */ if(urls->link_type & LINKTYPE_IMAGE && !(urls->flags&CHECK_IMG_URL)) return CL_PHISH_HOST_NOT_LISTED;/* its listed, but this link type is filtered */ if(urls->flags & DOMAINLIST_REQUIRED && !(phishy & DOMAIN_LISTED) ) { urls->flags &= urls->always_check_flags; if(!urls->flags) { free_if_needed(&host_url); return CL_PHISH_HOST_NOT_LISTED; } } if(urls->flags&CHECK_CLOAKING) { /*Checks if URL is cloaked. Should we check if it contains another http://, https://? No because we might get false positives from redirect services.*/ if(strchr(urls->realLink.data,0x1)) { free_if_needed(&host_url); return CL_PHISH_CLOAKED_NULL; } if(isEncoded(urls->displayLink.data)) { free_if_needed(&host_url); return CL_PHISH_HEX_URL; } } if(urls->displayLink.data[0]=='\0') { free_if_needed(&host_url); return CL_PHISH_CLEAN; } if(urls->flags&CHECK_SSL && isSSL(urls->displayLink.data) && !isSSL(urls->realLink.data)) { free_if_needed(&host_url); return CL_PHISH_SSL_SPOOF; } if(!urls->flags&CHECK_CLOAKING && urls->flags & DOMAINLIST_REQUIRED && !(phishy&DOMAIN_LISTED) ) { free_if_needed(&host_url); return CL_PHISH_HOST_NOT_LISTED; } if((rc = url_get_host(pchk, urls,&host_url,DOMAIN_REAL,&phishy))) { free_if_needed(&host_url); return rc; } if(urls->flags&DOMAINLIST_REQUIRED && !(phishy&DOMAIN_LISTED)) { free_if_needed(&host_url); return CL_PHISH_HOST_NOT_LISTED; } if(whitelist_check(engine,&host_url,1)) { free_if_needed(&host_url); return CL_PHISH_HOST_WHITELISTED; } if(urls->flags&HOST_SUFFICIENT) { if(!strcmp(urls->realLink.data,urls->displayLink.data)) { free_if_needed(&host_url); return CL_PHISH_HOST_OK; } if(urls->flags&DOMAIN_SUFFICIENT) { struct url_check domain_url; url_check_init(&domain_url); url_get_domain(pchk, &host_url,&domain_url); if(!strcmp(domain_url.realLink.data,domain_url.displayLink.data)) { free_if_needed(&host_url); free_if_needed(&domain_url); return CL_PHISH_DOMAIN_OK; } free_if_needed(&domain_url); } free_if_needed(&host_url); }/*HOST_SUFFICIENT*/ /*we failed to find a reason why the 2 URLs are different, this is definitely phishing*/ if(urls->flags&DOMAINLIST_REQUIRED && !(phishy&DOMAIN_LISTED)) return CL_PHISH_HOST_NOT_LISTED; return phishy_map(phishy,CL_PHISH_NOMATCH); }
/* urls can't contain null pointer, caller must ensure this */ static enum phish_status phishingCheck(const struct cl_engine* engine,struct url_check* urls) { struct url_check host_url; int rc = CL_PHISH_NODECISION; int phishy=0; const struct phishcheck* pchk = (const struct phishcheck*) engine->phishcheck; if(!urls->realLink.data) return CL_PHISH_CLEAN; cli_dbgmsg("Phishcheck:Checking url %s->%s\n", urls->realLink.data, urls->displayLink.data); if(!isURL(urls->realLink.data, 0)) { cli_dbgmsg("Real 'url' is not url:%s\n",urls->realLink.data); return CL_PHISH_CLEAN; } if(( rc = url_hash_match(engine->domainlist_matcher, urls->realLink.data, strlen(urls->realLink.data)) )) { if (rc == CL_PHISH_CLEAN) { cli_dbgmsg("not analyzing, not a real url: %s\n", urls->realLink.data); return CL_PHISH_CLEAN; } else { cli_dbgmsg("Hash matched for: %s\n", urls->realLink.data); return rc; } } if(!strcmp(urls->realLink.data,urls->displayLink.data)) return CL_PHISH_CLEAN;/* displayed and real URL are identical -> clean */ if (urls->displayLink.data[0] == '\0') { return CL_PHISH_CLEAN; } if((rc = cleanupURLs(urls))) { /* it can only return an error, or say its clean; * it is not allowed to decide it is phishing */ return rc < 0 ? rc : CL_PHISH_CLEAN; } cli_dbgmsg("Phishcheck:URL after cleanup: %s->%s\n", urls->realLink.data, urls->displayLink.data); if((!isURL(urls->displayLink.data, 1) ) && ( (phishy&PHISHY_NUMERIC_IP && !isNumericURL(pchk, urls->displayLink.data)) || !(phishy&PHISHY_NUMERIC_IP))) { cli_dbgmsg("Displayed 'url' is not url:%s\n",urls->displayLink.data); return CL_PHISH_CLEAN; } if(whitelist_check(engine, urls, 0)) return CL_PHISH_CLEAN;/* if url is whitelisted don't perform further checks */ url_check_init(&host_url); if((rc = url_get_host(urls, &host_url, DOMAIN_DISPLAY, &phishy))) { free_if_needed(&host_url); return rc < 0 ? rc : CL_PHISH_CLEAN; } if (domainlist_match(engine, host_url.displayLink.data,host_url.realLink.data,&urls->pre_fixup,1)) { phishy |= DOMAIN_LISTED; } else { urls->flags &= urls->always_check_flags; /* don't return, we may need to check for ssl/cloaking */ } /* link type filtering must occur after last domainlist_match */ if(urls->link_type & LINKTYPE_IMAGE && !(urls->flags&CHECK_IMG_URL)) { free_if_needed(&host_url); return CL_PHISH_CLEAN;/* its listed, but this link type is filtered */ } if(urls->flags&CHECK_CLOAKING) { /*Checks if URL is cloaked. Should we check if it contains another http://, https://? No because we might get false positives from redirect services.*/ if(strchr(urls->realLink.data,0x1)) { free_if_needed(&host_url); return CL_PHISH_CLOAKED_NULL; } } if(urls->flags&CHECK_SSL && isSSL(urls->displayLink.data) && !isSSL(urls->realLink.data)) { free_if_needed(&host_url); return CL_PHISH_SSL_SPOOF; } if (!(phishy & DOMAIN_LISTED)) { free_if_needed(&host_url); return CL_PHISH_CLEAN; } if((rc = url_get_host(urls,&host_url,DOMAIN_REAL,&phishy))) { free_if_needed(&host_url); return rc < 0 ? rc : CL_PHISH_CLEAN; } if(whitelist_check(engine,&host_url,1)) { free_if_needed(&host_url); return CL_PHISH_CLEAN; } if(!strcmp(urls->realLink.data,urls->displayLink.data)) { free_if_needed(&host_url); return CL_PHISH_CLEAN; } { struct url_check domain_url; url_check_init(&domain_url); url_get_domain(&host_url,&domain_url); if(!strcmp(domain_url.realLink.data,domain_url.displayLink.data)) { free_if_needed(&host_url); free_if_needed(&domain_url); return CL_PHISH_CLEAN; } free_if_needed(&domain_url); } free_if_needed(&host_url); /*we failed to find a reason why the 2 URLs are different, this is definitely phishing*/ return phishy_map(phishy,CL_PHISH_NOMATCH); }