/*
 * Derive the registrable domain of `host` and store it in `dest`.
 *
 * The host name is examined from the right:
 *   - no '.' at all: the whole host is assigned to dest;
 *   - the last label is a country code (per isCountryCode()):
 *       - if there is no further '.', the whole host is assigned;
 *       - if the label in front of the country code is not itself a TLD
 *         (per isTLD()), the result starts at that label
 *         (subdomain.domain.uk -> domain.uk);
 *       - otherwise one more label is included, as in the generic case;
 *   - generic case: the result starts at the label preceding the TLD part,
 *     or is the whole host when no further '.' exists.
 *
 * string_assign() is used when the whole host is the answer and
 * string_assign_ref() when the answer is a suffix of host->data; their
 * ownership semantics are defined elsewhere.
 *
 * NOTE(review): rfind() is not visible here; it is presumably a reverse
 * search for '.' starting at the given offset. When the dot found so far is
 * the very first character of the host, the offset passed is -1 -- confirm
 * that rfind() tolerates this.
 */
static void get_domain(const struct phishcheck* pchk,struct string* dest,struct string* host)
{
	char *name = host->data;
	char *dot = strrchr(name, '.');
	char *domain_dot;

	if (dot == NULL) {
		cli_dbgmsg("Phishcheck: Encountered a host without a tld? (%s)\n", name);
		string_assign(dest, host);
		return;
	}

	if (isCountryCode(pchk, dot + 1)) {
		const char *cc = dot + 1;

		/* step back to the dot in front of the label preceding the country code */
		dot = rfind(name, '.', dot - name - 1);
		if (dot == NULL) {
			cli_dbgmsg("Phishcheck: Weird, a name with only 2 levels (%s)\n", name);
			string_assign(dest, host);
			return;
		}

		/* label between `dot` and the country code is cc - dot - 1 bytes long */
		if (!isTLD(pchk, dot + 1, cc - dot - 1)) {
			/* e.g. subdomain.domain.uk: answer is domain.uk */
			string_assign_ref(dest, host, dot + 1);
			return;
		}
	}

	/* strip one more level: this label is the actual domain */
	domain_dot = rfind(name, '.', dot - name - 1);
	if (domain_dot == NULL) {
		/* nothing left to strip, e.g. sourceforge.net */
		string_assign(dest, host);
		return;
	}

	string_assign_ref(dest, host, domain_dot + 1);
}
// . return a pointer into "host" at the start of its TLD, or NULL if
//   isTLD() accepts none of the candidate suffixes
// . candidates are the last one, two and three dot-separated labels of the
//   host, tried shortest first; the longest accepted one wins because a
//   later match overwrites an earlier one
// . three labels are tried because of TLDs with two periods (the original
//   author cites "LKD.CO.IM" as the only one) (TODO: speed up?)
// . the original author requires host to be NUL terminated; this routine
//   itself only reads host[0] .. host[hostLen-1]
// . a NULL host or a hostLen <= 0 yields NULL
char *getTLD ( char *host , int32_t hostLen ) {
	// nothing to scan; this also keeps "hostEnd - 1" inside the buffer
	if ( ! host || hostLen <= 0 ) return NULL;
	char *hostEnd = host + hostLen;
	// make "s" point to the last char, then walk it back label by label
	char *s = hostEnd - 1;
	// longest suffix accepted by isTLD() so far
	char *tld = NULL;
	for ( int32_t label = 0 ; label < 3 ; label++ ) {
		// back up over the period found in the previous round
		if ( label > 0 ) s--;
		// find the previous period, or stop at the start of the host
		while ( s > host && *s != '.' ) s--;
		// point to the tld in question, skipping the period itself
		char *t = s;
		if ( *t == '.' ) t++;
		// is t a valid tld? if so, set "tld" to "t".
		// just because it's in the table doesn't mean we can't try
		// going up more, so keep looping
		if ( isTLD ( t , hostEnd - t ) ) tld = t;
		// reached the start of the host, there are no more labels
		if ( s == host ) break;
	}
	// we must have gotten the tld by this point, if there was a valid one
	return tld;
}
/* allocates memory */ static int get_host(const struct phishcheck* s,const char* URL,int isReal,int* phishy,const char **hstart, const char **hend) { int rc,ismailto = 0; const char* start; const char* end=NULL; if(!URL) { *hstart=*hend=NULL; return 0; } start = strstr(URL,"://"); if(!start) { if(!strncmp(URL,mailto,mailto_len)) { start = URL + mailto_len; ismailto = 1; } else if (!isReal && *phishy&REAL_IS_MAILTO) { /* it is not required to use mailto: in the displayed url, they might use to:, or whatever */ end = URL+strlen(URL)+1; start = URL + strcspn(URL,": ")+1; if (start==end) start = URL; ismailto = 1; } else { start=URL;/*URL without protocol*/ if(isReal) cli_dbgmsg("Phishcheck: Real URL without protocol: %s\n",URL); else ismailto=2;/*no-protocol, might be mailto, @ is no problem*/ } } else start += 3; /* :// */ if(!ismailto || !isReal) { const char *realhost,*tld; do { end = start + strcspn(start,":/?"); realhost = strchr(start,'@'); if(realhost == NULL || (start!=end && realhost>end)) { /*don't check beyond end of hostname*/ break; } tld = strrchr(realhost,'.'); rc = tld ? isTLD(s,tld,tld-realhost-1) : 0; if(rc < 0) return rc; if(rc) *phishy |= PHISHY_USERNAME_IN_URL;/* if the url contains a username that is there just to fool people, like http://[email protected]/ */ start = realhost+1;/*skip the username*/ } while(realhost);/*skip over multiple @ characters, text following last @ character is the real host*/ } else if (ismailto && isReal) *phishy |= REAL_IS_MAILTO; if(!end) { end = start + strcspn(start,":/?");/*especially important for mailto:[email protected]?subject=...*/ if(!end) end = start + strlen(start); } *hstart = start; *hend = end; return 0; }