示例#1
0
// Finds the first e-mail-like token ("local@domain") in the NUL-terminated
// string src.
//
// A token qualifies when:
//   - at least one IsURLChar() character other than '@' precedes the '@', and
//   - the run of IsURLChar() characters starting at the '@' contains a '.'
//     and does not end with a '.'.
//
// On success returns a pointer to the first character of the token and stores
// its length, in bytes, in len.  Returns NULL when nothing qualifies; len is
// left untouched in that case.
const char* CBBSHyperLink::FindEMailLink(const char *src, int &len) const
{
	const char* plink = NULL;
	while (*src)
	{
		// Skip everything that cannot start a local part.  get_chw() is
		// presumably the byte width of the character at src (multi-byte text).
		while (*src && (*src == '@' || !IsURLChar(*src)))
			src += get_chw(src);
		plink = src;

		// Consume the candidate local part, stopping at '@' or at the first
		// character that is not a URL character.
		while (IsURLChar(*src) && *src != '@')
			src++;
		if (*src == '@' && plink != src)
		{
			// Scan the rest of the token (starting at the '@' itself) and
			// remember whether a dot was seen.
			const char* pend = src;
			bool has_dot = false;
			while (IsURLChar(*pend))
			{
				if (*pend == '.')
					has_dot = true;
				pend++;
			}
			if (pend > src && has_dot && *(pend - 1) != '.')
			{
				// Subtract the pointers directly: casting each pointer to int
				// first truncates addresses on 64-bit targets.
				len = int(pend - plink);
				return plink;
			}
		}
		// Not an address: src is left on the '@' or separator, which the
		// skip loop above steps over on the next pass.
	}
	return NULL;
}
示例#2
0
/*
 * ValidateURL
 *
 * Accepts a URL only if it is non-NULL, starts with "http://" (compared
 * case-insensitively via ircstrncasecmp) and every character after that
 * prefix satisfies IsURLChar.
 *
 * Returns NS_SUCCESS when the URL is accepted, NS_FAILURE otherwise.
 */
int ValidateURL( const char *url )
{
	const char *cursor;

	if( !url )
		return NS_FAILURE;
	/* Reject anything that is not an http:// URL */
	if( ircstrncasecmp( url, "http://", 7 ) != 0 )
		return NS_FAILURE;
	/* Walk the remainder; a single disallowed character fails the URL */
	for( cursor = url + 7; *cursor != '\0'; cursor++ )
	{
		if( !IsURLChar( *cursor ) )
			return NS_FAILURE;
	}
	return NS_SUCCESS;
}
示例#3
0
// Finds the first hyperlink in the NUL-terminated string src: either a
// "scheme://..." URL or an e-mail address as recognised by FindEMailLink(),
// whichever starts earlier.
//
// On success returns a pointer to the first character of the link and stores
// its length, in bytes, in len.  When only an e-mail address is found, both
// the pointer and len are the ones produced by FindEMailLink().  Returns NULL
// when neither kind of link is present.
const char* CBBSHyperLink::FindHyperLink(const char *src, int &len) const
{
	// Look for an e-mail address up front; this also primes len for the
	// paths below that return pemail.
	const char* pemail = FindEMailLink(src, len);
	const char* plink = NULL;
	while (*src)
	{
		// Skip to the next character that may belong to a URL scheme.
		// get_chw() is presumably the byte width of the character at src.
		while (*src && !IsURLSchemeChar(*src))
			src += get_chw(src);
		plink = src;

		// Consume the candidate scheme name.
		while (IsURLSchemeChar(*src))
			src++;
		if (strncmp(src, "://", 3) == 0)
		{
			// The link extends over the run of URL characters that starts at
			// the "://" separator.
			const char* pend = src;
			while (IsURLChar(*pend))
				pend++;
			if (pend > src)
			{
				// An e-mail address that starts before this URL wins.
				if (pemail && pemail < plink)
					return pemail;

				// Check whether the text ends in a known scheme; if so the
				// link starts at that scheme rather than at the beginning of
				// the whole run of scheme characters.
				for (int i = 0;i < links.GetSize();i++)
				{
					int scheme_len = links[i].scheme.GetLength();
					// Compare lengths before forming the pointer so it never
					// points in front of the scheme run.
					if (scheme_len > src - plink)
						continue;
					const char* candidate = src - scheme_len;
					if (strnicmp(candidate, links[i].scheme, scheme_len) == 0)
					{
						plink = candidate;
						break;
					}
				}
				// Subtract the pointers directly: casting each pointer to int
				// first truncates addresses on 64-bit targets.
				len = int(pend - plink);
				return plink;
			}
		}
	}
	return pemail;
}
示例#4
0
// URL-encodes the first n bytes of src into dst as a NUL-terminated string.
//
// Bytes accepted by IsURLChar() are copied unchanged, a space becomes '+',
// and every other byte becomes '%' followed by two digits from Get16Char().
//
// dst    destination buffer
// n_dst  capacity of dst in bytes, including the terminating NUL
// src    input bytes (need not be NUL-terminated)
// n      number of input bytes to encode; values <= 0 encode nothing
//
// Returns the number of characters written, excluding the terminating NUL.
// At most n_dst - 1 characters are written; when the buffer is too small the
// output is cut at the last complete encoded unit (never in the middle of a
// "%XX" triple) and is still NUL-terminated.  Returns 0 without writing
// anything if dst is null or n_dst is 0.
//
// NOTE(review): a space is emitted as '+'.  If this output is used for an
// OAuth 1.0 signature base string, RFC 5849 requires "%20" -- confirm against
// the callers.
int OAuth::UrlEncode(char* dst, size_t n_dst, const char* src, int n) {
	// Without room for at least the terminator nothing can be stored safely.
	if(!dst || n_dst == 0)
		return 0;

	size_t wcur = 0;
	if(src) {
		// Test the index before reading, so src[n] is never touched.
		for(int rcur = 0; rcur < n; ++rcur) {
			const unsigned char c = src[rcur];
			const bool verbatim = IsURLChar(c) ? true : false;
			const size_t need = (verbatim || c == ' ') ? 1 : 3;

			// Keep one byte in reserve for the terminating NUL.
			if(n_dst - wcur <= need)
				break;

			if(verbatim)
				dst[wcur++] = c;
			else if(c == ' ')
				dst[wcur++] = '+';
			else {
				dst[wcur++] = '%';
				dst[wcur++] = Get16Char(c>>4);
				dst[wcur++] = Get16Char(c&0x0f);
			}
		}
	}
	dst[wcur] = '\0';
	return static_cast<int>(wcur);
}
示例#5
0
int URLDetector::FindURL(const wxChar *text, int& len)
{
   // offset of the current value of text from the initial one
   int offset = 0;

match:
   int pos = scan(text, len);
   if ( !len )
      return -1;

   // the provisional start and end of the URL, will be changed below
   const wxChar *start = text + pos;
   const wxChar *p = start + len;

   // there are 2 different cases: a mailto: URL or a mail address and
   // anything else which we need to treat differently
   bool isMail = *start == '@';

   if ( isMail )
   {
      // look for the start of the address
      start--;
      while ( start > text && IsLocalPartChar(*start) )
         start--;

      // have we stopped at '<'?
      bool hasAngleBracket = *start == '<';
      if ( !hasAngleBracket )
      {
         if ( !IsLocalPartChar(*start) )
         {
            // we went too far backwards
            start++;
         }
         //else: we stopped at the start of the text
      }
      //else: keep '<' as part of the URL

      // now look for the end of it
      while ( *p && IsDomainChar(*p) )
      {
         p++;
      }

      // finally we should either have the brackets from both sides or none
      // at all
      if ( hasAngleBracket )
      {
         if ( *p == '>' )
         {
            // take the right bracket as well
            p++;
         }
         else
         {
            // forget about the left one
            start++;
         }
      }
   }
   else // !bare mail address
   {
      for ( ;; )
      {
         size_t lenURL = 0;
         while ( IsURLChar(*p) )
         {
            lenURL++;
            p++;
         }

         // URLs are frequently so long that they're spread across multiple
         // lines, so try to see if this might be the case here
         //
         // first of all we need to check whether it is at the end of line but
         // we should allow some trailing spaces
         const wxChar* q = p;
         while ( *q == ' ' )
            q++;

         if ( q[0] != '\r' || q[1] != '\n' )
            break; // not at the line end

         // also check if it's really long enough to be wrapped:
         // the short URLs normally shouldn't be wrapped
         static const size_t URL_WRAP_LEN = 30; // min len of wrapped URL
         if ( lenURL < URL_WRAP_LEN )
            break; // too short

         if ( !IsURLChar(q[2]) )
            break; // doesn't seem to be continued on the next line

         // heuristic text for end of URL detection
         if ( p - start > 5 && !CanBeWrapped(p) )
         {
            // it seems that the URL ends here
            break;
         }

         p = q + 2; // go to the start of next line

         // Check that the beginning of next line is not the start of
         // another URL.
         //
         // Note that although '@' alone is recognized as the beginning
         // of an URL: here it should not be the case.
         int nextlen = 0;
         int nextpos = scan(p, nextlen);
         if ( nextlen && nextpos == 0 && *p != '@')
         {
            p -= 2;

            // The start of the next line being the start of an URL on its own,
            // do not join the two.
            break;
         }

         // check whether the next line starts with a word -- this is a good
         // indication that the URL hasn't wrapped
         q = p;
         while ( wxIsalpha(*q) )
            q++;

         if ( *q == _T(' ') || (wxStrchr(_T(".,:;"), *q) && q[1] == _T(' ')) )
         {
            // looks like we've a word (i.e. sequence of letters terminated by
            // space or punctuation) at the start of the next line
            p -= 2;
            break;
         }

         // another special case: subsequent dashes are very unusual in URLs
         // but often used as separator lines, so we assume that they indicate
         // the end of the URL if we find them on the next line.
         if ( p[0] == '-' && p[1] == '-' )
            break;

         // it might be a wrapped URL but it might be not: it seems like we
         // get way too many false positives if we suppose that it's always
         // the case... so restrict the wrapped URLs detection to the case
         // when they occur at the beginning of the line, possibly after some
         // white space as this is how people usually format them
         q = start;
         while ( q >= text && *q != '\n' )
         {
            q--;

            if ( !wxIsspace(*q) )
               break;
         }

         // Does the URL start at the beginning of the line, or does it have
         // a '<' just in front?
         if ( q >= text && *q != '\n' && *q != '<')
            break;

         // it did occur at the start (or after '<'), suppose the URL is
         // wrapped and so we continue on the next line (and no need to test
         // the first character, it had been already done above)
         p++;
      }
   }

   // truncate any punctuation at the end
   while ( strchr(".:,)]!?", *(p - 1)) )
      p--;

   // additional checks for the matches which didn't have an explicit scheme
   if ( isMail || text[pos + len - 3 /* len of "://" */ ] != _T(':') )
   {
      // '@' matches may result in false positives, as not every '@' character
      // is inside a mailto URL so try to weed them out by requiring that the
      // mail address has a reasonable minimal length ("*****@*****.**" and
      // "www.xy.fr" are probably the shortest ones we can have, hence 8)
      // which at least avoids matching the bare '@'s
      bool good = (p - start) >= 8;

      if ( good )
      {
         // also check that we have at least one dot in the domain part for the
         // mail addresses
         const wxChar *
            pDot = wxTmemchr(text + pos + 1, '.', p - text - pos - 1);
         if ( !pDot )
         {
            good = false;
         }
         else if ( !isMail )
         {
            // and has either two dots or at least a slash the other URLs,
            // otherwise it probably isn't an address/URL neither (stuff like
            // "... using ftp.If you ... " shouldn't be recognized as an URL)
            good = wxTmemchr(pDot + 1, '.', p - pDot - 1) != NULL ||
                     wxTmemchr(pDot + 1, '/', p - pDot - 1) != NULL;
         }
      }

      if ( !good )
      {
         const int offDiff = pos + len;
         offset += offDiff;
         text += offDiff;

         // slightly more efficient than recursion...
         goto match;
      }
   }

   // return the length of the match
   len = p - start;

   return start - text + offset;
}