/**
 * Locates the first e-mail-like token ("local@domain") in a NUL-terminated
 * string.
 *
 * A candidate is a non-empty run of URL characters (excluding '@'), followed
 * by '@' and a run of URL characters that contains at least one '.' and does
 * not end with '.'.
 *
 * @param src  NUL-terminated text to search.
 * @param len  Receives the length of the match; written only when a match
 *             is found.
 * @return Pointer to the first character of the match inside src, or NULL
 *         when no candidate is found.
 */
const char* CBBSHyperLink::FindEMailLink(const char *src, int &len) const
{
    while (*src)
    {
        // Skip everything that cannot start the local part: '@' itself and
        // any non-URL character.
        // NOTE(review): get_chw() presumably returns the byte width of the
        // (possibly multi-byte) character at src -- confirm.
        while (*src && (*src == '@' || !IsURLChar(*src)))
            src += get_chw(src);

        // Candidate local part: URL characters up to (not including) '@'.
        const char* plink = src;
        while (IsURLChar(*src) && *src != '@')
            src++;

        if (*src == '@' && plink != src)
        {
            // Scan the rest of the token starting at the '@', remembering
            // whether a dot was seen on the way.
            const char* pend = src;
            bool has_dot = false;
            while (IsURLChar(*pend))
            {
                if (*pend == '.')
                    has_dot = true;
                pend++;
            }

            if (pend > src && has_dot && *(pend - 1) != '.')
            {
                // Subtract the pointers directly: converting a pointer to
                // int truncates it on 64-bit targets.
                len = static_cast<int>(pend - plink);
                return plink;
            }
        }
    }
    return NULL;
}
/*
 * ValidateURL
 *
 * Accepts a URL only when it is non-NULL, begins with "http://" (compared
 * through ircstrncasecmp) and every character after that prefix satisfies
 * IsURLChar.
 *
 * Returns NS_SUCCESS when the URL is accepted, NS_FAILURE otherwise.
 */
int ValidateURL( const char *url )
{
	static const char scheme[] = "http://";
	const int scheme_len = (int)( sizeof( scheme ) - 1 );
	const char *cursor;

	/* Reject a missing URL or one lacking the mandatory prefix */
	if( !url || ircstrncasecmp( url, scheme, scheme_len ) != 0 )
		return NS_FAILURE;

	/* Every character following the prefix must be a URL character */
	for( cursor = url + scheme_len; *cursor != '\0'; cursor++ )
	{
		if( !IsURLChar( *cursor ) )
			return NS_FAILURE;
	}
	return NS_SUCCESS;
}
const char* CBBSHyperLink::FindHyperLink(const char *src, int &len) const { const char* pemail = FindEMailLink(src, len); const char* plink = NULL; while (*src) { while (*src && !IsURLSchemeChar(*src)) src += get_chw(src); plink = src; while (IsURLSchemeChar(*src)) src++; if (strncmp(src, "://", 3) == 0) { const char* pend = src; while (IsURLChar(*pend)) pend++; if (pend > src) { if (pemail && pemail < plink) return pemail; //檢查是否為已知連結 for (int i = 0;i < links.GetSize();i++) { int scheme_len = links[i].scheme.GetLength(); const char* _plink = src - scheme_len; if (_plink >= plink && strnicmp(_plink, links[i].scheme, scheme_len) == 0) { plink = _plink; break; } } len = int(pend) - int(plink); return plink; } } } return pemail; }
/**
 * URL-encodes the first n bytes of src into dst.
 *
 * Bytes accepted by IsURLChar() are copied unchanged, a space becomes '+',
 * and every other byte is written as '%' followed by two characters obtained
 * from Get16Char() for the high and the low nibble.
 *
 * The output never exceeds n_dst bytes including the terminating NUL. When
 * the encoded form does not fit, encoding stops before the first unit that
 * would overflow, so a "%XX" escape is never split.
 *
 * @param dst    Destination buffer.
 * @param n_dst  Size of dst in bytes, including room for the terminator.
 * @param src    Input bytes; need not be NUL-terminated.
 * @param n      Number of bytes to read from src (values <= 0 encode nothing).
 * @return Number of characters written to dst, not counting the terminator;
 *         0 when dst is NULL or n_dst is 0.
 */
int OAuth::UrlEncode(char* dst, size_t n_dst, const char* src, int n)
{
    if (dst == NULL || n_dst == 0)
        return 0;
    if (src == NULL)
        n = 0;

    // One byte is always kept in reserve for the terminating NUL.
    const size_t limit = n_dst - 1;
    size_t wcur = 0;

    // The remaining count is checked before each read so that src[n] is
    // never touched.
    for (int rcur = 0; rcur < n; ++rcur)
    {
        const unsigned char c = src[rcur];

        if (IsURLChar(c))
        {
            if (limit - wcur < 1)
                break;
            dst[wcur++] = c;
        }
        else if (c == ' ')
        {
            if (limit - wcur < 1)
                break;
            dst[wcur++] = '+';
        }
        else
        {
            // An escape needs three bytes; do not emit a partial one.
            if (limit - wcur < 3)
                break;
            dst[wcur++] = '%';
            dst[wcur++] = Get16Char(c >> 4);
            dst[wcur++] = Get16Char(c & 0x0f);
        }
    }

    dst[wcur] = '\0';
    return static_cast<int>(wcur);
}
int URLDetector::FindURL(const wxChar *text, int& len) { // offset of the current value of text from the initial one int offset = 0; match: int pos = scan(text, len); if ( !len ) return -1; // the provisional start and end of the URL, will be changed below const wxChar *start = text + pos; const wxChar *p = start + len; // there are 2 different cases: a mailto: URL or a mail address and // anything else which we need to treat differently bool isMail = *start == '@'; if ( isMail ) { // look for the start of the address start--; while ( start > text && IsLocalPartChar(*start) ) start--; // have we stopped at '<'? bool hasAngleBracket = *start == '<'; if ( !hasAngleBracket ) { if ( !IsLocalPartChar(*start) ) { // we went too far backwards start++; } //else: we stopped at the start of the text } //else: keep '<' as part of the URL // now look for the end of it while ( *p && IsDomainChar(*p) ) { p++; } // finally we should either have the brackets from both sides or none // at all if ( hasAngleBracket ) { if ( *p == '>' ) { // take the right bracket as well p++; } else { // forget about the left one start++; } } } else // !bare mail address { for ( ;; ) { size_t lenURL = 0; while ( IsURLChar(*p) ) { lenURL++; p++; } // URLs are frequently so long that they're spread across multiple // lines, so try to see if this might be the case here // // first of all we need to check whether it is at the end of line but // we should allow some trailing spaces const wxChar* q = p; while ( *q == ' ' ) q++; if ( q[0] != '\r' || q[1] != '\n' ) break; // not at the line end // also check if it's really long enough to be wrapped: // the short URLs normally shouldn't be wrapped static const size_t URL_WRAP_LEN = 30; // min len of wrapped URL if ( lenURL < URL_WRAP_LEN ) break; // too short if ( !IsURLChar(q[2]) ) break; // doesn't seem to be continued on the next line // heuristic text for end of URL detection if ( p - start > 5 && !CanBeWrapped(p) ) { // it seems that the URL ends here 
break; } p = q + 2; // go to the start of next line // Check that the beginning of next line is not the start of // another URL. // // Note that although '@' alone is recognized as the beginning // of an URL: here it should not be the case. int nextlen = 0; int nextpos = scan(p, nextlen); if ( nextlen && nextpos == 0 && *p != '@') { p -= 2; // The start of the next line being the start of an URL on its own, // do not join the two. break; } // check whether the next line starts with a word -- this is a good // indication that the URL hasn't wrapped q = p; while ( wxIsalpha(*q) ) q++; if ( *q == _T(' ') || (wxStrchr(_T(".,:;"), *q) && q[1] == _T(' ')) ) { // looks like we've a word (i.e. sequence of letters terminated by // space or punctuation) at the start of the next line p -= 2; break; } // another special case: subsequent dashes are very unusual in URLs // but often used as separator lines, so we assume that they indicate // the end of the URL if we find them on the next line. if ( p[0] == '-' && p[1] == '-' ) break; // it might be a wrapped URL but it might be not: it seems like we // get way too many false positives if we suppose that it's always // the case... so restrict the wrapped URLs detection to the case // when they occur at the beginning of the line, possibly after some // white space as this is how people usually format them q = start; while ( q >= text && *q != '\n' ) { q--; if ( !wxIsspace(*q) ) break; } // Does the URL start at the beginning of the line, or does it have // a '<' just in front? 
if ( q >= text && *q != '\n' && *q != '<') break; // it did occur at the start (or after '<'), suppose the URL is // wrapped and so we continue on the next line (and no need to test // the first character, it had been already done above) p++; } } // truncate any punctuation at the end while ( strchr(".:,)]!?", *(p - 1)) ) p--; // additional checks for the matches which didn't have an explicit scheme if ( isMail || text[pos + len - 3 /* len of "://" */ ] != _T(':') ) { // '@' matches may result in false positives, as not every '@' character // is inside a mailto URL so try to weed them out by requiring that the // mail address has a reasonable minimal length ("*****@*****.**" and // "www.xy.fr" are probably the shortest ones we can have, hence 8) // which at least avoids matching the bare '@'s bool good = (p - start) >= 8; if ( good ) { // also check that we have at least one dot in the domain part for the // mail addresses const wxChar * pDot = wxTmemchr(text + pos + 1, '.', p - text - pos - 1); if ( !pDot ) { good = false; } else if ( !isMail ) { // and has either two dots or at least a slash the other URLs, // otherwise it probably isn't an address/URL neither (stuff like // "... using ftp.If you ... " shouldn't be recognized as an URL) good = wxTmemchr(pDot + 1, '.', p - pDot - 1) != NULL || wxTmemchr(pDot + 1, '/', p - pDot - 1) != NULL; } } if ( !good ) { const int offDiff = pos + len; offset += offDiff; text += offDiff; // slightly more efficient than recursion... goto match; } } // return the length of the match len = p - start; return start - text + offset; }