// validate a given octet sequence for compliance with the specified // encoding bool IsValidOctetSequenceForCharset(nsACString& aCharset, const char *aOctets) { nsCOMPtr<nsIUTF8ConverterService> cvtUTF8(do_GetService (NS_UTF8CONVERTERSERVICE_CONTRACTID)); if (!cvtUTF8) { NS_WARNING("Can't get UTF8ConverterService\n"); return false; } nsCAutoString tmpRaw; tmpRaw.Assign(aOctets); nsCAutoString tmpDecoded; nsresult rv = cvtUTF8->ConvertStringToUTF8(tmpRaw, PromiseFlatCString(aCharset).get(), false, false, 1, tmpDecoded); if (rv != NS_OK) { // we can't decode; charset may be unsupported, or the octet sequence // is broken (illegal or incomplete octet sequence contained) NS_WARNING("RFC2231/5987 parameter value does not decode according to specified charset\n"); return false; } return true; }
// XXX : aTryLocaleCharset is not yet effective. nsresult nsMIMEHeaderParamImpl::DoGetParameter(const nsACString& aHeaderVal, const char *aParamName, ParamDecoding aDecoding, const nsACString& aFallbackCharset, bool aTryLocaleCharset, char **aLang, nsAString& aResult) { aResult.Truncate(); nsresult rv; // get parameter (decode RFC 2231/5987 when applicable, as specified by // aDecoding (5987 being a subset of 2231) and return charset.) nsXPIDLCString med; nsXPIDLCString charset; rv = DoParameterInternal(PromiseFlatCString(aHeaderVal).get(), aParamName, aDecoding, getter_Copies(charset), aLang, getter_Copies(med)); if (NS_FAILED(rv)) return rv; // convert to UTF-8 after charset conversion and RFC 2047 decoding // if necessary. nsCAutoString str1; rv = DecodeParameter(med, charset.get(), nullptr, false, str1); NS_ENSURE_SUCCESS(rv, rv); if (!aFallbackCharset.IsEmpty()) { nsCAutoString str2; nsCOMPtr<nsIUTF8ConverterService> cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID)); if (cvtUTF8 && NS_SUCCEEDED(cvtUTF8->ConvertStringToUTF8(str1, PromiseFlatCString(aFallbackCharset).get(), false, true, 1, str2))) { CopyUTF8toUTF16(str2, aResult); return NS_OK; } } if (IsUTF8(str1)) { CopyUTF8toUTF16(str1, aResult); return NS_OK; } if (aTryLocaleCharset && !NS_IsNativeUTF8()) return NS_CopyNativeToUnicode(str1, aResult); CopyASCIItoUTF16(str1, aResult); return NS_OK; }
// Decode a single header parameter value into aResult.
//
// aParamValue      : the parameter value to decode.
// aCharset         : charset the value is labelled with (may be null or
//                    empty). When given, the value was obtained from
//                    RFC 2231 encoding and is taken to be in that charset.
// aDefaultCharset  : forwarded to DecodeRFC2047Header().
// aOverrideCharset : forwarded to DecodeRFC2047Header().
// aResult          : receives the decoded value; truncated on entry.
//
// With a non-empty aCharset and an available converter service, the result
// of ConvertStringToUTF8() is returned directly. Otherwise backslash
// quoting is removed, the unquoted text is stored in aResult, and RFC 2047
// decoding is attempted; its output replaces aResult only when it succeeds
// and is non-empty. The status of the RFC 2047 step is returned.
NS_IMETHODIMP
nsMIMEHeaderParamImpl::DecodeParameter(const nsACString& aParamValue,
                                       const char* aCharset,
                                       const char* aDefaultCharset,
                                       PRBool aOverrideCharset,
                                       nsACString& aResult)
{
  aResult.Truncate();
  nsresult rv;

  if (aCharset && *aCharset) {
    nsCOMPtr<nsIUTF8ConverterService>
      converter(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID, &rv));
    if (NS_SUCCEEDED(rv)) {
      // Skip the ASCIIness/UTF8ness test if aCharset is a 7bit non-ascii
      // charset.
      return converter->ConvertStringToUTF8(aParamValue, aCharset,
                                            IS_7BIT_NON_ASCII_CHARSET(aCharset),
                                            aResult);
    }
    // No converter service: continue with the unquote + RFC 2047 path.
  }

  const nsAFlatCString& param = PromiseFlatCString(aParamValue);
  nsACString::const_iterator cur, end;
  param.BeginReading(cur);
  param.EndReading(end);

  // Strip '\' when it is used to quote CR, LF, '"' or '\'. A backslash at
  // the very end, or one followed by any other character, is kept verbatim.
  nsCAutoString unQuoted;
  while (cur != end) {
    if (*cur == '\\') {
      nsACString::const_iterator ahead = cur;
      ++ahead;
      if (ahead != end &&
          (*ahead == nsCRT::CR || *ahead == nsCRT::LF ||
           *ahead == '"' || *ahead == '\\')) {
        // Drop the backslash; the quoted character is appended below.
        cur = ahead;
      }
    }
    unQuoted.Append(*cur);
    ++cur;
  }

  aResult = unQuoted;

  // Try RFC 2047 encoding, instead.
  nsCAutoString rfc2047Decoded;
  rv = DecodeRFC2047Header(unQuoted.get(), aDefaultCharset, aOverrideCharset,
                           PR_TRUE, rfc2047Decoded);
  if (NS_SUCCEEDED(rv) && !rfc2047Decoded.IsEmpty()) {
    aResult = rfc2047Decoded;
  }

  return rv;
}
// XXX : aTryLocaleCharset is not yet effective. NS_IMETHODIMP nsMIMEHeaderParamImpl::GetParameter(const nsACString& aHeaderVal, const char *aParamName, const nsACString& aFallbackCharset, PRBool aTryLocaleCharset, char **aLang, nsAString& aResult) { aResult.Truncate(); nsresult rv; // get parameter (decode RFC 2231 if it's RFC 2231-encoded and // return charset.) nsXPIDLCString med; nsXPIDLCString charset; rv = GetParameterInternal(PromiseFlatCString(aHeaderVal).get(), aParamName, getter_Copies(charset), aLang, getter_Copies(med)); if (NS_FAILED(rv)) return rv; // convert to UTF-8 after charset conversion and RFC 2047 decoding // if necessary. nsCAutoString str1; rv = DecodeParameter(med, charset.get(), nsnull, PR_FALSE, str1); NS_ENSURE_SUCCESS(rv, rv); if (!aFallbackCharset.IsEmpty()) { nsCAutoString str2; nsCOMPtr<nsIUTF8ConverterService> cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID)); if (cvtUTF8 && NS_SUCCEEDED(cvtUTF8->ConvertStringToUTF8(str1, PromiseFlatCString(aFallbackCharset).get(), PR_FALSE, str2))) { CopyUTF8toUTF16(str2, aResult); return NS_OK; } } if (IsUTF8(str1)) { CopyUTF8toUTF16(str1, aResult); return NS_OK; } if (aTryLocaleCharset && !NS_IsNativeUTF8()) return NS_CopyNativeToUnicode(str1, aResult); CopyASCIItoUTF16(str1, aResult); return NS_OK; }
// copy 'raw' sequences of octets in aInput to aOutput. // If aDefaultCharset is specified, the input is assumed to be in the // charset and converted to UTF-8. Otherwise, a blind copy is made. // If aDefaultCharset is specified, but the conversion to UTF-8 // is not successful, each octet is replaced by Unicode replacement // chars. *aOutput is advanced by the number of output octets. // static void CopyRawHeader(const char *aInput, PRUint32 aLen, const char *aDefaultCharset, nsACString &aOutput) { PRInt32 c; // If aDefaultCharset is not specified, make a blind copy. if (!aDefaultCharset || !*aDefaultCharset) { aOutput.Append(aInput, aLen); return; } // Copy as long as it's US-ASCII. An ESC may indicate ISO 2022 // A ~ may indicate it is HZ while (aLen && (c = PRUint8(*aInput++)) != 0x1B && c != '~' && !(c & 0x80)) { aOutput.Append(char(c)); aLen--; } if (!aLen) { return; } aInput--; // skip ASCIIness/UTF8ness test if aInput is supected to be a 7bit non-ascii // string and aDefaultCharset is a 7bit non-ascii charset. bool skipCheck = (c == 0x1B || c == '~') && IS_7BIT_NON_ASCII_CHARSET(aDefaultCharset); // If not UTF-8, treat as default charset nsCOMPtr<nsIUTF8ConverterService> cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID)); nsCAutoString utf8Text; if (cvtUTF8 && NS_SUCCEEDED( cvtUTF8->ConvertStringToUTF8(Substring(aInput, aInput + aLen), aDefaultCharset, skipCheck, true, 1, utf8Text))) { aOutput.Append(utf8Text); } else { // replace each octet with Unicode replacement char in UTF-8. for (PRUint32 i = 0; i < aLen; i++) { c = PRUint8(*aInput++); if (c & 0x80) aOutput.Append(REPLACEMENT_CHAR); else aOutput.Append(char(c)); } } }
// |decode_mime_part2_str| taken from comi18n.c // Decode RFC2047-encoded words in the input and convert the result to UTF-8. // If aOverrideCharset is true, charset in RFC2047-encoded words is // ignored and aDefaultCharset is assumed, instead. aDefaultCharset // is also used to convert raw octets (without RFC 2047 encoding) to UTF-8. //static nsresult DecodeRFC2047Str(const char *aHeader, const char *aDefaultCharset, PRBool aOverrideCharset, nsACString &aResult) { const char *p, *q, *r; char *decodedText; const char *begin; // tracking pointer for where we are in the input buffer PRInt32 isLastEncodedWord = 0; const char *charsetStart, *charsetEnd; char charset[80]; // initialize charset name to an empty string charset[0] = '\0'; begin = aHeader; // To avoid buffer realloc, if possible, set capacity in advance. No // matter what, more than 3x expansion can never happen for all charsets // supported by Mozilla. SCSU/BCSU with the sliding window set to a // non-BMP block may be exceptions, but Mozilla does not support them. // Neither any known mail/news program use them. Even if there's, we're // safe because we don't use a raw *char any more. aResult.SetCapacity(3 * strlen(aHeader)); while ((p = PL_strstr(begin, "=?")) != 0) { if (isLastEncodedWord) { // See if it's all whitespace. 
for (q = begin; q < p; ++q) { if (!PL_strchr(" \t\r\n", *q)) break; } } if (!isLastEncodedWord || q < p) { // copy the part before the encoded-word CopyRawHeader(begin, p - begin, aDefaultCharset, aResult); begin = p; } p += 2; // Get charset info charsetStart = p; charsetEnd = 0; for (q = p; *q != '?'; q++) { if (*q <= ' ' || PL_strchr(especials, *q)) { goto badsyntax; } // RFC 2231 section 5 if (!charsetEnd && *q == '*') { charsetEnd = q; } } if (!charsetEnd) { charsetEnd = q; } // Check for too-long charset name if (PRUint32(charsetEnd - charsetStart) >= sizeof(charset)) goto badsyntax; memcpy(charset, charsetStart, charsetEnd - charsetStart); charset[charsetEnd - charsetStart] = 0; q++; if (*q != 'Q' && *q != 'q' && *q != 'B' && *q != 'b') goto badsyntax; if (q[1] != '?') goto badsyntax; r = q; for (r = q + 2; *r != '?'; r++) { if (*r < ' ') goto badsyntax; } if (r[1] != '=') goto badsyntax; else if (r == q + 2) { // it's empty, skip begin = r + 2; isLastEncodedWord = 1; continue; } if(*q == 'Q' || *q == 'q') decodedText = DecodeQ(q + 2, r - (q + 2)); else { // bug 227290. ignore an extraneous '=' at the end. // (# of characters in B-encoded part has to be a multiple of 4) PRInt32 n = r - (q + 2); n -= (n % 4 == 1 && !PL_strncmp(r - 3, "===", 3)) ? 1 : 0; decodedText = PL_Base64Decode(q + 2, n, nsnull); } if (decodedText == nsnull) goto badsyntax; // Override charset if requested. Never override labeled UTF-8. // Use default charset instead of UNKNOWN-8BIT if ((aOverrideCharset && 0 != nsCRT::strcasecmp(charset, "UTF-8")) || (aDefaultCharset && 0 == nsCRT::strcasecmp(charset, "UNKNOWN-8BIT"))) { PL_strncpy(charset, aDefaultCharset, sizeof(charset) - 1); charset[sizeof(charset) - 1] = '\0'; } { nsCOMPtr<nsIUTF8ConverterService> cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID)); nsCAutoString utf8Text; // skip ASCIIness/UTF8ness test if aCharset is 7bit non-ascii charset. 
if (cvtUTF8 && NS_SUCCEEDED( cvtUTF8->ConvertStringToUTF8(nsDependentCString(decodedText), charset, IS_7BIT_NON_ASCII_CHARSET(charset), utf8Text))) { aResult.Append(utf8Text); } else { aResult.Append(REPLACEMENT_CHAR); } } PR_Free(decodedText); begin = r + 2; isLastEncodedWord = 1; continue; badsyntax: // copy the part before the encoded-word aResult.Append(begin, p - begin); begin = p; isLastEncodedWord = 0; } // put the tail back CopyRawHeader(begin, strlen(begin), aDefaultCharset, aResult); nsCAutoString tempStr(aResult); tempStr.ReplaceChar('\t', ' '); aResult = tempStr; return NS_OK; }