// validate a given octet sequence for compliance with the specified
// encoding
bool IsValidOctetSequenceForCharset(nsACString& aCharset, const char *aOctets)
{
  nsCOMPtr<nsIUTF8ConverterService> cvtUTF8(do_GetService
    (NS_UTF8CONVERTERSERVICE_CONTRACTID));
  if (!cvtUTF8) {
    NS_WARNING("Can't get UTF8ConverterService\n");
    return false;
  }

  nsCAutoString tmpRaw;
  tmpRaw.Assign(aOctets);
  nsCAutoString tmpDecoded;

  nsresult rv = cvtUTF8->ConvertStringToUTF8(tmpRaw,
                                             PromiseFlatCString(aCharset).get(),
                                             false, false, 1, tmpDecoded);

  if (rv != NS_OK) {
    // we can't decode; charset may be unsupported, or the octet sequence
    // is broken (illegal or incomplete octet sequence contained)
    NS_WARNING("RFC2231/5987 parameter value does not decode according to specified charset\n");
    return false;
  }

  return true;
}
// XXX : aTryLocaleCharset is not yet effective.
nsresult 
nsMIMEHeaderParamImpl::DoGetParameter(const nsACString& aHeaderVal, 
                                      const char *aParamName,
                                      ParamDecoding aDecoding,
                                      const nsACString& aFallbackCharset, 
                                      bool aTryLocaleCharset, 
                                      char **aLang, nsAString& aResult)
{
    aResult.Truncate();
    nsresult rv;

    // get parameter (decode RFC 2231/5987 when applicable, as specified by
    // aDecoding (5987 being a subset of 2231) and return charset.)
    nsXPIDLCString med;
    nsXPIDLCString charset;
    rv = DoParameterInternal(PromiseFlatCString(aHeaderVal).get(), aParamName, 
                             aDecoding, getter_Copies(charset), aLang, 
                             getter_Copies(med));
    if (NS_FAILED(rv))
        return rv; 

    // convert to UTF-8 after charset conversion and RFC 2047 decoding 
    // if necessary.
    
    nsCAutoString str1;
    rv = DecodeParameter(med, charset.get(), nullptr, false, str1);
    NS_ENSURE_SUCCESS(rv, rv);

    if (!aFallbackCharset.IsEmpty())
    {
        nsCAutoString str2;
        nsCOMPtr<nsIUTF8ConverterService> 
          cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID));
        if (cvtUTF8 &&
            NS_SUCCEEDED(cvtUTF8->ConvertStringToUTF8(str1, 
                PromiseFlatCString(aFallbackCharset).get(), false, true,
                                   1, str2))) {
          CopyUTF8toUTF16(str2, aResult);
          return NS_OK;
        }
    }

    if (IsUTF8(str1)) {
      CopyUTF8toUTF16(str1, aResult);
      return NS_OK;
    }

    if (aTryLocaleCharset && !NS_IsNativeUTF8()) 
      return NS_CopyNativeToUnicode(str1, aResult);

    CopyASCIItoUTF16(str1, aResult);
    return NS_OK;
}
NS_IMETHODIMP 
nsMIMEHeaderParamImpl::DecodeParameter(const nsACString& aParamValue,
                                       const char* aCharset,
                                       const char* aDefaultCharset,
                                       PRBool aOverrideCharset, 
                                       nsACString& aResult)
{
  aResult.Truncate();
  nsresult rv;  
  // If aCharset is given, aParamValue was obtained from RFC2231 
  // encoding and we're pretty sure that it's in aCharset.
  if (aCharset && *aCharset)
  {
    nsCOMPtr<nsIUTF8ConverterService> cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID, &rv));
    if (NS_SUCCEEDED(rv)) 
      // skip ASCIIness/UTF8ness test if aCharset is 7bit non-ascii charset.
      return cvtUTF8->ConvertStringToUTF8(aParamValue, aCharset,
          IS_7BIT_NON_ASCII_CHARSET(aCharset), aResult);
  }

  const nsAFlatCString& param = PromiseFlatCString(aParamValue);
  nsCAutoString unQuoted;
  nsACString::const_iterator s, e;
  param.BeginReading(s);
  param.EndReading(e);

  // strip '\' when used to quote CR, LF, '"' and '\'
  for ( ; s != e; ++s) {
    if ((*s == '\\')) {
      if (++s == e) {
        --s; // '\' is at the end. move back and append '\'.
      }
      else if (*s != nsCRT::CR && *s != nsCRT::LF && *s != '"' && *s != '\\') {
        --s; // '\' is not foll. by CR,LF,'"','\'. move back and append '\'
      }
      // else : skip '\' and append the quoted character.
    }
    unQuoted.Append(*s);
  }

  aResult = unQuoted;
  
  nsCAutoString decoded;

  // Try RFC 2047 encoding, instead.
  rv = DecodeRFC2047Header(unQuoted.get(), aDefaultCharset, 
                           aOverrideCharset, PR_TRUE, decoded);
  
  if (NS_SUCCEEDED(rv) && !decoded.IsEmpty())
    aResult = decoded;
  
  return rv;
}
Exemplo n.º 4
0
// XXX : aTryLocaleCharset is not yet effective.
NS_IMETHODIMP 
nsMIMEHeaderParamImpl::GetParameter(const nsACString& aHeaderVal, 
                                    const char *aParamName,
                                    const nsACString& aFallbackCharset, 
                                    PRBool aTryLocaleCharset, 
                                    char **aLang, nsAString& aResult)
{
    aResult.Truncate();
    nsresult rv;

    // get parameter (decode RFC 2231 if it's RFC 2231-encoded and 
    // return charset.)
    nsXPIDLCString med;
    nsXPIDLCString charset;
    rv = GetParameterInternal(PromiseFlatCString(aHeaderVal).get(), aParamName, 
                              getter_Copies(charset), aLang, getter_Copies(med));
    if (NS_FAILED(rv))
        return rv; 

    // convert to UTF-8 after charset conversion and RFC 2047 decoding 
    // if necessary.
    
    nsCAutoString str1;
    rv = DecodeParameter(med, charset.get(), nsnull, PR_FALSE, str1);
    NS_ENSURE_SUCCESS(rv, rv);

    if (!aFallbackCharset.IsEmpty())
    {
        nsCAutoString str2;
        nsCOMPtr<nsIUTF8ConverterService> 
          cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID));
        if (cvtUTF8 &&
            NS_SUCCEEDED(cvtUTF8->ConvertStringToUTF8(str1, 
                PromiseFlatCString(aFallbackCharset).get(), PR_FALSE, str2))) {
          CopyUTF8toUTF16(str2, aResult);
          return NS_OK;
        }
    }

    if (IsUTF8(str1)) {
      CopyUTF8toUTF16(str1, aResult);
      return NS_OK;
    }

    if (aTryLocaleCharset && !NS_IsNativeUTF8()) 
      return NS_CopyNativeToUnicode(str1, aResult);

    CopyASCIItoUTF16(str1, aResult);
    return NS_OK;
}
// copy 'raw' sequences of octets in aInput to aOutput.
// If aDefaultCharset is specified, the input is assumed to be in the
// charset and converted to UTF-8. Otherwise, a blind copy is made.
// If aDefaultCharset is specified, but the conversion to UTF-8
// is not successful, each octet is replaced by Unicode replacement
// chars. *aOutput is advanced by the number of output octets.
// static
void CopyRawHeader(const char *aInput, PRUint32 aLen, 
                   const char *aDefaultCharset, nsACString &aOutput)
{
  PRInt32 c;

  // If aDefaultCharset is not specified, make a blind copy.
  if (!aDefaultCharset || !*aDefaultCharset) {
    aOutput.Append(aInput, aLen);
    return;
  }

  // Copy as long as it's US-ASCII.  An ESC may indicate ISO 2022
  // A ~ may indicate it is HZ
  while (aLen && (c = PRUint8(*aInput++)) != 0x1B && c != '~' && !(c & 0x80)) {
    aOutput.Append(char(c));
    aLen--;
  }
  if (!aLen) {
    return;
  }
  aInput--;

  // skip ASCIIness/UTF8ness test if aInput is supected to be a 7bit non-ascii
  // string and aDefaultCharset is a 7bit non-ascii charset.
  bool skipCheck = (c == 0x1B || c == '~') && 
                     IS_7BIT_NON_ASCII_CHARSET(aDefaultCharset);

  // If not UTF-8, treat as default charset
  nsCOMPtr<nsIUTF8ConverterService> 
    cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID));
  nsCAutoString utf8Text;
  if (cvtUTF8 &&
      NS_SUCCEEDED(
      cvtUTF8->ConvertStringToUTF8(Substring(aInput, aInput + aLen), 
                                   aDefaultCharset, skipCheck, true, 1,
                                   utf8Text))) {
    aOutput.Append(utf8Text);
  } else { // replace each octet with Unicode replacement char in UTF-8.
    for (PRUint32 i = 0; i < aLen; i++) {
      c = PRUint8(*aInput++);
      if (c & 0x80)
        aOutput.Append(REPLACEMENT_CHAR);
      else
        aOutput.Append(char(c));
    }
  }
}
Exemplo n.º 6
0
// |decode_mime_part2_str| taken from comi18n.c
// Decode RFC2047-encoded words in the input and convert the result to UTF-8.
// If aOverrideCharset is true, charset in RFC2047-encoded words is 
// ignored and aDefaultCharset is assumed, instead. aDefaultCharset
// is also used to convert raw octets (without RFC 2047 encoding) to UTF-8.
//static
nsresult DecodeRFC2047Str(const char *aHeader, const char *aDefaultCharset, 
                          PRBool aOverrideCharset, nsACString &aResult)
{
  const char *p, *q, *r;
  char *decodedText;
  const char *begin; // tracking pointer for where we are in the input buffer
  PRInt32 isLastEncodedWord = 0;
  const char *charsetStart, *charsetEnd;
  char charset[80];

  // initialize charset name to an empty string
  charset[0] = '\0';

  begin = aHeader;

  // To avoid buffer realloc, if possible, set capacity in advance. No 
  // matter what,  more than 3x expansion can never happen for all charsets
  // supported by Mozilla. SCSU/BCSU with the sliding window set to a
  // non-BMP block may be exceptions, but Mozilla does not support them. 
  // Neither any known mail/news program use them. Even if there's, we're
  // safe because we don't use a raw *char any more.
  aResult.SetCapacity(3 * strlen(aHeader));

  while ((p = PL_strstr(begin, "=?")) != 0) {
    if (isLastEncodedWord) {
      // See if it's all whitespace.
      for (q = begin; q < p; ++q) {
        if (!PL_strchr(" \t\r\n", *q)) break;
      }
    }

    if (!isLastEncodedWord || q < p) {
      // copy the part before the encoded-word
      CopyRawHeader(begin, p - begin, aDefaultCharset, aResult);
      begin = p;
    }

    p += 2;

    // Get charset info
    charsetStart = p;
    charsetEnd = 0;
    for (q = p; *q != '?'; q++) {
      if (*q <= ' ' || PL_strchr(especials, *q)) {
        goto badsyntax;
      }

      // RFC 2231 section 5
      if (!charsetEnd && *q == '*') {
        charsetEnd = q; 
      }
    }
    if (!charsetEnd) {
      charsetEnd = q;
    }

    // Check for too-long charset name
    if (PRUint32(charsetEnd - charsetStart) >= sizeof(charset)) 
      goto badsyntax;
    
    memcpy(charset, charsetStart, charsetEnd - charsetStart);
    charset[charsetEnd - charsetStart] = 0;

    q++;
    if (*q != 'Q' && *q != 'q' && *q != 'B' && *q != 'b')
      goto badsyntax;

    if (q[1] != '?')
      goto badsyntax;

    r = q;
    for (r = q + 2; *r != '?'; r++) {
      if (*r < ' ') goto badsyntax;
    }
    if (r[1] != '=')
        goto badsyntax;
    else if (r == q + 2) {
        // it's empty, skip
        begin = r + 2;
        isLastEncodedWord = 1;
        continue;
    }

    if(*q == 'Q' || *q == 'q')
      decodedText = DecodeQ(q + 2, r - (q + 2));
    else {
      // bug 227290. ignore an extraneous '=' at the end.
      // (# of characters in B-encoded part has to be a multiple of 4)
      PRInt32 n = r - (q + 2);
      n -= (n % 4 == 1 && !PL_strncmp(r - 3, "===", 3)) ? 1 : 0;
      decodedText = PL_Base64Decode(q + 2, n, nsnull);
    }

    if (decodedText == nsnull)
      goto badsyntax;

    // Override charset if requested.  Never override labeled UTF-8.
    // Use default charset instead of UNKNOWN-8BIT
    if ((aOverrideCharset && 0 != nsCRT::strcasecmp(charset, "UTF-8")) ||
        (aDefaultCharset && 0 == nsCRT::strcasecmp(charset, "UNKNOWN-8BIT"))) {
      PL_strncpy(charset, aDefaultCharset, sizeof(charset) - 1);
      charset[sizeof(charset) - 1] = '\0';
    }

    {
      nsCOMPtr<nsIUTF8ConverterService> 
        cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID));
      nsCAutoString utf8Text;
      // skip ASCIIness/UTF8ness test if aCharset is 7bit non-ascii charset.
      if (cvtUTF8 &&
          NS_SUCCEEDED(
            cvtUTF8->ConvertStringToUTF8(nsDependentCString(decodedText),
            charset, IS_7BIT_NON_ASCII_CHARSET(charset), utf8Text))) {
        aResult.Append(utf8Text);
      } else {
        aResult.Append(REPLACEMENT_CHAR);
      }
    }
    PR_Free(decodedText);
    begin = r + 2;
    isLastEncodedWord = 1;
    continue;

  badsyntax:
    // copy the part before the encoded-word
    aResult.Append(begin, p - begin);
    begin = p;
    isLastEncodedWord = 0;
  }

  // put the tail back
  CopyRawHeader(begin, strlen(begin), aDefaultCharset, aResult);

  nsCAutoString tempStr(aResult);
  tempStr.ReplaceChar('\t', ' ');
  aResult = tempStr;

  return NS_OK;
}