Beispiel #1
0
/**
 * Decides whether the response behind |request| looks like an RSS or Atom
 * feed.
 *
 * When the response is judged to be a feed, |sniffedType| is set to
 * TYPE_MAYBE_FEED. In every other non-error case |sniffedType| is truncated
 * to the empty string, i.e. this sniffer expresses no opinion.
 *
 * @param request      The request being sniffed. It must be QI-able to
 *                     nsIHttpChannel.
 * @param data         The first bytes of the response body. They may still
 *                     be content-encoded; ConvertEncodedData is asked to
 *                     decode them before scanning.
 * @param length       Number of bytes available at |data|.
 * @param sniffedType  [out] TYPE_MAYBE_FEED or the empty string.
 * @return NS_ERROR_NO_INTERFACE if |request| is not an nsIHttpChannel, the
 *         failure code of ConvertEncodedData if decoding fails, and NS_OK
 *         otherwise.
 */
NS_IMETHODIMP
nsFeedSniffer::GetMIMETypeFromContent(nsIRequest* request,
                                      const uint8_t* data,
                                      uint32_t length,
                                      nsACString& sniffedType)
{
  nsCOMPtr<nsIHttpChannel> channel(do_QueryInterface(request));
  if (!channel)
    return NS_ERROR_NO_INTERFACE;

  // Check that this is a GET request, since you can't subscribe to a POST...
  // If the method cannot be retrieved the string stays empty and we bail out
  // here as well.
  nsCAutoString method;
  channel->GetRequestMethod(method);
  if (!method.EqualsLiteral("GET")) {
    sniffedType.Truncate();
    return NS_OK;
  }

  // We need to find out if this is a load of a view-source document. In this
  // case we do not want to override the content type, since the source display
  // does not need to be converted from feed format to XUL. More importantly,
  // we don't want to change the content type from something
  // nsContentDLF::CreateInstance knows about (e.g. application/xml, text/html
  // etc) to something that only the application fe knows about (maybe.feed)
  // thus deactivating syntax highlighting.
  nsCOMPtr<nsIURI> originalURI;
  channel->GetOriginalURI(getter_AddRefs(originalURI));
  if (!originalURI) {
    // Without an original URI we cannot rule out a view-source load, so be
    // conservative and leave the content type alone instead of dereferencing
    // a null pointer below.
    sniffedType.Truncate();
    return NS_OK;
  }

  nsCAutoString scheme;
  originalURI->GetScheme(scheme);
  if (scheme.EqualsLiteral("view-source")) {
    sniffedType.Truncate();
    return NS_OK;
  }

  // Check the Content-Type to see if it is set correctly. If it is set to
  // something specific that we think is a reliable indication of a feed, don't
  // bother sniffing since we assume the site maintainer knows what they're
  // doing.
  nsCAutoString contentType;
  channel->GetContentType(contentType);
  bool noSniff = contentType.EqualsLiteral(TYPE_RSS) ||
                 contentType.EqualsLiteral(TYPE_ATOM);

  // Check to see if this was a feed request from the location bar or from
  // the feed: protocol. This is also a reliable indication.
  // The value of the header doesn't matter.
  if (!noSniff) {
    nsCAutoString sniffHeader;
    nsresult foundHeader =
      channel->GetRequestHeader(NS_LITERAL_CSTRING("X-Moz-Is-Feed"),
                                sniffHeader);
    noSniff = NS_SUCCEEDED(foundHeader);
  }

  if (noSniff) {
    // check for an attachment after we have a likely feed.
    if (HasAttachmentDisposition(channel)) {
      sniffedType.Truncate();
      return NS_OK;
    }

    // set the feed header as a response header, since we have good metadata
    // telling us that the feed is supposed to be RSS or Atom
    channel->SetResponseHeader(NS_LITERAL_CSTRING("X-Moz-Is-Feed"),
                               NS_LITERAL_CSTRING("1"), false);
    sniffedType.AssignLiteral(TYPE_MAYBE_FEED);
    return NS_OK;
  }

  // Don't sniff arbitrary types.  Limit sniffing to situations that
  // we think can reasonably arise.
  if (!contentType.EqualsLiteral(TEXT_HTML) &&
      !contentType.EqualsLiteral(APPLICATION_OCTET_STREAM) &&
      // Same criterion as XMLHttpRequest.  Should we be checking for "+xml"
      // and check for text/xml and application/xml by hand instead?
      contentType.Find("xml") == -1) {
    sniffedType.Truncate();
    return NS_OK;
  }

  // Now we need to potentially decompress data served with
  // Content-Encoding: gzip
  nsresult rv = ConvertEncodedData(request, data, length);
  if (NS_FAILED(rv))
    return rv;

  // We cap the number of bytes to scan at MAX_BYTES to prevent picking up
  // false positives by accidentally reading document content, e.g. a "how to
  // make a feed" page.
  // If nothing was decoded we scan the raw bytes, otherwise the decoded ones.
  const char* testData;
  if (mDecodedData.IsEmpty()) {
    testData = reinterpret_cast<const char*>(data);
    length = NS_MIN(length, MAX_BYTES);
  } else {
    testData = mDecodedData.get();
    length = NS_MIN(mDecodedData.Length(), MAX_BYTES);
  }

  // The strategy here is based on that described in:
  // http://blogs.msdn.com/rssteam/articles/PublishersGuide.aspx
  // for interoperability purposes.

  // Thus begins the actual sniffing.
  nsDependentCSubstring dataString(testData, length);

  // RSS 0.91/0.92/2.0
  bool isFeed = ContainsTopLevelSubstring(dataString, "<rss");

  // Atom 1.0
  if (!isFeed)
    isFeed = ContainsTopLevelSubstring(dataString, "<feed");

  // RSS 1.0: the RDF root element alone is not enough, both the RDF and the
  // RSS namespace strings have to show up in the scanned window too.
  if (!isFeed) {
    isFeed = ContainsTopLevelSubstring(dataString, "<rdf:RDF") &&
      dataString.Find(NS_RDF) != -1 &&
      dataString.Find(NS_RSS) != -1;
  }

  // If we sniffed a feed, coerce our internal type, unless the response is
  // served as an attachment.
  if (isFeed && !HasAttachmentDisposition(channel))
    sniffedType.AssignLiteral(TYPE_MAYBE_FEED);
  else
    sniffedType.Truncate();
  return NS_OK;
}
/**
 * Works out mContentType for the data buffered so far.
 *
 * Does nothing (after asserting) when mContentType is already set.
 * Otherwise the following detectors are tried in order, stopping at the
 * first one that yields a type:
 *   1. the sSnifferEntries table of leading-byte signatures,
 *   2. NS_SniffContent with the NS_DATA_SNIFFER_CATEGORY category,
 *   3. SniffForHTML,
 *   4. SniffURI,
 *   5. LastDitchSniff.
 *
 * @param aRequest  The request whose content is being examined. When it is
 *                  an nsIHttpChannel, ConvertEncodedData is given a chance
 *                  to decode the buffer first.
 */
void nsUnknownDecoder::DetermineContentType(nsIRequest* aRequest)
{
  NS_ASSERTION(mContentType.IsEmpty(), "Content type is already known.");
  if (!mContentType.IsEmpty()) {
    return;
  }

  // By default we look at the raw buffer. For HTTP channels whose data could
  // be decoded we look at the decoded bytes instead, capped at
  // MAX_BUFFER_SIZE. A decoding failure is not fatal: we simply keep the raw
  // buffer.
  const char* sniffData = mBuffer;
  uint32_t sniffLen = mBufferLen;

  nsCOMPtr<nsIHttpChannel> httpChannel(do_QueryInterface(aRequest));
  if (httpChannel &&
      NS_SUCCEEDED(ConvertEncodedData(aRequest, mBuffer, mBufferLen)) &&
      !mDecodedData.IsEmpty()) {
    sniffData = mDecodedData.get();
    sniffLen = std::min(mDecodedData.Length(), MAX_BUFFER_SIZE);
  }

  // Step 1: types that can be detected reliably from magic numbers.
  for (uint32_t idx = 0; idx < sSnifferEntryNum; ++idx) {
    // Skip entries whose signature is longer than the data we have.
    if (sniffLen < sSnifferEntries[idx].mByteLen) {
      continue;
    }

    // Skip entries whose signature does not match the leading bytes.
    if (memcmp(sniffData, sSnifferEntries[idx].mBytes,
               sSnifferEntries[idx].mByteLen) != 0) {
      continue;
    }

    NS_ASSERTION(sSnifferEntries[idx].mMimeType ||
                 sSnifferEntries[idx].mContentTypeSniffer,
                 "Must have either a type string or a function to set the type");
    NS_ASSERTION(!sSnifferEntries[idx].mMimeType ||
                 !sSnifferEntries[idx].mContentTypeSniffer,
                 "Both a type string and a type sniffing function set;"
                 " using type string");

    // A fixed type string takes precedence over a sniffing callback.
    if (sSnifferEntries[idx].mMimeType) {
      mContentType = sSnifferEntries[idx].mMimeType;
      NS_ASSERTION(!mContentType.IsEmpty(),
                   "Content type should be known by now.");
      return;
    }

    // Otherwise let the entry's member function decide; if it declines we
    // keep going through the remaining entries.
    if ((this->*(sSnifferEntries[idx].mContentTypeSniffer))(aRequest)) {
      NS_ASSERTION(!mContentType.IsEmpty(),
                   "Content type should be known by now.");
      return;
    }
  }

  // Step 2: ask the sniffers registered under NS_DATA_SNIFFER_CATEGORY.
  NS_SniffContent(NS_DATA_SNIFFER_CATEGORY, aRequest,
                  reinterpret_cast<const uint8_t*>(sniffData), sniffLen,
                  mContentType);
  if (!mContentType.IsEmpty()) {
    return;
  }

  // Steps 3-5: HTML detection, then the request's URI, and only if both of
  // those come up empty the last-ditch guess. Whichever one ends the chain
  // is expected to have filled in mContentType.
  if (!SniffForHTML(aRequest) && !SniffURI(aRequest)) {
    LastDitchSniff(aRequest);
  }
  NS_ASSERTION(!mContentType.IsEmpty(),
               "Content type should be known by now.");
}