diff options
Diffstat (limited to 'components/feeds/nsFeedSniffer.cpp')
-rw-r--r-- | components/feeds/nsFeedSniffer.cpp | 363 |
1 files changed, 0 insertions, 363 deletions
diff --git a/components/feeds/nsFeedSniffer.cpp b/components/feeds/nsFeedSniffer.cpp deleted file mode 100644 index f314d3d..0000000 --- a/components/feeds/nsFeedSniffer.cpp +++ /dev/null @@ -1,363 +0,0 @@ -/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ - -#include "nsFeedSniffer.h" - - -#include "nsNetCID.h" -#include "nsXPCOM.h" -#include "nsCOMPtr.h" -#include "nsStringStream.h" - -#include "nsBrowserCompsCID.h" - -#include "nsICategoryManager.h" -#include "nsIServiceManager.h" -#include "nsComponentManagerUtils.h" -#include "nsServiceManagerUtils.h" - -#include "nsIStreamConverterService.h" -#include "nsIStreamConverter.h" - -#include "nsIStreamListener.h" - -#include "nsIHttpChannel.h" -#include "nsIMIMEHeaderParam.h" - -#include "nsMimeTypes.h" -#include "nsIURI.h" -#include <algorithm> - -#define TYPE_ATOM "application/atom+xml" -#define TYPE_RSS "application/rss+xml" -#define TYPE_MAYBE_FEED "application/vnd.mozilla.maybe.feed" - -#define NS_RDF "http://www.w3.org/1999/02/22-rdf-syntax-ns#" -#define NS_RSS "http://purl.org/rss/1.0/" - -#define MAX_BYTES 512u - -NS_IMPL_ISUPPORTS(nsFeedSniffer, - nsIContentSniffer, - nsIStreamListener, - nsIRequestObserver) - -nsresult -nsFeedSniffer::ConvertEncodedData(nsIRequest* request, - const uint8_t* data, - uint32_t length) -{ - nsresult rv = NS_OK; - - mDecodedData = ""; - nsCOMPtr<nsIHttpChannel> httpChannel(do_QueryInterface(request)); - if (!httpChannel) - return NS_ERROR_NO_INTERFACE; - - nsAutoCString contentEncoding; - httpChannel->GetResponseHeader(NS_LITERAL_CSTRING("Content-Encoding"), - contentEncoding); - if (!contentEncoding.IsEmpty()) { - nsCOMPtr<nsIStreamConverterService> converterService(do_GetService(NS_STREAMCONVERTERSERVICE_CONTRACTID)); - if (converterService) { - ToLowerCase(contentEncoding); - - nsCOMPtr<nsIStreamListener> converter; - rv = converterService->AsyncConvertData(contentEncoding.get(), - "uncompressed", this, nullptr, - getter_AddRefs(converter)); - NS_ENSURE_SUCCESS(rv, rv); - - converter->OnStartRequest(request, nullptr); - - nsCOMPtr<nsIStringInputStream> rawStream = - do_CreateInstance(NS_STRINGINPUTSTREAM_CONTRACTID); - if (!rawStream) - return NS_ERROR_FAILURE; - - rv = rawStream->SetData((const char*)data, length); - NS_ENSURE_SUCCESS(rv, rv); - - rv = converter->OnDataAvailable(request, nullptr, rawStream, 0, length); - NS_ENSURE_SUCCESS(rv, rv); - - converter->OnStopRequest(request, nullptr, NS_OK); - } - } - return rv; -} - -template<int N> -static bool -StringBeginsWithLowercaseLiteral(nsAString& aString, - const char (&aSubstring)[N]) -{ - return StringHead(aString, N).LowerCaseEqualsLiteral(aSubstring); -} - -bool -HasAttachmentDisposition(nsIHttpChannel* httpChannel) -{ - if (!httpChannel) - return false; - - uint32_t disp; - nsresult rv = httpChannel->GetContentDisposition(&disp); - - if (NS_SUCCEEDED(rv) && disp == nsIChannel::DISPOSITION_ATTACHMENT) - return true; - - return false; -} - -/** - * @return the first occurrence of a character within a string buffer, - * or nullptr if not found - */ -static const char* -FindChar(char c, const char *begin, const char *end) -{ - for (; begin < end; ++begin) { - if (*begin == c) - return begin; - } - return nullptr; -} - -/** - * - * Determine if a substring is the "documentElement" in the document. - * - * All of our sniffed substrings: <rss, <feed, <rdf:RDF must be the "document" - * element within the XML DOM, i.e. the root container element. Otherwise, - * it's possible that someone embedded one of these tags inside a document of - * another type, e.g. a HTML document, and we don't want to show the preview - * page if the document isn't actually a feed. - * - * @param start - * The beginning of the data being sniffed - * @param end - * The end of the data being sniffed, right before the substring that - * was found. - * @returns true if the found substring is the documentElement, false - * otherwise. - */ -static bool -IsDocumentElement(const char *start, const char* end) -{ - // For every tag in the buffer, check to see if it's a PI, Doctype or - // comment, our desired substring or something invalid. - while ( (start = FindChar('<', start, end)) ) { - ++start; - if (start >= end) - return false; - - // Check to see if the character following the '<' is either '?' or '!' - // (processing instruction or doctype or comment)... these are valid nodes - // to have in the prologue. - if (*start != '?' && *start != '!') - return false; - - // Now advance the iterator until the '>' (We do this because we don't want - // to sniff indicator substrings that are embedded within other nodes, e.g. - // comments: <!-- <rdf:RDF .. > --> - start = FindChar('>', start, end); - if (!start) - return false; - - ++start; - } - return true; -} - -/** - * Determines whether or not a string exists as the root element in an XML data - * string buffer. - * @param dataString - * The data being sniffed - * @param substring - * The substring being tested for existence and root-ness. - * @returns true if the substring exists and is the documentElement, false - * otherwise. - */ -static bool -ContainsTopLevelSubstring(nsACString& dataString, const char *substring) -{ - int32_t offset = dataString.Find(substring); - if (offset == -1) - return false; - - const char *begin = dataString.BeginReading(); - - // Only do the validation when we find the substring. - return IsDocumentElement(begin, begin + offset); -} - -NS_IMETHODIMP -nsFeedSniffer::GetMIMETypeFromContent(nsIRequest* request, - const uint8_t* data, - uint32_t length, - nsACString& sniffedType) -{ - nsCOMPtr<nsIHttpChannel> channel(do_QueryInterface(request)); - if (!channel) - return NS_ERROR_NO_INTERFACE; - - // Check that this is a GET request, since you can't subscribe to a POST... - nsAutoCString method; - channel->GetRequestMethod(method); - if (!method.EqualsLiteral("GET")) { - sniffedType.Truncate(); - return NS_OK; - } - - // We need to find out if this is a load of a view-source document. In this - // case we do not want to override the content type, since the source display - // does not need to be converted from feed format to XUL. More importantly, - // we don't want to change the content type from something - // nsContentDLF::CreateInstance knows about (e.g. application/xml, text/html - // etc) to something that only the application fe knows about (maybe.feed) - // thus deactivating syntax highlighting. - nsCOMPtr<nsIURI> originalURI; - channel->GetOriginalURI(getter_AddRefs(originalURI)); - - nsAutoCString scheme; - originalURI->GetScheme(scheme); - if (scheme.EqualsLiteral("view-source")) { - sniffedType.Truncate(); - return NS_OK; - } - - // Check the Content-Type to see if it is set correctly. If it is set to - // something specific that we think is a reliable indication of a feed, don't - // bother sniffing since we assume the site maintainer knows what they're - // doing. - nsAutoCString contentType; - channel->GetContentType(contentType); - bool noSniff = contentType.EqualsLiteral(TYPE_RSS) || - contentType.EqualsLiteral(TYPE_ATOM); - - // Check to see if this was a feed request from the location bar or from - // the feed: protocol. This is also a reliable indication. - // The value of the header doesn't matter. - if (!noSniff) { - nsAutoCString sniffHeader; - nsresult foundHeader = - channel->GetRequestHeader(NS_LITERAL_CSTRING("X-Moz-Is-Feed"), - sniffHeader); - noSniff = NS_SUCCEEDED(foundHeader); - } - - if (noSniff) { - // check for an attachment after we have a likely feed. - if(HasAttachmentDisposition(channel)) { - sniffedType.Truncate(); - return NS_OK; - } - - // set the feed header as a response header, since we have good metadata - // telling us that the feed is supposed to be RSS or Atom - channel->SetResponseHeader(NS_LITERAL_CSTRING("X-Moz-Is-Feed"), - NS_LITERAL_CSTRING("1"), false); - sniffedType.AssignLiteral(TYPE_MAYBE_FEED); - return NS_OK; - } - - // Don't sniff arbitrary types. Limit sniffing to situations that - // we think can reasonably arise. - if (!contentType.EqualsLiteral(TEXT_HTML) && - !contentType.EqualsLiteral(APPLICATION_OCTET_STREAM) && - // Same criterion as XMLHttpRequest. Should we be checking for "+xml" - // and check for text/xml and application/xml by hand instead? - contentType.Find("xml") == -1) { - sniffedType.Truncate(); - return NS_OK; - } - - // Now we need to potentially decompress data served with - // Content-Encoding: gzip - nsresult rv = ConvertEncodedData(request, data, length); - if (NS_FAILED(rv)) - return rv; - - // We cap the number of bytes to scan at MAX_BYTES to prevent picking up - // false positives by accidentally reading document content, e.g. a "how to - // make a feed" page. - const char* testData; - if (mDecodedData.IsEmpty()) { - testData = (const char*)data; - length = std::min(length, MAX_BYTES); - } else { - testData = mDecodedData.get(); - length = std::min(mDecodedData.Length(), MAX_BYTES); - } - - // The strategy here is based on that described in: - // http://blogs.msdn.com/rssteam/articles/PublishersGuide.aspx - // for interoperarbility purposes. - - // Thus begins the actual sniffing. - nsDependentCSubstring dataString((const char*)testData, length); - - bool isFeed = false; - - // RSS 0.91/0.92/2.0 - isFeed = ContainsTopLevelSubstring(dataString, "<rss"); - - // Atom 1.0 - if (!isFeed) - isFeed = ContainsTopLevelSubstring(dataString, "<feed"); - - // RSS 1.0 - if (!isFeed) { - isFeed = ContainsTopLevelSubstring(dataString, "<rdf:RDF") && - dataString.Find(NS_RDF) != -1 && - dataString.Find(NS_RSS) != -1; - } - - // If we sniffed a feed, coerce our internal type - if (isFeed && !HasAttachmentDisposition(channel)) - sniffedType.AssignLiteral(TYPE_MAYBE_FEED); - else - sniffedType.Truncate(); - return NS_OK; -} - -NS_IMETHODIMP -nsFeedSniffer::OnStartRequest(nsIRequest* request, nsISupports* context) -{ - return NS_OK; -} - -nsresult -nsFeedSniffer::AppendSegmentToString(nsIInputStream* inputStream, - void* closure, - const char* rawSegment, - uint32_t toOffset, - uint32_t count, - uint32_t* writeCount) -{ - nsCString* decodedData = static_cast<nsCString*>(closure); - decodedData->Append(rawSegment, count); - *writeCount = count; - return NS_OK; -} - -NS_IMETHODIMP -nsFeedSniffer::OnDataAvailable(nsIRequest* request, nsISupports* context, - nsIInputStream* stream, uint64_t offset, - uint32_t count) -{ - uint32_t read; - return stream->ReadSegments(AppendSegmentToString, &mDecodedData, count, - &read); -} - -NS_IMETHODIMP -nsFeedSniffer::OnStopRequest(nsIRequest* request, nsISupports* context, - nsresult status) -{ - return NS_OK; -} |