summaryrefslogtreecommitdiffstats
path: root/parser/htmlparser/nsParser.cpp
diff options
context:
space:
mode:
authorMatt A. Tobin <mattatobin@localhost.localdomain>2018-02-02 04:16:08 -0500
committerMatt A. Tobin <mattatobin@localhost.localdomain>2018-02-02 04:16:08 -0500
commit5f8de423f190bbb79a62f804151bc24824fa32d8 (patch)
tree10027f336435511475e392454359edea8e25895d /parser/htmlparser/nsParser.cpp
parent49ee0794b5d912db1f95dce6eb52d781dc210db5 (diff)
downloadUXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.gz
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.lz
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.xz
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.zip
Add m-esr52 at 52.6.0
Diffstat (limited to 'parser/htmlparser/nsParser.cpp')
-rw-r--r--parser/htmlparser/nsParser.cpp1600
1 files changed, 1600 insertions, 0 deletions
diff --git a/parser/htmlparser/nsParser.cpp b/parser/htmlparser/nsParser.cpp
new file mode 100644
index 000000000..dd140c553
--- /dev/null
+++ b/parser/htmlparser/nsParser.cpp
@@ -0,0 +1,1600 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set sw=2 ts=2 et tw=79: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsIAtom.h"
+#include "nsParser.h"
+#include "nsString.h"
+#include "nsCRT.h"
+#include "nsScanner.h"
+#include "plstr.h"
+#include "nsIStringStream.h"
+#include "nsIChannel.h"
+#include "nsICachingChannel.h"
+#include "nsIInputStream.h"
+#include "CNavDTD.h"
+#include "prenv.h"
+#include "prlock.h"
+#include "prcvar.h"
+#include "nsParserCIID.h"
+#include "nsReadableUtils.h"
+#include "nsCOMPtr.h"
+#include "nsExpatDriver.h"
+#include "nsIServiceManager.h"
+#include "nsICategoryManager.h"
+#include "nsISupportsPrimitives.h"
+#include "nsIFragmentContentSink.h"
+#include "nsStreamUtils.h"
+#include "nsHTMLTokenizer.h"
+#include "nsScriptLoader.h"
+#include "nsDataHashtable.h"
+#include "nsXPCOMCIDInternal.h"
+#include "nsMimeTypes.h"
+#include "mozilla/CondVar.h"
+#include "mozilla/Mutex.h"
+#include "nsParserConstants.h"
+#include "nsCharsetSource.h"
+#include "nsContentUtils.h"
+#include "nsThreadUtils.h"
+#include "nsIHTMLContentSink.h"
+
+#include "mozilla/dom/EncodingUtils.h"
+#include "mozilla/BinarySearch.h"
+
+using namespace mozilla;
+using mozilla::dom::EncodingUtils;
+
+#define NS_PARSER_FLAG_PARSER_ENABLED 0x00000002
+#define NS_PARSER_FLAG_OBSERVERS_ENABLED 0x00000004
+#define NS_PARSER_FLAG_PENDING_CONTINUE_EVENT 0x00000008
+#define NS_PARSER_FLAG_FLUSH_TOKENS 0x00000020
+#define NS_PARSER_FLAG_CAN_TOKENIZE 0x00000040
+
+//-------------- Begin ParseContinue Event Definition ------------------------
+/*
+The parser can be explicitly interrupted by passing a return value of
+NS_ERROR_HTMLPARSER_INTERRUPTED from BuildModel on the DTD. This will cause
+the parser to stop processing and allow the application to return to the event
+loop. The data which was left at the time of interruption will be processed
+the next time OnDataAvailable is called. If the parser has received its final
+chunk of data then OnDataAvailable will no longer be called by the networking
+module, so the parser will schedule a nsParserContinueEvent which will call
+the parser to process the remaining data after returning to the event loop.
+If the parser is interrupted while processing the remaining data it will
+schedule another ParseContinueEvent. The processing of data followed by
+scheduling of the continue events will proceed until either:
+
+ 1) All of the remaining data can be processed without interrupting
+ 2) The parser has been cancelled.
+
+
+This capability is currently used in CNavDTD and nsHTMLContentSink. The
+nsHTMLContentSink is notified by CNavDTD when a chunk of tokens is going to be
+processed and when each token is processed. The nsHTML content sink records
+the time when the chunk has started processing and will return
+NS_ERROR_HTMLPARSER_INTERRUPTED if the token processing time has exceeded a
+threshold called max tokenizing processing time. This allows the content sink
+to limit how much data is processed in a single chunk which in turn gates how
+much time is spent away from the event loop. Processing smaller chunks of data
+also reduces the time spent in subsequent reflows.
+
+This capability is most apparent when loading large documents. If the maximum
+token processing time is set small enough the application will remain
+responsive during document load.
+
+A side-effect of this capability is that document load is not complete when
+the last chunk of data is passed to OnDataAvailable since the parser may have
+been interrupted when the last chunk of data arrived. The document is complete
+when all of the document has been tokenized and there aren't any pending
+nsParserContinueEvents. This can cause problems if the application assumes
+that it can monitor the load requests to determine when the document load has
+been completed. This is what happens in Mozilla. The document is considered
+completely loaded when all of the load requests have been satisfied. To delay
+the document load until all of the parsing has been completed the
+nsHTMLContentSink adds a dummy parser load request which is not removed until
+the nsHTMLContentSink's DidBuildModel is called. The CNavDTD will not call
+DidBuildModel until the final chunk of data has been passed to the parser
+through the OnDataAvailable and there aren't any pending
+nsParserContineEvents.
+
+Currently the parser is ignores requests to be interrupted during the
+processing of script. This is because a document.write followed by JavaScript
+calls to manipulate the DOM may fail if the parser was interrupted during the
+document.write.
+
+For more details @see bugzilla bug 76722
+*/
+
+
+class nsParserContinueEvent : public Runnable
+{
+public:
+ RefPtr<nsParser> mParser;
+
+ explicit nsParserContinueEvent(nsParser* aParser)
+ : mParser(aParser)
+ {}
+
+ NS_IMETHOD Run() override
+ {
+ mParser->HandleParserContinueEvent(this);
+ return NS_OK;
+ }
+};
+
+//-------------- End ParseContinue Event Definition ------------------------
+
+/**
+ * default constructor
+ */
+nsParser::nsParser()
+{
+ Initialize(true);
+}
+
+nsParser::~nsParser()
+{
+ Cleanup();
+}
+
+void
+nsParser::Initialize(bool aConstructor)
+{
+ if (aConstructor) {
+ // Raw pointer
+ mParserContext = 0;
+ }
+ else {
+ // nsCOMPtrs
+ mObserver = nullptr;
+ mUnusedInput.Truncate();
+ }
+
+ mContinueEvent = nullptr;
+ mCharsetSource = kCharsetUninitialized;
+ mCharset.AssignLiteral("ISO-8859-1");
+ mInternalState = NS_OK;
+ mStreamStatus = NS_OK;
+ mCommand = eViewNormal;
+ mFlags = NS_PARSER_FLAG_OBSERVERS_ENABLED |
+ NS_PARSER_FLAG_PARSER_ENABLED |
+ NS_PARSER_FLAG_CAN_TOKENIZE;
+
+ mProcessingNetworkData = false;
+ mIsAboutBlank = false;
+}
+
+void
+nsParser::Cleanup()
+{
+#ifdef DEBUG
+ if (mParserContext && mParserContext->mPrevContext) {
+ NS_WARNING("Extra parser contexts still on the parser stack");
+ }
+#endif
+
+ while (mParserContext) {
+ CParserContext *pc = mParserContext->mPrevContext;
+ delete mParserContext;
+ mParserContext = pc;
+ }
+
+ // It should not be possible for this flag to be set when we are getting
+ // destroyed since this flag implies a pending nsParserContinueEvent, which
+ // has an owning reference to |this|.
+ NS_ASSERTION(!(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT), "bad");
+}
+
+NS_IMPL_CYCLE_COLLECTION_CLASS(nsParser)
+
+NS_IMPL_CYCLE_COLLECTION_UNLINK_BEGIN(nsParser)
+ NS_IMPL_CYCLE_COLLECTION_UNLINK(mDTD)
+ NS_IMPL_CYCLE_COLLECTION_UNLINK(mSink)
+ NS_IMPL_CYCLE_COLLECTION_UNLINK(mObserver)
+NS_IMPL_CYCLE_COLLECTION_UNLINK_END
+
+NS_IMPL_CYCLE_COLLECTION_TRAVERSE_BEGIN(nsParser)
+ NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mDTD)
+ NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mSink)
+ NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mObserver)
+ CParserContext *pc = tmp->mParserContext;
+ while (pc) {
+ cb.NoteXPCOMChild(pc->mTokenizer);
+ pc = pc->mPrevContext;
+ }
+NS_IMPL_CYCLE_COLLECTION_TRAVERSE_END
+
+NS_IMPL_CYCLE_COLLECTING_ADDREF(nsParser)
+NS_IMPL_CYCLE_COLLECTING_RELEASE(nsParser)
+NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsParser)
+ NS_INTERFACE_MAP_ENTRY(nsIStreamListener)
+ NS_INTERFACE_MAP_ENTRY(nsIParser)
+ NS_INTERFACE_MAP_ENTRY(nsIRequestObserver)
+ NS_INTERFACE_MAP_ENTRY(nsISupportsWeakReference)
+ NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsIParser)
+NS_INTERFACE_MAP_END
+
+// The parser continue event is posted only if
+// all of the data to parse has been passed to ::OnDataAvailable
+// and the parser has been interrupted by the content sink
+// because the processing of tokens took too long.
+
+nsresult
+nsParser::PostContinueEvent()
+{
+ if (!(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT)) {
+ // If this flag isn't set, then there shouldn't be a live continue event!
+ NS_ASSERTION(!mContinueEvent, "bad");
+
+ // This creates a reference cycle between this and the event that is
+ // broken when the event fires.
+ nsCOMPtr<nsIRunnable> event = new nsParserContinueEvent(this);
+ if (NS_FAILED(NS_DispatchToCurrentThread(event))) {
+ NS_WARNING("failed to dispatch parser continuation event");
+ } else {
+ mFlags |= NS_PARSER_FLAG_PENDING_CONTINUE_EVENT;
+ mContinueEvent = event;
+ }
+ }
+ return NS_OK;
+}
+
+NS_IMETHODIMP_(void)
+nsParser::GetCommand(nsCString& aCommand)
+{
+ aCommand = mCommandStr;
+}
+
+/**
+ * Call this method once you've created a parser, and want to instruct it
+ * about the command which caused the parser to be constructed. For example,
+ * this allows us to select a DTD which can do, say, view-source.
+ *
+ * @param aCommand the command string to set
+ */
+NS_IMETHODIMP_(void)
+nsParser::SetCommand(const char* aCommand)
+{
+ mCommandStr.Assign(aCommand);
+ if (mCommandStr.EqualsLiteral("view-source")) {
+ mCommand = eViewSource;
+ } else if (mCommandStr.EqualsLiteral("view-fragment")) {
+ mCommand = eViewFragment;
+ } else {
+ mCommand = eViewNormal;
+ }
+}
+
+/**
+ * Call this method once you've created a parser, and want to instruct it
+ * about the command which caused the parser to be constructed. For example,
+ * this allows us to select a DTD which can do, say, view-source.
+ *
+ * @param aParserCommand the command to set
+ */
+NS_IMETHODIMP_(void)
+nsParser::SetCommand(eParserCommands aParserCommand)
+{
+ mCommand = aParserCommand;
+}
+
+/**
+ * Call this method once you've created a parser, and want to instruct it
+ * about what charset to load
+ *
+ * @param aCharset- the charset of a document
+ * @param aCharsetSource- the source of the charset
+ */
+NS_IMETHODIMP_(void)
+nsParser::SetDocumentCharset(const nsACString& aCharset, int32_t aCharsetSource)
+{
+ mCharset = aCharset;
+ mCharsetSource = aCharsetSource;
+ if (mParserContext && mParserContext->mScanner) {
+ mParserContext->mScanner->SetDocumentCharset(aCharset, aCharsetSource);
+ }
+}
+
+void
+nsParser::SetSinkCharset(nsACString& aCharset)
+{
+ if (mSink) {
+ mSink->SetDocumentCharset(aCharset);
+ }
+}
+
+/**
+ * This method gets called in order to set the content
+ * sink for this parser to dump nodes to.
+ *
+ * @param nsIContentSink interface for node receiver
+ */
+NS_IMETHODIMP_(void)
+nsParser::SetContentSink(nsIContentSink* aSink)
+{
+ NS_PRECONDITION(aSink, "sink cannot be null!");
+ mSink = aSink;
+
+ if (mSink) {
+ mSink->SetParser(this);
+ nsCOMPtr<nsIHTMLContentSink> htmlSink = do_QueryInterface(mSink);
+ if (htmlSink) {
+ mIsAboutBlank = true;
+ }
+ }
+}
+
+/**
+ * retrieve the sink set into the parser
+ * @return current sink
+ */
+NS_IMETHODIMP_(nsIContentSink*)
+nsParser::GetContentSink()
+{
+ return mSink;
+}
+
+static nsIDTD*
+FindSuitableDTD(CParserContext& aParserContext)
+{
+ // We always find a DTD.
+ aParserContext.mAutoDetectStatus = ePrimaryDetect;
+
+ // Quick check for view source.
+ MOZ_ASSERT(aParserContext.mParserCommand != eViewSource,
+ "The old parser is not supposed to be used for View Source "
+ "anymore.");
+
+ // Now see if we're parsing HTML (which, as far as we're concerned, simply
+ // means "not XML").
+ if (aParserContext.mDocType != eXML) {
+ return new CNavDTD();
+ }
+
+ // If we're here, then we'd better be parsing XML.
+ NS_ASSERTION(aParserContext.mDocType == eXML, "What are you trying to send me, here?");
+ return new nsExpatDriver();
+}
+
+NS_IMETHODIMP
+nsParser::CancelParsingEvents()
+{
+ if (mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT) {
+ NS_ASSERTION(mContinueEvent, "mContinueEvent is null");
+ // Revoke the pending continue parsing event
+ mContinueEvent = nullptr;
+ mFlags &= ~NS_PARSER_FLAG_PENDING_CONTINUE_EVENT;
+ }
+ return NS_OK;
+}
+
+////////////////////////////////////////////////////////////////////////
+
+/**
+ * Evalutes EXPR1 and EXPR2 exactly once each, in that order. Stores the value
+ * of EXPR2 in RV is EXPR2 fails, otherwise RV contains the result of EXPR1
+ * (which could be success or failure).
+ *
+ * To understand the motivation for this construct, consider these example
+ * methods:
+ *
+ * nsresult nsSomething::DoThatThing(nsIWhatever* obj) {
+ * nsresult rv = NS_OK;
+ * ...
+ * return obj->DoThatThing();
+ * NS_ENSURE_SUCCESS(rv, rv);
+ * ...
+ * return rv;
+ * }
+ *
+ * void nsCaller::MakeThingsHappen() {
+ * return mSomething->DoThatThing(mWhatever);
+ * }
+ *
+ * Suppose, for whatever reason*, we want to shift responsibility for calling
+ * mWhatever->DoThatThing() from nsSomething::DoThatThing up to
+ * nsCaller::MakeThingsHappen. We might rewrite the two methods as follows:
+ *
+ * nsresult nsSomething::DoThatThing() {
+ * nsresult rv = NS_OK;
+ * ...
+ * ...
+ * return rv;
+ * }
+ *
+ * void nsCaller::MakeThingsHappen() {
+ * nsresult rv;
+ * PREFER_LATTER_ERROR_CODE(mSomething->DoThatThing(),
+ * mWhatever->DoThatThing(),
+ * rv);
+ * return rv;
+ * }
+ *
+ * *Possible reasons include: nsCaller doesn't want to give mSomething access
+ * to mWhatever, nsCaller wants to guarantee that mWhatever->DoThatThing() will
+ * be called regardless of how nsSomething::DoThatThing behaves, &c.
+ */
+#define PREFER_LATTER_ERROR_CODE(EXPR1, EXPR2, RV) { \
+ nsresult RV##__temp = EXPR1; \
+ RV = EXPR2; \
+ if (NS_FAILED(RV)) { \
+ RV = RV##__temp; \
+ } \
+}
+
+/**
+ * This gets called just prior to the model actually
+ * being constructed. It's important to make this the
+ * last thing that happens right before parsing, so we
+ * can delay until the last moment the resolution of
+ * which DTD to use (unless of course we're assigned one).
+ */
+nsresult
+nsParser::WillBuildModel(nsString& aFilename)
+{
+ if (!mParserContext)
+ return kInvalidParserContext;
+
+ if (eUnknownDetect != mParserContext->mAutoDetectStatus)
+ return NS_OK;
+
+ if (eDTDMode_unknown == mParserContext->mDTDMode ||
+ eDTDMode_autodetect == mParserContext->mDTDMode) {
+ if (mIsAboutBlank) {
+ mParserContext->mDTDMode = eDTDMode_quirks;
+ mParserContext->mDocType = eHTML_Quirks;
+ } else {
+ mParserContext->mDTDMode = eDTDMode_full_standards;
+ mParserContext->mDocType = eXML;
+ }
+ } // else XML fragment with nested parser context
+
+ NS_ASSERTION(!mDTD || !mParserContext->mPrevContext,
+ "Clobbering DTD for non-root parser context!");
+ mDTD = FindSuitableDTD(*mParserContext);
+ NS_ENSURE_TRUE(mDTD, NS_ERROR_OUT_OF_MEMORY);
+
+ nsITokenizer* tokenizer;
+ nsresult rv = mParserContext->GetTokenizer(mDTD, mSink, tokenizer);
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ rv = mDTD->WillBuildModel(*mParserContext, tokenizer, mSink);
+ nsresult sinkResult = mSink->WillBuildModel(mDTD->GetMode());
+ // nsIDTD::WillBuildModel used to be responsible for calling
+ // nsIContentSink::WillBuildModel, but that obligation isn't expressible
+ // in the nsIDTD interface itself, so it's sounder and simpler to give that
+ // responsibility back to the parser. The former behavior of the DTD was to
+ // NS_ENSURE_SUCCESS the sink WillBuildModel call, so if the sink returns
+ // failure we should use sinkResult instead of rv, to preserve the old error
+ // handling behavior of the DTD:
+ return NS_FAILED(sinkResult) ? sinkResult : rv;
+}
+
+/**
+ * This gets called when the parser is done with its input.
+ * Note that the parser may have been called recursively, so we
+ * have to check for a prev. context before closing out the DTD/sink.
+ */
+nsresult
+nsParser::DidBuildModel(nsresult anErrorCode)
+{
+ nsresult result = anErrorCode;
+
+ if (IsComplete()) {
+ if (mParserContext && !mParserContext->mPrevContext) {
+ // Let sink know if we're about to end load because we've been terminated.
+ // In that case we don't want it to run deferred scripts.
+ bool terminated = mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING;
+ if (mDTD && mSink) {
+ nsresult dtdResult = mDTD->DidBuildModel(anErrorCode),
+ sinkResult = mSink->DidBuildModel(terminated);
+ // nsIDTD::DidBuildModel used to be responsible for calling
+ // nsIContentSink::DidBuildModel, but that obligation isn't expressible
+ // in the nsIDTD interface itself, so it's sounder and simpler to give
+ // that responsibility back to the parser. The former behavior of the
+ // DTD was to NS_ENSURE_SUCCESS the sink DidBuildModel call, so if the
+ // sink returns failure we should use sinkResult instead of dtdResult,
+ // to preserve the old error handling behavior of the DTD:
+ result = NS_FAILED(sinkResult) ? sinkResult : dtdResult;
+ }
+
+ //Ref. to bug 61462.
+ mParserContext->mRequest = nullptr;
+ }
+ }
+
+ return result;
+}
+
+/**
+ * This method adds a new parser context to the list,
+ * pushing the current one to the next position.
+ *
+ * @param ptr to new context
+ */
+void
+nsParser::PushContext(CParserContext& aContext)
+{
+ NS_ASSERTION(aContext.mPrevContext == mParserContext,
+ "Trying to push a context whose previous context differs from "
+ "the current parser context.");
+ mParserContext = &aContext;
+}
+
+/**
+ * This method pops the topmost context off the stack,
+ * returning it to the user. The next context (if any)
+ * becomes the current context.
+ * @update gess7/22/98
+ * @return prev. context
+ */
+CParserContext*
+nsParser::PopContext()
+{
+ CParserContext* oldContext = mParserContext;
+ if (oldContext) {
+ mParserContext = oldContext->mPrevContext;
+ if (mParserContext) {
+ // If the old context was blocked, propagate the blocked state
+ // back to the new one. Also, propagate the stream listener state
+ // but don't override onStop state to guarantee the call to DidBuildModel().
+ if (mParserContext->mStreamListenerState != eOnStop) {
+ mParserContext->mStreamListenerState = oldContext->mStreamListenerState;
+ }
+ }
+ }
+ return oldContext;
+}
+
+/**
+ * Call this when you want control whether or not the parser will parse
+ * and tokenize input (TRUE), or whether it just caches input to be
+ * parsed later (FALSE).
+ *
+ * @param aState determines whether we parse/tokenize or just cache.
+ * @return current state
+ */
+void
+nsParser::SetUnusedInput(nsString& aBuffer)
+{
+ mUnusedInput = aBuffer;
+}
+
+/**
+ * Call this when you want to *force* the parser to terminate the
+ * parsing process altogether. This is binary -- so once you terminate
+ * you can't resume without restarting altogether.
+ */
+NS_IMETHODIMP
+nsParser::Terminate(void)
+{
+ // We should only call DidBuildModel once, so don't do anything if this is
+ // the second time that Terminate has been called.
+ if (mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING) {
+ return NS_OK;
+ }
+
+ nsresult result = NS_OK;
+ // XXX - [ until we figure out a way to break parser-sink circularity ]
+ // Hack - Hold a reference until we are completely done...
+ nsCOMPtr<nsIParser> kungFuDeathGrip(this);
+ mInternalState = result = NS_ERROR_HTMLPARSER_STOPPARSING;
+
+ // CancelParsingEvents must be called to avoid leaking the nsParser object
+ // @see bug 108049
+ // If NS_PARSER_FLAG_PENDING_CONTINUE_EVENT is set then CancelParsingEvents
+ // will reset it so DidBuildModel will call DidBuildModel on the DTD. Note:
+ // The IsComplete() call inside of DidBuildModel looks at the pendingContinueEvents flag.
+ CancelParsingEvents();
+
+ // If we got interrupted in the middle of a document.write, then we might
+ // have more than one parser context on our parsercontext stack. This has
+ // the effect of making DidBuildModel a no-op, meaning that we never call
+ // our sink's DidBuildModel and break the reference cycle, causing a leak.
+ // Since we're getting terminated, we manually clean up our context stack.
+ while (mParserContext && mParserContext->mPrevContext) {
+ CParserContext *prev = mParserContext->mPrevContext;
+ delete mParserContext;
+ mParserContext = prev;
+ }
+
+ if (mDTD) {
+ mDTD->Terminate();
+ DidBuildModel(result);
+ } else if (mSink) {
+ // We have no parser context or no DTD yet (so we got terminated before we
+ // got any data). Manually break the reference cycle with the sink.
+ result = mSink->DidBuildModel(true);
+ NS_ENSURE_SUCCESS(result, result);
+ }
+
+ return NS_OK;
+}
+
+NS_IMETHODIMP
+nsParser::ContinueInterruptedParsing()
+{
+ // If there are scripts executing, then the content sink is jumping the gun
+ // (probably due to a synchronous XMLHttpRequest) and will re-enable us
+ // later, see bug 460706.
+ if (!IsOkToProcessNetworkData()) {
+ return NS_OK;
+ }
+
+ // If the stream has already finished, there's a good chance
+ // that we might start closing things down when the parser
+ // is reenabled. To make sure that we're not deleted across
+ // the reenabling process, hold a reference to ourselves.
+ nsresult result=NS_OK;
+ nsCOMPtr<nsIParser> kungFuDeathGrip(this);
+ nsCOMPtr<nsIContentSink> sinkDeathGrip(mSink);
+
+#ifdef DEBUG
+ if (!(mFlags & NS_PARSER_FLAG_PARSER_ENABLED)) {
+ NS_WARNING("Don't call ContinueInterruptedParsing on a blocked parser.");
+ }
+#endif
+
+ bool isFinalChunk = mParserContext &&
+ mParserContext->mStreamListenerState == eOnStop;
+
+ mProcessingNetworkData = true;
+ if (sinkDeathGrip) {
+ sinkDeathGrip->WillParse();
+ }
+ result = ResumeParse(true, isFinalChunk); // Ref. bug 57999
+ mProcessingNetworkData = false;
+
+ if (result != NS_OK) {
+ result=mInternalState;
+ }
+
+ return result;
+}
+
+/**
+ * Stops parsing temporarily. That's it will prevent the
+ * parser from building up content model.
+ */
+NS_IMETHODIMP_(void)
+nsParser::BlockParser()
+{
+ mFlags &= ~NS_PARSER_FLAG_PARSER_ENABLED;
+}
+
+/**
+ * Open up the parser for tokenization, building up content
+ * model..etc. However, this method does not resume parsing
+ * automatically. It's the callers' responsibility to restart
+ * the parsing engine.
+ */
+NS_IMETHODIMP_(void)
+nsParser::UnblockParser()
+{
+ if (!(mFlags & NS_PARSER_FLAG_PARSER_ENABLED)) {
+ mFlags |= NS_PARSER_FLAG_PARSER_ENABLED;
+ } else {
+ NS_WARNING("Trying to unblock an unblocked parser.");
+ }
+}
+
+NS_IMETHODIMP_(void)
+nsParser::ContinueInterruptedParsingAsync()
+{
+ mSink->ContinueInterruptedParsingAsync();
+}
+
+/**
+ * Call this to query whether the parser is enabled or not.
+ */
+NS_IMETHODIMP_(bool)
+nsParser::IsParserEnabled()
+{
+ return (mFlags & NS_PARSER_FLAG_PARSER_ENABLED) != 0;
+}
+
+/**
+ * Call this to query whether the parser thinks it's done with parsing.
+ */
+NS_IMETHODIMP_(bool)
+nsParser::IsComplete()
+{
+ return !(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT);
+}
+
+
+void nsParser::HandleParserContinueEvent(nsParserContinueEvent *ev)
+{
+ // Ignore any revoked continue events...
+ if (mContinueEvent != ev)
+ return;
+
+ mFlags &= ~NS_PARSER_FLAG_PENDING_CONTINUE_EVENT;
+ mContinueEvent = nullptr;
+
+ NS_ASSERTION(IsOkToProcessNetworkData(),
+ "Interrupted in the middle of a script?");
+ ContinueInterruptedParsing();
+}
+
+bool
+nsParser::IsInsertionPointDefined()
+{
+ return false;
+}
+
+void
+nsParser::PushDefinedInsertionPoint()
+{
+}
+
+void
+nsParser::PopDefinedInsertionPoint()
+{
+}
+
+void
+nsParser::MarkAsNotScriptCreated(const char* aCommand)
+{
+}
+
+bool
+nsParser::IsScriptCreated()
+{
+ return false;
+}
+
+/**
+ * This is the main controlling routine in the parsing process.
+ * Note that it may get called multiple times for the same scanner,
+ * since this is a pushed based system, and all the tokens may
+ * not have been consumed by the scanner during a given invocation
+ * of this method.
+ */
+NS_IMETHODIMP
+nsParser::Parse(nsIURI* aURL,
+ nsIRequestObserver* aListener,
+ void* aKey,
+ nsDTDMode aMode)
+{
+
+ NS_PRECONDITION(aURL, "Error: Null URL given");
+
+ nsresult result=kBadURL;
+ mObserver = aListener;
+
+ if (aURL) {
+ nsAutoCString spec;
+ nsresult rv = aURL->GetSpec(spec);
+ if (rv != NS_OK) {
+ return rv;
+ }
+ NS_ConvertUTF8toUTF16 theName(spec);
+
+ nsScanner* theScanner = new nsScanner(theName, false);
+ CParserContext* pc = new CParserContext(mParserContext, theScanner, aKey,
+ mCommand, aListener);
+ if (pc && theScanner) {
+ pc->mMultipart = true;
+ pc->mContextType = CParserContext::eCTURL;
+ pc->mDTDMode = aMode;
+ PushContext(*pc);
+
+ result = NS_OK;
+ } else {
+ result = mInternalState = NS_ERROR_HTMLPARSER_BADCONTEXT;
+ }
+ }
+ return result;
+}
+
+/**
+ * Used by XML fragment parsing below.
+ *
+ * @param aSourceBuffer contains a string-full of real content
+ */
+nsresult
+nsParser::Parse(const nsAString& aSourceBuffer,
+ void* aKey,
+ bool aLastCall)
+{
+ nsresult result = NS_OK;
+
+ // Don't bother if we're never going to parse this.
+ if (mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING) {
+ return result;
+ }
+
+ if (!aLastCall && aSourceBuffer.IsEmpty()) {
+ // Nothing is being passed to the parser so return
+ // immediately. mUnusedInput will get processed when
+ // some data is actually passed in.
+ // But if this is the last call, make sure to finish up
+ // stuff correctly.
+ return result;
+ }
+
+ // Maintain a reference to ourselves so we don't go away
+ // till we're completely done.
+ nsCOMPtr<nsIParser> kungFuDeathGrip(this);
+
+ if (aLastCall || !aSourceBuffer.IsEmpty() || !mUnusedInput.IsEmpty()) {
+ // Note: The following code will always find the parser context associated
+ // with the given key, even if that context has been suspended (e.g., for
+ // another document.write call). This doesn't appear to be exactly what IE
+ // does in the case where this happens, but this makes more sense.
+ CParserContext* pc = mParserContext;
+ while (pc && pc->mKey != aKey) {
+ pc = pc->mPrevContext;
+ }
+
+ if (!pc) {
+ // Only make a new context if we don't have one, OR if we do, but has a
+ // different context key.
+ nsScanner* theScanner = new nsScanner(mUnusedInput);
+ NS_ENSURE_TRUE(theScanner, NS_ERROR_OUT_OF_MEMORY);
+
+ eAutoDetectResult theStatus = eUnknownDetect;
+
+ if (mParserContext &&
+ mParserContext->mMimeType.EqualsLiteral("application/xml")) {
+ // Ref. Bug 90379
+ NS_ASSERTION(mDTD, "How come the DTD is null?");
+
+ if (mParserContext) {
+ theStatus = mParserContext->mAutoDetectStatus;
+ // Added this to fix bug 32022.
+ }
+ }
+
+ pc = new CParserContext(mParserContext, theScanner, aKey, mCommand,
+ 0, theStatus, aLastCall);
+ NS_ENSURE_TRUE(pc, NS_ERROR_OUT_OF_MEMORY);
+
+ PushContext(*pc);
+
+ pc->mMultipart = !aLastCall; // By default
+ if (pc->mPrevContext) {
+ pc->mMultipart |= pc->mPrevContext->mMultipart;
+ }
+
+ // Start fix bug 40143
+ if (pc->mMultipart) {
+ pc->mStreamListenerState = eOnDataAvail;
+ if (pc->mScanner) {
+ pc->mScanner->SetIncremental(true);
+ }
+ } else {
+ pc->mStreamListenerState = eOnStop;
+ if (pc->mScanner) {
+ pc->mScanner->SetIncremental(false);
+ }
+ }
+ // end fix for 40143
+
+ pc->mContextType=CParserContext::eCTString;
+ pc->SetMimeType(NS_LITERAL_CSTRING("application/xml"));
+ pc->mDTDMode = eDTDMode_full_standards;
+
+ mUnusedInput.Truncate();
+
+ pc->mScanner->Append(aSourceBuffer);
+ // Do not interrupt document.write() - bug 95487
+ result = ResumeParse(false, false, false);
+ } else {
+ pc->mScanner->Append(aSourceBuffer);
+ if (!pc->mPrevContext) {
+ // Set stream listener state to eOnStop, on the final context - Fix 68160,
+ // to guarantee DidBuildModel() call - Fix 36148
+ if (aLastCall) {
+ pc->mStreamListenerState = eOnStop;
+ pc->mScanner->SetIncremental(false);
+ }
+
+ if (pc == mParserContext) {
+ // If pc is not mParserContext, then this call to ResumeParse would
+ // do the wrong thing and try to continue parsing using
+ // mParserContext. We need to wait to actually resume parsing on pc.
+ ResumeParse(false, false, false);
+ }
+ }
+ }
+ }
+
+ return result;
+}
+
+NS_IMETHODIMP
+nsParser::ParseFragment(const nsAString& aSourceBuffer,
+ nsTArray<nsString>& aTagStack)
+{
+ nsresult result = NS_OK;
+ nsAutoString theContext;
+ uint32_t theCount = aTagStack.Length();
+ uint32_t theIndex = 0;
+
+ // Disable observers for fragments
+ mFlags &= ~NS_PARSER_FLAG_OBSERVERS_ENABLED;
+
+ for (theIndex = 0; theIndex < theCount; theIndex++) {
+ theContext.Append('<');
+ theContext.Append(aTagStack[theCount - theIndex - 1]);
+ theContext.Append('>');
+ }
+
+ if (theCount == 0) {
+ // Ensure that the buffer is not empty. Because none of the DTDs care
+ // about leading whitespace, this doesn't change the result.
+ theContext.Assign(' ');
+ }
+
+ // First, parse the context to build up the DTD's tag stack. Note that we
+ // pass false for the aLastCall parameter.
+ result = Parse(theContext,
+ (void*)&theContext,
+ false);
+ if (NS_FAILED(result)) {
+ mFlags |= NS_PARSER_FLAG_OBSERVERS_ENABLED;
+ return result;
+ }
+
+ if (!mSink) {
+ // Parse must have failed in the XML case and so the sink was killed.
+ return NS_ERROR_HTMLPARSER_STOPPARSING;
+ }
+
+ nsCOMPtr<nsIFragmentContentSink> fragSink = do_QueryInterface(mSink);
+ NS_ASSERTION(fragSink, "ParseFragment requires a fragment content sink");
+
+ fragSink->WillBuildContent();
+ // Now, parse the actual content. Note that this is the last call
+ // for HTML content, but for XML, we will want to build and parse
+ // the end tags. However, if tagStack is empty, it's the last call
+ // for XML as well.
+ if (theCount == 0) {
+ result = Parse(aSourceBuffer,
+ &theContext,
+ true);
+ fragSink->DidBuildContent();
+ } else {
+ // Add an end tag chunk, so expat will read the whole source buffer,
+ // and not worry about ']]' etc.
+ result = Parse(aSourceBuffer + NS_LITERAL_STRING("</"),
+ &theContext,
+ false);
+ fragSink->DidBuildContent();
+
+ if (NS_SUCCEEDED(result)) {
+ nsAutoString endContext;
+ for (theIndex = 0; theIndex < theCount; theIndex++) {
+ // we already added an end tag chunk above
+ if (theIndex > 0) {
+ endContext.AppendLiteral("</");
+ }
+
+ nsString& thisTag = aTagStack[theIndex];
+ // was there an xmlns=?
+ int32_t endOfTag = thisTag.FindChar(char16_t(' '));
+ if (endOfTag == -1) {
+ endContext.Append(thisTag);
+ } else {
+ endContext.Append(Substring(thisTag,0,endOfTag));
+ }
+
+ endContext.Append('>');
+ }
+
+ result = Parse(endContext,
+ &theContext,
+ true);
+ }
+ }
+
+ mFlags |= NS_PARSER_FLAG_OBSERVERS_ENABLED;
+
+ return result;
+}
+
+/**
+ * This routine is called to cause the parser to continue parsing its
+ * underlying stream. This call allows the parse process to happen in
+ * chunks, such as when the content is push based, and we need to parse in
+ * pieces.
+ *
+ * An interesting change in how the parser gets used has led us to add extra
+ * processing to this method. The case occurs when the parser is blocked in
+ * one context, and gets a parse(string) call in another context. In this
+ * case, the parserContexts are linked. No problem.
+ *
+ * The problem is that Parse(string) assumes that it can proceed unabated,
+ * but if the parser is already blocked that assumption is false. So we
+ * needed to add a mechanism here to allow the parser to continue to process
+ * (the pop and free) contexts until 1) it get's blocked again; 2) it runs
+ * out of contexts.
+ *
+ *
+ * @param allowItertion : set to true if non-script resumption is requested
+ * @param aIsFinalChunk : tells us when the last chunk of data is provided.
+ * @return error code -- 0 if ok, non-zero if error.
+ */
+nsresult
+nsParser::ResumeParse(bool allowIteration, bool aIsFinalChunk,
+ bool aCanInterrupt)
+{
+ nsresult result = NS_OK;
+
+ if ((mFlags & NS_PARSER_FLAG_PARSER_ENABLED) &&
+ mInternalState != NS_ERROR_HTMLPARSER_STOPPARSING) {
+
+ result = WillBuildModel(mParserContext->mScanner->GetFilename());
+ if (NS_FAILED(result)) {
+ mFlags &= ~NS_PARSER_FLAG_CAN_TOKENIZE;
+ return result;
+ }
+
+ if (mDTD) {
+ mSink->WillResume();
+ bool theIterationIsOk = true;
+
+ while (result == NS_OK && theIterationIsOk) {
+ if (!mUnusedInput.IsEmpty() && mParserContext->mScanner) {
+ // -- Ref: Bug# 22485 --
+ // Insert the unused input into the source buffer
+ // as if it was read from the input stream.
+ // Adding UngetReadable() per vidur!!
+ mParserContext->mScanner->UngetReadable(mUnusedInput);
+ mUnusedInput.Truncate(0);
+ }
+
+ // Only allow parsing to be interrupted in the subsequent call to
+ // build model.
+ nsresult theTokenizerResult = (mFlags & NS_PARSER_FLAG_CAN_TOKENIZE)
+ ? Tokenize(aIsFinalChunk)
+ : NS_OK;
+ result = BuildModel();
+
+ if (result == NS_ERROR_HTMLPARSER_INTERRUPTED && aIsFinalChunk) {
+ PostContinueEvent();
+ }
+
+ theIterationIsOk = theTokenizerResult != kEOF &&
+ result != NS_ERROR_HTMLPARSER_INTERRUPTED;
+
+ // Make sure not to stop parsing too early. Therefore, before shutting
+ // down the parser, it's important to check whether the input buffer
+ // has been scanned to completion (theTokenizerResult should be kEOF).
+ // kEOF -> End of buffer.
+
+ // If we're told to block the parser, we disable all further parsing
+ // (and cache any data coming in) until the parser is re-enabled.
+ if (NS_ERROR_HTMLPARSER_BLOCK == result) {
+ mSink->WillInterrupt();
+ if (mFlags & NS_PARSER_FLAG_PARSER_ENABLED) {
+ // If we were blocked by a recursive invocation, don't re-block.
+ BlockParser();
+ }
+ return NS_OK;
+ }
+ if (NS_ERROR_HTMLPARSER_STOPPARSING == result) {
+ // Note: Parser Terminate() calls DidBuildModel.
+ if (mInternalState != NS_ERROR_HTMLPARSER_STOPPARSING) {
+ DidBuildModel(mStreamStatus);
+ mInternalState = result;
+ }
+
+ return NS_OK;
+ }
+ if ((NS_OK == result && theTokenizerResult == kEOF) ||
+ result == NS_ERROR_HTMLPARSER_INTERRUPTED) {
+ bool theContextIsStringBased =
+ CParserContext::eCTString == mParserContext->mContextType;
+
+ if (mParserContext->mStreamListenerState == eOnStop ||
+ !mParserContext->mMultipart || theContextIsStringBased) {
+ if (!mParserContext->mPrevContext) {
+ if (mParserContext->mStreamListenerState == eOnStop) {
+ DidBuildModel(mStreamStatus);
+ return NS_OK;
+ }
+ } else {
+ CParserContext* theContext = PopContext();
+ if (theContext) {
+ theIterationIsOk = allowIteration && theContextIsStringBased;
+ if (theContext->mCopyUnused) {
+ if (!theContext->mScanner->CopyUnusedData(mUnusedInput)) {
+ mInternalState = NS_ERROR_OUT_OF_MEMORY;
+ }
+ }
+
+ delete theContext;
+ }
+
+ result = mInternalState;
+ aIsFinalChunk = mParserContext &&
+ mParserContext->mStreamListenerState == eOnStop;
+ // ...then intentionally fall through to mSink->WillInterrupt()...
+ }
+ }
+ }
+
+ if (theTokenizerResult == kEOF ||
+ result == NS_ERROR_HTMLPARSER_INTERRUPTED) {
+ result = (result == NS_ERROR_HTMLPARSER_INTERRUPTED) ? NS_OK : result;
+ mSink->WillInterrupt();
+ }
+ }
+ } else {
+ mInternalState = result = NS_ERROR_HTMLPARSER_UNRESOLVEDDTD;
+ }
+ }
+
+ return (result == NS_ERROR_HTMLPARSER_INTERRUPTED) ? NS_OK : result;
+}
+
+/**
+ * This is where we loop over the tokens created in the
+ * tokenization phase, and try to make sense out of them.
+ */
+nsresult
+nsParser::BuildModel()
+{
+ nsITokenizer* theTokenizer = nullptr;
+
+ nsresult result = NS_OK;
+ if (mParserContext) {
+ result = mParserContext->GetTokenizer(mDTD, mSink, theTokenizer);
+ }
+
+ if (NS_SUCCEEDED(result)) {
+ if (mDTD) {
+ result = mDTD->BuildModel(theTokenizer, mSink);
+ }
+ } else {
+ mInternalState = result = NS_ERROR_HTMLPARSER_BADTOKENIZER;
+ }
+ return result;
+}
+
+/*******************************************************************
+ These methods are used to talk to the netlib system...
+ *******************************************************************/
+
+nsresult
+nsParser::OnStartRequest(nsIRequest *request, nsISupports* aContext)
+{
+ NS_PRECONDITION(eNone == mParserContext->mStreamListenerState,
+ "Parser's nsIStreamListener API was not setup "
+ "correctly in constructor.");
+ if (mObserver) {
+ mObserver->OnStartRequest(request, aContext);
+ }
+ mParserContext->mStreamListenerState = eOnStart;
+ mParserContext->mAutoDetectStatus = eUnknownDetect;
+ mParserContext->mRequest = request;
+
+ NS_ASSERTION(!mParserContext->mPrevContext,
+ "Clobbering DTD for non-root parser context!");
+ mDTD = nullptr;
+
+ nsresult rv;
+ nsAutoCString contentType;
+ nsCOMPtr<nsIChannel> channel = do_QueryInterface(request);
+ if (channel) {
+ rv = channel->GetContentType(contentType);
+ if (NS_SUCCEEDED(rv)) {
+ mParserContext->SetMimeType(contentType);
+ }
+ }
+
+ rv = NS_OK;
+
+ return rv;
+}
+
+static bool
+ExtractCharsetFromXmlDeclaration(const unsigned char* aBytes, int32_t aLen,
+ nsCString& oCharset)
+{
+ // This code is rather pointless to have. Might as well reuse expat as
+ // seen in nsHtml5StreamParser. -- hsivonen
+ oCharset.Truncate();
+ if ((aLen >= 5) &&
+ ('<' == aBytes[0]) &&
+ ('?' == aBytes[1]) &&
+ ('x' == aBytes[2]) &&
+ ('m' == aBytes[3]) &&
+ ('l' == aBytes[4])) {
+ int32_t i;
+ bool versionFound = false, encodingFound = false;
+ for (i = 6; i < aLen && !encodingFound; ++i) {
+ // end of XML declaration?
+ if ((((char*) aBytes)[i] == '?') &&
+ ((i + 1) < aLen) &&
+ (((char*) aBytes)[i + 1] == '>')) {
+ break;
+ }
+ // Version is required.
+ if (!versionFound) {
+ // Want to avoid string comparisons, hence looking for 'n'
+ // and only if found check the string leading to it. Not
+ // foolproof, but fast.
+ // The shortest string allowed before this is (strlen==13):
+ // <?xml version
+ if ((((char*) aBytes)[i] == 'n') &&
+ (i >= 12) &&
+ (0 == PL_strncmp("versio", (char*) (aBytes + i - 6), 6))) {
+ // Fast forward through version
+ char q = 0;
+ for (++i; i < aLen; ++i) {
+ char qi = ((char*) aBytes)[i];
+ if (qi == '\'' || qi == '"') {
+ if (q && q == qi) {
+ // ending quote
+ versionFound = true;
+ break;
+ } else {
+ // Starting quote
+ q = qi;
+ }
+ }
+ }
+ }
+ } else {
+ // encoding must follow version
+ // Want to avoid string comparisons, hence looking for 'g'
+ // and only if found check the string leading to it. Not
+ // foolproof, but fast.
+ // The shortest allowed string before this (strlen==26):
+ // <?xml version="1" encoding
+ if ((((char*) aBytes)[i] == 'g') && (i >= 25) && (0 == PL_strncmp(
+ "encodin", (char*) (aBytes + i - 7), 7))) {
+ int32_t encStart = 0;
+ char q = 0;
+ for (++i; i < aLen; ++i) {
+ char qi = ((char*) aBytes)[i];
+ if (qi == '\'' || qi == '"') {
+ if (q && q == qi) {
+ int32_t count = i - encStart;
+ // encoding value is invalid if it is UTF-16
+ if (count > 0 && PL_strncasecmp("UTF-16",
+ (char*) (aBytes + encStart), count)) {
+ oCharset.Assign((char*) (aBytes + encStart), count);
+ }
+ encodingFound = true;
+ break;
+ } else {
+ encStart = i + 1;
+ q = qi;
+ }
+ }
+ }
+ }
+ } // if (!versionFound)
+ } // for
+ }
+ return !oCharset.IsEmpty();
+}
+
+inline char
+GetNextChar(nsACString::const_iterator& aStart,
+ nsACString::const_iterator& aEnd)
+{
+ NS_ASSERTION(aStart != aEnd, "end of buffer");
+ return (++aStart != aEnd) ? *aStart : '\0';
+}
+
+static nsresult
+NoOpParserWriteFunc(nsIInputStream* in,
+ void* closure,
+ const char* fromRawSegment,
+ uint32_t toOffset,
+ uint32_t count,
+ uint32_t *writeCount)
+{
+ *writeCount = count;
+ return NS_OK;
+}
+
+typedef struct {
+ bool mNeedCharsetCheck;
+ nsParser* mParser;
+ nsScanner* mScanner;
+ nsIRequest* mRequest;
+} ParserWriteStruct;
+
+/*
+ * This function is invoked as a result of a call to a stream's
+ * ReadSegments() method. It is called for each contiguous buffer
+ * of data in the underlying stream or pipe. Using ReadSegments
+ * allows us to avoid copying data to read out of the stream.
+ */
+static nsresult
+ParserWriteFunc(nsIInputStream* in,
+ void* closure,
+ const char* fromRawSegment,
+ uint32_t toOffset,
+ uint32_t count,
+ uint32_t *writeCount)
+{
+ nsresult result;
+ ParserWriteStruct* pws = static_cast<ParserWriteStruct*>(closure);
+ const unsigned char* buf =
+ reinterpret_cast<const unsigned char*> (fromRawSegment);
+ uint32_t theNumRead = count;
+
+ if (!pws) {
+ return NS_ERROR_FAILURE;
+ }
+
+ if (pws->mNeedCharsetCheck) {
+ pws->mNeedCharsetCheck = false;
+ int32_t source;
+ nsAutoCString preferred;
+ nsAutoCString maybePrefer;
+ pws->mParser->GetDocumentCharset(preferred, source);
+
+ // This code was bogus when I found it. It expects the BOM or the XML
+ // declaration to be entirely in the first network buffer. -- hsivonen
+ if (nsContentUtils::CheckForBOM(buf, count, maybePrefer)) {
+ // The decoder will swallow the BOM. The UTF-16 will re-sniff for
+ // endianness. The value of preferred is now either "UTF-8" or "UTF-16".
+ preferred.Assign(maybePrefer);
+ source = kCharsetFromByteOrderMark;
+ } else if (source < kCharsetFromChannel) {
+ nsAutoCString declCharset;
+
+ if (ExtractCharsetFromXmlDeclaration(buf, count, declCharset)) {
+ if (EncodingUtils::FindEncodingForLabel(declCharset, maybePrefer)) {
+ preferred.Assign(maybePrefer);
+ source = kCharsetFromMetaTag;
+ }
+ }
+ }
+
+ pws->mParser->SetDocumentCharset(preferred, source);
+ pws->mParser->SetSinkCharset(preferred);
+
+ }
+
+ result = pws->mScanner->Append(fromRawSegment, theNumRead);
+ if (NS_SUCCEEDED(result)) {
+ *writeCount = count;
+ }
+
+ return result;
+}
+
+nsresult
+nsParser::OnDataAvailable(nsIRequest *request, nsISupports* aContext,
+ nsIInputStream *pIStream, uint64_t sourceOffset,
+ uint32_t aLength)
+{
+ NS_PRECONDITION((eOnStart == mParserContext->mStreamListenerState ||
+ eOnDataAvail == mParserContext->mStreamListenerState),
+ "Error: OnStartRequest() must be called before OnDataAvailable()");
+ NS_PRECONDITION(NS_InputStreamIsBuffered(pIStream),
+ "Must have a buffered input stream");
+
+ nsresult rv = NS_OK;
+
+ if (mIsAboutBlank) {
+ MOZ_ASSERT(false, "Must not get OnDataAvailable for about:blank");
+ // ... but if an extension tries to feed us data for about:blank in a
+ // release build, silently ignore the data.
+ uint32_t totalRead;
+ rv = pIStream->ReadSegments(NoOpParserWriteFunc,
+ nullptr,
+ aLength,
+ &totalRead);
+ return rv;
+ }
+
+ CParserContext *theContext = mParserContext;
+
+ while (theContext && theContext->mRequest != request) {
+ theContext = theContext->mPrevContext;
+ }
+
+ if (theContext) {
+ theContext->mStreamListenerState = eOnDataAvail;
+
+ if (eInvalidDetect == theContext->mAutoDetectStatus) {
+ if (theContext->mScanner) {
+ nsScannerIterator iter;
+ theContext->mScanner->EndReading(iter);
+ theContext->mScanner->SetPosition(iter, true);
+ }
+ }
+
+ uint32_t totalRead;
+ ParserWriteStruct pws;
+ pws.mNeedCharsetCheck = true;
+ pws.mParser = this;
+ pws.mScanner = theContext->mScanner;
+ pws.mRequest = request;
+
+ rv = pIStream->ReadSegments(ParserWriteFunc, &pws, aLength, &totalRead);
+ if (NS_FAILED(rv)) {
+ return rv;
+ }
+
+ if (IsOkToProcessNetworkData()) {
+ nsCOMPtr<nsIParser> kungFuDeathGrip(this);
+ nsCOMPtr<nsIContentSink> sinkDeathGrip(mSink);
+ mProcessingNetworkData = true;
+ if (sinkDeathGrip) {
+ sinkDeathGrip->WillParse();
+ }
+ rv = ResumeParse();
+ mProcessingNetworkData = false;
+ }
+ } else {
+ rv = NS_ERROR_UNEXPECTED;
+ }
+
+ return rv;
+}
+
+/**
+ * This is called by the networking library once the last block of data
+ * has been collected from the net.
+ */
+nsresult
+nsParser::OnStopRequest(nsIRequest *request, nsISupports* aContext,
+ nsresult status)
+{
+ nsresult rv = NS_OK;
+
+ CParserContext *pc = mParserContext;
+ while (pc) {
+ if (pc->mRequest == request) {
+ pc->mStreamListenerState = eOnStop;
+ pc->mScanner->SetIncremental(false);
+ break;
+ }
+
+ pc = pc->mPrevContext;
+ }
+
+ mStreamStatus = status;
+
+ if (IsOkToProcessNetworkData() && NS_SUCCEEDED(rv)) {
+ mProcessingNetworkData = true;
+ if (mSink) {
+ mSink->WillParse();
+ }
+ rv = ResumeParse(true, true);
+ mProcessingNetworkData = false;
+ }
+
+ // If the parser isn't enabled, we don't finish parsing till
+ // it is reenabled.
+
+
+ // XXX Should we wait to notify our observers as well if the
+ // parser isn't yet enabled?
+ if (mObserver) {
+ mObserver->OnStopRequest(request, aContext, status);
+ }
+
+ return rv;
+}
+
+
+/*******************************************************************
+ Here come the tokenization methods...
+ *******************************************************************/
+
+
+/**
+ * Part of the code sandwich, this gets called right before
+ * the tokenization process begins. The main reason for
+ * this call is to allow the delegate to do initialization.
+ */
+bool
+nsParser::WillTokenize(bool aIsFinalChunk)
+{
+ if (!mParserContext) {
+ return true;
+ }
+
+ nsITokenizer* theTokenizer;
+ nsresult result = mParserContext->GetTokenizer(mDTD, mSink, theTokenizer);
+ NS_ENSURE_SUCCESS(result, false);
+ return NS_SUCCEEDED(theTokenizer->WillTokenize(aIsFinalChunk));
+}
+
+
+/**
+ * This is the primary control routine to consume tokens.
+ * It iteratively consumes tokens until an error occurs or
+ * you run out of data.
+ */
+nsresult nsParser::Tokenize(bool aIsFinalChunk)
+{
+ nsITokenizer* theTokenizer;
+
+ nsresult result = NS_ERROR_NOT_AVAILABLE;
+ if (mParserContext) {
+ result = mParserContext->GetTokenizer(mDTD, mSink, theTokenizer);
+ }
+
+ if (NS_SUCCEEDED(result)) {
+ bool flushTokens = false;
+
+ bool killSink = false;
+
+ WillTokenize(aIsFinalChunk);
+ while (NS_SUCCEEDED(result)) {
+ mParserContext->mScanner->Mark();
+ result = theTokenizer->ConsumeToken(*mParserContext->mScanner,
+ flushTokens);
+ if (NS_FAILED(result)) {
+ mParserContext->mScanner->RewindToMark();
+ if (kEOF == result){
+ break;
+ }
+ if (NS_ERROR_HTMLPARSER_STOPPARSING == result) {
+ killSink = true;
+ result = Terminate();
+ break;
+ }
+ } else if (flushTokens && (mFlags & NS_PARSER_FLAG_OBSERVERS_ENABLED)) {
+ // I added the extra test of NS_PARSER_FLAG_OBSERVERS_ENABLED to fix Bug# 23931.
+ // Flush tokens on seeing </SCRIPT> -- Ref: Bug# 22485 --
+ // Also remember to update the marked position.
+ mFlags |= NS_PARSER_FLAG_FLUSH_TOKENS;
+ mParserContext->mScanner->Mark();
+ break;
+ }
+ }
+
+ if (killSink) {
+ mSink = nullptr;
+ }
+ } else {
+ result = mInternalState = NS_ERROR_HTMLPARSER_BADTOKENIZER;
+ }
+
+ return result;
+}
+
+/**
+ * Get the channel associated with this parser
+ *
+ * @param aChannel out param that will contain the result
+ * @return NS_OK if successful
+ */
+NS_IMETHODIMP
+nsParser::GetChannel(nsIChannel** aChannel)
+{
+ nsresult result = NS_ERROR_NOT_AVAILABLE;
+ if (mParserContext && mParserContext->mRequest) {
+ result = CallQueryInterface(mParserContext->mRequest, aChannel);
+ }
+ return result;
+}
+
+/**
+ * Get the DTD associated with this parser
+ */
+NS_IMETHODIMP
+nsParser::GetDTD(nsIDTD** aDTD)
+{
+ if (mParserContext) {
+ NS_IF_ADDREF(*aDTD = mDTD);
+ }
+
+ return NS_OK;
+}
+
+/**
+ * Get this as nsIStreamListener
+ */
+nsIStreamListener*
+nsParser::GetStreamListener()
+{
+ return this;
+}