/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* vim: set ts=8 sts=2 et sw=2 tw=80: */ /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ /* * nsIContentSerializer implementation that can be used with an * nsIDocumentEncoder to convert an XML DOM to an XML string that * could be parsed into more or less the original DOM. */ #include "nsXMLContentSerializer.h" #include "nsGkAtoms.h" #include "nsIDOMProcessingInstruction.h" #include "nsIDOMComment.h" #include "nsIDOMDocumentType.h" #include "nsIContent.h" #include "nsIDocument.h" #include "nsIDocumentEncoder.h" #include "nsIParserService.h" #include "nsNameSpaceManager.h" #include "nsTextFragment.h" #include "nsString.h" #include "mozilla/Sprintf.h" #include "nsUnicharUtils.h" #include "nsCRT.h" #include "nsContentUtils.h" #include "nsAttrName.h" #include "nsILineBreaker.h" #include "mozilla/dom/Element.h" #include "nsParserConstants.h" using namespace mozilla::dom; #define kXMLNS "xmlns" // to be readable, we assume that an indented line contains // at least this number of characters (arbitrary value here). // This is a limit for the indentation. #define MIN_INDENTED_LINE_LENGTH 15 // the string used to indent. #define INDENT_STRING " " #define INDENT_STRING_LENGTH 2 nsresult NS_NewXMLContentSerializer(nsIContentSerializer** aSerializer) { RefPtr<nsXMLContentSerializer> it = new nsXMLContentSerializer(); it.forget(aSerializer); return NS_OK; } nsXMLContentSerializer::nsXMLContentSerializer() : mPrefixIndex(0), mColPos(0), mIndentOverflow(0), mIsIndentationAddedOnCurrentLine(false), mInAttribute(false), mAddNewlineForRootNode(false), mAddSpace(false), mMayIgnoreLineBreakSequence(false), mBodyOnly(false), mInBody(0) { } nsXMLContentSerializer::~nsXMLContentSerializer() { } NS_IMPL_ISUPPORTS(nsXMLContentSerializer, nsIContentSerializer) NS_IMETHODIMP nsXMLContentSerializer::Init(uint32_t aFlags, uint32_t aWrapColumn, const char* aCharSet, bool aIsCopying, bool aRewriteEncodingDeclaration) { mPrefixIndex = 0; mColPos = 0; mIndentOverflow = 0; mIsIndentationAddedOnCurrentLine = false; mInAttribute = false; mAddNewlineForRootNode = false; mAddSpace = false; mMayIgnoreLineBreakSequence = false; mBodyOnly = false; mInBody = 0; mCharset = aCharSet; mFlags = aFlags; // Set the line break character: if ((mFlags & nsIDocumentEncoder::OutputCRLineBreak) && (mFlags & nsIDocumentEncoder::OutputLFLineBreak)) { // Windows mLineBreak.AssignLiteral("\r\n"); } else if (mFlags & nsIDocumentEncoder::OutputCRLineBreak) { // Mac mLineBreak.Assign('\r'); } else if (mFlags & nsIDocumentEncoder::OutputLFLineBreak) { // Unix/DOM mLineBreak.Assign('\n'); } else { mLineBreak.AssignLiteral(NS_LINEBREAK); // Platform/default } mDoRaw = !!(mFlags & nsIDocumentEncoder::OutputRaw); mDoFormat = (mFlags & nsIDocumentEncoder::OutputFormatted && !mDoRaw); mDoWrap = (mFlags & nsIDocumentEncoder::OutputWrap && !mDoRaw); mAllowLineBreaking = !(mFlags & nsIDocumentEncoder::OutputDisallowLineBreaking); if (!aWrapColumn) { mMaxColumn = 72; } else { mMaxColumn = aWrapColumn; } mPreLevel = 0; mIsIndentationAddedOnCurrentLine = false; return NS_OK; } nsresult nsXMLContentSerializer::AppendTextData(nsIContent* aNode, int32_t aStartOffset, int32_t aEndOffset, nsAString& aStr, bool aTranslateEntities) { nsIContent* content = aNode; const nsTextFragment* frag; if (!content || !(frag = content->GetText())) { return NS_ERROR_FAILURE; } int32_t fragLength = frag->GetLength(); int32_t endoffset = (aEndOffset == -1) ? fragLength : std::min(aEndOffset, fragLength); int32_t length = endoffset - aStartOffset; NS_ASSERTION(aStartOffset >= 0, "Negative start offset for text fragment!"); NS_ASSERTION(aStartOffset <= endoffset, "A start offset is beyond the end of the text fragment!"); if (length <= 0) { // XXX Zero is a legal value, maybe non-zero values should be an // error. return NS_OK; } if (frag->Is2b()) { const char16_t *strStart = frag->Get2b() + aStartOffset; if (aTranslateEntities) { NS_ENSURE_TRUE(AppendAndTranslateEntities(Substring(strStart, strStart + length), aStr), NS_ERROR_OUT_OF_MEMORY); } else { NS_ENSURE_TRUE(aStr.Append(Substring(strStart, strStart + length), mozilla::fallible), NS_ERROR_OUT_OF_MEMORY); } } else { if (aTranslateEntities) { NS_ENSURE_TRUE(AppendAndTranslateEntities(NS_ConvertASCIItoUTF16(frag->Get1b()+aStartOffset, length), aStr), NS_ERROR_OUT_OF_MEMORY); } else { NS_ENSURE_TRUE(aStr.Append(NS_ConvertASCIItoUTF16(frag->Get1b()+aStartOffset, length), mozilla::fallible), NS_ERROR_OUT_OF_MEMORY); } } return NS_OK; } NS_IMETHODIMP nsXMLContentSerializer::AppendText(nsIContent* aText, int32_t aStartOffset, int32_t aEndOffset, nsAString& aStr) { NS_ENSURE_ARG(aText); nsAutoString data; nsresult rv; rv = AppendTextData(aText, aStartOffset, aEndOffset, data, true); if (NS_FAILED(rv)) return NS_ERROR_FAILURE; if (mDoRaw || PreLevel() > 0) { NS_ENSURE_TRUE(AppendToStringConvertLF(data, aStr), NS_ERROR_OUT_OF_MEMORY); } else if (mDoFormat) { NS_ENSURE_TRUE(AppendToStringFormatedWrapped(data, aStr), NS_ERROR_OUT_OF_MEMORY); } else if (mDoWrap) { NS_ENSURE_TRUE(AppendToStringWrapped(data, aStr), NS_ERROR_OUT_OF_MEMORY); } else { NS_ENSURE_TRUE(AppendToStringConvertLF(data, aStr), NS_ERROR_OUT_OF_MEMORY); } return NS_OK; } NS_IMETHODIMP nsXMLContentSerializer::AppendCDATASection(nsIContent* aCDATASection, int32_t aStartOffset, int32_t aEndOffset, nsAString& aStr) { NS_ENSURE_ARG(aCDATASection); nsresult rv; NS_NAMED_LITERAL_STRING(cdata , "<![CDATA["); if (mDoRaw || PreLevel() > 0) { NS_ENSURE_TRUE(AppendToString(cdata, aStr), NS_ERROR_OUT_OF_MEMORY); } else if (mDoFormat) { NS_ENSURE_TRUE(AppendToStringFormatedWrapped(cdata, aStr), NS_ERROR_OUT_OF_MEMORY); } else if (mDoWrap) { NS_ENSURE_TRUE(AppendToStringWrapped(cdata, aStr), NS_ERROR_OUT_OF_MEMORY); } else { NS_ENSURE_TRUE(AppendToString(cdata, aStr), NS_ERROR_OUT_OF_MEMORY); } nsAutoString data; rv = AppendTextData(aCDATASection, aStartOffset, aEndOffset, data, false); if (NS_FAILED(rv)) return NS_ERROR_FAILURE; NS_ENSURE_TRUE(AppendToStringConvertLF(data, aStr), NS_ERROR_OUT_OF_MEMORY); NS_ENSURE_TRUE(AppendToString(NS_LITERAL_STRING("]]>"), aStr), NS_ERROR_OUT_OF_MEMORY); return NS_OK; } NS_IMETHODIMP nsXMLContentSerializer::AppendProcessingInstruction(nsIContent* aPI, int32_t aStartOffset, int32_t aEndOffset, nsAString& aStr) { nsCOMPtr<nsIDOMProcessingInstruction> pi = do_QueryInterface(aPI); NS_ENSURE_ARG(pi); nsresult rv; nsAutoString target, data, start; NS_ENSURE_TRUE(MaybeAddNewlineForRootNode(aStr), NS_ERROR_OUT_OF_MEMORY); rv = pi->GetTarget(target); if (NS_FAILED(rv)) return NS_ERROR_FAILURE; rv = pi->GetData(data); if (NS_FAILED(rv)) return NS_ERROR_FAILURE; NS_ENSURE_TRUE(start.AppendLiteral("<?", mozilla::fallible), NS_ERROR_OUT_OF_MEMORY); NS_ENSURE_TRUE(start.Append(target, mozilla::fallible), NS_ERROR_OUT_OF_MEMORY); if (mDoRaw || PreLevel() > 0) { NS_ENSURE_TRUE(AppendToString(start, aStr), NS_ERROR_OUT_OF_MEMORY); } else if (mDoFormat) { if (mAddSpace) { NS_ENSURE_TRUE(AppendNewLineToString(aStr), NS_ERROR_OUT_OF_MEMORY); } NS_ENSURE_TRUE(AppendToStringFormatedWrapped(start, aStr), NS_ERROR_OUT_OF_MEMORY); } else if (mDoWrap) { NS_ENSURE_TRUE(AppendToStringWrapped(start, aStr), NS_ERROR_OUT_OF_MEMORY); } else { NS_ENSURE_TRUE(AppendToString(start, aStr), NS_ERROR_OUT_OF_MEMORY); } if (!data.IsEmpty()) { NS_ENSURE_TRUE(AppendToString(char16_t(' '), aStr), NS_ERROR_OUT_OF_MEMORY); NS_ENSURE_TRUE(AppendToStringConvertLF(data, aStr), NS_ERROR_OUT_OF_MEMORY); } NS_ENSURE_TRUE(AppendToString(NS_LITERAL_STRING("?>"), aStr), NS_ERROR_OUT_OF_MEMORY); MaybeFlagNewlineForRootNode(aPI); return NS_OK; } NS_IMETHODIMP nsXMLContentSerializer::AppendComment(nsIContent* aComment, int32_t aStartOffset, int32_t aEndOffset, nsAString& aStr) { nsCOMPtr<nsIDOMComment> comment = do_QueryInterface(aComment); NS_ENSURE_ARG(comment); nsresult rv; nsAutoString data; rv = comment->GetData(data); if (NS_FAILED(rv)) return NS_ERROR_FAILURE; int32_t dataLength = data.Length(); if (aStartOffset || (aEndOffset != -1 && aEndOffset < dataLength)) { int32_t length = (aEndOffset == -1) ? dataLength : std::min(aEndOffset, dataLength); length -= aStartOffset; nsAutoString frag; if (length > 0) { data.Mid(frag, aStartOffset, length); } data.Assign(frag); } NS_ENSURE_TRUE(MaybeAddNewlineForRootNode(aStr), NS_ERROR_OUT_OF_MEMORY); NS_NAMED_LITERAL_STRING(startComment, "<!--"); if (mDoRaw || PreLevel() > 0) { NS_ENSURE_TRUE(AppendToString(startComment, aStr), NS_ERROR_OUT_OF_MEMORY); } else if (mDoFormat) { if (mAddSpace) { NS_ENSURE_TRUE(AppendNewLineToString(aStr), NS_ERROR_OUT_OF_MEMORY); } NS_ENSURE_TRUE(AppendToStringFormatedWrapped(startComment, aStr), NS_ERROR_OUT_OF_MEMORY); } else if (mDoWrap) { NS_ENSURE_TRUE(AppendToStringWrapped(startComment, aStr), NS_ERROR_OUT_OF_MEMORY); } else { NS_ENSURE_TRUE(AppendToString(startComment, aStr), NS_ERROR_OUT_OF_MEMORY); } // Even if mDoformat, we don't format the content because it // could have been preformated by the author NS_ENSURE_TRUE(AppendToStringConvertLF(data, aStr), NS_ERROR_OUT_OF_MEMORY); NS_ENSURE_TRUE(AppendToString(NS_LITERAL_STRING("-->"), aStr), NS_ERROR_OUT_OF_MEMORY); MaybeFlagNewlineForRootNode(aComment); return NS_OK; } NS_IMETHODIMP nsXMLContentSerializer::AppendDoctype(nsIContent* aDocType, nsAString& aStr) { nsCOMPtr<nsIDOMDocumentType> docType = do_QueryInterface(aDocType); NS_ENSURE_ARG(docType); nsresult rv; nsAutoString name, publicId, systemId, internalSubset; rv = docType->GetName(name); if (NS_FAILED(rv)) return NS_ERROR_FAILURE; rv = docType->GetPublicId(publicId); if (NS_FAILED(rv)) return NS_ERROR_FAILURE; rv = docType->GetSystemId(systemId); if (NS_FAILED(rv)) return NS_ERROR_FAILURE; rv = docType->GetInternalSubset(internalSubset); if (NS_FAILED(rv)) return NS_ERROR_FAILURE; NS_ENSURE_TRUE(MaybeAddNewlineForRootNode(aStr), NS_ERROR_OUT_OF_MEMORY); NS_ENSURE_TRUE(AppendToString(NS_LITERAL_STRING("<!DOCTYPE "), aStr), NS_ERROR_OUT_OF_MEMORY); NS_ENSURE_TRUE(AppendToString(name, aStr), NS_ERROR_OUT_OF_MEMORY); char16_t quote; if (!publicId.IsEmpty()) { NS_ENSURE_TRUE(AppendToString(NS_LITERAL_STRING(" PUBLIC "), aStr), NS_ERROR_OUT_OF_MEMORY); if (publicId.FindChar(char16_t('"')) == -1) { quote = char16_t('"'); } else { quote = char16_t('\''); } NS_ENSURE_TRUE(AppendToString(quote, aStr), NS_ERROR_OUT_OF_MEMORY); NS_ENSURE_TRUE(AppendToString(publicId, aStr), NS_ERROR_OUT_OF_MEMORY); NS_ENSURE_TRUE(AppendToString(quote, aStr), NS_ERROR_OUT_OF_MEMORY); if (!systemId.IsEmpty()) { NS_ENSURE_TRUE(AppendToString(char16_t(' '), aStr), NS_ERROR_OUT_OF_MEMORY); if (systemId.FindChar(char16_t('"')) == -1) { quote = char16_t('"'); } else { quote = char16_t('\''); } NS_ENSURE_TRUE(AppendToString(quote, aStr), NS_ERROR_OUT_OF_MEMORY); NS_ENSURE_TRUE(AppendToString(systemId, aStr), NS_ERROR_OUT_OF_MEMORY); NS_ENSURE_TRUE(AppendToString(quote, aStr), NS_ERROR_OUT_OF_MEMORY); } } else if (!systemId.IsEmpty()) { if (systemId.FindChar(char16_t('"')) == -1) { quote = char16_t('"'); } else { quote = char16_t('\''); } NS_ENSURE_TRUE(AppendToString(NS_LITERAL_STRING(" SYSTEM "), aStr), NS_ERROR_OUT_OF_MEMORY); NS_ENSURE_TRUE(AppendToString(quote, aStr), NS_ERROR_OUT_OF_MEMORY); NS_ENSURE_TRUE(AppendToString(systemId, aStr), NS_ERROR_OUT_OF_MEMORY); NS_ENSURE_TRUE(AppendToString(quote, aStr), NS_ERROR_OUT_OF_MEMORY); } if (!internalSubset.IsEmpty()) { NS_ENSURE_TRUE(AppendToString(NS_LITERAL_STRING(" ["), aStr), NS_ERROR_OUT_OF_MEMORY); NS_ENSURE_TRUE(AppendToString(internalSubset, aStr), NS_ERROR_OUT_OF_MEMORY); NS_ENSURE_TRUE(AppendToString(char16_t(']'), aStr), NS_ERROR_OUT_OF_MEMORY); } NS_ENSURE_TRUE(AppendToString(kGreaterThan, aStr), NS_ERROR_OUT_OF_MEMORY); MaybeFlagNewlineForRootNode(aDocType); return NS_OK; } nsresult nsXMLContentSerializer::PushNameSpaceDecl(const nsAString& aPrefix, const nsAString& aURI, nsIContent* aOwner) { NameSpaceDecl* decl = mNameSpaceStack.AppendElement(); if (!decl) return NS_ERROR_OUT_OF_MEMORY; decl->mPrefix.Assign(aPrefix); decl->mURI.Assign(aURI); // Don't addref - this weak reference will be removed when // we pop the stack decl->mOwner = aOwner; return NS_OK; } void nsXMLContentSerializer::PopNameSpaceDeclsFor(nsIContent* aOwner) { int32_t index, count; count = mNameSpaceStack.Length(); for (index = count - 1; index >= 0; index--) { if (mNameSpaceStack[index].mOwner != aOwner) { break; } mNameSpaceStack.RemoveElementAt(index); } } bool nsXMLContentSerializer::ConfirmPrefix(nsAString& aPrefix, const nsAString& aURI, nsIContent* aElement, bool aIsAttribute) { if (aPrefix.EqualsLiteral(kXMLNS)) { return false; } if (aURI.EqualsLiteral("http://www.w3.org/XML/1998/namespace")) { // The prefix must be xml for this namespace. We don't need to declare it, // so always just set the prefix to xml. aPrefix.AssignLiteral("xml"); return false; } bool mustHavePrefix; if (aIsAttribute) { if (aURI.IsEmpty()) { // Attribute in the null namespace. This just shouldn't have a prefix. // And there's no need to push any namespace decls aPrefix.Truncate(); return false; } // Attribute not in the null namespace -- must have a prefix mustHavePrefix = true; } else { // Not an attribute, so doesn't _have_ to have a prefix mustHavePrefix = false; } // Keep track of the closest prefix that's bound to aURI and whether we've // found such a thing. closestURIMatch holds the prefix, and uriMatch // indicates whether we actually have one. nsAutoString closestURIMatch; bool uriMatch = false; // Also keep track of whether we've seen aPrefix already. If we have, that // means that it's already bound to a URI different from aURI, so even if we // later (so in a more outer scope) see it bound to aURI we can't reuse it. bool haveSeenOurPrefix = false; int32_t count = mNameSpaceStack.Length(); int32_t index = count - 1; while (index >= 0) { NameSpaceDecl& decl = mNameSpaceStack.ElementAt(index); // Check if we've found a prefix match if (aPrefix.Equals(decl.mPrefix)) { // If the URIs match and aPrefix is not bound to any other URI, we can // use aPrefix if (!haveSeenOurPrefix && aURI.Equals(decl.mURI)) { // Just use our uriMatch stuff. That will deal with an empty aPrefix // the right way. We can break out of the loop now, though. uriMatch = true; closestURIMatch = aPrefix; break; } haveSeenOurPrefix = true; // If they don't, and either: // 1) We have a prefix (so we'd be redeclaring this prefix to point to a // different namespace) or // 2) We're looking at an existing default namespace decl on aElement (so // we can't create a new default namespace decl for this URI) // then generate a new prefix. Note that we do NOT generate new prefixes // if we happen to have aPrefix == decl->mPrefix == "" and mismatching // URIs when |decl| doesn't have aElement as its owner. In that case we // can simply push the new namespace URI as the default namespace for // aElement. if (!aPrefix.IsEmpty() || decl.mOwner == aElement) { NS_ASSERTION(!aURI.IsEmpty(), "Not allowed to add a xmlns attribute with an empty " "namespace name unless it declares the default " "namespace."); GenerateNewPrefix(aPrefix); // Now we need to validate our new prefix/uri combination; check it // against the full namespace stack again. Note that just restarting // the while loop is ok, since we haven't changed aURI, so the // closestURIMatch and uriMatch state is not affected. index = count - 1; haveSeenOurPrefix = false; continue; } } // If we've found a URI match, then record the first one if (!uriMatch && aURI.Equals(decl.mURI)) { // Need to check that decl->mPrefix is not declared anywhere closer to // us. If it is, we can't use it. bool prefixOK = true; int32_t index2; for (index2 = count-1; index2 > index && prefixOK; --index2) { prefixOK = (mNameSpaceStack[index2].mPrefix != decl.mPrefix); } if (prefixOK) { uriMatch = true; closestURIMatch.Assign(decl.mPrefix); } } --index; } // At this point the following invariants hold: // 1) The prefix in closestURIMatch is mapped to aURI in our scope if // uriMatch is set. // 2) There is nothing on the namespace stack that has aPrefix as the prefix // and a _different_ URI, except for the case aPrefix.IsEmpty (and // possible default namespaces on ancestors) // So if uriMatch is set it's OK to use the closestURIMatch prefix. The one // exception is when closestURIMatch is actually empty (default namespace // decl) and we must have a prefix. if (uriMatch && (!mustHavePrefix || !closestURIMatch.IsEmpty())) { aPrefix.Assign(closestURIMatch); return false; } if (aPrefix.IsEmpty()) { // At this point, aPrefix is empty (which means we never had a prefix to // start with). If we must have a prefix, just generate a new prefix and // then send it back through the namespace stack checks to make sure it's // OK. if (mustHavePrefix) { GenerateNewPrefix(aPrefix); return ConfirmPrefix(aPrefix, aURI, aElement, aIsAttribute); } // One final special case. If aPrefix is empty and we never saw an empty // prefix (default namespace decl) on the namespace stack and we're in the // null namespace there is no reason to output an |xmlns=""| here. It just // makes the output less readable. if (!haveSeenOurPrefix && aURI.IsEmpty()) { return false; } } // Now just set aURI as the new default namespace URI. Indicate that we need // to create a namespace decl for the final prefix return true; } void nsXMLContentSerializer::GenerateNewPrefix(nsAString& aPrefix) { aPrefix.Assign('a'); char buf[128]; SprintfLiteral(buf, "%d", mPrefixIndex++); AppendASCIItoUTF16(buf, aPrefix); } bool nsXMLContentSerializer::SerializeAttr(const nsAString& aPrefix, const nsAString& aName, const nsAString& aValue, nsAString& aStr, bool aDoEscapeEntities) { nsAutoString attrString_; // For innerHTML we can do faster appending without // temporary strings. bool rawAppend = mDoRaw && aDoEscapeEntities; nsAString& attrString = (rawAppend) ? aStr : attrString_; NS_ENSURE_TRUE(attrString.Append(char16_t(' '), mozilla::fallible), false); if (!aPrefix.IsEmpty()) { NS_ENSURE_TRUE(attrString.Append(aPrefix, mozilla::fallible), false); NS_ENSURE_TRUE(attrString.Append(char16_t(':'), mozilla::fallible), false); } NS_ENSURE_TRUE(attrString.Append(aName, mozilla::fallible), false); if (aDoEscapeEntities) { // if problem characters are turned into character entity references // then there will be no problem with the value delimiter characters NS_ENSURE_TRUE(attrString.AppendLiteral("=\"", mozilla::fallible), false); mInAttribute = true; bool result = AppendAndTranslateEntities(aValue, attrString); mInAttribute = false; NS_ENSURE_TRUE(result, false); NS_ENSURE_TRUE(attrString.Append(char16_t('"'), mozilla::fallible), false); if (rawAppend) { return true; } } else { // Depending on whether the attribute value contains quotes or apostrophes we // need to select the delimiter character and escape characters using // character entity references, ignoring the value of aDoEscapeEntities. // See http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.3.2.2 for // the standard on character entity references in values. We also have to // make sure to escape any '&' characters. bool bIncludesSingle = false; bool bIncludesDouble = false; nsAString::const_iterator iCurr, iEnd; aValue.BeginReading(iCurr); aValue.EndReading(iEnd); for ( ; iCurr != iEnd; ++iCurr) { if (*iCurr == char16_t('\'')) { bIncludesSingle = true; if (bIncludesDouble) { break; } } else if (*iCurr == char16_t('"')) { bIncludesDouble = true; if (bIncludesSingle) { break; } } } // Delimiter and escaping is according to the following table // bIncludesDouble bIncludesSingle Delimiter Escape Double Quote // FALSE FALSE " FALSE // FALSE TRUE " FALSE // TRUE FALSE ' FALSE // TRUE TRUE " TRUE char16_t cDelimiter = (bIncludesDouble && !bIncludesSingle) ? char16_t('\'') : char16_t('"'); NS_ENSURE_TRUE(attrString.Append(char16_t('='), mozilla::fallible), false); NS_ENSURE_TRUE(attrString.Append(cDelimiter, mozilla::fallible), false); nsAutoString sValue(aValue); NS_ENSURE_TRUE(sValue.ReplaceSubstring(NS_LITERAL_STRING("&"), NS_LITERAL_STRING("&"), mozilla::fallible), false); if (bIncludesDouble && bIncludesSingle) { NS_ENSURE_TRUE(sValue.ReplaceSubstring(NS_LITERAL_STRING("\""), NS_LITERAL_STRING("""), mozilla::fallible), false); } NS_ENSURE_TRUE(attrString.Append(sValue, mozilla::fallible), false); NS_ENSURE_TRUE(attrString.Append(cDelimiter, mozilla::fallible), false); } if (mDoRaw || PreLevel() > 0) { NS_ENSURE_TRUE(AppendToStringConvertLF(attrString, aStr), false); } else if (mDoFormat) { NS_ENSURE_TRUE(AppendToStringFormatedWrapped(attrString, aStr), false); } else if (mDoWrap) { NS_ENSURE_TRUE(AppendToStringWrapped(attrString, aStr), false); } else { NS_ENSURE_TRUE(AppendToStringConvertLF(attrString, aStr), false); } return true; } uint32_t nsXMLContentSerializer::ScanNamespaceDeclarations(nsIContent* aContent, nsIContent *aOriginalElement, const nsAString& aTagNamespaceURI) { uint32_t index, count; nsAutoString uriStr, valueStr; count = aContent->GetAttrCount(); // First scan for namespace declarations, pushing each on the stack uint32_t skipAttr = count; for (index = 0; index < count; index++) { const BorrowedAttrInfo info = aContent->GetAttrInfoAt(index); const nsAttrName* name = info.mName; int32_t namespaceID = name->NamespaceID(); nsIAtom *attrName = name->LocalName(); if (namespaceID == kNameSpaceID_XMLNS || // Also push on the stack attrs named "xmlns" in the null // namespace... because once we serialize those out they'll look like // namespace decls. :( // XXXbz what if we have both "xmlns" in the null namespace and "xmlns" // in the xmlns namespace? (namespaceID == kNameSpaceID_None && attrName == nsGkAtoms::xmlns)) { info.mValue->ToString(uriStr); if (!name->GetPrefix()) { if (aTagNamespaceURI.IsEmpty() && !uriStr.IsEmpty()) { // If the element is in no namespace we need to add a xmlns // attribute to declare that. That xmlns attribute must not have a // prefix (see http://www.w3.org/TR/REC-xml-names/#dt-prefix), ie it // must declare the default namespace. We just found an xmlns // attribute that declares the default namespace to something // non-empty. We're going to ignore this attribute, for children we // will detect that we need to add it again and attributes aren't // affected by the default namespace. skipAttr = index; } else { // Default NS attribute does not have prefix (and the name is "xmlns") PushNameSpaceDecl(EmptyString(), uriStr, aOriginalElement); } } else { PushNameSpaceDecl(nsDependentAtomString(attrName), uriStr, aOriginalElement); } } } return skipAttr; } bool nsXMLContentSerializer::IsJavaScript(nsIContent * aContent, nsIAtom* aAttrNameAtom, int32_t aAttrNamespaceID, const nsAString& aValueString) { bool isHtml = aContent->IsHTMLElement(); bool isXul = aContent->IsXULElement(); bool isSvg = aContent->IsSVGElement(); if (aAttrNamespaceID == kNameSpaceID_None && (isHtml || isXul || isSvg) && (aAttrNameAtom == nsGkAtoms::href || aAttrNameAtom == nsGkAtoms::src)) { static const char kJavaScript[] = "javascript"; int32_t pos = aValueString.FindChar(':'); if (pos < (int32_t)(sizeof kJavaScript - 1)) return false; nsAutoString scheme(Substring(aValueString, 0, pos)); scheme.StripWhitespace(); if ((scheme.Length() == (sizeof kJavaScript - 1)) && scheme.EqualsIgnoreCase(kJavaScript)) return true; else return false; } return aContent->IsEventAttributeName(aAttrNameAtom); } bool nsXMLContentSerializer::SerializeAttributes(nsIContent* aContent, nsIContent *aOriginalElement, nsAString& aTagPrefix, const nsAString& aTagNamespaceURI, nsIAtom* aTagName, nsAString& aStr, uint32_t aSkipAttr, bool aAddNSAttr) { nsAutoString prefixStr, uriStr, valueStr; nsAutoString xmlnsStr; xmlnsStr.AssignLiteral(kXMLNS); uint32_t index, count; // If we had to add a new namespace declaration, serialize // and push it on the namespace stack if (aAddNSAttr) { if (aTagPrefix.IsEmpty()) { // Serialize default namespace decl NS_ENSURE_TRUE(SerializeAttr(EmptyString(), xmlnsStr, aTagNamespaceURI, aStr, true), false); } else { // Serialize namespace decl NS_ENSURE_TRUE(SerializeAttr(xmlnsStr, aTagPrefix, aTagNamespaceURI, aStr, true), false); } PushNameSpaceDecl(aTagPrefix, aTagNamespaceURI, aOriginalElement); } count = aContent->GetAttrCount(); // Now serialize each of the attributes // XXX Unfortunately we need a namespace manager to get // attribute URIs. for (index = 0; index < count; index++) { if (aSkipAttr == index) { continue; } const nsAttrName* name = aContent->GetAttrNameAt(index); int32_t namespaceID = name->NamespaceID(); nsIAtom* attrName = name->LocalName(); nsIAtom* attrPrefix = name->GetPrefix(); // Filter out any attribute starting with [-|_]moz nsDependentAtomString attrNameStr(attrName); if (StringBeginsWith(attrNameStr, NS_LITERAL_STRING("_moz")) || StringBeginsWith(attrNameStr, NS_LITERAL_STRING("-moz"))) { continue; } if (attrPrefix) { attrPrefix->ToString(prefixStr); } else { prefixStr.Truncate(); } bool addNSAttr = false; if (kNameSpaceID_XMLNS != namespaceID) { nsContentUtils::NameSpaceManager()->GetNameSpaceURI(namespaceID, uriStr); addNSAttr = ConfirmPrefix(prefixStr, uriStr, aOriginalElement, true); } aContent->GetAttr(namespaceID, attrName, valueStr); nsDependentAtomString nameStr(attrName); bool isJS = IsJavaScript(aContent, attrName, namespaceID, valueStr); NS_ENSURE_TRUE(SerializeAttr(prefixStr, nameStr, valueStr, aStr, !isJS), false); if (addNSAttr) { NS_ASSERTION(!prefixStr.IsEmpty(), "Namespaced attributes must have a prefix"); NS_ENSURE_TRUE(SerializeAttr(xmlnsStr, prefixStr, uriStr, aStr, true), false); PushNameSpaceDecl(prefixStr, uriStr, aOriginalElement); } } return true; } NS_IMETHODIMP nsXMLContentSerializer::AppendElementStart(Element* aElement, Element* aOriginalElement, nsAString& aStr) { NS_ENSURE_ARG(aElement); nsIContent* content = aElement; bool forceFormat = false; nsresult rv = NS_OK; if (!CheckElementStart(content, forceFormat, aStr, rv)) { // When we go to AppendElementEnd for this element, we're going to // MaybeLeaveFromPreContent(). So make sure to MaybeEnterInPreContent() // now, so our PreLevel() doesn't get confused. MaybeEnterInPreContent(content); return rv; } NS_ENSURE_SUCCESS(rv, rv); nsAutoString tagPrefix, tagLocalName, tagNamespaceURI; aElement->NodeInfo()->GetPrefix(tagPrefix); aElement->NodeInfo()->GetName(tagLocalName); aElement->NodeInfo()->GetNamespaceURI(tagNamespaceURI); uint32_t skipAttr = ScanNamespaceDeclarations(content, aOriginalElement, tagNamespaceURI); nsIAtom *name = content->NodeInfo()->NameAtom(); bool lineBreakBeforeOpen = LineBreakBeforeOpen(content->GetNameSpaceID(), name); if ((mDoFormat || forceFormat) && !mDoRaw && !PreLevel()) { if (mColPos && lineBreakBeforeOpen) { NS_ENSURE_TRUE(AppendNewLineToString(aStr), NS_ERROR_OUT_OF_MEMORY); } else { NS_ENSURE_TRUE(MaybeAddNewlineForRootNode(aStr), NS_ERROR_OUT_OF_MEMORY); } if (!mColPos) { NS_ENSURE_TRUE(AppendIndentation(aStr), NS_ERROR_OUT_OF_MEMORY); } else if (mAddSpace) { NS_ENSURE_TRUE(AppendToString(char16_t(' '), aStr), NS_ERROR_OUT_OF_MEMORY); mAddSpace = false; } } else if (mAddSpace) { NS_ENSURE_TRUE(AppendToString(char16_t(' '), aStr), NS_ERROR_OUT_OF_MEMORY); mAddSpace = false; } else { NS_ENSURE_TRUE(MaybeAddNewlineForRootNode(aStr), NS_ERROR_OUT_OF_MEMORY); } // Always reset to avoid false newlines in case MaybeAddNewlineForRootNode wasn't // called mAddNewlineForRootNode = false; bool addNSAttr; addNSAttr = ConfirmPrefix(tagPrefix, tagNamespaceURI, aOriginalElement, false); // Serialize the qualified name of the element NS_ENSURE_TRUE(AppendToString(kLessThan, aStr), NS_ERROR_OUT_OF_MEMORY); if (!tagPrefix.IsEmpty()) { NS_ENSURE_TRUE(AppendToString(tagPrefix, aStr), NS_ERROR_OUT_OF_MEMORY); NS_ENSURE_TRUE(AppendToString(NS_LITERAL_STRING(":"), aStr), NS_ERROR_OUT_OF_MEMORY); } NS_ENSURE_TRUE(AppendToString(tagLocalName, aStr), NS_ERROR_OUT_OF_MEMORY); MaybeEnterInPreContent(content); if ((mDoFormat || forceFormat) && !mDoRaw && !PreLevel()) { NS_ENSURE_TRUE(IncrIndentation(name), NS_ERROR_OUT_OF_MEMORY); } NS_ENSURE_TRUE(SerializeAttributes(content, aOriginalElement, tagPrefix, tagNamespaceURI, name, aStr, skipAttr, addNSAttr), NS_ERROR_OUT_OF_MEMORY); NS_ENSURE_TRUE(AppendEndOfElementStart(aElement, aOriginalElement, aStr), NS_ERROR_OUT_OF_MEMORY); if ((mDoFormat || forceFormat) && !mDoRaw && !PreLevel() && LineBreakAfterOpen(content->GetNameSpaceID(), name)) { NS_ENSURE_TRUE(AppendNewLineToString(aStr), NS_ERROR_OUT_OF_MEMORY); } NS_ENSURE_TRUE(AfterElementStart(content, aOriginalElement, aStr), NS_ERROR_OUT_OF_MEMORY); return NS_OK; } // aElement is the actual element we're outputting. aOriginalElement is the one // in the original DOM, which is the one we have to test for kids. static bool ElementNeedsSeparateEndTag(Element* aElement, Element* aOriginalElement) { if (aOriginalElement->GetChildCount()) { // We have kids, so we need a separate end tag. This needs to be checked on // aOriginalElement because that's the one that's actually in the DOM and // might have kids. return true; } if (!aElement->IsHTMLElement()) { // Empty non-HTML elements can just skip a separate end tag. return false; } // HTML container tags should have a separate end tag even if empty, per spec. // See // https://w3c.github.io/DOM-Parsing/#dfn-concept-xml-serialization-algorithm bool isHTMLContainer = true; // Default in case we get no parser service. nsIParserService* parserService = nsContentUtils::GetParserService(); if (parserService) { nsIAtom* localName = aElement->NodeInfo()->NameAtom(); parserService->IsContainer( parserService->HTMLCaseSensitiveAtomTagToId(localName), isHTMLContainer); } return isHTMLContainer; } bool nsXMLContentSerializer::AppendEndOfElementStart(Element* aElement, Element* aOriginalElement, nsAString& aStr) { if (ElementNeedsSeparateEndTag(aElement, aOriginalElement)) { return AppendToString(kGreaterThan, aStr); } // We don't need a separate end tag. For HTML elements (which at this point // must be non-containers), append a space before the '/', per spec. See // https://w3c.github.io/DOM-Parsing/#dfn-concept-xml-serialization-algorithm if (aOriginalElement->IsHTMLElement()) { if (!AppendToString(kSpace, aStr)) { return false; } } return AppendToString(NS_LITERAL_STRING("/>"), aStr); } NS_IMETHODIMP nsXMLContentSerializer::AppendElementEnd(Element* aElement, nsAString& aStr) { NS_ENSURE_ARG(aElement); nsIContent* content = aElement; bool forceFormat = false, outputElementEnd; outputElementEnd = CheckElementEnd(aElement, forceFormat, aStr); nsIAtom *name = content->NodeInfo()->NameAtom(); if ((mDoFormat || forceFormat) && !mDoRaw && !PreLevel()) { DecrIndentation(name); } if (!outputElementEnd) { // Keep this in sync with the cleanup at the end of this method. PopNameSpaceDeclsFor(aElement); MaybeLeaveFromPreContent(content); MaybeFlagNewlineForRootNode(aElement); AfterElementEnd(content, aStr); return NS_OK; } nsAutoString tagPrefix, tagLocalName, tagNamespaceURI; aElement->NodeInfo()->GetPrefix(tagPrefix); aElement->NodeInfo()->GetName(tagLocalName); aElement->NodeInfo()->GetNamespaceURI(tagNamespaceURI); #ifdef DEBUG bool debugNeedToPushNamespace = #endif ConfirmPrefix(tagPrefix, tagNamespaceURI, aElement, false); NS_ASSERTION(!debugNeedToPushNamespace, "Can't push namespaces in closing tag!"); if ((mDoFormat || forceFormat) && !mDoRaw && !PreLevel()) { bool lineBreakBeforeClose = LineBreakBeforeClose(content->GetNameSpaceID(), name); if (mColPos && lineBreakBeforeClose) { NS_ENSURE_TRUE(AppendNewLineToString(aStr), NS_ERROR_OUT_OF_MEMORY); } if (!mColPos) { NS_ENSURE_TRUE(AppendIndentation(aStr), NS_ERROR_OUT_OF_MEMORY); } else if (mAddSpace) { NS_ENSURE_TRUE(AppendToString(char16_t(' '), aStr), NS_ERROR_OUT_OF_MEMORY); mAddSpace = false; } } else if (mAddSpace) { NS_ENSURE_TRUE(AppendToString(char16_t(' '), aStr), NS_ERROR_OUT_OF_MEMORY); mAddSpace = false; } NS_ENSURE_TRUE(AppendToString(kEndTag, aStr), NS_ERROR_OUT_OF_MEMORY); if (!tagPrefix.IsEmpty()) { NS_ENSURE_TRUE(AppendToString(tagPrefix, aStr), NS_ERROR_OUT_OF_MEMORY); NS_ENSURE_TRUE(AppendToString(NS_LITERAL_STRING(":"), aStr), NS_ERROR_OUT_OF_MEMORY); } NS_ENSURE_TRUE(AppendToString(tagLocalName, aStr), NS_ERROR_OUT_OF_MEMORY); NS_ENSURE_TRUE(AppendToString(kGreaterThan, aStr), NS_ERROR_OUT_OF_MEMORY); // Keep what follows in sync with the cleanup in the !outputElementEnd case. PopNameSpaceDeclsFor(aElement); MaybeLeaveFromPreContent(content); if ((mDoFormat || forceFormat) && !mDoRaw && !PreLevel() && LineBreakAfterClose(content->GetNameSpaceID(), name)) { NS_ENSURE_TRUE(AppendNewLineToString(aStr), NS_ERROR_OUT_OF_MEMORY); } else { MaybeFlagNewlineForRootNode(aElement); } AfterElementEnd(content, aStr); return NS_OK; } NS_IMETHODIMP nsXMLContentSerializer::AppendDocumentStart(nsIDocument *aDocument, nsAString& aStr) { NS_ENSURE_ARG_POINTER(aDocument); nsAutoString version, encoding, standalone; aDocument->GetXMLDeclaration(version, encoding, standalone); if (version.IsEmpty()) return NS_OK; // A declaration must have version, or there is no decl NS_NAMED_LITERAL_STRING(endQuote, "\""); aStr += NS_LITERAL_STRING("<?xml version=\"") + version + endQuote; if (!mCharset.IsEmpty()) { aStr += NS_LITERAL_STRING(" encoding=\"") + NS_ConvertASCIItoUTF16(mCharset) + endQuote; } // Otherwise just don't output an encoding attr. Not that we expect // mCharset to ever be empty. #ifdef DEBUG else { NS_WARNING("Empty mCharset? How come?"); } #endif if (!standalone.IsEmpty()) { aStr += NS_LITERAL_STRING(" standalone=\"") + standalone + endQuote; } NS_ENSURE_TRUE(aStr.AppendLiteral("?>", mozilla::fallible), NS_ERROR_OUT_OF_MEMORY); mAddNewlineForRootNode = true; return NS_OK; } bool nsXMLContentSerializer::CheckElementStart(nsIContent * aContent, bool & aForceFormat, nsAString& aStr, nsresult& aResult) { aResult = NS_OK; aForceFormat = false; return true; } bool nsXMLContentSerializer::CheckElementEnd(Element* aElement, bool& aForceFormat, nsAString& aStr) { // We don't output a separate end tag for empty element aForceFormat = false; // XXXbz this is a bit messed up, but by now we don't have our fixed-up // version of aElement anymore. Let's hope fixup never changes the localName // or namespace... return ElementNeedsSeparateEndTag(aElement, aElement); } bool nsXMLContentSerializer::AppendToString(const char16_t aChar, nsAString& aOutputStr) { if (mBodyOnly && !mInBody) { return true; } mColPos += 1; return aOutputStr.Append(aChar, mozilla::fallible); } bool nsXMLContentSerializer::AppendToString(const nsAString& aStr, nsAString& aOutputStr) { if (mBodyOnly && !mInBody) { return true; } mColPos += aStr.Length(); return aOutputStr.Append(aStr, mozilla::fallible); } static const uint16_t kGTVal = 62; #define _ 0 // This table indexes into kEntityStrings[]. static const uint8_t kEntities[] = { _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, 2, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, 3, _, 4 }; // This table indexes into kEntityStrings[]. static const uint8_t kAttrEntities[] = { _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, 1, _, _, _, 2, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, 3, _, 4 }; #undef _ static const char* const kEntityStrings[] = { /* 0 */ nullptr, /* 1 */ """, /* 2 */ "&", /* 3 */ "<", /* 4 */ ">", }; bool nsXMLContentSerializer::AppendAndTranslateEntities(const nsAString& aStr, nsAString& aOutputStr) { nsReadingIterator<char16_t> done_reading; aStr.EndReading(done_reading); // for each chunk of |aString|... uint32_t advanceLength = 0; nsReadingIterator<char16_t> iter; const uint8_t* entityTable = mInAttribute ? kAttrEntities : kEntities; for (aStr.BeginReading(iter); iter != done_reading; iter.advance(int32_t(advanceLength))) { uint32_t fragmentLength = done_reading - iter; const char16_t* c = iter.get(); const char16_t* fragmentStart = c; const char16_t* fragmentEnd = c + fragmentLength; const char* entityText = nullptr; advanceLength = 0; // for each character in this chunk, check if it // needs to be replaced for (; c < fragmentEnd; c++, advanceLength++) { char16_t val = *c; if ((val <= kGTVal) && entityTable[val]) { entityText = kEntityStrings[entityTable[val]]; break; } } NS_ENSURE_TRUE(aOutputStr.Append(fragmentStart, advanceLength, mozilla::fallible), false); if (entityText) { NS_ENSURE_TRUE(AppendASCIItoUTF16(entityText, aOutputStr, mozilla::fallible), false); advanceLength++; } } return true; } bool nsXMLContentSerializer::MaybeAddNewlineForRootNode(nsAString& aStr) { if (mAddNewlineForRootNode) { return AppendNewLineToString(aStr); } return true; } void nsXMLContentSerializer::MaybeFlagNewlineForRootNode(nsINode* aNode) { nsINode* parent = aNode->GetParentNode(); if (parent) { mAddNewlineForRootNode = parent->IsNodeOfType(nsINode::eDOCUMENT); } } void nsXMLContentSerializer::MaybeEnterInPreContent(nsIContent* aNode) { // support of the xml:space attribute if (ShouldMaintainPreLevel() && aNode->HasAttr(kNameSpaceID_XML, nsGkAtoms::space)) { nsAutoString space; aNode->GetAttr(kNameSpaceID_XML, nsGkAtoms::space, space); if (space.EqualsLiteral("preserve")) ++PreLevel(); } } void nsXMLContentSerializer::MaybeLeaveFromPreContent(nsIContent* aNode) { // support of the xml:space attribute if (ShouldMaintainPreLevel() && aNode->HasAttr(kNameSpaceID_XML, nsGkAtoms::space)) { nsAutoString space; aNode->GetAttr(kNameSpaceID_XML, nsGkAtoms::space, space); if (space.EqualsLiteral("preserve")) --PreLevel(); } } bool nsXMLContentSerializer::AppendNewLineToString(nsAString& aStr) { bool result = AppendToString(mLineBreak, aStr); mMayIgnoreLineBreakSequence = true; mColPos = 0; mAddSpace = false; mIsIndentationAddedOnCurrentLine = false; return result; } bool nsXMLContentSerializer::AppendIndentation(nsAString& aStr) { mIsIndentationAddedOnCurrentLine = true; bool result = AppendToString(mIndent, aStr); mAddSpace = false; mMayIgnoreLineBreakSequence = false; return result; } bool nsXMLContentSerializer::IncrIndentation(nsIAtom* aName) { // we want to keep the source readable if (mDoWrap && mIndent.Length() >= uint32_t(mMaxColumn) - MIN_INDENTED_LINE_LENGTH) { ++mIndentOverflow; } else { return mIndent.AppendLiteral(INDENT_STRING, mozilla::fallible); } return true; } void nsXMLContentSerializer::DecrIndentation(nsIAtom* aName) { if(mIndentOverflow) --mIndentOverflow; else mIndent.Cut(0, INDENT_STRING_LENGTH); } bool nsXMLContentSerializer::LineBreakBeforeOpen(int32_t aNamespaceID, nsIAtom* aName) { return mAddSpace; } bool nsXMLContentSerializer::LineBreakAfterOpen(int32_t aNamespaceID, nsIAtom* aName) { return false; } bool nsXMLContentSerializer::LineBreakBeforeClose(int32_t aNamespaceID, nsIAtom* aName) { return mAddSpace; } bool nsXMLContentSerializer::LineBreakAfterClose(int32_t aNamespaceID, nsIAtom* aName) { return false; } bool nsXMLContentSerializer::AppendToStringConvertLF(const nsAString& aStr, nsAString& aOutputStr) { if (mBodyOnly && !mInBody) { return true; } if (mDoRaw) { NS_ENSURE_TRUE(AppendToString(aStr, aOutputStr), false); } else { // Convert line-endings to mLineBreak uint32_t start = 0; uint32_t theLen = aStr.Length(); while (start < theLen) { int32_t eol = aStr.FindChar('\n', start); if (eol == kNotFound) { nsDependentSubstring dataSubstring(aStr, start, theLen - start); NS_ENSURE_TRUE(AppendToString(dataSubstring, aOutputStr), false); start = theLen; // if there was a line break before this substring // AppendNewLineToString was called, so we should reverse // this flag mMayIgnoreLineBreakSequence = false; } else { nsDependentSubstring dataSubstring(aStr, start, eol - start); NS_ENSURE_TRUE(AppendToString(dataSubstring, aOutputStr), false); NS_ENSURE_TRUE(AppendNewLineToString(aOutputStr), false); start = eol + 1; } } } return true; } bool nsXMLContentSerializer::AppendFormatedWrapped_WhitespaceSequence( nsASingleFragmentString::const_char_iterator &aPos, const nsASingleFragmentString::const_char_iterator aEnd, const nsASingleFragmentString::const_char_iterator aSequenceStart, bool &aMayIgnoreStartOfLineWhitespaceSequence, nsAString &aOutputStr) { // Handle the complete sequence of whitespace. // Continue to iterate until we find the first non-whitespace char. // Updates "aPos" to point to the first unhandled char. // Also updates the aMayIgnoreStartOfLineWhitespaceSequence flag, // as well as the other "global" state flags. bool sawBlankOrTab = false; bool leaveLoop = false; do { switch (*aPos) { case ' ': case '\t': sawBlankOrTab = true; MOZ_FALLTHROUGH; case '\n': ++aPos; // do not increase mColPos, // because we will reduce the whitespace to a single char break; default: leaveLoop = true; break; } } while (!leaveLoop && aPos < aEnd); if (mAddSpace) { // if we had previously been asked to add space, // our situation has not changed } else if (!sawBlankOrTab && mMayIgnoreLineBreakSequence) { // nothing to do in the case where line breaks have already been added // before the call of AppendToStringWrapped // and only if we found line break in the sequence mMayIgnoreLineBreakSequence = false; } else if (aMayIgnoreStartOfLineWhitespaceSequence) { // nothing to do aMayIgnoreStartOfLineWhitespaceSequence = false; } else { if (sawBlankOrTab) { if (mDoWrap && mColPos + 1 >= mMaxColumn) { // no much sense in delaying, we only have one slot left, // let's write a break now bool result = aOutputStr.Append(mLineBreak, mozilla::fallible); mColPos = 0; mIsIndentationAddedOnCurrentLine = false; mMayIgnoreLineBreakSequence = true; NS_ENSURE_TRUE(result, false); } else { // do not write out yet, we may write out either a space or a linebreak // let's delay writing it out until we know more mAddSpace = true; ++mColPos; // eat a slot of available space } } else { // Asian text usually does not contain spaces, therefore we should not // transform a linebreak into a space. // Since we only saw linebreaks, but no spaces or tabs, // let's write a linebreak now. NS_ENSURE_TRUE(AppendNewLineToString(aOutputStr), false); } } return true; } bool nsXMLContentSerializer::AppendWrapped_NonWhitespaceSequence( nsASingleFragmentString::const_char_iterator &aPos, const nsASingleFragmentString::const_char_iterator aEnd, const nsASingleFragmentString::const_char_iterator aSequenceStart, bool &aMayIgnoreStartOfLineWhitespaceSequence, bool &aSequenceStartAfterAWhiteSpace, nsAString& aOutputStr) { mMayIgnoreLineBreakSequence = false; aMayIgnoreStartOfLineWhitespaceSequence = false; // Handle the complete sequence of non-whitespace in this block // Iterate until we find the first whitespace char or an aEnd condition // Updates "aPos" to point to the first unhandled char. // Also updates the aMayIgnoreStartOfLineWhitespaceSequence flag, // as well as the other "global" state flags. bool thisSequenceStartsAtBeginningOfLine = !mColPos; bool onceAgainBecauseWeAddedBreakInFront = false; bool foundWhitespaceInLoop; uint32_t length, colPos; do { if (mColPos) { colPos = mColPos; } else { if (mDoFormat && !mDoRaw && !PreLevel() && !onceAgainBecauseWeAddedBreakInFront) { colPos = mIndent.Length(); } else colPos = 0; } foundWhitespaceInLoop = false; length = 0; // we iterate until the next whitespace character // or until we reach the maximum of character per line // or until the end of the string to add. do { if (*aPos == ' ' || *aPos == '\t' || *aPos == '\n') { foundWhitespaceInLoop = true; break; } ++aPos; ++length; } while ( (!mDoWrap || colPos + length < mMaxColumn) && aPos < aEnd); // in the case we don't reached the end of the string, but we reached the maxcolumn, // we see if there is a whitespace after the maxcolumn // if yes, then we can append directly the string instead of // appending a new line etc. if (*aPos == ' ' || *aPos == '\t' || *aPos == '\n') { foundWhitespaceInLoop = true; } if (aPos == aEnd || foundWhitespaceInLoop) { // there is enough room for the complete block we found if (mDoFormat && !mColPos) { NS_ENSURE_TRUE(AppendIndentation(aOutputStr), false); } else if (mAddSpace) { bool result = aOutputStr.Append(char16_t(' '), mozilla::fallible); mAddSpace = false; NS_ENSURE_TRUE(result, false); } mColPos += length; NS_ENSURE_TRUE(aOutputStr.Append(aSequenceStart, aPos - aSequenceStart, mozilla::fallible), false); // We have not yet reached the max column, we will continue to // fill the current line in the next outer loop iteration // (this one in AppendToStringWrapped) // make sure we return in this outer loop onceAgainBecauseWeAddedBreakInFront = false; } else { // we reach the max column if (!thisSequenceStartsAtBeginningOfLine && (mAddSpace || (!mDoFormat && aSequenceStartAfterAWhiteSpace))) { // when !mDoFormat, mAddSpace is not used, mAddSpace is always false // so, in the case where mDoWrap && !mDoFormat, if we want to enter in this condition... // We can avoid to wrap. We try to add the whole block // in an empty new line NS_ENSURE_TRUE(AppendNewLineToString(aOutputStr), false); aPos = aSequenceStart; thisSequenceStartsAtBeginningOfLine = true; onceAgainBecauseWeAddedBreakInFront = true; } else { // we must wrap onceAgainBecauseWeAddedBreakInFront = false; bool foundWrapPosition = false; int32_t wrapPosition = 0; if (mAllowLineBreaking) { nsILineBreaker *lineBreaker = nsContentUtils::LineBreaker(); wrapPosition = lineBreaker->Prev(aSequenceStart, (aEnd - aSequenceStart), (aPos - aSequenceStart) + 1); if (wrapPosition != NS_LINEBREAKER_NEED_MORE_TEXT) { foundWrapPosition = true; } else { wrapPosition = lineBreaker->Next(aSequenceStart, (aEnd - aSequenceStart), (aPos - aSequenceStart)); if (wrapPosition != NS_LINEBREAKER_NEED_MORE_TEXT) { foundWrapPosition = true; } } } if (foundWrapPosition) { if (!mColPos && mDoFormat) { NS_ENSURE_TRUE(AppendIndentation(aOutputStr), false); } else if (mAddSpace) { bool result = aOutputStr.Append(char16_t(' '), mozilla::fallible); mAddSpace = false; NS_ENSURE_TRUE(result, false); } NS_ENSURE_TRUE(aOutputStr.Append(aSequenceStart, wrapPosition, mozilla::fallible), false); NS_ENSURE_TRUE(AppendNewLineToString(aOutputStr), false); aPos = aSequenceStart + wrapPosition; aMayIgnoreStartOfLineWhitespaceSequence = true; } else { // try some simple fallback logic // go forward up to the next whitespace position, // in the worst case this will be all the rest of the data // we update the mColPos variable with the length of // the part already parsed. mColPos += length; // now try to find the next whitespace do { if (*aPos == ' ' || *aPos == '\t' || *aPos == '\n') { break; } ++aPos; ++mColPos; } while (aPos < aEnd); if (mAddSpace) { bool result = aOutputStr.Append(char16_t(' '), mozilla::fallible); mAddSpace = false; NS_ENSURE_TRUE(result, false); } NS_ENSURE_TRUE(aOutputStr.Append(aSequenceStart, aPos - aSequenceStart, mozilla::fallible), false); } } aSequenceStartAfterAWhiteSpace = false; } } while (onceAgainBecauseWeAddedBreakInFront); return true; } bool nsXMLContentSerializer::AppendToStringFormatedWrapped(const nsASingleFragmentString& aStr, nsAString& aOutputStr) { if (mBodyOnly && !mInBody) { return true; } nsASingleFragmentString::const_char_iterator pos, end, sequenceStart; aStr.BeginReading(pos); aStr.EndReading(end); bool sequenceStartAfterAWhitespace = false; if (pos < end) { nsAString::const_char_iterator end2; aOutputStr.EndReading(end2); --end2; if (*end2 == ' ' || *end2 == '\n' || *end2 == '\t') { sequenceStartAfterAWhitespace = true; } } // if the current line already has text on it, such as a tag, // leading whitespace is significant bool mayIgnoreStartOfLineWhitespaceSequence = (!mColPos || (mIsIndentationAddedOnCurrentLine && sequenceStartAfterAWhitespace && uint32_t(mColPos) == mIndent.Length())); while (pos < end) { sequenceStart = pos; // if beginning of a whitespace sequence if (*pos == ' ' || *pos == '\n' || *pos == '\t') { NS_ENSURE_TRUE(AppendFormatedWrapped_WhitespaceSequence(pos, end, sequenceStart, mayIgnoreStartOfLineWhitespaceSequence, aOutputStr), false); } else { // any other non-whitespace char NS_ENSURE_TRUE(AppendWrapped_NonWhitespaceSequence(pos, end, sequenceStart, mayIgnoreStartOfLineWhitespaceSequence, sequenceStartAfterAWhitespace, aOutputStr), false); } } return true; } bool nsXMLContentSerializer::AppendWrapped_WhitespaceSequence( nsASingleFragmentString::const_char_iterator &aPos, const nsASingleFragmentString::const_char_iterator aEnd, const nsASingleFragmentString::const_char_iterator aSequenceStart, nsAString &aOutputStr) { // Handle the complete sequence of whitespace. // Continue to iterate until we find the first non-whitespace char. // Updates "aPos" to point to the first unhandled char. mAddSpace = false; mIsIndentationAddedOnCurrentLine = false; bool leaveLoop = false; nsASingleFragmentString::const_char_iterator lastPos = aPos; do { switch (*aPos) { case ' ': case '\t': // if there are too many spaces on a line, we wrap if (mColPos >= mMaxColumn) { if (lastPos != aPos) { NS_ENSURE_TRUE(aOutputStr.Append(lastPos, aPos - lastPos, mozilla::fallible), false); } NS_ENSURE_TRUE(AppendToString(mLineBreak, aOutputStr), false); mColPos = 0; lastPos = aPos; } ++mColPos; ++aPos; break; case '\n': if (lastPos != aPos) { NS_ENSURE_TRUE(aOutputStr.Append(lastPos, aPos - lastPos, mozilla::fallible), false); } NS_ENSURE_TRUE(AppendToString(mLineBreak, aOutputStr), false); mColPos = 0; ++aPos; lastPos = aPos; break; default: leaveLoop = true; break; } } while (!leaveLoop && aPos < aEnd); if (lastPos != aPos) { NS_ENSURE_TRUE(aOutputStr.Append(lastPos, aPos - lastPos, mozilla::fallible), false); } return true; } bool nsXMLContentSerializer::AppendToStringWrapped(const nsASingleFragmentString& aStr, nsAString& aOutputStr) { if (mBodyOnly && !mInBody) { return true; } nsASingleFragmentString::const_char_iterator pos, end, sequenceStart; aStr.BeginReading(pos); aStr.EndReading(end); // not used in this case, but needed by AppendWrapped_NonWhitespaceSequence bool mayIgnoreStartOfLineWhitespaceSequence = false; mMayIgnoreLineBreakSequence = false; bool sequenceStartAfterAWhitespace = false; if (pos < end && !aOutputStr.IsEmpty()) { nsAString::const_char_iterator end2; aOutputStr.EndReading(end2); --end2; if (*end2 == ' ' || *end2 == '\n' || *end2 == '\t') { sequenceStartAfterAWhitespace = true; } } while (pos < end) { sequenceStart = pos; // if beginning of a whitespace sequence if (*pos == ' ' || *pos == '\n' || *pos == '\t') { sequenceStartAfterAWhitespace = true; NS_ENSURE_TRUE(AppendWrapped_WhitespaceSequence(pos, end, sequenceStart, aOutputStr), false); } else { // any other non-whitespace char NS_ENSURE_TRUE(AppendWrapped_NonWhitespaceSequence(pos, end, sequenceStart, mayIgnoreStartOfLineWhitespaceSequence, sequenceStartAfterAWhitespace, aOutputStr), false); } } return true; } bool nsXMLContentSerializer::ShouldMaintainPreLevel() const { // Only attempt to maintain the pre level for consumers who care about it. return !mDoRaw || (mFlags & nsIDocumentEncoder::OutputNoFormattingInPre); }