Add m-esr52 at 52.6.0

author: Matt A. Tobin <mattatobin@localhost.localdomain> 2018-02-02 04:16:08 -0500
committer: Matt A. Tobin <mattatobin@localhost.localdomain> 2018-02-02 04:16:08 -0500
commit: 5f8de423f190bbb79a62f804151bc24824fa32d8 (patch)
tree: 10027f336435511475e392454359edea8e25895d /intl/icu/source/tools/toolutil/xmlparser.h
parent: 49ee0794b5d912db1f95dce6eb52d781dc210db5 (diff)
download: UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.gz
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.lz
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.xz
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.zip
1 files changed, 247 insertions, 0 deletions
diff --git a/intl/icu/source/tools/toolutil/xmlparser.h b/intl/icu/source/tools/toolutil/xmlparser.h
new file mode 100644
index 000000000..aff9d88ce
--- /dev/null
+++ b/intl/icu/source/tools/toolutil/xmlparser.h
@@ -0,0 +1,247 @@
+// Copyright (C) 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2004-2005, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  xmlparser.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2004jul21
+*   created by: Andy Heninger
+*
+* Tiny XML parser using ICU and intended for use in ICU tests and in build tools.
+* Not suitable for production use. Not supported.
+* Not conformant. Not efficient.
+* But very small.
+*/
+
+#ifndef __XMLPARSER_H__
+#define __XMLPARSER_H__
+
+#include "unicode/uobject.h"
+#include "unicode/unistr.h"
+#include "unicode/regex.h"
+#include "uvector.h"
+#include "hash.h"
+
+#if !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_CONVERSION
+
+enum UXMLNodeType {  // kind of a child node; used by the type parameter of UXMLElement::getChild()
+    /** Node type string (text contents), stored as a UnicodeString. */
+    UXML_NODE_TYPE_STRING,
+    /** Node type element, stored as a UXMLElement. */
+    UXML_NODE_TYPE_ELEMENT,
+    UXML_NODE_TYPE_COUNT  // not a node kind itself: as the last enumerator, its value is the number of kinds above
+};
+
+U_NAMESPACE_BEGIN
+
+class UXMLParser;
+
+/**
+ * This class represents an element node in a parsed XML tree.
+ */
+class U_TOOLUTIL_API UXMLElement : public UObject {
+public:
+    /**
+     * Destructor.
+     */
+    virtual ~UXMLElement();
+
+    /**
+     * Get the tag name of this element.
+     */
+    const UnicodeString &getTagName() const;
+    /**
+     * Get the text contents of the element.
+     * Append the contents of all text child nodes.
+     * @param recurse If TRUE, also recursively appends the contents of all
+     *        text child nodes of element children.
+     * @return The text contents.
+     */
+    UnicodeString getText(UBool recurse) const;
+    /**
+     * Get the number of attributes.
+     */
+    int32_t countAttributes() const;
+    /**
+     * Get the i-th attribute.
+     * @param i Index of the attribute.
+     * @param name Output parameter, receives the attribute name.
+     * @param value Output parameter, receives the attribute value.
+     * @return A pointer to the attribute value (may be &value or a pointer to an
+     *         internal string object), or NULL if i is out of bounds.
+     */
+    const UnicodeString *getAttribute(int32_t i, UnicodeString &name, UnicodeString &value) const;
+    /**
+     * Get the value of the attribute with the given name.
+     * @param name Attribute name to be looked up.
+     * @return A pointer to the attribute value, or NULL if this element
+     * does not have this attribute.
+     */
+    const UnicodeString *getAttribute(const UnicodeString &name) const;
+    /**
+     * Get the number of child nodes.
+     */
+    int32_t countChildren() const;
+    /**
+     * Get the i-th child node.
+     * @param i Index of the child node.
+     * @param type Output parameter, receives the child node type (see UXMLNodeType for the object class).
+     * @return A pointer to the child node object, or NULL if i is out of bounds.
+     */
+    const UObject *getChild(int32_t i, UXMLNodeType &type) const;
+    /**
+     * Get the next child element node, skipping non-element child nodes.
+     * @param i Enumeration index; initialize to 0 before getting the first child element.
+     * @return A pointer to the next child element, or NULL if there is none.
+     */
+    const UXMLElement *nextChildElement(int32_t &i) const;
+    /**
+     * Get the immediate child element with the given name.
+     * If there are multiple child elements with this name, then return
+     * the first one.
+     * @param name Element name to be looked up.
+     * @return A pointer to the element node, or NULL if this element
+     * does not have this immediate child element.
+     */
+    const UXMLElement *getChildElement(const UnicodeString &name) const;
+
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for the actual class.
+     */
+    virtual UClassID getDynamicClassID() const;
+
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for this class.
+     */
+    static UClassID U_EXPORT2 getStaticClassID();
+
+private:
+    // Declared private to prevent default construction, copying and assignment by users.
+    UXMLElement();
+    UXMLElement(const UXMLElement &other);
+    UXMLElement &operator=(const UXMLElement &other);
+
+    void appendText(UnicodeString &text, UBool recurse) const;
+
+    friend class UXMLParser;
+
+    UXMLElement(const UXMLParser *parser, const UnicodeString *name, UErrorCode &errorCode);
+
+    const UXMLParser *fParser;           // NOTE(review): presumably the parser passed to the constructor (not owned) - confirm
+    const UnicodeString *fName;          // The tag name of this element (owned by the UXMLParser)
+    UnicodeString       fContent;        // The text content of this node.  All element content is
+                                         //   concatenated even when there are intervening nested elements
+                                         //   (which doesn't happen with most xml files we care about)
+                                         //   Sections of content containing only white space are dropped,
+                                         //   which gets rid of the bogus white space content from
+                                         //   elements which are primarily containers for nested elements.
+    UVector             fAttNames;       // A vector containing the names of this element's attributes
+                                         //    The names are UnicodeString objects, owned by the UXMLParser.
+    UVector             fAttValues;      // A vector containing the attribute values for
+                                         //    this element's attributes.  The order is the same
+                                         //    as that of the attribute name vector.
+
+    UVector             fChildren;       // The child nodes of this element (a Vector)
+
+    UXMLElement        *fParent;         // A pointer to the parent element of this element.
+};
+
+/**
+ * A simple XML parser; it is neither efficient nor conformant and only useful for
+ * restricted types of XML documents.
+ *
+ * The parse methods parse whole documents and return the parse trees via their
+ * root elements.
+ */
+class U_TOOLUTIL_API UXMLParser : public UObject {
+public:
+    /**
+     * Create an XML parser.
+     */
+    static UXMLParser *createParser(UErrorCode &errorCode);
+    /**
+     * Destructor.
+     */
+    virtual ~UXMLParser();
+
+    /**
+     * Parse an XML document, create the entire document tree, and
+     * return a pointer to the root element of the parsed tree.
+     * The caller must delete the element.
+     */
+    UXMLElement *parse(const UnicodeString &src, UErrorCode &errorCode);
+    /**
+     * Parse an XML file, create the entire document tree, and
+     * return a pointer to the root element of the parsed tree.
+     * The caller must delete the element.
+     */
+    UXMLElement *parseFile(const char *filename, UErrorCode &errorCode);
+
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for the actual class.
+     */
+    virtual UClassID getDynamicClassID() const;
+
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for this class.
+     */
+    static UClassID U_EXPORT2 getStaticClassID();
+
+private:
+    // Declared private to prevent default construction, copying and assignment by users.
+    UXMLParser();
+    UXMLParser(const UXMLParser &other);
+    UXMLParser &operator=(const UXMLParser &other);
+
+    // The working constructor; it is private, so users obtain a parser through createParser().
+    UXMLParser(UErrorCode &status);
+
+    void           parseMisc(UErrorCode &status);
+    UXMLElement   *createElement(RegexMatcher &mEl, UErrorCode &status);
+    void           error(const char *message, UErrorCode &status);
+    UnicodeString  scanContent(UErrorCode &status);
+    void           replaceCharRefs(UnicodeString &s, UErrorCode &status);
+
+    const UnicodeString *intern(const UnicodeString &s, UErrorCode &errorCode);
+public:
+    // public for UXMLElement only
+    const UnicodeString *findName(const UnicodeString &s) const;
+private:
+
+    // There is one ICU regex matcher for each of the major XML syntax items
+    //  that are recognized.
+    RegexMatcher mXMLDecl;
+    RegexMatcher mXMLComment;
+    RegexMatcher mXMLSP;
+    RegexMatcher mXMLDoctype;
+    RegexMatcher mXMLPI;
+    RegexMatcher mXMLElemStart;
+    RegexMatcher mXMLElemEnd;
+    RegexMatcher mXMLElemEmpty;
+    RegexMatcher mXMLCharData;
+    RegexMatcher mAttrValue;
+    RegexMatcher mAttrNormalizer;
+    RegexMatcher mNewLineNormalizer;
+    RegexMatcher mAmps;
+
+    Hashtable             fNames;           // interned element/attribute name strings
+    UStack                fElementStack;    // Stack holds the parent elements when nested
+                                            //    elements are being parsed.  All items on this
+                                            //    stack are of type UXMLElement.
+    int32_t               fPos;             // String index of the current scan position in
+                                            //    the xml source (NOTE: no fSrc member is declared in this class).
+    UnicodeString         fOneLF;           // NOTE(review): presumably a single line-feed string for newline normalization - confirm
+};
+
+U_NAMESPACE_END
+#endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_CONVERSION */
+
+#endif
author	Matt A. Tobin <mattatobin@localhost.localdomain>	2018-02-02 04:16:08 -0500
committer	Matt A. Tobin <mattatobin@localhost.localdomain>	2018-02-02 04:16:08 -0500
commit	5f8de423f190bbb79a62f804151bc24824fa32d8 (patch)
tree	10027f336435511475e392454359edea8e25895d /intl/icu/source/tools/toolutil/xmlparser.h
parent	49ee0794b5d912db1f95dce6eb52d781dc210db5 (diff)
download	UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.gz UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.lz UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.xz UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.zip