summary refs log tree commit diff stats
path: root/parser/html/java/htmlparser/src/nu/validator/encoding/Encoding.java
diff options
context:
space:
mode:
Diffstat (limited to 'parser/html/java/htmlparser/src/nu/validator/encoding/Encoding.java')
-rw-r--r-- parser/html/java/htmlparser/src/nu/validator/encoding/Encoding.java 886
1 files changed, 886 insertions, 0 deletions
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Encoding.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Encoding.java
new file mode 100644
index 000000000..6e59ef7c7
--- /dev/null
+++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Encoding.java
@@ -0,0 +1,886 @@
+/*
+ * Copyright (c) 2015 Mozilla Foundation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package nu.validator.encoding;
+
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.IllegalCharsetNameException;
+import java.nio.charset.UnsupportedCharsetException;
+import java.nio.charset.spi.CharsetProvider;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+/**
+ * Represents an <a href="https://encoding.spec.whatwg.org/#encoding">encoding</a>
+ * as defined in the <a href="https://encoding.spec.whatwg.org/">Encoding
+ * Standard</a>, provides access to each encoding defined in the Encoding
+ * Standard via a static constant and provides the
+ * "<a href="https://encoding.spec.whatwg.org/#concept-encoding-get">get an
+ * encoding</a>" algorithm defined in the Encoding Standard.
+ *
+ * <p>This class inherits from {@link Charset} to allow the Encoding
+ * Standard-compliant encodings to be used in contexts that support
+ * <code>Charset</code> instances. However, by design, the Encoding
+ * Standard-compliant encodings are not supplied via a {@link CharsetProvider}
+ * and, therefore, are not available via and do not interfere with the static
+ * methods provided by <code>Charset</code>. (This class provides methods of
+ * the same name to hide each static method of <code>Charset</code> to help
+ * avoid accidental calls to the static methods of the superclass when working
+ * with Encoding Standard-compliant encodings.)
+ *
+ * <p>When an application needs to use a particular encoding, such as utf-8
+ * or windows-1252, the corresponding constant, i.e.
+ * {@link #UTF_8 Encoding.UTF_8} and {@link #WINDOWS_1252 Encoding.WINDOWS_1252}
+ * respectively, should be used. However, when the application receives an
+ * encoding label from external input, the method {@link #forName(String)
+ * forName()} should be used to obtain the object representing the encoding
+ * identified by the label. In contexts where labels that map to the
+ * <a href="https://encoding.spec.whatwg.org/#replacement">replacement
+ * encoding</a> should be treated as unknown, the method {@link
+ * #forNameNoReplacement(String) forNameNoReplacement()} should be used instead.
+ *
+ *
+ * @author hsivonen
+ */
+public abstract class Encoding extends Charset {
+
+ /**
+ * Every label that {@link #forName(String)} recognizes, in lowercase.
+ * The entries must stay in ascending <code>String</code> order because
+ * <code>forName</code> searches this array with
+ * {@link Arrays#binarySearch(Object[], Object)}, and the entry at index
+ * <code>i</code> is paired with <code>ENCODINGS_FOR_LABELS[i]</code>.
+ */
+ private static final String[] LABELS = {
+ "866",
+ "ansi_x3.4-1968",
+ "arabic",
+ "ascii",
+ "asmo-708",
+ "big5",
+ "big5-hkscs",
+ "chinese",
+ "cn-big5",
+ "cp1250",
+ "cp1251",
+ "cp1252",
+ "cp1253",
+ "cp1254",
+ "cp1255",
+ "cp1256",
+ "cp1257",
+ "cp1258",
+ "cp819",
+ "cp866",
+ "csbig5",
+ "cseuckr",
+ "cseucpkdfmtjapanese",
+ "csgb2312",
+ "csibm866",
+ "csiso2022jp",
+ "csiso2022kr",
+ "csiso58gb231280",
+ "csiso88596e",
+ "csiso88596i",
+ "csiso88598e",
+ "csiso88598i",
+ "csisolatin1",
+ "csisolatin2",
+ "csisolatin3",
+ "csisolatin4",
+ "csisolatin5",
+ "csisolatin6",
+ "csisolatin9",
+ "csisolatinarabic",
+ "csisolatincyrillic",
+ "csisolatingreek",
+ "csisolatinhebrew",
+ "cskoi8r",
+ "csksc56011987",
+ "csmacintosh",
+ "csshiftjis",
+ "cyrillic",
+ "dos-874",
+ "ecma-114",
+ "ecma-118",
+ "elot_928",
+ "euc-jp",
+ "euc-kr",
+ "gb18030",
+ "gb2312",
+ "gb_2312",
+ "gb_2312-80",
+ "gbk",
+ "greek",
+ "greek8",
+ "hebrew",
+ "hz-gb-2312",
+ "ibm819",
+ "ibm866",
+ "iso-2022-cn",
+ "iso-2022-cn-ext",
+ "iso-2022-jp",
+ "iso-2022-kr",
+ "iso-8859-1",
+ "iso-8859-10",
+ "iso-8859-11",
+ "iso-8859-13",
+ "iso-8859-14",
+ "iso-8859-15",
+ "iso-8859-16",
+ "iso-8859-2",
+ "iso-8859-3",
+ "iso-8859-4",
+ "iso-8859-5",
+ "iso-8859-6",
+ "iso-8859-6-e",
+ "iso-8859-6-i",
+ "iso-8859-7",
+ "iso-8859-8",
+ "iso-8859-8-e",
+ "iso-8859-8-i",
+ "iso-8859-9",
+ "iso-ir-100",
+ "iso-ir-101",
+ "iso-ir-109",
+ "iso-ir-110",
+ "iso-ir-126",
+ "iso-ir-127",
+ "iso-ir-138",
+ "iso-ir-144",
+ "iso-ir-148",
+ "iso-ir-149",
+ "iso-ir-157",
+ "iso-ir-58",
+ "iso8859-1",
+ "iso8859-10",
+ "iso8859-11",
+ "iso8859-13",
+ "iso8859-14",
+ "iso8859-15",
+ "iso8859-2",
+ "iso8859-3",
+ "iso8859-4",
+ "iso8859-5",
+ "iso8859-6",
+ "iso8859-7",
+ "iso8859-8",
+ "iso8859-9",
+ "iso88591",
+ "iso885910",
+ "iso885911",
+ "iso885913",
+ "iso885914",
+ "iso885915",
+ "iso88592",
+ "iso88593",
+ "iso88594",
+ "iso88595",
+ "iso88596",
+ "iso88597",
+ "iso88598",
+ "iso88599",
+ "iso_8859-1",
+ "iso_8859-15",
+ "iso_8859-1:1987",
+ "iso_8859-2",
+ "iso_8859-2:1987",
+ "iso_8859-3",
+ "iso_8859-3:1988",
+ "iso_8859-4",
+ "iso_8859-4:1988",
+ "iso_8859-5",
+ "iso_8859-5:1988",
+ "iso_8859-6",
+ "iso_8859-6:1987",
+ "iso_8859-7",
+ "iso_8859-7:1987",
+ "iso_8859-8",
+ "iso_8859-8:1988",
+ "iso_8859-9",
+ "iso_8859-9:1989",
+ "koi",
+ "koi8",
+ "koi8-r",
+ "koi8-ru",
+ "koi8-u",
+ "koi8_r",
+ "korean",
+ "ks_c_5601-1987",
+ "ks_c_5601-1989",
+ "ksc5601",
+ "ksc_5601",
+ "l1",
+ "l2",
+ "l3",
+ "l4",
+ "l5",
+ "l6",
+ "l9",
+ "latin1",
+ "latin2",
+ "latin3",
+ "latin4",
+ "latin5",
+ "latin6",
+ "logical",
+ "mac",
+ "macintosh",
+ "ms932",
+ "ms_kanji",
+ "shift-jis",
+ "shift_jis",
+ "sjis",
+ "sun_eu_greek",
+ "tis-620",
+ "unicode-1-1-utf-8",
+ "us-ascii",
+ "utf-16",
+ "utf-16be",
+ "utf-16le",
+ "utf-8",
+ "utf8",
+ "visual",
+ "windows-1250",
+ "windows-1251",
+ "windows-1252",
+ "windows-1253",
+ "windows-1254",
+ "windows-1255",
+ "windows-1256",
+ "windows-1257",
+ "windows-1258",
+ "windows-31j",
+ "windows-874",
+ "windows-949",
+ "x-cp1250",
+ "x-cp1251",
+ "x-cp1252",
+ "x-cp1253",
+ "x-cp1254",
+ "x-cp1255",
+ "x-cp1256",
+ "x-cp1257",
+ "x-cp1258",
+ "x-euc-jp",
+ "x-gbk",
+ "x-mac-cyrillic",
+ "x-mac-roman",
+ "x-mac-ukrainian",
+ "x-sjis",
+ "x-user-defined",
+ "x-x-big5",
+ };
+
+ /**
+ * The encoding for each label: element <code>i</code> is what
+ * {@link #forName(String)} returns for <code>LABELS[i]</code>. Must be
+ * kept the same length and order as <code>LABELS</code>.
+ */
+ private static final Encoding[] ENCODINGS_FOR_LABELS = {
+ Ibm866.INSTANCE,
+ Windows1252.INSTANCE,
+ Iso6.INSTANCE,
+ Windows1252.INSTANCE,
+ Iso6.INSTANCE,
+ Big5.INSTANCE,
+ Big5.INSTANCE,
+ Gbk.INSTANCE,
+ Big5.INSTANCE,
+ Windows1250.INSTANCE,
+ Windows1251.INSTANCE,
+ Windows1252.INSTANCE,
+ Windows1253.INSTANCE,
+ Windows1254.INSTANCE,
+ Windows1255.INSTANCE,
+ Windows1256.INSTANCE,
+ Windows1257.INSTANCE,
+ Windows1258.INSTANCE,
+ Windows1252.INSTANCE,
+ Ibm866.INSTANCE,
+ Big5.INSTANCE,
+ EucKr.INSTANCE,
+ EucJp.INSTANCE,
+ Gbk.INSTANCE,
+ Ibm866.INSTANCE,
+ Iso2022Jp.INSTANCE,
+ Replacement.INSTANCE,
+ Gbk.INSTANCE,
+ Iso6.INSTANCE,
+ Iso6.INSTANCE,
+ Iso8.INSTANCE,
+ Iso8I.INSTANCE,
+ Windows1252.INSTANCE,
+ Iso2.INSTANCE,
+ Iso3.INSTANCE,
+ Iso4.INSTANCE,
+ Windows1254.INSTANCE,
+ Iso10.INSTANCE,
+ Iso15.INSTANCE,
+ Iso6.INSTANCE,
+ Iso5.INSTANCE,
+ Iso7.INSTANCE,
+ Iso8.INSTANCE,
+ Koi8R.INSTANCE,
+ EucKr.INSTANCE,
+ Macintosh.INSTANCE,
+ ShiftJis.INSTANCE,
+ Iso5.INSTANCE,
+ Windows874.INSTANCE,
+ Iso6.INSTANCE,
+ Iso7.INSTANCE,
+ Iso7.INSTANCE,
+ EucJp.INSTANCE,
+ EucKr.INSTANCE,
+ Gb18030.INSTANCE,
+ Gbk.INSTANCE,
+ Gbk.INSTANCE,
+ Gbk.INSTANCE,
+ Gbk.INSTANCE,
+ Iso7.INSTANCE,
+ Iso7.INSTANCE,
+ Iso8.INSTANCE,
+ Replacement.INSTANCE,
+ Windows1252.INSTANCE,
+ Ibm866.INSTANCE,
+ Replacement.INSTANCE,
+ Replacement.INSTANCE,
+ Iso2022Jp.INSTANCE,
+ Replacement.INSTANCE,
+ Windows1252.INSTANCE,
+ Iso10.INSTANCE,
+ Windows874.INSTANCE,
+ Iso13.INSTANCE,
+ Iso14.INSTANCE,
+ Iso15.INSTANCE,
+ Iso16.INSTANCE,
+ Iso2.INSTANCE,
+ Iso3.INSTANCE,
+ Iso4.INSTANCE,
+ Iso5.INSTANCE,
+ Iso6.INSTANCE,
+ Iso6.INSTANCE,
+ Iso6.INSTANCE,
+ Iso7.INSTANCE,
+ Iso8.INSTANCE,
+ Iso8.INSTANCE,
+ Iso8I.INSTANCE,
+ Windows1254.INSTANCE,
+ Windows1252.INSTANCE,
+ Iso2.INSTANCE,
+ Iso3.INSTANCE,
+ Iso4.INSTANCE,
+ Iso7.INSTANCE,
+ Iso6.INSTANCE,
+ Iso8.INSTANCE,
+ Iso5.INSTANCE,
+ Windows1254.INSTANCE,
+ EucKr.INSTANCE,
+ Iso10.INSTANCE,
+ Gbk.INSTANCE,
+ Windows1252.INSTANCE,
+ Iso10.INSTANCE,
+ Windows874.INSTANCE,
+ Iso13.INSTANCE,
+ Iso14.INSTANCE,
+ Iso15.INSTANCE,
+ Iso2.INSTANCE,
+ Iso3.INSTANCE,
+ Iso4.INSTANCE,
+ Iso5.INSTANCE,
+ Iso6.INSTANCE,
+ Iso7.INSTANCE,
+ Iso8.INSTANCE,
+ Windows1254.INSTANCE,
+ Windows1252.INSTANCE,
+ Iso10.INSTANCE,
+ Windows874.INSTANCE,
+ Iso13.INSTANCE,
+ Iso14.INSTANCE,
+ Iso15.INSTANCE,
+ Iso2.INSTANCE,
+ Iso3.INSTANCE,
+ Iso4.INSTANCE,
+ Iso5.INSTANCE,
+ Iso6.INSTANCE,
+ Iso7.INSTANCE,
+ Iso8.INSTANCE,
+ Windows1254.INSTANCE,
+ Windows1252.INSTANCE,
+ Iso15.INSTANCE,
+ Windows1252.INSTANCE,
+ Iso2.INSTANCE,
+ Iso2.INSTANCE,
+ Iso3.INSTANCE,
+ Iso3.INSTANCE,
+ Iso4.INSTANCE,
+ Iso4.INSTANCE,
+ Iso5.INSTANCE,
+ Iso5.INSTANCE,
+ Iso6.INSTANCE,
+ Iso6.INSTANCE,
+ Iso7.INSTANCE,
+ Iso7.INSTANCE,
+ Iso8.INSTANCE,
+ Iso8.INSTANCE,
+ Windows1254.INSTANCE,
+ Windows1254.INSTANCE,
+ Koi8R.INSTANCE,
+ Koi8R.INSTANCE,
+ Koi8R.INSTANCE,
+ Koi8U.INSTANCE,
+ Koi8U.INSTANCE,
+ Koi8R.INSTANCE,
+ EucKr.INSTANCE,
+ EucKr.INSTANCE,
+ EucKr.INSTANCE,
+ EucKr.INSTANCE,
+ EucKr.INSTANCE,
+ Windows1252.INSTANCE,
+ Iso2.INSTANCE,
+ Iso3.INSTANCE,
+ Iso4.INSTANCE,
+ Windows1254.INSTANCE,
+ Iso10.INSTANCE,
+ Iso15.INSTANCE,
+ Windows1252.INSTANCE,
+ Iso2.INSTANCE,
+ Iso3.INSTANCE,
+ Iso4.INSTANCE,
+ Windows1254.INSTANCE,
+ Iso10.INSTANCE,
+ Iso8I.INSTANCE,
+ Macintosh.INSTANCE,
+ Macintosh.INSTANCE,
+ ShiftJis.INSTANCE,
+ ShiftJis.INSTANCE,
+ ShiftJis.INSTANCE,
+ ShiftJis.INSTANCE,
+ ShiftJis.INSTANCE,
+ Iso7.INSTANCE,
+ Windows874.INSTANCE,
+ Utf8.INSTANCE,
+ Windows1252.INSTANCE,
+ Utf16Le.INSTANCE,
+ Utf16Be.INSTANCE,
+ Utf16Le.INSTANCE,
+ Utf8.INSTANCE,
+ Utf8.INSTANCE,
+ Iso8.INSTANCE,
+ Windows1250.INSTANCE,
+ Windows1251.INSTANCE,
+ Windows1252.INSTANCE,
+ Windows1253.INSTANCE,
+ Windows1254.INSTANCE,
+ Windows1255.INSTANCE,
+ Windows1256.INSTANCE,
+ Windows1257.INSTANCE,
+ Windows1258.INSTANCE,
+ ShiftJis.INSTANCE,
+ Windows874.INSTANCE,
+ EucKr.INSTANCE,
+ Windows1250.INSTANCE,
+ Windows1251.INSTANCE,
+ Windows1252.INSTANCE,
+ Windows1253.INSTANCE,
+ Windows1254.INSTANCE,
+ Windows1255.INSTANCE,
+ Windows1256.INSTANCE,
+ Windows1257.INSTANCE,
+ Windows1258.INSTANCE,
+ EucJp.INSTANCE,
+ Gbk.INSTANCE,
+ MacCyrillic.INSTANCE,
+ Macintosh.INSTANCE,
+ MacCyrillic.INSTANCE,
+ ShiftJis.INSTANCE,
+ UserDefined.INSTANCE,
+ Big5.INSTANCE,
+ };
+
+ /**
+ * Each encoding listed once. {@link #availableCharsets()} iterates this
+ * array to build its name-to-encoding map.
+ */
+ private static final Encoding[] ENCODINGS = {
+ Big5.INSTANCE,
+ EucJp.INSTANCE,
+ EucKr.INSTANCE,
+ Gb18030.INSTANCE,
+ Gbk.INSTANCE,
+ Ibm866.INSTANCE,
+ Iso2022Jp.INSTANCE,
+ Iso10.INSTANCE,
+ Iso13.INSTANCE,
+ Iso14.INSTANCE,
+ Iso15.INSTANCE,
+ Iso16.INSTANCE,
+ Iso2.INSTANCE,
+ Iso3.INSTANCE,
+ Iso4.INSTANCE,
+ Iso5.INSTANCE,
+ Iso6.INSTANCE,
+ Iso7.INSTANCE,
+ Iso8.INSTANCE,
+ Iso8I.INSTANCE,
+ Koi8R.INSTANCE,
+ Koi8U.INSTANCE,
+ Macintosh.INSTANCE,
+ Replacement.INSTANCE,
+ ShiftJis.INSTANCE,
+ Utf16Be.INSTANCE,
+ Utf16Le.INSTANCE,
+ Utf8.INSTANCE,
+ Windows1250.INSTANCE,
+ Windows1251.INSTANCE,
+ Windows1252.INSTANCE,
+ Windows1253.INSTANCE,
+ Windows1254.INSTANCE,
+ Windows1255.INSTANCE,
+ Windows1256.INSTANCE,
+ Windows1257.INSTANCE,
+ Windows1258.INSTANCE,
+ Windows874.INSTANCE,
+ MacCyrillic.INSTANCE,
+ UserDefined.INSTANCE,
+ };
+
+ // Public constants, one per encoding. Each refers to the shared
+ // INSTANCE of the corresponding encoding class, i.e. the same objects
+ // that ENCODINGS_FOR_LABELS and ENCODINGS hold.
+
+ /**
+ * The big5 encoding.
+ */
+ public static final Encoding BIG5 = Big5.INSTANCE;
+
+ /**
+ * The euc-jp encoding.
+ */
+ public static final Encoding EUC_JP = EucJp.INSTANCE;
+
+ /**
+ * The euc-kr encoding.
+ */
+ public static final Encoding EUC_KR = EucKr.INSTANCE;
+
+ /**
+ * The gb18030 encoding.
+ */
+ public static final Encoding GB18030 = Gb18030.INSTANCE;
+
+ /**
+ * The gbk encoding.
+ */
+ public static final Encoding GBK = Gbk.INSTANCE;
+
+ /**
+ * The ibm866 encoding.
+ */
+ public static final Encoding IBM866 = Ibm866.INSTANCE;
+
+ /**
+ * The iso-2022-jp encoding.
+ */
+ public static final Encoding ISO_2022_JP = Iso2022Jp.INSTANCE;
+
+ /**
+ * The iso-8859-10 encoding.
+ */
+ public static final Encoding ISO_8859_10 = Iso10.INSTANCE;
+
+ /**
+ * The iso-8859-13 encoding.
+ */
+ public static final Encoding ISO_8859_13 = Iso13.INSTANCE;
+
+ /**
+ * The iso-8859-14 encoding.
+ */
+ public static final Encoding ISO_8859_14 = Iso14.INSTANCE;
+
+ /**
+ * The iso-8859-15 encoding.
+ */
+ public static final Encoding ISO_8859_15 = Iso15.INSTANCE;
+
+ /**
+ * The iso-8859-16 encoding.
+ */
+ public static final Encoding ISO_8859_16 = Iso16.INSTANCE;
+
+ /**
+ * The iso-8859-2 encoding.
+ */
+ public static final Encoding ISO_8859_2 = Iso2.INSTANCE;
+
+ /**
+ * The iso-8859-3 encoding.
+ */
+ public static final Encoding ISO_8859_3 = Iso3.INSTANCE;
+
+ /**
+ * The iso-8859-4 encoding.
+ */
+ public static final Encoding ISO_8859_4 = Iso4.INSTANCE;
+
+ /**
+ * The iso-8859-5 encoding.
+ */
+ public static final Encoding ISO_8859_5 = Iso5.INSTANCE;
+
+ /**
+ * The iso-8859-6 encoding.
+ */
+ public static final Encoding ISO_8859_6 = Iso6.INSTANCE;
+
+ /**
+ * The iso-8859-7 encoding.
+ */
+ public static final Encoding ISO_8859_7 = Iso7.INSTANCE;
+
+ /**
+ * The iso-8859-8 encoding.
+ */
+ public static final Encoding ISO_8859_8 = Iso8.INSTANCE;
+
+ /**
+ * The iso-8859-8-i encoding.
+ */
+ public static final Encoding ISO_8859_8_I = Iso8I.INSTANCE;
+
+ /**
+ * The koi8-r encoding.
+ */
+ public static final Encoding KOI8_R = Koi8R.INSTANCE;
+
+ /**
+ * The koi8-u encoding.
+ */
+ public static final Encoding KOI8_U = Koi8U.INSTANCE;
+
+ /**
+ * The macintosh encoding.
+ */
+ public static final Encoding MACINTOSH = Macintosh.INSTANCE;
+
+ /**
+ * The replacement encoding.
+ */
+ public static final Encoding REPLACEMENT = Replacement.INSTANCE;
+
+ /**
+ * The shift_jis encoding.
+ */
+ public static final Encoding SHIFT_JIS = ShiftJis.INSTANCE;
+
+ /**
+ * The utf-16be encoding.
+ */
+ public static final Encoding UTF_16BE = Utf16Be.INSTANCE;
+
+ /**
+ * The utf-16le encoding.
+ */
+ public static final Encoding UTF_16LE = Utf16Le.INSTANCE;
+
+ /**
+ * The utf-8 encoding.
+ */
+ public static final Encoding UTF_8 = Utf8.INSTANCE;
+
+ /**
+ * The windows-1250 encoding.
+ */
+ public static final Encoding WINDOWS_1250 = Windows1250.INSTANCE;
+
+ /**
+ * The windows-1251 encoding.
+ */
+ public static final Encoding WINDOWS_1251 = Windows1251.INSTANCE;
+
+ /**
+ * The windows-1252 encoding.
+ */
+ public static final Encoding WINDOWS_1252 = Windows1252.INSTANCE;
+
+ /**
+ * The windows-1253 encoding.
+ */
+ public static final Encoding WINDOWS_1253 = Windows1253.INSTANCE;
+
+ /**
+ * The windows-1254 encoding.
+ */
+ public static final Encoding WINDOWS_1254 = Windows1254.INSTANCE;
+
+ /**
+ * The windows-1255 encoding.
+ */
+ public static final Encoding WINDOWS_1255 = Windows1255.INSTANCE;
+
+ /**
+ * The windows-1256 encoding.
+ */
+ public static final Encoding WINDOWS_1256 = Windows1256.INSTANCE;
+
+ /**
+ * The windows-1257 encoding.
+ */
+ public static final Encoding WINDOWS_1257 = Windows1257.INSTANCE;
+
+ /**
+ * The windows-1258 encoding.
+ */
+ public static final Encoding WINDOWS_1258 = Windows1258.INSTANCE;
+
+ /**
+ * The windows-874 encoding.
+ */
+ public static final Encoding WINDOWS_874 = Windows874.INSTANCE;
+
+ /**
+ * The x-mac-cyrillic encoding.
+ */
+ public static final Encoding X_MAC_CYRILLIC = MacCyrillic.INSTANCE;
+
+ /**
+ * The x-user-defined encoding.
+ */
+ public static final Encoding X_USER_DEFINED = UserDefined.INSTANCE;
+
+
+// Cache for availableCharsets(): stays null until that method is first
+// called, then holds the unmodifiable name-to-encoding map it built.
+private static SortedMap<String, Charset> encodings = null;
+
+ /**
+ * Hands the canonical name and the aliases unchanged to the
+ * {@link Charset} constructor.
+ *
+ * @param canonicalName the canonical name passed on to <code>Charset</code>
+ * @param aliases the aliases passed on to <code>Charset</code>
+ */
+ protected Encoding(String canonicalName, String[] aliases) {
+ super(canonicalName, aliases);
+ }
+
+ /**
+  * Where the label scanner in <code>forName</code> currently is relative to
+  * the label characters.
+  */
+ private enum State {
+     /** No label character seen yet; only leading whitespace so far. */
+     HEAD,
+     /** Inside the run of label characters. */
+     LABEL,
+     /** Whitespace has followed the label; only whitespace may come next. */
+     TAIL
+ }
+
+ /**
+  * Resolves an encoding label to its {@link Encoding}.
+  *
+  * <p>A label that already appears verbatim in the label table is resolved
+  * directly. Otherwise the label is normalized first: leading and trailing
+  * whitespace (space, tab, LF, CR, form feed) is dropped and ASCII upper-case
+  * letters are lower-cased. Only ASCII letters, digits and the characters
+  * <code>- + . : _</code> may appear in a label, and those five punctuation
+  * characters may not come first.
+  *
+  * @param label the label to resolve
+  * @return the encoding registered for the label
+  * @throws IllegalArgumentException if <code>label</code> is
+  *         <code>null</code>
+  * @throws IllegalCharsetNameException if <code>label</code> is empty,
+  *         contains a character outside the permitted set, starts with one
+  *         of the permitted punctuation characters, or has whitespace
+  *         between label characters
+  * @throws UnsupportedCharsetException if the normalized label is not in
+  *         the label table (this includes a label made only of whitespace)
+  */
+ public static Encoding forName(String label) {
+     if (label == null) {
+         throw new IllegalArgumentException("Label must not be null.");
+     }
+     if (label.length() == 0) {
+         throw new IllegalCharsetNameException(label);
+     }
+     // Cheap attempt first: the caller may have passed a table entry as is.
+     int position = Arrays.binarySearch(LABELS, label);
+     if (position >= 0) {
+         return ENCODINGS_FOR_LABELS[position];
+     }
+     // Otherwise normalize the label one character at a time.
+     StringBuilder normalized = new StringBuilder();
+     State state = State.HEAD;
+     for (char c : label.toCharArray()) {
+         boolean whitespace = c == ' ' || c == '\n' || c == '\r'
+                 || c == '\t' || c == '\u000C';
+         if (whitespace) {
+             // Whitespace after label characters closes the label.
+             if (state == State.LABEL) {
+                 state = State.TAIL;
+             }
+             continue;
+         }
+         if (state == State.TAIL) {
+             // Nothing but whitespace may follow a closed label.
+             throw new IllegalCharsetNameException(label);
+         }
+         boolean upper = c >= 'A' && c <= 'Z';
+         boolean alphanumeric = upper || (c >= 'a' && c <= 'z')
+                 || (c >= '0' && c <= '9');
+         boolean punctuation = c == '-' || c == '+' || c == '.'
+                 || c == ':' || c == '_';
+         if (alphanumeric) {
+             state = State.LABEL;
+             // 0x20 is the distance from an ASCII capital to its small letter.
+             normalized.append(upper ? (char) (c + 0x20) : c);
+         } else if (punctuation && state == State.LABEL) {
+             normalized.append(c);
+         } else {
+             // Punctuation before any label character, or a forbidden character.
+             throw new IllegalCharsetNameException(label);
+         }
+     }
+     position = Arrays.binarySearch(LABELS, normalized.toString());
+     if (position >= 0) {
+         return ENCODINGS_FOR_LABELS[position];
+     }
+     throw new UnsupportedCharsetException(label);
+ }
+
+ /**
+  * Resolves a label like {@link #forName(String)}, except that a label
+  * resolving to {@link #REPLACEMENT} is reported as unsupported.
+  *
+  * @param label the label to resolve
+  * @return the encoding for the label; never {@link #REPLACEMENT}
+  * @throws UnsupportedCharsetException if <code>forName</code> throws it
+  *         or returns {@link #REPLACEMENT}
+  */
+ public static Encoding forNameNoReplacement(String label) {
+     Encoding resolved = Encoding.forName(label);
+     if (resolved != Encoding.REPLACEMENT) {
+         return resolved;
+     }
+     throw new UnsupportedCharsetException(label);
+ }
+
+ /**
+  * Tells whether {@link #forName(String)} can resolve a label.
+  *
+  * @param label the label to probe
+  * @return <code>true</code> if <code>forName(label)</code> returns
+  *         normally; <code>false</code> if it throws
+  *         {@link UnsupportedCharsetException}
+  * @throws IllegalArgumentException if <code>forName</code> throws any
+  *         other exception, which is passed through uncaught (a
+  *         <code>null</code>, empty or malformed label)
+  */
+ public static boolean isSupported(String label) {
+     boolean resolved = true;
+     try {
+         Encoding.forName(label);
+     } catch (UnsupportedCharsetException notFound) {
+         resolved = false;
+     }
+     return resolved;
+ }
+
+ /**
+  * Tells whether {@link #forNameNoReplacement(String)} can resolve a label.
+  *
+  * @param label the label to probe
+  * @return <code>true</code> if <code>forNameNoReplacement(label)</code>
+  *         returns normally; <code>false</code> if it throws
+  *         {@link UnsupportedCharsetException}
+  * @throws IllegalArgumentException if the lookup throws any other
+  *         exception, which is passed through uncaught (a
+  *         <code>null</code>, empty or malformed label)
+  */
+ public static boolean isSupportedNoReplacement(String label) {
+     boolean resolved = true;
+     try {
+         Encoding.forNameNoReplacement(label);
+     } catch (UnsupportedCharsetException notFound) {
+         resolved = false;
+     }
+     return resolved;
+ }
+
+ /**
+  * Returns the encodings of this class keyed by {@link Charset#name()}.
+  *
+  * <p>The map is built on first use from the internal list of encodings,
+  * wrapped as unmodifiable and cached in a static field. No lock is taken:
+  * threads racing on the first call may each build a map, and every such
+  * map has the same contents.
+  *
+  * @return an unmodifiable sorted map from name to encoding
+  */
+ public static SortedMap<String, Charset> availableCharsets() {
+     // Read the unsynchronized cache field exactly once. Checking the field
+     // and then returning the field again would be two independent racy
+     // reads, and the second one is allowed to observe null even after the
+     // first saw a value.
+     SortedMap<String, Charset> cached = encodings;
+     if (cached == null) {
+         TreeMap<String, Charset> byName = new TreeMap<String, Charset>();
+         for (Encoding encoding : ENCODINGS) {
+             byName.put(encoding.name(), encoding);
+         }
+         cached = Collections.unmodifiableSortedMap(byName);
+         encodings = cached;
+     }
+     return cached;
+ }
+
+ /**
+ * Returns the encoding this class uses as its default, which is always
+ * {@link #WINDOWS_1252}. Has the same name as the static
+ * <code>defaultCharset()</code> of {@link Charset} and hides it.
+ *
+ * @return {@link #WINDOWS_1252}
+ */
+ public static Encoding defaultCharset() {
+ return WINDOWS_1252;
+ }
+
+ /**
+ * This base implementation always answers <code>false</code>; the
+ * {@link #newEncoder()} of this class throws instead of returning an
+ * encoder.
+ *
+ * @return <code>false</code>
+ */
+ @Override public boolean canEncode() {
+ return false;
+ }
+
+ /**
+ * This base implementation always answers <code>false</code>, whatever
+ * charset is passed in.
+ *
+ * @param cs the charset to compare against; not inspected
+ * @return <code>false</code>
+ */
+ @Override public boolean contains(Charset cs) {
+ return false;
+ }
+
+ /**
+ * This base implementation provides no encoder and always throws.
+ *
+ * @return never returns normally
+ * @throws UnsupportedOperationException always
+ */
+ @Override public CharsetEncoder newEncoder() {
+ throw new UnsupportedOperationException("Encoder not implemented.");
+ }
+}