diff options
author | Matt A. Tobin <email@mattatobin.com> | 2020-01-16 07:32:48 -0500 |
---|---|---|
committer | Matt A. Tobin <email@mattatobin.com> | 2020-01-16 07:32:48 -0500 |
commit | ed60101550022a2650edc41cd3a63b35fea836c5 (patch) | |
tree | e6967e47f27945599ec09c4401f7932751315beb /parser/html/java/htmlparser/src/nu/validator | |
parent | fa816e1ec69d865114b7d061905574038fbd425b (diff) | |
parent | 927c386dd8c9526d8695d0202a08735984dc7b31 (diff) | |
download | UXP-ed60101550022a2650edc41cd3a63b35fea836c5.tar UXP-ed60101550022a2650edc41cd3a63b35fea836c5.tar.gz UXP-ed60101550022a2650edc41cd3a63b35fea836c5.tar.lz UXP-ed60101550022a2650edc41cd3a63b35fea836c5.tar.xz UXP-ed60101550022a2650edc41cd3a63b35fea836c5.zip |
Merge branch 'html5-parser-work'
Diffstat (limited to 'parser/html/java/htmlparser/src/nu/validator')
148 files changed, 42470 insertions, 0 deletions
diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Big5.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Big5.java new file mode 100644 index 000000000..00e5f7ca7 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Big5.java @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. 
+ * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CharsetEncoder; + +class Big5 extends Encoding { + + private static final String[] LABELS = { + "big5", + "big5-hkscs", + "cn-big5", + "csbig5", + "x-x-big5" + }; + + private static final String NAME = "big5"; + + static final Big5 INSTANCE = new Big5(); + + private Big5() { + super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return new Big5Decoder(this); + } + + @Override public CharsetEncoder newEncoder() { + return new Big5Encoder(this); + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Big5Data.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Big5Data.java new file mode 100644 index 000000000..9f35be341 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Big5Data.java @@ -0,0 +1,185 @@ +/* + * Copyright (c) 2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. + * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +final class Big5Data { + + private static final String ASTRALNESS = "\uF829\u7A22\u1290\uC5C4\u0007\u0200\u7549\"\u0000\uA000\u3859\u0300\u002C\u573E\uF72B\u6EFC\u90F2\u3B7B\u83E9\uF049\u9DA6\uBBFC\uBEF7\uFDFE\u0C83\uABD1\u7BFF\u7FBF\u1804\u002C\u4840\u2046\u0408\u2A22\u4858\u091A\u5100\u3122\uC000\u5000\uC00D\u6110\uD44C\u9A24\u0180\u0004\u92B2\u0209\u8631\u1242\u8140\u0351\uAB48\u7460\uD5A2\u3E5C\uE361\u1083\u720B\u08A0\u51D6\uE00A\u8100\u1686\uC443\u1135\u6037\u7AE6\u056D\u7D0C\u0E66\u81E0\u7F88\u2420\u2406\u1D03\u340C\u4268\u454A\uF13F\u080D\u8084\uBB00\u0C4D\u6ED6\u97D7\u41DF\u5D3E\uDA68\u305C\uB800\u26E9\u80BC\u0151\uE078\u89A1\u59C0\u9679\u3BCC\u5EDE\uBC2C\uDF9B\u6C5D\u046D\u6043\u4A36\uD860\u073E\uC8C4\u6C69\uD8B1\u8302\u0F88\u0973\u806E\u3B6B\u5A17\uA503\u2D52\u3F40\u1120\u4101\u5024\uB903\u90EE\u1079\u5CAD\u1820\uDA0A\u8060\u9E26\u6E73\u1021\u080E\u4368\u6FB2\u161F\u8AFE\u76B6\u763A\u8262\u1894\u1801\uFE7D\u578D\u1327\u5BD2\u1937\uDB8C\u4862\u0024\u0000\u0010\u8000\u0000\u0000\u0038\u3800\uB9E2\uFD7D\u75F8\uDCF7\u6FF3\uBBF2\uFF4A\uAE3F\u9FC5\uEAFF\uBABA\uBC5D\u9F73\uD8FA\uDED6\u4B25\u975E\u2ADA\u6DB9\u06E6\u9D36\u53F9\u6FC5\uF98A\u49BF\uDB5D\uFFF8\u14A6\uE605\u96F7\u0A99\u00E5\u0800\u3D81\u5002\u0102\uBF49\u475E\u036F\u6280\uEECA\u4819\u6081\u205A\u24F7\u0000\u0004\u0000\u2804\u22C8\u0200\u0000\u2010\u5082\u3040\u0001\u0010\u1284\u0041\u0504\u2000\uC100\u3F7F\uB059\u8AC1\uAFAF\uAC05\u033F\u0204\u7280\u420A\u0426\u02D0\u0EC3\u0958\u0A80\u20B5\u9206\u8B77\u0560\u21C9\u4606\u6038\uC048\u24B4\u84DE\uC0E0\u3364\u3154\u300D\u688A
\u5F2B\u0626\u8496\uB108\uE890\uA394\u734F\u50B8\u0D11\uDFA4\u4003\u5D20\u8480\u6160\u51CE\u800A\u58B7\u0050\uE862\u6750\u7220\u1228"; + + private static final String TABLE0 = "\u43F0\u4C32\u4603\u45A6\u4578\u7267\u4D77\u45B3\u7CB1\u4CE2\u7CC5\u3B95\u4736\u4744\u4C47\u4C40\u42BF\u3617\u7352\u6E8B\u70D2\u4C57\uA351\u474F\u45DA\u4C85\u7C6C\u4D07\u4AA4\u46A1\u6B23\u7225\u5A54\u1A63\u3E06\u3F61\u664D\u56FB\u0000\u7D95\u591D\u8BB9\u3DF4\u9734\u7BEF\u5BDB\u1D5E\u5AA4\u3625\u9EB0\u5AD1\u5BB7\u5CFC\u676E\u8593\u9945\u7461\u749D\u3875\u1D53\u369E\u6021\u3EEC\u58DE\u3AF5\u7AFC\u9F97\u4161\u890D\u31EA\u0A8A\u325E\u430A\u8484\u9F96\u942F\u4930\u8613\u5896\u974A\u9218\u79D0\u7A32\u6660\u6A29\u889D\u744C\u7BC5\u6782\u7A2C\u524F\u9046\u34E6\u73C4\u5DB9\u74C6\u9FC7\u57B3\u492F\u544C\u4131\u368E\u5818\u7A72\u7B65\u8B8F\u46AE\u6E88\u4181\u5D99\u7BAE\u24BC\u9FC8\u24C1\u24C9\u24CC\u9FC9\u8504\u35BB\u40B4\u9FCA\u44E1\uADFF\u62C1\u706E\u9FCB"; + + private static final String TABLE1 = "\u31C0\u31C1\u31C2\u31C3\u31C4\u010C\u31C5\u00D1\u00CD\u31C6\u31C7\u00CB\u1FE8\u31C8\u00CA\u31C9\u31CA\u31CB\u31CC\u010E\u31CD\u31CE\u0100\u00C1\u01CD\u00C0\u0112\u00C9\u011A\u00C8\u014C\u00D3\u01D1\u00D2\u0000\u1EBE\u0000\u1EC0\u00CA\u0101\u00E1\u01CE\u00E0\u0251\u0113\u00E9\u011B\u00E8\u012B\u00ED\u01D0\u00EC\u014D\u00F3\u01D2\u00F2\u016B\u00FA\u01D4\u00F9\u01D6\u01D8\u01DA\u01DC\u00FC\u0000\u1EBF\u0000\u1EC1\u00EA\u0261\u23DA\u23DB"; + + private static final String TABLE2 = 
"\uA3A9\u1145\u0000\u650A\u0000\u0000\u4E3D\u6EDD\u9D4E\u91DF\u0000\u0000\u7735\u6491\u4F1A\u4F28\u4FA8\u5156\u5174\u519C\u51E4\u52A1\u52A8\u533B\u534E\u53D1\u53D8\u56E2\u58F0\u5904\u5907\u5932\u5934\u5B66\u5B9E\u5B9F\u5C9A\u5E86\u603B\u6589\u67FE\u6804\u6865\u6D4E\u70BC\u7535\u7EA4\u7EAC\u7EBA\u7EC7\u7ECF\u7EDF\u7F06\u7F37\u827A\u82CF\u836F\u89C6\u8BBE\u8BE2\u8F66\u8F67\u8F6E\u7411\u7CFC\u7DCD\u6946\u7AC9\u5227\u0000\u0000\u0000\u0000\u918C\u78B8\u915E\u80BC\u0000\u8D0B\u80F6\u09E7\u0000\u0000\u809F\u9EC7\u4CCD\u9DC9\u9E0C\u4C3E\u9DF6\u700E\u9E0A\uA133\u35C1\u0000\u6E9A\u823E\u7519\u0000\u4911\u9A6C\u9A8F\u9F99\u7987\u846C\u1DCA\u05D0\u2AE6\u4E24\u4E81\u4E80\u4E87\u4EBF\u4EEB\u4F37\u344C\u4FBD\u3E48\u5003\u5088\u347D\u3493\u34A5\u5186\u5905\u51DB\u51FC\u5205\u4E89\u5279\u5290\u5327\u35C7\u53A9\u3551\u53B0\u3553\u53C2\u5423\u356D\u3572\u3681\u5493\u54A3\u54B4\u54B9\u54D0\u54EF\u5518\u5523\u5528\u3598\u553F\u35A5\u35BF\u55D7\u35C5\u7D84\u5525\u0000\u0C42\u0D15\u512B\u5590\u2CC6\u39EC\u0341\u8E46\u4DB8\u94E5\u4053\u80BE\u777A\u2C38\u3A34\u47D5\u815D\u69F2\u4DEA\u64DD\u0D7C\u0FB4\u0CD5\u10F4\u648D\u8E7E\u0E96\u0C0B\u0F64\u2CA9\u8256\u44D3\u0000\u0D46\u9A4D\u80E9\u47F4\u4EA7\u2CC2\u9AB2\u3A67\u95F4\u3FED\u3506\u52C7\u97D4\u78C8\u2D44\u9D6E\u9815\u0000\u43D9\u60A5\u64B4\u54E3\u2D4C\u2BCA\u1077\u39FB\u106F\u66DA\u6716\u79A0\u64EA\u5052\u0C43\u8E68\u21A1\u8B4C\u0731\u0000\u480B\u01A9\u3FFA\u5873\u2D8D\u0000\u45C8\u04FC\u6097\u0F4C\u0D96\u5579\u40BB\u43BA\u0000\u4AB4\u2A66\u109D\u81AA\u98F5\u0D9C\u6379\u39FE\u2775\u8DC0\u56A1\u647C\u3E43\u0000\uA601\u0E09\u2ACF\u2CC9\u0000\u10C8\u39C2\u3992\u3A06\u829B\u3578\u5E49\u20C7\u5652\u0F31\u2CB2\u9720\u34BC\u6C3D\u4E3B\u0000\u0000\u7574\u2E8B\u2208\uA65B\u8CCD\u0E7A\u0C34\u681C\u7F93\u10CF\u2803\u2939\u35FB\u51E3\u0E8C\u0F8D\u0EAA\u3F93\u0F30\u0D47\u114F\u0E4C\u0000\u0EAB\u0BA9\u0D48\u10C0\u113D\u3FF9\u2696\u6432\u0FAD\u33F4\u7639\u2BCE\u0D7E\u0D7F\u2C51\u2C55\u3A18\u0E98\u10C7\u0F2E\uA632\u6B50\u8CD2\u8D99\u8CCA\u95AA\u54CC\u82C4\
u55B9\u0000\u9EC3\u9C26\u9AB6\u775E\u2DEE\u7140\u816D\u80EC\u5C1C\u6572\u8134\u3797\u535F\u80BD\u91B6\u0EFA\u0E0F\u0E77\u0EFB\u35DD\u4DEB\u3609\u0CD6\u56AF\u27B5\u10C9\u0E10\u0E78\u1078\u1148\u8207\u1455\u0E79\u4E50\u2DA4\u5A54\u101D\u101E\u10F5\u10F6\u579C\u0E11\u7694\u82CD\u0FB5\u0E7B\u517E\u3703\u0FB6\u1180\u52D8\uA2BD\u49DA\u183A\u4177\u827C\u5899\u5268\u361A\u573D\u7BB2\u5B68\u4800\u4B2C\u9F27\u49E7\u9C1F\u9B8D\u5B74\u313D\u55FB\u35F2\u5689\u4E28\u5902\u1BC1\uF878\u9751\u0086\u4E5B\u4EBB\u353E\u5C23\u5F51\u5FC4\u38FA\u624C\u6535\u6B7A\u6C35\u6C3A\u706C\u722B\u4E2C\u72AD\u48E9\u7F52\u793B\u7CF9\u7F53\u626A\u34C1\u0000\u634B\u8002\u8080\u6612\u6951\u535D\u8864\u89C1\u78B2\u8BA0\u8D1D\u9485\u9578\u957F\u95E8\u8E0F\u97E6\u9875\u98CE\u98DE\u9963\u9810\u9C7C\u9E1F\u9EC4\u6B6F\uF907\u4E37\u0087\u961D\u6237\u94A2\u0000\u503B\u6DFE\u9C73\u9FA6\u3DC9\u888F\u414E\u7077\u5CF5\u4B20\u51CD\u3559\u5D30\u6122\u8A32\u8FA7\u91F6\u7191\u6719\u73BA\u3281\uA107\u3C8B\u1980\u4B10\u78E4\u7402\u51AE\u870F\u4009\u6A63\uA2BA\u4223\u860F\u0A6F\u7A2A\u9947\u8AEA\u9755\u704D\u5324\u207E\u93F4\u76D9\u89E3\u9FA7\u77DD\u4EA3\u4FF0\u50BC\u4E2F\u4F17\u9FA8\u5434\u7D8B\u5892\u58D0\u1DB6\u5E92\u5E99\u5FC2\u2712\u658B\u33F9\u6919\u6A43\u3C63\u6CFF\u0000\u7200\u4505\u738C\u3EDB\u4A13\u5B15\u74B9\u8B83\u5CA4\u5695\u7A93\u7BEC\u7CC3\u7E6C\u82F8\u8597\u9FA9\u8890\u9FAA\u8EB9\u9FAB\u8FCF\u855F\u99E0\u9221\u9FAC\u8DB9\u143F\u4071\u42A2\u5A1A\u0000\u0000\u0000\u9868\u676B\u4276\u573D\u0000\u85D6\u497B\u82BF\u710D\u4C81\u6D74\u5D7B\u6B15\u6FBE\u9FAD\u9FAE\u5B96\u9FAF\u66E7\u7E5B\u6E57\u79CA\u3D88\u44C3\u3256\u2796\u439A\u4536\u0000\u5CD5\u3B1A\u8AF9\u5C78\u3D12\u3551\u5D78\u9FB2\u7157\u4558\u40EC\u1E23\u4C77\u3978\u344A\u01A4\u6C41\u8ACC\u4FB4\u0239\u59BF\u816C\u9856\u98FA\u5F3B\u0B9F\u0000\u21C1\u896D\u4102\u46BB\u9079\u3F07\u9FB3\uA1B5\u40F8\u37D6\u46F7\u6C46\u417C\u86B2\u73FF\u456D\u38D4\u549A\u4561\u451B\u4D89\u4C7B\u4D76\u45EA\u3FC8\u4B0F\u3661\u44DE\u44BD\u41ED\u5D3E\u5D48\u5D56\u3DFC\u380F\u5DA4\u5
DB9\u3820\u3838\u5E42\u5EBD\u5F25\u5F83\u3908\u3914\u393F\u394D\u60D7\u613D\u5CE5\u3989\u61B7\u61B9\u61CF\u39B8\u622C\u6290\u62E5\u6318\u39F8\u56B1\u3A03\u63E2\u63FB\u6407\u645A\u3A4B\u64C0\u5D15\u5621\u9F9F\u3A97\u6586\u3ABD\u65FF\u6653\u3AF2\u6692\u3B22\u6716\u3B42\u67A4\u6800\u3B58\u684A\u6884\u3B72\u3B71\u3B7B\u6909\u6943\u725C\u6964\u699F\u6985\u3BBC\u69D6\u3BDD\u6A65\u6A74\u6A71\u6A82\u3BEC\u6A99\u3BF2\u6AAB\u6AB5\u6AD4\u6AF6\u6B81\u6BC1\u6BEA\u6C75\u6CAA\u3CCB\u6D02\u6D06\u6D26\u6D81\u3CEF\u6DA4\u6DB1\u6E15\u6E18\u6E29\u6E86\u89C0\u6EBB\u6EE2\u6EDA\u9F7F\u6EE8\u6EE9\u6F24\u6F34\u3D46\u3F41\u6F81\u6FBE\u3D6A\u3D75\u71B7\u5C99\u3D8A\u702C\u3D91\u7050\u7054\u706F\u707F\u7089\u0325\u43C1\u35F1\u0ED8\u3ED7\u57BE\u6ED3\u713E\u57E0\u364E\u69A2\u8BE9\u5B74\u7A49\u58E1\u94D9\u7A65\u7A7D\u59AC\u7ABB\u7AB0\u7AC2\u7AC3\u71D1\u648D\u41CA\u7ADA\u7ADD\u7AEA\u41EF\u54B2\u5C01\u7B0B\u7B55\u7B29\u530E\u5CFE\u7BA2\u7B6F\u839C\u5BB4\u6C7F\u7BD0\u8421\u7B92\u7BB8\u5D20\u3DAD\u5C65\u8492\u7BFA\u7C06\u7C35\u5CC1\u7C44\u7C83\u4882\u7CA6\u667D\u4578\u7CC9\u7CC7\u7CE6\u7C74\u7CF3\u7CF5\u7CCE\u7E67\u451D\u6E44\u7D5D\u6ED6\u748D\u7D89\u7DAB\u7135\u7DB3\u7DD2\u4057\u6029\u7DE4\u3D13\u7DF5\u17F9\u7DE5\u836D\u7E1D\u6121\u615A\u7E6E\u7E92\u432B\u946C\u7E27\u7F40\u7F41\u7F47\u7936\u62D0\u99E1\u7F97\u6351\u7FA3\u1661\u0068\u455C\u3766\u4503\u833A\u7FFA\u6489\u8005\u8008\u801D\u8028\u802F\uA087\u6CC3\u803B\u803C\u8061\u2714\u4989\u6626\u3DE3\u66E8\u6725\u80A7\u8A48\u8107\u811A\u58B0\u26F6\u6C7F\u6498\u4FB8\u64E7\u148A\u8218\u185E\u6A53\u4A65\u4A95\u447A\u8229\u0B0D\u6A52\u3D7E\u4FF9\u14FD\u84E2\u8362\u6B0A\u49A7\u3530\u1773\u3DF8\u82AA\u691B\uF994\u41DB\u854B\u82D0\u831A\u0E16\u17B4\u36C1\u317D\u355A\u827B\u82E2\u8318\u3E8B\u6DA3\u6B05\u6B97\u35CE\u3DBF\u831D\u55EC\u8385\u450B\u6DA5\u83AC\u83C1\u83D3\u347E\u6ED4\u6A57\u855A\u3496\u6E42\u2EEF\u8458\u5BE4\u8471\u3DD3\u44E4\u6AA7\u844A\u3CB5\u7958\u84A8\u6B96\u6E77\u6E43\u84DE\u840F\u8391\u44A0\u8493\u84E4\u5C91\u4240\u5CC0\u4543\u8534\u5AF2\u6E9
9\u4527\u8573\u4516\u67BF\u8616\u8625\u863B\u85C1\u7088\u8602\u1582\u70CD\uF9B2\u456A\u8628\u3648\u18A2\u53F7\u739A\u867E\u8771\uA0F8\u87EE\u2C27\u87B1\u87DA\u880F\u5661\u866C\u6856\u460F\u8845\u8846\u75E0\u3DB9\u75E4\u885E\u889C\u465B\u88B4\u88B5\u63C1\u88C5\u7777\u770F\u8987\u898A\u89A6\u89A9\u89A7\u89BC\u8A25\u89E7\u7924\u7ABD\u8A9C\u7793\u91FE\u8A90\u7A59\u7AE9\u7B3A\u3F8F\u4713\u7B38\u717C\u8B0C\u8B1F\u5430\u5565\u8B3F\u8B4C\u8B4D\u8AA9\u4A7A\u8B90\u8B9B\u8AAF\u16DF\u4615\u884F\u8C9B\u7D54\u7D8F\uF9D4\u3725\u7D53\u8CD6\u7D98\u7DBD\u8D12\u8D03\u1910\u8CDB\u705C\u8D11\u4CC9\u3ED0\u8D77\u8DA9\u8002\u1014\u498A\u3B7C\u81BC\u710C\u7AE7\u8EAD\u8EB6\u8EC3\u92D4\u8F19\u8F2D\u8365\u8412\u8FA5\u9303\uA29F\u0A50\u8FB3\u492A\u89DE\u853D\u3DBB\u5EF8\u3262\u8FF9\uA014\u86BC\u8501\u2325\u3980\u6ED7\u9037\u853C\u7ABE\u9061\u856C\u860B\u90A8\u8713\u90C4\u86E6\u90AE\u90FD\u9167\u3AF0\u91A9\u91C4\u7CAC\u8933\u1E89\u920E\u6C9F\u9241\u9262\u55B9\u92B9\u8AC6\u3C9B\u8B0C\u55DB\u0D31\u932C\u936B\u8AE1\u8BEB\u708F\u5AC3\u8AE2\u8AE5\u4965\u9244\u8BEC\u8C39\u8BFF\u9373\u945B\u8EBC\u9585\u95A6\u9426\u95A0\u6FF6\u42B9\u267A\u86D8\u127C\u3E2E\u49DF\u6C1C\u967B\u9696\u416C\u96A3\u6ED5\u61DA\u96B6\u78F5\u8AE0\u96BD\u53CC\u49A1\u6CB8\u0274\u6410\u90AF\u90E5\u4AD1\u1915\u330A\u9731\u8642\u9736\u4A0F\u453D\u4585\u4AE9\u7075\u5B41\u971B\u975C\u91D5\u9757\u5B4A\u91EB\u975F\u9425\u50D0\u30B7\u30BC\u9789\u979F\u97B1\u97BE\u97C0\u97D2\u97E0\u546C\u97EE\u741C\u9433\u97FF\u97F5\u941D\u797A\u4AD1\u9834\u9833\u984B\u9866\u3B0E\u7175\u3D51\u0630\u415C\u5706\u98CA\u98B7\u98C8\u98C7\u4AFF\u6D27\u16D3\u55B0\u98E1\u98E6\u98EC\u9378\u9939\u4A29\u4B72\u9857\u9905\u99F5\u9A0C\u9A3B\u9A10\u9A58\u5725\u36C4\u90B1\u9BD5\u9AE0\u9AE2\u9B05\u9AF4\u4C0E\u9B14\u9B2D\u8600\u5034\u9B34\u69A8\u38C3\u307D\u9B50\u9B40\u9D3E\u5A45\u1863\u9B8E\u424B\u9C02\u9BFF\u9C0C\u9E68\u9DD4\u9FB7\uA192\uA1AB\uA0E1\uA123\uA1DF\u9D7E\u9D83\uA134\u9E0E\u6888\u9DC4\u215B\uA193\uA220\u193B\uA233\u9D39\uA0B9\uA2B4\u9E90\u9E95\u9E9E\u9EA2\u4D34\
u9EAA\u9EAF\u4364\u9EC1\u3B60\u39E5\u3D1D\u4F32\u37BE\u8C2B\u9F02\u9F08\u4B96\u9424\u6DA2\u9F17\u9F16\u9F39\u569F\u568A\u9F45\u99B8\u908B\u97F2\u847F\u9F62\u9F69\u7ADC\u9F8E\u7216\u4BBE\u4975\u49BB\u7177\u49F8\u4348\u4A51\u739E\u8BDA\u18FA\u799F\u897E\u8E36\u9369\u93F3\u8A44\u92EC\u9381\u93CB\u896C\u44B9\u7217\u3EEB\u7772\u7A43\u70D0\u4473\u43F8\u717E\u17EF\u70A3\u18BE\u3599\u3EC7\u1885\u542F\u17F8\u3722\u16FB\u1839\u36E1\u1774\u18D1\u5F4B\u3723\u16C0\u575B\u4A25\u13FE\u12A8\u13C6\u14B6\u8503\u36A6\u8503\u8455\u4994\u7165\u3E31\u555C\u3EFB\u7052\u44F4\u36EE\u999D\u6F26\u67F9\u3733\u3C15\u3DE7\u586C\u1922\u6810\u4057\u373F\u40E1\u408B\u410F\u6C21\u54CB\u569E\u66B1\u5692\u0FDF\u0BA8\u0E0D\u93C6\u8B13\u939C\u4EF8\u512B\u3819\u4436\u4EBC\u0465\u037F\u4F4B\u4F8A\u5651\u5A68\u01AB\u03CB\u3999\u030A\u0414\u3435\u4F29\u02C0\u8EB3\u0275\u8ADA\u020C\u4E98\u50CD\u510D\u4FA2\u4F03\u4A0E\u3E8A\u4F42\u502E\u506C\u5081\u4FCC\u4FE5\u5058\u50FC\u5159\u515B\u515D\u515E\u6E76\u3595\u3E39\u3EBF\u6D72\u1884\u3E89\u51A8\u51C3\u05E0\u44DD\u04A3\u0492\u0491\u8D7A\u8A9C\u070E\u5259\u52A4\u0873\u52E1\u936E\u467A\u718C\u438C\u0C20\u49AC\u10E4\u69D1\u0E1D\u7479\u3EDE\u7499\u7414\u7456\u7398\u4B8E\u4ABC\u408D\u53D0\u3584\u720F\u40C9\u55B4\u0345\u54CD\u0BC6\u571D\u925D\u96F4\u9366\u57DD\u578D\u577F\u363E\u58CB\u5A99\u8A46\u16FA\u176F\u1710\u5A2C\u59B8\u928F\u5A7E\u5ACF\u5A12\u5946\u19F3\u1861\u4295\u36F5\u6D05\u7443\u5A21\u5E83\u5A81\u8BD7\u0413\u93E0\u748C\u1303\u7105\u4972\u9408\u89FB\u93BD\u37A0\u5C1E\u5C9E\u5E5E\u5E48\u1996\u197C\u3AEE\u5ECD\u5B4F\u1903\u1904\u3701\u18A0\u36DD\u16FE\u36D3\u812A\u8A47\u1DBA\u3472\u89A8\u5F0C\u5F0E\u1927\u17AB\u5A6B\u173B\u5B44\u8614\u75FD\u8860\u607E\u2860\u262B\u5FDB\u3EB8\u25AF\u25BE\u9088\u6F73\u61C0\u003E\u0046\u261B\u6199\u6198\u6075\u2C9B\u2D07\u46D4\u914D\u6471\u4665\u2B6A\u3A29\u2B22\u3450\u98EA\u2E78\u6337\uA45B\u64B6\u6331\u63D1\u49E3\u2D67\u62A4\u2CA1\u643B\u656B\u6972\u3BF4\u308E\u32AD\u4989\u32AB\u550D\u32E0\u18D9\u943F\u66CE\u3289\u31B3\u3AE0\u4
190\u5584\u8B22\u558F\u16FC\u555B\u5425\u78EE\u3103\u182A\u3234\u3464\u320F\u3182\u42C9\u668E\u6D24\u666B\u4B93\u6630\u7870\u1DEB\u6663\u32D2\u32E1\u661E\u5872\u38D1\u383A\u37BC\u3B99\u37A2\u33FE\u74D0\u3B96\u678F\u462A\u68B6\u681E\u3BC4\u6ABE\u3863\u37D5\u4487\u6A33\u6A52\u6AC9\u6B05\u1912\u6511\u6898\u6A4C\u3BD7\u6A7A\u6B57\u3FC0\u3C9A\u93A0\u92F2\u8BEA\u8ACB\u9289\u801E\u89DC\u9467\u6DA5\u6F0B\u49EC\u6D67\u3F7F\u3D8F\u6E04\u403C\u5A3D\u6E0A\u5847\u6D24\u7842\u713B\u431A\u4276\u70F1\u7250\u7287\u7294\u478F\u4725\u5179\u4AA4\u05EB\u747A\u3EF8\u365F\u4A4A\u4917\u5FE1\u3F06\u3EB1\u4ADF\u8C23\u3F35\u60A7\u3EF3\u74CC\u743C\u9387\u7437\u449F\u6DEA\u4551\u7583\u3F63\u4CD9\u4D06\u3F58\u7555\u7673\uA5C6\u3B19\u7468\u8ACC\u49AB\u498E\u3AFB\u3DCD\u4A4E\u3EFF\u49C5\u48F3\u91FA\u5732\u9342\u8AE3\u1864\u50DF\u5221\u51E7\u7778\u3232\u770E\u770F\u777B\u4697\u3781\u3A5E\u48F0\u7438\u749B\u3EBF\u4ABA\u4AC7\u40C8\u4A96\u61AE\u9307\u5581\u781E\u788D\u7888\u78D2\u73D0\u7959\u7741\u56E3\u410E\u799B\u8496\u79A5\u6A2D\u3EFA\u7A3A\u79F4\u416E\u16E6\u4132\u9235\u79F1\u0D4C\u498C\u0299\u3DBA\u176E\u3597\u556B\u3570\u36AA\u01D4\u0C0D\u7AE2\u5A59\u26F5\u5AAF\u5A9C\u5A0D\u025B\u78F0\u5A2A\u5BC6\u7AFE\u41F9\u7C5D\u7C6D\u4211\u5BB3\u5EBC\u5EA6\u7CCD\u49F9\u17B0\u7C8E\u7C7C\u7CAE\u6AB2\u7DDC\u7E07\u7DD3\u7F4E\u6261\u615C\u7B48\u7D97\u5E82\u426A\u6B75\u0916\u67D6\u004E\u35CF\u57C4\u6412\u63F8\u4962\u7FDD\u7B27\u082C\u5AE9\u5D43\u7B0C\u5E0E\u99E6\u8645\u9A63\u6A1C\u343F\u39E2\u49F7\u65AD\u9A1F\u65A0\u8480\u7127\u6CD1\u44EA\u8137\u4402\u80C6\u8109\u8142\u67B4\u98C3\u6A42\u8262\u8265\u6A51\u8453\u6DA7\u8610\u721B\u5A86\u417F\u1840\u5B2B\u18A1\u5AE4\u18D8\u86A0\uF9BC\u3D8F\u882D\u7422\u5A02\u886E\u4F45\u8887\u88BF\u88E6\u8965\u894D\u5683\u8954\u7785\u7784\u8BF5\u8BD9\u8B9C\u89F9\u3EAD\u84A3\u46F5\u46CF\u37F2\u8A3D\u8A1C\u9448\u5F4D\u922B\u4284\u65D4\u7129\u70C4\u1845\u9D6D\u8C9F\u8CE9\u7DDC\u599A\u77C3\u59F0\u436E\u36D4\u8E2A\u8EA7\u4C09\u8F30\u8F4A\u42F4\u6C58\u6FBB\u2321\u489B\u6F79\u6E8B\u17DA\u9BE
9\u36B5\u492F\u90BB\u9097\u5571\u4906\u91BB\u9404\u8A4B\u4062\u8AFC\u9427\u8C1D\u8C3B\u84E5\u8A2B\u9599\u95A7\u9597\u9596\u8D34\u7445\u3EC2\u48FF\u4A42\u43EA\u3EE7\u3225\u968F\u8EE7\u8E66\u8E65\u3ECC\u49ED\u4A78\u3FEE\u7412\u746B\u3EFC\u9741\u90B0\u6847\u4A1D\u9093\u57DF\u975D\u9368\u8989\u8C26\u8B2F\u63BE\u92BA\u5B11\u8B69\u493C\u73F9\u421B\u979B\u9771\u9938\u0F26\u5DC1\u8BC5\u4AB2\u981F\u94DA\u92F6\u95D7\u91E5\u44C0\u8B50\u4A67\u8B64\u98DC\u8A45\u3F00\u922A\u4925\u8414\u993B\u994D\u7B06\u3DFD\u999B\u4B6F\u99AA\u9A5C\u8B65\u58C8\u6A8F\u9A21\u5AFE\u9A2F\u98F1\u4B90\u9948\u99BC\u4BBD\u4B97\u937D\u5872\u1302\u5822\u49B8\u14E8\u7844\u271F\u3DB8\u68C5\u3D7D\u9458\u3927\u6150\u2781\u296B\u6107\u9C4F\u9C53\u9C7B\u9C35\u9C10\u9B7F\u9BCF\u9E2D\u9B9F\uA1F5\uA0FE\u9D21\u4CAE\u4104\u9E18\u4CB0\u9D0C\uA1B4\uA0ED\uA0F3\u992F\u9DA5\u84BD\u6E12\u6FDF\u6B82\u85FC\u4533\u6DA4\u6E84\u6DF0\u8420\u85EE\u6E00\u37D7\u6064\u79E2\u359C\u3640\u492D\u49DE\u3D62\u93DB\u92BE\u9348\u02BF\u78B9\u9277\u944D\u4FE4\u3440\u9064\u555D\u783D\u7854\u78B6\u784B\u1757\u31C9\u4941\u369A\u4F72\u6FDA\u6FD9\u701E\u701E\u5414\u41B5\u57BB\u58F3\u578A\u9D16\u57D7\u7134\u34AF\u41AC\u71EB\u6C40\u4F97\u5B28\u17B5\u8A49\u610C\u5ACE\u5A0B\u42BC\u4488\u372C\u4B7B\u89FC\u93BB\u93B8\u18D6\u0F1D\u8472\u6CC0\u1413\u42FA\u2C26\u43C1\u5994\u3DB7\u6741\u7DA8\u615B\u60A4\u49B9\u498B\u89FA\u92E5\u73E2\u3EE9\u74B4\u8B63\u189F\u3EE1\u4AB3\u6AD8\u73F3\u73FB\u3ED6\u4A3E\u4A94\u17D9\u4A66\u03A7\u1424\u49E5\u7448\u4916\u70A5\u4976\u9284\u73E6\u935F\u04FE\u9331\u8ACE\u8A16\u9386\u8BE7\u55D5\u4935\u8A82\u716B\u4943\u0CFF\u56A4\u061A\u0BEB\u0CB8\u5502\u79C4\u17FA\u7DFE\u16C2\u4A50\u1852\u452E\u9401\u370A\u8AC0\u49AD\u59B0\u18BF\u1883\u7484\u5AA1\u36E2\u3D5B\u36B0\u925F\u5A79\u8A81\u1862\u9374\u3CCD\u0AB4\u4A96\u398A\u50F4\u3D69\u3D4C\u139C\u7175\u42FB\u8218\u6E0F\u90E4\u44EB\u6D57\u7E4F\u7067\u6CAF\u3CD6\u3FED\u3E2D\u6E02\u6F0C\u3D6F\u03F5\u7551\u36BC\u34C8\u4680\u3EDA\u4871\u59C4\u926E\u493E\u8F41\u8C1C\u6BC0\u5812\u57C8\u36D6\u1452\
u70FE\u4362\u4A71\u2FE3\u12B0\u23BD\u68B9\u6967\u1398\u34E5\u7BF4\u36DF\u8A83\u37D6\u33FA\u4C9F\u6A1A\u36AD\u6CB7\u843E\u44DF\u44CE\u6D26\u6D51\u6C82\u6FDE\u6F17\u7109\u833D\u173A\u83ED\u6C80\u7053\u17DB\u5989\u5A82\u17B3\u5A61\u5A71\u1905\u41FC\u372D\u59EF\u173C\u36C7\u718E\u9390\u669A\u42A5\u5A6E\u5A2B\u4293\u6A2B\u3EF9\u7736\u445B\u42CA\u711D\u4259\u89E1\u4FB0\u6D28\u5CC2\u44CE\u7E4D\u43BD\u6A0C\u4256\u1304\u70A6\u7133\u43E9\u3DA5\u6CDF\uF825\u4A4F\u7E65\u59EB\u5D2F\u3DF3\u5F5C\u4A5D\u17DF\u7DA4\u8426\u5485\u3AFA\u3300\u0214\u577E\u08D5\u0619\u3FE5\u1F9E\uA2B6\u7003\u915B\u5D70\u738F\u7CD3\u8A59\u9420\u4FC8\u7FE7\u72CD\u7310\u7AF4\u7338\u7339\u56F6\u7341\u7348\u3EA9\u7B18\u906C\u71F5\u48F2\u73E1\u81F6\u3ECA\u770C\u3ED1\u6CA2\u56FD\u7419\u741E\u741F\u3EE2\u3EF0\u3EF4\u3EFA\u74D3\u3F0E\u3F53\u7542\u756D\u7572\u758D\u3F7C\u75C8\u75DC\u3FC0\u764D\u3FD7\u7674\u3FDC\u767A\u4F5C\u7188\u5623\u8980\u5869\u401D\u7743\u4039\u6761\u4045\u35DB\u7798\u406A\u406F\u5C5E\u77BE\u77CB\u58F2\u7818\u70B9\u781C\u40A8\u7839\u7847\u7851\u7866\u8448\u5535\u7933\u6803\u7932\u4103\u4109\u7991\u7999\u8FBB\u7A06\u8FBC\u4167\u7A91\u41B2\u7ABC\u8279\u41C4\u7ACF\u7ADB\u41CF\u4E21\u7B62\u7B6C\u7B7B\u7C12\u7C1B\u4260\u427A\u7C7B\u7C9C\u428C\u7CB8\u4294\u7CED\u8F93\u70C0\u0CCF\u7DCF\u7DD4\u7DD0\u7DFD\u7FAE\u7FB4\u729F\u4397\u8020\u8025\u7B39\u802E\u8031\u8054\u3DCC\u57B4\u70A0\u80B7\u80E9\u43ED\u810C\u732A\u810E\u8112\u7560\u8114\u4401\u3B39\u8156\u8159\u815A\u4413\u583A\u817C\u8184\u4425\u8193\u442D\u81A5\u57EF\u81C1\u81E4\u8254\u448F\u82A6\u8276\u82CA\u82D8\u82FF\u44B0\u8357\u9669\u698A\u8405\u70F5\u8464\u60E3\u8488\u4504\u84BE\u84E1\u84F8\u8510\u8538\u8552\u453B\u856F\u8570\u85E0\u4577\u8672\u8692\u86B2\u86EF\u9645\u878B\u4606\u4617\u88AE\u88FF\u8924\u8947\u8991\u7967\u8A29\u8A38\u8A94\u8AB4\u8C51\u8CD4\u8CF2\u8D1C\u4798\u585F\u8DC3\u47ED\u4EEE\u8E3A\u55D8\u5754\u8E71\u55F5\u8EB0\u4837\u8ECE\u8EE2\u8EE4\u8EED\u8EF2\u8FB7\u8FC1\u8FCA\u8FCC\u9033\u99C4\u48AD\u98E0\u9213\u491E\u9228\u9258\u926B\u9
2B1\u92AE\u92BF\u92E3\u92EB\u92F3\u92F4\u92FD\u9343\u9384\u93AD\u4945\u4951\u9EBF\u9417\u5301\u941D\u942D\u943E\u496A\u9454\u9479\u952D\u95A2\u49A7\u95F4\u9633\u49E5\u67A0\u4A24\u9740\u4A35\u97B2\u97C2\u5654\u4AE4\u60E8\u98B9\u4B19\u98F1\u5844\u990E\u9919\u51B4\u991C\u9937\u9942\u995D\u9962\u4B70\u99C5\u4B9D\u9A3C\u9B0F\u7A83\u9B69\u9B81\u9BDD\u9BF1\u9BF4\u4C6D\u9C20\u376F\u1BC2\u9D49\u9C3A\u9EFE\u5650\u9D93\u9DBD\u9DC0\u9DFC\u94F6\u8FB6\u9E7B\u9EAC\u9EB1\u9EBD\u9EC6\u94DC\u9EE2\u9EF1\u9EF8\u7AC8\u9F44\u0094\u02B7\u03A0\u691A\u94C3\u59AC\u04D7\u5840\u94C1\u37B9\u05D5\u0615\u0676\u16BA\u5757\u7173\u0AC2\u0ACD\u0BBF\u546A\uF83B\u0BCB\u549E\u0BFB\u0C3B\u0C53\u0C65\u0C7C\u60E7\u0C8D\u567A\u0CB5\u0CDD\u0CED\u0D6F\u0DB2\u0DC8\u6955\u9C2F\u87A5\u0E04\u0E0E\u0ED7\u0F90\u0F2D\u0E73\u5C20\u0FBC\u5E0B\u105C\u104F\u1076\u671E\u107B\u1088\u1096\u3647\u10BF\u10D3\u112F\u113B\u5364\u84AD\u12E3\u1375\u1336\u8B81\u1577\u1619\u17C3\u17C7\u4E78\u70BB\u182D\u196A\u1A2D\u1A45\u1C2A\u1C70\u1CAC\u1EC8\u62C3\u1ED5\u1F15\u7198\u6855\u2045\u69E9\u36C8\u227C\u23D7\u23FA\u272A\u2871\u294F\u82FD\u2967\u2993\u2AD5\u89A5\u2AE8\u8FA0\u2B0E\u97B8\u2B3F\u9847\u9ABD\u2C4C\u0000\u2C88\u2CB7\u5BE8\u2D08\u2D12\u2DB7\u2D95\u2E42\u2F74\u2FCC\u3033\u3066\u331F\u33DE\u5FB1\u6648\u66BF\u7A79\u3567\u35F3\u7201\u49BA\u77D7\u361A\u3716\u7E87\u0346\u58B5\u670E\u6918\u3AA7\u7657\u5FE2\u3E11\u3EB9\u75FE\u209A\u48D0\u4AB8\u4119\u8A9A\u42EE\u430D\u403B\u4334\u4396\u4A45\u05CA\u51D2\u0611\u599F\u1EA8\u3BBE\u3CFF\u4404\u44D6\u5788\u4674\u399B\u472F\u85E8\u99C9\u3762\u21C3\u8B5E\u8B4E\u99D6\u4812\u48FB\u4A15\u7209\u4AC0\u0C78\u5965\u4EA5\u4F86\u0779\u8EDA\u502C\u528F\u573F\u7171\u5299\u5419\u3F4A\u4AA7\u55BC\u5446\u546E\u6B52\u91D4\u3473\u553F\u7632\u555E\u4718\u5562\u5566\u57C7\u493F\u585D\u5066\u34FB\u33CC\u60DE\u5903\u477C\u8948\u5AAE\u5B89\u5C06\u1D90\u57A1\u7151\u6FB6\u6102\u7C12\u9056\u61B2\u4F9A\u8B62\u6402\u644A\u5D5B\u6BF7\u8F36\u6484\u191C\u8AEA\u49F6\u6488\u3FEF\u6512\u4BC0\u65BF\u66B5\u271B\u9465\u57E1\u619
5\u5A27\uF8CD\u4FBB\u56B9\u4521\u66FC\u4E6A\u4934\u9656\u6D8F\u6CBD\u3618\u8977\u6799\u686E\u6411\u685E\u71DF\u68C7\u7B42\u90C0\u0A11\u6926\u9104\u6939\u7A45\u9DF0\u69FA\u9A26\u6A2D\u365F\u6469\u0021\u7983\u6A34\u6B5B\u5D2C\u3519\u83CF\u6B9D\u46D0\u6CA4\u753B\u8865\u6DAE\u58B6\u371C\u258D\u704B\u71CD\u3C54\u7280\u7285\u9281\u217A\u728B\u9330\u72E6\u49D0\u6C39\u949F\u7450\u0EF8\u8827\u88F5\u2926\u8473\u17B1\u6EB8\u4A2A\u1820\u39A4\u36B9\u5C10\u79E3\u453F\u66B6\u9CAD\u98A4\u8943\u77CC\u7858\u56D6\u40DF\u160A\u39A1\u372F\u80E8\u13C5\u71AD\u8366\u79DD\u91A8\u5A67\u4CB7\u70AF\u89AB\u79FD\u7A0A\u7B0B\u7D66\u417A\u7B43\u797E\u8009\u6FB5\uA2DF\u6A03\u8318\u53A2\u6E07\u93BF\u6836\u975D\u816F\u8023\u69B5\u13ED\u322F\u8048\u5D85\u8C30\u8083\u5715\u9823\u8949\u5DAB\u4988\u65BE\u69D5\u53D2\u4AA5\u3F81\u3C11\u6736\u8090\u80F4\u812E\u1FA1\u814F\u8189\u81AF\u821A\u8306\u832F\u838A\u35CA\u8468\u86AA\u48FA\u63E6\u8956\u7808\u9255\u89B8\u43F2\u89E7\u43DF\u89E8\u8B46\u8BD4\u59F8\u8C09\u8F0B\u8FC5\u90EC\u7B51\u9110\u913C\u3DF7\u915E\u4ACA\u8FD0\u728F\u568B\u94E7\u95E9\u95B0\u95B8\u9732\u98D1\u9949\u996A\u99C3\u9A28\u9B0E\u9D5A\u9D9B\u7E9F\u9EF8\u9F23\u4CA4\u9547\uA293\u71A2\uA2FF\u4D91\u9012\uA5CB\u4D9C\u0C9C\u8FBE\u55C1\u8FBA\u24B0\u8FB9\u4A93\u4509\u7E7F\u6F56\u6AB1\u4EEA\u34E4\u8B2C\u789D\u373A\u8E80\u17F5\u8024\u8B6C\u8B99\u7A3E\u66AF\u3DEB\u7655\u3CB7\u5635\u5956\u4E9A\u5E81\u6258\u56BF\u0E6D\u8E0E\u5B6D\u3E88\u4C9E\u63DE\u62D0\u17F6\u187B\u6530\u562D\u5C4A\u541A\u5311\u3DC6\u9D98\u4C7D\u5622\u561E\u7F49\u5ED8\u5975\u3D40\u8770\u4E1C\u0FEA\u0D49\u36BA\u8117\u9D5E\u8D18\u763B\u9C45\u764E\u77B9\u9345\u5432\u8148\u82F7\u5625\u8132\u8418\u80BD\u55EA\u7962\u5643\u5416\u0E9D\u35CE\u5605\u55F1\u66F1\u82E2\u362D\u7534\u55F0\u55BA\u5497\u5572\u0C41\u0C96\u5ED0\u5148\u0E76\u2C62\u0EA2\u9EAB\u7D5A\u55DE\u1075\u629D\u976D\u5494\u8CCD\u71F6\u9176\u63FC\u63B9\u63FE\u5569\u2B43\u9C72\u2EB3\u519A\u34DF\u0DA7\u51A7\u544D\u551E\u5513\u7666\u8E2D\u688A\u75B1\u80B6\u8804\u8786\u88C7\u81B6\u841C\u10C1\
u44EC\u7304\u4706\u5B90\u830B\u6893\u567B\u26F4\u7D2F\u41A3\u7D73\u6ED0\u72B6\u9170\u11D9\u9208\u3CFC\uA6A9\u0EAC\u0EF9\u7266\u1CA2\u474E\u4FC2\u7FF9\u0FEB\u40FA\u9C5D\u651F\u2DA0\u48F3\u47E0\u9D7C\u0FEC\u0E0A\u6062\u75A3\u0FED\u0000\u6048\u1187\u71A3\u7E8E\u9D50\u4E1A\u4E04\u3577\u5B0D\u6CB2\u5367\u36AC\u39DC\u537D\u36A5\u4618\u589A\u4B6E\u822D\u544B\u57AA\u5A95\u0979\u0000\u3A52\u2465\u7374\u9EAC\u4D09\u9BED\u3CFE\u9F30\u4C5B\u4FA9\u959E\u9FDE\u845C\u3DB6\u72B2\u67B3\u3720\u632E\u7D25\u3EF7\u3E2C\u3A2A\u9008\u52CC\u3E74\u367A\u45E9\u048E\u7640\u5AF0\u0EB6\u787A\u7F2E\u58A7\u40BF\u567C\u9B8B\u5D74\u7654\uA434\u9E85\u4CE1\u75F9\u37FB\u6119\u30DA\u43F2\u0000\u565D\u12A9\u57A7\u4963\u9E06\u5234\u70AE\u35AD\u6C4A\u9D7C\u7C56\u9B39\u57DE\u176C\u5C53\u64D3\u94D0\u6335\u7164\u86AD\u0D28\u6D22\u4AE2\u0D71\u0000\u51FE\u1F0F\u5D8E\u9703\u1DD1\u9E81\u904C\u7B1F\u9B02\u5CD1\u7BA3\u6268\u6335\u9AFF\u7BCF\u9B2A\u7C7E\u9B2E\u7C42\u7C86\u9C15\u7BFC\u9B09\u9F17\u9C1B\u493E\u9F5A\u5573\u5BC3\u4FFD\u9E98\u4FF2\u5260\u3E06\u52D1\u5767\u5056\u59B7\u5E12\u97C8\u9DAB\u8F5C\u5469\u97B4\u9940\u97BA\u532C\u6130\u692C\u53DA\u9C0A\u9D02\u4C3B\u9641\u6980\u50A6\u7546\u176D\u99DA\u5273\u0000\u9159\u9681\u915C\u0000\u9151\u8E97\u637F\u6D23\u6ACA\u5611\u918E\u757A\u6285\u03FC\u734F\u7C70\u5C21\u3CFD\u0000\u4919\u76D6\u9B9D\u4E2A\u0CD4\u83BE\u8842\u0000\u5C4A\u69C0\u50ED\u577A\u521F\u5DF5\u4ECE\u6C31\u01F2\u4F39\u549C\u54DA\u529A\u8D82\u35FE\u5F0C\u35F3\u0000\u6B52\u917C\u9FA5\u9B97\u982E\u98B4\u9ABA\u9EA8\u9E84\u717A\u7B14\u0000\u6BFA\u8818\u7F78\u0000\u5620\uA64A\u8E77\u9F53\u0000\u8DD4\u8E4F\u9E1C\u8E01\u6282\u837D\u8E28\u8E75\u7AD3\u4A77\u7A3E\u78D8\u6CEA\u8A67\u7607\u8A5A\u9F26\u6CCE\u87D6\u75C3\uA2B2\u7853\uF840\u8D0C\u72E2\u7371\u8B2D\u7302\u74F1\u8CEB\u4ABB\u862F\u5FBA\u88A0\u44B7\u0000\u183B\u6E05\u0000\u8A7E\u251B\u0000\u60FD\u7667\u9AD7\u9D44\u936E\u9B8F\u87F5\u0000\u880F\u8CF7\u732C\u9721\u9BB0\u35D6\u72B2\u4C07\u7C51\u994A\u6159\u6159\u4C04\u9E96\u617D\u0000\u575F\u616F\u62A6\u6239\u6
2CE\u3A5C\u61E2\u53AA\u33F5\u6364\u6802\u35D2\u5D57\u8BC2\u8FDA\u8E39\u0000\u50D9\u1D46\u7906\u5332\u9638\u0F3B\u4065\u0000\u77FE\u0000\u7CC2\u5F1A\u7CDA\u7A2D\u8066\u8063\u7D4D\u7505\u74F2\u8994\u821A\u670C\u8062\u7486\u805B\u74F0\u8103\u7724\u8989\u67CC\u7553\u6ED1\u87A9\u87CE\u81C8\u878C\u8A49\u8CAD\u8B43\u772B\u74F8\u84DA\u3635\u69B2\u8DA6\u0000\u89A9\u7468\u6DB9\u87C1\u4011\u74E7\u3DDB\u7176\u60A4\u619C\u3CD1\u7162\u6077\u0000\u7F71\u8B2D\u7250\u60E9\u4B7E\u5220\u3C18\u3CC7\u5ED7\u7656\u5531\u1944\u12FE\u9903\u6DDC\u70AD\u5CC1\u61AD\u8A0F\u3677\u00EE\u6846\u4F0E\u4562\u5B1F\u634C\u9F50\u9EA6\u626B\u3000\uFF0C\u3001\u3002\uFF0E\u2027\uFF1B\uFF1A\uFF1F\uFF01\uFE30\u2026\u2025\uFE50\uFE51\uFE52\u00B7\uFE54\uFE55\uFE56\uFE57\uFF5C\u2013\uFE31\u2014\uFE33\u2574\uFE34\uFE4F\uFF08\uFF09\uFE35\uFE36\uFF5B\uFF5D\uFE37\uFE38\u3014\u3015\uFE39\uFE3A\u3010\u3011\uFE3B\uFE3C\u300A\u300B\uFE3D\uFE3E\u3008\u3009\uFE3F\uFE40\u300C\u300D\uFE41\uFE42\u300E\u300F\uFE43\uFE44\uFE59\uFE5A\uFE5B\uFE5C\uFE5D\uFE5E\u2018\u2019\u201C\u201D\u301D\u301E\u2035\u2032\uFF03\uFF06\uFF0A\u203B\u00A7\u3003\u25CB\u25CF\u25B3\u25B2\u25CE\u2606\u2605\u25C7\u25C6\u25A1\u25A0\u25BD\u25BC\u32A3\u2105\u00AF\uFFE3\uFF3F\u02CD\uFE49\uFE4A\uFE4D\uFE4E\uFE4B\uFE4C\uFE5F\uFE60\uFE61\uFF0B\uFF0D\u00D7\u00F7\u00B1\u221A\uFF1C\uFF1E\uFF1D\u2266\u2267\u2260\u221E\u2252\u2261\uFE62\uFE63\uFE64\uFE65\uFE66\uFF5E\u2229\u222A\u22A5\u2220\u221F\u22BF\u33D2\u33D1\u222B\u222E\u2235\u2234\u2640\u2642\u2295\u2299\u2191\u2193\u2190\u2192\u2196\u2197\u2199\u2198\u2225\u2223\uFF0F\uFF3C\u2215\uFE68\uFF04\uFFE5\u3012\uFFE0\uFFE1\uFF05\uFF20\u2103\u2109\uFE69\uFE6A\uFE6B\u33D5\u339C\u339D\u339E\u33CE\u33A1\u338E\u338F\u33C4\u00B0\u5159\u515B\u515E\u515D\u5161\u5163\u55E7\u74E9\u7CCE\u2581\u2582\u2583\u2584\u2585\u2586\u2587\u2588\u258F\u258E\u258D\u258C\u258B\u258A\u2589\u253C\u2534\u252C\u2524\u251C\u2594\u2500\u2502\u2595\u250C\u2510\u2514\u2518\u256D\u256E\u2570\u256F\u2550\u255E\u256A\u2561\u25E2\u25E3\u25E5\u25E4\u257
1\u2572\u2573\uFF10\uFF11\uFF12\uFF13\uFF14\uFF15\uFF16\uFF17\uFF18\uFF19\u2160\u2161\u2162\u2163\u2164\u2165\u2166\u2167\u2168\u2169\u3021\u3022\u3023\u3024\u3025\u3026\u3027\u3028\u3029\u5341\u5344\u5345\uFF21\uFF22\uFF23\uFF24\uFF25\uFF26\uFF27\uFF28\uFF29\uFF2A\uFF2B\uFF2C\uFF2D\uFF2E\uFF2F\uFF30\uFF31\uFF32\uFF33\uFF34\uFF35\uFF36\uFF37\uFF38\uFF39\uFF3A\uFF41\uFF42\uFF43\uFF44\uFF45\uFF46\uFF47\uFF48\uFF49\uFF4A\uFF4B\uFF4C\uFF4D\uFF4E\uFF4F\uFF50\uFF51\uFF52\uFF53\uFF54\uFF55\uFF56\uFF57\uFF58\uFF59\uFF5A\u0391\u0392\u0393\u0394\u0395\u0396\u0397\u0398\u0399\u039A\u039B\u039C\u039D\u039E\u039F\u03A0\u03A1\u03A3\u03A4\u03A5\u03A6\u03A7\u03A8\u03A9\u03B1\u03B2\u03B3\u03B4\u03B5\u03B6\u03B7\u03B8\u03B9\u03BA\u03BB\u03BC\u03BD\u03BE\u03BF\u03C0\u03C1\u03C3\u03C4\u03C5\u03C6\u03C7\u03C8\u03C9\u3105\u3106\u3107\u3108\u3109\u310A\u310B\u310C\u310D\u310E\u310F\u3110\u3111\u3112\u3113\u3114\u3115\u3116\u3117\u3118\u3119\u311A\u311B\u311C\u311D\u311E\u311F\u3120\u3121\u3122\u3123\u3124\u3125\u3126\u3127\u3128\u3129\u02D9\u02C9\u02CA\u02C7\u02CB\u2400\u2401\u2402\u2403\u2404\u2405\u2406\u2407\u2408\u2409\u240A\u240B\u240C\u240D\u240E\u240F\u2410\u2411\u2412\u2413\u2414\u2415\u2416\u2417\u2418\u2419\u241A\u241B\u241C\u241D\u241E\u241F\u2421\u20AC"; + + private static final String TABLE3 = 
"\u4E00\u4E59\u4E01\u4E03\u4E43\u4E5D\u4E86\u4E8C\u4EBA\u513F\u5165\u516B\u51E0\u5200\u5201\u529B\u5315\u5341\u535C\u53C8\u4E09\u4E0B\u4E08\u4E0A\u4E2B\u4E38\u51E1\u4E45\u4E48\u4E5F\u4E5E\u4E8E\u4EA1\u5140\u5203\u52FA\u5343\u53C9\u53E3\u571F\u58EB\u5915\u5927\u5973\u5B50\u5B51\u5B53\u5BF8\u5C0F\u5C22\u5C38\u5C71\u5DDD\u5DE5\u5DF1\u5DF2\u5DF3\u5DFE\u5E72\u5EFE\u5F0B\u5F13\u624D\u4E11\u4E10\u4E0D\u4E2D\u4E30\u4E39\u4E4B\u5C39\u4E88\u4E91\u4E95\u4E92\u4E94\u4EA2\u4EC1\u4EC0\u4EC3\u4EC6\u4EC7\u4ECD\u4ECA\u4ECB\u4EC4\u5143\u5141\u5167\u516D\u516E\u516C\u5197\u51F6\u5206\u5207\u5208\u52FB\u52FE\u52FF\u5316\u5339\u5348\u5347\u5345\u535E\u5384\u53CB\u53CA\u53CD\u58EC\u5929\u592B\u592A\u592D\u5B54\u5C11\u5C24\u5C3A\u5C6F\u5DF4\u5E7B\u5EFF\u5F14\u5F15\u5FC3\u6208\u6236\u624B\u624E\u652F\u6587\u6597\u65A4\u65B9\u65E5\u66F0\u6708\u6728\u6B20\u6B62\u6B79\u6BCB\u6BD4\u6BDB\u6C0F\u6C34\u706B\u722A\u7236\u723B\u7247\u7259\u725B\u72AC\u738B\u4E19\u4E16\u4E15\u4E14\u4E18\u4E3B\u4E4D\u4E4F\u4E4E\u4EE5\u4ED8\u4ED4\u4ED5\u4ED6\u4ED7\u4EE3\u4EE4\u4ED9\u4EDE\u5145\u5144\u5189\u518A\u51AC\u51F9\u51FA\u51F8\u520A\u52A0\u529F\u5305\u5306\u5317\u531D\u4EDF\u534A\u5349\u5361\u5360\u536F\u536E\u53BB\u53EF\u53E4\u53F3\u53EC\u53EE\u53E9\u53E8\u53FC\u53F8\u53F5\u53EB\u53E6\u53EA\u53F2\u53F1\u53F0\u53E5\u53ED\u53FB\u56DB\u56DA\u5916\u592E\u5931\u5974\u5976\u5B55\u5B83\u5C3C\u5DE8\u5DE7\u5DE6\u5E02\u5E03\u5E73\u5E7C\u5F01\u5F18\u5F17\u5FC5\u620A\u6253\u6254\u6252\u6251\u65A5\u65E6\u672E\u672C\u672A\u672B\u672D\u6B63\u6BCD\u6C11\u6C10\u6C38\u6C41\u6C40\u6C3E\u72AF\u7384\u7389\u74DC\u74E6\u7518\u751F\u7528\u7529\u7530\u7531\u7532\u7533\u758B\u767D\u76AE\u76BF\u76EE\u77DB\u77E2\u77F3\u793A\u79BE\u7A74\u7ACB\u4E1E\u4E1F\u4E52\u4E53\u4E69\u4E99\u4EA4\u4EA6\u4EA5\u4EFF\u4F09\u4F19\u4F0A\u4F15\u4F0D\u4F10\u4F11\u4F0F\u4EF2\u4EF6\u4EFB\u4EF0\u4EF3\u4EFD\u4F01\u4F0B\u5149\u5147\u5146\u5148\u5168\u5171\u518D\u51B0\u5217\u5211\u5212\u520E\u5216\u52A3\u5308\u5321\u5320\u5370\u5371\u5409\u540F\u540C\u540A\u5410\
u5401\u540B\u5404\u5411\u540D\u5408\u5403\u540E\u5406\u5412\u56E0\u56DE\u56DD\u5733\u5730\u5728\u572D\u572C\u572F\u5729\u5919\u591A\u5937\u5938\u5984\u5978\u5983\u597D\u5979\u5982\u5981\u5B57\u5B58\u5B87\u5B88\u5B85\u5B89\u5BFA\u5C16\u5C79\u5DDE\u5E06\u5E76\u5E74\u5F0F\u5F1B\u5FD9\u5FD6\u620E\u620C\u620D\u6210\u6263\u625B\u6258\u6536\u65E9\u65E8\u65EC\u65ED\u66F2\u66F3\u6709\u673D\u6734\u6731\u6735\u6B21\u6B64\u6B7B\u6C16\u6C5D\u6C57\u6C59\u6C5F\u6C60\u6C50\u6C55\u6C61\u6C5B\u6C4D\u6C4E\u7070\u725F\u725D\u767E\u7AF9\u7C73\u7CF8\u7F36\u7F8A\u7FBD\u8001\u8003\u800C\u8012\u8033\u807F\u8089\u808B\u808C\u81E3\u81EA\u81F3\u81FC\u820C\u821B\u821F\u826E\u8272\u827E\u866B\u8840\u884C\u8863\u897F\u9621\u4E32\u4EA8\u4F4D\u4F4F\u4F47\u4F57\u4F5E\u4F34\u4F5B\u4F55\u4F30\u4F50\u4F51\u4F3D\u4F3A\u4F38\u4F43\u4F54\u4F3C\u4F46\u4F63\u4F5C\u4F60\u4F2F\u4F4E\u4F36\u4F59\u4F5D\u4F48\u4F5A\u514C\u514B\u514D\u5175\u51B6\u51B7\u5225\u5224\u5229\u522A\u5228\u52AB\u52A9\u52AA\u52AC\u5323\u5373\u5375\u541D\u542D\u541E\u543E\u5426\u544E\u5427\u5446\u5443\u5433\u5448\u5442\u541B\u5429\u544A\u5439\u543B\u5438\u542E\u5435\u5436\u5420\u543C\u5440\u5431\u542B\u541F\u542C\u56EA\u56F0\u56E4\u56EB\u574A\u5751\u5740\u574D\u5747\u574E\u573E\u5750\u574F\u573B\u58EF\u593E\u599D\u5992\u59A8\u599E\u59A3\u5999\u5996\u598D\u59A4\u5993\u598A\u59A5\u5B5D\u5B5C\u5B5A\u5B5B\u5B8C\u5B8B\u5B8F\u5C2C\u5C40\u5C41\u5C3F\u5C3E\u5C90\u5C91\u5C94\u5C8C\u5DEB\u5E0C\u5E8F\u5E87\u5E8A\u5EF7\u5F04\u5F1F\u5F64\u5F62\u5F77\u5F79\u5FD8\u5FCC\u5FD7\u5FCD\u5FF1\u5FEB\u5FF8\u5FEA\u6212\u6211\u6284\u6297\u6296\u6280\u6276\u6289\u626D\u628A\u627C\u627E\u6279\u6273\u6292\u626F\u6298\u626E\u6295\u6293\u6291\u6286\u6539\u653B\u6538\u65F1\u66F4\u675F\u674E\u674F\u6750\u6751\u675C\u6756\u675E\u6749\u6746\u6760\u6753\u6757\u6B65\u6BCF\u6C42\u6C5E\u6C99\u6C81\u6C88\u6C89\u6C85\u6C9B\u6C6A\u6C7A\u6C90\u6C70\u6C8C\u6C68\u6C96\u6C92\u6C7D\u6C83\u6C72\u6C7E\u6C74\u6C86\u6C76\u6C8D\u6C94\u6C98\u6C82\u7076\u707C\u707D\u7078\u7262\u7261\u7260\u7
2C4\u72C2\u7396\u752C\u752B\u7537\u7538\u7682\u76EF\u77E3\u79C1\u79C0\u79BF\u7A76\u7CFB\u7F55\u8096\u8093\u809D\u8098\u809B\u809A\u80B2\u826F\u8292\u828B\u828D\u898B\u89D2\u8A00\u8C37\u8C46\u8C55\u8C9D\u8D64\u8D70\u8DB3\u8EAB\u8ECA\u8F9B\u8FB0\u8FC2\u8FC6\u8FC5\u8FC4\u5DE1\u9091\u90A2\u90AA\u90A6\u90A3\u9149\u91C6\u91CC\u9632\u962E\u9631\u962A\u962C\u4E26\u4E56\u4E73\u4E8B\u4E9B\u4E9E\u4EAB\u4EAC\u4F6F\u4F9D\u4F8D\u4F73\u4F7F\u4F6C\u4F9B\u4F8B\u4F86\u4F83\u4F70\u4F75\u4F88\u4F69\u4F7B\u4F96\u4F7E\u4F8F\u4F91\u4F7A\u5154\u5152\u5155\u5169\u5177\u5176\u5178\u51BD\u51FD\u523B\u5238\u5237\u523A\u5230\u522E\u5236\u5241\u52BE\u52BB\u5352\u5354\u5353\u5351\u5366\u5377\u5378\u5379\u53D6\u53D4\u53D7\u5473\u5475\u5496\u5478\u5495\u5480\u547B\u5477\u5484\u5492\u5486\u547C\u5490\u5471\u5476\u548C\u549A\u5462\u5468\u548B\u547D\u548E\u56FA\u5783\u5777\u576A\u5769\u5761\u5766\u5764\u577C\u591C\u5949\u5947\u5948\u5944\u5954\u59BE\u59BB\u59D4\u59B9\u59AE\u59D1\u59C6\u59D0\u59CD\u59CB\u59D3\u59CA\u59AF\u59B3\u59D2\u59C5\u5B5F\u5B64\u5B63\u5B97\u5B9A\u5B98\u5B9C\u5B99\u5B9B\u5C1A\u5C48\u5C45\u5C46\u5CB7\u5CA1\u5CB8\u5CA9\u5CAB\u5CB1\u5CB3\u5E18\u5E1A\u5E16\u5E15\u5E1B\u5E11\u5E78\u5E9A\u5E97\u5E9C\u5E95\u5E96\u5EF6\u5F26\u5F27\u5F29\u5F80\u5F81\u5F7F\u5F7C\u5FDD\u5FE0\u5FFD\u5FF5\u5FFF\u600F\u6014\u602F\u6035\u6016\u602A\u6015\u6021\u6027\u6029\u602B\u601B\u6216\u6215\u623F\u623E\u6240\u627F\u62C9\u62CC\u62C4\u62BF\u62C2\u62B9\u62D2\u62DB\u62AB\u62D3\u62D4\u62CB\u62C8\u62A8\u62BD\u62BC\u62D0\u62D9\u62C7\u62CD\u62B5\u62DA\u62B1\u62D8\u62D6\u62D7\u62C6\u62AC\u62CE\u653E\u65A7\u65BC\u65FA\u6614\u6613\u660C\u6606\u6602\u660E\u6600\u660F\u6615\u660A\u6607\u670D\u670B\u676D\u678B\u6795\u6771\u679C\u6773\u6777\u6787\u679D\u6797\u676F\u6770\u677F\u6789\u677E\u6790\u6775\u679A\u6793\u677C\u676A\u6772\u6B23\u6B66\u6B67\u6B7F\u6C13\u6C1B\u6CE3\u6CE8\u6CF3\u6CB1\u6CCC\u6CE5\u6CB3\u6CBD\u6CBE\u6CBC\u6CE2\u6CAB\u6CD5\u6CD3\u6CB8\u6CC4\u6CB9\u6CC1\u6CAE\u6CD7\u6CC5\u6CF1\u6CBF\u6CBB\u6CE1\u6CDB\u6CC
A\u6CAC\u6CEF\u6CDC\u6CD6\u6CE0\u7095\u708E\u7092\u708A\u7099\u722C\u722D\u7238\u7248\u7267\u7269\u72C0\u72CE\u72D9\u72D7\u72D0\u73A9\u73A8\u739F\u73AB\u73A5\u753D\u759D\u7599\u759A\u7684\u76C2\u76F2\u76F4\u77E5\u77FD\u793E\u7940\u7941\u79C9\u79C8\u7A7A\u7A79\u7AFA\u7CFE\u7F54\u7F8C\u7F8B\u8005\u80BA\u80A5\u80A2\u80B1\u80A1\u80AB\u80A9\u80B4\u80AA\u80AF\u81E5\u81FE\u820D\u82B3\u829D\u8299\u82AD\u82BD\u829F\u82B9\u82B1\u82AC\u82A5\u82AF\u82B8\u82A3\u82B0\u82BE\u82B7\u864E\u8671\u521D\u8868\u8ECB\u8FCE\u8FD4\u8FD1\u90B5\u90B8\u90B1\u90B6\u91C7\u91D1\u9577\u9580\u961C\u9640\u963F\u963B\u9644\u9642\u96B9\u96E8\u9752\u975E\u4E9F\u4EAD\u4EAE\u4FE1\u4FB5\u4FAF\u4FBF\u4FE0\u4FD1\u4FCF\u4FDD\u4FC3\u4FB6\u4FD8\u4FDF\u4FCA\u4FD7\u4FAE\u4FD0\u4FC4\u4FC2\u4FDA\u4FCE\u4FDE\u4FB7\u5157\u5192\u5191\u51A0\u524E\u5243\u524A\u524D\u524C\u524B\u5247\u52C7\u52C9\u52C3\u52C1\u530D\u5357\u537B\u539A\u53DB\u54AC\u54C0\u54A8\u54CE\u54C9\u54B8\u54A6\u54B3\u54C7\u54C2\u54BD\u54AA\u54C1\u54C4\u54C8\u54AF\u54AB\u54B1\u54BB\u54A9\u54A7\u54BF\u56FF\u5782\u578B\u57A0\u57A3\u57A2\u57CE\u57AE\u5793\u5955\u5951\u594F\u594E\u5950\u59DC\u59D8\u59FF\u59E3\u59E8\u5A03\u59E5\u59EA\u59DA\u59E6\u5A01\u59FB\u5B69\u5BA3\u5BA6\u5BA4\u5BA2\u5BA5\u5C01\u5C4E\u5C4F\u5C4D\u5C4B\u5CD9\u5CD2\u5DF7\u5E1D\u5E25\u5E1F\u5E7D\u5EA0\u5EA6\u5EFA\u5F08\u5F2D\u5F65\u5F88\u5F85\u5F8A\u5F8B\u5F87\u5F8C\u5F89\u6012\u601D\u6020\u6025\u600E\u6028\u604D\u6070\u6068\u6062\u6046\u6043\u606C\u606B\u606A\u6064\u6241\u62DC\u6316\u6309\u62FC\u62ED\u6301\u62EE\u62FD\u6307\u62F1\u62F7\u62EF\u62EC\u62FE\u62F4\u6311\u6302\u653F\u6545\u65AB\u65BD\u65E2\u6625\u662D\u6620\u6627\u662F\u661F\u6628\u6631\u6624\u66F7\u67FF\u67D3\u67F1\u67D4\u67D0\u67EC\u67B6\u67AF\u67F5\u67E9\u67EF\u67C4\u67D1\u67B4\u67DA\u67E5\u67B8\u67CF\u67DE\u67F3\u67B0\u67D9\u67E2\u67DD\u67D2\u6B6A\u6B83\u6B86\u6BB5\u6BD2\u6BD7\u6C1F\u6CC9\u6D0B\u6D32\u6D2A\u6D41\u6D25\u6D0C\u6D31\u6D1E\u6D17\u6D3B\u6D3D\u6D3E\u6D36\u6D1B\u6CF5\u6D39\u6D27\u6D38\u6D29\u6D2E\u6D35\u6D0E\u6D2B\
u70AB\u70BA\u70B3\u70AC\u70AF\u70AD\u70B8\u70AE\u70A4\u7230\u7272\u726F\u7274\u72E9\u72E0\u72E1\u73B7\u73CA\u73BB\u73B2\u73CD\u73C0\u73B3\u751A\u752D\u754F\u754C\u754E\u754B\u75AB\u75A4\u75A5\u75A2\u75A3\u7678\u7686\u7687\u7688\u76C8\u76C6\u76C3\u76C5\u7701\u76F9\u76F8\u7709\u770B\u76FE\u76FC\u7707\u77DC\u7802\u7814\u780C\u780D\u7946\u7949\u7948\u7947\u79B9\u79BA\u79D1\u79D2\u79CB\u7A7F\u7A81\u7AFF\u7AFD\u7C7D\u7D02\u7D05\u7D00\u7D09\u7D07\u7D04\u7D06\u7F38\u7F8E\u7FBF\u8004\u8010\u800D\u8011\u8036\u80D6\u80E5\u80DA\u80C3\u80C4\u80CC\u80E1\u80DB\u80CE\u80DE\u80E4\u80DD\u81F4\u8222\u82E7\u8303\u8305\u82E3\u82DB\u82E6\u8304\u82E5\u8302\u8309\u82D2\u82D7\u82F1\u8301\u82DC\u82D4\u82D1\u82DE\u82D3\u82DF\u82EF\u8306\u8650\u8679\u867B\u867A\u884D\u886B\u8981\u89D4\u8A08\u8A02\u8A03\u8C9E\u8CA0\u8D74\u8D73\u8DB4\u8ECD\u8ECC\u8FF0\u8FE6\u8FE2\u8FEA\u8FE5\u8FED\u8FEB\u8FE4\u8FE8\u90CA\u90CE\u90C1\u90C3\u914B\u914A\u91CD\u9582\u9650\u964B\u964C\u964D\u9762\u9769\u97CB\u97ED\u97F3\u9801\u98A8\u98DB\u98DF\u9996\u9999\u4E58\u4EB3\u500C\u500D\u5023\u4FEF\u5026\u5025\u4FF8\u5029\u5016\u5006\u503C\u501F\u501A\u5012\u5011\u4FFA\u5000\u5014\u5028\u4FF1\u5021\u500B\u5019\u5018\u4FF3\u4FEE\u502D\u502A\u4FFE\u502B\u5009\u517C\u51A4\u51A5\u51A2\u51CD\u51CC\u51C6\u51CB\u5256\u525C\u5254\u525B\u525D\u532A\u537F\u539F\u539D\u53DF\u54E8\u5510\u5501\u5537\u54FC\u54E5\u54F2\u5506\u54FA\u5514\u54E9\u54ED\u54E1\u5509\u54EE\u54EA\u54E6\u5527\u5507\u54FD\u550F\u5703\u5704\u57C2\u57D4\u57CB\u57C3\u5809\u590F\u5957\u5958\u595A\u5A11\u5A18\u5A1C\u5A1F\u5A1B\u5A13\u59EC\u5A20\u5A23\u5A29\u5A25\u5A0C\u5A09\u5B6B\u5C58\u5BB0\u5BB3\u5BB6\u5BB4\u5BAE\u5BB5\u5BB9\u5BB8\u5C04\u5C51\u5C55\u5C50\u5CED\u5CFD\u5CFB\u5CEA\u5CE8\u5CF0\u5CF6\u5D01\u5CF4\u5DEE\u5E2D\u5E2B\u5EAB\u5EAD\u5EA7\u5F31\u5F92\u5F91\u5F90\u6059\u6063\u6065\u6050\u6055\u606D\u6069\u606F\u6084\u609F\u609A\u608D\u6094\u608C\u6085\u6096\u6247\u62F3\u6308\u62FF\u634E\u633E\u632F\u6355\u6342\u6346\u634F\u6349\u633A\u6350\u633D\u632A\u632B\u6328\u6
34D\u634C\u6548\u6549\u6599\u65C1\u65C5\u6642\u6649\u664F\u6643\u6652\u664C\u6645\u6641\u66F8\u6714\u6715\u6717\u6821\u6838\u6848\u6846\u6853\u6839\u6842\u6854\u6829\u68B3\u6817\u684C\u6851\u683D\u67F4\u6850\u6840\u683C\u6843\u682A\u6845\u6813\u6818\u6841\u6B8A\u6B89\u6BB7\u6C23\u6C27\u6C28\u6C26\u6C24\u6CF0\u6D6A\u6D95\u6D88\u6D87\u6D66\u6D78\u6D77\u6D59\u6D93\u6D6C\u6D89\u6D6E\u6D5A\u6D74\u6D69\u6D8C\u6D8A\u6D79\u6D85\u6D65\u6D94\u70CA\u70D8\u70E4\u70D9\u70C8\u70CF\u7239\u7279\u72FC\u72F9\u72FD\u72F8\u72F7\u7386\u73ED\u7409\u73EE\u73E0\u73EA\u73DE\u7554\u755D\u755C\u755A\u7559\u75BE\u75C5\u75C7\u75B2\u75B3\u75BD\u75BC\u75B9\u75C2\u75B8\u768B\u76B0\u76CA\u76CD\u76CE\u7729\u771F\u7720\u7728\u77E9\u7830\u7827\u7838\u781D\u7834\u7837\u7825\u782D\u7820\u781F\u7832\u7955\u7950\u7960\u795F\u7956\u795E\u795D\u7957\u795A\u79E4\u79E3\u79E7\u79DF\u79E6\u79E9\u79D8\u7A84\u7A88\u7AD9\u7B06\u7B11\u7C89\u7D21\u7D17\u7D0B\u7D0A\u7D20\u7D22\u7D14\u7D10\u7D15\u7D1A\u7D1C\u7D0D\u7D19\u7D1B\u7F3A\u7F5F\u7F94\u7FC5\u7FC1\u8006\u8018\u8015\u8019\u8017\u803D\u803F\u80F1\u8102\u80F0\u8105\u80ED\u80F4\u8106\u80F8\u80F3\u8108\u80FD\u810A\u80FC\u80EF\u81ED\u81EC\u8200\u8210\u822A\u822B\u8228\u822C\u82BB\u832B\u8352\u8354\u834A\u8338\u8350\u8349\u8335\u8334\u834F\u8332\u8339\u8336\u8317\u8340\u8331\u8328\u8343\u8654\u868A\u86AA\u8693\u86A4\u86A9\u868C\u86A3\u869C\u8870\u8877\u8881\u8882\u887D\u8879\u8A18\u8A10\u8A0E\u8A0C\u8A15\u8A0A\u8A17\u8A13\u8A16\u8A0F\u8A11\u8C48\u8C7A\u8C79\u8CA1\u8CA2\u8D77\u8EAC\u8ED2\u8ED4\u8ECF\u8FB1\u9001\u9006\u8FF7\u9000\u8FFA\u8FF4\u9003\u8FFD\u9005\u8FF8\u9095\u90E1\u90DD\u90E2\u9152\u914D\u914C\u91D8\u91DD\u91D7\u91DC\u91D9\u9583\u9662\u9663\u9661\u965B\u965D\u9664\u9658\u965E\u96BB\u98E2\u99AC\u9AA8\u9AD8\u9B25\u9B32\u9B3C\u4E7E\u507A\u507D\u505C\u5047\u5043\u504C\u505A\u5049\u5065\u5076\u504E\u5055\u5075\u5074\u5077\u504F\u500F\u506F\u506D\u515C\u5195\u51F0\u526A\u526F\u52D2\u52D9\u52D8\u52D5\u5310\u530F\u5319\u533F\u5340\u533E\u53C3\u66FC\u5546\u556A\u556
6\u5544\u555E\u5561\u5543\u554A\u5531\u5556\u554F\u5555\u552F\u5564\u5538\u552E\u555C\u552C\u5563\u5533\u5541\u5557\u5708\u570B\u5709\u57DF\u5805\u580A\u5806\u57E0\u57E4\u57FA\u5802\u5835\u57F7\u57F9\u5920\u5962\u5A36\u5A41\u5A49\u5A66\u5A6A\u5A40\u5A3C\u5A62\u5A5A\u5A46\u5A4A\u5B70\u5BC7\u5BC5\u5BC4\u5BC2\u5BBF\u5BC6\u5C09\u5C08\u5C07\u5C60\u5C5C\u5C5D\u5D07\u5D06\u5D0E\u5D1B\u5D16\u5D22\u5D11\u5D29\u5D14\u5D19\u5D24\u5D27\u5D17\u5DE2\u5E38\u5E36\u5E33\u5E37\u5EB7\u5EB8\u5EB6\u5EB5\u5EBE\u5F35\u5F37\u5F57\u5F6C\u5F69\u5F6B\u5F97\u5F99\u5F9E\u5F98\u5FA1\u5FA0\u5F9C\u607F\u60A3\u6089\u60A0\u60A8\u60CB\u60B4\u60E6\u60BD\u60C5\u60BB\u60B5\u60DC\u60BC\u60D8\u60D5\u60C6\u60DF\u60B8\u60DA\u60C7\u621A\u621B\u6248\u63A0\u63A7\u6372\u6396\u63A2\u63A5\u6377\u6367\u6398\u63AA\u6371\u63A9\u6389\u6383\u639B\u636B\u63A8\u6384\u6388\u6399\u63A1\u63AC\u6392\u638F\u6380\u637B\u6369\u6368\u637A\u655D\u6556\u6551\u6559\u6557\u555F\u654F\u6558\u6555\u6554\u659C\u659B\u65AC\u65CF\u65CB\u65CC\u65CE\u665D\u665A\u6664\u6668\u6666\u665E\u66F9\u52D7\u671B\u6881\u68AF\u68A2\u6893\u68B5\u687F\u6876\u68B1\u68A7\u6897\u68B0\u6883\u68C4\u68AD\u6886\u6885\u6894\u689D\u68A8\u689F\u68A1\u6882\u6B32\u6BBA\u6BEB\u6BEC\u6C2B\u6D8E\u6DBC\u6DF3\u6DD9\u6DB2\u6DE1\u6DCC\u6DE4\u6DFB\u6DFA\u6E05\u6DC7\u6DCB\u6DAF\u6DD1\u6DAE\u6DDE\u6DF9\u6DB8\u6DF7\u6DF5\u6DC5\u6DD2\u6E1A\u6DB5\u6DDA\u6DEB\u6DD8\u6DEA\u6DF1\u6DEE\u6DE8\u6DC6\u6DC4\u6DAA\u6DEC\u6DBF\u6DE6\u70F9\u7109\u710A\u70FD\u70EF\u723D\u727D\u7281\u731C\u731B\u7316\u7313\u7319\u7387\u7405\u740A\u7403\u7406\u73FE\u740D\u74E0\u74F6\u74F7\u751C\u7522\u7565\u7566\u7562\u7570\u758F\u75D4\u75D5\u75B5\u75CA\u75CD\u768E\u76D4\u76D2\u76DB\u7737\u773E\u773C\u7736\u7738\u773A\u786B\u7843\u784E\u7965\u7968\u796D\u79FB\u7A92\u7A95\u7B20\u7B28\u7B1B\u7B2C\u7B26\u7B19\u7B1E\u7B2E\u7C92\u7C97\u7C95\u7D46\u7D43\u7D71\u7D2E\u7D39\u7D3C\u7D40\u7D30\u7D33\u7D44\u7D2F\u7D42\u7D32\u7D31\u7F3D\u7F9E\u7F9A\u7FCC\u7FCE\u7FD2\u801C\u804A\u8046\u812F\u8116\u8123\u812B\u8129\u8130\
u8124\u8202\u8235\u8237\u8236\u8239\u838E\u839E\u8398\u8378\u83A2\u8396\u83BD\u83AB\u8392\u838A\u8393\u8389\u83A0\u8377\u837B\u837C\u8386\u83A7\u8655\u5F6A\u86C7\u86C0\u86B6\u86C4\u86B5\u86C6\u86CB\u86B1\u86AF\u86C9\u8853\u889E\u8888\u88AB\u8892\u8896\u888D\u888B\u8993\u898F\u8A2A\u8A1D\u8A23\u8A25\u8A31\u8A2D\u8A1F\u8A1B\u8A22\u8C49\u8C5A\u8CA9\u8CAC\u8CAB\u8CA8\u8CAA\u8CA7\u8D67\u8D66\u8DBE\u8DBA\u8EDB\u8EDF\u9019\u900D\u901A\u9017\u9023\u901F\u901D\u9010\u9015\u901E\u9020\u900F\u9022\u9016\u901B\u9014\u90E8\u90ED\u90FD\u9157\u91CE\u91F5\u91E6\u91E3\u91E7\u91ED\u91E9\u9589\u966A\u9675\u9673\u9678\u9670\u9674\u9676\u9677\u966C\u96C0\u96EA\u96E9\u7AE0\u7ADF\u9802\u9803\u9B5A\u9CE5\u9E75\u9E7F\u9EA5\u9EBB\u50A2\u508D\u5085\u5099\u5091\u5080\u5096\u5098\u509A\u6700\u51F1\u5272\u5274\u5275\u5269\u52DE\u52DD\u52DB\u535A\u53A5\u557B\u5580\u55A7\u557C\u558A\u559D\u5598\u5582\u559C\u55AA\u5594\u5587\u558B\u5583\u55B3\u55AE\u559F\u553E\u55B2\u559A\u55BB\u55AC\u55B1\u557E\u5589\u55AB\u5599\u570D\u582F\u582A\u5834\u5824\u5830\u5831\u5821\u581D\u5820\u58F9\u58FA\u5960\u5A77\u5A9A\u5A7F\u5A92\u5A9B\u5AA7\u5B73\u5B71\u5BD2\u5BCC\u5BD3\u5BD0\u5C0A\u5C0B\u5C31\u5D4C\u5D50\u5D34\u5D47\u5DFD\u5E45\u5E3D\u5E40\u5E43\u5E7E\u5ECA\u5EC1\u5EC2\u5EC4\u5F3C\u5F6D\u5FA9\u5FAA\u5FA8\u60D1\u60E1\u60B2\u60B6\u60E0\u611C\u6123\u60FA\u6115\u60F0\u60FB\u60F4\u6168\u60F1\u610E\u60F6\u6109\u6100\u6112\u621F\u6249\u63A3\u638C\u63CF\u63C0\u63E9\u63C9\u63C6\u63CD\u63D2\u63E3\u63D0\u63E1\u63D6\u63ED\u63EE\u6376\u63F4\u63EA\u63DB\u6452\u63DA\u63F9\u655E\u6566\u6562\u6563\u6591\u6590\u65AF\u666E\u6670\u6674\u6676\u666F\u6691\u667A\u667E\u6677\u66FE\u66FF\u671F\u671D\u68FA\u68D5\u68E0\u68D8\u68D7\u6905\u68DF\u68F5\u68EE\u68E7\u68F9\u68D2\u68F2\u68E3\u68CB\u68CD\u690D\u6912\u690E\u68C9\u68DA\u696E\u68FB\u6B3E\u6B3A\u6B3D\u6B98\u6B96\u6BBC\u6BEF\u6C2E\u6C2F\u6C2C\u6E2F\u6E38\u6E54\u6E21\u6E32\u6E67\u6E4A\u6E20\u6E25\u6E23\u6E1B\u6E5B\u6E58\u6E24\u6E56\u6E6E\u6E2D\u6E26\u6E6F\u6E34\u6E4D\u6E3A\u6E2C\u6E43\u6
E1D\u6E3E\u6ECB\u6E89\u6E19\u6E4E\u6E63\u6E44\u6E72\u6E69\u6E5F\u7119\u711A\u7126\u7130\u7121\u7136\u716E\u711C\u724C\u7284\u7280\u7336\u7325\u7334\u7329\u743A\u742A\u7433\u7422\u7425\u7435\u7436\u7434\u742F\u741B\u7426\u7428\u7525\u7526\u756B\u756A\u75E2\u75DB\u75E3\u75D9\u75D8\u75DE\u75E0\u767B\u767C\u7696\u7693\u76B4\u76DC\u774F\u77ED\u785D\u786C\u786F\u7A0D\u7A08\u7A0B\u7A05\u7A00\u7A98\u7A97\u7A96\u7AE5\u7AE3\u7B49\u7B56\u7B46\u7B50\u7B52\u7B54\u7B4D\u7B4B\u7B4F\u7B51\u7C9F\u7CA5\u7D5E\u7D50\u7D68\u7D55\u7D2B\u7D6E\u7D72\u7D61\u7D66\u7D62\u7D70\u7D73\u5584\u7FD4\u7FD5\u800B\u8052\u8085\u8155\u8154\u814B\u8151\u814E\u8139\u8146\u813E\u814C\u8153\u8174\u8212\u821C\u83E9\u8403\u83F8\u840D\u83E0\u83C5\u840B\u83C1\u83EF\u83F1\u83F4\u8457\u840A\u83F0\u840C\u83CC\u83FD\u83F2\u83CA\u8438\u840E\u8404\u83DC\u8407\u83D4\u83DF\u865B\u86DF\u86D9\u86ED\u86D4\u86DB\u86E4\u86D0\u86DE\u8857\u88C1\u88C2\u88B1\u8983\u8996\u8A3B\u8A60\u8A55\u8A5E\u8A3C\u8A41\u8A54\u8A5B\u8A50\u8A46\u8A34\u8A3A\u8A36\u8A56\u8C61\u8C82\u8CAF\u8CBC\u8CB3\u8CBD\u8CC1\u8CBB\u8CC0\u8CB4\u8CB7\u8CB6\u8CBF\u8CB8\u8D8A\u8D85\u8D81\u8DCE\u8DDD\u8DCB\u8DDA\u8DD1\u8DCC\u8DDB\u8DC6\u8EFB\u8EF8\u8EFC\u8F9C\u902E\u9035\u9031\u9038\u9032\u9036\u9102\u90F5\u9109\u90FE\u9163\u9165\u91CF\u9214\u9215\u9223\u9209\u921E\u920D\u9210\u9207\u9211\u9594\u958F\u958B\u9591\u9593\u9592\u958E\u968A\u968E\u968B\u967D\u9685\u9686\u968D\u9672\u9684\u96C1\u96C5\u96C4\u96C6\u96C7\u96EF\u96F2\u97CC\u9805\u9806\u9808\u98E7\u98EA\u98EF\u98E9\u98F2\u98ED\u99AE\u99AD\u9EC3\u9ECD\u9ED1\u4E82\u50AD\u50B5\u50B2\u50B3\u50C5\u50BE\u50AC\u50B7\u50BB\u50AF\u50C7\u527F\u5277\u527D\u52DF\u52E6\u52E4\u52E2\u52E3\u532F\u55DF\u55E8\u55D3\u55E6\u55CE\u55DC\u55C7\u55D1\u55E3\u55E4\u55EF\u55DA\u55E1\u55C5\u55C6\u55E5\u55C9\u5712\u5713\u585E\u5851\u5858\u5857\u585A\u5854\u586B\u584C\u586D\u584A\u5862\u5852\u584B\u5967\u5AC1\u5AC9\u5ACC\u5ABE\u5ABD\u5ABC\u5AB3\u5AC2\u5AB2\u5D69\u5D6F\u5E4C\u5E79\u5EC9\u5EC8\u5F12\u5F59\u5FAC\u5FAE\u611A\u610F\u6148\u611
F\u60F3\u611B\u60F9\u6101\u6108\u614E\u614C\u6144\u614D\u613E\u6134\u6127\u610D\u6106\u6137\u6221\u6222\u6413\u643E\u641E\u642A\u642D\u643D\u642C\u640F\u641C\u6414\u640D\u6436\u6416\u6417\u6406\u656C\u659F\u65B0\u6697\u6689\u6687\u6688\u6696\u6684\u6698\u668D\u6703\u6994\u696D\u695A\u6977\u6960\u6954\u6975\u6930\u6982\u694A\u6968\u696B\u695E\u6953\u6979\u6986\u695D\u6963\u695B\u6B47\u6B72\u6BC0\u6BBF\u6BD3\u6BFD\u6EA2\u6EAF\u6ED3\u6EB6\u6EC2\u6E90\u6E9D\u6EC7\u6EC5\u6EA5\u6E98\u6EBC\u6EBA\u6EAB\u6ED1\u6E96\u6E9C\u6EC4\u6ED4\u6EAA\u6EA7\u6EB4\u714E\u7159\u7169\u7164\u7149\u7167\u715C\u716C\u7166\u714C\u7165\u715E\u7146\u7168\u7156\u723A\u7252\u7337\u7345\u733F\u733E\u746F\u745A\u7455\u745F\u745E\u7441\u743F\u7459\u745B\u745C\u7576\u7578\u7600\u75F0\u7601\u75F2\u75F1\u75FA\u75FF\u75F4\u75F3\u76DE\u76DF\u775B\u776B\u7766\u775E\u7763\u7779\u776A\u776C\u775C\u7765\u7768\u7762\u77EE\u788E\u78B0\u7897\u7898\u788C\u7889\u787C\u7891\u7893\u787F\u797A\u797F\u7981\u842C\u79BD\u7A1C\u7A1A\u7A20\u7A14\u7A1F\u7A1E\u7A9F\u7AA0\u7B77\u7BC0\u7B60\u7B6E\u7B67\u7CB1\u7CB3\u7CB5\u7D93\u7D79\u7D91\u7D81\u7D8F\u7D5B\u7F6E\u7F69\u7F6A\u7F72\u7FA9\u7FA8\u7FA4\u8056\u8058\u8086\u8084\u8171\u8170\u8178\u8165\u816E\u8173\u816B\u8179\u817A\u8166\u8205\u8247\u8482\u8477\u843D\u8431\u8475\u8466\u846B\u8449\u846C\u845B\u843C\u8435\u8461\u8463\u8469\u846D\u8446\u865E\u865C\u865F\u86F9\u8713\u8708\u8707\u8700\u86FE\u86FB\u8702\u8703\u8706\u870A\u8859\u88DF\u88D4\u88D9\u88DC\u88D8\u88DD\u88E1\u88CA\u88D5\u88D2\u899C\u89E3\u8A6B\u8A72\u8A73\u8A66\u8A69\u8A70\u8A87\u8A7C\u8A63\u8AA0\u8A71\u8A85\u8A6D\u8A62\u8A6E\u8A6C\u8A79\u8A7B\u8A3E\u8A68\u8C62\u8C8A\u8C89\u8CCA\u8CC7\u8CC8\u8CC4\u8CB2\u8CC3\u8CC2\u8CC5\u8DE1\u8DDF\u8DE8\u8DEF\u8DF3\u8DFA\u8DEA\u8DE4\u8DE6\u8EB2\u8F03\u8F09\u8EFE\u8F0A\u8F9F\u8FB2\u904B\u904A\u9053\u9042\u9054\u903C\u9055\u9050\u9047\u904F\u904E\u904D\u9051\u903E\u9041\u9112\u9117\u916C\u916A\u9169\u91C9\u9237\u9257\u9238\u923D\u9240\u923E\u925B\u924B\u9264\u9251\u9234\u9249\u924D\
u9245\u9239\u923F\u925A\u9598\u9698\u9694\u9695\u96CD\u96CB\u96C9\u96CA\u96F7\u96FB\u96F9\u96F6\u9756\u9774\u9776\u9810\u9811\u9813\u980A\u9812\u980C\u98FC\u98F4\u98FD\u98FE\u99B3\u99B1\u99B4\u9AE1\u9CE9\u9E82\u9F0E\u9F13\u9F20\u50E7\u50EE\u50E5\u50D6\u50ED\u50DA\u50D5\u50CF\u50D1\u50F1\u50CE\u50E9\u5162\u51F3\u5283\u5282\u5331\u53AD\u55FE\u5600\u561B\u5617\u55FD\u5614\u5606\u5609\u560D\u560E\u55F7\u5616\u561F\u5608\u5610\u55F6\u5718\u5716\u5875\u587E\u5883\u5893\u588A\u5879\u5885\u587D\u58FD\u5925\u5922\u5924\u596A\u5969\u5AE1\u5AE6\u5AE9\u5AD7\u5AD6\u5AD8\u5AE3\u5B75\u5BDE\u5BE7\u5BE1\u5BE5\u5BE6\u5BE8\u5BE2\u5BE4\u5BDF\u5C0D\u5C62\u5D84\u5D87\u5E5B\u5E63\u5E55\u5E57\u5E54\u5ED3\u5ED6\u5F0A\u5F46\u5F70\u5FB9\u6147\u613F\u614B\u6177\u6162\u6163\u615F\u615A\u6158\u6175\u622A\u6487\u6458\u6454\u64A4\u6478\u645F\u647A\u6451\u6467\u6434\u646D\u647B\u6572\u65A1\u65D7\u65D6\u66A2\u66A8\u669D\u699C\u69A8\u6995\u69C1\u69AE\u69D3\u69CB\u699B\u69B7\u69BB\u69AB\u69B4\u69D0\u69CD\u69AD\u69CC\u69A6\u69C3\u69A3\u6B49\u6B4C\u6C33\u6F33\u6F14\u6EFE\u6F13\u6EF4\u6F29\u6F3E\u6F20\u6F2C\u6F0F\u6F02\u6F22\u6EFF\u6EEF\u6F06\u6F31\u6F38\u6F32\u6F23\u6F15\u6F2B\u6F2F\u6F88\u6F2A\u6EEC\u6F01\u6EF2\u6ECC\u6EF7\u7194\u7199\u717D\u718A\u7184\u7192\u723E\u7292\u7296\u7344\u7350\u7464\u7463\u746A\u7470\u746D\u7504\u7591\u7627\u760D\u760B\u7609\u7613\u76E1\u76E3\u7784\u777D\u777F\u7761\u78C1\u789F\u78A7\u78B3\u78A9\u78A3\u798E\u798F\u798D\u7A2E\u7A31\u7AAA\u7AA9\u7AED\u7AEF\u7BA1\u7B95\u7B8B\u7B75\u7B97\u7B9D\u7B94\u7B8F\u7BB8\u7B87\u7B84\u7CB9\u7CBD\u7CBE\u7DBB\u7DB0\u7D9C\u7DBD\u7DBE\u7DA0\u7DCA\u7DB4\u7DB2\u7DB1\u7DBA\u7DA2\u7DBF\u7DB5\u7DB8\u7DAD\u7DD2\u7DC7\u7DAC\u7F70\u7FE0\u7FE1\u7FDF\u805E\u805A\u8087\u8150\u8180\u818F\u8188\u818A\u817F\u8182\u81E7\u81FA\u8207\u8214\u821E\u824B\u84C9\u84BF\u84C6\u84C4\u8499\u849E\u84B2\u849C\u84CB\u84B8\u84C0\u84D3\u8490\u84BC\u84D1\u84CA\u873F\u871C\u873B\u8722\u8725\u8734\u8718\u8755\u8737\u8729\u88F3\u8902\u88F4\u88F9\u88F8\u88FD\u88E8\u891A\u88EF\u8
AA6\u8A8C\u8A9E\u8AA3\u8A8D\u8AA1\u8A93\u8AA4\u8AAA\u8AA5\u8AA8\u8A98\u8A91\u8A9A\u8AA7\u8C6A\u8C8D\u8C8C\u8CD3\u8CD1\u8CD2\u8D6B\u8D99\u8D95\u8DFC\u8F14\u8F12\u8F15\u8F13\u8FA3\u9060\u9058\u905C\u9063\u9059\u905E\u9062\u905D\u905B\u9119\u9118\u911E\u9175\u9178\u9177\u9174\u9278\u9280\u9285\u9298\u9296\u927B\u9293\u929C\u92A8\u927C\u9291\u95A1\u95A8\u95A9\u95A3\u95A5\u95A4\u9699\u969C\u969B\u96CC\u96D2\u9700\u977C\u9785\u97F6\u9817\u9818\u98AF\u98B1\u9903\u9905\u990C\u9909\u99C1\u9AAF\u9AB0\u9AE6\u9B41\u9B42\u9CF4\u9CF6\u9CF3\u9EBC\u9F3B\u9F4A\u5104\u5100\u50FB\u50F5\u50F9\u5102\u5108\u5109\u5105\u51DC\u5287\u5288\u5289\u528D\u528A\u52F0\u53B2\u562E\u563B\u5639\u5632\u563F\u5634\u5629\u5653\u564E\u5657\u5674\u5636\u562F\u5630\u5880\u589F\u589E\u58B3\u589C\u58AE\u58A9\u58A6\u596D\u5B09\u5AFB\u5B0B\u5AF5\u5B0C\u5B08\u5BEE\u5BEC\u5BE9\u5BEB\u5C64\u5C65\u5D9D\u5D94\u5E62\u5E5F\u5E61\u5EE2\u5EDA\u5EDF\u5EDD\u5EE3\u5EE0\u5F48\u5F71\u5FB7\u5FB5\u6176\u6167\u616E\u615D\u6155\u6182\u617C\u6170\u616B\u617E\u61A7\u6190\u61AB\u618E\u61AC\u619A\u61A4\u6194\u61AE\u622E\u6469\u646F\u6479\u649E\u64B2\u6488\u6490\u64B0\u64A5\u6493\u6495\u64A9\u6492\u64AE\u64AD\u64AB\u649A\u64AC\u6499\u64A2\u64B3\u6575\u6577\u6578\u66AE\u66AB\u66B4\u66B1\u6A23\u6A1F\u69E8\u6A01\u6A1E\u6A19\u69FD\u6A21\u6A13\u6A0A\u69F3\u6A02\u6A05\u69ED\u6A11\u6B50\u6B4E\u6BA4\u6BC5\u6BC6\u6F3F\u6F7C\u6F84\u6F51\u6F66\u6F54\u6F86\u6F6D\u6F5B\u6F78\u6F6E\u6F8E\u6F7A\u6F70\u6F64\u6F97\u6F58\u6ED5\u6F6F\u6F60\u6F5F\u719F\u71AC\u71B1\u71A8\u7256\u729B\u734E\u7357\u7469\u748B\u7483\u747E\u7480\u757F\u7620\u7629\u761F\u7624\u7626\u7621\u7622\u769A\u76BA\u76E4\u778E\u7787\u778C\u7791\u778B\u78CB\u78C5\u78BA\u78CA\u78BE\u78D5\u78BC\u78D0\u7A3F\u7A3C\u7A40\u7A3D\u7A37\u7A3B\u7AAF\u7AAE\u7BAD\u7BB1\u7BC4\u7BB4\u7BC6\u7BC7\u7BC1\u7BA0\u7BCC\u7CCA\u7DE0\u7DF4\u7DEF\u7DFB\u7DD8\u7DEC\u7DDD\u7DE8\u7DE3\u7DDA\u7DDE\u7DE9\u7D9E\u7DD9\u7DF2\u7DF9\u7F75\u7F77\u7FAF\u7FE9\u8026\u819B\u819C\u819D\u81A0\u819A\u8198\u8517\u853D\u851A\u84E
E\u852C\u852D\u8513\u8511\u8523\u8521\u8514\u84EC\u8525\u84FF\u8506\u8782\u8774\u8776\u8760\u8766\u8778\u8768\u8759\u8757\u874C\u8753\u885B\u885D\u8910\u8907\u8912\u8913\u8915\u890A\u8ABC\u8AD2\u8AC7\u8AC4\u8A95\u8ACB\u8AF8\u8AB2\u8AC9\u8AC2\u8ABF\u8AB0\u8AD6\u8ACD\u8AB6\u8AB9\u8ADB\u8C4C\u8C4E\u8C6C\u8CE0\u8CDE\u8CE6\u8CE4\u8CEC\u8CED\u8CE2\u8CE3\u8CDC\u8CEA\u8CE1\u8D6D\u8D9F\u8DA3\u8E2B\u8E10\u8E1D\u8E22\u8E0F\u8E29\u8E1F\u8E21\u8E1E\u8EBA\u8F1D\u8F1B\u8F1F\u8F29\u8F26\u8F2A\u8F1C\u8F1E\u8F25\u9069\u906E\u9068\u906D\u9077\u9130\u912D\u9127\u9131\u9187\u9189\u918B\u9183\u92C5\u92BB\u92B7\u92EA\u92AC\u92E4\u92C1\u92B3\u92BC\u92D2\u92C7\u92F0\u92B2\u95AD\u95B1\u9704\u9706\u9707\u9709\u9760\u978D\u978B\u978F\u9821\u982B\u981C\u98B3\u990A\u9913\u9912\u9918\u99DD\u99D0\u99DF\u99DB\u99D1\u99D5\u99D2\u99D9\u9AB7\u9AEE\u9AEF\u9B27\u9B45\u9B44\u9B77\u9B6F\u9D06\u9D09\u9D03\u9EA9\u9EBE\u9ECE\u58A8\u9F52\u5112\u5118\u5114\u5110\u5115\u5180\u51AA\u51DD\u5291\u5293\u52F3\u5659\u566B\u5679\u5669\u5664\u5678\u566A\u5668\u5665\u5671\u566F\u566C\u5662\u5676\u58C1\u58BE\u58C7\u58C5\u596E\u5B1D\u5B34\u5B78\u5BF0\u5C0E\u5F4A\u61B2\u6191\u61A9\u618A\u61CD\u61B6\u61BE\u61CA\u61C8\u6230\u64C5\u64C1\u64CB\u64BB\u64BC\u64DA\u64C4\u64C7\u64C2\u64CD\u64BF\u64D2\u64D4\u64BE\u6574\u66C6\u66C9\u66B9\u66C4\u66C7\u66B8\u6A3D\u6A38\u6A3A\u6A59\u6A6B\u6A58\u6A39\u6A44\u6A62\u6A61\u6A4B\u6A47\u6A35\u6A5F\u6A48\u6B59\u6B77\u6C05\u6FC2\u6FB1\u6FA1\u6FC3\u6FA4\u6FC1\u6FA7\u6FB3\u6FC0\u6FB9\u6FB6\u6FA6\u6FA0\u6FB4\u71BE\u71C9\u71D0\u71D2\u71C8\u71D5\u71B9\u71CE\u71D9\u71DC\u71C3\u71C4\u7368\u749C\u74A3\u7498\u749F\u749E\u74E2\u750C\u750D\u7634\u7638\u763A\u76E7\u76E5\u77A0\u779E\u779F\u77A5\u78E8\u78DA\u78EC\u78E7\u79A6\u7A4D\u7A4E\u7A46\u7A4C\u7A4B\u7ABA\u7BD9\u7C11\u7BC9\u7BE4\u7BDB\u7BE1\u7BE9\u7BE6\u7CD5\u7CD6\u7E0A\u7E11\u7E08\u7E1B\u7E23\u7E1E\u7E1D\u7E09\u7E10\u7F79\u7FB2\u7FF0\u7FF1\u7FEE\u8028\u81B3\u81A9\u81A8\u81FB\u8208\u8258\u8259\u854A\u8559\u8548\u8568\u8569\u8543\u8549\u856D\u856A\u855E\
u8783\u879F\u879E\u87A2\u878D\u8861\u892A\u8932\u8925\u892B\u8921\u89AA\u89A6\u8AE6\u8AFA\u8AEB\u8AF1\u8B00\u8ADC\u8AE7\u8AEE\u8AFE\u8B01\u8B02\u8AF7\u8AED\u8AF3\u8AF6\u8AFC\u8C6B\u8C6D\u8C93\u8CF4\u8E44\u8E31\u8E34\u8E42\u8E39\u8E35\u8F3B\u8F2F\u8F38\u8F33\u8FA8\u8FA6\u9075\u9074\u9078\u9072\u907C\u907A\u9134\u9192\u9320\u9336\u92F8\u9333\u932F\u9322\u92FC\u932B\u9304\u931A\u9310\u9326\u9321\u9315\u932E\u9319\u95BB\u96A7\u96A8\u96AA\u96D5\u970E\u9711\u9716\u970D\u9713\u970F\u975B\u975C\u9766\u9798\u9830\u9838\u983B\u9837\u982D\u9839\u9824\u9910\u9928\u991E\u991B\u9921\u991A\u99ED\u99E2\u99F1\u9AB8\u9ABC\u9AFB\u9AED\u9B28\u9B91\u9D15\u9D23\u9D26\u9D28\u9D12\u9D1B\u9ED8\u9ED4\u9F8D\u9F9C\u512A\u511F\u5121\u5132\u52F5\u568E\u5680\u5690\u5685\u5687\u568F\u58D5\u58D3\u58D1\u58CE\u5B30\u5B2A\u5B24\u5B7A\u5C37\u5C68\u5DBC\u5DBA\u5DBD\u5DB8\u5E6B\u5F4C\u5FBD\u61C9\u61C2\u61C7\u61E6\u61CB\u6232\u6234\u64CE\u64CA\u64D8\u64E0\u64F0\u64E6\u64EC\u64F1\u64E2\u64ED\u6582\u6583\u66D9\u66D6\u6A80\u6A94\u6A84\u6AA2\u6A9C\u6ADB\u6AA3\u6A7E\u6A97\u6A90\u6AA0\u6B5C\u6BAE\u6BDA\u6C08\u6FD8\u6FF1\u6FDF\u6FE0\u6FDB\u6FE4\u6FEB\u6FEF\u6F80\u6FEC\u6FE1\u6FE9\u6FD5\u6FEE\u6FF0\u71E7\u71DF\u71EE\u71E6\u71E5\u71ED\u71EC\u71F4\u71E0\u7235\u7246\u7370\u7372\u74A9\u74B0\u74A6\u74A8\u7646\u7642\u764C\u76EA\u77B3\u77AA\u77B0\u77AC\u77A7\u77AD\u77EF\u78F7\u78FA\u78F4\u78EF\u7901\u79A7\u79AA\u7A57\u7ABF\u7C07\u7C0D\u7BFE\u7BF7\u7C0C\u7BE0\u7CE0\u7CDC\u7CDE\u7CE2\u7CDF\u7CD9\u7CDD\u7E2E\u7E3E\u7E46\u7E37\u7E32\u7E43\u7E2B\u7E3D\u7E31\u7E45\u7E41\u7E34\u7E39\u7E48\u7E35\u7E3F\u7E2F\u7F44\u7FF3\u7FFC\u8071\u8072\u8070\u806F\u8073\u81C6\u81C3\u81BA\u81C2\u81C0\u81BF\u81BD\u81C9\u81BE\u81E8\u8209\u8271\u85AA\u8584\u857E\u859C\u8591\u8594\u85AF\u859B\u8587\u85A8\u858A\u8667\u87C0\u87D1\u87B3\u87D2\u87C6\u87AB\u87BB\u87BA\u87C8\u87CB\u893B\u8936\u8944\u8938\u893D\u89AC\u8B0E\u8B17\u8B19\u8B1B\u8B0A\u8B20\u8B1D\u8B04\u8B10\u8C41\u8C3F\u8C73\u8CFA\u8CFD\u8CFC\u8CF8\u8CFB\u8DA8\u8E49\u8E4B\u8E48\u8E4A\u8F44\u8
F3E\u8F42\u8F45\u8F3F\u907F\u907D\u9084\u9081\u9082\u9080\u9139\u91A3\u919E\u919C\u934D\u9382\u9328\u9375\u934A\u9365\u934B\u9318\u937E\u936C\u935B\u9370\u935A\u9354\u95CA\u95CB\u95CC\u95C8\u95C6\u96B1\u96B8\u96D6\u971C\u971E\u97A0\u97D3\u9846\u98B6\u9935\u9A01\u99FF\u9BAE\u9BAB\u9BAA\u9BAD\u9D3B\u9D3F\u9E8B\u9ECF\u9EDE\u9EDC\u9EDD\u9EDB\u9F3E\u9F4B\u53E2\u5695\u56AE\u58D9\u58D8\u5B38\u5F5D\u61E3\u6233\u64F4\u64F2\u64FE\u6506\u64FA\u64FB\u64F7\u65B7\u66DC\u6726\u6AB3\u6AAC\u6AC3\u6ABB\u6AB8\u6AC2\u6AAE\u6AAF\u6B5F\u6B78\u6BAF\u7009\u700B\u6FFE\u7006\u6FFA\u7011\u700F\u71FB\u71FC\u71FE\u71F8\u7377\u7375\u74A7\u74BF\u7515\u7656\u7658\u7652\u77BD\u77BF\u77BB\u77BC\u790E\u79AE\u7A61\u7A62\u7A60\u7AC4\u7AC5\u7C2B\u7C27\u7C2A\u7C1E\u7C23\u7C21\u7CE7\u7E54\u7E55\u7E5E\u7E5A\u7E61\u7E52\u7E59\u7F48\u7FF9\u7FFB\u8077\u8076\u81CD\u81CF\u820A\u85CF\u85A9\u85CD\u85D0\u85C9\u85B0\u85BA\u85B9\u85A6\u87EF\u87EC\u87F2\u87E0\u8986\u89B2\u89F4\u8B28\u8B39\u8B2C\u8B2B\u8C50\u8D05\u8E59\u8E63\u8E66\u8E64\u8E5F\u8E55\u8EC0\u8F49\u8F4D\u9087\u9083\u9088\u91AB\u91AC\u91D0\u9394\u938A\u9396\u93A2\u93B3\u93AE\u93AC\u93B0\u9398\u939A\u9397\u95D4\u95D6\u95D0\u95D5\u96E2\u96DC\u96D9\u96DB\u96DE\u9724\u97A3\u97A6\u97AD\u97F9\u984D\u984F\u984C\u984E\u9853\u98BA\u993E\u993F\u993D\u992E\u99A5\u9A0E\u9AC1\u9B03\u9B06\u9B4F\u9B4E\u9B4D\u9BCA\u9BC9\u9BFD\u9BC8\u9BC0\u9D51\u9D5D\u9D60\u9EE0\u9F15\u9F2C\u5133\u56A5\u58DE\u58DF\u58E2\u5BF5\u9F90\u5EEC\u61F2\u61F7\u61F6\u61F5\u6500\u650F\u66E0\u66DD\u6AE5\u6ADD\u6ADA\u6AD3\u701B\u701F\u7028\u701A\u701D\u7015\u7018\u7206\u720D\u7258\u72A2\u7378\u737A\u74BD\u74CA\u74E3\u7587\u7586\u765F\u7661\u77C7\u7919\u79B1\u7A6B\u7A69\u7C3E\u7C3F\u7C38\u7C3D\u7C37\u7C40\u7E6B\u7E6D\u7E79\u7E69\u7E6A\u7F85\u7E73\u7FB6\u7FB9\u7FB8\u81D8\u85E9\u85DD\u85EA\u85D5\u85E4\u85E5\u85F7\u87FB\u8805\u880D\u87F9\u87FE\u8960\u895F\u8956\u895E\u8B41\u8B5C\u8B58\u8B49\u8B5A\u8B4E\u8B4F\u8B46\u8B59\u8D08\u8D0A\u8E7C\u8E72\u8E87\u8E76\u8E6C\u8E7A\u8E74\u8F54\u8F4E\u8FAD\u908A\u908B\u91B
1\u91AE\u93E1\u93D1\u93DF\u93C3\u93C8\u93DC\u93DD\u93D6\u93E2\u93CD\u93D8\u93E4\u93D7\u93E8\u95DC\u96B4\u96E3\u972A\u9727\u9761\u97DC\u97FB\u985E\u9858\u985B\u98BC\u9945\u9949\u9A16\u9A19\u9B0D\u9BE8\u9BE7\u9BD6\u9BDB\u9D89\u9D61\u9D72\u9D6A\u9D6C\u9E92\u9E97\u9E93\u9EB4\u52F8\u56A8\u56B7\u56B6\u56B4\u56BC\u58E4\u5B40\u5B43\u5B7D\u5BF6\u5DC9\u61F8\u61FA\u6518\u6514\u6519\u66E6\u6727\u6AEC\u703E\u7030\u7032\u7210\u737B\u74CF\u7662\u7665\u7926\u792A\u792C\u792B\u7AC7\u7AF6\u7C4C\u7C43\u7C4D\u7CEF\u7CF0\u8FAE\u7E7D\u7E7C\u7E82\u7F4C\u8000\u81DA\u8266\u85FB\u85F9\u8611\u85FA\u8606\u860B\u8607\u860A\u8814\u8815\u8964\u89BA\u89F8\u8B70\u8B6C\u8B66\u8B6F\u8B5F\u8B6B\u8D0F\u8D0D\u8E89\u8E81\u8E85\u8E82\u91B4\u91CB\u9418\u9403\u93FD\u95E1\u9730\u98C4\u9952\u9951\u99A8\u9A2B\u9A30\u9A37\u9A35\u9C13\u9C0D\u9E79\u9EB5\u9EE8\u9F2F\u9F5F\u9F63\u9F61\u5137\u5138\u56C1\u56C0\u56C2\u5914\u5C6C\u5DCD\u61FC\u61FE\u651D\u651C\u6595\u66E9\u6AFB\u6B04\u6AFA\u6BB2\u704C\u721B\u72A7\u74D6\u74D4\u7669\u77D3\u7C50\u7E8F\u7E8C\u7FBC\u8617\u862D\u861A\u8823\u8822\u8821\u881F\u896A\u896C\u89BD\u8B74\u8B77\u8B7D\u8D13\u8E8A\u8E8D\u8E8B\u8F5F\u8FAF\u91BA\u942E\u9433\u9435\u943A\u9438\u9432\u942B\u95E2\u9738\u9739\u9732\u97FF\u9867\u9865\u9957\u9A45\u9A43\u9A40\u9A3E\u9ACF\u9B54\u9B51\u9C2D\u9C25\u9DAF\u9DB4\u9DC2\u9DB8\u9E9D\u9EEF\u9F19\u9F5C\u9F66\u9F67\u513C\u513B\u56C8\u56CA\u56C9\u5B7F\u5DD4\u5DD2\u5F4E\u61FF\u6524\u6B0A\u6B61\u7051\u7058\u7380\u74E4\u758A\u766E\u766C\u79B3\u7C60\u7C5F\u807E\u807D\u81DF\u8972\u896F\u89FC\u8B80\u8D16\u8D17\u8E91\u8E93\u8F61\u9148\u9444\u9451\u9452\u973D\u973E\u97C3\u97C1\u986B\u9955\u9A55\u9A4D\u9AD2\u9B1A\u9C49\u9C31\u9C3E\u9C3B\u9DD3\u9DD7\u9F34\u9F6C\u9F6A\u9F94\u56CC\u5DD6\u6200\u6523\u652B\u652A\u66EC\u6B10\u74DA\u7ACA\u7C64\u7C63\u7C65\u7E93\u7E96\u7E94\u81E2\u8638\u863F\u8831\u8B8A\u9090\u908F\u9463\u9460\u9464\u9768\u986F\u995C\u9A5A\u9A5B\u9A57\u9AD3\u9AD4\u9AD1\u9C54\u9C57\u9C56\u9DE5\u9E9F\u9EF4\u56D1\u58E9\u652C\u705E\u7671\u7672\u77D7\u7F50\u7F88\
u8836\u8839\u8862\u8B93\u8B92\u8B96\u8277\u8D1B\u91C0\u946A\u9742\u9748\u9744\u97C6\u9870\u9A5F\u9B22\u9B58\u9C5F\u9DF9\u9DFA\u9E7C\u9E7D\u9F07\u9F77\u9F72\u5EF3\u6B16\u7063\u7C6C\u7C6E\u883B\u89C0\u8EA1\u91C1\u9472\u9470\u9871\u995E\u9AD6\u9B23\u9ECC\u7064\u77DA\u8B9A\u9477\u97C9\u9A62\u9A65\u7E9C\u8B9C\u8EAA\u91C5\u947D\u947E\u947C\u9C77\u9C78\u9EF7\u8C54\u947F\u9E1A\u7228\u9A6A\u9B31\u9E1B\u9E1E\u7C72\u2460\u2461\u2462\u2463\u2464\u2465\u2466\u2467\u2468\u2469\u2474\u2475\u2476\u2477\u2478\u2479\u247A\u247B\u247C\u247D\u2170\u2171\u2172\u2173\u2174\u2175\u2176\u2177\u2178\u2179\u4E36\u4E3F\u4E85\u4EA0\u5182\u5196\u51AB\u52F9\u5338\u5369\u53B6\u590A\u5B80\u5DDB\u2F33\u5E7F\u5EF4\u5F50\u5F61\u6534\u65E0\u7592\u7676\u8FB5\u96B6\u00A8\u02C6\u30FD\u30FE\u309D\u309E\u3003\u4EDD\u3005\u3006\u3007\u30FC\uFF3B\uFF3D\u273D\u3041\u3042\u3043\u3044\u3045\u3046\u3047\u3048\u3049\u304A\u304B\u304C\u304D\u304E\u304F\u3050\u3051\u3052\u3053\u3054\u3055\u3056\u3057\u3058\u3059\u305A\u305B\u305C\u305D\u305E\u305F\u3060\u3061\u3062\u3063\u3064\u3065\u3066\u3067\u3068\u3069\u306A\u306B\u306C\u306D\u306E\u306F\u3070\u3071\u3072\u3073\u3074\u3075\u3076\u3077\u3078\u3079\u307A\u307B\u307C\u307D\u307E\u307F\u3080\u3081\u3082\u3083\u3084\u3085\u3086\u3087\u3088\u3089\u308A\u308B\u308C\u308D\u308E\u308F\u3090\u3091\u3092\u3093\u30A1\u30A2\u30A3\u30A4\u30A5\u30A6\u30A7\u30A8\u30A9\u30AA\u30AB\u30AC\u30AD\u30AE\u30AF\u30B0\u30B1\u30B2\u30B3\u30B4\u30B5\u30B6\u30B7\u30B8\u30B9\u30BA\u30BB\u30BC\u30BD\u30BE\u30BF\u30C0\u30C1\u30C2\u30C3\u30C4\u30C5\u30C6\u30C7\u30C8\u30C9\u30CA\u30CB\u30CC\u30CD\u30CE\u30CF\u30D0\u30D1\u30D2\u30D3\u30D4\u30D5\u30D6\u30D7\u30D8\u30D9\u30DA\u30DB\u30DC\u30DD\u30DE\u30DF\u30E0\u30E1\u30E2\u30E3\u30E4\u30E5\u30E6\u30E7\u30E8\u30E9\u30EA\u30EB\u30EC\u30ED\u30EE\u30EF\u30F0\u30F1\u30F2\u30F3\u30F4\u30F5\u30F6\u0410\u0411\u0412\u0413\u0414\u0415\u0401\u0416\u0417\u0418\u0419\u041A\u041B\u041C\u041D\u041E\u041F\u0420\u0421\u0422\u0423\u0424\u0425\u0426\u0427\u0428\u0
429\u042A\u042B\u042C\u042D\u042E\u042F\u0430\u0431\u0432\u0433\u0434\u0435\u0451\u0436\u0437\u0438\u0439\u043A\u043B\u043C\u043D\u043E\u043F\u0440\u0441\u0442\u0443\u0444\u0445\u0446\u0447\u0448\u0449\u044A\u044B\u044C\u044D\u044E\u044F\u21E7\u21B8\u21B9\u31CF\u00CC\u4E5A\u008A\u5202\u4491\u9FB0\u5188\u9FB1\u7607"; + + private static final String TABLE4 = "\uFFE2\uFFE4\uFF07\uFF02\u3231\u2116\u2121\u309B\u309C\u2E80\u2E84\u2E86\u2E87\u2E88\u2E8A\u2E8C\u2E8D\u2E95\u2E9C\u2E9D\u2EA5\u2EA7\u2EAA\u2EAC\u2EAE\u2EB6\u2EBC\u2EBE\u2EC6\u2ECA\u2ECC\u2ECD\u2ECF\u2ED6\u2ED7\u2EDE\u2EE3\u0000\u0000\u0000\u0283\u0250\u025B\u0254\u0275\u0153\u00F8\u014B\u028A\u026A\u4E42\u4E5C\u51F5\u531A\u5382\u4E07\u4E0C\u4E47\u4E8D\u56D7\uFA0C\u5C6E\u5F73\u4E0F\u5187\u4E0E\u4E2E\u4E93\u4EC2\u4EC9\u4EC8\u5198\u52FC\u536C\u53B9\u5720\u5903\u592C\u5C10\u5DFF\u65E1\u6BB3\u6BCC\u6C14\u723F\u4E31\u4E3C\u4EE8\u4EDC\u4EE9\u4EE1\u4EDD\u4EDA\u520C\u531C\u534C\u5722\u5723\u5917\u592F\u5B81\u5B84\u5C12\u5C3B\u5C74\u5C73\u5E04\u5E80\u5E82\u5FC9\u6209\u6250\u6C15\u6C36\u6C43\u6C3F\u6C3B\u72AE\u72B0\u738A\u79B8\u808A\u961E\u4F0E\u4F18\u4F2C\u4EF5\u4F14\u4EF1\u4F00\u4EF7\u4F08\u4F1D\u4F02\u4F05\u4F22\u4F13\u4F04\u4EF4\u4F12\u51B1\u5213\u5209\u5210\u52A6\u5322\u531F\u534D\u538A\u5407\u56E1\u56DF\u572E\u572A\u5734\u593C\u5980\u597C\u5985\u597B\u597E\u5977\u597F\u5B56\u5C15\u5C25\u5C7C\u5C7A\u5C7B\u5C7E\u5DDF\u5E75\u5E84\u5F02\u5F1A\u5F74\u5FD5\u5FD4\u5FCF\u625C\u625E\u6264\u6261\u6266\u6262\u6259\u6260\u625A\u6265\u65EF\u65EE\u673E\u6739\u6738\u673B\u673A\u673F\u673C\u6733\u6C18\u6C46\u6C52\u6C5C\u6C4F\u6C4A\u6C54\u6C4B\u6C4C\u7071\u725E\u72B4\u72B5\u738E\u752A\u767F\u7A75\u7F51\u8278\u827C\u8280\u827D\u827F\u864D\u897E\u9099\u9097\u9098\u909B\u9094\u9622\u9624\u9620\u9623\u4F56\u4F3B\u4F62\u4F49\u4F53\u4F64\u4F3E\u4F67\u4F52\u4F5F\u4F41\u4F58\u4F2D\u4F33\u4F3F\u4F61\u518F\u51B9\u521C\u521E\u5221\u52AD\u52AE\u5309\u5363\u5372\u538E\u538F\u5430\u5437\u542A\u5454\u5445\u5419\u541C\u5425\u5418\u543D\u544F\u5441\u
5428\u5424\u5447\u56EE\u56E7\u56E5\u5741\u5745\u574C\u5749\u574B\u5752\u5906\u5940\u59A6\u5998\u59A0\u5997\u598E\u59A2\u5990\u598F\u59A7\u59A1\u5B8E\u5B92\u5C28\u5C2A\u5C8D\u5C8F\u5C88\u5C8B\u5C89\u5C92\u5C8A\u5C86\u5C93\u5C95\u5DE0\u5E0A\u5E0E\u5E8B\u5E89\u5E8C\u5E88\u5E8D\u5F05\u5F1D\u5F78\u5F76\u5FD2\u5FD1\u5FD0\u5FED\u5FE8\u5FEE\u5FF3\u5FE1\u5FE4\u5FE3\u5FFA\u5FEF\u5FF7\u5FFB\u6000\u5FF4\u623A\u6283\u628C\u628E\u628F\u6294\u6287\u6271\u627B\u627A\u6270\u6281\u6288\u6277\u627D\u6272\u6274\u6537\u65F0\u65F4\u65F3\u65F2\u65F5\u6745\u6747\u6759\u6755\u674C\u6748\u675D\u674D\u675A\u674B\u6BD0\u6C19\u6C1A\u6C78\u6C67\u6C6B\u6C84\u6C8B\u6C8F\u6C71\u6C6F\u6C69\u6C9A\u6C6D\u6C87\u6C95\u6C9C\u6C66\u6C73\u6C65\u6C7B\u6C8E\u7074\u707A\u7263\u72BF\u72BD\u72C3\u72C6\u72C1\u72BA\u72C5\u7395\u7397\u7393\u7394\u7392\u753A\u7539\u7594\u7595\u7681\u793D\u8034\u8095\u8099\u8090\u8092\u809C\u8290\u828F\u8285\u828E\u8291\u8293\u828A\u8283\u8284\u8C78\u8FC9\u8FBF\u909F\u90A1\u90A5\u909E\u90A7\u90A0\u9630\u9628\u962F\u962D\u4E33\u4F98\u4F7C\u4F85\u4F7D\u4F80\u4F87\u4F76\u4F74\u4F89\u4F84\u4F77\u4F4C\u4F97\u4F6A\u4F9A\u4F79\u4F81\u4F78\u4F90\u4F9C\u4F94\u4F9E\u4F92\u4F82\u4F95\u4F6B\u4F6E\u519E\u51BC\u51BE\u5235\u5232\u5233\u5246\u5231\u52BC\u530A\u530B\u533C\u5392\u5394\u5487\u547F\u5481\u5491\u5482\u5488\u546B\u547A\u547E\u5465\u546C\u5474\u5466\u548D\u546F\u5461\u5460\u5498\u5463\u5467\u5464\u56F7\u56F9\u576F\u5772\u576D\u576B\u5771\u5770\u5776\u5780\u5775\u577B\u5773\u5774\u5762\u5768\u577D\u590C\u5945\u59B5\u59BA\u59CF\u59CE\u59B2\u59CC\u59C1\u59B6\u59BC\u59C3\u59D6\u59B1\u59BD\u59C0\u59C8\u59B4\u59C7\u5B62\u5B65\u5B93\u5B95\u5C44\u5C47\u5CAE\u5CA4\u5CA0\u5CB5\u5CAF\u5CA8\u5CAC\u5C9F\u5CA3\u5CAD\u5CA2\u5CAA\u5CA7\u5C9D\u5CA5\u5CB6\u5CB0\u5CA6\u5E17\u5E14\u5E19\u5F28\u5F22\u5F23\u5F24\u5F54\u5F82\u5F7E\u5F7D\u5FDE\u5FE5\u602D\u6026\u6019\u6032\u600B\u6034\u600A\u6017\u6033\u601A\u601E\u602C\u6022\u600D\u6010\u602E\u6013\u6011\u600C\u6009\u601C\u6214\u623D\u62AD\u62B4\u62D1\u62BE\u62
AA\u62B6\u62CA\u62AE\u62B3\u62AF\u62BB\u62A9\u62B0\u62B8\u653D\u65A8\u65BB\u6609\u65FC\u6604\u6612\u6608\u65FB\u6603\u660B\u660D\u6605\u65FD\u6611\u6610\u66F6\u670A\u6785\u676C\u678E\u6792\u6776\u677B\u6798\u6786\u6784\u6774\u678D\u678C\u677A\u679F\u6791\u6799\u6783\u677D\u6781\u6778\u6779\u6794\u6B25\u6B80\u6B7E\u6BDE\u6C1D\u6C93\u6CEC\u6CEB\u6CEE\u6CD9\u6CB6\u6CD4\u6CAD\u6CE7\u6CB7\u6CD0\u6CC2\u6CBA\u6CC3\u6CC6\u6CED\u6CF2\u6CD2\u6CDD\u6CB4\u6C8A\u6C9D\u6C80\u6CDE\u6CC0\u6D30\u6CCD\u6CC7\u6CB0\u6CF9\u6CCF\u6CE9\u6CD1\u7094\u7098\u7085\u7093\u7086\u7084\u7091\u7096\u7082\u709A\u7083\u726A\u72D6\u72CB\u72D8\u72C9\u72DC\u72D2\u72D4\u72DA\u72CC\u72D1\u73A4\u73A1\u73AD\u73A6\u73A2\u73A0\u73AC\u739D\u74DD\u74E8\u753F\u7540\u753E\u758C\u7598\u76AF\u76F3\u76F1\u76F0\u76F5\u77F8\u77FC\u77F9\u77FB\u77FA\u77F7\u7942\u793F\u79C5\u7A78\u7A7B\u7AFB\u7C75\u7CFD\u8035\u808F\u80AE\u80A3\u80B8\u80B5\u80AD\u8220\u82A0\u82C0\u82AB\u829A\u8298\u829B\u82B5\u82A7\u82AE\u82BC\u829E\u82BA\u82B4\u82A8\u82A1\u82A9\u82C2\u82A4\u82C3\u82B6\u82A2\u8670\u866F\u866D\u866E\u8C56\u8FD2\u8FCB\u8FD3\u8FCD\u8FD6\u8FD5\u8FD7\u90B2\u90B4\u90AF\u90B3\u90B0\u9639\u963D\u963C\u963A\u9643\u4FCD\u4FC5\u4FD3\u4FB2\u4FC9\u4FCB\u4FC1\u4FD4\u4FDC\u4FD9\u4FBB\u4FB3\u4FDB\u4FC7\u4FD6\u4FBA\u4FC0\u4FB9\u4FEC\u5244\u5249\u52C0\u52C2\u533D\u537C\u5397\u5396\u5399\u5398\u54BA\u54A1\u54AD\u54A5\u54CF\u54C3\u830D\u54B7\u54AE\u54D6\u54B6\u54C5\u54C6\u54A0\u5470\u54BC\u54A2\u54BE\u5472\u54DE\u54B0\u57B5\u579E\u579F\u57A4\u578C\u5797\u579D\u579B\u5794\u5798\u578F\u5799\u57A5\u579A\u5795\u58F4\u590D\u5953\u59E1\u59DE\u59EE\u5A00\u59F1\u59DD\u59FA\u59FD\u59FC\u59F6\u59E4\u59F2\u59F7\u59DB\u59E9\u59F3\u59F5\u59E0\u59FE\u59F4\u59ED\u5BA8\u5C4C\u5CD0\u5CD8\u5CCC\u5CD7\u5CCB\u5CDB\u5CDE\u5CDA\u5CC9\u5CC7\u5CCA\u5CD6\u5CD3\u5CD4\u5CCF\u5CC8\u5CC6\u5CCE\u5CDF\u5CF8\u5DF9\u5E21\u5E22\u5E23\u5E20\u5E24\u5EB0\u5EA4\u5EA2\u5E9B\u5EA3\u5EA5\u5F07\u5F2E\u5F56\u5F86\u6037\u6039\u6054\u6072\u605E\u6045\u6053\u6047\u6049\u605B\u604C\u6040
\u6042\u605F\u6024\u6044\u6058\u6066\u606E\u6242\u6243\u62CF\u630D\u630B\u62F5\u630E\u6303\u62EB\u62F9\u630F\u630C\u62F8\u62F6\u6300\u6313\u6314\u62FA\u6315\u62FB\u62F0\u6541\u6543\u65AA\u65BF\u6636\u6621\u6632\u6635\u661C\u6626\u6622\u6633\u662B\u663A\u661D\u6634\u6639\u662E\u670F\u6710\u67C1\u67F2\u67C8\u67BA\u67DC\u67BB\u67F8\u67D8\u67C0\u67B7\u67C5\u67EB\u67E4\u67DF\u67B5\u67CD\u67B3\u67F7\u67F6\u67EE\u67E3\u67C2\u67B9\u67CE\u67E7\u67F0\u67B2\u67FC\u67C6\u67ED\u67CC\u67AE\u67E6\u67DB\u67FA\u67C9\u67CA\u67C3\u67EA\u67CB\u6B28\u6B82\u6B84\u6BB6\u6BD6\u6BD8\u6BE0\u6C20\u6C21\u6D28\u6D34\u6D2D\u6D1F\u6D3C\u6D3F\u6D12\u6D0A\u6CDA\u6D33\u6D04\u6D19\u6D3A\u6D1A\u6D11\u6D00\u6D1D\u6D42\u6D01\u6D18\u6D37\u6D03\u6D0F\u6D40\u6D07\u6D20\u6D2C\u6D08\u6D22\u6D09\u6D10\u70B7\u709F\u70BE\u70B1\u70B0\u70A1\u70B4\u70B5\u70A9\u7241\u7249\u724A\u726C\u7270\u7273\u726E\u72CA\u72E4\u72E8\u72EB\u72DF\u72EA\u72E6\u72E3\u7385\u73CC\u73C2\u73C8\u73C5\u73B9\u73B6\u73B5\u73B4\u73EB\u73BF\u73C7\u73BE\u73C3\u73C6\u73B8\u73CB\u74EC\u74EE\u752E\u7547\u7548\u75A7\u75AA\u7679\u76C4\u7708\u7703\u7704\u7705\u770A\u76F7\u76FB\u76FA\u77E7\u77E8\u7806\u7811\u7812\u7805\u7810\u780F\u780E\u7809\u7803\u7813\u794A\u794C\u794B\u7945\u7944\u79D5\u79CD\u79CF\u79D6\u79CE\u7A80\u7A7E\u7AD1\u7B00\u7B01\u7C7A\u7C78\u7C79\u7C7F\u7C80\u7C81\u7D03\u7D08\u7D01\u7F58\u7F91\u7F8D\u7FBE\u8007\u800E\u800F\u8014\u8037\u80D8\u80C7\u80E0\u80D1\u80C8\u80C2\u80D0\u80C5\u80E3\u80D9\u80DC\u80CA\u80D5\u80C9\u80CF\u80D7\u80E6\u80CD\u81FF\u8221\u8294\u82D9\u82FE\u82F9\u8307\u82E8\u8300\u82D5\u833A\u82EB\u82D6\u82F4\u82EC\u82E1\u82F2\u82F5\u830C\u82FB\u82F6\u82F0\u82EA\u82E4\u82E0\u82FA\u82F3\u82ED\u8677\u8674\u867C\u8673\u8841\u884E\u8867\u886A\u8869\u89D3\u8A04\u8A07\u8D72\u8FE3\u8FE1\u8FEE\u8FE0\u90F1\u90BD\u90BF\u90D5\u90C5\u90BE\u90C7\u90CB\u90C8\u91D4\u91D3\u9654\u964F\u9651\u9653\u964A\u964E\u501E\u5005\u5007\u5013\u5022\u5030\u501B\u4FF5\u4FF4\u5033\u5037\u502C\u4FF6\u4FF7\u5017\u501C\u5020\u5027\u5035\u502F\u5031\u500E\u
515A\u5194\u5193\u51CA\u51C4\u51C5\u51C8\u51CE\u5261\u525A\u5252\u525E\u525F\u5255\u5262\u52CD\u530E\u539E\u5526\u54E2\u5517\u5512\u54E7\u54F3\u54E4\u551A\u54FF\u5504\u5508\u54EB\u5511\u5505\u54F1\u550A\u54FB\u54F7\u54F8\u54E0\u550E\u5503\u550B\u5701\u5702\u57CC\u5832\u57D5\u57D2\u57BA\u57C6\u57BD\u57BC\u57B8\u57B6\u57BF\u57C7\u57D0\u57B9\u57C1\u590E\u594A\u5A19\u5A16\u5A2D\u5A2E\u5A15\u5A0F\u5A17\u5A0A\u5A1E\u5A33\u5B6C\u5BA7\u5BAD\u5BAC\u5C03\u5C56\u5C54\u5CEC\u5CFF\u5CEE\u5CF1\u5CF7\u5D00\u5CF9\u5E29\u5E28\u5EA8\u5EAE\u5EAA\u5EAC\u5F33\u5F30\u5F67\u605D\u605A\u6067\u6041\u60A2\u6088\u6080\u6092\u6081\u609D\u6083\u6095\u609B\u6097\u6087\u609C\u608E\u6219\u6246\u62F2\u6310\u6356\u632C\u6344\u6345\u6336\u6343\u63E4\u6339\u634B\u634A\u633C\u6329\u6341\u6334\u6358\u6354\u6359\u632D\u6347\u6333\u635A\u6351\u6338\u6357\u6340\u6348\u654A\u6546\u65C6\u65C3\u65C4\u65C2\u664A\u665F\u6647\u6651\u6712\u6713\u681F\u681A\u6849\u6832\u6833\u683B\u684B\u684F\u6816\u6831\u681C\u6835\u682B\u682D\u682F\u684E\u6844\u6834\u681D\u6812\u6814\u6826\u6828\u682E\u684D\u683A\u6825\u6820\u6B2C\u6B2F\u6B2D\u6B31\u6B34\u6B6D\u8082\u6B88\u6BE6\u6BE4\u6BE8\u6BE3\u6BE2\u6BE7\u6C25\u6D7A\u6D63\u6D64\u6D76\u6D0D\u6D61\u6D92\u6D58\u6D62\u6D6D\u6D6F\u6D91\u6D8D\u6DEF\u6D7F\u6D86\u6D5E\u6D67\u6D60\u6D97\u6D70\u6D7C\u6D5F\u6D82\u6D98\u6D2F\u6D68\u6D8B\u6D7E\u6D80\u6D84\u6D16\u6D83\u6D7B\u6D7D\u6D75\u6D90\u70DC\u70D3\u70D1\u70DD\u70CB\u7F39\u70E2\u70D7\u70D2\u70DE\u70E0\u70D4\u70CD\u70C5\u70C6\u70C7\u70DA\u70CE\u70E1\u7242\u7278\u7277\u7276\u7300\u72FA\u72F4\u72FE\u72F6\u72F3\u72FB\u7301\u73D3\u73D9\u73E5\u73D6\u73BC\u73E7\u73E3\u73E9\u73DC\u73D2\u73DB\u73D4\u73DD\u73DA\u73D7\u73D8\u73E8\u74DE\u74DF\u74F4\u74F5\u7521\u755B\u755F\u75B0\u75C1\u75BB\u75C4\u75C0\u75BF\u75B6\u75BA\u768A\u76C9\u771D\u771B\u7710\u7713\u7712\u7723\u7711\u7715\u7719\u771A\u7722\u7727\u7823\u782C\u7822\u7835\u782F\u7828\u782E\u782B\u7821\u7829\u7833\u782A\u7831\u7954\u795B\u794F\u795C\u7953\u7952\u7951\u79EB\u79EC\u79E0\u79EE\u79
ED\u79EA\u79DC\u79DE\u79DD\u7A86\u7A89\u7A85\u7A8B\u7A8C\u7A8A\u7A87\u7AD8\u7B10\u7B04\u7B13\u7B05\u7B0F\u7B08\u7B0A\u7B0E\u7B09\u7B12\u7C84\u7C91\u7C8A\u7C8C\u7C88\u7C8D\u7C85\u7D1E\u7D1D\u7D11\u7D0E\u7D18\u7D16\u7D13\u7D1F\u7D12\u7D0F\u7D0C\u7F5C\u7F61\u7F5E\u7F60\u7F5D\u7F5B\u7F96\u7F92\u7FC3\u7FC2\u7FC0\u8016\u803E\u8039\u80FA\u80F2\u80F9\u80F5\u8101\u80FB\u8100\u8201\u822F\u8225\u8333\u832D\u8344\u8319\u8351\u8325\u8356\u833F\u8341\u8326\u831C\u8322\u8342\u834E\u831B\u832A\u8308\u833C\u834D\u8316\u8324\u8320\u8337\u832F\u8329\u8347\u8345\u834C\u8353\u831E\u832C\u834B\u8327\u8348\u8653\u8652\u86A2\u86A8\u8696\u868D\u8691\u869E\u8687\u8697\u8686\u868B\u869A\u8685\u86A5\u8699\u86A1\u86A7\u8695\u8698\u868E\u869D\u8690\u8694\u8843\u8844\u886D\u8875\u8876\u8872\u8880\u8871\u887F\u886F\u8883\u887E\u8874\u887C\u8A12\u8C47\u8C57\u8C7B\u8CA4\u8CA3\u8D76\u8D78\u8DB5\u8DB7\u8DB6\u8ED1\u8ED3\u8FFE\u8FF5\u9002\u8FFF\u8FFB\u9004\u8FFC\u8FF6\u90D6\u90E0\u90D9\u90DA\u90E3\u90DF\u90E5\u90D8\u90DB\u90D7\u90DC\u90E4\u9150\u914E\u914F\u91D5\u91E2\u91DA\u965C\u965F\u96BC\u98E3\u9ADF\u9B2F\u4E7F\u5070\u506A\u5061\u505E\u5060\u5053\u504B\u505D\u5072\u5048\u504D\u5041\u505B\u504A\u5062\u5015\u5045\u505F\u5069\u506B\u5063\u5064\u5046\u5040\u506E\u5073\u5057\u5051\u51D0\u526B\u526D\u526C\u526E\u52D6\u52D3\u532D\u539C\u5575\u5576\u553C\u554D\u5550\u5534\u552A\u5551\u5562\u5536\u5535\u5530\u5552\u5545\u550C\u5532\u5565\u554E\u5539\u5548\u552D\u553B\u5540\u554B\u570A\u5707\u57FB\u5814\u57E2\u57F6\u57DC\u57F4\u5800\u57ED\u57FD\u5808\u57F8\u580B\u57F3\u57CF\u5807\u57EE\u57E3\u57F2\u57E5\u57EC\u57E1\u580E\u57FC\u5810\u57E7\u5801\u580C\u57F1\u57E9\u57F0\u580D\u5804\u595C\u5A60\u5A58\u5A55\u5A67\u5A5E\u5A38\u5A35\u5A6D\u5A50\u5A5F\u5A65\u5A6C\u5A53\u5A64\u5A57\u5A43\u5A5D\u5A52\u5A44\u5A5B\u5A48\u5A8E\u5A3E\u5A4D\u5A39\u5A4C\u5A70\u5A69\u5A47\u5A51\u5A56\u5A42\u5A5C\u5B72\u5B6E\u5BC1\u5BC0\u5C59\u5D1E\u5D0B\u5D1D\u5D1A\u5D20\u5D0C\u5D28\u5D0D\u5D26\u5D25\u5D0F\u5D30\u5D12\u5D23\u5D1F\u5D2E\u5E3E
\u5E34\u5EB1\u5EB4\u5EB9\u5EB2\u5EB3\u5F36\u5F38\u5F9B\u5F96\u5F9F\u608A\u6090\u6086\u60BE\u60B0\u60BA\u60D3\u60D4\u60CF\u60E4\u60D9\u60DD\u60C8\u60B1\u60DB\u60B7\u60CA\u60BF\u60C3\u60CD\u60C0\u6332\u6365\u638A\u6382\u637D\u63BD\u639E\u63AD\u639D\u6397\u63AB\u638E\u636F\u6387\u6390\u636E\u63AF\u6375\u639C\u636D\u63AE\u637C\u63A4\u633B\u639F\u6378\u6385\u6381\u6391\u638D\u6370\u6553\u65CD\u6665\u6661\u665B\u6659\u665C\u6662\u6718\u6879\u6887\u6890\u689C\u686D\u686E\u68AE\u68AB\u6956\u686F\u68A3\u68AC\u68A9\u6875\u6874\u68B2\u688F\u6877\u6892\u687C\u686B\u6872\u68AA\u6880\u6871\u687E\u689B\u6896\u688B\u68A0\u6889\u68A4\u6878\u687B\u6891\u688C\u688A\u687D\u6B36\u6B33\u6B37\u6B38\u6B91\u6B8F\u6B8D\u6B8E\u6B8C\u6C2A\u6DC0\u6DAB\u6DB4\u6DB3\u6E74\u6DAC\u6DE9\u6DE2\u6DB7\u6DF6\u6DD4\u6E00\u6DC8\u6DE0\u6DDF\u6DD6\u6DBE\u6DE5\u6DDC\u6DDD\u6DDB\u6DF4\u6DCA\u6DBD\u6DED\u6DF0\u6DBA\u6DD5\u6DC2\u6DCF\u6DC9\u6DD0\u6DF2\u6DD3\u6DFD\u6DD7\u6DCD\u6DE3\u6DBB\u70FA\u710D\u70F7\u7117\u70F4\u710C\u70F0\u7104\u70F3\u7110\u70FC\u70FF\u7106\u7113\u7100\u70F8\u70F6\u710B\u7102\u710E\u727E\u727B\u727C\u727F\u731D\u7317\u7307\u7311\u7318\u730A\u7308\u72FF\u730F\u731E\u7388\u73F6\u73F8\u73F5\u7404\u7401\u73FD\u7407\u7400\u73FA\u73FC\u73FF\u740C\u740B\u73F4\u7408\u7564\u7563\u75CE\u75D2\u75CF\u75CB\u75CC\u75D1\u75D0\u768F\u7689\u76D3\u7739\u772F\u772D\u7731\u7732\u7734\u7733\u773D\u7725\u773B\u7735\u7848\u7852\u7849\u784D\u784A\u784C\u7826\u7845\u7850\u7964\u7967\u7969\u796A\u7963\u796B\u7961\u79BB\u79FA\u79F8\u79F6\u79F7\u7A8F\u7A94\u7A90\u7B35\u7B47\u7B34\u7B25\u7B30\u7B22\u7B24\u7B33\u7B18\u7B2A\u7B1D\u7B31\u7B2B\u7B2D\u7B2F\u7B32\u7B38\u7B1A\u7B23\u7C94\u7C98\u7C96\u7CA3\u7D35\u7D3D\u7D38\u7D36\u7D3A\u7D45\u7D2C\u7D29\u7D41\u7D47\u7D3E\u7D3F\u7D4A\u7D3B\u7D28\u7F63\u7F95\u7F9C\u7F9D\u7F9B\u7FCA\u7FCB\u7FCD\u7FD0\u7FD1\u7FC7\u7FCF\u7FC9\u801F\u801E\u801B\u8047\u8043\u8048\u8118\u8125\u8119\u811B\u812D\u811F\u812C\u811E\u8121\u8115\u8127\u811D\u8122\u8211\u8238\u8233\u823A\u8234\u8232\u8274\u
8390\u83A3\u83A8\u838D\u837A\u8373\u83A4\u8374\u838F\u8381\u8395\u8399\u8375\u8394\u83A9\u837D\u8383\u838C\u839D\u839B\u83AA\u838B\u837E\u83A5\u83AF\u8388\u8397\u83B0\u837F\u83A6\u8387\u83AE\u8376\u839A\u8659\u8656\u86BF\u86B7\u86C2\u86C1\u86C5\u86BA\u86B0\u86C8\u86B9\u86B3\u86B8\u86CC\u86B4\u86BB\u86BC\u86C3\u86BD\u86BE\u8852\u8889\u8895\u88A8\u88A2\u88AA\u889A\u8891\u88A1\u889F\u8898\u88A7\u8899\u889B\u8897\u88A4\u88AC\u888C\u8893\u888E\u8982\u89D6\u89D9\u89D5\u8A30\u8A27\u8A2C\u8A1E\u8C39\u8C3B\u8C5C\u8C5D\u8C7D\u8CA5\u8D7D\u8D7B\u8D79\u8DBC\u8DC2\u8DB9\u8DBF\u8DC1\u8ED8\u8EDE\u8EDD\u8EDC\u8ED7\u8EE0\u8EE1\u9024\u900B\u9011\u901C\u900C\u9021\u90EF\u90EA\u90F0\u90F4\u90F2\u90F3\u90D4\u90EB\u90EC\u90E9\u9156\u9158\u915A\u9153\u9155\u91EC\u91F4\u91F1\u91F3\u91F8\u91E4\u91F9\u91EA\u91EB\u91F7\u91E8\u91EE\u957A\u9586\u9588\u967C\u966D\u966B\u9671\u966F\u96BF\u976A\u9804\u98E5\u9997\u509B\u5095\u5094\u509E\u508B\u50A3\u5083\u508C\u508E\u509D\u5068\u509C\u5092\u5082\u5087\u515F\u51D4\u5312\u5311\u53A4\u53A7\u5591\u55A8\u55A5\u55AD\u5577\u5645\u55A2\u5593\u5588\u558F\u55B5\u5581\u55A3\u5592\u55A4\u557D\u558C\u55A6\u557F\u5595\u55A1\u558E\u570C\u5829\u5837\u5819\u581E\u5827\u5823\u5828\u57F5\u5848\u5825\u581C\u581B\u5833\u583F\u5836\u582E\u5839\u5838\u582D\u582C\u583B\u5961\u5AAF\u5A94\u5A9F\u5A7A\u5AA2\u5A9E\u5A78\u5AA6\u5A7C\u5AA5\u5AAC\u5A95\u5AAE\u5A37\u5A84\u5A8A\u5A97\u5A83\u5A8B\u5AA9\u5A7B\u5A7D\u5A8C\u5A9C\u5A8F\u5A93\u5A9D\u5BEA\u5BCD\u5BCB\u5BD4\u5BD1\u5BCA\u5BCE\u5C0C\u5C30\u5D37\u5D43\u5D6B\u5D41\u5D4B\u5D3F\u5D35\u5D51\u5D4E\u5D55\u5D33\u5D3A\u5D52\u5D3D\u5D31\u5D59\u5D42\u5D39\u5D49\u5D38\u5D3C\u5D32\u5D36\u5D40\u5D45\u5E44\u5E41\u5F58\u5FA6\u5FA5\u5FAB\u60C9\u60B9\u60CC\u60E2\u60CE\u60C4\u6114\u60F2\u610A\u6116\u6105\u60F5\u6113\u60F8\u60FC\u60FE\u60C1\u6103\u6118\u611D\u6110\u60FF\u6104\u610B\u624A\u6394\u63B1\u63B0\u63CE\u63E5\u63E8\u63EF\u63C3\u649D\u63F3\u63CA\u63E0\u63F6\u63D5\u63F2\u63F5\u6461\u63DF\u63BE\u63DD\u63DC\u63C4\u63D8\u63D3\u63C2\u63C7\u63
CC\u63CB\u63C8\u63F0\u63D7\u63D9\u6532\u6567\u656A\u6564\u655C\u6568\u6565\u658C\u659D\u659E\u65AE\u65D0\u65D2\u667C\u666C\u667B\u6680\u6671\u6679\u666A\u6672\u6701\u690C\u68D3\u6904\u68DC\u692A\u68EC\u68EA\u68F1\u690F\u68D6\u68F7\u68EB\u68E4\u68F6\u6913\u6910\u68F3\u68E1\u6907\u68CC\u6908\u6970\u68B4\u6911\u68EF\u68C6\u6914\u68F8\u68D0\u68FD\u68FC\u68E8\u690B\u690A\u6917\u68CE\u68C8\u68DD\u68DE\u68E6\u68F4\u68D1\u6906\u68D4\u68E9\u6915\u6925\u68C7\u6B39\u6B3B\u6B3F\u6B3C\u6B94\u6B97\u6B99\u6B95\u6BBD\u6BF0\u6BF2\u6BF3\u6C30\u6DFC\u6E46\u6E47\u6E1F\u6E49\u6E88\u6E3C\u6E3D\u6E45\u6E62\u6E2B\u6E3F\u6E41\u6E5D\u6E73\u6E1C\u6E33\u6E4B\u6E40\u6E51\u6E3B\u6E03\u6E2E\u6E5E\u6E68\u6E5C\u6E61\u6E31\u6E28\u6E60\u6E71\u6E6B\u6E39\u6E22\u6E30\u6E53\u6E65\u6E27\u6E78\u6E64\u6E77\u6E55\u6E79\u6E52\u6E66\u6E35\u6E36\u6E5A\u7120\u711E\u712F\u70FB\u712E\u7131\u7123\u7125\u7122\u7132\u711F\u7128\u713A\u711B\u724B\u725A\u7288\u7289\u7286\u7285\u728B\u7312\u730B\u7330\u7322\u7331\u7333\u7327\u7332\u732D\u7326\u7323\u7335\u730C\u742E\u742C\u7430\u742B\u7416\u741A\u7421\u742D\u7431\u7424\u7423\u741D\u7429\u7420\u7432\u74FB\u752F\u756F\u756C\u75E7\u75DA\u75E1\u75E6\u75DD\u75DF\u75E4\u75D7\u7695\u7692\u76DA\u7746\u7747\u7744\u774D\u7745\u774A\u774E\u774B\u774C\u77DE\u77EC\u7860\u7864\u7865\u785C\u786D\u7871\u786A\u786E\u7870\u7869\u7868\u785E\u7862\u7974\u7973\u7972\u7970\u7A02\u7A0A\u7A03\u7A0C\u7A04\u7A99\u7AE6\u7AE4\u7B4A\u7B3B\u7B44\u7B48\u7B4C\u7B4E\u7B40\u7B58\u7B45\u7CA2\u7C9E\u7CA8\u7CA1\u7D58\u7D6F\u7D63\u7D53\u7D56\u7D67\u7D6A\u7D4F\u7D6D\u7D5C\u7D6B\u7D52\u7D54\u7D69\u7D51\u7D5F\u7D4E\u7F3E\u7F3F\u7F65\u7F66\u7FA2\u7FA0\u7FA1\u7FD7\u8051\u804F\u8050\u80FE\u80D4\u8143\u814A\u8152\u814F\u8147\u813D\u814D\u813A\u81E6\u81EE\u81F7\u81F8\u81F9\u8204\u823C\u823D\u823F\u8275\u833B\u83CF\u83F9\u8423\u83C0\u83E8\u8412\u83E7\u83E4\u83FC\u83F6\u8410\u83C6\u83C8\u83EB\u83E3\u83BF\u8401\u83DD\u83E5\u83D8\u83FF\u83E1\u83CB\u83CE\u83D6\u83F5\u83C9\u8409\u840F\u83DE\u8411\u8406\u83C2\u83F3\u83D5
\u83FA\u83C7\u83D1\u83EA\u8413\u83C3\u83EC\u83EE\u83C4\u83FB\u83D7\u83E2\u841B\u83DB\u83FE\u86D8\u86E2\u86E6\u86D3\u86E3\u86DA\u86EA\u86DD\u86EB\u86DC\u86EC\u86E9\u86D7\u86E8\u86D1\u8848\u8856\u8855\u88BA\u88D7\u88B9\u88B8\u88C0\u88BE\u88B6\u88BC\u88B7\u88BD\u88B2\u8901\u88C9\u8995\u8998\u8997\u89DD\u89DA\u89DB\u8A4E\u8A4D\u8A39\u8A59\u8A40\u8A57\u8A58\u8A44\u8A45\u8A52\u8A48\u8A51\u8A4A\u8A4C\u8A4F\u8C5F\u8C81\u8C80\u8CBA\u8CBE\u8CB0\u8CB9\u8CB5\u8D84\u8D80\u8D89\u8DD8\u8DD3\u8DCD\u8DC7\u8DD6\u8DDC\u8DCF\u8DD5\u8DD9\u8DC8\u8DD7\u8DC5\u8EEF\u8EF7\u8EFA\u8EF9\u8EE6\u8EEE\u8EE5\u8EF5\u8EE7\u8EE8\u8EF6\u8EEB\u8EF1\u8EEC\u8EF4\u8EE9\u902D\u9034\u902F\u9106\u912C\u9104\u90FF\u90FC\u9108\u90F9\u90FB\u9101\u9100\u9107\u9105\u9103\u9161\u9164\u915F\u9162\u9160\u9201\u920A\u9225\u9203\u921A\u9226\u920F\u920C\u9200\u9212\u91FF\u91FD\u9206\u9204\u9227\u9202\u921C\u9224\u9219\u9217\u9205\u9216\u957B\u958D\u958C\u9590\u9687\u967E\u9688\u9689\u9683\u9680\u96C2\u96C8\u96C3\u96F1\u96F0\u976C\u9770\u976E\u9807\u98A9\u98EB\u9CE6\u9EF9\u4E83\u4E84\u4EB6\u50BD\u50BF\u50C6\u50AE\u50C4\u50CA\u50B4\u50C8\u50C2\u50B0\u50C1\u50BA\u50B1\u50CB\u50C9\u50B6\u50B8\u51D7\u527A\u5278\u527B\u527C\u55C3\u55DB\u55CC\u55D0\u55CB\u55CA\u55DD\u55C0\u55D4\u55C4\u55E9\u55BF\u55D2\u558D\u55CF\u55D5\u55E2\u55D6\u55C8\u55F2\u55CD\u55D9\u55C2\u5714\u5853\u5868\u5864\u584F\u584D\u5849\u586F\u5855\u584E\u585D\u5859\u5865\u585B\u583D\u5863\u5871\u58FC\u5AC7\u5AC4\u5ACB\u5ABA\u5AB8\u5AB1\u5AB5\u5AB0\u5ABF\u5AC8\u5ABB\u5AC6\u5AB7\u5AC0\u5ACA\u5AB4\u5AB6\u5ACD\u5AB9\u5A90\u5BD6\u5BD8\u5BD9\u5C1F\u5C33\u5D71\u5D63\u5D4A\u5D65\u5D72\u5D6C\u5D5E\u5D68\u5D67\u5D62\u5DF0\u5E4F\u5E4E\u5E4A\u5E4D\u5E4B\u5EC5\u5ECC\u5EC6\u5ECB\u5EC7\u5F40\u5FAF\u5FAD\u60F7\u6149\u614A\u612B\u6145\u6136\u6132\u612E\u6146\u612F\u614F\u6129\u6140\u6220\u9168\u6223\u6225\u6224\u63C5\u63F1\u63EB\u6410\u6412\u6409\u6420\u6424\u6433\u6443\u641F\u6415\u6418\u6439\u6437\u6422\u6423\u640C\u6426\u6430\u6428\u6441\u6435\u642F\u640A\u641A\u6440\u6425\u
6427\u640B\u63E7\u641B\u642E\u6421\u640E\u656F\u6592\u65D3\u6686\u668C\u6695\u6690\u668B\u668A\u6699\u6694\u6678\u6720\u6966\u695F\u6938\u694E\u6962\u6971\u693F\u6945\u696A\u6939\u6942\u6957\u6959\u697A\u6948\u6949\u6935\u696C\u6933\u693D\u6965\u68F0\u6978\u6934\u6969\u6940\u696F\u6944\u6976\u6958\u6941\u6974\u694C\u693B\u694B\u6937\u695C\u694F\u6951\u6932\u6952\u692F\u697B\u693C\u6B46\u6B45\u6B43\u6B42\u6B48\u6B41\u6B9B\uFA0D\u6BFB\u6BFC\u6BF9\u6BF7\u6BF8\u6E9B\u6ED6\u6EC8\u6E8F\u6EC0\u6E9F\u6E93\u6E94\u6EA0\u6EB1\u6EB9\u6EC6\u6ED2\u6EBD\u6EC1\u6E9E\u6EC9\u6EB7\u6EB0\u6ECD\u6EA6\u6ECF\u6EB2\u6EBE\u6EC3\u6EDC\u6ED8\u6E99\u6E92\u6E8E\u6E8D\u6EA4\u6EA1\u6EBF\u6EB3\u6ED0\u6ECA\u6E97\u6EAE\u6EA3\u7147\u7154\u7152\u7163\u7160\u7141\u715D\u7162\u7172\u7178\u716A\u7161\u7142\u7158\u7143\u714B\u7170\u715F\u7150\u7153\u7144\u714D\u715A\u724F\u728D\u728C\u7291\u7290\u728E\u733C\u7342\u733B\u733A\u7340\u734A\u7349\u7444\u744A\u744B\u7452\u7451\u7457\u7440\u744F\u7450\u744E\u7442\u7446\u744D\u7454\u74E1\u74FF\u74FE\u74FD\u751D\u7579\u7577\u6983\u75EF\u760F\u7603\u75F7\u75FE\u75FC\u75F9\u75F8\u7610\u75FB\u75F6\u75ED\u75F5\u75FD\u7699\u76B5\u76DD\u7755\u775F\u7760\u7752\u7756\u775A\u7769\u7767\u7754\u7759\u776D\u77E0\u7887\u789A\u7894\u788F\u7884\u7895\u7885\u7886\u78A1\u7883\u7879\u7899\u7880\u7896\u787B\u797C\u7982\u797D\u7979\u7A11\u7A18\u7A19\u7A12\u7A17\u7A15\u7A22\u7A13\u7A1B\u7A10\u7AA3\u7AA2\u7A9E\u7AEB\u7B66\u7B64\u7B6D\u7B74\u7B69\u7B72\u7B65\u7B73\u7B71\u7B70\u7B61\u7B78\u7B76\u7B63\u7CB2\u7CB4\u7CAF\u7D88\u7D86\u7D80\u7D8D\u7D7F\u7D85\u7D7A\u7D8E\u7D7B\u7D83\u7D7C\u7D8C\u7D94\u7D84\u7D7D\u7D92\u7F6D\u7F6B\u7F67\u7F68\u7F6C\u7FA6\u7FA5\u7FA7\u7FDB\u7FDC\u8021\u8164\u8160\u8177\u815C\u8169\u815B\u8162\u8172\u6721\u815E\u8176\u8167\u816F\u8144\u8161\u821D\u8249\u8244\u8240\u8242\u8245\u84F1\u843F\u8456\u8476\u8479\u848F\u848D\u8465\u8451\u8440\u8486\u8467\u8430\u844D\u847D\u845A\u8459\u8474\u8473\u845D\u8507\u845E\u8437\u843A\u8434\u847A\u8443\u8478\u8432\u8445\u8429\u83
D9\u844B\u842F\u8442\u842D\u845F\u8470\u8439\u844E\u844C\u8452\u846F\u84C5\u848E\u843B\u8447\u8436\u8433\u8468\u847E\u8444\u842B\u8460\u8454\u846E\u8450\u870B\u8704\u86F7\u870C\u86FA\u86D6\u86F5\u874D\u86F8\u870E\u8709\u8701\u86F6\u870D\u8705\u88D6\u88CB\u88CD\u88CE\u88DE\u88DB\u88DA\u88CC\u88D0\u8985\u899B\u89DF\u89E5\u89E4\u89E1\u89E0\u89E2\u89DC\u89E6\u8A76\u8A86\u8A7F\u8A61\u8A3F\u8A77\u8A82\u8A84\u8A75\u8A83\u8A81\u8A74\u8A7A\u8C3C\u8C4B\u8C4A\u8C65\u8C64\u8C66\u8C86\u8C84\u8C85\u8CCC\u8D68\u8D69\u8D91\u8D8C\u8D8E\u8D8F\u8D8D\u8D93\u8D94\u8D90\u8D92\u8DF0\u8DE0\u8DEC\u8DF1\u8DEE\u8DD0\u8DE9\u8DE3\u8DE2\u8DE7\u8DF2\u8DEB\u8DF4\u8F06\u8EFF\u8F01\u8F00\u8F05\u8F07\u8F08\u8F02\u8F0B\u9052\u903F\u9044\u9049\u903D\u9110\u910D\u910F\u9111\u9116\u9114\u910B\u910E\u916E\u916F\u9248\u9252\u9230\u923A\u9266\u9233\u9265\u925E\u9283\u922E\u924A\u9246\u926D\u926C\u924F\u9260\u9267\u926F\u9236\u9261\u9270\u9231\u9254\u9263\u9250\u9272\u924E\u9253\u924C\u9256\u9232\u959F\u959C\u959E\u959B\u9692\u9693\u9691\u9697\u96CE\u96FA\u96FD\u96F8\u96F5\u9773\u9777\u9778\u9772\u980F\u980D\u980E\u98AC\u98F6\u98F9\u99AF\u99B2\u99B0\u99B5\u9AAD\u9AAB\u9B5B\u9CEA\u9CED\u9CE7\u9E80\u9EFD\u50E6\u50D4\u50D7\u50E8\u50F3\u50DB\u50EA\u50DD\u50E4\u50D3\u50EC\u50F0\u50EF\u50E3\u50E0\u51D8\u5280\u5281\u52E9\u52EB\u5330\u53AC\u5627\u5615\u560C\u5612\u55FC\u560F\u561C\u5601\u5613\u5602\u55FA\u561D\u5604\u55FF\u55F9\u5889\u587C\u5890\u5898\u5886\u5881\u587F\u5874\u588B\u587A\u5887\u5891\u588E\u5876\u5882\u5888\u587B\u5894\u588F\u58FE\u596B\u5ADC\u5AEE\u5AE5\u5AD5\u5AEA\u5ADA\u5AED\u5AEB\u5AF3\u5AE2\u5AE0\u5ADB\u5AEC\u5ADE\u5ADD\u5AD9\u5AE8\u5ADF\u5B77\u5BE0\u5BE3\u5C63\u5D82\u5D80\u5D7D\u5D86\u5D7A\u5D81\u5D77\u5D8A\u5D89\u5D88\u5D7E\u5D7C\u5D8D\u5D79\u5D7F\u5E58\u5E59\u5E53\u5ED8\u5ED1\u5ED7\u5ECE\u5EDC\u5ED5\u5ED9\u5ED2\u5ED4\u5F44\u5F43\u5F6F\u5FB6\u612C\u6128\u6141\u615E\u6171\u6173\u6152\u6153\u6172\u616C\u6180\u6174\u6154\u617A\u615B\u6165\u613B\u616A\u6161\u6156\u6229\u6227\u622B\u642B\u644D\u645B
\u645D\u6474\u6476\u6472\u6473\u647D\u6475\u6466\u64A6\u644E\u6482\u645E\u645C\u644B\u6453\u6460\u6450\u647F\u643F\u646C\u646B\u6459\u6465\u6477\u6573\u65A0\u66A1\u66A0\u669F\u6705\u6704\u6722\u69B1\u69B6\u69C9\u69A0\u69CE\u6996\u69B0\u69AC\u69BC\u6991\u6999\u698E\u69A7\u698D\u69A9\u69BE\u69AF\u69BF\u69C4\u69BD\u69A4\u69D4\u69B9\u69CA\u699A\u69CF\u69B3\u6993\u69AA\u69A1\u699E\u69D9\u6997\u6990\u69C2\u69B5\u69A5\u69C6\u6B4A\u6B4D\u6B4B\u6B9E\u6B9F\u6BA0\u6BC3\u6BC4\u6BFE\u6ECE\u6EF5\u6EF1\u6F03\u6F25\u6EF8\u6F37\u6EFB\u6F2E\u6F09\u6F4E\u6F19\u6F1A\u6F27\u6F18\u6F3B\u6F12\u6EED\u6F0A\u6F36\u6F73\u6EF9\u6EEE\u6F2D\u6F40\u6F30\u6F3C\u6F35\u6EEB\u6F07\u6F0E\u6F43\u6F05\u6EFD\u6EF6\u6F39\u6F1C\u6EFC\u6F3A\u6F1F\u6F0D\u6F1E\u6F08\u6F21\u7187\u7190\u7189\u7180\u7185\u7182\u718F\u717B\u7186\u7181\u7197\u7244\u7253\u7297\u7295\u7293\u7343\u734D\u7351\u734C\u7462\u7473\u7471\u7475\u7472\u7467\u746E\u7500\u7502\u7503\u757D\u7590\u7616\u7608\u760C\u7615\u7611\u760A\u7614\u76B8\u7781\u777C\u7785\u7782\u776E\u7780\u776F\u777E\u7783\u78B2\u78AA\u78B4\u78AD\u78A8\u787E\u78AB\u789E\u78A5\u78A0\u78AC\u78A2\u78A4\u7998\u798A\u798B\u7996\u7995\u7994\u7993\u7997\u7988\u7992\u7990\u7A2B\u7A4A\u7A30\u7A2F\u7A28\u7A26\u7AA8\u7AAB\u7AAC\u7AEE\u7B88\u7B9C\u7B8A\u7B91\u7B90\u7B96\u7B8D\u7B8C\u7B9B\u7B8E\u7B85\u7B98\u5284\u7B99\u7BA4\u7B82\u7CBB\u7CBF\u7CBC\u7CBA\u7DA7\u7DB7\u7DC2\u7DA3\u7DAA\u7DC1\u7DC0\u7DC5\u7D9D\u7DCE\u7DC4\u7DC6\u7DCB\u7DCC\u7DAF\u7DB9\u7D96\u7DBC\u7D9F\u7DA6\u7DAE\u7DA9\u7DA1\u7DC9\u7F73\u7FE2\u7FE3\u7FE5\u7FDE\u8024\u805D\u805C\u8189\u8186\u8183\u8187\u818D\u818C\u818B\u8215\u8497\u84A4\u84A1\u849F\u84BA\u84CE\u84C2\u84AC\u84AE\u84AB\u84B9\u84B4\u84C1\u84CD\u84AA\u849A\u84B1\u84D0\u849D\u84A7\u84BB\u84A2\u8494\u84C7\u84CC\u849B\u84A9\u84AF\u84A8\u84D6\u8498\u84B6\u84CF\u84A0\u84D7\u84D4\u84D2\u84DB\u84B0\u8491\u8661\u8733\u8723\u8728\u876B\u8740\u872E\u871E\u8721\u8719\u871B\u8743\u872C\u8741\u873E\u8746\u8720\u8732\u872A\u872D\u873C\u8712\u873A\u8731\u8735\u8742\u8726\u
8727\u8738\u8724\u871A\u8730\u8711\u88F7\u88E7\u88F1\u88F2\u88FA\u88FE\u88EE\u88FC\u88F6\u88FB\u88F0\u88EC\u88EB\u899D\u89A1\u899F\u899E\u89E9\u89EB\u89E8\u8AAB\u8A99\u8A8B\u8A92\u8A8F\u8A96\u8C3D\u8C68\u8C69\u8CD5\u8CCF\u8CD7\u8D96\u8E09\u8E02\u8DFF\u8E0D\u8DFD\u8E0A\u8E03\u8E07\u8E06\u8E05\u8DFE\u8E00\u8E04\u8F10\u8F11\u8F0E\u8F0D\u9123\u911C\u9120\u9122\u911F\u911D\u911A\u9124\u9121\u911B\u917A\u9172\u9179\u9173\u92A5\u92A4\u9276\u929B\u927A\u92A0\u9294\u92AA\u928D\u92A6\u929A\u92AB\u9279\u9297\u927F\u92A3\u92EE\u928E\u9282\u9295\u92A2\u927D\u9288\u92A1\u928A\u9286\u928C\u9299\u92A7\u927E\u9287\u92A9\u929D\u928B\u922D\u969E\u96A1\u96FF\u9758\u977D\u977A\u977E\u9783\u9780\u9782\u977B\u9784\u9781\u977F\u97CE\u97CD\u9816\u98AD\u98AE\u9902\u9900\u9907\u999D\u999C\u99C3\u99B9\u99BB\u99BA\u99C2\u99BD\u99C7\u9AB1\u9AE3\u9AE7\u9B3E\u9B3F\u9B60\u9B61\u9B5F\u9CF1\u9CF2\u9CF5\u9EA7\u50FF\u5103\u5130\u50F8\u5106\u5107\u50F6\u50FE\u510B\u510C\u50FD\u510A\u528B\u528C\u52F1\u52EF\u5648\u5642\u564C\u5635\u5641\u564A\u5649\u5646\u5658\u565A\u5640\u5633\u563D\u562C\u563E\u5638\u562A\u563A\u571A\u58AB\u589D\u58B1\u58A0\u58A3\u58AF\u58AC\u58A5\u58A1\u58FF\u5AFF\u5AF4\u5AFD\u5AF7\u5AF6\u5B03\u5AF8\u5B02\u5AF9\u5B01\u5B07\u5B05\u5B0F\u5C67\u5D99\u5D97\u5D9F\u5D92\u5DA2\u5D93\u5D95\u5DA0\u5D9C\u5DA1\u5D9A\u5D9E\u5E69\u5E5D\u5E60\u5E5C\u7DF3\u5EDB\u5EDE\u5EE1\u5F49\u5FB2\u618B\u6183\u6179\u61B1\u61B0\u61A2\u6189\u619B\u6193\u61AF\u61AD\u619F\u6192\u61AA\u61A1\u618D\u6166\u61B3\u622D\u646E\u6470\u6496\u64A0\u6485\u6497\u649C\u648F\u648B\u648A\u648C\u64A3\u649F\u6468\u64B1\u6498\u6576\u657A\u6579\u657B\u65B2\u65B3\u66B5\u66B0\u66A9\u66B2\u66B7\u66AA\u66AF\u6A00\u6A06\u6A17\u69E5\u69F8\u6A15\u69F1\u69E4\u6A20\u69FF\u69EC\u69E2\u6A1B\u6A1D\u69FE\u6A27\u69F2\u69EE\u6A14\u69F7\u69E7\u6A40\u6A08\u69E6\u69FB\u6A0D\u69FC\u69EB\u6A09\u6A04\u6A18\u6A25\u6A0F\u69F6\u6A26\u6A07\u69F4\u6A16\u6B51\u6BA5\u6BA3\u6BA2\u6BA6\u6C01\u6C00\u6BFF\u6C02\u6F41\u6F26\u6F7E\u6F87\u6FC6\u6F92\u6F8D\u6F89\u6F8C\u6F
62\u6F4F\u6F85\u6F5A\u6F96\u6F76\u6F6C\u6F82\u6F55\u6F72\u6F52\u6F50\u6F57\u6F94\u6F93\u6F5D\u6F00\u6F61\u6F6B\u6F7D\u6F67\u6F90\u6F53\u6F8B\u6F69\u6F7F\u6F95\u6F63\u6F77\u6F6A\u6F7B\u71B2\u71AF\u719B\u71B0\u71A0\u719A\u71A9\u71B5\u719D\u71A5\u719E\u71A4\u71A1\u71AA\u719C\u71A7\u71B3\u7298\u729A\u7358\u7352\u735E\u735F\u7360\u735D\u735B\u7361\u735A\u7359\u7362\u7487\u7489\u748A\u7486\u7481\u747D\u7485\u7488\u747C\u7479\u7508\u7507\u757E\u7625\u761E\u7619\u761D\u761C\u7623\u761A\u7628\u761B\u769C\u769D\u769E\u769B\u778D\u778F\u7789\u7788\u78CD\u78BB\u78CF\u78CC\u78D1\u78CE\u78D4\u78C8\u78C3\u78C4\u78C9\u799A\u79A1\u79A0\u799C\u79A2\u799B\u6B76\u7A39\u7AB2\u7AB4\u7AB3\u7BB7\u7BCB\u7BBE\u7BAC\u7BCE\u7BAF\u7BB9\u7BCA\u7BB5\u7CC5\u7CC8\u7CCC\u7CCB\u7DF7\u7DDB\u7DEA\u7DE7\u7DD7\u7DE1\u7E03\u7DFA\u7DE6\u7DF6\u7DF1\u7DF0\u7DEE\u7DDF\u7F76\u7FAC\u7FB0\u7FAD\u7FED\u7FEB\u7FEA\u7FEC\u7FE6\u7FE8\u8064\u8067\u81A3\u819F\u819E\u8195\u81A2\u8199\u8197\u8216\u824F\u8253\u8252\u8250\u824E\u8251\u8524\u853B\u850F\u8500\u8529\u850E\u8509\u850D\u851F\u850A\u8527\u851C\u84FB\u852B\u84FA\u8508\u850C\u84F4\u852A\u84F2\u8515\u84F7\u84EB\u84F3\u84FC\u8512\u84EA\u84E9\u8516\u84FE\u8528\u851D\u852E\u8502\u84FD\u851E\u84F6\u8531\u8526\u84E7\u84E8\u84F0\u84EF\u84F9\u8518\u8520\u8530\u850B\u8519\u852F\u8662\u8756\u8763\u8764\u8777\u87E1\u8773\u8758\u8754\u875B\u8752\u8761\u875A\u8751\u875E\u876D\u876A\u8750\u874E\u875F\u875D\u876F\u876C\u877A\u876E\u875C\u8765\u874F\u877B\u8775\u8762\u8767\u8769\u885A\u8905\u890C\u8914\u890B\u8917\u8918\u8919\u8906\u8916\u8911\u890E\u8909\u89A2\u89A4\u89A3\u89ED\u89F0\u89EC\u8ACF\u8AC6\u8AB8\u8AD3\u8AD1\u8AD4\u8AD5\u8ABB\u8AD7\u8ABE\u8AC0\u8AC5\u8AD8\u8AC3\u8ABA\u8ABD\u8AD9\u8C3E\u8C4D\u8C8F\u8CE5\u8CDF\u8CD9\u8CE8\u8CDA\u8CDD\u8CE7\u8DA0\u8D9C\u8DA1\u8D9B\u8E20\u8E23\u8E25\u8E24\u8E2E\u8E15\u8E1B\u8E16\u8E11\u8E19\u8E26\u8E27\u8E14\u8E12\u8E18\u8E13\u8E1C\u8E17\u8E1A\u8F2C\u8F24\u8F18\u8F1A\u8F20\u8F23\u8F16\u8F17\u9073\u9070\u906F\u9067\u906B\u912F\u912B\u9129
\u912A\u9132\u9126\u912E\u9185\u9186\u918A\u9181\u9182\u9184\u9180\u92D0\u92C3\u92C4\u92C0\u92D9\u92B6\u92CF\u92F1\u92DF\u92D8\u92E9\u92D7\u92DD\u92CC\u92EF\u92C2\u92E8\u92CA\u92C8\u92CE\u92E6\u92CD\u92D5\u92C9\u92E0\u92DE\u92E7\u92D1\u92D3\u92B5\u92E1\u92C6\u92B4\u957C\u95AC\u95AB\u95AE\u95B0\u96A4\u96A2\u96D3\u9705\u9708\u9702\u975A\u978A\u978E\u9788\u97D0\u97CF\u981E\u981D\u9826\u9829\u9828\u9820\u981B\u9827\u98B2\u9908\u98FA\u9911\u9914\u9916\u9917\u9915\u99DC\u99CD\u99CF\u99D3\u99D4\u99CE\u99C9\u99D6\u99D8\u99CB\u99D7\u99CC\u9AB3\u9AEC\u9AEB\u9AF3\u9AF2\u9AF1\u9B46\u9B43\u9B67\u9B74\u9B71\u9B66\u9B76\u9B75\u9B70\u9B68\u9B64\u9B6C\u9CFC\u9CFA\u9CFD\u9CFF\u9CF7\u9D07\u9D00\u9CF9\u9CFB\u9D08\u9D05\u9D04\u9E83\u9ED3\u9F0F\u9F10\u511C\u5113\u5117\u511A\u5111\u51DE\u5334\u53E1\u5670\u5660\u566E\u5673\u5666\u5663\u566D\u5672\u565E\u5677\u571C\u571B\u58C8\u58BD\u58C9\u58BF\u58BA\u58C2\u58BC\u58C6\u5B17\u5B19\u5B1B\u5B21\u5B14\u5B13\u5B10\u5B16\u5B28\u5B1A\u5B20\u5B1E\u5BEF\u5DAC\u5DB1\u5DA9\u5DA7\u5DB5\u5DB0\u5DAE\u5DAA\u5DA8\u5DB2\u5DAD\u5DAF\u5DB4\u5E67\u5E68\u5E66\u5E6F\u5EE9\u5EE7\u5EE6\u5EE8\u5EE5\u5F4B\u5FBC\u619D\u61A8\u6196\u61C5\u61B4\u61C6\u61C1\u61CC\u61BA\u61BF\u61B8\u618C\u64D7\u64D6\u64D0\u64CF\u64C9\u64BD\u6489\u64C3\u64DB\u64F3\u64D9\u6533\u657F\u657C\u65A2\u66C8\u66BE\u66C0\u66CA\u66CB\u66CF\u66BD\u66BB\u66BA\u66CC\u6723\u6A34\u6A66\u6A49\u6A67\u6A32\u6A68\u6A3E\u6A5D\u6A6D\u6A76\u6A5B\u6A51\u6A28\u6A5A\u6A3B\u6A3F\u6A41\u6A6A\u6A64\u6A50\u6A4F\u6A54\u6A6F\u6A69\u6A60\u6A3C\u6A5E\u6A56\u6A55\u6A4D\u6A4E\u6A46\u6B55\u6B54\u6B56\u6BA7\u6BAA\u6BAB\u6BC8\u6BC7\u6C04\u6C03\u6C06\u6FAD\u6FCB\u6FA3\u6FC7\u6FBC\u6FCE\u6FC8\u6F5E\u6FC4\u6FBD\u6F9E\u6FCA\u6FA8\u7004\u6FA5\u6FAE\u6FBA\u6FAC\u6FAA\u6FCF\u6FBF\u6FB8\u6FA2\u6FC9\u6FAB\u6FCD\u6FAF\u6FB2\u6FB0\u71C5\u71C2\u71BF\u71B8\u71D6\u71C0\u71C1\u71CB\u71D4\u71CA\u71C7\u71CF\u71BD\u71D8\u71BC\u71C6\u71DA\u71DB\u729D\u729E\u7369\u7366\u7367\u736C\u7365\u736B\u736A\u747F\u749A\u74A0\u7494\u7492\u7495\u74A1\u750B\u
7580\u762F\u762D\u7631\u763D\u7633\u763C\u7635\u7632\u7630\u76BB\u76E6\u779A\u779D\u77A1\u779C\u779B\u77A2\u77A3\u7795\u7799\u7797\u78DD\u78E9\u78E5\u78EA\u78DE\u78E3\u78DB\u78E1\u78E2\u78ED\u78DF\u78E0\u79A4\u7A44\u7A48\u7A47\u7AB6\u7AB8\u7AB5\u7AB1\u7AB7\u7BDE\u7BE3\u7BE7\u7BDD\u7BD5\u7BE5\u7BDA\u7BE8\u7BF9\u7BD4\u7BEA\u7BE2\u7BDC\u7BEB\u7BD8\u7BDF\u7CD2\u7CD4\u7CD7\u7CD0\u7CD1\u7E12\u7E21\u7E17\u7E0C\u7E1F\u7E20\u7E13\u7E0E\u7E1C\u7E15\u7E1A\u7E22\u7E0B\u7E0F\u7E16\u7E0D\u7E14\u7E25\u7E24\u7F43\u7F7B\u7F7C\u7F7A\u7FB1\u7FEF\u802A\u8029\u806C\u81B1\u81A6\u81AE\u81B9\u81B5\u81AB\u81B0\u81AC\u81B4\u81B2\u81B7\u81A7\u81F2\u8255\u8256\u8257\u8556\u8545\u856B\u854D\u8553\u8561\u8558\u8540\u8546\u8564\u8541\u8562\u8544\u8551\u8547\u8563\u853E\u855B\u8571\u854E\u856E\u8575\u8555\u8567\u8560\u858C\u8566\u855D\u8554\u8565\u856C\u8663\u8665\u8664\u879B\u878F\u8797\u8793\u8792\u8788\u8781\u8796\u8798\u8779\u8787\u87A3\u8785\u8790\u8791\u879D\u8784\u8794\u879C\u879A\u8789\u891E\u8926\u8930\u892D\u892E\u8927\u8931\u8922\u8929\u8923\u892F\u892C\u891F\u89F1\u8AE0\u8AE2\u8AF2\u8AF4\u8AF5\u8ADD\u8B14\u8AE4\u8ADF\u8AF0\u8AC8\u8ADE\u8AE1\u8AE8\u8AFF\u8AEF\u8AFB\u8C91\u8C92\u8C90\u8CF5\u8CEE\u8CF1\u8CF0\u8CF3\u8D6C\u8D6E\u8DA5\u8DA7\u8E33\u8E3E\u8E38\u8E40\u8E45\u8E36\u8E3C\u8E3D\u8E41\u8E30\u8E3F\u8EBD\u8F36\u8F2E\u8F35\u8F32\u8F39\u8F37\u8F34\u9076\u9079\u907B\u9086\u90FA\u9133\u9135\u9136\u9193\u9190\u9191\u918D\u918F\u9327\u931E\u9308\u931F\u9306\u930F\u937A\u9338\u933C\u931B\u9323\u9312\u9301\u9346\u932D\u930E\u930D\u92CB\u931D\u92FA\u9325\u9313\u92F9\u92F7\u9334\u9302\u9324\u92FF\u9329\u9339\u9335\u932A\u9314\u930C\u930B\u92FE\u9309\u9300\u92FB\u9316\u95BC\u95CD\u95BE\u95B9\u95BA\u95B6\u95BF\u95B5\u95BD\u96A9\u96D4\u970B\u9712\u9710\u9799\u9797\u9794\u97F0\u97F8\u9835\u982F\u9832\u9924\u991F\u9927\u9929\u999E\u99EE\u99EC\u99E5\u99E4\u99F0\u99E3\u99EA\u99E9\u99E7\u9AB9\u9ABF\u9AB4\u9ABB\u9AF6\u9AFA\u9AF9\u9AF7\u9B33\u9B80\u9B85\u9B87\u9B7C\u9B7E\u9B7B\u9B82\u9B93\u9B92\u9B90\u9B
7A\u9B95\u9B7D\u9B88\u9D25\u9D17\u9D20\u9D1E\u9D14\u9D29\u9D1D\u9D18\u9D22\u9D10\u9D19\u9D1F\u9E88\u9E86\u9E87\u9EAE\u9EAD\u9ED5\u9ED6\u9EFA\u9F12\u9F3D\u5126\u5125\u5122\u5124\u5120\u5129\u52F4\u5693\u568C\u568D\u5686\u5684\u5683\u567E\u5682\u567F\u5681\u58D6\u58D4\u58CF\u58D2\u5B2D\u5B25\u5B32\u5B23\u5B2C\u5B27\u5B26\u5B2F\u5B2E\u5B7B\u5BF1\u5BF2\u5DB7\u5E6C\u5E6A\u5FBE\u5FBB\u61C3\u61B5\u61BC\u61E7\u61E0\u61E5\u61E4\u61E8\u61DE\u64EF\u64E9\u64E3\u64EB\u64E4\u64E8\u6581\u6580\u65B6\u65DA\u66D2\u6A8D\u6A96\u6A81\u6AA5\u6A89\u6A9F\u6A9B\u6AA1\u6A9E\u6A87\u6A93\u6A8E\u6A95\u6A83\u6AA8\u6AA4\u6A91\u6A7F\u6AA6\u6A9A\u6A85\u6A8C\u6A92\u6B5B\u6BAD\u6C09\u6FCC\u6FA9\u6FF4\u6FD4\u6FE3\u6FDC\u6FED\u6FE7\u6FE6\u6FDE\u6FF2\u6FDD\u6FE2\u6FE8\u71E1\u71F1\u71E8\u71F2\u71E4\u71F0\u71E2\u7373\u736E\u736F\u7497\u74B2\u74AB\u7490\u74AA\u74AD\u74B1\u74A5\u74AF\u7510\u7511\u7512\u750F\u7584\u7643\u7648\u7649\u7647\u76A4\u76E9\u77B5\u77AB\u77B2\u77B7\u77B6\u77B4\u77B1\u77A8\u77F0\u78F3\u78FD\u7902\u78FB\u78FC\u78F2\u7905\u78F9\u78FE\u7904\u79AB\u79A8\u7A5C\u7A5B\u7A56\u7A58\u7A54\u7A5A\u7ABE\u7AC0\u7AC1\u7C05\u7C0F\u7BF2\u7C00\u7BFF\u7BFB\u7C0E\u7BF4\u7C0B\u7BF3\u7C02\u7C09\u7C03\u7C01\u7BF8\u7BFD\u7C06\u7BF0\u7BF1\u7C10\u7C0A\u7CE8\u7E2D\u7E3C\u7E42\u7E33\u9848\u7E38\u7E2A\u7E49\u7E40\u7E47\u7E29\u7E4C\u7E30\u7E3B\u7E36\u7E44\u7E3A\u7F45\u7F7F\u7F7E\u7F7D\u7FF4\u7FF2\u802C\u81BB\u81C4\u81CC\u81CA\u81C5\u81C7\u81BC\u81E9\u825B\u825A\u825C\u8583\u8580\u858F\u85A7\u8595\u85A0\u858B\u85A3\u857B\u85A4\u859A\u859E\u8577\u857C\u8589\u85A1\u857A\u8578\u8557\u858E\u8596\u8586\u858D\u8599\u859D\u8581\u85A2\u8582\u8588\u8585\u8579\u8576\u8598\u8590\u859F\u8668\u87BE\u87AA\u87AD\u87C5\u87B0\u87AC\u87B9\u87B5\u87BC\u87AE\u87C9\u87C3\u87C2\u87CC\u87B7\u87AF\u87C4\u87CA\u87B4\u87B6\u87BF\u87B8\u87BD\u87DE\u87B2\u8935\u8933\u893C\u893E\u8941\u8952\u8937\u8942\u89AD\u89AF\u89AE\u89F2\u89F3\u8B1E\u8B18\u8B16\u8B11\u8B05\u8B0B\u8B22\u8B0F\u8B12\u8B15\u8B07\u8B0D\u8B08\u8B06\u8B1C\u8B13\u8B1A\u8C4F\u8C70
\u8C72\u8C71\u8C6F\u8C95\u8C94\u8CF9\u8D6F\u8E4E\u8E4D\u8E53\u8E50\u8E4C\u8E47\u8F43\u8F40\u9085\u907E\u9138\u919A\u91A2\u919B\u9199\u919F\u91A1\u919D\u91A0\u93A1\u9383\u93AF\u9364\u9356\u9347\u937C\u9358\u935C\u9376\u9349\u9350\u9351\u9360\u936D\u938F\u934C\u936A\u9379\u9357\u9355\u9352\u934F\u9371\u9377\u937B\u9361\u935E\u9363\u9367\u9380\u934E\u9359\u95C7\u95C0\u95C9\u95C3\u95C5\u95B7\u96AE\u96B0\u96AC\u9720\u971F\u9718\u971D\u9719\u979A\u97A1\u979C\u979E\u979D\u97D5\u97D4\u97F1\u9841\u9844\u984A\u9849\u9845\u9843\u9925\u992B\u992C\u992A\u9933\u9932\u992F\u992D\u9931\u9930\u9998\u99A3\u99A1\u9A02\u99FA\u99F4\u99F7\u99F9\u99F8\u99F6\u99FB\u99FD\u99FE\u99FC\u9A03\u9ABE\u9AFE\u9AFD\u9B01\u9AFC\u9B48\u9B9A\u9BA8\u9B9E\u9B9B\u9BA6\u9BA1\u9BA5\u9BA4\u9B86\u9BA2\u9BA0\u9BAF\u9D33\u9D41\u9D67\u9D36\u9D2E\u9D2F\u9D31\u9D38\u9D30\u9D45\u9D42\u9D43\u9D3E\u9D37\u9D40\u9D3D\u7FF5\u9D2D\u9E8A\u9E89\u9E8D\u9EB0\u9EC8\u9EDA\u9EFB\u9EFF\u9F24\u9F23\u9F22\u9F54\u9FA0\u5131\u512D\u512E\u5698\u569C\u5697\u569A\u569D\u5699\u5970\u5B3C\u5C69\u5C6A\u5DC0\u5E6D\u5E6E\u61D8\u61DF\u61ED\u61EE\u61F1\u61EA\u61F0\u61EB\u61D6\u61E9\u64FF\u6504\u64FD\u64F8\u6501\u6503\u64FC\u6594\u65DB\u66DA\u66DB\u66D8\u6AC5\u6AB9\u6ABD\u6AE1\u6AC6\u6ABA\u6AB6\u6AB7\u6AC7\u6AB4\u6AAD\u6B5E\u6BC9\u6C0B\u7007\u700C\u700D\u7001\u7005\u7014\u700E\u6FFF\u7000\u6FFB\u7026\u6FFC\u6FF7\u700A\u7201\u71FF\u71F9\u7203\u71FD\u7376\u74B8\u74C0\u74B5\u74C1\u74BE\u74B6\u74BB\u74C2\u7514\u7513\u765C\u7664\u7659\u7650\u7653\u7657\u765A\u76A6\u76BD\u76EC\u77C2\u77BA\u78FF\u790C\u7913\u7914\u7909\u7910\u7912\u7911\u79AD\u79AC\u7A5F\u7C1C\u7C29\u7C19\u7C20\u7C1F\u7C2D\u7C1D\u7C26\u7C28\u7C22\u7C25\u7C30\u7E5C\u7E50\u7E56\u7E63\u7E58\u7E62\u7E5F\u7E51\u7E60\u7E57\u7E53\u7FB5\u7FB3\u7FF7\u7FF8\u8075\u81D1\u81D2\u81D0\u825F\u825E\u85B4\u85C6\u85C0\u85C3\u85C2\u85B3\u85B5\u85BD\u85C7\u85C4\u85BF\u85CB\u85CE\u85C8\u85C5\u85B1\u85B6\u85D2\u8624\u85B8\u85B7\u85BE\u8669\u87E7\u87E6\u87E2\u87DB\u87EB\u87EA\u87E5\u87DF\u87F3\u87E4\u87D4\u
87DC\u87D3\u87ED\u87D8\u87E3\u87A4\u87D7\u87D9\u8801\u87F4\u87E8\u87DD\u8953\u894B\u894F\u894C\u8946\u8950\u8951\u8949\u8B2A\u8B27\u8B23\u8B33\u8B30\u8B35\u8B47\u8B2F\u8B3C\u8B3E\u8B31\u8B25\u8B37\u8B26\u8B36\u8B2E\u8B24\u8B3B\u8B3D\u8B3A\u8C42\u8C75\u8C99\u8C98\u8C97\u8CFE\u8D04\u8D02\u8D00\u8E5C\u8E62\u8E60\u8E57\u8E56\u8E5E\u8E65\u8E67\u8E5B\u8E5A\u8E61\u8E5D\u8E69\u8E54\u8F46\u8F47\u8F48\u8F4B\u9128\u913A\u913B\u913E\u91A8\u91A5\u91A7\u91AF\u91AA\u93B5\u938C\u9392\u93B7\u939B\u939D\u9389\u93A7\u938E\u93AA\u939E\u93A6\u9395\u9388\u9399\u939F\u938D\u93B1\u9391\u93B2\u93A4\u93A8\u93B4\u93A3\u93A5\u95D2\u95D3\u95D1\u96B3\u96D7\u96DA\u5DC2\u96DF\u96D8\u96DD\u9723\u9722\u9725\u97AC\u97AE\u97A8\u97AB\u97A4\u97AA\u97A2\u97A5\u97D7\u97D9\u97D6\u97D8\u97FA\u9850\u9851\u9852\u98B8\u9941\u993C\u993A\u9A0F\u9A0B\u9A09\u9A0D\u9A04\u9A11\u9A0A\u9A05\u9A07\u9A06\u9AC0\u9ADC\u9B08\u9B04\u9B05\u9B29\u9B35\u9B4A\u9B4C\u9B4B\u9BC7\u9BC6\u9BC3\u9BBF\u9BC1\u9BB5\u9BB8\u9BD3\u9BB6\u9BC4\u9BB9\u9BBD\u9D5C\u9D53\u9D4F\u9D4A\u9D5B\u9D4B\u9D59\u9D56\u9D4C\u9D57\u9D52\u9D54\u9D5F\u9D58\u9D5A\u9E8E\u9E8C\u9EDF\u9F01\u9F00\u9F16\u9F25\u9F2B\u9F2A\u9F29\u9F28\u9F4C\u9F55\u5134\u5135\u5296\u52F7\u53B4\u56AB\u56AD\u56A6\u56A7\u56AA\u56AC\u58DA\u58DD\u58DB\u5912\u5B3D\u5B3E\u5B3F\u5DC3\u5E70\u5FBF\u61FB\u6507\u6510\u650D\u6509\u650C\u650E\u6584\u65DE\u65DD\u66DE\u6AE7\u6AE0\u6ACC\u6AD1\u6AD9\u6ACB\u6ADF\u6ADC\u6AD0\u6AEB\u6ACF\u6ACD\u6ADE\u6B60\u6BB0\u6C0C\u7019\u7027\u7020\u7016\u702B\u7021\u7022\u7023\u7029\u7017\u7024\u701C\u702A\u720C\u720A\u7207\u7202\u7205\u72A5\u72A6\u72A4\u72A3\u72A1\u74CB\u74C5\u74B7\u74C3\u7516\u7660\u77C9\u77CA\u77C4\u77F1\u791D\u791B\u7921\u791C\u7917\u791E\u79B0\u7A67\u7A68\u7C33\u7C3C\u7C39\u7C2C\u7C3B\u7CEC\u7CEA\u7E76\u7E75\u7E78\u7E70\u7E77\u7E6F\u7E7A\u7E72\u7E74\u7E68\u7F4B\u7F4A\u7F83\u7F86\u7FB7\u7FFD\u7FFE\u8078\u81D7\u81D5\u8264\u8261\u8263\u85EB\u85F1\u85ED\u85D9\u85E1\u85E8\u85DA\u85D7\u85EC\u85F2\u85F8\u85D8\u85DF\u85E3\u85DC\u85D1\u85F0\u85E6\u85EF\u85
DE\u85E2\u8800\u87FA\u8803\u87F6\u87F7\u8809\u880C\u880B\u8806\u87FC\u8808\u87FF\u880A\u8802\u8962\u895A\u895B\u8957\u8961\u895C\u8958\u895D\u8959\u8988\u89B7\u89B6\u89F6\u8B50\u8B48\u8B4A\u8B40\u8B53\u8B56\u8B54\u8B4B\u8B55\u8B51\u8B42\u8B52\u8B57\u8C43\u8C77\u8C76\u8C9A\u8D06\u8D07\u8D09\u8DAC\u8DAA\u8DAD\u8DAB\u8E6D\u8E78\u8E73\u8E6A\u8E6F\u8E7B\u8EC2\u8F52\u8F51\u8F4F\u8F50\u8F53\u8FB4\u9140\u913F\u91B0\u91AD\u93DE\u93C7\u93CF\u93C2\u93DA\u93D0\u93F9\u93EC\u93CC\u93D9\u93A9\u93E6\u93CA\u93D4\u93EE\u93E3\u93D5\u93C4\u93CE\u93C0\u93D2\u93E7\u957D\u95DA\u95DB\u96E1\u9729\u972B\u972C\u9728\u9726\u97B3\u97B7\u97B6\u97DD\u97DE\u97DF\u985C\u9859\u985D\u9857\u98BF\u98BD\u98BB\u98BE\u9948\u9947\u9943\u99A6\u99A7\u9A1A\u9A15\u9A25\u9A1D\u9A24\u9A1B\u9A22\u9A20\u9A27\u9A23\u9A1E\u9A1C\u9A14\u9AC2\u9B0B\u9B0A\u9B0E\u9B0C\u9B37\u9BEA\u9BEB\u9BE0\u9BDE\u9BE4\u9BE6\u9BE2\u9BF0\u9BD4\u9BD7\u9BEC\u9BDC\u9BD9\u9BE5\u9BD5\u9BE1\u9BDA\u9D77\u9D81\u9D8A\u9D84\u9D88\u9D71\u9D80\u9D78\u9D86\u9D8B\u9D8C\u9D7D\u9D6B\u9D74\u9D75\u9D70\u9D69\u9D85\u9D73\u9D7B\u9D82\u9D6F\u9D79\u9D7F\u9D87\u9D68\u9E94\u9E91\u9EC0\u9EFC\u9F2D\u9F40\u9F41\u9F4D\u9F56\u9F57\u9F58\u5337\u56B2\u56B5\u56B3\u58E3\u5B45\u5DC6\u5DC7\u5EEE\u5EEF\u5FC0\u5FC1\u61F9\u6517\u6516\u6515\u6513\u65DF\u66E8\u66E3\u66E4\u6AF3\u6AF0\u6AEA\u6AE8\u6AF9\u6AF1\u6AEE\u6AEF\u703C\u7035\u702F\u7037\u7034\u7031\u7042\u7038\u703F\u703A\u7039\u7040\u703B\u7033\u7041\u7213\u7214\u72A8\u737D\u737C\u74BA\u76AB\u76AA\u76BE\u76ED\u77CC\u77CE\u77CF\u77CD\u77F2\u7925\u7923\u7927\u7928\u7924\u7929\u79B2\u7A6E\u7A6C\u7A6D\u7AF7\u7C49\u7C48\u7C4A\u7C47\u7C45\u7CEE\u7E7B\u7E7E\u7E81\u7E80\u7FBA\u7FFF\u8079\u81DB\u81D9\u820B\u8268\u8269\u8622\u85FF\u8601\u85FE\u861B\u8600\u85F6\u8604\u8609\u8605\u860C\u85FD\u8819\u8810\u8811\u8817\u8813\u8816\u8963\u8966\u89B9\u89F7\u8B60\u8B6A\u8B5D\u8B68\u8B63\u8B65\u8B67\u8B6D\u8DAE\u8E86\u8E88\u8E84\u8F59\u8F56\u8F57\u8F55\u8F58\u8F5A\u908D\u9143\u9141\u91B7\u91B5\u91B2\u91B3\u940B\u9413\u93FB\u9420\u940F\u9414
\u93FE\u9415\u9410\u9428\u9419\u940D\u93F5\u9400\u93F7\u9407\u940E\u9416\u9412\u93FA\u9409\u93F8\u940A\u93FF\u93FC\u940C\u93F6\u9411\u9406\u95DE\u95E0\u95DF\u972E\u972F\u97B9\u97BB\u97FD\u97FE\u9860\u9862\u9863\u985F\u98C1\u98C2\u9950\u994E\u9959\u994C\u994B\u9953\u9A32\u9A34\u9A31\u9A2C\u9A2A\u9A36\u9A29\u9A2E\u9A38\u9A2D\u9AC7\u9ACA\u9AC6\u9B10\u9B12\u9B11\u9C0B\u9C08\u9BF7\u9C05\u9C12\u9BF8\u9C40\u9C07\u9C0E\u9C06\u9C17\u9C14\u9C09\u9D9F\u9D99\u9DA4\u9D9D\u9D92\u9D98\u9D90\u9D9B\u9DA0\u9D94\u9D9C\u9DAA\u9D97\u9DA1\u9D9A\u9DA2\u9DA8\u9D9E\u9DA3\u9DBF\u9DA9\u9D96\u9DA6\u9DA7\u9E99\u9E9B\u9E9A\u9EE5\u9EE4\u9EE7\u9EE6\u9F30\u9F2E\u9F5B\u9F60\u9F5E\u9F5D\u9F59\u9F91\u513A\u5139\u5298\u5297\u56C3\u56BD\u56BE\u5B48\u5B47\u5DCB\u5DCF\u5EF1\u61FD\u651B\u6B02\u6AFC\u6B03\u6AF8\u6B00\u7043\u7044\u704A\u7048\u7049\u7045\u7046\u721D\u721A\u7219\u737E\u7517\u766A\u77D0\u792D\u7931\u792F\u7C54\u7C53\u7CF2\u7E8A\u7E87\u7E88\u7E8B\u7E86\u7E8D\u7F4D\u7FBB\u8030\u81DD\u8618\u862A\u8626\u861F\u8623\u861C\u8619\u8627\u862E\u8621\u8620\u8629\u861E\u8625\u8829\u881D\u881B\u8820\u8824\u881C\u882B\u884A\u896D\u8969\u896E\u896B\u89FA\u8B79\u8B78\u8B45\u8B7A\u8B7B\u8D10\u8D14\u8DAF\u8E8E\u8E8C\u8F5E\u8F5B\u8F5D\u9146\u9144\u9145\u91B9\u943F\u943B\u9436\u9429\u943D\u943C\u9430\u9439\u942A\u9437\u942C\u9440\u9431\u95E5\u95E4\u95E3\u9735\u973A\u97BF\u97E1\u9864\u98C9\u98C6\u98C0\u9958\u9956\u9A39\u9A3D\u9A46\u9A44\u9A42\u9A41\u9A3A\u9A3F\u9ACD\u9B15\u9B17\u9B18\u9B16\u9B3A\u9B52\u9C2B\u9C1D\u9C1C\u9C2C\u9C23\u9C28\u9C29\u9C24\u9C21\u9DB7\u9DB6\u9DBC\u9DC1\u9DC7\u9DCA\u9DCF\u9DBE\u9DC5\u9DC3\u9DBB\u9DB5\u9DCE\u9DB9\u9DBA\u9DAC\u9DC8\u9DB1\u9DAD\u9DCC\u9DB3\u9DCD\u9DB2\u9E7A\u9E9C\u9EEB\u9EEE\u9EED\u9F1B\u9F18\u9F1A\u9F31\u9F4E\u9F65\u9F64\u9F92\u4EB9\u56C6\u56C5\u56CB\u5971\u5B4B\u5B4C\u5DD5\u5DD1\u5EF2\u6521\u6520\u6526\u6522\u6B0B\u6B08\u6B09\u6C0D\u7055\u7056\u7057\u7052\u721E\u721F\u72A9\u737F\u74D8\u74D5\u74D9\u74D7\u766D\u76AD\u7935\u79B4\u7A70\u7A71\u7C57\u7C5C\u7C59\u7C5B\u7C5A\u7CF4\u
7CF1\u7E91\u7F4F\u7F87\u81DE\u826B\u8634\u8635\u8633\u862C\u8632\u8636\u882C\u8828\u8826\u882A\u8825\u8971\u89BF\u89BE\u89FB\u8B7E\u8B84\u8B82\u8B86\u8B85\u8B7F\u8D15\u8E95\u8E94\u8E9A\u8E92\u8E90\u8E96\u8E97\u8F60\u8F62\u9147\u944C\u9450\u944A\u944B\u944F\u9447\u9445\u9448\u9449\u9446\u973F\u97E3\u986A\u9869\u98CB\u9954\u995B\u9A4E\u9A53\u9A54\u9A4C\u9A4F\u9A48\u9A4A\u9A49\u9A52\u9A50\u9AD0\u9B19\u9B2B\u9B3B\u9B56\u9B55\u9C46\u9C48\u9C3F\u9C44\u9C39\u9C33\u9C41\u9C3C\u9C37\u9C34\u9C32\u9C3D\u9C36\u9DDB\u9DD2\u9DDE\u9DDA\u9DCB\u9DD0\u9DDC\u9DD1\u9DDF\u9DE9\u9DD9\u9DD8\u9DD6\u9DF5\u9DD5\u9DDD\u9EB6\u9EF0\u9F35\u9F33\u9F32\u9F42\u9F6B\u9F95\u9FA2\u513D\u5299\u58E8\u58E7\u5972\u5B4D\u5DD8\u882F\u5F4F\u6201\u6203\u6204\u6529\u6525\u6596\u66EB\u6B11\u6B12\u6B0F\u6BCA\u705B\u705A\u7222\u7382\u7381\u7383\u7670\u77D4\u7C67\u7C66\u7E95\u826C\u863A\u8640\u8639\u863C\u8631\u863B\u863E\u8830\u8832\u882E\u8833\u8976\u8974\u8973\u89FE\u8B8C\u8B8E\u8B8B\u8B88\u8C45\u8D19\u8E98\u8F64\u8F63\u91BC\u9462\u9455\u945D\u9457\u945E\u97C4\u97C5\u9800\u9A56\u9A59\u9B1E\u9B1F\u9B20\u9C52\u9C58\u9C50\u9C4A\u9C4D\u9C4B\u9C55\u9C59\u9C4C\u9C4E\u9DFB\u9DF7\u9DEF\u9DE3\u9DEB\u9DF8\u9DE4\u9DF6\u9DE1\u9DEE\u9DE6\u9DF2\u9DF0\u9DE2\u9DEC\u9DF4\u9DF3\u9DE8\u9DED\u9EC2\u9ED0\u9EF2\u9EF3\u9F06\u9F1C\u9F38\u9F37\u9F36\u9F43\u9F4F\u9F71\u9F70\u9F6E\u9F6F\u56D3\u56CD\u5B4E\u5C6D\u652D\u66ED\u66EE\u6B13\u705F\u7061\u705D\u7060\u7223\u74DB\u74E5\u77D5\u7938\u79B7\u79B6\u7C6A\u7E97\u7F89\u826D\u8643\u8838\u8837\u8835\u884B\u8B94\u8B95\u8E9E\u8E9F\u8EA0\u8E9D\u91BE\u91BD\u91C2\u946B\u9468\u9469\u96E5\u9746\u9743\u9747\u97C7\u97E5\u9A5E\u9AD5\u9B59\u9C63\u9C67\u9C66\u9C62\u9C5E\u9C60\u9E02\u9DFE\u9E07\u9E03\u9E06\u9E05\u9E00\u9E01\u9E09\u9DFF\u9DFD\u9E04\u9EA0\u9F1E\u9F46\u9F74\u9F75\u9F76\u56D4\u652E\u65B8\u6B18\u6B19\u6B17\u6B1A\u7062\u7226\u72AA\u77D8\u77D9\u7939\u7C69\u7C6B\u7CF6\u7E9A\u7E98\u7E9B\u7E99\u81E0\u81E1\u8646\u8647\u8648\u8979\u897A\u897C\u897B\u89FF\u8B98\u8B99\u8EA5\u8EA4\u8EA3\u946E\u946D\u94
6F\u9471\u9473\u9749\u9872\u995F\u9C68\u9C6E\u9C6D\u9E0B\u9E0D\u9E10\u9E0F\u9E12\u9E11\u9EA1\u9EF5\u9F09\u9F47\u9F78\u9F7B\u9F7A\u9F79\u571E\u7066\u7C6F\u883C\u8DB2\u8EA6\u91C3\u9474\u9478\u9476\u9475\u9A60\u9C74\u9C73\u9C71\u9C75\u9E14\u9E13\u9EF6\u9F0A\u9FA4\u7068\u7065\u7CF7\u866A\u883E\u883D\u883F\u8B9E\u8C9C\u8EA9\u8EC9\u974B\u9873\u9874\u98CC\u9961\u99AB\u9A64\u9A66\u9A67\u9B24\u9E15\u9E17\u9F48\u6207\u6B1E\u7227\u864C\u8EA8\u9482\u9480\u9481\u9A69\u9A68\u9B2E\u9E19\u7229\u864B\u8B9F\u9483\u9C79\u9EB7\u7675\u9A6B\u9C7A\u9E1D\u7069\u706A\u9EA4\u9F7E\u9F49\u9F98\u7881\u92B9\u88CF\u58BB\u6052\u7CA7\u5AFA\u2554\u2566\u2557\u2560\u256C\u2563\u255A\u2569\u255D\u2552\u2564\u2555\u255E\u256A\u2561\u2558\u2567\u255B\u2553\u2565\u2556\u255F\u256B\u2562\u2559\u2568\u255C\u2551\u2550\u256D\u256E\u2570\u256F\uFFED\u0547\u92DB\u05DF\u3FC5\u854C\u42B5\u73EF\u51B5\u3649\u4942\u89E4\u9344\u19DB\u82EE\u3CC8\u783C\u6744\u62DF\u4933\u89AA\u02A0\u6BB3\u1305\u4FAB\u24ED\u5008\u6D29\u7A84\u3600\u4AB1\u2513\u5029\u037E\u5FA4\u0380\u0347\u6EDB\u041F\u507D\u5101\u347A\u510E\u986C\u3743\u8416\u49A4\u0487\u5160\u33B4\u516A\u0BFF\u20FC\u02E5\u2530\u058E\u3233\u1983\u5B82\u877D\u05B3\u3C99\u51B2\u51B8\u9D34\u51C9\u51CF\u51D1\u3CDC\u51D3\u4AA6\u51B3\u51E2\u5342\u51ED\u83CD\u693E\u372D\u5F7B\u520B\u5226\u523C\u52B5\u5257\u5294\u52B9\u52C5\u7C15\u8542\u52E0\u860D\u6B13\u5305\u8ADE\u5549\u6ED9\u3F80\u0954\u3FEC\u5333\u5344\u0BE2\u6CCB\u1726\u681B\u73D5\u604A\u3EAA\u38CC\u16E8\u71DD\u44A2\u536D\u5374\u86AB\u537E\u537F\u1596\u1613\u77E6\u5393\u8A9B\u53A0\u53AB\u53AE\u73A7\u5772\u3F59\u739C\u53C1\u53C5\u6C49\u4E49\u57FE\u53D9\u3AAB\u0B8F\u53E0\u3FEB\u2DA3\u53F6\u0C77\u5413\u7079\u552B\u6657\u6D5B\u546D\u6B53\u0D74\u555D\u548F\u54A4\u47A6\u170D\u0EDD\u3DB4\u0D4D\u89BC\u2698\u5547\u4CED\u542F\u7417\u5586\u55A9\u5605\u18D7\u403A\u4552\u4435\u66B3\u10B4\u5637\u66CD\u328A\u66A4\u66AD\u564D\u564F\u78F1\u56F1\u9787\u53FE\u5700\u56EF\u56ED\u8B66\u3623\u124F\u5746\u41A5\u6C6E\u708B\u5742\u36B1\u6C7E\u57E6
\u1416\u5803\u1454\u4363\u5826\u4BF5\u585C\u58AA\u3561\u58E0\u58DC\u123C\u58FB\u5BFF\u5743\uA150\u4278\u93D3\u35A1\u591F\u68A6\u36C3\u6E59\u163E\u5A24\u5553\u1692\u8505\u59C9\u0D4E\u6C81\u6D2A\u17DC\u59D9\u17FB\u17B2\u6DA6\u6D71\u1828\u16D5\u59F9\u6E45\u5AAB\u5A63\u36E6\u49A9\u5A77\u3708\u5A96\u7465\u5AD3\u6FA1\u2554\u3D85\u1911\u3732\u16B8\u5E83\u52D0\u5B76\u6588\u5B7C\u7A0E\u4004\u485D\u0204\u5BD5\u6160\u1A34\u59CC\u05A5\u5BF3\u5B9D\u4D10\u5C05\u1B44\u5C13\u73CE\u5C14\u1CA5\u6B28\u5C49\u48DD\u5C85\u5CE9\u5CEF\u5D8B\u1DF9\u1E37\u5D10\u5D18\u5D46\u1EA4\u5CBA\u5DD7\u82FC\u382D\u4901\u2049\u2173\u8287\u3836\u3BC2\u5E2E\u6A8A\u5E75\u5E7A\u44BC\u0CD3\u53A6\u4EB7\u5ED0\u53A8\u1771\u5E09\u5EF4\u8482\u5EF9\u5EFB\u38A0\u5EFC\u683E\u941B\u5F0D\u01C1\uF894\u3ADE\u48AE\u133A\u5F3A\u6888\u23D0\u5F58\u2471\u5F63\u97BD\u6E6E\u5F72\u9340\u8A36\u5FA7\u5DB6\u3D5F\u5250\u1F6A\u70F8\u2668\u91D6\u029E\u8A29\u6031\u6685\u1877\u3963\u3DC7\u3639\u5790\u27B4\u7971\u3E40\u609E\u60A4\u60B3\u4982\u498F\u7A53\u74A4\u50E1\u5AA0\u6164\u8424\u6142\uF8A6\u6ED2\u6181\u51F4\u0656\u6187\u5BAA\u3FB7\u285F\u61D3\u8B9D\u995D\u61D0\u3932\u2980\u28C1\u6023\u615C\u651E\u638B\u0118\u62C5\u1770\u62D5\u2E0D\u636C\u49DF\u3A17\u6438\u63F8\u138E\u17FC\u6490\u6F8A\u2E36\u9814\u408C\u571D\u64E1\u64E5\u947B\u3A66\u643A\u3A57\u654D\u6F16\u4A28\u4A23\u6585\u656D\u655F\u307E\u65B5\u4940\u4B37\u65D1\u40D8\u1829\u65E0\u65E3\u5FDF\u3400\u6618\u31F7\u31F8\u6644\u31A4\u31A5\u664B\u0E75\u6667\u51E6\u6673\u6674\u1E3D\u3231\u85F4\u31C8\u5313\u77C5\u28F7\u99A4\u6702\u439C\u4A21\u3B2B\u69FA\u37C2\u675E\u6767\u6762\u41CD\u90ED\u67D7\u44E9\u6822\u6E50\u923C\u6801\u33E6\u6DA0\u685D\u346F\u69E1\u6A0B\u8ADF\u6973\u68C3\u35CD\u6901\u6900\u3D32\u3A01\u363C\u3B80\u67AC\u6961\u8A4A\u42FC\u6936\u6998\u3BA1\u03C9\u8363\u5090\u69F9\u3659\u212A\u6A45\u3703\u6A9D\u3BF3\u67B1\u6AC8\u919C\u3C0D\u6B1D\u0923\u60DE\u6B35\u6B74\u27CD\u6EB5\u3ADB\u03B5\u1958\u3740\u5421\u3B5A\u6BE1\u3EFC\u6BDC\u6C37\u248B\u48F1\u6B51\u6C5A\u8226\u6C79\u3DBC\u44C5\u
3DBD\u41A4\u490C\u4900\u3CC9\u36E5\u3CEB\u0D32\u9B83\u31F9\u2491\u7F8F\u6837\u6D25\u6DA1\u6DEB\u6D96\u6D5C\u6E7C\u6F04\u497F\u4085\u6E72\u8533\u6F74\u51C7\u6C9C\u6E1D\u842E\u8B21\u6E2F\u3E2F\u7453\u3F82\u79CC\u6E4F\u5A91\u304B\u6FF8\u370D\u6F9D\u3E30\u6EFA\u1497\u403D\u4555\u93F0\u6F44\u6F5C\u3D4E\u6F74\u9170\u3D3B\u6F9F\u4144\u6FD3\u4091\u4155\u4039\u3FF0\u3FB4\u413F\u51DF\u4156\u4157\u4140\u61DD\u704B\u707E\u70A7\u7081\u70CC\u70D5\u70D6\u70DF\u4104\u3DE8\u71B4\u7196\u4277\u712B\u7145\u5A88\u714A\u716E\u5C9C\u4365\u714F\u9362\u42C1\u712C\u445A\u4A27\u4A22\u71BA\u8BE8\u70BD\u720E\u9442\u7215\u5911\u9443\u7224\u9341\u5605\u722E\u7240\u4974\u68BD\u7255\u7257\u3E55\u3044\u680D\u6F3D\u7282\u732A\u732B\u4823\u882B\u48ED\u8804\u7328\u732E\u73CF\u73AA\u0C3A\u6A2E\u73C9\u7449\u41E2\u16E7\u4A24\u6623\u36C5\u49B7\u498D\u49FB\u73F7\u7415\u6903\u4A26\u7439\u05C3\u3ED7\u745C\u28AD\u7460\u8EB2\u7447\u73E4\u7476\u83B9\u746C\u3730\u7474\u93F1\u6A2C\u7482\u4953\u4A8C\u415F\u4A79\u8B8F\u5B46\u8C03\u189E\u74C8\u1988\u750E\u74E9\u751E\u8ED9\u1A4B\u5BD7\u8EAC\u9385\u754D\u754A\u7567\u756E\u4F82\u3F04\u4D13\u758E\u745D\u759E\u75B4\u7602\u762C\u7651\u764F\u766F\u7676\u63F5\u7690\u81EF\u37F8\u6911\u690E\u76A1\u76A5\u76B7\u76CC\u6F9F\u8462\u509D\u517D\u1E1C\u771E\u7726\u7740\u64AF\u5220\u7758\u32AC\u77AF\u8964\u8968\u16C1\u77F4\u7809\u1376\u4A12\u68CA\u78AF\u78C7\u78D3\u96A5\u792E\u55E0\u78D7\u7934\u78B1\u760C\u8FB8\u8884\u8B2B\u6083\u261C\u7986\u8900\u6902\u7980\u5857\u799D\u7B39\u793C\u79A9\u6E2A\u7126\u3EA8\u79C6\u910D\u79D4"; + + private static boolean readBit(int i) { + return (ASTRALNESS.charAt(i >> 4) & (1 << (i & 0xF))) != 0; + } + + static char lowBits(int pointer) { + if (pointer < 942) { + return '\u0000'; + } + if (pointer < 1068) { + return TABLE0.charAt(pointer - 942); + } + if (pointer < 1099) { + return '\u0000'; + } + if (pointer < 1172) { + return TABLE1.charAt(pointer - 1099); + } + if (pointer < 1256) { + return '\u0000'; + } + if (pointer < 5466) { + return 
TABLE2.charAt(pointer - 1256); + } + if (pointer < 5495) { + return '\u0000'; + } + if (pointer < 11214) { + return TABLE3.charAt(pointer - 5495); + } + if (pointer < 11254) { + return '\u0000'; + } + if (pointer < 19782) { + return TABLE4.charAt(pointer - 11254); + } + return '\u0000'; + } + + static boolean isAstral(int pointer) { + if (pointer < 947) { + return false; + } + if (pointer < 1119) { + return readBit(0 + (pointer - 947)); + } + if (pointer < 1256) { + return false; + } + if (pointer < 1269) { + return readBit(172 + (pointer - 1256)); + } + if (pointer < 1336) { + return false; + } + if (pointer < 1364) { + return readBit(185 + (pointer - 1336)); + } + if (pointer < 1413) { + return false; + } + if (pointer < 1912) { + return readBit(213 + (pointer - 1413)); + } + if (pointer < 2012) { + return false; + } + if (pointer < 3800) { + return readBit(712 + (pointer - 2012)); + } + if (pointer < 3883) { + return false; + } + if (pointer == 3883) { + return true; + } + if (pointer < 3985) { + return false; + } + if (pointer < 5024) { + return readBit(2501 + (pointer - 3985)); + } + if (pointer < 11205) { + return false; + } + if (pointer < 11214) { + return readBit(3540 + (pointer - 11205)); + } + if (pointer < 18997) { + return false; + } + if (pointer < 19782) { + return readBit(3549 + (pointer - 18997)); + } + return false; + } + + public static int findPointer(char lowBits, boolean isAstral) { + if (!isAstral) { + switch (lowBits) { + case 0x2550: + return 18991; + case 0x255E: + return 18975; + case 0x2561: + return 18977; + case 0x256A: + return 18976; + case 0x5341: + return 5512; + case 0x5345: + return 5599; + default: + break; + } + } + for (int i = 3768; i < TABLE2.length(); i++) { + if (TABLE2.charAt(i) == lowBits) { + int pointer = i + 1256; + if (isAstral == isAstral(pointer)) { + return pointer; + } + } + } + for (int i = 0; i < TABLE3.length(); i++) { + if (TABLE3.charAt(i) == lowBits) { + int pointer = i + 5495; + if (isAstral == 
isAstral(pointer)) { + return pointer; + } + } + } + for (int i = 0; i < TABLE4.length(); i++) { + if (TABLE4.charAt(i) == lowBits) { + int pointer = i + 11254; + if (isAstral == isAstral(pointer)) { + return pointer; + } + } + } + return 0; + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Big5Decoder.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Big5Decoder.java new file mode 100644 index 000000000..cc56b892f --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Big5Decoder.java @@ -0,0 +1,184 @@ +/* + * Copyright (c) 2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +package nu.validator.encoding; + +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.Charset; +import java.nio.charset.CoderResult; +/** Decodes Big5 bytes into UTF-16 code units, mapping each two-byte pointer through Big5Data. */ +public class Big5Decoder extends Decoder { + /** Lead byte (0x81..0xFE) already consumed and still waiting for its trail byte; 0 when none is pending. */ + private int big5Lead = 0; + /** Second code unit of a two-unit output that did not fit into the output buffer; U+0000 when none is pending. */ + private char pendingTrail = '\u0000'; + /** Passes the charset plus the constants 0.5f and 1.0f to Decoder. NOTE(review): presumably average and maximum chars per byte, as in java.nio.charset.CharsetDecoder - confirm against Decoder. */ + protected Big5Decoder(Charset cs) { + super(cs, 0.5f, 1.0f); + } + /** Decodes from in to out. Returns UNDERFLOW when the input is exhausted and OVERFLOW when out is full. For bytes that cannot be decoded it returns a malformed result with the input position rewound when this.report is set, and otherwise writes U+FFFD and keeps going. */ + @Override protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) { + assert !(this.report && (big5Lead != 0)): + "When reporting, this method should never return with big5Lead set."; + if (pendingTrail != '\u0000') { /* finish a two-unit output interrupted by an earlier OVERFLOW */ + if (!out.hasRemaining()) { + return CoderResult.OVERFLOW; + } + out.put(pendingTrail); + pendingTrail = '\u0000'; + } + for (;;) { + if (!in.hasRemaining()) { + return CoderResult.UNDERFLOW; + } + if (!out.hasRemaining()) { + return CoderResult.OVERFLOW; + } + int b = ((int) in.get() & 0xFF); /* the byte as an unsigned value 0..255 */ + if (big5Lead == 0) { /* no lead pending: b starts a new character */ + if (b <= 0x7F) { /* 0x00..0x7F is copied through unchanged */ + out.put((char) b); + continue; + } + if (b >= 0x81 && b <= 0xFE) { + if (this.report && !in.hasRemaining()) { + // The Java API is badly documented. Need to do this + // crazy thing and hope the caller knows about the + // undocumented aspects of the API! + in.position(in.position() - 1); + return CoderResult.UNDERFLOW; + } + big5Lead = b; + continue; + } + if (this.report) { + in.position(in.position() - 1); + return CoderResult.malformedForLength(1); + } + out.put('\uFFFD'); + continue; + } + int lead = big5Lead; /* from here on b is the trail-byte candidate for this lead */ + big5Lead = 0; + int offset = (b < 0x7F) ? 
0x40 : 0x62; /* value subtracted from the trail byte when forming the pointer */ + if ((b >= 0x40 && b <= 0x7E) || (b >= 0xA1 && b <= 0xFE)) { + int pointer = (lead - 0x81) * 157 + (b - offset); /* key for the Big5Data lookups below */ + char outTrail; + switch (pointer) { /* the four listed pointers each produce two code units: U+00CA or U+00EA followed by U+0304 or U+030C */ + case 1133: + out.put('\u00CA'); + outTrail = '\u0304'; + break; + case 1135: + out.put('\u00CA'); + outTrail = '\u030C'; + break; + case 1164: + out.put('\u00EA'); + outTrail = '\u0304'; + break; + case 1166: + out.put('\u00EA'); + outTrail = '\u030C'; + break; + default: + char lowBits = Big5Data.lowBits(pointer); + if (lowBits == '\u0000') { + // The following |if| block fixes + // https://github.com/whatwg/encoding/issues/5 + if (b <= 0x7F) { + // prepend byte to stream + // Always legal, since we've always just read a byte + // if we come here. + in.position(in.position() - 1); + } + if (this.report) { + // This can go past the start of the buffer + // if the caller does not conform to the + // undocumented aspects of the API. + in.position(in.position() - 1); + return CoderResult.malformedForLength(b <= 0x7F ? 1 : 2); + } + out.put('\uFFFD'); + continue; + } + if (Big5Data.isAstral(pointer)) { /* code point is 0x20000 | lowBits, written as a surrogate pair; 0xD7C0 equals 0xD800 - (0x10000 >> 10) */ + int codePoint = lowBits | 0x20000; + out.put((char) (0xD7C0 + (codePoint >> 10))); + outTrail = (char) (0xDC00 + (codePoint & 0x3FF)); + break; + } + out.put(lowBits); + continue; + } + if (!out.hasRemaining()) { /* no room for the second code unit: keep it for the next decodeLoop or implFlush call */ + pendingTrail = outTrail; + return CoderResult.OVERFLOW; + } + out.put(outTrail); + continue; + } + // pointer is null + if (b <= 0x7F) { + // prepend byte to stream + // Always legal, since we've always just read a byte + // if we come here. + in.position(in.position() - 1); + } + if (this.report) { + // if position() == 0, the caller is not using the + // undocumented part of the API right and the line + // below will throw! + in.position(in.position() - 1); + return CoderResult.malformedForLength(b <= 0x7F ? 
1 : 2); + } + out.put('\uFFFD'); + continue; + } + } + /** Writes a stashed pendingTrail, then U+FFFD for a lead byte left without a trail byte; returns OVERFLOW whenever out has no room for the next write and UNDERFLOW once nothing is pending. */ + @Override protected CoderResult implFlush(CharBuffer out) { + if (pendingTrail != '\u0000') { + if (!out.hasRemaining()) { + return CoderResult.OVERFLOW; + } + out.put(pendingTrail); + pendingTrail = '\u0000'; + } + if (big5Lead != 0) { + assert !this.report: "How come big5Lead got to be non-zero when decodeLoop() returned in the reporting mode?"; + if (!out.hasRemaining()) { + return CoderResult.OVERFLOW; + } + out.put('\uFFFD'); + big5Lead = 0; + } + return CoderResult.UNDERFLOW; + } + /** Clears the pending lead byte and the pending second code unit. */ + @Override protected void implReset() { + big5Lead = 0; + pendingTrail = '\u0000'; + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Big5Encoder.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Big5Encoder.java new file mode 100644 index 000000000..de5132151 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Big5Encoder.java @@ -0,0 +1,185 @@ +/* + * Copyright (c) 2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.encoding; + +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.Charset; +import java.nio.charset.CoderResult; + +public class Big5Encoder extends Encoder { + + private char utf16Lead = '\u0000'; + + private byte pendingTrail = 0; + + protected Big5Encoder(Charset cs) { + super(cs, 1.5f, 2.0f); + } + + @Override protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) { + assert !((this.reportMalformed || this.reportUnmappable) && (utf16Lead != '\u0000')): + "When reporting, this method should never return with utf16Lead set."; + if (pendingTrail != 0) { + if (!out.hasRemaining()) { + return CoderResult.OVERFLOW; + } + out.put(pendingTrail); + pendingTrail = 0; + } + for (;;) { + if (!in.hasRemaining()) { + return CoderResult.UNDERFLOW; + } + if (!out.hasRemaining()) { + return CoderResult.OVERFLOW; + } + boolean isAstral; // true means Plane 2, false means BMP + char lowBits; // The low 16 bits of the code point + char codeUnit = in.get(); + int highBits = (codeUnit & 0xFC00); + if (highBits == 0xD800) { + // high surrogate + if (utf16Lead != '\u0000') { + // High surrogate follows another high surrogate. The + // *previous* code unit is in error. + if (this.reportMalformed) { + // The caller had better adhere to the API contract. + // Otherwise, this may throw. 
+ in.position(in.position() - 2); + utf16Lead = '\u0000'; + return CoderResult.malformedForLength(1); + } + out.put((byte) '?'); + } + utf16Lead = codeUnit; + continue; + } + if (highBits == 0xDC00) { + // low surrogate + if (utf16Lead == '\u0000') { + // Got low surrogate without a previous high surrogate + if (this.reportMalformed) { + in.position(in.position() - 1); + return CoderResult.malformedForLength(1); + } + out.put((byte) '?'); + continue; + } + int codePoint = (utf16Lead << 10) + codeUnit - 56613888; + utf16Lead = '\u0000'; + // Plane 2 is the only astral plane that has potentially + // Big5-encodable characters. + if ((0xFF0000 & codePoint) != 0x20000) { + if (this.reportUnmappable) { + in.position(in.position() - 2); + return CoderResult.unmappableForLength(2); + } + out.put((byte) '?'); + continue; + } + isAstral = true; + lowBits = (char)(codePoint & 0xFFFF); + } else { + // not a surrogate + if (utf16Lead != '\u0000') { + // Non-surrogate follows a high surrogate. The *previous* + // code unit is in error. + utf16Lead = '\u0000'; + if (this.reportMalformed) { + // The caller had better adhere to the API contract. + // Otherwise, this may throw. + in.position(in.position() - 2); + return CoderResult.malformedForLength(1); + } + out.put((byte) '?'); + // Let's unconsume this code unit and reloop in order to + // re-check if the output buffer still has space. + in.position(in.position() - 1); + continue; + } + isAstral = false; + lowBits = codeUnit; + } + // isAstral now tells us if we have a Plane 2 or a BMP character. + // lowBits tells us the low 16 bits. + // After all the above setup to deal with UTF-16, we are now + // finally ready to follow the spec. 
+ if (!isAstral && lowBits <= 0x7F) { + out.put((byte)lowBits); + continue; + } + int pointer = Big5Data.findPointer(lowBits, isAstral); + if (pointer == 0) { + if (this.reportUnmappable) { + if (isAstral) { + in.position(in.position() - 2); + return CoderResult.unmappableForLength(2); + } + in.position(in.position() - 1); + return CoderResult.unmappableForLength(1); + } + out.put((byte)'?'); + continue; + } + int lead = pointer / 157 + 0x81; + int trail = pointer % 157; + if (trail < 0x3F) { + trail += 0x40; + } else { + trail += 0x62; + } + out.put((byte)lead); + if (!out.hasRemaining()) { + pendingTrail = (byte)trail; + return CoderResult.OVERFLOW; + } + out.put((byte)trail); + continue; + } + } + + @Override protected CoderResult implFlush(ByteBuffer out) { + if (pendingTrail != 0) { + if (!out.hasRemaining()) { + return CoderResult.OVERFLOW; + } + out.put(pendingTrail); + pendingTrail = 0; + } + if (utf16Lead != '\u0000') { + assert !this.reportMalformed: "How come utf16Lead got to be non-zero when decodeLoop() returned in the reporting mode?"; + if (!out.hasRemaining()) { + return CoderResult.OVERFLOW; + } + out.put((byte)'?'); + utf16Lead = '\u0000'; + } + return CoderResult.UNDERFLOW; + } + + @Override protected void implReset() { + utf16Lead = '\u0000'; + pendingTrail = 0; + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Decoder.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Decoder.java new file mode 100644 index 000000000..41e06c63a --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Decoder.java @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of 
/**
 * Common base class for the decoders in this package.
 *
 * <p>It restricts the error-handling configuration of
 * {@link CharsetDecoder}: errors can be reported or replaced but never
 * ignored, and the only accepted replacement string is U+FFFD.
 */
public abstract class Decoder extends CharsetDecoder {

    /**
     * {@code true} when malformed input is to be reported, {@code false}
     * when it is to be replaced.
     */
    protected boolean report = true;

    /**
     * @param cs the charset this decoder belongs to
     * @param averageCharsPerByte passed through to {@link CharsetDecoder}
     * @param maxCharsPerByte passed through to {@link CharsetDecoder}
     */
    protected Decoder(Charset cs, float averageCharsPerByte, float maxCharsPerByte) {
        super(cs, averageCharsPerByte, maxCharsPerByte);
    }

    /**
     * Records whether malformed input is reported or replaced.
     *
     * @throws IllegalArgumentException if the action is {@code null},
     *         {@code IGNORE} or unknown
     */
    @Override protected final void implOnMalformedInput(CodingErrorAction newAction) {
        requireReplaceOrReport(newAction);
        // Only REPLACE and REPORT get past the check above.
        this.report = (newAction == CodingErrorAction.REPORT);
    }

    /**
     * Validates the action but stores nothing.
     *
     * @throws IllegalArgumentException if the action is {@code null},
     *         {@code IGNORE} or unknown
     */
    @Override protected final void implOnUnmappableCharacter(
            CodingErrorAction newAction) {
        // We don't actually care about REPLACE vs. REPORT, since there are
        // no unmappables.
        requireReplaceOrReport(newAction);
    }

    /**
     * Accepts U+FFFD as the replacement and nothing else.
     *
     * @throws IllegalArgumentException for any other replacement string
     */
    @Override protected final void implReplaceWith(String newReplacement) {
        boolean isReplacementCharacter = "\uFFFD".equals(newReplacement);
        if (!isReplacementCharacter) {
            throw new IllegalArgumentException("Only U+FFFD is allowed as the replacement.");
        }
    }

    /** Rejects every action other than {@code REPLACE} and {@code REPORT}. */
    private static void requireReplaceOrReport(CodingErrorAction action) {
        if (action == null) {
            throw new IllegalArgumentException("The argument must not be null.");
        }
        if (action == CodingErrorAction.IGNORE) {
            throw new IllegalArgumentException("The Encoding Standard does not allow errors to be ignored.");
        }
        if (action != CodingErrorAction.REPLACE && action != CodingErrorAction.REPORT) {
            assert false: "Unreachable.";
            throw new IllegalArgumentException("Unknown CodingErrorAction.");
        }
    }

    // TODO: Check if the JDK decoders reset the reporting state on reset()
}
/**
 * Common base class for the encoders in this package.
 *
 * <p>It restricts the error-handling configuration of
 * {@link CharsetEncoder}: errors can be reported or replaced but never
 * ignored, and the only legal replacement is the single byte {@code '?'}.
 * Subclasses read {@link #reportMalformed} and {@link #reportUnmappable} to
 * decide whether to return an error result or to emit the replacement.
 */
public abstract class Encoder extends CharsetEncoder {

    /** {@code true}: report malformed input; {@code false}: replace it. */
    boolean reportMalformed = true;

    /** {@code true}: report unmappable characters; {@code false}: replace them. */
    boolean reportUnmappable = true;

    /**
     * @param cs the charset this encoder belongs to
     * @param averageBytesPerChar passed through to {@link CharsetEncoder}
     * @param maxBytesPerChar passed through to {@link CharsetEncoder}
     */
    protected Encoder(Charset cs, float averageBytesPerChar,
            float maxBytesPerChar) {
        super(cs, averageBytesPerChar, maxBytesPerChar);
    }

    /**
     * Records whether malformed input is reported or replaced.
     *
     * @throws IllegalArgumentException if the action is {@code null},
     *         {@code IGNORE} or unknown
     */
    @Override protected final void implOnMalformedInput(CodingErrorAction newAction) {
        if (newAction == null) {
            throw new IllegalArgumentException("The argument must not be null.");
        }
        if (newAction == CodingErrorAction.IGNORE) {
            throw new IllegalArgumentException("The Encoding Standard does not allow errors to be ignored.");
        }
        if (newAction == CodingErrorAction.REPLACE) {
            this.reportMalformed = false;
            return;
        }
        if (newAction == CodingErrorAction.REPORT) {
            // This branch used to set reportUnmappable, so malformed-input
            // reporting could never be switched back on after REPLACE.
            this.reportMalformed = true;
            return;
        }
        assert false: "Unreachable.";
        throw new IllegalArgumentException("Unknown CodingErrorAction.");
    }

    /**
     * Records whether unmappable characters are reported or replaced.
     *
     * @throws IllegalArgumentException if the action is {@code null},
     *         {@code IGNORE} or unknown
     */
    @Override protected final void implOnUnmappableCharacter(
            CodingErrorAction newAction) {
        if (newAction == null) {
            throw new IllegalArgumentException("The argument must not be null.");
        }
        if (newAction == CodingErrorAction.IGNORE) {
            throw new IllegalArgumentException("The Encoding Standard does not allow errors to be ignored.");
        }
        if (newAction == CodingErrorAction.REPLACE) {
            this.reportUnmappable = false;
            return;
        }
        if (newAction == CodingErrorAction.REPORT) {
            // This branch used to set reportMalformed, so unmappable
            // reporting could never be switched back on after REPLACE.
            this.reportUnmappable = true;
            return;
        }
        assert false: "Unreachable.";
        throw new IllegalArgumentException("Unknown CodingErrorAction.");
    }

    /**
     * Tells whether {@code repl} is an acceptable replacement.
     *
     * @param repl candidate replacement bytes, may be {@code null}
     * @return {@code true} only for the one-byte array containing
     *         {@code '?'}
     */
    @Override public boolean isLegalReplacement(byte[] repl) {
        return repl != null && repl.length == 1 && repl[0] == '?';
    }

    /**
     * Does nothing: subclasses write {@code '?'} themselves, and
     * {@link #isLegalReplacement(byte[])} admits no other replacement.
     */
    @Override protected final void implReplaceWith(byte[] newReplacement) {
    }

}
/**
 * Represents an <a href="https://encoding.spec.whatwg.org/#encoding">encoding</a>
 * as defined in the <a href="https://encoding.spec.whatwg.org/">Encoding
 * Standard</a>, provides access to each encoding defined in the Encoding
 * Standard via a static constant and provides the
 * "<a href="https://encoding.spec.whatwg.org/#concept-encoding-get">get an
 * encoding</a>" algorithm defined in the Encoding Standard.
 *
 * <p>This class inherits from {@link Charset} to allow the Encoding
 * Standard-compliant encodings to be used in contexts that support
 * <code>Charset</code> instances. However, by design, the Encoding
 * Standard-compliant encodings are not supplied via a {@link CharsetProvider}
 * and, therefore, are not available via and do not interfere with the static
 * methods provided by <code>Charset</code>. (This class provides methods of
 * the same name to hide each static method of <code>Charset</code> to help
 * avoid accidental calls to the static methods of the superclass when working
 * with Encoding Standard-compliant encodings.)
 *
 * <p>When an application needs to use a particular encoding, such as utf-8
 * or windows-1252, the corresponding constant, i.e.
 * {@link #UTF_8 Encoding.UTF_8} and {@link #WINDOWS_1252 Encoding.WINDOWS_1252}
 * respectively, should be used. However, when the application receives an
 * encoding label from external input, the method {@link #forName(String)
 * forName()} should be used to obtain the object representing the encoding
 * identified by the label. In contexts where labels that map to the
 * <a href="https://encoding.spec.whatwg.org/#replacement">replacement
 * encoding</a> should be treated as unknown, the method {@link
 * #forNameNoReplacement(String) forNameNoReplacement()} should be used instead.
 *
 *
 * @author hsivonen
 */
public abstract class Encoding extends Charset {

    // Every known label. forName() runs Arrays.binarySearch() over this
    // array, so it has to stay in String natural order, and the hit index is
    // used directly as an index into ENCODINGS_FOR_LABELS.
    private static final String[] LABELS = {
        "866",
        "ansi_x3.4-1968",
        "arabic",
        "ascii",
        "asmo-708",
        "big5",
        "big5-hkscs",
        "chinese",
        "cn-big5",
        "cp1250",
        "cp1251",
        "cp1252",
        "cp1253",
        "cp1254",
        "cp1255",
        "cp1256",
        "cp1257",
        "cp1258",
        "cp819",
        "cp866",
        "csbig5",
        "cseuckr",
        "cseucpkdfmtjapanese",
        "csgb2312",
        "csibm866",
        "csiso2022jp",
        "csiso2022kr",
        "csiso58gb231280",
        "csiso88596e",
        "csiso88596i",
        "csiso88598e",
        "csiso88598i",
        "csisolatin1",
        "csisolatin2",
        "csisolatin3",
        "csisolatin4",
        "csisolatin5",
        "csisolatin6",
        "csisolatin9",
        "csisolatinarabic",
        "csisolatincyrillic",
        "csisolatingreek",
        "csisolatinhebrew",
        "cskoi8r",
        "csksc56011987",
        "csmacintosh",
        "csshiftjis",
        "cyrillic",
        "dos-874",
        "ecma-114",
        "ecma-118",
        "elot_928",
        "euc-jp",
        "euc-kr",
        "gb18030",
        "gb2312",
        "gb_2312",
        "gb_2312-80",
        "gbk",
        "greek",
        "greek8",
        "hebrew",
        "hz-gb-2312",
        "ibm819",
        "ibm866",
        "iso-2022-cn",
        "iso-2022-cn-ext",
        "iso-2022-jp",
        "iso-2022-kr",
        "iso-8859-1",
        "iso-8859-10",
        "iso-8859-11",
        "iso-8859-13",
        "iso-8859-14",
        "iso-8859-15",
        "iso-8859-16",
        "iso-8859-2",
        "iso-8859-3",
        "iso-8859-4",
        "iso-8859-5",
        "iso-8859-6",
        "iso-8859-6-e",
        "iso-8859-6-i",
        "iso-8859-7",
        "iso-8859-8",
        "iso-8859-8-e",
        "iso-8859-8-i",
        "iso-8859-9",
        "iso-ir-100",
        "iso-ir-101",
        "iso-ir-109",
        "iso-ir-110",
        "iso-ir-126",
        "iso-ir-127",
        "iso-ir-138",
        "iso-ir-144",
        "iso-ir-148",
        "iso-ir-149",
        "iso-ir-157",
        "iso-ir-58",
        "iso8859-1",
        "iso8859-10",
        "iso8859-11",
        "iso8859-13",
        "iso8859-14",
        "iso8859-15",
        "iso8859-2",
        "iso8859-3",
        "iso8859-4",
        "iso8859-5",
        "iso8859-6",
        "iso8859-7",
        "iso8859-8",
        "iso8859-9",
        "iso88591",
        "iso885910",
        "iso885911",
        "iso885913",
        "iso885914",
        "iso885915",
        "iso88592",
        "iso88593",
        "iso88594",
        "iso88595",
        "iso88596",
        "iso88597",
        "iso88598",
        "iso88599",
        "iso_8859-1",
        "iso_8859-15",
        "iso_8859-1:1987",
        "iso_8859-2",
        "iso_8859-2:1987",
        "iso_8859-3",
        "iso_8859-3:1988",
        "iso_8859-4",
        "iso_8859-4:1988",
        "iso_8859-5",
        "iso_8859-5:1988",
        "iso_8859-6",
        "iso_8859-6:1987",
        "iso_8859-7",
        "iso_8859-7:1987",
        "iso_8859-8",
        "iso_8859-8:1988",
        "iso_8859-9",
        "iso_8859-9:1989",
        "koi",
        "koi8",
        "koi8-r",
        "koi8-ru",
        "koi8-u",
        "koi8_r",
        "korean",
        "ks_c_5601-1987",
        "ks_c_5601-1989",
        "ksc5601",
        "ksc_5601",
        "l1",
        "l2",
        "l3",
        "l4",
        "l5",
        "l6",
        "l9",
        "latin1",
        "latin2",
        "latin3",
        "latin4",
        "latin5",
        "latin6",
        "logical",
        "mac",
        "macintosh",
        "ms932",
        "ms_kanji",
        "shift-jis",
        "shift_jis",
        "sjis",
        "sun_eu_greek",
        "tis-620",
        "unicode-1-1-utf-8",
        "us-ascii",
        "utf-16",
        "utf-16be",
        "utf-16le",
        "utf-8",
        "utf8",
        "visual",
        "windows-1250",
        "windows-1251",
        "windows-1252",
        "windows-1253",
        "windows-1254",
        "windows-1255",
        "windows-1256",
        "windows-1257",
        "windows-1258",
        "windows-31j",
        "windows-874",
        "windows-949",
        "x-cp1250",
        "x-cp1251",
        "x-cp1252",
        "x-cp1253",
        "x-cp1254",
        "x-cp1255",
        "x-cp1256",
        "x-cp1257",
        "x-cp1258",
        "x-euc-jp",
        "x-gbk",
        "x-mac-cyrillic",
        "x-mac-roman",
        "x-mac-ukrainian",
        "x-sjis",
        "x-user-defined",
        "x-x-big5",
    };

    // Parallel to LABELS: entry i is the encoding that LABELS[i] names.
    // Any change to one array must be mirrored at the same position in the
    // other.
    private static final Encoding[] ENCODINGS_FOR_LABELS = {
        Ibm866.INSTANCE,
        Windows1252.INSTANCE,
        Iso6.INSTANCE,
        Windows1252.INSTANCE,
        Iso6.INSTANCE,
        Big5.INSTANCE,
        Big5.INSTANCE,
        Gbk.INSTANCE,
        Big5.INSTANCE,
        Windows1250.INSTANCE,
        Windows1251.INSTANCE,
        Windows1252.INSTANCE,
        Windows1253.INSTANCE,
        Windows1254.INSTANCE,
        Windows1255.INSTANCE,
        Windows1256.INSTANCE,
        Windows1257.INSTANCE,
        Windows1258.INSTANCE,
        Windows1252.INSTANCE,
        Ibm866.INSTANCE,
        Big5.INSTANCE,
        EucKr.INSTANCE,
        EucJp.INSTANCE,
        Gbk.INSTANCE,
        Ibm866.INSTANCE,
        Iso2022Jp.INSTANCE,
        Replacement.INSTANCE,
        Gbk.INSTANCE,
        Iso6.INSTANCE,
        Iso6.INSTANCE,
        Iso8.INSTANCE,
        Iso8I.INSTANCE,
        Windows1252.INSTANCE,
        Iso2.INSTANCE,
        Iso3.INSTANCE,
        Iso4.INSTANCE,
        Windows1254.INSTANCE,
        Iso10.INSTANCE,
        Iso15.INSTANCE,
        Iso6.INSTANCE,
        Iso5.INSTANCE,
        Iso7.INSTANCE,
        Iso8.INSTANCE,
        Koi8R.INSTANCE,
        EucKr.INSTANCE,
        Macintosh.INSTANCE,
        ShiftJis.INSTANCE,
        Iso5.INSTANCE,
        Windows874.INSTANCE,
        Iso6.INSTANCE,
        Iso7.INSTANCE,
        Iso7.INSTANCE,
        EucJp.INSTANCE,
        EucKr.INSTANCE,
        Gb18030.INSTANCE,
        Gbk.INSTANCE,
        Gbk.INSTANCE,
        Gbk.INSTANCE,
        Gbk.INSTANCE,
        Iso7.INSTANCE,
        Iso7.INSTANCE,
        Iso8.INSTANCE,
        Replacement.INSTANCE,
        Windows1252.INSTANCE,
        Ibm866.INSTANCE,
        Replacement.INSTANCE,
        Replacement.INSTANCE,
        Iso2022Jp.INSTANCE,
        Replacement.INSTANCE,
        Windows1252.INSTANCE,
        Iso10.INSTANCE,
        Windows874.INSTANCE,
        Iso13.INSTANCE,
        Iso14.INSTANCE,
        Iso15.INSTANCE,
        Iso16.INSTANCE,
        Iso2.INSTANCE,
        Iso3.INSTANCE,
        Iso4.INSTANCE,
        Iso5.INSTANCE,
        Iso6.INSTANCE,
        Iso6.INSTANCE,
        Iso6.INSTANCE,
        Iso7.INSTANCE,
        Iso8.INSTANCE,
        Iso8.INSTANCE,
        Iso8I.INSTANCE,
        Windows1254.INSTANCE,
        Windows1252.INSTANCE,
        Iso2.INSTANCE,
        Iso3.INSTANCE,
        Iso4.INSTANCE,
        Iso7.INSTANCE,
        Iso6.INSTANCE,
        Iso8.INSTANCE,
        Iso5.INSTANCE,
        Windows1254.INSTANCE,
        EucKr.INSTANCE,
        Iso10.INSTANCE,
        Gbk.INSTANCE,
        Windows1252.INSTANCE,
        Iso10.INSTANCE,
        Windows874.INSTANCE,
        Iso13.INSTANCE,
        Iso14.INSTANCE,
        Iso15.INSTANCE,
        Iso2.INSTANCE,
        Iso3.INSTANCE,
        Iso4.INSTANCE,
        Iso5.INSTANCE,
        Iso6.INSTANCE,
        Iso7.INSTANCE,
        Iso8.INSTANCE,
        Windows1254.INSTANCE,
        Windows1252.INSTANCE,
        Iso10.INSTANCE,
        Windows874.INSTANCE,
        Iso13.INSTANCE,
        Iso14.INSTANCE,
        Iso15.INSTANCE,
        Iso2.INSTANCE,
        Iso3.INSTANCE,
        Iso4.INSTANCE,
        Iso5.INSTANCE,
        Iso6.INSTANCE,
        Iso7.INSTANCE,
        Iso8.INSTANCE,
        Windows1254.INSTANCE,
        Windows1252.INSTANCE,
        Iso15.INSTANCE,
        Windows1252.INSTANCE,
        Iso2.INSTANCE,
        Iso2.INSTANCE,
        Iso3.INSTANCE,
        Iso3.INSTANCE,
        Iso4.INSTANCE,
        Iso4.INSTANCE,
        Iso5.INSTANCE,
        Iso5.INSTANCE,
        Iso6.INSTANCE,
        Iso6.INSTANCE,
        Iso7.INSTANCE,
        Iso7.INSTANCE,
        Iso8.INSTANCE,
        Iso8.INSTANCE,
        Windows1254.INSTANCE,
        Windows1254.INSTANCE,
        Koi8R.INSTANCE,
        Koi8R.INSTANCE,
        Koi8R.INSTANCE,
        Koi8U.INSTANCE,
        Koi8U.INSTANCE,
        Koi8R.INSTANCE,
        EucKr.INSTANCE,
        EucKr.INSTANCE,
        EucKr.INSTANCE,
        EucKr.INSTANCE,
        EucKr.INSTANCE,
        Windows1252.INSTANCE,
        Iso2.INSTANCE,
        Iso3.INSTANCE,
        Iso4.INSTANCE,
        Windows1254.INSTANCE,
        Iso10.INSTANCE,
        Iso15.INSTANCE,
        Windows1252.INSTANCE,
        Iso2.INSTANCE,
        Iso3.INSTANCE,
        Iso4.INSTANCE,
        Windows1254.INSTANCE,
        Iso10.INSTANCE,
        Iso8I.INSTANCE,
        Macintosh.INSTANCE,
        Macintosh.INSTANCE,
        ShiftJis.INSTANCE,
        ShiftJis.INSTANCE,
        ShiftJis.INSTANCE,
        ShiftJis.INSTANCE,
        ShiftJis.INSTANCE,
        Iso7.INSTANCE,
        Windows874.INSTANCE,
        Utf8.INSTANCE,
        Windows1252.INSTANCE,
        Utf16Le.INSTANCE,
        Utf16Be.INSTANCE,
        Utf16Le.INSTANCE,
        Utf8.INSTANCE,
        Utf8.INSTANCE,
        Iso8.INSTANCE,
        Windows1250.INSTANCE,
        Windows1251.INSTANCE,
        Windows1252.INSTANCE,
        Windows1253.INSTANCE,
        Windows1254.INSTANCE,
        Windows1255.INSTANCE,
        Windows1256.INSTANCE,
        Windows1257.INSTANCE,
        Windows1258.INSTANCE,
        ShiftJis.INSTANCE,
        Windows874.INSTANCE,
        EucKr.INSTANCE,
        Windows1250.INSTANCE,
        Windows1251.INSTANCE,
        Windows1252.INSTANCE,
        Windows1253.INSTANCE,
        Windows1254.INSTANCE,
        Windows1255.INSTANCE,
        Windows1256.INSTANCE,
        Windows1257.INSTANCE,
        Windows1258.INSTANCE,
        EucJp.INSTANCE,
        Gbk.INSTANCE,
        MacCyrillic.INSTANCE,
        Macintosh.INSTANCE,
        MacCyrillic.INSTANCE,
        ShiftJis.INSTANCE,
        UserDefined.INSTANCE,
        Big5.INSTANCE,
    };

    // One entry per encoding; availableCharsets() iterates over this array.
    private static final Encoding[] ENCODINGS = {
        Big5.INSTANCE,
        EucJp.INSTANCE,
        EucKr.INSTANCE,
        Gb18030.INSTANCE,
        Gbk.INSTANCE,
        Ibm866.INSTANCE,
        Iso2022Jp.INSTANCE,
        Iso10.INSTANCE,
        Iso13.INSTANCE,
        Iso14.INSTANCE,
        Iso15.INSTANCE,
        Iso16.INSTANCE,
        Iso2.INSTANCE,
        Iso3.INSTANCE,
        Iso4.INSTANCE,
        Iso5.INSTANCE,
        Iso6.INSTANCE,
        Iso7.INSTANCE,
        Iso8.INSTANCE,
        Iso8I.INSTANCE,
        Koi8R.INSTANCE,
        Koi8U.INSTANCE,
        Macintosh.INSTANCE,
        Replacement.INSTANCE,
        ShiftJis.INSTANCE,
        Utf16Be.INSTANCE,
        Utf16Le.INSTANCE,
        Utf8.INSTANCE,
        Windows1250.INSTANCE,
        Windows1251.INSTANCE,
        Windows1252.INSTANCE,
        Windows1253.INSTANCE,
        Windows1254.INSTANCE,
        Windows1255.INSTANCE,
        Windows1256.INSTANCE,
        Windows1257.INSTANCE,
        Windows1258.INSTANCE,
        Windows874.INSTANCE,
        MacCyrillic.INSTANCE,
        UserDefined.INSTANCE,
    };

    /**
     * The big5 encoding.
     */
    public static final Encoding BIG5 = Big5.INSTANCE;

    /**
     * The euc-jp encoding.
     */
    public static final Encoding EUC_JP = EucJp.INSTANCE;

    /**
     * The euc-kr encoding.
     */
    public static final Encoding EUC_KR = EucKr.INSTANCE;

    /**
     * The gb18030 encoding.
     */
    public static final Encoding GB18030 = Gb18030.INSTANCE;

    /**
     * The gbk encoding.
     */
    public static final Encoding GBK = Gbk.INSTANCE;

    /**
     * The ibm866 encoding.
     */
    public static final Encoding IBM866 = Ibm866.INSTANCE;

    /**
     * The iso-2022-jp encoding.
     */
    public static final Encoding ISO_2022_JP = Iso2022Jp.INSTANCE;

    /**
     * The iso-8859-10 encoding.
     */
    public static final Encoding ISO_8859_10 = Iso10.INSTANCE;

    /**
     * The iso-8859-13 encoding.
     */
    public static final Encoding ISO_8859_13 = Iso13.INSTANCE;

    /**
     * The iso-8859-14 encoding.
     */
    public static final Encoding ISO_8859_14 = Iso14.INSTANCE;

    /**
     * The iso-8859-15 encoding.
     */
    public static final Encoding ISO_8859_15 = Iso15.INSTANCE;

    /**
     * The iso-8859-16 encoding.
     */
    public static final Encoding ISO_8859_16 = Iso16.INSTANCE;

    /**
     * The iso-8859-2 encoding.
     */
    public static final Encoding ISO_8859_2 = Iso2.INSTANCE;

    /**
     * The iso-8859-3 encoding.
     */
    public static final Encoding ISO_8859_3 = Iso3.INSTANCE;

    /**
     * The iso-8859-4 encoding.
     */
    public static final Encoding ISO_8859_4 = Iso4.INSTANCE;

    /**
     * The iso-8859-5 encoding.
     */
    public static final Encoding ISO_8859_5 = Iso5.INSTANCE;

    /**
     * The iso-8859-6 encoding.
     */
    public static final Encoding ISO_8859_6 = Iso6.INSTANCE;

    /**
     * The iso-8859-7 encoding.
     */
    public static final Encoding ISO_8859_7 = Iso7.INSTANCE;

    /**
     * The iso-8859-8 encoding.
     */
    public static final Encoding ISO_8859_8 = Iso8.INSTANCE;

    /**
     * The iso-8859-8-i encoding.
     */
    public static final Encoding ISO_8859_8_I = Iso8I.INSTANCE;

    /**
     * The koi8-r encoding.
     */
    public static final Encoding KOI8_R = Koi8R.INSTANCE;

    /**
     * The koi8-u encoding.
     */
    public static final Encoding KOI8_U = Koi8U.INSTANCE;

    /**
     * The macintosh encoding.
     */
    public static final Encoding MACINTOSH = Macintosh.INSTANCE;

    /**
     * The replacement encoding.
     */
    public static final Encoding REPLACEMENT = Replacement.INSTANCE;

    /**
     * The shift_jis encoding.
     */
    public static final Encoding SHIFT_JIS = ShiftJis.INSTANCE;

    /**
     * The utf-16be encoding.
     */
    public static final Encoding UTF_16BE = Utf16Be.INSTANCE;

    /**
     * The utf-16le encoding.
     */
    public static final Encoding UTF_16LE = Utf16Le.INSTANCE;

    /**
     * The utf-8 encoding.
     */
    public static final Encoding UTF_8 = Utf8.INSTANCE;

    /**
     * The windows-1250 encoding.
     */
    public static final Encoding WINDOWS_1250 = Windows1250.INSTANCE;

    /**
     * The windows-1251 encoding.
     */
    public static final Encoding WINDOWS_1251 = Windows1251.INSTANCE;

    /**
     * The windows-1252 encoding.
     */
    public static final Encoding WINDOWS_1252 = Windows1252.INSTANCE;

    /**
     * The windows-1253 encoding.
     */
    public static final Encoding WINDOWS_1253 = Windows1253.INSTANCE;

    /**
     * The windows-1254 encoding.
     */
    public static final Encoding WINDOWS_1254 = Windows1254.INSTANCE;

    /**
     * The windows-1255 encoding.
     */
    public static final Encoding WINDOWS_1255 = Windows1255.INSTANCE;

    /**
     * The windows-1256 encoding.
     */
    public static final Encoding WINDOWS_1256 = Windows1256.INSTANCE;

    /**
     * The windows-1257 encoding.
     */
    public static final Encoding WINDOWS_1257 = Windows1257.INSTANCE;

    /**
     * The windows-1258 encoding.
     */
    public static final Encoding WINDOWS_1258 = Windows1258.INSTANCE;

    /**
     * The windows-874 encoding.
     */
    public static final Encoding WINDOWS_874 = Windows874.INSTANCE;

    /**
     * The x-mac-cyrillic encoding.
     */
    public static final Encoding X_MAC_CYRILLIC = MacCyrillic.INSTANCE;

    /**
     * The x-user-defined encoding.
     */
    public static final Encoding X_USER_DEFINED = UserDefined.INSTANCE;


    // Cache for availableCharsets(); filled on first use. The field is
    // neither volatile nor guarded by a lock.
    private static SortedMap<String, Charset> encodings = null;

    /**
     * @param canonicalName passed to {@link Charset} as the canonical name
     * @param aliases passed to {@link Charset} as the aliases
     */
    protected Encoding(String canonicalName, String[] aliases) {
        super(canonicalName, aliases);
    }

    // Parser state for the slow path of forName(): before the label
    // (HEAD), inside it (LABEL), or in whitespace after it (TAIL).
    private enum State {
        HEAD, LABEL, TAIL
    };

    /**
     * Looks up the encoding for a label.
     *
     * <p>An exact match against the label table is tried first. Otherwise
     * the label is normalized - surrounding space, LF, CR, tab and form feed
     * are dropped and ASCII upper-case letters are lower-cased - and looked
     * up again.
     *
     * @param label the label to resolve
     * @return the encoding the label maps to
     * @throws IllegalArgumentException if {@code label} is {@code null}
     * @throws IllegalCharsetNameException if {@code label} is empty, contains
     *         a character other than ASCII letters, digits, whitespace and
     *         {@code - + . : _}, starts with one of those punctuation
     *         characters, or has label characters after trailing whitespace
     * @throws UnsupportedCharsetException if no known label matches
     */
    public static Encoding forName(String label) {
        if (label == null) {
            throw new IllegalArgumentException("Label must not be null.");
        }
        if (label.length() == 0) {
            throw new IllegalCharsetNameException(label);
        }
        // First try the fast path
        int index = Arrays.binarySearch(LABELS, label);
        if (index >= 0) {
            return ENCODINGS_FOR_LABELS[index];
        }
        // Else, slow path
        StringBuilder sb = new StringBuilder();
        State state = State.HEAD;
        for (int i = 0; i < label.length(); i++) {
            char c = label.charAt(i);
            if ((c == ' ') || (c == '\n') || (c == '\r') || (c == '\t')
                    || (c == '\u000C')) {
                // Whitespace is skipped; once the label has started it also
                // ends the label.
                if (state == State.LABEL) {
                    state = State.TAIL;
                }
                continue;
            }
            if ((c >= 'a' && c <= 'z') || (c >= '0' && c <= '9')) {
                switch (state) {
                    case HEAD:
                        state = State.LABEL;
                        // Fall through
                    case LABEL:
                        sb.append(c);
                        continue;
                    case TAIL:
                        throw new IllegalCharsetNameException(label);
                }
            }
            if (c >= 'A' && c <= 'Z') {
                // ASCII lower-casing.
                c += 0x20;
                switch (state) {
                    case HEAD:
                        state = State.LABEL;
                        // Fall through
                    case LABEL:
                        sb.append(c);
                        continue;
                    case TAIL:
                        throw new IllegalCharsetNameException(label);
                }
            }
            if ((c == '-') || (c == '+') || (c == '.') || (c == ':')
                    || (c == '_')) {
                // Punctuation is only accepted inside a label, never as its
                // first character.
                switch (state) {
                    case LABEL:
                        sb.append(c);
                        continue;
                    case HEAD:
                    case TAIL:
                        throw new IllegalCharsetNameException(label);
                }
            }
            throw new IllegalCharsetNameException(label);
        }
        index = Arrays.binarySearch(LABELS, sb.toString());
        if (index >= 0) {
            return ENCODINGS_FOR_LABELS[index];
        }
        throw new UnsupportedCharsetException(label);
    }

    /**
     * Like {@link #forName(String)}, but treats labels that map to the
     * replacement encoding as unsupported.
     *
     * @param label the label to resolve
     * @return the encoding the label maps to, never {@link #REPLACEMENT}
     * @throws UnsupportedCharsetException if the label is unknown or maps to
     *         the replacement encoding
     */
    public static Encoding forNameNoReplacement(String label) {
        Encoding encoding = Encoding.forName(label);
        if (encoding == Encoding.REPLACEMENT) {
            throw new UnsupportedCharsetException(label);
        }
        return encoding;
    }

    /**
     * Tells whether {@link #forName(String)} resolves the label.
     *
     * <p>Only {@link UnsupportedCharsetException} is turned into
     * {@code false}; the other exceptions of {@code forName()} propagate.
     *
     * @param label the label to test
     * @return {@code true} if the label maps to an encoding
     */
    public static boolean isSupported(String label) {
        try {
            Encoding.forName(label);
        } catch (UnsupportedCharsetException e) {
            return false;
        }
        return true;
    }

    /**
     * Tells whether {@link #forNameNoReplacement(String)} resolves the label.
     *
     * <p>Only {@link UnsupportedCharsetException} is turned into
     * {@code false}; the other exceptions propagate.
     *
     * @param label the label to test
     * @return {@code true} if the label maps to an encoding other than the
     *         replacement encoding
     */
    public static boolean isSupportedNoReplacement(String label) {
        try {
            Encoding.forNameNoReplacement(label);
        } catch (UnsupportedCharsetException e) {
            return false;
        }
        return true;
    }

    /**
     * Returns an unmodifiable map from encoding name to encoding, sorted by
     * name. The map is built on the first call and cached.
     *
     * @return the name-to-encoding map
     */
    public static SortedMap<String, Charset> availableCharsets() {
        if (encodings == null) {
            TreeMap<String, Charset> map = new TreeMap<String, Charset>();
            for (Encoding encoding : ENCODINGS) {
                map.put(encoding.name(), encoding);
            }
            encodings = Collections.unmodifiableSortedMap(map);
        }
        return encodings;
    }

    /**
     * @return {@link #WINDOWS_1252}, regardless of the platform default
     */
    public static Encoding defaultCharset() {
        return WINDOWS_1252;
    }

    /**
     * @return {@code false} in this base class
     */
    @Override public boolean canEncode() {
        return false;
    }

    /**
     * @return {@code false} for every argument in this base class
     */
    @Override public boolean contains(Charset cs) {
        return false;
    }

    /**
     * Base implementation for encodings without an encoder.
     *
     * @throws UnsupportedOperationException always
     */
    @Override public CharsetEncoder newEncoder() {
        throw new UnsupportedOperationException("Encoder not implemented.");
    }
}
permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. + * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CharsetEncoder; + +class EucJp extends Encoding { + + private static final String[] LABELS = { + "cseucpkdfmtjapanese", + "euc-jp", + "x-euc-jp" + }; + + private static final String NAME = "euc-jp"; + + static final EucJp INSTANCE = new EucJp(); + + private EucJp() { + super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return Charset.forName(NAME).newDecoder(); + } + + @Override public CharsetEncoder newEncoder() { + return Charset.forName(NAME).newEncoder(); + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/EucKr.java b/parser/html/java/htmlparser/src/nu/validator/encoding/EucKr.java new file mode 100644 index 000000000..a3923e224 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/EucKr.java @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, 
sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. + * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CharsetEncoder; + +class EucKr extends Encoding { + + private static final String[] LABELS = { + "cseuckr", + "csksc56011987", + "euc-kr", + "iso-ir-149", + "korean", + "ks_c_5601-1987", + "ks_c_5601-1989", + "ksc5601", + "ksc_5601", + "windows-949" + }; + + private static final String NAME = "euc-kr"; + + static final EucKr INSTANCE = new EucKr(); + + private EucKr() { + super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return Charset.forName(NAME).newDecoder(); + } + + @Override public CharsetEncoder newEncoder() { + return Charset.forName(NAME).newEncoder(); + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/FallibleSingleByteDecoder.java b/parser/html/java/htmlparser/src/nu/validator/encoding/FallibleSingleByteDecoder.java new file mode 100644 index 000000000..34a1f36b5 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/FallibleSingleByteDecoder.java @@ 
-0,0 +1,61 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +package nu.validator.encoding; + +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.CoderResult; + +public final class FallibleSingleByteDecoder extends InfallibleSingleByteDecoder { + + public FallibleSingleByteDecoder(Encoding cs, char[] upperHalf) { + super(cs, upperHalf); + } + + @Override protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) { + if (!this.report) { + return super.decodeLoop(in, out); + } else { + for (;;) { + if (!in.hasRemaining()) { + return CoderResult.UNDERFLOW; + } + if (!out.hasRemaining()) { + return CoderResult.OVERFLOW; + } + int b = (int) in.get(); + if (b >= 0) { + out.put((char) b); + } else { + char mapped = this.upperHalf[b + 128]; + if (mapped == '\uFFFD') { + in.position(in.position() - 1); + return CoderResult.malformedForLength(1); + } + out.put(mapped); + } + } + } + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Gb18030.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Gb18030.java new file mode 100644 index 000000000..fcb090dde --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Gb18030.java @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. + * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CharsetEncoder; + +class Gb18030 extends Encoding { + + private static final String[] LABELS = { + "gb18030" + }; + + private static final String NAME = "gb18030"; + + static final Gb18030 INSTANCE = new Gb18030(); + + private Gb18030() { + super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return Charset.forName(NAME).newDecoder(); + } + + @Override public CharsetEncoder newEncoder() { + return Charset.forName(NAME).newEncoder(); + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Gbk.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Gbk.java new file mode 100644 index 000000000..2dc3694ed --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Gbk.java @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject 
to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. + * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CharsetEncoder; + +class Gbk extends Encoding { + + private static final String[] LABELS = { + "chinese", + "csgb2312", + "csiso58gb231280", + "gb2312", + "gb_2312", + "gb_2312-80", + "gbk", + "iso-ir-58", + "x-gbk" + }; + + private static final String NAME = "gbk"; + + static final Gbk INSTANCE = new Gbk(); + + private Gbk() { + super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return Charset.forName("gb18030").newDecoder(); + } + + @Override public CharsetEncoder newEncoder() { + return Charset.forName(NAME).newEncoder(); + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Ibm866.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Ibm866.java new file mode 100644 index 000000000..037e62835 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Ibm866.java @@ -0,0 +1,184 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), 
+ * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. + * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.CharsetDecoder; + +class Ibm866 extends Encoding { + + private static final char[] TABLE = { + '\u0410', + '\u0411', + '\u0412', + '\u0413', + '\u0414', + '\u0415', + '\u0416', + '\u0417', + '\u0418', + '\u0419', + '\u041a', + '\u041b', + '\u041c', + '\u041d', + '\u041e', + '\u041f', + '\u0420', + '\u0421', + '\u0422', + '\u0423', + '\u0424', + '\u0425', + '\u0426', + '\u0427', + '\u0428', + '\u0429', + '\u042a', + '\u042b', + '\u042c', + '\u042d', + '\u042e', + '\u042f', + '\u0430', + '\u0431', + '\u0432', + '\u0433', + '\u0434', + '\u0435', + '\u0436', + '\u0437', + '\u0438', + '\u0439', + '\u043a', + '\u043b', + '\u043c', + '\u043d', + '\u043e', + '\u043f', + '\u2591', + '\u2592', + '\u2593', + '\u2502', + '\u2524', + '\u2561', + '\u2562', + '\u2556', + '\u2555', + '\u2563', + '\u2551', + '\u2557', + '\u255d', + '\u255c', + '\u255b', + '\u2510', + '\u2514', + 
'\u2534', + '\u252c', + '\u251c', + '\u2500', + '\u253c', + '\u255e', + '\u255f', + '\u255a', + '\u2554', + '\u2569', + '\u2566', + '\u2560', + '\u2550', + '\u256c', + '\u2567', + '\u2568', + '\u2564', + '\u2565', + '\u2559', + '\u2558', + '\u2552', + '\u2553', + '\u256b', + '\u256a', + '\u2518', + '\u250c', + '\u2588', + '\u2584', + '\u258c', + '\u2590', + '\u2580', + '\u0440', + '\u0441', + '\u0442', + '\u0443', + '\u0444', + '\u0445', + '\u0446', + '\u0447', + '\u0448', + '\u0449', + '\u044a', + '\u044b', + '\u044c', + '\u044d', + '\u044e', + '\u044f', + '\u0401', + '\u0451', + '\u0404', + '\u0454', + '\u0407', + '\u0457', + '\u040e', + '\u045e', + '\u00b0', + '\u2219', + '\u00b7', + '\u221a', + '\u2116', + '\u00a4', + '\u25a0', + '\u00a0' + }; + + private static final String[] LABELS = { + "866", + "cp866", + "csibm866", + "ibm866" + }; + + private static final String NAME = "ibm866"; + + static final Encoding INSTANCE = new Ibm866(); + + private Ibm866() { + super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return new InfallibleSingleByteDecoder(this, TABLE); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/InfallibleSingleByteDecoder.java b/parser/html/java/htmlparser/src/nu/validator/encoding/InfallibleSingleByteDecoder.java new file mode 100644 index 000000000..7cc63072c --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/InfallibleSingleByteDecoder.java @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * 
The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.encoding; + +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.CoderResult; + +public class InfallibleSingleByteDecoder extends Decoder { + + protected final char[] upperHalf; + + protected InfallibleSingleByteDecoder(Encoding cs, char[] upperHalf) { + super(cs, 1.0f, 1.0f); + this.upperHalf = upperHalf; + } + + @Override protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) { + // TODO figure out if it's worthwhile to optimize the case where both + // buffers are array-backed. 
+ for (;;) { + if (!in.hasRemaining()) { + return CoderResult.UNDERFLOW; + } + if (!out.hasRemaining()) { + return CoderResult.OVERFLOW; + } + int b = (int) in.get(); + if (b >= 0) { + out.put((char) b); + } else { + out.put(this.upperHalf[b + 128]); + } + } + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Iso10.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso10.java new file mode 100644 index 000000000..895cb5eed --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso10.java @@ -0,0 +1,187 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. 
+ * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.CharsetDecoder; + +class Iso10 extends Encoding { + + private static final char[] TABLE = { + '\u0080', + '\u0081', + '\u0082', + '\u0083', + '\u0084', + '\u0085', + '\u0086', + '\u0087', + '\u0088', + '\u0089', + '\u008a', + '\u008b', + '\u008c', + '\u008d', + '\u008e', + '\u008f', + '\u0090', + '\u0091', + '\u0092', + '\u0093', + '\u0094', + '\u0095', + '\u0096', + '\u0097', + '\u0098', + '\u0099', + '\u009a', + '\u009b', + '\u009c', + '\u009d', + '\u009e', + '\u009f', + '\u00a0', + '\u0104', + '\u0112', + '\u0122', + '\u012a', + '\u0128', + '\u0136', + '\u00a7', + '\u013b', + '\u0110', + '\u0160', + '\u0166', + '\u017d', + '\u00ad', + '\u016a', + '\u014a', + '\u00b0', + '\u0105', + '\u0113', + '\u0123', + '\u012b', + '\u0129', + '\u0137', + '\u00b7', + '\u013c', + '\u0111', + '\u0161', + '\u0167', + '\u017e', + '\u2015', + '\u016b', + '\u014b', + '\u0100', + '\u00c1', + '\u00c2', + '\u00c3', + '\u00c4', + '\u00c5', + '\u00c6', + '\u012e', + '\u010c', + '\u00c9', + '\u0118', + '\u00cb', + '\u0116', + '\u00cd', + '\u00ce', + '\u00cf', + '\u00d0', + '\u0145', + '\u014c', + '\u00d3', + '\u00d4', + '\u00d5', + '\u00d6', + '\u0168', + '\u00d8', + '\u0172', + '\u00da', + '\u00db', + '\u00dc', + '\u00dd', + '\u00de', + '\u00df', + '\u0101', + '\u00e1', + '\u00e2', + '\u00e3', + '\u00e4', + '\u00e5', + '\u00e6', + '\u012f', + '\u010d', + '\u00e9', + '\u0119', + '\u00eb', + '\u0117', + '\u00ed', + '\u00ee', + '\u00ef', + '\u00f0', + '\u0146', + '\u014d', + '\u00f3', + '\u00f4', + '\u00f5', + '\u00f6', + '\u0169', + '\u00f8', + '\u0173', + '\u00fa', + '\u00fb', + '\u00fc', + '\u00fd', + '\u00fe', + '\u0138' + }; + + private static final String[] LABELS = { + "csisolatin6", + "iso-8859-10", + "iso-ir-157", + "iso8859-10", + "iso885910", + "l6", + "latin6" + }; + + private static final String NAME = "iso-8859-10"; + + static final Encoding INSTANCE 
= new Iso10(); + + private Iso10() { + super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return new InfallibleSingleByteDecoder(this, TABLE); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Iso13.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso13.java new file mode 100644 index 000000000..60e6f5339 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso13.java @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. 
+ * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.CharsetDecoder; + +class Iso13 extends Encoding { + + private static final char[] TABLE = { + '\u0080', + '\u0081', + '\u0082', + '\u0083', + '\u0084', + '\u0085', + '\u0086', + '\u0087', + '\u0088', + '\u0089', + '\u008a', + '\u008b', + '\u008c', + '\u008d', + '\u008e', + '\u008f', + '\u0090', + '\u0091', + '\u0092', + '\u0093', + '\u0094', + '\u0095', + '\u0096', + '\u0097', + '\u0098', + '\u0099', + '\u009a', + '\u009b', + '\u009c', + '\u009d', + '\u009e', + '\u009f', + '\u00a0', + '\u201d', + '\u00a2', + '\u00a3', + '\u00a4', + '\u201e', + '\u00a6', + '\u00a7', + '\u00d8', + '\u00a9', + '\u0156', + '\u00ab', + '\u00ac', + '\u00ad', + '\u00ae', + '\u00c6', + '\u00b0', + '\u00b1', + '\u00b2', + '\u00b3', + '\u201c', + '\u00b5', + '\u00b6', + '\u00b7', + '\u00f8', + '\u00b9', + '\u0157', + '\u00bb', + '\u00bc', + '\u00bd', + '\u00be', + '\u00e6', + '\u0104', + '\u012e', + '\u0100', + '\u0106', + '\u00c4', + '\u00c5', + '\u0118', + '\u0112', + '\u010c', + '\u00c9', + '\u0179', + '\u0116', + '\u0122', + '\u0136', + '\u012a', + '\u013b', + '\u0160', + '\u0143', + '\u0145', + '\u00d3', + '\u014c', + '\u00d5', + '\u00d6', + '\u00d7', + '\u0172', + '\u0141', + '\u015a', + '\u016a', + '\u00dc', + '\u017b', + '\u017d', + '\u00df', + '\u0105', + '\u012f', + '\u0101', + '\u0107', + '\u00e4', + '\u00e5', + '\u0119', + '\u0113', + '\u010d', + '\u00e9', + '\u017a', + '\u0117', + '\u0123', + '\u0137', + '\u012b', + '\u013c', + '\u0161', + '\u0144', + '\u0146', + '\u00f3', + '\u014d', + '\u00f5', + '\u00f6', + '\u00f7', + '\u0173', + '\u0142', + '\u015b', + '\u016b', + '\u00fc', + '\u017c', + '\u017e', + '\u2019' + }; + + private static final String[] LABELS = { + "iso-8859-13", + "iso8859-13", + "iso885913" + }; + + private static final String NAME = "iso-8859-13"; + + static final Encoding INSTANCE = new Iso13(); + + private Iso13() { + super(NAME, 
LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return new InfallibleSingleByteDecoder(this, TABLE); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Iso14.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso14.java new file mode 100644 index 000000000..d4a180e6e --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso14.java @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. 
+ * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.CharsetDecoder; + +class Iso14 extends Encoding { + + private static final char[] TABLE = { + '\u0080', + '\u0081', + '\u0082', + '\u0083', + '\u0084', + '\u0085', + '\u0086', + '\u0087', + '\u0088', + '\u0089', + '\u008a', + '\u008b', + '\u008c', + '\u008d', + '\u008e', + '\u008f', + '\u0090', + '\u0091', + '\u0092', + '\u0093', + '\u0094', + '\u0095', + '\u0096', + '\u0097', + '\u0098', + '\u0099', + '\u009a', + '\u009b', + '\u009c', + '\u009d', + '\u009e', + '\u009f', + '\u00a0', + '\u1e02', + '\u1e03', + '\u00a3', + '\u010a', + '\u010b', + '\u1e0a', + '\u00a7', + '\u1e80', + '\u00a9', + '\u1e82', + '\u1e0b', + '\u1ef2', + '\u00ad', + '\u00ae', + '\u0178', + '\u1e1e', + '\u1e1f', + '\u0120', + '\u0121', + '\u1e40', + '\u1e41', + '\u00b6', + '\u1e56', + '\u1e81', + '\u1e57', + '\u1e83', + '\u1e60', + '\u1ef3', + '\u1e84', + '\u1e85', + '\u1e61', + '\u00c0', + '\u00c1', + '\u00c2', + '\u00c3', + '\u00c4', + '\u00c5', + '\u00c6', + '\u00c7', + '\u00c8', + '\u00c9', + '\u00ca', + '\u00cb', + '\u00cc', + '\u00cd', + '\u00ce', + '\u00cf', + '\u0174', + '\u00d1', + '\u00d2', + '\u00d3', + '\u00d4', + '\u00d5', + '\u00d6', + '\u1e6a', + '\u00d8', + '\u00d9', + '\u00da', + '\u00db', + '\u00dc', + '\u00dd', + '\u0176', + '\u00df', + '\u00e0', + '\u00e1', + '\u00e2', + '\u00e3', + '\u00e4', + '\u00e5', + '\u00e6', + '\u00e7', + '\u00e8', + '\u00e9', + '\u00ea', + '\u00eb', + '\u00ec', + '\u00ed', + '\u00ee', + '\u00ef', + '\u0175', + '\u00f1', + '\u00f2', + '\u00f3', + '\u00f4', + '\u00f5', + '\u00f6', + '\u1e6b', + '\u00f8', + '\u00f9', + '\u00fa', + '\u00fb', + '\u00fc', + '\u00fd', + '\u0177', + '\u00ff' + }; + + private static final String[] LABELS = { + "iso-8859-14", + "iso8859-14", + "iso885914" + }; + + private static final String NAME = "iso-8859-14"; + + static final Encoding INSTANCE = new Iso14(); + + private Iso14() { + super(NAME, 
LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return new InfallibleSingleByteDecoder(this, TABLE); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Iso15.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso15.java new file mode 100644 index 000000000..a60e4b6ef --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso15.java @@ -0,0 +1,186 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. 
+ * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.CharsetDecoder; + +class Iso15 extends Encoding { + + private static final char[] TABLE = { + '\u0080', + '\u0081', + '\u0082', + '\u0083', + '\u0084', + '\u0085', + '\u0086', + '\u0087', + '\u0088', + '\u0089', + '\u008a', + '\u008b', + '\u008c', + '\u008d', + '\u008e', + '\u008f', + '\u0090', + '\u0091', + '\u0092', + '\u0093', + '\u0094', + '\u0095', + '\u0096', + '\u0097', + '\u0098', + '\u0099', + '\u009a', + '\u009b', + '\u009c', + '\u009d', + '\u009e', + '\u009f', + '\u00a0', + '\u00a1', + '\u00a2', + '\u00a3', + '\u20ac', + '\u00a5', + '\u0160', + '\u00a7', + '\u0161', + '\u00a9', + '\u00aa', + '\u00ab', + '\u00ac', + '\u00ad', + '\u00ae', + '\u00af', + '\u00b0', + '\u00b1', + '\u00b2', + '\u00b3', + '\u017d', + '\u00b5', + '\u00b6', + '\u00b7', + '\u017e', + '\u00b9', + '\u00ba', + '\u00bb', + '\u0152', + '\u0153', + '\u0178', + '\u00bf', + '\u00c0', + '\u00c1', + '\u00c2', + '\u00c3', + '\u00c4', + '\u00c5', + '\u00c6', + '\u00c7', + '\u00c8', + '\u00c9', + '\u00ca', + '\u00cb', + '\u00cc', + '\u00cd', + '\u00ce', + '\u00cf', + '\u00d0', + '\u00d1', + '\u00d2', + '\u00d3', + '\u00d4', + '\u00d5', + '\u00d6', + '\u00d7', + '\u00d8', + '\u00d9', + '\u00da', + '\u00db', + '\u00dc', + '\u00dd', + '\u00de', + '\u00df', + '\u00e0', + '\u00e1', + '\u00e2', + '\u00e3', + '\u00e4', + '\u00e5', + '\u00e6', + '\u00e7', + '\u00e8', + '\u00e9', + '\u00ea', + '\u00eb', + '\u00ec', + '\u00ed', + '\u00ee', + '\u00ef', + '\u00f0', + '\u00f1', + '\u00f2', + '\u00f3', + '\u00f4', + '\u00f5', + '\u00f6', + '\u00f7', + '\u00f8', + '\u00f9', + '\u00fa', + '\u00fb', + '\u00fc', + '\u00fd', + '\u00fe', + '\u00ff' + }; + + private static final String[] LABELS = { + "csisolatin9", + "iso-8859-15", + "iso8859-15", + "iso885915", + "iso_8859-15", + "l9" + }; + + private static final String NAME = "iso-8859-15"; + + static final Encoding INSTANCE = new 
Iso15(); + + private Iso15() { + super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return new InfallibleSingleByteDecoder(this, TABLE); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Iso16.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso16.java new file mode 100644 index 000000000..5eb1926db --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso16.java @@ -0,0 +1,181 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. 
+ * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.CharsetDecoder; + +class Iso16 extends Encoding { + + private static final char[] TABLE = { + '\u0080', + '\u0081', + '\u0082', + '\u0083', + '\u0084', + '\u0085', + '\u0086', + '\u0087', + '\u0088', + '\u0089', + '\u008a', + '\u008b', + '\u008c', + '\u008d', + '\u008e', + '\u008f', + '\u0090', + '\u0091', + '\u0092', + '\u0093', + '\u0094', + '\u0095', + '\u0096', + '\u0097', + '\u0098', + '\u0099', + '\u009a', + '\u009b', + '\u009c', + '\u009d', + '\u009e', + '\u009f', + '\u00a0', + '\u0104', + '\u0105', + '\u0141', + '\u20ac', + '\u201e', + '\u0160', + '\u00a7', + '\u0161', + '\u00a9', + '\u0218', + '\u00ab', + '\u0179', + '\u00ad', + '\u017a', + '\u017b', + '\u00b0', + '\u00b1', + '\u010c', + '\u0142', + '\u017d', + '\u201d', + '\u00b6', + '\u00b7', + '\u017e', + '\u010d', + '\u0219', + '\u00bb', + '\u0152', + '\u0153', + '\u0178', + '\u017c', + '\u00c0', + '\u00c1', + '\u00c2', + '\u0102', + '\u00c4', + '\u0106', + '\u00c6', + '\u00c7', + '\u00c8', + '\u00c9', + '\u00ca', + '\u00cb', + '\u00cc', + '\u00cd', + '\u00ce', + '\u00cf', + '\u0110', + '\u0143', + '\u00d2', + '\u00d3', + '\u00d4', + '\u0150', + '\u00d6', + '\u015a', + '\u0170', + '\u00d9', + '\u00da', + '\u00db', + '\u00dc', + '\u0118', + '\u021a', + '\u00df', + '\u00e0', + '\u00e1', + '\u00e2', + '\u0103', + '\u00e4', + '\u0107', + '\u00e6', + '\u00e7', + '\u00e8', + '\u00e9', + '\u00ea', + '\u00eb', + '\u00ec', + '\u00ed', + '\u00ee', + '\u00ef', + '\u0111', + '\u0144', + '\u00f2', + '\u00f3', + '\u00f4', + '\u0151', + '\u00f6', + '\u015b', + '\u0171', + '\u00f9', + '\u00fa', + '\u00fb', + '\u00fc', + '\u0119', + '\u021b', + '\u00ff' + }; + + private static final String[] LABELS = { + "iso-8859-16" + }; + + private static final String NAME = "iso-8859-16"; + + static final Encoding INSTANCE = new Iso16(); + + private Iso16() { + super(NAME, LABELS); + } + + @Override 
public CharsetDecoder newDecoder() { + return new InfallibleSingleByteDecoder(this, TABLE); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Iso2.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso2.java new file mode 100644 index 000000000..7a5f6322a --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso2.java @@ -0,0 +1,189 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. 
+ * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.CharsetDecoder; + +class Iso2 extends Encoding { + + private static final char[] TABLE = { + '\u0080', + '\u0081', + '\u0082', + '\u0083', + '\u0084', + '\u0085', + '\u0086', + '\u0087', + '\u0088', + '\u0089', + '\u008a', + '\u008b', + '\u008c', + '\u008d', + '\u008e', + '\u008f', + '\u0090', + '\u0091', + '\u0092', + '\u0093', + '\u0094', + '\u0095', + '\u0096', + '\u0097', + '\u0098', + '\u0099', + '\u009a', + '\u009b', + '\u009c', + '\u009d', + '\u009e', + '\u009f', + '\u00a0', + '\u0104', + '\u02d8', + '\u0141', + '\u00a4', + '\u013d', + '\u015a', + '\u00a7', + '\u00a8', + '\u0160', + '\u015e', + '\u0164', + '\u0179', + '\u00ad', + '\u017d', + '\u017b', + '\u00b0', + '\u0105', + '\u02db', + '\u0142', + '\u00b4', + '\u013e', + '\u015b', + '\u02c7', + '\u00b8', + '\u0161', + '\u015f', + '\u0165', + '\u017a', + '\u02dd', + '\u017e', + '\u017c', + '\u0154', + '\u00c1', + '\u00c2', + '\u0102', + '\u00c4', + '\u0139', + '\u0106', + '\u00c7', + '\u010c', + '\u00c9', + '\u0118', + '\u00cb', + '\u011a', + '\u00cd', + '\u00ce', + '\u010e', + '\u0110', + '\u0143', + '\u0147', + '\u00d3', + '\u00d4', + '\u0150', + '\u00d6', + '\u00d7', + '\u0158', + '\u016e', + '\u00da', + '\u0170', + '\u00dc', + '\u00dd', + '\u0162', + '\u00df', + '\u0155', + '\u00e1', + '\u00e2', + '\u0103', + '\u00e4', + '\u013a', + '\u0107', + '\u00e7', + '\u010d', + '\u00e9', + '\u0119', + '\u00eb', + '\u011b', + '\u00ed', + '\u00ee', + '\u010f', + '\u0111', + '\u0144', + '\u0148', + '\u00f3', + '\u00f4', + '\u0151', + '\u00f6', + '\u00f7', + '\u0159', + '\u016f', + '\u00fa', + '\u0171', + '\u00fc', + '\u00fd', + '\u0163', + '\u02d9' + }; + + private static final String[] LABELS = { + "csisolatin2", + "iso-8859-2", + "iso-ir-101", + "iso8859-2", + "iso88592", + "iso_8859-2", + "iso_8859-2:1987", + "l2", + "latin2" + }; + + private static final String NAME = "iso-8859-2"; + 
+ static final Encoding INSTANCE = new Iso2(); + + private Iso2() { + super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return new InfallibleSingleByteDecoder(this, TABLE); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Iso2022Jp.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso2022Jp.java new file mode 100644 index 000000000..6ebadc947 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso2022Jp.java @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. 
+ * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CharsetEncoder; + +class Iso2022Jp extends Encoding { + + private static final String[] LABELS = { + "csiso2022jp", + "iso-2022-jp" + }; + + private static final String NAME = "iso-2022-jp"; + + static final Iso2022Jp INSTANCE = new Iso2022Jp(); + + private Iso2022Jp() { + super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return Charset.forName(NAME).newDecoder(); + } + + @Override public CharsetEncoder newEncoder() { + return Charset.forName(NAME).newEncoder(); + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Iso3.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso3.java new file mode 100644 index 000000000..0667a160c --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso3.java @@ -0,0 +1,189 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. + * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.CharsetDecoder; + +class Iso3 extends Encoding { + + private static final char[] TABLE = { + '\u0080', + '\u0081', + '\u0082', + '\u0083', + '\u0084', + '\u0085', + '\u0086', + '\u0087', + '\u0088', + '\u0089', + '\u008a', + '\u008b', + '\u008c', + '\u008d', + '\u008e', + '\u008f', + '\u0090', + '\u0091', + '\u0092', + '\u0093', + '\u0094', + '\u0095', + '\u0096', + '\u0097', + '\u0098', + '\u0099', + '\u009a', + '\u009b', + '\u009c', + '\u009d', + '\u009e', + '\u009f', + '\u00a0', + '\u0126', + '\u02d8', + '\u00a3', + '\u00a4', + '\ufffd', + '\u0124', + '\u00a7', + '\u00a8', + '\u0130', + '\u015e', + '\u011e', + '\u0134', + '\u00ad', + '\ufffd', + '\u017b', + '\u00b0', + '\u0127', + '\u00b2', + '\u00b3', + '\u00b4', + '\u00b5', + '\u0125', + '\u00b7', + '\u00b8', + '\u0131', + '\u015f', + '\u011f', + '\u0135', + '\u00bd', + '\ufffd', + '\u017c', + '\u00c0', + '\u00c1', + '\u00c2', + '\ufffd', + '\u00c4', + '\u010a', + '\u0108', + '\u00c7', + '\u00c8', + '\u00c9', + '\u00ca', + '\u00cb', + '\u00cc', + '\u00cd', + '\u00ce', + '\u00cf', + '\ufffd', + '\u00d1', + '\u00d2', + '\u00d3', + '\u00d4', + '\u0120', + '\u00d6', + '\u00d7', + '\u011c', + '\u00d9', + '\u00da', + '\u00db', + '\u00dc', + '\u016c', + '\u015c', + '\u00df', + '\u00e0', + '\u00e1', + '\u00e2', + '\ufffd', + '\u00e4', + '\u010b', + '\u0109', + '\u00e7', + '\u00e8', + '\u00e9', + '\u00ea', + '\u00eb', + '\u00ec', + '\u00ed', + '\u00ee', + '\u00ef', + '\ufffd', + '\u00f1', + '\u00f2', + '\u00f3', + '\u00f4', + '\u0121', + '\u00f6', + '\u00f7', + 
'\u011d', + '\u00f9', + '\u00fa', + '\u00fb', + '\u00fc', + '\u016d', + '\u015d', + '\u02d9' + }; + + private static final String[] LABELS = { + "csisolatin3", + "iso-8859-3", + "iso-ir-109", + "iso8859-3", + "iso88593", + "iso_8859-3", + "iso_8859-3:1988", + "l3", + "latin3" + }; + + private static final String NAME = "iso-8859-3"; + + static final Encoding INSTANCE = new Iso3(); + + private Iso3() { + super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return new FallibleSingleByteDecoder(this, TABLE); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Iso4.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso4.java new file mode 100644 index 000000000..b954869ab --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso4.java @@ -0,0 +1,189 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. + * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.CharsetDecoder; + +class Iso4 extends Encoding { + + private static final char[] TABLE = { + '\u0080', + '\u0081', + '\u0082', + '\u0083', + '\u0084', + '\u0085', + '\u0086', + '\u0087', + '\u0088', + '\u0089', + '\u008a', + '\u008b', + '\u008c', + '\u008d', + '\u008e', + '\u008f', + '\u0090', + '\u0091', + '\u0092', + '\u0093', + '\u0094', + '\u0095', + '\u0096', + '\u0097', + '\u0098', + '\u0099', + '\u009a', + '\u009b', + '\u009c', + '\u009d', + '\u009e', + '\u009f', + '\u00a0', + '\u0104', + '\u0138', + '\u0156', + '\u00a4', + '\u0128', + '\u013b', + '\u00a7', + '\u00a8', + '\u0160', + '\u0112', + '\u0122', + '\u0166', + '\u00ad', + '\u017d', + '\u00af', + '\u00b0', + '\u0105', + '\u02db', + '\u0157', + '\u00b4', + '\u0129', + '\u013c', + '\u02c7', + '\u00b8', + '\u0161', + '\u0113', + '\u0123', + '\u0167', + '\u014a', + '\u017e', + '\u014b', + '\u0100', + '\u00c1', + '\u00c2', + '\u00c3', + '\u00c4', + '\u00c5', + '\u00c6', + '\u012e', + '\u010c', + '\u00c9', + '\u0118', + '\u00cb', + '\u0116', + '\u00cd', + '\u00ce', + '\u012a', + '\u0110', + '\u0145', + '\u014c', + '\u0136', + '\u00d4', + '\u00d5', + '\u00d6', + '\u00d7', + '\u00d8', + '\u0172', + '\u00da', + '\u00db', + '\u00dc', + '\u0168', + '\u016a', + '\u00df', + '\u0101', + '\u00e1', + '\u00e2', + '\u00e3', + '\u00e4', + '\u00e5', + '\u00e6', + '\u012f', + '\u010d', + '\u00e9', + '\u0119', + '\u00eb', + '\u0117', + '\u00ed', + '\u00ee', + '\u012b', + '\u0111', + '\u0146', + '\u014d', + '\u0137', + '\u00f4', + '\u00f5', + '\u00f6', + '\u00f7', + '\u00f8', + '\u0173', + '\u00fa', + '\u00fb', + '\u00fc', + '\u0169', + '\u016b', + '\u02d9' + }; + + private static final String[] LABELS = { + "csisolatin4", + "iso-8859-4", + "iso-ir-110", + "iso8859-4", + "iso88594", + "iso_8859-4", + "iso_8859-4:1988", + "l4", + "latin4" 
+ }; + + private static final String NAME = "iso-8859-4"; + + static final Encoding INSTANCE = new Iso4(); + + private Iso4() { + super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return new InfallibleSingleByteDecoder(this, TABLE); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Iso5.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso5.java new file mode 100644 index 000000000..13946cdbb --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso5.java @@ -0,0 +1,188 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. 
+ * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.CharsetDecoder; + +class Iso5 extends Encoding { + + private static final char[] TABLE = { + '\u0080', + '\u0081', + '\u0082', + '\u0083', + '\u0084', + '\u0085', + '\u0086', + '\u0087', + '\u0088', + '\u0089', + '\u008a', + '\u008b', + '\u008c', + '\u008d', + '\u008e', + '\u008f', + '\u0090', + '\u0091', + '\u0092', + '\u0093', + '\u0094', + '\u0095', + '\u0096', + '\u0097', + '\u0098', + '\u0099', + '\u009a', + '\u009b', + '\u009c', + '\u009d', + '\u009e', + '\u009f', + '\u00a0', + '\u0401', + '\u0402', + '\u0403', + '\u0404', + '\u0405', + '\u0406', + '\u0407', + '\u0408', + '\u0409', + '\u040a', + '\u040b', + '\u040c', + '\u00ad', + '\u040e', + '\u040f', + '\u0410', + '\u0411', + '\u0412', + '\u0413', + '\u0414', + '\u0415', + '\u0416', + '\u0417', + '\u0418', + '\u0419', + '\u041a', + '\u041b', + '\u041c', + '\u041d', + '\u041e', + '\u041f', + '\u0420', + '\u0421', + '\u0422', + '\u0423', + '\u0424', + '\u0425', + '\u0426', + '\u0427', + '\u0428', + '\u0429', + '\u042a', + '\u042b', + '\u042c', + '\u042d', + '\u042e', + '\u042f', + '\u0430', + '\u0431', + '\u0432', + '\u0433', + '\u0434', + '\u0435', + '\u0436', + '\u0437', + '\u0438', + '\u0439', + '\u043a', + '\u043b', + '\u043c', + '\u043d', + '\u043e', + '\u043f', + '\u0440', + '\u0441', + '\u0442', + '\u0443', + '\u0444', + '\u0445', + '\u0446', + '\u0447', + '\u0448', + '\u0449', + '\u044a', + '\u044b', + '\u044c', + '\u044d', + '\u044e', + '\u044f', + '\u2116', + '\u0451', + '\u0452', + '\u0453', + '\u0454', + '\u0455', + '\u0456', + '\u0457', + '\u0458', + '\u0459', + '\u045a', + '\u045b', + '\u045c', + '\u00a7', + '\u045e', + '\u045f' + }; + + private static final String[] LABELS = { + "csisolatincyrillic", + "cyrillic", + "iso-8859-5", + "iso-ir-144", + "iso8859-5", + "iso88595", + "iso_8859-5", + "iso_8859-5:1988" + }; + + private static final String NAME = "iso-8859-5"; + 
+ static final Encoding INSTANCE = new Iso5(); + + private Iso5() { + super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return new InfallibleSingleByteDecoder(this, TABLE); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Iso6.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso6.java new file mode 100644 index 000000000..02e6df8ba --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso6.java @@ -0,0 +1,194 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. 
+ * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.CharsetDecoder; + +class Iso6 extends Encoding { + + private static final char[] TABLE = { + '\u0080', + '\u0081', + '\u0082', + '\u0083', + '\u0084', + '\u0085', + '\u0086', + '\u0087', + '\u0088', + '\u0089', + '\u008a', + '\u008b', + '\u008c', + '\u008d', + '\u008e', + '\u008f', + '\u0090', + '\u0091', + '\u0092', + '\u0093', + '\u0094', + '\u0095', + '\u0096', + '\u0097', + '\u0098', + '\u0099', + '\u009a', + '\u009b', + '\u009c', + '\u009d', + '\u009e', + '\u009f', + '\u00a0', + '\ufffd', + '\ufffd', + '\ufffd', + '\u00a4', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\u060c', + '\u00ad', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\u061b', + '\ufffd', + '\ufffd', + '\ufffd', + '\u061f', + '\ufffd', + '\u0621', + '\u0622', + '\u0623', + '\u0624', + '\u0625', + '\u0626', + '\u0627', + '\u0628', + '\u0629', + '\u062a', + '\u062b', + '\u062c', + '\u062d', + '\u062e', + '\u062f', + '\u0630', + '\u0631', + '\u0632', + '\u0633', + '\u0634', + '\u0635', + '\u0636', + '\u0637', + '\u0638', + '\u0639', + '\u063a', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\u0640', + '\u0641', + '\u0642', + '\u0643', + '\u0644', + '\u0645', + '\u0646', + '\u0647', + '\u0648', + '\u0649', + '\u064a', + '\u064b', + '\u064c', + '\u064d', + '\u064e', + '\u064f', + '\u0650', + '\u0651', + '\u0652', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd' + }; + + private static final String[] LABELS = { + "arabic", + "asmo-708", + "csiso88596e", + "csiso88596i", + "csisolatinarabic", + "ecma-114", + "iso-8859-6", + "iso-8859-6-e", + "iso-8859-6-i", + "iso-ir-127", + "iso8859-6", + "iso88596", + 
"iso_8859-6", + "iso_8859-6:1987" + }; + + private static final String NAME = "iso-8859-6"; + + static final Encoding INSTANCE = new Iso6(); + + private Iso6() { + super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return new FallibleSingleByteDecoder(this, TABLE); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Iso7.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso7.java new file mode 100644 index 000000000..630e702de --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso7.java @@ -0,0 +1,192 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. 
+ * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.CharsetDecoder; + +class Iso7 extends Encoding { + + private static final char[] TABLE = { + '\u0080', + '\u0081', + '\u0082', + '\u0083', + '\u0084', + '\u0085', + '\u0086', + '\u0087', + '\u0088', + '\u0089', + '\u008a', + '\u008b', + '\u008c', + '\u008d', + '\u008e', + '\u008f', + '\u0090', + '\u0091', + '\u0092', + '\u0093', + '\u0094', + '\u0095', + '\u0096', + '\u0097', + '\u0098', + '\u0099', + '\u009a', + '\u009b', + '\u009c', + '\u009d', + '\u009e', + '\u009f', + '\u00a0', + '\u2018', + '\u2019', + '\u00a3', + '\u20ac', + '\u20af', + '\u00a6', + '\u00a7', + '\u00a8', + '\u00a9', + '\u037a', + '\u00ab', + '\u00ac', + '\u00ad', + '\ufffd', + '\u2015', + '\u00b0', + '\u00b1', + '\u00b2', + '\u00b3', + '\u0384', + '\u0385', + '\u0386', + '\u00b7', + '\u0388', + '\u0389', + '\u038a', + '\u00bb', + '\u038c', + '\u00bd', + '\u038e', + '\u038f', + '\u0390', + '\u0391', + '\u0392', + '\u0393', + '\u0394', + '\u0395', + '\u0396', + '\u0397', + '\u0398', + '\u0399', + '\u039a', + '\u039b', + '\u039c', + '\u039d', + '\u039e', + '\u039f', + '\u03a0', + '\u03a1', + '\ufffd', + '\u03a3', + '\u03a4', + '\u03a5', + '\u03a6', + '\u03a7', + '\u03a8', + '\u03a9', + '\u03aa', + '\u03ab', + '\u03ac', + '\u03ad', + '\u03ae', + '\u03af', + '\u03b0', + '\u03b1', + '\u03b2', + '\u03b3', + '\u03b4', + '\u03b5', + '\u03b6', + '\u03b7', + '\u03b8', + '\u03b9', + '\u03ba', + '\u03bb', + '\u03bc', + '\u03bd', + '\u03be', + '\u03bf', + '\u03c0', + '\u03c1', + '\u03c2', + '\u03c3', + '\u03c4', + '\u03c5', + '\u03c6', + '\u03c7', + '\u03c8', + '\u03c9', + '\u03ca', + '\u03cb', + '\u03cc', + '\u03cd', + '\u03ce', + '\ufffd' + }; + + private static final String[] LABELS = { + "csisolatingreek", + "ecma-118", + "elot_928", + "greek", + "greek8", + "iso-8859-7", + "iso-ir-126", + "iso8859-7", + "iso88597", + "iso_8859-7", + "iso_8859-7:1987", + "sun_eu_greek" + }; + 
+ private static final String NAME = "iso-8859-7"; + + static final Encoding INSTANCE = new Iso7(); + + private Iso7() { + super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return new FallibleSingleByteDecoder(this, TABLE); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Iso8.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso8.java new file mode 100644 index 000000000..10ee33486 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso8.java @@ -0,0 +1,191 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. 
+ * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.CharsetDecoder; + +class Iso8 extends Encoding { + + private static final char[] TABLE = { + '\u0080', + '\u0081', + '\u0082', + '\u0083', + '\u0084', + '\u0085', + '\u0086', + '\u0087', + '\u0088', + '\u0089', + '\u008a', + '\u008b', + '\u008c', + '\u008d', + '\u008e', + '\u008f', + '\u0090', + '\u0091', + '\u0092', + '\u0093', + '\u0094', + '\u0095', + '\u0096', + '\u0097', + '\u0098', + '\u0099', + '\u009a', + '\u009b', + '\u009c', + '\u009d', + '\u009e', + '\u009f', + '\u00a0', + '\ufffd', + '\u00a2', + '\u00a3', + '\u00a4', + '\u00a5', + '\u00a6', + '\u00a7', + '\u00a8', + '\u00a9', + '\u00d7', + '\u00ab', + '\u00ac', + '\u00ad', + '\u00ae', + '\u00af', + '\u00b0', + '\u00b1', + '\u00b2', + '\u00b3', + '\u00b4', + '\u00b5', + '\u00b6', + '\u00b7', + '\u00b8', + '\u00b9', + '\u00f7', + '\u00bb', + '\u00bc', + '\u00bd', + '\u00be', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\u2017', + '\u05d0', + '\u05d1', + '\u05d2', + '\u05d3', + '\u05d4', + '\u05d5', + '\u05d6', + '\u05d7', + '\u05d8', + '\u05d9', + '\u05da', + '\u05db', + '\u05dc', + '\u05dd', + '\u05de', + '\u05df', + '\u05e0', + '\u05e1', + '\u05e2', + '\u05e3', + '\u05e4', + '\u05e5', + '\u05e6', + '\u05e7', + '\u05e8', + '\u05e9', + '\u05ea', + '\ufffd', + '\ufffd', + '\u200e', + '\u200f', + '\ufffd' + }; + + private static final String[] LABELS = { + "csiso88598e", + "csisolatinhebrew", + "hebrew", + "iso-8859-8", + "iso-8859-8-e", + "iso-ir-138", + "iso8859-8", + "iso88598", + "iso_8859-8", + "iso_8859-8:1988", + "visual" + }; + + private 
static final String NAME = "iso-8859-8"; + + static final Encoding INSTANCE = new Iso8(); + + private Iso8() { + super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return new FallibleSingleByteDecoder(this, TABLE); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Iso8I.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso8I.java new file mode 100644 index 000000000..732e1c952 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Iso8I.java @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. 
+ * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.CharsetDecoder; + +class Iso8I extends Encoding { + + private static final char[] TABLE = { + '\u0080', + '\u0081', + '\u0082', + '\u0083', + '\u0084', + '\u0085', + '\u0086', + '\u0087', + '\u0088', + '\u0089', + '\u008a', + '\u008b', + '\u008c', + '\u008d', + '\u008e', + '\u008f', + '\u0090', + '\u0091', + '\u0092', + '\u0093', + '\u0094', + '\u0095', + '\u0096', + '\u0097', + '\u0098', + '\u0099', + '\u009a', + '\u009b', + '\u009c', + '\u009d', + '\u009e', + '\u009f', + '\u00a0', + '\ufffd', + '\u00a2', + '\u00a3', + '\u00a4', + '\u00a5', + '\u00a6', + '\u00a7', + '\u00a8', + '\u00a9', + '\u00d7', + '\u00ab', + '\u00ac', + '\u00ad', + '\u00ae', + '\u00af', + '\u00b0', + '\u00b1', + '\u00b2', + '\u00b3', + '\u00b4', + '\u00b5', + '\u00b6', + '\u00b7', + '\u00b8', + '\u00b9', + '\u00f7', + '\u00bb', + '\u00bc', + '\u00bd', + '\u00be', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\u2017', + '\u05d0', + '\u05d1', + '\u05d2', + '\u05d3', + '\u05d4', + '\u05d5', + '\u05d6', + '\u05d7', + '\u05d8', + '\u05d9', + '\u05da', + '\u05db', + '\u05dc', + '\u05dd', + '\u05de', + '\u05df', + '\u05e0', + '\u05e1', + '\u05e2', + '\u05e3', + '\u05e4', + '\u05e5', + '\u05e6', + '\u05e7', + '\u05e8', + '\u05e9', + '\u05ea', + '\ufffd', + '\ufffd', + '\u200e', + '\u200f', + '\ufffd' + }; + + private static final String[] LABELS = { + "csiso88598i", + "iso-8859-8-i", + "logical" + }; + + private static final String NAME = "iso-8859-8-i"; + + static final Encoding INSTANCE = new Iso8I(); + + private Iso8I() { + super(NAME, 
LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return new FallibleSingleByteDecoder(this, TABLE); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Koi8R.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Koi8R.java new file mode 100644 index 000000000..b6157bd8e --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Koi8R.java @@ -0,0 +1,185 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. 
+ * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.CharsetDecoder; + +class Koi8R extends Encoding { + + private static final char[] TABLE = { + '\u2500', + '\u2502', + '\u250c', + '\u2510', + '\u2514', + '\u2518', + '\u251c', + '\u2524', + '\u252c', + '\u2534', + '\u253c', + '\u2580', + '\u2584', + '\u2588', + '\u258c', + '\u2590', + '\u2591', + '\u2592', + '\u2593', + '\u2320', + '\u25a0', + '\u2219', + '\u221a', + '\u2248', + '\u2264', + '\u2265', + '\u00a0', + '\u2321', + '\u00b0', + '\u00b2', + '\u00b7', + '\u00f7', + '\u2550', + '\u2551', + '\u2552', + '\u0451', + '\u2553', + '\u2554', + '\u2555', + '\u2556', + '\u2557', + '\u2558', + '\u2559', + '\u255a', + '\u255b', + '\u255c', + '\u255d', + '\u255e', + '\u255f', + '\u2560', + '\u2561', + '\u0401', + '\u2562', + '\u2563', + '\u2564', + '\u2565', + '\u2566', + '\u2567', + '\u2568', + '\u2569', + '\u256a', + '\u256b', + '\u256c', + '\u00a9', + '\u044e', + '\u0430', + '\u0431', + '\u0446', + '\u0434', + '\u0435', + '\u0444', + '\u0433', + '\u0445', + '\u0438', + '\u0439', + '\u043a', + '\u043b', + '\u043c', + '\u043d', + '\u043e', + '\u043f', + '\u044f', + '\u0440', + '\u0441', + '\u0442', + '\u0443', + '\u0436', + '\u0432', + '\u044c', + '\u044b', + '\u0437', + '\u0448', + '\u044d', + '\u0449', + '\u0447', + '\u044a', + '\u042e', + '\u0410', + '\u0411', + '\u0426', + '\u0414', + '\u0415', + '\u0424', + '\u0413', + '\u0425', + '\u0418', + '\u0419', + '\u041a', + '\u041b', + '\u041c', + '\u041d', + '\u041e', + '\u041f', + '\u042f', + '\u0420', + '\u0421', + '\u0422', + '\u0423', + '\u0416', + '\u0412', + '\u042c', + '\u042b', + '\u0417', + '\u0428', + '\u042d', + '\u0429', + '\u0427', + '\u042a' + }; + + private static final String[] LABELS = { + "cskoi8r", + "koi", + "koi8", + "koi8-r", + "koi8_r" + }; + + private static final String NAME = "koi8-r"; + + static final Encoding INSTANCE = new Koi8R(); + + private Koi8R() { + 
super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return new InfallibleSingleByteDecoder(this, TABLE); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Koi8U.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Koi8U.java new file mode 100644 index 000000000..8150838d3 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Koi8U.java @@ -0,0 +1,182 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. 
+ * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.CharsetDecoder; + +class Koi8U extends Encoding { + + private static final char[] TABLE = { + '\u2500', + '\u2502', + '\u250c', + '\u2510', + '\u2514', + '\u2518', + '\u251c', + '\u2524', + '\u252c', + '\u2534', + '\u253c', + '\u2580', + '\u2584', + '\u2588', + '\u258c', + '\u2590', + '\u2591', + '\u2592', + '\u2593', + '\u2320', + '\u25a0', + '\u2219', + '\u221a', + '\u2248', + '\u2264', + '\u2265', + '\u00a0', + '\u2321', + '\u00b0', + '\u00b2', + '\u00b7', + '\u00f7', + '\u2550', + '\u2551', + '\u2552', + '\u0451', + '\u0454', + '\u2554', + '\u0456', + '\u0457', + '\u2557', + '\u2558', + '\u2559', + '\u255a', + '\u255b', + '\u0491', + '\u045e', + '\u255e', + '\u255f', + '\u2560', + '\u2561', + '\u0401', + '\u0404', + '\u2563', + '\u0406', + '\u0407', + '\u2566', + '\u2567', + '\u2568', + '\u2569', + '\u256a', + '\u0490', + '\u040e', + '\u00a9', + '\u044e', + '\u0430', + '\u0431', + '\u0446', + '\u0434', + '\u0435', + '\u0444', + '\u0433', + '\u0445', + '\u0438', + '\u0439', + '\u043a', + '\u043b', + '\u043c', + '\u043d', + '\u043e', + '\u043f', + '\u044f', + '\u0440', + '\u0441', + '\u0442', + '\u0443', + '\u0436', + '\u0432', + '\u044c', + '\u044b', + '\u0437', + '\u0448', + '\u044d', + '\u0449', + '\u0447', + '\u044a', + '\u042e', + '\u0410', + '\u0411', + '\u0426', + '\u0414', + '\u0415', + '\u0424', + '\u0413', + '\u0425', + '\u0418', + '\u0419', + '\u041a', + '\u041b', + '\u041c', + '\u041d', + '\u041e', + '\u041f', + '\u042f', + '\u0420', + '\u0421', + '\u0422', + '\u0423', + '\u0416', + '\u0412', + '\u042c', + '\u042b', + '\u0417', + '\u0428', + '\u042d', + '\u0429', + '\u0427', + '\u042a' + }; + + private static final String[] LABELS = { + "koi8-ru", + "koi8-u" + }; + + private static final String NAME = "koi8-u"; + + static final Encoding INSTANCE = new Koi8U(); + + private Koi8U() { + super(NAME, LABELS); + } + + @Override 
public CharsetDecoder newDecoder() { + return new InfallibleSingleByteDecoder(this, TABLE); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/MacCyrillic.java b/parser/html/java/htmlparser/src/nu/validator/encoding/MacCyrillic.java new file mode 100644 index 000000000..f46546ce2 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/MacCyrillic.java @@ -0,0 +1,182 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. 
+ * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.CharsetDecoder; + +class MacCyrillic extends Encoding { + + private static final char[] TABLE = { + '\u0410', + '\u0411', + '\u0412', + '\u0413', + '\u0414', + '\u0415', + '\u0416', + '\u0417', + '\u0418', + '\u0419', + '\u041a', + '\u041b', + '\u041c', + '\u041d', + '\u041e', + '\u041f', + '\u0420', + '\u0421', + '\u0422', + '\u0423', + '\u0424', + '\u0425', + '\u0426', + '\u0427', + '\u0428', + '\u0429', + '\u042a', + '\u042b', + '\u042c', + '\u042d', + '\u042e', + '\u042f', + '\u2020', + '\u00b0', + '\u0490', + '\u00a3', + '\u00a7', + '\u2022', + '\u00b6', + '\u0406', + '\u00ae', + '\u00a9', + '\u2122', + '\u0402', + '\u0452', + '\u2260', + '\u0403', + '\u0453', + '\u221e', + '\u00b1', + '\u2264', + '\u2265', + '\u0456', + '\u00b5', + '\u0491', + '\u0408', + '\u0404', + '\u0454', + '\u0407', + '\u0457', + '\u0409', + '\u0459', + '\u040a', + '\u045a', + '\u0458', + '\u0405', + '\u00ac', + '\u221a', + '\u0192', + '\u2248', + '\u2206', + '\u00ab', + '\u00bb', + '\u2026', + '\u00a0', + '\u040b', + '\u045b', + '\u040c', + '\u045c', + '\u0455', + '\u2013', + '\u2014', + '\u201c', + '\u201d', + '\u2018', + '\u2019', + '\u00f7', + '\u201e', + '\u040e', + '\u045e', + '\u040f', + '\u045f', + '\u2116', + '\u0401', + '\u0451', + '\u044f', + '\u0430', + '\u0431', + '\u0432', + '\u0433', + '\u0434', + '\u0435', + '\u0436', + '\u0437', + '\u0438', + '\u0439', + '\u043a', + '\u043b', + '\u043c', + '\u043d', + '\u043e', + '\u043f', + '\u0440', + '\u0441', + '\u0442', + '\u0443', + '\u0444', + '\u0445', + '\u0446', + '\u0447', + '\u0448', + '\u0449', + '\u044a', + '\u044b', + '\u044c', + '\u044d', + '\u044e', + '\u20ac' + }; + + private static final String[] LABELS = { + "x-mac-cyrillic", + "x-mac-ukrainian" + }; + + private static final String NAME = "x-mac-cyrillic"; + + static final Encoding INSTANCE = new MacCyrillic(); + + private MacCyrillic() { + 
super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return new InfallibleSingleByteDecoder(this, TABLE); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Macintosh.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Macintosh.java new file mode 100644 index 000000000..70e356f23 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Macintosh.java @@ -0,0 +1,184 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. 
+ * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.CharsetDecoder; + +class Macintosh extends Encoding { + + private static final char[] TABLE = { + '\u00c4', + '\u00c5', + '\u00c7', + '\u00c9', + '\u00d1', + '\u00d6', + '\u00dc', + '\u00e1', + '\u00e0', + '\u00e2', + '\u00e4', + '\u00e3', + '\u00e5', + '\u00e7', + '\u00e9', + '\u00e8', + '\u00ea', + '\u00eb', + '\u00ed', + '\u00ec', + '\u00ee', + '\u00ef', + '\u00f1', + '\u00f3', + '\u00f2', + '\u00f4', + '\u00f6', + '\u00f5', + '\u00fa', + '\u00f9', + '\u00fb', + '\u00fc', + '\u2020', + '\u00b0', + '\u00a2', + '\u00a3', + '\u00a7', + '\u2022', + '\u00b6', + '\u00df', + '\u00ae', + '\u00a9', + '\u2122', + '\u00b4', + '\u00a8', + '\u2260', + '\u00c6', + '\u00d8', + '\u221e', + '\u00b1', + '\u2264', + '\u2265', + '\u00a5', + '\u00b5', + '\u2202', + '\u2211', + '\u220f', + '\u03c0', + '\u222b', + '\u00aa', + '\u00ba', + '\u03a9', + '\u00e6', + '\u00f8', + '\u00bf', + '\u00a1', + '\u00ac', + '\u221a', + '\u0192', + '\u2248', + '\u2206', + '\u00ab', + '\u00bb', + '\u2026', + '\u00a0', + '\u00c0', + '\u00c3', + '\u00d5', + '\u0152', + '\u0153', + '\u2013', + '\u2014', + '\u201c', + '\u201d', + '\u2018', + '\u2019', + '\u00f7', + '\u25ca', + '\u00ff', + '\u0178', + '\u2044', + '\u20ac', + '\u2039', + '\u203a', + '\ufb01', + '\ufb02', + '\u2021', + '\u00b7', + '\u201a', + '\u201e', + '\u2030', + '\u00c2', + '\u00ca', + '\u00c1', + '\u00cb', + '\u00c8', + '\u00cd', + '\u00ce', + '\u00cf', + '\u00cc', + '\u00d3', + '\u00d4', + '\uf8ff', + '\u00d2', + '\u00da', + '\u00db', + '\u00d9', + '\u0131', + '\u02c6', + '\u02dc', + '\u00af', + '\u02d8', + '\u02d9', + '\u02da', + '\u00b8', + '\u02dd', + '\u02db', + '\u02c7' + }; + + private static final String[] LABELS = { + "csmacintosh", + "mac", + "macintosh", + "x-mac-roman" + }; + + private static final String NAME = "macintosh"; + + static final Encoding INSTANCE = new Macintosh(); + + private 
Macintosh() { + super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return new InfallibleSingleByteDecoder(this, TABLE); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Replacement.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Replacement.java new file mode 100644 index 000000000..abb6e24e7 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Replacement.java @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. 
+ * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CharsetEncoder; + +class Replacement extends Encoding { + + private static final String[] LABELS = { + "csiso2022kr", + "hz-gb-2312", + "iso-2022-cn", + "iso-2022-cn-ext", + "iso-2022-kr" + }; + + private static final String NAME = "replacement"; + + static final Replacement INSTANCE = new Replacement(); + + private Replacement() { + super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return new ReplacementDecoder(this); + } + + @Override public CharsetEncoder newEncoder() { + return Charset.forName(NAME).newEncoder(); + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/ReplacementDecoder.java b/parser/html/java/htmlparser/src/nu/validator/encoding/ReplacementDecoder.java new file mode 100644 index 000000000..f6f2448f6 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/ReplacementDecoder.java @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.encoding; + +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.Charset; +import java.nio.charset.CoderResult; + +class ReplacementDecoder extends Decoder { + + private boolean haveEmitted = false; + + ReplacementDecoder(Charset cs) { + super(cs, 1.0f, 1.0f); + } + + @Override protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) { + for (;;) { + if (!in.hasRemaining()) { + return CoderResult.UNDERFLOW; + } + if (haveEmitted) { + in.position(in.limit()); + return CoderResult.UNDERFLOW; + } + if (!out.hasRemaining()) { + return CoderResult.OVERFLOW; + } + in.position(in.limit()); + haveEmitted = true; + if (this.report) { + return CoderResult.malformedForLength(1); + } + out.put('\uFFFD'); + } + } + + /** + * @see java.nio.charset.CharsetDecoder#implFlush(java.nio.CharBuffer) + */ + @Override protected CoderResult implFlush(CharBuffer out) { + // TODO Auto-generated method stub + return super.implFlush(out); + } + + /** + * @see java.nio.charset.CharsetDecoder#implReset() + */ + @Override protected void implReset() { + // TODO Auto-generated method stub + super.implReset(); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/ShiftJis.java b/parser/html/java/htmlparser/src/nu/validator/encoding/ShiftJis.java new file mode 100644 index 000000000..6638eab39 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/ShiftJis.java @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without 
restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. + * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CharsetEncoder; + +class ShiftJis extends Encoding { + + private static final String[] LABELS = { + "csshiftjis", + "ms932", + "ms_kanji", + "shift-jis", + "shift_jis", + "sjis", + "windows-31j", + "x-sjis" + }; + + private static final String NAME = "shift_jis"; + + static final ShiftJis INSTANCE = new ShiftJis(); + + private ShiftJis() { + super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return Charset.forName(NAME).newDecoder(); + } + + @Override public CharsetEncoder newEncoder() { + return Charset.forName(NAME).newEncoder(); + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/UserDefined.java b/parser/html/java/htmlparser/src/nu/validator/encoding/UserDefined.java new file mode 100644 index 000000000..61534cb28 --- /dev/null +++ 
b/parser/html/java/htmlparser/src/nu/validator/encoding/UserDefined.java @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. 
+ * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CharsetEncoder; + +class UserDefined extends Encoding { + + private static final String[] LABELS = { + "x-user-defined" + }; + + private static final String NAME = "x-user-defined"; + + static final UserDefined INSTANCE = new UserDefined(); + + private UserDefined() { + super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return new UserDefinedDecoder(this); + } + + @Override public CharsetEncoder newEncoder() { + return Charset.forName(NAME).newEncoder(); + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/UserDefinedDecoder.java b/parser/html/java/htmlparser/src/nu/validator/encoding/UserDefinedDecoder.java new file mode 100644 index 000000000..c14ca8627 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/UserDefinedDecoder.java @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.encoding; + +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CoderResult; + +class UserDefinedDecoder extends Decoder { + + UserDefinedDecoder(Charset cs) { + super(cs, 1.0f, 1.0f); + } + + @Override protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) { + // TODO figure out if it's worthwhile to optimize the case where both + // buffers are array-backed. + for (;;) { + if (!in.hasRemaining()) { + return CoderResult.UNDERFLOW; + } + if (!out.hasRemaining()) { + return CoderResult.OVERFLOW; + } + int b = (int)in.get(); + if (b >= 0) { + out.put((char)b); + } else { + out.put((char)(b + 128 + 0xF780)); + } + } + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Utf16Be.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Utf16Be.java new file mode 100644 index 000000000..16c0d2fd5 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Utf16Be.java @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the 
Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. + * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CharsetEncoder; + +class Utf16Be extends Encoding { + + private static final String[] LABELS = { + "utf-16be" + }; + + private static final String NAME = "utf-16be"; + + static final Utf16Be INSTANCE = new Utf16Be(); + + private Utf16Be() { + super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return Charset.forName(NAME).newDecoder(); + } + + @Override public CharsetEncoder newEncoder() { + return Charset.forName(NAME).newEncoder(); + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Utf16Le.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Utf16Le.java new file mode 100644 index 000000000..7381235b5 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Utf16Le.java @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is 
furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. + * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CharsetEncoder; + +class Utf16Le extends Encoding { + + private static final String[] LABELS = { + "utf-16", + "utf-16le" + }; + + private static final String NAME = "utf-16le"; + + static final Utf16Le INSTANCE = new Utf16Le(); + + private Utf16Le() { + super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return Charset.forName(NAME).newDecoder(); + } + + @Override public CharsetEncoder newEncoder() { + return Charset.forName(NAME).newEncoder(); + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Utf8.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Utf8.java new file mode 100644 index 000000000..d6ea7b514 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Utf8.java @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including 
without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. + * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CharsetEncoder; + +class Utf8 extends Encoding { + + private static final String[] LABELS = { + "unicode-1-1-utf-8", + "utf-8", + "utf8" + }; + + private static final String NAME = "utf-8"; + + static final Utf8 INSTANCE = new Utf8(); + + private Utf8() { + super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return Charset.forName(NAME).newDecoder(); + } + + @Override public CharsetEncoder newEncoder() { + return Charset.forName(NAME).newEncoder(); + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1250.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1250.java new file mode 100644 index 000000000..0b3f50875 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1250.java @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * 
Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. 
+ * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.CharsetDecoder; + +class Windows1250 extends Encoding { + + private static final char[] TABLE = { + '\u20ac', + '\u0081', + '\u201a', + '\u0083', + '\u201e', + '\u2026', + '\u2020', + '\u2021', + '\u0088', + '\u2030', + '\u0160', + '\u2039', + '\u015a', + '\u0164', + '\u017d', + '\u0179', + '\u0090', + '\u2018', + '\u2019', + '\u201c', + '\u201d', + '\u2022', + '\u2013', + '\u2014', + '\u0098', + '\u2122', + '\u0161', + '\u203a', + '\u015b', + '\u0165', + '\u017e', + '\u017a', + '\u00a0', + '\u02c7', + '\u02d8', + '\u0141', + '\u00a4', + '\u0104', + '\u00a6', + '\u00a7', + '\u00a8', + '\u00a9', + '\u015e', + '\u00ab', + '\u00ac', + '\u00ad', + '\u00ae', + '\u017b', + '\u00b0', + '\u00b1', + '\u02db', + '\u0142', + '\u00b4', + '\u00b5', + '\u00b6', + '\u00b7', + '\u00b8', + '\u0105', + '\u015f', + '\u00bb', + '\u013d', + '\u02dd', + '\u013e', + '\u017c', + '\u0154', + '\u00c1', + '\u00c2', + '\u0102', + '\u00c4', + '\u0139', + '\u0106', + '\u00c7', + '\u010c', + '\u00c9', + '\u0118', + '\u00cb', + '\u011a', + '\u00cd', + '\u00ce', + '\u010e', + '\u0110', + '\u0143', + '\u0147', + '\u00d3', + '\u00d4', + '\u0150', + '\u00d6', + '\u00d7', + '\u0158', + '\u016e', + '\u00da', + '\u0170', + '\u00dc', + '\u00dd', + '\u0162', + '\u00df', + '\u0155', + '\u00e1', + '\u00e2', + '\u0103', + '\u00e4', + '\u013a', + '\u0107', + '\u00e7', + '\u010d', + '\u00e9', + '\u0119', + '\u00eb', + '\u011b', + '\u00ed', + '\u00ee', + '\u010f', + '\u0111', + '\u0144', + '\u0148', + '\u00f3', + '\u00f4', + '\u0151', + '\u00f6', + '\u00f7', + '\u0159', + '\u016f', + '\u00fa', + '\u0171', + '\u00fc', + '\u00fd', + '\u0163', + '\u02d9' + }; + + private static final String[] LABELS = { + "cp1250", + "windows-1250", + "x-cp1250" + }; + + private static final String NAME = "windows-1250"; + + static final Encoding INSTANCE = new Windows1250(); + + private Windows1250() { 
+ super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return new InfallibleSingleByteDecoder(this, TABLE); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1251.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1251.java new file mode 100644 index 000000000..def5cf11e --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1251.java @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. 
+ * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.CharsetDecoder; + +class Windows1251 extends Encoding { + + private static final char[] TABLE = { + '\u0402', + '\u0403', + '\u201a', + '\u0453', + '\u201e', + '\u2026', + '\u2020', + '\u2021', + '\u20ac', + '\u2030', + '\u0409', + '\u2039', + '\u040a', + '\u040c', + '\u040b', + '\u040f', + '\u0452', + '\u2018', + '\u2019', + '\u201c', + '\u201d', + '\u2022', + '\u2013', + '\u2014', + '\u0098', + '\u2122', + '\u0459', + '\u203a', + '\u045a', + '\u045c', + '\u045b', + '\u045f', + '\u00a0', + '\u040e', + '\u045e', + '\u0408', + '\u00a4', + '\u0490', + '\u00a6', + '\u00a7', + '\u0401', + '\u00a9', + '\u0404', + '\u00ab', + '\u00ac', + '\u00ad', + '\u00ae', + '\u0407', + '\u00b0', + '\u00b1', + '\u0406', + '\u0456', + '\u0491', + '\u00b5', + '\u00b6', + '\u00b7', + '\u0451', + '\u2116', + '\u0454', + '\u00bb', + '\u0458', + '\u0405', + '\u0455', + '\u0457', + '\u0410', + '\u0411', + '\u0412', + '\u0413', + '\u0414', + '\u0415', + '\u0416', + '\u0417', + '\u0418', + '\u0419', + '\u041a', + '\u041b', + '\u041c', + '\u041d', + '\u041e', + '\u041f', + '\u0420', + '\u0421', + '\u0422', + '\u0423', + '\u0424', + '\u0425', + '\u0426', + '\u0427', + '\u0428', + '\u0429', + '\u042a', + '\u042b', + '\u042c', + '\u042d', + '\u042e', + '\u042f', + '\u0430', + '\u0431', + '\u0432', + '\u0433', + '\u0434', + '\u0435', + '\u0436', + '\u0437', + '\u0438', + '\u0439', + '\u043a', + '\u043b', + '\u043c', + '\u043d', + '\u043e', + '\u043f', + '\u0440', + '\u0441', + '\u0442', + '\u0443', + '\u0444', + '\u0445', + '\u0446', + '\u0447', + '\u0448', + '\u0449', + '\u044a', + '\u044b', + '\u044c', + '\u044d', + '\u044e', + '\u044f' + }; + + private static final String[] LABELS = { + "cp1251", + "windows-1251", + "x-cp1251" + }; + + private static final String NAME = "windows-1251"; + + static final Encoding INSTANCE = new Windows1251(); + + private Windows1251() { 
+ super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return new InfallibleSingleByteDecoder(this, TABLE); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1252.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1252.java new file mode 100644 index 000000000..4b3fa1ffa --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1252.java @@ -0,0 +1,197 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. 
+ * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.CharsetDecoder; + +class Windows1252 extends Encoding { + + private static final char[] TABLE = { + '\u20ac', + '\u0081', + '\u201a', + '\u0192', + '\u201e', + '\u2026', + '\u2020', + '\u2021', + '\u02c6', + '\u2030', + '\u0160', + '\u2039', + '\u0152', + '\u008d', + '\u017d', + '\u008f', + '\u0090', + '\u2018', + '\u2019', + '\u201c', + '\u201d', + '\u2022', + '\u2013', + '\u2014', + '\u02dc', + '\u2122', + '\u0161', + '\u203a', + '\u0153', + '\u009d', + '\u017e', + '\u0178', + '\u00a0', + '\u00a1', + '\u00a2', + '\u00a3', + '\u00a4', + '\u00a5', + '\u00a6', + '\u00a7', + '\u00a8', + '\u00a9', + '\u00aa', + '\u00ab', + '\u00ac', + '\u00ad', + '\u00ae', + '\u00af', + '\u00b0', + '\u00b1', + '\u00b2', + '\u00b3', + '\u00b4', + '\u00b5', + '\u00b6', + '\u00b7', + '\u00b8', + '\u00b9', + '\u00ba', + '\u00bb', + '\u00bc', + '\u00bd', + '\u00be', + '\u00bf', + '\u00c0', + '\u00c1', + '\u00c2', + '\u00c3', + '\u00c4', + '\u00c5', + '\u00c6', + '\u00c7', + '\u00c8', + '\u00c9', + '\u00ca', + '\u00cb', + '\u00cc', + '\u00cd', + '\u00ce', + '\u00cf', + '\u00d0', + '\u00d1', + '\u00d2', + '\u00d3', + '\u00d4', + '\u00d5', + '\u00d6', + '\u00d7', + '\u00d8', + '\u00d9', + '\u00da', + '\u00db', + '\u00dc', + '\u00dd', + '\u00de', + '\u00df', + '\u00e0', + '\u00e1', + '\u00e2', + '\u00e3', + '\u00e4', + '\u00e5', + '\u00e6', + '\u00e7', + '\u00e8', + '\u00e9', + '\u00ea', + '\u00eb', + '\u00ec', + '\u00ed', + '\u00ee', + '\u00ef', + '\u00f0', + '\u00f1', + '\u00f2', + '\u00f3', + '\u00f4', + '\u00f5', + '\u00f6', + '\u00f7', + '\u00f8', + '\u00f9', + '\u00fa', + '\u00fb', + '\u00fc', + '\u00fd', + '\u00fe', + '\u00ff' + }; + + private static final String[] LABELS = { + "ansi_x3.4-1968", + "ascii", + "cp1252", + "cp819", + "csisolatin1", + "ibm819", + "iso-8859-1", + "iso-ir-100", + "iso8859-1", + "iso88591", + "iso_8859-1", + "iso_8859-1:1987", + 
"l1", + "latin1", + "us-ascii", + "windows-1252", + "x-cp1252" + }; + + private static final String NAME = "windows-1252"; + + static final Encoding INSTANCE = new Windows1252(); + + private Windows1252() { + super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return new InfallibleSingleByteDecoder(this, TABLE); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1253.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1253.java new file mode 100644 index 000000000..c96e8630c --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1253.java @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. 
+ * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.CharsetDecoder; + +class Windows1253 extends Encoding { + + private static final char[] TABLE = { + '\u20ac', + '\u0081', + '\u201a', + '\u0192', + '\u201e', + '\u2026', + '\u2020', + '\u2021', + '\u0088', + '\u2030', + '\u008a', + '\u2039', + '\u008c', + '\u008d', + '\u008e', + '\u008f', + '\u0090', + '\u2018', + '\u2019', + '\u201c', + '\u201d', + '\u2022', + '\u2013', + '\u2014', + '\u0098', + '\u2122', + '\u009a', + '\u203a', + '\u009c', + '\u009d', + '\u009e', + '\u009f', + '\u00a0', + '\u0385', + '\u0386', + '\u00a3', + '\u00a4', + '\u00a5', + '\u00a6', + '\u00a7', + '\u00a8', + '\u00a9', + '\ufffd', + '\u00ab', + '\u00ac', + '\u00ad', + '\u00ae', + '\u2015', + '\u00b0', + '\u00b1', + '\u00b2', + '\u00b3', + '\u0384', + '\u00b5', + '\u00b6', + '\u00b7', + '\u0388', + '\u0389', + '\u038a', + '\u00bb', + '\u038c', + '\u00bd', + '\u038e', + '\u038f', + '\u0390', + '\u0391', + '\u0392', + '\u0393', + '\u0394', + '\u0395', + '\u0396', + '\u0397', + '\u0398', + '\u0399', + '\u039a', + '\u039b', + '\u039c', + '\u039d', + '\u039e', + '\u039f', + '\u03a0', + '\u03a1', + '\ufffd', + '\u03a3', + '\u03a4', + '\u03a5', + '\u03a6', + '\u03a7', + '\u03a8', + '\u03a9', + '\u03aa', + '\u03ab', + '\u03ac', + '\u03ad', + '\u03ae', + '\u03af', + '\u03b0', + '\u03b1', + '\u03b2', + '\u03b3', + '\u03b4', + '\u03b5', + '\u03b6', + '\u03b7', + '\u03b8', + '\u03b9', + '\u03ba', + '\u03bb', + '\u03bc', + '\u03bd', + '\u03be', + '\u03bf', + '\u03c0', + '\u03c1', + '\u03c2', + '\u03c3', + '\u03c4', + '\u03c5', + '\u03c6', + '\u03c7', + '\u03c8', + '\u03c9', + '\u03ca', + '\u03cb', + '\u03cc', + '\u03cd', + '\u03ce', + '\ufffd' + }; + + private static final String[] LABELS = { + "cp1253", + "windows-1253", + "x-cp1253" + }; + + private static final String NAME = "windows-1253"; + + static final Encoding INSTANCE = new Windows1253(); + + private Windows1253() { 
+ super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return new FallibleSingleByteDecoder(this, TABLE); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1254.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1254.java new file mode 100644 index 000000000..fc3aa9839 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1254.java @@ -0,0 +1,192 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. 
+ * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.CharsetDecoder; + +class Windows1254 extends Encoding { + + private static final char[] TABLE = { + '\u20ac', + '\u0081', + '\u201a', + '\u0192', + '\u201e', + '\u2026', + '\u2020', + '\u2021', + '\u02c6', + '\u2030', + '\u0160', + '\u2039', + '\u0152', + '\u008d', + '\u008e', + '\u008f', + '\u0090', + '\u2018', + '\u2019', + '\u201c', + '\u201d', + '\u2022', + '\u2013', + '\u2014', + '\u02dc', + '\u2122', + '\u0161', + '\u203a', + '\u0153', + '\u009d', + '\u009e', + '\u0178', + '\u00a0', + '\u00a1', + '\u00a2', + '\u00a3', + '\u00a4', + '\u00a5', + '\u00a6', + '\u00a7', + '\u00a8', + '\u00a9', + '\u00aa', + '\u00ab', + '\u00ac', + '\u00ad', + '\u00ae', + '\u00af', + '\u00b0', + '\u00b1', + '\u00b2', + '\u00b3', + '\u00b4', + '\u00b5', + '\u00b6', + '\u00b7', + '\u00b8', + '\u00b9', + '\u00ba', + '\u00bb', + '\u00bc', + '\u00bd', + '\u00be', + '\u00bf', + '\u00c0', + '\u00c1', + '\u00c2', + '\u00c3', + '\u00c4', + '\u00c5', + '\u00c6', + '\u00c7', + '\u00c8', + '\u00c9', + '\u00ca', + '\u00cb', + '\u00cc', + '\u00cd', + '\u00ce', + '\u00cf', + '\u011e', + '\u00d1', + '\u00d2', + '\u00d3', + '\u00d4', + '\u00d5', + '\u00d6', + '\u00d7', + '\u00d8', + '\u00d9', + '\u00da', + '\u00db', + '\u00dc', + '\u0130', + '\u015e', + '\u00df', + '\u00e0', + '\u00e1', + '\u00e2', + '\u00e3', + '\u00e4', + '\u00e5', + '\u00e6', + '\u00e7', + '\u00e8', + '\u00e9', + '\u00ea', + '\u00eb', + '\u00ec', + '\u00ed', + '\u00ee', + '\u00ef', + '\u011f', + '\u00f1', + '\u00f2', + '\u00f3', + '\u00f4', + '\u00f5', + '\u00f6', + '\u00f7', + '\u00f8', + '\u00f9', + '\u00fa', + '\u00fb', + '\u00fc', + '\u0131', + '\u015f', + '\u00ff' + }; + + private static final String[] LABELS = { + "cp1254", + "csisolatin5", + "iso-8859-9", + "iso-ir-148", + "iso8859-9", + "iso88599", + "iso_8859-9", + "iso_8859-9:1989", + "l5", + "latin5", + "windows-1254", + "x-cp1254" + }; + + 
private static final String NAME = "windows-1254"; + + static final Encoding INSTANCE = new Windows1254(); + + private Windows1254() { + super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return new InfallibleSingleByteDecoder(this, TABLE); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1255.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1255.java new file mode 100644 index 000000000..957203d80 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1255.java @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. 
+ * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.CharsetDecoder; + +class Windows1255 extends Encoding { + + private static final char[] TABLE = { + '\u20ac', + '\u0081', + '\u201a', + '\u0192', + '\u201e', + '\u2026', + '\u2020', + '\u2021', + '\u02c6', + '\u2030', + '\u008a', + '\u2039', + '\u008c', + '\u008d', + '\u008e', + '\u008f', + '\u0090', + '\u2018', + '\u2019', + '\u201c', + '\u201d', + '\u2022', + '\u2013', + '\u2014', + '\u02dc', + '\u2122', + '\u009a', + '\u203a', + '\u009c', + '\u009d', + '\u009e', + '\u009f', + '\u00a0', + '\u00a1', + '\u00a2', + '\u00a3', + '\u20aa', + '\u00a5', + '\u00a6', + '\u00a7', + '\u00a8', + '\u00a9', + '\u00d7', + '\u00ab', + '\u00ac', + '\u00ad', + '\u00ae', + '\u00af', + '\u00b0', + '\u00b1', + '\u00b2', + '\u00b3', + '\u00b4', + '\u00b5', + '\u00b6', + '\u00b7', + '\u00b8', + '\u00b9', + '\u00f7', + '\u00bb', + '\u00bc', + '\u00bd', + '\u00be', + '\u00bf', + '\u05b0', + '\u05b1', + '\u05b2', + '\u05b3', + '\u05b4', + '\u05b5', + '\u05b6', + '\u05b7', + '\u05b8', + '\u05b9', + '\ufffd', + '\u05bb', + '\u05bc', + '\u05bd', + '\u05be', + '\u05bf', + '\u05c0', + '\u05c1', + '\u05c2', + '\u05c3', + '\u05f0', + '\u05f1', + '\u05f2', + '\u05f3', + '\u05f4', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\u05d0', + '\u05d1', + '\u05d2', + '\u05d3', + '\u05d4', + '\u05d5', + '\u05d6', + '\u05d7', + '\u05d8', + '\u05d9', + '\u05da', + '\u05db', + '\u05dc', + '\u05dd', + '\u05de', + '\u05df', + '\u05e0', + '\u05e1', + '\u05e2', + '\u05e3', + '\u05e4', + '\u05e5', + '\u05e6', + '\u05e7', + '\u05e8', + '\u05e9', + '\u05ea', + '\ufffd', + '\ufffd', + '\u200e', + '\u200f', + '\ufffd' + }; + + private static final String[] LABELS = { + "cp1255", + "windows-1255", + "x-cp1255" + }; + + private static final String NAME = "windows-1255"; + + static final Encoding INSTANCE = new Windows1255(); + + private Windows1255() { 
+ super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return new FallibleSingleByteDecoder(this, TABLE); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1256.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1256.java new file mode 100644 index 000000000..87d805e1e --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1256.java @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. 
+ * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.CharsetDecoder; + +class Windows1256 extends Encoding { + + private static final char[] TABLE = { + '\u20ac', + '\u067e', + '\u201a', + '\u0192', + '\u201e', + '\u2026', + '\u2020', + '\u2021', + '\u02c6', + '\u2030', + '\u0679', + '\u2039', + '\u0152', + '\u0686', + '\u0698', + '\u0688', + '\u06af', + '\u2018', + '\u2019', + '\u201c', + '\u201d', + '\u2022', + '\u2013', + '\u2014', + '\u06a9', + '\u2122', + '\u0691', + '\u203a', + '\u0153', + '\u200c', + '\u200d', + '\u06ba', + '\u00a0', + '\u060c', + '\u00a2', + '\u00a3', + '\u00a4', + '\u00a5', + '\u00a6', + '\u00a7', + '\u00a8', + '\u00a9', + '\u06be', + '\u00ab', + '\u00ac', + '\u00ad', + '\u00ae', + '\u00af', + '\u00b0', + '\u00b1', + '\u00b2', + '\u00b3', + '\u00b4', + '\u00b5', + '\u00b6', + '\u00b7', + '\u00b8', + '\u00b9', + '\u061b', + '\u00bb', + '\u00bc', + '\u00bd', + '\u00be', + '\u061f', + '\u06c1', + '\u0621', + '\u0622', + '\u0623', + '\u0624', + '\u0625', + '\u0626', + '\u0627', + '\u0628', + '\u0629', + '\u062a', + '\u062b', + '\u062c', + '\u062d', + '\u062e', + '\u062f', + '\u0630', + '\u0631', + '\u0632', + '\u0633', + '\u0634', + '\u0635', + '\u0636', + '\u00d7', + '\u0637', + '\u0638', + '\u0639', + '\u063a', + '\u0640', + '\u0641', + '\u0642', + '\u0643', + '\u00e0', + '\u0644', + '\u00e2', + '\u0645', + '\u0646', + '\u0647', + '\u0648', + '\u00e7', + '\u00e8', + '\u00e9', + '\u00ea', + '\u00eb', + '\u0649', + '\u064a', + '\u00ee', + '\u00ef', + '\u064b', + '\u064c', + '\u064d', + '\u064e', + '\u00f4', + '\u064f', + '\u0650', + '\u00f7', + '\u0651', + '\u00f9', + '\u0652', + '\u00fb', + '\u00fc', + '\u200e', + '\u200f', + '\u06d2' + }; + + private static final String[] LABELS = { + "cp1256", + "windows-1256", + "x-cp1256" + }; + + private static final String NAME = "windows-1256"; + + static final Encoding INSTANCE = new Windows1256(); + + private Windows1256() { 
+ super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return new InfallibleSingleByteDecoder(this, TABLE); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1257.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1257.java new file mode 100644 index 000000000..140e9b458 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1257.java @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. 
+ * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.CharsetDecoder; + +class Windows1257 extends Encoding { + + private static final char[] TABLE = { + '\u20ac', + '\u0081', + '\u201a', + '\u0083', + '\u201e', + '\u2026', + '\u2020', + '\u2021', + '\u0088', + '\u2030', + '\u008a', + '\u2039', + '\u008c', + '\u00a8', + '\u02c7', + '\u00b8', + '\u0090', + '\u2018', + '\u2019', + '\u201c', + '\u201d', + '\u2022', + '\u2013', + '\u2014', + '\u0098', + '\u2122', + '\u009a', + '\u203a', + '\u009c', + '\u00af', + '\u02db', + '\u009f', + '\u00a0', + '\ufffd', + '\u00a2', + '\u00a3', + '\u00a4', + '\ufffd', + '\u00a6', + '\u00a7', + '\u00d8', + '\u00a9', + '\u0156', + '\u00ab', + '\u00ac', + '\u00ad', + '\u00ae', + '\u00c6', + '\u00b0', + '\u00b1', + '\u00b2', + '\u00b3', + '\u00b4', + '\u00b5', + '\u00b6', + '\u00b7', + '\u00f8', + '\u00b9', + '\u0157', + '\u00bb', + '\u00bc', + '\u00bd', + '\u00be', + '\u00e6', + '\u0104', + '\u012e', + '\u0100', + '\u0106', + '\u00c4', + '\u00c5', + '\u0118', + '\u0112', + '\u010c', + '\u00c9', + '\u0179', + '\u0116', + '\u0122', + '\u0136', + '\u012a', + '\u013b', + '\u0160', + '\u0143', + '\u0145', + '\u00d3', + '\u014c', + '\u00d5', + '\u00d6', + '\u00d7', + '\u0172', + '\u0141', + '\u015a', + '\u016a', + '\u00dc', + '\u017b', + '\u017d', + '\u00df', + '\u0105', + '\u012f', + '\u0101', + '\u0107', + '\u00e4', + '\u00e5', + '\u0119', + '\u0113', + '\u010d', + '\u00e9', + '\u017a', + '\u0117', + '\u0123', + '\u0137', + '\u012b', + '\u013c', + '\u0161', + '\u0144', + '\u0146', + '\u00f3', + '\u014d', + '\u00f5', + '\u00f6', + '\u00f7', + '\u0173', + '\u0142', + '\u015b', + '\u016b', + '\u00fc', + '\u017c', + '\u017e', + '\u02d9' + }; + + private static final String[] LABELS = { + "cp1257", + "windows-1257", + "x-cp1257" + }; + + private static final String NAME = "windows-1257"; + + static final Encoding INSTANCE = new Windows1257(); + + private Windows1257() { 
+ super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return new FallibleSingleByteDecoder(this, TABLE); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1258.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1258.java new file mode 100644 index 000000000..130107789 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows1258.java @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. 
+ * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.CharsetDecoder; + +class Windows1258 extends Encoding { + + private static final char[] TABLE = { + '\u20ac', + '\u0081', + '\u201a', + '\u0192', + '\u201e', + '\u2026', + '\u2020', + '\u2021', + '\u02c6', + '\u2030', + '\u008a', + '\u2039', + '\u0152', + '\u008d', + '\u008e', + '\u008f', + '\u0090', + '\u2018', + '\u2019', + '\u201c', + '\u201d', + '\u2022', + '\u2013', + '\u2014', + '\u02dc', + '\u2122', + '\u009a', + '\u203a', + '\u0153', + '\u009d', + '\u009e', + '\u0178', + '\u00a0', + '\u00a1', + '\u00a2', + '\u00a3', + '\u00a4', + '\u00a5', + '\u00a6', + '\u00a7', + '\u00a8', + '\u00a9', + '\u00aa', + '\u00ab', + '\u00ac', + '\u00ad', + '\u00ae', + '\u00af', + '\u00b0', + '\u00b1', + '\u00b2', + '\u00b3', + '\u00b4', + '\u00b5', + '\u00b6', + '\u00b7', + '\u00b8', + '\u00b9', + '\u00ba', + '\u00bb', + '\u00bc', + '\u00bd', + '\u00be', + '\u00bf', + '\u00c0', + '\u00c1', + '\u00c2', + '\u0102', + '\u00c4', + '\u00c5', + '\u00c6', + '\u00c7', + '\u00c8', + '\u00c9', + '\u00ca', + '\u00cb', + '\u0300', + '\u00cd', + '\u00ce', + '\u00cf', + '\u0110', + '\u00d1', + '\u0309', + '\u00d3', + '\u00d4', + '\u01a0', + '\u00d6', + '\u00d7', + '\u00d8', + '\u00d9', + '\u00da', + '\u00db', + '\u00dc', + '\u01af', + '\u0303', + '\u00df', + '\u00e0', + '\u00e1', + '\u00e2', + '\u0103', + '\u00e4', + '\u00e5', + '\u00e6', + '\u00e7', + '\u00e8', + '\u00e9', + '\u00ea', + '\u00eb', + '\u0301', + '\u00ed', + '\u00ee', + '\u00ef', + '\u0111', + '\u00f1', + '\u0323', + '\u00f3', + '\u00f4', + '\u01a1', + '\u00f6', + '\u00f7', + '\u00f8', + '\u00f9', + '\u00fa', + '\u00fb', + '\u00fc', + '\u01b0', + '\u20ab', + '\u00ff' + }; + + private static final String[] LABELS = { + "cp1258", + "windows-1258", + "x-cp1258" + }; + + private static final String NAME = "windows-1258"; + + static final Encoding INSTANCE = new Windows1258(); + + private Windows1258() { 
+ super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return new InfallibleSingleByteDecoder(this, TABLE); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/encoding/Windows874.java b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows874.java new file mode 100644 index 000000000..f93be0175 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/encoding/Windows874.java @@ -0,0 +1,186 @@ +/* + * Copyright (c) 2013-2015 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. 
+ * Instead, please regenerate using generate-encoding-data.py + */ + +package nu.validator.encoding; + +import java.nio.charset.CharsetDecoder; + +class Windows874 extends Encoding { + + private static final char[] TABLE = { + '\u20ac', + '\u0081', + '\u0082', + '\u0083', + '\u0084', + '\u2026', + '\u0086', + '\u0087', + '\u0088', + '\u0089', + '\u008a', + '\u008b', + '\u008c', + '\u008d', + '\u008e', + '\u008f', + '\u0090', + '\u2018', + '\u2019', + '\u201c', + '\u201d', + '\u2022', + '\u2013', + '\u2014', + '\u0098', + '\u0099', + '\u009a', + '\u009b', + '\u009c', + '\u009d', + '\u009e', + '\u009f', + '\u00a0', + '\u0e01', + '\u0e02', + '\u0e03', + '\u0e04', + '\u0e05', + '\u0e06', + '\u0e07', + '\u0e08', + '\u0e09', + '\u0e0a', + '\u0e0b', + '\u0e0c', + '\u0e0d', + '\u0e0e', + '\u0e0f', + '\u0e10', + '\u0e11', + '\u0e12', + '\u0e13', + '\u0e14', + '\u0e15', + '\u0e16', + '\u0e17', + '\u0e18', + '\u0e19', + '\u0e1a', + '\u0e1b', + '\u0e1c', + '\u0e1d', + '\u0e1e', + '\u0e1f', + '\u0e20', + '\u0e21', + '\u0e22', + '\u0e23', + '\u0e24', + '\u0e25', + '\u0e26', + '\u0e27', + '\u0e28', + '\u0e29', + '\u0e2a', + '\u0e2b', + '\u0e2c', + '\u0e2d', + '\u0e2e', + '\u0e2f', + '\u0e30', + '\u0e31', + '\u0e32', + '\u0e33', + '\u0e34', + '\u0e35', + '\u0e36', + '\u0e37', + '\u0e38', + '\u0e39', + '\u0e3a', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd', + '\u0e3f', + '\u0e40', + '\u0e41', + '\u0e42', + '\u0e43', + '\u0e44', + '\u0e45', + '\u0e46', + '\u0e47', + '\u0e48', + '\u0e49', + '\u0e4a', + '\u0e4b', + '\u0e4c', + '\u0e4d', + '\u0e4e', + '\u0e4f', + '\u0e50', + '\u0e51', + '\u0e52', + '\u0e53', + '\u0e54', + '\u0e55', + '\u0e56', + '\u0e57', + '\u0e58', + '\u0e59', + '\u0e5a', + '\u0e5b', + '\ufffd', + '\ufffd', + '\ufffd', + '\ufffd' + }; + + private static final String[] LABELS = { + "dos-874", + "iso-8859-11", + "iso8859-11", + "iso885911", + "tis-620", + "windows-874" + }; + + private static final String NAME = "windows-874"; + + static final Encoding INSTANCE = 
new Windows874(); + + private Windows874() { + super(NAME, LABELS); + } + + @Override public CharsetDecoder newDecoder() { + return new FallibleSingleByteDecoder(this, TABLE); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Auto.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Auto.java new file mode 100644 index 000000000..0967a5814 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Auto.java @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2010 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +package nu.validator.htmlparser.annotation; + +public @interface Auto { + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/CharacterName.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/CharacterName.java new file mode 100644 index 000000000..bcb8a2b00 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/CharacterName.java @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2010 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +package nu.validator.htmlparser.annotation; + +public @interface CharacterName { + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Const.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Const.java new file mode 100644 index 000000000..2ba7f418a --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Const.java @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2010 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.annotation; + +/** + * Marker for translating into the C++ const keyword on the declaration in + * question. 
+ * + * @version $Id$ + * @author hsivonen + */ +public @interface Const { + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/IdType.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/IdType.java new file mode 100644 index 000000000..117da8d3c --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/IdType.java @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.annotation; + +/** + * The type for attribute IDness. (In Java, an interned string + * <code>"CDATA"</code> or <code>"ID"</code>.) 
+ * + * @version $Id$ + * @author hsivonen + */ +public @interface IdType { + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Inline.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Inline.java new file mode 100644 index 000000000..cc0728b1b --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Inline.java @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2009-2010 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.annotation; + +/** + * Translates into the C++ inline keyword. 
+ * + * @version $Id$ + * @author hsivonen + */ +public @interface Inline { + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Literal.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Literal.java new file mode 100644 index 000000000..44444d525 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Literal.java @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2009-2010 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.annotation; + +/** + * Marks a string type as being the literal string type (typically const char*) + * in C++. 
+ * + * @version $Id$ + * @author hsivonen + */ +public @interface Literal { + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Local.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Local.java new file mode 100644 index 000000000..1f91ba93b --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Local.java @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.annotation; + +/** + * The local name of an element or attribute. Must be comparable with + * <code>==</code> (interned <code>String</code> in Java). 
+ * + * @version $Id$ + * @author hsivonen + */ +public @interface Local { + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/NoLength.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/NoLength.java new file mode 100644 index 000000000..cf011d33e --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/NoLength.java @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.annotation; + +/** + * The array type marked with this annotation won't have its + * <code>.length</code> read. 
+ * + * @version $Id$ + * @author hsivonen + */ +public @interface NoLength { + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/NsUri.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/NsUri.java new file mode 100644 index 000000000..03baa75f5 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/NsUri.java @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.annotation; + +/** + * The namespace URI type. (In Java, an interned <code>String</code>.) 
+ * + * @version $Id$ + * @author hsivonen + */ +public @interface NsUri { + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Prefix.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Prefix.java new file mode 100644 index 000000000..268e531a3 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Prefix.java @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.annotation; + +/** + * The type for namespace prefixes. (In Java, an interned <code>String</code>.) 
+ * + * @version $Id$ + * @author hsivonen + */ +public @interface Prefix { + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/QName.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/QName.java new file mode 100644 index 000000000..e6d4807b6 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/QName.java @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.annotation; + +/** + * The type for qualified names. (In Java, an interned <code>String</code>.) 
+ * + * @version $Id$ + * @author hsivonen + */ +public @interface QName { + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Virtual.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Virtual.java new file mode 100644 index 000000000..e293e1af5 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/Virtual.java @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.annotation; + +/** + * Marks a method as virtual in C++. 
+ * + * @version $Id$ + * @author hsivonen + */ +public @interface Virtual { + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/package.html b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/package.html new file mode 100644 index 000000000..af15d3827 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/annotation/package.html @@ -0,0 +1,30 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN"> +<html> +<head><title>Package Overview</title> +<!-- + Copyright (c) 2008 Mozilla Foundation + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the "Software"), + to deal in the Software without restriction, including without limitation + the rights to use, copy, modify, merge, publish, distribute, sublicense, + and/or sell copies of the Software, and to permit persons to whom the + Software is furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. +--> +</head> +<body bgcolor="white"> +<p>This package provides annotations for facilitating automated translation +of the source code into other programming languages.</p> +</body> +</html>
\ No newline at end of file diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/ByteReadable.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/ByteReadable.java new file mode 100644 index 000000000..f3b3e74ca --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/ByteReadable.java @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2010 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.common; + +import java.io.IOException; + +/** + * An interface for providing a method for reading a stream of bytes one byte at + * a time. + * + * @version $Id$ + * @author hsivonen + */ +public interface ByteReadable { + /** + * Returns the value of the next byte as an integer from 0 to 0xFF or -1 if + * the stream has ended. 
+ * + * @return integer from 0 to 0xFF or -1 on EOF + * @throws IOException + */ + public int readByte() throws IOException; +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/CharacterHandler.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/CharacterHandler.java new file mode 100644 index 000000000..4a5769f54 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/CharacterHandler.java @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2007-2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.common; + +import org.xml.sax.SAXException; + +/** + * An interface for receiving notifications of UTF-16 code units read from a character stream. + * + * @version $Id$ + * @author hsivonen + */ +public interface CharacterHandler { + + /** + * Receive notification of a run of UTF-16 code units. 
+ * @param ch the buffer + * @param start start index in the buffer + * @param length the number of characters to process starting from <code>start</code> + * @throws SAXException if things go wrong + */ + public void characters(char[] ch, int start, int length) + throws SAXException; + + /** + * Signals the end of the stream. Can be used for cleanup. Doesn't mean that the stream ended successfully. + * + * @throws SAXException if things go wrong + */ + public void end() throws SAXException; + + /** + * Signals the start of the stream. Can be used for setup. + * + * @throws SAXException if things go wrong + */ + public void start() throws SAXException; + +}
\ No newline at end of file diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/DoctypeExpectation.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/DoctypeExpectation.java new file mode 100644 index 000000000..a34af51fa --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/DoctypeExpectation.java @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.common; + +/** + * Used for indicating desired behavior with legacy doctypes. + * + * @version $Id$ + * @author hsivonen + */ +public enum DoctypeExpectation { + /** + * Be a pure HTML5 parser. + */ + HTML, + + /** + * Require the HTML 4.01 Transitional public id. Turn on HTML4-specific + * additional errors regardless of doctype. + */ + HTML401_TRANSITIONAL, + + /** + * Require the HTML 4.01 Transitional public id and a system id.
Turn on + * HTML4-specific additional errors regardless of doctype. + * + * NOTE(review): the constant name suggests the HTML 4.01 Strict public id + * rather than the Transitional one; confirm this description against the + * tree builder. + */ + HTML401_STRICT, + + /** + * Treat the doctype required by HTML 5, doctypes with the HTML 4.01 Strict + * public id and doctypes with the HTML 4.01 Transitional public id and a + * system id as non-errors. Turn on HTML4-specific additional errors if the + * public id is the HTML 4.01 Strict or Transitional public id. + */ + AUTO, + + /** + * Never enable HTML4-specific error checks. Never report any doctype + * condition as an error. (Doctype tokens in wrong places will be + * reported as errors, though.) The application may decide what to log + * in response to calls to <code>DocumentModeHandler</code>. This mode + * is meant for doing surveys on existing content. + */ + NO_DOCTYPE_ERRORS +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/DocumentMode.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/DocumentMode.java new file mode 100644 index 000000000..e30eddd87 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/DocumentMode.java @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.common; + +/** + * Represents the HTML document compatibility mode. + * + * @version $Id$ + * @author hsivonen + */ +public enum DocumentMode { + /** + * The Standards Mode + */ + STANDARDS_MODE, + + /** + * The Limited Quirks Mode aka. The Almost Standards Mode + */ + ALMOST_STANDARDS_MODE, + + /** + * The Quirks Mode + */ + QUIRKS_MODE +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/DocumentModeHandler.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/DocumentModeHandler.java new file mode 100644 index 000000000..55377e0e4 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/DocumentModeHandler.java @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.common; + + +import org.xml.sax.SAXException; + +/** + * A callback interface for receiving notification about the document mode. + * + * @version $Id$ + * @author hsivonen + */ +public interface DocumentModeHandler { + + /** + * Receive notification of the document mode. + * + * @param mode the document mode + * @param publicIdentifier the public id of the doctype or <code>null</code> if unavailable + * @param systemIdentifier the system id of the doctype or <code>null</code> if unavailable + * @param html4SpecificAdditionalErrorChecks <code>true</code> if HTML 4-specific checks were enabled, <code>false</code> otherwise + * @throws SAXException if things go wrong + */ + public void documentMode(DocumentMode mode, String publicIdentifier, String systemIdentifier, boolean html4SpecificAdditionalErrorChecks) throws SAXException; +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/EncodingDeclarationHandler.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/EncodingDeclarationHandler.java new file mode 100644 index 000000000..6f185aeaf --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/EncodingDeclarationHandler.java @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2008-2010 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software 
without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.common; + +import org.xml.sax.SAXException; + +/** + * An interface for communicating about character encoding names with the + * environment of the parser. + * + * @version $Id$ + * @author hsivonen + */ +public interface EncodingDeclarationHandler { + + /** + * Indicates that the parser has found an internal encoding declaration with + * the charset value <code>charset</code>. + * + * @param charset + * the charset name found. + * @return <code>true</code> if the value of <code>charset</code> was an + * encoding name for a supported ASCII-superset encoding. + * @throws SAXException + * if something went wrong + */ + public boolean internalEncodingDeclaration(String charset) throws SAXException; + + /** + * Queries the environment for the encoding in use (for error reporting). 
+ * + * @return the encoding in use + * @throws SAXException + * if something went wrong + */ + public String getCharacterEncoding() throws SAXException; + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/Heuristics.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/Heuristics.java new file mode 100644 index 000000000..40f15ce7d --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/Heuristics.java @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.common; + +/** + * Indicates a request for character encoding sniffer choice. + * + * @version $Id$ + * @author hsivonen + */ +public enum Heuristics { + + /** + * Perform no heuristic sniffing. + */ + NONE, + + /** + * Use both jchardet and ICU4J. + */ + ALL, + + /** + * Use jchardet only. 
+ */ + CHARDET, + + /** + * Use ICU4J only. + */ + ICU +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/Interner.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/Interner.java new file mode 100644 index 000000000..deab4c60f --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/Interner.java @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2009-2010 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.common; + +/** + * A placeholder type that translates into the type of the C++ class that + * implements an interning service for local names (<code>@Local</code> in + * Java). 
+ * + * @version $Id$ + * @author hsivonen + */ +public interface Interner { + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/TokenHandler.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/TokenHandler.java new file mode 100644 index 000000000..18f49e99d --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/TokenHandler.java @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2008-2010 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +package nu.validator.htmlparser.common; + +import nu.validator.htmlparser.annotation.Const; +import nu.validator.htmlparser.annotation.NoLength; +import nu.validator.htmlparser.impl.ElementName; +import nu.validator.htmlparser.impl.HtmlAttributes; +import nu.validator.htmlparser.impl.Tokenizer; + +import org.xml.sax.SAXException; + +/** + * <code>Tokenizer</code> reports tokens through this interface. + * + * @version $Id$ + * @author hsivonen + */ +public interface TokenHandler { + + /** + * This method is called at the start of tokenization before any other + * methods on this interface are called. Implementations should hold the + * reference to the <code>Tokenizer</code> in order to set the content + * model flag and in order to be able to query for <code>Locator</code> + * data. + * + * @param self + * the <code>Tokenizer</code>. + * @throws SAXException + * if something went wrong + */ + public void startTokenization(Tokenizer self) throws SAXException; + + /** + * If this handler implementation cares about comments, return + * <code>true</code>. If not, return <code>false</code>. + * + * @return whether this handler wants comments + * @throws SAXException + * if something went wrong + */ + public boolean wantsComments() throws SAXException; + + /** + * Receive a doctype token. + * + * @param name + * the name + * @param publicIdentifier + * the public id + * @param systemIdentifier + * the system id + * @param forceQuirks + * whether the token is correct (NOTE(review): the parameter + * name suggests a force-quirks flag, which would have the + * opposite polarity; confirm against the tokenizer) + * @throws SAXException + * if something went wrong + */ + public void doctype(String name, String publicIdentifier, + String systemIdentifier, boolean forceQuirks) throws SAXException; + + /** + * Receive a start tag token.
+ * + * @param eltName + * the tag name + * @param attributes + * the attributes + * @param selfClosing + * presumably whether the start tag was marked as self-closing + * (NOTE(review): inferred from the parameter name; confirm + * against the tokenizer) + * @throws SAXException + * if something went wrong + */ + public void startTag(ElementName eltName, HtmlAttributes attributes, + boolean selfClosing) throws SAXException; + + /** + * Receive an end tag token. + * + * @param eltName + * the tag name + * @throws SAXException + * if something went wrong + */ + public void endTag(ElementName eltName) throws SAXException; + + /** + * Receive a comment token. The data is junk if the + * <code>wantsComments()</code> returned <code>false</code>. + * + * @param buf + * a buffer holding the data + * @param start the offset into the buffer + * @param length + * the number of code units to read + * @throws SAXException + * if something went wrong + */ + public void comment(@NoLength char[] buf, int start, int length) throws SAXException; + + /** + * Receive character tokens. This method has the same semantics as the SAX + * method of the same name. + * + * @param buf + * a buffer holding the data + * @param start + * offset into the buffer + * @param length + * the number of code units to read + * @throws SAXException + * if something went wrong + * @see org.xml.sax.ContentHandler#characters(char[], int, int) + */ + public void characters(@Const @NoLength char[] buf, int start, int length) + throws SAXException; + + /** + * Reports a U+0000 that's being turned into a U+FFFD. + * + * @throws SAXException + * if something went wrong + */ + public void zeroOriginatingReplacementCharacter() throws SAXException; + + /** + * The end-of-file token. + * + * @throws SAXException + * if something went wrong + */ + public void eof() throws SAXException; + + /** + * Perform final cleanup. 
+ * + * @throws SAXException + * if something went wrong + */ + public void endTokenization() throws SAXException; + + /** + * Checks if the CDATA sections are allowed.
+ * + * @return <code>true</code> if CDATA sections are allowed + * @throws SAXException + * if something went wrong + */ + public boolean cdataSectionAllowed() throws SAXException; + + /** + * Notifies the token handler of the worst case amount of data to be + * reported via <code>characters()</code> and + * <code>zeroOriginatingReplacementCharacter()</code>. + * + * @param inputLength the maximum number of chars that can be reported + * via <code>characters()</code> and + * <code>zeroOriginatingReplacementCharacter()</code> before a new call to + * this method. + * @throws SAXException + * if something went wrong + */ + public void ensureBufferSpace(int inputLength) throws SAXException; +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/TransitionHandler.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/TransitionHandler.java new file mode 100644 index 000000000..eec23c71c --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/TransitionHandler.java @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2010 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.common; + +import org.xml.sax.SAXException; + +/** + * An interface for intercepting information about the state transitions that + * the tokenizer is making. + * + * @version $Id$ + * @author hsivonen + */ +public interface TransitionHandler { + + /** + * This method is called for every tokenizer state transition. + * + * @param from + * the state the tokenizer is transitioning from + * @param to + * the state being transitioned to + * @param reconsume + * <code>true</code> if the current input character is going to + * be reconsumed in the new state + * @param pos + * the current index into the input stream + * @throws SAXException + * if something went wrong + */ + void transition(int from, int to, boolean reconsume, int pos) + throws SAXException; +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/XmlViolationPolicy.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/XmlViolationPolicy.java new file mode 100644 index 000000000..c959df655 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/XmlViolationPolicy.java @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission 
notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.common; + +/** + * Policy for XML 1.0 violations. + * + * @version $Id$ + * @author hsivonen + */ +public enum XmlViolationPolicy { + /** + * Conform to HTML 5, allow XML 1.0 to be violated. + */ + ALLOW, + + /** + * Halt when something cannot be mapped to XML 1.0. + */ + FATAL, + + /** + * Be non-conforming and alter the infoset to fit + * XML 1.0 when something would otherwise not be + * mappable to XML 1.0. 
+ */ + ALTER_INFOSET +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/package.html b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/package.html new file mode 100644 index 000000000..43f141cd8 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/common/package.html @@ -0,0 +1,29 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN"> +<html> +<head><title>Package Overview</title> +<!-- + Copyright (c) 2007 Henri Sivonen + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the "Software"), + to deal in the Software without restriction, including without limitation + the rights to use, copy, modify, merge, publish, distribute, sublicense, + and/or sell copies of the Software, and to permit persons to whom the + Software is furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. +--> +</head> +<body bgcolor="white"> +<p>This package provides common interfaces and enumerations.</p> +</body> +</html>
\ No newline at end of file diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/dom/DOMTreeBuilder.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/dom/DOMTreeBuilder.java new file mode 100644 index 000000000..2b8eff230 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/dom/DOMTreeBuilder.java @@ -0,0 +1,357 @@
/*
 * Copyright (c) 2007 Henri Sivonen
 * Copyright (c) 2008-2010 Mozilla Foundation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

package nu.validator.htmlparser.dom;

import nu.validator.htmlparser.common.DocumentMode;
import nu.validator.htmlparser.impl.CoalescingTreeBuilder;
import nu.validator.htmlparser.impl.HtmlAttributes;

import org.w3c.dom.DOMException;
import org.w3c.dom.DOMImplementation;
import org.w3c.dom.Document;
import org.w3c.dom.DocumentFragment;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.Text;
import org.xml.sax.SAXException;

/**
 * The tree builder glue for building a tree through the public DOM APIs.
 *
 * <p>Every DOM call is wrapped so that a {@link DOMException} is handed to
 * <code>fatal(e)</code> instead of escaping to the caller.
 *
 * @version $Id$
 * @author hsivonen
 */
class DOMTreeBuilder extends CoalescingTreeBuilder<Element> {

    /**
     * The DOM implementation used to create a fresh document in
     * {@link #start(boolean)}.
     */
    private final DOMImplementation implementation;

    /**
     * The document being built. Assigned in {@link #start(boolean)} and reset
     * to <code>null</code> once the result has been handed out by
     * {@link #getDocument()} or {@link #getDocumentFragment()}.
     */
    private Document document;

    /**
     * The constructor.
     *
     * @param implementation
     *            the DOM impl.
     */
    protected DOMTreeBuilder(DOMImplementation implementation) {
        super();
        this.implementation = implementation;
    }

    /**
     * Appends <code>text</code> to <code>candidate</code> if it is a text
     * node.
     *
     * @param candidate
     *            the node that may absorb the text; may be <code>null</code>
     * @param text
     *            the characters to append
     * @return <code>true</code> if the text was appended to
     *         <code>candidate</code>, <code>false</code> if the caller has
     *         to create a new text node
     */
    private static boolean coalesceIntoTextNode(Node candidate, String text) {
        if (candidate == null || candidate.getNodeType() != Node.TEXT_NODE) {
            return false;
        }
        Text existing = (Text) candidate;
        existing.setData(existing.getData() + text);
        return true;
    }

    /**
     * Copies onto <code>element</code> every attribute that it does not
     * already carry; attributes that are already present keep their value.
     *
     * @see nu.validator.htmlparser.impl.TreeBuilder#addAttributesToElement(java.lang.Object,
     *      nu.validator.htmlparser.impl.HtmlAttributes)
     */
    @Override protected void addAttributesToElement(Element element,
            HtmlAttributes attributes) throws SAXException {
        try {
            for (int i = 0; i < attributes.getLength(); i++) {
                String localName = attributes.getLocalNameNoBoundsCheck(i);
                String uri = attributes.getURINoBoundsCheck(i);
                if (!element.hasAttributeNS(uri, localName)) {
                    element.setAttributeNS(uri, localName,
                            attributes.getValueNoBoundsCheck(i));
                }
            }
        } catch (DOMException e) {
            fatal(e);
        }
    }

    /**
     * Appends text to <code>parent</code>, merging it into the last child
     * when that child is already a text node.
     *
     * @see nu.validator.htmlparser.impl.CoalescingTreeBuilder#appendCharacters(java.lang.Object,
     *      java.lang.String)
     */
    @Override protected void appendCharacters(Element parent, String text)
            throws SAXException {
        try {
            if (!coalesceIntoTextNode(parent.getLastChild(), text)) {
                parent.appendChild(document.createTextNode(text));
            }
        } catch (DOMException e) {
            fatal(e);
        }
    }

    /**
     * Moves all children of <code>oldParent</code>, in order, to the end of
     * <code>newParent</code>.
     *
     * @see nu.validator.htmlparser.impl.TreeBuilder#appendChildrenToNewParent(java.lang.Object,
     *      java.lang.Object)
     */
    @Override protected void appendChildrenToNewParent(Element oldParent,
            Element newParent) throws SAXException {
        try {
            // appendChild() re-parents the node, so the loop terminates once
            // oldParent has been emptied.
            while (oldParent.hasChildNodes()) {
                newParent.appendChild(oldParent.getFirstChild());
            }
        } catch (DOMException e) {
            fatal(e);
        }
    }

    /**
     * Appends a comment node to <code>parent</code>.
     *
     * @see nu.validator.htmlparser.impl.CoalescingTreeBuilder#appendComment(java.lang.Object,
     *      java.lang.String)
     */
    @Override protected void appendComment(Element parent, String comment)
            throws SAXException {
        try {
            parent.appendChild(document.createComment(comment));
        } catch (DOMException e) {
            fatal(e);
        }
    }

    /**
     * Appends a comment node directly to the document.
     *
     * @see nu.validator.htmlparser.impl.CoalescingTreeBuilder#appendCommentToDocument(java.lang.String)
     */
    @Override protected void appendCommentToDocument(String comment)
            throws SAXException {
        try {
            document.appendChild(document.createComment(comment));
        } catch (DOMException e) {
            fatal(e);
        }
    }

    /**
     * Creates a detached element carrying the given attributes. Attributes
     * whose reported type is <code>ID</code> are additionally flagged as ID
     * attributes on the element.
     *
     * @see nu.validator.htmlparser.impl.TreeBuilder#createElement(String, String, nu.validator.htmlparser.impl.HtmlAttributes, Object)
     */
    @Override protected Element createElement(String ns, String name,
            HtmlAttributes attributes, Element intendedParent) throws SAXException {
        try {
            Element rv = document.createElementNS(ns, name);
            for (int i = 0; i < attributes.getLength(); i++) {
                rv.setAttributeNS(attributes.getURINoBoundsCheck(i),
                        attributes.getLocalNameNoBoundsCheck(i),
                        attributes.getValueNoBoundsCheck(i));
                // Compare by value: identity comparison only works while the
                // type string happens to be the very same interned instance.
                if ("ID".equals(attributes.getTypeNoBoundsCheck(i))) {
                    rv.setIdAttributeNS(null, attributes.getLocalName(i), true);
                }
            }
            return rv;
        } catch (DOMException e) {
            fatal(e);
            throw new RuntimeException("Unreachable");
        }
    }

    /**
     * Creates the root <code>html</code> element in the XHTML namespace,
     * copies the attributes onto it and appends it to the document.
     *
     * @see nu.validator.htmlparser.impl.TreeBuilder#createHtmlElementSetAsRoot(nu.validator.htmlparser.impl.HtmlAttributes)
     */
    @Override protected Element createHtmlElementSetAsRoot(
            HtmlAttributes attributes) throws SAXException {
        try {
            Element rv = document.createElementNS(
                    "http://www.w3.org/1999/xhtml", "html");
            for (int i = 0; i < attributes.getLength(); i++) {
                rv.setAttributeNS(attributes.getURINoBoundsCheck(i),
                        attributes.getLocalNameNoBoundsCheck(i),
                        attributes.getValueNoBoundsCheck(i));
            }
            document.appendChild(rv);
            return rv;
        } catch (DOMException e) {
            fatal(e);
            throw new RuntimeException("Unreachable");
        }
    }

    /**
     * Appends <code>child</code> as the last child of <code>newParent</code>.
     *
     * @see nu.validator.htmlparser.impl.TreeBuilder#appendElement(java.lang.Object,
     *      java.lang.Object)
     */
    @Override protected void appendElement(Element child, Element newParent)
            throws SAXException {
        try {
            newParent.appendChild(child);
        } catch (DOMException e) {
            fatal(e);
        }
    }

    /**
     * Reports whether <code>element</code> has any child nodes.
     *
     * @see nu.validator.htmlparser.impl.TreeBuilder#hasChildren(java.lang.Object)
     */
    @Override protected boolean hasChildren(Element element)
            throws SAXException {
        try {
            return element.hasChildNodes();
        } catch (DOMException e) {
            fatal(e);
            throw new RuntimeException("Unreachable");
        }
    }

    /**
     * Creates an element like the four-argument overload and records
     * <code>form</code> on it as user data under the key
     * <code>nu.validator.form-pointer</code>.
     *
     * @see nu.validator.htmlparser.impl.TreeBuilder#createElement(String,
     *      String, nu.validator.htmlparser.impl.HtmlAttributes,
     *      java.lang.Object, java.lang.Object)
     */
    @Override protected Element createElement(String ns, String name,
            HtmlAttributes attributes, Element form, Element intendedParent) throws SAXException {
        try {
            Element rv = createElement(ns, name, attributes, intendedParent);
            rv.setUserData("nu.validator.form-pointer", form, null);
            return rv;
        } catch (DOMException e) {
            fatal(e);
            // NOTE(review): sibling methods treat this point as unreachable
            // (fatal() is presumably expected to throw); the null return is
            // kept as in the original.
            return null;
        }
    }

    /**
     * Creates the empty document that the parse will populate.
     *
     * @param fragment
     *            not consulted by this implementation
     * @see nu.validator.htmlparser.impl.TreeBuilder#start(boolean)
     */
    @Override protected void start(boolean fragment) throws SAXException {
        document = implementation.createDocument(null, null, null);
    }

    /**
     * Records the document mode on the document as user data under the key
     * <code>nu.validator.document-mode</code>. The identifiers and the HTML4
     * flag are not stored.
     *
     * @see nu.validator.htmlparser.impl.TreeBuilder#documentMode(nu.validator.htmlparser.common.DocumentMode,
     *      java.lang.String, java.lang.String, boolean)
     */
    protected void documentMode(DocumentMode mode, String publicIdentifier,
            String systemIdentifier, boolean html4SpecificAdditionalErrorChecks)
            throws SAXException {
        document.setUserData("nu.validator.document-mode", mode, null);
    }

    /**
     * Returns the document and drops this builder's reference to it.
     *
     * @return the document
     */
    Document getDocument() {
        Document rv = document;
        document = null;
        return rv;
    }

    /**
     * Returns a document fragment holding the children of the document's
     * first child, and drops this builder's reference to the document. The
     * fragment is empty when the document has no child at all.
     *
     * @return the document fragment
     */
    DocumentFragment getDocumentFragment() {
        DocumentFragment rv = document.createDocumentFragment();
        Node rootElt = document.getFirstChild();
        // Guard against an empty document instead of failing with a
        // NullPointerException.
        while (rootElt != null && rootElt.hasChildNodes()) {
            rv.appendChild(rootElt.getFirstChild());
        }
        document = null;
        return rv;
    }

    /**
     * Creates an element and foster-parents it: it is inserted before
     * <code>table</code> when the table has a parent node, and appended to
     * <code>stackParent</code> otherwise.
     */
    @Override
    protected Element createAndInsertFosterParentedElement(String ns, String name,
            HtmlAttributes attributes, Element table, Element stackParent) throws SAXException {
        try {
            Node parent = table.getParentNode();
            Element child = createElement(ns, name, attributes, parent != null ? (Element) parent : stackParent);

            if (parent != null) { // always an element if not null
                parent.insertBefore(child, table);
            } else {
                stackParent.appendChild(child);
            }

            return child;
        } catch (DOMException e) {
            fatal(e);
            throw new RuntimeException("Unreachable");
        }
    }

    /**
     * Foster-parents text: when <code>table</code> has a parent node the text
     * goes immediately before the table, otherwise it goes at the end of
     * <code>stackParent</code>. In both cases it is merged into an adjacent
     * text node when there is one.
     */
    @Override protected void insertFosterParentedCharacters(String text,
            Element table, Element stackParent) throws SAXException {
        try {
            Node parent = table.getParentNode();
            if (parent != null) { // always an element if not null
                if (!coalesceIntoTextNode(table.getPreviousSibling(), text)) {
                    parent.insertBefore(document.createTextNode(text), table);
                }
                return;
            }
            if (!coalesceIntoTextNode(stackParent.getLastChild(), text)) {
                stackParent.appendChild(document.createTextNode(text));
            }
        } catch (DOMException e) {
            fatal(e);
        }
    }

    /**
     * Foster-parents an existing node: it is inserted before
     * <code>table</code> when the table has a parent node, and appended to
     * <code>stackParent</code> otherwise.
     */
    @Override protected void insertFosterParentedChild(Element child,
            Element table, Element stackParent) throws SAXException {
        try {
            Node parent = table.getParentNode();
            if (parent != null) { // always an element if not null
                parent.insertBefore(child, table);
            } else {
                stackParent.appendChild(child);
            }
        } catch (DOMException e) {
            fatal(e);
        }
    }

    /**
     * Removes <code>element</code> from its parent; does nothing when it has
     * no parent.
     */
    @Override protected void detachFromParent(Element element)
            throws SAXException {
        try {
            Node parent = element.getParentNode();
            if (parent != null) {
                parent.removeChild(element);
            }
        } catch (DOMException e) {
            fatal(e);
        }
    }
}
diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/dom/Dom2Sax.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/dom/Dom2Sax.java new file mode 100644 index
000000000..5e366be7b --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/dom/Dom2Sax.java @@ -0,0 +1,259 @@ +/* + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.dom; + +import org.w3c.dom.DocumentType; +import org.w3c.dom.NamedNodeMap; +import org.w3c.dom.Node; +import org.xml.sax.Attributes; +import org.xml.sax.ContentHandler; +import org.xml.sax.SAXException; +import org.xml.sax.ext.LexicalHandler; + +public class Dom2Sax { + + private static String emptyIfNull(String namespaceURI) { + return namespaceURI == null ? 
"" : namespaceURI; + } + + private final NamedNodeMapAttributes attributes = new NamedNodeMapAttributes(); + + private final ContentHandler contentHandler; + + private final LexicalHandler lexicalHandler; + + /** + * @param contentHandler + * @param lexicalHandler + */ + public Dom2Sax(ContentHandler contentHandler, LexicalHandler lexicalHandler) { + if (contentHandler == null) { + throw new IllegalArgumentException("ContentHandler must not be null."); + } + this.contentHandler = contentHandler; + this.lexicalHandler = lexicalHandler; + } + + public void parse(Node node) throws SAXException { + Node current = node; + Node next; + char[] buf; + for (;;) { + switch (current.getNodeType()) { + case Node.ELEMENT_NODE: + attributes.setNamedNodeMap(current.getAttributes()); + // To work around severe bogosity in the default DOM + // impl, use the node name if local name is null. + String localName = current.getLocalName(); + contentHandler.startElement( + emptyIfNull(current.getNamespaceURI()), + localName == null ? 
current.getNodeName() + : localName, null, attributes); + attributes.clear(); + break; + case Node.TEXT_NODE: + buf = current.getNodeValue().toCharArray(); + contentHandler.characters(buf, 0, buf.length); + break; + case Node.CDATA_SECTION_NODE: + if (lexicalHandler != null) { + lexicalHandler.startCDATA(); + } + buf = current.getNodeValue().toCharArray(); + contentHandler.characters(buf, 0, buf.length); + if (lexicalHandler != null) { + lexicalHandler.endCDATA(); + } + break; + case Node.COMMENT_NODE: + if (lexicalHandler != null) { + buf = current.getNodeValue().toCharArray(); + lexicalHandler.comment(buf, 0, buf.length); + } + break; + case Node.DOCUMENT_NODE: + contentHandler.startDocument(); + break; + case Node.DOCUMENT_TYPE_NODE: + if (lexicalHandler != null) { + DocumentType doctype = (DocumentType) current; + lexicalHandler.startDTD(doctype.getName(), + doctype.getPublicId(), doctype.getSystemId()); + lexicalHandler.endDTD(); + } + break; + case Node.PROCESSING_INSTRUCTION_NODE: + contentHandler.processingInstruction(current.getNodeName(), current.getNodeValue()); + break; + case Node.ENTITY_REFERENCE_NODE: + contentHandler.skippedEntity(current.getNodeName()); + break; + } + if ((next = current.getFirstChild()) != null) { + current = next; + continue; + } + for (;;) { + switch (current.getNodeType()) { + case Node.ELEMENT_NODE: + // To work around severe bogosity in the default DOM + // impl, use the node name if local name is null. + String localName = current.getLocalName(); + contentHandler.endElement( + emptyIfNull(current.getNamespaceURI()), + localName == null ? 
current.getNodeName() + : localName, null); + break; + case Node.DOCUMENT_NODE: + contentHandler.endDocument(); + break; + } + if (current == node) { + return; + } + if ((next = current.getNextSibling()) != null) { + current = next; + break; + } + current = current.getParentNode(); + } + } + } + + private class NamedNodeMapAttributes implements Attributes { + + private NamedNodeMap map; + + private int length; + + public void setNamedNodeMap(NamedNodeMap attributes) { + this.map = attributes; + this.length = attributes.getLength(); + } + + public void clear() { + this.map = null; + } + + public int getIndex(String qName) { + for (int i = 0; i < length; i++) { + Node n = map.item(i); + if (n.getNodeName().equals(qName)) { + return i; + } + } + return -1; + } + + public int getIndex(String uri, String localName) { + for (int i = 0; i < length; i++) { + Node n = map.item(i); + if (n.getLocalName().equals(localName) && emptyIfNull(n.getNamespaceURI()).equals(uri)) { + return i; + } + } + return -1; + } + + public int getLength() { + return length; + } + + public String getLocalName(int index) { + if (index < length && index >= 0) { + return map.item(index).getLocalName(); + } else { + return null; + } + } + + public String getQName(int index) { + if (index < length && index >= 0) { + return map.item(index).getNodeName(); + } else { + return null; + } + } + + public String getType(int index) { + if (index < length && index >= 0) { + return "id".equals(map.item(index).getLocalName()) ? 
"ID" : "CDATA"; + } else { + return null; + } + } + + public String getType(String qName) { + int index = getIndex(qName); + if (index == -1) { + return null; + } else { + return getType(index); + } + } + + public String getType(String uri, String localName) { + int index = getIndex(uri, localName); + if (index == -1) { + return null; + } else { + return getType(index); + } + } + + public String getURI(int index) { + if (index < length && index >= 0) { + return emptyIfNull(map.item(index).getNamespaceURI()); + } else { + return null; + } + } + + public String getValue(int index) { + if (index < length && index >= 0) { + return map.item(index).getNodeValue(); + } else { + return null; + } + } + + public String getValue(String qName) { + int index = getIndex(qName); + if (index == -1) { + return null; + } else { + return getValue(index); + } + } + + public String getValue(String uri, String localName) { + int index = getIndex(uri, localName); + if (index == -1) { + return null; + } else { + return getValue(index); + } + } + + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/dom/HtmlDocumentBuilder.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/dom/HtmlDocumentBuilder.java new file mode 100644 index 000000000..f4a307c9f --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/dom/HtmlDocumentBuilder.java @@ -0,0 +1,736 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2007-2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this 
permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.dom; + +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.LinkedList; +import java.util.List; + +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; + +import nu.validator.htmlparser.common.CharacterHandler; +import nu.validator.htmlparser.common.DoctypeExpectation; +import nu.validator.htmlparser.common.DocumentModeHandler; +import nu.validator.htmlparser.common.Heuristics; +import nu.validator.htmlparser.common.TokenHandler; +import nu.validator.htmlparser.common.TransitionHandler; +import nu.validator.htmlparser.common.XmlViolationPolicy; +import nu.validator.htmlparser.impl.ErrorReportingTokenizer; +import nu.validator.htmlparser.impl.Tokenizer; +import nu.validator.htmlparser.io.Driver; + +import org.w3c.dom.DOMImplementation; +import org.w3c.dom.Document; +import org.w3c.dom.DocumentFragment; +import org.xml.sax.EntityResolver; +import org.xml.sax.ErrorHandler; +import org.xml.sax.InputSource; +import org.xml.sax.Locator; +import org.xml.sax.SAXException; + +/** + * This class implements an HTML5 parser that exposes data through the DOM + * interface. 
+ * + * <p>By default, when using the constructor without arguments, the + * this parser coerces XML 1.0-incompatible infosets into XML 1.0-compatible + * infosets. This corresponds to <code>ALTER_INFOSET</code> as the general + * XML violation policy. To make the parser support non-conforming HTML fully + * per the HTML 5 spec while on the other hand potentially violating the SAX2 + * API contract, set the general XML violation policy to <code>ALLOW</code>. + * This does not work with a standard DOM implementation. + * It is possible to treat XML 1.0 infoset violations as fatal by setting + * the general XML violation policy to <code>FATAL</code>. + * + * <p>The doctype is not represented in the tree. + * + * <p>The document mode is represented as user data <code>DocumentMode</code> + * object with the key <code>nu.validator.document-mode</code> on the document + * node. + * + * <p>The form pointer is also stored as user data with the key + * <code>nu.validator.form-pointer</code>. + * + * @version $Id$ + * @author hsivonen + */ +public class HtmlDocumentBuilder extends DocumentBuilder { + + /** + * Returns the JAXP DOM implementation. + * + * @return the JAXP DOM implementation + */ + private static DOMImplementation jaxpDOMImplementation() { + DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); + factory.setNamespaceAware(true); + DocumentBuilder builder; + try { + builder = factory.newDocumentBuilder(); + } catch (ParserConfigurationException e) { + throw new RuntimeException(e); + } + return builder.getDOMImplementation(); + } + + /** + * The tokenizer. + */ + private Driver driver; + + /** + * The tree builder. + */ + private final DOMTreeBuilder treeBuilder; + + /** + * The DOM impl. + */ + private final DOMImplementation implementation; + + /** + * The entity resolver. 
+ */ + private EntityResolver entityResolver; + + private ErrorHandler errorHandler = null; + + private DocumentModeHandler documentModeHandler = null; + + private DoctypeExpectation doctypeExpectation = DoctypeExpectation.HTML; + + private boolean checkingNormalization = false; + + private boolean scriptingEnabled = false; + + private final List<CharacterHandler> characterHandlers = new LinkedList<CharacterHandler>(); + + private XmlViolationPolicy contentSpacePolicy = XmlViolationPolicy.FATAL; + + private XmlViolationPolicy contentNonXmlCharPolicy = XmlViolationPolicy.FATAL; + + private XmlViolationPolicy commentPolicy = XmlViolationPolicy.FATAL; + + private XmlViolationPolicy namePolicy = XmlViolationPolicy.FATAL; + + private XmlViolationPolicy streamabilityViolationPolicy = XmlViolationPolicy.ALLOW; + + private boolean html4ModeCompatibleWithXhtml1Schemata = false; + + private boolean mappingLangToXmlLang = false; + + private XmlViolationPolicy xmlnsPolicy = XmlViolationPolicy.FATAL; + + private boolean reportingDoctype = true; + + private ErrorHandler treeBuilderErrorHandler = null; + + private Heuristics heuristics = Heuristics.NONE; + + private TransitionHandler transitionHandler = null; + + /** + * Instantiates the document builder with a specific DOM + * implementation and XML violation policy. + * + * @param implementation + * the DOM implementation + * @param xmlPolicy the policy + */ + public HtmlDocumentBuilder(DOMImplementation implementation, + XmlViolationPolicy xmlPolicy) { + this.implementation = implementation; + this.treeBuilder = new DOMTreeBuilder(implementation); + this.driver = null; + setXmlPolicy(xmlPolicy); + } + + /** + * Instantiates the document builder with a specific DOM implementation + * and the infoset-altering XML violation policy. 
+ * + * @param implementation + * the DOM implementation + */ + public HtmlDocumentBuilder(DOMImplementation implementation) { + this(implementation, XmlViolationPolicy.ALTER_INFOSET); + } + + /** + * Instantiates the document builder with the JAXP DOM implementation + * and the infoset-altering XML violation policy. + */ + public HtmlDocumentBuilder() { + this(XmlViolationPolicy.ALTER_INFOSET); + } + + /** + * Instantiates the document builder with the JAXP DOM implementation + * and a specific XML violation policy. + * @param xmlPolicy the policy + */ + public HtmlDocumentBuilder(XmlViolationPolicy xmlPolicy) { + this(jaxpDOMImplementation(), xmlPolicy); + } + + + private Tokenizer newTokenizer(TokenHandler handler, + boolean newAttributesEachTime) { + if (errorHandler == null && transitionHandler == null + && contentNonXmlCharPolicy == XmlViolationPolicy.ALLOW) { + return new Tokenizer(handler, newAttributesEachTime); + } else { + return new ErrorReportingTokenizer(handler, newAttributesEachTime); + } + } + + /** + * This class wraps different tree builders depending on configuration. This + * method does the work of hiding this from the user of the class. 
+ */ + private void lazyInit() { + if (driver == null) { + this.driver = new Driver(newTokenizer(treeBuilder, false)); + this.driver.setErrorHandler(errorHandler); + this.driver.setTransitionHandler(transitionHandler); + this.treeBuilder.setErrorHandler(treeBuilderErrorHandler); + this.driver.setCheckingNormalization(checkingNormalization); + this.driver.setCommentPolicy(commentPolicy); + this.driver.setContentNonXmlCharPolicy(contentNonXmlCharPolicy); + this.driver.setContentSpacePolicy(contentSpacePolicy); + this.driver.setHtml4ModeCompatibleWithXhtml1Schemata(html4ModeCompatibleWithXhtml1Schemata); + this.driver.setMappingLangToXmlLang(mappingLangToXmlLang); + this.driver.setXmlnsPolicy(xmlnsPolicy); + this.driver.setHeuristics(heuristics); + for (CharacterHandler characterHandler : characterHandlers) { + this.driver.addCharacterHandler(characterHandler); + } + this.treeBuilder.setDoctypeExpectation(doctypeExpectation); + this.treeBuilder.setDocumentModeHandler(documentModeHandler); + this.treeBuilder.setScriptingEnabled(scriptingEnabled); + this.treeBuilder.setReportingDoctype(reportingDoctype); + this.treeBuilder.setNamePolicy(namePolicy); + } + } + + /** + * Tokenizes the input source. 
+ * + * @param is the source + * @throws SAXException if stuff goes wrong + * @throws IOException if IO goes wrong + * @throws MalformedURLException if the system ID is malformed and the entity resolver is <code>null</code> + */ + private void tokenize(InputSource is) throws SAXException, IOException, + MalformedURLException { + if (is == null) { + throw new IllegalArgumentException("Null input."); + } + if (is.getByteStream() == null && is.getCharacterStream() == null) { + String systemId = is.getSystemId(); + if (systemId == null) { + throw new IllegalArgumentException( + "No byte stream, no character stream nor URI."); + } + if (entityResolver != null) { + is = entityResolver.resolveEntity(is.getPublicId(), systemId); + } + if (is.getByteStream() == null || is.getCharacterStream() == null) { + is = new InputSource(); + is.setSystemId(systemId); + is.setByteStream(new URL(systemId).openStream()); + } + } + if (driver == null) lazyInit(); + driver.tokenize(is); + } + + /** + * Returns the DOM implementation + * @return the DOM implementation + * @see javax.xml.parsers.DocumentBuilder#getDOMImplementation() + */ + @Override public DOMImplementation getDOMImplementation() { + return implementation; + } + + /** + * Returns <code>true</code>. + * @return <code>true</code> + * @see javax.xml.parsers.DocumentBuilder#isNamespaceAware() + */ + @Override public boolean isNamespaceAware() { + return true; + } + + /** + * Returns <code>false</code> + * @return <code>false</code> + * @see javax.xml.parsers.DocumentBuilder#isValidating() + */ + @Override public boolean isValidating() { + return false; + } + + /** + * For API compatibility. + * @see javax.xml.parsers.DocumentBuilder#newDocument() + */ + @Override public Document newDocument() { + return implementation.createDocument(null, null, null); + } + + /** + * Parses a document from a SAX <code>InputSource</code>. 
+ * @param is the source + * @return the doc + * @throws SAXException if stuff goes wrong + * @throws IOException if IO goes wrong + * @see javax.xml.parsers.DocumentBuilder#parse(org.xml.sax.InputSource) + */ + @Override public Document parse(InputSource is) throws SAXException, + IOException { + treeBuilder.setFragmentContext(null); + tokenize(is); + return treeBuilder.getDocument(); + } + + /** + * Parses a document fragment from a SAX <code>InputSource</code> with + * an HTML element as the fragment context. + * @param is the source + * @param context the context element name (HTML namespace assumed) + * @return the document fragment + * @throws SAXException if stuff goes wrong + * @throws IOException if IO goes wrong + */ + public DocumentFragment parseFragment(InputSource is, String context) + throws IOException, SAXException { + treeBuilder.setFragmentContext(context.intern()); + tokenize(is); + return treeBuilder.getDocumentFragment(); + } + + /** + * Parses a document fragment from a SAX <code>InputSource</code>. + * @param is the source + * @param contextLocal the local name of the context element + * @param contextNamespace the namespace of the context element + * @return the document fragment + * @throws SAXException if stuff goes wrong + * @throws IOException if IO goes wrong + */ + public DocumentFragment parseFragment(InputSource is, String contextLocal, + String contextNamespace) throws IOException, SAXException { + treeBuilder.setFragmentContext(contextLocal.intern(), + contextNamespace.intern(), null, false); + tokenize(is); + return treeBuilder.getDocumentFragment(); + } + + /** + * Sets the entity resolver for URI-only inputs. + * @param resolver the resolver + * @see javax.xml.parsers.DocumentBuilder#setEntityResolver(org.xml.sax.EntityResolver) + */ + @Override public void setEntityResolver(EntityResolver resolver) { + this.entityResolver = resolver; + } + + /** + * Sets the error handler. 
+ * @param errorHandler the handler + * @see javax.xml.parsers.DocumentBuilder#setErrorHandler(org.xml.sax.ErrorHandler) + */ + @Override public void setErrorHandler(ErrorHandler errorHandler) { + treeBuilder.setErrorHandler(errorHandler); + if (driver != null) { + driver.setErrorHandler(errorHandler); + } + } + + public void setTransitionHander(TransitionHandler handler) { + transitionHandler = handler; + driver = null; + } + + /** + * Indicates whether NFC normalization of source is being checked. + * @return <code>true</code> if NFC normalization of source is being checked. + * @see nu.validator.htmlparser.impl.Tokenizer#isCheckingNormalization() + */ + public boolean isCheckingNormalization() { + return checkingNormalization; + } + + /** + * Toggles the checking of the NFC normalization of source. + * @param enable <code>true</code> to check normalization + * @see nu.validator.htmlparser.impl.Tokenizer#setCheckingNormalization(boolean) + */ + public void setCheckingNormalization(boolean enable) { + this.checkingNormalization = enable; + if (driver != null) { + driver.setCheckingNormalization(checkingNormalization); + } + } + + /** + * Sets the policy for consecutive hyphens in comments. + * @param commentPolicy the policy + * @see nu.validator.htmlparser.impl.Tokenizer#setCommentPolicy(nu.validator.htmlparser.common.XmlViolationPolicy) + */ + public void setCommentPolicy(XmlViolationPolicy commentPolicy) { + this.commentPolicy = commentPolicy; + if (driver != null) { + driver.setCommentPolicy(commentPolicy); + } + } + + /** + * Sets the policy for non-XML characters except white space. 
+ * @param contentNonXmlCharPolicy the policy + * @see nu.validator.htmlparser.impl.Tokenizer#setContentNonXmlCharPolicy(nu.validator.htmlparser.common.XmlViolationPolicy) + */ + public void setContentNonXmlCharPolicy( + XmlViolationPolicy contentNonXmlCharPolicy) { + this.contentNonXmlCharPolicy = contentNonXmlCharPolicy; + driver = null; + } + + /** + * Sets the policy for non-XML white space. + * @param contentSpacePolicy the policy + * @see nu.validator.htmlparser.impl.Tokenizer#setContentSpacePolicy(nu.validator.htmlparser.common.XmlViolationPolicy) + */ + public void setContentSpacePolicy(XmlViolationPolicy contentSpacePolicy) { + this.contentSpacePolicy = contentSpacePolicy; + if (driver != null) { + driver.setContentSpacePolicy(contentSpacePolicy); + } + } + + /** + * Whether the parser considers scripting to be enabled for noscript treatment. + * + * @return <code>true</code> if enabled + * @see nu.validator.htmlparser.impl.TreeBuilder#isScriptingEnabled() + */ + public boolean isScriptingEnabled() { + return scriptingEnabled; + } + + /** + * Sets whether the parser considers scripting to be enabled for noscript treatment. + * @param scriptingEnabled <code>true</code> to enable + * @see nu.validator.htmlparser.impl.TreeBuilder#setScriptingEnabled(boolean) + */ + public void setScriptingEnabled(boolean scriptingEnabled) { + this.scriptingEnabled = scriptingEnabled; + if (treeBuilder != null) { + treeBuilder.setScriptingEnabled(scriptingEnabled); + } + } + + /** + * Returns the doctype expectation. + * + * @return the doctypeExpectation + */ + public DoctypeExpectation getDoctypeExpectation() { + return doctypeExpectation; + } + + /** + * Sets the doctype expectation. 
+ * + * @param doctypeExpectation + * the doctypeExpectation to set + * @see nu.validator.htmlparser.impl.TreeBuilder#setDoctypeExpectation(nu.validator.htmlparser.common.DoctypeExpectation) + */ + public void setDoctypeExpectation(DoctypeExpectation doctypeExpectation) { + this.doctypeExpectation = doctypeExpectation; + if (treeBuilder != null) { + treeBuilder.setDoctypeExpectation(doctypeExpectation); + } + } + + /** + * Returns the document mode handler. + * + * @return the documentModeHandler + */ + public DocumentModeHandler getDocumentModeHandler() { + return documentModeHandler; + } + + /** + * Sets the document mode handler. + * + * @param documentModeHandler + * the documentModeHandler to set + * @see nu.validator.htmlparser.impl.TreeBuilder#setDocumentModeHandler(nu.validator.htmlparser.common.DocumentModeHandler) + */ + public void setDocumentModeHandler(DocumentModeHandler documentModeHandler) { + this.documentModeHandler = documentModeHandler; + } + + /** + * Returns the streamabilityViolationPolicy. + * + * @return the streamabilityViolationPolicy + */ + public XmlViolationPolicy getStreamabilityViolationPolicy() { + return streamabilityViolationPolicy; + } + + /** + * Sets the streamabilityViolationPolicy. + * + * @param streamabilityViolationPolicy + * the streamabilityViolationPolicy to set + */ + public void setStreamabilityViolationPolicy( + XmlViolationPolicy streamabilityViolationPolicy) { + this.streamabilityViolationPolicy = streamabilityViolationPolicy; + driver = null; + } + + /** + * Whether the HTML 4 mode reports boolean attributes in a way that repeats + * the name in the value. 
+ * @param html4ModeCompatibleWithXhtml1Schemata + */ + public void setHtml4ModeCompatibleWithXhtml1Schemata( + boolean html4ModeCompatibleWithXhtml1Schemata) { + this.html4ModeCompatibleWithXhtml1Schemata = html4ModeCompatibleWithXhtml1Schemata; + if (driver != null) { + driver.setHtml4ModeCompatibleWithXhtml1Schemata(html4ModeCompatibleWithXhtml1Schemata); + } + } + + /** + * Returns the <code>Locator</code> during parse. + * @return the <code>Locator</code> + */ + public Locator getDocumentLocator() { + return driver.getDocumentLocator(); + } + + /** + * Whether the HTML 4 mode reports boolean attributes in a way that repeats + * the name in the value. + * + * @return the html4ModeCompatibleWithXhtml1Schemata + */ + public boolean isHtml4ModeCompatibleWithXhtml1Schemata() { + return html4ModeCompatibleWithXhtml1Schemata; + } + + /** + * Whether <code>lang</code> is mapped to <code>xml:lang</code>. + * @param mappingLangToXmlLang + * @see nu.validator.htmlparser.impl.Tokenizer#setMappingLangToXmlLang(boolean) + */ + public void setMappingLangToXmlLang(boolean mappingLangToXmlLang) { + this.mappingLangToXmlLang = mappingLangToXmlLang; + if (driver != null) { + driver.setMappingLangToXmlLang(mappingLangToXmlLang); + } + } + + /** + * Whether <code>lang</code> is mapped to <code>xml:lang</code>. + * + * @return the mappingLangToXmlLang + */ + public boolean isMappingLangToXmlLang() { + return mappingLangToXmlLang; + } + + /** + * Whether the <code>xmlns</code> attribute on the root element is + * passed to through. (FATAL not allowed.) 
+ * @param xmlnsPolicy + * @see nu.validator.htmlparser.impl.Tokenizer#setXmlnsPolicy(nu.validator.htmlparser.common.XmlViolationPolicy) + */ + public void setXmlnsPolicy(XmlViolationPolicy xmlnsPolicy) { + if (xmlnsPolicy == XmlViolationPolicy.FATAL) { + throw new IllegalArgumentException("Can't use FATAL here."); + } + this.xmlnsPolicy = xmlnsPolicy; + if (driver != null) { + driver.setXmlnsPolicy(xmlnsPolicy); + } + } + + /** + * Returns the xmlnsPolicy. + * + * @return the xmlnsPolicy + */ + public XmlViolationPolicy getXmlnsPolicy() { + return xmlnsPolicy; + } + + /** + * Returns the commentPolicy. + * + * @return the commentPolicy + */ + public XmlViolationPolicy getCommentPolicy() { + return commentPolicy; + } + + /** + * Returns the contentNonXmlCharPolicy. + * + * @return the contentNonXmlCharPolicy + */ + public XmlViolationPolicy getContentNonXmlCharPolicy() { + return contentNonXmlCharPolicy; + } + + /** + * Returns the contentSpacePolicy. + * + * @return the contentSpacePolicy + */ + public XmlViolationPolicy getContentSpacePolicy() { + return contentSpacePolicy; + } + + /** + * @param reportingDoctype + * @see nu.validator.htmlparser.impl.TreeBuilder#setReportingDoctype(boolean) + */ + public void setReportingDoctype(boolean reportingDoctype) { + this.reportingDoctype = reportingDoctype; + if (treeBuilder != null) { + treeBuilder.setReportingDoctype(reportingDoctype); + } + } + + /** + * Returns the reportingDoctype. + * + * @return the reportingDoctype + */ + public boolean isReportingDoctype() { + return reportingDoctype; + } + + /** + * The policy for non-NCName element and attribute names. 
+ * @param namePolicy + * @see nu.validator.htmlparser.impl.Tokenizer#setNamePolicy(nu.validator.htmlparser.common.XmlViolationPolicy) + */ + public void setNamePolicy(XmlViolationPolicy namePolicy) { + this.namePolicy = namePolicy; + if (driver != null) { + driver.setNamePolicy(namePolicy); + treeBuilder.setNamePolicy(namePolicy); + } + } + + /** + * Sets the encoding sniffing heuristics. + * + * @param heuristics the heuristics to set + * @see nu.validator.htmlparser.impl.Tokenizer#setHeuristics(nu.validator.htmlparser.common.Heuristics) + */ + public void setHeuristics(Heuristics heuristics) { + this.heuristics = heuristics; + if (driver != null) { + driver.setHeuristics(heuristics); + } + } + + public Heuristics getHeuristics() { + return this.heuristics; + } + + /** + * This is a catch-all convenience method for setting name, xmlns, content space, + * content non-XML char and comment policies in one go. This does not affect the + * streamability policy or doctype reporting. + * + * @param xmlPolicy + */ + public void setXmlPolicy(XmlViolationPolicy xmlPolicy) { + setNamePolicy(xmlPolicy); + setXmlnsPolicy(xmlPolicy == XmlViolationPolicy.FATAL ? XmlViolationPolicy.ALTER_INFOSET : xmlPolicy); + setContentSpacePolicy(xmlPolicy); + setContentNonXmlCharPolicy(xmlPolicy); + setCommentPolicy(xmlPolicy); + } + + /** + * The policy for non-NCName element and attribute names. + * + * @return the namePolicy + */ + public XmlViolationPolicy getNamePolicy() { + return namePolicy; + } + + /** + * Does nothing. + * @deprecated + */ + public void setBogusXmlnsPolicy( + XmlViolationPolicy bogusXmlnsPolicy) { + } + + /** + * Returns <code>XmlViolationPolicy.ALTER_INFOSET</code>. 
+ * @deprecated + * @return <code>XmlViolationPolicy.ALTER_INFOSET</code> + */ + public XmlViolationPolicy getBogusXmlnsPolicy() { + return XmlViolationPolicy.ALTER_INFOSET; + } + + public void addCharacterHandler(CharacterHandler characterHandler) { + this.characterHandlers.add(characterHandler); + if (driver != null) { + driver.addCharacterHandler(characterHandler); + } + } + + + /** + * Sets whether comment nodes appear in the tree. + * @param ignoreComments <code>true</code> to ignore comments + * @see nu.validator.htmlparser.impl.TreeBuilder#setIgnoringComments(boolean) + */ + public void setIgnoringComments(boolean ignoreComments) { + treeBuilder.setIgnoringComments(ignoreComments); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/dom/package.html b/parser/html/java/htmlparser/src/nu/validator/htmlparser/dom/package.html new file mode 100644 index 000000000..d793bcf86 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/dom/package.html @@ -0,0 +1,29 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN"> +<html> +<head><title>Package Overview</title> +<!-- + Copyright (c) 2007 Henri Sivonen + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the "Software"), + to deal in the Software without restriction, including without limitation + the rights to use, copy, modify, merge, publish, distribute, sublicense, + and/or sell copies of the Software, and to permit persons to whom the + Software is furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. +--> +</head> +<body bgcolor="white"> +<p>This package provides an HTML5 parser that exposes the document using the DOM API.</p> +</body> +</html>
\ No newline at end of file diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/extra/ChardetSniffer.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/extra/ChardetSniffer.java new file mode 100644 index 000000000..a75750398 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/extra/ChardetSniffer.java @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +package nu.validator.htmlparser.extra; + +import java.io.IOException; +import java.nio.charset.UnsupportedCharsetException; + +import nu.validator.htmlparser.io.Encoding; + +import org.mozilla.intl.chardet.nsDetector; +import org.mozilla.intl.chardet.nsICharsetDetectionObserver; +import org.mozilla.intl.chardet.nsPSMDetector; + +import com.ibm.icu.text.CharsetDetector; + +public class ChardetSniffer implements nsICharsetDetectionObserver { + + private final byte[] source; + + private final int length; + + private Encoding returnValue = null; + + /** + * @param source + */ + public ChardetSniffer(final byte[] source, final int length) { + this.source = source; + this.length = length; + } + + public Encoding sniff() throws IOException { + nsDetector detector = new nsDetector(nsPSMDetector.ALL); + detector.Init(this); + detector.DoIt(source, length, false); + detector.DataEnd(); + if (returnValue != null && returnValue != Encoding.WINDOWS1252 && returnValue.isAsciiSuperset()) { + return returnValue; + } else { + return null; + } + } + + public static void main(String[] args) { + String[] detectable = CharsetDetector.getAllDetectableCharsets(); + for (int i = 0; i < detectable.length; i++) { + String charset = detectable[i]; + System.out.println(charset); + } + } + + public void Notify(String charsetName) { + try { + Encoding enc = Encoding.forName(charsetName); + Encoding actual = enc.getActualHtmlEncoding(); + if (actual != null) { + enc = actual; + } + returnValue = enc; + } catch (UnsupportedCharsetException e) { + returnValue = null; + } + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/extra/IcuDetectorSniffer.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/extra/IcuDetectorSniffer.java new file mode 100644 index 000000000..f3caab5c4 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/extra/IcuDetectorSniffer.java @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2008 Mozilla Foundation + * + * 
Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +package nu.validator.htmlparser.extra; + +import java.io.IOException; +import java.io.InputStream; + +import nu.validator.htmlparser.common.ByteReadable; +import nu.validator.htmlparser.io.Encoding; + +import com.ibm.icu.text.CharsetDetector; +import com.ibm.icu.text.CharsetMatch; + +public class IcuDetectorSniffer extends InputStream { + + private final ByteReadable source; + + /** + * @param source + */ + public IcuDetectorSniffer(final ByteReadable source) { + this.source = source; + } + + @Override + public int read() throws IOException { + return source.readByte(); + } + + public Encoding sniff() throws IOException { + try { + CharsetDetector detector = new CharsetDetector(); + detector.setText(this); + CharsetMatch match = detector.detect(); + Encoding enc = Encoding.forName(match.getName()); + Encoding actual = enc.getActualHtmlEncoding(); + if (actual != null) { + enc = actual; + } + if (enc != Encoding.WINDOWS1252 && enc.isAsciiSuperset()) { + return enc; + } else { + return null; + } + } catch (Exception e) { + return null; + } + } + + public static void main(String[] args) { + String[] detectable = CharsetDetector.getAllDetectableCharsets(); + for (int i = 0; i < detectable.length; i++) { + String charset = detectable[i]; + System.out.println(charset); + } + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/extra/NormalizationChecker.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/extra/NormalizationChecker.java new file mode 100644 index 000000000..45df62fb7 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/extra/NormalizationChecker.java @@ -0,0 +1,268 @@ +/* + * Copyright (c) 2006, 2007 Henri Sivonen + * Copyright (c) 2007 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * 
the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.extra; + +import nu.validator.htmlparser.common.CharacterHandler; + +import org.xml.sax.ErrorHandler; +import org.xml.sax.Locator; +import org.xml.sax.SAXException; +import org.xml.sax.SAXParseException; + +import com.ibm.icu.lang.UCharacter; +import com.ibm.icu.text.Normalizer; +import com.ibm.icu.text.UnicodeSet; + +/** + * @version $Id$ + * @author hsivonen + */ +public final class NormalizationChecker implements CharacterHandler { + + private ErrorHandler errorHandler; + + private Locator locator; + + /** + * A thread-safe set of composing characters as per Charmod Norm. + */ + @SuppressWarnings("deprecation") + private static final UnicodeSet COMPOSING_CHARACTERS = (UnicodeSet) new UnicodeSet( + "[[:nfc_qc=maybe:][:^ccc=0:]]").freeze(); + + // see http://sourceforge.net/mailarchive/message.php?msg_id=37279908 + + /** + * A buffer for holding sequences overlap the SAX buffer boundary. + */ + private char[] buf = new char[128]; + + /** + * A holder for the original buffer (for the memory leak prevention + * mechanism). 
+ */ + private char[] bufHolder = null; + + /** + * The current used length of the buffer, i.e. the index of the first slot + * that does not hold current data. + */ + private int pos; + + /** + * Indicates whether the checker the next call to <code>characters()</code> + * is the first call in a run. + */ + private boolean atStartOfRun; + + /** + * Indicates whether the current run has already caused an error. + */ + private boolean alreadyComplainedAboutThisRun; + + /** + * Emit an error. The locator is used. + * + * @param message the error message + * @throws SAXException if something goes wrong + */ + public void err(String message) throws SAXException { + if (errorHandler != null) { + SAXParseException spe = new SAXParseException(message, locator); + errorHandler.error(spe); + } + } + + /** + * Returns <code>true</code> if the argument is a composing BMP character + * or a surrogate and <code>false</code> otherwise. + * + * @param c a UTF-16 code unit + * @return <code>true</code> if the argument is a composing BMP character + * or a surrogate and <code>false</code> otherwise + */ + private static boolean isComposingCharOrSurrogate(char c) { + if (UCharacter.isHighSurrogate(c) || UCharacter.isLowSurrogate(c)) { + return true; + } + return isComposingChar(c); + } + + /** + * Returns <code>true</code> if the argument is a composing character + * and <code>false</code> otherwise. + * + * @param c a Unicode code point + * @return <code>true</code> if the argument is a composing character + * <code>false</code> otherwise + */ + private static boolean isComposingChar(int c) { + return COMPOSING_CHARACTERS.contains(c); + } + + /** + * Constructor with mode selection. + * + * @param sourceTextMode whether the source text-related messages + * should be enabled. 
+ */ + public NormalizationChecker(Locator locator) { + super(); + start(); + } + + /** + * @see nu.validator.htmlparser.common.CharacterHandler#start() + */ + public void start() { + atStartOfRun = true; + alreadyComplainedAboutThisRun = false; + pos = 0; + } + + /** + * @see nu.validator.htmlparser.common.CharacterHandler#characters(char[], int, int) + */ + public void characters(char[] ch, int start, int length) + throws SAXException { + if (alreadyComplainedAboutThisRun) { + return; + } + if (atStartOfRun) { + char c = ch[start]; + if (pos == 1) { + // there's a single high surrogate in buf + if (isComposingChar(UCharacter.getCodePoint(buf[0], c))) { + err("Text run starts with a composing character."); + } + atStartOfRun = false; + } else { + if (length == 1 && UCharacter.isHighSurrogate(c)) { + buf[0] = c; + pos = 1; + return; + } else { + if (UCharacter.isHighSurrogate(c)) { + if (isComposingChar(UCharacter.getCodePoint(c, + ch[start + 1]))) { + err("Text run starts with a composing character."); + } + } else { + if (isComposingCharOrSurrogate(c)) { + err("Text run starts with a composing character."); + } + } + atStartOfRun = false; + } + } + } + int i = start; + int stop = start + length; + if (pos > 0) { + // there's stuff in buf + while (i < stop && isComposingCharOrSurrogate(ch[i])) { + i++; + } + appendToBuf(ch, start, i); + if (i == stop) { + return; + } else { + if (!Normalizer.isNormalized(buf, 0, pos, Normalizer.NFC, 0)) { + errAboutTextRun(); + } + pos = 0; + } + } + if (i < stop) { + start = i; + i = stop - 1; + while (i > start && isComposingCharOrSurrogate(ch[i])) { + i--; + } + if (i > start) { + if (!Normalizer.isNormalized(ch, start, i, Normalizer.NFC, 0)) { + errAboutTextRun(); + } + } + appendToBuf(ch, i, stop); + } + } + + /** + * Emits an error stating that the current text run or the source + * text is not in NFC. 
+ * + * @throws SAXException if the <code>ErrorHandler</code> throws + */ + private void errAboutTextRun() throws SAXException { + err("Source text is not in Unicode Normalization Form C."); + alreadyComplainedAboutThisRun = true; + } + + /** + * Appends a slice of an UTF-16 code unit array to the internal + * buffer. + * + * @param ch the array from which to copy + * @param start the index of the first element that is copied + * @param end the index of the first element that is not copied + */ + private void appendToBuf(char[] ch, int start, int end) { + if (start == end) { + return; + } + int neededBufLen = pos + (end - start); + if (neededBufLen > buf.length) { + char[] newBuf = new char[neededBufLen]; + System.arraycopy(buf, 0, newBuf, 0, pos); + if (bufHolder == null) { + bufHolder = buf; // keep the original around + } + buf = newBuf; + } + System.arraycopy(ch, start, buf, pos, end - start); + pos += (end - start); + } + + /** + * @see nu.validator.htmlparser.common.CharacterHandler#end() + */ + public void end() throws SAXException { + if (!alreadyComplainedAboutThisRun + && !Normalizer.isNormalized(buf, 0, pos, Normalizer.NFC, 0)) { + errAboutTextRun(); + } + if (bufHolder != null) { + // restore the original small buffer to avoid leaking + // memory if this checker is recycled + buf = bufHolder; + bufHolder = null; + } + } + + public void setErrorHandler(ErrorHandler errorHandler) { + this.errorHandler = errorHandler; + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/AttributeName.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/AttributeName.java new file mode 100644 index 000000000..48d82036c --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/AttributeName.java @@ -0,0 +1,2475 @@ +/* + * Copyright (c) 2008-2011 Mozilla Foundation + * Copyright (c) 2018-2020 Moonchild Productions + * Copyright (c) 2020 Binary Outcast + * + * Permission is hereby granted, free of charge, to 
any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +package nu.validator.htmlparser.impl; + +import java.util.Arrays; + +import nu.validator.htmlparser.annotation.Local; +import nu.validator.htmlparser.annotation.NoLength; +import nu.validator.htmlparser.annotation.NsUri; +import nu.validator.htmlparser.annotation.Prefix; +import nu.validator.htmlparser.annotation.QName; +import nu.validator.htmlparser.annotation.Virtual; +import nu.validator.htmlparser.common.Interner; + +public final class AttributeName +// Uncomment to regenerate +// implements Comparable<AttributeName> +{ + // [NOCPP[ + + public static final int NCNAME_HTML = 1; + + public static final int NCNAME_FOREIGN = (1 << 1) | (1 << 2); + + public static final int NCNAME_LANG = (1 << 3); + + public static final int IS_XMLNS = (1 << 4); + + public static final int CASE_FOLDED = (1 << 5); + + public static final int BOOLEAN = (1 << 6); + + // ]NOCPP] + + /** + * An array representing no namespace regardless of namespace mode (HTML, + * SVG, MathML, lang-mapping HTML) used. + */ + static final @NoLength @NsUri String[] ALL_NO_NS = { "", "", "", + // [NOCPP[ + "" + // ]NOCPP] + }; + + /** + * An array that has no namespace for the HTML mode but the XMLNS namespace + * for the SVG and MathML modes. + */ + private static final @NoLength @NsUri String[] XMLNS_NS = { "", + "http://www.w3.org/2000/xmlns/", "http://www.w3.org/2000/xmlns/", + // [NOCPP[ + "" + // ]NOCPP] + }; + + /** + * An array that has no namespace for the HTML mode but the XML namespace + * for the SVG and MathML modes. + */ + private static final @NoLength @NsUri String[] XML_NS = { "", + "http://www.w3.org/XML/1998/namespace", + "http://www.w3.org/XML/1998/namespace", + // [NOCPP[ + "" + // ]NOCPP] + }; + + /** + * An array that has no namespace for the HTML mode but the XLink namespace + * for the SVG and MathML modes. 
+ */ + private static final @NoLength @NsUri String[] XLINK_NS = { "", + "http://www.w3.org/1999/xlink", "http://www.w3.org/1999/xlink", + // [NOCPP[ + "" + // ]NOCPP] + }; + + // [NOCPP[ + /** + * An array that has no namespace for the HTML, SVG and MathML modes but has + * the XML namespace for the lang-mapping HTML mode. + */ + private static final @NoLength @NsUri String[] LANG_NS = { "", "", "", + "http://www.w3.org/XML/1998/namespace" }; + + // ]NOCPP] + + /** + * An array for no prefixes in any mode. + */ + static final @NoLength @Prefix String[] ALL_NO_PREFIX = { null, null, null, + // [NOCPP[ + null + // ]NOCPP] + }; + + /** + * An array for no prefix in the HTML mode and the <code>xmlns</code> + * prefix in the SVG and MathML modes. + */ + private static final @NoLength @Prefix String[] XMLNS_PREFIX = { null, + "xmlns", "xmlns", + // [NOCPP[ + null + // ]NOCPP] + }; + + /** + * An array for no prefix in the HTML mode and the <code>xlink</code> + * prefix in the SVG and MathML modes. + */ + private static final @NoLength @Prefix String[] XLINK_PREFIX = { null, + "xlink", "xlink", + // [NOCPP[ + null + // ]NOCPP] + }; + + /** + * An array for no prefix in the HTML mode and the <code>xml</code> prefix + * in the SVG and MathML modes. + */ + private static final @NoLength @Prefix String[] XML_PREFIX = { null, "xml", + "xml", + // [NOCPP[ + null + // ]NOCPP] + }; + + // [NOCPP[ + + /** + * An array for no prefix in the HTML, SVG and MathML modes and the + * <code>xml</code> prefix in the lang-mapping HTML mode. + */ + private static final @NoLength @Prefix String[] LANG_PREFIX = { null, null, + null, "xml" }; + + /** + * Computes the qualified names for the four modes: the local name itself + * where the mode has no prefix, otherwise the interned + * <code>prefix:local</code>. + * + * @param local + * the local names indexable by mode + * @param prefix + * the prefixes indexable by mode (<code>null</code> for no prefix) + * @return the qualified names indexable by mode + */ + private static @QName String[] COMPUTE_QNAME(String[] local, String[] prefix) { + @QName String[] arr = new String[4]; + for (int i = 0; i < arr.length; i++) { + if (prefix[i] == null) { + arr[i] = local[i]; + } else { + arr[i] = (prefix[i] + ':' + local[i]).intern(); + } + } + return arr; + } + + // ]NOCPP] + + /** + * An initialization helper for having one name in the SVG mode and + * another name in the other modes. 
+ * + * @param name + * the name for the non-SVG modes + * @param camel + * the name for the SVG mode + * @return the initialized name array + */ + private static @NoLength @Local String[] SVG_DIFFERENT(@Local String name, + @Local String camel) { + @NoLength @Local String[] arr = new String[4]; + arr[0] = name; + arr[1] = name; + arr[2] = camel; + // [NOCPP[ + arr[3] = name; + // ]NOCPP] + return arr; + } + + /** + * An initialization helper for having a one name in the MathML mode and + * another name in the other modes. + * + * @param name + * the name for the non-MathML modes + * @param camel + * the name for the MathML mode + * @return the initialized name array + */ + private static @NoLength @Local String[] MATH_DIFFERENT(@Local String name, + @Local String camel) { + @NoLength @Local String[] arr = new String[4]; + arr[0] = name; + arr[1] = camel; + arr[2] = name; + // [NOCPP[ + arr[3] = name; + // ]NOCPP] + return arr; + } + + /** + * An initialization helper for having a different local name in the HTML + * mode and the SVG and MathML modes. + * + * @param name + * the name for the HTML mode + * @param suffix + * the name for the SVG and MathML modes + * @return the initialized name array + */ + private static @NoLength @Local String[] COLONIFIED_LOCAL( + @Local String name, @Local String suffix) { + @NoLength @Local String[] arr = new String[4]; + arr[0] = name; + arr[1] = suffix; + arr[2] = suffix; + // [NOCPP[ + arr[3] = name; + // ]NOCPP] + return arr; + } + + /** + * An initialization helper for having the same local name in all modes. + * + * @param name + * the name + * @return the initialized name array + */ + static @NoLength @Local String[] SAME_LOCAL(@Local String name) { + @NoLength @Local String[] arr = new String[4]; + arr[0] = name; + arr[1] = name; + arr[2] = name; + // [NOCPP[ + arr[3] = name; + // ]NOCPP] + return arr; + } + + /** + * Returns an attribute name by buffer. 
+ * + * <p> + * C++ ownership: The return value is either released by the caller if the + * attribute is a duplicate or the ownership is transferred to + * HtmlAttributes and released upon clearing or destroying that object. + * + * @param buf + * the buffer + * @param offset + * start of the name in <code>buf</code>; passed on to the name + * comparison and creation but not to the hash computation, so + * only <code>0</code> is handled consistently (see the XXX note + * in the body) + * @param length + * length of data + * @param checkNcName + * whether to check ncnameness + * @param interner + * the interner passed on when a new local name is created + * @return an <code>AttributeName</code> corresponding to the argument data + */ + static AttributeName nameByBuffer(@NoLength char[] buf, int offset, + int length + // [NOCPP[ + , boolean checkNcName + // ]NOCPP] + , Interner interner) { + // XXX deal with offset + int hash = AttributeName.bufToHash(buf, length); + int index = Arrays.binarySearch(AttributeName.ATTRIBUTE_HASHES, hash); + if (index < 0) { + return AttributeName.createAttributeName( + Portability.newLocalNameFromBuffer(buf, offset, length, + interner) + // [NOCPP[ + , checkNcName + // ]NOCPP] + ); + } else { + AttributeName attributeName = AttributeName.ATTRIBUTE_NAMES[index]; + @Local String name = attributeName.getLocal(AttributeName.HTML); + if (!Portability.localEqualsBuffer(name, buf, offset, length)) { + return AttributeName.createAttributeName( + Portability.newLocalNameFromBuffer(buf, offset, length, + interner) + // [NOCPP[ + , checkNcName + // ]NOCPP] + ); + } + return attributeName; + } + } + + /** + * This method has to return a unique integer for each well-known + * lower-cased attribute name. + * + * <p> + * The hash mixes the length, the first code unit, up to four trailing + * code units and up to four leading code units. <code>buf[0]</code> is + * read unconditionally. + * + * @param buf + * the buffer holding the name, starting at index 0 + * @param len + * the number of code units in the name + * @return the hash value + */ + private static int bufToHash(@NoLength char[] buf, int len) { + int hash2 = 0; + int hash = len; + hash <<= 5; + hash += buf[0] - 0x60; + int j = len; + for (int i = 0; i < 4 && j > 0; i++) { + j--; + hash <<= 5; + hash += buf[j] - 0x60; + hash2 <<= 6; + hash2 += buf[i] - 0x5F; + } + return hash ^ hash2; + } + + /** + * The mode value for HTML. + */ + public static final int HTML = 0; + + /** + * The mode value for MathML. 
+ */ + public static final int MATHML = 1; + + /** + * The mode value for SVG. + */ + public static final int SVG = 2; + + // [NOCPP[ + + /** + * The mode value for lang-mapping HTML. + */ + public static final int HTML_LANG = 3; + + // ]NOCPP] + + /** + * The namespaces indexable by mode. + */ + private final @NsUri @NoLength String[] uri; + + /** + * The local names indexable by mode. + */ + private final @Local @NoLength String[] local; + + /** + * The prefixes indexable by mode. + */ + private final @Prefix @NoLength String[] prefix; + + // [NOCPP[ + + /** + * The bit flags: a combination of the <code>NCNAME_*</code>, + * <code>IS_XMLNS</code>, <code>CASE_FOLDED</code> and <code>BOOLEAN</code> + * constants. + */ + private final int flags; + + /** + * The qnames indexable by mode. + */ + private final @QName @NoLength String[] qName; + + // ]NOCPP] + + /** + * The startup-time constructor. + * + * @param uri + * the namespaces indexable by mode + * @param local + * the local names indexable by mode + * @param prefix + * the prefixes indexable by mode + * @param flags + * the bit flags (<code>NCNAME_*</code>, <code>IS_XMLNS</code>, + * <code>CASE_FOLDED</code>, <code>BOOLEAN</code>) + */ + protected AttributeName(@NsUri @NoLength String[] uri, + @Local @NoLength String[] local, @Prefix @NoLength String[] prefix + // [NOCPP[ + , int flags + // ]NOCPP] + ) { + this.uri = uri; + this.local = local; + this.prefix = prefix; + // [NOCPP[ + this.qName = COMPUTE_QNAME(local, prefix); + this.flags = flags; + // ]NOCPP] + } + + /** + * Creates an <code>AttributeName</code> for a local name. + * + * <p> + * A name starting with <code>xmlns:</code> gets only the + * <code>IS_XMLNS</code> flag; a name that fails the requested NCName check + * gets no flags; any other name gets all three NCName flags. + * + * @param name + * the name + * @param checkNcName + * whether to check ncnameness + * @return an <code>AttributeName</code> + */ + private static AttributeName createAttributeName(@Local String name + // [NOCPP[ + , boolean checkNcName + // ]NOCPP] + ) { + // [NOCPP[ + int flags = NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG; + if (name.startsWith("xmlns:")) { + flags = IS_XMLNS; + } else if (checkNcName && !NCName.isNCName(name)) { + flags = 0; + } + // ]NOCPP] + return new AttributeName(AttributeName.ALL_NO_NS, + AttributeName.SAME_LOCAL(name), ALL_NO_PREFIX, flags); + } + + /** + * Deletes runtime-allocated instances in C++. 
+ */ + @Virtual void release() { + // No-op in Java. + // Implement as |delete this;| in subclass. + } + + /** + * The C++ destructor. + */ + @SuppressWarnings("unused") @Virtual private void destructor() { + Portability.deleteArray(local); + } + + /** + * Clones the attribute using an interner. Returns <code>this</code> in Java + * and for non-dynamic instances in C++. + * + * @param interner + * an interner + * @return a clone + */ + @Virtual public AttributeName cloneAttributeName(Interner interner) { + return this; + } + + // [NOCPP[ + /** + * Creator for use when the XML violation policy requires an attribute name + * to be changed. + * + * @param name + * the name of the attribute to create + */ + static AttributeName create(@Local String name) { + return new AttributeName(AttributeName.ALL_NO_NS, + AttributeName.SAME_LOCAL(name), ALL_NO_PREFIX, + NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + } + + /** + * Queries whether this name is an XML 1.0 4th ed. NCName. + * + * @param mode + * the SVG/MathML/HTML mode + * @return <code>true</code> if this is an NCName in the given mode + */ + public boolean isNcName(int mode) { + return (flags & (1 << mode)) != 0; + } + + /** + * Queries whether this is an <code>xmlns</code> attribute. + * + * @return <code>true</code> if this is an <code>xmlns</code> attribute + */ + public boolean isXmlns() { + return (flags & IS_XMLNS) != 0; + } + + /** + * Queries whether this attribute has a case-folded value in the HTML4 mode + * of the parser. 
+ * + * @return <code>true</code> if the value is case-folded + */ + boolean isCaseFolded() { + return (flags & CASE_FOLDED) != 0; + } + + boolean isBoolean() { + return (flags & BOOLEAN) != 0; + } + + public @QName String getQName(int mode) { + return qName[mode]; + } + + // ]NOCPP] + + public @NsUri String getUri(int mode) { + return uri[mode]; + } + + public @Local String getLocal(int mode) { + return local[mode]; + } + + public @Prefix String getPrefix(int mode) { + return prefix[mode]; + } + + boolean equalsAnother(AttributeName another) { + return this.getLocal(AttributeName.HTML) == another.getLocal(AttributeName.HTML); + } + + // START CODE ONLY USED FOR GENERATING CODE uncomment to regenerate + +// /** +// * @see java.lang.Object#toString() +// */ +// @Override public String toString() { +// return "(" + formatNs() + ", " + formatLocal() + ", " + formatPrefix() +// + ", " + formatFlags() + ")"; +// } +// +// private String formatFlags() { +// StringBuilder builder = new StringBuilder(); +// if ((flags & NCNAME_HTML) != 0) { +// if (builder.length() != 0) { +// builder.append(" | "); +// } +// builder.append("NCNAME_HTML"); +// } +// if ((flags & NCNAME_FOREIGN) != 0) { +// if (builder.length() != 0) { +// builder.append(" | "); +// } +// builder.append("NCNAME_FOREIGN"); +// } +// if ((flags & NCNAME_LANG) != 0) { +// if (builder.length() != 0) { +// builder.append(" | "); +// } +// builder.append("NCNAME_LANG"); +// } +// if (isXmlns()) { +// if (builder.length() != 0) { +// builder.append(" | "); +// } +// builder.append("IS_XMLNS"); +// } +// if (isCaseFolded()) { +// if (builder.length() != 0) { +// builder.append(" | "); +// } +// builder.append("CASE_FOLDED"); +// } +// if (isBoolean()) { +// if (builder.length() != 0) { +// builder.append(" | "); +// } +// builder.append("BOOLEAN"); +// } +// if (builder.length() == 0) { +// return "0"; +// } +// return builder.toString(); +// } +// +// public int compareTo(AttributeName other) { +// int thisHash = 
this.hash(); +// int otherHash = other.hash(); +// if (thisHash < otherHash) { +// return -1; +// } else if (thisHash == otherHash) { +// return 0; +// } else { +// return 1; +// } +// } +// +// private String formatPrefix() { +// if (prefix[0] == null && prefix[1] == null && prefix[2] == null +// && prefix[3] == null) { +// return "ALL_NO_PREFIX"; +// } else if (prefix[0] == null && prefix[1] == prefix[2] +// && prefix[3] == null) { +// if ("xmlns".equals(prefix[1])) { +// return "XMLNS_PREFIX"; +// } else if ("xml".equals(prefix[1])) { +// return "XML_PREFIX"; +// } else if ("xlink".equals(prefix[1])) { +// return "XLINK_PREFIX"; +// } else { +// throw new IllegalStateException(); +// } +// } else if (prefix[0] == null && prefix[1] == null && prefix[2] == null +// && prefix[3] == "xml") { +// return "LANG_PREFIX"; +// } else { +// throw new IllegalStateException(); +// } +// } +// +// private String formatLocal() { +// if (local[0] == local[1] && local[0] == local[3] +// && local[0] != local[2]) { +// return "SVG_DIFFERENT(\"" + local[0] + "\", \"" + local[2] + "\")"; +// } +// if (local[0] == local[2] && local[0] == local[3] +// && local[0] != local[1]) { +// return "MATH_DIFFERENT(\"" + local[0] + "\", \"" + local[1] + "\")"; +// } +// if (local[0] == local[3] && local[1] == local[2] +// && local[0] != local[1]) { +// return "COLONIFIED_LOCAL(\"" + local[0] + "\", \"" + local[1] +// + "\")"; +// } +// for (int i = 1; i < local.length; i++) { +// if (local[0] != local[i]) { +// throw new IllegalStateException(); +// } +// } +// return "SAME_LOCAL(\"" + local[0] + "\")"; +// } +// +// private String formatNs() { +// if (uri[0] == "" && uri[1] == "" && uri[2] == "" && uri[3] == "") { +// return "ALL_NO_NS"; +// } else if (uri[0] == "" && uri[1] == uri[2] && uri[3] == "") { +// if ("http://www.w3.org/2000/xmlns/".equals(uri[1])) { +// return "XMLNS_NS"; +// } else if ("http://www.w3.org/XML/1998/namespace".equals(uri[1])) { +// return "XML_NS"; +// } else if 
("http://www.w3.org/1999/xlink".equals(uri[1])) { +// return "XLINK_NS"; +// } else { +// throw new IllegalStateException(); +// } +// } else if (uri[0] == "" && uri[1] == "" && uri[2] == "" +// && uri[3] == "http://www.w3.org/XML/1998/namespace") { +// return "LANG_NS"; +// } else { +// throw new IllegalStateException(); +// } +// } +// +// private String constName() { +// String name = getLocal(HTML); +// char[] buf = new char[name.length()]; +// for (int i = 0; i < name.length(); i++) { +// char c = name.charAt(i); +// if (c == '-' || c == ':') { +// buf[i] = '_'; +// } else if (c >= 'a' && c <= 'z') { +// buf[i] = (char) (c - 0x20); +// } else { +// buf[i] = c; +// } +// } +// return new String(buf); +// } +// +// private int hash() { +// String name = getLocal(HTML); +// return bufToHash(name.toCharArray(), name.length()); +// } +// +// /** +// * Regenerate self +// * +// * @param args +// */ +// public static void main(String[] args) { +// Arrays.sort(ATTRIBUTE_NAMES); +// for (int i = 1; i < ATTRIBUTE_NAMES.length; i++) { +// if (ATTRIBUTE_NAMES[i].hash() == ATTRIBUTE_NAMES[i - 1].hash()) { +// System.err.println("Hash collision: " +// + ATTRIBUTE_NAMES[i].getLocal(HTML) + ", " +// + ATTRIBUTE_NAMES[i - 1].getLocal(HTML)); +// return; +// } +// } +// for (int i = 0; i < ATTRIBUTE_NAMES.length; i++) { +// AttributeName att = ATTRIBUTE_NAMES[i]; +// System.out.println("public static final AttributeName " +// + att.constName() + " = new AttributeName" + att.toString() +// + ";"); +// } +// System.out.println("private final static @NoLength AttributeName[] ATTRIBUTE_NAMES = {"); +// for (int i = 0; i < ATTRIBUTE_NAMES.length; i++) { +// AttributeName att = ATTRIBUTE_NAMES[i]; +// System.out.println(att.constName() + ","); +// } +// System.out.println("};"); +// System.out.println("private final static int[] ATTRIBUTE_HASHES = {"); +// for (int i = 0; i < ATTRIBUTE_NAMES.length; i++) { +// AttributeName att = ATTRIBUTE_NAMES[i]; +// 
System.out.println(Integer.toString(att.hash()) + ","); +// } +// System.out.println("};"); +// } + + // START GENERATED CODE + public static final AttributeName D = new AttributeName(ALL_NO_NS, SAME_LOCAL("d"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName K = new AttributeName(ALL_NO_NS, SAME_LOCAL("k"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName R = new AttributeName(ALL_NO_NS, SAME_LOCAL("r"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName X = new AttributeName(ALL_NO_NS, SAME_LOCAL("x"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName Y = new AttributeName(ALL_NO_NS, SAME_LOCAL("y"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName Z = new AttributeName(ALL_NO_NS, SAME_LOCAL("z"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName BY = new AttributeName(ALL_NO_NS, SAME_LOCAL("by"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName CX = new AttributeName(ALL_NO_NS, SAME_LOCAL("cx"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName CY = new AttributeName(ALL_NO_NS, SAME_LOCAL("cy"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName DX = new AttributeName(ALL_NO_NS, SAME_LOCAL("dx"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName DY = new AttributeName(ALL_NO_NS, SAME_LOCAL("dy"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName G2 = new AttributeName(ALL_NO_NS, SAME_LOCAL("g2"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName G1 = new AttributeName(ALL_NO_NS, SAME_LOCAL("g1"), ALL_NO_PREFIX, 
NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName FX = new AttributeName(ALL_NO_NS, SAME_LOCAL("fx"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName FY = new AttributeName(ALL_NO_NS, SAME_LOCAL("fy"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName K4 = new AttributeName(ALL_NO_NS, SAME_LOCAL("k4"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName K2 = new AttributeName(ALL_NO_NS, SAME_LOCAL("k2"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName K3 = new AttributeName(ALL_NO_NS, SAME_LOCAL("k3"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName K1 = new AttributeName(ALL_NO_NS, SAME_LOCAL("k1"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ID = new AttributeName(ALL_NO_NS, SAME_LOCAL("id"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName IN = new AttributeName(ALL_NO_NS, SAME_LOCAL("in"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName U2 = new AttributeName(ALL_NO_NS, SAME_LOCAL("u2"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName U1 = new AttributeName(ALL_NO_NS, SAME_LOCAL("u1"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName RT = new AttributeName(ALL_NO_NS, SAME_LOCAL("rt"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName RX = new AttributeName(ALL_NO_NS, SAME_LOCAL("rx"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName RY = new AttributeName(ALL_NO_NS, SAME_LOCAL("ry"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final 
AttributeName TO = new AttributeName(ALL_NO_NS, SAME_LOCAL("to"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName Y2 = new AttributeName(ALL_NO_NS, SAME_LOCAL("y2"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName Y1 = new AttributeName(ALL_NO_NS, SAME_LOCAL("y1"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName X1 = new AttributeName(ALL_NO_NS, SAME_LOCAL("x1"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName X2 = new AttributeName(ALL_NO_NS, SAME_LOCAL("x2"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ALT = new AttributeName(ALL_NO_NS, SAME_LOCAL("alt"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName DIR = new AttributeName(ALL_NO_NS, SAME_LOCAL("dir"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED); + public static final AttributeName DUR = new AttributeName(ALL_NO_NS, SAME_LOCAL("dur"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName END = new AttributeName(ALL_NO_NS, SAME_LOCAL("end"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName FOR = new AttributeName(ALL_NO_NS, SAME_LOCAL("for"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName IN2 = new AttributeName(ALL_NO_NS, SAME_LOCAL("in2"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName MAX = new AttributeName(ALL_NO_NS, SAME_LOCAL("max"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName MIN = new AttributeName(ALL_NO_NS, SAME_LOCAL("min"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName LOW = new 
AttributeName(ALL_NO_NS, SAME_LOCAL("low"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName REL = new AttributeName(ALL_NO_NS, SAME_LOCAL("rel"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName REV = new AttributeName(ALL_NO_NS, SAME_LOCAL("rev"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SRC = new AttributeName(ALL_NO_NS, SAME_LOCAL("src"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName AXIS = new AttributeName(ALL_NO_NS, SAME_LOCAL("axis"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ABBR = new AttributeName(ALL_NO_NS, SAME_LOCAL("abbr"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName BBOX = new AttributeName(ALL_NO_NS, SAME_LOCAL("bbox"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName CITE = new AttributeName(ALL_NO_NS, SAME_LOCAL("cite"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName CODE = new AttributeName(ALL_NO_NS, SAME_LOCAL("code"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName BIAS = new AttributeName(ALL_NO_NS, SAME_LOCAL("bias"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName COLS = new AttributeName(ALL_NO_NS, SAME_LOCAL("cols"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName CLIP = new AttributeName(ALL_NO_NS, SAME_LOCAL("clip"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName CHAR = new AttributeName(ALL_NO_NS, SAME_LOCAL("char"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName BASE = new AttributeName(ALL_NO_NS, 
SAME_LOCAL("base"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName EDGE = new AttributeName(ALL_NO_NS, SAME_LOCAL("edge"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName DATA = new AttributeName(ALL_NO_NS, SAME_LOCAL("data"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName FILL = new AttributeName(ALL_NO_NS, SAME_LOCAL("fill"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName FROM = new AttributeName(ALL_NO_NS, SAME_LOCAL("from"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName FORM = new AttributeName(ALL_NO_NS, SAME_LOCAL("form"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName FACE = new AttributeName(ALL_NO_NS, SAME_LOCAL("face"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName HIGH = new AttributeName(ALL_NO_NS, SAME_LOCAL("high"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName HREF = new AttributeName(ALL_NO_NS, SAME_LOCAL("href"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName OPEN = new AttributeName(ALL_NO_NS, SAME_LOCAL("open"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ICON = new AttributeName(ALL_NO_NS, SAME_LOCAL("icon"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName NAME = new AttributeName(ALL_NO_NS, SAME_LOCAL("name"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName MODE = new AttributeName(ALL_NO_NS, SAME_LOCAL("mode"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName MASK = new AttributeName(ALL_NO_NS, SAME_LOCAL("mask"), 
ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName LINK = new AttributeName(ALL_NO_NS, SAME_LOCAL("link"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName LANG = new AttributeName(LANG_NS, SAME_LOCAL("lang"), LANG_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName LOOP = new AttributeName(ALL_NO_NS, SAME_LOCAL("loop"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName LIST = new AttributeName(ALL_NO_NS, SAME_LOCAL("list"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName TYPE = new AttributeName(ALL_NO_NS, SAME_LOCAL("type"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED); + public static final AttributeName WHEN = new AttributeName(ALL_NO_NS, SAME_LOCAL("when"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName WRAP = new AttributeName(ALL_NO_NS, SAME_LOCAL("wrap"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName TEXT = new AttributeName(ALL_NO_NS, SAME_LOCAL("text"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName PATH = new AttributeName(ALL_NO_NS, SAME_LOCAL("path"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName PING = new AttributeName(ALL_NO_NS, SAME_LOCAL("ping"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName REFX = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("refx", "refX"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName REFY = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("refy", "refY"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SIZE = new AttributeName(ALL_NO_NS, 
SAME_LOCAL("size"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SEED = new AttributeName(ALL_NO_NS, SAME_LOCAL("seed"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ROWS = new AttributeName(ALL_NO_NS, SAME_LOCAL("rows"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SPAN = new AttributeName(ALL_NO_NS, SAME_LOCAL("span"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName STEP = new AttributeName(ALL_NO_NS, SAME_LOCAL("step"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED); + public static final AttributeName ROLE = new AttributeName(ALL_NO_NS, SAME_LOCAL("role"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName XREF = new AttributeName(ALL_NO_NS, SAME_LOCAL("xref"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ASYNC = new AttributeName(ALL_NO_NS, SAME_LOCAL("async"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN); + public static final AttributeName ALINK = new AttributeName(ALL_NO_NS, SAME_LOCAL("alink"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ALIGN = new AttributeName(ALL_NO_NS, SAME_LOCAL("align"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED); + public static final AttributeName CLOSE = new AttributeName(ALL_NO_NS, SAME_LOCAL("close"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName COLOR = new AttributeName(ALL_NO_NS, SAME_LOCAL("color"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName CLASS = new AttributeName(ALL_NO_NS, SAME_LOCAL("class"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final 
AttributeName CLEAR = new AttributeName(ALL_NO_NS, SAME_LOCAL("clear"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED); + public static final AttributeName BEGIN = new AttributeName(ALL_NO_NS, SAME_LOCAL("begin"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName DEPTH = new AttributeName(ALL_NO_NS, SAME_LOCAL("depth"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName DEFER = new AttributeName(ALL_NO_NS, SAME_LOCAL("defer"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN); + public static final AttributeName FENCE = new AttributeName(ALL_NO_NS, SAME_LOCAL("fence"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName FRAME = new AttributeName(ALL_NO_NS, SAME_LOCAL("frame"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED); + public static final AttributeName ISMAP = new AttributeName(ALL_NO_NS, SAME_LOCAL("ismap"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN); + public static final AttributeName ONEND = new AttributeName(ALL_NO_NS, SAME_LOCAL("onend"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName INDEX = new AttributeName(ALL_NO_NS, SAME_LOCAL("index"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ORDER = new AttributeName(ALL_NO_NS, SAME_LOCAL("order"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName OTHER = new AttributeName(ALL_NO_NS, SAME_LOCAL("other"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONCUT = new AttributeName(ALL_NO_NS, SAME_LOCAL("oncut"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName NARGS = new AttributeName(ALL_NO_NS, SAME_LOCAL("nargs"), 
ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName MEDIA = new AttributeName(ALL_NO_NS, SAME_LOCAL("media"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName LABEL = new AttributeName(ALL_NO_NS, SAME_LOCAL("label"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName LOCAL = new AttributeName(ALL_NO_NS, SAME_LOCAL("local"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName WIDTH = new AttributeName(ALL_NO_NS, SAME_LOCAL("width"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName TITLE = new AttributeName(ALL_NO_NS, SAME_LOCAL("title"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName VLINK = new AttributeName(ALL_NO_NS, SAME_LOCAL("vlink"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName VALUE = new AttributeName(ALL_NO_NS, SAME_LOCAL("value"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SLOPE = new AttributeName(ALL_NO_NS, SAME_LOCAL("slope"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SHAPE = new AttributeName(ALL_NO_NS, SAME_LOCAL("shape"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED); + public static final AttributeName SCOPE = new AttributeName(ALL_NO_NS, SAME_LOCAL("scope"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED); + public static final AttributeName SCALE = new AttributeName(ALL_NO_NS, SAME_LOCAL("scale"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SPEED = new AttributeName(ALL_NO_NS, SAME_LOCAL("speed"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName STYLE = new 
AttributeName(ALL_NO_NS, SAME_LOCAL("style"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName RULES = new AttributeName(ALL_NO_NS, SAME_LOCAL("rules"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED); + public static final AttributeName STEMH = new AttributeName(ALL_NO_NS, SAME_LOCAL("stemh"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SIZES = new AttributeName(ALL_NO_NS, SAME_LOCAL("sizes"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName STEMV = new AttributeName(ALL_NO_NS, SAME_LOCAL("stemv"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName START = new AttributeName(ALL_NO_NS, SAME_LOCAL("start"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName XMLNS = new AttributeName(XMLNS_NS, SAME_LOCAL("xmlns"), ALL_NO_PREFIX, IS_XMLNS); + public static final AttributeName ACCEPT = new AttributeName(ALL_NO_NS, SAME_LOCAL("accept"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ACCENT = new AttributeName(ALL_NO_NS, SAME_LOCAL("accent"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ASCENT = new AttributeName(ALL_NO_NS, SAME_LOCAL("ascent"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ACTIVE = new AttributeName(ALL_NO_NS, SAME_LOCAL("active"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN); + public static final AttributeName ALTIMG = new AttributeName(ALL_NO_NS, SAME_LOCAL("altimg"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ACTION = new AttributeName(ALL_NO_NS, SAME_LOCAL("action"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final 
AttributeName BORDER = new AttributeName(ALL_NO_NS, SAME_LOCAL("border"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName CURSOR = new AttributeName(ALL_NO_NS, SAME_LOCAL("cursor"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName COORDS = new AttributeName(ALL_NO_NS, SAME_LOCAL("coords"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName FILTER = new AttributeName(ALL_NO_NS, SAME_LOCAL("filter"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName FORMAT = new AttributeName(ALL_NO_NS, SAME_LOCAL("format"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName HIDDEN = new AttributeName(ALL_NO_NS, SAME_LOCAL("hidden"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName HSPACE = new AttributeName(ALL_NO_NS, SAME_LOCAL("hspace"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName HEIGHT = new AttributeName(ALL_NO_NS, SAME_LOCAL("height"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONMOVE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onmove"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONLOAD = new AttributeName(ALL_NO_NS, SAME_LOCAL("onload"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONDRAG = new AttributeName(ALL_NO_NS, SAME_LOCAL("ondrag"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ORIGIN = new AttributeName(ALL_NO_NS, SAME_LOCAL("origin"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONZOOM = new AttributeName(ALL_NO_NS, SAME_LOCAL("onzoom"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | 
NCNAME_LANG); + public static final AttributeName ONHELP = new AttributeName(ALL_NO_NS, SAME_LOCAL("onhelp"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONSTOP = new AttributeName(ALL_NO_NS, SAME_LOCAL("onstop"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONDROP = new AttributeName(ALL_NO_NS, SAME_LOCAL("ondrop"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONBLUR = new AttributeName(ALL_NO_NS, SAME_LOCAL("onblur"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName OBJECT = new AttributeName(ALL_NO_NS, SAME_LOCAL("object"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName OFFSET = new AttributeName(ALL_NO_NS, SAME_LOCAL("offset"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ORIENT = new AttributeName(ALL_NO_NS, SAME_LOCAL("orient"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONCOPY = new AttributeName(ALL_NO_NS, SAME_LOCAL("oncopy"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName NOWRAP = new AttributeName(ALL_NO_NS, SAME_LOCAL("nowrap"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN); + public static final AttributeName NOHREF = new AttributeName(ALL_NO_NS, SAME_LOCAL("nohref"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN); + public static final AttributeName MACROS = new AttributeName(ALL_NO_NS, SAME_LOCAL("macros"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName METHOD = new AttributeName(ALL_NO_NS, SAME_LOCAL("method"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED); + public static final AttributeName LOWSRC = new 
AttributeName(ALL_NO_NS, SAME_LOCAL("lowsrc"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName LSPACE = new AttributeName(ALL_NO_NS, SAME_LOCAL("lspace"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName LQUOTE = new AttributeName(ALL_NO_NS, SAME_LOCAL("lquote"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName USEMAP = new AttributeName(ALL_NO_NS, SAME_LOCAL("usemap"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName WIDTHS = new AttributeName(ALL_NO_NS, SAME_LOCAL("widths"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName TARGET = new AttributeName(ALL_NO_NS, SAME_LOCAL("target"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName VALUES = new AttributeName(ALL_NO_NS, SAME_LOCAL("values"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName VALIGN = new AttributeName(ALL_NO_NS, SAME_LOCAL("valign"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED); + public static final AttributeName VSPACE = new AttributeName(ALL_NO_NS, SAME_LOCAL("vspace"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName POSTER = new AttributeName(ALL_NO_NS, SAME_LOCAL("poster"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName POINTS = new AttributeName(ALL_NO_NS, SAME_LOCAL("points"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName PROMPT = new AttributeName(ALL_NO_NS, SAME_LOCAL("prompt"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SRCDOC = new AttributeName(ALL_NO_NS, SAME_LOCAL("srcdoc"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + 
public static final AttributeName SCOPED = new AttributeName(ALL_NO_NS, SAME_LOCAL("scoped"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName STRING = new AttributeName(ALL_NO_NS, SAME_LOCAL("string"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SCHEME = new AttributeName(ALL_NO_NS, SAME_LOCAL("scheme"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName STROKE = new AttributeName(ALL_NO_NS, SAME_LOCAL("stroke"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName RADIUS = new AttributeName(ALL_NO_NS, SAME_LOCAL("radius"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName RESULT = new AttributeName(ALL_NO_NS, SAME_LOCAL("result"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName REPEAT = new AttributeName(ALL_NO_NS, SAME_LOCAL("repeat"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SRCSET = new AttributeName(ALL_NO_NS, SAME_LOCAL("srcset"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName RSPACE = new AttributeName(ALL_NO_NS, SAME_LOCAL("rspace"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ROTATE = new AttributeName(ALL_NO_NS, SAME_LOCAL("rotate"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName RQUOTE = new AttributeName(ALL_NO_NS, SAME_LOCAL("rquote"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ALTTEXT = new AttributeName(ALL_NO_NS, SAME_LOCAL("alttext"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARCHIVE = new AttributeName(ALL_NO_NS, SAME_LOCAL("archive"), ALL_NO_PREFIX, NCNAME_HTML | 
NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName AZIMUTH = new AttributeName(ALL_NO_NS, SAME_LOCAL("azimuth"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName CLOSURE = new AttributeName(ALL_NO_NS, SAME_LOCAL("closure"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName CHECKED = new AttributeName(ALL_NO_NS, SAME_LOCAL("checked"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN); + public static final AttributeName CLASSID = new AttributeName(ALL_NO_NS, SAME_LOCAL("classid"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName CHAROFF = new AttributeName(ALL_NO_NS, SAME_LOCAL("charoff"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName BGCOLOR = new AttributeName(ALL_NO_NS, SAME_LOCAL("bgcolor"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName COLSPAN = new AttributeName(ALL_NO_NS, SAME_LOCAL("colspan"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName CHARSET = new AttributeName(ALL_NO_NS, SAME_LOCAL("charset"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName COMPACT = new AttributeName(ALL_NO_NS, SAME_LOCAL("compact"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN); + public static final AttributeName CONTENT = new AttributeName(ALL_NO_NS, SAME_LOCAL("content"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ENCTYPE = new AttributeName(ALL_NO_NS, SAME_LOCAL("enctype"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED); + public static final AttributeName DATASRC = new AttributeName(ALL_NO_NS, SAME_LOCAL("datasrc"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public 
static final AttributeName DATAFLD = new AttributeName(ALL_NO_NS, SAME_LOCAL("datafld"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName DECLARE = new AttributeName(ALL_NO_NS, SAME_LOCAL("declare"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN); + public static final AttributeName DISPLAY = new AttributeName(ALL_NO_NS, SAME_LOCAL("display"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName DIVISOR = new AttributeName(ALL_NO_NS, SAME_LOCAL("divisor"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName DEFAULT = new AttributeName(ALL_NO_NS, SAME_LOCAL("default"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN); + public static final AttributeName DESCENT = new AttributeName(ALL_NO_NS, SAME_LOCAL("descent"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName KERNING = new AttributeName(ALL_NO_NS, SAME_LOCAL("kerning"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName HANGING = new AttributeName(ALL_NO_NS, SAME_LOCAL("hanging"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName HEADERS = new AttributeName(ALL_NO_NS, SAME_LOCAL("headers"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONPASTE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onpaste"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONCLICK = new AttributeName(ALL_NO_NS, SAME_LOCAL("onclick"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName OPTIMUM = new AttributeName(ALL_NO_NS, SAME_LOCAL("optimum"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONBEGIN = new 
AttributeName(ALL_NO_NS, SAME_LOCAL("onbegin"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONKEYUP = new AttributeName(ALL_NO_NS, SAME_LOCAL("onkeyup"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONFOCUS = new AttributeName(ALL_NO_NS, SAME_LOCAL("onfocus"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONERROR = new AttributeName(ALL_NO_NS, SAME_LOCAL("onerror"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONINPUT = new AttributeName(ALL_NO_NS, SAME_LOCAL("oninput"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONABORT = new AttributeName(ALL_NO_NS, SAME_LOCAL("onabort"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONSTART = new AttributeName(ALL_NO_NS, SAME_LOCAL("onstart"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONRESET = new AttributeName(ALL_NO_NS, SAME_LOCAL("onreset"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName OPACITY = new AttributeName(ALL_NO_NS, SAME_LOCAL("opacity"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName NOSHADE = new AttributeName(ALL_NO_NS, SAME_LOCAL("noshade"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN); + public static final AttributeName MINSIZE = new AttributeName(ALL_NO_NS, SAME_LOCAL("minsize"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName MAXSIZE = new AttributeName(ALL_NO_NS, SAME_LOCAL("maxsize"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName LARGEOP = new AttributeName(ALL_NO_NS, SAME_LOCAL("largeop"), ALL_NO_PREFIX, NCNAME_HTML | 
NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName UNICODE = new AttributeName(ALL_NO_NS, SAME_LOCAL("unicode"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName TARGETX = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("targetx", "targetX"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName TARGETY = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("targety", "targetY"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName VIEWBOX = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("viewbox", "viewBox"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName VERSION = new AttributeName(ALL_NO_NS, SAME_LOCAL("version"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName PATTERN = new AttributeName(ALL_NO_NS, SAME_LOCAL("pattern"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName PROFILE = new AttributeName(ALL_NO_NS, SAME_LOCAL("profile"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SPACING = new AttributeName(ALL_NO_NS, SAME_LOCAL("spacing"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName RESTART = new AttributeName(ALL_NO_NS, SAME_LOCAL("restart"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ROWSPAN = new AttributeName(ALL_NO_NS, SAME_LOCAL("rowspan"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SANDBOX = new AttributeName(ALL_NO_NS, SAME_LOCAL("sandbox"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SUMMARY = new AttributeName(ALL_NO_NS, SAME_LOCAL("summary"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final 
AttributeName STANDBY = new AttributeName(ALL_NO_NS, SAME_LOCAL("standby"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName REPLACE = new AttributeName(ALL_NO_NS, SAME_LOCAL("replace"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED); + public static final AttributeName AUTOPLAY = new AttributeName(ALL_NO_NS, SAME_LOCAL("autoplay"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ADDITIVE = new AttributeName(ALL_NO_NS, SAME_LOCAL("additive"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName CALCMODE = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("calcmode", "calcMode"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName CODETYPE = new AttributeName(ALL_NO_NS, SAME_LOCAL("codetype"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName CODEBASE = new AttributeName(ALL_NO_NS, SAME_LOCAL("codebase"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName CONTROLS = new AttributeName(ALL_NO_NS, SAME_LOCAL("controls"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName BEVELLED = new AttributeName(ALL_NO_NS, SAME_LOCAL("bevelled"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName BASELINE = new AttributeName(ALL_NO_NS, SAME_LOCAL("baseline"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName EXPONENT = new AttributeName(ALL_NO_NS, SAME_LOCAL("exponent"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName EDGEMODE = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("edgemode", "edgeMode"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ENCODING = new 
AttributeName(ALL_NO_NS, SAME_LOCAL("encoding"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName GLYPHREF = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("glyphref", "glyphRef"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName DATETIME = new AttributeName(ALL_NO_NS, SAME_LOCAL("datetime"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName DISABLED = new AttributeName(ALL_NO_NS, SAME_LOCAL("disabled"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN); + public static final AttributeName FONTSIZE = new AttributeName(ALL_NO_NS, SAME_LOCAL("fontsize"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName KEYTIMES = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("keytimes", "keyTimes"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName PANOSE_1 = new AttributeName(ALL_NO_NS, SAME_LOCAL("panose-1"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName HREFLANG = new AttributeName(ALL_NO_NS, SAME_LOCAL("hreflang"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONRESIZE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onresize"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONCHANGE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onchange"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONBOUNCE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onbounce"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONUNLOAD = new AttributeName(ALL_NO_NS, SAME_LOCAL("onunload"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONFINISH = new AttributeName(ALL_NO_NS, 
SAME_LOCAL("onfinish"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONSCROLL = new AttributeName(ALL_NO_NS, SAME_LOCAL("onscroll"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName OPERATOR = new AttributeName(ALL_NO_NS, SAME_LOCAL("operator"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName OVERFLOW = new AttributeName(ALL_NO_NS, SAME_LOCAL("overflow"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONSUBMIT = new AttributeName(ALL_NO_NS, SAME_LOCAL("onsubmit"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONREPEAT = new AttributeName(ALL_NO_NS, SAME_LOCAL("onrepeat"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONSELECT = new AttributeName(ALL_NO_NS, SAME_LOCAL("onselect"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName NOTATION = new AttributeName(ALL_NO_NS, SAME_LOCAL("notation"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName NORESIZE = new AttributeName(ALL_NO_NS, SAME_LOCAL("noresize"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN); + public static final AttributeName MANIFEST = new AttributeName(ALL_NO_NS, SAME_LOCAL("manifest"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName MATHSIZE = new AttributeName(ALL_NO_NS, SAME_LOCAL("mathsize"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName MULTIPLE = new AttributeName(ALL_NO_NS, SAME_LOCAL("multiple"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN); + public static final AttributeName LONGDESC = new AttributeName(ALL_NO_NS, SAME_LOCAL("longdesc"), 
ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName LANGUAGE = new AttributeName(ALL_NO_NS, SAME_LOCAL("language"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName TEMPLATE = new AttributeName(ALL_NO_NS, SAME_LOCAL("template"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName TABINDEX = new AttributeName(ALL_NO_NS, SAME_LOCAL("tabindex"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName PROPERTY = new AttributeName(ALL_NO_NS, SAME_LOCAL("property"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName READONLY = new AttributeName(ALL_NO_NS, SAME_LOCAL("readonly"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN); + public static final AttributeName SELECTED = new AttributeName(ALL_NO_NS, SAME_LOCAL("selected"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN); + public static final AttributeName ROWLINES = new AttributeName(ALL_NO_NS, SAME_LOCAL("rowlines"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SEAMLESS = new AttributeName(ALL_NO_NS, SAME_LOCAL("seamless"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ROWALIGN = new AttributeName(ALL_NO_NS, SAME_LOCAL("rowalign"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName STRETCHY = new AttributeName(ALL_NO_NS, SAME_LOCAL("stretchy"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName REQUIRED = new AttributeName(ALL_NO_NS, SAME_LOCAL("required"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN); + public static final AttributeName XML_BASE = new AttributeName(XML_NS, COLONIFIED_LOCAL("xml:base", 
"base"), XML_PREFIX, NCNAME_FOREIGN); + public static final AttributeName XML_LANG = new AttributeName(XML_NS, COLONIFIED_LOCAL("xml:lang", "lang"), XML_PREFIX, NCNAME_FOREIGN); + public static final AttributeName X_HEIGHT = new AttributeName(ALL_NO_NS, SAME_LOCAL("x-height"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_OWNS = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-owns"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName AUTOFOCUS = new AttributeName(ALL_NO_NS, SAME_LOCAL("autofocus"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN); + public static final AttributeName ARIA_SORT = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-sort"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ACCESSKEY = new AttributeName(ALL_NO_NS, SAME_LOCAL("accesskey"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_BUSY = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-busy"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_GRAB = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-grab"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName AMPLITUDE = new AttributeName(ALL_NO_NS, SAME_LOCAL("amplitude"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_LIVE = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-live"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName CLIP_RULE = new AttributeName(ALL_NO_NS, SAME_LOCAL("clip-rule"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName CLIP_PATH = new AttributeName(ALL_NO_NS, SAME_LOCAL("clip-path"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static 
final AttributeName EQUALROWS = new AttributeName(ALL_NO_NS, SAME_LOCAL("equalrows"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ELEVATION = new AttributeName(ALL_NO_NS, SAME_LOCAL("elevation"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName DIRECTION = new AttributeName(ALL_NO_NS, SAME_LOCAL("direction"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName DRAGGABLE = new AttributeName(ALL_NO_NS, SAME_LOCAL("draggable"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName FILL_RULE = new AttributeName(ALL_NO_NS, SAME_LOCAL("fill-rule"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName FONTSTYLE = new AttributeName(ALL_NO_NS, SAME_LOCAL("fontstyle"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName FONT_SIZE = new AttributeName(ALL_NO_NS, SAME_LOCAL("font-size"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName KEYSYSTEM = new AttributeName(ALL_NO_NS, SAME_LOCAL("keysystem"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName KEYPOINTS = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("keypoints", "keyPoints"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName HIDEFOCUS = new AttributeName(ALL_NO_NS, SAME_LOCAL("hidefocus"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONMESSAGE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onmessage"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName INTERCEPT = new AttributeName(ALL_NO_NS, SAME_LOCAL("intercept"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONDRAGEND = new 
AttributeName(ALL_NO_NS, SAME_LOCAL("ondragend"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONMOVEEND = new AttributeName(ALL_NO_NS, SAME_LOCAL("onmoveend"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONINVALID = new AttributeName(ALL_NO_NS, SAME_LOCAL("oninvalid"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName INTEGRITY = new AttributeName(ALL_NO_NS, SAME_LOCAL("integrity"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONKEYDOWN = new AttributeName(ALL_NO_NS, SAME_LOCAL("onkeydown"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONFOCUSIN = new AttributeName(ALL_NO_NS, SAME_LOCAL("onfocusin"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONMOUSEUP = new AttributeName(ALL_NO_NS, SAME_LOCAL("onmouseup"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName INPUTMODE = new AttributeName(ALL_NO_NS, SAME_LOCAL("inputmode"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONROWEXIT = new AttributeName(ALL_NO_NS, SAME_LOCAL("onrowexit"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName MATHCOLOR = new AttributeName(ALL_NO_NS, SAME_LOCAL("mathcolor"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName MASKUNITS = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("maskunits", "maskUnits"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName MAXLENGTH = new AttributeName(ALL_NO_NS, SAME_LOCAL("maxlength"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName LINEBREAK = new AttributeName(ALL_NO_NS, 
SAME_LOCAL("linebreak"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName TRANSFORM = new AttributeName(ALL_NO_NS, SAME_LOCAL("transform"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName V_HANGING = new AttributeName(ALL_NO_NS, SAME_LOCAL("v-hanging"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName VALUETYPE = new AttributeName(ALL_NO_NS, SAME_LOCAL("valuetype"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED); + public static final AttributeName POINTSATZ = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("pointsatz", "pointsAtZ"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName POINTSATX = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("pointsatx", "pointsAtX"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName POINTSATY = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("pointsaty", "pointsAtY"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SYMMETRIC = new AttributeName(ALL_NO_NS, SAME_LOCAL("symmetric"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SCROLLING = new AttributeName(ALL_NO_NS, SAME_LOCAL("scrolling"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED); + public static final AttributeName REPEATDUR = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("repeatdur", "repeatDur"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SELECTION = new AttributeName(ALL_NO_NS, SAME_LOCAL("selection"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SEPARATOR = new AttributeName(ALL_NO_NS, SAME_LOCAL("separator"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName 
XML_SPACE = new AttributeName(XML_NS, COLONIFIED_LOCAL("xml:space", "space"), XML_PREFIX, NCNAME_FOREIGN); + public static final AttributeName AUTOSUBMIT = new AttributeName(ALL_NO_NS, SAME_LOCAL("autosubmit"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED | BOOLEAN); + public static final AttributeName ALPHABETIC = new AttributeName(ALL_NO_NS, SAME_LOCAL("alphabetic"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ACTIONTYPE = new AttributeName(ALL_NO_NS, SAME_LOCAL("actiontype"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ACCUMULATE = new AttributeName(ALL_NO_NS, SAME_LOCAL("accumulate"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_LEVEL = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-level"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName COLUMNSPAN = new AttributeName(ALL_NO_NS, SAME_LOCAL("columnspan"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName CAP_HEIGHT = new AttributeName(ALL_NO_NS, SAME_LOCAL("cap-height"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName BACKGROUND = new AttributeName(ALL_NO_NS, SAME_LOCAL("background"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName GLYPH_NAME = new AttributeName(ALL_NO_NS, SAME_LOCAL("glyph-name"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName GROUPALIGN = new AttributeName(ALL_NO_NS, SAME_LOCAL("groupalign"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName FONTFAMILY = new AttributeName(ALL_NO_NS, SAME_LOCAL("fontfamily"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName FONTWEIGHT = new 
AttributeName(ALL_NO_NS, SAME_LOCAL("fontweight"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName FONT_STYLE = new AttributeName(ALL_NO_NS, SAME_LOCAL("font-style"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName KEYSPLINES = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("keysplines", "keySplines"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName HTTP_EQUIV = new AttributeName(ALL_NO_NS, SAME_LOCAL("http-equiv"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONACTIVATE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onactivate"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName OCCURRENCE = new AttributeName(ALL_NO_NS, SAME_LOCAL("occurrence"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName IRRELEVANT = new AttributeName(ALL_NO_NS, SAME_LOCAL("irrelevant"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONDBLCLICK = new AttributeName(ALL_NO_NS, SAME_LOCAL("ondblclick"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONDRAGDROP = new AttributeName(ALL_NO_NS, SAME_LOCAL("ondragdrop"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONKEYPRESS = new AttributeName(ALL_NO_NS, SAME_LOCAL("onkeypress"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONROWENTER = new AttributeName(ALL_NO_NS, SAME_LOCAL("onrowenter"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONDRAGOVER = new AttributeName(ALL_NO_NS, SAME_LOCAL("ondragover"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONFOCUSOUT = new 
AttributeName(ALL_NO_NS, SAME_LOCAL("onfocusout"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONMOUSEOUT = new AttributeName(ALL_NO_NS, SAME_LOCAL("onmouseout"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName NUMOCTAVES = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("numoctaves", "numOctaves"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName MARKER_MID = new AttributeName(ALL_NO_NS, SAME_LOCAL("marker-mid"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName MARKER_END = new AttributeName(ALL_NO_NS, SAME_LOCAL("marker-end"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName TEXTLENGTH = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("textlength", "textLength"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName VISIBILITY = new AttributeName(ALL_NO_NS, SAME_LOCAL("visibility"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName VIEWTARGET = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("viewtarget", "viewTarget"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName VERT_ADV_Y = new AttributeName(ALL_NO_NS, SAME_LOCAL("vert-adv-y"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName PATHLENGTH = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("pathlength", "pathLength"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName REPEAT_MAX = new AttributeName(ALL_NO_NS, SAME_LOCAL("repeat-max"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName RADIOGROUP = new AttributeName(ALL_NO_NS, SAME_LOCAL("radiogroup"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static 
final AttributeName STOP_COLOR = new AttributeName(ALL_NO_NS, SAME_LOCAL("stop-color"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SEPARATORS = new AttributeName(ALL_NO_NS, SAME_LOCAL("separators"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName REPEAT_MIN = new AttributeName(ALL_NO_NS, SAME_LOCAL("repeat-min"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ROWSPACING = new AttributeName(ALL_NO_NS, SAME_LOCAL("rowspacing"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ZOOMANDPAN = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("zoomandpan", "zoomAndPan"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName XLINK_TYPE = new AttributeName(XLINK_NS, COLONIFIED_LOCAL("xlink:type", "type"), XLINK_PREFIX, NCNAME_FOREIGN); + public static final AttributeName XLINK_ROLE = new AttributeName(XLINK_NS, COLONIFIED_LOCAL("xlink:role", "role"), XLINK_PREFIX, NCNAME_FOREIGN); + public static final AttributeName XLINK_HREF = new AttributeName(XLINK_NS, COLONIFIED_LOCAL("xlink:href", "href"), XLINK_PREFIX, NCNAME_FOREIGN); + public static final AttributeName XLINK_SHOW = new AttributeName(XLINK_NS, COLONIFIED_LOCAL("xlink:show", "show"), XLINK_PREFIX, NCNAME_FOREIGN); + public static final AttributeName ACCENTUNDER = new AttributeName(ALL_NO_NS, SAME_LOCAL("accentunder"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_SECRET = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-secret"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_ATOMIC = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-atomic"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_HIDDEN = new AttributeName(ALL_NO_NS, 
SAME_LOCAL("aria-hidden"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_FLOWTO = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-flowto"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARABIC_FORM = new AttributeName(ALL_NO_NS, SAME_LOCAL("arabic-form"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName CELLPADDING = new AttributeName(ALL_NO_NS, SAME_LOCAL("cellpadding"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName CELLSPACING = new AttributeName(ALL_NO_NS, SAME_LOCAL("cellspacing"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName COLUMNWIDTH = new AttributeName(ALL_NO_NS, SAME_LOCAL("columnwidth"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName CROSSORIGIN = new AttributeName(ALL_NO_NS, SAME_LOCAL("crossorigin"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName COLUMNALIGN = new AttributeName(ALL_NO_NS, SAME_LOCAL("columnalign"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName COLUMNLINES = new AttributeName(ALL_NO_NS, SAME_LOCAL("columnlines"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName CONTEXTMENU = new AttributeName(ALL_NO_NS, SAME_LOCAL("contextmenu"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName BASEPROFILE = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("baseprofile", "baseProfile"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName FONT_FAMILY = new AttributeName(ALL_NO_NS, SAME_LOCAL("font-family"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName FRAMEBORDER = new 
AttributeName(ALL_NO_NS, SAME_LOCAL("frameborder"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName FILTERUNITS = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("filterunits", "filterUnits"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName FLOOD_COLOR = new AttributeName(ALL_NO_NS, SAME_LOCAL("flood-color"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName FONT_WEIGHT = new AttributeName(ALL_NO_NS, SAME_LOCAL("font-weight"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName HORIZ_ADV_X = new AttributeName(ALL_NO_NS, SAME_LOCAL("horiz-adv-x"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONDRAGLEAVE = new AttributeName(ALL_NO_NS, SAME_LOCAL("ondragleave"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONMOUSEMOVE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onmousemove"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ORIENTATION = new AttributeName(ALL_NO_NS, SAME_LOCAL("orientation"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONMOUSEDOWN = new AttributeName(ALL_NO_NS, SAME_LOCAL("onmousedown"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONMOUSEOVER = new AttributeName(ALL_NO_NS, SAME_LOCAL("onmouseover"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONDRAGENTER = new AttributeName(ALL_NO_NS, SAME_LOCAL("ondragenter"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName IDEOGRAPHIC = new AttributeName(ALL_NO_NS, SAME_LOCAL("ideographic"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName 
ONBEFORECUT = new AttributeName(ALL_NO_NS, SAME_LOCAL("onbeforecut"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONFORMINPUT = new AttributeName(ALL_NO_NS, SAME_LOCAL("onforminput"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONDRAGSTART = new AttributeName(ALL_NO_NS, SAME_LOCAL("ondragstart"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONMOVESTART = new AttributeName(ALL_NO_NS, SAME_LOCAL("onmovestart"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName MARKERUNITS = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("markerunits", "markerUnits"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName MATHVARIANT = new AttributeName(ALL_NO_NS, SAME_LOCAL("mathvariant"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName MARGINWIDTH = new AttributeName(ALL_NO_NS, SAME_LOCAL("marginwidth"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName MARKERWIDTH = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("markerwidth", "markerWidth"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName TEXT_ANCHOR = new AttributeName(ALL_NO_NS, SAME_LOCAL("text-anchor"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName TABLEVALUES = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("tablevalues", "tableValues"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SCRIPTLEVEL = new AttributeName(ALL_NO_NS, SAME_LOCAL("scriptlevel"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName REPEATCOUNT = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("repeatcount", "repeatCount"), ALL_NO_PREFIX, NCNAME_HTML | 
NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName STITCHTILES = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("stitchtiles", "stitchTiles"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName STARTOFFSET = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("startoffset", "startOffset"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SCROLLDELAY = new AttributeName(ALL_NO_NS, SAME_LOCAL("scrolldelay"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName XMLNS_XLINK = new AttributeName(XMLNS_NS, COLONIFIED_LOCAL("xmlns:xlink", "xlink"), XMLNS_PREFIX, IS_XMLNS); + public static final AttributeName XLINK_TITLE = new AttributeName(XLINK_NS, COLONIFIED_LOCAL("xlink:title", "title"), XLINK_PREFIX, NCNAME_FOREIGN); + public static final AttributeName ARIA_INVALID = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-invalid"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_PRESSED = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-pressed"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_CHECKED = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-checked"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName AUTOCOMPLETE = new AttributeName(ALL_NO_NS, SAME_LOCAL("autocomplete"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED); + public static final AttributeName ARIA_SETSIZE = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-setsize"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_CHANNEL = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-channel"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName EQUALCOLUMNS = new AttributeName(ALL_NO_NS, SAME_LOCAL("equalcolumns"), 
ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName DISPLAYSTYLE = new AttributeName(ALL_NO_NS, SAME_LOCAL("displaystyle"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName DATAFORMATAS = new AttributeName(ALL_NO_NS, SAME_LOCAL("dataformatas"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG | CASE_FOLDED); + public static final AttributeName FILL_OPACITY = new AttributeName(ALL_NO_NS, SAME_LOCAL("fill-opacity"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName FONT_VARIANT = new AttributeName(ALL_NO_NS, SAME_LOCAL("font-variant"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName FONT_STRETCH = new AttributeName(ALL_NO_NS, SAME_LOCAL("font-stretch"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName FRAMESPACING = new AttributeName(ALL_NO_NS, SAME_LOCAL("framespacing"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName KERNELMATRIX = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("kernelmatrix", "kernelMatrix"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONDEACTIVATE = new AttributeName(ALL_NO_NS, SAME_LOCAL("ondeactivate"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONROWSDELETE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onrowsdelete"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONMOUSELEAVE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onmouseleave"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONFORMCHANGE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onformchange"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONCELLCHANGE = new 
AttributeName(ALL_NO_NS, SAME_LOCAL("oncellchange"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONMOUSEWHEEL = new AttributeName(ALL_NO_NS, SAME_LOCAL("onmousewheel"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONMOUSEENTER = new AttributeName(ALL_NO_NS, SAME_LOCAL("onmouseenter"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONAFTERPRINT = new AttributeName(ALL_NO_NS, SAME_LOCAL("onafterprint"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONBEFORECOPY = new AttributeName(ALL_NO_NS, SAME_LOCAL("onbeforecopy"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName MARGINHEIGHT = new AttributeName(ALL_NO_NS, SAME_LOCAL("marginheight"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName MARKERHEIGHT = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("markerheight", "markerHeight"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName MARKER_START = new AttributeName(ALL_NO_NS, SAME_LOCAL("marker-start"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName MATHEMATICAL = new AttributeName(ALL_NO_NS, SAME_LOCAL("mathematical"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName LENGTHADJUST = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("lengthadjust", "lengthAdjust"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName UNSELECTABLE = new AttributeName(ALL_NO_NS, SAME_LOCAL("unselectable"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName UNICODE_BIDI = new AttributeName(ALL_NO_NS, SAME_LOCAL("unicode-bidi"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); 
+ public static final AttributeName UNITS_PER_EM = new AttributeName(ALL_NO_NS, SAME_LOCAL("units-per-em"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName WORD_SPACING = new AttributeName(ALL_NO_NS, SAME_LOCAL("word-spacing"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName WRITING_MODE = new AttributeName(ALL_NO_NS, SAME_LOCAL("writing-mode"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName V_ALPHABETIC = new AttributeName(ALL_NO_NS, SAME_LOCAL("v-alphabetic"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName PATTERNUNITS = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("patternunits", "patternUnits"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SPREADMETHOD = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("spreadmethod", "spreadMethod"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SURFACESCALE = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("surfacescale", "surfaceScale"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName STROKE_WIDTH = new AttributeName(ALL_NO_NS, SAME_LOCAL("stroke-width"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName REPEAT_START = new AttributeName(ALL_NO_NS, SAME_LOCAL("repeat-start"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName STDDEVIATION = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("stddeviation", "stdDeviation"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName STOP_OPACITY = new AttributeName(ALL_NO_NS, SAME_LOCAL("stop-opacity"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_CONTROLS = new 
AttributeName(ALL_NO_NS, SAME_LOCAL("aria-controls"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_HASPOPUP = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-haspopup"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ACCENT_HEIGHT = new AttributeName(ALL_NO_NS, SAME_LOCAL("accent-height"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_VALUENOW = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-valuenow"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_RELEVANT = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-relevant"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_POSINSET = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-posinset"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_VALUEMAX = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-valuemax"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_READONLY = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-readonly"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_SELECTED = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-selected"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_REQUIRED = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-required"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_EXPANDED = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-expanded"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_DISABLED = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-disabled"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static 
final AttributeName ATTRIBUTETYPE = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("attributetype", "attributeType"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ATTRIBUTENAME = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("attributename", "attributeName"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_DATATYPE = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-datatype"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_VALUEMIN = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-valuemin"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName BASEFREQUENCY = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("basefrequency", "baseFrequency"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName COLUMNSPACING = new AttributeName(ALL_NO_NS, SAME_LOCAL("columnspacing"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName COLOR_PROFILE = new AttributeName(ALL_NO_NS, SAME_LOCAL("color-profile"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName CLIPPATHUNITS = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("clippathunits", "clipPathUnits"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName DEFINITIONURL = new AttributeName(ALL_NO_NS, MATH_DIFFERENT("definitionurl", "definitionURL"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName GRADIENTUNITS = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("gradientunits", "gradientUnits"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName FLOOD_OPACITY = new AttributeName(ALL_NO_NS, SAME_LOCAL("flood-opacity"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final 
AttributeName ONAFTERUPDATE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onafterupdate"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONERRORUPDATE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onerrorupdate"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONBEFOREPASTE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onbeforepaste"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONLOSECAPTURE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onlosecapture"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONCONTEXTMENU = new AttributeName(ALL_NO_NS, SAME_LOCAL("oncontextmenu"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONSELECTSTART = new AttributeName(ALL_NO_NS, SAME_LOCAL("onselectstart"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONBEFOREPRINT = new AttributeName(ALL_NO_NS, SAME_LOCAL("onbeforeprint"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName MOVABLELIMITS = new AttributeName(ALL_NO_NS, SAME_LOCAL("movablelimits"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName LINETHICKNESS = new AttributeName(ALL_NO_NS, SAME_LOCAL("linethickness"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName UNICODE_RANGE = new AttributeName(ALL_NO_NS, SAME_LOCAL("unicode-range"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName THINMATHSPACE = new AttributeName(ALL_NO_NS, SAME_LOCAL("thinmathspace"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName VERT_ORIGIN_X = new AttributeName(ALL_NO_NS, SAME_LOCAL("vert-origin-x"), ALL_NO_PREFIX, NCNAME_HTML | 
NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName VERT_ORIGIN_Y = new AttributeName(ALL_NO_NS, SAME_LOCAL("vert-origin-y"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName V_IDEOGRAPHIC = new AttributeName(ALL_NO_NS, SAME_LOCAL("v-ideographic"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName PRESERVEALPHA = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("preservealpha", "preserveAlpha"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SCRIPTMINSIZE = new AttributeName(ALL_NO_NS, SAME_LOCAL("scriptminsize"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SPECIFICATION = new AttributeName(ALL_NO_NS, SAME_LOCAL("specification"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName XLINK_ACTUATE = new AttributeName(XLINK_NS, COLONIFIED_LOCAL("xlink:actuate", "actuate"), XLINK_PREFIX, NCNAME_FOREIGN); + public static final AttributeName XLINK_ARCROLE = new AttributeName(XLINK_NS, COLONIFIED_LOCAL("xlink:arcrole", "arcrole"), XLINK_PREFIX, NCNAME_FOREIGN); + public static final AttributeName ACCEPT_CHARSET = new AttributeName(ALL_NO_NS, SAME_LOCAL("accept-charset"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ALIGNMENTSCOPE = new AttributeName(ALL_NO_NS, SAME_LOCAL("alignmentscope"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_MULTILINE = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-multiline"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName BASELINE_SHIFT = new AttributeName(ALL_NO_NS, SAME_LOCAL("baseline-shift"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName HORIZ_ORIGIN_X = new AttributeName(ALL_NO_NS, 
SAME_LOCAL("horiz-origin-x"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName HORIZ_ORIGIN_Y = new AttributeName(ALL_NO_NS, SAME_LOCAL("horiz-origin-y"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONBEFOREUPDATE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onbeforeupdate"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONFILTERCHANGE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onfilterchange"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONROWSINSERTED = new AttributeName(ALL_NO_NS, SAME_LOCAL("onrowsinserted"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONBEFOREUNLOAD = new AttributeName(ALL_NO_NS, SAME_LOCAL("onbeforeunload"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName MATHBACKGROUND = new AttributeName(ALL_NO_NS, SAME_LOCAL("mathbackground"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName LETTER_SPACING = new AttributeName(ALL_NO_NS, SAME_LOCAL("letter-spacing"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName LIGHTING_COLOR = new AttributeName(ALL_NO_NS, SAME_LOCAL("lighting-color"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName THICKMATHSPACE = new AttributeName(ALL_NO_NS, SAME_LOCAL("thickmathspace"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName TEXT_RENDERING = new AttributeName(ALL_NO_NS, SAME_LOCAL("text-rendering"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName V_MATHEMATICAL = new AttributeName(ALL_NO_NS, SAME_LOCAL("v-mathematical"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static 
final AttributeName POINTER_EVENTS = new AttributeName(ALL_NO_NS, SAME_LOCAL("pointer-events"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName PRIMITIVEUNITS = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("primitiveunits", "primitiveUnits"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName REFERRERPOLICY = new AttributeName(ALL_NO_NS, SAME_LOCAL("referrerpolicy"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SYSTEMLANGUAGE = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("systemlanguage", "systemLanguage"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName STROKE_LINECAP = new AttributeName(ALL_NO_NS, SAME_LOCAL("stroke-linecap"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SUBSCRIPTSHIFT = new AttributeName(ALL_NO_NS, SAME_LOCAL("subscriptshift"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName STROKE_OPACITY = new AttributeName(ALL_NO_NS, SAME_LOCAL("stroke-opacity"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_DROPEFFECT = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-dropeffect"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_LABELLEDBY = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-labelledby"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_TEMPLATEID = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-templateid"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName COLOR_RENDERING = new AttributeName(ALL_NO_NS, SAME_LOCAL("color-rendering"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName CONTENTEDITABLE = new 
AttributeName(ALL_NO_NS, SAME_LOCAL("contenteditable"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName DIFFUSECONSTANT = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("diffuseconstant", "diffuseConstant"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONDATAAVAILABLE = new AttributeName(ALL_NO_NS, SAME_LOCAL("ondataavailable"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONCONTROLSELECT = new AttributeName(ALL_NO_NS, SAME_LOCAL("oncontrolselect"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName IMAGE_RENDERING = new AttributeName(ALL_NO_NS, SAME_LOCAL("image-rendering"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName MEDIUMMATHSPACE = new AttributeName(ALL_NO_NS, SAME_LOCAL("mediummathspace"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName TEXT_DECORATION = new AttributeName(ALL_NO_NS, SAME_LOCAL("text-decoration"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SHAPE_RENDERING = new AttributeName(ALL_NO_NS, SAME_LOCAL("shape-rendering"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName STROKE_LINEJOIN = new AttributeName(ALL_NO_NS, SAME_LOCAL("stroke-linejoin"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName REPEAT_TEMPLATE = new AttributeName(ALL_NO_NS, SAME_LOCAL("repeat-template"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_DESCRIBEDBY = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-describedby"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName FONT_SIZE_ADJUST = new AttributeName(ALL_NO_NS, SAME_LOCAL("font-size-adjust"), 
ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName KERNELUNITLENGTH = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("kernelunitlength", "kernelUnitLength"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONBEFOREACTIVATE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onbeforeactivate"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONPROPERTYCHANGE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onpropertychange"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONDATASETCHANGED = new AttributeName(ALL_NO_NS, SAME_LOCAL("ondatasetchanged"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName MASKCONTENTUNITS = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("maskcontentunits", "maskContentUnits"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName PATTERNTRANSFORM = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("patterntransform", "patternTransform"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName REQUIREDFEATURES = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("requiredfeatures", "requiredFeatures"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName RENDERING_INTENT = new AttributeName(ALL_NO_NS, SAME_LOCAL("rendering-intent"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SPECULAREXPONENT = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("specularexponent", "specularExponent"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SPECULARCONSTANT = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("specularconstant", "specularConstant"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName 
SUPERSCRIPTSHIFT = new AttributeName(ALL_NO_NS, SAME_LOCAL("superscriptshift"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName STROKE_DASHARRAY = new AttributeName(ALL_NO_NS, SAME_LOCAL("stroke-dasharray"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName XCHANNELSELECTOR = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("xchannelselector", "xChannelSelector"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName YCHANNELSELECTOR = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("ychannelselector", "yChannelSelector"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_AUTOCOMPLETE = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-autocomplete"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ENABLE_BACKGROUND = new AttributeName(ALL_NO_NS, SAME_LOCAL("enable-background"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName DOMINANT_BASELINE = new AttributeName(ALL_NO_NS, SAME_LOCAL("dominant-baseline"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName GRADIENTTRANSFORM = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("gradienttransform", "gradientTransform"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONBEFORDEACTIVATE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onbefordeactivate"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONDATASETCOMPLETE = new AttributeName(ALL_NO_NS, SAME_LOCAL("ondatasetcomplete"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName OVERLINE_POSITION = new AttributeName(ALL_NO_NS, SAME_LOCAL("overline-position"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static 
final AttributeName ONBEFOREEDITFOCUS = new AttributeName(ALL_NO_NS, SAME_LOCAL("onbeforeeditfocus"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName LIMITINGCONEANGLE = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("limitingconeangle", "limitingConeAngle"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName VERYTHINMATHSPACE = new AttributeName(ALL_NO_NS, SAME_LOCAL("verythinmathspace"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName STROKE_DASHOFFSET = new AttributeName(ALL_NO_NS, SAME_LOCAL("stroke-dashoffset"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName STROKE_MITERLIMIT = new AttributeName(ALL_NO_NS, SAME_LOCAL("stroke-miterlimit"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ALIGNMENT_BASELINE = new AttributeName(ALL_NO_NS, SAME_LOCAL("alignment-baseline"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ONREADYSTATECHANGE = new AttributeName(ALL_NO_NS, SAME_LOCAL("onreadystatechange"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName OVERLINE_THICKNESS = new AttributeName(ALL_NO_NS, SAME_LOCAL("overline-thickness"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName UNDERLINE_POSITION = new AttributeName(ALL_NO_NS, SAME_LOCAL("underline-position"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName VERYTHICKMATHSPACE = new AttributeName(ALL_NO_NS, SAME_LOCAL("verythickmathspace"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName REQUIREDEXTENSIONS = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("requiredextensions", "requiredExtensions"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | 
NCNAME_LANG); + public static final AttributeName COLOR_INTERPOLATION = new AttributeName(ALL_NO_NS, SAME_LOCAL("color-interpolation"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName UNDERLINE_THICKNESS = new AttributeName(ALL_NO_NS, SAME_LOCAL("underline-thickness"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName PRESERVEASPECTRATIO = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("preserveaspectratio", "preserveAspectRatio"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName PATTERNCONTENTUNITS = new AttributeName(ALL_NO_NS, SVG_DIFFERENT("patterncontentunits", "patternContentUnits"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_MULTISELECTABLE = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-multiselectable"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName SCRIPTSIZEMULTIPLIER = new AttributeName(ALL_NO_NS, SAME_LOCAL("scriptsizemultiplier"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName ARIA_ACTIVEDESCENDANT = new AttributeName(ALL_NO_NS, SAME_LOCAL("aria-activedescendant"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName VERYVERYTHINMATHSPACE = new AttributeName(ALL_NO_NS, SAME_LOCAL("veryverythinmathspace"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName VERYVERYTHICKMATHSPACE = new AttributeName(ALL_NO_NS, SAME_LOCAL("veryverythickmathspace"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName STRIKETHROUGH_POSITION = new AttributeName(ALL_NO_NS, SAME_LOCAL("strikethrough-position"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName STRIKETHROUGH_THICKNESS = new 
AttributeName(ALL_NO_NS, SAME_LOCAL("strikethrough-thickness"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName GLYPH_ORIENTATION_VERTICAL = new AttributeName(ALL_NO_NS, SAME_LOCAL("glyph-orientation-vertical"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName COLOR_INTERPOLATION_FILTERS = new AttributeName(ALL_NO_NS, SAME_LOCAL("color-interpolation-filters"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + public static final AttributeName GLYPH_ORIENTATION_HORIZONTAL = new AttributeName(ALL_NO_NS, SAME_LOCAL("glyph-orientation-horizontal"), ALL_NO_PREFIX, NCNAME_HTML | NCNAME_FOREIGN | NCNAME_LANG); + private final static @NoLength AttributeName[] ATTRIBUTE_NAMES = { + D, + K, + R, + X, + Y, + Z, + BY, + CX, + CY, + DX, + DY, + G2, + G1, + FX, + FY, + K4, + K2, + K3, + K1, + ID, + IN, + U2, + U1, + RT, + RX, + RY, + TO, + Y2, + Y1, + X1, + X2, + ALT, + DIR, + DUR, + END, + FOR, + IN2, + MAX, + MIN, + LOW, + REL, + REV, + SRC, + AXIS, + ABBR, + BBOX, + CITE, + CODE, + BIAS, + COLS, + CLIP, + CHAR, + BASE, + EDGE, + DATA, + FILL, + FROM, + FORM, + FACE, + HIGH, + HREF, + OPEN, + ICON, + NAME, + MODE, + MASK, + LINK, + LANG, + LOOP, + LIST, + TYPE, + WHEN, + WRAP, + TEXT, + PATH, + PING, + REFX, + REFY, + SIZE, + SEED, + ROWS, + SPAN, + STEP, + ROLE, + XREF, + ASYNC, + ALINK, + ALIGN, + CLOSE, + COLOR, + CLASS, + CLEAR, + BEGIN, + DEPTH, + DEFER, + FENCE, + FRAME, + ISMAP, + ONEND, + INDEX, + ORDER, + OTHER, + ONCUT, + NARGS, + MEDIA, + LABEL, + LOCAL, + WIDTH, + TITLE, + VLINK, + VALUE, + SLOPE, + SHAPE, + SCOPE, + SCALE, + SPEED, + STYLE, + RULES, + STEMH, + SIZES, + STEMV, + START, + XMLNS, + ACCEPT, + ACCENT, + ASCENT, + ACTIVE, + ALTIMG, + ACTION, + BORDER, + CURSOR, + COORDS, + FILTER, + FORMAT, + HIDDEN, + HSPACE, + HEIGHT, + ONMOVE, + ONLOAD, + ONDRAG, + ORIGIN, + ONZOOM, + ONHELP, + ONSTOP, + ONDROP, + ONBLUR, + OBJECT, + OFFSET, + ORIENT, + ONCOPY, 
+ NOWRAP, + NOHREF, + MACROS, + METHOD, + LOWSRC, + LSPACE, + LQUOTE, + USEMAP, + WIDTHS, + TARGET, + VALUES, + VALIGN, + VSPACE, + POSTER, + POINTS, + PROMPT, + SRCDOC, + SCOPED, + STRING, + SCHEME, + STROKE, + RADIUS, + RESULT, + REPEAT, + SRCSET, + RSPACE, + ROTATE, + RQUOTE, + ALTTEXT, + ARCHIVE, + AZIMUTH, + CLOSURE, + CHECKED, + CLASSID, + CHAROFF, + BGCOLOR, + COLSPAN, + CHARSET, + COMPACT, + CONTENT, + ENCTYPE, + DATASRC, + DATAFLD, + DECLARE, + DISPLAY, + DIVISOR, + DEFAULT, + DESCENT, + KERNING, + HANGING, + HEADERS, + ONPASTE, + ONCLICK, + OPTIMUM, + ONBEGIN, + ONKEYUP, + ONFOCUS, + ONERROR, + ONINPUT, + ONABORT, + ONSTART, + ONRESET, + OPACITY, + NOSHADE, + MINSIZE, + MAXSIZE, + LARGEOP, + UNICODE, + TARGETX, + TARGETY, + VIEWBOX, + VERSION, + PATTERN, + PROFILE, + SPACING, + RESTART, + ROWSPAN, + SANDBOX, + SUMMARY, + STANDBY, + REPLACE, + AUTOPLAY, + ADDITIVE, + CALCMODE, + CODETYPE, + CODEBASE, + CONTROLS, + BEVELLED, + BASELINE, + EXPONENT, + EDGEMODE, + ENCODING, + GLYPHREF, + DATETIME, + DISABLED, + FONTSIZE, + KEYTIMES, + PANOSE_1, + HREFLANG, + ONRESIZE, + ONCHANGE, + ONBOUNCE, + ONUNLOAD, + ONFINISH, + ONSCROLL, + OPERATOR, + OVERFLOW, + ONSUBMIT, + ONREPEAT, + ONSELECT, + NOTATION, + NORESIZE, + MANIFEST, + MATHSIZE, + MULTIPLE, + LONGDESC, + LANGUAGE, + TEMPLATE, + TABINDEX, + PROPERTY, + READONLY, + SELECTED, + ROWLINES, + SEAMLESS, + ROWALIGN, + STRETCHY, + REQUIRED, + XML_BASE, + XML_LANG, + X_HEIGHT, + ARIA_OWNS, + AUTOFOCUS, + ARIA_SORT, + ACCESSKEY, + ARIA_BUSY, + ARIA_GRAB, + AMPLITUDE, + ARIA_LIVE, + CLIP_RULE, + CLIP_PATH, + EQUALROWS, + ELEVATION, + DIRECTION, + DRAGGABLE, + FILL_RULE, + FONTSTYLE, + FONT_SIZE, + KEYSYSTEM, + KEYPOINTS, + HIDEFOCUS, + ONMESSAGE, + INTERCEPT, + ONDRAGEND, + ONMOVEEND, + ONINVALID, + INTEGRITY, + ONKEYDOWN, + ONFOCUSIN, + ONMOUSEUP, + INPUTMODE, + ONROWEXIT, + MATHCOLOR, + MASKUNITS, + MAXLENGTH, + LINEBREAK, + TRANSFORM, + V_HANGING, + VALUETYPE, + POINTSATZ, + POINTSATX, + POINTSATY, + SYMMETRIC, + 
SCROLLING, + REPEATDUR, + SELECTION, + SEPARATOR, + XML_SPACE, + AUTOSUBMIT, + ALPHABETIC, + ACTIONTYPE, + ACCUMULATE, + ARIA_LEVEL, + COLUMNSPAN, + CAP_HEIGHT, + BACKGROUND, + GLYPH_NAME, + GROUPALIGN, + FONTFAMILY, + FONTWEIGHT, + FONT_STYLE, + KEYSPLINES, + HTTP_EQUIV, + ONACTIVATE, + OCCURRENCE, + IRRELEVANT, + ONDBLCLICK, + ONDRAGDROP, + ONKEYPRESS, + ONROWENTER, + ONDRAGOVER, + ONFOCUSOUT, + ONMOUSEOUT, + NUMOCTAVES, + MARKER_MID, + MARKER_END, + TEXTLENGTH, + VISIBILITY, + VIEWTARGET, + VERT_ADV_Y, + PATHLENGTH, + REPEAT_MAX, + RADIOGROUP, + STOP_COLOR, + SEPARATORS, + REPEAT_MIN, + ROWSPACING, + ZOOMANDPAN, + XLINK_TYPE, + XLINK_ROLE, + XLINK_HREF, + XLINK_SHOW, + ACCENTUNDER, + ARIA_SECRET, + ARIA_ATOMIC, + ARIA_HIDDEN, + ARIA_FLOWTO, + ARABIC_FORM, + CELLPADDING, + CELLSPACING, + COLUMNWIDTH, + CROSSORIGIN, + COLUMNALIGN, + COLUMNLINES, + CONTEXTMENU, + BASEPROFILE, + FONT_FAMILY, + FRAMEBORDER, + FILTERUNITS, + FLOOD_COLOR, + FONT_WEIGHT, + HORIZ_ADV_X, + ONDRAGLEAVE, + ONMOUSEMOVE, + ORIENTATION, + ONMOUSEDOWN, + ONMOUSEOVER, + ONDRAGENTER, + IDEOGRAPHIC, + ONBEFORECUT, + ONFORMINPUT, + ONDRAGSTART, + ONMOVESTART, + MARKERUNITS, + MATHVARIANT, + MARGINWIDTH, + MARKERWIDTH, + TEXT_ANCHOR, + TABLEVALUES, + SCRIPTLEVEL, + REPEATCOUNT, + STITCHTILES, + STARTOFFSET, + SCROLLDELAY, + XMLNS_XLINK, + XLINK_TITLE, + ARIA_INVALID, + ARIA_PRESSED, + ARIA_CHECKED, + AUTOCOMPLETE, + ARIA_SETSIZE, + ARIA_CHANNEL, + EQUALCOLUMNS, + DISPLAYSTYLE, + DATAFORMATAS, + FILL_OPACITY, + FONT_VARIANT, + FONT_STRETCH, + FRAMESPACING, + KERNELMATRIX, + ONDEACTIVATE, + ONROWSDELETE, + ONMOUSELEAVE, + ONFORMCHANGE, + ONCELLCHANGE, + ONMOUSEWHEEL, + ONMOUSEENTER, + ONAFTERPRINT, + ONBEFORECOPY, + MARGINHEIGHT, + MARKERHEIGHT, + MARKER_START, + MATHEMATICAL, + LENGTHADJUST, + UNSELECTABLE, + UNICODE_BIDI, + UNITS_PER_EM, + WORD_SPACING, + WRITING_MODE, + V_ALPHABETIC, + PATTERNUNITS, + SPREADMETHOD, + SURFACESCALE, + STROKE_WIDTH, + REPEAT_START, + STDDEVIATION, + STOP_OPACITY, + 
ARIA_CONTROLS, + ARIA_HASPOPUP, + ACCENT_HEIGHT, + ARIA_VALUENOW, + ARIA_RELEVANT, + ARIA_POSINSET, + ARIA_VALUEMAX, + ARIA_READONLY, + ARIA_SELECTED, + ARIA_REQUIRED, + ARIA_EXPANDED, + ARIA_DISABLED, + ATTRIBUTETYPE, + ATTRIBUTENAME, + ARIA_DATATYPE, + ARIA_VALUEMIN, + BASEFREQUENCY, + COLUMNSPACING, + COLOR_PROFILE, + CLIPPATHUNITS, + DEFINITIONURL, + GRADIENTUNITS, + FLOOD_OPACITY, + ONAFTERUPDATE, + ONERRORUPDATE, + ONBEFOREPASTE, + ONLOSECAPTURE, + ONCONTEXTMENU, + ONSELECTSTART, + ONBEFOREPRINT, + MOVABLELIMITS, + LINETHICKNESS, + UNICODE_RANGE, + THINMATHSPACE, + VERT_ORIGIN_X, + VERT_ORIGIN_Y, + V_IDEOGRAPHIC, + PRESERVEALPHA, + SCRIPTMINSIZE, + SPECIFICATION, + XLINK_ACTUATE, + XLINK_ARCROLE, + ACCEPT_CHARSET, + ALIGNMENTSCOPE, + ARIA_MULTILINE, + BASELINE_SHIFT, + HORIZ_ORIGIN_X, + HORIZ_ORIGIN_Y, + ONBEFOREUPDATE, + ONFILTERCHANGE, + ONROWSINSERTED, + ONBEFOREUNLOAD, + MATHBACKGROUND, + LETTER_SPACING, + LIGHTING_COLOR, + THICKMATHSPACE, + TEXT_RENDERING, + V_MATHEMATICAL, + POINTER_EVENTS, + PRIMITIVEUNITS, + REFERRERPOLICY, + SYSTEMLANGUAGE, + STROKE_LINECAP, + SUBSCRIPTSHIFT, + STROKE_OPACITY, + ARIA_DROPEFFECT, + ARIA_LABELLEDBY, + ARIA_TEMPLATEID, + COLOR_RENDERING, + CONTENTEDITABLE, + DIFFUSECONSTANT, + ONDATAAVAILABLE, + ONCONTROLSELECT, + IMAGE_RENDERING, + MEDIUMMATHSPACE, + TEXT_DECORATION, + SHAPE_RENDERING, + STROKE_LINEJOIN, + REPEAT_TEMPLATE, + ARIA_DESCRIBEDBY, + FONT_SIZE_ADJUST, + KERNELUNITLENGTH, + ONBEFOREACTIVATE, + ONPROPERTYCHANGE, + ONDATASETCHANGED, + MASKCONTENTUNITS, + PATTERNTRANSFORM, + REQUIREDFEATURES, + RENDERING_INTENT, + SPECULAREXPONENT, + SPECULARCONSTANT, + SUPERSCRIPTSHIFT, + STROKE_DASHARRAY, + XCHANNELSELECTOR, + YCHANNELSELECTOR, + ARIA_AUTOCOMPLETE, + ENABLE_BACKGROUND, + DOMINANT_BASELINE, + GRADIENTTRANSFORM, + ONBEFORDEACTIVATE, + ONDATASETCOMPLETE, + OVERLINE_POSITION, + ONBEFOREEDITFOCUS, + LIMITINGCONEANGLE, + VERYTHINMATHSPACE, + STROKE_DASHOFFSET, + STROKE_MITERLIMIT, + ALIGNMENT_BASELINE, + 
ONREADYSTATECHANGE, + OVERLINE_THICKNESS, + UNDERLINE_POSITION, + VERYTHICKMATHSPACE, + REQUIREDEXTENSIONS, + COLOR_INTERPOLATION, + UNDERLINE_THICKNESS, + PRESERVEASPECTRATIO, + PATTERNCONTENTUNITS, + ARIA_MULTISELECTABLE, + SCRIPTSIZEMULTIPLIER, + ARIA_ACTIVEDESCENDANT, + VERYVERYTHINMATHSPACE, + VERYVERYTHICKMATHSPACE, + STRIKETHROUGH_POSITION, + STRIKETHROUGH_THICKNESS, + GLYPH_ORIENTATION_VERTICAL, + COLOR_INTERPOLATION_FILTERS, + GLYPH_ORIENTATION_HORIZONTAL, + }; + private final static int[] ATTRIBUTE_HASHES = { + 1153, + 1383, + 1601, + 1793, + 1827, + 1857, + 68600, + 69146, + 69177, + 70237, + 70270, + 71572, + 71669, + 72415, + 72444, + 74846, + 74904, + 74943, + 75001, + 75276, + 75590, + 84742, + 84839, + 85575, + 85963, + 85992, + 87204, + 88074, + 88171, + 89130, + 89163, + 3207892, + 3283895, + 3284791, + 3338752, + 3358197, + 3369562, + 3539124, + 3562402, + 3574260, + 3670335, + 3696933, + 3721879, + 135280021, + 135346322, + 136317019, + 136475749, + 136548517, + 136652214, + 136884919, + 136902418, + 136942992, + 137292068, + 139120259, + 139785574, + 142250603, + 142314056, + 142331176, + 142519584, + 144752417, + 145106895, + 146147200, + 146765926, + 148805544, + 149655723, + 149809441, + 150018784, + 150445028, + 150813181, + 150923321, + 152528754, + 152536216, + 152647366, + 152962785, + 155219321, + 155654904, + 157317483, + 157350248, + 157437941, + 157447478, + 157604838, + 157685404, + 157894402, + 158315188, + 166078431, + 169409980, + 169700259, + 169856932, + 170007032, + 170409695, + 170466488, + 170513710, + 170608367, + 173028944, + 173896963, + 176090625, + 176129212, + 179390001, + 179489057, + 179627464, + 179840468, + 179849042, + 180004216, + 181779081, + 183027151, + 183645319, + 183698797, + 185922012, + 185997252, + 188312483, + 188675799, + 190977533, + 190992569, + 191006194, + 191033518, + 191038774, + 191096249, + 191166163, + 191194426, + 191443343, + 191522106, + 191568039, + 200104642, + 202506661, + 202537381, + 
202602917, + 203070590, + 203120766, + 203389054, + 203690071, + 203971238, + 203986524, + 209040857, + 209125756, + 212055489, + 212322418, + 212746849, + 213002877, + 213055164, + 213088023, + 213259873, + 213273386, + 213435118, + 213437318, + 213438231, + 213493071, + 213532268, + 213542834, + 213584431, + 213659891, + 215285828, + 215880731, + 216112976, + 216684637, + 217369699, + 217565298, + 217576549, + 218186795, + 219743185, + 220082234, + 221623802, + 221986406, + 222283890, + 223089542, + 223138630, + 223311265, + 224431494, + 224547358, + 224587256, + 224589550, + 224655650, + 224785518, + 224810917, + 224813302, + 225126263, + 225429618, + 225432950, + 225440869, + 236107233, + 236709921, + 236838947, + 237117095, + 237143271, + 237172455, + 237209953, + 237354143, + 237372743, + 237668065, + 237703073, + 237714273, + 239743521, + 240512803, + 240522627, + 240560417, + 240656513, + 241015715, + 241062755, + 241065383, + 243523041, + 245865199, + 246261793, + 246556195, + 246774817, + 246923491, + 246928419, + 246981667, + 247014847, + 247058369, + 247112833, + 247118177, + 247119137, + 247128739, + 247316903, + 249533729, + 250235623, + 250269543, + 251402351, + 252339047, + 253260911, + 253293679, + 254844367, + 255547879, + 256077281, + 256345377, + 258124199, + 258354465, + 258605063, + 258744193, + 258845603, + 258856961, + 258926689, + 269869248, + 270174334, + 270709417, + 270778994, + 270781796, + 271102503, + 271478858, + 271490090, + 272870654, + 273335275, + 273369140, + 273924313, + 274108530, + 274116736, + 276818662, + 277476156, + 279156579, + 279349675, + 280108533, + 280128712, + 280132869, + 280162403, + 280280292, + 280413430, + 280506130, + 280677397, + 280678580, + 280686710, + 280689066, + 282736758, + 283110901, + 283275116, + 283823226, + 283890012, + 284479340, + 284606461, + 286700477, + 286798916, + 290055764, + 291557706, + 291665349, + 291804100, + 292138018, + 292166446, + 292418738, + 292451039, + 300298041, + 300374839, 
+ 300597935, + 303073389, + 303083839, + 303266673, + 303354997, + 303430688, + 303576261, + 303724281, + 303819694, + 304242723, + 304382625, + 306247792, + 307227811, + 307468786, + 307724489, + 310252031, + 310358241, + 310373094, + 310833159, + 311015256, + 313357609, + 313683893, + 313701861, + 313706996, + 313707317, + 313710350, + 313795700, + 314027746, + 314038181, + 314091299, + 314205627, + 314233813, + 316741830, + 316797986, + 317486755, + 317794164, + 320076137, + 322657125, + 322887778, + 323506876, + 323572412, + 323605180, + 325060058, + 325320188, + 325398738, + 325541490, + 325671619, + 333868843, + 336806130, + 337212108, + 337282686, + 337285434, + 337585223, + 338036037, + 338298087, + 338566051, + 340943551, + 341190970, + 342995704, + 343352124, + 343912673, + 344585053, + 346977248, + 347218098, + 347262163, + 347278576, + 347438191, + 347655959, + 347684788, + 347726430, + 347727772, + 347776035, + 347776629, + 349500753, + 350880161, + 350887073, + 353384123, + 355496998, + 355906922, + 355979793, + 356545959, + 358637867, + 358905016, + 359164318, + 359247286, + 359350571, + 359579447, + 365560330, + 367399355, + 367420285, + 367510727, + 368013212, + 370234760, + 370353345, + 370710317, + 371074566, + 371122285, + 371194213, + 371448425, + 371448430, + 371545055, + 371593469, + 371596922, + 371758751, + 371964792, + 372151328, + 376550136, + 376710172, + 376795771, + 376826271, + 376906556, + 380514830, + 380774774, + 380775037, + 381030322, + 381136500, + 381281631, + 381282269, + 381285504, + 381330595, + 381331422, + 381335911, + 381336484, + 383907298, + 383917408, + 384595009, + 384595013, + 387799894, + 387823201, + 392581647, + 392584937, + 392742684, + 392906485, + 393003349, + 400644707, + 400973830, + 404428547, + 404432113, + 404432865, + 404469244, + 404478897, + 404694860, + 406887479, + 408294949, + 408789955, + 410022510, + 410467324, + 410586448, + 410945965, + 411845275, + 414327152, + 414327932, + 414329781, + 
414346257, + 414346439, + 414639928, + 414835998, + 414894517, + 414986533, + 417465377, + 417465381, + 417492216, + 418259232, + 419310946, + 420103495, + 420242342, + 420380455, + 420658662, + 420717432, + 423183880, + 424539259, + 425929170, + 425972964, + 426050649, + 426126450, + 426142833, + 426607922, + 437289840, + 437347469, + 437412335, + 437423943, + 437455540, + 437462252, + 437597991, + 437617485, + 437986305, + 437986507, + 437986828, + 437987072, + 438015591, + 438034813, + 438038966, + 438179623, + 438347971, + 438483573, + 438547062, + 438895551, + 441592676, + 442032555, + 443548979, + 447881379, + 447881655, + 447881895, + 447887844, + 448416189, + 448445746, + 448449012, + 450942191, + 452816744, + 453668677, + 454434495, + 456610076, + 456642844, + 456738709, + 457544600, + 459451897, + 459680944, + 468058810, + 468083581, + 470964084, + 471470955, + 471567278, + 472267822, + 481177859, + 481210627, + 481435874, + 481455115, + 481485378, + 481490218, + 485105638, + 486005878, + 486383494, + 487988916, + 488103783, + 490661867, + 491574090, + 491578272, + 492891370, + 493041952, + 493441205, + 493582844, + 493716979, + 504577572, + 504740359, + 505091638, + 505592418, + 505656212, + 509516275, + 514998531, + 515571132, + 515594682, + 518712698, + 521362273, + 526592419, + 526807354, + 527348842, + 538294791, + 544689535, + 545535009, + 548544752, + 548563346, + 548595116, + 551679010, + 558034099, + 560329411, + 560356209, + 560671018, + 560671152, + 560692590, + 560845442, + 569212097, + 569474241, + 572252718, + 575326764, + 576174758, + 576190819, + 582099184, + 582099438, + 582372519, + 582558889, + 586552164, + 591325418, + 594231990, + 594243961, + 605711268, + 615672071, + 616086845, + 621792370, + 624879850, + 627432831, + 640040548, + 654392808, + 658675477, + 659420283, + 672891587, + 694768102, + 705890982, + 725543146, + 759097578, + 761686526, + 795383908, + 878105336, + 908643300, + 945213471, + }; +} diff --git 
a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/CoalescingTreeBuilder.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/CoalescingTreeBuilder.java new file mode 100644 index 000000000..01d76d700 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/CoalescingTreeBuilder.java @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2008-2010 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.impl; + +import nu.validator.htmlparser.annotation.NoLength; + +import org.xml.sax.SAXException; + +/** + * A common superclass for tree builders that coalesce their text nodes. 
+ * + * @version $Id$ + * @author hsivonen + */ +public abstract class CoalescingTreeBuilder<T> extends TreeBuilder<T> { + + protected final void accumulateCharacters(@NoLength char[] buf, int start, + int length) throws SAXException { + System.arraycopy(buf, start, charBuffer, charBufferLen, length); + charBufferLen += length; + } + + /** + * @see nu.validator.htmlparser.impl.TreeBuilder#appendCharacters(java.lang.Object, char[], int, int) + */ + @Override protected final void appendCharacters(T parent, char[] buf, int start, + int length) throws SAXException { + appendCharacters(parent, new String(buf, start, length)); + } + + /** + * @see nu.validator.htmlparser.impl.TreeBuilder#appendIsindexPrompt(java.lang.Object) + */ + @Override protected void appendIsindexPrompt(T parent) throws SAXException { + appendCharacters(parent, "This is a searchable index. Enter search keywords: "); + } + + protected abstract void appendCharacters(T parent, String text) throws SAXException; + + /** + * @see nu.validator.htmlparser.impl.TreeBuilder#appendComment(java.lang.Object, char[], int, int) + */ + @Override final protected void appendComment(T parent, char[] buf, int start, + int length) throws SAXException { + appendComment(parent, new String(buf, start, length)); + } + + protected abstract void appendComment(T parent, String comment) throws SAXException; + + /** + * @see nu.validator.htmlparser.impl.TreeBuilder#appendCommentToDocument(char[], int, int) + */ + @Override protected final void appendCommentToDocument(char[] buf, int start, + int length) throws SAXException { + // TODO Auto-generated method stub + appendCommentToDocument(new String(buf, start, length)); + } + + protected abstract void appendCommentToDocument(String comment) throws SAXException; + + /** + * @see nu.validator.htmlparser.impl.TreeBuilder#insertFosterParentedCharacters(char[], int, int, java.lang.Object, java.lang.Object) + */ + @Override protected final void insertFosterParentedCharacters(char[] 
buf, int start, + int length, T table, T stackParent) throws SAXException { + insertFosterParentedCharacters(new String(buf, start, length), table, stackParent); + } + + protected abstract void insertFosterParentedCharacters(String text, T table, T stackParent) throws SAXException; +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/ElementName.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/ElementName.java new file mode 100644 index 000000000..ee0493318 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/ElementName.java @@ -0,0 +1,1614 @@ +/* + * Copyright (c) 2008-2016 Mozilla Foundation + * Copyright (c) 2018-2020 Moonchild Productions + * Copyright (c) 2020 Binary Outcast + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +package nu.validator.htmlparser.impl; + +import java.util.Arrays; + +import nu.validator.htmlparser.annotation.Inline; +import nu.validator.htmlparser.annotation.Local; +import nu.validator.htmlparser.annotation.NoLength; +import nu.validator.htmlparser.annotation.Virtual; +import nu.validator.htmlparser.common.Interner; + +public final class ElementName +// uncomment when regenerating self +// implements Comparable<ElementName> +{ + + /** + * The mask for extracting the dispatch group. + */ + public static final int GROUP_MASK = 127; + + /** + * Indicates that the element is not a pre-interned element. Forbidden + * on preinterned elements. + */ + public static final int CUSTOM = (1 << 30); + + /** + * Indicates that the element is in the "special" category. This bit + * should not be pre-set on MathML or SVG specials--only on HTML specials. + */ + public static final int SPECIAL = (1 << 29); + + /** + * The element is foster-parenting. This bit should be pre-set on elements + * that are foster-parenting as HTML. + */ + public static final int FOSTER_PARENTING = (1 << 28); + + /** + * The element is scoping. This bit should be pre-set on elements + * that are scoping as HTML. + */ + public static final int SCOPING = (1 << 27); + + /** + * The element is scoping as SVG. + */ + public static final int SCOPING_AS_SVG = (1 << 26); + + /** + * The element is scoping as MathML. + */ + public static final int SCOPING_AS_MATHML = (1 << 25); + + /** + * The element is an HTML integration point. + */ + public static final int HTML_INTEGRATION_POINT = (1 << 24); + + /** + * The element has an optional end tag. + */ + public static final int OPTIONAL_END_TAG = (1 << 23); + + public static final ElementName NULL_ELEMENT_NAME = new ElementName(null); + + public final @Local String name; + + public final @Local String camelCaseName; + + /** + * The lowest 7 bits are the dispatch group. The high bits are flags. 
+ */ + public final int flags; + + @Inline public int getFlags() { + return flags; + } + + public int getGroup() { + return flags & GROUP_MASK; + } + + public boolean isCustom() { + return (flags & CUSTOM) != 0; + } + + static ElementName elementNameByBuffer(@NoLength char[] buf, int offset, int length, Interner interner) { + int hash = ElementName.bufToHash(buf, length); + int index = Arrays.binarySearch(ElementName.ELEMENT_HASHES, hash); + if (index < 0) { + return new ElementName(Portability.newLocalNameFromBuffer(buf, offset, length, interner)); + } else { + ElementName elementName = ElementName.ELEMENT_NAMES[index]; + @Local String name = elementName.name; + if (!Portability.localEqualsBuffer(name, buf, offset, length)) { + return new ElementName(Portability.newLocalNameFromBuffer(buf, + offset, length, interner)); + } + return elementName; + } + } + + /** + * This method has to return a unique integer for each well-known + * lower-cased element name. + * + * @param buf + * @param len + * @return + */ + private static int bufToHash(@NoLength char[] buf, int len) { + int hash = len; + hash <<= 5; + hash += buf[0] - 0x60; + int j = len; + for (int i = 0; i < 4 && j > 0; i++) { + j--; + hash <<= 5; + hash += buf[j] - 0x60; + } + return hash; + } + + private ElementName(@Local String name, @Local String camelCaseName, + int flags) { + this.name = name; + this.camelCaseName = camelCaseName; + this.flags = flags; + } + + protected ElementName(@Local String name) { + this.name = name; + this.camelCaseName = name; + this.flags = TreeBuilder.OTHER | CUSTOM; + } + + @Virtual void release() { + // No-op in Java. + // Implement as delete this in subclass. 
+ // Be sure to release the local name + } + + @SuppressWarnings("unused") @Virtual private void destructor() { + } + + @Virtual public ElementName cloneElementName(Interner interner) { + return this; + } + + // START CODE ONLY USED FOR GENERATING CODE uncomment and run to regenerate + +// /** +// * @see java.lang.Object#toString() +// */ +// @Override public String toString() { +// return "(\"" + name + "\", \"" + camelCaseName + "\", " + decomposedFlags() + ")"; +// } +// +// private String decomposedFlags() { +// StringBuilder buf = new StringBuilder("TreeBuilder."); +// buf.append(treeBuilderGroupToName()); +// if ((flags & SPECIAL) != 0) { +// buf.append(" | SPECIAL"); +// } +// if ((flags & FOSTER_PARENTING) != 0) { +// buf.append(" | FOSTER_PARENTING"); +// } +// if ((flags & SCOPING) != 0) { +// buf.append(" | SCOPING"); +// } +// if ((flags & SCOPING_AS_MATHML) != 0) { +// buf.append(" | SCOPING_AS_MATHML"); +// } +// if ((flags & SCOPING_AS_SVG) != 0) { +// buf.append(" | SCOPING_AS_SVG"); +// } +// if ((flags & OPTIONAL_END_TAG) != 0) { +// buf.append(" | OPTIONAL_END_TAG"); +// } +// return buf.toString(); +// } +// +// private String constName() { +// char[] buf = new char[name.length()]; +// for (int i = 0; i < name.length(); i++) { +// char c = name.charAt(i); +// if (c == '-') { +// buf[i] = '_'; +// } else if (c >= '0' && c <= '9') { +// buf[i] = c; +// } else { +// buf[i] = (char) (c - 0x20); +// } +// } +// return new String(buf); +// } +// +// private int hash() { +// return bufToHash(name.toCharArray(), name.length()); +// } +// +// public int compareTo(ElementName other) { +// int thisHash = this.hash(); +// int otherHash = other.hash(); +// if (thisHash < otherHash) { +// return -1; +// } else if (thisHash == otherHash) { +// return 0; +// } else { +// return 1; +// } +// } +// +// private String treeBuilderGroupToName() { +// switch (getGroup()) { +// case TreeBuilder.OTHER: +// return "OTHER"; +// case TreeBuilder.A: +// return "A"; +// 
case TreeBuilder.BASE: +// return "BASE"; +// case TreeBuilder.BODY: +// return "BODY"; +// case TreeBuilder.BR: +// return "BR"; +// case TreeBuilder.BUTTON: +// return "BUTTON"; +// case TreeBuilder.CAPTION: +// return "CAPTION"; +// case TreeBuilder.COL: +// return "COL"; +// case TreeBuilder.COLGROUP: +// return "COLGROUP"; +// case TreeBuilder.FONT: +// return "FONT"; +// case TreeBuilder.FORM: +// return "FORM"; +// case TreeBuilder.FRAME: +// return "FRAME"; +// case TreeBuilder.FRAMESET: +// return "FRAMESET"; +// case TreeBuilder.IMAGE: +// return "IMAGE"; +// case TreeBuilder.INPUT: +// return "INPUT"; +// case TreeBuilder.ISINDEX: +// return "ISINDEX"; +// case TreeBuilder.LI: +// return "LI"; +// case TreeBuilder.LINK_OR_BASEFONT_OR_BGSOUND: +// return "LINK_OR_BASEFONT_OR_BGSOUND"; +// case TreeBuilder.MATH: +// return "MATH"; +// case TreeBuilder.META: +// return "META"; +// case TreeBuilder.SVG: +// return "SVG"; +// case TreeBuilder.HEAD: +// return "HEAD"; +// case TreeBuilder.HR: +// return "HR"; +// case TreeBuilder.HTML: +// return "HTML"; +// case TreeBuilder.KEYGEN: +// return "KEYGEN"; +// case TreeBuilder.NOBR: +// return "NOBR"; +// case TreeBuilder.NOFRAMES: +// return "NOFRAMES"; +// case TreeBuilder.NOSCRIPT: +// return "NOSCRIPT"; +// case TreeBuilder.OPTGROUP: +// return "OPTGROUP"; +// case TreeBuilder.OPTION: +// return "OPTION"; +// case TreeBuilder.P: +// return "P"; +// case TreeBuilder.PLAINTEXT: +// return "PLAINTEXT"; +// case TreeBuilder.SCRIPT: +// return "SCRIPT"; +// case TreeBuilder.SELECT: +// return "SELECT"; +// case TreeBuilder.STYLE: +// return "STYLE"; +// case TreeBuilder.TABLE: +// return "TABLE"; +// case TreeBuilder.TEXTAREA: +// return "TEXTAREA"; +// case TreeBuilder.TITLE: +// return "TITLE"; +// case TreeBuilder.TEMPLATE: +// return "TEMPLATE"; +// case TreeBuilder.TR: +// return "TR"; +// case TreeBuilder.XMP: +// return "XMP"; +// case TreeBuilder.TBODY_OR_THEAD_OR_TFOOT: +// return 
"TBODY_OR_THEAD_OR_TFOOT"; +// case TreeBuilder.TD_OR_TH: +// return "TD_OR_TH"; +// case TreeBuilder.DD_OR_DT: +// return "DD_OR_DT"; +// case TreeBuilder.H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6: +// return "H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6"; +// case TreeBuilder.OBJECT: +// return "OBJECT"; +// case TreeBuilder.OUTPUT: +// return "OUTPUT"; +// case TreeBuilder.MARQUEE_OR_APPLET: +// return "MARQUEE_OR_APPLET"; +// case TreeBuilder.PRE_OR_LISTING: +// return "PRE_OR_LISTING"; +// case TreeBuilder.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U: +// return "B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U"; +// case TreeBuilder.UL_OR_OL_OR_DL: +// return "UL_OR_OL_OR_DL"; +// case TreeBuilder.IFRAME: +// return "IFRAME"; +// case TreeBuilder.NOEMBED: +// return "NOEMBED"; +// case TreeBuilder.EMBED: +// return "EMBED"; +// case TreeBuilder.IMG: +// return "IMG"; +// case TreeBuilder.AREA_OR_WBR: +// return "AREA_OR_WBR"; +// case TreeBuilder.DIV_OR_BLOCKQUOTE_OR_CENTER_OR_MENU: +// return "DIV_OR_BLOCKQUOTE_OR_CENTER_OR_MENU"; +// case TreeBuilder.FIELDSET: +// return "FIELDSET"; +// case TreeBuilder.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIALOG_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY: +// return "ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIALOG_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY"; +// case TreeBuilder.RUBY_OR_SPAN_OR_SUB_OR_SUP_OR_VAR: +// return "RUBY_OR_SPAN_OR_SUB_OR_SUP_OR_VAR"; +// case TreeBuilder.RB_OR_RTC: +// return "RB_OR_RTC"; +// case TreeBuilder.RT_OR_RP: +// return "RT_OR_RP"; +// case TreeBuilder.PARAM_OR_SOURCE_OR_TRACK: +// return "PARAM_OR_SOURCE_OR_TRACK"; +// case TreeBuilder.MGLYPH_OR_MALIGNMARK: +// return "MGLYPH_OR_MALIGNMARK"; +// case TreeBuilder.MI_MO_MN_MS_MTEXT: +// return "MI_MO_MN_MS_MTEXT"; +// case TreeBuilder.ANNOTATION_XML: +// return "ANNOTATION_XML"; +// case 
TreeBuilder.FOREIGNOBJECT_OR_DESC: +// return "FOREIGNOBJECT_OR_DESC"; +// case TreeBuilder.MENUITEM: +// return "MENUITEM"; +// } +// return null; +// } +// +// /** +// * Regenerate self +// * +// * @param args +// */ +// public static void main(String[] args) { +// Arrays.sort(ELEMENT_NAMES); +// for (int i = 1; i < ELEMENT_NAMES.length; i++) { +// if (ELEMENT_NAMES[i].hash() == ELEMENT_NAMES[i - 1].hash()) { +// System.err.println("Hash collision: " + ELEMENT_NAMES[i].name +// + ", " + ELEMENT_NAMES[i - 1].name); +// return; +// } +// } +// for (int i = 0; i < ELEMENT_NAMES.length; i++) { +// ElementName el = ELEMENT_NAMES[i]; +// System.out.println("public static final ElementName " +// + el.constName() + " = new ElementName" + el.toString() +// + ";"); +// } +// System.out.println("private final static @NoLength ElementName[] ELEMENT_NAMES = {"); +// for (int i = 0; i < ELEMENT_NAMES.length; i++) { +// ElementName el = ELEMENT_NAMES[i]; +// System.out.println(el.constName() + ","); +// } +// System.out.println("};"); +// System.out.println("private final static int[] ELEMENT_HASHES = {"); +// for (int i = 0; i < ELEMENT_NAMES.length; i++) { +// ElementName el = ELEMENT_NAMES[i]; +// System.out.println(Integer.toString(el.hash()) + ","); +// } +// System.out.println("};"); +// } + + // START GENERATED CODE + public static final ElementName A = new ElementName("a", "a", TreeBuilder.A); + public static final ElementName B = new ElementName("b", "b", TreeBuilder.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U); + public static final ElementName G = new ElementName("g", "g", TreeBuilder.OTHER); + public static final ElementName I = new ElementName("i", "i", TreeBuilder.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U); + public static final ElementName P = new ElementName("p", "p", TreeBuilder.P | SPECIAL | OPTIONAL_END_TAG); + public static final ElementName Q = new ElementName("q", "q", TreeBuilder.OTHER); + public 
static final ElementName S = new ElementName("s", "s", TreeBuilder.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U); + public static final ElementName U = new ElementName("u", "u", TreeBuilder.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U); + public static final ElementName BR = new ElementName("br", "br", TreeBuilder.BR | SPECIAL); + public static final ElementName CI = new ElementName("ci", "ci", TreeBuilder.OTHER); + public static final ElementName CN = new ElementName("cn", "cn", TreeBuilder.OTHER); + public static final ElementName DD = new ElementName("dd", "dd", TreeBuilder.DD_OR_DT | SPECIAL | OPTIONAL_END_TAG); + public static final ElementName DL = new ElementName("dl", "dl", TreeBuilder.UL_OR_OL_OR_DL | SPECIAL); + public static final ElementName DT = new ElementName("dt", "dt", TreeBuilder.DD_OR_DT | SPECIAL | OPTIONAL_END_TAG); + public static final ElementName EM = new ElementName("em", "em", TreeBuilder.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U); + public static final ElementName EQ = new ElementName("eq", "eq", TreeBuilder.OTHER); + public static final ElementName FN = new ElementName("fn", "fn", TreeBuilder.OTHER); + public static final ElementName H1 = new ElementName("h1", "h1", TreeBuilder.H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6 | SPECIAL); + public static final ElementName H2 = new ElementName("h2", "h2", TreeBuilder.H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6 | SPECIAL); + public static final ElementName H3 = new ElementName("h3", "h3", TreeBuilder.H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6 | SPECIAL); + public static final ElementName H4 = new ElementName("h4", "h4", TreeBuilder.H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6 | SPECIAL); + public static final ElementName H5 = new ElementName("h5", "h5", TreeBuilder.H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6 | SPECIAL); + public static final ElementName H6 = new ElementName("h6", "h6", TreeBuilder.H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6 | SPECIAL); + public static final 
ElementName GT = new ElementName("gt", "gt", TreeBuilder.OTHER); + public static final ElementName HR = new ElementName("hr", "hr", TreeBuilder.HR | SPECIAL); + public static final ElementName IN = new ElementName("in", "in", TreeBuilder.OTHER); + public static final ElementName LI = new ElementName("li", "li", TreeBuilder.LI | SPECIAL | OPTIONAL_END_TAG); + public static final ElementName LN = new ElementName("ln", "ln", TreeBuilder.OTHER); + public static final ElementName LT = new ElementName("lt", "lt", TreeBuilder.OTHER); + public static final ElementName MI = new ElementName("mi", "mi", TreeBuilder.MI_MO_MN_MS_MTEXT | SCOPING_AS_MATHML); + public static final ElementName MN = new ElementName("mn", "mn", TreeBuilder.MI_MO_MN_MS_MTEXT | SCOPING_AS_MATHML); + public static final ElementName MO = new ElementName("mo", "mo", TreeBuilder.MI_MO_MN_MS_MTEXT | SCOPING_AS_MATHML); + public static final ElementName MS = new ElementName("ms", "ms", TreeBuilder.MI_MO_MN_MS_MTEXT | SCOPING_AS_MATHML); + public static final ElementName OL = new ElementName("ol", "ol", TreeBuilder.UL_OR_OL_OR_DL | SPECIAL); + public static final ElementName OR = new ElementName("or", "or", TreeBuilder.OTHER); + public static final ElementName PI = new ElementName("pi", "pi", TreeBuilder.OTHER); + public static final ElementName RB = new ElementName("rb", "rb", TreeBuilder.RB_OR_RTC | OPTIONAL_END_TAG); + public static final ElementName RP = new ElementName("rp", "rp", TreeBuilder.RT_OR_RP | OPTIONAL_END_TAG); + public static final ElementName RT = new ElementName("rt", "rt", TreeBuilder.RT_OR_RP | OPTIONAL_END_TAG); + public static final ElementName TD = new ElementName("td", "td", TreeBuilder.TD_OR_TH | SPECIAL | SCOPING | OPTIONAL_END_TAG); + public static final ElementName TH = new ElementName("th", "th", TreeBuilder.TD_OR_TH | SPECIAL | SCOPING | OPTIONAL_END_TAG); + public static final ElementName TR = new ElementName("tr", "tr", TreeBuilder.TR | SPECIAL | FOSTER_PARENTING | 
OPTIONAL_END_TAG); + public static final ElementName TT = new ElementName("tt", "tt", TreeBuilder.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U); + public static final ElementName UL = new ElementName("ul", "ul", TreeBuilder.UL_OR_OL_OR_DL | SPECIAL); + public static final ElementName AND = new ElementName("and", "and", TreeBuilder.OTHER); + public static final ElementName ARG = new ElementName("arg", "arg", TreeBuilder.OTHER); + public static final ElementName ABS = new ElementName("abs", "abs", TreeBuilder.OTHER); + public static final ElementName BIG = new ElementName("big", "big", TreeBuilder.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U); + public static final ElementName BDO = new ElementName("bdo", "bdo", TreeBuilder.OTHER); + public static final ElementName CSC = new ElementName("csc", "csc", TreeBuilder.OTHER); + public static final ElementName COL = new ElementName("col", "col", TreeBuilder.COL | SPECIAL); + public static final ElementName COS = new ElementName("cos", "cos", TreeBuilder.OTHER); + public static final ElementName COT = new ElementName("cot", "cot", TreeBuilder.OTHER); + public static final ElementName DEL = new ElementName("del", "del", TreeBuilder.OTHER); + public static final ElementName DFN = new ElementName("dfn", "dfn", TreeBuilder.OTHER); + public static final ElementName DIR = new ElementName("dir", "dir", TreeBuilder.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIALOG_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL); + public static final ElementName DIV = new ElementName("div", "div", TreeBuilder.DIV_OR_BLOCKQUOTE_OR_CENTER_OR_MENU | SPECIAL); + public static final ElementName EXP = new ElementName("exp", "exp", TreeBuilder.OTHER); + public static final ElementName GCD = new ElementName("gcd", "gcd", TreeBuilder.OTHER); + public static final ElementName GEQ = new ElementName("geq", "geq", TreeBuilder.OTHER); + public 
static final ElementName IMG = new ElementName("img", "img", TreeBuilder.IMG | SPECIAL); + public static final ElementName INS = new ElementName("ins", "ins", TreeBuilder.OTHER); + public static final ElementName INT = new ElementName("int", "int", TreeBuilder.OTHER); + public static final ElementName KBD = new ElementName("kbd", "kbd", TreeBuilder.OTHER); + public static final ElementName LOG = new ElementName("log", "log", TreeBuilder.OTHER); + public static final ElementName LCM = new ElementName("lcm", "lcm", TreeBuilder.OTHER); + public static final ElementName LEQ = new ElementName("leq", "leq", TreeBuilder.OTHER); + public static final ElementName MTD = new ElementName("mtd", "mtd", TreeBuilder.OTHER); + public static final ElementName MIN = new ElementName("min", "min", TreeBuilder.OTHER); + public static final ElementName MAP = new ElementName("map", "map", TreeBuilder.OTHER); + public static final ElementName MTR = new ElementName("mtr", "mtr", TreeBuilder.OTHER); + public static final ElementName MAX = new ElementName("max", "max", TreeBuilder.OTHER); + public static final ElementName NEQ = new ElementName("neq", "neq", TreeBuilder.OTHER); + public static final ElementName NOT = new ElementName("not", "not", TreeBuilder.OTHER); + public static final ElementName NAV = new ElementName("nav", "nav", TreeBuilder.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIALOG_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL); + public static final ElementName PRE = new ElementName("pre", "pre", TreeBuilder.PRE_OR_LISTING | SPECIAL); + public static final ElementName RTC = new ElementName("rtc", "rtc", TreeBuilder.RB_OR_RTC | OPTIONAL_END_TAG); + public static final ElementName REM = new ElementName("rem", "rem", TreeBuilder.OTHER); + public static final ElementName SUB = new ElementName("sub", "sub", TreeBuilder.RUBY_OR_SPAN_OR_SUB_OR_SUP_OR_VAR); + public static final ElementName SEC = new ElementName("sec", "sec", 
TreeBuilder.OTHER); + public static final ElementName SVG = new ElementName("svg", "svg", TreeBuilder.SVG); + public static final ElementName SUM = new ElementName("sum", "sum", TreeBuilder.OTHER); + public static final ElementName SIN = new ElementName("sin", "sin", TreeBuilder.OTHER); + public static final ElementName SEP = new ElementName("sep", "sep", TreeBuilder.OTHER); + public static final ElementName SUP = new ElementName("sup", "sup", TreeBuilder.RUBY_OR_SPAN_OR_SUB_OR_SUP_OR_VAR); + public static final ElementName SET = new ElementName("set", "set", TreeBuilder.OTHER); + public static final ElementName TAN = new ElementName("tan", "tan", TreeBuilder.OTHER); + public static final ElementName USE = new ElementName("use", "use", TreeBuilder.OTHER); + public static final ElementName VAR = new ElementName("var", "var", TreeBuilder.RUBY_OR_SPAN_OR_SUB_OR_SUP_OR_VAR); + public static final ElementName WBR = new ElementName("wbr", "wbr", TreeBuilder.AREA_OR_WBR | SPECIAL); + public static final ElementName XMP = new ElementName("xmp", "xmp", TreeBuilder.XMP | SPECIAL); + public static final ElementName XOR = new ElementName("xor", "xor", TreeBuilder.OTHER); + public static final ElementName AREA = new ElementName("area", "area", TreeBuilder.AREA_OR_WBR | SPECIAL); + public static final ElementName ABBR = new ElementName("abbr", "abbr", TreeBuilder.OTHER); + public static final ElementName BASE = new ElementName("base", "base", TreeBuilder.BASE | SPECIAL); + public static final ElementName BVAR = new ElementName("bvar", "bvar", TreeBuilder.OTHER); + public static final ElementName BODY = new ElementName("body", "body", TreeBuilder.BODY | SPECIAL | OPTIONAL_END_TAG); + public static final ElementName CARD = new ElementName("card", "card", TreeBuilder.OTHER); + public static final ElementName CODE = new ElementName("code", "code", TreeBuilder.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U); + public static final ElementName CITE = new 
ElementName("cite", "cite", TreeBuilder.OTHER); + public static final ElementName CSCH = new ElementName("csch", "csch", TreeBuilder.OTHER); + public static final ElementName COSH = new ElementName("cosh", "cosh", TreeBuilder.OTHER); + public static final ElementName COTH = new ElementName("coth", "coth", TreeBuilder.OTHER); + public static final ElementName CURL = new ElementName("curl", "curl", TreeBuilder.OTHER); + public static final ElementName DESC = new ElementName("desc", "desc", TreeBuilder.FOREIGNOBJECT_OR_DESC | SCOPING_AS_SVG); + public static final ElementName DIFF = new ElementName("diff", "diff", TreeBuilder.OTHER); + public static final ElementName DEFS = new ElementName("defs", "defs", TreeBuilder.OTHER); + public static final ElementName FORM = new ElementName("form", "form", TreeBuilder.FORM | SPECIAL); + public static final ElementName FONT = new ElementName("font", "font", TreeBuilder.FONT); + public static final ElementName GRAD = new ElementName("grad", "grad", TreeBuilder.OTHER); + public static final ElementName HEAD = new ElementName("head", "head", TreeBuilder.HEAD | SPECIAL | OPTIONAL_END_TAG); + public static final ElementName HTML = new ElementName("html", "html", TreeBuilder.HTML | SPECIAL | SCOPING | OPTIONAL_END_TAG); + public static final ElementName LINE = new ElementName("line", "line", TreeBuilder.OTHER); + public static final ElementName LINK = new ElementName("link", "link", TreeBuilder.LINK_OR_BASEFONT_OR_BGSOUND | SPECIAL); + public static final ElementName LIST = new ElementName("list", "list", TreeBuilder.OTHER); + public static final ElementName META = new ElementName("meta", "meta", TreeBuilder.META | SPECIAL); + public static final ElementName MSUB = new ElementName("msub", "msub", TreeBuilder.OTHER); + public static final ElementName MODE = new ElementName("mode", "mode", TreeBuilder.OTHER); + public static final ElementName MATH = new ElementName("math", "math", TreeBuilder.MATH); + public static final ElementName 
MARK = new ElementName("mark", "mark", TreeBuilder.OTHER); + public static final ElementName MASK = new ElementName("mask", "mask", TreeBuilder.OTHER); + public static final ElementName MEAN = new ElementName("mean", "mean", TreeBuilder.OTHER); + public static final ElementName MAIN = new ElementName("main", "main", TreeBuilder.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIALOG_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL); + public static final ElementName MSUP = new ElementName("msup", "msup", TreeBuilder.OTHER); + public static final ElementName MENU = new ElementName("menu", "menu", TreeBuilder.DIV_OR_BLOCKQUOTE_OR_CENTER_OR_MENU | SPECIAL); + public static final ElementName MROW = new ElementName("mrow", "mrow", TreeBuilder.OTHER); + public static final ElementName NONE = new ElementName("none", "none", TreeBuilder.OTHER); + public static final ElementName NOBR = new ElementName("nobr", "nobr", TreeBuilder.NOBR); + public static final ElementName NEST = new ElementName("nest", "nest", TreeBuilder.OTHER); + public static final ElementName PATH = new ElementName("path", "path", TreeBuilder.OTHER); + public static final ElementName PLUS = new ElementName("plus", "plus", TreeBuilder.OTHER); + public static final ElementName RULE = new ElementName("rule", "rule", TreeBuilder.OTHER); + public static final ElementName REAL = new ElementName("real", "real", TreeBuilder.OTHER); + public static final ElementName RELN = new ElementName("reln", "reln", TreeBuilder.OTHER); + public static final ElementName RECT = new ElementName("rect", "rect", TreeBuilder.OTHER); + public static final ElementName ROOT = new ElementName("root", "root", TreeBuilder.OTHER); + public static final ElementName RUBY = new ElementName("ruby", "ruby", TreeBuilder.RUBY_OR_SPAN_OR_SUB_OR_SUP_OR_VAR); + public static final ElementName SECH = new ElementName("sech", "sech", TreeBuilder.OTHER); + public static final ElementName SINH = new 
ElementName("sinh", "sinh", TreeBuilder.OTHER); + public static final ElementName SPAN = new ElementName("span", "span", TreeBuilder.RUBY_OR_SPAN_OR_SUB_OR_SUP_OR_VAR); + public static final ElementName SAMP = new ElementName("samp", "samp", TreeBuilder.OTHER); + public static final ElementName STOP = new ElementName("stop", "stop", TreeBuilder.OTHER); + public static final ElementName SDEV = new ElementName("sdev", "sdev", TreeBuilder.OTHER); + public static final ElementName TIME = new ElementName("time", "time", TreeBuilder.OTHER); + public static final ElementName TRUE = new ElementName("true", "true", TreeBuilder.OTHER); + public static final ElementName TREF = new ElementName("tref", "tref", TreeBuilder.OTHER); + public static final ElementName TANH = new ElementName("tanh", "tanh", TreeBuilder.OTHER); + public static final ElementName TEXT = new ElementName("text", "text", TreeBuilder.OTHER); + public static final ElementName VIEW = new ElementName("view", "view", TreeBuilder.OTHER); + public static final ElementName ASIDE = new ElementName("aside", "aside", TreeBuilder.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIALOG_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL); + public static final ElementName AUDIO = new ElementName("audio", "audio", TreeBuilder.OTHER); + public static final ElementName APPLY = new ElementName("apply", "apply", TreeBuilder.OTHER); + public static final ElementName EMBED = new ElementName("embed", "embed", TreeBuilder.EMBED | SPECIAL); + public static final ElementName FRAME = new ElementName("frame", "frame", TreeBuilder.FRAME | SPECIAL); + public static final ElementName FALSE = new ElementName("false", "false", TreeBuilder.OTHER); + public static final ElementName FLOOR = new ElementName("floor", "floor", TreeBuilder.OTHER); + public static final ElementName GLYPH = new ElementName("glyph", "glyph", TreeBuilder.OTHER); + public static final ElementName HKERN = new 
ElementName("hkern", "hkern", TreeBuilder.OTHER); + public static final ElementName IMAGE = new ElementName("image", "image", TreeBuilder.IMAGE); + public static final ElementName IDENT = new ElementName("ident", "ident", TreeBuilder.OTHER); + public static final ElementName INPUT = new ElementName("input", "input", TreeBuilder.INPUT | SPECIAL); + public static final ElementName LABEL = new ElementName("label", "label", TreeBuilder.OTHER); + public static final ElementName LIMIT = new ElementName("limit", "limit", TreeBuilder.OTHER); + public static final ElementName MFRAC = new ElementName("mfrac", "mfrac", TreeBuilder.OTHER); + public static final ElementName MPATH = new ElementName("mpath", "mpath", TreeBuilder.OTHER); + public static final ElementName METER = new ElementName("meter", "meter", TreeBuilder.OTHER); + public static final ElementName MOVER = new ElementName("mover", "mover", TreeBuilder.OTHER); + public static final ElementName MINUS = new ElementName("minus", "minus", TreeBuilder.OTHER); + public static final ElementName MROOT = new ElementName("mroot", "mroot", TreeBuilder.OTHER); + public static final ElementName MSQRT = new ElementName("msqrt", "msqrt", TreeBuilder.OTHER); + public static final ElementName MTEXT = new ElementName("mtext", "mtext", TreeBuilder.MI_MO_MN_MS_MTEXT | SCOPING_AS_MATHML); + public static final ElementName NOTIN = new ElementName("notin", "notin", TreeBuilder.OTHER); + public static final ElementName PIECE = new ElementName("piece", "piece", TreeBuilder.OTHER); + public static final ElementName PARAM = new ElementName("param", "param", TreeBuilder.PARAM_OR_SOURCE_OR_TRACK | SPECIAL); + public static final ElementName POWER = new ElementName("power", "power", TreeBuilder.OTHER); + public static final ElementName REALS = new ElementName("reals", "reals", TreeBuilder.OTHER); + public static final ElementName STYLE = new ElementName("style", "style", TreeBuilder.STYLE | SPECIAL); + public static final ElementName SMALL = 
new ElementName("small", "small", TreeBuilder.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U); + public static final ElementName THEAD = new ElementName("thead", "thead", TreeBuilder.TBODY_OR_THEAD_OR_TFOOT | SPECIAL | FOSTER_PARENTING | OPTIONAL_END_TAG); + public static final ElementName TABLE = new ElementName("table", "table", TreeBuilder.TABLE | SPECIAL | FOSTER_PARENTING | SCOPING); + public static final ElementName TITLE = new ElementName("title", "title", TreeBuilder.TITLE | SPECIAL | SCOPING_AS_SVG); + public static final ElementName TRACK = new ElementName("track", "track", TreeBuilder.PARAM_OR_SOURCE_OR_TRACK | SPECIAL); + public static final ElementName TSPAN = new ElementName("tspan", "tspan", TreeBuilder.OTHER); + public static final ElementName TIMES = new ElementName("times", "times", TreeBuilder.OTHER); + public static final ElementName TFOOT = new ElementName("tfoot", "tfoot", TreeBuilder.TBODY_OR_THEAD_OR_TFOOT | SPECIAL | FOSTER_PARENTING | OPTIONAL_END_TAG); + public static final ElementName TBODY = new ElementName("tbody", "tbody", TreeBuilder.TBODY_OR_THEAD_OR_TFOOT | SPECIAL | FOSTER_PARENTING | OPTIONAL_END_TAG); + public static final ElementName UNION = new ElementName("union", "union", TreeBuilder.OTHER); + public static final ElementName VKERN = new ElementName("vkern", "vkern", TreeBuilder.OTHER); + public static final ElementName VIDEO = new ElementName("video", "video", TreeBuilder.OTHER); + public static final ElementName ARCSEC = new ElementName("arcsec", "arcsec", TreeBuilder.OTHER); + public static final ElementName ARCCSC = new ElementName("arccsc", "arccsc", TreeBuilder.OTHER); + public static final ElementName ARCTAN = new ElementName("arctan", "arctan", TreeBuilder.OTHER); + public static final ElementName ARCSIN = new ElementName("arcsin", "arcsin", TreeBuilder.OTHER); + public static final ElementName ARCCOS = new ElementName("arccos", "arccos", TreeBuilder.OTHER); + public static final ElementName 
APPLET = new ElementName("applet", "applet", TreeBuilder.MARQUEE_OR_APPLET | SPECIAL | SCOPING); + public static final ElementName ARCCOT = new ElementName("arccot", "arccot", TreeBuilder.OTHER); + public static final ElementName APPROX = new ElementName("approx", "approx", TreeBuilder.OTHER); + public static final ElementName BUTTON = new ElementName("button", "button", TreeBuilder.BUTTON | SPECIAL); + public static final ElementName CIRCLE = new ElementName("circle", "circle", TreeBuilder.OTHER); + public static final ElementName CENTER = new ElementName("center", "center", TreeBuilder.DIV_OR_BLOCKQUOTE_OR_CENTER_OR_MENU | SPECIAL); + public static final ElementName CURSOR = new ElementName("cursor", "cursor", TreeBuilder.OTHER); + public static final ElementName CANVAS = new ElementName("canvas", "canvas", TreeBuilder.OTHER); + public static final ElementName DIVIDE = new ElementName("divide", "divide", TreeBuilder.OTHER); + public static final ElementName DEGREE = new ElementName("degree", "degree", TreeBuilder.OTHER); + public static final ElementName DIALOG = new ElementName("dialog", "dialog", TreeBuilder.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIALOG_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL); + public static final ElementName DOMAIN = new ElementName("domain", "domain", TreeBuilder.OTHER); + public static final ElementName EXISTS = new ElementName("exists", "exists", TreeBuilder.OTHER); + public static final ElementName FETILE = new ElementName("fetile", "feTile", TreeBuilder.OTHER); + public static final ElementName FIGURE = new ElementName("figure", "figure", TreeBuilder.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIALOG_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL); + public static final ElementName FORALL = new ElementName("forall", "forall", TreeBuilder.OTHER); + public static final ElementName FILTER = new 
ElementName("filter", "filter", TreeBuilder.OTHER); + public static final ElementName FOOTER = new ElementName("footer", "footer", TreeBuilder.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIALOG_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL); + public static final ElementName HGROUP = new ElementName("hgroup", "hgroup", TreeBuilder.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIALOG_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL); + public static final ElementName HEADER = new ElementName("header", "header", TreeBuilder.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIALOG_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL); + public static final ElementName IFRAME = new ElementName("iframe", "iframe", TreeBuilder.IFRAME | SPECIAL); + public static final ElementName KEYGEN = new ElementName("keygen", "keygen", TreeBuilder.KEYGEN); + public static final ElementName LAMBDA = new ElementName("lambda", "lambda", TreeBuilder.OTHER); + public static final ElementName LEGEND = new ElementName("legend", "legend", TreeBuilder.OTHER); + public static final ElementName MSPACE = new ElementName("mspace", "mspace", TreeBuilder.OTHER); + public static final ElementName MTABLE = new ElementName("mtable", "mtable", TreeBuilder.OTHER); + public static final ElementName MSTYLE = new ElementName("mstyle", "mstyle", TreeBuilder.OTHER); + public static final ElementName MGLYPH = new ElementName("mglyph", "mglyph", TreeBuilder.MGLYPH_OR_MALIGNMARK); + public static final ElementName MEDIAN = new ElementName("median", "median", TreeBuilder.OTHER); + public static final ElementName MUNDER = new ElementName("munder", "munder", TreeBuilder.OTHER); + public static final ElementName MARKER = new ElementName("marker", "marker", TreeBuilder.OTHER); + public static final ElementName MERROR = new ElementName("merror", "merror", 
TreeBuilder.OTHER); + public static final ElementName MOMENT = new ElementName("moment", "moment", TreeBuilder.OTHER); + public static final ElementName MATRIX = new ElementName("matrix", "matrix", TreeBuilder.OTHER); + public static final ElementName OPTION = new ElementName("option", "option", TreeBuilder.OPTION | OPTIONAL_END_TAG); + public static final ElementName OBJECT = new ElementName("object", "object", TreeBuilder.OBJECT | SPECIAL | SCOPING); + public static final ElementName OUTPUT = new ElementName("output", "output", TreeBuilder.OUTPUT); + public static final ElementName PRIMES = new ElementName("primes", "primes", TreeBuilder.OTHER); + public static final ElementName SOURCE = new ElementName("source", "source", TreeBuilder.PARAM_OR_SOURCE_OR_TRACK); + public static final ElementName STRIKE = new ElementName("strike", "strike", TreeBuilder.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U); + public static final ElementName STRONG = new ElementName("strong", "strong", TreeBuilder.B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U); + public static final ElementName SWITCH = new ElementName("switch", "switch", TreeBuilder.OTHER); + public static final ElementName SYMBOL = new ElementName("symbol", "symbol", TreeBuilder.OTHER); + public static final ElementName SELECT = new ElementName("select", "select", TreeBuilder.SELECT | SPECIAL); + public static final ElementName SUBSET = new ElementName("subset", "subset", TreeBuilder.OTHER); + public static final ElementName SCRIPT = new ElementName("script", "script", TreeBuilder.SCRIPT | SPECIAL); + public static final ElementName TBREAK = new ElementName("tbreak", "tbreak", TreeBuilder.OTHER); + public static final ElementName VECTOR = new ElementName("vector", "vector", TreeBuilder.OTHER); + public static final ElementName ARTICLE = new ElementName("article", "article", 
TreeBuilder.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIALOG_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL); + public static final ElementName ANIMATE = new ElementName("animate", "animate", TreeBuilder.OTHER); + public static final ElementName ARCSECH = new ElementName("arcsech", "arcsech", TreeBuilder.OTHER); + public static final ElementName ARCCSCH = new ElementName("arccsch", "arccsch", TreeBuilder.OTHER); + public static final ElementName ARCTANH = new ElementName("arctanh", "arctanh", TreeBuilder.OTHER); + public static final ElementName ARCSINH = new ElementName("arcsinh", "arcsinh", TreeBuilder.OTHER); + public static final ElementName ARCCOSH = new ElementName("arccosh", "arccosh", TreeBuilder.OTHER); + public static final ElementName ARCCOTH = new ElementName("arccoth", "arccoth", TreeBuilder.OTHER); + public static final ElementName ACRONYM = new ElementName("acronym", "acronym", TreeBuilder.OTHER); + public static final ElementName ADDRESS = new ElementName("address", "address", TreeBuilder.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIALOG_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL); + public static final ElementName BGSOUND = new ElementName("bgsound", "bgsound", TreeBuilder.LINK_OR_BASEFONT_OR_BGSOUND | SPECIAL); + public static final ElementName COMPOSE = new ElementName("compose", "compose", TreeBuilder.OTHER); + public static final ElementName CEILING = new ElementName("ceiling", "ceiling", TreeBuilder.OTHER); + public static final ElementName CSYMBOL = new ElementName("csymbol", "csymbol", TreeBuilder.OTHER); + public static final ElementName CAPTION = new ElementName("caption", "caption", TreeBuilder.CAPTION | SPECIAL | SCOPING); + public static final ElementName DISCARD = new ElementName("discard", "discard", TreeBuilder.OTHER); + public static final ElementName DECLARE = new ElementName("declare", "declare", 
TreeBuilder.OTHER); + public static final ElementName DETAILS = new ElementName("details", "details", TreeBuilder.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIALOG_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL); + public static final ElementName ELLIPSE = new ElementName("ellipse", "ellipse", TreeBuilder.OTHER); + public static final ElementName FEFUNCA = new ElementName("fefunca", "feFuncA", TreeBuilder.OTHER); + public static final ElementName FEFUNCB = new ElementName("fefuncb", "feFuncB", TreeBuilder.OTHER); + public static final ElementName FEBLEND = new ElementName("feblend", "feBlend", TreeBuilder.OTHER); + public static final ElementName FEFLOOD = new ElementName("feflood", "feFlood", TreeBuilder.OTHER); + public static final ElementName FEIMAGE = new ElementName("feimage", "feImage", TreeBuilder.OTHER); + public static final ElementName FEMERGE = new ElementName("femerge", "feMerge", TreeBuilder.OTHER); + public static final ElementName FEFUNCG = new ElementName("fefuncg", "feFuncG", TreeBuilder.OTHER); + public static final ElementName FEFUNCR = new ElementName("fefuncr", "feFuncR", TreeBuilder.OTHER); + public static final ElementName HANDLER = new ElementName("handler", "handler", TreeBuilder.OTHER); + public static final ElementName INVERSE = new ElementName("inverse", "inverse", TreeBuilder.OTHER); + public static final ElementName IMPLIES = new ElementName("implies", "implies", TreeBuilder.OTHER); + public static final ElementName ISINDEX = new ElementName("isindex", "isindex", TreeBuilder.ISINDEX | SPECIAL); + public static final ElementName LOGBASE = new ElementName("logbase", "logbase", TreeBuilder.OTHER); + public static final ElementName LISTING = new ElementName("listing", "listing", TreeBuilder.PRE_OR_LISTING | SPECIAL); + public static final ElementName MFENCED = new ElementName("mfenced", "mfenced", TreeBuilder.OTHER); + public static final ElementName MPADDED = new 
ElementName("mpadded", "mpadded", TreeBuilder.OTHER); + public static final ElementName MARQUEE = new ElementName("marquee", "marquee", TreeBuilder.MARQUEE_OR_APPLET | SPECIAL | SCOPING); + public static final ElementName MACTION = new ElementName("maction", "maction", TreeBuilder.OTHER); + public static final ElementName MSUBSUP = new ElementName("msubsup", "msubsup", TreeBuilder.OTHER); + public static final ElementName NOEMBED = new ElementName("noembed", "noembed", TreeBuilder.NOEMBED | SPECIAL); + public static final ElementName PICTURE = new ElementName("picture", "picture", TreeBuilder.OTHER); + public static final ElementName POLYGON = new ElementName("polygon", "polygon", TreeBuilder.OTHER); + public static final ElementName PATTERN = new ElementName("pattern", "pattern", TreeBuilder.OTHER); + public static final ElementName PRODUCT = new ElementName("product", "product", TreeBuilder.OTHER); + public static final ElementName SETDIFF = new ElementName("setdiff", "setdiff", TreeBuilder.OTHER); + public static final ElementName SECTION = new ElementName("section", "section", TreeBuilder.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIALOG_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL); + public static final ElementName SUMMARY = new ElementName("summary", "summary", TreeBuilder.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIALOG_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL); + public static final ElementName TENDSTO = new ElementName("tendsto", "tendsto", TreeBuilder.OTHER); + public static final ElementName UPLIMIT = new ElementName("uplimit", "uplimit", TreeBuilder.OTHER); + public static final ElementName ALTGLYPH = new ElementName("altglyph", "altGlyph", TreeBuilder.OTHER); + public static final ElementName BASEFONT = new ElementName("basefont", "basefont", TreeBuilder.LINK_OR_BASEFONT_OR_BGSOUND | SPECIAL); + public static final 
ElementName CLIPPATH = new ElementName("clippath", "clipPath", TreeBuilder.OTHER); + public static final ElementName CODOMAIN = new ElementName("codomain", "codomain", TreeBuilder.OTHER); + public static final ElementName COLGROUP = new ElementName("colgroup", "colgroup", TreeBuilder.COLGROUP | SPECIAL | OPTIONAL_END_TAG); + public static final ElementName EMPTYSET = new ElementName("emptyset", "emptyset", TreeBuilder.OTHER); + public static final ElementName FACTOROF = new ElementName("factorof", "factorof", TreeBuilder.OTHER); + public static final ElementName FIELDSET = new ElementName("fieldset", "fieldset", TreeBuilder.FIELDSET | SPECIAL); + public static final ElementName FRAMESET = new ElementName("frameset", "frameset", TreeBuilder.FRAMESET | SPECIAL); + public static final ElementName FEOFFSET = new ElementName("feoffset", "feOffset", TreeBuilder.OTHER); + public static final ElementName GLYPHREF = new ElementName("glyphref", "glyphRef", TreeBuilder.OTHER); + public static final ElementName INTERVAL = new ElementName("interval", "interval", TreeBuilder.OTHER); + public static final ElementName INTEGERS = new ElementName("integers", "integers", TreeBuilder.OTHER); + public static final ElementName INFINITY = new ElementName("infinity", "infinity", TreeBuilder.OTHER); + public static final ElementName LISTENER = new ElementName("listener", "listener", TreeBuilder.OTHER); + public static final ElementName LOWLIMIT = new ElementName("lowlimit", "lowlimit", TreeBuilder.OTHER); + public static final ElementName METADATA = new ElementName("metadata", "metadata", TreeBuilder.OTHER); + public static final ElementName MENCLOSE = new ElementName("menclose", "menclose", TreeBuilder.OTHER); + public static final ElementName MENUITEM = new ElementName("menuitem", "menuitem", TreeBuilder.MENUITEM); + public static final ElementName MPHANTOM = new ElementName("mphantom", "mphantom", TreeBuilder.OTHER); + public static final ElementName NOFRAMES = new 
ElementName("noframes", "noframes", TreeBuilder.NOFRAMES | SPECIAL); + public static final ElementName NOSCRIPT = new ElementName("noscript", "noscript", TreeBuilder.NOSCRIPT | SPECIAL); + public static final ElementName OPTGROUP = new ElementName("optgroup", "optgroup", TreeBuilder.OPTGROUP | OPTIONAL_END_TAG); + public static final ElementName POLYLINE = new ElementName("polyline", "polyline", TreeBuilder.OTHER); + public static final ElementName PREFETCH = new ElementName("prefetch", "prefetch", TreeBuilder.OTHER); + public static final ElementName PROGRESS = new ElementName("progress", "progress", TreeBuilder.OTHER); + public static final ElementName PRSUBSET = new ElementName("prsubset", "prsubset", TreeBuilder.OTHER); + public static final ElementName QUOTIENT = new ElementName("quotient", "quotient", TreeBuilder.OTHER); + public static final ElementName SELECTOR = new ElementName("selector", "selector", TreeBuilder.OTHER); + public static final ElementName TEXTAREA = new ElementName("textarea", "textarea", TreeBuilder.TEXTAREA | SPECIAL); + public static final ElementName TEMPLATE = new ElementName("template", "template", TreeBuilder.TEMPLATE | SPECIAL | SCOPING); + public static final ElementName TEXTPATH = new ElementName("textpath", "textPath", TreeBuilder.OTHER); + public static final ElementName VARIANCE = new ElementName("variance", "variance", TreeBuilder.OTHER); + public static final ElementName ANIMATION = new ElementName("animation", "animation", TreeBuilder.OTHER); + public static final ElementName CONJUGATE = new ElementName("conjugate", "conjugate", TreeBuilder.OTHER); + public static final ElementName CONDITION = new ElementName("condition", "condition", TreeBuilder.OTHER); + public static final ElementName COMPLEXES = new ElementName("complexes", "complexes", TreeBuilder.OTHER); + public static final ElementName FONT_FACE = new ElementName("font-face", "font-face", TreeBuilder.OTHER); + public static final ElementName FACTORIAL = new 
ElementName("factorial", "factorial", TreeBuilder.OTHER); + public static final ElementName INTERSECT = new ElementName("intersect", "intersect", TreeBuilder.OTHER); + public static final ElementName IMAGINARY = new ElementName("imaginary", "imaginary", TreeBuilder.OTHER); + public static final ElementName LAPLACIAN = new ElementName("laplacian", "laplacian", TreeBuilder.OTHER); + public static final ElementName MATRIXROW = new ElementName("matrixrow", "matrixrow", TreeBuilder.OTHER); + public static final ElementName NOTSUBSET = new ElementName("notsubset", "notsubset", TreeBuilder.OTHER); + public static final ElementName OTHERWISE = new ElementName("otherwise", "otherwise", TreeBuilder.OTHER); + public static final ElementName PIECEWISE = new ElementName("piecewise", "piecewise", TreeBuilder.OTHER); + public static final ElementName PLAINTEXT = new ElementName("plaintext", "plaintext", TreeBuilder.PLAINTEXT | SPECIAL); + public static final ElementName RATIONALS = new ElementName("rationals", "rationals", TreeBuilder.OTHER); + public static final ElementName SEMANTICS = new ElementName("semantics", "semantics", TreeBuilder.OTHER); + public static final ElementName TRANSPOSE = new ElementName("transpose", "transpose", TreeBuilder.OTHER); + public static final ElementName ANNOTATION = new ElementName("annotation", "annotation", TreeBuilder.OTHER); + public static final ElementName BLOCKQUOTE = new ElementName("blockquote", "blockquote", TreeBuilder.DIV_OR_BLOCKQUOTE_OR_CENTER_OR_MENU | SPECIAL); + public static final ElementName DIVERGENCE = new ElementName("divergence", "divergence", TreeBuilder.OTHER); + public static final ElementName EULERGAMMA = new ElementName("eulergamma", "eulergamma", TreeBuilder.OTHER); + public static final ElementName EQUIVALENT = new ElementName("equivalent", "equivalent", TreeBuilder.OTHER); + public static final ElementName FIGCAPTION = new ElementName("figcaption", "figcaption", 
TreeBuilder.ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIALOG_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY | SPECIAL); + public static final ElementName IMAGINARYI = new ElementName("imaginaryi", "imaginaryi", TreeBuilder.OTHER); + public static final ElementName MALIGNMARK = new ElementName("malignmark", "malignmark", TreeBuilder.MGLYPH_OR_MALIGNMARK); + public static final ElementName MUNDEROVER = new ElementName("munderover", "munderover", TreeBuilder.OTHER); + public static final ElementName MLABELEDTR = new ElementName("mlabeledtr", "mlabeledtr", TreeBuilder.OTHER); + public static final ElementName NOTANUMBER = new ElementName("notanumber", "notanumber", TreeBuilder.OTHER); + public static final ElementName SOLIDCOLOR = new ElementName("solidcolor", "solidcolor", TreeBuilder.OTHER); + public static final ElementName ALTGLYPHDEF = new ElementName("altglyphdef", "altGlyphDef", TreeBuilder.OTHER); + public static final ElementName DETERMINANT = new ElementName("determinant", "determinant", TreeBuilder.OTHER); + public static final ElementName FEMERGENODE = new ElementName("femergenode", "feMergeNode", TreeBuilder.OTHER); + public static final ElementName FECOMPOSITE = new ElementName("fecomposite", "feComposite", TreeBuilder.OTHER); + public static final ElementName FESPOTLIGHT = new ElementName("fespotlight", "feSpotLight", TreeBuilder.OTHER); + public static final ElementName MALIGNGROUP = new ElementName("maligngroup", "maligngroup", TreeBuilder.OTHER); + public static final ElementName MPRESCRIPTS = new ElementName("mprescripts", "mprescripts", TreeBuilder.OTHER); + public static final ElementName MOMENTABOUT = new ElementName("momentabout", "momentabout", TreeBuilder.OTHER); + public static final ElementName NOTPRSUBSET = new ElementName("notprsubset", "notprsubset", TreeBuilder.OTHER); + public static final ElementName PARTIALDIFF = new ElementName("partialdiff", "partialdiff", TreeBuilder.OTHER); + 
public static final ElementName ALTGLYPHITEM = new ElementName("altglyphitem", "altGlyphItem", TreeBuilder.OTHER); + public static final ElementName ANIMATECOLOR = new ElementName("animatecolor", "animateColor", TreeBuilder.OTHER); + public static final ElementName DATATEMPLATE = new ElementName("datatemplate", "datatemplate", TreeBuilder.OTHER); + public static final ElementName EXPONENTIALE = new ElementName("exponentiale", "exponentiale", TreeBuilder.OTHER); + public static final ElementName FETURBULENCE = new ElementName("feturbulence", "feTurbulence", TreeBuilder.OTHER); + public static final ElementName FEPOINTLIGHT = new ElementName("fepointlight", "fePointLight", TreeBuilder.OTHER); + public static final ElementName FEDROPSHADOW = new ElementName("fedropshadow", "feDropShadow", TreeBuilder.OTHER); + public static final ElementName FEMORPHOLOGY = new ElementName("femorphology", "feMorphology", TreeBuilder.OTHER); + public static final ElementName OUTERPRODUCT = new ElementName("outerproduct", "outerproduct", TreeBuilder.OTHER); + public static final ElementName ANIMATEMOTION = new ElementName("animatemotion", "animateMotion", TreeBuilder.OTHER); + public static final ElementName COLOR_PROFILE = new ElementName("color-profile", "color-profile", TreeBuilder.OTHER); + public static final ElementName FONT_FACE_SRC = new ElementName("font-face-src", "font-face-src", TreeBuilder.OTHER); + public static final ElementName FONT_FACE_URI = new ElementName("font-face-uri", "font-face-uri", TreeBuilder.OTHER); + public static final ElementName FOREIGNOBJECT = new ElementName("foreignobject", "foreignObject", TreeBuilder.FOREIGNOBJECT_OR_DESC | SCOPING_AS_SVG); + public static final ElementName FECOLORMATRIX = new ElementName("fecolormatrix", "feColorMatrix", TreeBuilder.OTHER); + public static final ElementName MISSING_GLYPH = new ElementName("missing-glyph", "missing-glyph", TreeBuilder.OTHER); + public static final ElementName MMULTISCRIPTS = new 
ElementName("mmultiscripts", "mmultiscripts", TreeBuilder.OTHER); + public static final ElementName SCALARPRODUCT = new ElementName("scalarproduct", "scalarproduct", TreeBuilder.OTHER); + public static final ElementName VECTORPRODUCT = new ElementName("vectorproduct", "vectorproduct", TreeBuilder.OTHER); + public static final ElementName ANNOTATION_XML = new ElementName("annotation-xml", "annotation-xml", TreeBuilder.ANNOTATION_XML | SCOPING_AS_MATHML); + public static final ElementName DEFINITION_SRC = new ElementName("definition-src", "definition-src", TreeBuilder.OTHER); + public static final ElementName FONT_FACE_NAME = new ElementName("font-face-name", "font-face-name", TreeBuilder.OTHER); + public static final ElementName FEGAUSSIANBLUR = new ElementName("fegaussianblur", "feGaussianBlur", TreeBuilder.OTHER); + public static final ElementName FEDISTANTLIGHT = new ElementName("fedistantlight", "feDistantLight", TreeBuilder.OTHER); + public static final ElementName LINEARGRADIENT = new ElementName("lineargradient", "linearGradient", TreeBuilder.OTHER); + public static final ElementName NATURALNUMBERS = new ElementName("naturalnumbers", "naturalnumbers", TreeBuilder.OTHER); + public static final ElementName RADIALGRADIENT = new ElementName("radialgradient", "radialGradient", TreeBuilder.OTHER); + public static final ElementName ANIMATETRANSFORM = new ElementName("animatetransform", "animateTransform", TreeBuilder.OTHER); + public static final ElementName CARTESIANPRODUCT = new ElementName("cartesianproduct", "cartesianproduct", TreeBuilder.OTHER); + public static final ElementName FONT_FACE_FORMAT = new ElementName("font-face-format", "font-face-format", TreeBuilder.OTHER); + public static final ElementName FECONVOLVEMATRIX = new ElementName("feconvolvematrix", "feConvolveMatrix", TreeBuilder.OTHER); + public static final ElementName FEDIFFUSELIGHTING = new ElementName("fediffuselighting", "feDiffuseLighting", TreeBuilder.OTHER); + public static final 
ElementName FEDISPLACEMENTMAP = new ElementName("fedisplacementmap", "feDisplacementMap", TreeBuilder.OTHER); + public static final ElementName FESPECULARLIGHTING = new ElementName("fespecularlighting", "feSpecularLighting", TreeBuilder.OTHER); + public static final ElementName DOMAINOFAPPLICATION = new ElementName("domainofapplication", "domainofapplication", TreeBuilder.OTHER); + public static final ElementName FECOMPONENTTRANSFER = new ElementName("fecomponenttransfer", "feComponentTransfer", TreeBuilder.OTHER); + private final static @NoLength ElementName[] ELEMENT_NAMES = { + A, + B, + G, + I, + P, + Q, + S, + U, + BR, + CI, + CN, + DD, + DL, + DT, + EM, + EQ, + FN, + H1, + H2, + H3, + H4, + H5, + H6, + GT, + HR, + IN, + LI, + LN, + LT, + MI, + MN, + MO, + MS, + OL, + OR, + PI, + RB, + RP, + RT, + TD, + TH, + TR, + TT, + UL, + AND, + ARG, + ABS, + BIG, + BDO, + CSC, + COL, + COS, + COT, + DEL, + DFN, + DIR, + DIV, + EXP, + GCD, + GEQ, + IMG, + INS, + INT, + KBD, + LOG, + LCM, + LEQ, + MTD, + MIN, + MAP, + MTR, + MAX, + NEQ, + NOT, + NAV, + PRE, + RTC, + REM, + SUB, + SEC, + SVG, + SUM, + SIN, + SEP, + SUP, + SET, + TAN, + USE, + VAR, + WBR, + XMP, + XOR, + AREA, + ABBR, + BASE, + BVAR, + BODY, + CARD, + CODE, + CITE, + CSCH, + COSH, + COTH, + CURL, + DESC, + DIFF, + DEFS, + FORM, + FONT, + GRAD, + HEAD, + HTML, + LINE, + LINK, + LIST, + META, + MSUB, + MODE, + MATH, + MARK, + MASK, + MEAN, + MAIN, + MSUP, + MENU, + MROW, + NONE, + NOBR, + NEST, + PATH, + PLUS, + RULE, + REAL, + RELN, + RECT, + ROOT, + RUBY, + SECH, + SINH, + SPAN, + SAMP, + STOP, + SDEV, + TIME, + TRUE, + TREF, + TANH, + TEXT, + VIEW, + ASIDE, + AUDIO, + APPLY, + EMBED, + FRAME, + FALSE, + FLOOR, + GLYPH, + HKERN, + IMAGE, + IDENT, + INPUT, + LABEL, + LIMIT, + MFRAC, + MPATH, + METER, + MOVER, + MINUS, + MROOT, + MSQRT, + MTEXT, + NOTIN, + PIECE, + PARAM, + POWER, + REALS, + STYLE, + SMALL, + THEAD, + TABLE, + TITLE, + TRACK, + TSPAN, + TIMES, + TFOOT, + TBODY, + UNION, + VKERN, + VIDEO, + 
ARCSEC, + ARCCSC, + ARCTAN, + ARCSIN, + ARCCOS, + APPLET, + ARCCOT, + APPROX, + BUTTON, + CIRCLE, + CENTER, + CURSOR, + CANVAS, + DIVIDE, + DEGREE, + DIALOG, + DOMAIN, + EXISTS, + FETILE, + FIGURE, + FORALL, + FILTER, + FOOTER, + HGROUP, + HEADER, + IFRAME, + KEYGEN, + LAMBDA, + LEGEND, + MSPACE, + MTABLE, + MSTYLE, + MGLYPH, + MEDIAN, + MUNDER, + MARKER, + MERROR, + MOMENT, + MATRIX, + OPTION, + OBJECT, + OUTPUT, + PRIMES, + SOURCE, + STRIKE, + STRONG, + SWITCH, + SYMBOL, + SELECT, + SUBSET, + SCRIPT, + TBREAK, + VECTOR, + ARTICLE, + ANIMATE, + ARCSECH, + ARCCSCH, + ARCTANH, + ARCSINH, + ARCCOSH, + ARCCOTH, + ACRONYM, + ADDRESS, + BGSOUND, + COMPOSE, + CEILING, + CSYMBOL, + CAPTION, + DISCARD, + DECLARE, + DETAILS, + ELLIPSE, + FEFUNCA, + FEFUNCB, + FEBLEND, + FEFLOOD, + FEIMAGE, + FEMERGE, + FEFUNCG, + FEFUNCR, + HANDLER, + INVERSE, + IMPLIES, + ISINDEX, + LOGBASE, + LISTING, + MFENCED, + MPADDED, + MARQUEE, + MACTION, + MSUBSUP, + NOEMBED, + PICTURE, + POLYGON, + PATTERN, + PRODUCT, + SETDIFF, + SECTION, + SUMMARY, + TENDSTO, + UPLIMIT, + ALTGLYPH, + BASEFONT, + CLIPPATH, + CODOMAIN, + COLGROUP, + EMPTYSET, + FACTOROF, + FIELDSET, + FRAMESET, + FEOFFSET, + GLYPHREF, + INTERVAL, + INTEGERS, + INFINITY, + LISTENER, + LOWLIMIT, + METADATA, + MENCLOSE, + MENUITEM, + MPHANTOM, + NOFRAMES, + NOSCRIPT, + OPTGROUP, + POLYLINE, + PREFETCH, + PROGRESS, + PRSUBSET, + QUOTIENT, + SELECTOR, + TEXTAREA, + TEMPLATE, + TEXTPATH, + VARIANCE, + ANIMATION, + CONJUGATE, + CONDITION, + COMPLEXES, + FONT_FACE, + FACTORIAL, + INTERSECT, + IMAGINARY, + LAPLACIAN, + MATRIXROW, + NOTSUBSET, + OTHERWISE, + PIECEWISE, + PLAINTEXT, + RATIONALS, + SEMANTICS, + TRANSPOSE, + ANNOTATION, + BLOCKQUOTE, + DIVERGENCE, + EULERGAMMA, + EQUIVALENT, + FIGCAPTION, + IMAGINARYI, + MALIGNMARK, + MUNDEROVER, + MLABELEDTR, + NOTANUMBER, + SOLIDCOLOR, + ALTGLYPHDEF, + DETERMINANT, + FEMERGENODE, + FECOMPOSITE, + FESPOTLIGHT, + MALIGNGROUP, + MPRESCRIPTS, + MOMENTABOUT, + NOTPRSUBSET, + PARTIALDIFF, + 
ALTGLYPHITEM, + ANIMATECOLOR, + DATATEMPLATE, + EXPONENTIALE, + FETURBULENCE, + FEPOINTLIGHT, + FEDROPSHADOW, + FEMORPHOLOGY, + OUTERPRODUCT, + ANIMATEMOTION, + COLOR_PROFILE, + FONT_FACE_SRC, + FONT_FACE_URI, + FOREIGNOBJECT, + FECOLORMATRIX, + MISSING_GLYPH, + MMULTISCRIPTS, + SCALARPRODUCT, + VECTORPRODUCT, + ANNOTATION_XML, + DEFINITION_SRC, + FONT_FACE_NAME, + FEGAUSSIANBLUR, + FEDISTANTLIGHT, + LINEARGRADIENT, + NATURALNUMBERS, + RADIALGRADIENT, + ANIMATETRANSFORM, + CARTESIANPRODUCT, + FONT_FACE_FORMAT, + FECONVOLVEMATRIX, + FEDIFFUSELIGHTING, + FEDISPLACEMENTMAP, + FESPECULARLIGHTING, + DOMAINOFAPPLICATION, + FECOMPONENTTRANSFER, + }; + private final static int[] ELEMENT_HASHES = { + 1057, + 1090, + 1255, + 1321, + 1552, + 1585, + 1651, + 1717, + 68162, + 68899, + 69059, + 69764, + 70020, + 70276, + 71077, + 71205, + 72134, + 72232, + 72264, + 72296, + 72328, + 72360, + 72392, + 73351, + 74312, + 75209, + 78124, + 78284, + 78476, + 79149, + 79309, + 79341, + 79469, + 81295, + 81487, + 82224, + 84050, + 84498, + 84626, + 86164, + 86292, + 86612, + 86676, + 87445, + 3183041, + 3186241, + 3198017, + 3218722, + 3226754, + 3247715, + 3256803, + 3263971, + 3264995, + 3289252, + 3291332, + 3295524, + 3299620, + 3326725, + 3379303, + 3392679, + 3448233, + 3460553, + 3461577, + 3510347, + 3546604, + 3552364, + 3556524, + 3576461, + 3586349, + 3588141, + 3590797, + 3596333, + 3622062, + 3625454, + 3627054, + 3675728, + 3739282, + 3749042, + 3771059, + 3771571, + 3776211, + 3782323, + 3782963, + 3784883, + 3785395, + 3788979, + 3815476, + 3839605, + 3885110, + 3917911, + 3948984, + 3951096, + 135304769, + 135858241, + 136498210, + 136906434, + 137138658, + 137512995, + 137531875, + 137548067, + 137629283, + 137645539, + 137646563, + 137775779, + 138529956, + 138615076, + 139040932, + 140954086, + 141179366, + 141690439, + 142738600, + 143013512, + 146979116, + 147175724, + 147475756, + 147902637, + 147936877, + 148017645, + 148131885, + 148228141, + 148229165, + 
148309165, + 148317229, + 148395629, + 148551853, + 148618829, + 149076462, + 149490158, + 149572782, + 151277616, + 151639440, + 153268914, + 153486514, + 153563314, + 153750706, + 153763314, + 153914034, + 154406067, + 154417459, + 154600979, + 154678323, + 154680979, + 154866835, + 155366708, + 155375188, + 155391572, + 155465780, + 155869364, + 158045494, + 168988979, + 169321621, + 169652752, + 173151309, + 174240818, + 174247297, + 174669292, + 175391532, + 176638123, + 177380397, + 177879204, + 177886734, + 180753473, + 181020073, + 181503558, + 181686320, + 181999237, + 181999311, + 182048201, + 182074866, + 182078003, + 182083764, + 182920847, + 184716457, + 184976961, + 185145071, + 187281445, + 187872052, + 188100653, + 188875944, + 188919873, + 188920457, + 189107250, + 189203987, + 189371817, + 189414886, + 189567458, + 190266670, + 191318187, + 191337609, + 202479203, + 202493027, + 202835587, + 202843747, + 203013219, + 203036048, + 203045987, + 203177552, + 203898516, + 204648562, + 205067918, + 205078130, + 205096654, + 205689142, + 205690439, + 205766017, + 205988909, + 207213161, + 207794484, + 207800999, + 208023602, + 208213644, + 208213647, + 210261490, + 210310273, + 210940978, + 213325049, + 213946445, + 214055079, + 215125040, + 215134273, + 215135028, + 215237420, + 215418148, + 215553166, + 215553394, + 215563858, + 215627949, + 215754324, + 217529652, + 217713834, + 217732628, + 218731945, + 221417045, + 221424946, + 221493746, + 221515401, + 221658189, + 221908140, + 221910626, + 221921586, + 222659762, + 225001091, + 236105833, + 236113965, + 236194995, + 236195427, + 236206132, + 236206387, + 236211683, + 236212707, + 236381647, + 236571826, + 237124271, + 238210544, + 238270764, + 238435405, + 238501172, + 239224867, + 239257644, + 239710497, + 240307721, + 241208789, + 241241557, + 241318060, + 241319404, + 241343533, + 241344069, + 241405397, + 241765845, + 243864964, + 244502085, + 244946220, + 245109902, + 247647266, + 247707956, 
+ 248648814, + 248648836, + 248682161, + 248986932, + 249058914, + 249697357, + 251841204, + 252132601, + 252135604, + 252317348, + 255007012, + 255278388, + 255641645, + 256365156, + 257566121, + 269763372, + 271202790, + 271863856, + 272049197, + 272127474, + 274339449, + 274939471, + 275388004, + 275388005, + 275388006, + 275977800, + 278267602, + 278513831, + 278712622, + 281613765, + 281683369, + 282120228, + 282250732, + 282498697, + 282508942, + 283743649, + 283787570, + 284710386, + 285391148, + 285478533, + 285854898, + 285873762, + 286931113, + 288964227, + 289445441, + 289591340, + 289689648, + 291671489, + 303512884, + 305319975, + 305610036, + 305764101, + 308448294, + 308675890, + 312085683, + 312264750, + 315032867, + 316391000, + 317331042, + 317902135, + 318950711, + 319447220, + 321499182, + 322538804, + 323145200, + 337067316, + 337826293, + 339905989, + 340833697, + 341457068, + 342310196, + 345302593, + 349554733, + 349771471, + 349786245, + 350819405, + 356072847, + 370349192, + 373962798, + 375558638, + 375574835, + 376053993, + 383276530, + 383373833, + 383407586, + 384439906, + 386079012, + 404133513, + 404307343, + 407031852, + 408072233, + 409112005, + 409608425, + 409713793, + 409771500, + 419040932, + 437730612, + 439529766, + 442616365, + 442813037, + 443157674, + 443295316, + 450118444, + 450482697, + 456789668, + 459935396, + 471217869, + 474073645, + 476230702, + 476665218, + 476717289, + 483014825, + 485083298, + 489306281, + 538364390, + 540675748, + 543819186, + 543958612, + 576960820, + 577242548, + 610515252, + 642202932, + 644420819, + }; +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/ErrorReportingTokenizer.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/ErrorReportingTokenizer.java new file mode 100644 index 000000000..f1749e0b3 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/ErrorReportingTokenizer.java @@ -0,0 +1,772 @@ +/* + * Copyright (c) 
2009-2013 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.impl; + +import nu.validator.htmlparser.annotation.Inline; +import nu.validator.htmlparser.annotation.NoLength; +import nu.validator.htmlparser.common.TokenHandler; +import nu.validator.htmlparser.common.TransitionHandler; +import nu.validator.htmlparser.common.XmlViolationPolicy; + +import java.util.HashMap; + +import org.xml.sax.SAXException; +import org.xml.sax.SAXParseException; + +public class ErrorReportingTokenizer extends Tokenizer { + + /** + * Magic value for UTF-16 operations. + */ + private static final int SURROGATE_OFFSET = (0x10000 - (0xD800 << 10) - 0xDC00); + + /** + * The policy for non-space non-XML characters. + */ + private XmlViolationPolicy contentNonXmlCharPolicy = XmlViolationPolicy.ALTER_INFOSET; + + /** + * Keeps track of PUA warnings. 
+ */ + private boolean alreadyWarnedAboutPrivateUseCharacters; + + /** + * The current line number in the current resource being parsed. (First line + * is 1.) Passed on as locator data. + */ + private int line; + + private int linePrev; + + /** + * The current column number in the current resource being tokenized. (First + * column is 1, counted by UTF-16 code units.) Passed on as locator data. + */ + private int col; + + private int colPrev; + + private boolean nextCharOnNewLine; + + private char prev; + + private HashMap<String, String> errorProfileMap = null; + + private TransitionHandler transitionHandler = null; + + private int transitionBaseOffset = 0; + + /** + * @param tokenHandler + * @param newAttributesEachTime + */ + public ErrorReportingTokenizer(TokenHandler tokenHandler, + boolean newAttributesEachTime) { + super(tokenHandler, newAttributesEachTime); + } + + /** + * @param tokenHandler + */ + public ErrorReportingTokenizer(TokenHandler tokenHandler) { + super(tokenHandler); + } + + /** + * @see org.xml.sax.Locator#getLineNumber() + */ + public int getLineNumber() { + if (line > 0) { + return line; + } else { + return -1; + } + } + + /** + * @see org.xml.sax.Locator#getColumnNumber() + */ + public int getColumnNumber() { + if (col > 0) { + return col; + } else { + return -1; + } + } + + /** + * Sets the contentNonXmlCharPolicy. + * + * @param contentNonXmlCharPolicy + * the contentNonXmlCharPolicy to set + */ + public void setContentNonXmlCharPolicy( + XmlViolationPolicy contentNonXmlCharPolicy) { + this.contentNonXmlCharPolicy = contentNonXmlCharPolicy; + } + + /** + * Sets the errorProfile. + * + * @param errorProfile + */ + public void setErrorProfile(HashMap<String, String> errorProfileMap) { + this.errorProfileMap = errorProfileMap; + } + + /** + * Reports on an event based on profile selected. 
+ * + * @param profile + * the profile this message belongs to + * @param message + * the message itself + * @throws SAXException + */ + public void note(String profile, String message) throws SAXException { + if (errorProfileMap == null) + return; + String level = errorProfileMap.get(profile); + if ("warn".equals(level)) { + warn(message); + } else if ("err".equals(level)) { + err(message); + // } else if ("info".equals(level)) { + // info(message); + } + } + + protected void startErrorReporting() throws SAXException { + line = linePrev = 0; + col = colPrev = 1; + nextCharOnNewLine = true; + prev = '\u0000'; + alreadyWarnedAboutPrivateUseCharacters = false; + transitionBaseOffset = 0; + } + + @Inline protected void silentCarriageReturn() { + nextCharOnNewLine = true; + lastCR = true; + } + + @Inline protected void silentLineFeed() { + nextCharOnNewLine = true; + } + + /** + * Returns the line. + * + * @return the line + */ + public int getLine() { + return line; + } + + /** + * Returns the col. + * + * @return the col + */ + public int getCol() { + return col; + } + + /** + * Returns the nextCharOnNewLine. + * + * @return the nextCharOnNewLine + */ + public boolean isNextCharOnNewLine() { + return nextCharOnNewLine; + } + + /** + * Flushes coalesced character tokens. 
+ * + * @param buf + * TODO + * @param pos + * TODO + * + * @throws SAXException + */ + @Override protected void flushChars(char[] buf, int pos) + throws SAXException { + if (pos > cstart) { + int currLine = line; + int currCol = col; + line = linePrev; + col = colPrev; + tokenHandler.characters(buf, cstart, pos - cstart); + line = currLine; + col = currCol; + } + cstart = 0x7fffffff; + } + + @Override protected char checkChar(@NoLength char[] buf, int pos) + throws SAXException { + linePrev = line; + colPrev = col; + if (nextCharOnNewLine) { + line++; + col = 1; + nextCharOnNewLine = false; + } else { + col++; + } + + char c = buf[pos]; + switch (c) { + case '\u0000': + err("Saw U+0000 in stream."); + case '\t': + case '\r': + case '\n': + break; + case '\u000C': + if (contentNonXmlCharPolicy == XmlViolationPolicy.FATAL) { + fatal("This document is not mappable to XML 1.0 without data loss due to " + + toUPlusString(c) + + " which is not a legal XML 1.0 character."); + } else { + if (contentNonXmlCharPolicy == XmlViolationPolicy.ALTER_INFOSET) { + c = buf[pos] = ' '; + } + warn("This document is not mappable to XML 1.0 without data loss due to " + + toUPlusString(c) + + " which is not a legal XML 1.0 character."); + } + break; + default: + if ((c & 0xFC00) == 0xDC00) { + // Got a low surrogate. 
See if prev was high + // surrogate + if ((prev & 0xFC00) == 0xD800) { + int intVal = (prev << 10) + c + SURROGATE_OFFSET; + if ((intVal & 0xFFFE) == 0xFFFE) { + err("Astral non-character."); + } + if (isAstralPrivateUse(intVal)) { + warnAboutPrivateUseChar(); + } + } + } else if ((c < ' ' || ((c & 0xFFFE) == 0xFFFE))) { + switch (contentNonXmlCharPolicy) { + case FATAL: + fatal("Forbidden code point " + toUPlusString(c) + + "."); + break; + case ALTER_INFOSET: + c = buf[pos] = '\uFFFD'; + // fall through + case ALLOW: + err("Forbidden code point " + toUPlusString(c) + + "."); + } + } else if ((c >= '\u007F') && (c <= '\u009F') + || (c >= '\uFDD0') && (c <= '\uFDEF')) { + err("Forbidden code point " + toUPlusString(c) + "."); + } else if (isPrivateUse(c)) { + warnAboutPrivateUseChar(); + } + } + prev = c; + return c; + } + + /** + * @throws SAXException + * @see nu.validator.htmlparser.impl.Tokenizer#transition(int, int, boolean, + * int) + */ + @Override protected int transition(int from, int to, boolean reconsume, + int pos) throws SAXException { + if (transitionHandler != null) { + transitionHandler.transition(from, to, reconsume, + transitionBaseOffset + pos); + } + return to; + } + + private String toUPlusString(int c) { + String hexString = Integer.toHexString(c); + switch (hexString.length()) { + case 1: + return "U+000" + hexString; + case 2: + return "U+00" + hexString; + case 3: + return "U+0" + hexString; + default: + return "U+" + hexString; + } + } + + /** + * Emits a warning about private use characters if the warning has not been + * emitted yet. + * + * @throws SAXException + */ + private void warnAboutPrivateUseChar() throws SAXException { + if (!alreadyWarnedAboutPrivateUseCharacters) { + warn("Document uses the Unicode Private Use Area(s), which should not be used in publicly exchanged documents. (Charmod C073)"); + alreadyWarnedAboutPrivateUseCharacters = true; + } + } + + /** + * Tells if the argument is a BMP PUA character. 
+ * + * @param c + * the UTF-16 code unit to check + * @return <code>true</code> if PUA character + */ + private boolean isPrivateUse(char c) { + return c >= '\uE000' && c <= '\uF8FF'; + } + + /** + * Tells if the argument is an astral PUA character. + * + * @param c + * the code point to check + * @return <code>true</code> if astral private use + */ + private boolean isAstralPrivateUse(int c) { + return (c >= 0xF0000 && c <= 0xFFFFD) + || (c >= 0x100000 && c <= 0x10FFFD); + } + + @Override protected void errGarbageAfterLtSlash() throws SAXException { + err("Garbage after \u201C</\u201D."); + } + + @Override protected void errLtSlashGt() throws SAXException { + err("Saw \u201C</>\u201D. Probable causes: Unescaped \u201C<\u201D (escape as \u201C<\u201D) or mistyped end tag."); + } + + @Override protected void errWarnLtSlashInRcdata() throws SAXException { + if (html4) { + err((stateSave == Tokenizer.DATA ? "CDATA" : "RCDATA") + + " element \u201C" + + endTagExpectation + + "\u201D contained the string \u201C</\u201D, but it was not the start of the end tag. (HTML4-only error)"); + } else { + warn((stateSave == Tokenizer.DATA ? "CDATA" : "RCDATA") + + " element \u201C" + + endTagExpectation + + "\u201D contained the string \u201C</\u201D, but this did not close the element."); + } + } + + @Override protected void errHtml4LtSlashInRcdata(char folded) + throws SAXException { + if (html4 && (index > 0 || (folded >= 'a' && folded <= 'z')) + && ElementName.IFRAME != endTagExpectation) { + err((stateSave == Tokenizer.DATA ? "CDATA" : "RCDATA") + + " element \u201C" + + endTagExpectation.name + + "\u201D contained the string \u201C</\u201D, but it was not the start of the end tag. 
(HTML4-only error)"); + } + } + + @Override protected void errCharRefLacksSemicolon() throws SAXException { + err("Character reference was not terminated by a semicolon."); + } + + @Override protected void errNoDigitsInNCR() throws SAXException { + err("No digits after \u201C" + strBufToString() + "\u201D."); + } + + @Override protected void errGtInSystemId() throws SAXException { + err("\u201C>\u201D in system identifier."); + } + + @Override protected void errGtInPublicId() throws SAXException { + err("\u201C>\u201D in public identifier."); + } + + @Override protected void errNamelessDoctype() throws SAXException { + err("Nameless doctype."); + } + + @Override protected void errConsecutiveHyphens() throws SAXException { + err("Consecutive hyphens did not terminate a comment. \u201C--\u201D is not permitted inside a comment, but e.g. \u201C- -\u201D is."); + } + + @Override protected void errPrematureEndOfComment() throws SAXException { + err("Premature end of comment. Use \u201C-->\u201D to end a comment properly."); + } + + @Override protected void errBogusComment() throws SAXException { + err("Bogus comment."); + } + + @Override protected void errUnquotedAttributeValOrNull(char c) + throws SAXException { + switch (c) { + case '<': + err("\u201C<\u201D in an unquoted attribute value. Probable cause: Missing \u201C>\u201D immediately before."); + return; + case '`': + err("\u201C`\u201D in an unquoted attribute value. Probable cause: Using the wrong character as a quote."); + return; + case '\uFFFD': + return; + default: + err("\u201C" + + c + + "\u201D in an unquoted attribute value. 
Probable causes: Attributes running together or a URL query string in an unquoted attribute value."); + return; + } + } + + @Override protected void errSlashNotFollowedByGt() throws SAXException { + err("A slash was not immediately followed by \u201C>\u201D."); + } + + @Override protected void errHtml4XmlVoidSyntax() throws SAXException { + if (html4) { + err("The \u201C/>\u201D syntax on void elements is not allowed. (This is an HTML4-only error.)"); + } + } + + @Override protected void errNoSpaceBetweenAttributes() throws SAXException { + err("No space between attributes."); + } + + @Override protected void errHtml4NonNameInUnquotedAttribute(char c) + throws SAXException { + if (html4 + && !((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') + || (c >= '0' && c <= '9') || c == '.' || c == '-' + || c == '_' || c == ':')) { + err("Non-name character in an unquoted attribute value. (This is an HTML4-only error.)"); + } + } + + @Override protected void errLtOrEqualsOrGraveInUnquotedAttributeOrNull( + char c) throws SAXException { + switch (c) { + case '=': + err("\u201C=\u201D at the start of an unquoted attribute value. Probable cause: Stray duplicate equals sign."); + return; + case '<': + err("\u201C<\u201D at the start of an unquoted attribute value. Probable cause: Missing \u201C>\u201D immediately before."); + return; + case '`': + err("\u201C`\u201D at the start of an unquoted attribute value. Probable cause: Using the wrong character as a quote."); + return; + } + } + + @Override protected void errAttributeValueMissing() throws SAXException { + err("Attribute value missing."); + } + + @Override protected void errBadCharBeforeAttributeNameOrNull(char c) + throws SAXException { + if (c == '<') { + err("Saw \u201C<\u201D when expecting an attribute name. 
Probable cause: Missing \u201C>\u201D immediately before."); + } else if (c == '=') { + errEqualsSignBeforeAttributeName(); + } else if (c != '\uFFFD') { + errQuoteBeforeAttributeName(c); + } + } + + @Override protected void errEqualsSignBeforeAttributeName() + throws SAXException { + err("Saw \u201C=\u201D when expecting an attribute name. Probable cause: Attribute name missing."); + } + + @Override protected void errBadCharAfterLt(char c) throws SAXException { + err("Bad character \u201C" + + c + + "\u201D after \u201C<\u201D. Probable cause: Unescaped \u201C<\u201D. Try escaping it as \u201C<\u201D."); + } + + @Override protected void errLtGt() throws SAXException { + err("Saw \u201C<>\u201D. Probable causes: Unescaped \u201C<\u201D (escape as \u201C<\u201D) or mistyped start tag."); + } + + @Override protected void errProcessingInstruction() throws SAXException { + err("Saw \u201C<?\u201D. Probable cause: Attempt to use an XML processing instruction in HTML. (XML processing instructions are not supported in HTML.)"); + } + + @Override protected void errUnescapedAmpersandInterpretedAsCharacterReference() + throws SAXException { + if (errorHandler == null) { + return; + } + SAXParseException spe = new SAXParseException( + "The string following \u201C&\u201D was interpreted as a character reference. (\u201C&\u201D probably should have been escaped as \u201C&\u201D.)", + ampersandLocation); + errorHandler.error(spe); + } + + @Override protected void errNotSemicolonTerminated() throws SAXException { + err("Named character reference was not terminated by a semicolon. (Or \u201C&\u201D should have been escaped as \u201C&\u201D.)"); + } + + @Override protected void errNoNamedCharacterMatch() throws SAXException { + if (errorHandler == null) { + return; + } + SAXParseException spe = new SAXParseException( + "\u201C&\u201D did not start a character reference. 
(\u201C&\u201D probably should have been escaped as \u201C&\u201D.)", + ampersandLocation); + errorHandler.error(spe); + } + + @Override protected void errQuoteBeforeAttributeName(char c) + throws SAXException { + err("Saw \u201C" + + c + + "\u201D when expecting an attribute name. Probable cause: \u201C=\u201D missing immediately before."); + } + + @Override protected void errQuoteOrLtInAttributeNameOrNull(char c) + throws SAXException { + if (c == '<') { + err("\u201C<\u201D in attribute name. Probable cause: \u201C>\u201D missing immediately before."); + } else if (c != '\uFFFD') { + err("Quote \u201C" + + c + + "\u201D in attribute name. Probable cause: Matching quote missing somewhere earlier."); + } + } + + @Override protected void errExpectedPublicId() throws SAXException { + err("Expected a public identifier but the doctype ended."); + } + + @Override protected void errBogusDoctype() throws SAXException { + err("Bogus doctype."); + } + + @Override protected void maybeWarnPrivateUseAstral() throws SAXException { + if (errorHandler != null && isAstralPrivateUse(value)) { + warnAboutPrivateUseChar(); + } + } + + @Override protected void maybeWarnPrivateUse(char ch) throws SAXException { + if (errorHandler != null && isPrivateUse(ch)) { + warnAboutPrivateUseChar(); + } + } + + @Override protected void maybeErrAttributesOnEndTag(HtmlAttributes attrs) + throws SAXException { + if (attrs.getLength() != 0) { + /* + * When an end tag token is emitted with attributes, that is a parse + * error. 
+ */ + err("End tag had attributes."); + } + } + + @Override protected void maybeErrSlashInEndTag(boolean selfClosing) + throws SAXException { + if (selfClosing && endTag) { + err("Stray \u201C/\u201D at the end of an end tag."); + } + } + + @Override protected char errNcrNonCharacter(char ch) throws SAXException { + switch (contentNonXmlCharPolicy) { + case FATAL: + fatal("Character reference expands to a non-character (" + + toUPlusString((char) value) + ")."); + break; + case ALTER_INFOSET: + ch = '\uFFFD'; + // fall through + case ALLOW: + err("Character reference expands to a non-character (" + + toUPlusString((char) value) + ")."); + } + return ch; + } + + /** + * @see nu.validator.htmlparser.impl.Tokenizer#errAstralNonCharacter(int) + */ + @Override protected void errAstralNonCharacter(int ch) throws SAXException { + err("Character reference expands to an astral non-character (" + + toUPlusString(value) + ")."); + } + + @Override protected void errNcrSurrogate() throws SAXException { + err("Character reference expands to a surrogate."); + } + + @Override protected char errNcrControlChar(char ch) throws SAXException { + switch (contentNonXmlCharPolicy) { + case FATAL: + fatal("Character reference expands to a control character (" + + toUPlusString((char) value) + ")."); + break; + case ALTER_INFOSET: + ch = '\uFFFD'; + // fall through + case ALLOW: + err("Character reference expands to a control character (" + + toUPlusString((char) value) + ")."); + } + return ch; + } + + @Override protected void errNcrCr() throws SAXException { + err("A numeric character reference expanded to carriage return."); + } + + @Override protected void errNcrInC1Range() throws SAXException { + err("A numeric character reference expanded to the C1 controls range."); + } + + @Override protected void errEofInPublicId() throws SAXException { + err("End of file inside public identifier."); + } + + @Override protected void errEofInComment() throws SAXException { + err("End of file inside 
comment."); + } + + @Override protected void errEofInDoctype() throws SAXException { + err("End of file inside doctype."); + } + + @Override protected void errEofInAttributeValue() throws SAXException { + err("End of file reached when inside an attribute value. Ignoring tag."); + } + + @Override protected void errEofInAttributeName() throws SAXException { + err("End of file occurred in an attribute name. Ignoring tag."); + } + + @Override protected void errEofWithoutGt() throws SAXException { + err("Saw end of file without the previous tag ending with \u201C>\u201D. Ignoring tag."); + } + + @Override protected void errEofInTagName() throws SAXException { + err("End of file seen when looking for tag name. Ignoring tag."); + } + + @Override protected void errEofInEndTag() throws SAXException { + err("End of file inside end tag. Ignoring tag."); + } + + @Override protected void errEofAfterLt() throws SAXException { + err("End of file after \u201C<\u201D."); + } + + @Override protected void errNcrOutOfRange() throws SAXException { + err("Character reference outside the permissible Unicode range."); + } + + @Override protected void errNcrUnassigned() throws SAXException { + err("Character reference expands to a permanently unassigned code point."); + } + + @Override protected void errDuplicateAttribute() throws SAXException { + err("Duplicate attribute \u201C" + + attributeName.getLocal(AttributeName.HTML) + "\u201D."); + } + + @Override protected void errEofInSystemId() throws SAXException { + err("End of file inside system identifier."); + } + + @Override protected void errExpectedSystemId() throws SAXException { + err("Expected a system identifier but the doctype ended."); + } + + @Override protected void errMissingSpaceBeforeDoctypeName() + throws SAXException { + err("Missing space before doctype name."); + } + + @Override protected void errHyphenHyphenBang() throws SAXException { + err("\u201C--!\u201D found in comment."); + } + + @Override protected void 
errNcrControlChar() throws SAXException { + err("Character reference expands to a control character (" + + toUPlusString((char) value) + ")."); + } + + @Override protected void errNcrZero() throws SAXException { + err("Character reference expands to zero."); + } + + @Override protected void errNoSpaceBetweenDoctypeSystemKeywordAndQuote() + throws SAXException { + err("No space between the doctype \u201CSYSTEM\u201D keyword and the quote."); + } + + @Override protected void errNoSpaceBetweenPublicAndSystemIds() + throws SAXException { + err("No space between the doctype public and system identifiers."); + } + + @Override protected void errNoSpaceBetweenDoctypePublicKeywordAndQuote() + throws SAXException { + err("No space between the doctype \u201CPUBLIC\u201D keyword and the quote."); + } + + @Override protected void noteAttributeWithoutValue() throws SAXException { + note("xhtml2", "Attribute without value"); + } + + @Override protected void noteUnquotedAttributeValue() throws SAXException { + note("xhtml1", "Unquoted attribute value."); + } + + /** + * Sets the transitionHandler. + * + * @param transitionHandler + * the transitionHandler to set + */ + public void setTransitionHandler(TransitionHandler transitionHandler) { + this.transitionHandler = transitionHandler; + } + + /** + * Sets an offset to be added to the position reported to + * <code>TransitionHandler</code>. 
+ * + * @param offset + * the offset + */ + public void setTransitionBaseOffset(int offset) { + this.transitionBaseOffset = offset; + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/HotSpotWorkaround.txt b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/HotSpotWorkaround.txt new file mode 100644 index 000000000..c389a8cac --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/HotSpotWorkaround.txt @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2010 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + + /** + * compressed returnValue: + * int returnState = returnValue >> 33 + * boolean breakOuterState = ((returnValue >> 32) & 0x1) != 0) + * int pos = returnValue & 0xFFFFFFFF // same as (int)returnValue + */ + @SuppressWarnings("unused") private long workAroundHotSpotHugeMethodLimit( + int state, char c, int pos, @NoLength char[] buf, + boolean reconsume, int returnState, int endPos) throws SAXException { + stateloop: for (;;) { + switch (state) { + // BEGIN HOTSPOT WORKAROUND + default: + long returnStateAndPos = workAroundHotSpotHugeMethodLimit( + state, c, pos, buf, reconsume, returnState, endPos); + pos = (int)returnStateAndPos; // 5.1.3 in the Java spec + returnState = (int)(returnStateAndPos >> 33); + state = stateSave; + if ( (pos == endPos) || ( (((int)(returnStateAndPos >> 32)) & 0x1) != 0) ) { + break stateloop; + } + continue stateloop; + // END HOTSPOT WORKAROUND + default: + assert !reconsume : "Must not reconsume when returning from HotSpot workaround."; + stateSave = state; + return (((long)returnState) << 33) | pos; + } + } + assert !reconsume : "Must not reconsume when returning from HotSpot workaround."; + stateSave = state; + return (((long)returnState) << 33) | (1L << 32) | pos ; + } diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/HtmlAttributes.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/HtmlAttributes.java new file mode 100644 index 000000000..45c9c6c3e --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/HtmlAttributes.java @@ -0,0 +1,620 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2008-2011 Mozilla Foundation + * Copyright (c) 2018-2020 Moonchild Productions + * Copyright (c) 2020 Binary Outcast + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the 
rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.impl; + +import nu.validator.htmlparser.annotation.Auto; +import nu.validator.htmlparser.annotation.IdType; +import nu.validator.htmlparser.annotation.Local; +import nu.validator.htmlparser.annotation.NsUri; +import nu.validator.htmlparser.annotation.Prefix; +import nu.validator.htmlparser.annotation.QName; +import nu.validator.htmlparser.common.Interner; +import nu.validator.htmlparser.common.XmlViolationPolicy; + +import org.xml.sax.Attributes; +import org.xml.sax.SAXException; + +/** + * Be careful with this class. QName is the name from HTML tokenization. + * Otherwise, please refer to the interface doc. 
+ * + * @version $Id: AttributesImpl.java 206 2008-03-20 14:09:29Z hsivonen $ + * @author hsivonen + */ +public final class HtmlAttributes implements Attributes { + + // [NOCPP[ + + private static final AttributeName[] EMPTY_ATTRIBUTENAMES = new AttributeName[0]; + + private static final String[] EMPTY_STRINGS = new String[0]; + + // ]NOCPP] + + public static final HtmlAttributes EMPTY_ATTRIBUTES = new HtmlAttributes( + AttributeName.HTML); + + private int mode; + + private int length; + + private @Auto AttributeName[] names; + + private @Auto String[] values; // XXX perhaps make this @NoLength? + + // CPPONLY: private @Auto int[] lines; // XXX perhaps make this @NoLength? + + // [NOCPP[ + + private String idValue; + + private int xmlnsLength; + + private AttributeName[] xmlnsNames; + + private String[] xmlnsValues; + + // ]NOCPP] + + public HtmlAttributes(int mode) { + this.mode = mode; + this.length = 0; + /* + * The length of 5 covers 98.3% of elements + * according to Hixie, but let's round to the next power of two for + * jemalloc. 
+ */ + this.names = new AttributeName[8]; + this.values = new String[8]; + // CPPONLY: this.lines = new int[8]; + + // [NOCPP[ + + this.idValue = null; + + this.xmlnsLength = 0; + + this.xmlnsNames = HtmlAttributes.EMPTY_ATTRIBUTENAMES; + + this.xmlnsValues = HtmlAttributes.EMPTY_STRINGS; + + // ]NOCPP] + } + /* + public HtmlAttributes(HtmlAttributes other) { + this.mode = other.mode; + this.length = other.length; + this.names = new AttributeName[other.length]; + this.values = new String[other.length]; + // [NOCPP[ + this.idValue = other.idValue; + this.xmlnsLength = other.xmlnsLength; + this.xmlnsNames = new AttributeName[other.xmlnsLength]; + this.xmlnsValues = new String[other.xmlnsLength]; + // ]NOCPP] + } + */ + + void destructor() { + clear(0); + } + + /** + * Only use with a static argument + * + * @param name + * @return + */ + public int getIndex(AttributeName name) { + for (int i = 0; i < length; i++) { + if (names[i] == name) { + return i; + } + } + return -1; + } + + /** + * Only use with static argument. + * + * @see org.xml.sax.Attributes#getValue(java.lang.String) + */ + public String getValue(AttributeName name) { + int index = getIndex(name); + if (index == -1) { + return null; + } else { + return getValueNoBoundsCheck(index); + } + } + + public int getLength() { + return length; + } + + /** + * Variant of <code>getLocalName(int index)</code> without bounds check. + * @param index a valid attribute index + * @return the local name at index + */ + public @Local String getLocalNameNoBoundsCheck(int index) { + // CPPONLY: assert index < length && index >= 0: "Index out of bounds"; + return names[index].getLocal(mode); + } + + /** + * Variant of <code>getURI(int index)</code> without bounds check. 
+ * @param index a valid attribute index + * @return the namespace URI at index + */ + public @NsUri String getURINoBoundsCheck(int index) { + // CPPONLY: assert index < length && index >= 0: "Index out of bounds"; + return names[index].getUri(mode); + } + + /** + * Variant of <code>getPrefix(int index)</code> without bounds check. + * @param index a valid attribute index + * @return the namespace prefix at index + */ + public @Prefix String getPrefixNoBoundsCheck(int index) { + // CPPONLY: assert index < length && index >= 0: "Index out of bounds"; + return names[index].getPrefix(mode); + } + + /** + * Variant of <code>getValue(int index)</code> without bounds check. + * @param index a valid attribute index + * @return the attribute value at index + */ + public String getValueNoBoundsCheck(int index) { + // CPPONLY: assert index < length && index >= 0: "Index out of bounds"; + return values[index]; + } + + /** + * Variant of <code>getAttributeName(int index)</code> without bounds check. + * @param index a valid attribute index + * @return the attribute name at index + */ + public AttributeName getAttributeNameNoBoundsCheck(int index) { + // CPPONLY: assert index < length && index >= 0: "Index out of bounds"; + return names[index]; + } + + // CPPONLY: /** + // CPPONLY: * Obtains a line number without bounds check. + // CPPONLY: * @param index a valid attribute index + // CPPONLY: * @return the line number at index or -1 if unknown + // CPPONLY: */ + // CPPONLY: public int getLineNoBoundsCheck(int index) { + // CPPONLY: assert index < length && index >= 0: "Index out of bounds"; + // CPPONLY: return lines[index]; + // CPPONLY: } + + // [NOCPP[ + + /** + * Variant of <code>getQName(int index)</code> without bounds check. 
+ * @param index a valid attribute index + * @return the QName at index + */ + public @QName String getQNameNoBoundsCheck(int index) { + return names[index].getQName(mode); + } + + /** + * Variant of <code>getType(int index)</code> without bounds check. + * @param index a valid attribute index + * @return the attribute type at index + */ + public @IdType String getTypeNoBoundsCheck(int index) { + return (names[index] == AttributeName.ID) ? "ID" : "CDATA"; + } + + public int getIndex(String qName) { + for (int i = 0; i < length; i++) { + if (names[i].getQName(mode).equals(qName)) { + return i; + } + } + return -1; + } + + public int getIndex(String uri, String localName) { + for (int i = 0; i < length; i++) { + if (names[i].getLocal(mode).equals(localName) + && names[i].getUri(mode).equals(uri)) { + return i; + } + } + return -1; + } + + public @IdType String getType(String qName) { + int index = getIndex(qName); + if (index == -1) { + return null; + } else { + return getType(index); + } + } + + public @IdType String getType(String uri, String localName) { + int index = getIndex(uri, localName); + if (index == -1) { + return null; + } else { + return getType(index); + } + } + + public String getValue(String qName) { + int index = getIndex(qName); + if (index == -1) { + return null; + } else { + return getValue(index); + } + } + + public String getValue(String uri, String localName) { + int index = getIndex(uri, localName); + if (index == -1) { + return null; + } else { + return getValue(index); + } + } + + public @Local String getLocalName(int index) { + if (index < length && index >= 0) { + return names[index].getLocal(mode); + } else { + return null; + } + } + + public @QName String getQName(int index) { + if (index < length && index >= 0) { + return names[index].getQName(mode); + } else { + return null; + } + } + + public @IdType String getType(int index) { + if (index < length && index >= 0) { + return (names[index] == AttributeName.ID) ? 
"ID" : "CDATA"; + } else { + return null; + } + } + + public AttributeName getAttributeName(int index) { + if (index < length && index >= 0) { + return names[index]; + } else { + return null; + } + } + + public @NsUri String getURI(int index) { + if (index < length && index >= 0) { + return names[index].getUri(mode); + } else { + return null; + } + } + + public @Prefix String getPrefix(int index) { + if (index < length && index >= 0) { + return names[index].getPrefix(mode); + } else { + return null; + } + } + + public String getValue(int index) { + if (index < length && index >= 0) { + return values[index]; + } else { + return null; + } + } + + public String getId() { + return idValue; + } + + public int getXmlnsLength() { + return xmlnsLength; + } + + public @Local String getXmlnsLocalName(int index) { + if (index < xmlnsLength && index >= 0) { + return xmlnsNames[index].getLocal(mode); + } else { + return null; + } + } + + public @NsUri String getXmlnsURI(int index) { + if (index < xmlnsLength && index >= 0) { + return xmlnsNames[index].getUri(mode); + } else { + return null; + } + } + + public String getXmlnsValue(int index) { + if (index < xmlnsLength && index >= 0) { + return xmlnsValues[index]; + } else { + return null; + } + } + + public int getXmlnsIndex(AttributeName name) { + for (int i = 0; i < xmlnsLength; i++) { + if (xmlnsNames[i] == name) { + return i; + } + } + return -1; + } + + public String getXmlnsValue(AttributeName name) { + int index = getXmlnsIndex(name); + if (index == -1) { + return null; + } else { + return getXmlnsValue(index); + } + } + + public AttributeName getXmlnsAttributeName(int index) { + if (index < xmlnsLength && index >= 0) { + return xmlnsNames[index]; + } else { + return null; + } + } + + // ]NOCPP] + + void addAttribute(AttributeName name, String value + // [NOCPP[ + , XmlViolationPolicy xmlnsPolicy + // ]NOCPP] + // CPPONLY: , int line + ) throws SAXException { + // [NOCPP[ + if (name == AttributeName.ID) { + idValue = 
value; + } + + if (name.isXmlns()) { + if (xmlnsNames.length == xmlnsLength) { + int newLen = xmlnsLength == 0 ? 2 : xmlnsLength << 1; + AttributeName[] newNames = new AttributeName[newLen]; + System.arraycopy(xmlnsNames, 0, newNames, 0, xmlnsNames.length); + xmlnsNames = newNames; + String[] newValues = new String[newLen]; + System.arraycopy(xmlnsValues, 0, newValues, 0, xmlnsValues.length); + xmlnsValues = newValues; + } + xmlnsNames[xmlnsLength] = name; + xmlnsValues[xmlnsLength] = value; + xmlnsLength++; + switch (xmlnsPolicy) { + case FATAL: + // this is ugly + throw new SAXException("Saw an xmlns attribute."); + case ALTER_INFOSET: + return; + case ALLOW: + // fall through + } + } + + // ]NOCPP] + + if (names.length == length) { + int newLen = length << 1; // The first growth covers virtually + // 100% of elements according to + // Hixie + AttributeName[] newNames = new AttributeName[newLen]; + System.arraycopy(names, 0, newNames, 0, names.length); + names = newNames; + String[] newValues = new String[newLen]; + System.arraycopy(values, 0, newValues, 0, values.length); + values = newValues; + // CPPONLY: int[] newLines = new int[newLen]; + // CPPONLY: System.arraycopy(lines, 0, newLines, 0, lines.length); + // CPPONLY: lines = newLines; + } + names[length] = name; + values[length] = value; + // CPPONLY: lines[length] = line; + length++; + } + + void clear(int m) { + for (int i = 0; i < length; i++) { + names[i].release(); + names[i] = null; + Portability.releaseString(values[i]); + values[i] = null; + } + length = 0; + mode = m; + // [NOCPP[ + idValue = null; + for (int i = 0; i < xmlnsLength; i++) { + xmlnsNames[i] = null; + xmlnsValues[i] = null; + } + xmlnsLength = 0; + // ]NOCPP] + } + + /** + * This is used in C++ to release special <code>isindex</code> + * attribute values whose ownership is not transferred. 
+ */ + void releaseValue(int i) { + Portability.releaseString(values[i]); + } + + /** + * This is only used for <code>AttributeName</code> ownership transfer + * in the isindex case to avoid freeing custom names twice in C++. + */ + void clearWithoutReleasingContents() { + for (int i = 0; i < length; i++) { + names[i] = null; + values[i] = null; + } + length = 0; + } + + boolean contains(AttributeName name) { + for (int i = 0; i < length; i++) { + if (name.equalsAnother(names[i])) { + return true; + } + } + // [NOCPP[ + for (int i = 0; i < xmlnsLength; i++) { + if (name.equalsAnother(xmlnsNames[i])) { + return true; + } + } + // ]NOCPP] + return false; + } + + public void adjustForMath() { + mode = AttributeName.MATHML; + } + + public void adjustForSvg() { + mode = AttributeName.SVG; + } + + public HtmlAttributes cloneAttributes(Interner interner) + throws SAXException { + assert (length == 0 + // [NOCPP[ + && xmlnsLength == 0 + // ]NOCPP] + ) + || mode == 0 || mode == 3; + HtmlAttributes clone = new HtmlAttributes(0); + for (int i = 0; i < length; i++) { + clone.addAttribute(names[i].cloneAttributeName(interner), + Portability.newStringFromString(values[i]) + // [NOCPP[ + , XmlViolationPolicy.ALLOW + // ]NOCPP] + // CPPONLY: , lines[i] + ); + } + // [NOCPP[ + for (int i = 0; i < xmlnsLength; i++) { + clone.addAttribute(xmlnsNames[i], xmlnsValues[i], + XmlViolationPolicy.ALLOW); + } + // ]NOCPP] + return clone; // XXX!!! 
+ } + + public boolean equalsAnother(HtmlAttributes other) { + assert mode == 0 || mode == 3 : "Trying to compare attributes in foreign content."; + int otherLength = other.getLength(); + if (length != otherLength) { + return false; + } + for (int i = 0; i < length; i++) { + // Work around the limitations of C++ + boolean found = false; + // The comparing just the local names is OK, since these attribute + // holders are both supposed to belong to HTML formatting elements + @Local String ownLocal = names[i].getLocal(AttributeName.HTML); + for (int j = 0; j < otherLength; j++) { + if (ownLocal == other.names[j].getLocal(AttributeName.HTML)) { + found = true; + if (!Portability.stringEqualsString(values[i], other.values[j])) { + return false; + } + } + } + if (!found) { + return false; + } + } + return true; + } + + // [NOCPP[ + + void processNonNcNames(TreeBuilder<?> treeBuilder, XmlViolationPolicy namePolicy) throws SAXException { + for (int i = 0; i < length; i++) { + AttributeName attName = names[i]; + if (!attName.isNcName(mode)) { + String name = attName.getLocal(mode); + switch (namePolicy) { + case ALTER_INFOSET: + names[i] = AttributeName.create(NCName.escapeName(name)); + // fall through + case ALLOW: + if (attName != AttributeName.XML_LANG) { + treeBuilder.warn("Attribute \u201C" + name + "\u201D is not serializable as XML 1.0."); + } + break; + case FATAL: + treeBuilder.fatal("Attribute \u201C" + name + "\u201D is not serializable as XML 1.0."); + break; + } + } + } + } + + public void merge(HtmlAttributes attributes) throws SAXException { + int len = attributes.getLength(); + for (int i = 0; i < len; i++) { + AttributeName name = attributes.getAttributeNameNoBoundsCheck(i); + if (!contains(name)) { + addAttribute(name, attributes.getValueNoBoundsCheck(i), XmlViolationPolicy.ALLOW); + } + } + } + + + // ]NOCPP] + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/LocatorImpl.java 
b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/LocatorImpl.java new file mode 100644 index 000000000..7a559d903 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/LocatorImpl.java @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2011 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +package nu.validator.htmlparser.impl; + +import org.xml.sax.Locator; + +public class LocatorImpl implements Locator { + + private final String systemId; + + private final String publicId; + + private final int column; + + private final int line; + + public LocatorImpl(Locator locator) { + this.systemId = locator.getSystemId(); + this.publicId = locator.getPublicId(); + this.column = locator.getColumnNumber(); + this.line = locator.getLineNumber(); + } + + public final int getColumnNumber() { + return column; + } + + public final int getLineNumber() { + return line; + } + + public final String getPublicId() { + return publicId; + } + + public final String getSystemId() { + return systemId; + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/MetaScanner.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/MetaScanner.java new file mode 100644 index 000000000..9a3dc16b2 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/MetaScanner.java @@ -0,0 +1,856 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2008-2015 Mozilla Foundation + * Copyright (c) 2018-2020 Moonchild Productions + * Copyright (c) 2020 Binary Outcast + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.impl; + +import java.io.IOException; + +import nu.validator.htmlparser.annotation.Auto; +import nu.validator.htmlparser.annotation.Inline; +import nu.validator.htmlparser.common.ByteReadable; + +import org.xml.sax.SAXException; + /** Byte-level scanner that looks through markup for a meta tag carrying charset, content and http-equiv attributes and offers the declared encoding to {@link #tryCharset(String)}. */ +public abstract class MetaScanner { + + /** + * Constant for "charset" without its first letter; the leading 'c' is matched by the state loop before this array is consulted. + */ + private static final char[] CHARSET = { 'h', 'a', 'r', 's', 'e', 't' }; + + /** + * Constant for "content" without its first letter; the leading 'c' is matched by the state loop before this array is consulted. + */ + private static final char[] CONTENT = { 'o', 'n', 't', 'e', 'n', 't' }; + + /** + * Constant for "http-equiv" without its first letter; the leading 'h' is matched by the state loop before this array is consulted. + */ + private static final char[] HTTP_EQUIV = { 't', 't', 'p', '-', 'e', 'q', + 'u', 'i', 'v' }; + + /** + * Constant for "content-type". 
+ */ + private static final char[] CONTENT_TYPE = { 'c', 'o', 'n', 't', 'e', 'n', + 't', '-', 't', 'y', 'p', 'e' }; + /* Values of metaState: NO means the tag name is not "meta"; M, E, T and A mean that the tag name read so far is "m", "me", "met" or "meta". */ + private static final int NO = 0; + + private static final int M = 1; + + private static final int E = 2; + + private static final int T = 3; + + private static final int A = 4; + /* Tokenizer states used by stateLoop; the value 12 is not assigned. */ + private static final int DATA = 0; + + private static final int TAG_OPEN = 1; + + private static final int SCAN_UNTIL_GT = 2; + + private static final int TAG_NAME = 3; + + private static final int BEFORE_ATTRIBUTE_NAME = 4; + + private static final int ATTRIBUTE_NAME = 5; + + private static final int AFTER_ATTRIBUTE_NAME = 6; + + private static final int BEFORE_ATTRIBUTE_VALUE = 7; + + private static final int ATTRIBUTE_VALUE_DOUBLE_QUOTED = 8; + + private static final int ATTRIBUTE_VALUE_SINGLE_QUOTED = 9; + + private static final int ATTRIBUTE_VALUE_UNQUOTED = 10; + + private static final int AFTER_ATTRIBUTE_VALUE_QUOTED = 11; + + private static final int MARKUP_DECLARATION_OPEN = 13; + + private static final int MARKUP_DECLARATION_HYPHEN = 14; + + private static final int COMMENT_START = 15; + + private static final int COMMENT_START_DASH = 16; + + private static final int COMMENT = 17; + + private static final int COMMENT_END_DASH = 18; + + private static final int COMMENT_END = 19; + + private static final int SELF_CLOSING_START_TAG = 20; + /* Values of httpEquivState for the tag currently being scanned. */ + private static final int HTTP_EQUIV_NOT_SEEN = 0; + + private static final int HTTP_EQUIV_CONTENT_TYPE = 1; + + private static final int HTTP_EQUIV_OTHER = 2; + + /** + * The data source. + */ + protected ByteReadable readable; + + /** + * The state of the state machine that recognizes the tag name "meta". + */ + private int metaState = NO; + + /** + * The current position in recognizing the attribute name "content". Integer.MAX_VALUE means the current name cannot match. + */ + private int contentIndex = Integer.MAX_VALUE; + + /** + * The current position in recognizing the attribute name "charset". Integer.MAX_VALUE means the current name cannot match. 
+ */ + private int charsetIndex = Integer.MAX_VALUE; + + /** + * The current position in recognizing the attribute name "http-equiv". Integer.MAX_VALUE means the current name cannot match. + */ + private int httpEquivIndex = Integer.MAX_VALUE; + + /** + * The current position in recognizing the attribute value "content-type". Integer.MAX_VALUE means the current value cannot match. + */ + private int contentTypeIndex = Integer.MAX_VALUE; + + /** + * The tokenizer state saved when stateLoop last returned. + */ + protected int stateSave = DATA; + + /** + * The currently filled length of strBuf. + */ + private int strBufLen; + + /** + * Accumulation buffer for attribute values. + */ + private @Auto char[] strBuf; + /** The value of the content attribute recorded for the current tag, or null. */ + private String content; + /** The value of the charset attribute recorded for the current tag, or null. */ + private String charset; + /** One of the HTTP_EQUIV_* constants describing the http-equiv attribute of the current tag. */ + private int httpEquivState; + + // CPPONLY: private TreeBuilder treeBuilder; + /** Creates a scanner in the DATA state with no data source and an empty 36-char value buffer. */ + public MetaScanner( + // CPPONLY: TreeBuilder tb + ) { + this.readable = null; + this.metaState = NO; + this.contentIndex = Integer.MAX_VALUE; + this.charsetIndex = Integer.MAX_VALUE; + this.httpEquivIndex = Integer.MAX_VALUE; + this.contentTypeIndex = Integer.MAX_VALUE; + this.stateSave = DATA; + this.strBufLen = 0; + this.strBuf = new char[36]; + this.content = null; + this.charset = null; + this.httpEquivState = HTTP_EQUIV_NOT_SEEN; + // CPPONLY: this.treeBuilder = tb; + } + /** Releases the content and charset strings held for the current tag. */ + @SuppressWarnings("unused") private void destructor() { + Portability.releaseString(content); + Portability.releaseString(charset); + } + + // [NOCPP[ + + /** + * Reads a byte from the data source. + * + * -1 means end. + * @return the value read from the data source, or -1 at end of input + * @throws IOException if the data source throws it + */ + protected int read() throws IOException { + return readable.readByte(); + } + + // ]NOCPP] + + // WARNING When editing this, make sure the bytecode length shown by javap + // stays under 8000 bytes! + /** + * Runs the meta scanning algorithm from the given state until input ends or tryCharset accepts an encoding; the state reached is kept in stateSave. 
+ */ + protected final void stateLoop(int state) + throws SAXException, IOException { + int c = -1; + boolean reconsume = false; + stateloop: for (;;) { + switch (state) { + case DATA: + dataloop: for (;;) { + if (reconsume) { + reconsume = false; + } else { + c = read(); + } + switch (c) { + case -1: + break stateloop; + case '<': + state = MetaScanner.TAG_OPEN; + break dataloop; // FALL THROUGH continue + // stateloop; + default: + continue; + } + } + // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER + case TAG_OPEN: + tagopenloop: for (;;) { + c = read(); + switch (c) { + case -1: + break stateloop; + case 'm': + case 'M': + metaState = M; + state = MetaScanner.TAG_NAME; + break tagopenloop; + // continue stateloop; + case '!': + state = MetaScanner.MARKUP_DECLARATION_OPEN; + continue stateloop; + case '?': + case '/': + state = MetaScanner.SCAN_UNTIL_GT; + continue stateloop; + case '>': + state = MetaScanner.DATA; + continue stateloop; + default: + if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) { + metaState = NO; + state = MetaScanner.TAG_NAME; + break tagopenloop; + // continue stateloop; + } + state = MetaScanner.DATA; + reconsume = true; + continue stateloop; + } + } + // FALL THROUGH DON'T REORDER + case TAG_NAME: + tagnameloop: for (;;) { + c = read(); + switch (c) { + case -1: + break stateloop; + case ' ': + case '\t': + case '\n': + case '\u000C': + state = MetaScanner.BEFORE_ATTRIBUTE_NAME; + break tagnameloop; + // continue stateloop; + case '/': + state = MetaScanner.SELF_CLOSING_START_TAG; + continue stateloop; + case '>': + state = MetaScanner.DATA; + continue stateloop; + case 'e': + case 'E': + if (metaState == M) { + metaState = E; + } else { + metaState = NO; + } + continue; + case 't': + case 'T': + if (metaState == E) { + metaState = T; + } else { + metaState = NO; + } + continue; + case 'a': + case 'A': + if (metaState == T) { + metaState = A; + } else { + metaState = NO; + } + continue; + default: + metaState = NO; + continue; + } 
+ } + // FALLTHRU DON'T REORDER + case BEFORE_ATTRIBUTE_NAME: + beforeattributenameloop: for (;;) { + if (reconsume) { + reconsume = false; + } else { + c = read(); + } + /* + * Consume the next input character: + */ + switch (c) { + case -1: + break stateloop; + case ' ': + case '\t': + case '\n': + case '\u000C': + continue; + case '/': + state = MetaScanner.SELF_CLOSING_START_TAG; + continue stateloop; + case '>': + if (handleTag()) { + break stateloop; + } + state = DATA; + continue stateloop; + case 'c': + case 'C': + contentIndex = 0; + charsetIndex = 0; + httpEquivIndex = Integer.MAX_VALUE; + contentTypeIndex = Integer.MAX_VALUE; + state = MetaScanner.ATTRIBUTE_NAME; + break beforeattributenameloop; + case 'h': + case 'H': + contentIndex = Integer.MAX_VALUE; + charsetIndex = Integer.MAX_VALUE; + httpEquivIndex = 0; + contentTypeIndex = Integer.MAX_VALUE; + state = MetaScanner.ATTRIBUTE_NAME; + break beforeattributenameloop; + default: + contentIndex = Integer.MAX_VALUE; + charsetIndex = Integer.MAX_VALUE; + httpEquivIndex = Integer.MAX_VALUE; + contentTypeIndex = Integer.MAX_VALUE; + state = MetaScanner.ATTRIBUTE_NAME; + break beforeattributenameloop; + // continue stateloop; + } + } + // FALLTHRU DON'T REORDER + case ATTRIBUTE_NAME: + attributenameloop: for (;;) { + c = read(); + switch (c) { + case -1: + break stateloop; + case ' ': + case '\t': + case '\n': + case '\u000C': + state = MetaScanner.AFTER_ATTRIBUTE_NAME; + continue stateloop; + case '/': + state = MetaScanner.SELF_CLOSING_START_TAG; + continue stateloop; + case '=': + strBufLen = 0; + contentTypeIndex = 0; + state = MetaScanner.BEFORE_ATTRIBUTE_VALUE; + break attributenameloop; + // continue stateloop; + case '>': + if (handleTag()) { + break stateloop; + } + state = MetaScanner.DATA; + continue stateloop; + default: + if (metaState == A) { + if (c >= 'A' && c <= 'Z') { + c += 0x20; + } + if (contentIndex < CONTENT.length && c == CONTENT[contentIndex]) { + ++contentIndex; + } else { + 
contentIndex = Integer.MAX_VALUE; + } + if (charsetIndex < CHARSET.length && c == CHARSET[charsetIndex]) { + ++charsetIndex; + } else { + charsetIndex = Integer.MAX_VALUE; + } + if (httpEquivIndex < HTTP_EQUIV.length && c == HTTP_EQUIV[httpEquivIndex]) { + ++httpEquivIndex; + } else { + httpEquivIndex = Integer.MAX_VALUE; + } + } + continue; + } + } + // FALLTHRU DON'T REORDER + case BEFORE_ATTRIBUTE_VALUE: + beforeattributevalueloop: for (;;) { + c = read(); + switch (c) { + case -1: + break stateloop; + case ' ': + case '\t': + case '\n': + case '\u000C': + continue; + case '"': + state = MetaScanner.ATTRIBUTE_VALUE_DOUBLE_QUOTED; + break beforeattributevalueloop; + // continue stateloop; + case '\'': + state = MetaScanner.ATTRIBUTE_VALUE_SINGLE_QUOTED; + continue stateloop; + case '>': + if (handleTag()) { + break stateloop; + } + state = MetaScanner.DATA; + continue stateloop; + default: + handleCharInAttributeValue(c); + state = MetaScanner.ATTRIBUTE_VALUE_UNQUOTED; + continue stateloop; + } + } + // FALLTHRU DON'T REORDER + case ATTRIBUTE_VALUE_DOUBLE_QUOTED: + attributevaluedoublequotedloop: for (;;) { + if (reconsume) { + reconsume = false; + } else { + c = read(); + } + switch (c) { + case -1: + break stateloop; + case '"': + handleAttributeValue(); + state = MetaScanner.AFTER_ATTRIBUTE_VALUE_QUOTED; + break attributevaluedoublequotedloop; + // continue stateloop; + default: + handleCharInAttributeValue(c); + continue; + } + } + // FALLTHRU DON'T REORDER + case AFTER_ATTRIBUTE_VALUE_QUOTED: + afterattributevaluequotedloop: for (;;) { + c = read(); + switch (c) { + case -1: + break stateloop; + case ' ': + case '\t': + case '\n': + case '\u000C': + state = MetaScanner.BEFORE_ATTRIBUTE_NAME; + continue stateloop; + case '/': + state = MetaScanner.SELF_CLOSING_START_TAG; + break afterattributevaluequotedloop; + // continue stateloop; + case '>': + if (handleTag()) { + break stateloop; + } + state = MetaScanner.DATA; + continue stateloop; + default: + state = 
MetaScanner.BEFORE_ATTRIBUTE_NAME; + reconsume = true; + continue stateloop; + } + } + // FALLTHRU DON'T REORDER + case SELF_CLOSING_START_TAG: + c = read(); + switch (c) { + case -1: + break stateloop; + case '>': + if (handleTag()) { + break stateloop; + } + state = MetaScanner.DATA; + continue stateloop; + default: + state = MetaScanner.BEFORE_ATTRIBUTE_NAME; + reconsume = true; + continue stateloop; + } + // XXX reorder point + case ATTRIBUTE_VALUE_UNQUOTED: + for (;;) { + if (reconsume) { + reconsume = false; + } else { + c = read(); + } + switch (c) { + case -1: + break stateloop; + case ' ': + case '\t': + case '\n': + + case '\u000C': + handleAttributeValue(); + state = MetaScanner.BEFORE_ATTRIBUTE_NAME; + continue stateloop; + case '>': + handleAttributeValue(); + if (handleTag()) { + break stateloop; + } + state = MetaScanner.DATA; + continue stateloop; + default: + handleCharInAttributeValue(c); + continue; + } + } + // XXX reorder point + case AFTER_ATTRIBUTE_NAME: + for (;;) { + c = read(); + switch (c) { + case -1: + break stateloop; + case ' ': + case '\t': + case '\n': + case '\u000C': + continue; + case '/': + handleAttributeValue(); + state = MetaScanner.SELF_CLOSING_START_TAG; + continue stateloop; + case '=': + strBufLen = 0; + contentTypeIndex = 0; + state = MetaScanner.BEFORE_ATTRIBUTE_VALUE; + continue stateloop; + case '>': + handleAttributeValue(); + if (handleTag()) { + break stateloop; + } + state = MetaScanner.DATA; + continue stateloop; + case 'c': + case 'C': /* NOTE(review): unlike BEFORE_ATTRIBUTE_NAME, this state has no 'h'/'H' branch and does not reset httpEquivIndex when a new attribute name starts; confirm this is intended. */ + contentIndex = 0; + charsetIndex = 0; + state = MetaScanner.ATTRIBUTE_NAME; + continue stateloop; + default: + contentIndex = Integer.MAX_VALUE; + charsetIndex = Integer.MAX_VALUE; + state = MetaScanner.ATTRIBUTE_NAME; + continue stateloop; + } + } + // XXX reorder point + case MARKUP_DECLARATION_OPEN: + markupdeclarationopenloop: for (;;) { + c = read(); + switch (c) { + case -1: + break stateloop; + case '-': + state = MetaScanner.MARKUP_DECLARATION_HYPHEN; + break 
markupdeclarationopenloop; + // continue stateloop; + default: + state = MetaScanner.SCAN_UNTIL_GT; + reconsume = true; + continue stateloop; + } + } + // FALLTHRU DON'T REORDER + case MARKUP_DECLARATION_HYPHEN: + markupdeclarationhyphenloop: for (;;) { + c = read(); + switch (c) { + case -1: + break stateloop; + case '-': + state = MetaScanner.COMMENT_START; + break markupdeclarationhyphenloop; + // continue stateloop; + default: + state = MetaScanner.SCAN_UNTIL_GT; + reconsume = true; + continue stateloop; + } + } + // FALLTHRU DON'T REORDER + case COMMENT_START: + commentstartloop: for (;;) { + c = read(); + switch (c) { + case -1: + break stateloop; + case '-': + state = MetaScanner.COMMENT_START_DASH; + continue stateloop; + case '>': + state = MetaScanner.DATA; + continue stateloop; + default: + state = MetaScanner.COMMENT; + break commentstartloop; + // continue stateloop; + } + } + // FALLTHRU DON'T REORDER + case COMMENT: + commentloop: for (;;) { + c = read(); + switch (c) { + case -1: + break stateloop; + case '-': + state = MetaScanner.COMMENT_END_DASH; + break commentloop; + // continue stateloop; + default: + continue; + } + } + // FALLTHRU DON'T REORDER + case COMMENT_END_DASH: + commentenddashloop: for (;;) { + c = read(); + switch (c) { + case -1: + break stateloop; + case '-': + state = MetaScanner.COMMENT_END; + break commentenddashloop; + // continue stateloop; + default: + state = MetaScanner.COMMENT; + continue stateloop; + } + } + // FALLTHRU DON'T REORDER + case COMMENT_END: + for (;;) { + c = read(); + switch (c) { + case -1: + break stateloop; + case '>': + state = MetaScanner.DATA; + continue stateloop; + case '-': + continue; + default: + state = MetaScanner.COMMENT; + continue stateloop; + } + } + // XXX reorder point + case COMMENT_START_DASH: + c = read(); + switch (c) { + case -1: + break stateloop; + case '-': + state = MetaScanner.COMMENT_END; + continue stateloop; + case '>': + state = MetaScanner.DATA; + continue stateloop; + 
default: + state = MetaScanner.COMMENT; + continue stateloop; + } + // XXX reorder point + case ATTRIBUTE_VALUE_SINGLE_QUOTED: + for (;;) { + if (reconsume) { + reconsume = false; + } else { + c = read(); + } + switch (c) { + case -1: + break stateloop; + case '\'': + handleAttributeValue(); + state = MetaScanner.AFTER_ATTRIBUTE_VALUE_QUOTED; + continue stateloop; + default: + handleCharInAttributeValue(c); + continue; + } + } + // XXX reorder point + case SCAN_UNTIL_GT: + for (;;) { + if (reconsume) { + reconsume = false; + } else { + c = read(); + } + switch (c) { + case -1: + break stateloop; + case '>': + state = MetaScanner.DATA; + continue stateloop; + default: + continue; + } + } + } + } + stateSave = state; + } + /** Handles one character of an attribute value inside a meta tag: buffers it when the attribute is content or charset, or matches it ASCII-case-insensitively against "content-type" when the attribute is http-equiv. */ + private void handleCharInAttributeValue(int c) { + if (metaState == A) { + if (contentIndex == CONTENT.length || charsetIndex == CHARSET.length) { + addToBuffer(c); + } else if (httpEquivIndex == HTTP_EQUIV.length) { + if (contentTypeIndex < CONTENT_TYPE.length && toAsciiLowerCase(c) == CONTENT_TYPE[contentTypeIndex]) { + ++contentTypeIndex; + } else { + contentTypeIndex = Integer.MAX_VALUE; + } + } + } + } + /** Maps 'A'..'Z' to 'a'..'z' and returns every other value unchanged. */ + @Inline private int toAsciiLowerCase(int c) { + if (c >= 'A' && c <= 'Z') { + return c + 0x20; + } + return c; + } + + /** + * Adds a character to the accumulation buffer, first tripling the buffer's size if it is full. + * @param c the character to add + */ + private void addToBuffer(int c) { + if (strBufLen == strBuf.length) { + char[] newBuf = new char[strBuf.length + (strBuf.length << 1)]; + System.arraycopy(strBuf, 0, newBuf, 0, strBuf.length); + strBuf = newBuf; + } + strBuf[strBufLen++] = (char)c; + } + + /** + * Records the attribute value that just ended, if the current tag is a meta tag: 
+ * the buffer becomes content or charset, or an http-equiv value is classified as content-type or other (only the first of each per tag is kept). + * @throws SAXException + */ + private void handleAttributeValue() throws SAXException { + if (metaState != A) { + return; + } + if (contentIndex == CONTENT.length && content == null) { + content = Portability.newStringFromBuffer(strBuf, 0, strBufLen + // CPPONLY: , treeBuilder + ); + return; + } + if (charsetIndex == CHARSET.length && charset == null) { + charset = Portability.newStringFromBuffer(strBuf, 0, strBufLen + // CPPONLY: , treeBuilder + ); + return; + } + if (httpEquivIndex == HTTP_EQUIV.length + && httpEquivState == HTTP_EQUIV_NOT_SEEN) { + httpEquivState = (contentTypeIndex == CONTENT_TYPE.length) ? HTTP_EQUIV_CONTENT_TYPE + : HTTP_EQUIV_OTHER; + return; + } + } + /** Ends the current tag: tries its encoding declaration, then releases and clears the recorded content, charset and http-equiv state. Returns true if scanning should stop. */ + private boolean handleTag() throws SAXException { + boolean stop = handleTagInner(); + Portability.releaseString(content); + content = null; + Portability.releaseString(charset); + charset = null; + httpEquivState = HTTP_EQUIV_NOT_SEEN; + return stop; + } + /** Tries the charset attribute first; failing that, tries a charset extracted from content when http-equiv was "content-type". Returns true if tryCharset accepted one. */ + private boolean handleTagInner() throws SAXException { + if (charset != null && tryCharset(charset)) { + return true; + } + if (content != null && httpEquivState == HTTP_EQUIV_CONTENT_TYPE) { + String extract = TreeBuilder.extractCharsetFromContent(content + // CPPONLY: , treeBuilder + ); + if (extract == null) { + return false; + } + boolean success = tryCharset(extract); + Portability.releaseString(extract); + return success; + } + return false; + } + + /** + * Tries to switch to an encoding. 
+ * + * @param encoding the encoding name taken from the charset attribute or extracted from the content attribute + * @return <code>true</code> if successful + * @throws SAXException + */ + protected abstract boolean tryCharset(String encoding) throws SAXException; + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/NCName.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/NCName.java new file mode 100644 index 000000000..940cf2e9c --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/NCName.java @@ -0,0 +1,495 @@ +/* + * Copyright (c) 2008-2009 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +package nu.validator.htmlparser.impl; + +public final class NCName { + // [NOCPP[ + + private static final int SURROGATE_OFFSET = 0x10000 - (0xD800 << 10) - 0xDC00; + + private static final char[] HEX_TABLE = "0123456789ABCDEF".toCharArray(); + + public static boolean isNCNameStart(char c) { + return ((c >= '\u0041' && c <= '\u005A') + || (c >= '\u0061' && c <= '\u007A') + || (c >= '\u00C0' && c <= '\u00D6') + || (c >= '\u00D8' && c <= '\u00F6') + || (c >= '\u00F8' && c <= '\u00FF') + || (c >= '\u0100' && c <= '\u0131') + || (c >= '\u0134' && c <= '\u013E') + || (c >= '\u0141' && c <= '\u0148') + || (c >= '\u014A' && c <= '\u017E') + || (c >= '\u0180' && c <= '\u01C3') + || (c >= '\u01CD' && c <= '\u01F0') + || (c >= '\u01F4' && c <= '\u01F5') + || (c >= '\u01FA' && c <= '\u0217') + || (c >= '\u0250' && c <= '\u02A8') + || (c >= '\u02BB' && c <= '\u02C1') || (c == '\u0386') + || (c >= '\u0388' && c <= '\u038A') || (c == '\u038C') + || (c >= '\u038E' && c <= '\u03A1') + || (c >= '\u03A3' && c <= '\u03CE') + || (c >= '\u03D0' && c <= '\u03D6') || (c == '\u03DA') + || (c == '\u03DC') || (c == '\u03DE') || (c == '\u03E0') + || (c >= '\u03E2' && c <= '\u03F3') + || (c >= '\u0401' && c <= '\u040C') + || (c >= '\u040E' && c <= '\u044F') + || (c >= '\u0451' && c <= '\u045C') + || (c >= '\u045E' && c <= '\u0481') + || (c >= '\u0490' && c <= '\u04C4') + || (c >= '\u04C7' && c <= '\u04C8') + || (c >= '\u04CB' && c <= '\u04CC') + || (c >= '\u04D0' && c <= '\u04EB') + || (c >= '\u04EE' && c <= '\u04F5') + || (c >= '\u04F8' && c <= '\u04F9') + || (c >= '\u0531' && c <= '\u0556') || (c == '\u0559') + || (c >= '\u0561' && c <= '\u0586') + || (c >= '\u05D0' && c <= '\u05EA') + || (c >= '\u05F0' && c <= '\u05F2') + || (c >= '\u0621' && c <= '\u063A') + || (c >= '\u0641' && c <= '\u064A') + || (c >= '\u0671' && c <= '\u06B7') + || (c >= '\u06BA' && c <= '\u06BE') + || (c >= '\u06C0' && c <= '\u06CE') + || (c >= '\u06D0' && c <= '\u06D3') || (c == '\u06D5') + || (c >= '\u06E5' 
&& c <= '\u06E6') + || (c >= '\u0905' && c <= '\u0939') || (c == '\u093D') + || (c >= '\u0958' && c <= '\u0961') + || (c >= '\u0985' && c <= '\u098C') + || (c >= '\u098F' && c <= '\u0990') + || (c >= '\u0993' && c <= '\u09A8') + || (c >= '\u09AA' && c <= '\u09B0') || (c == '\u09B2') + || (c >= '\u09B6' && c <= '\u09B9') + || (c >= '\u09DC' && c <= '\u09DD') + || (c >= '\u09DF' && c <= '\u09E1') + || (c >= '\u09F0' && c <= '\u09F1') + || (c >= '\u0A05' && c <= '\u0A0A') + || (c >= '\u0A0F' && c <= '\u0A10') + || (c >= '\u0A13' && c <= '\u0A28') + || (c >= '\u0A2A' && c <= '\u0A30') + || (c >= '\u0A32' && c <= '\u0A33') + || (c >= '\u0A35' && c <= '\u0A36') + || (c >= '\u0A38' && c <= '\u0A39') + || (c >= '\u0A59' && c <= '\u0A5C') || (c == '\u0A5E') + || (c >= '\u0A72' && c <= '\u0A74') + || (c >= '\u0A85' && c <= '\u0A8B') || (c == '\u0A8D') + || (c >= '\u0A8F' && c <= '\u0A91') + || (c >= '\u0A93' && c <= '\u0AA8') + || (c >= '\u0AAA' && c <= '\u0AB0') + || (c >= '\u0AB2' && c <= '\u0AB3') + || (c >= '\u0AB5' && c <= '\u0AB9') || (c == '\u0ABD') + || (c == '\u0AE0') || (c >= '\u0B05' && c <= '\u0B0C') + || (c >= '\u0B0F' && c <= '\u0B10') + || (c >= '\u0B13' && c <= '\u0B28') + || (c >= '\u0B2A' && c <= '\u0B30') + || (c >= '\u0B32' && c <= '\u0B33') + || (c >= '\u0B36' && c <= '\u0B39') || (c == '\u0B3D') + || (c >= '\u0B5C' && c <= '\u0B5D') + || (c >= '\u0B5F' && c <= '\u0B61') + || (c >= '\u0B85' && c <= '\u0B8A') + || (c >= '\u0B8E' && c <= '\u0B90') + || (c >= '\u0B92' && c <= '\u0B95') + || (c >= '\u0B99' && c <= '\u0B9A') || (c == '\u0B9C') + || (c >= '\u0B9E' && c <= '\u0B9F') + || (c >= '\u0BA3' && c <= '\u0BA4') + || (c >= '\u0BA8' && c <= '\u0BAA') + || (c >= '\u0BAE' && c <= '\u0BB5') + || (c >= '\u0BB7' && c <= '\u0BB9') + || (c >= '\u0C05' && c <= '\u0C0C') + || (c >= '\u0C0E' && c <= '\u0C10') + || (c >= '\u0C12' && c <= '\u0C28') + || (c >= '\u0C2A' && c <= '\u0C33') + || (c >= '\u0C35' && c <= '\u0C39') + || (c >= '\u0C60' && c <= '\u0C61') + || 
(c >= '\u0C85' && c <= '\u0C8C') + || (c >= '\u0C8E' && c <= '\u0C90') + || (c >= '\u0C92' && c <= '\u0CA8') + || (c >= '\u0CAA' && c <= '\u0CB3') + || (c >= '\u0CB5' && c <= '\u0CB9') || (c == '\u0CDE') + || (c >= '\u0CE0' && c <= '\u0CE1') + || (c >= '\u0D05' && c <= '\u0D0C') + || (c >= '\u0D0E' && c <= '\u0D10') + || (c >= '\u0D12' && c <= '\u0D28') + || (c >= '\u0D2A' && c <= '\u0D39') + || (c >= '\u0D60' && c <= '\u0D61') + || (c >= '\u0E01' && c <= '\u0E2E') || (c == '\u0E30') + || (c >= '\u0E32' && c <= '\u0E33') + || (c >= '\u0E40' && c <= '\u0E45') + || (c >= '\u0E81' && c <= '\u0E82') || (c == '\u0E84') + || (c >= '\u0E87' && c <= '\u0E88') || (c == '\u0E8A') + || (c == '\u0E8D') || (c >= '\u0E94' && c <= '\u0E97') + || (c >= '\u0E99' && c <= '\u0E9F') + || (c >= '\u0EA1' && c <= '\u0EA3') || (c == '\u0EA5') + || (c == '\u0EA7') || (c >= '\u0EAA' && c <= '\u0EAB') + || (c >= '\u0EAD' && c <= '\u0EAE') || (c == '\u0EB0') + || (c >= '\u0EB2' && c <= '\u0EB3') || (c == '\u0EBD') + || (c >= '\u0EC0' && c <= '\u0EC4') + || (c >= '\u0F40' && c <= '\u0F47') + || (c >= '\u0F49' && c <= '\u0F69') + || (c >= '\u10A0' && c <= '\u10C5') + || (c >= '\u10D0' && c <= '\u10F6') || (c == '\u1100') + || (c >= '\u1102' && c <= '\u1103') + || (c >= '\u1105' && c <= '\u1107') || (c == '\u1109') + || (c >= '\u110B' && c <= '\u110C') + || (c >= '\u110E' && c <= '\u1112') || (c == '\u113C') + || (c == '\u113E') || (c == '\u1140') || (c == '\u114C') + || (c == '\u114E') || (c == '\u1150') + || (c >= '\u1154' && c <= '\u1155') || (c == '\u1159') + || (c >= '\u115F' && c <= '\u1161') || (c == '\u1163') + || (c == '\u1165') || (c == '\u1167') || (c == '\u1169') + || (c >= '\u116D' && c <= '\u116E') + || (c >= '\u1172' && c <= '\u1173') || (c == '\u1175') + || (c == '\u119E') || (c == '\u11A8') || (c == '\u11AB') + || (c >= '\u11AE' && c <= '\u11AF') + || (c >= '\u11B7' && c <= '\u11B8') || (c == '\u11BA') + || (c >= '\u11BC' && c <= '\u11C2') || (c == '\u11EB') + || (c == '\u11F0') 
|| (c == '\u11F9') + || (c >= '\u1E00' && c <= '\u1E9B') + || (c >= '\u1EA0' && c <= '\u1EF9') + || (c >= '\u1F00' && c <= '\u1F15') + || (c >= '\u1F18' && c <= '\u1F1D') + || (c >= '\u1F20' && c <= '\u1F45') + || (c >= '\u1F48' && c <= '\u1F4D') + || (c >= '\u1F50' && c <= '\u1F57') || (c == '\u1F59') + || (c == '\u1F5B') || (c == '\u1F5D') + || (c >= '\u1F5F' && c <= '\u1F7D') + || (c >= '\u1F80' && c <= '\u1FB4') + || (c >= '\u1FB6' && c <= '\u1FBC') || (c == '\u1FBE') + || (c >= '\u1FC2' && c <= '\u1FC4') + || (c >= '\u1FC6' && c <= '\u1FCC') + || (c >= '\u1FD0' && c <= '\u1FD3') + || (c >= '\u1FD6' && c <= '\u1FDB') + || (c >= '\u1FE0' && c <= '\u1FEC') + || (c >= '\u1FF2' && c <= '\u1FF4') + || (c >= '\u1FF6' && c <= '\u1FFC') || (c == '\u2126') + || (c >= '\u212A' && c <= '\u212B') || (c == '\u212E') + || (c >= '\u2180' && c <= '\u2182') + || (c >= '\u3041' && c <= '\u3094') + || (c >= '\u30A1' && c <= '\u30FA') + || (c >= '\u3105' && c <= '\u312C') + || (c >= '\uAC00' && c <= '\uD7A3') + || (c >= '\u4E00' && c <= '\u9FA5') || (c == '\u3007') + || (c >= '\u3021' && c <= '\u3029') || (c == '_')); + } + + public static boolean isNCNameTrail(char c) { + return ((c >= '\u0030' && c <= '\u0039') + || (c >= '\u0660' && c <= '\u0669') + || (c >= '\u06F0' && c <= '\u06F9') + || (c >= '\u0966' && c <= '\u096F') + || (c >= '\u09E6' && c <= '\u09EF') + || (c >= '\u0A66' && c <= '\u0A6F') + || (c >= '\u0AE6' && c <= '\u0AEF') + || (c >= '\u0B66' && c <= '\u0B6F') + || (c >= '\u0BE7' && c <= '\u0BEF') + || (c >= '\u0C66' && c <= '\u0C6F') + || (c >= '\u0CE6' && c <= '\u0CEF') + || (c >= '\u0D66' && c <= '\u0D6F') + || (c >= '\u0E50' && c <= '\u0E59') + || (c >= '\u0ED0' && c <= '\u0ED9') + || (c >= '\u0F20' && c <= '\u0F29') + || (c >= '\u0041' && c <= '\u005A') + || (c >= '\u0061' && c <= '\u007A') + || (c >= '\u00C0' && c <= '\u00D6') + || (c >= '\u00D8' && c <= '\u00F6') + || (c >= '\u00F8' && c <= '\u00FF') + || (c >= '\u0100' && c <= '\u0131') + || (c >= '\u0134' && 
c <= '\u013E') + || (c >= '\u0141' && c <= '\u0148') + || (c >= '\u014A' && c <= '\u017E') + || (c >= '\u0180' && c <= '\u01C3') + || (c >= '\u01CD' && c <= '\u01F0') + || (c >= '\u01F4' && c <= '\u01F5') + || (c >= '\u01FA' && c <= '\u0217') + || (c >= '\u0250' && c <= '\u02A8') + || (c >= '\u02BB' && c <= '\u02C1') || (c == '\u0386') + || (c >= '\u0388' && c <= '\u038A') || (c == '\u038C') + || (c >= '\u038E' && c <= '\u03A1') + || (c >= '\u03A3' && c <= '\u03CE') + || (c >= '\u03D0' && c <= '\u03D6') || (c == '\u03DA') + || (c == '\u03DC') || (c == '\u03DE') || (c == '\u03E0') + || (c >= '\u03E2' && c <= '\u03F3') + || (c >= '\u0401' && c <= '\u040C') + || (c >= '\u040E' && c <= '\u044F') + || (c >= '\u0451' && c <= '\u045C') + || (c >= '\u045E' && c <= '\u0481') + || (c >= '\u0490' && c <= '\u04C4') + || (c >= '\u04C7' && c <= '\u04C8') + || (c >= '\u04CB' && c <= '\u04CC') + || (c >= '\u04D0' && c <= '\u04EB') + || (c >= '\u04EE' && c <= '\u04F5') + || (c >= '\u04F8' && c <= '\u04F9') + || (c >= '\u0531' && c <= '\u0556') || (c == '\u0559') + || (c >= '\u0561' && c <= '\u0586') + || (c >= '\u05D0' && c <= '\u05EA') + || (c >= '\u05F0' && c <= '\u05F2') + || (c >= '\u0621' && c <= '\u063A') + || (c >= '\u0641' && c <= '\u064A') + || (c >= '\u0671' && c <= '\u06B7') + || (c >= '\u06BA' && c <= '\u06BE') + || (c >= '\u06C0' && c <= '\u06CE') + || (c >= '\u06D0' && c <= '\u06D3') || (c == '\u06D5') + || (c >= '\u06E5' && c <= '\u06E6') + || (c >= '\u0905' && c <= '\u0939') || (c == '\u093D') + || (c >= '\u0958' && c <= '\u0961') + || (c >= '\u0985' && c <= '\u098C') + || (c >= '\u098F' && c <= '\u0990') + || (c >= '\u0993' && c <= '\u09A8') + || (c >= '\u09AA' && c <= '\u09B0') || (c == '\u09B2') + || (c >= '\u09B6' && c <= '\u09B9') + || (c >= '\u09DC' && c <= '\u09DD') + || (c >= '\u09DF' && c <= '\u09E1') + || (c >= '\u09F0' && c <= '\u09F1') + || (c >= '\u0A05' && c <= '\u0A0A') + || (c >= '\u0A0F' && c <= '\u0A10') + || (c >= '\u0A13' && c <= '\u0A28') + || 
(c >= '\u0A2A' && c <= '\u0A30') + || (c >= '\u0A32' && c <= '\u0A33') + || (c >= '\u0A35' && c <= '\u0A36') + || (c >= '\u0A38' && c <= '\u0A39') + || (c >= '\u0A59' && c <= '\u0A5C') || (c == '\u0A5E') + || (c >= '\u0A72' && c <= '\u0A74') + || (c >= '\u0A85' && c <= '\u0A8B') || (c == '\u0A8D') + || (c >= '\u0A8F' && c <= '\u0A91') + || (c >= '\u0A93' && c <= '\u0AA8') + || (c >= '\u0AAA' && c <= '\u0AB0') + || (c >= '\u0AB2' && c <= '\u0AB3') + || (c >= '\u0AB5' && c <= '\u0AB9') || (c == '\u0ABD') + || (c == '\u0AE0') || (c >= '\u0B05' && c <= '\u0B0C') + || (c >= '\u0B0F' && c <= '\u0B10') + || (c >= '\u0B13' && c <= '\u0B28') + || (c >= '\u0B2A' && c <= '\u0B30') + || (c >= '\u0B32' && c <= '\u0B33') + || (c >= '\u0B36' && c <= '\u0B39') || (c == '\u0B3D') + || (c >= '\u0B5C' && c <= '\u0B5D') + || (c >= '\u0B5F' && c <= '\u0B61') + || (c >= '\u0B85' && c <= '\u0B8A') + || (c >= '\u0B8E' && c <= '\u0B90') + || (c >= '\u0B92' && c <= '\u0B95') + || (c >= '\u0B99' && c <= '\u0B9A') || (c == '\u0B9C') + || (c >= '\u0B9E' && c <= '\u0B9F') + || (c >= '\u0BA3' && c <= '\u0BA4') + || (c >= '\u0BA8' && c <= '\u0BAA') + || (c >= '\u0BAE' && c <= '\u0BB5') + || (c >= '\u0BB7' && c <= '\u0BB9') + || (c >= '\u0C05' && c <= '\u0C0C') + || (c >= '\u0C0E' && c <= '\u0C10') + || (c >= '\u0C12' && c <= '\u0C28') + || (c >= '\u0C2A' && c <= '\u0C33') + || (c >= '\u0C35' && c <= '\u0C39') + || (c >= '\u0C60' && c <= '\u0C61') + || (c >= '\u0C85' && c <= '\u0C8C') + || (c >= '\u0C8E' && c <= '\u0C90') + || (c >= '\u0C92' && c <= '\u0CA8') + || (c >= '\u0CAA' && c <= '\u0CB3') + || (c >= '\u0CB5' && c <= '\u0CB9') || (c == '\u0CDE') + || (c >= '\u0CE0' && c <= '\u0CE1') + || (c >= '\u0D05' && c <= '\u0D0C') + || (c >= '\u0D0E' && c <= '\u0D10') + || (c >= '\u0D12' && c <= '\u0D28') + || (c >= '\u0D2A' && c <= '\u0D39') + || (c >= '\u0D60' && c <= '\u0D61') + || (c >= '\u0E01' && c <= '\u0E2E') || (c == '\u0E30') + || (c >= '\u0E32' && c <= '\u0E33') + || (c >= '\u0E40' && c <= 
'\u0E45') + || (c >= '\u0E81' && c <= '\u0E82') || (c == '\u0E84') + || (c >= '\u0E87' && c <= '\u0E88') || (c == '\u0E8A') + || (c == '\u0E8D') || (c >= '\u0E94' && c <= '\u0E97') + || (c >= '\u0E99' && c <= '\u0E9F') + || (c >= '\u0EA1' && c <= '\u0EA3') || (c == '\u0EA5') + || (c == '\u0EA7') || (c >= '\u0EAA' && c <= '\u0EAB') + || (c >= '\u0EAD' && c <= '\u0EAE') || (c == '\u0EB0') + || (c >= '\u0EB2' && c <= '\u0EB3') || (c == '\u0EBD') + || (c >= '\u0EC0' && c <= '\u0EC4') + || (c >= '\u0F40' && c <= '\u0F47') + || (c >= '\u0F49' && c <= '\u0F69') + || (c >= '\u10A0' && c <= '\u10C5') + || (c >= '\u10D0' && c <= '\u10F6') || (c == '\u1100') + || (c >= '\u1102' && c <= '\u1103') + || (c >= '\u1105' && c <= '\u1107') || (c == '\u1109') + || (c >= '\u110B' && c <= '\u110C') + || (c >= '\u110E' && c <= '\u1112') || (c == '\u113C') + || (c == '\u113E') || (c == '\u1140') || (c == '\u114C') + || (c == '\u114E') || (c == '\u1150') + || (c >= '\u1154' && c <= '\u1155') || (c == '\u1159') + || (c >= '\u115F' && c <= '\u1161') || (c == '\u1163') + || (c == '\u1165') || (c == '\u1167') || (c == '\u1169') + || (c >= '\u116D' && c <= '\u116E') + || (c >= '\u1172' && c <= '\u1173') || (c == '\u1175') + || (c == '\u119E') || (c == '\u11A8') || (c == '\u11AB') + || (c >= '\u11AE' && c <= '\u11AF') + || (c >= '\u11B7' && c <= '\u11B8') || (c == '\u11BA') + || (c >= '\u11BC' && c <= '\u11C2') || (c == '\u11EB') + || (c == '\u11F0') || (c == '\u11F9') + || (c >= '\u1E00' && c <= '\u1E9B') + || (c >= '\u1EA0' && c <= '\u1EF9') + || (c >= '\u1F00' && c <= '\u1F15') + || (c >= '\u1F18' && c <= '\u1F1D') + || (c >= '\u1F20' && c <= '\u1F45') + || (c >= '\u1F48' && c <= '\u1F4D') + || (c >= '\u1F50' && c <= '\u1F57') || (c == '\u1F59') + || (c == '\u1F5B') || (c == '\u1F5D') + || (c >= '\u1F5F' && c <= '\u1F7D') + || (c >= '\u1F80' && c <= '\u1FB4') + || (c >= '\u1FB6' && c <= '\u1FBC') || (c == '\u1FBE') + || (c >= '\u1FC2' && c <= '\u1FC4') + || (c >= '\u1FC6' && c <= '\u1FCC') + 
|| (c >= '\u1FD0' && c <= '\u1FD3') + || (c >= '\u1FD6' && c <= '\u1FDB') + || (c >= '\u1FE0' && c <= '\u1FEC') + || (c >= '\u1FF2' && c <= '\u1FF4') + || (c >= '\u1FF6' && c <= '\u1FFC') || (c == '\u2126') + || (c >= '\u212A' && c <= '\u212B') || (c == '\u212E') + || (c >= '\u2180' && c <= '\u2182') + || (c >= '\u3041' && c <= '\u3094') + || (c >= '\u30A1' && c <= '\u30FA') + || (c >= '\u3105' && c <= '\u312C') + || (c >= '\uAC00' && c <= '\uD7A3') + || (c >= '\u4E00' && c <= '\u9FA5') || (c == '\u3007') + || (c >= '\u3021' && c <= '\u3029') || (c == '_') || (c == '.') + || (c == '-') || (c >= '\u0300' && c <= '\u0345') + || (c >= '\u0360' && c <= '\u0361') + || (c >= '\u0483' && c <= '\u0486') + || (c >= '\u0591' && c <= '\u05A1') + || (c >= '\u05A3' && c <= '\u05B9') + || (c >= '\u05BB' && c <= '\u05BD') || (c == '\u05BF') + || (c >= '\u05C1' && c <= '\u05C2') || (c == '\u05C4') + || (c >= '\u064B' && c <= '\u0652') || (c == '\u0670') + || (c >= '\u06D6' && c <= '\u06DC') + || (c >= '\u06DD' && c <= '\u06DF') + || (c >= '\u06E0' && c <= '\u06E4') + || (c >= '\u06E7' && c <= '\u06E8') + || (c >= '\u06EA' && c <= '\u06ED') + || (c >= '\u0901' && c <= '\u0903') || (c == '\u093C') + || (c >= '\u093E' && c <= '\u094C') || (c == '\u094D') + || (c >= '\u0951' && c <= '\u0954') + || (c >= '\u0962' && c <= '\u0963') + || (c >= '\u0981' && c <= '\u0983') || (c == '\u09BC') + || (c == '\u09BE') || (c == '\u09BF') + || (c >= '\u09C0' && c <= '\u09C4') + || (c >= '\u09C7' && c <= '\u09C8') + || (c >= '\u09CB' && c <= '\u09CD') || (c == '\u09D7') + || (c >= '\u09E2' && c <= '\u09E3') || (c == '\u0A02') + || (c == '\u0A3C') || (c == '\u0A3E') || (c == '\u0A3F') + || (c >= '\u0A40' && c <= '\u0A42') + || (c >= '\u0A47' && c <= '\u0A48') + || (c >= '\u0A4B' && c <= '\u0A4D') + || (c >= '\u0A70' && c <= '\u0A71') + || (c >= '\u0A81' && c <= '\u0A83') || (c == '\u0ABC') + || (c >= '\u0ABE' && c <= '\u0AC5') + || (c >= '\u0AC7' && c <= '\u0AC9') + || (c >= '\u0ACB' && c <= 
'\u0ACD') + || (c >= '\u0B01' && c <= '\u0B03') || (c == '\u0B3C') + || (c >= '\u0B3E' && c <= '\u0B43') + || (c >= '\u0B47' && c <= '\u0B48') + || (c >= '\u0B4B' && c <= '\u0B4D') + || (c >= '\u0B56' && c <= '\u0B57') + || (c >= '\u0B82' && c <= '\u0B83') + || (c >= '\u0BBE' && c <= '\u0BC2') + || (c >= '\u0BC6' && c <= '\u0BC8') + || (c >= '\u0BCA' && c <= '\u0BCD') || (c == '\u0BD7') + || (c >= '\u0C01' && c <= '\u0C03') + || (c >= '\u0C3E' && c <= '\u0C44') + || (c >= '\u0C46' && c <= '\u0C48') + || (c >= '\u0C4A' && c <= '\u0C4D') + || (c >= '\u0C55' && c <= '\u0C56') + || (c >= '\u0C82' && c <= '\u0C83') + || (c >= '\u0CBE' && c <= '\u0CC4') + || (c >= '\u0CC6' && c <= '\u0CC8') + || (c >= '\u0CCA' && c <= '\u0CCD') + || (c >= '\u0CD5' && c <= '\u0CD6') + || (c >= '\u0D02' && c <= '\u0D03') + || (c >= '\u0D3E' && c <= '\u0D43') + || (c >= '\u0D46' && c <= '\u0D48') + || (c >= '\u0D4A' && c <= '\u0D4D') || (c == '\u0D57') + || (c == '\u0E31') || (c >= '\u0E34' && c <= '\u0E3A') + || (c >= '\u0E47' && c <= '\u0E4E') || (c == '\u0EB1') + || (c >= '\u0EB4' && c <= '\u0EB9') + || (c >= '\u0EBB' && c <= '\u0EBC') + || (c >= '\u0EC8' && c <= '\u0ECD') + || (c >= '\u0F18' && c <= '\u0F19') || (c == '\u0F35') + || (c == '\u0F37') || (c == '\u0F39') || (c == '\u0F3E') + || (c == '\u0F3F') || (c >= '\u0F71' && c <= '\u0F84') + || (c >= '\u0F86' && c <= '\u0F8B') + || (c >= '\u0F90' && c <= '\u0F95') || (c == '\u0F97') + || (c >= '\u0F99' && c <= '\u0FAD') + || (c >= '\u0FB1' && c <= '\u0FB7') || (c == '\u0FB9') + || (c >= '\u20D0' && c <= '\u20DC') || (c == '\u20E1') + || (c >= '\u302A' && c <= '\u302F') || (c == '\u3099') + || (c == '\u309A') || (c == '\u00B7') || (c == '\u02D0') + || (c == '\u02D1') || (c == '\u0387') || (c == '\u0640') + || (c == '\u0E46') || (c == '\u0EC6') || (c == '\u3005') + || (c >= '\u3031' && c <= '\u3035') + || (c >= '\u309D' && c <= '\u309E') || (c >= '\u30FC' && c <= '\u30FE')); + } + + public static boolean isNCName(String str) { + if (str 
== null) { + return false; + } else { + int len = str.length(); + switch (len) { + case 0: + return false; + case 1: + return NCName.isNCNameStart(str.charAt(0)); + default: + if (!NCName.isNCNameStart(str.charAt(0))) { + return false; + } + for (int i = 1; i < len; i++) { + if (!NCName.isNCNameTrail(str.charAt(i))) { + return false; + } + } + } + return true; + } + } + + private static void appendUHexTo(StringBuilder sb, int c) { + sb.append('U'); + for (int i = 0; i < 6; i++) { + sb.append(HEX_TABLE[(c & 0xF00000) >> 20]); + c <<= 4; + } + } + + public static String escapeName(String str) { + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < str.length(); i++) { + char c = str.charAt(i); + if ((c & 0xFC00) == 0xD800) { + char next = str.charAt(++i); + appendUHexTo(sb, (c << 10) + next + SURROGATE_OFFSET); + } else if (i == 0 && !isNCNameStart(c)) { + appendUHexTo(sb, c); + } else if (i != 0 && !isNCNameTrail(c)) { + appendUHexTo(sb, c); + } else { + sb.append(c); + } + } + return sb.toString().intern(); + } + // ]NOCPP] +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/NamedCharacters.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/NamedCharacters.java new file mode 100644 index 000000000..266a5a28e --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/NamedCharacters.java @@ -0,0 +1,944 @@ +/* + * Copyright 2004-2010 Apple Computer, Inc., Mozilla Foundation, and Opera + * Software ASA. + * + * You are granted a license to use, reproduce and create derivative works of + * this document. 
+ */ + +package nu.validator.htmlparser.impl; + +import nu.validator.htmlparser.annotation.CharacterName; +import nu.validator.htmlparser.annotation.NoLength; + +/** + * @version $Id$ + * @author hsivonen + */ +public final class NamedCharacters { + + static final @NoLength @CharacterName String[] NAMES = { "lig", "lig;", + "P", "P;", "cute", "cute;", "reve;", "irc", "irc;", "y;", "r;", + "rave", "rave;", "pha;", "acr;", "d;", "gon;", "pf;", + "plyFunction;", "ing", "ing;", "cr;", "sign;", "ilde", "ilde;", + "ml", "ml;", "ckslash;", "rv;", "rwed;", "y;", "cause;", + "rnoullis;", "ta;", "r;", "pf;", "eve;", "cr;", "mpeq;", "cy;", + "PY", "PY;", "cute;", "p;", "pitalDifferentialD;", "yleys;", + "aron;", "edil", "edil;", "irc;", "onint;", "ot;", "dilla;", + "nterDot;", "r;", "i;", "rcleDot;", "rcleMinus;", "rclePlus;", + "rcleTimes;", "ockwiseContourIntegral;", "oseCurlyDoubleQuote;", + "oseCurlyQuote;", "lon;", "lone;", "ngruent;", "nint;", + "ntourIntegral;", "pf;", "product;", + "unterClockwiseContourIntegral;", "oss;", "cr;", "p;", "pCap;", + ";", "otrahd;", "cy;", "cy;", "cy;", "gger;", "rr;", "shv;", + "aron;", "y;", "l;", "lta;", "r;", "acriticalAcute;", + "acriticalDot;", "acriticalDoubleAcute;", "acriticalGrave;", + "acriticalTilde;", "amond;", "fferentialD;", "pf;", "t;", "tDot;", + "tEqual;", "ubleContourIntegral;", "ubleDot;", "ubleDownArrow;", + "ubleLeftArrow;", "ubleLeftRightArrow;", "ubleLeftTee;", + "ubleLongLeftArrow;", "ubleLongLeftRightArrow;", + "ubleLongRightArrow;", "ubleRightArrow;", "ubleRightTee;", + "ubleUpArrow;", "ubleUpDownArrow;", "ubleVerticalBar;", "wnArrow;", + "wnArrowBar;", "wnArrowUpArrow;", "wnBreve;", "wnLeftRightVector;", + "wnLeftTeeVector;", "wnLeftVector;", "wnLeftVectorBar;", + "wnRightTeeVector;", "wnRightVector;", "wnRightVectorBar;", + "wnTee;", "wnTeeArrow;", "wnarrow;", "cr;", "trok;", "G;", "H", + "H;", "cute", "cute;", "aron;", "irc", "irc;", "y;", "ot;", "r;", + "rave", "rave;", "ement;", "acr;", "ptySmallSquare;", + 
"ptyVerySmallSquare;", "gon;", "pf;", "silon;", "ual;", + "ualTilde;", "uilibrium;", "cr;", "im;", "a;", "ml", "ml;", + "ists;", "ponentialE;", "y;", "r;", "lledSmallSquare;", + "lledVerySmallSquare;", "pf;", "rAll;", "uriertrf;", "cr;", "cy;", + "", ";", "mma;", "mmad;", "reve;", "edil;", "irc;", "y;", "ot;", + "r;", ";", "pf;", "eaterEqual;", "eaterEqualLess;", + "eaterFullEqual;", "eaterGreater;", "eaterLess;", + "eaterSlantEqual;", "eaterTilde;", "cr;", ";", "RDcy;", "cek;", + "t;", "irc;", "r;", "lbertSpace;", "pf;", "rizontalLine;", "cr;", + "trok;", "mpDownHump;", "mpEqual;", "cy;", "lig;", "cy;", "cute", + "cute;", "irc", "irc;", "y;", "ot;", "r;", "rave", "rave;", ";", + "acr;", "aginaryI;", "plies;", "t;", "tegral;", "tersection;", + "visibleComma;", "visibleTimes;", "gon;", "pf;", "ta;", "cr;", + "ilde;", "kcy;", "ml", "ml;", "irc;", "y;", "r;", "pf;", "cr;", + "ercy;", "kcy;", "cy;", "cy;", "ppa;", "edil;", "y;", "r;", "pf;", + "cr;", "cy;", "", ";", "cute;", "mbda;", "ng;", "placetrf;", "rr;", + "aron;", "edil;", "y;", "ftAngleBracket;", "ftArrow;", + "ftArrowBar;", "ftArrowRightArrow;", "ftCeiling;", + "ftDoubleBracket;", "ftDownTeeVector;", "ftDownVector;", + "ftDownVectorBar;", "ftFloor;", "ftRightArrow;", "ftRightVector;", + "ftTee;", "ftTeeArrow;", "ftTeeVector;", "ftTriangle;", + "ftTriangleBar;", "ftTriangleEqual;", "ftUpDownVector;", + "ftUpTeeVector;", "ftUpVector;", "ftUpVectorBar;", "ftVector;", + "ftVectorBar;", "ftarrow;", "ftrightarrow;", "ssEqualGreater;", + "ssFullEqual;", "ssGreater;", "ssLess;", "ssSlantEqual;", + "ssTilde;", "r;", ";", "eftarrow;", "idot;", "ngLeftArrow;", + "ngLeftRightArrow;", "ngRightArrow;", "ngleftarrow;", + "ngleftrightarrow;", "ngrightarrow;", "pf;", "werLeftArrow;", + "werRightArrow;", "cr;", "h;", "trok;", ";", "p;", "y;", + "diumSpace;", "llintrf;", "r;", "nusPlus;", "pf;", "cr;", ";", + "cy;", "cute;", "aron;", "edil;", "y;", "gativeMediumSpace;", + "gativeThickSpace;", "gativeThinSpace;", 
"gativeVeryThinSpace;", + "stedGreaterGreater;", "stedLessLess;", "wLine;", "r;", "Break;", + "nBreakingSpace;", "pf;", "t;", "tCongruent;", "tCupCap;", + "tDoubleVerticalBar;", "tElement;", "tEqual;", "tEqualTilde;", + "tExists;", "tGreater;", "tGreaterEqual;", "tGreaterFullEqual;", + "tGreaterGreater;", "tGreaterLess;", "tGreaterSlantEqual;", + "tGreaterTilde;", "tHumpDownHump;", "tHumpEqual;", + "tLeftTriangle;", "tLeftTriangleBar;", "tLeftTriangleEqual;", + "tLess;", "tLessEqual;", "tLessGreater;", "tLessLess;", + "tLessSlantEqual;", "tLessTilde;", "tNestedGreaterGreater;", + "tNestedLessLess;", "tPrecedes;", "tPrecedesEqual;", + "tPrecedesSlantEqual;", "tReverseElement;", "tRightTriangle;", + "tRightTriangleBar;", "tRightTriangleEqual;", "tSquareSubset;", + "tSquareSubsetEqual;", "tSquareSuperset;", "tSquareSupersetEqual;", + "tSubset;", "tSubsetEqual;", "tSucceeds;", "tSucceedsEqual;", + "tSucceedsSlantEqual;", "tSucceedsTilde;", "tSuperset;", + "tSupersetEqual;", "tTilde;", "tTildeEqual;", "tTildeFullEqual;", + "tTildeTilde;", "tVerticalBar;", "cr;", "ilde", "ilde;", ";", + "lig;", "cute", "cute;", "irc", "irc;", "y;", "blac;", "r;", + "rave", "rave;", "acr;", "ega;", "icron;", "pf;", + "enCurlyDoubleQuote;", "enCurlyQuote;", ";", "cr;", "lash", + "lash;", "ilde", "ilde;", "imes;", "ml", "ml;", "erBar;", + "erBrace;", "erBracket;", "erParenthesis;", "rtialD;", "y;", "r;", + "i;", ";", "usMinus;", "incareplane;", "pf;", ";", "ecedes;", + "ecedesEqual;", "ecedesSlantEqual;", "ecedesTilde;", "ime;", + "oduct;", "oportion;", "oportional;", "cr;", "i;", "OT", "OT;", + "r;", "pf;", "cr;", "arr;", "G", "G;", "cute;", "ng;", "rr;", + "rrtl;", "aron;", "edil;", "y;", ";", "verseElement;", + "verseEquilibrium;", "verseUpEquilibrium;", "r;", "o;", + "ghtAngleBracket;", "ghtArrow;", "ghtArrowBar;", + "ghtArrowLeftArrow;", "ghtCeiling;", "ghtDoubleBracket;", + "ghtDownTeeVector;", "ghtDownVector;", "ghtDownVectorBar;", + "ghtFloor;", "ghtTee;", "ghtTeeArrow;", 
"ghtTeeVector;", + "ghtTriangle;", "ghtTriangleBar;", "ghtTriangleEqual;", + "ghtUpDownVector;", "ghtUpTeeVector;", "ghtUpVector;", + "ghtUpVectorBar;", "ghtVector;", "ghtVectorBar;", "ghtarrow;", + "pf;", "undImplies;", "ightarrow;", "cr;", "h;", "leDelayed;", + "CHcy;", "cy;", "FTcy;", "cute;", ";", "aron;", "edil;", "irc;", + "y;", "r;", "ortDownArrow;", "ortLeftArrow;", "ortRightArrow;", + "ortUpArrow;", "gma;", "allCircle;", "pf;", "rt;", "uare;", + "uareIntersection;", "uareSubset;", "uareSubsetEqual;", + "uareSuperset;", "uareSupersetEqual;", "uareUnion;", "cr;", "ar;", + "b;", "bset;", "bsetEqual;", "cceeds;", "cceedsEqual;", + "cceedsSlantEqual;", "cceedsTilde;", "chThat;", "m;", "p;", + "perset;", "persetEqual;", "pset;", "ORN", "ORN;", "ADE;", "Hcy;", + "cy;", "b;", "u;", "aron;", "edil;", "y;", "r;", "erefore;", + "eta;", "ickSpace;", "inSpace;", "lde;", "ldeEqual;", + "ldeFullEqual;", "ldeTilde;", "pf;", "ipleDot;", "cr;", "trok;", + "cute", "cute;", "rr;", "rrocir;", "rcy;", "reve;", "irc", "irc;", + "y;", "blac;", "r;", "rave", "rave;", "acr;", "derBar;", + "derBrace;", "derBracket;", "derParenthesis;", "ion;", "ionPlus;", + "gon;", "pf;", "Arrow;", "ArrowBar;", "ArrowDownArrow;", + "DownArrow;", "Equilibrium;", "Tee;", "TeeArrow;", "arrow;", + "downarrow;", "perLeftArrow;", "perRightArrow;", "si;", "silon;", + "ing;", "cr;", "ilde;", "ml", "ml;", "ash;", "ar;", "y;", "ash;", + "ashl;", "e;", "rbar;", "rt;", "rticalBar;", "rticalLine;", + "rticalSeparator;", "rticalTilde;", "ryThinSpace;", "r;", "pf;", + "cr;", "dash;", "irc;", "dge;", "r;", "pf;", "cr;", "r;", ";", + "pf;", "cr;", "cy;", "cy;", "cy;", "cute", "cute;", "irc;", "y;", + "r;", "pf;", "cr;", "ml;", "cy;", "cute;", "aron;", "y;", "ot;", + "roWidthSpace;", "ta;", "r;", "pf;", "cr;", "cute", "cute;", + "reve;", ";", "E;", "d;", "irc", "irc;", "ute", "ute;", "y;", + "lig", "lig;", ";", "r;", "rave", "rave;", "efsym;", "eph;", + "pha;", "acr;", "alg;", "p", "p;", "d;", "dand;", "dd;", 
"dslope;", + "dv;", "g;", "ge;", "gle;", "gmsd;", "gmsdaa;", "gmsdab;", + "gmsdac;", "gmsdad;", "gmsdae;", "gmsdaf;", "gmsdag;", "gmsdah;", + "grt;", "grtvb;", "grtvbd;", "gsph;", "gst;", "gzarr;", "gon;", + "pf;", ";", "E;", "acir;", "e;", "id;", "os;", "prox;", "proxeq;", + "ing", "ing;", "cr;", "t;", "ymp;", "ympeq;", "ilde", "ilde;", + "ml", "ml;", "conint;", "int;", "ot;", "ckcong;", "ckepsilon;", + "ckprime;", "cksim;", "cksimeq;", "rvee;", "rwed;", "rwedge;", + "rk;", "rktbrk;", "ong;", "y;", "quo;", "caus;", "cause;", + "mptyv;", "psi;", "rnou;", "ta;", "th;", "tween;", "r;", "gcap;", + "gcirc;", "gcup;", "godot;", "goplus;", "gotimes;", "gsqcup;", + "gstar;", "gtriangledown;", "gtriangleup;", "guplus;", "gvee;", + "gwedge;", "arow;", "acklozenge;", "acksquare;", "acktriangle;", + "acktriangledown;", "acktriangleleft;", "acktriangleright;", + "ank;", "k12;", "k14;", "k34;", "ock;", "e;", "equiv;", "ot;", + "pf;", "t;", "ttom;", "wtie;", "xDL;", "xDR;", "xDl;", "xDr;", + "xH;", "xHD;", "xHU;", "xHd;", "xHu;", "xUL;", "xUR;", "xUl;", + "xUr;", "xV;", "xVH;", "xVL;", "xVR;", "xVh;", "xVl;", "xVr;", + "xbox;", "xdL;", "xdR;", "xdl;", "xdr;", "xh;", "xhD;", "xhU;", + "xhd;", "xhu;", "xminus;", "xplus;", "xtimes;", "xuL;", "xuR;", + "xul;", "xur;", "xv;", "xvH;", "xvL;", "xvR;", "xvh;", "xvl;", + "xvr;", "rime;", "eve;", "vbar", "vbar;", "cr;", "emi;", "im;", + "ime;", "ol;", "olb;", "olhsub;", "ll;", "llet;", "mp;", "mpE;", + "mpe;", "mpeq;", "cute;", "p;", "pand;", "pbrcup;", "pcap;", + "pcup;", "pdot;", "ps;", "ret;", "ron;", "aps;", "aron;", "edil", + "edil;", "irc;", "ups;", "upssm;", "ot;", "dil", "dil;", "mptyv;", + "nt", "nt;", "nterdot;", "r;", "cy;", "eck;", "eckmark;", "i;", + "r;", "rE;", "rc;", "rceq;", "rclearrowleft;", "rclearrowright;", + "rcledR;", "rcledS;", "rcledast;", "rcledcirc;", "rcleddash;", + "re;", "rfnint;", "rmid;", "rscir;", "ubs;", "ubsuit;", "lon;", + "lone;", "loneq;", "mma;", "mmat;", "mp;", "mpfn;", "mplement;", + "mplexes;", 
"ng;", "ngdot;", "nint;", "pf;", "prod;", "py", "py;", + "pysr;", "arr;", "oss;", "cr;", "ub;", "ube;", "up;", "upe;", + "dot;", "darrl;", "darrr;", "epr;", "esc;", "larr;", "larrp;", + "p;", "pbrcap;", "pcap;", "pcup;", "pdot;", "por;", "ps;", "rarr;", + "rarrm;", "rlyeqprec;", "rlyeqsucc;", "rlyvee;", "rlywedge;", + "rren", "rren;", "rvearrowleft;", "rvearrowright;", "vee;", "wed;", + "conint;", "int;", "lcty;", "rr;", "ar;", "gger;", "leth;", "rr;", + "sh;", "shv;", "karow;", "lac;", "aron;", "y;", ";", "agger;", + "arr;", "otseq;", "g", "g;", "lta;", "mptyv;", "isht;", "r;", + "arl;", "arr;", "am;", "amond;", "amondsuit;", "ams;", "e;", + "gamma;", "sin;", "v;", "vide", "vide;", "videontimes;", "vonx;", + "cy;", "corn;", "crop;", "llar;", "pf;", "t;", "teq;", "teqdot;", + "tminus;", "tplus;", "tsquare;", "ublebarwedge;", "wnarrow;", + "wndownarrows;", "wnharpoonleft;", "wnharpoonright;", "bkarow;", + "corn;", "crop;", "cr;", "cy;", "ol;", "trok;", "dot;", "ri;", + "rif;", "arr;", "har;", "angle;", "cy;", "igrarr;", "Dot;", "ot;", + "cute", "cute;", "ster;", "aron;", "ir;", "irc", "irc;", "olon;", + "y;", "ot;", ";", "Dot;", "r;", ";", "rave", "rave;", "s;", + "sdot;", ";", "inters;", "l;", "s;", "sdot;", "acr;", "pty;", + "ptyset;", "ptyv;", "sp13;", "sp14;", "sp;", "g;", "sp;", "gon;", + "pf;", "ar;", "arsl;", "lus;", "si;", "silon;", "siv;", "circ;", + "colon;", "sim;", "slantgtr;", "slantless;", "uals;", "uest;", + "uiv;", "uivDD;", "vparsl;", "Dot;", "arr;", "cr;", "dot;", "im;", + "a;", "h", "h;", "ml", "ml;", "ro;", "cl;", "ist;", "pectation;", + "ponentiale;", "llingdotseq;", "y;", "male;", "ilig;", "lig;", + "llig;", "r;", "lig;", "lig;", "at;", "lig;", "tns;", "of;", "pf;", + "rall;", "rk;", "rkv;", "artint;", "ac12", "ac12;", "ac13;", + "ac14", "ac14;", "ac15;", "ac16;", "ac18;", "ac23;", "ac25;", + "ac34", "ac34;", "ac35;", "ac38;", "ac45;", "ac56;", "ac58;", + "ac78;", "asl;", "own;", "cr;", ";", "l;", "cute;", "mma;", + "mmad;", "p;", "reve;", 
"irc;", "y;", "ot;", ";", "l;", "q;", + "qq;", "qslant;", "s;", "scc;", "sdot;", "sdoto;", "sdotol;", + "sl;", "sles;", "r;", ";", "g;", "mel;", "cy;", ";", "E;", "a;", + "j;", "E;", "ap;", "approx;", "e;", "eq;", "eqq;", "sim;", "pf;", + "ave;", "cr;", "im;", "ime;", "iml;", "", ";", "cc;", "cir;", + "dot;", "lPar;", "quest;", "rapprox;", "rarr;", "rdot;", + "reqless;", "reqqless;", "rless;", "rsim;", "ertneqq;", "nE;", + "rr;", "irsp;", "lf;", "milt;", "rdcy;", "rr;", "rrcir;", "rrw;", + "ar;", "irc;", "arts;", "artsuit;", "llip;", "rcon;", "r;", + "searow;", "swarow;", "arr;", "mtht;", "okleftarrow;", + "okrightarrow;", "pf;", "rbar;", "cr;", "lash;", "trok;", "bull;", + "phen;", "cute", "cute;", ";", "irc", "irc;", "y;", "cy;", "xcl", + "xcl;", "f;", "r;", "rave", "rave;", ";", "iint;", "int;", "nfin;", + "ota;", "lig;", "acr;", "age;", "agline;", "agpart;", "ath;", + "of;", "ped;", ";", "care;", "fin;", "fintie;", "odot;", "t;", + "tcal;", "tegers;", "tercal;", "tlarhk;", "tprod;", "cy;", "gon;", + "pf;", "ta;", "rod;", "uest", "uest;", "cr;", "in;", "inE;", + "indot;", "ins;", "insv;", "inv;", ";", "ilde;", "kcy;", "ml", + "ml;", "irc;", "y;", "r;", "ath;", "pf;", "cr;", "ercy;", "kcy;", + "ppa;", "ppav;", "edil;", "y;", "r;", "reen;", "cy;", "cy;", "pf;", + "cr;", "arr;", "rr;", "tail;", "arr;", ";", "g;", "ar;", "cute;", + "emptyv;", "gran;", "mbda;", "ng;", "ngd;", "ngle;", "p;", "quo", + "quo;", "rr;", "rrb;", "rrbfs;", "rrfs;", "rrhk;", "rrlp;", + "rrpl;", "rrsim;", "rrtl;", "t;", "tail;", "te;", "tes;", "arr;", + "brk;", "race;", "rack;", "rke;", "rksld;", "rkslu;", "aron;", + "edil;", "eil;", "ub;", "y;", "ca;", "quo;", "quor;", "rdhar;", + "rushar;", "sh;", ";", "ftarrow;", "ftarrowtail;", + "ftharpoondown;", "ftharpoonup;", "ftleftarrows;", "ftrightarrow;", + "ftrightarrows;", "ftrightharpoons;", "ftrightsquigarrow;", + "ftthreetimes;", "g;", "q;", "qq;", "qslant;", "s;", "scc;", + "sdot;", "sdoto;", "sdotor;", "sg;", "sges;", "ssapprox;", + 
"ssdot;", "sseqgtr;", "sseqqgtr;", "ssgtr;", "sssim;", "isht;", + "loor;", "r;", ";", "E;", "ard;", "aru;", "arul;", "blk;", "cy;", + ";", "arr;", "corner;", "hard;", "tri;", "idot;", "oust;", + "oustache;", "E;", "ap;", "approx;", "e;", "eq;", "eqq;", "sim;", + "ang;", "arr;", "brk;", "ngleftarrow;", "ngleftrightarrow;", + "ngmapsto;", "ngrightarrow;", "oparrowleft;", "oparrowright;", + "par;", "pf;", "plus;", "times;", "wast;", "wbar;", "z;", "zenge;", + "zf;", "ar;", "arlt;", "arr;", "corner;", "har;", "hard;", "m;", + "tri;", "aquo;", "cr;", "h;", "im;", "ime;", "img;", "qb;", "quo;", + "quor;", "trok;", "", ";", "cc;", "cir;", "dot;", "hree;", "imes;", + "larr;", "quest;", "rPar;", "ri;", "rie;", "rif;", "rdshar;", + "ruhar;", "ertneqq;", "nE;", "Dot;", "cr", "cr;", "le;", "lt;", + "ltese;", "p;", "psto;", "pstodown;", "pstoleft;", "pstoup;", + "rker;", "omma;", "y;", "ash;", "asuredangle;", "r;", "o;", "cro", + "cro;", "d;", "dast;", "dcir;", "ddot", "ddot;", "nus;", "nusb;", + "nusd;", "nusdu;", "cp;", "dr;", "plus;", "dels;", "pf;", ";", + "cr;", "tpos;", ";", "ltimap;", "map;", "g;", "t;", "tv;", + "eftarrow;", "eftrightarrow;", "l;", "t;", "tv;", "ightarrow;", + "Dash;", "dash;", "bla;", "cute;", "ng;", "p;", "pE;", "pid;", + "pos;", "pprox;", "tur;", "tural;", "turals;", "sp", "sp;", "ump;", + "umpe;", "ap;", "aron;", "edil;", "ong;", "ongdot;", "up;", "y;", + "ash;", ";", "Arr;", "arhk;", "arr;", "arrow;", "dot;", "quiv;", + "sear;", "sim;", "xist;", "xists;", "r;", "E;", "e;", "eq;", + "eqq;", "eqslant;", "es;", "sim;", "t;", "tr;", "Arr;", "arr;", + "par;", ";", "s;", "sd;", "v;", "cy;", "Arr;", "E;", "arr;", "dr;", + "e;", "eftarrow;", "eftrightarrow;", "eq;", "eqq;", "eqslant;", + "es;", "ess;", "sim;", "t;", "tri;", "trie;", "id;", "pf;", "t", + "t;", "tin;", "tinE;", "tindot;", "tinva;", "tinvb;", "tinvc;", + "tni;", "tniva;", "tnivb;", "tnivc;", "ar;", "arallel;", "arsl;", + "art;", "olint;", "r;", "rcue;", "re;", "rec;", "receq;", "Arr;", + 
"arr;", "arrc;", "arrw;", "ightarrow;", "tri;", "trie;", "c;", + "ccue;", "ce;", "cr;", "hortmid;", "hortparallel;", "im;", "ime;", + "imeq;", "mid;", "par;", "qsube;", "qsupe;", "ub;", "ubE;", "ube;", + "ubset;", "ubseteq;", "ubseteqq;", "ucc;", "ucceq;", "up;", "upE;", + "upe;", "upset;", "upseteq;", "upseteqq;", "gl;", "ilde", "ilde;", + "lg;", "riangleleft;", "rianglelefteq;", "riangleright;", + "rianglerighteq;", ";", "m;", "mero;", "msp;", "Dash;", "Harr;", + "ap;", "dash;", "ge;", "gt;", "infin;", "lArr;", "le;", "lt;", + "ltrie;", "rArr;", "rtrie;", "sim;", "Arr;", "arhk;", "arr;", + "arrow;", "near;", ";", "cute", "cute;", "st;", "ir;", "irc", + "irc;", "y;", "ash;", "blac;", "iv;", "ot;", "sold;", "lig;", + "cir;", "r;", "on;", "rave", "rave;", "t;", "bar;", "m;", "nt;", + "arr;", "cir;", "cross;", "ine;", "t;", "acr;", "ega;", "icron;", + "id;", "inus;", "pf;", "ar;", "erp;", "lus;", ";", "arr;", "d;", + "der;", "derof;", "df", "df;", "dm", "dm;", "igof;", "or;", + "slope;", "v;", "cr;", "lash", "lash;", "ol;", "ilde", "ilde;", + "imes;", "imesas;", "ml", "ml;", "bar;", "r;", "ra", "ra;", + "rallel;", "rsim;", "rsl;", "rt;", "y;", "rcnt;", "riod;", "rmil;", + "rp;", "rtenk;", "r;", "i;", "iv;", "mmat;", "one;", ";", + "tchfork;", "v;", "anck;", "anckh;", "ankv;", "us;", "usacir;", + "usb;", "uscir;", "usdo;", "usdu;", "use;", "usmn", "usmn;", + "ussim;", "ustwo;", ";", "intint;", "pf;", "und", "und;", ";", + "E;", "ap;", "cue;", "e;", "ec;", "ecapprox;", "eccurlyeq;", + "eceq;", "ecnapprox;", "ecneqq;", "ecnsim;", "ecsim;", "ime;", + "imes;", "nE;", "nap;", "nsim;", "od;", "ofalar;", "ofline;", + "ofsurf;", "op;", "opto;", "sim;", "urel;", "cr;", "i;", "ncsp;", + "r;", "nt;", "pf;", "rime;", "cr;", "aternions;", "atint;", "est;", + "esteq;", "ot", "ot;", "arr;", "rr;", "tail;", "arr;", "ar;", + "ce;", "cute;", "dic;", "emptyv;", "ng;", "ngd;", "nge;", "ngle;", + "quo", "quo;", "rr;", "rrap;", "rrb;", "rrbfs;", "rrc;", "rrfs;", + "rrhk;", "rrlp;", 
"rrpl;", "rrsim;", "rrtl;", "rrw;", "tail;", + "tio;", "tionals;", "arr;", "brk;", "race;", "rack;", "rke;", + "rksld;", "rkslu;", "aron;", "edil;", "eil;", "ub;", "y;", "ca;", + "ldhar;", "quo;", "quor;", "sh;", "al;", "aline;", "alpart;", + "als;", "ct;", "g", "g;", "isht;", "loor;", "r;", "ard;", "aru;", + "arul;", "o;", "ov;", "ghtarrow;", "ghtarrowtail;", + "ghtharpoondown;", "ghtharpoonup;", "ghtleftarrows;", + "ghtleftharpoons;", "ghtrightarrows;", "ghtsquigarrow;", + "ghtthreetimes;", "ng;", "singdotseq;", "arr;", "har;", "m;", + "oust;", "oustache;", "mid;", "ang;", "arr;", "brk;", "par;", + "pf;", "plus;", "times;", "ar;", "argt;", "polint;", "arr;", + "aquo;", "cr;", "h;", "qb;", "quo;", "quor;", "hree;", "imes;", + "ri;", "rie;", "rif;", "riltri;", "luhar;", ";", "cute;", "quo;", + ";", "E;", "ap;", "aron;", "cue;", "e;", "edil;", "irc;", "nE;", + "nap;", "nsim;", "polint;", "sim;", "y;", "ot;", "otb;", "ote;", + "Arr;", "arhk;", "arr;", "arrow;", "ct", "ct;", "mi;", "swar;", + "tminus;", "tmn;", "xt;", "r;", "rown;", "arp;", "chcy;", "cy;", + "ortmid;", "ortparallel;", "y", "y;", "gma;", "gmaf;", "gmav;", + "m;", "mdot;", "me;", "meq;", "mg;", "mgE;", "ml;", "mlE;", "mne;", + "mplus;", "mrarr;", "arr;", "allsetminus;", "ashp;", "eparsl;", + "id;", "ile;", "t;", "te;", "tes;", "ftcy;", "l;", "lb;", "lbar;", + "pf;", "ades;", "adesuit;", "ar;", "cap;", "caps;", "cup;", + "cups;", "sub;", "sube;", "subset;", "subseteq;", "sup;", "supe;", + "supset;", "supseteq;", "u;", "uare;", "uarf;", "uf;", "arr;", + "cr;", "etmn;", "mile;", "tarf;", "ar;", "arf;", "raightepsilon;", + "raightphi;", "rns;", "b;", "bE;", "bdot;", "be;", "bedot;", + "bmult;", "bnE;", "bne;", "bplus;", "brarr;", "bset;", "bseteq;", + "bseteqq;", "bsetneq;", "bsetneqq;", "bsim;", "bsub;", "bsup;", + "cc;", "ccapprox;", "cccurlyeq;", "cceq;", "ccnapprox;", "ccneqq;", + "ccnsim;", "ccsim;", "m;", "ng;", "p1", "p1;", "p2", "p2;", "p3", + "p3;", "p;", "pE;", "pdot;", "pdsub;", "pe;", "pedot;", 
"phsol;", + "phsub;", "plarr;", "pmult;", "pnE;", "pne;", "pplus;", "pset;", + "pseteq;", "pseteqq;", "psetneq;", "psetneqq;", "psim;", "psub;", + "psup;", "Arr;", "arhk;", "arr;", "arrow;", "nwar;", "lig", "lig;", + "rget;", "u;", "rk;", "aron;", "edil;", "y;", "ot;", "lrec;", "r;", + "ere4;", "erefore;", "eta;", "etasym;", "etav;", "ickapprox;", + "icksim;", "insp;", "kap;", "ksim;", "orn", "orn;", "lde;", "mes", + "mes;", "mesb;", "mesbar;", "mesd;", "nt;", "ea;", "p;", "pbot;", + "pcir;", "pf;", "pfork;", "sa;", "rime;", "ade;", "iangle;", + "iangledown;", "iangleleft;", "ianglelefteq;", "iangleq;", + "iangleright;", "ianglerighteq;", "idot;", "ie;", "iminus;", + "iplus;", "isb;", "itime;", "pezium;", "cr;", "cy;", "hcy;", + "trok;", "ixt;", "oheadleftarrow;", "oheadrightarrow;", "rr;", + "ar;", "cute", "cute;", "rr;", "rcy;", "reve;", "irc", "irc;", + "y;", "arr;", "blac;", "har;", "isht;", "r;", "rave", "rave;", + "arl;", "arr;", "blk;", "corn;", "corner;", "crop;", "tri;", + "acr;", "l", "l;", "gon;", "pf;", "arrow;", "downarrow;", + "harpoonleft;", "harpoonright;", "lus;", "si;", "sih;", "silon;", + "uparrows;", "corn;", "corner;", "crop;", "ing;", "tri;", "cr;", + "dot;", "ilde;", "ri;", "rif;", "arr;", "ml", "ml;", "angle;", + "rr;", "ar;", "arv;", "ash;", "ngrt;", "repsilon;", "rkappa;", + "rnothing;", "rphi;", "rpi;", "rpropto;", "rr;", "rrho;", + "rsigma;", "rsubsetneq;", "rsubsetneqq;", "rsupsetneq;", + "rsupsetneqq;", "rtheta;", "rtriangleleft;", "rtriangleright;", + "y;", "ash;", "e;", "ebar;", "eeq;", "llip;", "rbar;", "rt;", "r;", + "tri;", "sub;", "sup;", "pf;", "rop;", "tri;", "cr;", "ubnE;", + "ubne;", "upnE;", "upne;", "igzag;", "irc;", "dbar;", "dge;", + "dgeq;", "ierp;", "r;", "pf;", ";", ";", "eath;", "cr;", "ap;", + "irc;", "up;", "tri;", "r;", "Arr;", "arr;", ";", "Arr;", "arr;", + "ap;", "is;", "dot;", "pf;", "plus;", "time;", "Arr;", "arr;", + "cr;", "qcup;", "plus;", "tri;", "ee;", "edge;", "cute", "cute;", + "cy;", "irc;", "y;", "n", 
"n;", "r;", "cy;", "pf;", "cr;", "cy;", + "ml", "ml;", "cute;", "aron;", "y;", "ot;", "etrf;", "ta;", "r;", + "cy;", "grarr;", "pf;", "cr;", "j;", "nj;", }; + + static final @NoLength char[][] VALUES = { { '\u00c6' }, { '\u00c6' }, + { '\u0026' }, { '\u0026' }, { '\u00c1' }, { '\u00c1' }, + { '\u0102' }, { '\u00c2' }, { '\u00c2' }, { '\u0410' }, + { '\ud835', '\udd04' }, { '\u00c0' }, { '\u00c0' }, { '\u0391' }, + { '\u0100' }, { '\u2a53' }, { '\u0104' }, { '\ud835', '\udd38' }, + { '\u2061' }, { '\u00c5' }, { '\u00c5' }, { '\ud835', '\udc9c' }, + { '\u2254' }, { '\u00c3' }, { '\u00c3' }, { '\u00c4' }, + { '\u00c4' }, { '\u2216' }, { '\u2ae7' }, { '\u2306' }, + { '\u0411' }, { '\u2235' }, { '\u212c' }, { '\u0392' }, + { '\ud835', '\udd05' }, { '\ud835', '\udd39' }, { '\u02d8' }, + { '\u212c' }, { '\u224e' }, { '\u0427' }, { '\u00a9' }, + { '\u00a9' }, { '\u0106' }, { '\u22d2' }, { '\u2145' }, + { '\u212d' }, { '\u010c' }, { '\u00c7' }, { '\u00c7' }, + { '\u0108' }, { '\u2230' }, { '\u010a' }, { '\u00b8' }, + { '\u00b7' }, { '\u212d' }, { '\u03a7' }, { '\u2299' }, + { '\u2296' }, { '\u2295' }, { '\u2297' }, { '\u2232' }, + { '\u201d' }, { '\u2019' }, { '\u2237' }, { '\u2a74' }, + { '\u2261' }, { '\u222f' }, { '\u222e' }, { '\u2102' }, + { '\u2210' }, { '\u2233' }, { '\u2a2f' }, { '\ud835', '\udc9e' }, + { '\u22d3' }, { '\u224d' }, { '\u2145' }, { '\u2911' }, + { '\u0402' }, { '\u0405' }, { '\u040f' }, { '\u2021' }, + { '\u21a1' }, { '\u2ae4' }, { '\u010e' }, { '\u0414' }, + { '\u2207' }, { '\u0394' }, { '\ud835', '\udd07' }, { '\u00b4' }, + { '\u02d9' }, { '\u02dd' }, { '\u0060' }, { '\u02dc' }, + { '\u22c4' }, { '\u2146' }, { '\ud835', '\udd3b' }, { '\u00a8' }, + { '\u20dc' }, { '\u2250' }, { '\u222f' }, { '\u00a8' }, + { '\u21d3' }, { '\u21d0' }, { '\u21d4' }, { '\u2ae4' }, + { '\u27f8' }, { '\u27fa' }, { '\u27f9' }, { '\u21d2' }, + { '\u22a8' }, { '\u21d1' }, { '\u21d5' }, { '\u2225' }, + { '\u2193' }, { '\u2913' }, { '\u21f5' }, { '\u0311' }, + { '\u2950' }, { 
'\u295e' }, { '\u21bd' }, { '\u2956' }, + { '\u295f' }, { '\u21c1' }, { '\u2957' }, { '\u22a4' }, + { '\u21a7' }, { '\u21d3' }, { '\ud835', '\udc9f' }, { '\u0110' }, + { '\u014a' }, { '\u00d0' }, { '\u00d0' }, { '\u00c9' }, + { '\u00c9' }, { '\u011a' }, { '\u00ca' }, { '\u00ca' }, + { '\u042d' }, { '\u0116' }, { '\ud835', '\udd08' }, { '\u00c8' }, + { '\u00c8' }, { '\u2208' }, { '\u0112' }, { '\u25fb' }, + { '\u25ab' }, { '\u0118' }, { '\ud835', '\udd3c' }, { '\u0395' }, + { '\u2a75' }, { '\u2242' }, { '\u21cc' }, { '\u2130' }, + { '\u2a73' }, { '\u0397' }, { '\u00cb' }, { '\u00cb' }, + { '\u2203' }, { '\u2147' }, { '\u0424' }, { '\ud835', '\udd09' }, + { '\u25fc' }, { '\u25aa' }, { '\ud835', '\udd3d' }, { '\u2200' }, + { '\u2131' }, { '\u2131' }, { '\u0403' }, { '\u003e' }, + { '\u003e' }, { '\u0393' }, { '\u03dc' }, { '\u011e' }, + { '\u0122' }, { '\u011c' }, { '\u0413' }, { '\u0120' }, + { '\ud835', '\udd0a' }, { '\u22d9' }, { '\ud835', '\udd3e' }, + { '\u2265' }, { '\u22db' }, { '\u2267' }, { '\u2aa2' }, + { '\u2277' }, { '\u2a7e' }, { '\u2273' }, { '\ud835', '\udca2' }, + { '\u226b' }, { '\u042a' }, { '\u02c7' }, { '\u005e' }, + { '\u0124' }, { '\u210c' }, { '\u210b' }, { '\u210d' }, + { '\u2500' }, { '\u210b' }, { '\u0126' }, { '\u224e' }, + { '\u224f' }, { '\u0415' }, { '\u0132' }, { '\u0401' }, + { '\u00cd' }, { '\u00cd' }, { '\u00ce' }, { '\u00ce' }, + { '\u0418' }, { '\u0130' }, { '\u2111' }, { '\u00cc' }, + { '\u00cc' }, { '\u2111' }, { '\u012a' }, { '\u2148' }, + { '\u21d2' }, { '\u222c' }, { '\u222b' }, { '\u22c2' }, + { '\u2063' }, { '\u2062' }, { '\u012e' }, { '\ud835', '\udd40' }, + { '\u0399' }, { '\u2110' }, { '\u0128' }, { '\u0406' }, + { '\u00cf' }, { '\u00cf' }, { '\u0134' }, { '\u0419' }, + { '\ud835', '\udd0d' }, { '\ud835', '\udd41' }, + { '\ud835', '\udca5' }, { '\u0408' }, { '\u0404' }, { '\u0425' }, + { '\u040c' }, { '\u039a' }, { '\u0136' }, { '\u041a' }, + { '\ud835', '\udd0e' }, { '\ud835', '\udd42' }, + { '\ud835', '\udca6' }, { 
'\u0409' }, { '\u003c' }, { '\u003c' }, + { '\u0139' }, { '\u039b' }, { '\u27ea' }, { '\u2112' }, + { '\u219e' }, { '\u013d' }, { '\u013b' }, { '\u041b' }, + { '\u27e8' }, { '\u2190' }, { '\u21e4' }, { '\u21c6' }, + { '\u2308' }, { '\u27e6' }, { '\u2961' }, { '\u21c3' }, + { '\u2959' }, { '\u230a' }, { '\u2194' }, { '\u294e' }, + { '\u22a3' }, { '\u21a4' }, { '\u295a' }, { '\u22b2' }, + { '\u29cf' }, { '\u22b4' }, { '\u2951' }, { '\u2960' }, + { '\u21bf' }, { '\u2958' }, { '\u21bc' }, { '\u2952' }, + { '\u21d0' }, { '\u21d4' }, { '\u22da' }, { '\u2266' }, + { '\u2276' }, { '\u2aa1' }, { '\u2a7d' }, { '\u2272' }, + { '\ud835', '\udd0f' }, { '\u22d8' }, { '\u21da' }, { '\u013f' }, + { '\u27f5' }, { '\u27f7' }, { '\u27f6' }, { '\u27f8' }, + { '\u27fa' }, { '\u27f9' }, { '\ud835', '\udd43' }, { '\u2199' }, + { '\u2198' }, { '\u2112' }, { '\u21b0' }, { '\u0141' }, + { '\u226a' }, { '\u2905' }, { '\u041c' }, { '\u205f' }, + { '\u2133' }, { '\ud835', '\udd10' }, { '\u2213' }, + { '\ud835', '\udd44' }, { '\u2133' }, { '\u039c' }, { '\u040a' }, + { '\u0143' }, { '\u0147' }, { '\u0145' }, { '\u041d' }, + { '\u200b' }, { '\u200b' }, { '\u200b' }, { '\u200b' }, + { '\u226b' }, { '\u226a' }, { '\n' }, { '\ud835', '\udd11' }, + { '\u2060' }, { '\u00a0' }, { '\u2115' }, { '\u2aec' }, + { '\u2262' }, { '\u226d' }, { '\u2226' }, { '\u2209' }, + { '\u2260' }, { '\u2242', '\u0338' }, { '\u2204' }, { '\u226f' }, + { '\u2271' }, { '\u2267', '\u0338' }, { '\u226b', '\u0338' }, + { '\u2279' }, { '\u2a7e', '\u0338' }, { '\u2275' }, + { '\u224e', '\u0338' }, { '\u224f', '\u0338' }, { '\u22ea' }, + { '\u29cf', '\u0338' }, { '\u22ec' }, { '\u226e' }, { '\u2270' }, + { '\u2278' }, { '\u226a', '\u0338' }, { '\u2a7d', '\u0338' }, + { '\u2274' }, { '\u2aa2', '\u0338' }, { '\u2aa1', '\u0338' }, + { '\u2280' }, { '\u2aaf', '\u0338' }, { '\u22e0' }, { '\u220c' }, + { '\u22eb' }, { '\u29d0', '\u0338' }, { '\u22ed' }, + { '\u228f', '\u0338' }, { '\u22e2' }, { '\u2290', '\u0338' }, + { '\u22e3' }, { 
'\u2282', '\u20d2' }, { '\u2288' }, { '\u2281' }, + { '\u2ab0', '\u0338' }, { '\u22e1' }, { '\u227f', '\u0338' }, + { '\u2283', '\u20d2' }, { '\u2289' }, { '\u2241' }, { '\u2244' }, + { '\u2247' }, { '\u2249' }, { '\u2224' }, { '\ud835', '\udca9' }, + { '\u00d1' }, { '\u00d1' }, { '\u039d' }, { '\u0152' }, + { '\u00d3' }, { '\u00d3' }, { '\u00d4' }, { '\u00d4' }, + { '\u041e' }, { '\u0150' }, { '\ud835', '\udd12' }, { '\u00d2' }, + { '\u00d2' }, { '\u014c' }, { '\u03a9' }, { '\u039f' }, + { '\ud835', '\udd46' }, { '\u201c' }, { '\u2018' }, { '\u2a54' }, + { '\ud835', '\udcaa' }, { '\u00d8' }, { '\u00d8' }, { '\u00d5' }, + { '\u00d5' }, { '\u2a37' }, { '\u00d6' }, { '\u00d6' }, + { '\u203e' }, { '\u23de' }, { '\u23b4' }, { '\u23dc' }, + { '\u2202' }, { '\u041f' }, { '\ud835', '\udd13' }, { '\u03a6' }, + { '\u03a0' }, { '\u00b1' }, { '\u210c' }, { '\u2119' }, + { '\u2abb' }, { '\u227a' }, { '\u2aaf' }, { '\u227c' }, + { '\u227e' }, { '\u2033' }, { '\u220f' }, { '\u2237' }, + { '\u221d' }, { '\ud835', '\udcab' }, { '\u03a8' }, { '\u0022' }, + { '\u0022' }, { '\ud835', '\udd14' }, { '\u211a' }, + { '\ud835', '\udcac' }, { '\u2910' }, { '\u00ae' }, { '\u00ae' }, + { '\u0154' }, { '\u27eb' }, { '\u21a0' }, { '\u2916' }, + { '\u0158' }, { '\u0156' }, { '\u0420' }, { '\u211c' }, + { '\u220b' }, { '\u21cb' }, { '\u296f' }, { '\u211c' }, + { '\u03a1' }, { '\u27e9' }, { '\u2192' }, { '\u21e5' }, + { '\u21c4' }, { '\u2309' }, { '\u27e7' }, { '\u295d' }, + { '\u21c2' }, { '\u2955' }, { '\u230b' }, { '\u22a2' }, + { '\u21a6' }, { '\u295b' }, { '\u22b3' }, { '\u29d0' }, + { '\u22b5' }, { '\u294f' }, { '\u295c' }, { '\u21be' }, + { '\u2954' }, { '\u21c0' }, { '\u2953' }, { '\u21d2' }, + { '\u211d' }, { '\u2970' }, { '\u21db' }, { '\u211b' }, + { '\u21b1' }, { '\u29f4' }, { '\u0429' }, { '\u0428' }, + { '\u042c' }, { '\u015a' }, { '\u2abc' }, { '\u0160' }, + { '\u015e' }, { '\u015c' }, { '\u0421' }, { '\ud835', '\udd16' }, + { '\u2193' }, { '\u2190' }, { '\u2192' }, { '\u2191' }, + 
{ '\u03a3' }, { '\u2218' }, { '\ud835', '\udd4a' }, { '\u221a' }, + { '\u25a1' }, { '\u2293' }, { '\u228f' }, { '\u2291' }, + { '\u2290' }, { '\u2292' }, { '\u2294' }, { '\ud835', '\udcae' }, + { '\u22c6' }, { '\u22d0' }, { '\u22d0' }, { '\u2286' }, + { '\u227b' }, { '\u2ab0' }, { '\u227d' }, { '\u227f' }, + { '\u220b' }, { '\u2211' }, { '\u22d1' }, { '\u2283' }, + { '\u2287' }, { '\u22d1' }, { '\u00de' }, { '\u00de' }, + { '\u2122' }, { '\u040b' }, { '\u0426' }, { '\u0009' }, + { '\u03a4' }, { '\u0164' }, { '\u0162' }, { '\u0422' }, + { '\ud835', '\udd17' }, { '\u2234' }, { '\u0398' }, + { '\u205f', '\u200a' }, { '\u2009' }, { '\u223c' }, { '\u2243' }, + { '\u2245' }, { '\u2248' }, { '\ud835', '\udd4b' }, { '\u20db' }, + { '\ud835', '\udcaf' }, { '\u0166' }, { '\u00da' }, { '\u00da' }, + { '\u219f' }, { '\u2949' }, { '\u040e' }, { '\u016c' }, + { '\u00db' }, { '\u00db' }, { '\u0423' }, { '\u0170' }, + { '\ud835', '\udd18' }, { '\u00d9' }, { '\u00d9' }, { '\u016a' }, + { '\u005f' }, { '\u23df' }, { '\u23b5' }, { '\u23dd' }, + { '\u22c3' }, { '\u228e' }, { '\u0172' }, { '\ud835', '\udd4c' }, + { '\u2191' }, { '\u2912' }, { '\u21c5' }, { '\u2195' }, + { '\u296e' }, { '\u22a5' }, { '\u21a5' }, { '\u21d1' }, + { '\u21d5' }, { '\u2196' }, { '\u2197' }, { '\u03d2' }, + { '\u03a5' }, { '\u016e' }, { '\ud835', '\udcb0' }, { '\u0168' }, + { '\u00dc' }, { '\u00dc' }, { '\u22ab' }, { '\u2aeb' }, + { '\u0412' }, { '\u22a9' }, { '\u2ae6' }, { '\u22c1' }, + { '\u2016' }, { '\u2016' }, { '\u2223' }, { '\u007c' }, + { '\u2758' }, { '\u2240' }, { '\u200a' }, { '\ud835', '\udd19' }, + { '\ud835', '\udd4d' }, { '\ud835', '\udcb1' }, { '\u22aa' }, + { '\u0174' }, { '\u22c0' }, { '\ud835', '\udd1a' }, + { '\ud835', '\udd4e' }, { '\ud835', '\udcb2' }, + { '\ud835', '\udd1b' }, { '\u039e' }, { '\ud835', '\udd4f' }, + { '\ud835', '\udcb3' }, { '\u042f' }, { '\u0407' }, { '\u042e' }, + { '\u00dd' }, { '\u00dd' }, { '\u0176' }, { '\u042b' }, + { '\ud835', '\udd1c' }, { '\ud835', '\udd50' }, 
+ { '\ud835', '\udcb4' }, { '\u0178' }, { '\u0416' }, { '\u0179' }, + { '\u017d' }, { '\u0417' }, { '\u017b' }, { '\u200b' }, + { '\u0396' }, { '\u2128' }, { '\u2124' }, { '\ud835', '\udcb5' }, + { '\u00e1' }, { '\u00e1' }, { '\u0103' }, { '\u223e' }, + { '\u223e', '\u0333' }, { '\u223f' }, { '\u00e2' }, { '\u00e2' }, + { '\u00b4' }, { '\u00b4' }, { '\u0430' }, { '\u00e6' }, + { '\u00e6' }, { '\u2061' }, { '\ud835', '\udd1e' }, { '\u00e0' }, + { '\u00e0' }, { '\u2135' }, { '\u2135' }, { '\u03b1' }, + { '\u0101' }, { '\u2a3f' }, { '\u0026' }, { '\u0026' }, + { '\u2227' }, { '\u2a55' }, { '\u2a5c' }, { '\u2a58' }, + { '\u2a5a' }, { '\u2220' }, { '\u29a4' }, { '\u2220' }, + { '\u2221' }, { '\u29a8' }, { '\u29a9' }, { '\u29aa' }, + { '\u29ab' }, { '\u29ac' }, { '\u29ad' }, { '\u29ae' }, + { '\u29af' }, { '\u221f' }, { '\u22be' }, { '\u299d' }, + { '\u2222' }, { '\u00c5' }, { '\u237c' }, { '\u0105' }, + { '\ud835', '\udd52' }, { '\u2248' }, { '\u2a70' }, { '\u2a6f' }, + { '\u224a' }, { '\u224b' }, { '\'' }, { '\u2248' }, { '\u224a' }, + { '\u00e5' }, { '\u00e5' }, { '\ud835', '\udcb6' }, { '\u002a' }, + { '\u2248' }, { '\u224d' }, { '\u00e3' }, { '\u00e3' }, + { '\u00e4' }, { '\u00e4' }, { '\u2233' }, { '\u2a11' }, + { '\u2aed' }, { '\u224c' }, { '\u03f6' }, { '\u2035' }, + { '\u223d' }, { '\u22cd' }, { '\u22bd' }, { '\u2305' }, + { '\u2305' }, { '\u23b5' }, { '\u23b6' }, { '\u224c' }, + { '\u0431' }, { '\u201e' }, { '\u2235' }, { '\u2235' }, + { '\u29b0' }, { '\u03f6' }, { '\u212c' }, { '\u03b2' }, + { '\u2136' }, { '\u226c' }, { '\ud835', '\udd1f' }, { '\u22c2' }, + { '\u25ef' }, { '\u22c3' }, { '\u2a00' }, { '\u2a01' }, + { '\u2a02' }, { '\u2a06' }, { '\u2605' }, { '\u25bd' }, + { '\u25b3' }, { '\u2a04' }, { '\u22c1' }, { '\u22c0' }, + { '\u290d' }, { '\u29eb' }, { '\u25aa' }, { '\u25b4' }, + { '\u25be' }, { '\u25c2' }, { '\u25b8' }, { '\u2423' }, + { '\u2592' }, { '\u2591' }, { '\u2593' }, { '\u2588' }, + { '\u003d', '\u20e5' }, { '\u2261', '\u20e5' }, { '\u2310' }, 
+ { '\ud835', '\udd53' }, { '\u22a5' }, { '\u22a5' }, { '\u22c8' }, + { '\u2557' }, { '\u2554' }, { '\u2556' }, { '\u2553' }, + { '\u2550' }, { '\u2566' }, { '\u2569' }, { '\u2564' }, + { '\u2567' }, { '\u255d' }, { '\u255a' }, { '\u255c' }, + { '\u2559' }, { '\u2551' }, { '\u256c' }, { '\u2563' }, + { '\u2560' }, { '\u256b' }, { '\u2562' }, { '\u255f' }, + { '\u29c9' }, { '\u2555' }, { '\u2552' }, { '\u2510' }, + { '\u250c' }, { '\u2500' }, { '\u2565' }, { '\u2568' }, + { '\u252c' }, { '\u2534' }, { '\u229f' }, { '\u229e' }, + { '\u22a0' }, { '\u255b' }, { '\u2558' }, { '\u2518' }, + { '\u2514' }, { '\u2502' }, { '\u256a' }, { '\u2561' }, + { '\u255e' }, { '\u253c' }, { '\u2524' }, { '\u251c' }, + { '\u2035' }, { '\u02d8' }, { '\u00a6' }, { '\u00a6' }, + { '\ud835', '\udcb7' }, { '\u204f' }, { '\u223d' }, { '\u22cd' }, + { '\\' }, { '\u29c5' }, { '\u27c8' }, { '\u2022' }, { '\u2022' }, + { '\u224e' }, { '\u2aae' }, { '\u224f' }, { '\u224f' }, + { '\u0107' }, { '\u2229' }, { '\u2a44' }, { '\u2a49' }, + { '\u2a4b' }, { '\u2a47' }, { '\u2a40' }, { '\u2229', '\ufe00' }, + { '\u2041' }, { '\u02c7' }, { '\u2a4d' }, { '\u010d' }, + { '\u00e7' }, { '\u00e7' }, { '\u0109' }, { '\u2a4c' }, + { '\u2a50' }, { '\u010b' }, { '\u00b8' }, { '\u00b8' }, + { '\u29b2' }, { '\u00a2' }, { '\u00a2' }, { '\u00b7' }, + { '\ud835', '\udd20' }, { '\u0447' }, { '\u2713' }, { '\u2713' }, + { '\u03c7' }, { '\u25cb' }, { '\u29c3' }, { '\u02c6' }, + { '\u2257' }, { '\u21ba' }, { '\u21bb' }, { '\u00ae' }, + { '\u24c8' }, { '\u229b' }, { '\u229a' }, { '\u229d' }, + { '\u2257' }, { '\u2a10' }, { '\u2aef' }, { '\u29c2' }, + { '\u2663' }, { '\u2663' }, { '\u003a' }, { '\u2254' }, + { '\u2254' }, { '\u002c' }, { '\u0040' }, { '\u2201' }, + { '\u2218' }, { '\u2201' }, { '\u2102' }, { '\u2245' }, + { '\u2a6d' }, { '\u222e' }, { '\ud835', '\udd54' }, { '\u2210' }, + { '\u00a9' }, { '\u00a9' }, { '\u2117' }, { '\u21b5' }, + { '\u2717' }, { '\ud835', '\udcb8' }, { '\u2acf' }, { '\u2ad1' }, + { '\u2ad0' }, 
{ '\u2ad2' }, { '\u22ef' }, { '\u2938' }, + { '\u2935' }, { '\u22de' }, { '\u22df' }, { '\u21b6' }, + { '\u293d' }, { '\u222a' }, { '\u2a48' }, { '\u2a46' }, + { '\u2a4a' }, { '\u228d' }, { '\u2a45' }, { '\u222a', '\ufe00' }, + { '\u21b7' }, { '\u293c' }, { '\u22de' }, { '\u22df' }, + { '\u22ce' }, { '\u22cf' }, { '\u00a4' }, { '\u00a4' }, + { '\u21b6' }, { '\u21b7' }, { '\u22ce' }, { '\u22cf' }, + { '\u2232' }, { '\u2231' }, { '\u232d' }, { '\u21d3' }, + { '\u2965' }, { '\u2020' }, { '\u2138' }, { '\u2193' }, + { '\u2010' }, { '\u22a3' }, { '\u290f' }, { '\u02dd' }, + { '\u010f' }, { '\u0434' }, { '\u2146' }, { '\u2021' }, + { '\u21ca' }, { '\u2a77' }, { '\u00b0' }, { '\u00b0' }, + { '\u03b4' }, { '\u29b1' }, { '\u297f' }, { '\ud835', '\udd21' }, + { '\u21c3' }, { '\u21c2' }, { '\u22c4' }, { '\u22c4' }, + { '\u2666' }, { '\u2666' }, { '\u00a8' }, { '\u03dd' }, + { '\u22f2' }, { '\u00f7' }, { '\u00f7' }, { '\u00f7' }, + { '\u22c7' }, { '\u22c7' }, { '\u0452' }, { '\u231e' }, + { '\u230d' }, { '\u0024' }, { '\ud835', '\udd55' }, { '\u02d9' }, + { '\u2250' }, { '\u2251' }, { '\u2238' }, { '\u2214' }, + { '\u22a1' }, { '\u2306' }, { '\u2193' }, { '\u21ca' }, + { '\u21c3' }, { '\u21c2' }, { '\u2910' }, { '\u231f' }, + { '\u230c' }, { '\ud835', '\udcb9' }, { '\u0455' }, { '\u29f6' }, + { '\u0111' }, { '\u22f1' }, { '\u25bf' }, { '\u25be' }, + { '\u21f5' }, { '\u296f' }, { '\u29a6' }, { '\u045f' }, + { '\u27ff' }, { '\u2a77' }, { '\u2251' }, { '\u00e9' }, + { '\u00e9' }, { '\u2a6e' }, { '\u011b' }, { '\u2256' }, + { '\u00ea' }, { '\u00ea' }, { '\u2255' }, { '\u044d' }, + { '\u0117' }, { '\u2147' }, { '\u2252' }, { '\ud835', '\udd22' }, + { '\u2a9a' }, { '\u00e8' }, { '\u00e8' }, { '\u2a96' }, + { '\u2a98' }, { '\u2a99' }, { '\u23e7' }, { '\u2113' }, + { '\u2a95' }, { '\u2a97' }, { '\u0113' }, { '\u2205' }, + { '\u2205' }, { '\u2205' }, { '\u2004' }, { '\u2005' }, + { '\u2003' }, { '\u014b' }, { '\u2002' }, { '\u0119' }, + { '\ud835', '\udd56' }, { '\u22d5' }, { '\u29e3' 
}, { '\u2a71' }, + { '\u03b5' }, { '\u03b5' }, { '\u03f5' }, { '\u2256' }, + { '\u2255' }, { '\u2242' }, { '\u2a96' }, { '\u2a95' }, + { '\u003d' }, { '\u225f' }, { '\u2261' }, { '\u2a78' }, + { '\u29e5' }, { '\u2253' }, { '\u2971' }, { '\u212f' }, + { '\u2250' }, { '\u2242' }, { '\u03b7' }, { '\u00f0' }, + { '\u00f0' }, { '\u00eb' }, { '\u00eb' }, { '\u20ac' }, + { '\u0021' }, { '\u2203' }, { '\u2130' }, { '\u2147' }, + { '\u2252' }, { '\u0444' }, { '\u2640' }, { '\ufb03' }, + { '\ufb00' }, { '\ufb04' }, { '\ud835', '\udd23' }, { '\ufb01' }, + { '\u0066', '\u006a' }, { '\u266d' }, { '\ufb02' }, { '\u25b1' }, + { '\u0192' }, { '\ud835', '\udd57' }, { '\u2200' }, { '\u22d4' }, + { '\u2ad9' }, { '\u2a0d' }, { '\u00bd' }, { '\u00bd' }, + { '\u2153' }, { '\u00bc' }, { '\u00bc' }, { '\u2155' }, + { '\u2159' }, { '\u215b' }, { '\u2154' }, { '\u2156' }, + { '\u00be' }, { '\u00be' }, { '\u2157' }, { '\u215c' }, + { '\u2158' }, { '\u215a' }, { '\u215d' }, { '\u215e' }, + { '\u2044' }, { '\u2322' }, { '\ud835', '\udcbb' }, { '\u2267' }, + { '\u2a8c' }, { '\u01f5' }, { '\u03b3' }, { '\u03dd' }, + { '\u2a86' }, { '\u011f' }, { '\u011d' }, { '\u0433' }, + { '\u0121' }, { '\u2265' }, { '\u22db' }, { '\u2265' }, + { '\u2267' }, { '\u2a7e' }, { '\u2a7e' }, { '\u2aa9' }, + { '\u2a80' }, { '\u2a82' }, { '\u2a84' }, { '\u22db', '\ufe00' }, + { '\u2a94' }, { '\ud835', '\udd24' }, { '\u226b' }, { '\u22d9' }, + { '\u2137' }, { '\u0453' }, { '\u2277' }, { '\u2a92' }, + { '\u2aa5' }, { '\u2aa4' }, { '\u2269' }, { '\u2a8a' }, + { '\u2a8a' }, { '\u2a88' }, { '\u2a88' }, { '\u2269' }, + { '\u22e7' }, { '\ud835', '\udd58' }, { '\u0060' }, { '\u210a' }, + { '\u2273' }, { '\u2a8e' }, { '\u2a90' }, { '\u003e' }, + { '\u003e' }, { '\u2aa7' }, { '\u2a7a' }, { '\u22d7' }, + { '\u2995' }, { '\u2a7c' }, { '\u2a86' }, { '\u2978' }, + { '\u22d7' }, { '\u22db' }, { '\u2a8c' }, { '\u2277' }, + { '\u2273' }, { '\u2269', '\ufe00' }, { '\u2269', '\ufe00' }, + { '\u21d4' }, { '\u200a' }, { '\u00bd' }, { 
'\u210b' }, + { '\u044a' }, { '\u2194' }, { '\u2948' }, { '\u21ad' }, + { '\u210f' }, { '\u0125' }, { '\u2665' }, { '\u2665' }, + { '\u2026' }, { '\u22b9' }, { '\ud835', '\udd25' }, { '\u2925' }, + { '\u2926' }, { '\u21ff' }, { '\u223b' }, { '\u21a9' }, + { '\u21aa' }, { '\ud835', '\udd59' }, { '\u2015' }, + { '\ud835', '\udcbd' }, { '\u210f' }, { '\u0127' }, { '\u2043' }, + { '\u2010' }, { '\u00ed' }, { '\u00ed' }, { '\u2063' }, + { '\u00ee' }, { '\u00ee' }, { '\u0438' }, { '\u0435' }, + { '\u00a1' }, { '\u00a1' }, { '\u21d4' }, { '\ud835', '\udd26' }, + { '\u00ec' }, { '\u00ec' }, { '\u2148' }, { '\u2a0c' }, + { '\u222d' }, { '\u29dc' }, { '\u2129' }, { '\u0133' }, + { '\u012b' }, { '\u2111' }, { '\u2110' }, { '\u2111' }, + { '\u0131' }, { '\u22b7' }, { '\u01b5' }, { '\u2208' }, + { '\u2105' }, { '\u221e' }, { '\u29dd' }, { '\u0131' }, + { '\u222b' }, { '\u22ba' }, { '\u2124' }, { '\u22ba' }, + { '\u2a17' }, { '\u2a3c' }, { '\u0451' }, { '\u012f' }, + { '\ud835', '\udd5a' }, { '\u03b9' }, { '\u2a3c' }, { '\u00bf' }, + { '\u00bf' }, { '\ud835', '\udcbe' }, { '\u2208' }, { '\u22f9' }, + { '\u22f5' }, { '\u22f4' }, { '\u22f3' }, { '\u2208' }, + { '\u2062' }, { '\u0129' }, { '\u0456' }, { '\u00ef' }, + { '\u00ef' }, { '\u0135' }, { '\u0439' }, { '\ud835', '\udd27' }, + { '\u0237' }, { '\ud835', '\udd5b' }, { '\ud835', '\udcbf' }, + { '\u0458' }, { '\u0454' }, { '\u03ba' }, { '\u03f0' }, + { '\u0137' }, { '\u043a' }, { '\ud835', '\udd28' }, { '\u0138' }, + { '\u0445' }, { '\u045c' }, { '\ud835', '\udd5c' }, + { '\ud835', '\udcc0' }, { '\u21da' }, { '\u21d0' }, { '\u291b' }, + { '\u290e' }, { '\u2266' }, { '\u2a8b' }, { '\u2962' }, + { '\u013a' }, { '\u29b4' }, { '\u2112' }, { '\u03bb' }, + { '\u27e8' }, { '\u2991' }, { '\u27e8' }, { '\u2a85' }, + { '\u00ab' }, { '\u00ab' }, { '\u2190' }, { '\u21e4' }, + { '\u291f' }, { '\u291d' }, { '\u21a9' }, { '\u21ab' }, + { '\u2939' }, { '\u2973' }, { '\u21a2' }, { '\u2aab' }, + { '\u2919' }, { '\u2aad' }, { '\u2aad', '\ufe00' }, 
{ '\u290c' }, + { '\u2772' }, { '\u007b' }, { '\u005b' }, { '\u298b' }, + { '\u298f' }, { '\u298d' }, { '\u013e' }, { '\u013c' }, + { '\u2308' }, { '\u007b' }, { '\u043b' }, { '\u2936' }, + { '\u201c' }, { '\u201e' }, { '\u2967' }, { '\u294b' }, + { '\u21b2' }, { '\u2264' }, { '\u2190' }, { '\u21a2' }, + { '\u21bd' }, { '\u21bc' }, { '\u21c7' }, { '\u2194' }, + { '\u21c6' }, { '\u21cb' }, { '\u21ad' }, { '\u22cb' }, + { '\u22da' }, { '\u2264' }, { '\u2266' }, { '\u2a7d' }, + { '\u2a7d' }, { '\u2aa8' }, { '\u2a7f' }, { '\u2a81' }, + { '\u2a83' }, { '\u22da', '\ufe00' }, { '\u2a93' }, { '\u2a85' }, + { '\u22d6' }, { '\u22da' }, { '\u2a8b' }, { '\u2276' }, + { '\u2272' }, { '\u297c' }, { '\u230a' }, { '\ud835', '\udd29' }, + { '\u2276' }, { '\u2a91' }, { '\u21bd' }, { '\u21bc' }, + { '\u296a' }, { '\u2584' }, { '\u0459' }, { '\u226a' }, + { '\u21c7' }, { '\u231e' }, { '\u296b' }, { '\u25fa' }, + { '\u0140' }, { '\u23b0' }, { '\u23b0' }, { '\u2268' }, + { '\u2a89' }, { '\u2a89' }, { '\u2a87' }, { '\u2a87' }, + { '\u2268' }, { '\u22e6' }, { '\u27ec' }, { '\u21fd' }, + { '\u27e6' }, { '\u27f5' }, { '\u27f7' }, { '\u27fc' }, + { '\u27f6' }, { '\u21ab' }, { '\u21ac' }, { '\u2985' }, + { '\ud835', '\udd5d' }, { '\u2a2d' }, { '\u2a34' }, { '\u2217' }, + { '\u005f' }, { '\u25ca' }, { '\u25ca' }, { '\u29eb' }, + { '\u0028' }, { '\u2993' }, { '\u21c6' }, { '\u231f' }, + { '\u21cb' }, { '\u296d' }, { '\u200e' }, { '\u22bf' }, + { '\u2039' }, { '\ud835', '\udcc1' }, { '\u21b0' }, { '\u2272' }, + { '\u2a8d' }, { '\u2a8f' }, { '\u005b' }, { '\u2018' }, + { '\u201a' }, { '\u0142' }, { '\u003c' }, { '\u003c' }, + { '\u2aa6' }, { '\u2a79' }, { '\u22d6' }, { '\u22cb' }, + { '\u22c9' }, { '\u2976' }, { '\u2a7b' }, { '\u2996' }, + { '\u25c3' }, { '\u22b4' }, { '\u25c2' }, { '\u294a' }, + { '\u2966' }, { '\u2268', '\ufe00' }, { '\u2268', '\ufe00' }, + { '\u223a' }, { '\u00af' }, { '\u00af' }, { '\u2642' }, + { '\u2720' }, { '\u2720' }, { '\u21a6' }, { '\u21a6' }, + { '\u21a7' }, { 
'\u21a4' }, { '\u21a5' }, { '\u25ae' }, + { '\u2a29' }, { '\u043c' }, { '\u2014' }, { '\u2221' }, + { '\ud835', '\udd2a' }, { '\u2127' }, { '\u00b5' }, { '\u00b5' }, + { '\u2223' }, { '\u002a' }, { '\u2af0' }, { '\u00b7' }, + { '\u00b7' }, { '\u2212' }, { '\u229f' }, { '\u2238' }, + { '\u2a2a' }, { '\u2adb' }, { '\u2026' }, { '\u2213' }, + { '\u22a7' }, { '\ud835', '\udd5e' }, { '\u2213' }, + { '\ud835', '\udcc2' }, { '\u223e' }, { '\u03bc' }, { '\u22b8' }, + { '\u22b8' }, { '\u22d9', '\u0338' }, { '\u226b', '\u20d2' }, + { '\u226b', '\u0338' }, { '\u21cd' }, { '\u21ce' }, + { '\u22d8', '\u0338' }, { '\u226a', '\u20d2' }, + { '\u226a', '\u0338' }, { '\u21cf' }, { '\u22af' }, { '\u22ae' }, + { '\u2207' }, { '\u0144' }, { '\u2220', '\u20d2' }, { '\u2249' }, + { '\u2a70', '\u0338' }, { '\u224b', '\u0338' }, { '\u0149' }, + { '\u2249' }, { '\u266e' }, { '\u266e' }, { '\u2115' }, + { '\u00a0' }, { '\u00a0' }, { '\u224e', '\u0338' }, + { '\u224f', '\u0338' }, { '\u2a43' }, { '\u0148' }, { '\u0146' }, + { '\u2247' }, { '\u2a6d', '\u0338' }, { '\u2a42' }, { '\u043d' }, + { '\u2013' }, { '\u2260' }, { '\u21d7' }, { '\u2924' }, + { '\u2197' }, { '\u2197' }, { '\u2250', '\u0338' }, { '\u2262' }, + { '\u2928' }, { '\u2242', '\u0338' }, { '\u2204' }, { '\u2204' }, + { '\ud835', '\udd2b' }, { '\u2267', '\u0338' }, { '\u2271' }, + { '\u2271' }, { '\u2267', '\u0338' }, { '\u2a7e', '\u0338' }, + { '\u2a7e', '\u0338' }, { '\u2275' }, { '\u226f' }, { '\u226f' }, + { '\u21ce' }, { '\u21ae' }, { '\u2af2' }, { '\u220b' }, + { '\u22fc' }, { '\u22fa' }, { '\u220b' }, { '\u045a' }, + { '\u21cd' }, { '\u2266', '\u0338' }, { '\u219a' }, { '\u2025' }, + { '\u2270' }, { '\u219a' }, { '\u21ae' }, { '\u2270' }, + { '\u2266', '\u0338' }, { '\u2a7d', '\u0338' }, + { '\u2a7d', '\u0338' }, { '\u226e' }, { '\u2274' }, { '\u226e' }, + { '\u22ea' }, { '\u22ec' }, { '\u2224' }, { '\ud835', '\udd5f' }, + { '\u00ac' }, { '\u00ac' }, { '\u2209' }, { '\u22f9', '\u0338' }, + { '\u22f5', '\u0338' }, { 
'\u2209' }, { '\u22f7' }, { '\u22f6' }, + { '\u220c' }, { '\u220c' }, { '\u22fe' }, { '\u22fd' }, + { '\u2226' }, { '\u2226' }, { '\u2afd', '\u20e5' }, + { '\u2202', '\u0338' }, { '\u2a14' }, { '\u2280' }, { '\u22e0' }, + { '\u2aaf', '\u0338' }, { '\u2280' }, { '\u2aaf', '\u0338' }, + { '\u21cf' }, { '\u219b' }, { '\u2933', '\u0338' }, + { '\u219d', '\u0338' }, { '\u219b' }, { '\u22eb' }, { '\u22ed' }, + { '\u2281' }, { '\u22e1' }, { '\u2ab0', '\u0338' }, + { '\ud835', '\udcc3' }, { '\u2224' }, { '\u2226' }, { '\u2241' }, + { '\u2244' }, { '\u2244' }, { '\u2224' }, { '\u2226' }, + { '\u22e2' }, { '\u22e3' }, { '\u2284' }, { '\u2ac5', '\u0338' }, + { '\u2288' }, { '\u2282', '\u20d2' }, { '\u2288' }, + { '\u2ac5', '\u0338' }, { '\u2281' }, { '\u2ab0', '\u0338' }, + { '\u2285' }, { '\u2ac6', '\u0338' }, { '\u2289' }, + { '\u2283', '\u20d2' }, { '\u2289' }, { '\u2ac6', '\u0338' }, + { '\u2279' }, { '\u00f1' }, { '\u00f1' }, { '\u2278' }, + { '\u22ea' }, { '\u22ec' }, { '\u22eb' }, { '\u22ed' }, + { '\u03bd' }, { '\u0023' }, { '\u2116' }, { '\u2007' }, + { '\u22ad' }, { '\u2904' }, { '\u224d', '\u20d2' }, { '\u22ac' }, + { '\u2265', '\u20d2' }, { '\u003e', '\u20d2' }, { '\u29de' }, + { '\u2902' }, { '\u2264', '\u20d2' }, { '\u003c', '\u20d2' }, + { '\u22b4', '\u20d2' }, { '\u2903' }, { '\u22b5', '\u20d2' }, + { '\u223c', '\u20d2' }, { '\u21d6' }, { '\u2923' }, { '\u2196' }, + { '\u2196' }, { '\u2927' }, { '\u24c8' }, { '\u00f3' }, + { '\u00f3' }, { '\u229b' }, { '\u229a' }, { '\u00f4' }, + { '\u00f4' }, { '\u043e' }, { '\u229d' }, { '\u0151' }, + { '\u2a38' }, { '\u2299' }, { '\u29bc' }, { '\u0153' }, + { '\u29bf' }, { '\ud835', '\udd2c' }, { '\u02db' }, { '\u00f2' }, + { '\u00f2' }, { '\u29c1' }, { '\u29b5' }, { '\u03a9' }, + { '\u222e' }, { '\u21ba' }, { '\u29be' }, { '\u29bb' }, + { '\u203e' }, { '\u29c0' }, { '\u014d' }, { '\u03c9' }, + { '\u03bf' }, { '\u29b6' }, { '\u2296' }, { '\ud835', '\udd60' }, + { '\u29b7' }, { '\u29b9' }, { '\u2295' }, { '\u2228' }, + { 
'\u21bb' }, { '\u2a5d' }, { '\u2134' }, { '\u2134' }, + { '\u00aa' }, { '\u00aa' }, { '\u00ba' }, { '\u00ba' }, + { '\u22b6' }, { '\u2a56' }, { '\u2a57' }, { '\u2a5b' }, + { '\u2134' }, { '\u00f8' }, { '\u00f8' }, { '\u2298' }, + { '\u00f5' }, { '\u00f5' }, { '\u2297' }, { '\u2a36' }, + { '\u00f6' }, { '\u00f6' }, { '\u233d' }, { '\u2225' }, + { '\u00b6' }, { '\u00b6' }, { '\u2225' }, { '\u2af3' }, + { '\u2afd' }, { '\u2202' }, { '\u043f' }, { '\u0025' }, + { '\u002e' }, { '\u2030' }, { '\u22a5' }, { '\u2031' }, + { '\ud835', '\udd2d' }, { '\u03c6' }, { '\u03d5' }, { '\u2133' }, + { '\u260e' }, { '\u03c0' }, { '\u22d4' }, { '\u03d6' }, + { '\u210f' }, { '\u210e' }, { '\u210f' }, { '\u002b' }, + { '\u2a23' }, { '\u229e' }, { '\u2a22' }, { '\u2214' }, + { '\u2a25' }, { '\u2a72' }, { '\u00b1' }, { '\u00b1' }, + { '\u2a26' }, { '\u2a27' }, { '\u00b1' }, { '\u2a15' }, + { '\ud835', '\udd61' }, { '\u00a3' }, { '\u00a3' }, { '\u227a' }, + { '\u2ab3' }, { '\u2ab7' }, { '\u227c' }, { '\u2aaf' }, + { '\u227a' }, { '\u2ab7' }, { '\u227c' }, { '\u2aaf' }, + { '\u2ab9' }, { '\u2ab5' }, { '\u22e8' }, { '\u227e' }, + { '\u2032' }, { '\u2119' }, { '\u2ab5' }, { '\u2ab9' }, + { '\u22e8' }, { '\u220f' }, { '\u232e' }, { '\u2312' }, + { '\u2313' }, { '\u221d' }, { '\u221d' }, { '\u227e' }, + { '\u22b0' }, { '\ud835', '\udcc5' }, { '\u03c8' }, { '\u2008' }, + { '\ud835', '\udd2e' }, { '\u2a0c' }, { '\ud835', '\udd62' }, + { '\u2057' }, { '\ud835', '\udcc6' }, { '\u210d' }, { '\u2a16' }, + { '\u003f' }, { '\u225f' }, { '\u0022' }, { '\u0022' }, + { '\u21db' }, { '\u21d2' }, { '\u291c' }, { '\u290f' }, + { '\u2964' }, { '\u223d', '\u0331' }, { '\u0155' }, { '\u221a' }, + { '\u29b3' }, { '\u27e9' }, { '\u2992' }, { '\u29a5' }, + { '\u27e9' }, { '\u00bb' }, { '\u00bb' }, { '\u2192' }, + { '\u2975' }, { '\u21e5' }, { '\u2920' }, { '\u2933' }, + { '\u291e' }, { '\u21aa' }, { '\u21ac' }, { '\u2945' }, + { '\u2974' }, { '\u21a3' }, { '\u219d' }, { '\u291a' }, + { '\u2236' }, { '\u211a' }, { 
'\u290d' }, { '\u2773' }, + { '\u007d' }, { '\u005d' }, { '\u298c' }, { '\u298e' }, + { '\u2990' }, { '\u0159' }, { '\u0157' }, { '\u2309' }, + { '\u007d' }, { '\u0440' }, { '\u2937' }, { '\u2969' }, + { '\u201d' }, { '\u201d' }, { '\u21b3' }, { '\u211c' }, + { '\u211b' }, { '\u211c' }, { '\u211d' }, { '\u25ad' }, + { '\u00ae' }, { '\u00ae' }, { '\u297d' }, { '\u230b' }, + { '\ud835', '\udd2f' }, { '\u21c1' }, { '\u21c0' }, { '\u296c' }, + { '\u03c1' }, { '\u03f1' }, { '\u2192' }, { '\u21a3' }, + { '\u21c1' }, { '\u21c0' }, { '\u21c4' }, { '\u21cc' }, + { '\u21c9' }, { '\u219d' }, { '\u22cc' }, { '\u02da' }, + { '\u2253' }, { '\u21c4' }, { '\u21cc' }, { '\u200f' }, + { '\u23b1' }, { '\u23b1' }, { '\u2aee' }, { '\u27ed' }, + { '\u21fe' }, { '\u27e7' }, { '\u2986' }, { '\ud835', '\udd63' }, + { '\u2a2e' }, { '\u2a35' }, { '\u0029' }, { '\u2994' }, + { '\u2a12' }, { '\u21c9' }, { '\u203a' }, { '\ud835', '\udcc7' }, + { '\u21b1' }, { '\u005d' }, { '\u2019' }, { '\u2019' }, + { '\u22cc' }, { '\u22ca' }, { '\u25b9' }, { '\u22b5' }, + { '\u25b8' }, { '\u29ce' }, { '\u2968' }, { '\u211e' }, + { '\u015b' }, { '\u201a' }, { '\u227b' }, { '\u2ab4' }, + { '\u2ab8' }, { '\u0161' }, { '\u227d' }, { '\u2ab0' }, + { '\u015f' }, { '\u015d' }, { '\u2ab6' }, { '\u2aba' }, + { '\u22e9' }, { '\u2a13' }, { '\u227f' }, { '\u0441' }, + { '\u22c5' }, { '\u22a1' }, { '\u2a66' }, { '\u21d8' }, + { '\u2925' }, { '\u2198' }, { '\u2198' }, { '\u00a7' }, + { '\u00a7' }, { '\u003b' }, { '\u2929' }, { '\u2216' }, + { '\u2216' }, { '\u2736' }, { '\ud835', '\udd30' }, { '\u2322' }, + { '\u266f' }, { '\u0449' }, { '\u0448' }, { '\u2223' }, + { '\u2225' }, { '\u00ad' }, { '\u00ad' }, { '\u03c3' }, + { '\u03c2' }, { '\u03c2' }, { '\u223c' }, { '\u2a6a' }, + { '\u2243' }, { '\u2243' }, { '\u2a9e' }, { '\u2aa0' }, + { '\u2a9d' }, { '\u2a9f' }, { '\u2246' }, { '\u2a24' }, + { '\u2972' }, { '\u2190' }, { '\u2216' }, { '\u2a33' }, + { '\u29e4' }, { '\u2223' }, { '\u2323' }, { '\u2aaa' }, + { '\u2aac' }, { 
'\u2aac', '\ufe00' }, { '\u044c' }, { '\u002f' }, + { '\u29c4' }, { '\u233f' }, { '\ud835', '\udd64' }, { '\u2660' }, + { '\u2660' }, { '\u2225' }, { '\u2293' }, { '\u2293', '\ufe00' }, + { '\u2294' }, { '\u2294', '\ufe00' }, { '\u228f' }, { '\u2291' }, + { '\u228f' }, { '\u2291' }, { '\u2290' }, { '\u2292' }, + { '\u2290' }, { '\u2292' }, { '\u25a1' }, { '\u25a1' }, + { '\u25aa' }, { '\u25aa' }, { '\u2192' }, { '\ud835', '\udcc8' }, + { '\u2216' }, { '\u2323' }, { '\u22c6' }, { '\u2606' }, + { '\u2605' }, { '\u03f5' }, { '\u03d5' }, { '\u00af' }, + { '\u2282' }, { '\u2ac5' }, { '\u2abd' }, { '\u2286' }, + { '\u2ac3' }, { '\u2ac1' }, { '\u2acb' }, { '\u228a' }, + { '\u2abf' }, { '\u2979' }, { '\u2282' }, { '\u2286' }, + { '\u2ac5' }, { '\u228a' }, { '\u2acb' }, { '\u2ac7' }, + { '\u2ad5' }, { '\u2ad3' }, { '\u227b' }, { '\u2ab8' }, + { '\u227d' }, { '\u2ab0' }, { '\u2aba' }, { '\u2ab6' }, + { '\u22e9' }, { '\u227f' }, { '\u2211' }, { '\u266a' }, + { '\u00b9' }, { '\u00b9' }, { '\u00b2' }, { '\u00b2' }, + { '\u00b3' }, { '\u00b3' }, { '\u2283' }, { '\u2ac6' }, + { '\u2abe' }, { '\u2ad8' }, { '\u2287' }, { '\u2ac4' }, + { '\u27c9' }, { '\u2ad7' }, { '\u297b' }, { '\u2ac2' }, + { '\u2acc' }, { '\u228b' }, { '\u2ac0' }, { '\u2283' }, + { '\u2287' }, { '\u2ac6' }, { '\u228b' }, { '\u2acc' }, + { '\u2ac8' }, { '\u2ad4' }, { '\u2ad6' }, { '\u21d9' }, + { '\u2926' }, { '\u2199' }, { '\u2199' }, { '\u292a' }, + { '\u00df' }, { '\u00df' }, { '\u2316' }, { '\u03c4' }, + { '\u23b4' }, { '\u0165' }, { '\u0163' }, { '\u0442' }, + { '\u20db' }, { '\u2315' }, { '\ud835', '\udd31' }, { '\u2234' }, + { '\u2234' }, { '\u03b8' }, { '\u03d1' }, { '\u03d1' }, + { '\u2248' }, { '\u223c' }, { '\u2009' }, { '\u2248' }, + { '\u223c' }, { '\u00fe' }, { '\u00fe' }, { '\u02dc' }, + { '\u00d7' }, { '\u00d7' }, { '\u22a0' }, { '\u2a31' }, + { '\u2a30' }, { '\u222d' }, { '\u2928' }, { '\u22a4' }, + { '\u2336' }, { '\u2af1' }, { '\ud835', '\udd65' }, { '\u2ada' }, + { '\u2929' }, { '\u2034' }, { 
'\u2122' }, { '\u25b5' }, + { '\u25bf' }, { '\u25c3' }, { '\u22b4' }, { '\u225c' }, + { '\u25b9' }, { '\u22b5' }, { '\u25ec' }, { '\u225c' }, + { '\u2a3a' }, { '\u2a39' }, { '\u29cd' }, { '\u2a3b' }, + { '\u23e2' }, { '\ud835', '\udcc9' }, { '\u0446' }, { '\u045b' }, + { '\u0167' }, { '\u226c' }, { '\u219e' }, { '\u21a0' }, + { '\u21d1' }, { '\u2963' }, { '\u00fa' }, { '\u00fa' }, + { '\u2191' }, { '\u045e' }, { '\u016d' }, { '\u00fb' }, + { '\u00fb' }, { '\u0443' }, { '\u21c5' }, { '\u0171' }, + { '\u296e' }, { '\u297e' }, { '\ud835', '\udd32' }, { '\u00f9' }, + { '\u00f9' }, { '\u21bf' }, { '\u21be' }, { '\u2580' }, + { '\u231c' }, { '\u231c' }, { '\u230f' }, { '\u25f8' }, + { '\u016b' }, { '\u00a8' }, { '\u00a8' }, { '\u0173' }, + { '\ud835', '\udd66' }, { '\u2191' }, { '\u2195' }, { '\u21bf' }, + { '\u21be' }, { '\u228e' }, { '\u03c5' }, { '\u03d2' }, + { '\u03c5' }, { '\u21c8' }, { '\u231d' }, { '\u231d' }, + { '\u230e' }, { '\u016f' }, { '\u25f9' }, { '\ud835', '\udcca' }, + { '\u22f0' }, { '\u0169' }, { '\u25b5' }, { '\u25b4' }, + { '\u21c8' }, { '\u00fc' }, { '\u00fc' }, { '\u29a7' }, + { '\u21d5' }, { '\u2ae8' }, { '\u2ae9' }, { '\u22a8' }, + { '\u299c' }, { '\u03f5' }, { '\u03f0' }, { '\u2205' }, + { '\u03d5' }, { '\u03d6' }, { '\u221d' }, { '\u2195' }, + { '\u03f1' }, { '\u03c2' }, { '\u228a', '\ufe00' }, + { '\u2acb', '\ufe00' }, { '\u228b', '\ufe00' }, + { '\u2acc', '\ufe00' }, { '\u03d1' }, { '\u22b2' }, { '\u22b3' }, + { '\u0432' }, { '\u22a2' }, { '\u2228' }, { '\u22bb' }, + { '\u225a' }, { '\u22ee' }, { '\u007c' }, { '\u007c' }, + { '\ud835', '\udd33' }, { '\u22b2' }, { '\u2282', '\u20d2' }, + { '\u2283', '\u20d2' }, { '\ud835', '\udd67' }, { '\u221d' }, + { '\u22b3' }, { '\ud835', '\udccb' }, { '\u2acb', '\ufe00' }, + { '\u228a', '\ufe00' }, { '\u2acc', '\ufe00' }, + { '\u228b', '\ufe00' }, { '\u299a' }, { '\u0175' }, { '\u2a5f' }, + { '\u2227' }, { '\u2259' }, { '\u2118' }, { '\ud835', '\udd34' }, + { '\ud835', '\udd68' }, { '\u2118' }, { 
'\u2240' }, { '\u2240' }, + { '\ud835', '\udccc' }, { '\u22c2' }, { '\u25ef' }, { '\u22c3' }, + { '\u25bd' }, { '\ud835', '\udd35' }, { '\u27fa' }, { '\u27f7' }, + { '\u03be' }, { '\u27f8' }, { '\u27f5' }, { '\u27fc' }, + { '\u22fb' }, { '\u2a00' }, { '\ud835', '\udd69' }, { '\u2a01' }, + { '\u2a02' }, { '\u27f9' }, { '\u27f6' }, { '\ud835', '\udccd' }, + { '\u2a06' }, { '\u2a04' }, { '\u25b3' }, { '\u22c1' }, + { '\u22c0' }, { '\u00fd' }, { '\u00fd' }, { '\u044f' }, + { '\u0177' }, { '\u044b' }, { '\u00a5' }, { '\u00a5' }, + { '\ud835', '\udd36' }, { '\u0457' }, { '\ud835', '\udd6a' }, + { '\ud835', '\udcce' }, { '\u044e' }, { '\u00ff' }, { '\u00ff' }, + { '\u017a' }, { '\u017e' }, { '\u0437' }, { '\u017c' }, + { '\u2128' }, { '\u03b6' }, { '\ud835', '\udd37' }, { '\u0436' }, + { '\u21dd' }, { '\ud835', '\udd6b' }, { '\ud835', '\udccf' }, + { '\u200d' }, { '\u200c' }, }; + + final static char[][] WINDOWS_1252 = { { '\u20AC' }, { '\u0081' }, + { '\u201A' }, { '\u0192' }, { '\u201E' }, { '\u2026' }, + { '\u2020' }, { '\u2021' }, { '\u02C6' }, { '\u2030' }, + { '\u0160' }, { '\u2039' }, { '\u0152' }, { '\u008D' }, + { '\u017D' }, { '\u008F' }, { '\u0090' }, { '\u2018' }, + { '\u2019' }, { '\u201C' }, { '\u201D' }, { '\u2022' }, + { '\u2013' }, { '\u2014' }, { '\u02DC' }, { '\u2122' }, + { '\u0161' }, { '\u203A' }, { '\u0153' }, { '\u009D' }, + { '\u017E' }, { '\u0178' } }; + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/NamedCharactersAccel.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/NamedCharactersAccel.java new file mode 100644 index 000000000..311f8f77f --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/NamedCharactersAccel.java @@ -0,0 +1,311 @@ +/* + * Copyright 2004-2010 Apple Computer, Inc., Mozilla Foundation, and Opera + * Software ASA. + * + * You are granted a license to use, reproduce and create derivative works of + * this document. 
+ */ + +package nu.validator.htmlparser.impl; + +import nu.validator.htmlparser.annotation.NoLength; + +/** + * @version $Id$ + * @author hsivonen + */ +public final class NamedCharactersAccel { + + static final @NoLength int[][] HILO_ACCEL = { + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + { 0, 0, 0, 0, 0, 0, 0, 12386493, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 40174181, 0, 0, 0, 0, 60162966, 0, 0, 0, + 75367550, 0, 0, 0, 82183396, 0, 0, 0, 0, 0, 115148507, 0, + 0, 135989275, 139397199, 0, 0, 0, 0, }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 28770743, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 82248935, 0, 0, 0, 0, 0, 115214046, 0, 0, 0, 139528272, 0, + 0, 0, 0, }, + null, + { 0, 0, 0, 4980811, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 38470219, 0, 0, 0, 0, 0, 0, 0, 0, 64553944, 0, 0, 0, 0, + 0, 0, 0, 92145022, 0, 0, 0, 0, 0, 0, 0, 0, 139593810, 0, 0, + 0, 0, }, + { 65536, 0, 0, 0, 0, 0, 0, 0, 13172937, 0, 0, 0, 0, 0, 25297282, 0, + 0, 28901816, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 71500866, 0, 0, 0, 0, 82380008, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, }, + null, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 94897574, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, + { 0, 0, 2555943, 0, 0, 0, 0, 0, 0, 0, 15532269, 0, 0, 0, 0, 0, 0, + 0, 31785444, 34406924, 0, 0, 0, 0, 0, 40895088, 0, 0, 0, + 60228503, 0, 0, 0, 0, 0, 0, 0, 82445546, 0, 0, 0, 0, 0, + 115279583, 0, 0, 
136054812, 0, 0, 0, 0, 0, }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 40239718, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, + { 0, 0, 0, 5046349, 0, 0, 10944679, 0, 13238474, 0, 15597806, + 16056565, 0, 20578618, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, }, + null, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 95225257, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, + { 196610, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, + { 0, 0, 0, 0, 8454273, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 46072511, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, + { 0, 0, 2687016, 0, 0, 0, 0, 0, 13304011, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 31850982, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, + null, + null, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 34472462, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 95290798, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, + { 0, 0, 0, 5111886, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 34603535, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 105776718, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, + { 0, 0, 0, 0, 8585346, 0, 11075752, 0, 0, 0, 0, 16187638, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 28508594, 0, 0, + 0, 0, 0, 0, 0, 40305255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, + 95421871, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, + null, + null, + null, + { 0, 0, 0, 5177423, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, + null, + null, + null, + null, + null, + null, + { 327684, 1900571, 2949162, 5374032, 8716420, 0, 11206826, + 12517566, 13435084, 0, 15663343, 16515320, 19988785, + 20644155, 25428355, 27197855, 0, 29163962, 31916519, + 34734609, 36045347, 0, 0, 0, 40436328, 40960625, 41615994, + 46596800, 54264627, 60556184, 64750554, 68879387, 71763012, + 75826303, 77268122, 0, 81462490, 83952875, 92865919, + 96142769, 105973327, 110167691, 0, 116917984, 121833283, + 132253665, 136251421, 140707923, 0, 0, 144574620, + 145361066, }, + { 393222, 0, 0, 0, 0, 0, 11272364, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 36176423, 38535756, 0, 0, 0, 0, 41681532, 46727880, + 0, 60687261, 0, 0, 71828552, 75891846, 0, 0, 0, 84411650, + 0, 96404924, 0, 0, 0, 117376761, 121898820, 132319203, + 136382496, 0, 0, 0, 0, 0, }, + { 589831, 1966110, 3276846, 5505107, 8978566, 10420383, 11468973, + 12583104, 13631694, 15139046, 15794416, 16711933, 20054322, + 20840764, 25624965, 27263392, 0, 29360574, 32244200, + 34931219, 36373033, 38601293, 39584348, 0, 40567402, + 41091698, 42205821, 46858954, 54723389, 60818335, 65143773, + 68944924, 71959625, 75957383, 77530268, 80938194, 81593564, + 84739337, 92997002, 96863680, 106235474, 110233234, 0, + 117704448, 122816325, 132515812, 136579106, 140773476, + 142149753, 143001732, 144705695, 145492139, }, + { 0, 0, 3342387, 0, 9044106, 0, 11534512, 0, 13697233, 0, 0, 0, 0, + 0, 25690504, 0, 0, 0, 0, 0, 36438572, 38732366, 0, 0, 0, + 41157236, 0, 46924492, 54788932, 61080481, 65209315, 0, + 72025163, 0, 0, 0, 0, 85132558, 93062540, 96929223, + 106563158, 0, 0, 118032133, 123012947, 132581351, + 136775717, 140839013, 0, 143067271, 0, 145557677, }, + { 0, 2162719, 3473460, 5636181, 0, 0, 0, 0, 0, 0, 0, 
18809088, + 20185395, 21299519, 0, 0, 0, 29622721, 0, 0, 0, 39256656, + 39649885, 0, 0, 41288309, 42336901, 47448781, 55182149, + 61342629, 65274852, 69010461, 72811596, 76219528, 77726880, + 0, 0, 86967572, 93128077, 97650120, 106628699, 110560915, + 0, 118490890, 123733846, 132646888, 0, 141232230, + 142411898, 0, 144836769, 145688750, }, + { 655370, 2228258, 3538998, 5701719, 9109643, 10485920, 11600049, + 12648641, 13762770, 15204584, 15859954, 18874656, 20250933, + 21365062, 25756041, 27328929, 28574132, 29688261, 32309741, + 34996758, 36504109, 39322200, 39715422, 39912033, 40632940, + 41353847, 42467975, 47514325, 55247691, 61473705, 65405925, + 69272606, 72877144, 76285068, 77857955, 81003732, 81659102, + 87164208, 93193614, 97715667, 106759772, 110626456, + 114296528, 118687505, 123864929, 132712425, 136906792, + 141297772, 142477438, 143132808, 144902307, 145754288, }, + { 786443, 0, 0, 0, 9240716, 0, 11665586, 0, 13893843, 0, 0, 0, 0, + 0, 25887114, 0, 0, 0, 0, 0, 36635182, 0, 0, 0, 0, 0, + 42599049, 0, 0, 0, 65733607, 0, 73008217, 0, 77989029, 0, + 81724639, 87295283, 0, 98305492, 107021918, 0, 0, 0, 0, 0, + 137037866, 0, 0, 0, 0, 0, }, + { 0, 0, 3604535, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27394466, 0, + 29753798, 32571886, 35258903, 0, 0, 0, 0, 0, 0, 0, 0, + 55509836, 61604779, 0, 0, 0, 0, 0, 0, 81790176, 87557429, + 93259151, 98502109, 107152994, 110888601, 0, 119015188, + 124323683, 133498858, 137234476, 0, 0, 143263881, 0, + 145819825, }, + { 0, 0, 3866680, 6160472, 0, 10616993, 0, 12714178, 0, 0, 0, 0, + 20316470, 0, 0, 27460003, 0, 31261127, 32637426, 35521051, + 0, 0, 0, 39977570, 0, 0, 0, 48366294, 56492880, 62391213, + 0, 69338146, 73073755, 0, 78316711, 0, 0, 0, 93980048, + 98764256, 107218532, 111085213, 114362065, 119736089, + 125241194, 133957622, 0, 0, 0, 143329419, 144967844, + 145885362, }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 62456761, 0, 69403683, 73139292, 0, + 78382252, 0, 
81855713, 87622969, 0, 98829796, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 48431843, 0, 0, 0, 0, 0, 76416141, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, + { 851981, 0, 4063292, 0, 9306254, 0, 0, 0, 0, 0, 0, 19005729, 0, 0, + 0, 27525540, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42795659, + 49152740, 56623967, 62587834, 66061292, 69600292, 73401437, + 0, 0, 0, 0, 87950650, 94111131, 99878373, 107546213, + 112002720, 0, 119932708, 125306744, 0, 137496623, + 141363309, 0, 143460492, 0, 0, }, + { 917518, 0, 0, 0, 9502863, 0, 0, 0, 14155989, 0, 0, 19071267, 0, + 0, 26083724, 0, 0, 0, 32702963, 0, 36700720, 0, 0, 0, 0, 0, + 43057806, 0, 0, 0, 66520049, 0, 0, 0, 78841005, 81069269, + 0, 88147263, 0, 99943925, 107873898, 112068270, 0, + 120063783, 125831033, 0, 137693235, 0, 0, 143526030, 0, 0, }, + { 983055, 0, 0, 0, 0, 0, 0, 0, 14483673, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 37093937, 0, 0, 0, 0, 0, 44565138, 49349359, 0, 0, + 66651128, 69665831, 73860193, 0, 79561908, 0, 0, 88606018, + 94176669, 0, 0, 0, 0, 120129321, 0, 0, 0, 141494382, 0, + 143591567, 0, 0, }, + { 1114128, 2293795, 4587583, 8257631, 9633938, 10813603, 11731123, + 12845251, 14680286, 15270121, 15925491, 19661092, 20382007, + 24969543, 26149263, 27656613, 28639669, 31392222, 32768500, + 35586591, 37225015, 39387737, 39780959, 40043107, 40698477, + 41419384, 44696233, 52495090, 57738081, 63439804, 66782202, + 69927976, 73925736, 76809359, 79824063, 81134806, 81921250, + 89785673, 94307742, 100795894, 107939439, 112330415, + 114427602, 120588074, 126158721, 134416381, 137824310, + 141559920, 142542975, 143853712, 145033381, 145950899, }, + { 1179666, 0, 0, 0, 9699476, 0, 0, 0, 0, 0, 0, 0, 0, 0, 26280336, + 0, 0, 0, 0, 0, 38076985, 0, 0, 0, 0, 0, 45220523, 52560674, + 0, 0, 67175420, 69993516, 0, 0, 79889603, 0, 0, 89916763, + 94373280, 101451267, 108136048, 0, 114493139, 120784689, + 126355334, 134481924, 138414136, 
141625457, 142608512, 0, + 0, 0, }, + { 0, 0, 0, 0, 9896085, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 33292789, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 67830786, 0, 0, + 0, 80020676, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127403913, 0, 0, 0, + 0, 0, 0, 0, }, + { 1310739, 2359332, 4653127, 0, 0, 0, 12189876, 0, 0, 0, 0, 0, 0, + 0, 26345874, 28246439, 0, 31457760, 0, 35652128, 38142534, + 0, 0, 0, 0, 0, 45351603, 52757283, 57869170, 63636425, + 67961868, 71304237, 73991273, 0, 0, 0, 0, 90309981, 0, + 101910029, 108988019, 114034355, 0, 120850228, 127469465, + 135464965, 138741825, 141690994, 142739585, 143984788, 0, + 0, }, + { 1441813, 2424869, 4718664, 8388735, 10027160, 10879142, 12255419, + 12976325, 14745825, 15401194, 15991028, 19857709, 20447544, + 25035134, 26542483, 28377520, 28705206, 31588833, 33358333, + 35783201, 38208071, 39453274, 39846496, 40108644, 40764014, + 41484921, 45613749, 53216038, 58196852, 63898572, 68158478, + 71369793, 74253418, 77005973, 80479430, 81265879, 81986787, + 90965347, 94504353, 103679508, 109250176, 114165453, + 114558676, 121243445, 127731610, 135727124, 138807366, + 142018675, 142805123, 144115862, 145098918, 146016436, }, + { 1572887, 0, 0, 0, 10092698, 0, 12320956, 0, 14811362, 0, 0, + 19923248, 0, 25166207, 26739094, 0, 0, 0, 33423870, 0, + 38273608, 0, 0, 0, 0, 0, 45744825, 0, 58262393, 64095184, + 68355089, 0, 75170926, 0, 80610509, 0, 0, 91817325, 0, + 104203823, 109512324, 0, 0, 121636667, 128059294, 0, + 139069511, 0, 0, 0, 0, 0, }, + { 1703961, 2490406, 4849737, 0, 10223771, 0, 0, 13107399, 15007971, + 15466732, 0, 0, 20513081, 25231745, 26870169, 0, 0, + 31654371, 34275839, 0, 38404681, 0, 0, 0, 40829551, 0, + 45875899, 53609261, 59900794, 64226259, 68551700, 0, 0, 0, + 80807119, 81331417, 0, 91948410, 94700963, 104465975, + 109643400, 114230991, 114951893, 121702209, 131663779, 0, + 139266123, 0, 0, 144246936, 145295527, 0, }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27132315, 0, 0, 0, 0, + 0, 0, 39518811, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 75302012, 0, + 0, 0, 0, 92079484, 0, 105383483, 109708938, 0, 0, 0, 0, 0, + 0, 0, 0, 144312474, 0, 0, }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 46006973, 0, 60031891, 64291797, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 105711177, 0, 0, 0, 0, 131991514, 135923736, + 139331662, 0, 0, 144378011, 0, 146147509, }, + { 0, 0, 0, 0, 10354845, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 68813847, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 121767746, 0, 0, 0, 0, 0, 0, 0, 0, }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 60097429, 0, 0, 0, 0, 77137048, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 64422870, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 132122591, 0, 0, 142084216, 0, 0, 0, 0, }, }; + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/Portability.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/Portability.java new file mode 100644 index 000000000..2b3f96625 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/Portability.java @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2008-2015 Mozilla Foundation + * Copyright (c) 2018-2020 Moonchild Productions + * Copyright (c) 2020 Binary Outcast + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.impl; + +import nu.validator.htmlparser.annotation.Literal; +import nu.validator.htmlparser.annotation.Local; +import nu.validator.htmlparser.annotation.NoLength; +import nu.validator.htmlparser.common.Interner; + +public final class Portability { + + // Allocating methods + + /** + * Allocates a new local name object. In C++, the refcount must be set up in such a way that + * calling <code>releaseLocal</code> on the return value balances the refcount set by this method. 
+ */ + public static @Local String newLocalNameFromBuffer(@NoLength char[] buf, int offset, int length, Interner interner) { + return new String(buf, offset, length).intern(); + } + + public static String newStringFromBuffer(@NoLength char[] buf, int offset, int length + // CPPONLY: , TreeBuilder treeBuilder + ) { + return new String(buf, offset, length); + } + + public static String newEmptyString() { + return ""; + } + + public static String newStringFromLiteral(@Literal String literal) { + return literal; + } + + public static String newStringFromString(String string) { + return string; + } + + // XXX get rid of this + public static char[] newCharArrayFromLocal(@Local String local) { + return local.toCharArray(); + } + + public static char[] newCharArrayFromString(String string) { + return string.toCharArray(); + } + + public static @Local String newLocalFromLocal(@Local String local, Interner interner) { + return local; + } + + // Deallocation methods + + public static void releaseString(String str) { + // No-op in Java + } + + // Comparison methods + + public static boolean localEqualsBuffer(@Local String local, @NoLength char[] buf, int offset, int length) { + if (local.length() != length) { + return false; + } + for (int i = 0; i < length; i++) { + if (local.charAt(i) != buf[offset + i]) { + return false; + } + } + return true; + } + + public static boolean lowerCaseLiteralIsPrefixOfIgnoreAsciiCaseString(@Literal String lowerCaseLiteral, + String string) { + if (string == null) { + return false; + } + if (lowerCaseLiteral.length() > string.length()) { + return false; + } + for (int i = 0; i < lowerCaseLiteral.length(); i++) { + char c0 = lowerCaseLiteral.charAt(i); + char c1 = string.charAt(i); + if (c1 >= 'A' && c1 <= 'Z') { + c1 += 0x20; + } + if (c0 != c1) { + return false; + } + } + return true; + } + + public static boolean lowerCaseLiteralEqualsIgnoreAsciiCaseString(@Literal String lowerCaseLiteral, + String string) { + if (string == null) { + return 
false; + } + if (lowerCaseLiteral.length() != string.length()) { + return false; + } + for (int i = 0; i < lowerCaseLiteral.length(); i++) { + char c0 = lowerCaseLiteral.charAt(i); + char c1 = string.charAt(i); + if (c1 >= 'A' && c1 <= 'Z') { + c1 += 0x20; + } + if (c0 != c1) { + return false; + } + } + return true; + } + + public static boolean literalEqualsString(@Literal String literal, String string) { + return literal.equals(string); + } + + public static boolean stringEqualsString(String one, String other) { + return one.equals(other); + } + + public static void delete(Object o) { + + } + + public static void deleteArray(Object o) { + + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/PushedLocation.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/PushedLocation.java new file mode 100644 index 000000000..fad5f43db --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/PushedLocation.java @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.impl; + +public class PushedLocation { + private final int line; + + private final int linePrev; + + private final int col; + + private final int colPrev; + + private final boolean nextCharOnNewLine; + + private final String publicId; + + private final String systemId; + + private final PushedLocation next; + + /** + * @param line + * @param linePrev + * @param col + * @param colPrev + * @param nextCharOnNewLine + * @param publicId + * @param systemId + * @param next + */ + public PushedLocation(int line, int linePrev, int col, int colPrev, + boolean nextCharOnNewLine, String publicId, String systemId, + PushedLocation next) { + this.line = line; + this.linePrev = linePrev; + this.col = col; + this.colPrev = colPrev; + this.nextCharOnNewLine = nextCharOnNewLine; + this.publicId = publicId; + this.systemId = systemId; + this.next = next; + } + + /** + * Returns the line. + * + * @return the line + */ + public int getLine() { + return line; + } + + /** + * Returns the linePrev. + * + * @return the linePrev + */ + public int getLinePrev() { + return linePrev; + } + + /** + * Returns the col. + * + * @return the col + */ + public int getCol() { + return col; + } + + /** + * Returns the colPrev. + * + * @return the colPrev + */ + public int getColPrev() { + return colPrev; + } + + /** + * Returns the nextCharOnNewLine. + * + * @return the nextCharOnNewLine + */ + public boolean isNextCharOnNewLine() { + return nextCharOnNewLine; + } + + /** + * Returns the publicId. + * + * @return the publicId + */ + public String getPublicId() { + return publicId; + } + + /** + * Returns the systemId. 
+ * + * @return the systemId + */ + public String getSystemId() { + return systemId; + } + + /** + * Returns the next. + * + * @return the next + */ + public PushedLocation getNext() { + return next; + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/StackNode.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/StackNode.java new file mode 100644 index 000000000..b671bc903 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/StackNode.java @@ -0,0 +1,297 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2007-2011 Mozilla Foundation + * Copyright (c) 2018-2020 Moonchild Productions + * Copyright (c) 2020 Binary Outcast + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +package nu.validator.htmlparser.impl; + +import nu.validator.htmlparser.annotation.Inline; +import nu.validator.htmlparser.annotation.Local; +import nu.validator.htmlparser.annotation.NsUri; + +final class StackNode<T> { + final int flags; + + final @Local String name; + + final @Local String popName; + + final @NsUri String ns; + + final T node; + + // Only used on the list of formatting elements + HtmlAttributes attributes; + + private int refcount = 1; + + // [NOCPP[ + + private final TaintableLocatorImpl locator; + + public TaintableLocatorImpl getLocator() { + return locator; + } + + // ]NOCPP] + + @Inline public int getFlags() { + return flags; + } + + public int getGroup() { + return flags & ElementName.GROUP_MASK; + } + + public boolean isScoping() { + return (flags & ElementName.SCOPING) != 0; + } + + public boolean isSpecial() { + return (flags & ElementName.SPECIAL) != 0; + } + + public boolean isFosterParenting() { + return (flags & ElementName.FOSTER_PARENTING) != 0; + } + + public boolean isHtmlIntegrationPoint() { + return (flags & ElementName.HTML_INTEGRATION_POINT) != 0; + } + + // [NOCPP[ + + public boolean isOptionalEndTag() { + return (flags & ElementName.OPTIONAL_END_TAG) != 0; + } + + // ]NOCPP] + + /** + * Constructor for copying. This doesn't take another <code>StackNode</code> + * because in C++ the caller is reponsible for reobtaining the local names + * from another interner. + * + * @param flags + * @param ns + * @param name + * @param node + * @param popName + * @param attributes + */ + StackNode(int flags, @NsUri String ns, @Local String name, T node, + @Local String popName, HtmlAttributes attributes + // [NOCPP[ + , TaintableLocatorImpl locator + // ]NOCPP] + ) { + this.flags = flags; + this.name = name; + this.popName = popName; + this.ns = ns; + this.node = node; + this.attributes = attributes; + this.refcount = 1; + // [NOCPP[ + this.locator = locator; + // ]NOCPP] + } + + /** + * Short hand for well-known HTML elements. 
+ * + * @param elementName + * @param node + */ + StackNode(ElementName elementName, T node + // [NOCPP[ + , TaintableLocatorImpl locator + // ]NOCPP] + ) { + this.flags = elementName.getFlags(); + this.name = elementName.name; + this.popName = elementName.name; + this.ns = "http://www.w3.org/1999/xhtml"; + this.node = node; + this.attributes = null; + this.refcount = 1; + assert !elementName.isCustom() : "Don't use this constructor for custom elements."; + // [NOCPP[ + this.locator = locator; + // ]NOCPP] + } + + /** + * Constructor for HTML formatting elements. + * + * @param elementName + * @param node + * @param attributes + */ + StackNode(ElementName elementName, T node, HtmlAttributes attributes + // [NOCPP[ + , TaintableLocatorImpl locator + // ]NOCPP] + ) { + this.flags = elementName.getFlags(); + this.name = elementName.name; + this.popName = elementName.name; + this.ns = "http://www.w3.org/1999/xhtml"; + this.node = node; + this.attributes = attributes; + this.refcount = 1; + assert !elementName.isCustom() : "Don't use this constructor for custom elements."; + // [NOCPP[ + this.locator = locator; + // ]NOCPP] + } + + /** + * The common-case HTML constructor. + * + * @param elementName + * @param node + * @param popName + */ + StackNode(ElementName elementName, T node, @Local String popName + // [NOCPP[ + , TaintableLocatorImpl locator + // ]NOCPP] + ) { + this.flags = elementName.getFlags(); + this.name = elementName.name; + this.popName = popName; + this.ns = "http://www.w3.org/1999/xhtml"; + this.node = node; + this.attributes = null; + this.refcount = 1; + // [NOCPP[ + this.locator = locator; + // ]NOCPP] + } + + /** + * Constructor for SVG elements. Note that the order of the arguments is + * what distinguishes this from the HTML constructor. This is ugly, but + * AFAICT the least disruptive way to make this work with Java's generics + * and without unnecessary branches. 
:-( + * + * @param elementName + * @param popName + * @param node + */ + StackNode(ElementName elementName, @Local String popName, T node + // [NOCPP[ + , TaintableLocatorImpl locator + // ]NOCPP] + ) { + this.flags = prepareSvgFlags(elementName.getFlags()); + this.name = elementName.name; + this.popName = popName; + this.ns = "http://www.w3.org/2000/svg"; + this.node = node; + this.attributes = null; + this.refcount = 1; + // [NOCPP[ + this.locator = locator; + // ]NOCPP] + } + + /** + * Constructor for MathML. + * + * @param elementName + * @param node + * @param popName + * @param markAsIntegrationPoint + */ + StackNode(ElementName elementName, T node, @Local String popName, + boolean markAsIntegrationPoint + // [NOCPP[ + , TaintableLocatorImpl locator + // ]NOCPP] + ) { + this.flags = prepareMathFlags(elementName.getFlags(), + markAsIntegrationPoint); + this.name = elementName.name; + this.popName = popName; + this.ns = "http://www.w3.org/1998/Math/MathML"; + this.node = node; + this.attributes = null; + this.refcount = 1; + // [NOCPP[ + this.locator = locator; + // ]NOCPP] + } + + private static int prepareSvgFlags(int flags) { + flags &= ~(ElementName.FOSTER_PARENTING | ElementName.SCOPING + | ElementName.SPECIAL | ElementName.OPTIONAL_END_TAG); + if ((flags & ElementName.SCOPING_AS_SVG) != 0) { + flags |= (ElementName.SCOPING | ElementName.SPECIAL | ElementName.HTML_INTEGRATION_POINT); + } + return flags; + } + + private static int prepareMathFlags(int flags, + boolean markAsIntegrationPoint) { + flags &= ~(ElementName.FOSTER_PARENTING | ElementName.SCOPING + | ElementName.SPECIAL | ElementName.OPTIONAL_END_TAG); + if ((flags & ElementName.SCOPING_AS_MATHML) != 0) { + flags |= (ElementName.SCOPING | ElementName.SPECIAL); + } + if (markAsIntegrationPoint) { + flags |= ElementName.HTML_INTEGRATION_POINT; + } + return flags; + } + + @SuppressWarnings("unused") private void destructor() { + Portability.delete(attributes); + } + + public void dropAttributes() { + 
attributes = null; + } + + // [NOCPP[ + /** + * @see java.lang.Object#toString() + */ + @Override public @Local String toString() { + return name; + } + + // ]NOCPP] + + public void retain() { + refcount++; + } + + public void release() { + refcount--; + if (refcount == 0) { + Portability.delete(this); + } + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/StateSnapshot.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/StateSnapshot.java new file mode 100644 index 000000000..d79641bcb --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/StateSnapshot.java @@ -0,0 +1,206 @@ +/* + * Copyright (c) 2009-2010 Mozilla Foundation + * Copyright (c) 2018-2020 Moonchild Productions + * Copyright (c) 2020 Binary Outcast + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +package nu.validator.htmlparser.impl; + +import nu.validator.htmlparser.annotation.Auto; + + +public class StateSnapshot<T> implements TreeBuilderState<T> { + + private final @Auto StackNode<T>[] stack; + + private final @Auto StackNode<T>[] listOfActiveFormattingElements; + + private final @Auto int[] templateModeStack; + + private final T formPointer; + + private final T headPointer; + + private final T deepTreeSurrogateParent; + + private final int mode; + + private final int originalMode; + + private final boolean framesetOk; + + private final boolean needToDropLF; + + private final boolean quirks; + + /** + * @param stack + * @param listOfActiveFormattingElements + * @param templateModeStack + * @param formPointer + * @param headPointer + * @param deepTreeSurrogateParent + * @param mode + * @param originalMode + * @param framesetOk + * @param needToDropLF + * @param quirks + */ + StateSnapshot(StackNode<T>[] stack, + StackNode<T>[] listOfActiveFormattingElements, int[] templateModeStack, T formPointer, + T headPointer, T deepTreeSurrogateParent, int mode, int originalMode, + boolean framesetOk, boolean needToDropLF, boolean quirks) { + this.stack = stack; + this.listOfActiveFormattingElements = listOfActiveFormattingElements; + this.templateModeStack = templateModeStack; + this.formPointer = formPointer; + this.headPointer = headPointer; + this.deepTreeSurrogateParent = deepTreeSurrogateParent; + this.mode = mode; + this.originalMode = originalMode; + this.framesetOk = framesetOk; + this.needToDropLF = needToDropLF; + this.quirks = quirks; + } + + /** + * @see nu.validator.htmlparser.impl.TreeBuilderState#getStack() + */ + public StackNode<T>[] getStack() { + return stack; + } + + /** + * @see nu.validator.htmlparser.impl.TreeBuilderState#getTemplateModeStack() + */ + public int[] getTemplateModeStack() { + return templateModeStack; + } + + /** + * @see nu.validator.htmlparser.impl.TreeBuilderState#getListOfActiveFormattingElements() + */ + public 
StackNode<T>[] getListOfActiveFormattingElements() { + return listOfActiveFormattingElements; + } + + /** + * @see nu.validator.htmlparser.impl.TreeBuilderState#getFormPointer() + */ + public T getFormPointer() { + return formPointer; + } + + /** + * Returns the headPointer. + * + * @return the headPointer + */ + public T getHeadPointer() { + return headPointer; + } + + /** + * Returns the deepTreeSurrogateParent. + * + * @return the deepTreeSurrogateParent + */ + public T getDeepTreeSurrogateParent() { + return deepTreeSurrogateParent; + } + + /** + * Returns the mode. + * + * @return the mode + */ + public int getMode() { + return mode; + } + + /** + * Returns the originalMode. + * + * @return the originalMode + */ + public int getOriginalMode() { + return originalMode; + } + + /** + * Returns the framesetOk. + * + * @return the framesetOk + */ + public boolean isFramesetOk() { + return framesetOk; + } + + /** + * Returns the needToDropLF. + * + * @return the needToDropLF + */ + public boolean isNeedToDropLF() { + return needToDropLF; + } + + /** + * Returns the quirks. 
+ * + * @return the quirks + */ + public boolean isQuirks() { + return quirks; + } + + /** + * @see nu.validator.htmlparser.impl.TreeBuilderState#getListOfActiveFormattingElementsLength() + */ + public int getListOfActiveFormattingElementsLength() { + return listOfActiveFormattingElements.length; + } + + /** + * @see nu.validator.htmlparser.impl.TreeBuilderState#getStackLength() + */ + public int getStackLength() { + return stack.length; + } + + /** + * @see nu.validator.htmlparser.impl.TreeBuilderState#getTemplateModeStackLength() + */ + public int getTemplateModeStackLength() { + return templateModeStack.length; + } + + @SuppressWarnings("unused") private void destructor() { + for (int i = 0; i < stack.length; i++) { + stack[i].release(); + } + for (int i = 0; i < listOfActiveFormattingElements.length; i++) { + if (listOfActiveFormattingElements[i] != null) { + listOfActiveFormattingElements[i].release(); + } + } + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/TaintableLocatorImpl.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/TaintableLocatorImpl.java new file mode 100644 index 000000000..37cdb75d3 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/TaintableLocatorImpl.java @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2011 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.impl; + +import org.xml.sax.Locator; + +public class TaintableLocatorImpl extends LocatorImpl { + + private boolean tainted; + + public TaintableLocatorImpl(Locator locator) { + super(locator); + this.tainted = false; + } + + public void markTainted() { + tainted = true; + } + + public boolean isTainted() { + return tainted; + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/Tokenizer.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/Tokenizer.java new file mode 100644 index 000000000..125ef3266 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/Tokenizer.java @@ -0,0 +1,7080 @@ +/* + * Copyright (c) 2005-2007 Henri Sivonen + * Copyright (c) 2007-2015 Mozilla Foundation + * Copyright (c) 2018-2020 Moonchild Productions + * Copyright (c) 2020 Binary Outcast + * Portions of comments Copyright 2004-2010 Apple Computer, Inc., Mozilla + * Foundation, and Opera Software ASA. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * The comments following this one that use the same comment syntax as this + * comment are quotes from the WHATWG HTML 5 spec as of 2 June 2007 + * amended as of June 18 2008 and May 31 2010. + * That document came with this statement: + * "© Copyright 2004-2010 Apple Computer, Inc., Mozilla Foundation, and + * Opera Software ASA. You are granted a license to use, reproduce and + * create derivative works of this document." 
+ */ + +package nu.validator.htmlparser.impl; + +import org.xml.sax.ErrorHandler; +import org.xml.sax.Locator; +import org.xml.sax.SAXException; +import org.xml.sax.SAXParseException; + +import nu.validator.htmlparser.annotation.Auto; +import nu.validator.htmlparser.annotation.CharacterName; +import nu.validator.htmlparser.annotation.Const; +import nu.validator.htmlparser.annotation.Inline; +import nu.validator.htmlparser.annotation.Local; +import nu.validator.htmlparser.annotation.NoLength; +import nu.validator.htmlparser.common.EncodingDeclarationHandler; +import nu.validator.htmlparser.common.Interner; +import nu.validator.htmlparser.common.TokenHandler; +import nu.validator.htmlparser.common.XmlViolationPolicy; + +/** + * An implementation of + * https://html.spec.whatwg.org/multipage/syntax.html#tokenization + * + * This class implements the <code>Locator</code> interface. This is not an + * incidental implementation detail: Users of this class are encouraged to make + * use of the <code>Locator</code> nature. + * + * By default, the tokenizer may report data that XML 1.0 bans. The tokenizer + * can be configured to treat these conditions as fatal or to coerce the infoset + * to something that XML 1.0 allows. 
+ * + * @version $Id$ + * @author hsivonen + */ +public class Tokenizer implements Locator { + + private static final int DATA_AND_RCDATA_MASK = ~1; + + public static final int DATA = 0; + + public static final int RCDATA = 1; + + public static final int SCRIPT_DATA = 2; + + public static final int RAWTEXT = 3; + + public static final int SCRIPT_DATA_ESCAPED = 4; + + public static final int ATTRIBUTE_VALUE_DOUBLE_QUOTED = 5; + + public static final int ATTRIBUTE_VALUE_SINGLE_QUOTED = 6; + + public static final int ATTRIBUTE_VALUE_UNQUOTED = 7; + + public static final int PLAINTEXT = 8; + + public static final int TAG_OPEN = 9; + + public static final int CLOSE_TAG_OPEN = 10; + + public static final int TAG_NAME = 11; + + public static final int BEFORE_ATTRIBUTE_NAME = 12; + + public static final int ATTRIBUTE_NAME = 13; + + public static final int AFTER_ATTRIBUTE_NAME = 14; + + public static final int BEFORE_ATTRIBUTE_VALUE = 15; + + public static final int AFTER_ATTRIBUTE_VALUE_QUOTED = 16; + + public static final int BOGUS_COMMENT = 17; + + public static final int MARKUP_DECLARATION_OPEN = 18; + + public static final int DOCTYPE = 19; + + public static final int BEFORE_DOCTYPE_NAME = 20; + + public static final int DOCTYPE_NAME = 21; + + public static final int AFTER_DOCTYPE_NAME = 22; + + public static final int BEFORE_DOCTYPE_PUBLIC_IDENTIFIER = 23; + + public static final int DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED = 24; + + public static final int DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED = 25; + + public static final int AFTER_DOCTYPE_PUBLIC_IDENTIFIER = 26; + + public static final int BEFORE_DOCTYPE_SYSTEM_IDENTIFIER = 27; + + public static final int DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED = 28; + + public static final int DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED = 29; + + public static final int AFTER_DOCTYPE_SYSTEM_IDENTIFIER = 30; + + public static final int BOGUS_DOCTYPE = 31; + + public static final int COMMENT_START = 32; + + public static final int 
COMMENT_START_DASH = 33; + + public static final int COMMENT = 34; + + public static final int COMMENT_END_DASH = 35; + + public static final int COMMENT_END = 36; + + public static final int COMMENT_END_BANG = 37; + + public static final int NON_DATA_END_TAG_NAME = 38; + + public static final int MARKUP_DECLARATION_HYPHEN = 39; + + public static final int MARKUP_DECLARATION_OCTYPE = 40; + + public static final int DOCTYPE_UBLIC = 41; + + public static final int DOCTYPE_YSTEM = 42; + + public static final int AFTER_DOCTYPE_PUBLIC_KEYWORD = 43; + + public static final int BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS = 44; + + public static final int AFTER_DOCTYPE_SYSTEM_KEYWORD = 45; + + public static final int CONSUME_CHARACTER_REFERENCE = 46; + + public static final int CONSUME_NCR = 47; + + public static final int CHARACTER_REFERENCE_TAIL = 48; + + public static final int HEX_NCR_LOOP = 49; + + public static final int DECIMAL_NRC_LOOP = 50; + + public static final int HANDLE_NCR_VALUE = 51; + + public static final int HANDLE_NCR_VALUE_RECONSUME = 52; + + public static final int CHARACTER_REFERENCE_HILO_LOOKUP = 53; + + public static final int SELF_CLOSING_START_TAG = 54; + + public static final int CDATA_START = 55; + + public static final int CDATA_SECTION = 56; + + public static final int CDATA_RSQB = 57; + + public static final int CDATA_RSQB_RSQB = 58; + + public static final int SCRIPT_DATA_LESS_THAN_SIGN = 59; + + public static final int SCRIPT_DATA_ESCAPE_START = 60; + + public static final int SCRIPT_DATA_ESCAPE_START_DASH = 61; + + public static final int SCRIPT_DATA_ESCAPED_DASH = 62; + + public static final int SCRIPT_DATA_ESCAPED_DASH_DASH = 63; + + public static final int BOGUS_COMMENT_HYPHEN = 64; + + public static final int RAWTEXT_RCDATA_LESS_THAN_SIGN = 65; + + public static final int SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN = 66; + + public static final int SCRIPT_DATA_DOUBLE_ESCAPE_START = 67; + + public static final int SCRIPT_DATA_DOUBLE_ESCAPED = 
68; + + public static final int SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN = 69; + + public static final int SCRIPT_DATA_DOUBLE_ESCAPED_DASH = 70; + + public static final int SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH = 71; + + public static final int SCRIPT_DATA_DOUBLE_ESCAPE_END = 72; + + public static final int PROCESSING_INSTRUCTION = 73; + + public static final int PROCESSING_INSTRUCTION_QUESTION_MARK = 74; + + /** + * Magic value for UTF-16 operations. + */ + private static final int LEAD_OFFSET = (0xD800 - (0x10000 >> 10)); + + /** + * UTF-16 code unit array containing less than and greater than for emitting + * those characters on certain parse errors. + */ + private static final @NoLength char[] LT_GT = { '<', '>' }; + + /** + * UTF-16 code unit array containing less than and solidus for emitting + * those characters on certain parse errors. + */ + private static final @NoLength char[] LT_SOLIDUS = { '<', '/' }; + + /** + * UTF-16 code unit array containing ]] for emitting those characters on + * state transitions. + */ + private static final @NoLength char[] RSQB_RSQB = { ']', ']' }; + + /** + * Array version of U+FFFD. + */ + private static final @NoLength char[] REPLACEMENT_CHARACTER = { '\uFFFD' }; + + // [NOCPP[ + + /** + * Array version of space. + */ + private static final @NoLength char[] SPACE = { ' ' }; + + // ]NOCPP] + + /** + * Array version of line feed. 
+ */ + private static final @NoLength char[] LF = { '\n' }; + + /** + * "CDATA[" as <code>char[]</code> + */ + private static final @NoLength char[] CDATA_LSQB = { 'C', 'D', 'A', 'T', + 'A', '[' }; + + /** + * "octype" as <code>char[]</code> + */ + private static final @NoLength char[] OCTYPE = { 'o', 'c', 't', 'y', 'p', + 'e' }; + + /** + * "ublic" as <code>char[]</code> + */ + private static final @NoLength char[] UBLIC = { 'u', 'b', 'l', 'i', 'c' }; + + /** + * "ystem" as <code>char[]</code> + */ + private static final @NoLength char[] YSTEM = { 'y', 's', 't', 'e', 'm' }; + + private static final char[] TITLE_ARR = { 't', 'i', 't', 'l', 'e' }; + + private static final char[] SCRIPT_ARR = { 's', 'c', 'r', 'i', 'p', 't' }; + + private static final char[] STYLE_ARR = { 's', 't', 'y', 'l', 'e' }; + + private static final char[] PLAINTEXT_ARR = { 'p', 'l', 'a', 'i', 'n', 't', + 'e', 'x', 't' }; + + private static final char[] XMP_ARR = { 'x', 'm', 'p' }; + + private static final char[] TEXTAREA_ARR = { 't', 'e', 'x', 't', 'a', 'r', + 'e', 'a' }; + + private static final char[] IFRAME_ARR = { 'i', 'f', 'r', 'a', 'm', 'e' }; + + private static final char[] NOEMBED_ARR = { 'n', 'o', 'e', 'm', 'b', 'e', + 'd' }; + + private static final char[] NOSCRIPT_ARR = { 'n', 'o', 's', 'c', 'r', 'i', + 'p', 't' }; + + private static final char[] NOFRAMES_ARR = { 'n', 'o', 'f', 'r', 'a', 'm', + 'e', 's' }; + + /** + * The token handler. + */ + protected final TokenHandler tokenHandler; + + protected EncodingDeclarationHandler encodingDeclarationHandler; + + // [NOCPP[ + + /** + * The error handler. + */ + protected ErrorHandler errorHandler; + + // ]NOCPP] + + /** + * Whether the previous char read was CR. 
+ */ + protected boolean lastCR; + + protected int stateSave; + + private int returnStateSave; + + protected int index; + + private boolean forceQuirks; + + private char additional; + + private int entCol; + + private int firstCharKey; + + private int lo; + + private int hi; + + private int candidate; + + private int charRefBufMark; + + protected int value; + + private boolean seenDigits; + + /** + * Index into the input buffer from which <code>flushChars</code> emits + * pending character data. <code>flushChars</code> sets it to + * <code>Integer.MAX_VALUE</code>; emitting a comment or tag sets it to the + * position just after that token. + */ + protected int cstart; + + /** + * The SAX public id for the resource being tokenized. (Only passed back + * as part of locator data.) + */ + private String publicId; + + /** + * The SAX system id for the resource being tokenized. (Only passed back + * as part of locator data.) + */ + private String systemId; + + /** + * Buffer for bufferable things other than those that fit the description + * of <code>charRefBuf</code>. + */ + private @Auto char[] strBuf; + + /** + * Number of significant <code>char</code>s in <code>strBuf</code>. + */ + private int strBufLen; + + /** + * Buffer for characters that might form a character reference but may + * end up not forming one. + */ + private final @Auto char[] charRefBuf; + + /** + * Number of significant <code>char</code>s in <code>charRefBuf</code>. + */ + private int charRefBufLen; + + /** + * Buffer for expanding NCRs falling into the Basic Multilingual Plane. + */ + private final @Auto char[] bmpChar; + + /** + * Buffer for expanding astral NCRs. + */ + private final @Auto char[] astralChar; + + /** + * The element whose end tag closes the current CDATA or RCDATA element. + */ + protected ElementName endTagExpectation = null; + + private char[] endTagExpectationAsArray; // not @Auto! + + /** + * <code>true</code> if tokenizing an end tag + */ + protected boolean endTag; + + /** + * The current tag token name. + */ + private ElementName tagName = null; + + /** + * The current attribute name. + */ + protected AttributeName attributeName = null; + + // [NOCPP[ + + /** + * Whether comment tokens are emitted. 
+ */ + private boolean wantsComments = false; + + /** + * <code>true</code> when HTML4-specific additional errors are requested. + */ + protected boolean html4; + + /** + * Whether the stream is past the first 1024 bytes. + */ + private boolean metaBoundaryPassed; + + // ]NOCPP] + + /** + * The name of the current doctype token. + */ + private @Local String doctypeName; + + /** + * The public id of the current doctype token. + */ + private String publicIdentifier; + + /** + * The system id of the current doctype token. + */ + private String systemIdentifier; + + /** + * The attribute holder. + */ + private HtmlAttributes attributes; + + // [NOCPP[ + + /** + * The policy for vertical tab and form feed. + */ + private XmlViolationPolicy contentSpacePolicy = XmlViolationPolicy.ALTER_INFOSET; + + /** + * The policy for comments. + */ + private XmlViolationPolicy commentPolicy = XmlViolationPolicy.ALTER_INFOSET; + + private XmlViolationPolicy xmlnsPolicy = XmlViolationPolicy.ALTER_INFOSET; + + private XmlViolationPolicy namePolicy = XmlViolationPolicy.ALTER_INFOSET; + + private boolean html4ModeCompatibleWithXhtml1Schemata; + + private int mappingLangToXmlLang; + + // ]NOCPP] + + private final boolean newAttributesEachTime; + + private boolean shouldSuspend; + + protected boolean confident; + + private int line; + + /* + * The line number of the current attribute. First set to the line of the + * attribute name and if there is a value, set to the line the value + * started on. + */ + // CPPONLY: private int attributeLine; + + private Interner interner; + + // CPPONLY: private boolean viewingXmlSource; + + // [NOCPP[ + + protected LocatorImpl ampersandLocation; + + public Tokenizer(TokenHandler tokenHandler, boolean newAttributesEachTime) { + this.tokenHandler = tokenHandler; + this.encodingDeclarationHandler = null; + this.newAttributesEachTime = newAttributesEachTime; + // ∳ is the longest valid char ref and + // the semicolon never gets appended to the buffer. 
+ this.charRefBuf = new char[32]; + this.bmpChar = new char[1]; + this.astralChar = new char[2]; + this.tagName = null; + this.attributeName = null; + this.doctypeName = null; + this.publicIdentifier = null; + this.systemIdentifier = null; + this.attributes = null; + } + + // ]NOCPP] + + /** + * The constructor. + * + * @param tokenHandler + * the handler for receiving tokens + */ + public Tokenizer(TokenHandler tokenHandler + // CPPONLY: , boolean viewingXmlSource + ) { + this.tokenHandler = tokenHandler; + this.encodingDeclarationHandler = null; + // [NOCPP[ + this.newAttributesEachTime = false; + // ]NOCPP] + // ∳ is the longest valid char ref and + // the semicolon never gets appended to the buffer. + this.charRefBuf = new char[32]; + this.bmpChar = new char[1]; + this.astralChar = new char[2]; + this.tagName = null; + this.attributeName = null; + this.doctypeName = null; + this.publicIdentifier = null; + this.systemIdentifier = null; + // [NOCPP[ + this.attributes = null; + // ]NOCPP] + // CPPONLY: this.attributes = tokenHandler.HasBuilder() ? new HtmlAttributes(mappingLangToXmlLang) : null; + // CPPONLY: this.newAttributesEachTime = !tokenHandler.HasBuilder(); + // CPPONLY: this.viewingXmlSource = viewingXmlSource; + } + + public void setInterner(Interner interner) { + this.interner = interner; + } + + public void initLocation(String newPublicId, String newSystemId) { + this.systemId = newSystemId; + this.publicId = newPublicId; + + } + + // CPPONLY: boolean isViewingXmlSource() { + // CPPONLY: return viewingXmlSource; + // CPPONLY: } + + // [NOCPP[ + + /** + * Returns the mappingLangToXmlLang. + * + * @return the mappingLangToXmlLang + */ + public boolean isMappingLangToXmlLang() { + return mappingLangToXmlLang == AttributeName.HTML_LANG; + } + + /** + * Sets the mappingLangToXmlLang. 
+ * + * @param mappingLangToXmlLang + * the mappingLangToXmlLang to set + */ + public void setMappingLangToXmlLang(boolean mappingLangToXmlLang) { + this.mappingLangToXmlLang = mappingLangToXmlLang ? AttributeName.HTML_LANG + : AttributeName.HTML; + } + + /** + * Sets the error handler. + * + * @see org.xml.sax.XMLReader#setErrorHandler(org.xml.sax.ErrorHandler) + */ + public void setErrorHandler(ErrorHandler eh) { + this.errorHandler = eh; + } + + /** + * Returns the error handler. + * + * @return the handler last passed to <code>setErrorHandler</code>; the + * reporting methods of this class treat <code>null</code> as + * "no handler" + */ + public ErrorHandler getErrorHandler() { + return this.errorHandler; + } + + /** + * Sets the commentPolicy. + * + * @param commentPolicy + * the commentPolicy to set + */ + public void setCommentPolicy(XmlViolationPolicy commentPolicy) { + this.commentPolicy = commentPolicy; + } + + /** + * Validates the contentNonXmlCharPolicy. This class stores no such + * policy: the only accepted value is <code>ALLOW</code>, and the exception + * message refers callers to ErrorReportingTokenizer for any other value. + * + * @param contentNonXmlCharPolicy + * the contentNonXmlCharPolicy; must be <code>ALLOW</code> + * @throws IllegalArgumentException + * if the policy is anything other than <code>ALLOW</code> + */ + public void setContentNonXmlCharPolicy( + XmlViolationPolicy contentNonXmlCharPolicy) { + if (contentNonXmlCharPolicy != XmlViolationPolicy.ALLOW) { + throw new IllegalArgumentException( + "Must use ErrorReportingTokenizer to set contentNonXmlCharPolicy to non-ALLOW."); + } + } + + /** + * Sets the contentSpacePolicy. + * + * @param contentSpacePolicy + * the contentSpacePolicy to set + */ + public void setContentSpacePolicy(XmlViolationPolicy contentSpacePolicy) { + this.contentSpacePolicy = contentSpacePolicy; + } + + /** + * Sets the xmlnsPolicy. + * + * @param xmlnsPolicy + * the xmlnsPolicy to set; must not be <code>FATAL</code> + * @throws IllegalArgumentException + * if the policy is <code>FATAL</code> + */ + public void setXmlnsPolicy(XmlViolationPolicy xmlnsPolicy) { + if (xmlnsPolicy == XmlViolationPolicy.FATAL) { + throw new IllegalArgumentException("Can't use FATAL here."); + } + this.xmlnsPolicy = xmlnsPolicy; + } + + /** + * Sets the namePolicy. + * + * @param namePolicy + * the namePolicy to set + */ + public void setNamePolicy(XmlViolationPolicy namePolicy) { + this.namePolicy = namePolicy; + } + + /** + * Sets the html4ModeCompatibleWithXhtml1Schemata. 
+ * + * @param html4ModeCompatibleWithXhtml1Schemata + * the html4ModeCompatibleWithXhtml1Schemata to set + */ + public void setHtml4ModeCompatibleWithXhtml1Schemata( + boolean html4ModeCompatibleWithXhtml1Schemata) { + this.html4ModeCompatibleWithXhtml1Schemata = html4ModeCompatibleWithXhtml1Schemata; + } + + // ]NOCPP] + + // For the token handler to call + /** + * Sets the tokenizer state and clears the end tag expectation (both the + * element name and its character array form). This should + * only ever be used to put the tokenizer into one of the states that have + * a special end tag expectation. + * + * @param specialTokenizerState + * the tokenizer state to set + */ + public void setState(int specialTokenizerState) { + this.stateSave = specialTokenizerState; + this.endTagExpectation = null; + this.endTagExpectationAsArray = null; + } + + // [NOCPP[ + + /** + * Sets the tokenizer state and the associated element name. This should + * only ever be used to put the tokenizer into one of the states that have + * a special end tag expectation. For use from the tokenizer test harness. + * + * When the state is <code>DATA</code>, only the state is stored and the + * end tag expectation is left as it was. + * + * @param specialTokenizerState + * the tokenizer state to set + * @param endTagExpectation + * the expected end tag for transitioning back to normal + */ + public void setStateAndEndTagExpectation(int specialTokenizerState, + @Local String endTagExpectation) { + this.stateSave = specialTokenizerState; + if (specialTokenizerState == Tokenizer.DATA) { + return; + } + @Auto char[] asArray = Portability.newCharArrayFromLocal(endTagExpectation); + this.endTagExpectation = ElementName.elementNameByBuffer(asArray, 0, + asArray.length, interner); + endTagExpectationToArray(); + } + + // ]NOCPP] + + /** + * Sets the tokenizer state and the associated element name. This should + * only ever be used to put the tokenizer into one of the states that have + * a special end tag expectation. 
+ * + * @param specialTokenizerState + * the tokenizer state to set + * @param endTagExpectation + * the expected end tag for transitioning back to normal + */ + public void setStateAndEndTagExpectation(int specialTokenizerState, + ElementName endTagExpectation) { + this.stateSave = specialTokenizerState; + this.endTagExpectation = endTagExpectation; + endTagExpectationToArray(); + } + + private void endTagExpectationToArray() { + switch (endTagExpectation.getGroup()) { + case TreeBuilder.TITLE: + endTagExpectationAsArray = TITLE_ARR; + return; + case TreeBuilder.SCRIPT: + endTagExpectationAsArray = SCRIPT_ARR; + return; + case TreeBuilder.STYLE: + endTagExpectationAsArray = STYLE_ARR; + return; + case TreeBuilder.PLAINTEXT: + endTagExpectationAsArray = PLAINTEXT_ARR; + return; + case TreeBuilder.XMP: + endTagExpectationAsArray = XMP_ARR; + return; + case TreeBuilder.TEXTAREA: + endTagExpectationAsArray = TEXTAREA_ARR; + return; + case TreeBuilder.IFRAME: + endTagExpectationAsArray = IFRAME_ARR; + return; + case TreeBuilder.NOEMBED: + endTagExpectationAsArray = NOEMBED_ARR; + return; + case TreeBuilder.NOSCRIPT: + endTagExpectationAsArray = NOSCRIPT_ARR; + return; + case TreeBuilder.NOFRAMES: + endTagExpectationAsArray = NOFRAMES_ARR; + return; + default: + assert false: "Bad end tag expectation."; + return; + } + } + + /** + * For C++ use only. + */ + public void setLineNumber(int line) { + // CPPONLY: this.attributeLine = line; // XXX is this needed? 
+ this.line = line; + } + + // start Locator impl + + /** + * @see org.xml.sax.Locator#getLineNumber() + */ + @Inline public int getLineNumber() { + return line; + } + + // [NOCPP[ + + /** + * @see org.xml.sax.Locator#getColumnNumber() + */ + @Inline public int getColumnNumber() { + return -1; + } + + /** + * @see org.xml.sax.Locator#getPublicId() + */ + public String getPublicId() { + return publicId; + } + + /** + * @see org.xml.sax.Locator#getSystemId() + */ + public String getSystemId() { + return systemId; + } + + // end Locator impl + + // end public API + + public void notifyAboutMetaBoundary() { + metaBoundaryPassed = true; + } + + void turnOnAdditionalHtml4Errors() { + html4 = true; + } + + // ]NOCPP] + + HtmlAttributes emptyAttributes() { + // [NOCPP[ + if (newAttributesEachTime) { + return new HtmlAttributes(mappingLangToXmlLang); + } else { + // ]NOCPP] + return HtmlAttributes.EMPTY_ATTRIBUTES; + // [NOCPP[ + } + // ]NOCPP] + } + + @Inline private void appendCharRefBuf(char c) { + // CPPONLY: assert charRefBufLen < charRefBuf.length: + // CPPONLY: "RELEASE: Attempted to overrun charRefBuf!"; + charRefBuf[charRefBufLen++] = c; + } + + private void emitOrAppendCharRefBuf(int returnState) throws SAXException { + if ((returnState & DATA_AND_RCDATA_MASK) != 0) { + appendCharRefBufToStrBuf(); + } else { + if (charRefBufLen > 0) { + tokenHandler.characters(charRefBuf, 0, charRefBufLen); + charRefBufLen = 0; + } + } + } + + @Inline private void clearStrBufAfterUse() { + strBufLen = 0; + } + + @Inline private void clearStrBufBeforeUse() { + assert strBufLen == 0: "strBufLen not reset after previous use!"; + strBufLen = 0; // no-op in the absence of bugs + } + + @Inline private void clearStrBufAfterOneHyphen() { + assert strBufLen == 1: "strBufLen length not one!"; + assert strBuf[0] == '-': "strBuf does not start with a hyphen!"; + strBufLen = 0; + } + + /** + * Appends to the buffer. 
+ * + * @param c + * the UTF-16 code unit to append + */ + @Inline private void appendStrBuf(char c) { + // CPPONLY: assert strBufLen < strBuf.length: "Previous buffer length insufficient."; + // CPPONLY: if (strBufLen == strBuf.length) { + // CPPONLY: if (!EnsureBufferSpace(1)) { + // CPPONLY: assert false: "RELEASE: Unable to recover from buffer reallocation failure"; + // CPPONLY: } // TODO: Add telemetry when outer if fires but inner does not + // CPPONLY: } + strBuf[strBufLen++] = c; + } + + /** + * Returns the buffer contents as a String and resets the buffer length to + * zero. Used for error reporting and for attribute values. + * + * <p> + * C++ memory note: The return value must be released. + * + * @return the buffer as a string + */ + protected String strBufToString() { + String str = Portability.newStringFromBuffer(strBuf, 0, strBufLen + // CPPONLY: , tokenHandler + ); + clearStrBufAfterUse(); + return str; + } + + /** + * Stores the buffer contents in <code>doctypeName</code> as a local name + * and resets the buffer length to zero. Nothing is returned. Per the + * original note, the stored name is released in emitDoctypeToken(). + */ + private void strBufToDoctypeName() { + doctypeName = Portability.newLocalNameFromBuffer(strBuf, 0, strBufLen, + interner); + clearStrBufAfterUse(); + } + + /** + * Emits the buffer as character tokens. 
+ * + * @throws SAXException + * if the token handler threw + */ + private void emitStrBuf() throws SAXException { + if (strBufLen > 0) { + tokenHandler.characters(strBuf, 0, strBufLen); + clearStrBufAfterUse(); + } + } + + @Inline private void appendSecondHyphenToBogusComment() throws SAXException { + // [NOCPP[ + switch (commentPolicy) { + case ALTER_INFOSET: + appendStrBuf(' '); + // FALLTHROUGH + case ALLOW: + warn("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment."); + // ]NOCPP] + appendStrBuf('-'); + // [NOCPP[ + break; + case FATAL: + fatal("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment."); + break; + } + // ]NOCPP] + } + + // [NOCPP[ + private void maybeAppendSpaceToBogusComment() throws SAXException { + switch (commentPolicy) { + case ALTER_INFOSET: + appendStrBuf(' '); + // FALLTHROUGH + case ALLOW: + warn("The document is not mappable to XML 1.0 due to a trailing hyphen in a comment."); + break; + case FATAL: + fatal("The document is not mappable to XML 1.0 due to a trailing hyphen in a comment."); + break; + } + } + + // ]NOCPP] + + @Inline private void adjustDoubleHyphenAndAppendToStrBufAndErr(char c) + throws SAXException { + errConsecutiveHyphens(); + // [NOCPP[ + switch (commentPolicy) { + case ALTER_INFOSET: + strBufLen--; + // WARNING!!! This expands the worst case of the buffer length + // given the length of input! 
+ appendStrBuf(' '); + appendStrBuf('-'); + // FALLTHROUGH + case ALLOW: + warn("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment."); + // ]NOCPP] + appendStrBuf(c); + // [NOCPP[ + break; + case FATAL: + fatal("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment."); + break; + } + // ]NOCPP] + } + + private void appendStrBuf(@NoLength char[] buffer, int offset, int length) { + int newLen = strBufLen + length; + // CPPONLY: assert newLen <= strBuf.length: "Previous buffer length insufficient."; + // CPPONLY: if (strBuf.length < newLen) { + // CPPONLY: if (!EnsureBufferSpace(length)) { + // CPPONLY: assert false: "RELEASE: Unable to recover from buffer reallocation failure"; + // CPPONLY: } // TODO: Add telemetry when outer if fires but inner does not + // CPPONLY: } + System.arraycopy(buffer, offset, strBuf, strBufLen, length); + strBufLen = newLen; + } + + /** + * Append the contents of the char reference buffer to the main one. + */ + @Inline private void appendCharRefBufToStrBuf() { + appendStrBuf(charRefBuf, 0, charRefBufLen); + charRefBufLen = 0; + } + + /** + * Emits the current comment token, resets the string buffer and moves + * <code>cstart</code> to the position after <code>pos</code>. + * + * @param provisionalHyphens + * number of trailing <code>char</code>s in the buffer that are + * left out of the emitted comment + * @param pos + * the current input position; <code>cstart</code> becomes + * <code>pos + 1</code> + * + * @throws SAXException + */ + private void emitComment(int provisionalHyphens, int pos) + throws SAXException { + // [NOCPP[ + if (wantsComments) { + // ]NOCPP] + tokenHandler.comment(strBuf, 0, strBufLen + - provisionalHyphens); + // [NOCPP[ + } + // ]NOCPP] + clearStrBufAfterUse(); + cstart = pos + 1; + } + + /** + * Flushes coalesced character tokens. Nothing is emitted unless + * <code>pos</code> is greater than <code>cstart</code>; either way + * <code>cstart</code> is set to <code>Integer.MAX_VALUE</code> afterwards. + * + * @param buf + * the buffer the characters are read from + * @param pos + * the index just past the last <code>char</code> to emit; the + * run emitted is <code>cstart</code> up to but excluding + * <code>pos</code> + * + * @throws SAXException + */ + protected void flushChars(@NoLength char[] buf, int pos) + throws SAXException { + if (pos > cstart) { + tokenHandler.characters(buf, cstart, pos - cstart); + } + cstart = Integer.MAX_VALUE; + } + + /** + * Reports a condition that would make the infoset incompatible with XML + * 1.0 as fatal. 
+ * + * @param message + * the message + * @throws SAXException + * @throws SAXParseException + */ + public void fatal(String message) throws SAXException { + SAXParseException spe = new SAXParseException(message, this); + if (errorHandler != null) { + errorHandler.fatalError(spe); + } + throw spe; + } + + /** + * Reports a Parse Error. + * + * @param message + * the message + * @throws SAXException + */ + public void err(String message) throws SAXException { + if (errorHandler == null) { + return; + } + SAXParseException spe = new SAXParseException(message, this); + errorHandler.error(spe); + } + + public void errTreeBuilder(String message) throws SAXException { + ErrorHandler eh = null; + if (tokenHandler instanceof TreeBuilder<?>) { + TreeBuilder<?> treeBuilder = (TreeBuilder<?>) tokenHandler; + eh = treeBuilder.getErrorHandler(); + } + if (eh == null) { + eh = errorHandler; + } + if (eh == null) { + return; + } + SAXParseException spe = new SAXParseException(message, this); + eh.error(spe); + } + + /** + * Reports a warning + * + * @param message + * the message + * @throws SAXException + */ + public void warn(String message) throws SAXException { + if (errorHandler == null) { + return; + } + SAXParseException spe = new SAXParseException(message, this); + errorHandler.warning(spe); + } + + private void strBufToElementNameString() { + tagName = ElementName.elementNameByBuffer(strBuf, 0, strBufLen, + interner); + clearStrBufAfterUse(); + } + + private int emitCurrentTagToken(boolean selfClosing, int pos) + throws SAXException { + cstart = pos + 1; + maybeErrSlashInEndTag(selfClosing); + stateSave = Tokenizer.DATA; + HtmlAttributes attrs = (attributes == null ? HtmlAttributes.EMPTY_ATTRIBUTES + : attributes); + if (endTag) { + /* + * When an end tag token is emitted, the content model flag must be + * switched to the PCDATA state. 
*/
            maybeErrAttributesOnEndTag(attrs);
            // CPPONLY: if (!viewingXmlSource) {
            tokenHandler.endTag(tagName);
            // CPPONLY: }
            // CPPONLY: if (newAttributesEachTime) {
            // CPPONLY: Portability.delete(attributes);
            // CPPONLY: attributes = null;
            // CPPONLY: }
        } else {
            // CPPONLY: if (viewingXmlSource) {
            // CPPONLY: assert newAttributesEachTime;
            // CPPONLY: Portability.delete(attributes);
            // CPPONLY: attributes = null;
            // CPPONLY: } else {
            tokenHandler.startTag(tagName, attrs, selfClosing);
            // CPPONLY: }
        }
        tagName.release();
        tagName = null;
        if (newAttributesEachTime) {
            attributes = null;
        } else {
            attributes.clear(mappingLangToXmlLang);
        }
        /*
         * The token handler may have called setStateAndEndTagExpectation
         * and changed stateSave since the start of this method.
         */
        return stateSave;
    }

    /**
     * Finishes the attribute name that has been accumulated in
     * <code>strBuf</code>.
     *
     * The first <code>strBufLen</code> chars of <code>strBuf</code> are
     * resolved to an <code>AttributeName</code> (the Java build additionally
     * passes whether <code>namePolicy</code> differs from
     * <code>XmlViolationPolicy.ALLOW</code>), the string buffer is cleared and
     * the <code>attributes</code> holder is created if it does not exist yet.
     * If the holder already contains the name, the duplicate is reported, the
     * name is released and <code>attributeName</code> is set to
     * <code>null</code>; the add-attribute methods treat a <code>null</code>
     * name as "discard the value".
     *
     * @throws SAXException
     *             declared by this method; presumably propagated from the
     *             error reporting call
     */
    private void attributeNameComplete() throws SAXException {
        attributeName = AttributeName.nameByBuffer(strBuf, 0, strBufLen
        // [NOCPP[
                , namePolicy != XmlViolationPolicy.ALLOW
                // ]NOCPP]
                , interner);
        clearStrBufAfterUse();

        // The holder is allocated lazily, on the first attribute of a tag.
        if (attributes == null) {
            attributes = new HtmlAttributes(mappingLangToXmlLang);
        }

        /*
         * When the user agent leaves the attribute name state (and before
         * emitting the tag token, if appropriate), the complete attribute's
         * name must be compared to the other attributes on the same token; if
         * there is already an attribute on the token with the exact same name,
         * then this is a parse error and the new attribute must be dropped,
         * along with the value that gets associated with it (if any).
         */
        if (attributes.contains(attributeName)) {
            errDuplicateAttribute();
            attributeName.release();
            // A null attributeName marks the attribute as dropped.
            attributeName = null;
        }
    }

    /**
     * Adds the current attribute to <code>attributes</code> when the tag
     * supplied no value for it.
     *
     * <code>noteAttributeWithoutValue()</code> is always called first. If
     * <code>attributeName</code> is <code>null</code>, nothing is added and
     * only the string buffer is cleared. Otherwise the attribute is added with
     * an empty string value, except in the Java-only HTML4 branch, which picks
     * the value (or skips the attribute) as commented below. In every non-null
     * case <code>attributeName</code> is reset to <code>null</code>
     * afterwards.
     *
     * @throws SAXException
     *             declared by this method; presumably propagated from the
     *             error and warning reporting calls
     */
    private void addAttributeWithoutValue() throws SAXException {
        noteAttributeWithoutValue();

        // [NOCPP[
        // Java-only diagnostic; it is evaluated before the null check, and a
        // null attributeName simply fails the identity comparison.
        if (metaBoundaryPassed && AttributeName.CHARSET == attributeName
                && ElementName.META == tagName) {
            err("A \u201Ccharset\u201D attribute on a \u201Cmeta\u201D element found after the first 512 bytes.");
        }
        // ]NOCPP]
        if (attributeName != null) {
            // [NOCPP[
            if (html4) {
                if (attributeName.isBoolean()) {
                    // Boolean attribute: the value is the attribute's own
                    // local name in the XHTML 1 schema compatible mode and
                    // the empty string otherwise.
                    if (html4ModeCompatibleWithXhtml1Schemata) {
                        attributes.addAttribute(attributeName,
                                attributeName.getLocal(AttributeName.HTML),
                                xmlnsPolicy);
                    } else {
                        attributes.addAttribute(attributeName, "", xmlnsPolicy);
                    }
                } else {
                    // NOTE(review): a valueless non-boolean "border" is
                    // neither reported nor added here, so it is silently
                    // dropped from the tag. This looks deliberate - confirm.
                    if (AttributeName.BORDER != attributeName) {
                        err("Attribute value omitted for a non-boolean attribute. (HTML4-only error.)");
                        attributes.addAttribute(attributeName, "", xmlnsPolicy);
                    }
                }
            } else {
                if (AttributeName.SRC == attributeName
                        || AttributeName.HREF == attributeName) {
                    warn("Attribute \u201C"
                            + attributeName.getLocal(AttributeName.HTML)
                            + "\u201D without an explicit value seen. The attribute may be dropped by IE7.");
                }
                // ]NOCPP]
                attributes.addAttribute(attributeName,
                        Portability.newEmptyString()
                        // [NOCPP[
                        , xmlnsPolicy
                        // ]NOCPP]
                        // CPPONLY: , attributeLine
                );
                // [NOCPP[
            }
            // ]NOCPP]
            // NOTE(review): in the skipped "border" case above the name was
            // not handed to |attributes| before being nulled here.
            attributeName = null; // attributeName has been adopted by the
            // |attributes| object
        } else {
            // attributeNameComplete() nulls attributeName for a duplicate
            // attribute; only the buffer needs resetting.
            clearStrBufAfterUse();
        }
    }

    /**
     * Adds the current attribute to <code>attributes</code> using the contents
     * of the string buffer as its value.
     *
     * If <code>attributeName</code> is <code>null</code> (a duplicate
     * attribute), the buffered value is discarded. Otherwise the value is
     * obtained with <code>strBufToString()</code>, added under
     * <code>attributeName</code>, and <code>attributeName</code> is reset to
     * <code>null</code>. In the Java build the value is ASCII-lowercased first
     * when the tag is not an end tag, both <code>html4</code> and
     * <code>html4ModeCompatibleWithXhtml1Schemata</code> are set and the
     * attribute name reports itself as case-folded.
     *
     * @throws SAXException
     *             declared by this method; presumably propagated from the
     *             error reporting call
     */
    private void addAttributeWithValue() throws SAXException {
        // [NOCPP[
        if (metaBoundaryPassed && ElementName.META == tagName
                && AttributeName.CHARSET == attributeName) {
            err("A \u201Ccharset\u201D attribute on a \u201Cmeta\u201D element found after the first 512 bytes.");
        }
        // ]NOCPP]
        if (attributeName != null) {
            String val = strBufToString(); // Ownership transferred to
            // HtmlAttributes
            // CPPONLY: if (mViewSource) {
            // CPPONLY: mViewSource.MaybeLinkifyAttributeValue(attributeName, val);
            // CPPONLY: }
            // [NOCPP[
            if (!endTag && html4 && html4ModeCompatibleWithXhtml1Schemata
                    && attributeName.isCaseFolded()) {
                val = newAsciiLowerCaseStringFromString(val);
            }
            // ]NOCPP]
            attributes.addAttribute(attributeName, val
            // [NOCPP[
                    , xmlnsPolicy
                    // ]NOCPP]
                    // CPPONLY: , attributeLine
            );
            attributeName = null; // attributeName has been adopted by the
            // |attributes| object
        } else {
            // We have a duplicate attribute. Explicitly discard its value.
            clearStrBufAfterUse();
        }
    }

    // [NOCPP[

    /**
     * Returns a copy of <code>str</code> in which the ASCII upper case letters
     * <code>A</code> to <code>Z</code> are replaced by their lower case
     * counterparts. All other chars are copied unchanged.
     *
     * @param str
     *            the string to convert; may be <code>null</code>
     * @return a newly allocated lower-cased string, or <code>null</code> if
     *         <code>str</code> is <code>null</code>
     */
    private static String newAsciiLowerCaseStringFromString(String str) {
        if (str == null) {
            return null;
        }
        char[] buf = new char[str.length()];
        for (int i = 0; i < str.length(); i++) {
            char c = str.charAt(i);
            if (c >= 'A' && c <= 'Z') {
                // 'a' - 'A' == 0x20 in ASCII.
                c += 0x20;
            }
            buf[i] = c;
        }
        return new String(buf);
    }

    /**
     * Called by <code>start()</code> after the token handler has been
     * notified. The body is empty in this class; the method is
     * <code>protected</code>, so presumably subclasses override it.
     *
     * @throws SAXException
     *             declared for the benefit of overriding implementations;
     *             this implementation does not throw
     */
    protected void startErrorReporting() throws SAXException {

    }

    // ]NOCPP]

    /**
     * Prepares the tokenizer for a new run: calls
     * <code>initializeWithoutStarting()</code>, notifies the token handler
     * with <code>startTokenization(this)</code> and, in the Java build,
     * calls <code>startErrorReporting()</code>.
     *
     * @throws SAXException
     *             if one of the called methods throws it
     */
    public void start() throws SAXException {
        initializeWithoutStarting();
        tokenHandler.startTokenization(this);
        // [NOCPP[
        startErrorReporting();
        // ]NOCPP]
    }

    /**
     * Tokenizes the chars of <code>buffer</code> between its start and end
     * indices, resuming from the state saved in <code>stateSave</code> and
     * <code>returnStateSave</code>.
     *
     * On return the buffer's start index has been advanced past the chars
     * that were consumed: to <code>pos</code> when the whole buffer was
     * processed, otherwise to <code>pos + 1</code>, where <code>pos</code> is
     * the index of the last char read by <code>stateLoop</code>.
     *
     * @param buffer
     *            the input to tokenize; its start index is updated
     * @return the value of the <code>lastCR</code> field after the state loop
     *         has run (reset to <code>false</code> on entry)
     * @throws SAXException
     *             if one of the called methods throws it
     */
    public boolean tokenizeBuffer(UTF16Buffer buffer) throws SAXException {
        int state = stateSave;
        int returnState = returnStateSave;
        char c = '\u0000';
        shouldSuspend = false;
        lastCR = false;

        int start = buffer.getStart();
        int end = buffer.getEnd();

        // In C++, the caller of tokenizeBuffer needs to do this explicitly.
        // [NOCPP[
        ensureBufferSpace(end - start);
        // ]NOCPP]

        /**
         * The index of the last <code>char</code> read from <code>buf</code>.
         */
        int pos = start - 1;

        /**
         * The index of the first <code>char</code> in <code>buf</code> that is
         * part of a coalesced run of character tokens or
         * <code>Integer.MAX_VALUE</code> if there is not a current run being
         * coalesced.
         */
        // The comment above describes cstart, which is assigned by this
        // switch: the listed states start a run at the beginning of the
        // buffer, every other state starts with no run.
        switch (state) {
            case DATA:
            case RCDATA:
            case SCRIPT_DATA:
            case PLAINTEXT:
            case RAWTEXT:
            case CDATA_SECTION:
            case SCRIPT_DATA_ESCAPED:
            case SCRIPT_DATA_ESCAPE_START:
            case SCRIPT_DATA_ESCAPE_START_DASH:
            case SCRIPT_DATA_ESCAPED_DASH:
            case SCRIPT_DATA_ESCAPED_DASH_DASH:
            case SCRIPT_DATA_DOUBLE_ESCAPE_START:
            case SCRIPT_DATA_DOUBLE_ESCAPED:
            case SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN:
            case SCRIPT_DATA_DOUBLE_ESCAPED_DASH:
            case SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH:
            case SCRIPT_DATA_DOUBLE_ESCAPE_END:
                cstart = start;
                break;
            default:
                cstart = Integer.MAX_VALUE;
                break;
        }

        /**
         * The number of <code>char</code>s in <code>buf</code> that have
         * meaning. (The rest of the array is garbage and should not be
         * examined.)
         */
        // NOTE(review): the comment above is not followed by a declaration;
        // presumably it describes the end index passed as the last argument
        // to stateLoop below - confirm.
        // CPPONLY: if (mViewSource) {
        // CPPONLY: mViewSource.SetBuffer(buffer);
        // CPPONLY: pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
        // CPPONLY: mViewSource.DropBuffer((pos == buffer.getEnd()) ? pos : pos + 1);
        // CPPONLY: } else {
        // CPPONLY: pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
        // CPPONLY: }
        // [NOCPP[
        pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState,
                end);
        // ]NOCPP]
        if (pos == end) {
            // exiting due to end of buffer
            buffer.setStart(pos);
        } else {
            // The loop stopped before the end of the buffer; pos is the
            // index of the last char read, so resume after it.
            buffer.setStart(pos + 1);
        }
        return lastCR;
    }

    // [NOCPP[
    /**
     * Makes sure that the token handler's buffer and <code>strBuf</code> are
     * large enough for the worst case of tokenizing
     * <code>inputLength</code> more chars.
     *
     * The worst case is the sum of <code>strBufLen</code>,
     * <code>inputLength</code>, <code>charRefBufLen</code> and 2. That size
     * is first passed to the token handler. It is then doubled for
     * <code>strBuf</code> when <code>commentPolicy</code> is
     * <code>XmlViolationPolicy.ALTER_INFOSET</code>. <code>strBuf</code> is
     * allocated if it is <code>null</code>, or replaced by a larger array
     * holding the first <code>strBufLen</code> chars of the old one if it is
     * too small.
     *
     * @param inputLength
     *            the number of input chars about to be tokenized
     *            (<code>tokenizeBuffer</code> passes <code>end - start</code>)
     * @throws SAXException
     *             declared by this method; presumably propagated from the
     *             token handler
     */
    private void ensureBufferSpace(int inputLength) throws SAXException {
        // Add 2 to account for emissions of LT_GT, LT_SOLIDUS and RSQB_RSQB.
        // Adding to the general worst case instead of only the
        // TreeBuilder-exposed worst case to avoid re-introducing a bug when
        // unifying the tokenizer and tree builder buffers in the future.
        int worstCase = strBufLen + inputLength + charRefBufLen + 2;
        tokenHandler.ensureBufferSpace(worstCase);
        if (commentPolicy == XmlViolationPolicy.ALTER_INFOSET) {
            // When altering infoset, if the comment contents are consecutive
            // hyphens, each hyphen generates a space, too. These buffer
            // contents never get emitted as characters() to the tokenHandler,
            // which is why this calculation happens after the call to
            // ensureBufferSpace on tokenHandler.
            worstCase *= 2;
        }
        if (strBuf == null) {
            // Add an arbitrary small value to avoid immediate reallocation
            // once there are a few characters in the buffer.
            strBuf = new char[worstCase + 128];
        } else if (worstCase > strBuf.length) {
            // HotSpot reportedly allocates memory with 8-byte accuracy, so
            // there's no point in trying to do math here to avoid slop.
            // Maybe we should add some small constant to worstCase here
            // but not doing that without profiling. In C++ with jemalloc,
            // the corresponding method should do math to round up here
            // to avoid slop.
            char[] newBuf = new char[worstCase];
            System.arraycopy(strBuf, 0, newBuf, 0, strBufLen);
            strBuf = newBuf;
        }
    }
    // ]NOCPP]

    @SuppressWarnings("unused") private int stateLoop(int state, char c,
            int pos, @NoLength char[] buf, boolean reconsume, int returnState,
            int endPos) throws SAXException {
        /*
         * Idioms used in this code:
         *
         *
         * Consuming the next input character
         *
         * To consume the next input character, the code does this: if (++pos ==
         * endPos) { break stateloop; } c = checkChar(buf, pos);
         *
         *
         * Staying in a state
         *
         * When there's a state that the tokenizer may stay in over multiple
         * input characters, the state has a wrapper |for(;;)| loop and staying
         * in the state continues the loop.
         *
         *
         * Switching to another state
         *
         * To switch to another state, the code sets the state variable to the
         * magic number of the new state.
Then it either continues stateloop or + * breaks out of the state's own wrapper loop if the target state is + * right after the current state in source order. (This is a partial + * workaround for Java's lack of goto.) + * + * + * Reconsume support + * + * The spec sometimes says that an input character is reconsumed in + * another state. If a state can ever be entered so that an input + * character can be reconsumed in it, the state's code starts with an + * |if (reconsume)| that sets reconsume to false and skips over the + * normal code for consuming a new character. + * + * To reconsume the current character in another state, the code sets + * |reconsume| to true and then switches to the other state. + * + * + * Emitting character tokens + * + * This method emits character tokens lazily. Whenever a new range of + * character tokens starts, the field cstart must be set to the start + * index of the range. The flushChars() method must be called at the end + * of a range to flush it. + * + * + * U+0000 handling + * + * The various states have to handle the replacement of U+0000 with + * U+FFFD. However, if U+0000 would be reconsumed in another state, the + * replacement doesn't need to happen, because it's handled by the + * reconsuming state. + * + * + * LF handling + * + * Every state needs to increment the line number upon LF unless the LF + * gets reconsumed by another state which increments the line number. + * + * + * CR handling + * + * Every state needs to handle CR unless the CR gets reconsumed and is + * handled by the reconsuming state. The CR needs to be handled as if it + * were an LF, the lastCR field must be set to true and then this + * method must return. The IO driver will then swallow the next + * character if it is an LF to coalesce CRLF. 
+ */ + stateloop: for (;;) { + switch (state) { + case DATA: + dataloop: for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + } + switch (c) { + case '&': + /* + * U+0026 AMPERSAND (&) Switch to the character + * reference in data state. + */ + flushChars(buf, pos); + assert charRefBufLen == 0: "charRefBufLen not reset after previous use!"; + appendCharRefBuf(c); + setAdditionalAndRememberAmpersandLocation('\u0000'); + returnState = state; + state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos); + continue stateloop; + case '<': + /* + * U+003C LESS-THAN SIGN (<) Switch to the tag + * open state. + */ + flushChars(buf, pos); + + state = transition(state, Tokenizer.TAG_OPEN, reconsume, pos); + break dataloop; // FALL THROUGH continue + // stateloop; + case '\u0000': + emitReplacementCharacter(buf, pos); + continue; + case '\r': + emitCarriageReturn(buf, pos); + break stateloop; + case '\n': + silentLineFeed(); + default: + /* + * Anything else Emit the input character as a + * character token. + * + * Stay in the data state. + */ + continue; + } + } + // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER + case TAG_OPEN: + tagopenloop: for (;;) { + /* + * The behavior of this state depends on the content + * model flag. + */ + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * If the content model flag is set to the PCDATA state + * Consume the next input character: + */ + if (c >= 'A' && c <= 'Z') { + /* + * U+0041 LATIN CAPITAL LETTER A through to U+005A + * LATIN CAPITAL LETTER Z Create a new start tag + * token, + */ + endTag = false; + /* + * set its tag name to the lowercase version of the + * input character (add 0x0020 to the character's + * code point), + */ + clearStrBufBeforeUse(); + appendStrBuf((char) (c + 0x20)); + /* then switch to the tag name state. 
*/ + state = transition(state, Tokenizer.TAG_NAME, reconsume, pos); + /* + * (Don't emit the token yet; further details will + * be filled in before it is emitted.) + */ + break tagopenloop; + // continue stateloop; + } else if (c >= 'a' && c <= 'z') { + /* + * U+0061 LATIN SMALL LETTER A through to U+007A + * LATIN SMALL LETTER Z Create a new start tag + * token, + */ + endTag = false; + /* + * set its tag name to the input character, + */ + clearStrBufBeforeUse(); + appendStrBuf(c); + /* then switch to the tag name state. */ + state = transition(state, Tokenizer.TAG_NAME, reconsume, pos); + /* + * (Don't emit the token yet; further details will + * be filled in before it is emitted.) + */ + break tagopenloop; + // continue stateloop; + } + switch (c) { + case '!': + /* + * U+0021 EXCLAMATION MARK (!) Switch to the + * markup declaration open state. + */ + state = transition(state, Tokenizer.MARKUP_DECLARATION_OPEN, reconsume, pos); + continue stateloop; + case '/': + /* + * U+002F SOLIDUS (/) Switch to the close tag + * open state. + */ + state = transition(state, Tokenizer.CLOSE_TAG_OPEN, reconsume, pos); + continue stateloop; + case '?': + // CPPONLY: if (viewingXmlSource) { + // CPPONLY: state = transition(state, + // CPPONLY: Tokenizer.PROCESSING_INSTRUCTION, + // CPPONLY: reconsume, + // CPPONLY: pos); + // CPPONLY: continue stateloop; + // CPPONLY: } + /* + * U+003F QUESTION MARK (?) Parse error. + */ + errProcessingInstruction(); + /* + * Switch to the bogus comment state. + */ + clearStrBufBeforeUse(); + appendStrBuf(c); + state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos); + continue stateloop; + case '>': + /* + * U+003E GREATER-THAN SIGN (>) Parse error. + */ + errLtGt(); + /* + * Emit a U+003C LESS-THAN SIGN character token + * and a U+003E GREATER-THAN SIGN character + * token. + */ + tokenHandler.characters(Tokenizer.LT_GT, 0, 2); + /* Switch to the data state. 
*/ + cstart = pos + 1; + state = transition(state, Tokenizer.DATA, reconsume, pos); + continue stateloop; + default: + /* + * Anything else Parse error. + */ + errBadCharAfterLt(c); + /* + * Emit a U+003C LESS-THAN SIGN character token + */ + tokenHandler.characters(Tokenizer.LT_GT, 0, 1); + /* + * and reconsume the current input character in + * the data state. + */ + cstart = pos; + reconsume = true; + state = transition(state, Tokenizer.DATA, reconsume, pos); + continue stateloop; + } + } + // FALL THROUGH DON'T REORDER + case TAG_NAME: + tagnameloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Consume the next input character: + */ + switch (c) { + case '\r': + silentCarriageReturn(); + strBufToElementNameString(); + state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos); + break stateloop; + case '\n': + silentLineFeed(); + case ' ': + case '\t': + case '\u000C': + /* + * U+0009 CHARACTER TABULATION U+000A LINE FEED + * (LF) U+000C FORM FEED (FF) U+0020 SPACE + * Switch to the before attribute name state. + */ + strBufToElementNameString(); + state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos); + break tagnameloop; + // continue stateloop; + case '/': + /* + * U+002F SOLIDUS (/) Switch to the self-closing + * start tag state. + */ + strBufToElementNameString(); + state = transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos); + continue stateloop; + case '>': + /* + * U+003E GREATER-THAN SIGN (>) Emit the current + * tag token. + */ + strBufToElementNameString(); + state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos); + if (shouldSuspend) { + break stateloop; + } + /* + * Switch to the data state. 
+ */ + continue stateloop; + case '\u0000': + c = '\uFFFD'; + // fall thru + default: + if (c >= 'A' && c <= 'Z') { + /* + * U+0041 LATIN CAPITAL LETTER A through to + * U+005A LATIN CAPITAL LETTER Z Append the + * lowercase version of the current input + * character (add 0x0020 to the character's + * code point) to the current tag token's + * tag name. + */ + c += 0x20; + } + /* + * Anything else Append the current input + * character to the current tag token's tag + * name. + */ + appendStrBuf(c); + /* + * Stay in the tag name state. + */ + continue; + } + } + // FALLTHRU DON'T REORDER + case BEFORE_ATTRIBUTE_NAME: + beforeattributenameloop: for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + } + /* + * Consume the next input character: + */ + switch (c) { + case '\r': + silentCarriageReturn(); + break stateloop; + case '\n': + silentLineFeed(); + // fall thru + case ' ': + case '\t': + case '\u000C': + /* + * U+0009 CHARACTER TABULATION U+000A LINE FEED + * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay + * in the before attribute name state. + */ + continue; + case '/': + /* + * U+002F SOLIDUS (/) Switch to the self-closing + * start tag state. + */ + state = transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos); + continue stateloop; + case '>': + /* + * U+003E GREATER-THAN SIGN (>) Emit the current + * tag token. + */ + state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos); + if (shouldSuspend) { + break stateloop; + } + /* + * Switch to the data state. + */ + continue stateloop; + case '\u0000': + c = '\uFFFD'; + // fall thru + case '\"': + case '\'': + case '<': + case '=': + /* + * U+0022 QUOTATION MARK (") U+0027 APOSTROPHE + * (') U+003C LESS-THAN SIGN (<) U+003D EQUALS + * SIGN (=) Parse error. + */ + errBadCharBeforeAttributeNameOrNull(c); + /* + * Treat it as per the "anything else" entry + * below. 
+ */ + default: + /* + * Anything else Start a new attribute in the + * current tag token. + */ + if (c >= 'A' && c <= 'Z') { + /* + * U+0041 LATIN CAPITAL LETTER A through to + * U+005A LATIN CAPITAL LETTER Z Set that + * attribute's name to the lowercase version + * of the current input character (add + * 0x0020 to the character's code point) + */ + c += 0x20; + } + // CPPONLY: attributeLine = line; + /* + * Set that attribute's name to the current + * input character, + */ + clearStrBufBeforeUse(); + appendStrBuf(c); + /* + * and its value to the empty string. + */ + // Will do later. + /* + * Switch to the attribute name state. + */ + state = transition(state, Tokenizer.ATTRIBUTE_NAME, reconsume, pos); + break beforeattributenameloop; + // continue stateloop; + } + } + // FALLTHRU DON'T REORDER + case ATTRIBUTE_NAME: + attributenameloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Consume the next input character: + */ + switch (c) { + case '\r': + silentCarriageReturn(); + attributeNameComplete(); + state = transition(state, Tokenizer.AFTER_ATTRIBUTE_NAME, reconsume, pos); + break stateloop; + case '\n': + silentLineFeed(); + // fall thru + case ' ': + case '\t': + case '\u000C': + /* + * U+0009 CHARACTER TABULATION U+000A LINE FEED + * (LF) U+000C FORM FEED (FF) U+0020 SPACE + * Switch to the after attribute name state. + */ + attributeNameComplete(); + state = transition(state, Tokenizer.AFTER_ATTRIBUTE_NAME, reconsume, pos); + continue stateloop; + case '/': + /* + * U+002F SOLIDUS (/) Switch to the self-closing + * start tag state. + */ + attributeNameComplete(); + addAttributeWithoutValue(); + state = transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos); + continue stateloop; + case '=': + /* + * U+003D EQUALS SIGN (=) Switch to the before + * attribute value state. 
+ */ + attributeNameComplete(); + state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_VALUE, reconsume, pos); + break attributenameloop; + // continue stateloop; + case '>': + /* + * U+003E GREATER-THAN SIGN (>) Emit the current + * tag token. + */ + attributeNameComplete(); + addAttributeWithoutValue(); + state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos); + if (shouldSuspend) { + break stateloop; + } + /* + * Switch to the data state. + */ + continue stateloop; + case '\u0000': + c = '\uFFFD'; + // fall thru + case '\"': + case '\'': + case '<': + /* + * U+0022 QUOTATION MARK (") U+0027 APOSTROPHE + * (') U+003C LESS-THAN SIGN (<) Parse error. + */ + errQuoteOrLtInAttributeNameOrNull(c); + /* + * Treat it as per the "anything else" entry + * below. + */ + default: + if (c >= 'A' && c <= 'Z') { + /* + * U+0041 LATIN CAPITAL LETTER A through to + * U+005A LATIN CAPITAL LETTER Z Append the + * lowercase version of the current input + * character (add 0x0020 to the character's + * code point) to the current attribute's + * name. + */ + c += 0x20; + } + /* + * Anything else Append the current input + * character to the current attribute's name. + */ + appendStrBuf(c); + /* + * Stay in the attribute name state. + */ + continue; + } + } + // FALLTHRU DON'T REORDER + case BEFORE_ATTRIBUTE_VALUE: + beforeattributevalueloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Consume the next input character: + */ + switch (c) { + case '\r': + silentCarriageReturn(); + break stateloop; + case '\n': + silentLineFeed(); + // fall thru + case ' ': + case '\t': + case '\u000C': + /* + * U+0009 CHARACTER TABULATION U+000A LINE FEED + * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay + * in the before attribute value state. + */ + continue; + case '"': + /* + * U+0022 QUOTATION MARK (") Switch to the + * attribute value (double-quoted) state. 
+ */ + // CPPONLY: attributeLine = line; + clearStrBufBeforeUse(); + state = transition(state, Tokenizer.ATTRIBUTE_VALUE_DOUBLE_QUOTED, reconsume, pos); + break beforeattributevalueloop; + // continue stateloop; + case '&': + /* + * U+0026 AMPERSAND (&) Switch to the attribute + * value (unquoted) state and reconsume this + * input character. + */ + // CPPONLY: attributeLine = line; + clearStrBufBeforeUse(); + reconsume = true; + state = transition(state, Tokenizer.ATTRIBUTE_VALUE_UNQUOTED, reconsume, pos); + noteUnquotedAttributeValue(); + continue stateloop; + case '\'': + /* + * U+0027 APOSTROPHE (') Switch to the attribute + * value (single-quoted) state. + */ + // CPPONLY: attributeLine = line; + clearStrBufBeforeUse(); + state = transition(state, Tokenizer.ATTRIBUTE_VALUE_SINGLE_QUOTED, reconsume, pos); + continue stateloop; + case '>': + /* + * U+003E GREATER-THAN SIGN (>) Parse error. + */ + errAttributeValueMissing(); + /* + * Emit the current tag token. + */ + addAttributeWithoutValue(); + state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos); + if (shouldSuspend) { + break stateloop; + } + /* + * Switch to the data state. + */ + continue stateloop; + case '\u0000': + c = '\uFFFD'; + // fall thru + case '<': + case '=': + case '`': + /* + * U+003C LESS-THAN SIGN (<) U+003D EQUALS SIGN + * (=) U+0060 GRAVE ACCENT (`) + */ + errLtOrEqualsOrGraveInUnquotedAttributeOrNull(c); + /* + * Treat it as per the "anything else" entry + * below. + */ + default: + // [NOCPP[ + errHtml4NonNameInUnquotedAttribute(c); + // ]NOCPP] + /* + * Anything else Append the current input + * character to the current attribute's value. + */ + // CPPONLY: attributeLine = line; + clearStrBufBeforeUse(); + appendStrBuf(c); + /* + * Switch to the attribute value (unquoted) + * state. 
+ */ + + state = transition(state, Tokenizer.ATTRIBUTE_VALUE_UNQUOTED, reconsume, pos); + noteUnquotedAttributeValue(); + continue stateloop; + } + } + // FALLTHRU DON'T REORDER + case ATTRIBUTE_VALUE_DOUBLE_QUOTED: + attributevaluedoublequotedloop: for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + } + /* + * Consume the next input character: + */ + switch (c) { + case '"': + /* + * U+0022 QUOTATION MARK (") Switch to the after + * attribute value (quoted) state. + */ + addAttributeWithValue(); + + state = transition(state, Tokenizer.AFTER_ATTRIBUTE_VALUE_QUOTED, reconsume, pos); + break attributevaluedoublequotedloop; + // continue stateloop; + case '&': + /* + * U+0026 AMPERSAND (&) Switch to the character + * reference in attribute value state, with the + * additional allowed character being U+0022 + * QUOTATION MARK ("). + */ + assert charRefBufLen == 0: "charRefBufLen not reset after previous use!"; + appendCharRefBuf(c); + setAdditionalAndRememberAmpersandLocation('\"'); + returnState = state; + state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos); + continue stateloop; + case '\r': + appendStrBufCarriageReturn(); + break stateloop; + case '\n': + appendStrBufLineFeed(); + continue; + case '\u0000': + c = '\uFFFD'; + // fall thru + default: + /* + * Anything else Append the current input + * character to the current attribute's value. + */ + appendStrBuf(c); + /* + * Stay in the attribute value (double-quoted) + * state. 
+ */ + continue; + } + } + // FALLTHRU DON'T REORDER + case AFTER_ATTRIBUTE_VALUE_QUOTED: + afterattributevaluequotedloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Consume the next input character: + */ + switch (c) { + case '\r': + silentCarriageReturn(); + state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos); + break stateloop; + case '\n': + silentLineFeed(); + // fall thru + case ' ': + case '\t': + case '\u000C': + /* + * U+0009 CHARACTER TABULATION U+000A LINE FEED + * (LF) U+000C FORM FEED (FF) U+0020 SPACE + * Switch to the before attribute name state. + */ + state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos); + continue stateloop; + case '/': + /* + * U+002F SOLIDUS (/) Switch to the self-closing + * start tag state. + */ + state = transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos); + break afterattributevaluequotedloop; + // continue stateloop; + case '>': + /* + * U+003E GREATER-THAN SIGN (>) Emit the current + * tag token. + */ + state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos); + if (shouldSuspend) { + break stateloop; + } + /* + * Switch to the data state. + */ + continue stateloop; + default: + /* + * Anything else Parse error. + */ + errNoSpaceBetweenAttributes(); + /* + * Reconsume the character in the before + * attribute name state. + */ + reconsume = true; + state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos); + continue stateloop; + } + } + // FALLTHRU DON'T REORDER + case SELF_CLOSING_START_TAG: + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Consume the next input character: + */ + switch (c) { + case '>': + /* + * U+003E GREATER-THAN SIGN (>) Set the self-closing + * flag of the current tag token. Emit the current + * tag token. 
+ */ + // [NOCPP[ + errHtml4XmlVoidSyntax(); + // ]NOCPP] + state = transition(state, emitCurrentTagToken(true, pos), reconsume, pos); + if (shouldSuspend) { + break stateloop; + } + /* + * Switch to the data state. + */ + continue stateloop; + default: + /* Anything else Parse error. */ + errSlashNotFollowedByGt(); + /* + * Reconsume the character in the before attribute + * name state. + */ + reconsume = true; + state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos); + continue stateloop; + } + // XXX reorder point + case ATTRIBUTE_VALUE_UNQUOTED: + for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + } + /* + * Consume the next input character: + */ + switch (c) { + case '\r': + silentCarriageReturn(); + addAttributeWithValue(); + state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos); + break stateloop; + case '\n': + silentLineFeed(); + // fall thru + case ' ': + case '\t': + case '\u000C': + /* + * U+0009 CHARACTER TABULATION U+000A LINE FEED + * (LF) U+000C FORM FEED (FF) U+0020 SPACE + * Switch to the before attribute name state. + */ + addAttributeWithValue(); + state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos); + continue stateloop; + case '&': + /* + * U+0026 AMPERSAND (&) Switch to the character + * reference in attribute value state, with the + * additional allowed character being U+003E + * GREATER-THAN SIGN (>) + */ + assert charRefBufLen == 0: "charRefBufLen not reset after previous use!"; + appendCharRefBuf(c); + setAdditionalAndRememberAmpersandLocation('>'); + returnState = state; + state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos); + continue stateloop; + case '>': + /* + * U+003E GREATER-THAN SIGN (>) Emit the current + * tag token. 
+ */ + addAttributeWithValue(); + state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos); + if (shouldSuspend) { + break stateloop; + } + /* + * Switch to the data state. + */ + continue stateloop; + case '\u0000': + c = '\uFFFD'; + // fall thru + case '<': + case '\"': + case '\'': + case '=': + case '`': + /* + * U+0022 QUOTATION MARK (") U+0027 APOSTROPHE + * (') U+003C LESS-THAN SIGN (<) U+003D EQUALS + * SIGN (=) U+0060 GRAVE ACCENT (`) Parse error. + */ + errUnquotedAttributeValOrNull(c); + /* + * Treat it as per the "anything else" entry + * below. + */ + // fall through + default: + // [NOCPP] + errHtml4NonNameInUnquotedAttribute(c); + // ]NOCPP] + /* + * Anything else Append the current input + * character to the current attribute's value. + */ + appendStrBuf(c); + /* + * Stay in the attribute value (unquoted) state. + */ + continue; + } + } + // XXX reorder point + case AFTER_ATTRIBUTE_NAME: + for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Consume the next input character: + */ + switch (c) { + case '\r': + silentCarriageReturn(); + break stateloop; + case '\n': + silentLineFeed(); + // fall thru + case ' ': + case '\t': + case '\u000C': + /* + * U+0009 CHARACTER TABULATION U+000A LINE FEED + * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay + * in the after attribute name state. + */ + continue; + case '/': + /* + * U+002F SOLIDUS (/) Switch to the self-closing + * start tag state. + */ + addAttributeWithoutValue(); + state = transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos); + continue stateloop; + case '=': + /* + * U+003D EQUALS SIGN (=) Switch to the before + * attribute value state. + */ + state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_VALUE, reconsume, pos); + continue stateloop; + case '>': + /* + * U+003E GREATER-THAN SIGN (>) Emit the current + * tag token. 
+ */ + addAttributeWithoutValue(); + state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos); + if (shouldSuspend) { + break stateloop; + } + /* + * Switch to the data state. + */ + continue stateloop; + case '\u0000': + c = '\uFFFD'; + // fall thru + case '\"': + case '\'': + case '<': + errQuoteOrLtInAttributeNameOrNull(c); + /* + * Treat it as per the "anything else" entry + * below. + */ + default: + addAttributeWithoutValue(); + /* + * Anything else Start a new attribute in the + * current tag token. + */ + if (c >= 'A' && c <= 'Z') { + /* + * U+0041 LATIN CAPITAL LETTER A through to + * U+005A LATIN CAPITAL LETTER Z Set that + * attribute's name to the lowercase version + * of the current input character (add + * 0x0020 to the character's code point) + */ + c += 0x20; + } + /* + * Set that attribute's name to the current + * input character, + */ + clearStrBufBeforeUse(); + appendStrBuf(c); + /* + * and its value to the empty string. + */ + // Will do later. + /* + * Switch to the attribute name state. + */ + state = transition(state, Tokenizer.ATTRIBUTE_NAME, reconsume, pos); + continue stateloop; + } + } + // XXX reorder point + case MARKUP_DECLARATION_OPEN: + markupdeclarationopenloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * If the next two characters are both U+002D + * HYPHEN-MINUS characters (-), consume those two + * characters, create a comment token whose data is the + * empty string, and switch to the comment start state. + * + * Otherwise, if the next seven characters are an ASCII + * case-insensitive match for the word "DOCTYPE", then + * consume those characters and switch to the DOCTYPE + * state. 
+ * + * Otherwise, if the insertion mode is + * "in foreign content" and the current node is not an + * element in the HTML namespace and the next seven + * characters are an case-sensitive match for the string + * "[CDATA[" (the five uppercase letters "CDATA" with a + * U+005B LEFT SQUARE BRACKET character before and + * after), then consume those characters and switch to + * the CDATA section state. + * + * Otherwise, is is a parse error. Switch to the bogus + * comment state. The next character that is consumed, + * if any, is the first character that will be in the + * comment. + */ + switch (c) { + case '-': + clearStrBufBeforeUse(); + appendStrBuf(c); + state = transition(state, Tokenizer.MARKUP_DECLARATION_HYPHEN, reconsume, pos); + break markupdeclarationopenloop; + // continue stateloop; + case 'd': + case 'D': + clearStrBufBeforeUse(); + appendStrBuf(c); + index = 0; + state = transition(state, Tokenizer.MARKUP_DECLARATION_OCTYPE, reconsume, pos); + continue stateloop; + case '[': + if (tokenHandler.cdataSectionAllowed()) { + clearStrBufBeforeUse(); + appendStrBuf(c); + index = 0; + state = transition(state, Tokenizer.CDATA_START, reconsume, pos); + continue stateloop; + } + // else fall through + default: + errBogusComment(); + clearStrBufBeforeUse(); + reconsume = true; + state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos); + continue stateloop; + } + } + // FALLTHRU DON'T REORDER + case MARKUP_DECLARATION_HYPHEN: + markupdeclarationhyphenloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + switch (c) { + case '-': + clearStrBufAfterOneHyphen(); + state = transition(state, Tokenizer.COMMENT_START, reconsume, pos); + break markupdeclarationhyphenloop; + // continue stateloop; + default: + errBogusComment(); + reconsume = true; + state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos); + continue stateloop; + } + } + // FALLTHRU DON'T REORDER + case COMMENT_START: + commentstartloop: for 
(;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Comment start state + * + * + * Consume the next input character: + */ + switch (c) { + case '-': + /* + * U+002D HYPHEN-MINUS (-) Switch to the comment + * start dash state. + */ + appendStrBuf(c); + state = transition(state, Tokenizer.COMMENT_START_DASH, reconsume, pos); + continue stateloop; + case '>': + /* + * U+003E GREATER-THAN SIGN (>) Parse error. + */ + errPrematureEndOfComment(); + /* Emit the comment token. */ + emitComment(0, pos); + /* + * Switch to the data state. + */ + state = transition(state, Tokenizer.DATA, reconsume, pos); + continue stateloop; + case '\r': + appendStrBufCarriageReturn(); + state = transition(state, Tokenizer.COMMENT, reconsume, pos); + break stateloop; + case '\n': + appendStrBufLineFeed(); + state = transition(state, Tokenizer.COMMENT, reconsume, pos); + break commentstartloop; + case '\u0000': + c = '\uFFFD'; + // fall thru + default: + /* + * Anything else Append the input character to + * the comment token's data. + */ + appendStrBuf(c); + /* + * Switch to the comment state. + */ + state = transition(state, Tokenizer.COMMENT, reconsume, pos); + break commentstartloop; + // continue stateloop; + } + } + // FALLTHRU DON'T REORDER + case COMMENT: + commentloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Comment state Consume the next input character: + */ + switch (c) { + case '-': + /* + * U+002D HYPHEN-MINUS (-) Switch to the comment + * end dash state + */ + appendStrBuf(c); + state = transition(state, Tokenizer.COMMENT_END_DASH, reconsume, pos); + break commentloop; + // continue stateloop; + case '\r': + appendStrBufCarriageReturn(); + break stateloop; + case '\n': + appendStrBufLineFeed(); + continue; + case '\u0000': + c = '\uFFFD'; + // fall thru + default: + /* + * Anything else Append the input character to + * the comment token's data. 
+ */ + appendStrBuf(c); + /* + * Stay in the comment state. + */ + continue; + } + } + // FALLTHRU DON'T REORDER + case COMMENT_END_DASH: + commentenddashloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Comment end dash state Consume the next input + * character: + */ + switch (c) { + case '-': + /* + * U+002D HYPHEN-MINUS (-) Switch to the comment + * end state + */ + appendStrBuf(c); + state = transition(state, Tokenizer.COMMENT_END, reconsume, pos); + break commentenddashloop; + // continue stateloop; + case '\r': + appendStrBufCarriageReturn(); + state = transition(state, Tokenizer.COMMENT, reconsume, pos); + break stateloop; + case '\n': + appendStrBufLineFeed(); + state = transition(state, Tokenizer.COMMENT, reconsume, pos); + continue stateloop; + case '\u0000': + c = '\uFFFD'; + // fall thru + default: + /* + * Anything else Append a U+002D HYPHEN-MINUS + * (-) character and the input character to the + * comment token's data. + */ + appendStrBuf(c); + /* + * Switch to the comment state. + */ + state = transition(state, Tokenizer.COMMENT, reconsume, pos); + continue stateloop; + } + } + // FALLTHRU DON'T REORDER + case COMMENT_END: + commentendloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Comment end dash state Consume the next input + * character: + */ + switch (c) { + case '>': + /* + * U+003E GREATER-THAN SIGN (>) Emit the comment + * token. + */ + emitComment(2, pos); + /* + * Switch to the data state. + */ + state = transition(state, Tokenizer.DATA, reconsume, pos); + continue stateloop; + case '-': + /* U+002D HYPHEN-MINUS (-) Parse error. */ + /* + * Append a U+002D HYPHEN-MINUS (-) character to + * the comment token's data. + */ + adjustDoubleHyphenAndAppendToStrBufAndErr(c); + /* + * Stay in the comment end state. 
+ */ + continue; + case '\r': + adjustDoubleHyphenAndAppendToStrBufCarriageReturn(); + state = transition(state, Tokenizer.COMMENT, reconsume, pos); + break stateloop; + case '\n': + adjustDoubleHyphenAndAppendToStrBufLineFeed(); + state = transition(state, Tokenizer.COMMENT, reconsume, pos); + continue stateloop; + case '!': + errHyphenHyphenBang(); + appendStrBuf(c); + state = transition(state, Tokenizer.COMMENT_END_BANG, reconsume, pos); + continue stateloop; + case '\u0000': + c = '\uFFFD'; + // fall thru + default: + /* + * Append two U+002D HYPHEN-MINUS (-) characters + * and the input character to the comment + * token's data. + */ + adjustDoubleHyphenAndAppendToStrBufAndErr(c); + /* + * Switch to the comment state. + */ + state = transition(state, Tokenizer.COMMENT, reconsume, pos); + continue stateloop; + } + } + // XXX reorder point + case COMMENT_END_BANG: + for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Comment end bang state + * + * Consume the next input character: + */ + switch (c) { + case '>': + /* + * U+003E GREATER-THAN SIGN (>) Emit the comment + * token. + */ + emitComment(3, pos); + /* + * Switch to the data state. + */ + state = transition(state, Tokenizer.DATA, reconsume, pos); + continue stateloop; + case '-': + /* + * Append two U+002D HYPHEN-MINUS (-) characters + * and a U+0021 EXCLAMATION MARK (!) character + * to the comment token's data. + */ + appendStrBuf(c); + /* + * Switch to the comment end dash state. + */ + state = transition(state, Tokenizer.COMMENT_END_DASH, reconsume, pos); + continue stateloop; + case '\r': + appendStrBufCarriageReturn(); + break stateloop; + case '\n': + appendStrBufLineFeed(); + continue; + case '\u0000': + c = '\uFFFD'; + // fall thru + default: + /* + * Anything else Append two U+002D HYPHEN-MINUS + * (-) characters, a U+0021 EXCLAMATION MARK (!) + * character, and the input character to the + * comment token's data. Switch to the comment + * state. 
+ */ + appendStrBuf(c); + /* + * Switch to the comment state. + */ + state = transition(state, Tokenizer.COMMENT, reconsume, pos); + continue stateloop; + } + } + // XXX reorder point + case COMMENT_START_DASH: + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Comment start dash state + * + * Consume the next input character: + */ + switch (c) { + case '-': + /* + * U+002D HYPHEN-MINUS (-) Switch to the comment end + * state + */ + appendStrBuf(c); + state = transition(state, Tokenizer.COMMENT_END, reconsume, pos); + continue stateloop; + case '>': + errPrematureEndOfComment(); + /* Emit the comment token. */ + emitComment(1, pos); + /* + * Switch to the data state. + */ + state = transition(state, Tokenizer.DATA, reconsume, pos); + continue stateloop; + case '\r': + appendStrBufCarriageReturn(); + state = transition(state, Tokenizer.COMMENT, reconsume, pos); + break stateloop; + case '\n': + appendStrBufLineFeed(); + state = transition(state, Tokenizer.COMMENT, reconsume, pos); + continue stateloop; + case '\u0000': + c = '\uFFFD'; + // fall thru + default: + /* + * Append a U+002D HYPHEN-MINUS character (-) and + * the current input character to the comment + * token's data. + */ + appendStrBuf(c); + /* + * Switch to the comment state. 
+ */ + state = transition(state, Tokenizer.COMMENT, reconsume, pos); + continue stateloop; + } + // XXX reorder point + case CDATA_START: + for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + if (index < 6) { // CDATA_LSQB.length + if (c == Tokenizer.CDATA_LSQB[index]) { + appendStrBuf(c); + } else { + errBogusComment(); + reconsume = true; + state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos); + continue stateloop; + } + index++; + continue; + } else { + clearStrBufAfterUse(); + cstart = pos; // start coalescing + reconsume = true; + state = transition(state, Tokenizer.CDATA_SECTION, reconsume, pos); + break; // FALL THROUGH continue stateloop; + } + } + // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER + case CDATA_SECTION: + cdatasectionloop: for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + } + switch (c) { + case ']': + flushChars(buf, pos); + state = transition(state, Tokenizer.CDATA_RSQB, reconsume, pos); + break cdatasectionloop; // FALL THROUGH + case '\u0000': + emitReplacementCharacter(buf, pos); + continue; + case '\r': + emitCarriageReturn(buf, pos); + break stateloop; + case '\n': + silentLineFeed(); + // fall thru + default: + continue; + } + } + // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER + case CDATA_RSQB: + cdatarsqb: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + switch (c) { + case ']': + state = transition(state, Tokenizer.CDATA_RSQB_RSQB, reconsume, pos); + break cdatarsqb; + default: + tokenHandler.characters(Tokenizer.RSQB_RSQB, 0, + 1); + cstart = pos; + reconsume = true; + state = transition(state, Tokenizer.CDATA_SECTION, reconsume, pos); + continue stateloop; + } + } + // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER + case CDATA_RSQB_RSQB: + cdatarsqbrsqb: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + switch 
(c) { + case ']': + // Saw a third ]. Emit one ] (logically the + // first one) and stay in this state to + // remember that the last two characters seen + // have been ]]. + tokenHandler.characters(Tokenizer.RSQB_RSQB, 0, 1); + continue; + case '>': + cstart = pos + 1; + state = transition(state, Tokenizer.DATA, reconsume, pos); + continue stateloop; + default: + tokenHandler.characters(Tokenizer.RSQB_RSQB, 0, 2); + cstart = pos; + reconsume = true; + state = transition(state, Tokenizer.CDATA_SECTION, reconsume, pos); + continue stateloop; + } + } + // XXX reorder point + case ATTRIBUTE_VALUE_SINGLE_QUOTED: + attributevaluesinglequotedloop: for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + } + /* + * Consume the next input character: + */ + switch (c) { + case '\'': + /* + * U+0027 APOSTROPHE (') Switch to the after + * attribute value (quoted) state. + */ + addAttributeWithValue(); + + state = transition(state, Tokenizer.AFTER_ATTRIBUTE_VALUE_QUOTED, reconsume, pos); + continue stateloop; + case '&': + /* + * U+0026 AMPERSAND (&) Switch to the character + * reference in attribute value state, with the + * + additional allowed character being U+0027 + * APOSTROPHE ('). + */ + assert charRefBufLen == 0: "charRefBufLen not reset after previous use!"; + appendCharRefBuf(c); + setAdditionalAndRememberAmpersandLocation('\''); + returnState = state; + state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos); + break attributevaluesinglequotedloop; + // continue stateloop; + case '\r': + appendStrBufCarriageReturn(); + break stateloop; + case '\n': + appendStrBufLineFeed(); + continue; + case '\u0000': + c = '\uFFFD'; + // fall thru + default: + /* + * Anything else Append the current input + * character to the current attribute's value. + */ + appendStrBuf(c); + /* + * Stay in the attribute value (double-quoted) + * state. 
+ */ + continue; + } + } + // FALLTHRU DON'T REORDER + case CONSUME_CHARACTER_REFERENCE: + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Unlike the definition is the spec, this state does not + * return a value and never requires the caller to + * backtrack. This state takes care of emitting characters + * or appending to the current attribute value. It also + * takes care of that in the case when consuming the + * character reference fails. + */ + /* + * This section defines how to consume a character + * reference. This definition is used when parsing character + * references in text and in attributes. + * + * The behavior depends on the identity of the next + * character (the one immediately after the U+0026 AMPERSAND + * character): + */ + switch (c) { + case ' ': + case '\t': + case '\n': + case '\r': // we'll reconsume! + case '\u000C': + case '<': + case '&': + case '\u0000': + emitOrAppendCharRefBuf(returnState); + if ((returnState & DATA_AND_RCDATA_MASK) == 0) { + cstart = pos; + } + reconsume = true; + state = transition(state, returnState, reconsume, pos); + continue stateloop; + case '#': + /* + * U+0023 NUMBER SIGN (#) Consume the U+0023 NUMBER + * SIGN. + */ + appendCharRefBuf('#'); + state = transition(state, Tokenizer.CONSUME_NCR, reconsume, pos); + continue stateloop; + default: + if (c == additional) { + emitOrAppendCharRefBuf(returnState); + reconsume = true; + state = transition(state, returnState, reconsume, pos); + continue stateloop; + } + if (c >= 'a' && c <= 'z') { + firstCharKey = c - 'a' + 26; + } else if (c >= 'A' && c <= 'Z') { + firstCharKey = c - 'A'; + } else { + // No match + /* + * If no match can be made, then this is a parse + * error. 
+ */ + errNoNamedCharacterMatch(); + emitOrAppendCharRefBuf(returnState); + if ((returnState & DATA_AND_RCDATA_MASK) == 0) { + cstart = pos; + } + reconsume = true; + state = transition(state, returnState, reconsume, pos); + continue stateloop; + } + // Didn't fail yet + appendCharRefBuf(c); + state = transition(state, Tokenizer.CHARACTER_REFERENCE_HILO_LOOKUP, reconsume, pos); + // FALL THROUGH continue stateloop; + } + // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER + case CHARACTER_REFERENCE_HILO_LOOKUP: + { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * The data structure is as follows: + * + * HILO_ACCEL is a two-dimensional int array whose major + * index corresponds to the second character of the + * character reference (code point as index) and the + * minor index corresponds to the first character of the + * character reference (packed so that A-Z runs from 0 + * to 25 and a-z runs from 26 to 51). This layout makes + * it easier to use the sparseness of the data structure + * to omit parts of it: The second dimension of the + * table is null when no character reference starts with + * the character corresponding to that row. + * + * The int value HILO_ACCEL (by these indeces) is zero + * if there exists no character reference starting with + * that two-letter prefix. Otherwise, the value is an + * int that packs two shorts so that the higher short is + * the index of the highest character reference name + * with that prefix in NAMES and the lower short + * corresponds to the index of the lowest character + * reference name with that prefix. (It happens that the + * first two character reference names share their + * prefix so the packed int cannot be 0 by packing the + * two shorts.) + * + * NAMES is an array of byte arrays where each byte + * array encodes the name of a character references as + * ASCII. The names omit the first two letters of the + * name. 
(Since storing the first two letters would be + * redundant with the data contained in HILO_ACCEL.) The + * entries are lexically sorted. + * + * For a given index in NAMES, the same index in VALUES + * contains the corresponding expansion as an array of + * two UTF-16 code units (either the character and + * U+0000 or a suggogate pair). + */ + int hilo = 0; + if (c <= 'z') { + @Const @NoLength int[] row = NamedCharactersAccel.HILO_ACCEL[c]; + if (row != null) { + hilo = row[firstCharKey]; + } + } + if (hilo == 0) { + /* + * If no match can be made, then this is a parse + * error. + */ + errNoNamedCharacterMatch(); + emitOrAppendCharRefBuf(returnState); + if ((returnState & DATA_AND_RCDATA_MASK) == 0) { + cstart = pos; + } + reconsume = true; + state = transition(state, returnState, reconsume, pos); + continue stateloop; + } + // Didn't fail yet + appendCharRefBuf(c); + lo = hilo & 0xFFFF; + hi = hilo >> 16; + entCol = -1; + candidate = -1; + charRefBufMark = 0; + state = transition(state, Tokenizer.CHARACTER_REFERENCE_TAIL, reconsume, pos); + // FALL THROUGH continue stateloop; + } + case CHARACTER_REFERENCE_TAIL: + outer: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + entCol++; + /* + * Consume the maximum number of characters possible, + * with the consumed characters matching one of the + * identifiers in the first column of the named + * character references table (in a case-sensitive + * manner). 
+ */ + loloop: for (;;) { + if (hi < lo) { + break outer; + } + if (entCol == NamedCharacters.NAMES[lo].length()) { + candidate = lo; + charRefBufMark = charRefBufLen; + lo++; + } else if (entCol > NamedCharacters.NAMES[lo].length()) { + break outer; + } else if (c > NamedCharacters.NAMES[lo].charAt(entCol)) { + lo++; + } else { + break loloop; + } + } + + hiloop: for (;;) { + if (hi < lo) { + break outer; + } + if (entCol == NamedCharacters.NAMES[hi].length()) { + break hiloop; + } + if (entCol > NamedCharacters.NAMES[hi].length()) { + break outer; + } else if (c < NamedCharacters.NAMES[hi].charAt(entCol)) { + hi--; + } else { + break hiloop; + } + } + + if (c == ';') { + // If we see a semicolon, there cannot be a + // longer match. Break the loop. However, before + // breaking, take the longest match so far as the + // candidate, if we are just about to complete a + // match. + if (entCol + 1 == NamedCharacters.NAMES[lo].length()) { + candidate = lo; + charRefBufMark = charRefBufLen; + } + break outer; + } + + if (hi < lo) { + break outer; + } + appendCharRefBuf(c); + continue; + } + + if (candidate == -1) { + // reconsume deals with CR, LF or nul + /* + * If no match can be made, then this is a parse error. + */ + errNoNamedCharacterMatch(); + emitOrAppendCharRefBuf(returnState); + if ((returnState & DATA_AND_RCDATA_MASK) == 0) { + cstart = pos; + } + reconsume = true; + state = transition(state, returnState, reconsume, pos); + continue stateloop; + } else { + // c can't be CR, LF or nul if we got here + @Const @CharacterName String candidateName = NamedCharacters.NAMES[candidate]; + if (candidateName.length() == 0 + || candidateName.charAt(candidateName.length() - 1) != ';') { + /* + * If the last character matched is not a U+003B + * SEMICOLON (;), there is a parse error. 
+ */ + if ((returnState & DATA_AND_RCDATA_MASK) != 0) { + /* + * If the entity is being consumed as part of an + * attribute, and the last character matched is + * not a U+003B SEMICOLON (;), + */ + char ch; + if (charRefBufMark == charRefBufLen) { + ch = c; + } else { + ch = charRefBuf[charRefBufMark]; + } + if (ch == '=' || (ch >= '0' && ch <= '9') + || (ch >= 'A' && ch <= 'Z') + || (ch >= 'a' && ch <= 'z')) { + /* + * and the next character is either a U+003D + * EQUALS SIGN character (=) or in the range + * U+0030 DIGIT ZERO to U+0039 DIGIT NINE, + * U+0041 LATIN CAPITAL LETTER A to U+005A + * LATIN CAPITAL LETTER Z, or U+0061 LATIN + * SMALL LETTER A to U+007A LATIN SMALL + * LETTER Z, then, for historical reasons, + * all the characters that were matched + * after the U+0026 AMPERSAND (&) must be + * unconsumed, and nothing is returned. + */ + errNoNamedCharacterMatch(); + appendCharRefBufToStrBuf(); + reconsume = true; + state = transition(state, returnState, reconsume, pos); + continue stateloop; + } + } + if ((returnState & DATA_AND_RCDATA_MASK) != 0) { + errUnescapedAmpersandInterpretedAsCharacterReference(); + } else { + errNotSemicolonTerminated(); + } + } + + /* + * Otherwise, return a character token for the character + * corresponding to the entity name (as given by the + * second column of the named character references + * table). + */ + // CPPONLY: completedNamedCharacterReference(); + @Const @NoLength char[] val = NamedCharacters.VALUES[candidate]; + if ( + // [NOCPP[ + val.length == 1 + // ]NOCPP] + // CPPONLY: val[1] == 0 + ) { + emitOrAppendOne(val, returnState); + } else { + emitOrAppendTwo(val, returnState); + } + // this is so complicated! 
+ if (charRefBufMark < charRefBufLen) { + if ((returnState & DATA_AND_RCDATA_MASK) != 0) { + appendStrBuf(charRefBuf, charRefBufMark, + charRefBufLen - charRefBufMark); + } else { + tokenHandler.characters(charRefBuf, charRefBufMark, + charRefBufLen - charRefBufMark); + } + } + // charRefBufLen will be zeroed below! + + // Check if we broke out early with c being the last + // character that matched as opposed to being the + // first one that didn't match. In the case of an + // early break, the next run on text should start + // *after* the current character and the current + // character shouldn't be reconsumed. + boolean earlyBreak = (c == ';' && charRefBufMark == charRefBufLen); + charRefBufLen = 0; + if ((returnState & DATA_AND_RCDATA_MASK) == 0) { + cstart = earlyBreak ? pos + 1 : pos; + } + reconsume = !earlyBreak; + state = transition(state, returnState, reconsume, pos); + continue stateloop; + /* + * If the markup contains I'm ¬it; I tell you, the + * entity is parsed as "not", as in, I'm ¬it; I tell + * you. But if the markup was I'm ∉ I tell you, + * the entity would be parsed as "notin;", resulting in + * I'm ∉ I tell you. + */ + } + // XXX reorder point + case CONSUME_NCR: + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + value = 0; + seenDigits = false; + /* + * The behavior further depends on the character after the + * U+0023 NUMBER SIGN: + */ + switch (c) { + case 'x': + case 'X': + + /* + * U+0078 LATIN SMALL LETTER X U+0058 LATIN CAPITAL + * LETTER X Consume the X. + * + * Follow the steps below, but using the range of + * characters U+0030 DIGIT ZERO through to U+0039 + * DIGIT NINE, U+0061 LATIN SMALL LETTER A through + * to U+0066 LATIN SMALL LETTER F, and U+0041 LATIN + * CAPITAL LETTER A, through to U+0046 LATIN CAPITAL + * LETTER F (in other words, 0-9, A-F, a-f). + * + * When it comes to interpreting the number, + * interpret it as a hexadecimal number. 
+ */ + appendCharRefBuf(c); + state = transition(state, Tokenizer.HEX_NCR_LOOP, reconsume, pos); + continue stateloop; + default: + /* + * Anything else Follow the steps below, but using + * the range of characters U+0030 DIGIT ZERO through + * to U+0039 DIGIT NINE (i.e. just 0-9). + * + * When it comes to interpreting the number, + * interpret it as a decimal number. + */ + reconsume = true; + state = transition(state, Tokenizer.DECIMAL_NRC_LOOP, reconsume, pos); + // FALL THROUGH continue stateloop; + } + // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER + case DECIMAL_NRC_LOOP: + decimalloop: for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + } + /* + * Consume as many characters as match the range of + * characters given above. + */ + assert value >= 0: "value must not become negative."; + if (c >= '0' && c <= '9') { + seenDigits = true; + // Avoid overflow + if (value <= 0x10FFFF) { + value *= 10; + value += c - '0'; + } + continue; + } else if (c == ';') { + if (seenDigits) { + if ((returnState & DATA_AND_RCDATA_MASK) == 0) { + cstart = pos + 1; + } + state = transition(state, Tokenizer.HANDLE_NCR_VALUE, reconsume, pos); + // FALL THROUGH continue stateloop; + break decimalloop; + } else { + errNoDigitsInNCR(); + appendCharRefBuf(';'); + emitOrAppendCharRefBuf(returnState); + if ((returnState & DATA_AND_RCDATA_MASK) == 0) { + cstart = pos + 1; + } + state = transition(state, returnState, reconsume, pos); + continue stateloop; + } + } else { + /* + * If no characters match the range, then don't + * consume any characters (and unconsume the U+0023 + * NUMBER SIGN character and, if appropriate, the X + * character). This is a parse error; nothing is + * returned. + * + * Otherwise, if the next character is a U+003B + * SEMICOLON, consume that too. If it isn't, there + * is a parse error. 
+ */ + if (!seenDigits) { + errNoDigitsInNCR(); + emitOrAppendCharRefBuf(returnState); + if ((returnState & DATA_AND_RCDATA_MASK) == 0) { + cstart = pos; + } + reconsume = true; + state = transition(state, returnState, reconsume, pos); + continue stateloop; + } else { + errCharRefLacksSemicolon(); + if ((returnState & DATA_AND_RCDATA_MASK) == 0) { + cstart = pos; + } + reconsume = true; + state = transition(state, Tokenizer.HANDLE_NCR_VALUE, reconsume, pos); + // FALL THROUGH continue stateloop; + break decimalloop; + } + } + } + // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER + case HANDLE_NCR_VALUE: + // WARNING previous state sets reconsume + // We are not going to emit the contents of charRefBuf. + charRefBufLen = 0; + // XXX inline this case if the method size can take it + handleNcrValue(returnState); + state = transition(state, returnState, reconsume, pos); + continue stateloop; + // XXX reorder point + case HEX_NCR_LOOP: + for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Consume as many characters as match the range of + * characters given above. 
+ */ + assert value >= 0: "value must not become negative."; + if (c >= '0' && c <= '9') { + seenDigits = true; + // Avoid overflow + if (value <= 0x10FFFF) { + value *= 16; + value += c - '0'; + } + continue; + } else if (c >= 'A' && c <= 'F') { + seenDigits = true; + // Avoid overflow + if (value <= 0x10FFFF) { + value *= 16; + value += c - 'A' + 10; + } + continue; + } else if (c >= 'a' && c <= 'f') { + seenDigits = true; + // Avoid overflow + if (value <= 0x10FFFF) { + value *= 16; + value += c - 'a' + 10; + } + continue; + } else if (c == ';') { + if (seenDigits) { + if ((returnState & DATA_AND_RCDATA_MASK) == 0) { + cstart = pos + 1; + } + state = transition(state, Tokenizer.HANDLE_NCR_VALUE, reconsume, pos); + continue stateloop; + } else { + errNoDigitsInNCR(); + appendCharRefBuf(';'); + emitOrAppendCharRefBuf(returnState); + if ((returnState & DATA_AND_RCDATA_MASK) == 0) { + cstart = pos + 1; + } + state = transition(state, returnState, reconsume, pos); + continue stateloop; + } + } else { + /* + * If no characters match the range, then don't + * consume any characters (and unconsume the U+0023 + * NUMBER SIGN character and, if appropriate, the X + * character). This is a parse error; nothing is + * returned. + * + * Otherwise, if the next character is a U+003B + * SEMICOLON, consume that too. If it isn't, there + * is a parse error. 
+ */ + if (!seenDigits) { + errNoDigitsInNCR(); + emitOrAppendCharRefBuf(returnState); + if ((returnState & DATA_AND_RCDATA_MASK) == 0) { + cstart = pos; + } + reconsume = true; + state = transition(state, returnState, reconsume, pos); + continue stateloop; + } else { + errCharRefLacksSemicolon(); + if ((returnState & DATA_AND_RCDATA_MASK) == 0) { + cstart = pos; + } + reconsume = true; + state = transition(state, Tokenizer.HANDLE_NCR_VALUE, reconsume, pos); + continue stateloop; + } + } + } + // XXX reorder point + case PLAINTEXT: + plaintextloop: for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + } + switch (c) { + case '\u0000': + emitPlaintextReplacementCharacter(buf, pos); + continue; + case '\r': + emitCarriageReturn(buf, pos); + break stateloop; + case '\n': + silentLineFeed(); + default: + /* + * Anything else Emit the current input + * character as a character token. Stay in the + * RAWTEXT state. + */ + continue; + } + } + // XXX reorder point + case CLOSE_TAG_OPEN: + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Otherwise, if the content model flag is set to the PCDATA + * state, or if the next few characters do match that tag + * name, consume the next input character: + */ + switch (c) { + case '>': + /* U+003E GREATER-THAN SIGN (>) Parse error. */ + errLtSlashGt(); + /* + * Switch to the data state. + */ + cstart = pos + 1; + state = transition(state, Tokenizer.DATA, reconsume, pos); + continue stateloop; + case '\r': + silentCarriageReturn(); + /* Anything else Parse error. */ + errGarbageAfterLtSlash(); + /* + * Switch to the bogus comment state. + */ + clearStrBufBeforeUse(); + appendStrBuf('\n'); + state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos); + break stateloop; + case '\n': + silentLineFeed(); + /* Anything else Parse error. */ + errGarbageAfterLtSlash(); + /* + * Switch to the bogus comment state. 
+ */ + clearStrBufBeforeUse(); + appendStrBuf(c); + state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos); + continue stateloop; + case '\u0000': + c = '\uFFFD'; + // fall thru + default: + if (c >= 'A' && c <= 'Z') { + c += 0x20; + } + if (c >= 'a' && c <= 'z') { + /* + * U+0061 LATIN SMALL LETTER A through to U+007A + * LATIN SMALL LETTER Z Create a new end tag + * token, + */ + endTag = true; + /* + * set its tag name to the input character, + */ + clearStrBufBeforeUse(); + appendStrBuf(c); + /* + * then switch to the tag name state. (Don't + * emit the token yet; further details will be + * filled in before it is emitted.) + */ + state = transition(state, Tokenizer.TAG_NAME, reconsume, pos); + continue stateloop; + } else { + /* Anything else Parse error. */ + errGarbageAfterLtSlash(); + /* + * Switch to the bogus comment state. + */ + clearStrBufBeforeUse(); + appendStrBuf(c); + state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos); + continue stateloop; + } + } + // XXX reorder point + case RCDATA: + rcdataloop: for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + } + switch (c) { + case '&': + /* + * U+0026 AMPERSAND (&) Switch to the character + * reference in RCDATA state. + */ + flushChars(buf, pos); + assert charRefBufLen == 0: "charRefBufLen not reset after previous use!"; + appendCharRefBuf(c); + setAdditionalAndRememberAmpersandLocation('\u0000'); + returnState = state; + state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos); + continue stateloop; + case '<': + /* + * U+003C LESS-THAN SIGN (<) Switch to the + * RCDATA less-than sign state. 
+ */ + flushChars(buf, pos); + + returnState = state; + state = transition(state, Tokenizer.RAWTEXT_RCDATA_LESS_THAN_SIGN, reconsume, pos); + continue stateloop; + case '\u0000': + emitReplacementCharacter(buf, pos); + continue; + case '\r': + emitCarriageReturn(buf, pos); + break stateloop; + case '\n': + silentLineFeed(); + default: + /* + * Emit the current input character as a + * character token. Stay in the RCDATA state. + */ + continue; + } + } + // XXX reorder point + case RAWTEXT: + rawtextloop: for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + } + switch (c) { + case '<': + /* + * U+003C LESS-THAN SIGN (<) Switch to the + * RAWTEXT less-than sign state. + */ + flushChars(buf, pos); + + returnState = state; + state = transition(state, Tokenizer.RAWTEXT_RCDATA_LESS_THAN_SIGN, reconsume, pos); + break rawtextloop; + // FALL THRU continue stateloop; + case '\u0000': + emitReplacementCharacter(buf, pos); + continue; + case '\r': + emitCarriageReturn(buf, pos); + break stateloop; + case '\n': + silentLineFeed(); + default: + /* + * Emit the current input character as a + * character token. Stay in the RAWTEXT state. + */ + continue; + } + } + // XXX fallthru don't reorder + case RAWTEXT_RCDATA_LESS_THAN_SIGN: + rawtextrcdatalessthansignloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + switch (c) { + case '/': + /* + * U+002F SOLIDUS (/) Set the temporary buffer + * to the empty string. Switch to the script + * data end tag open state. 
+ */ + index = 0; + clearStrBufBeforeUse(); + state = transition(state, Tokenizer.NON_DATA_END_TAG_NAME, reconsume, pos); + break rawtextrcdatalessthansignloop; + // FALL THRU continue stateloop; + default: + /* + * Otherwise, emit a U+003C LESS-THAN SIGN + * character token + */ + tokenHandler.characters(Tokenizer.LT_GT, 0, 1); + /* + * and reconsume the current input character in + * the data state. + */ + cstart = pos; + reconsume = true; + state = transition(state, returnState, reconsume, pos); + continue stateloop; + } + } + // XXX fall thru. don't reorder. + case NON_DATA_END_TAG_NAME: + for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * ASSERT! when entering this state, set index to 0 and + * call clearStrBufBeforeUse(); Let's implement the above + * without lookahead. strBuf is the 'temporary buffer'. + */ + if (endTagExpectationAsArray == null) { + tokenHandler.characters(Tokenizer.LT_SOLIDUS, + 0, 2); + cstart = pos; + reconsume = true; + state = transition(state, returnState, reconsume, pos); + continue stateloop; + } else if (index < endTagExpectationAsArray.length) { + char e = endTagExpectationAsArray[index]; + char folded = c; + if (c >= 'A' && c <= 'Z') { + folded += 0x20; + } + if (folded != e) { + // [NOCPP[ + errHtml4LtSlashInRcdata(folded); + // ]NOCPP] + tokenHandler.characters(Tokenizer.LT_SOLIDUS, + 0, 2); + emitStrBuf(); + cstart = pos; + reconsume = true; + state = transition(state, returnState, reconsume, pos); + continue stateloop; + } + appendStrBuf(c); + index++; + continue; + } else { + endTag = true; + // XXX replace contentModelElement with different + // type + tagName = endTagExpectation; + switch (c) { + case '\r': + silentCarriageReturn(); + clearStrBufAfterUse(); // strBuf not used + state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos); + break stateloop; + case '\n': + silentLineFeed(); + // fall thru + case ' ': + case '\t': + case '\u000C': + /* + * U+0009 
CHARACTER TABULATION U+000A LINE + * FEED (LF) U+000C FORM FEED (FF) U+0020 + * SPACE If the current end tag token is an + * appropriate end tag token, then switch to + * the before attribute name state. + */ + clearStrBufAfterUse(); // strBuf not used + state = transition(state, Tokenizer.BEFORE_ATTRIBUTE_NAME, reconsume, pos); + continue stateloop; + case '/': + /* + * U+002F SOLIDUS (/) If the current end tag + * token is an appropriate end tag token, + * then switch to the self-closing start tag + * state. + */ + clearStrBufAfterUse(); // strBuf not used + state = transition(state, Tokenizer.SELF_CLOSING_START_TAG, reconsume, pos); + continue stateloop; + case '>': + /* + * U+003E GREATER-THAN SIGN (>) If the + * current end tag token is an appropriate + * end tag token, then emit the current tag + * token and switch to the data state. + */ + clearStrBufAfterUse(); // strBuf not used + state = transition(state, emitCurrentTagToken(false, pos), reconsume, pos); + if (shouldSuspend) { + break stateloop; + } + continue stateloop; + default: + /* + * Emit a U+003C LESS-THAN SIGN character + * token, a U+002F SOLIDUS character token, + * a character token for each of the + * characters in the temporary buffer (in + * the order they were added to the buffer), + * and reconsume the current input character + * in the RAWTEXT state. 
+ */ + // [NOCPP[ + errWarnLtSlashInRcdata(); + // ]NOCPP] + tokenHandler.characters( + Tokenizer.LT_SOLIDUS, 0, 2); + emitStrBuf(); + cstart = pos; // don't drop the + // character + reconsume = true; + state = transition(state, returnState, reconsume, pos); + continue stateloop; + } + } + } + // XXX reorder point + // BEGIN HOTSPOT WORKAROUND + case BOGUS_COMMENT: + boguscommentloop: for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + } + /* + * Consume every character up to and including the first + * U+003E GREATER-THAN SIGN character (>) or the end of + * the file (EOF), whichever comes first. Emit a comment + * token whose data is the concatenation of all the + * characters starting from and including the character + * that caused the state machine to switch into the + * bogus comment state, up to and including the + * character immediately before the last consumed + * character (i.e. up to the character just before the + * U+003E or EOF character). (If the comment was started + * by the end of the file (EOF), the token is empty.) + * + * Switch to the data state. + * + * If the end of the file was reached, reconsume the EOF + * character. 
+ */ + switch (c) { + case '>': + emitComment(0, pos); + state = transition(state, Tokenizer.DATA, reconsume, pos); + continue stateloop; + case '-': + appendStrBuf(c); + state = transition(state, Tokenizer.BOGUS_COMMENT_HYPHEN, reconsume, pos); + break boguscommentloop; + case '\r': + appendStrBufCarriageReturn(); + break stateloop; + case '\n': + appendStrBufLineFeed(); + continue; + case '\u0000': + c = '\uFFFD'; + // fall thru + default: + appendStrBuf(c); + continue; + } + } + // FALLTHRU DON'T REORDER + case BOGUS_COMMENT_HYPHEN: + boguscommenthyphenloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + switch (c) { + case '>': + // [NOCPP[ + maybeAppendSpaceToBogusComment(); + // ]NOCPP] + emitComment(0, pos); + state = transition(state, Tokenizer.DATA, reconsume, pos); + continue stateloop; + case '-': + appendSecondHyphenToBogusComment(); + continue boguscommenthyphenloop; + case '\r': + appendStrBufCarriageReturn(); + state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos); + break stateloop; + case '\n': + appendStrBufLineFeed(); + state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos); + continue stateloop; + case '\u0000': + c = '\uFFFD'; + // fall thru + default: + appendStrBuf(c); + state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos); + continue stateloop; + } + } + // XXX reorder point + case SCRIPT_DATA: + scriptdataloop: for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + } + switch (c) { + case '<': + /* + * U+003C LESS-THAN SIGN (<) Switch to the + * script data less-than sign state. 
+ */ + flushChars(buf, pos); + returnState = state; + state = transition(state, Tokenizer.SCRIPT_DATA_LESS_THAN_SIGN, reconsume, pos); + break scriptdataloop; // FALL THRU continue + // stateloop; + case '\u0000': + emitReplacementCharacter(buf, pos); + continue; + case '\r': + emitCarriageReturn(buf, pos); + break stateloop; + case '\n': + silentLineFeed(); + default: + /* + * Anything else Emit the current input + * character as a character token. Stay in the + * script data state. + */ + continue; + } + } + // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER + case SCRIPT_DATA_LESS_THAN_SIGN: + scriptdatalessthansignloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + switch (c) { + case '/': + /* + * U+002F SOLIDUS (/) Set the temporary buffer + * to the empty string. Switch to the script + * data end tag open state. + */ + index = 0; + clearStrBufBeforeUse(); + state = transition(state, Tokenizer.NON_DATA_END_TAG_NAME, reconsume, pos); + continue stateloop; + case '!': + tokenHandler.characters(Tokenizer.LT_GT, 0, 1); + cstart = pos; + state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPE_START, reconsume, pos); + break scriptdatalessthansignloop; // FALL THRU + // continue + // stateloop; + default: + /* + * Otherwise, emit a U+003C LESS-THAN SIGN + * character token + */ + tokenHandler.characters(Tokenizer.LT_GT, 0, 1); + /* + * and reconsume the current input character in + * the data state. + */ + cstart = pos; + reconsume = true; + state = transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos); + continue stateloop; + } + } + // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER + case SCRIPT_DATA_ESCAPE_START: + scriptdataescapestartloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Consume the next input character: + */ + switch (c) { + case '-': + /* + * U+002D HYPHEN-MINUS (-) Emit a U+002D + * HYPHEN-MINUS character token. 
Switch to the + * script data escape start dash state. + */ + state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPE_START_DASH, reconsume, pos); + break scriptdataescapestartloop; // FALL THRU + // continue + // stateloop; + default: + /* + * Anything else Reconsume the current input + * character in the script data state. + */ + reconsume = true; + state = transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos); + continue stateloop; + } + } + // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER + case SCRIPT_DATA_ESCAPE_START_DASH: + scriptdataescapestartdashloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Consume the next input character: + */ + switch (c) { + case '-': + /* + * U+002D HYPHEN-MINUS (-) Emit a U+002D + * HYPHEN-MINUS character token. Switch to the + * script data escaped dash dash state. + */ + state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_DASH_DASH, reconsume, pos); + break scriptdataescapestartdashloop; + // continue stateloop; + default: + /* + * Anything else Reconsume the current input + * character in the script data state. + */ + reconsume = true; + state = transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos); + continue stateloop; + } + } + // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER + case SCRIPT_DATA_ESCAPED_DASH_DASH: + scriptdataescapeddashdashloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Consume the next input character: + */ + switch (c) { + case '-': + /* + * U+002D HYPHEN-MINUS (-) Emit a U+002D + * HYPHEN-MINUS character token. Stay in the + * script data escaped dash dash state. + */ + continue; + case '<': + /* + * U+003C LESS-THAN SIGN (<) Switch to the + * script data escaped less-than sign state. 
+ */ + flushChars(buf, pos); + state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN, reconsume, pos); + continue stateloop; + case '>': + /* + * U+003E GREATER-THAN SIGN (>) Emit a U+003E + * GREATER-THAN SIGN character token. Switch to + * the script data state. + */ + state = transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos); + continue stateloop; + case '\u0000': + emitReplacementCharacter(buf, pos); + state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos); + break scriptdataescapeddashdashloop; + case '\r': + emitCarriageReturn(buf, pos); + state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos); + break stateloop; + case '\n': + silentLineFeed(); + default: + /* + * Anything else Emit the current input + * character as a character token. Switch to the + * script data escaped state. + */ + state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos); + break scriptdataescapeddashdashloop; + // continue stateloop; + } + } + // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER + case SCRIPT_DATA_ESCAPED: + scriptdataescapedloop: for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + } + /* + * Consume the next input character: + */ + switch (c) { + case '-': + /* + * U+002D HYPHEN-MINUS (-) Emit a U+002D + * HYPHEN-MINUS character token. Switch to the + * script data escaped dash state. + */ + state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_DASH, reconsume, pos); + break scriptdataescapedloop; // FALL THRU + // continue + // stateloop; + case '<': + /* + * U+003C LESS-THAN SIGN (<) Switch to the + * script data escaped less-than sign state. 
+ */ + flushChars(buf, pos); + state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN, reconsume, pos); + continue stateloop; + case '\u0000': + emitReplacementCharacter(buf, pos); + continue; + case '\r': + emitCarriageReturn(buf, pos); + break stateloop; + case '\n': + silentLineFeed(); + default: + /* + * Anything else Emit the current input + * character as a character token. Stay in the + * script data escaped state. + */ + continue; + } + } + // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER + case SCRIPT_DATA_ESCAPED_DASH: + scriptdataescapeddashloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Consume the next input character: + */ + switch (c) { + case '-': + /* + * U+002D HYPHEN-MINUS (-) Emit a U+002D + * HYPHEN-MINUS character token. Switch to the + * script data escaped dash dash state. + */ + state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_DASH_DASH, reconsume, pos); + continue stateloop; + case '<': + /* + * U+003C LESS-THAN SIGN (<) Switch to the + * script data escaped less-than sign state. + */ + flushChars(buf, pos); + state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN, reconsume, pos); + break scriptdataescapeddashloop; + // continue stateloop; + case '\u0000': + emitReplacementCharacter(buf, pos); + state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos); + continue stateloop; + case '\r': + emitCarriageReturn(buf, pos); + state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos); + break stateloop; + case '\n': + silentLineFeed(); + default: + /* + * Anything else Emit the current input + * character as a character token. Switch to the + * script data escaped state. 
+ */ + state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos); + continue stateloop; + } + } + // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER + case SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN: + scriptdataescapedlessthanloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Consume the next input character: + */ + switch (c) { + case '/': + /* + * U+002F SOLIDUS (/) Set the temporary buffer + * to the empty string. Switch to the script + * data escaped end tag open state. + */ + index = 0; + clearStrBufBeforeUse(); + returnState = Tokenizer.SCRIPT_DATA_ESCAPED; + state = transition(state, Tokenizer.NON_DATA_END_TAG_NAME, reconsume, pos); + continue stateloop; + case 'S': + case 's': + /* + * U+0041 LATIN CAPITAL LETTER A through to + * U+005A LATIN CAPITAL LETTER Z Emit a U+003C + * LESS-THAN SIGN character token and the + * current input character as a character token. + */ + tokenHandler.characters(Tokenizer.LT_GT, 0, 1); + cstart = pos; + index = 1; + /* + * Set the temporary buffer to the empty string. + * Append the lowercase version of the current + * input character (add 0x0020 to the + * character's code point) to the temporary + * buffer. Switch to the script data double + * escape start state. + */ + state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPE_START, reconsume, pos); + break scriptdataescapedlessthanloop; + // continue stateloop; + default: + /* + * Anything else Emit a U+003C LESS-THAN SIGN + * character token and reconsume the current + * input character in the script data escaped + * state. 
+ */ + tokenHandler.characters(Tokenizer.LT_GT, 0, 1); + cstart = pos; + reconsume = true; + state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos); + continue stateloop; + } + } + // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER + case SCRIPT_DATA_DOUBLE_ESCAPE_START: + scriptdatadoubleescapestartloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + assert index > 0; + if (index < 6) { // SCRIPT_ARR.length + char folded = c; + if (c >= 'A' && c <= 'Z') { + folded += 0x20; + } + if (folded != Tokenizer.SCRIPT_ARR[index]) { + reconsume = true; + state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos); + continue stateloop; + } + index++; + continue; + } + switch (c) { + case '\r': + emitCarriageReturn(buf, pos); + state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos); + break stateloop; + case '\n': + silentLineFeed(); + case ' ': + case '\t': + case '\u000C': + case '/': + case '>': + /* + * U+0009 CHARACTER TABULATION U+000A LINE FEED + * (LF) U+000C FORM FEED (FF) U+0020 SPACE + * U+002F SOLIDUS (/) U+003E GREATER-THAN SIGN + * (>) Emit the current input character as a + * character token. If the temporary buffer is + * the string "script", then switch to the + * script data double escaped state. + */ + state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos); + break scriptdatadoubleescapestartloop; + // continue stateloop; + default: + /* + * Anything else Reconsume the current input + * character in the script data escaped state. 
+ */ + reconsume = true; + state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos); + continue stateloop; + } + } + // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER + case SCRIPT_DATA_DOUBLE_ESCAPED: + scriptdatadoubleescapedloop: for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + } + /* + * Consume the next input character: + */ + switch (c) { + case '-': + /* + * U+002D HYPHEN-MINUS (-) Emit a U+002D + * HYPHEN-MINUS character token. Switch to the + * script data double escaped dash state. + */ + state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_DASH, reconsume, pos); + break scriptdatadoubleescapedloop; // FALL THRU + // continue + // stateloop; + case '<': + /* + * U+003C LESS-THAN SIGN (<) Emit a U+003C + * LESS-THAN SIGN character token. Switch to the + * script data double escaped less-than sign + * state. + */ + state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN, reconsume, pos); + continue stateloop; + case '\u0000': + emitReplacementCharacter(buf, pos); + continue; + case '\r': + emitCarriageReturn(buf, pos); + break stateloop; + case '\n': + silentLineFeed(); + default: + /* + * Anything else Emit the current input + * character as a character token. Stay in the + * script data double escaped state. + */ + continue; + } + } + // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER + case SCRIPT_DATA_DOUBLE_ESCAPED_DASH: + scriptdatadoubleescapeddashloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Consume the next input character: + */ + switch (c) { + case '-': + /* + * U+002D HYPHEN-MINUS (-) Emit a U+002D + * HYPHEN-MINUS character token. Switch to the + * script data double escaped dash dash state. 
+ */ + state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH, reconsume, pos); + break scriptdatadoubleescapeddashloop; + // continue stateloop; + case '<': + /* + * U+003C LESS-THAN SIGN (<) Emit a U+003C + * LESS-THAN SIGN character token. Switch to the + * script data double escaped less-than sign + * state. + */ + state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN, reconsume, pos); + continue stateloop; + case '\u0000': + emitReplacementCharacter(buf, pos); + state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos); + continue stateloop; + case '\r': + emitCarriageReturn(buf, pos); + state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos); + break stateloop; + case '\n': + silentLineFeed(); + default: + /* + * Anything else Emit the current input + * character as a character token. Switch to the + * script data double escaped state. + */ + state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos); + continue stateloop; + } + } + // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER + case SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH: + scriptdatadoubleescapeddashdashloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Consume the next input character: + */ + switch (c) { + case '-': + /* + * U+002D HYPHEN-MINUS (-) Emit a U+002D + * HYPHEN-MINUS character token. Stay in the + * script data double escaped dash dash state. + */ + continue; + case '<': + /* + * U+003C LESS-THAN SIGN (<) Emit a U+003C + * LESS-THAN SIGN character token. Switch to the + * script data double escaped less-than sign + * state. + */ + state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN, reconsume, pos); + break scriptdatadoubleescapeddashdashloop; + case '>': + /* + * U+003E GREATER-THAN SIGN (>) Emit a U+003E + * GREATER-THAN SIGN character token. Switch to + * the script data state. 
+ */ + state = transition(state, Tokenizer.SCRIPT_DATA, reconsume, pos); + continue stateloop; + case '\u0000': + emitReplacementCharacter(buf, pos); + state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos); + continue stateloop; + case '\r': + emitCarriageReturn(buf, pos); + state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos); + break stateloop; + case '\n': + silentLineFeed(); + default: + /* + * Anything else Emit the current input + * character as a character token. Switch to the + * script data double escaped state. + */ + state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos); + continue stateloop; + } + } + // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER + case SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN: + scriptdatadoubleescapedlessthanloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Consume the next input character: + */ + switch (c) { + case '/': + /* + * U+002F SOLIDUS (/) Emit a U+002F SOLIDUS + * character token. Set the temporary buffer to + * the empty string. Switch to the script data + * double escape end state. + */ + index = 0; + state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPE_END, reconsume, pos); + break scriptdatadoubleescapedlessthanloop; + default: + /* + * Anything else Reconsume the current input + * character in the script data double escaped + * state. 
+ */ + reconsume = true; + state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos); + continue stateloop; + } + } + // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER + case SCRIPT_DATA_DOUBLE_ESCAPE_END: + scriptdatadoubleescapeendloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + if (index < 6) { // SCRIPT_ARR.length + char folded = c; + if (c >= 'A' && c <= 'Z') { + folded += 0x20; + } + if (folded != Tokenizer.SCRIPT_ARR[index]) { + reconsume = true; + state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos); + continue stateloop; + } + index++; + continue; + } + switch (c) { + case '\r': + emitCarriageReturn(buf, pos); + state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos); + break stateloop; + case '\n': + silentLineFeed(); + case ' ': + case '\t': + case '\u000C': + case '/': + case '>': + /* + * U+0009 CHARACTER TABULATION U+000A LINE FEED + * (LF) U+000C FORM FEED (FF) U+0020 SPACE + * U+002F SOLIDUS (/) U+003E GREATER-THAN SIGN + * (>) Emit the current input character as a + * character token. If the temporary buffer is + * the string "script", then switch to the + * script data escaped state. + */ + state = transition(state, Tokenizer.SCRIPT_DATA_ESCAPED, reconsume, pos); + continue stateloop; + default: + /* + * Reconsume the current input character in the + * script data double escaped state. 
+ */ + reconsume = true; + state = transition(state, Tokenizer.SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos); + continue stateloop; + } + } + // XXX reorder point + case MARKUP_DECLARATION_OCTYPE: + markupdeclarationdoctypeloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + if (index < 6) { // OCTYPE.length + char folded = c; + if (c >= 'A' && c <= 'Z') { + folded += 0x20; + } + if (folded == Tokenizer.OCTYPE[index]) { + appendStrBuf(c); + } else { + errBogusComment(); + reconsume = true; + state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos); + continue stateloop; + } + index++; + continue; + } else { + reconsume = true; + state = transition(state, Tokenizer.DOCTYPE, reconsume, pos); + break markupdeclarationdoctypeloop; + // continue stateloop; + } + } + // FALLTHRU DON'T REORDER + case DOCTYPE: + doctypeloop: for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + } + initDoctypeFields(); + /* + * Consume the next input character: + */ + switch (c) { + case '\r': + silentCarriageReturn(); + state = transition(state, Tokenizer.BEFORE_DOCTYPE_NAME, reconsume, pos); + break stateloop; + case '\n': + silentLineFeed(); + // fall thru + case ' ': + case '\t': + case '\u000C': + /* + * U+0009 CHARACTER TABULATION U+000A LINE FEED + * (LF) U+000C FORM FEED (FF) U+0020 SPACE + * Switch to the before DOCTYPE name state. + */ + state = transition(state, Tokenizer.BEFORE_DOCTYPE_NAME, reconsume, pos); + break doctypeloop; + // continue stateloop; + default: + /* + * Anything else Parse error. + */ + errMissingSpaceBeforeDoctypeName(); + /* + * Reconsume the current character in the before + * DOCTYPE name state. 
+ */ + reconsume = true; + state = transition(state, Tokenizer.BEFORE_DOCTYPE_NAME, reconsume, pos); + break doctypeloop; + // continue stateloop; + } + } + // FALLTHRU DON'T REORDER + case BEFORE_DOCTYPE_NAME: + beforedoctypenameloop: for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + } + /* + * Consume the next input character: + */ + switch (c) { + case '\r': + silentCarriageReturn(); + break stateloop; + case '\n': + silentLineFeed(); + // fall thru + case ' ': + case '\t': + case '\u000C': + /* + * U+0009 CHARACTER TABULATION U+000A LINE FEED + * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay + * in the before DOCTYPE name state. + */ + continue; + case '>': + /* + * U+003E GREATER-THAN SIGN (>) Parse error. + */ + errNamelessDoctype(); + /* + * Create a new DOCTYPE token. Set its + * force-quirks flag to on. + */ + forceQuirks = true; + /* + * Emit the token. + */ + emitDoctypeToken(pos); + /* + * Switch to the data state. + */ + state = transition(state, Tokenizer.DATA, reconsume, pos); + continue stateloop; + case '\u0000': + c = '\uFFFD'; + // fall thru + default: + if (c >= 'A' && c <= 'Z') { + /* + * U+0041 LATIN CAPITAL LETTER A through to + * U+005A LATIN CAPITAL LETTER Z Create a + * new DOCTYPE token. Set the token's name + * to the lowercase version of the input + * character (add 0x0020 to the character's + * code point). + */ + c += 0x20; + } + /* Anything else Create a new DOCTYPE token. */ + /* + * Set the token's name name to the current + * input character. + */ + clearStrBufBeforeUse(); + appendStrBuf(c); + /* + * Switch to the DOCTYPE name state. 
+ */ + state = transition(state, Tokenizer.DOCTYPE_NAME, reconsume, pos); + break beforedoctypenameloop; + // continue stateloop; + } + } + // FALLTHRU DON'T REORDER + case DOCTYPE_NAME: + doctypenameloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Consume the next input character: + */ + switch (c) { + case '\r': + silentCarriageReturn(); + strBufToDoctypeName(); + state = transition(state, Tokenizer.AFTER_DOCTYPE_NAME, reconsume, pos); + break stateloop; + case '\n': + silentLineFeed(); + // fall thru + case ' ': + case '\t': + case '\u000C': + /* + * U+0009 CHARACTER TABULATION U+000A LINE FEED + * (LF) U+000C FORM FEED (FF) U+0020 SPACE + * Switch to the after DOCTYPE name state. + */ + strBufToDoctypeName(); + state = transition(state, Tokenizer.AFTER_DOCTYPE_NAME, reconsume, pos); + break doctypenameloop; + // continue stateloop; + case '>': + /* + * U+003E GREATER-THAN SIGN (>) Emit the current + * DOCTYPE token. + */ + strBufToDoctypeName(); + emitDoctypeToken(pos); + /* + * Switch to the data state. + */ + state = transition(state, Tokenizer.DATA, reconsume, pos); + continue stateloop; + case '\u0000': + c = '\uFFFD'; + // fall thru + default: + /* + * U+0041 LATIN CAPITAL LETTER A through to + * U+005A LATIN CAPITAL LETTER Z Append the + * lowercase version of the input character (add + * 0x0020 to the character's code point) to the + * current DOCTYPE token's name. + */ + if (c >= 'A' && c <= 'Z') { + c += 0x0020; + } + /* + * Anything else Append the current input + * character to the current DOCTYPE token's + * name. + */ + appendStrBuf(c); + /* + * Stay in the DOCTYPE name state. 
+ */ + continue; + } + } + // FALLTHRU DON'T REORDER + case AFTER_DOCTYPE_NAME: + afterdoctypenameloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Consume the next input character: + */ + switch (c) { + case '\r': + silentCarriageReturn(); + break stateloop; + case '\n': + silentLineFeed(); + // fall thru + case ' ': + case '\t': + case '\u000C': + /* + * U+0009 CHARACTER TABULATION U+000A LINE FEED + * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay + * in the after DOCTYPE name state. + */ + continue; + case '>': + /* + * U+003E GREATER-THAN SIGN (>) Emit the current + * DOCTYPE token. + */ + emitDoctypeToken(pos); + /* + * Switch to the data state. + */ + state = transition(state, Tokenizer.DATA, reconsume, pos); + continue stateloop; + case 'p': + case 'P': + index = 0; + state = transition(state, Tokenizer.DOCTYPE_UBLIC, reconsume, pos); + break afterdoctypenameloop; + // continue stateloop; + case 's': + case 'S': + index = 0; + state = transition(state, Tokenizer.DOCTYPE_YSTEM, reconsume, pos); + continue stateloop; + default: + /* + * Otherwise, this is the parse error. + */ + bogusDoctype(); + + /* + * Set the DOCTYPE token's force-quirks flag to + * on. + */ + // done by bogusDoctype(); + /* + * Switch to the bogus DOCTYPE state. + */ + state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos); + continue stateloop; + } + } + // FALLTHRU DON'T REORDER + case DOCTYPE_UBLIC: + doctypeublicloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * If the six characters starting from the current input + * character are an ASCII case-insensitive match for the + * word "PUBLIC", then consume those characters and + * switch to the before DOCTYPE public identifier state. 
+ */ + if (index < 5) { // UBLIC.length + char folded = c; + if (c >= 'A' && c <= 'Z') { + folded += 0x20; + } + if (folded != Tokenizer.UBLIC[index]) { + bogusDoctype(); + // forceQuirks = true; + reconsume = true; + state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos); + continue stateloop; + } + index++; + continue; + } else { + reconsume = true; + state = transition(state, Tokenizer.AFTER_DOCTYPE_PUBLIC_KEYWORD, reconsume, pos); + break doctypeublicloop; + // continue stateloop; + } + } + // FALLTHRU DON'T REORDER + case AFTER_DOCTYPE_PUBLIC_KEYWORD: + afterdoctypepublickeywordloop: for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + } + /* + * Consume the next input character: + */ + switch (c) { + case '\r': + silentCarriageReturn(); + state = transition(state, Tokenizer.BEFORE_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, pos); + break stateloop; + case '\n': + silentLineFeed(); + // fall thru + case ' ': + case '\t': + case '\u000C': + /* + * U+0009 CHARACTER TABULATION U+000A LINE FEED + * (LF) U+000C FORM FEED (FF) U+0020 SPACE + * Switch to the before DOCTYPE public + * identifier state. + */ + state = transition(state, Tokenizer.BEFORE_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, pos); + break afterdoctypepublickeywordloop; + // FALL THROUGH continue stateloop + case '"': + /* + * U+0022 QUOTATION MARK (") Parse Error. + */ + errNoSpaceBetweenDoctypePublicKeywordAndQuote(); + /* + * Set the DOCTYPE token's public identifier to + * the empty string (not missing), + */ + clearStrBufBeforeUse(); + /* + * then switch to the DOCTYPE public identifier + * (double-quoted) state. + */ + state = transition(state, Tokenizer.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos); + continue stateloop; + case '\'': + /* + * U+0027 APOSTROPHE (') Parse Error. 
+ */ + errNoSpaceBetweenDoctypePublicKeywordAndQuote(); + /* + * Set the DOCTYPE token's public identifier to + * the empty string (not missing), + */ + clearStrBufBeforeUse(); + /* + * then switch to the DOCTYPE public identifier + * (single-quoted) state. + */ + state = transition(state, Tokenizer.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED, reconsume, pos); + continue stateloop; + case '>': + /* U+003E GREATER-THAN SIGN (>) Parse error. */ + errExpectedPublicId(); + /* + * Set the DOCTYPE token's force-quirks flag to + * on. + */ + forceQuirks = true; + /* + * Emit that DOCTYPE token. + */ + emitDoctypeToken(pos); + /* + * Switch to the data state. + */ + state = transition(state, Tokenizer.DATA, reconsume, pos); + continue stateloop; + default: + bogusDoctype(); + /* + * Set the DOCTYPE token's force-quirks flag to + * on. + */ + // done by bogusDoctype(); + /* + * Switch to the bogus DOCTYPE state. + */ + state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos); + continue stateloop; + } + } + // FALLTHRU DON'T REORDER + case BEFORE_DOCTYPE_PUBLIC_IDENTIFIER: + beforedoctypepublicidentifierloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Consume the next input character: + */ + switch (c) { + case '\r': + silentCarriageReturn(); + break stateloop; + case '\n': + silentLineFeed(); + // fall thru + case ' ': + case '\t': + case '\u000C': + /* + * U+0009 CHARACTER TABULATION U+000A LINE FEED + * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay + * in the before DOCTYPE public identifier + * state. + */ + continue; + case '"': + /* + * U+0022 QUOTATION MARK (") Set the DOCTYPE + * token's public identifier to the empty string + * (not missing), + */ + clearStrBufBeforeUse(); + /* + * then switch to the DOCTYPE public identifier + * (double-quoted) state. 
+ */ + state = transition(state, Tokenizer.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos); + break beforedoctypepublicidentifierloop; + // continue stateloop; + case '\'': + /* + * U+0027 APOSTROPHE (') Set the DOCTYPE token's + * public identifier to the empty string (not + * missing), + */ + clearStrBufBeforeUse(); + /* + * then switch to the DOCTYPE public identifier + * (single-quoted) state. + */ + state = transition(state, Tokenizer.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED, reconsume, pos); + continue stateloop; + case '>': + /* U+003E GREATER-THAN SIGN (>) Parse error. */ + errExpectedPublicId(); + /* + * Set the DOCTYPE token's force-quirks flag to + * on. + */ + forceQuirks = true; + /* + * Emit that DOCTYPE token. + */ + emitDoctypeToken(pos); + /* + * Switch to the data state. + */ + state = transition(state, Tokenizer.DATA, reconsume, pos); + continue stateloop; + default: + bogusDoctype(); + /* + * Set the DOCTYPE token's force-quirks flag to + * on. + */ + // done by bogusDoctype(); + /* + * Switch to the bogus DOCTYPE state. + */ + state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos); + continue stateloop; + } + } + // FALLTHRU DON'T REORDER + case DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED: + doctypepublicidentifierdoublequotedloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Consume the next input character: + */ + switch (c) { + case '"': + /* + * U+0022 QUOTATION MARK (") Switch to the after + * DOCTYPE public identifier state. + */ + publicIdentifier = strBufToString(); + state = transition(state, Tokenizer.AFTER_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, pos); + break doctypepublicidentifierdoublequotedloop; + // continue stateloop; + case '>': + /* + * U+003E GREATER-THAN SIGN (>) Parse error. + */ + errGtInPublicId(); + /* + * Set the DOCTYPE token's force-quirks flag to + * on. + */ + forceQuirks = true; + /* + * Emit that DOCTYPE token. 
+ */ + publicIdentifier = strBufToString(); + emitDoctypeToken(pos); + /* + * Switch to the data state. + */ + state = transition(state, Tokenizer.DATA, reconsume, pos); + continue stateloop; + case '\r': + appendStrBufCarriageReturn(); + break stateloop; + case '\n': + appendStrBufLineFeed(); + continue; + case '\u0000': + c = '\uFFFD'; + // fall thru + default: + /* + * Anything else Append the current input + * character to the current DOCTYPE token's + * public identifier. + */ + appendStrBuf(c); + /* + * Stay in the DOCTYPE public identifier + * (double-quoted) state. + */ + continue; + } + } + // FALLTHRU DON'T REORDER + case AFTER_DOCTYPE_PUBLIC_IDENTIFIER: + afterdoctypepublicidentifierloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Consume the next input character: + */ + switch (c) { + case '\r': + silentCarriageReturn(); + state = transition(state, Tokenizer.BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS, reconsume, pos); + break stateloop; + case '\n': + silentLineFeed(); + // fall thru + case ' ': + case '\t': + case '\u000C': + /* + * U+0009 CHARACTER TABULATION U+000A LINE FEED + * (LF) U+000C FORM FEED (FF) U+0020 SPACE + * Switch to the between DOCTYPE public and + * system identifiers state. + */ + state = transition(state, Tokenizer.BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS, reconsume, pos); + break afterdoctypepublicidentifierloop; + // continue stateloop; + case '>': + /* + * U+003E GREATER-THAN SIGN (>) Emit the current + * DOCTYPE token. + */ + emitDoctypeToken(pos); + /* + * Switch to the data state. + */ + state = transition(state, Tokenizer.DATA, reconsume, pos); + continue stateloop; + case '"': + /* + * U+0022 QUOTATION MARK (") Parse error. 
+ */ + errNoSpaceBetweenPublicAndSystemIds(); + /* + * Set the DOCTYPE token's system identifier to + * the empty string (not missing), + */ + clearStrBufBeforeUse(); + /* + * then switch to the DOCTYPE system identifier + * (double-quoted) state. + */ + state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos); + continue stateloop; + case '\'': + /* + * U+0027 APOSTROPHE (') Parse error. + */ + errNoSpaceBetweenPublicAndSystemIds(); + /* + * Set the DOCTYPE token's system identifier to + * the empty string (not missing), + */ + clearStrBufBeforeUse(); + /* + * then switch to the DOCTYPE system identifier + * (single-quoted) state. + */ + state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos); + continue stateloop; + default: + bogusDoctype(); + /* + * Set the DOCTYPE token's force-quirks flag to + * on. + */ + // done by bogusDoctype(); + /* + * Switch to the bogus DOCTYPE state. + */ + state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos); + continue stateloop; + } + } + // FALLTHRU DON'T REORDER + case BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS: + betweendoctypepublicandsystemidentifiersloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Consume the next input character: + */ + switch (c) { + case '\r': + silentCarriageReturn(); + break stateloop; + case '\n': + silentLineFeed(); + // fall thru + case ' ': + case '\t': + case '\u000C': + /* + * U+0009 CHARACTER TABULATION U+000A LINE FEED + * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay + * in the between DOCTYPE public and system + * identifiers state. + */ + continue; + case '>': + /* + * U+003E GREATER-THAN SIGN (>) Emit the current + * DOCTYPE token. + */ + emitDoctypeToken(pos); + /* + * Switch to the data state. 
+ */ + state = transition(state, Tokenizer.DATA, reconsume, pos); + continue stateloop; + case '"': + /* + * U+0022 QUOTATION MARK (") Set the DOCTYPE + * token's system identifier to the empty string + * (not missing), + */ + clearStrBufBeforeUse(); + /* + * then switch to the DOCTYPE system identifier + * (double-quoted) state. + */ + state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos); + break betweendoctypepublicandsystemidentifiersloop; + // continue stateloop; + case '\'': + /* + * U+0027 APOSTROPHE (') Set the DOCTYPE token's + * system identifier to the empty string (not + * missing), + */ + clearStrBufBeforeUse(); + /* + * then switch to the DOCTYPE system identifier + * (single-quoted) state. + */ + state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos); + continue stateloop; + default: + bogusDoctype(); + /* + * Set the DOCTYPE token's force-quirks flag to + * on. + */ + // done by bogusDoctype(); + /* + * Switch to the bogus DOCTYPE state. + */ + state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos); + continue stateloop; + } + } + // FALLTHRU DON'T REORDER + case DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED: + doctypesystemidentifierdoublequotedloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Consume the next input character: + */ + switch (c) { + case '"': + /* + * U+0022 QUOTATION MARK (") Switch to the after + * DOCTYPE system identifier state. + */ + systemIdentifier = strBufToString(); + state = transition(state, Tokenizer.AFTER_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, pos); + continue stateloop; + case '>': + /* + * U+003E GREATER-THAN SIGN (>) Parse error. + */ + errGtInSystemId(); + /* + * Set the DOCTYPE token's force-quirks flag to + * on. + */ + forceQuirks = true; + /* + * Emit that DOCTYPE token. + */ + systemIdentifier = strBufToString(); + emitDoctypeToken(pos); + /* + * Switch to the data state. 
+ */ + state = transition(state, Tokenizer.DATA, reconsume, pos); + continue stateloop; + case '\r': + appendStrBufCarriageReturn(); + break stateloop; + case '\n': + appendStrBufLineFeed(); + continue; + case '\u0000': + c = '\uFFFD'; + // fall thru + default: + /* + * Anything else Append the current input + * character to the current DOCTYPE token's + * system identifier. + */ + appendStrBuf(c); + /* + * Stay in the DOCTYPE system identifier + * (double-quoted) state. + */ + continue; + } + } + // FALLTHRU DON'T REORDER + case AFTER_DOCTYPE_SYSTEM_IDENTIFIER: + afterdoctypesystemidentifierloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Consume the next input character: + */ + switch (c) { + case '\r': + silentCarriageReturn(); + break stateloop; + case '\n': + silentLineFeed(); + // fall thru + case ' ': + case '\t': + case '\u000C': + /* + * U+0009 CHARACTER TABULATION U+000A LINE FEED + * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay + * in the after DOCTYPE system identifier state. + */ + continue; + case '>': + /* + * U+003E GREATER-THAN SIGN (>) Emit the current + * DOCTYPE token. + */ + emitDoctypeToken(pos); + /* + * Switch to the data state. + */ + state = transition(state, Tokenizer.DATA, reconsume, pos); + continue stateloop; + default: + /* + * Switch to the bogus DOCTYPE state. (This does + * not set the DOCTYPE token's force-quirks flag + * to on.) + */ + bogusDoctypeWithoutQuirks(); + state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos); + break afterdoctypesystemidentifierloop; + // continue stateloop; + } + } + // FALLTHRU DON'T REORDER + case BOGUS_DOCTYPE: + for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + } + /* + * Consume the next input character: + */ + switch (c) { + case '>': + /* + * U+003E GREATER-THAN SIGN (>) Emit that + * DOCTYPE token. 
+ */ + emitDoctypeToken(pos); + /* + * Switch to the data state. + */ + state = transition(state, Tokenizer.DATA, reconsume, pos); + continue stateloop; + case '\r': + silentCarriageReturn(); + break stateloop; + case '\n': + silentLineFeed(); + // fall thru + default: + /* + * Anything else Stay in the bogus DOCTYPE + * state. + */ + continue; + } + } + // XXX reorder point + case DOCTYPE_YSTEM: + doctypeystemloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Otherwise, if the six characters starting from the + * current input character are an ASCII case-insensitive + * match for the word "SYSTEM", then consume those + * characters and switch to the before DOCTYPE system + * identifier state. + */ + if (index < 5) { // YSTEM.length + char folded = c; + if (c >= 'A' && c <= 'Z') { + folded += 0x20; + } + if (folded != Tokenizer.YSTEM[index]) { + bogusDoctype(); + reconsume = true; + state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos); + continue stateloop; + } + index++; + continue stateloop; + } else { + reconsume = true; + state = transition(state, Tokenizer.AFTER_DOCTYPE_SYSTEM_KEYWORD, reconsume, pos); + break doctypeystemloop; + // continue stateloop; + } + } + // FALLTHRU DON'T REORDER + case AFTER_DOCTYPE_SYSTEM_KEYWORD: + afterdoctypesystemkeywordloop: for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + } + /* + * Consume the next input character: + */ + switch (c) { + case '\r': + silentCarriageReturn(); + state = transition(state, Tokenizer.BEFORE_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, pos); + break stateloop; + case '\n': + silentLineFeed(); + // fall thru + case ' ': + case '\t': + case '\u000C': + /* + * U+0009 CHARACTER TABULATION U+000A LINE FEED + * (LF) U+000C FORM FEED (FF) U+0020 SPACE + * Switch to the before DOCTYPE public + * identifier state. 
+ */ + state = transition(state, Tokenizer.BEFORE_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, pos); + break afterdoctypesystemkeywordloop; + // FALL THROUGH continue stateloop + case '"': + /* + * U+0022 QUOTATION MARK (") Parse Error. + */ + errNoSpaceBetweenDoctypeSystemKeywordAndQuote(); + /* + * Set the DOCTYPE token's system identifier to + * the empty string (not missing), + */ + clearStrBufBeforeUse(); + /* + * then switch to the DOCTYPE public identifier + * (double-quoted) state. + */ + state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos); + continue stateloop; + case '\'': + /* + * U+0027 APOSTROPHE (') Parse Error. + */ + errNoSpaceBetweenDoctypeSystemKeywordAndQuote(); + /* + * Set the DOCTYPE token's public identifier to + * the empty string (not missing), + */ + clearStrBufBeforeUse(); + /* + * then switch to the DOCTYPE public identifier + * (single-quoted) state. + */ + state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos); + continue stateloop; + case '>': + /* U+003E GREATER-THAN SIGN (>) Parse error. */ + errExpectedPublicId(); + /* + * Set the DOCTYPE token's force-quirks flag to + * on. + */ + forceQuirks = true; + /* + * Emit that DOCTYPE token. + */ + emitDoctypeToken(pos); + /* + * Switch to the data state. + */ + state = transition(state, Tokenizer.DATA, reconsume, pos); + continue stateloop; + default: + bogusDoctype(); + /* + * Set the DOCTYPE token's force-quirks flag to + * on. + */ + // done by bogusDoctype(); + /* + * Switch to the bogus DOCTYPE state. 
+ */ + state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos); + continue stateloop; + } + } + // FALLTHRU DON'T REORDER + case BEFORE_DOCTYPE_SYSTEM_IDENTIFIER: + beforedoctypesystemidentifierloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Consume the next input character: + */ + switch (c) { + case '\r': + silentCarriageReturn(); + break stateloop; + case '\n': + silentLineFeed(); + // fall thru + case ' ': + case '\t': + case '\u000C': + /* + * U+0009 CHARACTER TABULATION U+000A LINE FEED + * (LF) U+000C FORM FEED (FF) U+0020 SPACE Stay + * in the before DOCTYPE system identifier + * state. + */ + continue; + case '"': + /* + * U+0022 QUOTATION MARK (") Set the DOCTYPE + * token's system identifier to the empty string + * (not missing), + */ + clearStrBufBeforeUse(); + /* + * then switch to the DOCTYPE system identifier + * (double-quoted) state. + */ + state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos); + continue stateloop; + case '\'': + /* + * U+0027 APOSTROPHE (') Set the DOCTYPE token's + * system identifier to the empty string (not + * missing), + */ + clearStrBufBeforeUse(); + /* + * then switch to the DOCTYPE system identifier + * (single-quoted) state. + */ + state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos); + break beforedoctypesystemidentifierloop; + // continue stateloop; + case '>': + /* U+003E GREATER-THAN SIGN (>) Parse error. */ + errExpectedSystemId(); + /* + * Set the DOCTYPE token's force-quirks flag to + * on. + */ + forceQuirks = true; + /* + * Emit that DOCTYPE token. + */ + emitDoctypeToken(pos); + /* + * Switch to the data state. + */ + state = transition(state, Tokenizer.DATA, reconsume, pos); + continue stateloop; + default: + bogusDoctype(); + /* + * Set the DOCTYPE token's force-quirks flag to + * on. + */ + // done by bogusDoctype(); + /* + * Switch to the bogus DOCTYPE state. 
+ */ + state = transition(state, Tokenizer.BOGUS_DOCTYPE, reconsume, pos); + continue stateloop; + } + } + // FALLTHRU DON'T REORDER + case DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED: + for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Consume the next input character: + */ + switch (c) { + case '\'': + /* + * U+0027 APOSTROPHE (') Switch to the after + * DOCTYPE system identifier state. + */ + systemIdentifier = strBufToString(); + state = transition(state, Tokenizer.AFTER_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, pos); + continue stateloop; + case '>': + errGtInSystemId(); + /* + * Set the DOCTYPE token's force-quirks flag to + * on. + */ + forceQuirks = true; + /* + * Emit that DOCTYPE token. + */ + systemIdentifier = strBufToString(); + emitDoctypeToken(pos); + /* + * Switch to the data state. + */ + state = transition(state, Tokenizer.DATA, reconsume, pos); + continue stateloop; + case '\r': + appendStrBufCarriageReturn(); + break stateloop; + case '\n': + appendStrBufLineFeed(); + continue; + case '\u0000': + c = '\uFFFD'; + // fall thru + default: + /* + * Anything else Append the current input + * character to the current DOCTYPE token's + * system identifier. + */ + appendStrBuf(c); + /* + * Stay in the DOCTYPE system identifier + * (double-quoted) state. + */ + continue; + } + } + // XXX reorder point + case DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED: + for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + /* + * Consume the next input character: + */ + switch (c) { + case '\'': + /* + * U+0027 APOSTROPHE (') Switch to the after + * DOCTYPE public identifier state. + */ + publicIdentifier = strBufToString(); + state = transition(state, Tokenizer.AFTER_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, pos); + continue stateloop; + case '>': + errGtInPublicId(); + /* + * Set the DOCTYPE token's force-quirks flag to + * on. + */ + forceQuirks = true; + /* + * Emit that DOCTYPE token. 
+ */ + publicIdentifier = strBufToString(); + emitDoctypeToken(pos); + /* + * Switch to the data state. + */ + state = transition(state, Tokenizer.DATA, reconsume, pos); + continue stateloop; + case '\r': + appendStrBufCarriageReturn(); + break stateloop; + case '\n': + appendStrBufLineFeed(); + continue; + case '\u0000': + c = '\uFFFD'; + // fall thru + default: + /* + * Anything else Append the current input + * character to the current DOCTYPE token's + * public identifier. + */ + appendStrBuf(c); + /* + * Stay in the DOCTYPE public identifier + * (single-quoted) state. + */ + continue; + } + } + // XXX reorder point + case PROCESSING_INSTRUCTION: + processinginstructionloop: for (;;) { + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + switch (c) { + case '?': + state = transition( + state, + Tokenizer.PROCESSING_INSTRUCTION_QUESTION_MARK, + reconsume, pos); + break processinginstructionloop; + // continue stateloop; + default: + continue; + } + } + case PROCESSING_INSTRUCTION_QUESTION_MARK: + if (++pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + switch (c) { + case '>': + state = transition(state, Tokenizer.DATA, + reconsume, pos); + continue stateloop; + default: + state = transition(state, + Tokenizer.PROCESSING_INSTRUCTION, + reconsume, pos); + continue stateloop; + } + // END HOTSPOT WORKAROUND + } + } + flushChars(buf, pos); + /* + * if (prevCR && pos != endPos) { // why is this needed? pos--; col--; } + */ + // Save locals + stateSave = state; + returnStateSave = returnState; + return pos; + } + + // HOTSPOT WORKAROUND INSERTION POINT + + // [NOCPP[ + + protected int transition(int from, int to, boolean reconsume, int pos) throws SAXException { + return to; + } + + // ]NOCPP] + + private void initDoctypeFields() { + // Discard the characters "DOCTYPE" accumulated as a potential bogus + // comment into strBuf. 
+ clearStrBufAfterUse(); + doctypeName = ""; + if (systemIdentifier != null) { + Portability.releaseString(systemIdentifier); + systemIdentifier = null; + } + if (publicIdentifier != null) { + Portability.releaseString(publicIdentifier); + publicIdentifier = null; + } + forceQuirks = false; + } + + @Inline private void adjustDoubleHyphenAndAppendToStrBufCarriageReturn() + throws SAXException { + silentCarriageReturn(); + adjustDoubleHyphenAndAppendToStrBufAndErr('\n'); + } + + @Inline private void adjustDoubleHyphenAndAppendToStrBufLineFeed() + throws SAXException { + silentLineFeed(); + adjustDoubleHyphenAndAppendToStrBufAndErr('\n'); + } + + @Inline private void appendStrBufLineFeed() { + silentLineFeed(); + appendStrBuf('\n'); + } + + @Inline private void appendStrBufCarriageReturn() { + silentCarriageReturn(); + appendStrBuf('\n'); + } + + @Inline protected void silentCarriageReturn() { + ++line; + lastCR = true; + } + + @Inline protected void silentLineFeed() { + ++line; + } + + private void emitCarriageReturn(@NoLength char[] buf, int pos) + throws SAXException { + silentCarriageReturn(); + flushChars(buf, pos); + tokenHandler.characters(Tokenizer.LF, 0, 1); + cstart = Integer.MAX_VALUE; + } + + private void emitReplacementCharacter(@NoLength char[] buf, int pos) + throws SAXException { + flushChars(buf, pos); + tokenHandler.zeroOriginatingReplacementCharacter(); + cstart = pos + 1; + } + + private void emitPlaintextReplacementCharacter(@NoLength char[] buf, int pos) + throws SAXException { + flushChars(buf, pos); + tokenHandler.characters(REPLACEMENT_CHARACTER, 0, 1); + cstart = pos + 1; + } + + private void setAdditionalAndRememberAmpersandLocation(char add) { + additional = add; + // [NOCPP[ + ampersandLocation = new LocatorImpl(this); + // ]NOCPP] + } + + private void bogusDoctype() throws SAXException { + errBogusDoctype(); + forceQuirks = true; + } + + private void bogusDoctypeWithoutQuirks() throws SAXException { + errBogusDoctype(); + forceQuirks = 
false; + } + + private void handleNcrValue(int returnState) throws SAXException { + /* + * If one or more characters match the range, then take them all and + * interpret the string of characters as a number (either hexadecimal or + * decimal as appropriate). + */ + if (value <= 0xFFFF) { + if (value >= 0x80 && value <= 0x9f) { + /* + * If that number is one of the numbers in the first column of + * the following table, then this is a parse error. + */ + errNcrInC1Range(); + /* + * Find the row with that number in the first column, and return + * a character token for the Unicode character given in the + * second column of that row. + */ + @NoLength char[] val = NamedCharacters.WINDOWS_1252[value - 0x80]; + emitOrAppendOne(val, returnState); + // [NOCPP[ + } else if (value == 0xC + && contentSpacePolicy != XmlViolationPolicy.ALLOW) { + if (contentSpacePolicy == XmlViolationPolicy.ALTER_INFOSET) { + emitOrAppendOne(Tokenizer.SPACE, returnState); + } else if (contentSpacePolicy == XmlViolationPolicy.FATAL) { + fatal("A character reference expanded to a form feed which is not legal XML 1.0 white space."); + } + // ]NOCPP] + } else if (value == 0x0) { + errNcrZero(); + emitOrAppendOne(Tokenizer.REPLACEMENT_CHARACTER, returnState); + } else if ((value & 0xF800) == 0xD800) { + errNcrSurrogate(); + emitOrAppendOne(Tokenizer.REPLACEMENT_CHARACTER, returnState); + } else { + /* + * Otherwise, return a character token for the Unicode character + * whose code point is that number. 
+ */ + char ch = (char) value; + // [NOCPP[ + if (value == 0x0D) { + errNcrCr(); + } else if ((value <= 0x0008) || (value == 0x000B) + || (value >= 0x000E && value <= 0x001F)) { + ch = errNcrControlChar(ch); + } else if (value >= 0xFDD0 && value <= 0xFDEF) { + errNcrUnassigned(); + } else if ((value & 0xFFFE) == 0xFFFE) { + ch = errNcrNonCharacter(ch); + } else if (value >= 0x007F && value <= 0x009F) { + errNcrControlChar(); + } else { + maybeWarnPrivateUse(ch); + } + // ]NOCPP] + bmpChar[0] = ch; + emitOrAppendOne(bmpChar, returnState); + } + } else if (value <= 0x10FFFF) { + // [NOCPP[ + maybeWarnPrivateUseAstral(); + if ((value & 0xFFFE) == 0xFFFE) { + errAstralNonCharacter(value); + } + // ]NOCPP] + astralChar[0] = (char) (Tokenizer.LEAD_OFFSET + (value >> 10)); + astralChar[1] = (char) (0xDC00 + (value & 0x3FF)); + emitOrAppendTwo(astralChar, returnState); + } else { + errNcrOutOfRange(); + emitOrAppendOne(Tokenizer.REPLACEMENT_CHARACTER, returnState); + } + } + + public void eof() throws SAXException { + int state = stateSave; + int returnState = returnStateSave; + + eofloop: for (;;) { + switch (state) { + case SCRIPT_DATA_LESS_THAN_SIGN: + case SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN: + /* + * Otherwise, emit a U+003C LESS-THAN SIGN character token + */ + tokenHandler.characters(Tokenizer.LT_GT, 0, 1); + /* + * and reconsume the current input character in the data + * state. + */ + break eofloop; + case TAG_OPEN: + /* + * The behavior of this state depends on the content model + * flag. + */ + /* + * Anything else Parse error. + */ + errEofAfterLt(); + /* + * Emit a U+003C LESS-THAN SIGN character token + */ + tokenHandler.characters(Tokenizer.LT_GT, 0, 1); + /* + * and reconsume the current input character in the data + * state. 
+ */ + break eofloop; + case RAWTEXT_RCDATA_LESS_THAN_SIGN: + /* + * Emit a U+003C LESS-THAN SIGN character token + */ + tokenHandler.characters(Tokenizer.LT_GT, 0, 1); + /* + * and reconsume the current input character in the RCDATA + * state. + */ + break eofloop; + case NON_DATA_END_TAG_NAME: + /* + * Emit a U+003C LESS-THAN SIGN character token, a U+002F + * SOLIDUS character token, + */ + tokenHandler.characters(Tokenizer.LT_SOLIDUS, 0, 2); + /* + * a character token for each of the characters in the + * temporary buffer (in the order they were added to the + * buffer), + */ + emitStrBuf(); + /* + * and reconsume the current input character in the RCDATA + * state. + */ + break eofloop; + case CLOSE_TAG_OPEN: + /* EOF Parse error. */ + errEofAfterLt(); + /* + * Emit a U+003C LESS-THAN SIGN character token and a U+002F + * SOLIDUS character token. + */ + tokenHandler.characters(Tokenizer.LT_SOLIDUS, 0, 2); + /* + * Reconsume the EOF character in the data state. + */ + break eofloop; + case TAG_NAME: + /* + * EOF Parse error. + */ + errEofInTagName(); + /* + * Reconsume the EOF character in the data state. + */ + break eofloop; + case BEFORE_ATTRIBUTE_NAME: + case AFTER_ATTRIBUTE_VALUE_QUOTED: + case SELF_CLOSING_START_TAG: + /* EOF Parse error. */ + errEofWithoutGt(); + /* + * Reconsume the EOF character in the data state. + */ + break eofloop; + case ATTRIBUTE_NAME: + /* + * EOF Parse error. + */ + errEofInAttributeName(); + /* + * Reconsume the EOF character in the data state. + */ + break eofloop; + case AFTER_ATTRIBUTE_NAME: + case BEFORE_ATTRIBUTE_VALUE: + /* EOF Parse error. */ + errEofWithoutGt(); + /* + * Reconsume the EOF character in the data state. + */ + break eofloop; + case ATTRIBUTE_VALUE_DOUBLE_QUOTED: + case ATTRIBUTE_VALUE_SINGLE_QUOTED: + case ATTRIBUTE_VALUE_UNQUOTED: + /* EOF Parse error. */ + errEofInAttributeValue(); + /* + * Reconsume the EOF character in the data state. 
+ */ + break eofloop; + case BOGUS_COMMENT: + emitComment(0, 0); + break eofloop; + case BOGUS_COMMENT_HYPHEN: + // [NOCPP[ + maybeAppendSpaceToBogusComment(); + // ]NOCPP] + emitComment(0, 0); + break eofloop; + case MARKUP_DECLARATION_OPEN: + errBogusComment(); + emitComment(0, 0); + break eofloop; + case MARKUP_DECLARATION_HYPHEN: + errBogusComment(); + emitComment(0, 0); + break eofloop; + case MARKUP_DECLARATION_OCTYPE: + if (index < 6) { + errBogusComment(); + emitComment(0, 0); + } else { + /* EOF Parse error. */ + errEofInDoctype(); + /* + * Create a new DOCTYPE token. Set its force-quirks flag + * to on. + */ + doctypeName = ""; + if (systemIdentifier != null) { + Portability.releaseString(systemIdentifier); + systemIdentifier = null; + } + if (publicIdentifier != null) { + Portability.releaseString(publicIdentifier); + publicIdentifier = null; + } + forceQuirks = true; + /* + * Emit the token. + */ + emitDoctypeToken(0); + /* + * Reconsume the EOF character in the data state. + */ + break eofloop; + } + break eofloop; + case COMMENT_START: + case COMMENT: + /* + * EOF Parse error. + */ + errEofInComment(); + /* Emit the comment token. */ + emitComment(0, 0); + /* + * Reconsume the EOF character in the data state. + */ + break eofloop; + case COMMENT_END: + errEofInComment(); + /* Emit the comment token. */ + emitComment(2, 0); + /* + * Reconsume the EOF character in the data state. + */ + break eofloop; + case COMMENT_END_DASH: + case COMMENT_START_DASH: + errEofInComment(); + /* Emit the comment token. */ + emitComment(1, 0); + /* + * Reconsume the EOF character in the data state. + */ + break eofloop; + case COMMENT_END_BANG: + errEofInComment(); + /* Emit the comment token. */ + emitComment(3, 0); + /* + * Reconsume the EOF character in the data state. + */ + break eofloop; + case DOCTYPE: + case BEFORE_DOCTYPE_NAME: + errEofInDoctype(); + /* + * Create a new DOCTYPE token. Set its force-quirks flag to + * on. 
+ */ + forceQuirks = true; + /* + * Emit the token. + */ + emitDoctypeToken(0); + /* + * Reconsume the EOF character in the data state. + */ + break eofloop; + case DOCTYPE_NAME: + errEofInDoctype(); + strBufToDoctypeName(); + /* + * Set the DOCTYPE token's force-quirks flag to on. + */ + forceQuirks = true; + /* + * Emit that DOCTYPE token. + */ + emitDoctypeToken(0); + /* + * Reconsume the EOF character in the data state. + */ + break eofloop; + case DOCTYPE_UBLIC: + case DOCTYPE_YSTEM: + case AFTER_DOCTYPE_NAME: + case AFTER_DOCTYPE_PUBLIC_KEYWORD: + case AFTER_DOCTYPE_SYSTEM_KEYWORD: + case BEFORE_DOCTYPE_PUBLIC_IDENTIFIER: + errEofInDoctype(); + /* + * Set the DOCTYPE token's force-quirks flag to on. + */ + forceQuirks = true; + /* + * Emit that DOCTYPE token. + */ + emitDoctypeToken(0); + /* + * Reconsume the EOF character in the data state. + */ + break eofloop; + case DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED: + case DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED: + /* EOF Parse error. */ + errEofInPublicId(); + /* + * Set the DOCTYPE token's force-quirks flag to on. + */ + forceQuirks = true; + /* + * Emit that DOCTYPE token. + */ + publicIdentifier = strBufToString(); + emitDoctypeToken(0); + /* + * Reconsume the EOF character in the data state. + */ + break eofloop; + case AFTER_DOCTYPE_PUBLIC_IDENTIFIER: + case BEFORE_DOCTYPE_SYSTEM_IDENTIFIER: + case BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS: + errEofInDoctype(); + /* + * Set the DOCTYPE token's force-quirks flag to on. + */ + forceQuirks = true; + /* + * Emit that DOCTYPE token. + */ + emitDoctypeToken(0); + /* + * Reconsume the EOF character in the data state. + */ + break eofloop; + case DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED: + case DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED: + /* EOF Parse error. */ + errEofInSystemId(); + /* + * Set the DOCTYPE token's force-quirks flag to on. + */ + forceQuirks = true; + /* + * Emit that DOCTYPE token. 
+ */ + systemIdentifier = strBufToString(); + emitDoctypeToken(0); + /* + * Reconsume the EOF character in the data state. + */ + break eofloop; + case AFTER_DOCTYPE_SYSTEM_IDENTIFIER: + errEofInDoctype(); + /* + * Set the DOCTYPE token's force-quirks flag to on. + */ + forceQuirks = true; + /* + * Emit that DOCTYPE token. + */ + emitDoctypeToken(0); + /* + * Reconsume the EOF character in the data state. + */ + break eofloop; + case BOGUS_DOCTYPE: + /* + * Emit that DOCTYPE token. + */ + emitDoctypeToken(0); + /* + * Reconsume the EOF character in the data state. + */ + break eofloop; + case CONSUME_CHARACTER_REFERENCE: + /* + * Unlike the definition is the spec, this state does not + * return a value and never requires the caller to + * backtrack. This state takes care of emitting characters + * or appending to the current attribute value. It also + * takes care of that in the case when consuming the entity + * fails. + */ + /* + * This section defines how to consume an entity. This + * definition is used when parsing entities in text and in + * attributes. + * + * The behavior depends on the identity of the next + * character (the one immediately after the U+0026 AMPERSAND + * character): + */ + + emitOrAppendCharRefBuf(returnState); + state = returnState; + continue; + case CHARACTER_REFERENCE_HILO_LOOKUP: + errNoNamedCharacterMatch(); + emitOrAppendCharRefBuf(returnState); + state = returnState; + continue; + case CHARACTER_REFERENCE_TAIL: + outer: for (;;) { + char c = '\u0000'; + entCol++; + /* + * Consume the maximum number of characters possible, + * with the consumed characters matching one of the + * identifiers in the first column of the named + * character references table (in a case-sensitive + * manner). 
+ */ + hiloop: for (;;) { + if (hi == -1) { + break hiloop; + } + if (entCol == NamedCharacters.NAMES[hi].length()) { + break hiloop; + } + if (entCol > NamedCharacters.NAMES[hi].length()) { + break outer; + } else if (c < NamedCharacters.NAMES[hi].charAt(entCol)) { + hi--; + } else { + break hiloop; + } + } + + loloop: for (;;) { + if (hi < lo) { + break outer; + } + if (entCol == NamedCharacters.NAMES[lo].length()) { + candidate = lo; + charRefBufMark = charRefBufLen; + lo++; + } else if (entCol > NamedCharacters.NAMES[lo].length()) { + break outer; + } else if (c > NamedCharacters.NAMES[lo].charAt(entCol)) { + lo++; + } else { + break loloop; + } + } + if (hi < lo) { + break outer; + } + continue; + } + + if (candidate == -1) { + /* + * If no match can be made, then this is a parse error. + */ + errNoNamedCharacterMatch(); + emitOrAppendCharRefBuf(returnState); + state = returnState; + continue eofloop; + } else { + @Const @CharacterName String candidateName = NamedCharacters.NAMES[candidate]; + if (candidateName.length() == 0 + || candidateName.charAt(candidateName.length() - 1) != ';') { + /* + * If the last character matched is not a U+003B + * SEMICOLON (;), there is a parse error. 
+ */ + if ((returnState & DATA_AND_RCDATA_MASK) != 0) { + /* + * If the entity is being consumed as part of an + * attribute, and the last character matched is + * not a U+003B SEMICOLON (;), + */ + char ch; + if (charRefBufMark == charRefBufLen) { + ch = '\u0000'; + } else { + ch = charRefBuf[charRefBufMark]; + } + if ((ch >= '0' && ch <= '9') + || (ch >= 'A' && ch <= 'Z') + || (ch >= 'a' && ch <= 'z')) { + /* + * and the next character is in the range + * U+0030 DIGIT ZERO to U+0039 DIGIT NINE, + * U+0041 LATIN CAPITAL LETTER A to U+005A + * LATIN CAPITAL LETTER Z, or U+0061 LATIN + * SMALL LETTER A to U+007A LATIN SMALL + * LETTER Z, then, for historical reasons, + * all the characters that were matched + * after the U+0026 AMPERSAND (&) must be + * unconsumed, and nothing is returned. + */ + errNoNamedCharacterMatch(); + appendCharRefBufToStrBuf(); + state = returnState; + continue eofloop; + } + } + if ((returnState & DATA_AND_RCDATA_MASK) != 0) { + errUnescapedAmpersandInterpretedAsCharacterReference(); + } else { + errNotSemicolonTerminated(); + } + } + + /* + * Otherwise, return a character token for the character + * corresponding to the entity name (as given by the + * second column of the named character references + * table). + */ + @Const @NoLength char[] val = NamedCharacters.VALUES[candidate]; + if ( + // [NOCPP[ + val.length == 1 + // ]NOCPP] + // CPPONLY: val[1] == 0 + ) { + emitOrAppendOne(val, returnState); + } else { + emitOrAppendTwo(val, returnState); + } + // this is so complicated! + if (charRefBufMark < charRefBufLen) { + if ((returnState & DATA_AND_RCDATA_MASK) != 0) { + appendStrBuf(charRefBuf, charRefBufMark, + charRefBufLen - charRefBufMark); + } else { + tokenHandler.characters(charRefBuf, charRefBufMark, + charRefBufLen - charRefBufMark); + } + } + charRefBufLen = 0; + state = returnState; + continue eofloop; + /* + * If the markup contains I'm ¬it; I tell you, the + * entity is parsed as "not", as in, I'm ¬it; I tell + * you. 
But if the markup was I'm ∉ I tell you, + * the entity would be parsed as "notin;", resulting in + * I'm ∉ I tell you. + */ + } + case CONSUME_NCR: + case DECIMAL_NRC_LOOP: + case HEX_NCR_LOOP: + /* + * If no characters match the range, then don't consume any + * characters (and unconsume the U+0023 NUMBER SIGN + * character and, if appropriate, the X character). This is + * a parse error; nothing is returned. + * + * Otherwise, if the next character is a U+003B SEMICOLON, + * consume that too. If it isn't, there is a parse error. + */ + if (!seenDigits) { + errNoDigitsInNCR(); + emitOrAppendCharRefBuf(returnState); + state = returnState; + continue; + } else { + errCharRefLacksSemicolon(); + } + // WARNING previous state sets reconsume + handleNcrValue(returnState); + state = returnState; + continue; + case CDATA_RSQB: + tokenHandler.characters(Tokenizer.RSQB_RSQB, 0, 1); + break eofloop; + case CDATA_RSQB_RSQB: + tokenHandler.characters(Tokenizer.RSQB_RSQB, 0, 2); + break eofloop; + case DATA: + default: + break eofloop; + } + } + // case DATA: + /* + * EOF Emit an end-of-file token. + */ + tokenHandler.eof(); + return; + } + + private void emitDoctypeToken(int pos) throws SAXException { + cstart = pos + 1; + tokenHandler.doctype(doctypeName, publicIdentifier, systemIdentifier, + forceQuirks); + // It is OK and sufficient to release these here, since + // there's no way out of the doctype states than through paths + // that call this method. 
+ doctypeName = null; + Portability.releaseString(publicIdentifier); + publicIdentifier = null; + Portability.releaseString(systemIdentifier); + systemIdentifier = null; + } + + @Inline protected char checkChar(@NoLength char[] buf, int pos) + throws SAXException { + return buf[pos]; + } + + public boolean internalEncodingDeclaration(String internalCharset) + throws SAXException { + if (encodingDeclarationHandler != null) { + return encodingDeclarationHandler.internalEncodingDeclaration(internalCharset); + } + return false; + } + + /** + * @param val + * @throws SAXException + */ + private void emitOrAppendTwo(@Const @NoLength char[] val, int returnState) + throws SAXException { + if ((returnState & DATA_AND_RCDATA_MASK) != 0) { + appendStrBuf(val[0]); + appendStrBuf(val[1]); + } else { + tokenHandler.characters(val, 0, 2); + } + } + + private void emitOrAppendOne(@Const @NoLength char[] val, int returnState) + throws SAXException { + if ((returnState & DATA_AND_RCDATA_MASK) != 0) { + appendStrBuf(val[0]); + } else { + tokenHandler.characters(val, 0, 1); + } + } + + public void end() throws SAXException { + strBuf = null; + doctypeName = null; + if (systemIdentifier != null) { + Portability.releaseString(systemIdentifier); + systemIdentifier = null; + } + if (publicIdentifier != null) { + Portability.releaseString(publicIdentifier); + publicIdentifier = null; + } + if (tagName != null) { + tagName.release(); + tagName = null; + } + if (attributeName != null) { + attributeName.release(); + attributeName = null; + } + tokenHandler.endTokenization(); + if (attributes != null) { + // [NOCPP[ + attributes = null; + // ]NOCPP] + // CPPONLY: attributes.clear(mappingLangToXmlLang); + } + } + + public void requestSuspension() { + shouldSuspend = true; + } + + // [NOCPP[ + + public void becomeConfident() { + confident = true; + } + + /** + * Returns the nextCharOnNewLine. 
+ * + * @return the nextCharOnNewLine + */ + public boolean isNextCharOnNewLine() { + return false; + } + + public boolean isPrevCR() { + return lastCR; + } + + /** + * Returns the line. + * + * @return the line + */ + public int getLine() { + return -1; + } + + /** + * Returns the col. + * + * @return the col + */ + public int getCol() { + return -1; + } + + // ]NOCPP] + + public boolean isInDataState() { + return (stateSave == DATA); + } + + public void resetToDataState() { + clearStrBufAfterUse(); + charRefBufLen = 0; + stateSave = Tokenizer.DATA; + // line = 1; XXX line numbers + lastCR = false; + index = 0; + forceQuirks = false; + additional = '\u0000'; + entCol = -1; + firstCharKey = -1; + lo = 0; + hi = 0; // will always be overwritten before use anyway + candidate = -1; + charRefBufMark = 0; + value = 0; + seenDigits = false; + endTag = false; + shouldSuspend = false; + initDoctypeFields(); + if (tagName != null) { + tagName.release(); + tagName = null; + } + if (attributeName != null) { + attributeName.release(); + attributeName = null; + } + if (newAttributesEachTime) { + if (attributes != null) { + Portability.delete(attributes); + attributes = null; + } + } + } + + public void loadState(Tokenizer other) throws SAXException { + strBufLen = other.strBufLen; + if (strBufLen > strBuf.length) { + strBuf = new char[strBufLen]; + } + System.arraycopy(other.strBuf, 0, strBuf, 0, strBufLen); + + charRefBufLen = other.charRefBufLen; + System.arraycopy(other.charRefBuf, 0, charRefBuf, 0, charRefBufLen); + + stateSave = other.stateSave; + returnStateSave = other.returnStateSave; + endTagExpectation = other.endTagExpectation; + endTagExpectationAsArray = other.endTagExpectationAsArray; + // line = 1; XXX line numbers + lastCR = other.lastCR; + index = other.index; + forceQuirks = other.forceQuirks; + additional = other.additional; + entCol = other.entCol; + firstCharKey = other.firstCharKey; + lo = other.lo; + hi = other.hi; + candidate = other.candidate; + 
charRefBufMark = other.charRefBufMark; + value = other.value; + seenDigits = other.seenDigits; + endTag = other.endTag; + shouldSuspend = false; + + if (other.doctypeName == null) { + doctypeName = null; + } else { + doctypeName = Portability.newLocalFromLocal(other.doctypeName, + interner); + } + + Portability.releaseString(systemIdentifier); + if (other.systemIdentifier == null) { + systemIdentifier = null; + } else { + systemIdentifier = Portability.newStringFromString(other.systemIdentifier); + } + + Portability.releaseString(publicIdentifier); + if (other.publicIdentifier == null) { + publicIdentifier = null; + } else { + publicIdentifier = Portability.newStringFromString(other.publicIdentifier); + } + + if (tagName != null) { + tagName.release(); + } + if (other.tagName == null) { + tagName = null; + } else { + tagName = other.tagName.cloneElementName(interner); + } + + if (attributeName != null) { + attributeName.release(); + } + if (other.attributeName == null) { + attributeName = null; + } else { + attributeName = other.attributeName.cloneAttributeName(interner); + } + + Portability.delete(attributes); + if (other.attributes == null) { + attributes = null; + } else { + attributes = other.attributes.cloneAttributes(interner); + } + } + + public void initializeWithoutStarting() throws SAXException { + confident = false; + strBuf = null; + line = 1; + // CPPONLY: attributeLine = 1; + // [NOCPP[ + html4 = false; + metaBoundaryPassed = false; + wantsComments = tokenHandler.wantsComments(); + if (!newAttributesEachTime) { + attributes = new HtmlAttributes(mappingLangToXmlLang); + } + // ]NOCPP] + resetToDataState(); + } + + protected void errGarbageAfterLtSlash() throws SAXException { + } + + protected void errLtSlashGt() throws SAXException { + } + + protected void errWarnLtSlashInRcdata() throws SAXException { + } + + protected void errHtml4LtSlashInRcdata(char folded) throws SAXException { + } + + protected void errCharRefLacksSemicolon() throws SAXException 
{
    }

    /*
     * Error and warning hooks. Every method in this run is a no-op in this
     * class; the two char-returning variants hand their argument back
     * unchanged. They are protected, presumably so that a subclass can
     * override them to report the corresponding condition.
     */

    protected void errNoDigitsInNCR() throws SAXException {
    }

    protected void errGtInSystemId() throws SAXException {
    }

    protected void errGtInPublicId() throws SAXException {
    }

    protected void errNamelessDoctype() throws SAXException {
    }

    protected void errConsecutiveHyphens() throws SAXException {
    }

    protected void errPrematureEndOfComment() throws SAXException {
    }

    protected void errBogusComment() throws SAXException {
    }

    protected void errUnquotedAttributeValOrNull(char c) throws SAXException {
    }

    protected void errSlashNotFollowedByGt() throws SAXException {
    }

    protected void errHtml4XmlVoidSyntax() throws SAXException {
    }

    protected void errNoSpaceBetweenAttributes() throws SAXException {
    }

    protected void errHtml4NonNameInUnquotedAttribute(char c)
            throws SAXException {
    }

    protected void errLtOrEqualsOrGraveInUnquotedAttributeOrNull(char c)
            throws SAXException {
    }

    protected void errAttributeValueMissing() throws SAXException {
    }

    protected void errBadCharBeforeAttributeNameOrNull(char c)
            throws SAXException {
    }

    protected void errEqualsSignBeforeAttributeName() throws SAXException {
    }

    protected void errBadCharAfterLt(char c) throws SAXException {
    }

    protected void errLtGt() throws SAXException {
    }

    protected void errProcessingInstruction() throws SAXException {
    }

    protected void errUnescapedAmpersandInterpretedAsCharacterReference()
            throws SAXException {
    }

    protected void errNotSemicolonTerminated() throws SAXException {
    }

    protected void errNoNamedCharacterMatch() throws SAXException {
    }

    protected void errQuoteBeforeAttributeName(char c) throws SAXException {
    }

    protected void errQuoteOrLtInAttributeNameOrNull(char c)
            throws SAXException {
    }

    protected void errExpectedPublicId() throws SAXException {
    }

    protected void errBogusDoctype() throws SAXException {
    }

    protected void maybeWarnPrivateUseAstral() throws SAXException {
    }

    protected void maybeWarnPrivateUse(char ch) throws SAXException {
    }

    protected void maybeErrAttributesOnEndTag(HtmlAttributes attrs)
            throws SAXException {
    }

    protected void maybeErrSlashInEndTag(boolean selfClosing)
            throws SAXException {
    }

    /**
     * Hook for a non-character produced by a numeric character reference.
     *
     * @param ch
     *            the character in question
     * @return <code>ch</code>, unchanged in this implementation
     */
    protected char errNcrNonCharacter(char ch) throws SAXException {
        return ch;
    }

    protected void errAstralNonCharacter(int ch) throws SAXException {
    }

    protected void errNcrSurrogate() throws SAXException {
    }

    /**
     * Hook for a control character produced by a numeric character
     * reference. A separate no-argument overload is declared further down.
     *
     * @param ch
     *            the character in question
     * @return <code>ch</code>, unchanged in this implementation
     */
    protected char errNcrControlChar(char ch) throws SAXException {
        return ch;
    }

    protected void errNcrCr() throws SAXException {
    }

    protected void errNcrInC1Range() throws SAXException {
    }

    protected void errEofInPublicId() throws SAXException {
    }

    protected void errEofInComment() throws SAXException {
    }

    protected void errEofInDoctype() throws SAXException {
    }

    protected void errEofInAttributeValue() throws SAXException {
    }

    protected void errEofInAttributeName() throws SAXException {
    }

    protected void errEofWithoutGt() throws SAXException {
    }

    protected void errEofInTagName() throws SAXException {
    }

    protected void errEofInEndTag() throws SAXException {
    }

    protected void errEofAfterLt() throws SAXException {
    }

    protected void errNcrOutOfRange() throws SAXException {
    }

    protected void errNcrUnassigned() throws SAXException {
    }

    protected void errDuplicateAttribute() throws SAXException {
    }

    protected void errEofInSystemId() throws SAXException {
    }

    protected void errExpectedSystemId() throws SAXException {
    }

    protected void errMissingSpaceBeforeDoctypeName() throws SAXException {
    }

    protected void errHyphenHyphenBang() throws SAXException {
    }

    // No-argument overload of errNcrControlChar(char); also a no-op here.
    protected void errNcrControlChar() throws SAXException {
    }

    protected void errNcrZero() throws SAXException {
    }

    protected void errNoSpaceBetweenDoctypeSystemKeywordAndQuote()
            throws SAXException {
    }

    protected void
errNoSpaceBetweenPublicAndSystemIds() throws SAXException { + } + + protected void errNoSpaceBetweenDoctypePublicKeywordAndQuote() + throws SAXException { + } + + protected void noteAttributeWithoutValue() throws SAXException { + } + + protected void noteUnquotedAttributeValue() throws SAXException { + } + + /** + * Sets the encodingDeclarationHandler. + * + * @param encodingDeclarationHandler + * the encodingDeclarationHandler to set + */ + public void setEncodingDeclarationHandler( + EncodingDeclarationHandler encodingDeclarationHandler) { + this.encodingDeclarationHandler = encodingDeclarationHandler; + } + + void destructor() { + // The translator will write refcount tracing stuff here + Portability.delete(attributes); + attributes = null; + } + + // [NOCPP[ + + /** + * Sets an offset to be added to the position reported to + * <code>TransitionHandler</code>. + * + * @param offset the offset + */ + public void setTransitionBaseOffset(int offset) { + + } + + // ]NOCPP] + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/TreeBuilder.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/TreeBuilder.java new file mode 100644 index 000000000..de7d8478d --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/TreeBuilder.java @@ -0,0 +1,6553 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2007-2015 Mozilla Foundation + * Copyright (c) 2018-2020 Moonchild Productions + * Copyright (c) 2020 Binary Outcast + * Portions of comments Copyright 2004-2008 Apple Computer, Inc., Mozilla + * Foundation, and Opera Software ASA. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * The comments following this one that use the same comment syntax as this + * comment are quotes from the WHATWG HTML 5 spec as of 27 June 2007 + * amended as of June 28 2007. + * That document came with this statement: + * "© Copyright 2004-2007 Apple Computer, Inc., Mozilla Foundation, and + * Opera Software ASA. You are granted a license to use, reproduce and + * create derivative works of this document." 
+ */ + +package nu.validator.htmlparser.impl; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; + +import org.xml.sax.ErrorHandler; +import org.xml.sax.Locator; +import org.xml.sax.SAXException; +import org.xml.sax.SAXParseException; + +import nu.validator.htmlparser.annotation.Auto; +import nu.validator.htmlparser.annotation.Const; +import nu.validator.htmlparser.annotation.IdType; +import nu.validator.htmlparser.annotation.Inline; +import nu.validator.htmlparser.annotation.Literal; +import nu.validator.htmlparser.annotation.Local; +import nu.validator.htmlparser.annotation.NoLength; +import nu.validator.htmlparser.annotation.NsUri; +import nu.validator.htmlparser.common.DoctypeExpectation; +import nu.validator.htmlparser.common.DocumentMode; +import nu.validator.htmlparser.common.DocumentModeHandler; +import nu.validator.htmlparser.common.Interner; +import nu.validator.htmlparser.common.TokenHandler; +import nu.validator.htmlparser.common.XmlViolationPolicy; + +public abstract class TreeBuilder<T> implements TokenHandler, + TreeBuilderState<T> { + + /** + * Array version of U+FFFD. 
+ */ + private static final @NoLength char[] REPLACEMENT_CHARACTER = { '\uFFFD' }; + + // Start dispatch groups + + final static int OTHER = 0; + + final static int A = 1; + + final static int BASE = 2; + + final static int BODY = 3; + + final static int BR = 4; + + final static int BUTTON = 5; + + final static int CAPTION = 6; + + final static int COL = 7; + + final static int COLGROUP = 8; + + final static int FORM = 9; + + final static int FRAME = 10; + + final static int FRAMESET = 11; + + final static int IMAGE = 12; + + final static int INPUT = 13; + + final static int ISINDEX = 14; + + final static int LI = 15; + + final static int LINK_OR_BASEFONT_OR_BGSOUND = 16; + + final static int MATH = 17; + + final static int META = 18; + + final static int SVG = 19; + + final static int HEAD = 20; + + final static int HR = 22; + + final static int HTML = 23; + + final static int NOBR = 24; + + final static int NOFRAMES = 25; + + final static int NOSCRIPT = 26; + + final static int OPTGROUP = 27; + + final static int OPTION = 28; + + final static int P = 29; + + final static int PLAINTEXT = 30; + + final static int SCRIPT = 31; + + final static int SELECT = 32; + + final static int STYLE = 33; + + final static int TABLE = 34; + + final static int TEXTAREA = 35; + + final static int TITLE = 36; + + final static int TR = 37; + + final static int XMP = 38; + + final static int TBODY_OR_THEAD_OR_TFOOT = 39; + + final static int TD_OR_TH = 40; + + final static int DD_OR_DT = 41; + + final static int H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6 = 42; + + final static int MARQUEE_OR_APPLET = 43; + + final static int PRE_OR_LISTING = 44; + + final static int B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U = 45; + + final static int UL_OR_OL_OR_DL = 46; + + final static int IFRAME = 47; + + final static int EMBED = 48; + + final static int AREA_OR_WBR = 49; + + final static int DIV_OR_BLOCKQUOTE_OR_CENTER_OR_MENU = 50; + + final static int 
ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIALOG_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY = 51; + + final static int RUBY_OR_SPAN_OR_SUB_OR_SUP_OR_VAR = 52; + + final static int RB_OR_RTC = 53; + + final static int PARAM_OR_SOURCE_OR_TRACK = 55; + + final static int MGLYPH_OR_MALIGNMARK = 56; + + final static int MI_MO_MN_MS_MTEXT = 57; + + final static int ANNOTATION_XML = 58; + + final static int FOREIGNOBJECT_OR_DESC = 59; + + final static int NOEMBED = 60; + + final static int FIELDSET = 61; + + final static int OUTPUT = 62; + + final static int OBJECT = 63; + + final static int FONT = 64; + + final static int KEYGEN = 65; + + final static int MENUITEM = 66; + + final static int TEMPLATE = 67; + + final static int IMG = 68; + + final static int RT_OR_RP = 69; + + // start insertion modes + + private static final int IN_ROW = 0; + + private static final int IN_TABLE_BODY = 1; + + private static final int IN_TABLE = 2; + + private static final int IN_CAPTION = 3; + + private static final int IN_CELL = 4; + + private static final int FRAMESET_OK = 5; + + private static final int IN_BODY = 6; + + private static final int IN_HEAD = 7; + + private static final int IN_HEAD_NOSCRIPT = 8; + + // no fall-through + + private static final int IN_COLUMN_GROUP = 9; + + // no fall-through + + private static final int IN_SELECT_IN_TABLE = 10; + + private static final int IN_SELECT = 11; + + // no fall-through + + private static final int AFTER_BODY = 12; + + // no fall-through + + private static final int IN_FRAMESET = 13; + + private static final int AFTER_FRAMESET = 14; + + // no fall-through + + private static final int INITIAL = 15; + + // could add fall-through + + private static final int BEFORE_HTML = 16; + + // could add fall-through + + private static final int BEFORE_HEAD = 17; + + // no fall-through + + private static final int AFTER_HEAD = 18; + + // no fall-through + + private static final int 
AFTER_AFTER_BODY = 19; + + // no fall-through + + private static final int AFTER_AFTER_FRAMESET = 20; + + // no fall-through + + private static final int TEXT = 21; + + private static final int IN_TEMPLATE = 22; + + // start charset states + + private static final int CHARSET_INITIAL = 0; + + private static final int CHARSET_C = 1; + + private static final int CHARSET_H = 2; + + private static final int CHARSET_A = 3; + + private static final int CHARSET_R = 4; + + private static final int CHARSET_S = 5; + + private static final int CHARSET_E = 6; + + private static final int CHARSET_T = 7; + + private static final int CHARSET_EQUALS = 8; + + private static final int CHARSET_SINGLE_QUOTED = 9; + + private static final int CHARSET_DOUBLE_QUOTED = 10; + + private static final int CHARSET_UNQUOTED = 11; + + // end pseudo enums + + // [NOCPP[ + + private final static String[] HTML4_PUBLIC_IDS = { + "-//W3C//DTD HTML 4.0 Frameset//EN", + "-//W3C//DTD HTML 4.0 Transitional//EN", + "-//W3C//DTD HTML 4.0//EN", "-//W3C//DTD HTML 4.01 Frameset//EN", + "-//W3C//DTD HTML 4.01 Transitional//EN", + "-//W3C//DTD HTML 4.01//EN" }; + + // ]NOCPP] + + @Literal private final static String[] QUIRKY_PUBLIC_IDS = { + "+//silmaril//dtd html pro v0r11 19970101//", + "-//advasoft ltd//dtd html 3.0 aswedit + extensions//", + "-//as//dtd html 3.0 aswedit + extensions//", + "-//ietf//dtd html 2.0 level 1//", + "-//ietf//dtd html 2.0 level 2//", + "-//ietf//dtd html 2.0 strict level 1//", + "-//ietf//dtd html 2.0 strict level 2//", + "-//ietf//dtd html 2.0 strict//", + "-//ietf//dtd html 2.0//", + "-//ietf//dtd html 2.1e//", + "-//ietf//dtd html 3.0//", + "-//ietf//dtd html 3.2 final//", + "-//ietf//dtd html 3.2//", + "-//ietf//dtd html 3//", + "-//ietf//dtd html level 0//", + "-//ietf//dtd html level 1//", + "-//ietf//dtd html level 2//", + "-//ietf//dtd html level 3//", + "-//ietf//dtd html strict level 0//", + "-//ietf//dtd html strict level 1//", + "-//ietf//dtd html strict level 2//", + 
"-//ietf//dtd html strict level 3//", + "-//ietf//dtd html strict//", + "-//ietf//dtd html//", + "-//metrius//dtd metrius presentational//", + "-//microsoft//dtd internet explorer 2.0 html strict//", + "-//microsoft//dtd internet explorer 2.0 html//", + "-//microsoft//dtd internet explorer 2.0 tables//", + "-//microsoft//dtd internet explorer 3.0 html strict//", + "-//microsoft//dtd internet explorer 3.0 html//", + "-//microsoft//dtd internet explorer 3.0 tables//", + "-//netscape comm. corp.//dtd html//", + "-//netscape comm. corp.//dtd strict html//", + "-//o'reilly and associates//dtd html 2.0//", + "-//o'reilly and associates//dtd html extended 1.0//", + "-//o'reilly and associates//dtd html extended relaxed 1.0//", + "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//", + "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//", + "-//spyglass//dtd html 2.0 extended//", + "-//sq//dtd html 2.0 hotmetal + extensions//", + "-//sun microsystems corp.//dtd hotjava html//", + "-//sun microsystems corp.//dtd hotjava strict html//", + "-//w3c//dtd html 3 1995-03-24//", "-//w3c//dtd html 3.2 draft//", + "-//w3c//dtd html 3.2 final//", "-//w3c//dtd html 3.2//", + "-//w3c//dtd html 3.2s draft//", "-//w3c//dtd html 4.0 frameset//", + "-//w3c//dtd html 4.0 transitional//", + "-//w3c//dtd html experimental 19960712//", + "-//w3c//dtd html experimental 970421//", "-//w3c//dtd w3 html//", + "-//w3o//dtd w3 html 3.0//", "-//webtechs//dtd mozilla html 2.0//", + "-//webtechs//dtd mozilla html//" }; + + private static final int NOT_FOUND_ON_STACK = Integer.MAX_VALUE; + + // [NOCPP[ + + private static final @Local String HTML_LOCAL = "html"; + + // ]NOCPP] + + private int mode = INITIAL; + + private int originalMode = INITIAL; + + /** + * Used only when moving back to IN_BODY. 
+ */ + private boolean framesetOk = true; + + protected Tokenizer tokenizer; + + // [NOCPP[ + + protected ErrorHandler errorHandler; + + private DocumentModeHandler documentModeHandler; + + private DoctypeExpectation doctypeExpectation = DoctypeExpectation.HTML; + + private LocatorImpl firstCommentLocation; + + // ]NOCPP] + + private boolean scriptingEnabled = false; + + private boolean needToDropLF; + + // [NOCPP[ + + private boolean wantingComments; + + // ]NOCPP] + + private boolean fragment; + + private @Local String contextName; + + private @NsUri String contextNamespace; + + private T contextNode; + + /** + * Stack of template insertion modes + */ + private @Auto int[] templateModeStack; + + /** + * Current template mode stack pointer. + */ + private int templateModePtr = -1; + + private @Auto StackNode<T>[] stack; + + private int currentPtr = -1; + + private @Auto StackNode<T>[] listOfActiveFormattingElements; + + private int listPtr = -1; + + private T formPointer; + + private T headPointer; + + /** + * Used to work around Gecko limitations. Not used in Java. + */ + private T deepTreeSurrogateParent; + + protected @Auto char[] charBuffer; + + protected int charBufferLen = 0; + + private boolean quirks = false; + + private boolean isSrcdocDocument = false; + + // [NOCPP[ + + private boolean reportingDoctype = true; + + private XmlViolationPolicy namePolicy = XmlViolationPolicy.ALTER_INFOSET; + + private final Map<String, LocatorImpl> idLocations = new HashMap<String, LocatorImpl>(); + + private boolean html4; + + // ]NOCPP] + + protected TreeBuilder() { + fragment = false; + } + + /** + * Reports an condition that would make the infoset incompatible with XML + * 1.0 as fatal. 
+ * + * @throws SAXException + * @throws SAXParseException + */ + protected void fatal() throws SAXException { + } + + // [NOCPP[ + + protected final void fatal(Exception e) throws SAXException { + SAXParseException spe = new SAXParseException(e.getMessage(), + tokenizer, e); + if (errorHandler != null) { + errorHandler.fatalError(spe); + } + throw spe; + } + + final void fatal(String s) throws SAXException { + SAXParseException spe = new SAXParseException(s, tokenizer); + if (errorHandler != null) { + errorHandler.fatalError(spe); + } + throw spe; + } + + /** + * Reports a Parse Error. + * + * @param message + * the message + * @throws SAXException + */ + final void err(String message) throws SAXException { + if (errorHandler == null) { + return; + } + errNoCheck(message); + } + + /** + * Reports a Parse Error without checking if an error handler is present. + * + * @param message + * the message + * @throws SAXException + */ + final void errNoCheck(String message) throws SAXException { + SAXParseException spe = new SAXParseException(message, tokenizer); + errorHandler.error(spe); + } + + private void errListUnclosedStartTags(int eltPos) throws SAXException { + if (currentPtr != -1) { + for (int i = currentPtr; i > eltPos; i--) { + reportUnclosedElementNameAndLocation(i); + } + } + } + + /** + * Reports the name and location of an unclosed element. 
+ * + * @throws SAXException + */ + private final void reportUnclosedElementNameAndLocation(int pos) throws SAXException { + StackNode<T> node = stack[pos]; + if (node.isOptionalEndTag()) { + return; + } + TaintableLocatorImpl locator = node.getLocator(); + if (locator.isTainted()) { + return; + } + locator.markTainted(); + SAXParseException spe = new SAXParseException( + "Unclosed element \u201C" + node.popName + "\u201D.", locator); + errorHandler.error(spe); + } + + /** + * Reports a warning + * + * @param message + * the message + * @throws SAXException + */ + final void warn(String message) throws SAXException { + if (errorHandler == null) { + return; + } + SAXParseException spe = new SAXParseException(message, tokenizer); + errorHandler.warning(spe); + } + + /** + * Reports a warning with an explicit locator + * + * @param message + * the message + * @throws SAXException + */ + final void warn(String message, Locator locator) throws SAXException { + if (errorHandler == null) { + return; + } + SAXParseException spe = new SAXParseException(message, locator); + errorHandler.warning(spe); + } + + // ]NOCPP] + + @SuppressWarnings("unchecked") public final void startTokenization(Tokenizer self) throws SAXException { + tokenizer = self; + stack = new StackNode[64]; + templateModeStack = new int[64]; + listOfActiveFormattingElements = new StackNode[64]; + needToDropLF = false; + originalMode = INITIAL; + templateModePtr = -1; + currentPtr = -1; + listPtr = -1; + formPointer = null; + headPointer = null; + deepTreeSurrogateParent = null; + // [NOCPP[ + html4 = false; + idLocations.clear(); + wantingComments = wantsComments(); + firstCommentLocation = null; + // ]NOCPP] + start(fragment); + charBufferLen = 0; + charBuffer = null; + framesetOk = true; + if (fragment) { + T elt; + if (contextNode != null) { + elt = contextNode; + } else { + elt = createHtmlElementSetAsRoot(tokenizer.emptyAttributes()); + } + // When the context node is not in the HTML namespace, contrary 
+ // to the spec, the first node on the stack is not set to "html" + // in the HTML namespace. Instead, it is set to a node that has + // the characteristics of the appropriate "adjusted current node". + // This way, there is no need to perform "adjusted current node" + // checks during tree construction. Instead, it's sufficient to + // just look at the current node. However, this also means that it + // is not safe to treat "html" in the HTML namespace as a sentinel + // that ends stack popping. Instead, stack popping loops that are + // meant not to pop the first element on the stack need to check + // for currentPos becoming zero. + if (contextNamespace == "http://www.w3.org/2000/svg") { + ElementName elementName = ElementName.SVG; + if ("title" == contextName || "desc" == contextName + || "foreignObject" == contextName) { + // These elements are all alike and we don't care about + // the exact name. + elementName = ElementName.FOREIGNOBJECT; + } + // This is the SVG variant of the StackNode constructor. + StackNode<T> node = new StackNode<T>(elementName, + elementName.camelCaseName, elt + // [NOCPP[ + , errorHandler == null ? null + : new TaintableLocatorImpl(tokenizer) + // ]NOCPP] + ); + currentPtr++; + stack[currentPtr] = node; + tokenizer.setState(Tokenizer.DATA); + // The frameset-ok flag is set even though <frameset> never + // ends up being allowed as HTML frameset in the fragment case. + mode = FRAMESET_OK; + } else if (contextNamespace == "http://www.w3.org/1998/Math/MathML") { + ElementName elementName = ElementName.MATH; + if ("mi" == contextName || "mo" == contextName + || "mn" == contextName || "ms" == contextName + || "mtext" == contextName) { + // These elements are all alike and we don't care about + // the exact name. 
+ elementName = ElementName.MTEXT; + } else if ("annotation-xml" == contextName) { + elementName = ElementName.ANNOTATION_XML; + // Blink does not check the encoding attribute of the + // annotation-xml element innerHTML is being set on. + // Let's do the same at least until + // https://www.w3.org/Bugs/Public/show_bug.cgi?id=26783 + // is resolved. + } + // This is the MathML variant of the StackNode constructor. + StackNode<T> node = new StackNode<T>(elementName, elt, + elementName.name, false + // [NOCPP[ + , errorHandler == null ? null + : new TaintableLocatorImpl(tokenizer) + // ]NOCPP] + ); + currentPtr++; + stack[currentPtr] = node; + tokenizer.setState(Tokenizer.DATA); + // The frameset-ok flag is set even though <frameset> never + // ends up being allowed as HTML frameset in the fragment case. + mode = FRAMESET_OK; + } else { // html + StackNode<T> node = new StackNode<T>(ElementName.HTML, elt + // [NOCPP[ + , errorHandler == null ? null + : new TaintableLocatorImpl(tokenizer) + // ]NOCPP] + ); + currentPtr++; + stack[currentPtr] = node; + if ("template" == contextName) { + pushTemplateMode(IN_TEMPLATE); + } + resetTheInsertionMode(); + formPointer = getFormPointerForContext(contextNode); + if ("title" == contextName || "textarea" == contextName) { + tokenizer.setState(Tokenizer.RCDATA); + } else if ("style" == contextName || "xmp" == contextName + || "iframe" == contextName || "noembed" == contextName + || "noframes" == contextName + || (scriptingEnabled && "noscript" == contextName)) { + tokenizer.setState(Tokenizer.RAWTEXT); + } else if ("plaintext" == contextName) { + tokenizer.setState(Tokenizer.PLAINTEXT); + } else if ("script" == contextName) { + tokenizer.setState(Tokenizer.SCRIPT_DATA); + } else { + tokenizer.setState(Tokenizer.DATA); + } + } + contextName = null; + contextNode = null; + } else { + mode = INITIAL; + // If we are viewing XML source, put a foreign element permanently + // on the stack so that cdataSectionAllowed() returns true. 
+ // CPPONLY: if (tokenizer.isViewingXmlSource()) { + // CPPONLY: T elt = createElement("http://www.w3.org/2000/svg", + // CPPONLY: "svg", + // CPPONLY: tokenizer.emptyAttributes(), null); + // CPPONLY: StackNode<T> node = new StackNode<T>(ElementName.SVG, + // CPPONLY: "svg", + // CPPONLY: elt); + // CPPONLY: currentPtr++; + // CPPONLY: stack[currentPtr] = node; + // CPPONLY: } + } + } + + public final void doctype(@Local String name, String publicIdentifier, + String systemIdentifier, boolean forceQuirks) throws SAXException { + needToDropLF = false; + if (!isInForeign() && mode == INITIAL) { + // [NOCPP[ + if (reportingDoctype) { + // ]NOCPP] + String emptyString = Portability.newEmptyString(); + appendDoctypeToDocument(name == null ? "" : name, + publicIdentifier == null ? emptyString + : publicIdentifier, + systemIdentifier == null ? emptyString + : systemIdentifier); + Portability.releaseString(emptyString); + // [NOCPP[ + } + switch (doctypeExpectation) { + case HTML: + // ]NOCPP] + if (isQuirky(name, publicIdentifier, systemIdentifier, + forceQuirks)) { + errQuirkyDoctype(); + documentModeInternal(DocumentMode.QUIRKS_MODE, + publicIdentifier, systemIdentifier, false); + } else if (isAlmostStandards(publicIdentifier, + systemIdentifier)) { + // [NOCPP[ + if (firstCommentLocation != null) { + warn("Comments seen before doctype. Internet Explorer will go into the quirks mode.", + firstCommentLocation); + } + // ]NOCPP] + errAlmostStandardsDoctype(); + documentModeInternal( + DocumentMode.ALMOST_STANDARDS_MODE, + publicIdentifier, systemIdentifier, false); + } else { + // [NOCPP[ + if (firstCommentLocation != null) { + warn("Comments seen before doctype. 
Internet Explorer will go into the quirks mode.", + firstCommentLocation); + } + if ((Portability.literalEqualsString( + "-//W3C//DTD HTML 4.0//EN", publicIdentifier) && (systemIdentifier == null || Portability.literalEqualsString( + "http://www.w3.org/TR/REC-html40/strict.dtd", + systemIdentifier))) + || (Portability.literalEqualsString( + "-//W3C//DTD HTML 4.01//EN", + publicIdentifier) && (systemIdentifier == null || Portability.literalEqualsString( + "http://www.w3.org/TR/html4/strict.dtd", + systemIdentifier))) + || (Portability.literalEqualsString( + "-//W3C//DTD XHTML 1.0 Strict//EN", + publicIdentifier) && Portability.literalEqualsString( + "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd", + systemIdentifier)) + || (Portability.literalEqualsString( + "-//W3C//DTD XHTML 1.1//EN", + publicIdentifier) && Portability.literalEqualsString( + "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd", + systemIdentifier)) + + ) { + warn("Obsolete doctype. Expected \u201C<!DOCTYPE html>\u201D."); + } else if (!((systemIdentifier == null || Portability.literalEqualsString( + "about:legacy-compat", systemIdentifier)) && publicIdentifier == null)) { + err("Legacy doctype. Expected \u201C<!DOCTYPE html>\u201D."); + } + // ]NOCPP] + documentModeInternal(DocumentMode.STANDARDS_MODE, + publicIdentifier, systemIdentifier, false); + } + // [NOCPP[ + break; + case HTML401_STRICT: + html4 = true; + tokenizer.turnOnAdditionalHtml4Errors(); + if (isQuirky(name, publicIdentifier, systemIdentifier, + forceQuirks)) { + err("Quirky doctype. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">\u201D."); + documentModeInternal(DocumentMode.QUIRKS_MODE, + publicIdentifier, systemIdentifier, true); + } else if (isAlmostStandards(publicIdentifier, + systemIdentifier)) { + if (firstCommentLocation != null) { + warn("Comments seen before doctype. 
Internet Explorer will go into the quirks mode.", + firstCommentLocation); + } + err("Almost standards mode doctype. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">\u201D."); + documentModeInternal( + DocumentMode.ALMOST_STANDARDS_MODE, + publicIdentifier, systemIdentifier, true); + } else { + if (firstCommentLocation != null) { + warn("Comments seen before doctype. Internet Explorer will go into the quirks mode.", + firstCommentLocation); + } + if ("-//W3C//DTD HTML 4.01//EN".equals(publicIdentifier)) { + if (!"http://www.w3.org/TR/html4/strict.dtd".equals(systemIdentifier)) { + warn("The doctype did not contain the system identifier prescribed by the HTML 4.01 specification. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">\u201D."); + } + } else { + err("The doctype was not the HTML 4.01 Strict doctype. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">\u201D."); + } + documentModeInternal(DocumentMode.STANDARDS_MODE, + publicIdentifier, systemIdentifier, true); + } + break; + case HTML401_TRANSITIONAL: + html4 = true; + tokenizer.turnOnAdditionalHtml4Errors(); + if (isQuirky(name, publicIdentifier, systemIdentifier, + forceQuirks)) { + err("Quirky doctype. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\u201D."); + documentModeInternal(DocumentMode.QUIRKS_MODE, + publicIdentifier, systemIdentifier, true); + } else if (isAlmostStandards(publicIdentifier, + systemIdentifier)) { + if (firstCommentLocation != null) { + warn("Comments seen before doctype. 
Internet Explorer will go into the quirks mode.", + firstCommentLocation); + } + if ("-//W3C//DTD HTML 4.01 Transitional//EN".equals(publicIdentifier) + && systemIdentifier != null) { + if (!"http://www.w3.org/TR/html4/loose.dtd".equals(systemIdentifier)) { + warn("The doctype did not contain the system identifier prescribed by the HTML 4.01 specification. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\u201D."); + } + } else { + err("The doctype was not a non-quirky HTML 4.01 Transitional doctype. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\u201D."); + } + documentModeInternal( + DocumentMode.ALMOST_STANDARDS_MODE, + publicIdentifier, systemIdentifier, true); + } else { + if (firstCommentLocation != null) { + warn("Comments seen before doctype. Internet Explorer will go into the quirks mode.", + firstCommentLocation); + } + err("The doctype was not the HTML 4.01 Transitional doctype. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\u201D."); + documentModeInternal(DocumentMode.STANDARDS_MODE, + publicIdentifier, systemIdentifier, true); + } + break; + case AUTO: + html4 = isHtml4Doctype(publicIdentifier); + if (html4) { + tokenizer.turnOnAdditionalHtml4Errors(); + } + if (isQuirky(name, publicIdentifier, systemIdentifier, + forceQuirks)) { + err("Quirky doctype. Expected e.g. \u201C<!DOCTYPE html>\u201D."); + documentModeInternal(DocumentMode.QUIRKS_MODE, + publicIdentifier, systemIdentifier, html4); + } else if (isAlmostStandards(publicIdentifier, + systemIdentifier)) { + if (firstCommentLocation != null) { + warn("Comments seen before doctype. 
Internet Explorer will go into the quirks mode.", + firstCommentLocation); + } + if ("-//W3C//DTD HTML 4.01 Transitional//EN".equals(publicIdentifier)) { + if (!"http://www.w3.org/TR/html4/loose.dtd".equals(systemIdentifier)) { + warn("The doctype did not contain the system identifier prescribed by the HTML 4.01 specification. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\u201D."); + } + } else { + err("Almost standards mode doctype. Expected e.g. \u201C<!DOCTYPE html>\u201D."); + } + documentModeInternal( + DocumentMode.ALMOST_STANDARDS_MODE, + publicIdentifier, systemIdentifier, html4); + } else { + if (firstCommentLocation != null) { + warn("Comments seen before doctype. Internet Explorer will go into the quirks mode.", + firstCommentLocation); + } + if ("-//W3C//DTD HTML 4.01//EN".equals(publicIdentifier)) { + if (!"http://www.w3.org/TR/html4/strict.dtd".equals(systemIdentifier)) { + warn("The doctype did not contain the system identifier prescribed by the HTML 4.01 specification. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">\u201D."); + } + } else if ("-//W3C//DTD XHTML 1.0 Strict//EN".equals(publicIdentifier)) { + if (!"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd".equals(systemIdentifier)) { + warn("The doctype did not contain the system identifier prescribed by the XHTML 1.0 specification. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\u201D."); + } + } else if ("//W3C//DTD XHTML 1.1//EN".equals(publicIdentifier)) { + if (!"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd".equals(systemIdentifier)) { + warn("The doctype did not contain the system identifier prescribed by the XHTML 1.1 specification. 
Expected \u201C<!DOCTYPE HTML PUBLIC \"//W3C//DTD XHTML 1.1//EN\" \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">\u201D."); + } + } else if (!((systemIdentifier == null || Portability.literalEqualsString( + "about:legacy-compat", systemIdentifier)) && publicIdentifier == null)) { + err("Unexpected doctype. Expected, e.g., \u201C<!DOCTYPE html>\u201D."); + } + documentModeInternal(DocumentMode.STANDARDS_MODE, + publicIdentifier, systemIdentifier, html4); + } + break; + case NO_DOCTYPE_ERRORS: + if (isQuirky(name, publicIdentifier, systemIdentifier, + forceQuirks)) { + documentModeInternal(DocumentMode.QUIRKS_MODE, + publicIdentifier, systemIdentifier, false); + } else if (isAlmostStandards(publicIdentifier, + systemIdentifier)) { + documentModeInternal( + DocumentMode.ALMOST_STANDARDS_MODE, + publicIdentifier, systemIdentifier, false); + } else { + documentModeInternal(DocumentMode.STANDARDS_MODE, + publicIdentifier, systemIdentifier, false); + } + break; + } + // ]NOCPP] + + /* + * + * Then, switch to the root element mode of the tree construction + * stage. + */ + mode = BEFORE_HTML; + return; + } + /* + * A DOCTYPE token Parse error. + */ + errStrayDoctype(); + /* + * Ignore the token. 
+ */ + return; + } + + // [NOCPP[ + + private boolean isHtml4Doctype(String publicIdentifier) { + if (publicIdentifier != null + && (Arrays.binarySearch(TreeBuilder.HTML4_PUBLIC_IDS, + publicIdentifier) > -1)) { + return true; + } + return false; + } + + // ]NOCPP] + + public final void comment(@NoLength char[] buf, int start, int length) + throws SAXException { + needToDropLF = false; + // [NOCPP[ + if (firstCommentLocation == null) { + firstCommentLocation = new LocatorImpl(tokenizer); + } + if (!wantingComments) { + return; + } + // ]NOCPP] + if (!isInForeign()) { + switch (mode) { + case INITIAL: + case BEFORE_HTML: + case AFTER_AFTER_BODY: + case AFTER_AFTER_FRAMESET: + /* + * A comment token Append a Comment node to the Document + * object with the data attribute set to the data given in + * the comment token. + */ + appendCommentToDocument(buf, start, length); + return; + case AFTER_BODY: + /* + * A comment token Append a Comment node to the first + * element in the stack of open elements (the html element), + * with the data attribute set to the data given in the + * comment token. + */ + flushCharacters(); + appendComment(stack[0].node, buf, start, length); + return; + default: + break; + } + } + /* + * A comment token Append a Comment node to the current node with the + * data attribute set to the data given in the comment token. 
+ */ + flushCharacters(); + appendComment(stack[currentPtr].node, buf, start, length); + return; + } + + /** + * @see nu.validator.htmlparser.common.TokenHandler#characters(char[], int, + * int) + */ + public final void characters(@Const @NoLength char[] buf, int start, int length) + throws SAXException { + // Note: Can't attach error messages to EOF in C++ yet + + // CPPONLY: if (tokenizer.isViewingXmlSource()) { + // CPPONLY: return; + // CPPONLY: } + if (needToDropLF) { + needToDropLF = false; + if (buf[start] == '\n') { + start++; + length--; + if (length == 0) { + return; + } + } + } + + // optimize the most common case + switch (mode) { + case IN_BODY: + case IN_CELL: + case IN_CAPTION: + if (!isInForeignButNotHtmlOrMathTextIntegrationPoint()) { + reconstructTheActiveFormattingElements(); + } + // fall through + case TEXT: + accumulateCharacters(buf, start, length); + return; + case IN_TABLE: + case IN_TABLE_BODY: + case IN_ROW: + accumulateCharactersForced(buf, start, length); + return; + default: + int end = start + length; + charactersloop: for (int i = start; i < end; i++) { + switch (buf[i]) { + case ' ': + case '\t': + case '\n': + case '\r': + case '\u000C': + /* + * A character token that is one of one of U+0009 + * CHARACTER TABULATION, U+000A LINE FEED (LF), + * U+000C FORM FEED (FF), or U+0020 SPACE + */ + switch (mode) { + case INITIAL: + case BEFORE_HTML: + case BEFORE_HEAD: + /* + * Ignore the token. + */ + start = i + 1; + continue; + case IN_HEAD: + case IN_HEAD_NOSCRIPT: + case AFTER_HEAD: + case IN_COLUMN_GROUP: + case IN_FRAMESET: + case AFTER_FRAMESET: + /* + * Append the character to the current node. + */ + continue; + case FRAMESET_OK: + case IN_TEMPLATE: + case IN_BODY: + case IN_CELL: + case IN_CAPTION: + if (start < i) { + accumulateCharacters(buf, start, i + - start); + start = i; + } + + /* + * Reconstruct the active formatting + * elements, if any. 
+ */ + if (!isInForeignButNotHtmlOrMathTextIntegrationPoint()) { + flushCharacters(); + reconstructTheActiveFormattingElements(); + } + /* + * Append the token's character to the + * current node. + */ + break charactersloop; + case IN_SELECT: + case IN_SELECT_IN_TABLE: + break charactersloop; + case IN_TABLE: + case IN_TABLE_BODY: + case IN_ROW: + accumulateCharactersForced(buf, i, 1); + start = i + 1; + continue; + case AFTER_BODY: + case AFTER_AFTER_BODY: + case AFTER_AFTER_FRAMESET: + if (start < i) { + accumulateCharacters(buf, start, i + - start); + start = i; + } + /* + * Reconstruct the active formatting + * elements, if any. + */ + flushCharacters(); + reconstructTheActiveFormattingElements(); + /* + * Append the token's character to the + * current node. + */ + continue; + } + default: + /* + * A character token that is not one of one of + * U+0009 CHARACTER TABULATION, U+000A LINE FEED + * (LF), U+000C FORM FEED (FF), or U+0020 SPACE + */ + switch (mode) { + case INITIAL: + /* + * Parse error. + */ + // [NOCPP[ + switch (doctypeExpectation) { + case AUTO: + err("Non-space characters found without seeing a doctype first. Expected e.g. \u201C<!DOCTYPE html>\u201D."); + break; + case HTML: + // XXX figure out a way to report this in the Gecko View Source case + err("Non-space characters found without seeing a doctype first. Expected \u201C<!DOCTYPE html>\u201D."); + break; + case HTML401_STRICT: + err("Non-space characters found without seeing a doctype first. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">\u201D."); + break; + case HTML401_TRANSITIONAL: + err("Non-space characters found without seeing a doctype first. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\u201D."); + break; + case NO_DOCTYPE_ERRORS: + } + // ]NOCPP] + /* + * + * Set the document to quirks mode. 
+ */ + documentModeInternal( + DocumentMode.QUIRKS_MODE, null, + null, false); + /* + * Then, switch to the root element mode of + * the tree construction stage + */ + mode = BEFORE_HTML; + /* + * and reprocess the current token. + */ + i--; + continue; + case BEFORE_HTML: + /* + * Create an HTMLElement node with the tag + * name html, in the HTML namespace. Append + * it to the Document object. + */ + // No need to flush characters here, + // because there's nothing to flush. + appendHtmlElementToDocumentAndPush(); + /* Switch to the main mode */ + mode = BEFORE_HEAD; + /* + * reprocess the current token. + */ + i--; + continue; + case BEFORE_HEAD: + if (start < i) { + accumulateCharacters(buf, start, i + - start); + start = i; + } + /* + * /Act as if a start tag token with the tag + * name "head" and no attributes had been + * seen, + */ + flushCharacters(); + appendToCurrentNodeAndPushHeadElement(HtmlAttributes.EMPTY_ATTRIBUTES); + mode = IN_HEAD; + /* + * then reprocess the current token. + * + * This will result in an empty head element + * being generated, with the current token + * being reprocessed in the "after head" + * insertion mode. + */ + i--; + continue; + case IN_HEAD: + if (start < i) { + accumulateCharacters(buf, start, i + - start); + start = i; + } + /* + * Act as if an end tag token with the tag + * name "head" had been seen, + */ + flushCharacters(); + pop(); + mode = AFTER_HEAD; + /* + * and reprocess the current token. + */ + i--; + continue; + case IN_HEAD_NOSCRIPT: + if (start < i) { + accumulateCharacters(buf, start, i + - start); + start = i; + } + /* + * Parse error. Act as if an end tag with + * the tag name "noscript" had been seen + */ + errNonSpaceInNoscriptInHead(); + flushCharacters(); + pop(); + mode = IN_HEAD; + /* + * and reprocess the current token. 
+ */ + i--; + continue; + case AFTER_HEAD: + if (start < i) { + accumulateCharacters(buf, start, i + - start); + start = i; + } + /* + * Act as if a start tag token with the tag + * name "body" and no attributes had been + * seen, + */ + flushCharacters(); + appendToCurrentNodeAndPushBodyElement(); + mode = FRAMESET_OK; + /* + * and then reprocess the current token. + */ + i--; + continue; + case FRAMESET_OK: + framesetOk = false; + mode = IN_BODY; + i--; + continue; + case IN_TEMPLATE: + case IN_BODY: + case IN_CELL: + case IN_CAPTION: + if (start < i) { + accumulateCharacters(buf, start, i + - start); + start = i; + } + /* + * Reconstruct the active formatting + * elements, if any. + */ + if (!isInForeignButNotHtmlOrMathTextIntegrationPoint()) { + flushCharacters(); + reconstructTheActiveFormattingElements(); + } + /* + * Append the token's character to the + * current node. + */ + break charactersloop; + case IN_TABLE: + case IN_TABLE_BODY: + case IN_ROW: + accumulateCharactersForced(buf, i, 1); + start = i + 1; + continue; + case IN_COLUMN_GROUP: + if (start < i) { + accumulateCharacters(buf, start, i + - start); + start = i; + } + /* + * Act as if an end tag with the tag name + * "colgroup" had been seen, and then, if + * that token wasn't ignored, reprocess the + * current token. + */ + if (currentPtr == 0 || stack[currentPtr].getGroup() == + TreeBuilder.TEMPLATE) { + errNonSpaceInColgroupInFragment(); + start = i + 1; + continue; + } + flushCharacters(); + pop(); + mode = IN_TABLE; + i--; + continue; + case IN_SELECT: + case IN_SELECT_IN_TABLE: + break charactersloop; + case AFTER_BODY: + errNonSpaceAfterBody(); + fatal(); + mode = framesetOk ? FRAMESET_OK : IN_BODY; + i--; + continue; + case IN_FRAMESET: + if (start < i) { + accumulateCharacters(buf, start, i + - start); + // start index is adjusted below. + } + /* + * Parse error. + */ + errNonSpaceInFrameset(); + /* + * Ignore the token. 
+ */ + start = i + 1; + continue; + case AFTER_FRAMESET: + if (start < i) { + accumulateCharacters(buf, start, i + - start); + // start index is adjusted below. + } + /* + * Parse error. + */ + errNonSpaceAfterFrameset(); + /* + * Ignore the token. + */ + start = i + 1; + continue; + case AFTER_AFTER_BODY: + /* + * Parse error. + */ + errNonSpaceInTrailer(); + /* + * Switch back to the main mode and + * reprocess the token. + */ + mode = framesetOk ? FRAMESET_OK : IN_BODY; + i--; + continue; + case AFTER_AFTER_FRAMESET: + if (start < i) { + accumulateCharacters(buf, start, i + - start); + // start index is adjusted below. + } + /* + * Parse error. + */ + errNonSpaceInTrailer(); + /* + * Ignore the token. + */ + start = i + 1; + continue; + } + } + } + if (start < end) { + accumulateCharacters(buf, start, end - start); + } + } + } + + /** + * @see nu.validator.htmlparser.common.TokenHandler#zeroOriginatingReplacementCharacter() + */ + public void zeroOriginatingReplacementCharacter() throws SAXException { + if (mode == TEXT) { + accumulateCharacters(REPLACEMENT_CHARACTER, 0, 1); + return; + } + if (currentPtr >= 0) { + if (isSpecialParentInForeign(stack[currentPtr])) { + return; + } + accumulateCharacters(REPLACEMENT_CHARACTER, 0, 1); + } + } + + public final void eof() throws SAXException { + flushCharacters(); + // Note: Can't attach error messages to EOF in C++ yet + eofloop: for (;;) { + switch (mode) { + case INITIAL: + /* + * Parse error. + */ + // [NOCPP[ + switch (doctypeExpectation) { + case AUTO: + err("End of file seen without seeing a doctype first. Expected e.g. \u201C<!DOCTYPE html>\u201D."); + break; + case HTML: + err("End of file seen without seeing a doctype first. Expected \u201C<!DOCTYPE html>\u201D."); + break; + case HTML401_STRICT: + err("End of file seen without seeing a doctype first. 
Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">\u201D."); + break; + case HTML401_TRANSITIONAL: + err("End of file seen without seeing a doctype first. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\u201D."); + break; + case NO_DOCTYPE_ERRORS: + } + // ]NOCPP] + /* + * + * Set the document to quirks mode. + */ + documentModeInternal(DocumentMode.QUIRKS_MODE, null, null, + false); + /* + * Then, switch to the root element mode of the tree + * construction stage + */ + mode = BEFORE_HTML; + /* + * and reprocess the current token. + */ + continue; + case BEFORE_HTML: + /* + * Create an HTMLElement node with the tag name html, in the + * HTML namespace. Append it to the Document object. + */ + appendHtmlElementToDocumentAndPush(); + // XXX application cache manifest + /* Switch to the main mode */ + mode = BEFORE_HEAD; + /* + * reprocess the current token. + */ + continue; + case BEFORE_HEAD: + appendToCurrentNodeAndPushHeadElement(HtmlAttributes.EMPTY_ATTRIBUTES); + mode = IN_HEAD; + continue; + case IN_HEAD: + // [NOCPP[ + if (errorHandler != null && currentPtr > 1) { + errEofWithUnclosedElements(); + } + // ]NOCPP] + while (currentPtr > 0) { + popOnEof(); + } + mode = AFTER_HEAD; + continue; + case IN_HEAD_NOSCRIPT: + // [NOCPP[ + errEofWithUnclosedElements(); + // ]NOCPP] + while (currentPtr > 1) { + popOnEof(); + } + mode = IN_HEAD; + continue; + case AFTER_HEAD: + appendToCurrentNodeAndPushBodyElement(); + mode = IN_BODY; + continue; + case IN_TABLE_BODY: + case IN_ROW: + case IN_TABLE: + case IN_SELECT_IN_TABLE: + case IN_SELECT: + case IN_COLUMN_GROUP: + case FRAMESET_OK: + case IN_CAPTION: + case IN_CELL: + case IN_BODY: + // [NOCPP[ + // i > 0 to stop in time in the foreign fragment case. 
+ openelementloop: for (int i = currentPtr; i > 0; i--) { + int group = stack[i].getGroup(); + switch (group) { + case DD_OR_DT: + case LI: + case P: + case TBODY_OR_THEAD_OR_TFOOT: + case TD_OR_TH: + case BODY: + case HTML: + break; + default: + errEofWithUnclosedElements(); + break openelementloop; + } + } + // ]NOCPP] + + if (isTemplateModeStackEmpty()) { + break eofloop; + } + + // fall through to IN_TEMPLATE + case IN_TEMPLATE: + int eltPos = findLast("template"); + if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) { + assert fragment; + break eofloop; + } + if (errorHandler != null) { + errUnclosedElements(eltPos, "template"); + } + while (currentPtr >= eltPos) { + pop(); + } + clearTheListOfActiveFormattingElementsUpToTheLastMarker(); + popTemplateMode(); + resetTheInsertionMode(); + + // Reprocess token. + continue; + case TEXT: + // [NOCPP[ + if (errorHandler != null) { + errNoCheck("End of file seen when expecting text or an end tag."); + errListUnclosedStartTags(0); + } + // ]NOCPP] + // XXX mark script as already executed + if (originalMode == AFTER_HEAD) { + popOnEof(); + } + popOnEof(); + mode = originalMode; + continue; + case IN_FRAMESET: + // [NOCPP[ + if (errorHandler != null && currentPtr > 0) { + errEofWithUnclosedElements(); + } + // ]NOCPP] + break eofloop; + case AFTER_BODY: + case AFTER_FRAMESET: + case AFTER_AFTER_BODY: + case AFTER_AFTER_FRAMESET: + default: + // [NOCPP[ + if (currentPtr == 0) { // This silliness is here to poison + // buggy compiler optimizations in + // GWT + System.currentTimeMillis(); + } + // ]NOCPP] + break eofloop; + } + } + while (currentPtr > 0) { + popOnEof(); + } + if (!fragment) { + popOnEof(); + } + /* Stop parsing. 
*/ + } + + /** + * @see nu.validator.htmlparser.common.TokenHandler#endTokenization() + */ + public final void endTokenization() throws SAXException { + formPointer = null; + headPointer = null; + deepTreeSurrogateParent = null; + templateModeStack = null; + if (stack != null) { + while (currentPtr > -1) { + stack[currentPtr].release(); + currentPtr--; + } + stack = null; + } + if (listOfActiveFormattingElements != null) { + while (listPtr > -1) { + if (listOfActiveFormattingElements[listPtr] != null) { + listOfActiveFormattingElements[listPtr].release(); + } + listPtr--; + } + listOfActiveFormattingElements = null; + } + // [NOCPP[ + idLocations.clear(); + // ]NOCPP] + charBuffer = null; + end(); + } + + public final void startTag(ElementName elementName, + HtmlAttributes attributes, boolean selfClosing) throws SAXException { + flushCharacters(); + + // [NOCPP[ + if (errorHandler != null) { + // ID uniqueness + @IdType String id = attributes.getId(); + if (id != null) { + LocatorImpl oldLoc = idLocations.get(id); + if (oldLoc != null) { + err("Duplicate ID \u201C" + id + "\u201D."); + errorHandler.warning(new SAXParseException( + "The first occurrence of ID \u201C" + id + + "\u201D was here.", oldLoc)); + } else { + idLocations.put(id, new LocatorImpl(tokenizer)); + } + } + } + // ]NOCPP] + + int eltPos; + needToDropLF = false; + starttagloop: for (;;) { + int group = elementName.getGroup(); + @Local String name = elementName.name; + if (isInForeign()) { + StackNode<T> currentNode = stack[currentPtr]; + @NsUri String currNs = currentNode.ns; + if (!(currentNode.isHtmlIntegrationPoint() || (currNs == "http://www.w3.org/1998/Math/MathML" && ((currentNode.getGroup() == MI_MO_MN_MS_MTEXT && group != MGLYPH_OR_MALIGNMARK) || (currentNode.getGroup() == ANNOTATION_XML && group == SVG))))) { + switch (group) { + case B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U: + case DIV_OR_BLOCKQUOTE_OR_CENTER_OR_MENU: + case BODY: + case BR: + case 
RUBY_OR_SPAN_OR_SUB_OR_SUP_OR_VAR: + case DD_OR_DT: + case UL_OR_OL_OR_DL: + case EMBED: + case IMG: + case H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6: + case HEAD: + case HR: + case LI: + case META: + case NOBR: + case P: + case PRE_OR_LISTING: + case TABLE: + case FONT: + // re-check FONT to deal with the special case + if (!(group == FONT && !(attributes.contains(AttributeName.COLOR) + || attributes.contains(AttributeName.FACE) || attributes.contains(AttributeName.SIZE)))) { + errHtmlStartTagInForeignContext(name); + if (!fragment) { + while (!isSpecialParentInForeign(stack[currentPtr])) { + pop(); + } + continue starttagloop; + } // else fall thru + } + // else fall thru + default: + if ("http://www.w3.org/2000/svg" == currNs) { + attributes.adjustForSvg(); + if (selfClosing) { + appendVoidElementToCurrentMayFosterSVG( + elementName, attributes); + selfClosing = false; + } else { + appendToCurrentNodeAndPushElementMayFosterSVG( + elementName, attributes); + } + attributes = null; // CPP + break starttagloop; + } else { + attributes.adjustForMath(); + if (selfClosing) { + appendVoidElementToCurrentMayFosterMathML( + elementName, attributes); + selfClosing = false; + } else { + appendToCurrentNodeAndPushElementMayFosterMathML( + elementName, attributes); + } + attributes = null; // CPP + break starttagloop; + } + } // switch + } // foreignObject / annotation-xml + } + switch (mode) { + case IN_TEMPLATE: + switch (group) { + case COL: + popTemplateMode(); + pushTemplateMode(IN_COLUMN_GROUP); + mode = IN_COLUMN_GROUP; + // Reprocess token. + continue; + case CAPTION: + case COLGROUP: + case TBODY_OR_THEAD_OR_TFOOT: + popTemplateMode(); + pushTemplateMode(IN_TABLE); + mode = IN_TABLE; + // Reprocess token. + continue; + case TR: + popTemplateMode(); + pushTemplateMode(IN_TABLE_BODY); + mode = IN_TABLE_BODY; + // Reprocess token. + continue; + case TD_OR_TH: + popTemplateMode(); + pushTemplateMode(IN_ROW); + mode = IN_ROW; + // Reprocess token. 
+ continue; + case META: + checkMetaCharset(attributes); + appendVoidElementToCurrentMayFoster( + elementName, + attributes); + selfClosing = false; + attributes = null; // CPP + break starttagloop; + case TITLE: + startTagTitleInHead(elementName, attributes); + attributes = null; // CPP + break starttagloop; + case BASE: + case LINK_OR_BASEFONT_OR_BGSOUND: + appendVoidElementToCurrentMayFoster( + elementName, + attributes); + selfClosing = false; + attributes = null; // CPP + break starttagloop; + case SCRIPT: + startTagScriptInHead(elementName, attributes); + attributes = null; // CPP + break starttagloop; + case NOFRAMES: + case STYLE: + startTagGenericRawText(elementName, attributes); + attributes = null; // CPP + break starttagloop; + case TEMPLATE: + startTagTemplateInHead(elementName, attributes); + attributes = null; // CPP + break starttagloop; + default: + popTemplateMode(); + pushTemplateMode(IN_BODY); + mode = IN_BODY; + // Reprocess token. + continue; + } + case IN_ROW: + switch (group) { + case TD_OR_TH: + clearStackBackTo(findLastOrRoot(TreeBuilder.TR)); + appendToCurrentNodeAndPushElement( + elementName, + attributes); + mode = IN_CELL; + insertMarker(); + attributes = null; // CPP + break starttagloop; + case CAPTION: + case COL: + case COLGROUP: + case TBODY_OR_THEAD_OR_TFOOT: + case TR: + eltPos = findLastOrRoot(TreeBuilder.TR); + if (eltPos == 0) { + assert fragment || isTemplateContents(); + errNoTableRowToClose(); + break starttagloop; + } + clearStackBackTo(eltPos); + pop(); + mode = IN_TABLE_BODY; + continue; + default: + // fall through to IN_TABLE + } + case IN_TABLE_BODY: + switch (group) { + case TR: + clearStackBackTo(findLastInTableScopeOrRootTemplateTbodyTheadTfoot()); + appendToCurrentNodeAndPushElement( + elementName, + attributes); + mode = IN_ROW; + attributes = null; // CPP + break starttagloop; + case TD_OR_TH: + errStartTagInTableBody(name); + clearStackBackTo(findLastInTableScopeOrRootTemplateTbodyTheadTfoot()); + 
appendToCurrentNodeAndPushElement( + ElementName.TR, + HtmlAttributes.EMPTY_ATTRIBUTES); + mode = IN_ROW; + continue; + case CAPTION: + case COL: + case COLGROUP: + case TBODY_OR_THEAD_OR_TFOOT: + eltPos = findLastInTableScopeOrRootTemplateTbodyTheadTfoot(); + if (eltPos == 0 || stack[eltPos].getGroup() == TEMPLATE) { + assert fragment || isTemplateContents(); + errStrayStartTag(name); + break starttagloop; + } else { + clearStackBackTo(eltPos); + pop(); + mode = IN_TABLE; + continue; + } + default: + // fall through to IN_TABLE + } + case IN_TABLE: + intableloop: for (;;) { + switch (group) { + case CAPTION: + clearStackBackTo(findLastOrRoot(TreeBuilder.TABLE)); + insertMarker(); + appendToCurrentNodeAndPushElement( + elementName, + attributes); + mode = IN_CAPTION; + attributes = null; // CPP + break starttagloop; + case COLGROUP: + clearStackBackTo(findLastOrRoot(TreeBuilder.TABLE)); + appendToCurrentNodeAndPushElement( + elementName, + attributes); + mode = IN_COLUMN_GROUP; + attributes = null; // CPP + break starttagloop; + case COL: + clearStackBackTo(findLastOrRoot(TreeBuilder.TABLE)); + appendToCurrentNodeAndPushElement( + ElementName.COLGROUP, + HtmlAttributes.EMPTY_ATTRIBUTES); + mode = IN_COLUMN_GROUP; + continue starttagloop; + case TBODY_OR_THEAD_OR_TFOOT: + clearStackBackTo(findLastOrRoot(TreeBuilder.TABLE)); + appendToCurrentNodeAndPushElement( + elementName, + attributes); + mode = IN_TABLE_BODY; + attributes = null; // CPP + break starttagloop; + case TR: + case TD_OR_TH: + clearStackBackTo(findLastOrRoot(TreeBuilder.TABLE)); + appendToCurrentNodeAndPushElement( + ElementName.TBODY, + HtmlAttributes.EMPTY_ATTRIBUTES); + mode = IN_TABLE_BODY; + continue starttagloop; + case TEMPLATE: + // fall through to IN_HEAD + break intableloop; + case TABLE: + errTableSeenWhileTableOpen(); + eltPos = findLastInTableScope(name); + if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) { + assert fragment || isTemplateContents(); + break starttagloop; + } + 
generateImpliedEndTags(); + if (errorHandler != null && !isCurrent("table")) { + errNoCheckUnclosedElementsOnStack(); + } + while (currentPtr >= eltPos) { + pop(); + } + resetTheInsertionMode(); + continue starttagloop; + case SCRIPT: + // XXX need to manage much more stuff + // here if + // supporting + // document.write() + appendToCurrentNodeAndPushElement( + elementName, + attributes); + originalMode = mode; + mode = TEXT; + tokenizer.setStateAndEndTagExpectation( + Tokenizer.SCRIPT_DATA, elementName); + attributes = null; // CPP + break starttagloop; + case STYLE: + appendToCurrentNodeAndPushElement( + elementName, + attributes); + originalMode = mode; + mode = TEXT; + tokenizer.setStateAndEndTagExpectation( + Tokenizer.RAWTEXT, elementName); + attributes = null; // CPP + break starttagloop; + case INPUT: + errStartTagInTable(name); + if (!Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString( + "hidden", + attributes.getValue(AttributeName.TYPE))) { + break intableloop; + } + appendVoidElementToCurrent( + name, attributes, + formPointer); + selfClosing = false; + attributes = null; // CPP + break starttagloop; + case FORM: + if (formPointer != null || isTemplateContents()) { + errFormWhenFormOpen(); + break starttagloop; + } else { + errStartTagInTable(name); + appendVoidFormToCurrent(attributes); + attributes = null; // CPP + break starttagloop; + } + default: + errStartTagInTable(name); + // fall through to IN_BODY + break intableloop; + } + } + case IN_CAPTION: + switch (group) { + case CAPTION: + case COL: + case COLGROUP: + case TBODY_OR_THEAD_OR_TFOOT: + case TR: + case TD_OR_TH: + errStrayStartTag(name); + eltPos = findLastInTableScope("caption"); + if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) { + break starttagloop; + } + generateImpliedEndTags(); + if (errorHandler != null && currentPtr != eltPos) { + errNoCheckUnclosedElementsOnStack(); + } + while (currentPtr >= eltPos) { + pop(); + } + clearTheListOfActiveFormattingElementsUpToTheLastMarker(); 
+ mode = IN_TABLE; + continue; + default: + // fall through to IN_BODY + } + case IN_CELL: + switch (group) { + case CAPTION: + case COL: + case COLGROUP: + case TBODY_OR_THEAD_OR_TFOOT: + case TR: + case TD_OR_TH: + eltPos = findLastInTableScopeTdTh(); + if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) { + errNoCellToClose(); + break starttagloop; + } else { + closeTheCell(eltPos); + continue; + } + default: + // fall through to IN_BODY + } + case FRAMESET_OK: + switch (group) { + case FRAMESET: + if (mode == FRAMESET_OK) { + if (currentPtr == 0 || stack[1].getGroup() != BODY) { + assert fragment || isTemplateContents(); + errStrayStartTag(name); + break starttagloop; + } else { + errFramesetStart(); + detachFromParent(stack[1].node); + while (currentPtr > 0) { + pop(); + } + appendToCurrentNodeAndPushElement( + elementName, + attributes); + mode = IN_FRAMESET; + attributes = null; // CPP + break starttagloop; + } + } else { + errStrayStartTag(name); + break starttagloop; + } + // NOT falling through! 
+ case PRE_OR_LISTING: + case LI: + case DD_OR_DT: + case BUTTON: + case MARQUEE_OR_APPLET: + case OBJECT: + case TABLE: + case AREA_OR_WBR: + case BR: + case EMBED: + case IMG: + case INPUT: + case KEYGEN: + case HR: + case TEXTAREA: + case XMP: + case IFRAME: + case SELECT: + if (mode == FRAMESET_OK + && !(group == INPUT && Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString( + "hidden", + attributes.getValue(AttributeName.TYPE)))) { + framesetOk = false; + mode = IN_BODY; + } + // fall through to IN_BODY + default: + // fall through to IN_BODY + } + case IN_BODY: + inbodyloop: for (;;) { + switch (group) { + case HTML: + errStrayStartTag(name); + if (!fragment && !isTemplateContents()) { + addAttributesToHtml(attributes); + attributes = null; // CPP + } + break starttagloop; + case BASE: + case LINK_OR_BASEFONT_OR_BGSOUND: + case META: + case STYLE: + case SCRIPT: + case TITLE: + case TEMPLATE: + // Fall through to IN_HEAD + break inbodyloop; + case BODY: + if (currentPtr == 0 || stack[1].getGroup() != BODY || isTemplateContents()) { + assert fragment || isTemplateContents(); + errStrayStartTag(name); + break starttagloop; + } + errFooSeenWhenFooOpen(name); + framesetOk = false; + if (mode == FRAMESET_OK) { + mode = IN_BODY; + } + if (addAttributesToBody(attributes)) { + attributes = null; // CPP + } + break starttagloop; + case P: + case DIV_OR_BLOCKQUOTE_OR_CENTER_OR_MENU: + case UL_OR_OL_OR_DL: + case ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIALOG_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY: + implicitlyCloseP(); + appendToCurrentNodeAndPushElementMayFoster( + elementName, + attributes); + attributes = null; // CPP + break starttagloop; + case H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6: + implicitlyCloseP(); + if (stack[currentPtr].getGroup() == H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6) { + errHeadingWhenHeadingOpen(); + pop(); + } + appendToCurrentNodeAndPushElementMayFoster( + elementName, + attributes); + 
attributes = null; // CPP + break starttagloop; + case FIELDSET: + implicitlyCloseP(); + appendToCurrentNodeAndPushElementMayFoster( + elementName, + attributes, formPointer); + attributes = null; // CPP + break starttagloop; + case PRE_OR_LISTING: + implicitlyCloseP(); + appendToCurrentNodeAndPushElementMayFoster( + elementName, + attributes); + needToDropLF = true; + attributes = null; // CPP + break starttagloop; + case FORM: + if (formPointer != null && !isTemplateContents()) { + errFormWhenFormOpen(); + break starttagloop; + } else { + implicitlyCloseP(); + appendToCurrentNodeAndPushFormElementMayFoster(attributes); + attributes = null; // CPP + break starttagloop; + } + case LI: + case DD_OR_DT: + eltPos = currentPtr; + for (;;) { + StackNode<T> node = stack[eltPos]; // weak + // ref + if (node.getGroup() == group) { // LI or + // DD_OR_DT + generateImpliedEndTagsExceptFor(node.name); + if (errorHandler != null + && eltPos != currentPtr) { + errUnclosedElementsImplied(eltPos, name); + } + while (currentPtr >= eltPos) { + pop(); + } + break; + } else if (eltPos == 0 || (node.isSpecial() + && (node.ns != "http://www.w3.org/1999/xhtml" + || (node.name != "p" + && node.name != "address" + && node.name != "div")))) { + break; + } + eltPos--; + } + implicitlyCloseP(); + appendToCurrentNodeAndPushElementMayFoster( + elementName, + attributes); + attributes = null; // CPP + break starttagloop; + case PLAINTEXT: + implicitlyCloseP(); + appendToCurrentNodeAndPushElementMayFoster( + elementName, + attributes); + tokenizer.setStateAndEndTagExpectation( + Tokenizer.PLAINTEXT, elementName); + attributes = null; // CPP + break starttagloop; + case A: + int activeAPos = findInListOfActiveFormattingElementsContainsBetweenEndAndLastMarker("a"); + if (activeAPos != -1) { + errFooSeenWhenFooOpen(name); + StackNode<T> activeA = listOfActiveFormattingElements[activeAPos]; + activeA.retain(); + adoptionAgencyEndTag("a"); + removeFromStack(activeA); + activeAPos = 
findInListOfActiveFormattingElements(activeA); + if (activeAPos != -1) { + removeFromListOfActiveFormattingElements(activeAPos); + } + activeA.release(); + } + reconstructTheActiveFormattingElements(); + appendToCurrentNodeAndPushFormattingElementMayFoster( + elementName, + attributes); + attributes = null; // CPP + break starttagloop; + case B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U: + case FONT: + reconstructTheActiveFormattingElements(); + maybeForgetEarlierDuplicateFormattingElement(elementName.name, attributes); + appendToCurrentNodeAndPushFormattingElementMayFoster( + elementName, + attributes); + attributes = null; // CPP + break starttagloop; + case NOBR: + reconstructTheActiveFormattingElements(); + if (TreeBuilder.NOT_FOUND_ON_STACK != findLastInScope("nobr")) { + errFooSeenWhenFooOpen(name); + adoptionAgencyEndTag("nobr"); + reconstructTheActiveFormattingElements(); + } + appendToCurrentNodeAndPushFormattingElementMayFoster( + elementName, + attributes); + attributes = null; // CPP + break starttagloop; + case BUTTON: + eltPos = findLastInScope(name); + if (eltPos != TreeBuilder.NOT_FOUND_ON_STACK) { + errFooSeenWhenFooOpen(name); + generateImpliedEndTags(); + if (errorHandler != null + && !isCurrent(name)) { + errUnclosedElementsImplied(eltPos, name); + } + while (currentPtr >= eltPos) { + pop(); + } + continue starttagloop; + } else { + reconstructTheActiveFormattingElements(); + appendToCurrentNodeAndPushElementMayFoster( + elementName, + attributes, formPointer); + attributes = null; // CPP + break starttagloop; + } + case OBJECT: + reconstructTheActiveFormattingElements(); + appendToCurrentNodeAndPushElementMayFoster( + elementName, + attributes, formPointer); + insertMarker(); + attributes = null; // CPP + break starttagloop; + case MARQUEE_OR_APPLET: + reconstructTheActiveFormattingElements(); + appendToCurrentNodeAndPushElementMayFoster( + elementName, + attributes); + insertMarker(); + attributes = null; // CPP + 
break starttagloop; + case TABLE: + // The only quirk. Blame Hixie and + // Acid2. + if (!quirks) { + implicitlyCloseP(); + } + appendToCurrentNodeAndPushElementMayFoster( + elementName, + attributes); + mode = IN_TABLE; + attributes = null; // CPP + break starttagloop; + case BR: + case EMBED: + case AREA_OR_WBR: + reconstructTheActiveFormattingElements(); + // FALL THROUGH to PARAM_OR_SOURCE_OR_TRACK + // CPPONLY: case MENUITEM: + case PARAM_OR_SOURCE_OR_TRACK: + appendVoidElementToCurrentMayFoster( + elementName, + attributes); + selfClosing = false; + attributes = null; // CPP + break starttagloop; + case HR: + implicitlyCloseP(); + appendVoidElementToCurrentMayFoster( + elementName, + attributes); + selfClosing = false; + attributes = null; // CPP + break starttagloop; + case IMAGE: + errImage(); + elementName = ElementName.IMG; + continue starttagloop; + case IMG: + case KEYGEN: + case INPUT: + reconstructTheActiveFormattingElements(); + appendVoidElementToCurrentMayFoster( + name, attributes, + formPointer); + selfClosing = false; + attributes = null; // CPP + break starttagloop; + case ISINDEX: + errIsindex(); + if (formPointer != null && !isTemplateContents()) { + break starttagloop; + } + implicitlyCloseP(); + HtmlAttributes formAttrs = new HtmlAttributes(0); + int actionIndex = attributes.getIndex(AttributeName.ACTION); + if (actionIndex > -1) { + formAttrs.addAttribute( + AttributeName.ACTION, + attributes.getValueNoBoundsCheck(actionIndex) + // [NOCPP[ + , XmlViolationPolicy.ALLOW + // ]NOCPP] + // CPPONLY: , attributes.getLineNoBoundsCheck(actionIndex) + ); + } + appendToCurrentNodeAndPushFormElementMayFoster(formAttrs); + appendVoidElementToCurrentMayFoster( + ElementName.HR, + HtmlAttributes.EMPTY_ATTRIBUTES); + appendToCurrentNodeAndPushElementMayFoster( + ElementName.LABEL, + HtmlAttributes.EMPTY_ATTRIBUTES); + int promptIndex = attributes.getIndex(AttributeName.PROMPT); + if (promptIndex > -1) { + @Auto char[] prompt = 
Portability.newCharArrayFromString(attributes.getValueNoBoundsCheck(promptIndex)); + appendCharacters(stack[currentPtr].node, + prompt, 0, prompt.length); + } else { + appendIsindexPrompt(stack[currentPtr].node); + } + HtmlAttributes inputAttributes = new HtmlAttributes( + 0); + inputAttributes.addAttribute( + AttributeName.NAME, + Portability.newStringFromLiteral("isindex") + // [NOCPP[ + , XmlViolationPolicy.ALLOW + // ]NOCPP] + // CPPONLY: , tokenizer.getLineNumber() + ); + for (int i = 0; i < attributes.getLength(); i++) { + AttributeName attributeQName = attributes.getAttributeNameNoBoundsCheck(i); + if (AttributeName.NAME == attributeQName + || AttributeName.PROMPT == attributeQName) { + attributes.releaseValue(i); + } else if (AttributeName.ACTION != attributeQName) { + inputAttributes.addAttribute( + attributeQName, + attributes.getValueNoBoundsCheck(i) + // [NOCPP[ + , XmlViolationPolicy.ALLOW + // ]NOCPP] + // CPPONLY: , attributes.getLineNoBoundsCheck(i) + ); + } + } + attributes.clearWithoutReleasingContents(); + appendVoidElementToCurrentMayFoster( + "input", + inputAttributes, formPointer); + pop(); // label + appendVoidElementToCurrentMayFoster( + ElementName.HR, + HtmlAttributes.EMPTY_ATTRIBUTES); + pop(); // form + + if (!isTemplateContents()) { + formPointer = null; + } + + selfClosing = false; + // Portability.delete(formAttrs); + // Portability.delete(inputAttributes); + // Don't delete attributes, they are deleted + // later + break starttagloop; + case TEXTAREA: + appendToCurrentNodeAndPushElementMayFoster( + elementName, + attributes, formPointer); + tokenizer.setStateAndEndTagExpectation( + Tokenizer.RCDATA, elementName); + originalMode = mode; + mode = TEXT; + needToDropLF = true; + attributes = null; // CPP + break starttagloop; + case XMP: + implicitlyCloseP(); + reconstructTheActiveFormattingElements(); + appendToCurrentNodeAndPushElementMayFoster( + elementName, + attributes); + originalMode = mode; + mode = TEXT; + 
tokenizer.setStateAndEndTagExpectation( + Tokenizer.RAWTEXT, elementName); + attributes = null; // CPP + break starttagloop; + case NOSCRIPT: + if (!scriptingEnabled) { + reconstructTheActiveFormattingElements(); + appendToCurrentNodeAndPushElementMayFoster( + elementName, + attributes); + attributes = null; // CPP + break starttagloop; + } else { + // fall through + } + case NOFRAMES: + case IFRAME: + case NOEMBED: + startTagGenericRawText(elementName, attributes); + attributes = null; // CPP + break starttagloop; + case SELECT: + reconstructTheActiveFormattingElements(); + appendToCurrentNodeAndPushElementMayFoster( + elementName, + attributes, formPointer); + switch (mode) { + case IN_TABLE: + case IN_CAPTION: + case IN_COLUMN_GROUP: + case IN_TABLE_BODY: + case IN_ROW: + case IN_CELL: + mode = IN_SELECT_IN_TABLE; + break; + default: + mode = IN_SELECT; + break; + } + attributes = null; // CPP + break starttagloop; + case OPTGROUP: + case OPTION: + if (isCurrent("option")) { + pop(); + } + reconstructTheActiveFormattingElements(); + appendToCurrentNodeAndPushElementMayFoster( + elementName, + attributes); + attributes = null; // CPP + break starttagloop; + case RB_OR_RTC: + eltPos = findLastInScope("ruby"); + if (eltPos != NOT_FOUND_ON_STACK) { + generateImpliedEndTags(); + } + if (eltPos != currentPtr) { + if (eltPos == NOT_FOUND_ON_STACK) { + errStartTagSeenWithoutRuby(name); + } else { + errUnclosedChildrenInRuby(); + } + } + appendToCurrentNodeAndPushElementMayFoster( + elementName, + attributes); + attributes = null; // CPP + break starttagloop; + case RT_OR_RP: + eltPos = findLastInScope("ruby"); + if (eltPos != NOT_FOUND_ON_STACK) { + generateImpliedEndTagsExceptFor("rtc"); + } + if (eltPos != currentPtr) { + if (!isCurrent("rtc")) { + if (eltPos == NOT_FOUND_ON_STACK) { + errStartTagSeenWithoutRuby(name); + } else { + errUnclosedChildrenInRuby(); + } + } + } + appendToCurrentNodeAndPushElementMayFoster( + elementName, + attributes); + attributes = null; 
// CPP + break starttagloop; + case MATH: + reconstructTheActiveFormattingElements(); + attributes.adjustForMath(); + if (selfClosing) { + appendVoidElementToCurrentMayFosterMathML( + elementName, attributes); + selfClosing = false; + } else { + appendToCurrentNodeAndPushElementMayFosterMathML( + elementName, attributes); + } + attributes = null; // CPP + break starttagloop; + case SVG: + reconstructTheActiveFormattingElements(); + attributes.adjustForSvg(); + if (selfClosing) { + appendVoidElementToCurrentMayFosterSVG( + elementName, + attributes); + selfClosing = false; + } else { + appendToCurrentNodeAndPushElementMayFosterSVG( + elementName, attributes); + } + attributes = null; // CPP + break starttagloop; + case CAPTION: + case COL: + case COLGROUP: + case TBODY_OR_THEAD_OR_TFOOT: + case TR: + case TD_OR_TH: + case FRAME: + case FRAMESET: + case HEAD: + errStrayStartTag(name); + break starttagloop; + case OUTPUT: + reconstructTheActiveFormattingElements(); + appendToCurrentNodeAndPushElementMayFoster( + elementName, + attributes, formPointer); + attributes = null; // CPP + break starttagloop; + default: + reconstructTheActiveFormattingElements(); + appendToCurrentNodeAndPushElementMayFoster( + elementName, + attributes); + attributes = null; // CPP + break starttagloop; + } + } + case IN_HEAD: + inheadloop: for (;;) { + switch (group) { + case HTML: + errStrayStartTag(name); + if (!fragment && !isTemplateContents()) { + addAttributesToHtml(attributes); + attributes = null; // CPP + } + break starttagloop; + case BASE: + case LINK_OR_BASEFONT_OR_BGSOUND: + appendVoidElementToCurrentMayFoster( + elementName, + attributes); + selfClosing = false; + attributes = null; // CPP + break starttagloop; + case META: + // Fall through to IN_HEAD_NOSCRIPT + break inheadloop; + case TITLE: + startTagTitleInHead(elementName, attributes); + attributes = null; // CPP + break starttagloop; + case NOSCRIPT: + if (scriptingEnabled) { + appendToCurrentNodeAndPushElement( + 
elementName, + attributes); + originalMode = mode; + mode = TEXT; + tokenizer.setStateAndEndTagExpectation( + Tokenizer.RAWTEXT, elementName); + } else { + appendToCurrentNodeAndPushElementMayFoster( + elementName, + attributes); + mode = IN_HEAD_NOSCRIPT; + } + attributes = null; // CPP + break starttagloop; + case SCRIPT: + startTagScriptInHead(elementName, attributes); + attributes = null; // CPP + break starttagloop; + case STYLE: + case NOFRAMES: + startTagGenericRawText(elementName, attributes); + attributes = null; // CPP + break starttagloop; + case HEAD: + /* Parse error. */ + errFooSeenWhenFooOpen(name); + /* Ignore the token. */ + break starttagloop; + case TEMPLATE: + startTagTemplateInHead(elementName, attributes); + attributes = null; // CPP + break starttagloop; + default: + pop(); + mode = AFTER_HEAD; + continue starttagloop; + } + } + case IN_HEAD_NOSCRIPT: + switch (group) { + case HTML: + // XXX did Hixie really mean to omit "base" + // here? + errStrayStartTag(name); + if (!fragment && !isTemplateContents()) { + addAttributesToHtml(attributes); + attributes = null; // CPP + } + break starttagloop; + case LINK_OR_BASEFONT_OR_BGSOUND: + appendVoidElementToCurrentMayFoster( + elementName, + attributes); + selfClosing = false; + attributes = null; // CPP + break starttagloop; + case META: + checkMetaCharset(attributes); + appendVoidElementToCurrentMayFoster( + elementName, + attributes); + selfClosing = false; + attributes = null; // CPP + break starttagloop; + case STYLE: + case NOFRAMES: + appendToCurrentNodeAndPushElement( + elementName, + attributes); + originalMode = mode; + mode = TEXT; + tokenizer.setStateAndEndTagExpectation( + Tokenizer.RAWTEXT, elementName); + attributes = null; // CPP + break starttagloop; + case HEAD: + errFooSeenWhenFooOpen(name); + break starttagloop; + case NOSCRIPT: + errFooSeenWhenFooOpen(name); + break starttagloop; + default: + errBadStartTagInHead(name); + pop(); + mode = IN_HEAD; + continue; + } + case 
IN_COLUMN_GROUP: + switch (group) { + case HTML: + errStrayStartTag(name); + if (!fragment && !isTemplateContents()) { + addAttributesToHtml(attributes); + attributes = null; // CPP + } + break starttagloop; + case COL: + appendVoidElementToCurrentMayFoster( + elementName, + attributes); + selfClosing = false; + attributes = null; // CPP + break starttagloop; + case TEMPLATE: + startTagTemplateInHead(elementName, attributes); + attributes = null; // CPP + break starttagloop; + default: + if (currentPtr == 0 || stack[currentPtr].getGroup() == TEMPLATE) { + assert fragment || isTemplateContents(); + errGarbageInColgroup(); + break starttagloop; + } + pop(); + mode = IN_TABLE; + continue; + } + case IN_SELECT_IN_TABLE: + switch (group) { + case CAPTION: + case TBODY_OR_THEAD_OR_TFOOT: + case TR: + case TD_OR_TH: + case TABLE: + errStartTagWithSelectOpen(name); + eltPos = findLastInTableScope("select"); + if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) { + assert fragment; + break starttagloop; // http://www.w3.org/Bugs/Public/show_bug.cgi?id=8375 + } + while (currentPtr >= eltPos) { + pop(); + } + resetTheInsertionMode(); + continue; + default: + // fall through to IN_SELECT + } + case IN_SELECT: + switch (group) { + case HTML: + errStrayStartTag(name); + if (!fragment) { + addAttributesToHtml(attributes); + attributes = null; // CPP + } + break starttagloop; + case OPTION: + if (isCurrent("option")) { + pop(); + } + appendToCurrentNodeAndPushElement( + elementName, + attributes); + attributes = null; // CPP + break starttagloop; + case OPTGROUP: + if (isCurrent("option")) { + pop(); + } + if (isCurrent("optgroup")) { + pop(); + } + appendToCurrentNodeAndPushElement( + elementName, + attributes); + attributes = null; // CPP + break starttagloop; + case SELECT: + errStartSelectWhereEndSelectExpected(); + eltPos = findLastInTableScope(name); + if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) { + assert fragment; + errNoSelectInTableScope(); + break starttagloop; + } else { + 
while (currentPtr >= eltPos) { + pop(); + } + resetTheInsertionMode(); + break starttagloop; + } + case INPUT: + case TEXTAREA: + case KEYGEN: + errStartTagWithSelectOpen(name); + eltPos = findLastInTableScope("select"); + if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) { + assert fragment; + break starttagloop; + } + while (currentPtr >= eltPos) { + pop(); + } + resetTheInsertionMode(); + continue; + case SCRIPT: + startTagScriptInHead(elementName, attributes); + attributes = null; // CPP + break starttagloop; + case TEMPLATE: + startTagTemplateInHead(elementName, attributes); + attributes = null; // CPP + break starttagloop; + default: + errStrayStartTag(name); + break starttagloop; + } + case AFTER_BODY: + switch (group) { + case HTML: + errStrayStartTag(name); + if (!fragment && !isTemplateContents()) { + addAttributesToHtml(attributes); + attributes = null; // CPP + } + break starttagloop; + default: + errStrayStartTag(name); + mode = framesetOk ? FRAMESET_OK : IN_BODY; + continue; + } + case IN_FRAMESET: + switch (group) { + case FRAMESET: + appendToCurrentNodeAndPushElement( + elementName, + attributes); + attributes = null; // CPP + break starttagloop; + case FRAME: + appendVoidElementToCurrentMayFoster( + elementName, + attributes); + selfClosing = false; + attributes = null; // CPP + break starttagloop; + default: + // fall through to AFTER_FRAMESET + } + case AFTER_FRAMESET: + switch (group) { + case HTML: + errStrayStartTag(name); + if (!fragment && !isTemplateContents()) { + addAttributesToHtml(attributes); + attributes = null; // CPP + } + break starttagloop; + case NOFRAMES: + appendToCurrentNodeAndPushElement( + elementName, + attributes); + originalMode = mode; + mode = TEXT; + tokenizer.setStateAndEndTagExpectation( + Tokenizer.RAWTEXT, elementName); + attributes = null; // CPP + break starttagloop; + default: + errStrayStartTag(name); + break starttagloop; + } + case INITIAL: + /* + * Parse error. 
+ */ + // [NOCPP[ + switch (doctypeExpectation) { + case AUTO: + err("Start tag seen without seeing a doctype first. Expected e.g. \u201C<!DOCTYPE html>\u201D."); + break; + case HTML: + // ]NOCPP] + errStartTagWithoutDoctype(); + // [NOCPP[ + break; + case HTML401_STRICT: + err("Start tag seen without seeing a doctype first. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">\u201D."); + break; + case HTML401_TRANSITIONAL: + err("Start tag seen without seeing a doctype first. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\u201D."); + break; + case NO_DOCTYPE_ERRORS: + } + // ]NOCPP] + /* + * + * Set the document to quirks mode. + */ + documentModeInternal(DocumentMode.QUIRKS_MODE, null, null, + false); + /* + * Then, switch to the root element mode of the tree + * construction stage + */ + mode = BEFORE_HTML; + /* + * and reprocess the current token. + */ + continue; + case BEFORE_HTML: + switch (group) { + case HTML: + // optimize error check and streaming SAX by + // hoisting + // "html" handling here. + if (attributes == HtmlAttributes.EMPTY_ATTRIBUTES) { + // This has the right magic side effect + // that + // it + // makes attributes in SAX Tree mutable. + appendHtmlElementToDocumentAndPush(); + } else { + appendHtmlElementToDocumentAndPush(attributes); + } + // XXX application cache should fire here + mode = BEFORE_HEAD; + attributes = null; // CPP + break starttagloop; + default: + /* + * Create an HTMLElement node with the tag name + * html, in the HTML namespace. Append it to the + * Document object. + */ + appendHtmlElementToDocumentAndPush(); + /* Switch to the main mode */ + mode = BEFORE_HEAD; + /* + * reprocess the current token. 
+ */ + continue; + } + case BEFORE_HEAD: + switch (group) { + case HTML: + errStrayStartTag(name); + if (!fragment && !isTemplateContents()) { + addAttributesToHtml(attributes); + attributes = null; // CPP + } + break starttagloop; + case HEAD: + /* + * A start tag whose tag name is "head" + * + * Create an element for the token. + * + * Set the head element pointer to this new element + * node. + * + * Append the new element to the current node and + * push it onto the stack of open elements. + */ + appendToCurrentNodeAndPushHeadElement(attributes); + /* + * Change the insertion mode to "in head". + */ + mode = IN_HEAD; + attributes = null; // CPP + break starttagloop; + default: + /* + * Any other start tag token + * + * Act as if a start tag token with the tag name + * "head" and no attributes had been seen, + */ + appendToCurrentNodeAndPushHeadElement(HtmlAttributes.EMPTY_ATTRIBUTES); + mode = IN_HEAD; + /* + * then reprocess the current token. + * + * This will result in an empty head element being + * generated, with the current token being + * reprocessed in the "after head" insertion mode. + */ + continue; + } + case AFTER_HEAD: + switch (group) { + case HTML: + errStrayStartTag(name); + if (!fragment && !isTemplateContents()) { + addAttributesToHtml(attributes); + attributes = null; // CPP + } + break starttagloop; + case BODY: + if (attributes.getLength() == 0) { + // This has the right magic side effect + // that + // it + // makes attributes in SAX Tree mutable. 
+ appendToCurrentNodeAndPushBodyElement(); + } else { + appendToCurrentNodeAndPushBodyElement(attributes); + } + framesetOk = false; + mode = IN_BODY; + attributes = null; // CPP + break starttagloop; + case FRAMESET: + appendToCurrentNodeAndPushElement( + elementName, + attributes); + mode = IN_FRAMESET; + attributes = null; // CPP + break starttagloop; + case TEMPLATE: + errFooBetweenHeadAndBody(name); + pushHeadPointerOntoStack(); + StackNode<T> headOnStack = stack[currentPtr]; + startTagTemplateInHead(elementName, attributes); + removeFromStack(headOnStack); + attributes = null; // CPP + break starttagloop; + case BASE: + case LINK_OR_BASEFONT_OR_BGSOUND: + errFooBetweenHeadAndBody(name); + pushHeadPointerOntoStack(); + appendVoidElementToCurrentMayFoster( + elementName, + attributes); + selfClosing = false; + pop(); // head + attributes = null; // CPP + break starttagloop; + case META: + errFooBetweenHeadAndBody(name); + checkMetaCharset(attributes); + pushHeadPointerOntoStack(); + appendVoidElementToCurrentMayFoster( + elementName, + attributes); + selfClosing = false; + pop(); // head + attributes = null; // CPP + break starttagloop; + case SCRIPT: + errFooBetweenHeadAndBody(name); + pushHeadPointerOntoStack(); + appendToCurrentNodeAndPushElement( + elementName, + attributes); + originalMode = mode; + mode = TEXT; + tokenizer.setStateAndEndTagExpectation( + Tokenizer.SCRIPT_DATA, elementName); + attributes = null; // CPP + break starttagloop; + case STYLE: + case NOFRAMES: + errFooBetweenHeadAndBody(name); + pushHeadPointerOntoStack(); + appendToCurrentNodeAndPushElement( + elementName, + attributes); + originalMode = mode; + mode = TEXT; + tokenizer.setStateAndEndTagExpectation( + Tokenizer.RAWTEXT, elementName); + attributes = null; // CPP + break starttagloop; + case TITLE: + errFooBetweenHeadAndBody(name); + pushHeadPointerOntoStack(); + appendToCurrentNodeAndPushElement( + elementName, + attributes); + originalMode = mode; + mode = TEXT; + 
tokenizer.setStateAndEndTagExpectation( + Tokenizer.RCDATA, elementName); + attributes = null; // CPP + break starttagloop; + case HEAD: + errStrayStartTag(name); + break starttagloop; + default: + appendToCurrentNodeAndPushBodyElement(); + mode = FRAMESET_OK; + continue; + } + case AFTER_AFTER_BODY: + switch (group) { + case HTML: + errStrayStartTag(name); + if (!fragment && !isTemplateContents()) { + addAttributesToHtml(attributes); + attributes = null; // CPP + } + break starttagloop; + default: + errStrayStartTag(name); + fatal(); + mode = framesetOk ? FRAMESET_OK : IN_BODY; + continue; + } + case AFTER_AFTER_FRAMESET: + switch (group) { + case HTML: + errStrayStartTag(name); + if (!fragment && !isTemplateContents()) { + addAttributesToHtml(attributes); + attributes = null; // CPP + } + break starttagloop; + case NOFRAMES: + startTagGenericRawText(elementName, attributes); + attributes = null; // CPP + break starttagloop; + default: + errStrayStartTag(name); + break starttagloop; + } + case TEXT: + assert false; + break starttagloop; // Avoid infinite loop if the assertion + // fails + } + } + if (selfClosing) { + errSelfClosing(); + } + // CPPONLY: if (mBuilder == null && attributes != HtmlAttributes.EMPTY_ATTRIBUTES) { + // CPPONLY: Portability.delete(attributes); + // CPPONLY: } + } + + /** + * Shared start-tag handling for the TITLE cases of the IN_TEMPLATE and + * IN_HEAD switches. Inserts and pushes the element (may-foster variant), + * remembers the current mode in originalMode, switches to TEXT and puts + * the tokenizer into RCDATA with this element as the expected end tag. + * Callers set their local attributes reference to null afterwards. + * + * @param elementName the element to insert + * @param attributes the attributes of the start tag + * @throws SAXException declared by this helper; presumably propagated + * from the insertion call (not visible here) + */ + private void startTagTitleInHead(ElementName elementName, HtmlAttributes attributes) throws SAXException { + appendToCurrentNodeAndPushElementMayFoster(elementName, attributes); + originalMode = mode; + mode = TEXT; + tokenizer.setStateAndEndTagExpectation(Tokenizer.RCDATA, elementName); + } + + /** + * Same sequence as startTagTitleInHead, but the tokenizer is switched to + * RAWTEXT instead of RCDATA. Used by the start-tag switch for STYLE and + * NOFRAMES, and in IN_BODY for IFRAME, NOEMBED and the NOSCRIPT + * fall-through. + * + * @param elementName the element to insert + * @param attributes the attributes of the start tag + * @throws SAXException declared by this helper; presumably propagated + * from the insertion call (not visible here) + */ + private void startTagGenericRawText(ElementName elementName, HtmlAttributes attributes) throws SAXException { + appendToCurrentNodeAndPushElementMayFoster(elementName, attributes); + originalMode = mode; + mode = TEXT; + tokenizer.setStateAndEndTagExpectation(Tokenizer.RAWTEXT, elementName); + } + + /** + * Same sequence as startTagGenericRawText, but the tokenizer is switched + * to SCRIPT_DATA. Used for SCRIPT in the IN_TEMPLATE, IN_HEAD and + * IN_SELECT switches. + * + * @param elementName the element to insert + * @param attributes the attributes of the start tag + * @throws SAXException declared by this helper; presumably propagated + * from the insertion call (not visible here) + */ + private void startTagScriptInHead(ElementName elementName, HtmlAttributes attributes) 
throws SAXException { + // XXX need to manage much more stuff here if supporting document.write() + appendToCurrentNodeAndPushElementMayFoster(elementName, attributes); + originalMode = mode; + mode = TEXT; + tokenizer.setStateAndEndTagExpectation(Tokenizer.SCRIPT_DATA, elementName); + } + + private void startTagTemplateInHead(ElementName elementName, HtmlAttributes attributes) throws SAXException { + appendToCurrentNodeAndPushElement(elementName, attributes); + insertMarker(); + framesetOk = false; + originalMode = mode; + mode = IN_TEMPLATE; + pushTemplateMode(IN_TEMPLATE); + } + + private boolean isTemplateContents() { + return TreeBuilder.NOT_FOUND_ON_STACK != findLast("template"); + } + + private boolean isTemplateModeStackEmpty() { + return templateModePtr == -1; + } + + private boolean isSpecialParentInForeign(StackNode<T> stackNode) { + @NsUri String ns = stackNode.ns; + return ("http://www.w3.org/1999/xhtml" == ns) + || (stackNode.isHtmlIntegrationPoint()) + || (("http://www.w3.org/1998/Math/MathML" == ns) && (stackNode.getGroup() == MI_MO_MN_MS_MTEXT)); + } + + /** + * + * <p> + * C++ memory note: The return value must be released. + * + * @return + * @throws SAXException + * @throws StopSniffingException + */ + public static String extractCharsetFromContent(String attributeValue + // CPPONLY: , TreeBuilder tb + ) { + // This is a bit ugly. Converting the string to char array in order to + // make the portability layer smaller. 
+ int charsetState = CHARSET_INITIAL; + int start = -1; + int end = -1; + @Auto char[] buffer = Portability.newCharArrayFromString(attributeValue); + + charsetloop: for (int i = 0; i < buffer.length; i++) { + char c = buffer[i]; + switch (charsetState) { + case CHARSET_INITIAL: + switch (c) { + case 'c': + case 'C': + charsetState = CHARSET_C; + continue; + default: + continue; + } + case CHARSET_C: + switch (c) { + case 'h': + case 'H': + charsetState = CHARSET_H; + continue; + default: + charsetState = CHARSET_INITIAL; + continue; + } + case CHARSET_H: + switch (c) { + case 'a': + case 'A': + charsetState = CHARSET_A; + continue; + default: + charsetState = CHARSET_INITIAL; + continue; + } + case CHARSET_A: + switch (c) { + case 'r': + case 'R': + charsetState = CHARSET_R; + continue; + default: + charsetState = CHARSET_INITIAL; + continue; + } + case CHARSET_R: + switch (c) { + case 's': + case 'S': + charsetState = CHARSET_S; + continue; + default: + charsetState = CHARSET_INITIAL; + continue; + } + case CHARSET_S: + switch (c) { + case 'e': + case 'E': + charsetState = CHARSET_E; + continue; + default: + charsetState = CHARSET_INITIAL; + continue; + } + case CHARSET_E: + switch (c) { + case 't': + case 'T': + charsetState = CHARSET_T; + continue; + default: + charsetState = CHARSET_INITIAL; + continue; + } + case CHARSET_T: + switch (c) { + case '\t': + case '\n': + case '\u000C': + case '\r': + case ' ': + continue; + case '=': + charsetState = CHARSET_EQUALS; + continue; + default: + return null; + } + case CHARSET_EQUALS: + switch (c) { + case '\t': + case '\n': + case '\u000C': + case '\r': + case ' ': + continue; + case '\'': + start = i + 1; + charsetState = CHARSET_SINGLE_QUOTED; + continue; + case '\"': + start = i + 1; + charsetState = CHARSET_DOUBLE_QUOTED; + continue; + default: + start = i; + charsetState = CHARSET_UNQUOTED; + continue; + } + case CHARSET_SINGLE_QUOTED: + switch (c) { + case '\'': + end = i; + break charsetloop; + default: + 
continue; + } + case CHARSET_DOUBLE_QUOTED: + switch (c) { + case '\"': + end = i; + break charsetloop; + default: + continue; + } + case CHARSET_UNQUOTED: + switch (c) { + case '\t': + case '\n': + case '\u000C': + case '\r': + case ' ': + case ';': + end = i; + break charsetloop; + default: + continue; + } + } + } + String charset = null; + if (start != -1) { + if (end == -1) { + end = buffer.length; + } + charset = Portability.newStringFromBuffer(buffer, start, end + - start + // CPPONLY: , tb + ); + } + return charset; + } + + private void checkMetaCharset(HtmlAttributes attributes) + throws SAXException { + String charset = attributes.getValue(AttributeName.CHARSET); + if (charset != null) { + if (tokenizer.internalEncodingDeclaration(charset)) { + requestSuspension(); + return; + } + return; + } + if (!Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString( + "content-type", + attributes.getValue(AttributeName.HTTP_EQUIV))) { + return; + } + String content = attributes.getValue(AttributeName.CONTENT); + if (content != null) { + String extract = TreeBuilder.extractCharsetFromContent(content + // CPPONLY: , this + ); + // remember not to return early without releasing the string + if (extract != null) { + if (tokenizer.internalEncodingDeclaration(extract)) { + requestSuspension(); + } + } + Portability.releaseString(extract); + } + } + + public final void endTag(ElementName elementName) throws SAXException { + flushCharacters(); + needToDropLF = false; + int eltPos; + int group = elementName.getGroup(); + @Local String name = elementName.name; + endtagloop: for (;;) { + if (isInForeign()) { + if (stack[currentPtr].name != name) { + if (currentPtr == 0) { + errStrayEndTag(name); + } else { + errEndTagDidNotMatchCurrentOpenElement(name, stack[currentPtr].popName); + } + } + eltPos = currentPtr; + for (;;) { + if (eltPos == 0) { + assert fragment: "We can get this close to the root of the stack in foreign content only in the fragment case."; + break endtagloop; + 
} + if (stack[eltPos].name == name) { + while (currentPtr >= eltPos) { + pop(); + } + break endtagloop; + } + if (stack[--eltPos].ns == "http://www.w3.org/1999/xhtml") { + break; + } + } + } + switch (mode) { + case IN_TEMPLATE: + switch (group) { + case TEMPLATE: + // fall through to IN_HEAD + break; + default: + errStrayEndTag(name); + break endtagloop; + } + case IN_ROW: + switch (group) { + case TR: + eltPos = findLastOrRoot(TreeBuilder.TR); + if (eltPos == 0) { + assert fragment || isTemplateContents(); + errNoTableRowToClose(); + break endtagloop; + } + clearStackBackTo(eltPos); + pop(); + mode = IN_TABLE_BODY; + break endtagloop; + case TABLE: + eltPos = findLastOrRoot(TreeBuilder.TR); + if (eltPos == 0) { + assert fragment || isTemplateContents(); + errNoTableRowToClose(); + break endtagloop; + } + clearStackBackTo(eltPos); + pop(); + mode = IN_TABLE_BODY; + continue; + case TBODY_OR_THEAD_OR_TFOOT: + if (findLastInTableScope(name) == TreeBuilder.NOT_FOUND_ON_STACK) { + errStrayEndTag(name); + break endtagloop; + } + eltPos = findLastOrRoot(TreeBuilder.TR); + if (eltPos == 0) { + assert fragment || isTemplateContents(); + errNoTableRowToClose(); + break endtagloop; + } + clearStackBackTo(eltPos); + pop(); + mode = IN_TABLE_BODY; + continue; + case BODY: + case CAPTION: + case COL: + case COLGROUP: + case HTML: + case TD_OR_TH: + errStrayEndTag(name); + break endtagloop; + default: + // fall through to IN_TABLE + } + case IN_TABLE_BODY: + switch (group) { + case TBODY_OR_THEAD_OR_TFOOT: + eltPos = findLastOrRoot(name); + if (eltPos == 0) { + errStrayEndTag(name); + break endtagloop; + } + clearStackBackTo(eltPos); + pop(); + mode = IN_TABLE; + break endtagloop; + case TABLE: + eltPos = findLastInTableScopeOrRootTemplateTbodyTheadTfoot(); + if (eltPos == 0 || stack[eltPos].getGroup() == TEMPLATE) { + assert fragment || isTemplateContents(); + errStrayEndTag(name); + break endtagloop; + } + clearStackBackTo(eltPos); + pop(); + mode = IN_TABLE; + continue; + 
case BODY: + case CAPTION: + case COL: + case COLGROUP: + case HTML: + case TD_OR_TH: + case TR: + errStrayEndTag(name); + break endtagloop; + default: + // fall through to IN_TABLE + } + case IN_TABLE: + switch (group) { + case TABLE: + eltPos = findLast("table"); + if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) { + assert fragment || isTemplateContents(); + errStrayEndTag(name); + break endtagloop; + } + while (currentPtr >= eltPos) { + pop(); + } + resetTheInsertionMode(); + break endtagloop; + case BODY: + case CAPTION: + case COL: + case COLGROUP: + case HTML: + case TBODY_OR_THEAD_OR_TFOOT: + case TD_OR_TH: + case TR: + errStrayEndTag(name); + break endtagloop; + case TEMPLATE: + // fall through to IN_HEAD + break; + default: + errStrayEndTag(name); + // fall through to IN_BODY + } + case IN_CAPTION: + switch (group) { + case CAPTION: + eltPos = findLastInTableScope("caption"); + if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) { + break endtagloop; + } + generateImpliedEndTags(); + if (errorHandler != null && currentPtr != eltPos) { + errUnclosedElements(eltPos, name); + } + while (currentPtr >= eltPos) { + pop(); + } + clearTheListOfActiveFormattingElementsUpToTheLastMarker(); + mode = IN_TABLE; + break endtagloop; + case TABLE: + errTableClosedWhileCaptionOpen(); + eltPos = findLastInTableScope("caption"); + if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) { + break endtagloop; + } + generateImpliedEndTags(); + if (errorHandler != null && currentPtr != eltPos) { + errUnclosedElements(eltPos, name); + } + while (currentPtr >= eltPos) { + pop(); + } + clearTheListOfActiveFormattingElementsUpToTheLastMarker(); + mode = IN_TABLE; + continue; + case BODY: + case COL: + case COLGROUP: + case HTML: + case TBODY_OR_THEAD_OR_TFOOT: + case TD_OR_TH: + case TR: + errStrayEndTag(name); + break endtagloop; + default: + // fall through to IN_BODY + } + case IN_CELL: + switch (group) { + case TD_OR_TH: + eltPos = findLastInTableScope(name); + if (eltPos == 
TreeBuilder.NOT_FOUND_ON_STACK) { + errStrayEndTag(name); + break endtagloop; + } + generateImpliedEndTags(); + if (errorHandler != null && !isCurrent(name)) { + errUnclosedElements(eltPos, name); + } + while (currentPtr >= eltPos) { + pop(); + } + clearTheListOfActiveFormattingElementsUpToTheLastMarker(); + mode = IN_ROW; + break endtagloop; + case TABLE: + case TBODY_OR_THEAD_OR_TFOOT: + case TR: + if (findLastInTableScope(name) == TreeBuilder.NOT_FOUND_ON_STACK) { + assert name == "tbody" || name == "tfoot" || name == "thead" || fragment || isTemplateContents(); + errStrayEndTag(name); + break endtagloop; + } + closeTheCell(findLastInTableScopeTdTh()); + continue; + case BODY: + case CAPTION: + case COL: + case COLGROUP: + case HTML: + errStrayEndTag(name); + break endtagloop; + default: + // fall through to IN_BODY + } + case FRAMESET_OK: + case IN_BODY: + switch (group) { + case BODY: + if (!isSecondOnStackBody()) { + assert fragment || isTemplateContents(); + errStrayEndTag(name); + break endtagloop; + } + assert currentPtr >= 1; + if (errorHandler != null) { + uncloseloop1: for (int i = 2; i <= currentPtr; i++) { + switch (stack[i].getGroup()) { + case DD_OR_DT: + case LI: + case OPTGROUP: + case OPTION: // is this possible? 
+ case P: + case RB_OR_RTC: + case RT_OR_RP: + case TD_OR_TH: + case TBODY_OR_THEAD_OR_TFOOT: + break; + default: + errEndWithUnclosedElements(name); + break uncloseloop1; + } + } + } + mode = AFTER_BODY; + break endtagloop; + case HTML: + if (!isSecondOnStackBody()) { + assert fragment || isTemplateContents(); + errStrayEndTag(name); + break endtagloop; + } + if (errorHandler != null) { + uncloseloop2: for (int i = 0; i <= currentPtr; i++) { + switch (stack[i].getGroup()) { + case DD_OR_DT: + case LI: + case P: + case RB_OR_RTC: + case RT_OR_RP: + case TBODY_OR_THEAD_OR_TFOOT: + case TD_OR_TH: + case BODY: + case HTML: + break; + default: + errEndWithUnclosedElements(name); + break uncloseloop2; + } + } + } + mode = AFTER_BODY; + continue; + case DIV_OR_BLOCKQUOTE_OR_CENTER_OR_MENU: + case UL_OR_OL_OR_DL: + case PRE_OR_LISTING: + case FIELDSET: + case BUTTON: + case ADDRESS_OR_ARTICLE_OR_ASIDE_OR_DETAILS_OR_DIALOG_OR_DIR_OR_FIGCAPTION_OR_FIGURE_OR_FOOTER_OR_HEADER_OR_HGROUP_OR_MAIN_OR_NAV_OR_SECTION_OR_SUMMARY: + eltPos = findLastInScope(name); + if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) { + errStrayEndTag(name); + } else { + generateImpliedEndTags(); + if (errorHandler != null && !isCurrent(name)) { + errUnclosedElements(eltPos, name); + } + while (currentPtr >= eltPos) { + pop(); + } + } + break endtagloop; + case FORM: + if (!isTemplateContents()) { + if (formPointer == null) { + errStrayEndTag(name); + break endtagloop; + } + formPointer = null; + eltPos = findLastInScope(name); + if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) { + errStrayEndTag(name); + break endtagloop; + } + generateImpliedEndTags(); + if (errorHandler != null && !isCurrent(name)) { + errUnclosedElements(eltPos, name); + } + removeFromStack(eltPos); + break endtagloop; + } else { + eltPos = findLastInScope(name); + if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) { + errStrayEndTag(name); + break endtagloop; + } + generateImpliedEndTags(); + if (errorHandler != null && !isCurrent(name)) { + 
errUnclosedElements(eltPos, name); + } + while (currentPtr >= eltPos) { + pop(); + } + break endtagloop; + } + case P: + eltPos = findLastInButtonScope("p"); + if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) { + errNoElementToCloseButEndTagSeen("p"); + // XXX Can the 'in foreign' case happen anymore? + if (isInForeign()) { + errHtmlStartTagInForeignContext(name); + // Check for currentPtr for the fragment + // case. + while (currentPtr >= 0 && stack[currentPtr].ns != "http://www.w3.org/1999/xhtml") { + pop(); + } + } + appendVoidElementToCurrentMayFoster( + elementName, + HtmlAttributes.EMPTY_ATTRIBUTES); + break endtagloop; + } + generateImpliedEndTagsExceptFor("p"); + assert eltPos != TreeBuilder.NOT_FOUND_ON_STACK; + if (errorHandler != null && eltPos != currentPtr) { + errUnclosedElements(eltPos, name); + } + while (currentPtr >= eltPos) { + pop(); + } + break endtagloop; + case LI: + eltPos = findLastInListScope(name); + if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) { + errNoElementToCloseButEndTagSeen(name); + } else { + generateImpliedEndTagsExceptFor(name); + if (errorHandler != null + && eltPos != currentPtr) { + errUnclosedElements(eltPos, name); + } + while (currentPtr >= eltPos) { + pop(); + } + } + break endtagloop; + case DD_OR_DT: + eltPos = findLastInScope(name); + if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) { + errNoElementToCloseButEndTagSeen(name); + } else { + generateImpliedEndTagsExceptFor(name); + if (errorHandler != null + && eltPos != currentPtr) { + errUnclosedElements(eltPos, name); + } + while (currentPtr >= eltPos) { + pop(); + } + } + break endtagloop; + case H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6: + eltPos = findLastInScopeHn(); + if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) { + errStrayEndTag(name); + } else { + generateImpliedEndTags(); + if (errorHandler != null && !isCurrent(name)) { + errUnclosedElements(eltPos, name); + } + while (currentPtr >= eltPos) { + pop(); + } + } + break endtagloop; + case OBJECT: + case MARQUEE_OR_APPLET: + 
eltPos = findLastInScope(name); + if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) { + errStrayEndTag(name); + } else { + generateImpliedEndTags(); + if (errorHandler != null && !isCurrent(name)) { + errUnclosedElements(eltPos, name); + } + while (currentPtr >= eltPos) { + pop(); + } + clearTheListOfActiveFormattingElementsUpToTheLastMarker(); + } + break endtagloop; + case BR: + errEndTagBr(); + if (isInForeign()) { + // XXX can this happen anymore? + errHtmlStartTagInForeignContext(name); + // Check for currentPtr for the fragment + // case. + while (currentPtr >= 0 && stack[currentPtr].ns != "http://www.w3.org/1999/xhtml") { + pop(); + } + } + reconstructTheActiveFormattingElements(); + appendVoidElementToCurrentMayFoster( + elementName, + HtmlAttributes.EMPTY_ATTRIBUTES); + break endtagloop; + case TEMPLATE: + // fall through to IN_HEAD; + break; + case AREA_OR_WBR: + // CPPONLY: case MENUITEM: + case PARAM_OR_SOURCE_OR_TRACK: + case EMBED: + case IMG: + case IMAGE: + case INPUT: + case KEYGEN: // XXX?? + case HR: + case ISINDEX: + case IFRAME: + case NOEMBED: // XXX??? + case NOFRAMES: // XXX?? + case SELECT: + case TABLE: + case TEXTAREA: // XXX?? 
+ errStrayEndTag(name); + break endtagloop; + case NOSCRIPT: + if (scriptingEnabled) { + errStrayEndTag(name); + break endtagloop; + } else { + // fall through + } + case A: + case B_OR_BIG_OR_CODE_OR_EM_OR_I_OR_S_OR_SMALL_OR_STRIKE_OR_STRONG_OR_TT_OR_U: + case FONT: + case NOBR: + if (adoptionAgencyEndTag(name)) { + break endtagloop; + } + // else handle like any other tag + default: + if (isCurrent(name)) { + pop(); + break endtagloop; + } + + eltPos = currentPtr; + for (;;) { + StackNode<T> node = stack[eltPos]; + if (node.ns == "http://www.w3.org/1999/xhtml" && node.name == name) { + generateImpliedEndTags(); + if (errorHandler != null + && !isCurrent(name)) { + errUnclosedElements(eltPos, name); + } + while (currentPtr >= eltPos) { + pop(); + } + break endtagloop; + } else if (eltPos == 0 || node.isSpecial()) { + errStrayEndTag(name); + break endtagloop; + } + eltPos--; + } + } + case IN_HEAD: + switch (group) { + case HEAD: + pop(); + mode = AFTER_HEAD; + break endtagloop; + case BR: + case HTML: + case BODY: + pop(); + mode = AFTER_HEAD; + continue; + case TEMPLATE: + endTagTemplateInHead(); + break endtagloop; + default: + errStrayEndTag(name); + break endtagloop; + } + case IN_HEAD_NOSCRIPT: + switch (group) { + case NOSCRIPT: + pop(); + mode = IN_HEAD; + break endtagloop; + case BR: + errStrayEndTag(name); + pop(); + mode = IN_HEAD; + continue; + default: + errStrayEndTag(name); + break endtagloop; + } + case IN_COLUMN_GROUP: + switch (group) { + case COLGROUP: + if (currentPtr == 0 || stack[currentPtr].getGroup() == + TreeBuilder.TEMPLATE) { + assert fragment || isTemplateContents(); + errGarbageInColgroup(); + break endtagloop; + } + pop(); + mode = IN_TABLE; + break endtagloop; + case COL: + errStrayEndTag(name); + break endtagloop; + case TEMPLATE: + endTagTemplateInHead(); + break endtagloop; + default: + if (currentPtr == 0 || stack[currentPtr].getGroup() == + TreeBuilder.TEMPLATE) { + assert fragment || isTemplateContents(); + 
errGarbageInColgroup(); + break endtagloop; + } + pop(); + mode = IN_TABLE; + continue; + } + case IN_SELECT_IN_TABLE: + switch (group) { + case CAPTION: + case TABLE: + case TBODY_OR_THEAD_OR_TFOOT: + case TR: + case TD_OR_TH: + errEndTagSeenWithSelectOpen(name); + if (findLastInTableScope(name) != TreeBuilder.NOT_FOUND_ON_STACK) { + eltPos = findLastInTableScope("select"); + if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) { + assert fragment; + break endtagloop; // http://www.w3.org/Bugs/Public/show_bug.cgi?id=8375 + } + while (currentPtr >= eltPos) { + pop(); + } + resetTheInsertionMode(); + continue; + } else { + break endtagloop; + } + default: + // fall through to IN_SELECT + } + case IN_SELECT: + switch (group) { + case OPTION: + if (isCurrent("option")) { + pop(); + break endtagloop; + } else { + errStrayEndTag(name); + break endtagloop; + } + case OPTGROUP: + if (isCurrent("option") + && "optgroup" == stack[currentPtr - 1].name) { + pop(); + } + if (isCurrent("optgroup")) { + pop(); + } else { + errStrayEndTag(name); + } + break endtagloop; + case SELECT: + eltPos = findLastInTableScope("select"); + if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) { + assert fragment; + errStrayEndTag(name); + break endtagloop; + } + while (currentPtr >= eltPos) { + pop(); + } + resetTheInsertionMode(); + break endtagloop; + case TEMPLATE: + endTagTemplateInHead(); + break endtagloop; + default: + errStrayEndTag(name); + break endtagloop; + } + case AFTER_BODY: + switch (group) { + case HTML: + if (fragment) { + errStrayEndTag(name); + break endtagloop; + } else { + mode = AFTER_AFTER_BODY; + break endtagloop; + } + default: + errEndTagAfterBody(); + mode = framesetOk ? 
FRAMESET_OK : IN_BODY; + continue; + } + case IN_FRAMESET: + switch (group) { + case FRAMESET: + if (currentPtr == 0) { + assert fragment; + errStrayEndTag(name); + break endtagloop; + } + pop(); + if ((!fragment) && !isCurrent("frameset")) { + mode = AFTER_FRAMESET; + } + break endtagloop; + default: + errStrayEndTag(name); + break endtagloop; + } + case AFTER_FRAMESET: + switch (group) { + case HTML: + mode = AFTER_AFTER_FRAMESET; + break endtagloop; + default: + errStrayEndTag(name); + break endtagloop; + } + case INITIAL: + /* + * Parse error. + */ + // [NOCPP[ + switch (doctypeExpectation) { + case AUTO: + err("End tag seen without seeing a doctype first. Expected e.g. \u201C<!DOCTYPE html>\u201D."); + break; + case HTML: + // ]NOCPP] + errEndTagSeenWithoutDoctype(); + // [NOCPP[ + break; + case HTML401_STRICT: + err("End tag seen without seeing a doctype first. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">\u201D."); + break; + case HTML401_TRANSITIONAL: + err("End tag seen without seeing a doctype first. Expected \u201C<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\u201D."); + break; + case NO_DOCTYPE_ERRORS: + } + // ]NOCPP] + /* + * + * Set the document to quirks mode. + */ + documentModeInternal(DocumentMode.QUIRKS_MODE, null, null, + false); + /* + * Then, switch to the root element mode of the tree + * construction stage + */ + mode = BEFORE_HTML; + /* + * and reprocess the current token. + */ + continue; + case BEFORE_HTML: + switch (group) { + case HEAD: + case BR: + case HTML: + case BODY: + /* + * Create an HTMLElement node with the tag name + * html, in the HTML namespace. Append it to the + * Document object. + */ + appendHtmlElementToDocumentAndPush(); + /* Switch to the main mode */ + mode = BEFORE_HEAD; + /* + * reprocess the current token. 
+ */ + continue; + default: + errStrayEndTag(name); + break endtagloop; + } + case BEFORE_HEAD: + switch (group) { + case HEAD: + case BR: + case HTML: + case BODY: + appendToCurrentNodeAndPushHeadElement(HtmlAttributes.EMPTY_ATTRIBUTES); + mode = IN_HEAD; + continue; + default: + errStrayEndTag(name); + break endtagloop; + } + case AFTER_HEAD: + switch (group) { + case TEMPLATE: + endTagTemplateInHead(); + break endtagloop; + case HTML: + case BODY: + case BR: + appendToCurrentNodeAndPushBodyElement(); + mode = FRAMESET_OK; + continue; + default: + errStrayEndTag(name); + break endtagloop; + } + case AFTER_AFTER_BODY: + errStrayEndTag(name); + mode = framesetOk ? FRAMESET_OK : IN_BODY; + continue; + case AFTER_AFTER_FRAMESET: + errStrayEndTag(name); + break endtagloop; + case TEXT: + // XXX need to manage insertion point here + pop(); + if (originalMode == AFTER_HEAD) { + silentPop(); + } + mode = originalMode; + break endtagloop; + } + } // endtagloop + } + + private void endTagTemplateInHead() throws SAXException { + int eltPos = findLast("template"); + if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) { + errStrayEndTag("template"); + return; + } + generateImpliedEndTags(); + if (errorHandler != null && !isCurrent("template")) { + errUnclosedElements(eltPos, "template"); + } + while (currentPtr >= eltPos) { + pop(); + } + clearTheListOfActiveFormattingElementsUpToTheLastMarker(); + popTemplateMode(); + resetTheInsertionMode(); + } + + private int findLastInTableScopeOrRootTemplateTbodyTheadTfoot() { + for (int i = currentPtr; i > 0; i--) { + if (stack[i].getGroup() == TreeBuilder.TBODY_OR_THEAD_OR_TFOOT || + stack[i].getGroup() == TreeBuilder.TEMPLATE) { + return i; + } + } + return 0; + } + + private int findLast(@Local String name) { + for (int i = currentPtr; i > 0; i--) { + if (stack[i].ns == "http://www.w3.org/1999/xhtml" && stack[i].name == name) { + return i; + } + } + return TreeBuilder.NOT_FOUND_ON_STACK; + } + + private int findLastInTableScope(@Local 
String name) { + for (int i = currentPtr; i > 0; i--) { + if (stack[i].ns == "http://www.w3.org/1999/xhtml") { + if (stack[i].name == name) { + return i; + } else if (stack[i].name == "table" || stack[i].name == "template") { + return TreeBuilder.NOT_FOUND_ON_STACK; + } + } + } + return TreeBuilder.NOT_FOUND_ON_STACK; + } + + private int findLastInButtonScope(@Local String name) { + for (int i = currentPtr; i > 0; i--) { + if (stack[i].ns == "http://www.w3.org/1999/xhtml") { + if (stack[i].name == name) { + return i; + } else if (stack[i].name == "button") { + return TreeBuilder.NOT_FOUND_ON_STACK; + } + } + + if (stack[i].isScoping()) { + return TreeBuilder.NOT_FOUND_ON_STACK; + } + } + return TreeBuilder.NOT_FOUND_ON_STACK; + } + + private int findLastInScope(@Local String name) { + for (int i = currentPtr; i > 0; i--) { + if (stack[i].ns == "http://www.w3.org/1999/xhtml" && stack[i].name == name) { + return i; + } else if (stack[i].isScoping()) { + return TreeBuilder.NOT_FOUND_ON_STACK; + } + } + return TreeBuilder.NOT_FOUND_ON_STACK; + } + + private int findLastInListScope(@Local String name) { + for (int i = currentPtr; i > 0; i--) { + if (stack[i].ns == "http://www.w3.org/1999/xhtml") { + if (stack[i].name == name) { + return i; + } else if (stack[i].name == "ul" || stack[i].name == "ol") { + return TreeBuilder.NOT_FOUND_ON_STACK; + } + } + + if (stack[i].isScoping()) { + return TreeBuilder.NOT_FOUND_ON_STACK; + } + } + return TreeBuilder.NOT_FOUND_ON_STACK; + } + + private int findLastInScopeHn() { + for (int i = currentPtr; i > 0; i--) { + if (stack[i].getGroup() == TreeBuilder.H1_OR_H2_OR_H3_OR_H4_OR_H5_OR_H6) { + return i; + } else if (stack[i].isScoping()) { + return TreeBuilder.NOT_FOUND_ON_STACK; + } + } + return TreeBuilder.NOT_FOUND_ON_STACK; + } + + private void generateImpliedEndTagsExceptFor(@Local String name) + throws SAXException { + for (;;) { + StackNode<T> node = stack[currentPtr]; + switch (node.getGroup()) { + case P: + case LI: + case 
DD_OR_DT: + case OPTION: + case OPTGROUP: + case RB_OR_RTC: + case RT_OR_RP: + if (node.ns == "http://www.w3.org/1999/xhtml" && node.name == name) { + return; + } + pop(); + continue; + default: + return; + } + } + } + + private void generateImpliedEndTags() throws SAXException { + for (;;) { + switch (stack[currentPtr].getGroup()) { + case P: + case LI: + case DD_OR_DT: + case OPTION: + case OPTGROUP: + case RB_OR_RTC: + case RT_OR_RP: + pop(); + continue; + default: + return; + } + } + } + + private boolean isSecondOnStackBody() { + return currentPtr >= 1 && stack[1].getGroup() == TreeBuilder.BODY; + } + + private void documentModeInternal(DocumentMode m, String publicIdentifier, + String systemIdentifier, boolean html4SpecificAdditionalErrorChecks) + throws SAXException { + + if (isSrcdocDocument) { + // Srcdoc documents are always rendered in standards mode. + quirks = false; + if (documentModeHandler != null) { + documentModeHandler.documentMode( + DocumentMode.STANDARDS_MODE + // [NOCPP[ + , null, null, false + // ]NOCPP] + ); + } + return; + } + + quirks = (m == DocumentMode.QUIRKS_MODE); + if (documentModeHandler != null) { + documentModeHandler.documentMode( + m + // [NOCPP[ + , publicIdentifier, systemIdentifier, + html4SpecificAdditionalErrorChecks + // ]NOCPP] + ); + } + // [NOCPP[ + documentMode(m, publicIdentifier, systemIdentifier, + html4SpecificAdditionalErrorChecks); + // ]NOCPP] + } + + private boolean isAlmostStandards(String publicIdentifier, + String systemIdentifier) { + if (Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString( + "-//w3c//dtd xhtml 1.0 transitional//en", publicIdentifier)) { + return true; + } + if (Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString( + "-//w3c//dtd xhtml 1.0 frameset//en", publicIdentifier)) { + return true; + } + if (systemIdentifier != null) { + if (Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString( + "-//w3c//dtd html 4.01 transitional//en", publicIdentifier)) { + return true; + } + if 
(Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString( + "-//w3c//dtd html 4.01 frameset//en", publicIdentifier)) { + return true; + } + } + return false; + } + + private boolean isQuirky(@Local String name, String publicIdentifier, + String systemIdentifier, boolean forceQuirks) { + if (forceQuirks) { + return true; + } + if (name != HTML_LOCAL) { + return true; + } + if (publicIdentifier != null) { + for (int i = 0; i < TreeBuilder.QUIRKY_PUBLIC_IDS.length; i++) { + if (Portability.lowerCaseLiteralIsPrefixOfIgnoreAsciiCaseString( + TreeBuilder.QUIRKY_PUBLIC_IDS[i], publicIdentifier)) { + return true; + } + } + if (Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString( + "-//w3o//dtd w3 html strict 3.0//en//", publicIdentifier) + || Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString( + "-/w3c/dtd html 4.0 transitional/en", + publicIdentifier) + || Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString( + "html", publicIdentifier)) { + return true; + } + } + if (systemIdentifier == null) { + if (Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString( + "-//w3c//dtd html 4.01 transitional//en", publicIdentifier)) { + return true; + } else if (Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString( + "-//w3c//dtd html 4.01 frameset//en", publicIdentifier)) { + return true; + } + } else if (Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString( + "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd", + systemIdentifier)) { + return true; + } + return false; + } + + private void closeTheCell(int eltPos) throws SAXException { + generateImpliedEndTags(); + if (errorHandler != null && eltPos != currentPtr) { + errUnclosedElementsCell(eltPos); + } + while (currentPtr >= eltPos) { + pop(); + } + clearTheListOfActiveFormattingElementsUpToTheLastMarker(); + mode = IN_ROW; + return; + } + + private int findLastInTableScopeTdTh() { + for (int i = currentPtr; i > 0; i--) { + @Local String name = stack[i].name; + if (stack[i].ns == 
"http://www.w3.org/1999/xhtml") { + if ("td" == name || "th" == name) { + return i; + } else if (name == "table" || name == "template") { + return TreeBuilder.NOT_FOUND_ON_STACK; + } + } + } + return TreeBuilder.NOT_FOUND_ON_STACK; + } + + private void clearStackBackTo(int eltPos) throws SAXException { + int eltGroup = stack[eltPos].getGroup(); + while (currentPtr > eltPos) { // > not >= intentional + if (stack[currentPtr].ns == "http://www.w3.org/1999/xhtml" + && stack[currentPtr].getGroup() == TEMPLATE + && (eltGroup == TABLE || eltGroup == TBODY_OR_THEAD_OR_TFOOT|| eltGroup == TR || eltPos == 0)) { + return; + } + pop(); + } + } + + private void resetTheInsertionMode() { + StackNode<T> node; + @Local String name; + @NsUri String ns; + for (int i = currentPtr; i >= 0; i--) { + node = stack[i]; + name = node.name; + ns = node.ns; + if (i == 0) { + if (!(contextNamespace == "http://www.w3.org/1999/xhtml" && (contextName == "td" || contextName == "th"))) { + if (fragment) { + // Make sure we are parsing a fragment otherwise the context element doesn't make sense. + name = contextName; + ns = contextNamespace; + } + } else { + mode = framesetOk ? 
FRAMESET_OK : IN_BODY; // XXX from Hixie's email + return; + } + } + if ("select" == name) { + int ancestorIndex = i; + while (ancestorIndex > 0) { + StackNode<T> ancestor = stack[ancestorIndex--]; + if ("http://www.w3.org/1999/xhtml" == ancestor.ns) { + if ("template" == ancestor.name) { + break; + } + if ("table" == ancestor.name) { + mode = IN_SELECT_IN_TABLE; + return; + } + } + } + mode = IN_SELECT; + return; + } else if ("td" == name || "th" == name) { + mode = IN_CELL; + return; + } else if ("tr" == name) { + mode = IN_ROW; + return; + } else if ("tbody" == name || "thead" == name || "tfoot" == name) { + mode = IN_TABLE_BODY; + return; + } else if ("caption" == name) { + mode = IN_CAPTION; + return; + } else if ("colgroup" == name) { + mode = IN_COLUMN_GROUP; + return; + } else if ("table" == name) { + mode = IN_TABLE; + return; + } else if ("http://www.w3.org/1999/xhtml" != ns) { + mode = framesetOk ? FRAMESET_OK : IN_BODY; + return; + } else if ("template" == name) { + assert templateModePtr >= 0; + mode = templateModeStack[templateModePtr]; + return; + } else if ("head" == name) { + if (name == contextName) { + mode = framesetOk ? FRAMESET_OK : IN_BODY; // really + } else { + mode = IN_HEAD; + } + return; + } else if ("body" == name) { + mode = framesetOk ? FRAMESET_OK : IN_BODY; + return; + } else if ("frameset" == name) { + // TODO: Fragment case. Add error reporting. + mode = IN_FRAMESET; + return; + } else if ("html" == name) { + if (headPointer == null) { + // TODO: Fragment case. Add error reporting. + mode = BEFORE_HEAD; + } else { + mode = AFTER_HEAD; + } + return; + } else if (i == 0) { + mode = framesetOk ? 
FRAMESET_OK : IN_BODY; + return; + } + } + } + + /** + * @throws SAXException + * + */ + private void implicitlyCloseP() throws SAXException { + int eltPos = findLastInButtonScope("p"); + if (eltPos == TreeBuilder.NOT_FOUND_ON_STACK) { + return; + } + generateImpliedEndTagsExceptFor("p"); + if (errorHandler != null && eltPos != currentPtr) { + errUnclosedElementsImplied(eltPos, "p"); + } + while (currentPtr >= eltPos) { + pop(); + } + } + + private boolean debugOnlyClearLastStackSlot() { + stack[currentPtr] = null; + return true; + } + + private boolean debugOnlyClearLastListSlot() { + listOfActiveFormattingElements[listPtr] = null; + return true; + } + + private void pushTemplateMode(int mode) { + templateModePtr++; + if (templateModePtr == templateModeStack.length) { + int[] newStack = new int[templateModeStack.length + 64]; + System.arraycopy(templateModeStack, 0, newStack, 0, templateModeStack.length); + templateModeStack = newStack; + } + templateModeStack[templateModePtr] = mode; + } + + @SuppressWarnings("unchecked") private void push(StackNode<T> node) throws SAXException { + currentPtr++; + if (currentPtr == stack.length) { + StackNode<T>[] newStack = new StackNode[stack.length + 64]; + System.arraycopy(stack, 0, newStack, 0, stack.length); + stack = newStack; + } + stack[currentPtr] = node; + elementPushed(node.ns, node.popName, node.node); + } + + @SuppressWarnings("unchecked") private void silentPush(StackNode<T> node) throws SAXException { + currentPtr++; + if (currentPtr == stack.length) { + StackNode<T>[] newStack = new StackNode[stack.length + 64]; + System.arraycopy(stack, 0, newStack, 0, stack.length); + stack = newStack; + } + stack[currentPtr] = node; + } + + @SuppressWarnings("unchecked") private void append(StackNode<T> node) { + listPtr++; + if (listPtr == listOfActiveFormattingElements.length) { + StackNode<T>[] newList = new StackNode[listOfActiveFormattingElements.length + 64]; + System.arraycopy(listOfActiveFormattingElements, 0, newList, 
0, + listOfActiveFormattingElements.length); + listOfActiveFormattingElements = newList; + } + listOfActiveFormattingElements[listPtr] = node; + } + + @Inline private void insertMarker() { + append(null); + } + + private void clearTheListOfActiveFormattingElementsUpToTheLastMarker() { + while (listPtr > -1) { + if (listOfActiveFormattingElements[listPtr] == null) { + --listPtr; + return; + } + listOfActiveFormattingElements[listPtr].release(); + --listPtr; + } + } + + @Inline private boolean isCurrent(@Local String name) { + return stack[currentPtr].ns == "http://www.w3.org/1999/xhtml" && + name == stack[currentPtr].name; + } + + private void removeFromStack(int pos) throws SAXException { + if (currentPtr == pos) { + pop(); + } else { + fatal(); + stack[pos].release(); + System.arraycopy(stack, pos + 1, stack, pos, currentPtr - pos); + assert debugOnlyClearLastStackSlot(); + currentPtr--; + } + } + + private void removeFromStack(StackNode<T> node) throws SAXException { + if (stack[currentPtr] == node) { + pop(); + } else { + int pos = currentPtr - 1; + while (pos >= 0 && stack[pos] != node) { + pos--; + } + if (pos == -1) { + // dead code? + return; + } + fatal(); + node.release(); + System.arraycopy(stack, pos + 1, stack, pos, currentPtr - pos); + currentPtr--; + } + } + + private void removeFromListOfActiveFormattingElements(int pos) { + assert listOfActiveFormattingElements[pos] != null; + listOfActiveFormattingElements[pos].release(); + if (pos == listPtr) { + assert debugOnlyClearLastListSlot(); + listPtr--; + return; + } + assert pos < listPtr; + System.arraycopy(listOfActiveFormattingElements, pos + 1, + listOfActiveFormattingElements, pos, listPtr - pos); + assert debugOnlyClearLastListSlot(); + listPtr--; + } + + /** + * Adoption agency algorithm. + * + * @param name subject as described in the specified algorithm. + * @return Returns true if the algorithm has completed and there is nothing remaining to + * be done. 
Returns false if the algorithm needs to "act as described in the 'any other + * end tag' entry" as described in the specified algorithm. + * @throws SAXException + */ + private boolean adoptionAgencyEndTag(@Local String name) throws SAXException { + // This check intends to ensure that for properly nested tags, closing tags will match + // against the stack instead of the listOfActiveFormattingElements. + if (stack[currentPtr].ns == "http://www.w3.org/1999/xhtml" && + stack[currentPtr].name == name && + findInListOfActiveFormattingElements(stack[currentPtr]) == -1) { + // If the current element matches the name but isn't on the list of active + // formatting elements, then it is possible that the list was mangled by the Noah's Ark + // clause. In this case, we want to match the end tag against the stack instead of + // proceeding with the AAA algorithm that may match against the list of + // active formatting elements (and possibly mangle the tree in unexpected ways). + pop(); + return true; + } + + // If you crash around here, perhaps some stack node variable claimed to + // be a weak ref isn't. 
+ for (int i = 0; i < 8; ++i) { + int formattingEltListPos = listPtr; + while (formattingEltListPos > -1) { + StackNode<T> listNode = listOfActiveFormattingElements[formattingEltListPos]; // weak ref + if (listNode == null) { + formattingEltListPos = -1; + break; + } else if (listNode.name == name) { + break; + } + formattingEltListPos--; + } + if (formattingEltListPos == -1) { + return false; + } + // this *looks* like a weak ref to the list of formatting elements + StackNode<T> formattingElt = listOfActiveFormattingElements[formattingEltListPos]; + int formattingEltStackPos = currentPtr; + boolean inScope = true; + while (formattingEltStackPos > -1) { + StackNode<T> node = stack[formattingEltStackPos]; // weak ref + if (node == formattingElt) { + break; + } else if (node.isScoping()) { + inScope = false; + } + formattingEltStackPos--; + } + if (formattingEltStackPos == -1) { + errNoElementToCloseButEndTagSeen(name); + removeFromListOfActiveFormattingElements(formattingEltListPos); + return true; + } + if (!inScope) { + errNoElementToCloseButEndTagSeen(name); + return true; + } + // stackPos now points to the formatting element and it is in scope + if (formattingEltStackPos != currentPtr) { + errEndTagViolatesNestingRules(name); + } + int furthestBlockPos = formattingEltStackPos + 1; + while (furthestBlockPos <= currentPtr) { + StackNode<T> node = stack[furthestBlockPos]; // weak ref + assert furthestBlockPos > 0: "How is formattingEltStackPos + 1 not > 0?"; + if (node.isSpecial()) { + break; + } + furthestBlockPos++; + } + if (furthestBlockPos > currentPtr) { + // no furthest block + while (currentPtr >= formattingEltStackPos) { + pop(); + } + removeFromListOfActiveFormattingElements(formattingEltListPos); + return true; + } + StackNode<T> commonAncestor = stack[formattingEltStackPos - 1]; // weak ref + StackNode<T> furthestBlock = stack[furthestBlockPos]; // weak ref + // detachFromParent(furthestBlock.node); XXX AAA CHANGE + int bookmark = formattingEltListPos; 
+ int nodePos = furthestBlockPos; + StackNode<T> lastNode = furthestBlock; // weak ref + int j = 0; + for (;;) { + ++j; + nodePos--; + if (nodePos == formattingEltStackPos) { + break; + } + StackNode<T> node = stack[nodePos]; // weak ref + int nodeListPos = findInListOfActiveFormattingElements(node); + + if (j > 3 && nodeListPos != -1) { + removeFromListOfActiveFormattingElements(nodeListPos); + + // Adjust the indices into the list to account + // for the removal of nodeListPos. + if (nodeListPos <= formattingEltListPos) { + formattingEltListPos--; + } + if (nodeListPos <= bookmark) { + bookmark--; + } + + // Update position to reflect removal from list. + nodeListPos = -1; + } + + if (nodeListPos == -1) { + assert formattingEltStackPos < nodePos; + assert bookmark < nodePos; + assert furthestBlockPos > nodePos; + removeFromStack(nodePos); // node is now a bad pointer in C++ + furthestBlockPos--; + continue; + } + // now node is both on stack and in the list + if (nodePos == furthestBlockPos) { + bookmark = nodeListPos + 1; + } + // if (hasChildren(node.node)) { XXX AAA CHANGE + assert node == listOfActiveFormattingElements[nodeListPos]; + assert node == stack[nodePos]; + T clone = createElement("http://www.w3.org/1999/xhtml", + node.name, node.attributes.cloneAttributes(null), commonAncestor.node); + StackNode<T> newNode = new StackNode<T>(node.getFlags(), node.ns, + node.name, clone, node.popName, node.attributes + // [NOCPP[ + , node.getLocator() + // ]NOCPP] + ); // creation ownership goes to stack + node.dropAttributes(); // adopt ownership to newNode + stack[nodePos] = newNode; + newNode.retain(); // retain for list + listOfActiveFormattingElements[nodeListPos] = newNode; + node.release(); // release from stack + node.release(); // release from list + node = newNode; + // } XXX AAA CHANGE + detachFromParent(lastNode.node); + appendElement(lastNode.node, node.node); + lastNode = node; + } + if (commonAncestor.isFosterParenting()) { + fatal(); + 
detachFromParent(lastNode.node); + insertIntoFosterParent(lastNode.node); + } else { + detachFromParent(lastNode.node); + appendElement(lastNode.node, commonAncestor.node); + } + T clone = createElement("http://www.w3.org/1999/xhtml", + formattingElt.name, + formattingElt.attributes.cloneAttributes(null), furthestBlock.node); + StackNode<T> formattingClone = new StackNode<T>( + formattingElt.getFlags(), formattingElt.ns, + formattingElt.name, clone, formattingElt.popName, + formattingElt.attributes + // [NOCPP[ + , errorHandler == null ? null : new TaintableLocatorImpl(tokenizer) + // ]NOCPP] + ); // Ownership transfers to stack below + formattingElt.dropAttributes(); // transfer ownership to + // formattingClone + appendChildrenToNewParent(furthestBlock.node, clone); + appendElement(clone, furthestBlock.node); + removeFromListOfActiveFormattingElements(formattingEltListPos); + insertIntoListOfActiveFormattingElements(formattingClone, bookmark); + assert formattingEltStackPos < furthestBlockPos; + removeFromStack(formattingEltStackPos); + // furthestBlockPos is now off by one and points to the slot after + // it + insertIntoStack(formattingClone, furthestBlockPos); + } + return true; + } + + private void insertIntoStack(StackNode<T> node, int position) + throws SAXException { + assert currentPtr + 1 < stack.length; + assert position <= currentPtr + 1; + if (position == currentPtr + 1) { + push(node); + } else { + System.arraycopy(stack, position, stack, position + 1, + (currentPtr - position) + 1); + currentPtr++; + stack[position] = node; + } + } + + private void insertIntoListOfActiveFormattingElements( + StackNode<T> formattingClone, int bookmark) { + formattingClone.retain(); + assert listPtr + 1 < listOfActiveFormattingElements.length; + if (bookmark <= listPtr) { + System.arraycopy(listOfActiveFormattingElements, bookmark, + listOfActiveFormattingElements, bookmark + 1, + (listPtr - bookmark) + 1); + } + listPtr++; + listOfActiveFormattingElements[bookmark] = 
formattingClone; + } + + private int findInListOfActiveFormattingElements(StackNode<T> node) { + for (int i = listPtr; i >= 0; i--) { + if (node == listOfActiveFormattingElements[i]) { + return i; + } + } + return -1; + } + + private int findInListOfActiveFormattingElementsContainsBetweenEndAndLastMarker( + @Local String name) { + for (int i = listPtr; i >= 0; i--) { + StackNode<T> node = listOfActiveFormattingElements[i]; + if (node == null) { + return -1; + } else if (node.name == name) { + return i; + } + } + return -1; + } + + + private void maybeForgetEarlierDuplicateFormattingElement( + @Local String name, HtmlAttributes attributes) throws SAXException { + int candidate = -1; + int count = 0; + for (int i = listPtr; i >= 0; i--) { + StackNode<T> node = listOfActiveFormattingElements[i]; + if (node == null) { + break; + } + if (node.name == name && node.attributes.equalsAnother(attributes)) { + candidate = i; + ++count; + } + } + if (count >= 3) { + removeFromListOfActiveFormattingElements(candidate); + } + } + + private int findLastOrRoot(@Local String name) { + for (int i = currentPtr; i > 0; i--) { + if (stack[i].ns == "http://www.w3.org/1999/xhtml" && stack[i].name == name) { + return i; + } + } + return 0; + } + + private int findLastOrRoot(int group) { + for (int i = currentPtr; i > 0; i--) { + if (stack[i].getGroup() == group) { + return i; + } + } + return 0; + } + + /** + * Attempt to add attribute to the body element. 
+ * @param attributes the attributes + * @return <code>true</code> iff the attributes were added + * @throws SAXException + */ + private boolean addAttributesToBody(HtmlAttributes attributes) + throws SAXException { + // [NOCPP[ + checkAttributes(attributes, "http://www.w3.org/1999/xhtml"); + // ]NOCPP] + if (currentPtr >= 1) { + StackNode<T> body = stack[1]; + if (body.getGroup() == TreeBuilder.BODY) { + addAttributesToElement(body.node, attributes); + return true; + } + } + return false; + } + + private void addAttributesToHtml(HtmlAttributes attributes) + throws SAXException { + // [NOCPP[ + checkAttributes(attributes, "http://www.w3.org/1999/xhtml"); + // ]NOCPP] + addAttributesToElement(stack[0].node, attributes); + } + + private void pushHeadPointerOntoStack() throws SAXException { + assert headPointer != null; + assert mode == AFTER_HEAD; + fatal(); + silentPush(new StackNode<T>(ElementName.HEAD, headPointer + // [NOCPP[ + , errorHandler == null ? null : new TaintableLocatorImpl(tokenizer) + // ]NOCPP] + )); + } + + /** + * @throws SAXException + * + */ + private void reconstructTheActiveFormattingElements() throws SAXException { + if (listPtr == -1) { + return; + } + StackNode<T> mostRecent = listOfActiveFormattingElements[listPtr]; + if (mostRecent == null || isInStack(mostRecent)) { + return; + } + int entryPos = listPtr; + for (;;) { + entryPos--; + if (entryPos == -1) { + break; + } + if (listOfActiveFormattingElements[entryPos] == null) { + break; + } + if (isInStack(listOfActiveFormattingElements[entryPos])) { + break; + } + } + while (entryPos < listPtr) { + entryPos++; + StackNode<T> entry = listOfActiveFormattingElements[entryPos]; + StackNode<T> currentNode = stack[currentPtr]; + + T clone; + if (currentNode.isFosterParenting()) { + clone = createAndInsertFosterParentedElement("http://www.w3.org/1999/xhtml", entry.name, + entry.attributes.cloneAttributes(null)); + } else { + clone = createElement("http://www.w3.org/1999/xhtml", entry.name, + 
entry.attributes.cloneAttributes(null), currentNode.node); + appendElement(clone, currentNode.node); + } + + StackNode<T> entryClone = new StackNode<T>(entry.getFlags(), + entry.ns, entry.name, clone, entry.popName, + entry.attributes + // [NOCPP[ + , entry.getLocator() + // ]NOCPP] + ); + + entry.dropAttributes(); // transfer ownership to entryClone + + push(entryClone); + // stack takes ownership of the local variable + listOfActiveFormattingElements[entryPos] = entryClone; + // overwriting the old entry on the list, so release & retain + entry.release(); + entryClone.retain(); + } + } + + private void insertIntoFosterParent(T child) throws SAXException { + int tablePos = findLastOrRoot(TreeBuilder.TABLE); + int templatePos = findLastOrRoot(TreeBuilder.TEMPLATE); + + if (templatePos >= tablePos) { + appendElement(child, stack[templatePos].node); + return; + } + + StackNode<T> node = stack[tablePos]; + insertFosterParentedChild(child, node.node, stack[tablePos - 1].node); + } + + private T createAndInsertFosterParentedElement(@NsUri String ns, @Local String name, + HtmlAttributes attributes) throws SAXException { + return createAndInsertFosterParentedElement(ns, name, attributes, null); + } + + private T createAndInsertFosterParentedElement(@NsUri String ns, @Local String name, + HtmlAttributes attributes, T form) throws SAXException { + int tablePos = findLastOrRoot(TreeBuilder.TABLE); + int templatePos = findLastOrRoot(TreeBuilder.TEMPLATE); + + if (templatePos >= tablePos) { + T child = createElement(ns, name, attributes, form, stack[templatePos].node); + appendElement(child, stack[templatePos].node); + return child; + } + + StackNode<T> node = stack[tablePos]; + return createAndInsertFosterParentedElement(ns, name, attributes, form, node.node, stack[tablePos - 1].node); + } + + private boolean isInStack(StackNode<T> node) { + for (int i = currentPtr; i >= 0; i--) { + if (stack[i] == node) { + return true; + } + } + return false; + } + + private void 
popTemplateMode() { + templateModePtr--; + } + + private void pop() throws SAXException { + StackNode<T> node = stack[currentPtr]; + assert debugOnlyClearLastStackSlot(); + currentPtr--; + elementPopped(node.ns, node.popName, node.node); + node.release(); + } + + private void silentPop() throws SAXException { + StackNode<T> node = stack[currentPtr]; + assert debugOnlyClearLastStackSlot(); + currentPtr--; + node.release(); + } + + private void popOnEof() throws SAXException { + StackNode<T> node = stack[currentPtr]; + assert debugOnlyClearLastStackSlot(); + currentPtr--; + markMalformedIfScript(node.node); + elementPopped(node.ns, node.popName, node.node); + node.release(); + } + + // [NOCPP[ + private void checkAttributes(HtmlAttributes attributes, @NsUri String ns) + throws SAXException { + if (errorHandler != null) { + int len = attributes.getXmlnsLength(); + for (int i = 0; i < len; i++) { + AttributeName name = attributes.getXmlnsAttributeName(i); + if (name == AttributeName.XMLNS) { + if (html4) { + err("Attribute \u201Cxmlns\u201D not allowed here. 
(HTML4-only error.)"); + } else { + String xmlns = attributes.getXmlnsValue(i); + if (!ns.equals(xmlns)) { + err("Bad value \u201C" + + xmlns + + "\u201D for the attribute \u201Cxmlns\u201D (only \u201C" + + ns + "\u201D permitted here)."); + switch (namePolicy) { + case ALTER_INFOSET: + // fall through + case ALLOW: + warn("Attribute \u201Cxmlns\u201D is not serializable as XML 1.0."); + break; + case FATAL: + fatal("Attribute \u201Cxmlns\u201D is not serializable as XML 1.0."); + break; + } + } + } + } else if (ns != "http://www.w3.org/1999/xhtml" + && name == AttributeName.XMLNS_XLINK) { + String xmlns = attributes.getXmlnsValue(i); + if (!"http://www.w3.org/1999/xlink".equals(xmlns)) { + err("Bad value \u201C" + + xmlns + + "\u201D for the attribute \u201Cxmlns:link\u201D (only \u201Chttp://www.w3.org/1999/xlink\u201D permitted here)."); + switch (namePolicy) { + case ALTER_INFOSET: + // fall through + case ALLOW: + warn("Attribute \u201Cxmlns:xlink\u201D with a value other than \u201Chttp://www.w3.org/1999/xlink\u201D is not serializable as XML 1.0 without changing document semantics."); + break; + case FATAL: + fatal("Attribute \u201Cxmlns:xlink\u201D with a value other than \u201Chttp://www.w3.org/1999/xlink\u201D is not serializable as XML 1.0 without changing document semantics."); + break; + } + } + } else { + err("Attribute \u201C" + attributes.getXmlnsLocalName(i) + + "\u201D not allowed here."); + switch (namePolicy) { + case ALTER_INFOSET: + // fall through + case ALLOW: + warn("Attribute with the local name \u201C" + + attributes.getXmlnsLocalName(i) + + "\u201D is not serializable as XML 1.0."); + break; + case FATAL: + fatal("Attribute with the local name \u201C" + + attributes.getXmlnsLocalName(i) + + "\u201D is not serializable as XML 1.0."); + break; + } + } + } + } + attributes.processNonNcNames(this, namePolicy); + } + + private String checkPopName(@Local String name) throws SAXException { + if (NCName.isNCName(name)) { + return name; + } else 
{ + switch (namePolicy) { + case ALLOW: + warn("Element name \u201C" + name + + "\u201D cannot be represented as XML 1.0."); + return name; + case ALTER_INFOSET: + warn("Element name \u201C" + name + + "\u201D cannot be represented as XML 1.0."); + return NCName.escapeName(name); + case FATAL: + fatal("Element name \u201C" + name + + "\u201D cannot be represented as XML 1.0."); + } + } + return null; // keep compiler happy + } + + // ]NOCPP] + + private void appendHtmlElementToDocumentAndPush(HtmlAttributes attributes) + throws SAXException { + // [NOCPP[ + checkAttributes(attributes, "http://www.w3.org/1999/xhtml"); + // ]NOCPP] + T elt = createHtmlElementSetAsRoot(attributes); + StackNode<T> node = new StackNode<T>(ElementName.HTML, + elt + // [NOCPP[ + , errorHandler == null ? null : new TaintableLocatorImpl(tokenizer) + // ]NOCPP] + ); + push(node); + } + + private void appendHtmlElementToDocumentAndPush() throws SAXException { + appendHtmlElementToDocumentAndPush(tokenizer.emptyAttributes()); + } + + private void appendToCurrentNodeAndPushHeadElement(HtmlAttributes attributes) + throws SAXException { + // [NOCPP[ + checkAttributes(attributes, "http://www.w3.org/1999/xhtml"); + // ]NOCPP] + T currentNode = stack[currentPtr].node; + T elt = createElement("http://www.w3.org/1999/xhtml", "head", attributes, currentNode); + appendElement(elt, currentNode); + headPointer = elt; + StackNode<T> node = new StackNode<T>(ElementName.HEAD, + elt + // [NOCPP[ + , errorHandler == null ? 
null : new TaintableLocatorImpl(tokenizer) + // ]NOCPP] + ); + push(node); + } + + private void appendToCurrentNodeAndPushBodyElement(HtmlAttributes attributes) + throws SAXException { + appendToCurrentNodeAndPushElement(ElementName.BODY, + attributes); + } + + private void appendToCurrentNodeAndPushBodyElement() throws SAXException { + appendToCurrentNodeAndPushBodyElement(tokenizer.emptyAttributes()); + } + + private void appendToCurrentNodeAndPushFormElementMayFoster( + HtmlAttributes attributes) throws SAXException { + // [NOCPP[ + checkAttributes(attributes, "http://www.w3.org/1999/xhtml"); + // ]NOCPP] + + T elt; + StackNode<T> current = stack[currentPtr]; + if (current.isFosterParenting()) { + fatal(); + elt = createAndInsertFosterParentedElement("http://www.w3.org/1999/xhtml", "form", attributes); + } else { + elt = createElement("http://www.w3.org/1999/xhtml", "form", attributes, current.node); + appendElement(elt, current.node); + } + + if (!isTemplateContents()) { + formPointer = elt; + } + + StackNode<T> node = new StackNode<T>(ElementName.FORM, + elt + // [NOCPP[ + , errorHandler == null ? null : new TaintableLocatorImpl(tokenizer) + // ]NOCPP] + ); + push(node); + } + + private void appendToCurrentNodeAndPushFormattingElementMayFoster( + ElementName elementName, HtmlAttributes attributes) + throws SAXException { + // [NOCPP[ + checkAttributes(attributes, "http://www.w3.org/1999/xhtml"); + // ]NOCPP] + // This method can't be called for custom elements + HtmlAttributes clone = attributes.cloneAttributes(null); + // Attributes must not be read after calling createElement, because + // createElement may delete attributes in C++. 
+ T elt; + StackNode<T> current = stack[currentPtr]; + if (current.isFosterParenting()) { + fatal(); + elt = createAndInsertFosterParentedElement("http://www.w3.org/1999/xhtml", elementName.name, attributes); + } else { + elt = createElement("http://www.w3.org/1999/xhtml", elementName.name, attributes, current.node); + appendElement(elt, current.node); + } + StackNode<T> node = new StackNode<T>(elementName, elt, clone + // [NOCPP[ + , errorHandler == null ? null : new TaintableLocatorImpl(tokenizer) + // ]NOCPP] + ); + push(node); + append(node); + node.retain(); // append doesn't retain itself + } + + private void appendToCurrentNodeAndPushElement(ElementName elementName, + HtmlAttributes attributes) + throws SAXException { + // [NOCPP[ + checkAttributes(attributes, "http://www.w3.org/1999/xhtml"); + // ]NOCPP] + // This method can't be called for custom elements + T currentNode = stack[currentPtr].node; + T elt = createElement("http://www.w3.org/1999/xhtml", elementName.name, attributes, currentNode); + appendElement(elt, currentNode); + if (ElementName.TEMPLATE == elementName) { + elt = getDocumentFragmentForTemplate(elt); + } + StackNode<T> node = new StackNode<T>(elementName, elt + // [NOCPP[ + , errorHandler == null ? 
null : new TaintableLocatorImpl(tokenizer) + // ]NOCPP] + ); + push(node); + } + + private void appendToCurrentNodeAndPushElementMayFoster(ElementName elementName, + HtmlAttributes attributes) + throws SAXException { + @Local String popName = elementName.name; + // [NOCPP[ + checkAttributes(attributes, "http://www.w3.org/1999/xhtml"); + if (elementName.isCustom()) { + popName = checkPopName(popName); + } + // ]NOCPP] + T elt; + StackNode<T> current = stack[currentPtr]; + if (current.isFosterParenting()) { + fatal(); + elt = createAndInsertFosterParentedElement("http://www.w3.org/1999/xhtml", popName, attributes); + } else { + elt = createElement("http://www.w3.org/1999/xhtml", popName, attributes, current.node); + appendElement(elt, current.node); + } + StackNode<T> node = new StackNode<T>(elementName, elt, popName + // [NOCPP[ + , errorHandler == null ? null : new TaintableLocatorImpl(tokenizer) + // ]NOCPP] + ); + push(node); + } + + private void appendToCurrentNodeAndPushElementMayFosterMathML( + ElementName elementName, HtmlAttributes attributes) + throws SAXException { + @Local String popName = elementName.name; + // [NOCPP[ + checkAttributes(attributes, "http://www.w3.org/1998/Math/MathML"); + if (elementName.isCustom()) { + popName = checkPopName(popName); + } + // ]NOCPP] + boolean markAsHtmlIntegrationPoint = false; + if (ElementName.ANNOTATION_XML == elementName + && annotationXmlEncodingPermitsHtml(attributes)) { + markAsHtmlIntegrationPoint = true; + } + // Attributes must not be read after calling createElement(), since + // createElement may delete the object in C++. 
+ T elt; + StackNode<T> current = stack[currentPtr]; + if (current.isFosterParenting()) { + fatal(); + elt = createAndInsertFosterParentedElement("http://www.w3.org/1998/Math/MathML", popName, attributes); + } else { + elt = createElement("http://www.w3.org/1998/Math/MathML", popName, attributes, current.node); + appendElement(elt, current.node); + } + StackNode<T> node = new StackNode<T>(elementName, elt, popName, + markAsHtmlIntegrationPoint + // [NOCPP[ + , errorHandler == null ? null : new TaintableLocatorImpl(tokenizer) + // ]NOCPP] + ); + push(node); + } + + // [NOCPP[ + T getDocumentFragmentForTemplate(T template) { + return template; + } + + T getFormPointerForContext(T context) { + return null; + } + // ]NOCPP] + + private boolean annotationXmlEncodingPermitsHtml(HtmlAttributes attributes) { + String encoding = attributes.getValue(AttributeName.ENCODING); + if (encoding == null) { + return false; + } + return Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString( + "application/xhtml+xml", encoding) + || Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString( + "text/html", encoding); + } + + private void appendToCurrentNodeAndPushElementMayFosterSVG( + ElementName elementName, HtmlAttributes attributes) + throws SAXException { + @Local String popName = elementName.camelCaseName; + // [NOCPP[ + checkAttributes(attributes, "http://www.w3.org/2000/svg"); + if (elementName.isCustom()) { + popName = checkPopName(popName); + } + // ]NOCPP] + T elt; + StackNode<T> current = stack[currentPtr]; + if (current.isFosterParenting()) { + fatal(); + elt = createAndInsertFosterParentedElement("http://www.w3.org/2000/svg", popName, attributes); + } else { + elt = createElement("http://www.w3.org/2000/svg", popName, attributes, current.node); + appendElement(elt, current.node); + } + StackNode<T> node = new StackNode<T>(elementName, popName, elt + // [NOCPP[ + , errorHandler == null ? 
null : new TaintableLocatorImpl(tokenizer) + // ]NOCPP] + ); + push(node); + } + + private void appendToCurrentNodeAndPushElementMayFoster(ElementName elementName, + HtmlAttributes attributes, T form) + throws SAXException { + // [NOCPP[ + checkAttributes(attributes, "http://www.w3.org/1999/xhtml"); + // ]NOCPP] + // Can't be called for custom elements + T elt; + T formOwner = form == null || fragment || isTemplateContents() ? null : form; + StackNode<T> current = stack[currentPtr]; + if (current.isFosterParenting()) { + fatal(); + elt = createAndInsertFosterParentedElement("http://www.w3.org/1999/xhtml", elementName.name, + attributes, formOwner); + } else { + elt = createElement("http://www.w3.org/1999/xhtml", elementName.name, + attributes, formOwner, current.node); + appendElement(elt, current.node); + } + StackNode<T> node = new StackNode<T>(elementName, elt + // [NOCPP[ + , errorHandler == null ? null : new TaintableLocatorImpl(tokenizer) + // ]NOCPP] + ); + push(node); + } + + private void appendVoidElementToCurrentMayFoster( + @Local String name, HtmlAttributes attributes, T form) throws SAXException { + // [NOCPP[ + checkAttributes(attributes, "http://www.w3.org/1999/xhtml"); + // ]NOCPP] + // Can't be called for custom elements + T elt; + T formOwner = form == null || fragment || isTemplateContents() ? 
null : form; + StackNode<T> current = stack[currentPtr]; + if (current.isFosterParenting()) { + fatal(); + elt = createAndInsertFosterParentedElement("http://www.w3.org/1999/xhtml", name, + attributes, formOwner); + } else { + elt = createElement("http://www.w3.org/1999/xhtml", name, + attributes, formOwner, current.node); + appendElement(elt, current.node); + } + elementPushed("http://www.w3.org/1999/xhtml", name, elt); + elementPopped("http://www.w3.org/1999/xhtml", name, elt); + } + + private void appendVoidElementToCurrentMayFoster( + ElementName elementName, HtmlAttributes attributes) + throws SAXException { + @Local String popName = elementName.name; + // [NOCPP[ + checkAttributes(attributes, "http://www.w3.org/1999/xhtml"); + if (elementName.isCustom()) { + popName = checkPopName(popName); + } + // ]NOCPP] + T elt; + StackNode<T> current = stack[currentPtr]; + if (current.isFosterParenting()) { + fatal(); + elt = createAndInsertFosterParentedElement("http://www.w3.org/1999/xhtml", popName, attributes); + } else { + elt = createElement("http://www.w3.org/1999/xhtml", popName, attributes, current.node); + appendElement(elt, current.node); + } + elementPushed("http://www.w3.org/1999/xhtml", popName, elt); + elementPopped("http://www.w3.org/1999/xhtml", popName, elt); + } + + private void appendVoidElementToCurrentMayFosterSVG( + ElementName elementName, HtmlAttributes attributes) + throws SAXException { + @Local String popName = elementName.camelCaseName; + // [NOCPP[ + checkAttributes(attributes, "http://www.w3.org/2000/svg"); + if (elementName.isCustom()) { + popName = checkPopName(popName); + } + // ]NOCPP] + T elt; + StackNode<T> current = stack[currentPtr]; + if (current.isFosterParenting()) { + fatal(); + elt = createAndInsertFosterParentedElement("http://www.w3.org/2000/svg", popName, attributes); + } else { + elt = createElement("http://www.w3.org/2000/svg", popName, attributes, current.node); + appendElement(elt, current.node); + } + 
elementPushed("http://www.w3.org/2000/svg", popName, elt); + elementPopped("http://www.w3.org/2000/svg", popName, elt); + } + + private void appendVoidElementToCurrentMayFosterMathML( + ElementName elementName, HtmlAttributes attributes) + throws SAXException { + @Local String popName = elementName.name; + // [NOCPP[ + checkAttributes(attributes, "http://www.w3.org/1998/Math/MathML"); + if (elementName.isCustom()) { + popName = checkPopName(popName); + } + // ]NOCPP] + T elt; + StackNode<T> current = stack[currentPtr]; + if (current.isFosterParenting()) { + fatal(); + elt = createAndInsertFosterParentedElement("http://www.w3.org/1998/Math/MathML", popName, attributes); + } else { + elt = createElement("http://www.w3.org/1998/Math/MathML", popName, attributes, current.node); + appendElement(elt, current.node); + } + elementPushed("http://www.w3.org/1998/Math/MathML", popName, elt); + elementPopped("http://www.w3.org/1998/Math/MathML", popName, elt); + } + + private void appendVoidElementToCurrent( + @Local String name, HtmlAttributes attributes, T form) throws SAXException { + // [NOCPP[ + checkAttributes(attributes, "http://www.w3.org/1999/xhtml"); + // ]NOCPP] + // Can't be called for custom elements + T currentNode = stack[currentPtr].node; + T elt = createElement("http://www.w3.org/1999/xhtml", name, attributes, + form == null || fragment || isTemplateContents() ? 
null : form, currentNode); + appendElement(elt, currentNode); + elementPushed("http://www.w3.org/1999/xhtml", name, elt); + elementPopped("http://www.w3.org/1999/xhtml", name, elt); + } + + private void appendVoidFormToCurrent(HtmlAttributes attributes) throws SAXException { + // [NOCPP[ + checkAttributes(attributes, "http://www.w3.org/1999/xhtml"); + // ]NOCPP] + T currentNode = stack[currentPtr].node; + T elt = createElement("http://www.w3.org/1999/xhtml", "form", + attributes, currentNode); + formPointer = elt; + // ownership transferred to form pointer + appendElement(elt, currentNode); + elementPushed("http://www.w3.org/1999/xhtml", "form", elt); + elementPopped("http://www.w3.org/1999/xhtml", "form", elt); + } + + // [NOCPP[ + + private final void accumulateCharactersForced(@Const @NoLength char[] buf, + int start, int length) throws SAXException { + System.arraycopy(buf, start, charBuffer, charBufferLen, length); + charBufferLen += length; + } + + @Override public void ensureBufferSpace(int inputLength) + throws SAXException { + // TODO: Unify Tokenizer.strBuf and TreeBuilder.charBuffer so that + // this method becomes unnecessary. + int worstCase = charBufferLen + inputLength; + if (charBuffer == null) { + // Add an arbitrary small value to avoid immediate reallocation + // once there are a few characters in the buffer. + charBuffer = new char[worstCase + 128]; + } else if (worstCase > charBuffer.length) { + // HotSpot reportedly allocates memory with 8-byte accuracy, so + // there's no point in trying to do math here to avoid slop. + // Maybe we should add some small constant to worstCase here + // but not doing that without profiling. In C++ with jemalloc, + // the corresponding method should do math to round up here + // to avoid slop. 
+ char[] newBuf = new char[worstCase]; + System.arraycopy(charBuffer, 0, newBuf, 0, charBufferLen); + charBuffer = newBuf; + } + } + + // ]NOCPP] + + protected void accumulateCharacters(@Const @NoLength char[] buf, int start, + int length) throws SAXException { + appendCharacters(stack[currentPtr].node, buf, start, length); + } + + // ------------------------------- // + + protected final void requestSuspension() { + tokenizer.requestSuspension(); + } + + protected abstract T createElement(@NsUri String ns, @Local String name, + HtmlAttributes attributes, T intendedParent) throws SAXException; + + /** + * Creates an element for which a form owner candidate is supplied. This + * default implementation ignores <code>form</code> and delegates to the + * four-argument overload, forwarding <code>ns</code> unchanged. + * + * @param ns the namespace URI of the element + * @param name the local name of the element + * @param attributes the attributes of the element + * @param form the form owner candidate (unused by this default implementation) + * @param intendedParent the node the element is about to be appended to + * @return the created element + * @throws SAXException if the delegate throws + */ + protected T createElement(@NsUri String ns, @Local String name, + HtmlAttributes attributes, T form, T intendedParent) throws SAXException { + return createElement(ns, name, attributes, intendedParent); + } + + protected abstract T createHtmlElementSetAsRoot(HtmlAttributes attributes) + throws SAXException; + + protected abstract void detachFromParent(T element) throws SAXException; + + protected abstract boolean hasChildren(T element) throws SAXException; + + protected abstract void appendElement(T child, T newParent) + throws SAXException; + + protected abstract void appendChildrenToNewParent(T oldParent, T newParent) + throws SAXException; + + protected abstract void insertFosterParentedChild(T child, T table, + T stackParent) throws SAXException; + + // We don't generate CPP code for this method because it is not used in generated CPP + // code. Instead, the form owner version of this method is called with a null form owner. 
+ // [NOCPP[ + + protected abstract T createAndInsertFosterParentedElement(@NsUri String ns, @Local String name, + HtmlAttributes attributes, T table, T stackParent) throws SAXException; + + // ]NOCPP] + + protected T createAndInsertFosterParentedElement(@NsUri String ns, @Local String name, + HtmlAttributes attributes, T form, T table, T stackParent) throws SAXException { + return createAndInsertFosterParentedElement(ns, name, attributes, table, stackParent); + }; + + protected abstract void insertFosterParentedCharacters( + @NoLength char[] buf, int start, int length, T table, T stackParent) + throws SAXException; + + protected abstract void appendCharacters(T parent, @NoLength char[] buf, + int start, int length) throws SAXException; + + protected abstract void appendIsindexPrompt(T parent) throws SAXException; + + protected abstract void appendComment(T parent, @NoLength char[] buf, + int start, int length) throws SAXException; + + protected abstract void appendCommentToDocument(@NoLength char[] buf, + int start, int length) throws SAXException; + + protected abstract void addAttributesToElement(T element, + HtmlAttributes attributes) throws SAXException; + + protected void markMalformedIfScript(T elt) throws SAXException { + + } + + protected void start(boolean fragmentMode) throws SAXException { + + } + + protected void end() throws SAXException { + + } + + protected void appendDoctypeToDocument(@Local String name, + String publicIdentifier, String systemIdentifier) + throws SAXException { + + } + + protected void elementPushed(@NsUri String ns, @Local String name, T node) + throws SAXException { + + } + + protected void elementPopped(@NsUri String ns, @Local String name, T node) + throws SAXException { + + } + + // [NOCPP[ + + protected void documentMode(DocumentMode m, String publicIdentifier, + String systemIdentifier, boolean html4SpecificAdditionalErrorChecks) + throws SAXException { + + } + + /** + * @see 
nu.validator.htmlparser.common.TokenHandler#wantsComments() + */ + public boolean wantsComments() { + return wantingComments; + } + + public void setIgnoringComments(boolean ignoreComments) { + wantingComments = !ignoreComments; + } + + /** + * Sets the errorHandler. + * + * @param errorHandler + * the errorHandler to set + */ + public final void setErrorHandler(ErrorHandler errorHandler) { + this.errorHandler = errorHandler; + } + + /** + * Returns the errorHandler. + * + * @return the errorHandler + */ + public ErrorHandler getErrorHandler() { + return errorHandler; + } + + /** + * The argument MUST be an interned string or <code>null</code>. + * + * @param context + */ + public final void setFragmentContext(@Local String context) { + this.contextName = context; + this.contextNamespace = "http://www.w3.org/1999/xhtml"; + this.contextNode = null; + this.fragment = (contextName != null); + this.quirks = false; + } + + // ]NOCPP] + + /** + * @see nu.validator.htmlparser.common.TokenHandler#cdataSectionAllowed() + */ + @Inline public boolean cdataSectionAllowed() throws SAXException { + return isInForeign(); + } + + private boolean isInForeign() { + return currentPtr >= 0 + && stack[currentPtr].ns != "http://www.w3.org/1999/xhtml"; + } + + private boolean isInForeignButNotHtmlOrMathTextIntegrationPoint() { + if (currentPtr < 0) { + return false; + } + return !isSpecialParentInForeign(stack[currentPtr]); + } + + /** + * The argument MUST be an interned string or <code>null</code>. + * + * @param context + */ + public final void setFragmentContext(@Local String context, + @NsUri String ns, T node, boolean quirks) { + // [NOCPP[ + if (!((context == null && ns == null) + || "http://www.w3.org/1999/xhtml" == ns + || "http://www.w3.org/2000/svg" == ns || "http://www.w3.org/1998/Math/MathML" == ns)) { + throw new IllegalArgumentException( + "The namespace must be the HTML, SVG or MathML namespace (or null when the local name is null). 
Got: " + + ns); + } + // ]NOCPP] + this.contextName = context; + this.contextNamespace = ns; + this.contextNode = node; + this.fragment = (contextName != null); + this.quirks = quirks; + } + + protected final T currentNode() { + return stack[currentPtr].node; + } + + /** + * Returns the scriptingEnabled. + * + * @return the scriptingEnabled + */ + public boolean isScriptingEnabled() { + return scriptingEnabled; + } + + /** + * Sets the scriptingEnabled. + * + * @param scriptingEnabled + * the scriptingEnabled to set + */ + public void setScriptingEnabled(boolean scriptingEnabled) { + this.scriptingEnabled = scriptingEnabled; + } + + public void setIsSrcdocDocument(boolean isSrcdocDocument) { + this.isSrcdocDocument = isSrcdocDocument; + } + + // [NOCPP[ + + /** + * Sets the doctypeExpectation. + * + * @param doctypeExpectation + * the doctypeExpectation to set + */ + public void setDoctypeExpectation(DoctypeExpectation doctypeExpectation) { + this.doctypeExpectation = doctypeExpectation; + } + + public void setNamePolicy(XmlViolationPolicy namePolicy) { + this.namePolicy = namePolicy; + } + + /** + * Sets the documentModeHandler. + * + * @param documentModeHandler + * the documentModeHandler to set + */ + public void setDocumentModeHandler(DocumentModeHandler documentModeHandler) { + this.documentModeHandler = documentModeHandler; + } + + /** + * Sets the reportingDoctype. + * + * @param reportingDoctype + * the reportingDoctype to set + */ + public void setReportingDoctype(boolean reportingDoctype) { + this.reportingDoctype = reportingDoctype; + } + + // ]NOCPP] + + /** + * Flushes the pending characters. Public for document.write use cases only. 
+ * @throws SAXException + */ + public final void flushCharacters() throws SAXException { + if (charBufferLen > 0) { + if ((mode == IN_TABLE || mode == IN_TABLE_BODY || mode == IN_ROW) + && charBufferContainsNonWhitespace()) { + errNonSpaceInTable(); + reconstructTheActiveFormattingElements(); + if (!stack[currentPtr].isFosterParenting()) { + // reconstructing gave us a new current node + appendCharacters(currentNode(), charBuffer, 0, + charBufferLen); + charBufferLen = 0; + return; + } + + int tablePos = findLastOrRoot(TreeBuilder.TABLE); + int templatePos = findLastOrRoot(TreeBuilder.TEMPLATE); + + if (templatePos >= tablePos) { + appendCharacters(stack[templatePos].node, charBuffer, 0, charBufferLen); + charBufferLen = 0; + return; + } + + StackNode<T> tableElt = stack[tablePos]; + insertFosterParentedCharacters(charBuffer, 0, charBufferLen, + tableElt.node, stack[tablePos - 1].node); + charBufferLen = 0; + return; + } + appendCharacters(currentNode(), charBuffer, 0, charBufferLen); + charBufferLen = 0; + } + } + + private boolean charBufferContainsNonWhitespace() { + for (int i = 0; i < charBufferLen; i++) { + switch (charBuffer[i]) { + case ' ': + case '\t': + case '\n': + case '\r': + case '\u000C': + continue; + default: + return true; + } + } + return false; + } + + /** + * Creates a comparable snapshot of the tree builder state. Snapshot + * creation is only supported immediately after a script end tag has been + * processed. In C++ the caller is responsible for calling + * <code>delete</code> on the returned object. + * + * @return a snapshot. 
+ * @throws SAXException + */ + @SuppressWarnings("unchecked") public TreeBuilderState<T> newSnapshot() + throws SAXException { + StackNode<T>[] listCopy = new StackNode[listPtr + 1]; + for (int i = 0; i < listCopy.length; i++) { + StackNode<T> node = listOfActiveFormattingElements[i]; + if (node != null) { + StackNode<T> newNode = new StackNode<T>(node.getFlags(), node.ns, + node.name, node.node, node.popName, + node.attributes.cloneAttributes(null) + // [NOCPP[ + , node.getLocator() + // ]NOCPP] + ); + listCopy[i] = newNode; + } else { + listCopy[i] = null; + } + } + StackNode<T>[] stackCopy = new StackNode[currentPtr + 1]; + for (int i = 0; i < stackCopy.length; i++) { + StackNode<T> node = stack[i]; + int listIndex = findInListOfActiveFormattingElements(node); + if (listIndex == -1) { + StackNode<T> newNode = new StackNode<T>(node.getFlags(), node.ns, + node.name, node.node, node.popName, + null + // [NOCPP[ + , node.getLocator() + // ]NOCPP] + ); + stackCopy[i] = newNode; + } else { + stackCopy[i] = listCopy[listIndex]; + stackCopy[i].retain(); + } + } + int[] templateModeStackCopy = new int[templateModePtr + 1]; + System.arraycopy(templateModeStack, 0, templateModeStackCopy, 0, + templateModeStackCopy.length); + return new StateSnapshot<T>(stackCopy, listCopy, templateModeStackCopy, formPointer, + headPointer, deepTreeSurrogateParent, mode, originalMode, framesetOk, + needToDropLF, quirks); + } + + public boolean snapshotMatches(TreeBuilderState<T> snapshot) { + StackNode<T>[] stackCopy = snapshot.getStack(); + int stackLen = snapshot.getStackLength(); + StackNode<T>[] listCopy = snapshot.getListOfActiveFormattingElements(); + int listLen = snapshot.getListOfActiveFormattingElementsLength(); + int[] templateModeStackCopy = snapshot.getTemplateModeStack(); + int templateModeStackLen = snapshot.getTemplateModeStackLength(); + + if (stackLen != currentPtr + 1 + || listLen != listPtr + 1 + || templateModeStackLen != templateModePtr + 1 + || formPointer != 
snapshot.getFormPointer() + || headPointer != snapshot.getHeadPointer() + || deepTreeSurrogateParent != snapshot.getDeepTreeSurrogateParent() + || mode != snapshot.getMode() + || originalMode != snapshot.getOriginalMode() + || framesetOk != snapshot.isFramesetOk() + || needToDropLF != snapshot.isNeedToDropLF() + || quirks != snapshot.isQuirks()) { // maybe just assert quirks + return false; + } + for (int i = listLen - 1; i >= 0; i--) { + if (listCopy[i] == null + && listOfActiveFormattingElements[i] == null) { + continue; + } else if (listCopy[i] == null + || listOfActiveFormattingElements[i] == null) { + return false; + } + if (listCopy[i].node != listOfActiveFormattingElements[i].node) { + return false; // it's possible that this condition is overly + // strict + } + } + for (int i = stackLen - 1; i >= 0; i--) { + if (stackCopy[i].node != stack[i].node) { + return false; + } + } + for (int i = templateModeStackLen - 1; i >=0; i--) { + if (templateModeStackCopy[i] != templateModeStack[i]) { + return false; + } + } + return true; + } + + @SuppressWarnings("unchecked") public void loadState( + TreeBuilderState<T> snapshot, Interner interner) + throws SAXException { + StackNode<T>[] stackCopy = snapshot.getStack(); + int stackLen = snapshot.getStackLength(); + StackNode<T>[] listCopy = snapshot.getListOfActiveFormattingElements(); + int listLen = snapshot.getListOfActiveFormattingElementsLength(); + int[] templateModeStackCopy = snapshot.getTemplateModeStack(); + int templateModeStackLen = snapshot.getTemplateModeStackLength(); + + for (int i = 0; i <= listPtr; i++) { + if (listOfActiveFormattingElements[i] != null) { + listOfActiveFormattingElements[i].release(); + } + } + if (listOfActiveFormattingElements.length < listLen) { + listOfActiveFormattingElements = new StackNode[listLen]; + } + listPtr = listLen - 1; + + for (int i = 0; i <= currentPtr; i++) { + stack[i].release(); + } + if (stack.length < stackLen) { + stack = new StackNode[stackLen]; + } + currentPtr 
= stackLen - 1; + + if (templateModeStack.length < templateModeStackLen) { + templateModeStack = new int[templateModeStackLen]; + } + templateModePtr = templateModeStackLen - 1; + + for (int i = 0; i < listLen; i++) { + StackNode<T> node = listCopy[i]; + if (node != null) { + StackNode<T> newNode = new StackNode<T>(node.getFlags(), node.ns, + Portability.newLocalFromLocal(node.name, interner), node.node, + Portability.newLocalFromLocal(node.popName, interner), + node.attributes.cloneAttributes(null) + // [NOCPP[ + , node.getLocator() + // ]NOCPP] + ); + listOfActiveFormattingElements[i] = newNode; + } else { + listOfActiveFormattingElements[i] = null; + } + } + for (int i = 0; i < stackLen; i++) { + StackNode<T> node = stackCopy[i]; + int listIndex = findInArray(node, listCopy); + if (listIndex == -1) { + StackNode<T> newNode = new StackNode<T>(node.getFlags(), node.ns, + Portability.newLocalFromLocal(node.name, interner), node.node, + Portability.newLocalFromLocal(node.popName, interner), + null + // [NOCPP[ + , node.getLocator() + // ]NOCPP] + ); + stack[i] = newNode; + } else { + stack[i] = listOfActiveFormattingElements[listIndex]; + stack[i].retain(); + } + } + System.arraycopy(templateModeStackCopy, 0, templateModeStack, 0, templateModeStackLen); + formPointer = snapshot.getFormPointer(); + headPointer = snapshot.getHeadPointer(); + deepTreeSurrogateParent = snapshot.getDeepTreeSurrogateParent(); + mode = snapshot.getMode(); + originalMode = snapshot.getOriginalMode(); + framesetOk = snapshot.isFramesetOk(); + needToDropLF = snapshot.isNeedToDropLF(); + quirks = snapshot.isQuirks(); + } + + private int findInArray(StackNode<T> node, StackNode<T>[] arr) { + for (int i = listPtr; i >= 0; i--) { + if (node == arr[i]) { + return i; + } + } + return -1; + } + + /** + * @see nu.validator.htmlparser.impl.TreeBuilderState#getFormPointer() + */ + public T getFormPointer() { + return formPointer; + } + + /** + * Returns the headPointer. 
+ * + * @return the headPointer + */ + public T getHeadPointer() { + return headPointer; + } + + /** + * Returns the deepTreeSurrogateParent. + * + * @return the deepTreeSurrogateParent + */ + public T getDeepTreeSurrogateParent() { + return deepTreeSurrogateParent; + } + + /** + * @see nu.validator.htmlparser.impl.TreeBuilderState#getListOfActiveFormattingElements() + */ + public StackNode<T>[] getListOfActiveFormattingElements() { + return listOfActiveFormattingElements; + } + + /** + * @see nu.validator.htmlparser.impl.TreeBuilderState#getStack() + */ + public StackNode<T>[] getStack() { + return stack; + } + + /** + * @see nu.validator.htmlparser.impl.TreeBuilderState#getTemplateModeStack() + */ + public int[] getTemplateModeStack() { + return templateModeStack; + } + + /** + * Returns the mode. + * + * @return the mode + */ + public int getMode() { + return mode; + } + + /** + * Returns the originalMode. + * + * @return the originalMode + */ + public int getOriginalMode() { + return originalMode; + } + + /** + * Returns the framesetOk. + * + * @return the framesetOk + */ + public boolean isFramesetOk() { + return framesetOk; + } + + /** + * Returns the needToDropLF. + * + * @return the needToDropLF + */ + public boolean isNeedToDropLF() { + return needToDropLF; + } + + /** + * Returns the quirks. + * + * @return the quirks + */ + public boolean isQuirks() { + return quirks; + } + + /** + * @see nu.validator.htmlparser.impl.TreeBuilderState#getListOfActiveFormattingElementsLength() + */ + public int getListOfActiveFormattingElementsLength() { + return listPtr + 1; + } + + /** + * @see nu.validator.htmlparser.impl.TreeBuilderState#getStackLength() + */ + public int getStackLength() { + return currentPtr + 1; + } + + /** + * @see nu.validator.htmlparser.impl.TreeBuilderState#getTemplateModeStackLength() + */ + public int getTemplateModeStackLength() { + return templateModePtr + 1; + } + + /** + * Reports a stray start tag. 
+ * @param name the name of the stray tag + * + * @throws SAXException + */ + private void errStrayStartTag(@Local String name) throws SAXException { + err("Stray start tag \u201C" + name + "\u201D."); + } + + /** + * Reports a stray end tag. + * @param name the name of the stray tag + * + * @throws SAXException + */ + private void errStrayEndTag(@Local String name) throws SAXException { + err("Stray end tag \u201C" + name + "\u201D."); + } + + /** + * Reports a state when elements expected to be closed were not. + * + * @param eltPos the position of the start tag on the stack of the element + * being closed. + * @param name the name of the end tag + * + * @throws SAXException + */ + private void errUnclosedElements(int eltPos, @Local String name) throws SAXException { + errNoCheck("End tag \u201C" + name + "\u201D seen, but there were open elements."); + errListUnclosedStartTags(eltPos); + } + + /** + * Reports a state when elements expected to be closed ahead of an implied + * end tag but were not. + * + * @param eltPos the position of the start tag on the stack of the element + * being closed. + * @param name the name of the end tag + * + * @throws SAXException + */ + private void errUnclosedElementsImplied(int eltPos, String name) throws SAXException { + errNoCheck("End tag \u201C" + name + "\u201D implied, but there were open elements."); + errListUnclosedStartTags(eltPos); + } + + /** + * Reports a state when elements expected to be closed ahead of an implied + * table cell close. + * + * @param eltPos the position of the start tag on the stack of the element + * being closed. 
+ * @throws SAXException + */ + private void errUnclosedElementsCell(int eltPos) throws SAXException { + errNoCheck("A table cell was implicitly closed, but there were open elements."); + errListUnclosedStartTags(eltPos); + } + + private void errStrayDoctype() throws SAXException { + err("Stray doctype."); + } + + private void errAlmostStandardsDoctype() throws SAXException { + if (!isSrcdocDocument) { + err("Almost standards mode doctype. Expected \u201C<!DOCTYPE html>\u201D."); + } + } + + private void errQuirkyDoctype() throws SAXException { + if (!isSrcdocDocument) { + err("Quirky doctype. Expected \u201C<!DOCTYPE html>\u201D."); + } + } + + private void errNonSpaceInTrailer() throws SAXException { + err("Non-space character in page trailer."); + } + + private void errNonSpaceAfterFrameset() throws SAXException { + err("Non-space after \u201Cframeset\u201D."); + } + + private void errNonSpaceInFrameset() throws SAXException { + err("Non-space in \u201Cframeset\u201D."); + } + + private void errNonSpaceAfterBody() throws SAXException { + err("Non-space character after body."); + } + + private void errNonSpaceInColgroupInFragment() throws SAXException { + err("Non-space in \u201Ccolgroup\u201D when parsing fragment."); + } + + private void errNonSpaceInNoscriptInHead() throws SAXException { + err("Non-space character inside \u201Cnoscript\u201D inside \u201Chead\u201D."); + } + + private void errFooBetweenHeadAndBody(@Local String name) throws SAXException { + if (errorHandler == null) { + return; + } + errNoCheck("\u201C" + name + "\u201D element between \u201Chead\u201D and \u201Cbody\u201D."); + } + + private void errStartTagWithoutDoctype() throws SAXException { + if (!isSrcdocDocument) { + err("Start tag seen without seeing a doctype first. 
Expected \u201C<!DOCTYPE html>\u201D."); + } + } + + private void errNoSelectInTableScope() throws SAXException { + err("No \u201Cselect\u201D in table scope."); + } + + private void errStartSelectWhereEndSelectExpected() throws SAXException { + err("\u201Cselect\u201D start tag where end tag expected."); + } + + private void errStartTagWithSelectOpen(@Local String name) + throws SAXException { + if (errorHandler == null) { + return; + } + errNoCheck("\u201C" + name + + "\u201D start tag with \u201Cselect\u201D open."); + } + + private void errBadStartTagInHead(@Local String name) throws SAXException { + if (errorHandler == null) { + return; + } + errNoCheck("Bad start tag in \u201C" + name + + "\u201D in \u201Chead\u201D."); + } + + private void errImage() throws SAXException { + err("Saw a start tag \u201Cimage\u201D."); + } + + private void errIsindex() throws SAXException { + err("\u201Cisindex\u201D seen."); + } + + private void errFooSeenWhenFooOpen(@Local String name) throws SAXException { + if (errorHandler == null) { + return; + } + errNoCheck("An \u201C" + name + "\u201D start tag seen but an element of the same type was already open."); + } + + private void errHeadingWhenHeadingOpen() throws SAXException { + err("Heading cannot be a child of another heading."); + } + + private void errFramesetStart() throws SAXException { + err("\u201Cframeset\u201D start tag seen."); + } + + private void errNoCellToClose() throws SAXException { + err("No cell to close."); + } + + private void errStartTagInTable(@Local String name) throws SAXException { + if (errorHandler == null) { + return; + } + errNoCheck("Start tag \u201C" + name + + "\u201D seen in \u201Ctable\u201D."); + } + + private void errFormWhenFormOpen() throws SAXException { + err("Saw a \u201Cform\u201D start tag, but there was already an active \u201Cform\u201D element. Nested forms are not allowed. 
Ignoring the tag."); + } + + private void errTableSeenWhileTableOpen() throws SAXException { + err("Start tag for \u201Ctable\u201D seen but the previous \u201Ctable\u201D is still open."); + } + + private void errStartTagInTableBody(@Local String name) throws SAXException { + if (errorHandler == null) { + return; + } + errNoCheck("\u201C" + name + "\u201D start tag in table body."); + } + + private void errEndTagSeenWithoutDoctype() throws SAXException { + if (!isSrcdocDocument) { + err("End tag seen without seeing a doctype first. Expected \u201C<!DOCTYPE html>\u201D."); + } + } + + private void errEndTagAfterBody() throws SAXException { + err("Saw an end tag after \u201Cbody\u201D had been closed."); + } + + private void errEndTagSeenWithSelectOpen(@Local String name) throws SAXException { + if (errorHandler == null) { + return; + } + errNoCheck("\u201C" + name + + "\u201D end tag with \u201Cselect\u201D open."); + } + + private void errGarbageInColgroup() throws SAXException { + err("Garbage in \u201Ccolgroup\u201D fragment."); + } + + private void errEndTagBr() throws SAXException { + err("End tag \u201Cbr\u201D."); + } + + private void errNoElementToCloseButEndTagSeen(@Local String name) + throws SAXException { + if (errorHandler == null) { + return; + } + errNoCheck("No \u201C" + name + "\u201D element in scope but a \u201C" + + name + "\u201D end tag seen."); + } + + private void errHtmlStartTagInForeignContext(@Local String name) + throws SAXException { + if (errorHandler == null) { + return; + } + errNoCheck("HTML start tag \u201C" + name + + "\u201D in a foreign namespace context."); + } + + private void errTableClosedWhileCaptionOpen() throws SAXException { + err("\u201Ctable\u201D closed but \u201Ccaption\u201D was still open."); + } + + private void errNoTableRowToClose() throws SAXException { + err("No table row to close."); + } + + private void errNonSpaceInTable() throws SAXException { + err("Misplaced non-space characters insided a table."); + } + 
+ private void errUnclosedChildrenInRuby() throws SAXException { + if (errorHandler == null) { + return; + } + errNoCheck("Unclosed children in \u201Cruby\u201D."); + } + + private void errStartTagSeenWithoutRuby(@Local String name) throws SAXException { + if (errorHandler == null) { + return; + } + errNoCheck("Start tag \u201C" + + name + + "\u201D seen without a \u201Cruby\u201D element being open."); + } + + private void errSelfClosing() throws SAXException { + if (errorHandler == null) { + return; + } + errNoCheck("Self-closing syntax (\u201C/>\u201D) used on a non-void HTML element. Ignoring the slash and treating as a start tag."); + } + + private void errNoCheckUnclosedElementsOnStack() throws SAXException { + errNoCheck("Unclosed elements on stack."); + } + + private void errEndTagDidNotMatchCurrentOpenElement(@Local String name, + @Local String currOpenName) throws SAXException { + if (errorHandler == null) { + return; + } + errNoCheck("End tag \u201C" + + name + + "\u201D did not match the name of the current open element (\u201C" + + currOpenName + "\u201D)."); + } + + private void errEndTagViolatesNestingRules(@Local String name) throws SAXException { + if (errorHandler == null) { + return; + } + errNoCheck("End tag \u201C" + name + "\u201D violates nesting rules."); + } + + private void errEofWithUnclosedElements() throws SAXException { + if (errorHandler == null) { + return; + } + errNoCheck("End of file seen and there were open elements."); + // just report all remaining unclosed elements + errListUnclosedStartTags(0); + } + + /** + * Reports arriving at/near end of document with unclosed elements remaining. 
+ * + * @param name + * the name of the end tag being reported + * @throws SAXException + */ + private void errEndWithUnclosedElements(@Local String name) throws SAXException { + if (errorHandler == null) { + return; + } + errNoCheck("End tag for \u201C" + + name + + "\u201D seen, but there were unclosed elements."); + // just report all remaining unclosed elements + errListUnclosedStartTags(0); + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/TreeBuilderState.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/TreeBuilderState.java new file mode 100644 index 000000000..c4e2d4afb --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/TreeBuilderState.java @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2009-2010 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +package nu.validator.htmlparser.impl; + +/** + * Interface for exposing the state of the HTML5 tree builder so that the + * interface can be implemented by the tree builder itself and by snapshots. + * + * @version $Id$ + * @author hsivonen + */ +public interface TreeBuilderState<T> { + + /** + * Returns the stack. + * + * @return the stack + */ + public StackNode<T>[] getStack(); + + /** + * Returns the listOfActiveFormattingElements. + * + * @return the listOfActiveFormattingElements + */ + public StackNode<T>[] getListOfActiveFormattingElements(); + + /** + * Returns the stack of template insertion modes. + * + * @return the stack of template insertion modes + */ + public int[] getTemplateModeStack(); + + /** + * Returns the formPointer. + * + * @return the formPointer + */ + public T getFormPointer(); + + /** + * Returns the headPointer. + * + * @return the headPointer + */ + public T getHeadPointer(); + + /** + * Returns the deepTreeSurrogateParent. + * + * @return the deepTreeSurrogateParent + */ + public T getDeepTreeSurrogateParent(); + + /** + * Returns the mode. + * + * @return the mode + */ + public int getMode(); + + /** + * Returns the originalMode. + * + * @return the originalMode + */ + public int getOriginalMode(); + + /** + * Returns the framesetOk. + * + * @return the framesetOk + */ + public boolean isFramesetOk(); + + /** + * Returns the needToDropLF. + * + * @return the needToDropLF + */ + public boolean isNeedToDropLF(); + + /** + * Returns the quirks. + * + * @return the quirks + */ + public boolean isQuirks(); + + /** + * Return the length of the stack. + * @return the length of the stack. + */ + public int getStackLength(); + + /** + * Return the length of the list of active formatting elements. + * @return the length of the list of active formatting elements. + */ + public int getListOfActiveFormattingElementsLength(); + + /** + * Return the length of the stack of template insertion modes. 
+ * + * @return the length of the stack of template insertion modes. + */ + int getTemplateModeStackLength(); +}
\ No newline at end of file diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/UTF16Buffer.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/UTF16Buffer.java new file mode 100644 index 000000000..35f1ac055 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/UTF16Buffer.java @@ -0,0 +1,153 @@ +/* + * Copyright (c) 2008-2010 Mozilla Foundation + * Copyright (c) 2018-2020 Moonchild Productions + * Copyright (c) 2020 Binary Outcast + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.impl; + +import nu.validator.htmlparser.annotation.NoLength; + +/** + * A UTF-16 buffer that knows the start and end indices of its unconsumed + * content. + * + * @version $Id$ + * @author hsivonen + */ +public final class UTF16Buffer { + + /** + * The backing store of the buffer. May be larger than the logical content + * of this <code>UTF16Buffer</code>. 
+ */ + private final @NoLength char[] buffer; + + /** + * The index of the first unconsumed character in the backing buffer. + */ + private int start; + + /** + * The index of the slot immediately after the last character in the backing + * buffer that is part of the logical content of this + * <code>UTF16Buffer</code>. + */ + private int end; + + //[NOCPP[ + + /** + * Constructor for wrapping an existing UTF-16 code unit array. + * + * @param buffer + * the backing buffer + * @param start + * the index of the first character to consume + * @param end + * the index immediately after the last character to consume + */ + public UTF16Buffer(@NoLength char[] buffer, int start, int end) { + this.buffer = buffer; + this.start = start; + this.end = end; + } + + // ]NOCPP] + + /** + * Returns the start index. + * + * @return the start index + */ + public int getStart() { + return start; + } + + /** + * Sets the start index. + * + * @param start + * the start index + */ + public void setStart(int start) { + this.start = start; + } + + /** + * Returns the backing buffer. + * + * @return the backing buffer + */ + public @NoLength char[] getBuffer() { + return buffer; + } + + /** + * Returns the end index. + * + * @return the end index + */ + public int getEnd() { + return end; + } + + /** + * Checks if the buffer has data left. + * + * @return <code>true</code> if there's data left + */ + public boolean hasMore() { + return start < end; + } + + /** + * Returns <code>end - start</code>. + * + * @return <code>end - start</code> + */ + public int getLength() { + return end - start; + } + + /** + * Adjusts the start index to skip over the first character if it is a line + * feed and the previous character was a carriage return. + * + * @param lastWasCR + * whether the previous character was a carriage return + */ + public void adjust(boolean lastWasCR) { + if (lastWasCR && buffer[start] == '\n') { + start++; + } + } + + /** + * Sets the end index. 
+ * + * @param end + * the end index + */ + public void setEnd(int end) { + this.end = end; + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/package.html b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/package.html new file mode 100644 index 000000000..6d029a13e --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/impl/package.html @@ -0,0 +1,30 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN"> +<html> +<head><title>Package Overview</title> +<!-- + Copyright (c) 2007 Henri Sivonen + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the "Software"), + to deal in the Software without restriction, including without limitation + the rights to use, copy, modify, merge, publish, distribute, sublicense, + and/or sell copies of the Software, and to permit persons to whom the + Software is furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. +--> +</head> +<body bgcolor="white"> +<p>This package contains the bulk of parser internals. Only implementors of +additional tree builders or token handlers should look here.</p> +</body> +</html>
\ No newline at end of file diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/BomSniffer.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/BomSniffer.java new file mode 100644 index 000000000..42d7a837f --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/BomSniffer.java @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.io; + +import java.io.IOException; + +import nu.validator.htmlparser.common.ByteReadable; + +/** + * The BOM sniffing part of the HTML5 encoding sniffing algorithm. 
+ * + * @version $Id$ + * @author hsivonen + */ +public final class BomSniffer { + + private final ByteReadable source; + + /** + * @param source + */ + public BomSniffer(final ByteReadable source) { + this.source = source; + } + + Encoding sniff() throws IOException { + int b = source.readByte(); + if (b == 0xEF) { // UTF-8 + b = source.readByte(); + if (b == 0xBB) { + b = source.readByte(); + if (b == 0xBF) { + return Encoding.UTF8; + } else { + return null; + } + } else { + return null; + } + } else if (b == 0xFF) { // little-endian + b = source.readByte(); + if (b == 0xFE) { + return Encoding.UTF16LE; + } else { + return null; + } + } else if (b == 0xFE) { // big-endian UTF-16 + b = source.readByte(); + if (b == 0xFF) { + return Encoding.UTF16BE; + } else { + return null; + } + } else { + return null; + } + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/Confidence.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/Confidence.java new file mode 100644 index 000000000..1a2d49746 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/Confidence.java @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
/**
 * How sure the parser driver is about the character encoding in use.
 */
public enum Confidence {

    /** The encoding is a working guess that may still be revised. */
    TENTATIVE,

    /** The encoding has been settled and is no longer open to change. */
    CERTAIN
}
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.io; + +import java.io.IOException; +import java.io.InputStream; +import java.io.Reader; +import java.nio.charset.UnsupportedCharsetException; + +import nu.validator.htmlparser.common.CharacterHandler; +import nu.validator.htmlparser.common.EncodingDeclarationHandler; +import nu.validator.htmlparser.common.Heuristics; +import nu.validator.htmlparser.common.TransitionHandler; +import nu.validator.htmlparser.common.XmlViolationPolicy; +import nu.validator.htmlparser.extra.NormalizationChecker; +import nu.validator.htmlparser.impl.ErrorReportingTokenizer; +import nu.validator.htmlparser.impl.Tokenizer; +import nu.validator.htmlparser.impl.UTF16Buffer; +import nu.validator.htmlparser.rewindable.RewindableInputStream; + +import org.xml.sax.ErrorHandler; +import org.xml.sax.InputSource; +import org.xml.sax.Locator; +import org.xml.sax.SAXException; +import org.xml.sax.SAXParseException; + +public class Driver implements EncodingDeclarationHandler { + + /** + * The input UTF-16 code unit stream. If a byte stream was given, this + * object is an instance of <code>HtmlInputStreamReader</code>. + */ + private Reader reader; + + /** + * The reference to the rewindable byte stream. <code>null</code> if + * prohibited or no longer needed. + */ + private RewindableInputStream rewindableInputStream; + + private boolean swallowBom; + + private Encoding characterEncoding; + + private boolean allowRewinding = true; + + private Heuristics heuristics = Heuristics.NONE; + + private final Tokenizer tokenizer; + + private Confidence confidence; + + /** + * Used for NFC checking if non-<code>null</code>, source code capture, + * etc. 
+ */ + private CharacterHandler[] characterHandlers = new CharacterHandler[0]; + + public Driver(Tokenizer tokenizer) { + this.tokenizer = tokenizer; + tokenizer.setEncodingDeclarationHandler(this); + } + + /** + * Returns the allowRewinding. + * + * @return the allowRewinding + */ + public boolean isAllowRewinding() { + return allowRewinding; + } + + /** + * Sets the allowRewinding. + * + * @param allowRewinding + * the allowRewinding to set + */ + public void setAllowRewinding(boolean allowRewinding) { + this.allowRewinding = allowRewinding; + } + + /** + * Turns NFC checking on or off. + * + * @param enable + * <code>true</code> if checking on + */ + public void setCheckingNormalization(boolean enable) { + if (enable) { + if (isCheckingNormalization()) { + return; + } else { + NormalizationChecker normalizationChecker = new NormalizationChecker(tokenizer); + normalizationChecker.setErrorHandler(tokenizer.getErrorHandler()); + + } + } else { + if (isCheckingNormalization()) { + CharacterHandler[] newHandlers = new CharacterHandler[characterHandlers.length - 1]; + boolean skipped = false; + int j = 0; + for (int i = 0; i < characterHandlers.length; i++) { + CharacterHandler ch = characterHandlers[i]; + if (!(!skipped && (ch instanceof NormalizationChecker))) { + newHandlers[j] = ch; + j++; + } + } + characterHandlers = newHandlers; + } else { + return; + } + } + } + + public void addCharacterHandler(CharacterHandler characterHandler) { + if (characterHandler == null) { + throw new IllegalArgumentException("Null argument."); + } + CharacterHandler[] newHandlers = new CharacterHandler[characterHandlers.length + 1]; + System.arraycopy(characterHandlers, 0, newHandlers, 0, + characterHandlers.length); + newHandlers[characterHandlers.length] = characterHandler; + characterHandlers = newHandlers; + } + + /** + * Query if checking normalization. 
+ * + * @return <code>true</code> if checking on + */ + public boolean isCheckingNormalization() { + for (int i = 0; i < characterHandlers.length; i++) { + CharacterHandler ch = characterHandlers[i]; + if (ch instanceof NormalizationChecker) { + return true; + } + } + return false; + } + + /** + * Runs the tokenization. This is the main entry point. + * + * @param is + * the input source + * @throws SAXException + * on fatal error (if configured to treat XML violations as + * fatal) or if the token handler threw + * @throws IOException + * if the stream threw + */ + public void tokenize(InputSource is) throws SAXException, IOException { + if (is == null) { + throw new IllegalArgumentException("InputSource was null."); + } + tokenizer.start(); + confidence = Confidence.TENTATIVE; + swallowBom = true; + rewindableInputStream = null; + tokenizer.initLocation(is.getPublicId(), is.getSystemId()); + this.reader = is.getCharacterStream(); + this.characterEncoding = encodingFromExternalDeclaration(is.getEncoding()); + if (this.reader == null) { + InputStream inputStream = is.getByteStream(); + if (inputStream == null) { + throw new SAXException("Both streams in InputSource were null."); + } + if (this.characterEncoding == null) { + if (allowRewinding) { + inputStream = rewindableInputStream = new RewindableInputStream( + inputStream); + } + this.reader = new HtmlInputStreamReader(inputStream, + tokenizer.getErrorHandler(), tokenizer, this, heuristics); + } else { + if (this.characterEncoding != Encoding.UTF8) { + warnWithoutLocation("Legacy encoding \u201C" + + this.characterEncoding.getCanonName() + + "\u201D used. 
Documents should use UTF-8."); + } + becomeConfident(); + this.reader = new HtmlInputStreamReader(inputStream, + tokenizer.getErrorHandler(), tokenizer, this, this.characterEncoding); + } + } else { + becomeConfident(); + } + Throwable t = null; + try { + for (;;) { + try { + for (int i = 0; i < characterHandlers.length; i++) { + CharacterHandler ch = characterHandlers[i]; + ch.start(); + } + runStates(); + break; + } catch (ReparseException e) { + if (rewindableInputStream == null) { + tokenizer.fatal("Changing encoding at this point would need non-streamable behavior."); + } else { + rewindableInputStream.rewind(); + becomeConfident(); + this.reader = new HtmlInputStreamReader( + rewindableInputStream, tokenizer.getErrorHandler(), tokenizer, + this, this.characterEncoding); + } + continue; + } + } + } catch (Throwable tr) { + t = tr; + } finally { + try { + tokenizer.end(); + characterEncoding = null; + for (int i = 0; i < characterHandlers.length; i++) { + CharacterHandler ch = characterHandlers[i]; + ch.end(); + } + reader.close(); + reader = null; + rewindableInputStream = null; + } catch (Throwable tr) { + if (t == null) { + t = tr; + } // else drop the later throwable + } + if (t != null) { + if (t instanceof IOException) { + throw (IOException) t; + } else if (t instanceof SAXException) { + throw (SAXException) t; + } else if (t instanceof RuntimeException) { + throw (RuntimeException) t; + } else if (t instanceof Error) { + throw (Error) t; + } else { + // impossible + throw new RuntimeException(t); + } + } + } + } + + void dontSwallowBom() { + swallowBom = false; + } + + private void runStates() throws SAXException, IOException { + char[] buffer = new char[2048]; + UTF16Buffer bufr = new UTF16Buffer(buffer, 0, 0); + boolean lastWasCR = false; + int len = -1; + if ((len = reader.read(buffer)) != -1) { + assert len > 0; + int streamOffset = 0; + int offset = 0; + int length = len; + if (swallowBom) { + if (buffer[0] == '\uFEFF') { + streamOffset = -1; + 
offset = 1; + length--; + } + } + if (length > 0) { + for (int i = 0; i < characterHandlers.length; i++) { + CharacterHandler ch = characterHandlers[i]; + ch.characters(buffer, offset, length); + } + tokenizer.setTransitionBaseOffset(streamOffset); + bufr.setStart(offset); + bufr.setEnd(offset + length); + while (bufr.hasMore()) { + bufr.adjust(lastWasCR); + lastWasCR = false; + if (bufr.hasMore()) { + lastWasCR = tokenizer.tokenizeBuffer(bufr); + } + } + } + streamOffset = length; + while ((len = reader.read(buffer)) != -1) { + assert len > 0; + for (int i = 0; i < characterHandlers.length; i++) { + CharacterHandler ch = characterHandlers[i]; + ch.characters(buffer, 0, len); + } + tokenizer.setTransitionBaseOffset(streamOffset); + bufr.setStart(0); + bufr.setEnd(len); + while (bufr.hasMore()) { + bufr.adjust(lastWasCR); + lastWasCR = false; + if (bufr.hasMore()) { + lastWasCR = tokenizer.tokenizeBuffer(bufr); + } + } + streamOffset += len; + } + } + tokenizer.eof(); + } + + public void setEncoding(Encoding encoding, Confidence confidence) { + this.characterEncoding = encoding; + if (confidence == Confidence.CERTAIN) { + becomeConfident(); + } + } + + public boolean internalEncodingDeclaration(String internalCharset) + throws SAXException { + try { + internalCharset = Encoding.toAsciiLowerCase(internalCharset); + Encoding cs; + if ("utf-16".equals(internalCharset) + || "utf-16be".equals(internalCharset) + || "utf-16le".equals(internalCharset)) { + tokenizer.errTreeBuilder("Internal encoding declaration specified \u201C" + + internalCharset + + "\u201D which is not an ASCII superset. 
Continuing as if the encoding had been \u201Cutf-8\u201D."); + cs = Encoding.UTF8; + internalCharset = "utf-8"; + } else { + cs = Encoding.forName(internalCharset); + } + Encoding actual = cs.getActualHtmlEncoding(); + if (actual == null) { + actual = cs; + } + if (!actual.isAsciiSuperset()) { + tokenizer.errTreeBuilder("Internal encoding declaration specified \u201C" + + internalCharset + + "\u201D which is not an ASCII superset. Not changing the encoding."); + return false; + } + if (characterEncoding == null) { + // Reader case + return true; + } + if (characterEncoding == actual) { + becomeConfident(); + return true; + } + if (confidence == Confidence.CERTAIN && actual != characterEncoding) { + tokenizer.errTreeBuilder("Internal encoding declaration \u201C" + + internalCharset + + "\u201D disagrees with the actual encoding of the document (\u201C" + + characterEncoding.getCanonName() + "\u201D)."); + } else { + Encoding newEnc = whineAboutEncodingAndReturnActual( + internalCharset, cs); + tokenizer.errTreeBuilder("Changing character encoding \u201C" + + internalCharset + "\u201D and reparsing."); + characterEncoding = newEnc; + throw new ReparseException(); + } + return true; + } catch (UnsupportedCharsetException e) { + tokenizer.errTreeBuilder("Internal encoding declaration named an unsupported chararacter encoding \u201C" + + internalCharset + "\u201D."); + return false; + } + } + + /** + * + */ + private void becomeConfident() { + if (rewindableInputStream != null) { + rewindableInputStream.willNotRewind(); + } + confidence = Confidence.CERTAIN; + tokenizer.becomeConfident(); + } + + /** + * Sets the encoding sniffing heuristics. 
+ * + * @param heuristics + * the heuristics to set + */ + public void setHeuristics(Heuristics heuristics) { + this.heuristics = heuristics; + } + + /** + * Reports a warning without line/col + * + * @param message + * the message + * @throws SAXException + */ + protected void warnWithoutLocation(String message) throws SAXException { + ErrorHandler errorHandler = tokenizer.getErrorHandler(); + if (errorHandler == null) { + return; + } + SAXParseException spe = new SAXParseException(message, null, + tokenizer.getSystemId(), -1, -1); + errorHandler.warning(spe); + } + + /** + * Initializes a decoder from external decl. + */ + protected Encoding encodingFromExternalDeclaration(String encoding) + throws SAXException { + if (encoding == null) { + return null; + } + encoding = Encoding.toAsciiLowerCase(encoding); + try { + Encoding cs = Encoding.forName(encoding); + if ("utf-16".equals(cs.getCanonName()) + || "utf-32".equals(cs.getCanonName())) { + swallowBom = false; + } + return whineAboutEncodingAndReturnActual(encoding, cs); + } catch (UnsupportedCharsetException e) { + tokenizer.err("Unsupported character encoding name: \u201C" + encoding + + "\u201D. Will sniff."); + swallowBom = true; + } + return null; // keep the compiler happy + } + + /** + * @param encoding + * @param cs + * @return + * @throws SAXException + */ + protected Encoding whineAboutEncodingAndReturnActual(String encoding, + Encoding cs) throws SAXException { + String canonName = cs.getCanonName(); + if (!cs.isRegistered()) { + if (encoding.startsWith("x-")) { + tokenizer.err("The encoding \u201C" + + encoding + + "\u201D is not an IANA-registered encoding. (Charmod C022)"); + } else { + tokenizer.err("The encoding \u201C" + + encoding + + "\u201D is not an IANA-registered encoding and did not use the \u201Cx-\u201D prefix. 
(Charmod C023)"); + } + } else if (!canonName.equals(encoding)) { + tokenizer.err("The encoding \u201C" + + encoding + + "\u201D is not the preferred name of the character encoding in use. The preferred name is \u201C" + + canonName + "\u201D. (Charmod C024)"); + } + if (cs.isShouldNot()) { + tokenizer.warn("Authors should not use the character encoding \u201C" + + encoding + + "\u201D. It is recommended to use \u201CUTF-8\u201D."); + } else if (cs.isLikelyEbcdic()) { + tokenizer.warn("Authors should not use EBCDIC-based encodings. It is recommended to use \u201CUTF-8\u201D."); + } else if (cs.isObscure()) { + tokenizer.warn("The character encoding \u201C" + + encoding + + "\u201D is not widely supported. Better interoperability may be achieved by using \u201CUTF-8\u201D."); + } + Encoding actual = cs.getActualHtmlEncoding(); + if (actual == null) { + return cs; + } else { + tokenizer.warn("Using \u201C" + actual.getCanonName() + + "\u201D instead of the declared encoding \u201C" + + encoding + "\u201D."); + return actual; + } + } + + private class ReparseException extends SAXException { + + } + + void notifyAboutMetaBoundary() { + tokenizer.notifyAboutMetaBoundary(); + } + + /** + * @param commentPolicy + * @see nu.validator.htmlparser.impl.Tokenizer#setCommentPolicy(nu.validator.htmlparser.common.XmlViolationPolicy) + */ + public void setCommentPolicy(XmlViolationPolicy commentPolicy) { + tokenizer.setCommentPolicy(commentPolicy); + } + + /** + * @param contentNonXmlCharPolicy + * @see nu.validator.htmlparser.impl.Tokenizer#setContentNonXmlCharPolicy(nu.validator.htmlparser.common.XmlViolationPolicy) + */ + public void setContentNonXmlCharPolicy( + XmlViolationPolicy contentNonXmlCharPolicy) { + tokenizer.setContentNonXmlCharPolicy(contentNonXmlCharPolicy); + } + + /** + * @param contentSpacePolicy + * @see nu.validator.htmlparser.impl.Tokenizer#setContentSpacePolicy(nu.validator.htmlparser.common.XmlViolationPolicy) + */ + public void 
setContentSpacePolicy(XmlViolationPolicy contentSpacePolicy) { + tokenizer.setContentSpacePolicy(contentSpacePolicy); + } + + /** + * @param eh + * @see nu.validator.htmlparser.impl.Tokenizer#setErrorHandler(org.xml.sax.ErrorHandler) + */ + public void setErrorHandler(ErrorHandler eh) { + tokenizer.setErrorHandler(eh); + for (int i = 0; i < characterHandlers.length; i++) { + CharacterHandler ch = characterHandlers[i]; + if (ch instanceof NormalizationChecker) { + NormalizationChecker nc = (NormalizationChecker) ch; + nc.setErrorHandler(eh); + } + } + } + + public void setTransitionHandler(TransitionHandler transitionHandler) { + if (tokenizer instanceof ErrorReportingTokenizer) { + ErrorReportingTokenizer ert = (ErrorReportingTokenizer) tokenizer; + ert.setTransitionHandler(transitionHandler); + } else if (transitionHandler != null) { + throw new IllegalStateException("Attempt to set a transition handler on a plain tokenizer."); + } + } + + /** + * @param html4ModeCompatibleWithXhtml1Schemata + * @see nu.validator.htmlparser.impl.Tokenizer#setHtml4ModeCompatibleWithXhtml1Schemata(boolean) + */ + public void setHtml4ModeCompatibleWithXhtml1Schemata( + boolean html4ModeCompatibleWithXhtml1Schemata) { + tokenizer.setHtml4ModeCompatibleWithXhtml1Schemata(html4ModeCompatibleWithXhtml1Schemata); + } + + /** + * @param mappingLangToXmlLang + * @see nu.validator.htmlparser.impl.Tokenizer#setMappingLangToXmlLang(boolean) + */ + public void setMappingLangToXmlLang(boolean mappingLangToXmlLang) { + tokenizer.setMappingLangToXmlLang(mappingLangToXmlLang); + } + + /** + * @param namePolicy + * @see nu.validator.htmlparser.impl.Tokenizer#setNamePolicy(nu.validator.htmlparser.common.XmlViolationPolicy) + */ + public void setNamePolicy(XmlViolationPolicy namePolicy) { + tokenizer.setNamePolicy(namePolicy); + } + + /** + * @param xmlnsPolicy + * @see nu.validator.htmlparser.impl.Tokenizer#setXmlnsPolicy(nu.validator.htmlparser.common.XmlViolationPolicy) + */ + public void 
setXmlnsPolicy(XmlViolationPolicy xmlnsPolicy) { + tokenizer.setXmlnsPolicy(xmlnsPolicy); + } + + public String getCharacterEncoding() throws SAXException { + return characterEncoding.getCanonName(); + } + + public Locator getDocumentLocator() { + return tokenizer; + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/Encoding.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/Encoding.java new file mode 100644 index 000000000..3bbc606fa --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/Encoding.java @@ -0,0 +1,395 @@ +/* + * Copyright (c) 2006 Henri Sivonen + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +package nu.validator.htmlparser.io; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.Reader; +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CharsetEncoder; +import java.nio.charset.CoderMalfunctionError; +import java.nio.charset.CodingErrorAction; +import java.nio.charset.UnsupportedCharsetException; +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.SortedMap; + +public class Encoding { + + public static final Encoding UTF8; + + public static final Encoding UTF16; + + public static final Encoding UTF16LE; + + public static final Encoding UTF16BE; + + public static final Encoding WINDOWS1252; + + private static String[] SHOULD_NOT = { "jisx02121990", "xjis0208" }; + + private static String[] BANNED = { "bocu1", "cesu8", "compoundtext", + "iscii91", "macarabic", "maccentraleurroman", "maccroatian", + "maccyrillic", "macdevanagari", "macfarsi", "macgreek", + "macgujarati", "macgurmukhi", "machebrew", "macicelandic", + "macroman", "macromanian", "macthai", "macturkish", "macukranian", + "scsu", "utf32", "utf32be", "utf32le", "utf7", "ximapmailboxname", + "xjisautodetect", "xutf16bebom", "xutf16lebom", "xutf32bebom", + "xutf32lebom", "xutf16oppositeendian", "xutf16platformendian", + "xutf32oppositeendian", "xutf32platformendian" }; + + private static String[] NOT_OBSCURE = { "big5", "big5hkscs", "eucjp", + "euckr", "gb18030", "gbk", "iso2022jp", "iso2022kr", "iso88591", + "iso885913", "iso885915", "iso88592", "iso88593", "iso88594", + "iso88595", "iso88596", "iso88597", "iso88598", "iso88599", + "koi8r", "shiftjis", "tis620", "usascii", "utf16", "utf16be", + "utf16le", "utf8", "windows1250", "windows1251", "windows1252", + "windows1253", "windows1254", "windows1255", "windows1256", + "windows1257", "windows1258" }; + + private static Map<String, 
Encoding> encodingByCookedName = new HashMap<String, Encoding>(); + + private final String canonName; + + private final Charset charset; + + private final boolean asciiSuperset; + + private final boolean obscure; + + private final boolean shouldNot; + + private final boolean likelyEbcdic; + + private Encoding actualHtmlEncoding = null; + + static { + byte[] testBuf = new byte[0x7F]; + for (int i = 0; i < 0x7F; i++) { + if (isAsciiSupersetnessSensitive(i)) { + testBuf[i] = (byte) i; + } else { + testBuf[i] = (byte) 0x20; + } + } + + Set<Encoding> encodings = new HashSet<Encoding>(); + + SortedMap<String, Charset> charsets = Charset.availableCharsets(); + for (Map.Entry<String, Charset> entry : charsets.entrySet()) { + Charset cs = entry.getValue(); + String name = toNameKey(cs.name()); + String canonName = toAsciiLowerCase(cs.name()); + if (!isBanned(name)) { + name = name.intern(); + boolean asciiSuperset = asciiMapsToBasicLatin(testBuf, cs); + Encoding enc = new Encoding(canonName.intern(), cs, + asciiSuperset, isObscure(name), isShouldNot(name), + isLikelyEbcdic(name, asciiSuperset)); + encodings.add(enc); + Set<String> aliases = cs.aliases(); + for (String alias : aliases) { + encodingByCookedName.put(toNameKey(alias).intern(), enc); + } + } + } + // Overwrite possible overlapping aliases with the real things--just in + // case + for (Encoding encoding : encodings) { + encodingByCookedName.put(toNameKey(encoding.getCanonName()), + encoding); + } + UTF8 = forName("utf-8"); + UTF16 = forName("utf-16"); + UTF16BE = forName("utf-16be"); + UTF16LE = forName("utf-16le"); + WINDOWS1252 = forName("windows-1252"); + try { + forName("iso-8859-1").actualHtmlEncoding = forName("windows-1252"); + } catch (UnsupportedCharsetException e) { + } + try { + forName("iso-8859-9").actualHtmlEncoding = forName("windows-1254"); + } catch (UnsupportedCharsetException e) { + } + try { + forName("iso-8859-11").actualHtmlEncoding = forName("windows-874"); + } catch 
(UnsupportedCharsetException e) { + } + try { + forName("x-iso-8859-11").actualHtmlEncoding = forName("windows-874"); + } catch (UnsupportedCharsetException e) { + } + try { + forName("tis-620").actualHtmlEncoding = forName("windows-874"); + } catch (UnsupportedCharsetException e) { + } + try { + forName("gb_2312-80").actualHtmlEncoding = forName("gbk"); + } catch (UnsupportedCharsetException e) { + } + try { + forName("gb2312").actualHtmlEncoding = forName("gbk"); + } catch (UnsupportedCharsetException e) { + } + try { + encodingByCookedName.put("x-x-big5", forName("big5")); + } catch (UnsupportedCharsetException e) { + } + try { + encodingByCookedName.put("euc-kr", forName("windows-949")); + } catch (UnsupportedCharsetException e) { + } + try { + encodingByCookedName.put("ks_c_5601-1987", forName("windows-949")); + } catch (UnsupportedCharsetException e) { + } + } + + private static boolean isAsciiSupersetnessSensitive(int c) { + return (c >= 0x09 && c <= 0x0D) || (c >= 0x20 && c <= 0x22) + || (c >= 0x26 && c <= 0x27) || (c >= 0x2C && c <= 0x3F) + || (c >= 0x41 && c <= 0x5A) || (c >= 0x61 && c <= 0x7A); + } + + private static boolean isObscure(String lowerCasePreferredIanaName) { + return !(Arrays.binarySearch(NOT_OBSCURE, lowerCasePreferredIanaName) > -1); + } + + private static boolean isBanned(String lowerCasePreferredIanaName) { + if (lowerCasePreferredIanaName.startsWith("xibm")) { + return true; + } + return (Arrays.binarySearch(BANNED, lowerCasePreferredIanaName) > -1); + } + + private static boolean isShouldNot(String lowerCasePreferredIanaName) { + return (Arrays.binarySearch(SHOULD_NOT, lowerCasePreferredIanaName) > -1); + } + + /** + * @param testBuf + * @param cs + */ + private static boolean asciiMapsToBasicLatin(byte[] testBuf, Charset cs) { + CharsetDecoder dec = cs.newDecoder(); + dec.onMalformedInput(CodingErrorAction.REPORT); + dec.onUnmappableCharacter(CodingErrorAction.REPORT); + Reader r = new InputStreamReader(new 
ByteArrayInputStream(testBuf), dec); + try { + for (int i = 0; i < 0x7F; i++) { + if (isAsciiSupersetnessSensitive(i)) { + if (r.read() != i) { + return false; + } + } else { + if (r.read() != 0x20) { + return false; + } + } + } + } catch (IOException e) { + return false; + } catch (Exception e) { + return false; + } catch (CoderMalfunctionError e) { + return false; + } + + return true; + } + + private static boolean isLikelyEbcdic(String canonName, + boolean asciiSuperset) { + if (!asciiSuperset) { + return (canonName.startsWith("cp") || canonName.startsWith("ibm") || canonName.startsWith("xibm")); + } else { + return false; + } + } + + public static Encoding forName(String name) { + Encoding rv = encodingByCookedName.get(toNameKey(name)); + if (rv == null) { + throw new UnsupportedCharsetException(name); + } else { + return rv; + } + } + + public static String toNameKey(String str) { + if (str == null) { + return null; + } + int j = 0; + char[] buf = new char[str.length()]; + for (int i = 0; i < str.length(); i++) { + char c = str.charAt(i); + if (c >= 'A' && c <= 'Z') { + c += 0x20; + } + if (!((c >= '\t' && c <= '\r') || (c >= '\u0020' && c <= '\u002F') + || (c >= '\u003A' && c <= '\u0040') + || (c >= '\u005B' && c <= '\u0060') || (c >= '\u007B' && c <= '\u007E'))) { + buf[j] = c; + j++; + } + } + return new String(buf, 0, j); + } + + public static String toAsciiLowerCase(String str) { + if (str == null) { + return null; + } + char[] buf = new char[str.length()]; + for (int i = 0; i < str.length(); i++) { + char c = str.charAt(i); + if (c >= 'A' && c <= 'Z') { + c += 0x20; + } + buf[i] = c; + } + return new String(buf); + } + + /** + * @param canonName + * @param charset + * @param asciiSuperset + * @param obscure + * @param shouldNot + * @param likelyEbcdic + */ + private Encoding(final String canonName, final Charset charset, + final boolean asciiSuperset, final boolean obscure, + final boolean shouldNot, final boolean likelyEbcdic) { + this.canonName = 
canonName; + this.charset = charset; + this.asciiSuperset = asciiSuperset; + this.obscure = obscure; + this.shouldNot = shouldNot; + this.likelyEbcdic = likelyEbcdic; + } + + /** + * Returns the asciiSuperset. + * + * @return the asciiSuperset + */ + public boolean isAsciiSuperset() { + return asciiSuperset; + } + + /** + * Returns the canonName. + * + * @return the canonName + */ + public String getCanonName() { + return canonName; + } + + /** + * Returns the likelyEbcdic. + * + * @return the likelyEbcdic + */ + public boolean isLikelyEbcdic() { + return likelyEbcdic; + } + + /** + * Returns the obscure. + * + * @return the obscure + */ + public boolean isObscure() { + return obscure; + } + + /** + * Returns the shouldNot. + * + * @return the shouldNot + */ + public boolean isShouldNot() { + return shouldNot; + } + + public boolean isRegistered() { + return !canonName.startsWith("x-"); + } + + /** + * @return + * @see java.nio.charset.Charset#canEncode() + */ + public boolean canEncode() { + return charset.canEncode(); + } + + /** + * @return + * @see java.nio.charset.Charset#newDecoder() + */ + public CharsetDecoder newDecoder() { + return charset.newDecoder(); + } + + /** + * @return + * @see java.nio.charset.Charset#newEncoder() + */ + public CharsetEncoder newEncoder() { + return charset.newEncoder(); + } + + /** + * Returns the actualHtmlEncoding. 
+ * + * @return the actualHtmlEncoding + */ + public Encoding getActualHtmlEncoding() { + return actualHtmlEncoding; + } + + public static void main(String[] args) { + for (Map.Entry<String, Encoding> entry : encodingByCookedName.entrySet()) { + String name = entry.getKey(); + Encoding enc = entry.getValue(); + System.out.printf( + "%21s: canon %21s, obs %5s, reg %5s, asc %5s, ebc %5s\n", + name, enc.getCanonName(), enc.isObscure(), + enc.isRegistered(), enc.isAsciiSuperset(), + enc.isLikelyEbcdic()); + } + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/HtmlInputStreamReader.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/HtmlInputStreamReader.java new file mode 100644 index 000000000..413f0d9e9 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/HtmlInputStreamReader.java @@ -0,0 +1,512 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2013 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.io; + +import java.io.IOException; +import java.io.InputStream; +import java.io.Reader; +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CoderResult; +import java.nio.charset.CodingErrorAction; + +import nu.validator.htmlparser.common.ByteReadable; +import nu.validator.htmlparser.common.Heuristics; +import nu.validator.htmlparser.common.XmlViolationPolicy; +import nu.validator.htmlparser.extra.ChardetSniffer; +import nu.validator.htmlparser.extra.IcuDetectorSniffer; +import nu.validator.htmlparser.impl.Tokenizer; + +import org.xml.sax.ErrorHandler; +import org.xml.sax.Locator; +import org.xml.sax.SAXException; +import org.xml.sax.SAXParseException; + +/** + * Be very careful with this class. It is not a general-purpose subclass of of + * <code>Reader</code>. Instead, it is the minimal implementation that does + * what <code>Tokenizer</code> needs while being an instance of + * <code>Reader</code>. + * + * The only reason why this is a public class is that it needs to be visible to + * test code in another package. 
+ * + * @version $Id$ + * @author hsivonen + */ +public final class HtmlInputStreamReader extends Reader implements + ByteReadable, Locator { + + private static final int SNIFFING_LIMIT = 1024; + + private final InputStream inputStream; + + private final ErrorHandler errorHandler; + + private final Tokenizer tokenizer; + + private final Driver driver; + + private CharsetDecoder decoder = null; + + private boolean sniffing = true; + + private int limit = 0; + + private int position = 0; + + private int bytesRead = 0; + + private boolean eofSeen = false; + + private boolean shouldReadBytes = false; + + private boolean charsetBoundaryPassed = false; + + private final byte[] byteArray = new byte[4096]; // Length must be >= + + // SNIFFING_LIMIT + + private final ByteBuffer byteBuffer = ByteBuffer.wrap(byteArray); + + private boolean needToNotifyTokenizer = false; + + private boolean flushing = false; + + private int line = -1; + + private int col = -1; + + private int lineColPos; + + private boolean hasPendingReplacementCharacter = false; + + private boolean nextCharOnNewLine; + + private boolean prevWasCR; + + /** + * @param inputStream + * @param errorHandler + * @param locator + * @throws IOException + * @throws SAXException + */ + public HtmlInputStreamReader(InputStream inputStream, + ErrorHandler errorHandler, Tokenizer tokenizer, Driver driver, + Heuristics heuristics) throws SAXException, IOException { + this.inputStream = inputStream; + this.errorHandler = errorHandler; + this.tokenizer = tokenizer; + this.driver = driver; + this.sniffing = true; + Encoding encoding = (new BomSniffer(this)).sniff(); + if (encoding == null) { + position = 0; + encoding = (new MetaSniffer(errorHandler, this)).sniff(this); + boolean declared = true; + if (encoding == null) { + declared = false; + } else if (encoding != Encoding.UTF8) { + warn("Legacy encoding \u201C" + + encoding.getCanonName() + + "\u201D used. 
Documents should use UTF-8."); + } + if (encoding == null + && (heuristics == Heuristics.CHARDET || heuristics == Heuristics.ALL)) { + encoding = (new ChardetSniffer(byteArray, limit)).sniff(); + } + if (encoding == null + && (heuristics == Heuristics.ICU || heuristics == Heuristics.ALL)) { + position = 0; + encoding = (new IcuDetectorSniffer(this)).sniff(); + } + sniffing = false; + if (encoding == null) { + encoding = Encoding.WINDOWS1252; + } + if (!declared) { + err("The character encoding was not declared. Proceeding using \u201C" + encoding.getCanonName() + "\u201D."); + } + if (driver != null) { + driver.setEncoding(encoding, Confidence.TENTATIVE); + } + } else { + if (encoding == Encoding.UTF8) { + if (driver != null) { + driver.setEncoding(Encoding.UTF8, Confidence.CERTAIN); + } + } else { + warn("Legacy encoding \u201C" + + encoding.getCanonName() + + "\u201D used. Documents should use UTF-8."); + if (driver != null) { + driver.setEncoding(Encoding.UTF16, Confidence.CERTAIN); + } + } + } + this.decoder = encoding.newDecoder(); + sniffing = false; + position = 0; + bytesRead = 0; + byteBuffer.position(position); + byteBuffer.limit(limit); + initDecoder(); + } + + /** + * + */ + private void initDecoder() { + this.decoder.onMalformedInput(CodingErrorAction.REPORT); + this.decoder.onUnmappableCharacter(CodingErrorAction.REPORT); + } + + public HtmlInputStreamReader(InputStream inputStream, + ErrorHandler errorHandler, Tokenizer tokenizer, Driver driver, + Encoding encoding) throws SAXException, IOException { + this.inputStream = inputStream; + this.errorHandler = errorHandler; + this.tokenizer = tokenizer; + this.driver = driver; + this.decoder = encoding.newDecoder(); + this.sniffing = false; + position = 0; + bytesRead = 0; + byteBuffer.position(0); + byteBuffer.limit(0); + shouldReadBytes = true; + initDecoder(); + } + + @Override public void close() throws IOException { + inputStream.close(); + } + + @Override public int read(char[] charArray) throws 
IOException { + lineColPos = 0; + assert !sniffing; + assert charArray.length >= 2; + if (needToNotifyTokenizer) { + if (driver != null) { + driver.notifyAboutMetaBoundary(); + } + needToNotifyTokenizer = false; + } + CharBuffer charBuffer = CharBuffer.wrap(charArray); + charBuffer.limit(charArray.length); + charBuffer.position(0); + if (flushing) { + decoder.flush(charBuffer); + // return -1 if zero + int cPos = charBuffer.position(); + return cPos == 0 ? -1 : cPos; + } + if (hasPendingReplacementCharacter) { + charBuffer.put('\uFFFD'); + hasPendingReplacementCharacter = false; + } + for (;;) { + if (shouldReadBytes) { + int oldLimit = byteBuffer.limit(); + int readLen; + if (charsetBoundaryPassed) { + readLen = byteArray.length - oldLimit; + } else { + readLen = SNIFFING_LIMIT - oldLimit; + } + int num = inputStream.read(byteArray, oldLimit, readLen); + if (num == -1) { + eofSeen = true; + inputStream.close(); + } else { + byteBuffer.position(0); + byteBuffer.limit(oldLimit + num); + } + shouldReadBytes = false; + } + boolean finalDecode = false; + for (;;) { + int oldBytePos = byteBuffer.position(); + CoderResult cr = decoder.decode(byteBuffer, charBuffer, + finalDecode); + bytesRead += byteBuffer.position() - oldBytePos; + if (cr == CoderResult.OVERFLOW) { + // Decoder will remember surrogates + return charBuffer.position(); + } else if (cr == CoderResult.UNDERFLOW) { + int remaining = byteBuffer.remaining(); + if (!charsetBoundaryPassed) { + if (bytesRead + remaining >= SNIFFING_LIMIT) { + needToNotifyTokenizer = true; + charsetBoundaryPassed = true; + } + } + + // XXX what happens if the entire byte buffer consists of + // a pathologically long malformed sequence? + + // If the buffer was not fully consumed, there may be an + // incomplete byte sequence that needs to seed the next + // buffer. 
+ if (remaining > 0) { + System.arraycopy(byteArray, byteBuffer.position(), + byteArray, 0, remaining); + } + byteBuffer.position(0); + byteBuffer.limit(remaining); + if (flushing) { + // The final decode was successful. Not sure if this + // ever happens. + // Let's get out in any case. + int cPos = charBuffer.position(); + return cPos == 0 ? -1 : cPos; + } else if (eofSeen) { + // If there's something left, it isn't something that + // would be + // consumed in the middle of the stream. Rerun the loop + // once + // in the final mode. + shouldReadBytes = false; + finalDecode = true; + flushing = true; + continue; + } else { + // The usual stuff. Want more bytes next time. + shouldReadBytes = true; + int cPos = charBuffer.position(); + if (cPos == 0) { + // No output. Read more bytes right away + break; + } + return cPos; + } + } else { + // The result is in error. No need to test. + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < cr.length(); i++) { + if (i > 0) { + sb.append(", "); + } + sb.append('\u201C'); + sb.append(Integer.toHexString(byteBuffer.get() & 0xFF)); + bytesRead++; + sb.append('\u201D'); + } + if (charBuffer.hasRemaining()) { + charBuffer.put('\uFFFD'); + } else { + hasPendingReplacementCharacter = true; + } + calculateLineAndCol(charBuffer); + if (cr.isMalformed()) { + err("Malformed byte sequence: " + sb + "."); + } else if (cr.isUnmappable()) { + err("Unmappable byte sequence: " + sb + "."); + } else { + throw new RuntimeException( + "CoderResult was none of overflow, underflow, malformed or unmappable."); + } + if (finalDecode) { + // These were the last bytes of input. Return without + // relooping. + // return -1 if zero + int cPos = charBuffer.position(); + return cPos == 0 ? 
-1 : cPos; + } + } + } + } + } + + private void calculateLineAndCol(CharBuffer charBuffer) { + if (tokenizer != null) { + if (lineColPos == 0) { + line = tokenizer.getLine(); + col = tokenizer.getCol(); + nextCharOnNewLine = tokenizer.isNextCharOnNewLine(); + prevWasCR = tokenizer.isPrevCR(); + } + + char[] charArray = charBuffer.array(); + int i = lineColPos; + while (i < charBuffer.position()) { + char c; + if (nextCharOnNewLine) { + line++; + col = 1; + nextCharOnNewLine = false; + } else { + col++; + } + + c = charArray[i]; + switch (c) { + case '\r': + nextCharOnNewLine = true; + prevWasCR = true; + break; + case '\n': + if (prevWasCR) { + col--; + } else { + nextCharOnNewLine = true; + } + break; + } + i++; + } + lineColPos = i; + } + } + + public int readByte() throws IOException { + if (!sniffing) { + throw new IllegalStateException( + "readByte() called when not in the sniffing state."); + } + if (position == SNIFFING_LIMIT) { + return -1; + } else if (position < limit) { + return byteArray[position++] & 0xFF; + } else { + int num = inputStream.read(byteArray, limit, SNIFFING_LIMIT - limit); + if (num == -1) { + return -1; + } else { + limit += num; + return byteArray[position++] & 0xFF; + } + } + } + + public static void main(String[] args) { + CharsetDecoder dec = Charset.forName("UTF-8").newDecoder(); + dec.onMalformedInput(CodingErrorAction.REPORT); + dec.onUnmappableCharacter(CodingErrorAction.REPORT); + byte[] bytes = { (byte) 0xF0, (byte) 0x9D, (byte) 0x80, (byte) 0x80 }; + byte[] bytes2 = { (byte) 0xB8, (byte) 0x80, 0x63, 0x64, 0x65 }; + ByteBuffer byteBuf = ByteBuffer.wrap(bytes); + ByteBuffer byteBuf2 = ByteBuffer.wrap(bytes2); + char[] chars = new char[1]; + CharBuffer charBuf = CharBuffer.wrap(chars); + + CoderResult cr = dec.decode(byteBuf, charBuf, false); + System.out.println(cr); + System.out.println(byteBuf); + // byteBuf.get(); + cr = dec.decode(byteBuf2, charBuf, false); + System.out.println(cr); + System.out.println(byteBuf2); + + } + + 
public int getColumnNumber() { + if (tokenizer != null) { + return col; + } + return -1; + } + + public int getLineNumber() { + if (tokenizer != null) { + return line; + } + return -1; + } + + public String getPublicId() { + if (tokenizer != null) { + return tokenizer.getPublicId(); + } + return null; + } + + public String getSystemId() { + if (tokenizer != null) { + return tokenizer.getSystemId(); + } + return null; + } + + /** + * @param string + * @throws SAXException + */ + private void err(String message) throws IOException { + // TODO remove wrapping when changing read() to take a CharBuffer + try { + if (errorHandler != null) { + SAXParseException spe = new SAXParseException(message, this); + errorHandler.error(spe); + } + } catch (SAXException e) { + throw (IOException) new IOException(e.getMessage()).initCause(e); + } + } + + private void warn(String message) throws IOException { + // TODO remove wrapping when changing read() to take a CharBuffer + try { + if (errorHandler != null) { + SAXParseException spe = new SAXParseException(message, this); + errorHandler.warning(spe); + } + } catch (SAXException e) { + throw (IOException) new IOException(e.getMessage()).initCause(e); + } + } + + public Charset getCharset() { + return decoder.charset(); + } + + /** + * @see java.io.Reader#read() + */ + @Override public int read() throws IOException { + throw new UnsupportedOperationException(); + } + + /** + * @see java.io.Reader#read(char[], int, int) + */ + @Override public int read(char[] cbuf, int off, int len) throws IOException { + throw new UnsupportedOperationException(); + } + + /** + * @see java.io.Reader#read(java.nio.CharBuffer) + */ + @Override public int read(CharBuffer target) throws IOException { + throw new UnsupportedOperationException(); + } + + public void switchEncoding(Encoding newEnc) { + this.decoder = newEnc.newDecoder(); + initDecoder(); + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/MetaSniffer.java 
b/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/MetaSniffer.java new file mode 100644 index 000000000..baa04e44f --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/io/MetaSniffer.java @@ -0,0 +1,199 @@ +/* + * Copyright (c) 2009 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +package nu.validator.htmlparser.io; + +import java.io.IOException; +import java.nio.charset.UnsupportedCharsetException; + +import nu.validator.htmlparser.common.ByteReadable; +import nu.validator.htmlparser.impl.MetaScanner; + +import org.xml.sax.ErrorHandler; +import org.xml.sax.Locator; +import org.xml.sax.SAXException; +import org.xml.sax.SAXParseException; + +public class MetaSniffer extends MetaScanner implements Locator { + + private Encoding characterEncoding = null; + + private final ErrorHandler errorHandler; + + private final Locator locator; + + private int line = 1; + + private int col = 0; + + private boolean prevWasCR = false; + + public MetaSniffer(ErrorHandler eh, Locator locator) { + this.errorHandler = eh; + this.locator = locator; + this.characterEncoding = null; + } + + /** + * -1 means end. + * @return + * @throws IOException + */ + protected int read() throws IOException { + int b = readable.readByte(); + // [NOCPP[ + switch (b) { + case '\n': + if (!prevWasCR) { + line++; + col = 0; + } + prevWasCR = false; + break; + case '\r': + line++; + col = 0; + prevWasCR = true; + break; + default: + col++; + prevWasCR = false; + break; + } + // ]NOCPP] + return b; + } + + /** + * Main loop. 
+ * + * @return + * + * @throws SAXException + * @throws IOException + * @throws + */ + public Encoding sniff(ByteReadable readable) throws SAXException, IOException { + this.readable = readable; + stateLoop(stateSave); + return characterEncoding; + } + + + /** + * @param string + * @throws SAXException + */ + private void err(String message) throws SAXException { + if (errorHandler != null) { + SAXParseException spe = new SAXParseException(message, this); + errorHandler.error(spe); + } + } + + /** + * @param string + * @throws SAXException + */ + private void warn(String message) throws SAXException { + if (errorHandler != null) { + SAXParseException spe = new SAXParseException(message, this); + errorHandler.warning(spe); + } + } + + public int getColumnNumber() { + return col; + } + + public int getLineNumber() { + return line; + } + + public String getPublicId() { + if (locator != null) { + return locator.getPublicId(); + } + return null; + } + + public String getSystemId() { + if (locator != null) { + return locator.getSystemId(); + } + return null; + } + + protected boolean tryCharset(String encoding) throws SAXException { + encoding = Encoding.toAsciiLowerCase(encoding); + try { + // XXX spec says only UTF-16 + if ("utf-16".equals(encoding) || "utf-16be".equals(encoding) || "utf-16le".equals(encoding) || "utf-32".equals(encoding) || "utf-32be".equals(encoding) || "utf-32le".equals(encoding)) { + this.characterEncoding = Encoding.UTF8; + err("The internal character encoding declaration specified \u201C" + encoding + "\u201D which is not a rough superset of ASCII. Using \u201CUTF-8\u201D instead."); + return true; + } else { + Encoding cs = Encoding.forName(encoding); + String canonName = cs.getCanonName(); + if (!cs.isAsciiSuperset()) { + err("The encoding \u201C" + + encoding + + "\u201D is not an ASCII superset and, therefore, cannot be used in an internal encoding declaration. 
Continuing the sniffing algorithm."); + return false; + } + if (!cs.isRegistered()) { + if (encoding.startsWith("x-")) { + err("The encoding \u201C" + + encoding + + "\u201D is not an IANA-registered encoding. (Charmod C022)"); + } else { + err("The encoding \u201C" + + encoding + + "\u201D is not an IANA-registered encoding and did not use the \u201Cx-\u201D prefix. (Charmod C023)"); + } + } else if (!cs.getCanonName().equals(encoding)) { + err("The encoding \u201C" + encoding + + "\u201D is not the preferred name of the character encoding in use. The preferred name is \u201C" + + canonName + "\u201D. (Charmod C024)"); + } + if (cs.isShouldNot()) { + warn("Authors should not use the character encoding \u201C" + + encoding + + "\u201D. It is recommended to use \u201CUTF-8\u201D."); + } else if (cs.isObscure()) { + warn("The character encoding \u201C" + encoding + "\u201D is not widely supported. Better interoperability may be achieved by using \u201CUTF-8\u201D."); + } + Encoding actual = cs.getActualHtmlEncoding(); + if (actual == null) { + this.characterEncoding = cs; + } else { + warn("Using \u201C" + actual.getCanonName() + "\u201D instead of the declared encoding \u201C" + encoding + "\u201D."); + this.characterEncoding = actual; + } + return true; + } + } catch (UnsupportedCharsetException e) { + err("Unsupported character encoding name: \u201C" + encoding + "\u201D. Will continue sniffing."); + } + return false; + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/rewindable/Rewindable.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/rewindable/Rewindable.java new file mode 100644 index 000000000..47a3d5eb0 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/rewindable/Rewindable.java @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2001-2003 Thai Open Source Software Center Ltd + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * * Neither the name of the Thai Open Source Software Center Ltd nor + * the names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +package nu.validator.htmlparser.rewindable; + +public interface Rewindable { + void willNotRewind(); + + void rewind(); + + boolean canRewind(); +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/rewindable/RewindableInputStream.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/rewindable/RewindableInputStream.java new file mode 100644 index 000000000..3a1cc1b91 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/rewindable/RewindableInputStream.java @@ -0,0 +1,235 @@ +/* + * Copyright (c) 2001-2003 Thai Open Source Software Center Ltd + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * * Neither the name of the Thai Open Source Software Center Ltd nor + * the names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +package nu.validator.htmlparser.rewindable; + +import java.io.IOException; +import java.io.InputStream; + +public class RewindableInputStream extends InputStream implements Rewindable { + static class Block { + Block next; + + final byte[] buf; + + int used = 0; + + static final int MIN_SIZE = 1024; + + Block(int minSize) { + buf = new byte[Math.max(MIN_SIZE, minSize)]; + } + + Block() { + this(0); + } + + void append(byte b) { + buf[used++] = b; + } + + void append(byte[] b, int off, int len) { + System.arraycopy(b, off, buf, used, len); + used += len; + } + } + + private Block head; + + /** + * If curBlockAvail > 0, then there are curBlockAvail bytes available to be + * returned starting at curBlockPos in curBlock.buf. 
+ */ + private int curBlockAvail; + + private Block curBlock; + + private int curBlockPos; + + private Block lastBlock; + + /** + * true unless willNotRewind has been called + */ + private boolean saving = true; + + private final InputStream in; + + private boolean pretendClosed = false; + + /** + * true if we have got an EOF from the underlying InputStream + */ + private boolean eof; + + public RewindableInputStream(InputStream in) { + if (in == null) + throw new NullPointerException(); + this.in = in; + } + + public void close() throws IOException { + if (saving) { + curBlockAvail = 0; + curBlock = null; + pretendClosed = true; + } else { + head = null; + curBlock = null; + lastBlock = null; + saving = false; + curBlockAvail = 0; + in.close(); + } + } + + public void rewind() { + if (!saving) + throw new IllegalStateException("rewind() after willNotRewind()"); + pretendClosed = false; + if (head == null) + return; + curBlock = head; + curBlockPos = 0; + curBlockAvail = curBlock.used; + } + + public boolean canRewind() { + return saving; + } + + public void willNotRewind() { + saving = false; + head = null; + lastBlock = null; + if (pretendClosed) { + pretendClosed = false; + try { + in.close(); + } catch (IOException e) { + } + } + } + + public int read() throws IOException { + if (curBlockAvail > 0) { + int c = curBlock.buf[curBlockPos++] & 0xFF; + --curBlockAvail; + if (curBlockAvail == 0) { + curBlock = curBlock.next; + if (curBlock != null) { + curBlockPos = 0; + curBlockAvail = curBlock.used; + } + } + return c; + } + int c = in.read(); + if (saving && c != -1) { + if (lastBlock == null) + lastBlock = head = new Block(); + else if (lastBlock.used == lastBlock.buf.length) + lastBlock = lastBlock.next = new Block(); + lastBlock.append((byte) c); + } + return c; + } + + public int read(byte b[], int off, int len) throws IOException { + if (curBlockAvail == 0 && !saving) + return in.read(b, off, len); + if (b == null) + throw new NullPointerException(); + if 
(len < 0) + throw new IndexOutOfBoundsException(); + int nRead = 0; + if (curBlockAvail != 0) { + for (;;) { + if (len == 0) + return nRead; + b[off++] = curBlock.buf[curBlockPos++]; + --len; + nRead++; + --curBlockAvail; + if (curBlockAvail == 0) { + curBlock = curBlock.next; + if (curBlock == null) + break; + curBlockAvail = curBlock.used; + curBlockPos = 0; + } + } + } + if (len == 0) + return nRead; + if (eof) + return nRead > 0 ? nRead : -1; + try { + int n = in.read(b, off, len); + if (n < 0) { + eof = true; + return nRead > 0 ? nRead : -1; + } + nRead += n; + if (saving) { + if (lastBlock == null) + lastBlock = head = new Block(n); + else if (lastBlock.buf.length - lastBlock.used < n) { + if (lastBlock.used != lastBlock.buf.length) { + int free = lastBlock.buf.length - lastBlock.used; + lastBlock.append(b, off, free); + off += free; + n -= free; + } + lastBlock = lastBlock.next = new Block(n); + } + lastBlock.append(b, off, n); + } + } catch (IOException e) { + eof = true; + if (nRead == 0) + throw e; + } + return nRead; + } + + public int available() throws IOException { + if (curBlockAvail == 0) + return in.available(); + int n = curBlockAvail; + for (Block b = curBlock.next; b != null; b = b.next) + n += b.used; + return n + in.available(); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/HtmlParser.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/HtmlParser.java new file mode 100644 index 000000000..714053e70 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/HtmlParser.java @@ -0,0 +1,1097 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2007-2010 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, 
sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.sax; + +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.LinkedList; +import java.util.List; +import java.util.HashMap; + +import nu.validator.htmlparser.common.CharacterHandler; +import nu.validator.htmlparser.common.DoctypeExpectation; +import nu.validator.htmlparser.common.DocumentModeHandler; +import nu.validator.htmlparser.common.Heuristics; +import nu.validator.htmlparser.common.TokenHandler; +import nu.validator.htmlparser.common.TransitionHandler; +import nu.validator.htmlparser.common.XmlViolationPolicy; +import nu.validator.htmlparser.impl.ErrorReportingTokenizer; +import nu.validator.htmlparser.impl.Tokenizer; +import nu.validator.htmlparser.impl.TreeBuilder; +import nu.validator.htmlparser.io.Driver; +import nu.validator.saxtree.Document; +import nu.validator.saxtree.DocumentFragment; +import nu.validator.saxtree.TreeParser; + +import org.xml.sax.ContentHandler; +import org.xml.sax.DTDHandler; +import org.xml.sax.EntityResolver; +import org.xml.sax.ErrorHandler; +import org.xml.sax.InputSource; +import org.xml.sax.Locator; +import org.xml.sax.SAXException; +import 
org.xml.sax.SAXNotRecognizedException; +import org.xml.sax.SAXNotSupportedException; +import org.xml.sax.XMLReader; +import org.xml.sax.ext.LexicalHandler; +import org.xml.sax.helpers.DefaultHandler; + +/** + * This class implements an HTML5 parser that exposes data through the SAX2 + * interface. + * + * <p>By default, when using the constructor without arguments, + * this parser coerces XML 1.0-incompatible infosets into XML 1.0-compatible + * infosets. This corresponds to <code>ALTER_INFOSET</code> as the general + * XML violation policy. To make the parser support non-conforming HTML fully + * per the HTML 5 spec while on the other hand potentially violating the SAX2 + * API contract, set the general XML violation policy to <code>ALLOW</code>. + * It is possible to treat XML 1.0 infoset violations as fatal by setting + * the general XML violation policy to <code>FATAL</code>. + * + * <p>By default, this parser doesn't do true streaming but buffers everything + * first. The parser can be made truly streaming by calling + * <code>setStreamabilityViolationPolicy(XmlViolationPolicy.FATAL)</code>. This + * has the consequence that errors that require non-streamable recovery are + * treated as fatal. + * + * <p>By default, in order to make the parse events emulate the parse events + * for a DTDless XML document, the parser does not report the doctype through + * <code>LexicalHandler</code>. Doctype reporting through + * <code>LexicalHandler</code> can be turned on by calling + * <code>setReportingDoctype(true)</code>. 
+ * + * @version $Id$ + * @author hsivonen + */ +public class HtmlParser implements XMLReader { + + private Driver driver = null; + + private TreeBuilder<?> treeBuilder = null; + + private SAXStreamer saxStreamer = null; // work around javac bug + + private SAXTreeBuilder saxTreeBuilder = null; // work around javac bug + + private ContentHandler contentHandler = null; + + private LexicalHandler lexicalHandler = null; + + private DTDHandler dtdHandler = null; + + private EntityResolver entityResolver = null; + + private ErrorHandler errorHandler = null; + + private DocumentModeHandler documentModeHandler = null; + + private DoctypeExpectation doctypeExpectation = DoctypeExpectation.HTML; + + private boolean checkingNormalization = false; + + private boolean scriptingEnabled = false; + + private final List<CharacterHandler> characterHandlers = new LinkedList<CharacterHandler>(); + + private XmlViolationPolicy contentSpacePolicy = XmlViolationPolicy.FATAL; + + private XmlViolationPolicy contentNonXmlCharPolicy = XmlViolationPolicy.FATAL; + + private XmlViolationPolicy commentPolicy = XmlViolationPolicy.FATAL; + + private XmlViolationPolicy namePolicy = XmlViolationPolicy.FATAL; + + private XmlViolationPolicy streamabilityViolationPolicy = XmlViolationPolicy.ALLOW; + + private boolean html4ModeCompatibleWithXhtml1Schemata = false; + + private boolean mappingLangToXmlLang = false; + + private XmlViolationPolicy xmlnsPolicy = XmlViolationPolicy.FATAL; + + private boolean reportingDoctype = true; + + private ErrorHandler treeBuilderErrorHandler = null; + + private Heuristics heuristics = Heuristics.NONE; + + private HashMap<String, String> errorProfileMap = null; + + private TransitionHandler transitionHandler = null; + + /** + * Instantiates the parser with a fatal XML violation policy. + * + */ + public HtmlParser() { + this(XmlViolationPolicy.FATAL); + } + + /** + * Instantiates the parser with a specific XML violation policy. 
+ * @param xmlPolicy the policy + */ + public HtmlParser(XmlViolationPolicy xmlPolicy) { + setXmlPolicy(xmlPolicy); + } + + private Tokenizer newTokenizer(TokenHandler handler, boolean newAttributesEachTime) { + if (errorHandler == null && transitionHandler == null && + contentNonXmlCharPolicy == XmlViolationPolicy.ALLOW) { + return new Tokenizer(handler, newAttributesEachTime); + } + ErrorReportingTokenizer tokenizer = + new ErrorReportingTokenizer(handler, newAttributesEachTime); + tokenizer.setErrorProfile(errorProfileMap); + return tokenizer; + } + + /** + * This class wraps different tree builders depending on configuration. This + * method does the work of hiding this from the user of the class. + */ + private void lazyInit() { + if (driver == null) { + if (streamabilityViolationPolicy == XmlViolationPolicy.ALLOW) { + this.saxTreeBuilder = new SAXTreeBuilder(); + this.treeBuilder = this.saxTreeBuilder; + this.saxStreamer = null; + this.driver = new Driver(newTokenizer(treeBuilder, true)); + } else { + this.saxStreamer = new SAXStreamer(); + this.treeBuilder = this.saxStreamer; + this.saxTreeBuilder = null; + this.driver = new Driver(newTokenizer(treeBuilder, false)); + } + this.driver.setErrorHandler(errorHandler); + this.driver.setTransitionHandler(transitionHandler); + this.treeBuilder.setErrorHandler(treeBuilderErrorHandler); + this.driver.setCheckingNormalization(checkingNormalization); + this.driver.setCommentPolicy(commentPolicy); + this.driver.setContentNonXmlCharPolicy(contentNonXmlCharPolicy); + this.driver.setContentSpacePolicy(contentSpacePolicy); + this.driver.setHtml4ModeCompatibleWithXhtml1Schemata(html4ModeCompatibleWithXhtml1Schemata); + this.driver.setMappingLangToXmlLang(mappingLangToXmlLang); + this.driver.setXmlnsPolicy(xmlnsPolicy); + this.driver.setHeuristics(heuristics); + for (CharacterHandler characterHandler : characterHandlers) { + this.driver.addCharacterHandler(characterHandler); + } + 
this.treeBuilder.setDoctypeExpectation(doctypeExpectation); + this.treeBuilder.setDocumentModeHandler(documentModeHandler); + this.treeBuilder.setIgnoringComments(lexicalHandler == null); + this.treeBuilder.setScriptingEnabled(scriptingEnabled); + this.treeBuilder.setReportingDoctype(reportingDoctype); + this.treeBuilder.setNamePolicy(namePolicy); + if (saxStreamer != null) { + saxStreamer.setContentHandler(contentHandler == null ? new DefaultHandler() + : contentHandler); + saxStreamer.setLexicalHandler(lexicalHandler); + driver.setAllowRewinding(false); + } + } + } + + /** + * @see org.xml.sax.XMLReader#getContentHandler() + */ + public ContentHandler getContentHandler() { + return contentHandler; + } + + /** + * @see org.xml.sax.XMLReader#getDTDHandler() + */ + public DTDHandler getDTDHandler() { + return dtdHandler; + } + + /** + * @see org.xml.sax.XMLReader#getEntityResolver() + */ + public EntityResolver getEntityResolver() { + return entityResolver; + } + + /** + * @see org.xml.sax.XMLReader#getErrorHandler() + */ + public ErrorHandler getErrorHandler() { + return errorHandler; + } + + /** + * Exposes the configuration of the emulated XML parser as well as + * boolean-valued configuration without using non-<code>XMLReader</code> + * getters directly. 
+ * + * <dl> + * <dt><code>http://xml.org/sax/features/external-general-entities</code></dt> + * <dd><code>false</code></dd> + * <dt><code>http://xml.org/sax/features/external-parameter-entities</code></dt> + * <dd><code>false</code></dd> + * <dt><code>http://xml.org/sax/features/is-standalone</code></dt> + * <dd><code>true</code></dd> + * <dt><code>http://xml.org/sax/features/lexical-handler/parameter-entities</code></dt> + * <dd><code>false</code></dd> + * <dt><code>http://xml.org/sax/features/namespaces</code></dt> + * <dd><code>true</code></dd> + * <dt><code>http://xml.org/sax/features/namespace-prefixes</code></dt> + * <dd><code>false</code></dd> + * <dt><code>http://xml.org/sax/features/resolve-dtd-uris</code></dt> + * <dd><code>true</code></dd> + * <dt><code>http://xml.org/sax/features/string-interning</code></dt> + * <dd><code>true</code></dd> + * <dt><code>http://xml.org/sax/features/unicode-normalization-checking</code></dt> + * <dd><code>isCheckingNormalization</code></dd> + * <dt><code>http://xml.org/sax/features/use-attributes2</code></dt> + * <dd><code>false</code></dd> + * <dt><code>http://xml.org/sax/features/use-locator2</code></dt> + * <dd><code>false</code></dd> + * <dt><code>http://xml.org/sax/features/use-entity-resolver2</code></dt> + * <dd><code>false</code></dd> + * <dt><code>http://xml.org/sax/features/validation</code></dt> + * <dd><code>false</code></dd> + * <dt><code>http://xml.org/sax/features/xmlns-uris</code></dt> + * <dd><code>false</code></dd> + * <dt><code>http://xml.org/sax/features/xml-1.1</code></dt> + * <dd><code>false</code></dd> + * <dt><code>http://validator.nu/features/html4-mode-compatible-with-xhtml1-schemata</code></dt> + * <dd><code>isHtml4ModeCompatibleWithXhtml1Schemata</code></dd> + * <dt><code>http://validator.nu/features/mapping-lang-to-xml-lang</code></dt> + * <dd><code>isMappingLangToXmlLang</code></dd> + * <dt><code>http://validator.nu/features/scripting-enabled</code></dt> + * 
<dd><code>isScriptingEnabled</code></dd> + * </dl> + * + * @param name + * feature URI string + * @return a value per the list above + * @see org.xml.sax.XMLReader#getFeature(java.lang.String) + */ + public boolean getFeature(String name) throws SAXNotRecognizedException, + SAXNotSupportedException { + if ("http://xml.org/sax/features/external-general-entities".equals(name)) { + return false; + } else if ("http://xml.org/sax/features/external-parameter-entities".equals(name)) { + return false; + } else if ("http://xml.org/sax/features/is-standalone".equals(name)) { + return true; + } else if ("http://xml.org/sax/features/lexical-handler/parameter-entities".equals(name)) { + return false; + } else if ("http://xml.org/sax/features/namespaces".equals(name)) { + return true; + } else if ("http://xml.org/sax/features/namespace-prefixes".equals(name)) { + return false; + } else if ("http://xml.org/sax/features/resolve-dtd-uris".equals(name)) { + return true; // default value--applicable scenario never happens + } else if ("http://xml.org/sax/features/string-interning".equals(name)) { + return true; + } else if ("http://xml.org/sax/features/unicode-normalization-checking".equals(name)) { + return isCheckingNormalization(); // the checks aren't really per + // XML 1.1 + } else if ("http://xml.org/sax/features/use-attributes2".equals(name)) { + return false; + } else if ("http://xml.org/sax/features/use-locator2".equals(name)) { + return false; + } else if ("http://xml.org/sax/features/use-entity-resolver2".equals(name)) { + return false; + } else if ("http://xml.org/sax/features/validation".equals(name)) { + return false; + } else if ("http://xml.org/sax/features/xmlns-uris".equals(name)) { + return false; + } else if ("http://xml.org/sax/features/xml-1.1".equals(name)) { + return false; + } else if ("http://validator.nu/features/html4-mode-compatible-with-xhtml1-schemata".equals(name)) { + return isHtml4ModeCompatibleWithXhtml1Schemata(); + } else if 
("http://validator.nu/features/mapping-lang-to-xml-lang".equals(name)) { + return isMappingLangToXmlLang(); + } else if ("http://validator.nu/features/scripting-enabled".equals(name)) { + return isScriptingEnabled(); + } else { + throw new SAXNotRecognizedException(); + } + } + + /** + * Allows <code>XMLReader</code>-level access to non-boolean valued + * getters. + * + * <p> + * The properties are mapped as follows: + * + * <dl> + * <dt><code>http://xml.org/sax/properties/document-xml-version</code></dt> + * <dd><code>"1.0"</code></dd> + * <dt><code>http://xml.org/sax/properties/lexical-handler</code></dt> + * <dd><code>getLexicalHandler</code></dd> + * <dt><code>http://validator.nu/properties/content-space-policy</code></dt> + * <dd><code>getContentSpacePolicy</code></dd> + * <dt><code>http://validator.nu/properties/content-non-xml-char-policy</code></dt> + * <dd><code>getContentNonXmlCharPolicy</code></dd> + * <dt><code>http://validator.nu/properties/comment-policy</code></dt> + * <dd><code>getCommentPolicy</code></dd> + * <dt><code>http://validator.nu/properties/xmlns-policy</code></dt> + * <dd><code>getXmlnsPolicy</code></dd> + * <dt><code>http://validator.nu/properties/name-policy</code></dt> + * <dd><code>getNamePolicy</code></dd> + * <dt><code>http://validator.nu/properties/streamability-violation-policy</code></dt> + * <dd><code>getStreamabilityViolationPolicy</code></dd> + * <dt><code>http://validator.nu/properties/document-mode-handler</code></dt> + * <dd><code>getDocumentModeHandler</code></dd> + * <dt><code>http://validator.nu/properties/doctype-expectation</code></dt> + * <dd><code>getDoctypeExpectation</code></dd> + * <dt><code>http://xml.org/sax/features/unicode-normalization-checking</code></dt> + * </dl> + * + * @param name + * property URI string + * @return a value per the list above + * @see org.xml.sax.XMLReader#getProperty(java.lang.String) + */ + public Object getProperty(String name) throws SAXNotRecognizedException, + 
SAXNotSupportedException { + if ("http://xml.org/sax/properties/declaration-handler".equals(name)) { + throw new SAXNotSupportedException( + "This parser does not suppert DeclHandler."); + } else if ("http://xml.org/sax/properties/document-xml-version".equals(name)) { + return "1.0"; // Emulating an XML 1.1 parser is not supported. + } else if ("http://xml.org/sax/properties/dom-node".equals(name)) { + throw new SAXNotSupportedException( + "This parser does not walk the DOM."); + } else if ("http://xml.org/sax/properties/lexical-handler".equals(name)) { + return getLexicalHandler(); + } else if ("http://xml.org/sax/properties/xml-string".equals(name)) { + throw new SAXNotSupportedException( + "This parser does not expose the source as a string."); + } else if ("http://validator.nu/properties/content-space-policy".equals(name)) { + return getContentSpacePolicy(); + } else if ("http://validator.nu/properties/content-non-xml-char-policy".equals(name)) { + return getContentNonXmlCharPolicy(); + } else if ("http://validator.nu/properties/comment-policy".equals(name)) { + return getCommentPolicy(); + } else if ("http://validator.nu/properties/xmlns-policy".equals(name)) { + return getXmlnsPolicy(); + } else if ("http://validator.nu/properties/name-policy".equals(name)) { + return getNamePolicy(); + } else if ("http://validator.nu/properties/streamability-violation-policy".equals(name)) { + return getStreamabilityViolationPolicy(); + } else if ("http://validator.nu/properties/document-mode-handler".equals(name)) { + return getDocumentModeHandler(); + } else if ("http://validator.nu/properties/doctype-expectation".equals(name)) { + return getDoctypeExpectation(); + } else if ("http://validator.nu/properties/xml-policy".equals(name)) { + throw new SAXNotSupportedException( + "Cannot get a convenience setter."); + } else if ("http://validator.nu/properties/heuristics".equals(name)) { + return getHeuristics(); + } else { + throw new SAXNotRecognizedException(); + } + } + + 
/** + * @see org.xml.sax.XMLReader#parse(org.xml.sax.InputSource) + */ + public void parse(InputSource input) throws IOException, SAXException { + lazyInit(); + try { + treeBuilder.setFragmentContext(null); + tokenize(input); + } finally { + if (saxTreeBuilder != null) { + Document document = saxTreeBuilder.getDocument(); + if (document != null) { + new TreeParser(contentHandler, lexicalHandler).parse(document); + } + } + } + } + + /** + * Parses a fragment with HTML context. + * + * @param input the input to parse + * @param context the name of the context element (HTML namespace assumed) + * @throws IOException + * @throws SAXException + */ + public void parseFragment(InputSource input, String context) + throws IOException, SAXException { + lazyInit(); + try { + treeBuilder.setFragmentContext(context.intern()); + tokenize(input); + } finally { + if (saxTreeBuilder != null) { + DocumentFragment fragment = saxTreeBuilder.getDocumentFragment(); + new TreeParser(contentHandler, lexicalHandler).parse(fragment); + } + } + } + + /** + * Parses a fragment. 
+ * + * @param input the input to parse + * @param contextLocal the local name of the context element + * @param contextNamespace the namespace of the context element + * @throws IOException + * @throws SAXException + */ + public void parseFragment(InputSource input, String contextLocal, String contextNamespace) + throws IOException, SAXException { + lazyInit(); + try { + treeBuilder.setFragmentContext(contextLocal.intern(), contextNamespace.intern(), null, false); + tokenize(input); + } finally { + if (saxTreeBuilder != null) { + DocumentFragment fragment = saxTreeBuilder.getDocumentFragment(); + new TreeParser(contentHandler, lexicalHandler).parse(fragment); + } + } + } + + /** + * Hands the input to the driver, first obtaining a byte stream for it + * when it carries neither a byte stream nor a character stream. + * + * <p>In that case the entity resolver, if one is set, is asked to resolve + * the public and system ids. A resolved source that has a byte stream or a + * character stream is used as is; if the resolver returns <code>null</code> + * or a source without any stream, the system id is opened as a URL. + * + * @param is the input to tokenize; must not be <code>null</code> + * @throws IllegalArgumentException if <code>is</code> is <code>null</code>, + * or has no byte stream, no character stream and no system id + * @throws SAXException + * @throws IOException + * @throws MalformedURLException + */ + private void tokenize(InputSource is) throws SAXException, IOException, MalformedURLException { + if (is == null) { + throw new IllegalArgumentException("Null input."); + } + if (is.getByteStream() == null && is.getCharacterStream() == null) { + String systemId = is.getSystemId(); + if (systemId == null) { + throw new IllegalArgumentException("No byte stream, no character stream nor URI."); + } + if (entityResolver != null) { + is = entityResolver.resolveEntity(is.getPublicId(), systemId); + } + /* resolveEntity may return null, and one stream of either kind is enough. */ + if (is == null || (is.getByteStream() == null && is.getCharacterStream() == null)) { + is = new InputSource(); + is.setSystemId(systemId); + is.setByteStream(new URL(systemId).openStream()); + } + } + driver.tokenize(is); + } + + /** + * @see org.xml.sax.XMLReader#parse(java.lang.String) + */ + public void parse(String systemId) throws IOException, SAXException { + parse(new InputSource(systemId)); + } + + /** + * @see org.xml.sax.XMLReader#setContentHandler(org.xml.sax.ContentHandler) + */ + public void setContentHandler(ContentHandler handler) { + contentHandler = handler; + if (saxStreamer != null) { + saxStreamer.setContentHandler(contentHandler == null ? 
new DefaultHandler() + : contentHandler); + } + } + + /** + * Sets the lexical handler. If a tree builder already exists, it is told + * to ignore comments exactly when the handler is <code>null</code>, and the + * handler is also passed to the SAX streamer when one is in use. + * @param handler the handler. + */ + public void setLexicalHandler(LexicalHandler handler) { + lexicalHandler = handler; + if (treeBuilder != null) { + treeBuilder.setIgnoringComments(handler == null); + if (saxStreamer != null) { + saxStreamer.setLexicalHandler(handler); + } + } + } + + /** + * @see org.xml.sax.XMLReader#setDTDHandler(org.xml.sax.DTDHandler) + */ + public void setDTDHandler(DTDHandler handler) { + dtdHandler = handler; + } + + /** + * @see org.xml.sax.XMLReader#setEntityResolver(org.xml.sax.EntityResolver) + */ + public void setEntityResolver(EntityResolver resolver) { + entityResolver = resolver; + } + + /** + * @see org.xml.sax.XMLReader#setErrorHandler(org.xml.sax.ErrorHandler) + */ + public void setErrorHandler(ErrorHandler handler) { + errorHandler = handler; + treeBuilderErrorHandler = handler; + driver = null; + } + + public void setTransitionHandler(TransitionHandler handler) { + transitionHandler = handler; + driver = null; + } + + /** + * @see org.xml.sax.XMLReader#setErrorHandler(org.xml.sax.ErrorHandler) + * @deprecated For Validator.nu internal use + */ + public void setTreeBuilderErrorHandlerOverride(ErrorHandler handler) { + treeBuilderErrorHandler = handler; + if (driver != null) { + treeBuilder.setErrorHandler(handler); + } + } + + /** + * Sets a boolean feature without having to use non-<code>XMLReader</code> + * setters directly. 
+ * + * <p> + * The supported features are: + * + * <dl> + * <dt><code>http://xml.org/sax/features/unicode-normalization-checking</code></dt> + * <dd><code>setCheckingNormalization</code></dd> + * <dt><code>http://validator.nu/features/html4-mode-compatible-with-xhtml1-schemata</code></dt> + * <dd><code>setHtml4ModeCompatibleWithXhtml1Schemata</code></dd> + * <dt><code>http://validator.nu/features/mapping-lang-to-xml-lang</code></dt> + * <dd><code>setMappingLangToXmlLang</code></dd> + * <dt><code>http://validator.nu/features/scripting-enabled</code></dt> + * <dd><code>setScriptingEnabled</code></dd> + * </dl> + * + * @see org.xml.sax.XMLReader#setFeature(java.lang.String, boolean) + */ + public void setFeature(String name, boolean value) + throws SAXNotRecognizedException, SAXNotSupportedException { + if ("http://xml.org/sax/features/external-general-entities".equals(name)) { + if (value) { + throw new SAXNotSupportedException("Cannot set " + name + "."); + } + } else if ("http://xml.org/sax/features/external-parameter-entities".equals(name)) { + if (value) { + throw new SAXNotSupportedException("Cannot set " + name + "."); + } + } else if ("http://xml.org/sax/features/is-standalone".equals(name)) { + if (!value) { + throw new SAXNotSupportedException("Cannot set " + name + "."); + } + } else if ("http://xml.org/sax/features/lexical-handler/parameter-entities".equals(name)) { + if (value) { + throw new SAXNotSupportedException("Cannot set " + name + "."); + } + } else if ("http://xml.org/sax/features/namespaces".equals(name)) { + if (!value) { + throw new SAXNotSupportedException("Cannot set " + name + "."); + } + } else if ("http://xml.org/sax/features/namespace-prefixes".equals(name)) { + if (value) { + throw new SAXNotSupportedException("Cannot set " + name + "."); + } + } else if ("http://xml.org/sax/features/resolve-dtd-uris".equals(name)) { + if (!value) { + throw new SAXNotSupportedException("Cannot set " + name + "."); + } + } else if 
("http://xml.org/sax/features/string-interning".equals(name)) { + if (!value) { + throw new SAXNotSupportedException("Cannot set " + name + "."); + } + } else if ("http://xml.org/sax/features/unicode-normalization-checking".equals(name)) { + setCheckingNormalization(value); + } else if ("http://xml.org/sax/features/use-attributes2".equals(name)) { + if (value) { + throw new SAXNotSupportedException("Cannot set " + name + "."); + } + } else if ("http://xml.org/sax/features/use-locator2".equals(name)) { + if (value) { + throw new SAXNotSupportedException("Cannot set " + name + "."); + } + } else if ("http://xml.org/sax/features/use-entity-resolver2".equals(name)) { + if (value) { + throw new SAXNotSupportedException("Cannot set " + name + "."); + } + } else if ("http://xml.org/sax/features/validation".equals(name)) { + if (value) { + throw new SAXNotSupportedException("Cannot set " + name + "."); + } + } else if ("http://xml.org/sax/features/xmlns-uris".equals(name)) { + if (value) { + throw new SAXNotSupportedException("Cannot set " + name + "."); + } + } else if ("http://xml.org/sax/features/xml-1.1".equals(name)) { + if (value) { + throw new SAXNotSupportedException("Cannot set " + name + "."); + } + } else if ("http://validator.nu/features/html4-mode-compatible-with-xhtml1-schemata".equals(name)) { + setHtml4ModeCompatibleWithXhtml1Schemata(value); + } else if ("http://validator.nu/features/mapping-lang-to-xml-lang".equals(name)) { + setMappingLangToXmlLang(value); + } else if ("http://validator.nu/features/scripting-enabled".equals(name)) { + setScriptingEnabled(value); + } else { + throw new SAXNotRecognizedException(); + } + } + + /** + * Sets a non-boolean property without having to use non-<code>XMLReader</code> + * setters directly. 
+ * + * <dl> + * <dt><code>http://xml.org/sax/properties/lexical-handler</code></dt> + * <dd><code>setLexicalHandler</code></dd> + * <dt><code>http://validator.nu/properties/content-space-policy</code></dt> + * <dd><code>setContentSpacePolicy</code></dd> + * <dt><code>http://validator.nu/properties/content-non-xml-char-policy</code></dt> + * <dd><code>setContentNonXmlCharPolicy</code></dd> + * <dt><code>http://validator.nu/properties/comment-policy</code></dt> + * <dd><code>setCommentPolicy</code></dd> + * <dt><code>http://validator.nu/properties/xmlns-policy</code></dt> + * <dd><code>setXmlnsPolicy</code></dd> + * <dt><code>http://validator.nu/properties/name-policy</code></dt> + * <dd><code>setNamePolicy</code></dd> + * <dt><code>http://validator.nu/properties/streamability-violation-policy</code></dt> + * <dd><code>setStreamabilityViolationPolicy</code></dd> + * <dt><code>http://validator.nu/properties/document-mode-handler</code></dt> + * <dd><code>setDocumentModeHandler</code></dd> + * <dt><code>http://validator.nu/properties/doctype-expectation</code></dt> + * <dd><code>setDoctypeExpectation</code></dd> + * <dt><code>http://validator.nu/properties/xml-policy</code></dt> + * <dd><code>setXmlPolicy</code></dd> + * </dl> + * + * @see org.xml.sax.XMLReader#setProperty(java.lang.String, + * java.lang.Object) + */ + public void setProperty(String name, Object value) + throws SAXNotRecognizedException, SAXNotSupportedException { + if ("http://xml.org/sax/properties/declaration-handler".equals(name)) { + throw new SAXNotSupportedException( + "This parser does not suppert DeclHandler."); + } else if ("http://xml.org/sax/properties/document-xml-version".equals(name)) { + throw new SAXNotSupportedException( + "Can't set document-xml-version."); + } else if ("http://xml.org/sax/properties/dom-node".equals(name)) { + throw new SAXNotSupportedException("Can't set dom-node."); + } else if ("http://xml.org/sax/properties/lexical-handler".equals(name)) { + 
setLexicalHandler((LexicalHandler) value); + } else if ("http://xml.org/sax/properties/xml-string".equals(name)) { + throw new SAXNotSupportedException("Can't set xml-string."); + } else if ("http://validator.nu/properties/content-space-policy".equals(name)) { + setContentSpacePolicy((XmlViolationPolicy) value); + } else if ("http://validator.nu/properties/content-non-xml-char-policy".equals(name)) { + setContentNonXmlCharPolicy((XmlViolationPolicy) value); + } else if ("http://validator.nu/properties/comment-policy".equals(name)) { + setCommentPolicy((XmlViolationPolicy) value); + } else if ("http://validator.nu/properties/xmlns-policy".equals(name)) { + setXmlnsPolicy((XmlViolationPolicy) value); + } else if ("http://validator.nu/properties/name-policy".equals(name)) { + setNamePolicy((XmlViolationPolicy) value); + } else if ("http://validator.nu/properties/streamability-violation-policy".equals(name)) { + setStreamabilityViolationPolicy((XmlViolationPolicy) value); + } else if ("http://validator.nu/properties/document-mode-handler".equals(name)) { + setDocumentModeHandler((DocumentModeHandler) value); + } else if ("http://validator.nu/properties/doctype-expectation".equals(name)) { + setDoctypeExpectation((DoctypeExpectation) value); + } else if ("http://validator.nu/properties/xml-policy".equals(name)) { + setXmlPolicy((XmlViolationPolicy) value); + } else if ("http://validator.nu/properties/heuristics".equals(name)) { + setHeuristics((Heuristics) value); + } else { + throw new SAXNotRecognizedException(); + } + } + + /** + * Indicates whether NFC normalization of source is being checked. + * @return <code>true</code> if NFC normalization of source is being checked. + * @see nu.validator.htmlparser.impl.Tokenizer#isCheckingNormalization() + */ + public boolean isCheckingNormalization() { + return checkingNormalization; + } + + /** + * Toggles the checking of the NFC normalization of source. 
+ * @param enable <code>true</code> to check normalization + * @see nu.validator.htmlparser.impl.Tokenizer#setCheckingNormalization(boolean) + */ + public void setCheckingNormalization(boolean enable) { + this.checkingNormalization = enable; + if (driver != null) { + driver.setCheckingNormalization(checkingNormalization); + } + } + + /** + * Sets the policy for consecutive hyphens in comments. + * @param commentPolicy the policy + * @see nu.validator.htmlparser.impl.Tokenizer#setCommentPolicy(nu.validator.htmlparser.common.XmlViolationPolicy) + */ + public void setCommentPolicy(XmlViolationPolicy commentPolicy) { + this.commentPolicy = commentPolicy; + if (driver != null) { + driver.setCommentPolicy(commentPolicy); + } + } + + /** + * Sets the policy for non-XML characters except white space. + * @param contentNonXmlCharPolicy the policy + * @see nu.validator.htmlparser.impl.Tokenizer#setContentNonXmlCharPolicy(nu.validator.htmlparser.common.XmlViolationPolicy) + */ + public void setContentNonXmlCharPolicy( + XmlViolationPolicy contentNonXmlCharPolicy) { + this.contentNonXmlCharPolicy = contentNonXmlCharPolicy; + driver = null; + } + + /** + * Sets the policy for non-XML white space. + * @param contentSpacePolicy the policy + * @see nu.validator.htmlparser.impl.Tokenizer#setContentSpacePolicy(nu.validator.htmlparser.common.XmlViolationPolicy) + */ + public void setContentSpacePolicy(XmlViolationPolicy contentSpacePolicy) { + this.contentSpacePolicy = contentSpacePolicy; + if (driver != null) { + driver.setContentSpacePolicy(contentSpacePolicy); + } + } + + /** + * Whether the parser considers scripting to be enabled for noscript treatment. + * + * @return <code>true</code> if enabled + * @see nu.validator.htmlparser.impl.TreeBuilder#isScriptingEnabled() + */ + public boolean isScriptingEnabled() { + return scriptingEnabled; + } + + /** + * Sets whether the parser considers scripting to be enabled for noscript treatment. 
+ * @param scriptingEnabled <code>true</code> to enable + * @see nu.validator.htmlparser.impl.TreeBuilder#setScriptingEnabled(boolean) + */ + public void setScriptingEnabled(boolean scriptingEnabled) { + this.scriptingEnabled = scriptingEnabled; + if (treeBuilder != null) { + treeBuilder.setScriptingEnabled(scriptingEnabled); + } + } + + /** + * Returns the doctype expectation. + * + * @return the doctypeExpectation + */ + public DoctypeExpectation getDoctypeExpectation() { + return doctypeExpectation; + } + + /** + * Sets the doctype expectation. + * + * @param doctypeExpectation + * the doctypeExpectation to set + * @see nu.validator.htmlparser.impl.TreeBuilder#setDoctypeExpectation(nu.validator.htmlparser.common.DoctypeExpectation) + */ + public void setDoctypeExpectation(DoctypeExpectation doctypeExpectation) { + this.doctypeExpectation = doctypeExpectation; + if (treeBuilder != null) { + treeBuilder.setDoctypeExpectation(doctypeExpectation); + } + } + + /** + * Returns the document mode handler. + * + * @return the documentModeHandler + */ + public DocumentModeHandler getDocumentModeHandler() { + return documentModeHandler; + } + + /** + * Sets the document mode handler. If a tree builder has already been + * created, the handler is passed on to it right away, in the same way as + * the doctype expectation and the scripting flag. + * + * @param documentModeHandler + * the documentModeHandler to set + * @see nu.validator.htmlparser.impl.TreeBuilder#setDocumentModeHandler(nu.validator.htmlparser.common.DocumentModeHandler) + */ + public void setDocumentModeHandler(DocumentModeHandler documentModeHandler) { + this.documentModeHandler = documentModeHandler; + if (treeBuilder != null) { + treeBuilder.setDocumentModeHandler(documentModeHandler); + } + } + + /** + * Returns the streamabilityViolationPolicy. + * + * @return the streamabilityViolationPolicy + */ + public XmlViolationPolicy getStreamabilityViolationPolicy() { + return streamabilityViolationPolicy; + } + + /** + * Sets the streamabilityViolationPolicy. 
+ * + * @param streamabilityViolationPolicy + * the streamabilityViolationPolicy to set + */ + public void setStreamabilityViolationPolicy( + XmlViolationPolicy streamabilityViolationPolicy) { + this.streamabilityViolationPolicy = streamabilityViolationPolicy; + driver = null; + } + + /** + * Whether the HTML 4 mode reports boolean attributes in a way that repeats + * the name in the value. + * @param html4ModeCompatibleWithXhtml1Schemata + */ + public void setHtml4ModeCompatibleWithXhtml1Schemata( + boolean html4ModeCompatibleWithXhtml1Schemata) { + this.html4ModeCompatibleWithXhtml1Schemata = html4ModeCompatibleWithXhtml1Schemata; + if (driver != null) { + driver.setHtml4ModeCompatibleWithXhtml1Schemata(html4ModeCompatibleWithXhtml1Schemata); + } + } + + /** + * Returns the <code>Locator</code> during parse. + * @return the <code>Locator</code> + */ + public Locator getDocumentLocator() { + return driver.getDocumentLocator(); + } + + /** + * Whether the HTML 4 mode reports boolean attributes in a way that repeats + * the name in the value. + * + * @return the html4ModeCompatibleWithXhtml1Schemata + */ + public boolean isHtml4ModeCompatibleWithXhtml1Schemata() { + return html4ModeCompatibleWithXhtml1Schemata; + } + + /** + * Whether <code>lang</code> is mapped to <code>xml:lang</code>. + * @param mappingLangToXmlLang + * @see nu.validator.htmlparser.impl.Tokenizer#setMappingLangToXmlLang(boolean) + */ + public void setMappingLangToXmlLang(boolean mappingLangToXmlLang) { + this.mappingLangToXmlLang = mappingLangToXmlLang; + if (driver != null) { + driver.setMappingLangToXmlLang(mappingLangToXmlLang); + } + } + + /** + * Whether <code>lang</code> is mapped to <code>xml:lang</code>. + * + * @return the mappingLangToXmlLang + */ + public boolean isMappingLangToXmlLang() { + return mappingLangToXmlLang; + } + + /** + * Whether the <code>xmlns</code> attribute on the root element is + * passed to through. (FATAL not allowed.) 
+ * @param xmlnsPolicy + * @see nu.validator.htmlparser.impl.Tokenizer#setXmlnsPolicy(nu.validator.htmlparser.common.XmlViolationPolicy) + */ + public void setXmlnsPolicy(XmlViolationPolicy xmlnsPolicy) { + if (xmlnsPolicy == XmlViolationPolicy.FATAL) { + throw new IllegalArgumentException("Can't use FATAL here."); + } + this.xmlnsPolicy = xmlnsPolicy; + if (driver != null) { + driver.setXmlnsPolicy(xmlnsPolicy); + } + } + + /** + * Returns the xmlnsPolicy. + * + * @return the xmlnsPolicy + */ + public XmlViolationPolicy getXmlnsPolicy() { + return xmlnsPolicy; + } + + /** + * Returns the lexicalHandler. + * + * @return the lexicalHandler + */ + public LexicalHandler getLexicalHandler() { + return lexicalHandler; + } + + /** + * Returns the commentPolicy. + * + * @return the commentPolicy + */ + public XmlViolationPolicy getCommentPolicy() { + return commentPolicy; + } + + /** + * Returns the contentNonXmlCharPolicy. + * + * @return the contentNonXmlCharPolicy + */ + public XmlViolationPolicy getContentNonXmlCharPolicy() { + return contentNonXmlCharPolicy; + } + + /** + * Returns the contentSpacePolicy. + * + * @return the contentSpacePolicy + */ + public XmlViolationPolicy getContentSpacePolicy() { + return contentSpacePolicy; + } + + /** + * @param reportingDoctype + * @see nu.validator.htmlparser.impl.TreeBuilder#setReportingDoctype(boolean) + */ + public void setReportingDoctype(boolean reportingDoctype) { + this.reportingDoctype = reportingDoctype; + if (treeBuilder != null) { + treeBuilder.setReportingDoctype(reportingDoctype); + } + } + + /** + * Returns the reportingDoctype. 
+ * + * @return the reportingDoctype + */ + public boolean isReportingDoctype() { + return reportingDoctype; + } + + /** + * @param errorProfile + * @see nu.validator.htmlparser.impl.errorReportingTokenizer#setErrorProfile(set) + */ + public void setErrorProfile(HashMap<String, String> errorProfileMap) { + this.errorProfileMap = errorProfileMap; + } + + /** + * The policy for non-NCName element and attribute names. + * @param namePolicy + * @see nu.validator.htmlparser.impl.Tokenizer#setNamePolicy(nu.validator.htmlparser.common.XmlViolationPolicy) + */ + public void setNamePolicy(XmlViolationPolicy namePolicy) { + this.namePolicy = namePolicy; + if (driver != null) { + driver.setNamePolicy(namePolicy); + treeBuilder.setNamePolicy(namePolicy); + } + } + + /** + * Sets the encoding sniffing heuristics. + * + * @param heuristics the heuristics to set + * @see nu.validator.htmlparser.impl.Tokenizer#setHeuristics(nu.validator.htmlparser.common.Heuristics) + */ + public void setHeuristics(Heuristics heuristics) { + this.heuristics = heuristics; + if (driver != null) { + driver.setHeuristics(heuristics); + } + } + + public Heuristics getHeuristics() { + return this.heuristics; + } + + /** + * This is a catch-all convenience method for setting name, xmlns, content space, + * content non-XML char and comment policies in one go. This does not affect the + * streamability policy or doctype reporting. + * + * @param xmlPolicy + */ + public void setXmlPolicy(XmlViolationPolicy xmlPolicy) { + setNamePolicy(xmlPolicy); + setXmlnsPolicy(xmlPolicy == XmlViolationPolicy.FATAL ? XmlViolationPolicy.ALTER_INFOSET : xmlPolicy); + setContentSpacePolicy(xmlPolicy); + setContentNonXmlCharPolicy(xmlPolicy); + setCommentPolicy(xmlPolicy); + } + + /** + * The policy for non-NCName element and attribute names. + * + * @return the namePolicy + */ + public XmlViolationPolicy getNamePolicy() { + return namePolicy; + } + + /** + * Does nothing. 
+ * @deprecated + */ + public void setBogusXmlnsPolicy( + XmlViolationPolicy bogusXmlnsPolicy) { + } + + /** + * Returns <code>XmlViolationPolicy.ALTER_INFOSET</code>. + * @deprecated + * @return <code>XmlViolationPolicy.ALTER_INFOSET</code> + */ + public XmlViolationPolicy getBogusXmlnsPolicy() { + return XmlViolationPolicy.ALTER_INFOSET; + } + + public void addCharacterHandler(CharacterHandler characterHandler) { + this.characterHandlers.add(characterHandler); + if (driver != null) { + driver.addCharacterHandler(characterHandler); + } + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/HtmlSerializer.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/HtmlSerializer.java new file mode 100644 index 000000000..3312398d5 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/HtmlSerializer.java @@ -0,0 +1,269 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2008-2011 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
/**
 * A SAX {@link ContentHandler} / {@link LexicalHandler} that writes the
 * events it receives as HTML text to a {@link Writer}.
 *
 * <p>Only elements in the XHTML, SVG and MathML namespaces are written.
 * Text is escaped unless it occurs inside one of the {@link #NON_ESCAPING}
 * elements. Void elements are written without an end tag.
 */
public class HtmlSerializer implements ContentHandler, LexicalHandler {

    /** Elements that never get an end tag. Must stay sorted: it is searched with binarySearch. */
    private static final String[] VOID_ELEMENTS = { "area", "base", "basefont",
            "bgsound", "br", "col", "command", "embed", "frame", "hr", "img",
            "input", "keygen", "link", "meta", "param", "source", "track",
            "wbr" };

    /** Elements whose text content is written verbatim. Must stay sorted for binarySearch. */
    private static final String[] NON_ESCAPING = { "iframe", "noembed",
            "noframes", "noscript", "plaintext", "script", "style", "xmp" };

    /**
     * Wraps a byte stream in a UTF-8 writer.
     *
     * @param out the stream to wrap
     * @return a writer that encodes to <code>out</code> as UTF-8
     */
    private static Writer wrap(OutputStream out) {
        try {
            return new OutputStreamWriter(out, "UTF-8");
        } catch (UnsupportedEncodingException e) {
            throw new RuntimeException(e);
        }
    }

    /** Greater than zero while inside an element whose end tag must not be written. */
    private int ignoreLevel = 0;

    /** Greater than zero while inside a non-escaping element; counts nesting depth. */
    private int escapeLevel = 0;

    private final Writer writer;

    /**
     * Creates a serializer that writes UTF-8 bytes to <code>out</code>.
     *
     * @param out the destination stream
     */
    public HtmlSerializer(OutputStream out) {
        this(wrap(out));
    }

    /**
     * Creates a serializer that writes to <code>out</code>.
     *
     * @param out the destination writer
     */
    public HtmlSerializer(Writer out) {
        this.writer = out;
    }

    /**
     * Writes character data. Inside a non-escaping element the characters are
     * written as-is; elsewhere <code>&lt;</code>, <code>&gt;</code>,
     * <code>&amp;</code> and U+00A0 are replaced with entity references.
     *
     * @throws SAXException wrapping any {@link IOException} from the writer
     */
    public void characters(char[] ch, int start, int length)
            throws SAXException {
        try {
            if (escapeLevel > 0) {
                writer.write(ch, start, length);
            } else {
                for (int i = start; i < start + length; i++) {
                    char c = ch[i];
                    switch (c) {
                        case '<':
                            writer.write("&lt;");
                            break;
                        case '>':
                            writer.write("&gt;");
                            break;
                        case '&':
                            writer.write("&amp;");
                            break;
                        case '\u00A0':
                            writer.write("&nbsp;");
                            break;
                        default:
                            writer.write(c);
                            break;
                    }
                }
            }
        } catch (IOException e) {
            throw new SAXException(e);
        }
    }

    /**
     * Flushes and closes the underlying writer.
     *
     * @throws SAXException wrapping any {@link IOException} from the writer
     */
    public void endDocument() throws SAXException {
        try {
            writer.flush();
            writer.close();
        } catch (IOException e) {
            throw new SAXException(e);
        }
    }

    /**
     * Writes the end tag for <code>localName</code>, unless the element is
     * being ignored (void element, or element outside the supported
     * namespaces), in which case only the nesting counters are unwound.
     *
     * @throws SAXException wrapping any {@link IOException} from the writer
     */
    public void endElement(String uri, String localName, String qName)
            throws SAXException {
        if (escapeLevel > 0) {
            escapeLevel--;
        }
        if (ignoreLevel > 0) {
            ignoreLevel--;
        } else {
            try {
                writer.write('<');
                writer.write('/');
                writer.write(localName);
                writer.write('>');
            } catch (IOException e) {
                throw new SAXException(e);
            }
        }
    }

    /** Treated exactly like {@link #characters(char[], int, int)}. */
    public void ignorableWhitespace(char[] ch, int start, int length)
            throws SAXException {
        characters(ch, start, length);
    }

    /** Processing instructions are not serialized. */
    public void processingInstruction(String target, String data)
            throws SAXException {
    }

    /** The locator is not used. */
    public void setDocumentLocator(Locator locator) {
    }

    /**
     * Writes the <code>&lt;!DOCTYPE html&gt;</code> line.
     *
     * @throws SAXException wrapping any {@link IOException} from the writer
     */
    public void startDocument() throws SAXException {
        try {
            writer.write("<!DOCTYPE html>\n");
        } catch (IOException e) {
            throw new SAXException(e);
        }
    }

    /**
     * Writes a start tag with its serializable attributes.
     *
     * <p>Elements outside the XHTML, SVG and MathML namespaces, and any
     * element nested in an ignored one, are skipped. Attributes are kept when
     * they are in no namespace, in the XLink namespace on a non-XHTML element,
     * or in the XML namespace (on XHTML elements only <code>lang</code>,
     * written without a prefix).
     *
     * @throws SAXException wrapping any {@link IOException} from the writer
     */
    public void startElement(String uri, String localName, String qName,
            Attributes atts) throws SAXException {
        if (escapeLevel > 0) {
            escapeLevel++;
        }
        boolean xhtml = "http://www.w3.org/1999/xhtml".equals(uri);
        if (ignoreLevel > 0
                || !(xhtml || "http://www.w3.org/2000/svg".equals(uri) || "http://www.w3.org/1998/Math/MathML".equals(uri))) {
            ignoreLevel++;
            return;
        }
        try {
            writer.write('<');
            writer.write(localName);
            for (int i = 0; i < atts.getLength(); i++) {
                String attUri = atts.getURI(i);
                String attLocal = atts.getLocalName(i);
                if (attUri.length() == 0) {
                    writer.write(' ');
                } else if (!xhtml
                        && "http://www.w3.org/1999/xlink".equals(attUri)) {
                    writer.write(" xlink:");
                } else if ("http://www.w3.org/XML/1998/namespace".equals(attUri)) {
                    if (xhtml) {
                        if ("lang".equals(attLocal)) {
                            writer.write(' ');
                        } else {
                            continue;
                        }
                    } else {
                        writer.write(" xml:");
                    }
                } else {
                    continue;
                }
                writer.write(attLocal);
                writer.write('=');
                writer.write('"');
                writeAttributeValue(atts.getValue(i));
                writer.write('"');
            }
            writer.write('>');
            if (Arrays.binarySearch(VOID_ELEMENTS, localName) > -1) {
                // No end tag will be written: endElement only decrements.
                ignoreLevel++;
                return;
            }
            if ("pre".equals(localName) || "textarea".equals(localName)
                    || "listing".equals(localName)) {
                writer.write('\n');
            }
            if (escapeLevel == 0
                    && Arrays.binarySearch(NON_ESCAPING, localName) > -1) {
                escapeLevel = 1;
            }
        } catch (IOException e) {
            throw new SAXException(e);
        }
    }

    /** Writes an attribute value, escaping the double quote, the ampersand and U+00A0. */
    private void writeAttributeValue(String val) throws IOException {
        for (int j = 0; j < val.length(); j++) {
            char c = val.charAt(j);
            switch (c) {
                case '"':
                    writer.write("&quot;");
                    break;
                case '&':
                    writer.write("&amp;");
                    break;
                case '\u00A0':
                    writer.write("&nbsp;");
                    break;
                default:
                    writer.write(c);
                    break;
            }
        }
    }

    /**
     * Writes a comment, except inside ignored or non-escaping elements.
     *
     * @throws SAXException wrapping any {@link IOException} from the writer
     */
    public void comment(char[] ch, int start, int length) throws SAXException {
        if (ignoreLevel > 0 || escapeLevel > 0) {
            return;
        }
        try {
            writer.write("<!--");
            writer.write(ch, start, length);
            writer.write("-->");
        } catch (IOException e) {
            throw new SAXException(e);
        }
    }

    /** Not serialized. */
    public void endCDATA() throws SAXException {
    }

    /** Not serialized. */
    public void endDTD() throws SAXException {
    }

    /** Not serialized. */
    public void endEntity(String name) throws SAXException {
    }

    /** Not serialized. */
    public void startCDATA() throws SAXException {
    }

    /** Not serialized. */
    public void startDTD(String name, String publicId, String systemId)
            throws SAXException {
    }

    /** Not serialized. */
    public void startEntity(String name) throws SAXException {
    }

    /** Not serialized. */
    public void startPrefixMapping(String prefix, String uri)
            throws SAXException {
    }

    /** Not serialized. */
    public void endPrefixMapping(String prefix) throws SAXException {
    }

    /** Not serialized. */
    public void skippedEntity(String name) throws SAXException {
    }

}
+++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/InfosetCoercingHtmlParser.java @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2010 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.sax; + +import nu.validator.htmlparser.common.XmlViolationPolicy; + +/** + * This subclass of <code>HtmlParser</code> simply provides a no-argument + * constructor that calls the constructor of the superclass with the + * <code>ALTER_INFOSET</code> policy. This is convenient when another Java + * component wants an implementation of <code>XMLReader</code> with a + * no-argument constructor and infoset coercion is the wanted behavior. + * + * @version $Id$ + * @author hsivonen + */ +public class InfosetCoercingHtmlParser extends HtmlParser { + + /** + * A constructor that passes <code>ALTER_INFOSET</code> to the superclass' + * constructor. 
+ */ + public InfosetCoercingHtmlParser() { + super(XmlViolationPolicy.ALTER_INFOSET); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/NameCheckingXmlSerializer.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/NameCheckingXmlSerializer.java new file mode 100644 index 000000000..b6cb2f872 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/NameCheckingXmlSerializer.java @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2009 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +package nu.validator.htmlparser.sax; + +import java.io.OutputStream; +import java.io.Writer; + +import nu.validator.htmlparser.impl.NCName; + +import org.xml.sax.SAXException; + +public class NameCheckingXmlSerializer extends XmlSerializer { + + public NameCheckingXmlSerializer(OutputStream out) { + super(out); + } + + public NameCheckingXmlSerializer(Writer out) { + super(out); + } + + /** + * @see nu.validator.htmlparser.sax.XmlSerializer#checkNCName() + */ + @Override protected void checkNCName(String name) throws SAXException { + if (!NCName.isNCName(name)) { + throw new SAXException("Not an XML 1.0 4th ed. NCName: " + name); + } + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/SAXStreamer.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/SAXStreamer.java new file mode 100644 index 000000000..07ff5da4a --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/SAXStreamer.java @@ -0,0 +1,196 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2008-2009 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.sax; + +import nu.validator.htmlparser.impl.HtmlAttributes; +import nu.validator.htmlparser.impl.TreeBuilder; + +import org.xml.sax.Attributes; +import org.xml.sax.ContentHandler; +import org.xml.sax.SAXException; +import org.xml.sax.SAXParseException; +import org.xml.sax.ext.LexicalHandler; + +class SAXStreamer extends TreeBuilder<Attributes>{ + + private static final char[] ISINDEX_PROMPT = "This is a searchable index. Enter search keywords: ".toCharArray(); + + private ContentHandler contentHandler = null; + private LexicalHandler lexicalHandler = null; + + SAXStreamer() { + super(); + } + + @Override + protected void addAttributesToElement(Attributes element, HtmlAttributes attributes) throws SAXException { + Attributes existingAttrs = element; + for (int i = 0; i < attributes.getLength(); i++) { + String qName = attributes.getQNameNoBoundsCheck(i); + if (existingAttrs.getIndex(qName) < 0) { + fatal(); + } + } + } + + @Override + protected void appendCharacters(Attributes parent, char[] buf, int start, int length) throws SAXException { + contentHandler.characters(buf, start, length); + } + + /** + * @see nu.validator.htmlparser.impl.TreeBuilder#appendIsindexPrompt(java.lang.Object) + */ + @Override protected void appendIsindexPrompt(Attributes parent) + throws SAXException { + contentHandler.characters(ISINDEX_PROMPT, 0, ISINDEX_PROMPT.length); + } + + @Override + protected void appendChildrenToNewParent(Attributes oldParent, Attributes newParent) throws SAXException { + fatal(); + } + + @Override + protected void appendComment(Attributes parent, char[] buf, int start, int length) throws SAXException { + if (lexicalHandler != null) { + 
lexicalHandler.comment(buf, start, length); + } + } + + @Override + protected void appendCommentToDocument(char[] buf, int start, int length) + throws SAXException { + if (lexicalHandler != null) { + lexicalHandler.comment(buf, start, length); + } + } + + @Override + protected Attributes createElement(String ns, String name, HtmlAttributes attributes, Attributes intendedParent) throws SAXException { + return attributes; + } + + @Override + protected Attributes createHtmlElementSetAsRoot(HtmlAttributes attributes) throws SAXException { + return attributes; + } + + @Override + protected void detachFromParent(Attributes element) throws SAXException { + fatal(); + } + + @Override + protected void appendElement(Attributes child, Attributes newParent) throws SAXException { + } + + @Override + protected boolean hasChildren(Attributes element) throws SAXException { + return false; + } + + public void setContentHandler(ContentHandler handler) { + contentHandler = handler; + } + + public void setLexicalHandler(LexicalHandler handler) { + lexicalHandler = handler; + } + + /** + * @see nu.validator.htmlparser.impl.TreeBuilder#appendDoctypeToDocument(java.lang.String, java.lang.String, java.lang.String) + */ + @Override + protected void appendDoctypeToDocument(String name, String publicIdentifier, String systemIdentifier) throws SAXException { + if (lexicalHandler != null) { + lexicalHandler.startDTD(name, publicIdentifier, systemIdentifier); + lexicalHandler.endDTD(); + } + } + + /** + * @see nu.validator.htmlparser.impl.TreeBuilder#elementPopped(String, java.lang.String, java.lang.Object) + */ + @Override + protected void elementPopped(String ns, String name, Attributes node) throws SAXException { + contentHandler.endElement(ns, name, name); + } + + /** + * @see nu.validator.htmlparser.impl.TreeBuilder#elementPushed(String, java.lang.String, java.lang.Object) + */ + @Override + protected void elementPushed(String ns, String name, Attributes node) throws SAXException { + 
contentHandler.startElement(ns, name, name, node); + } + + /** + * @see nu.validator.htmlparser.impl.TreeBuilder#end() + */ + @Override + protected void end() throws SAXException { + contentHandler.endDocument(); + } + + /** + * @see nu.validator.htmlparser.impl.TreeBuilder#start() + */ + @Override + protected void start(boolean fragment) throws SAXException { + contentHandler.setDocumentLocator(tokenizer); + if (!fragment) { + contentHandler.startDocument(); + } + } + + protected void fatal() throws SAXException { + SAXParseException spe = new SAXParseException( + "Cannot recover after last error. Any further errors will be ignored.", + tokenizer); + if (errorHandler != null) { + errorHandler.fatalError(spe); + } + throw spe; + } + + @Override + protected Attributes createAndInsertFosterParentedElement(String ns, String name, + HtmlAttributes attributes, Attributes table, Attributes stackParent) throws SAXException { + fatal(); + throw new RuntimeException("Unreachable"); + } + + @Override protected void insertFosterParentedCharacters(char[] buf, + int start, int length, Attributes table, Attributes stackParent) + throws SAXException { + fatal(); + } + + @Override protected void insertFosterParentedChild(Attributes child, + Attributes table, Attributes stackParent) throws SAXException { + fatal(); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/SAXTreeBuilder.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/SAXTreeBuilder.java new file mode 100644 index 000000000..ef51d2a51 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/SAXTreeBuilder.java @@ -0,0 +1,210 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2008-2010 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * 
the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.sax; + +import nu.validator.htmlparser.impl.HtmlAttributes; +import nu.validator.htmlparser.impl.TreeBuilder; +import nu.validator.saxtree.Characters; +import nu.validator.saxtree.Comment; +import nu.validator.saxtree.DTD; +import nu.validator.saxtree.Document; +import nu.validator.saxtree.DocumentFragment; +import nu.validator.saxtree.Element; +import nu.validator.saxtree.Node; +import nu.validator.saxtree.ParentNode; + +import org.xml.sax.SAXException; + +class SAXTreeBuilder extends TreeBuilder<Element> { + + private static final char[] ISINDEX_PROMPT = "This is a searchable index. 
Enter search keywords: ".toCharArray(); + + private Document document; + + private Node cachedTable = null; + + private Node cachedTablePreviousSibling = null; + + SAXTreeBuilder() { + super(); + } + + @Override + protected void appendComment(Element parent, char[] buf, int start, int length) { + parent.appendChild(new Comment(tokenizer, buf, start, length)); + } + + @Override + protected void appendCommentToDocument(char[] buf, int start, int length) { + document.appendChild(new Comment(tokenizer, buf, start, length)); + } + + @Override + protected void appendCharacters(Element parent, char[] buf, int start, int length) { + parent.appendChild(new Characters(tokenizer, buf, start, length)); + } + + /** + * @see nu.validator.htmlparser.impl.TreeBuilder#appendIsindexPrompt(java.lang.Object) + */ + @Override protected void appendIsindexPrompt(Element parent) + throws SAXException { + parent.appendChild(new Characters(tokenizer, ISINDEX_PROMPT, 0, ISINDEX_PROMPT.length)); + } + + @Override + protected boolean hasChildren(Element element) { + return element.getFirstChild() != null; + } + + @Override + protected void appendElement(Element child, Element newParent) { + newParent.appendChild(child); + } + + @Override + protected Element createHtmlElementSetAsRoot(HtmlAttributes attributes) { + Element newElt = new Element(tokenizer, "http://www.w3.org/1999/xhtml", "html", "html", attributes, true, null); + document.appendChild(newElt); + return newElt; + } + + @Override + protected void addAttributesToElement(Element element, HtmlAttributes attributes) throws SAXException { + HtmlAttributes existingAttrs = (HtmlAttributes) element.getAttributes(); + existingAttrs.merge(attributes); + } + + /** + * @see nu.validator.htmlparser.impl.TreeBuilder#appendDoctypeToDocument(java.lang.String, java.lang.String, java.lang.String) + */ + @Override + protected void appendDoctypeToDocument(String name, String publicIdentifier, String systemIdentifier) { + DTD dtd = new DTD(tokenizer, 
name, publicIdentifier, systemIdentifier); + dtd.setEndLocator(tokenizer); + document.appendChild(dtd); + } + + /** + * Returns the document. + * + * @return the document + */ + Document getDocument() { + Document rv = document; + document = null; + return rv; + } + + DocumentFragment getDocumentFragment() { + DocumentFragment rv = new DocumentFragment(); + rv.appendChildren(document.getFirstChild()); + document = null; + return rv; + } + + /** + * @throws SAXException + * @see nu.validator.htmlparser.impl.TreeBuilder#end() + */ + @Override + protected void end() throws SAXException { + document.setEndLocator(tokenizer); + cachedTable = null; + cachedTablePreviousSibling = null; + } + + /** + * @see nu.validator.htmlparser.impl.TreeBuilder#start() + */ + @Override + protected void start(boolean fragment) { + document = new Document(tokenizer); + cachedTable = null; + cachedTablePreviousSibling = null; + } + + @Override + protected void appendChildrenToNewParent(Element oldParent, Element newParent) throws SAXException { + newParent.appendChildren(oldParent); + } + + @Override + protected Element createElement(String ns, String name, HtmlAttributes attributes, + Element intendedParent) throws SAXException { + return new Element(tokenizer, ns, name, name, attributes, true, null); + } + + @Override + protected Element createAndInsertFosterParentedElement(String ns, String name, + HtmlAttributes attributes, Element table, Element stackParent) throws SAXException { + ParentNode parent = table.getParentNode(); + Element child = createElement(ns, name, attributes, parent != null ? 
(Element) parent : stackParent); + if (parent != null) { // always an element if not null + parent.insertBetween(child, previousSibling(table), table); + cachedTablePreviousSibling = child; + } else { + stackParent.appendChild(child); + } + + return child; + } + + @Override protected void insertFosterParentedCharacters(char[] buf, + int start, int length, Element table, Element stackParent) throws SAXException { + Node child = new Characters(tokenizer, buf, start, length); + ParentNode parent = table.getParentNode(); + if (parent != null) { // always an element if not null + parent.insertBetween(child, previousSibling(table), table); + cachedTablePreviousSibling = child; + } else { + stackParent.appendChild(child); + } + } + + @Override protected void insertFosterParentedChild(Element child, + Element table, Element stackParent) throws SAXException { + ParentNode parent = table.getParentNode(); + if (parent != null) { // always an element if not null + parent.insertBetween(child, previousSibling(table), table); + cachedTablePreviousSibling = child; + } else { + stackParent.appendChild(child); + } + } + + private Node previousSibling(Node table) { + if (table == cachedTable) { + return cachedTablePreviousSibling; + } else { + cachedTable = table; + return (cachedTablePreviousSibling = table.getPreviousSibling()); + } + } + + @Override protected void detachFromParent(Element element) + throws SAXException { + element.detach(); + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/XmlSerializer.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/XmlSerializer.java new file mode 100644 index 000000000..5dccf5d3a --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/sax/XmlSerializer.java @@ -0,0 +1,737 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2008-2009 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and 
/**
 * Serializes a stream of SAX {@link ContentHandler} / {@link LexicalHandler}
 * events as an XML 1.0 document.
 *
 * <p>Element prefixes and namespace declarations are generated by the
 * serializer itself from the namespace URIs passed to
 * {@link #startElement(String, String, String, Attributes)}; the qualified
 * names supplied by the event source are not used for output. Characters
 * that cannot appear literally in XML 1.0 (C0 controls other than tab, LF and
 * CR, plus U+FFFE and U+FFFF) are replaced with U+FFFD.
 *
 * <p>{@link #startDocument()} must be called before any element event,
 * because it installs the bottom stack node that holds the namespace
 * declarations of the root element.
 */
public class XmlSerializer implements ContentHandler, LexicalHandler {

    private static final String XML_NS_URI = "http://www.w3.org/XML/1998/namespace";

    private static final String XMLNS_NS_URI = "http://www.w3.org/2000/xmlns/";

    /**
     * A prefix-to-URI binding. Identity (equals/hashCode) is deliberately
     * based on the prefix alone so that a set of mappings can hold at most
     * one binding per prefix.
     */
    private static final class PrefixMapping {
        public final String uri;

        public final String prefix;

        /**
         * @param uri the namespace URI
         * @param prefix the prefix bound to the URI (empty for the default namespace)
         */
        public PrefixMapping(String uri, String prefix) {
            this.uri = uri;
            this.prefix = prefix;
        }

        /**
         * @see java.lang.Object#equals(java.lang.Object)
         */
        @Override public final boolean equals(Object obj) {
            if (obj instanceof PrefixMapping) {
                PrefixMapping other = (PrefixMapping) obj;
                return this.prefix.equals(other.prefix);
            } else {
                return false;
            }
        }

        /**
         * @see java.lang.Object#hashCode()
         */
        @Override public final int hashCode() {
            return prefix.hashCode();
        }

    }

    /**
     * One entry of the open-element stack. The {@code mappings} of a node are
     * the namespace declarations written on its <em>child</em> element:
     * declarations are collected on the current top of the stack before the
     * new element's node is pushed.
     */
    private static final class StackNode {
        public final String uri;

        public final String prefix;

        public final String qName;

        public final Set<PrefixMapping> mappings = new HashSet<PrefixMapping>();

        /**
         * @param uri the namespace URI of the element
         * @param qName the qualified name that was written for the element
         * @param prefix the prefix that was chosen for the element
         */
        public StackNode(String uri, String qName, String prefix) {
            this.uri = uri;
            this.qName = qName;
            this.prefix = prefix;
        }
    }

    /** Preferred prefixes for attribute namespaces, keyed by namespace URI. */
    private final static Map<String, String> WELL_KNOWN_ATTRIBUTE_PREFIXES = new HashMap<String, String>();

    static {
        WELL_KNOWN_ATTRIBUTE_PREFIXES.put("adobe:ns:meta/", "x");
        WELL_KNOWN_ATTRIBUTE_PREFIXES.put(
                "http://inkscape.sourceforge.net/DTD/sodipodi-0.dtd",
                "sodipodi");
        WELL_KNOWN_ATTRIBUTE_PREFIXES.put(
                "http://ns.adobe.com/AdobeIllustrator/10.0/", "i");
        WELL_KNOWN_ATTRIBUTE_PREFIXES.put(
                "http://ns.adobe.com/AdobeSVGViewerExtensions/3.0/", "a");
        WELL_KNOWN_ATTRIBUTE_PREFIXES.put(
                "http://ns.adobe.com/Extensibility/1.0/", "x");
        WELL_KNOWN_ATTRIBUTE_PREFIXES.put(
                "http://ns.adobe.com/illustrator/1.0/", "illustrator");
        WELL_KNOWN_ATTRIBUTE_PREFIXES.put("http://ns.adobe.com/pdf/1.3/", "pdf");
        WELL_KNOWN_ATTRIBUTE_PREFIXES.put("http://ns.adobe.com/photoshop/1.0/",
                "photoshop");
        WELL_KNOWN_ATTRIBUTE_PREFIXES.put("http://ns.adobe.com/tiff/1.0/",
                "tiff");
        WELL_KNOWN_ATTRIBUTE_PREFIXES.put("http://ns.adobe.com/xap/1.0/", "xap");
        WELL_KNOWN_ATTRIBUTE_PREFIXES.put("http://ns.adobe.com/xap/1.0/g/",
                "xapG");
        WELL_KNOWN_ATTRIBUTE_PREFIXES.put("http://ns.adobe.com/xap/1.0/mm/",
                "xapMM");
        WELL_KNOWN_ATTRIBUTE_PREFIXES.put(
                "http://ns.adobe.com/xap/1.0/rights/", "xapRights");
        WELL_KNOWN_ATTRIBUTE_PREFIXES.put(
                "http://ns.adobe.com/xap/1.0/sType/Dimensions#", "stDim");
        WELL_KNOWN_ATTRIBUTE_PREFIXES.put(
                "http://ns.adobe.com/xap/1.0/sType/ResourceRef#", "stRef");
        WELL_KNOWN_ATTRIBUTE_PREFIXES.put("http://ns.adobe.com/xap/1.0/t/pg/",
                "xapTPg");
        WELL_KNOWN_ATTRIBUTE_PREFIXES.put("http://purl.org/dc/elements/1.1/",
                "dc");
        WELL_KNOWN_ATTRIBUTE_PREFIXES.put(
                "http://schemas.microsoft.com/visio/2003/SVGExtensions/", "v");
        WELL_KNOWN_ATTRIBUTE_PREFIXES.put(
                "http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd",
                "sodipodi");
        WELL_KNOWN_ATTRIBUTE_PREFIXES.put("http://w3.org/1999/xlink", "xlink");
        WELL_KNOWN_ATTRIBUTE_PREFIXES.put("http://www.carto.net/attrib/",
                "attrib");
        WELL_KNOWN_ATTRIBUTE_PREFIXES.put(
                "http://www.iki.fi/pav/software/textext/", "textext");
        WELL_KNOWN_ATTRIBUTE_PREFIXES.put(
                "http://www.inkscape.org/namespaces/inkscape", "inkscape");
        WELL_KNOWN_ATTRIBUTE_PREFIXES.put(
                "http://www.justsystem.co.jp/hanako13/svg", "jsh");
        WELL_KNOWN_ATTRIBUTE_PREFIXES.put(
                "http://www.w3.org/1999/02/22-rdf-syntax-ns#", "rdf");
        WELL_KNOWN_ATTRIBUTE_PREFIXES.put("http://www.w3.org/1999/xlink",
                "xlink");
        WELL_KNOWN_ATTRIBUTE_PREFIXES.put(
                "http://www.w3.org/2001/XMLSchema-instance", "xsi");
    }

    /** Preferred prefixes for element namespaces, keyed by namespace URI. */
    private final static Map<String, String> WELL_KNOWN_ELEMENT_PREFIXES = new HashMap<String, String>();

    static {
        WELL_KNOWN_ELEMENT_PREFIXES.put("http://www.w3.org/1999/XSL/Transform",
                "xsl");
        WELL_KNOWN_ELEMENT_PREFIXES.put(
                "http://www.w3.org/1999/02/22-rdf-syntax-ns#", "rdf");
        WELL_KNOWN_ELEMENT_PREFIXES.put("http://purl.org/dc/elements/1.1/",
                "dc");
        WELL_KNOWN_ELEMENT_PREFIXES.put(
                "http://www.w3.org/2001/XMLSchema-instance", "xsi");
        WELL_KNOWN_ELEMENT_PREFIXES.put("http://www.ascc.net/xml/schematron",
                "sch");
        WELL_KNOWN_ELEMENT_PREFIXES.put("http://purl.oclc.org/dsdl/schematron",
                "sch");
        WELL_KNOWN_ELEMENT_PREFIXES.put(
                "http://www.inkscape.org/namespaces/inkscape", "inkscape");
        WELL_KNOWN_ELEMENT_PREFIXES.put(
                "http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd",
                "sodipodi");
        WELL_KNOWN_ELEMENT_PREFIXES.put(
                "http://ns.adobe.com/AdobeSVGViewerExtensions/3.0/", "a");
        WELL_KNOWN_ELEMENT_PREFIXES.put(
                "http://ns.adobe.com/AdobeIllustrator/10.0/", "i");
        WELL_KNOWN_ELEMENT_PREFIXES.put("adobe:ns:meta/", "x");
        WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/xap/1.0/", "xap");
        WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/pdf/1.3/", "pdf");
        WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/tiff/1.0/", "tiff");
        WELL_KNOWN_ELEMENT_PREFIXES.put("http://creativecommons.org/ns#", "cc");
        WELL_KNOWN_ELEMENT_PREFIXES.put(
                "http://inkscape.sourceforge.net/DTD/sodipodi-0.dtd",
                "sodipodi");
        WELL_KNOWN_ELEMENT_PREFIXES.put(
                "http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/", "Iptc4xmpCore");
        WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/exif/1.0/", "exif");
        WELL_KNOWN_ELEMENT_PREFIXES.put(
                "http://ns.adobe.com/Extensibility/1.0/", "x");
        WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/illustrator/1.0/",
                "illustrator");
        WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/pdfx/1.3/", "pdfx");
        WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/photoshop/1.0/",
                "photoshop");
        WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/Variables/1.0/",
                "v");
        WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/xap/1.0/g/",
                "xapG");
        WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/xap/1.0/g/img/",
                "xapGImg");
        WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/xap/1.0/mm/",
                "xapMM");
        WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/xap/1.0/rights/",
                "xapRights");
        WELL_KNOWN_ELEMENT_PREFIXES.put(
                "http://ns.adobe.com/xap/1.0/sType/Dimensions#", "stDim");
        WELL_KNOWN_ELEMENT_PREFIXES.put(
                "http://ns.adobe.com/xap/1.0/sType/Font#", "stFnt");
        WELL_KNOWN_ELEMENT_PREFIXES.put(
                "http://ns.adobe.com/xap/1.0/sType/ResourceRef#", "stRef");
        WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/xap/1.0/t/pg/",
                "xapTPg");
        WELL_KNOWN_ELEMENT_PREFIXES.put(
                "http://product.corel.com/CGS/11/cddns/", "odm");
        WELL_KNOWN_ELEMENT_PREFIXES.put(
                "http://schemas.microsoft.com/visio/2003/SVGExtensions/", "v");
        WELL_KNOWN_ELEMENT_PREFIXES.put("http://web.resource.org/cc/", "cc");
        WELL_KNOWN_ELEMENT_PREFIXES.put(
                "http://www.freesoftware.fsf.org/bkchem/cdml", "cdml");
        WELL_KNOWN_ELEMENT_PREFIXES.put("http://www.opengis.net/gml", "gml");
        WELL_KNOWN_ELEMENT_PREFIXES.put("http://www.svgmaker.com/svgns",
                "svgmaker");
        WELL_KNOWN_ELEMENT_PREFIXES.put(
                "http://www.w3.org/2000/01/rdf-schema#", "rdfs");
        WELL_KNOWN_ELEMENT_PREFIXES.put("http://xmlns.com/foaf/0.1/", "foaf");
        WELL_KNOWN_ELEMENT_PREFIXES.put("http://www.xml-cml.org/schema/stmml",
                "stm");
        WELL_KNOWN_ELEMENT_PREFIXES.put("http://www.iupac.org/foo/ichi", "ichi");
    }

    /**
     * Wraps a byte stream in a UTF-8 writer whose encoder substitutes U+FFFD
     * for malformed or unmappable input (for example unpaired surrogates)
     * instead of failing.
     *
     * @param out the byte stream to write to
     * @return a writer encoding to {@code out} as UTF-8
     */
    private final static Writer wrap(OutputStream out) {
        Charset charset = Charset.forName("utf-8");
        CharsetEncoder encoder = charset.newEncoder();
        encoder.onMalformedInput(CodingErrorAction.REPLACE);
        encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
        try {
            encoder.replaceWith("\uFFFD".getBytes("utf-8"));
        } catch (UnsupportedEncodingException e) {
            throw new RuntimeException(e);
        }
        return new OutputStreamWriter(out, encoder);
    }

    // grows from head
    private final LinkedList<StackNode> stack = new LinkedList<StackNode>();

    private final Writer writer;

    /**
     * Creates a serializer that writes UTF-8 bytes to the given stream.
     *
     * @param out the destination stream
     */
    public XmlSerializer(OutputStream out) {
        this(wrap(out));
    }

    /**
     * Creates a serializer that writes to the given writer. The XML
     * declaration written by {@link #startDocument()} always says
     * {@code utf-8}, whatever encoding the writer actually uses.
     *
     * @param out the destination writer
     */
    public XmlSerializer(Writer out) {
        this.writer = out;
    }

    /**
     * Hook for validating names before they are written. This implementation
     * accepts every name; subclasses may override it to reject non-NCNames.
     *
     * @param name the name about to be written
     * @throws SAXException if a subclass rejects the name
     */
    protected void checkNCName(String name) throws SAXException {

    }

    private final void push(String uri, String local, String prefix) {
        stack.addFirst(new StackNode(uri, local, prefix));
    }

    /**
     * Removes the innermost open element and forgets the namespace
     * declarations that were written on it (those are stored on its parent's
     * node).
     *
     * @return the qualified name that was written for the removed element
     */
    private final String pop() {
        String rv = stack.removeFirst().qName;
        stack.getFirst().mappings.clear();
        return rv;
    }

    /**
     * Finds a non-empty prefix currently bound to the given namespace that is
     * usable on an attribute.
     *
     * @param ns the attribute's namespace URI
     * @return the prefix, or {@code null} if none is in scope
     */
    private final String lookupPrefixAttribute(String ns) {
        if (XML_NS_URI.equals(ns)) {
            return "xml";
        }
        // Prefixes seen on inner elements shadow the same prefix further out.
        Set<String> hidden = new HashSet<String>();
        for (StackNode node : stack) {
            for (PrefixMapping mapping : node.mappings) {
                if (mapping.prefix.length() != 0 && mapping.uri.equals(ns)
                        && !hidden.contains(mapping.prefix)) {
                    return mapping.prefix;
                }
                hidden.add(mapping.prefix);
            }
        }
        return null;
    }

    /**
     * Resolves a prefix against the declarations currently in scope,
     * innermost first.
     *
     * @param prefix the prefix to resolve (empty for the default namespace)
     * @return the bound URI, or {@code null} if the prefix is unbound
     */
    private final String lookupUri(String prefix) {
        for (StackNode node : stack) {
            for (PrefixMapping mapping : node.mappings) {
                if (mapping.prefix.equals(prefix)) {
                    return mapping.uri;
                }
            }
        }
        return null;
    }

    /** Tells whether a qualified name is {@code xmlns} or starts with {@code xmlns:}. */
    private final boolean xmlNsQname(String name) {
        if (name == null) {
            return false;
        } else if ("xmlns".equals(name)) {
            return true;
        } else if (name.startsWith("xmlns:")) {
            return true;
        } else {
            return false;
        }
    }

    /**
     * Writes an attribute value with markup characters escaped and with
     * whitespace escaped so that it survives attribute-value normalization:
     * CR, tab and LF always become character references, and a space becomes
     * {@code &#x20;} when it is leading, trailing or follows an unescaped
     * space.
     *
     * @param val the unescaped attribute value
     * @throws IOException if the underlying writer fails
     */
    private final void writeAttributeValue(String val) throws IOException {
        // Starts as true so that a leading space is escaped.
        boolean prevWasSpace = true;
        int last = val.length() - 1;
        for (int i = 0; i <= last; i++) {
            char c = val.charAt(i);
            switch (c) {
                case '<':
                    writer.write("&lt;");
                    prevWasSpace = false;
                    break;
                case '>':
                    writer.write("&gt;");
                    prevWasSpace = false;
                    break;
                case '&':
                    writer.write("&amp;");
                    prevWasSpace = false;
                    break;
                case '"':
                    writer.write("&quot;");
                    prevWasSpace = false;
                    break;
                case '\r':
                    writer.write("&#xD;");
                    prevWasSpace = false;
                    break;
                case '\t':
                    writer.write("&#x9;");
                    prevWasSpace = false;
                    break;
                case '\n':
                    writer.write("&#xA;");
                    prevWasSpace = false;
                    break;
                case ' ':
                    if (prevWasSpace || i == last) {
                        writer.write("&#x20;");
                        prevWasSpace = false;
                    } else {
                        writer.write(' ');
                        prevWasSpace = true;
                    }
                    break;
                case '\uFFFE':
                    writer.write('\uFFFD');
                    prevWasSpace = false;
                    break;
                case '\uFFFF':
                    writer.write('\uFFFD');
                    prevWasSpace = false;
                    break;
                default:
                    if (c < ' ') {
                        writer.write('\uFFFD');
                    } else {
                        writer.write(c);
                    }
                    prevWasSpace = false;
                    break;
            }
        }
    }

    /**
     * Declares a fresh prefix for an attribute namespace on the element being
     * started: the well-known prefix for the URI if there is one and it is
     * unbound, otherwise the first unbound {@code p0}, {@code p1}, ...
     *
     * @param uri the attribute namespace URI
     * @throws SAXException declared for consistency with the mapping helper
     */
    private final void generatePrefix(String uri) throws SAXException {
        int counter = 0;
        String candidate = WELL_KNOWN_ATTRIBUTE_PREFIXES.get(uri);
        if (candidate == null) {
            candidate = "p" + (counter++);
        }
        while (lookupUri(candidate) != null) {
            candidate = "p" + (counter++);
        }
        startPrefixMappingPrivate(candidate, uri);
    }

    /**
     * Writes character data, escaping {@code <}, {@code >}, {@code &} and CR
     * and replacing characters that XML 1.0 forbids with U+FFFD.
     */
    public final void characters(char[] ch, int start, int length)
            throws SAXException {
        try {
            for (int i = start; i < start + length; i++) {
                char c = ch[i];
                switch (c) {
                    case '<':
                        writer.write("&lt;");
                        break;
                    case '>':
                        writer.write("&gt;");
                        break;
                    case '&':
                        writer.write("&amp;");
                        break;
                    case '\r':
                        // A literal CR would be normalized to LF on re-parse.
                        writer.write("&#xD;");
                        break;
                    case '\t':
                        writer.write('\t');
                        break;
                    case '\n':
                        writer.write('\n');
                        break;
                    case '\uFFFE':
                        writer.write('\uFFFD');
                        break;
                    case '\uFFFF':
                        writer.write('\uFFFD');
                        break;
                    default:
                        if (c < ' ') {
                            writer.write('\uFFFD');
                        } else {
                            writer.write(c);
                        }
                        break;
                }
            }
        } catch (IOException e) {
            throw new SAXException(e);
        }
    }

    /** Flushes and closes the underlying writer. */
    public final void endDocument() throws SAXException {
        try {
            stack.clear();
            writer.flush();
            writer.close();
        } catch (IOException e) {
            throw new SAXException(e);
        }
    }

    /**
     * Writes the end tag of the innermost open element. The arguments are
     * ignored; the name written is the one recorded by {@code startElement}.
     */
    public final void endElement(String uri, String localName, String qName)
            throws SAXException {
        try {
            writer.write('<');
            writer.write('/');
            writer.write(pop());
            writer.write('>');
        } catch (IOException e) {
            throw new SAXException(e);
        }
    }

    /** Treated exactly like {@link #characters(char[], int, int)}. */
    public final void ignorableWhitespace(char[] ch, int start, int length)
            throws SAXException {
        characters(ch, start, length);
    }

    /**
     * Writes a processing instruction. A {@code ?>} sequence inside the data
     * is broken up by inserting a space, CR is written as LF and characters
     * forbidden in XML 1.0 are replaced with U+FFFD.
     */
    public final void processingInstruction(String target, String data)
            throws SAXException {
        try {
            checkNCName(target);
            writer.write("<?");
            writer.write(target);
            writer.write(' ');
            boolean prevWasQuestionmark = false;
            for (int i = 0; i < data.length(); i++) {
                char c = data.charAt(i);
                switch (c) {
                    case '?':
                        writer.write('?');
                        prevWasQuestionmark = true;
                        break;
                    case '>':
                        if (prevWasQuestionmark) {
                            writer.write(" >");
                        } else {
                            writer.write('>');
                        }
                        prevWasQuestionmark = false;
                        break;
                    case '\t':
                        writer.write('\t');
                        prevWasQuestionmark = false;
                        break;
                    case '\r':
                    case '\n':
                        writer.write('\n');
                        prevWasQuestionmark = false;
                        break;
                    case '\uFFFE':
                        writer.write('\uFFFD');
                        prevWasQuestionmark = false;
                        break;
                    case '\uFFFF':
                        writer.write('\uFFFD');
                        prevWasQuestionmark = false;
                        break;
                    default:
                        if (c < ' ') {
                            writer.write('\uFFFD');
                        } else {
                            writer.write(c);
                        }
                        prevWasQuestionmark = false;
                        break;
                }
            }
            writer.write("?>");
        } catch (IOException e) {
            throw new SAXException(e);
        }
    }

    /** Ignored; the serializer does not use location information. */
    public final void setDocumentLocator(Locator locator) {
    }

    /**
     * Writes the XML declaration and resets the element stack to a single
     * bottom node that will hold the root element's namespace declarations.
     */
    public final void startDocument() throws SAXException {
        try {
            writer.write("<?xml version='1.0' encoding='utf-8'?>\n");
        } catch (IOException e) {
            throw new SAXException(e);
        }
        stack.clear();
        push(null, null, null);
    }

    /**
     * Writes a start tag with its namespace declarations and attributes.
     *
     * <p>The element prefix is chosen from the well-known element prefixes
     * (falling back to the default namespace when the URI is unknown or the
     * well-known prefix is already bound to another URI). Namespaced
     * attributes get a prefix that is already in scope or a generated one.
     * Attributes in the XML or XMLNS namespaces, attributes with an empty
     * local name and {@code xmlns} attributes are not written.
     *
     * @param uri the element namespace URI (empty for no namespace)
     * @param localName the element local name
     * @param q the qualified name from the event source (ignored)
     * @param atts the attributes
     */
    public final void startElement(String uri, String localName, String q,
            Attributes atts) throws SAXException {
        checkNCName(localName);
        String prefix;
        String qName;
        if (uri.length() == 0) {
            prefix = "";
            qName = localName;
            // generate xmlns
            startPrefixMappingPrivate(prefix, uri);
        } else {
            prefix = WELL_KNOWN_ELEMENT_PREFIXES.get(uri);
            if (prefix == null) {
                prefix = "";
            }
            String lookup = lookupUri(prefix);
            if (lookup != null && !lookup.equals(uri)) {
                prefix = "";
            }
            startPrefixMappingPrivate(prefix, uri);
            if (prefix.length() == 0) {
                qName = localName;
            } else {
                qName = prefix + ':' + localName;
            }
        }

        // First pass: make sure every namespaced attribute has a prefix in
        // scope, so that all declarations are known before the tag is written.
        int attLen = atts.getLength();
        for (int i = 0; i < attLen; i++) {
            String attUri = atts.getURI(i);
            if (attUri.length() == 0
                    || XML_NS_URI.equals(attUri)
                    || XMLNS_NS_URI.equals(attUri)
                    || atts.getLocalName(i).length() == 0
                    || xmlNsQname(atts.getQName(i))) {
                continue;
            }
            if (lookupPrefixAttribute(attUri) == null) {
                generatePrefix(attUri);
            }
        }

        try {
            writer.write('<');
            writer.write(qName);
            for (PrefixMapping mapping : stack.getFirst().mappings) {
                writer.write(' ');
                if (mapping.prefix.length() == 0) {
                    writer.write("xmlns");
                } else {
                    writer.write("xmlns:");
                    writer.write(mapping.prefix);
                }
                writer.write('=');
                writer.write('"');
                writeAttributeValue(mapping.uri);
                writer.write('"');
            }

            for (int i = 0; i < attLen; i++) {
                String attUri = atts.getURI(i);
                if (XML_NS_URI.equals(attUri)
                        || XMLNS_NS_URI.equals(attUri)
                        || atts.getLocalName(i).length() == 0
                        || xmlNsQname(atts.getQName(i))) {
                    continue;
                }
                writer.write(' ');
                if (attUri.length() != 0) {
                    writer.write(lookupPrefixAttribute(attUri));
                    writer.write(':');
                }
                String attLocal = atts.getLocalName(i);
                checkNCName(attLocal);
                writer.write(attLocal);
                writer.write('=');
                writer.write('"');
                writeAttributeValue(atts.getValue(i));
                writer.write('"');
            }
            writer.write('>');
        } catch (IOException e) {
            throw new SAXException(e);
        }
        push(uri, qName, prefix);
    }

    /**
     * Writes a comment. A space is inserted between consecutive hyphens and
     * after a trailing hyphen so the output never contains {@code --} inside
     * the comment; CR is written as LF and characters forbidden in XML 1.0
     * are replaced with U+FFFD.
     */
    public final void comment(char[] ch, int start, int length) throws SAXException {
        try {
            boolean prevWasHyphen = false;
            writer.write("<!--");
            for (int i = start; i < start + length; i++) {
                char c = ch[i];
                switch (c) {
                    case '-':
                        if (prevWasHyphen) {
                            writer.write(" -");
                        } else {
                            writer.write('-');
                            prevWasHyphen = true;
                        }
                        break;
                    case '\t':
                        writer.write('\t');
                        prevWasHyphen = false;
                        break;
                    case '\r':
                    case '\n':
                        writer.write('\n');
                        prevWasHyphen = false;
                        break;
                    case '\uFFFE':
                        writer.write('\uFFFD');
                        prevWasHyphen = false;
                        break;
                    case '\uFFFF':
                        writer.write('\uFFFD');
                        prevWasHyphen = false;
                        break;
                    default:
                        if (c < ' ') {
                            writer.write('\uFFFD');
                        } else {
                            writer.write(c);
                        }
                        prevWasHyphen = false;
                        break;
                }
            }
            if (prevWasHyphen) {
                writer.write(' ');
            }
            writer.write("-->");
        } catch (IOException e) {
            throw new SAXException(e);
        }
    }

    /** Ignored; CDATA section boundaries are not preserved. */
    public final void endCDATA() throws SAXException {
    }

    /** Ignored; no doctype is written. */
    public final void endDTD() throws SAXException {
    }

    /** Ignored. */
    public final void endEntity(String name) throws SAXException {
    }

    /** Ignored; CDATA section boundaries are not preserved. */
    public final void startCDATA() throws SAXException {
    }

    /** Ignored; no doctype is written. */
    public final void startDTD(String name, String publicId, String systemId)
            throws SAXException {
    }

    /** Ignored. */
    public final void startEntity(String name) throws SAXException {
    }

    /**
     * Records a caller-supplied prefix mapping for the next element.
     *
     * <p>Default-namespace mappings (empty prefix) are ignored because
     * {@code startElement} generates them itself, as are mappings that are
     * already in scope and the built-in {@code xml} binding.
     *
     * @param prefix the prefix being declared
     * @param uri the namespace URI the prefix is bound to
     * @throws SAXException if a reserved namespace URI is declared, a prefix
     *         is bound to the empty URI, or the same prefix is mapped twice
     *         on one element
     */
    public final void startPrefixMapping(String prefix, String uri)
            throws SAXException {
        if (prefix.length() == 0 || uri.equals(lookupUri(prefix))) {
            return;
        }
        if (XML_NS_URI.equals(uri)) {
            if ("xml".equals(prefix)) {
                return;
            } else {
                throw new SAXException("Attempt to declare a reserved NS uri.");
            }
        }
        if (XMLNS_NS_URI.equals(uri)) {
            throw new SAXException("Attempt to declare a reserved NS uri.");
        }
        // The prefix is known to be non-empty here.
        if (uri.length() == 0) {
            throw new SAXException("Can't bind a prefix to no namespace.");
        }
        checkNCName(prefix);
        Set<PrefixMapping> theSet = stack.getFirst().mappings;
        PrefixMapping mapping = new PrefixMapping(uri, prefix);
        if (theSet.contains(mapping)) {
            throw new SAXException(
                    "Attempt to map one prefix to two URIs on one element.");
        }
        theSet.add(mapping);
    }

    /**
     * Records a serializer-generated mapping for the next element without
     * the validity checks of {@link #startPrefixMapping(String, String)}.
     * Does nothing if the prefix already resolves to the URI.
     *
     * @param prefix the prefix (empty for the default namespace)
     * @param uri the namespace URI
     */
    public final void startPrefixMappingPrivate(String prefix, String uri)
            throws SAXException {
        if (uri.equals(lookupUri(prefix))) {
            return;
        }
        stack.getFirst().mappings.add(new PrefixMapping(uri, prefix));
    }

    /** Ignored; mappings are dropped when their element ends. */
    public final void endPrefixMapping(String prefix) throws SAXException {
    }

    /** Ignored. */
    public final void skippedEntity(String name) throws SAXException {
    }

}
Software, and to permit persons to whom the + Software is furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. +--> +</head> +<body bgcolor="white"> +<p>This package provides an HTML5 parser that exposes the document through the SAX API.</p> +</body> +</html>
\ No newline at end of file diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/FormPointer.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/FormPointer.java new file mode 100644 index 000000000..6dcff5600 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/FormPointer.java @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.xom; + +import nu.xom.Element; + +/** + * Interface for elements that have an associated form pointer. + * + * @version $Id$ + * @author hsivonen + */ +public interface FormPointer { + + /** + * Returns the form. + * + * @return the form + */ + public abstract Element getForm(); + + /** + * Sets the form. + * + * @param form the form to set + */ + public abstract void setForm(Element form); + +}
\ No newline at end of file diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/FormPtrElement.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/FormPtrElement.java new file mode 100644 index 000000000..2e2e18df7 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/FormPtrElement.java @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.xom; + +import nu.xom.Element; + +/** + * Element with an associated form. + * + * @version $Id$ + * @author hsivonen + */ +public class FormPtrElement extends Element implements FormPointer { + + private Element form = null; + + /** + * Copy constructor (<code>FormPointer</code>-aware). 
+ * @param elt + */ + public FormPtrElement(Element elt) { + super(elt); + if (elt instanceof FormPointer) { + FormPointer other = (FormPointer) elt; + this.setForm(other.getForm()); + } + } + + /** + * Null form. + * + * @param name + * @param uri + */ + public FormPtrElement(String name, String uri) { + super(name, uri); + } + + /** + * Full constructor. + * + * @param name + * @param uri + * @param form + */ + public FormPtrElement(String name, String uri, Element form) { + super(name, uri); + this.form = form; + } + + /** + * Gets the form. + * @see nu.validator.htmlparser.xom.FormPointer#getForm() + */ + public Element getForm() { + return form; + } + + /** + * Sets the form. + * @see nu.validator.htmlparser.xom.FormPointer#setForm(nu.xom.Element) + */ + public void setForm(Element form) { + this.form = form; + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/HtmlBuilder.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/HtmlBuilder.java new file mode 100644 index 000000000..845ea15cf --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/HtmlBuilder.java @@ -0,0 +1,773 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2007-2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.xom; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.Reader; +import java.io.StringReader; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.LinkedList; +import java.util.List; + +import nu.validator.htmlparser.common.CharacterHandler; +import nu.validator.htmlparser.common.DoctypeExpectation; +import nu.validator.htmlparser.common.DocumentModeHandler; +import nu.validator.htmlparser.common.Heuristics; +import nu.validator.htmlparser.common.TokenHandler; +import nu.validator.htmlparser.common.TransitionHandler; +import nu.validator.htmlparser.common.XmlViolationPolicy; +import nu.validator.htmlparser.impl.ErrorReportingTokenizer; +import nu.validator.htmlparser.impl.Tokenizer; +import nu.validator.htmlparser.io.Driver; +import nu.xom.Builder; +import nu.xom.Document; +import nu.xom.Nodes; +import nu.xom.ParsingException; +import nu.xom.ValidityException; + +import org.xml.sax.EntityResolver; +import org.xml.sax.ErrorHandler; +import org.xml.sax.InputSource; +import org.xml.sax.Locator; +import org.xml.sax.SAXException; +import org.xml.sax.SAXParseException; + +/** + * This class implements an HTML5 parser that exposes data through the XOM + * interface. + * + * <p>By default, when using the constructor without arguments, the + * this parser coerces XML 1.0-incompatible infosets into XML 1.0-compatible + * infosets. 
This corresponds to <code>ALTER_INFOSET</code> as the general + * XML violation policy. It is possible to treat XML 1.0 infoset violations + * as fatal by setting the general XML violation policy to <code>FATAL</code>. + * + * <p>The doctype is not represented in the tree. + * + * <p>The document mode is represented via the <code>Mode</code> + * interface on the <code>Document</code> node if the node implements + * that interface (depends on the used node factory). + * + * <p>The form pointer is stored if the node factory supports storing it. + * + * <p>This package has its own node factory class because the official + * XOM node factory may return multiple nodes instead of one confusing + * the assumptions of the DOM-oriented HTML5 parsing algorithm. + * + * @version $Id$ + * @author hsivonen + */ +public class HtmlBuilder extends Builder { + + private Driver driver; + + private final XOMTreeBuilder treeBuilder; + + private final SimpleNodeFactory simpleNodeFactory; + + private EntityResolver entityResolver; + + private ErrorHandler errorHandler = null; + + private DocumentModeHandler documentModeHandler = null; + + private DoctypeExpectation doctypeExpectation = DoctypeExpectation.HTML; + + private boolean checkingNormalization = false; + + private boolean scriptingEnabled = false; + + private final List<CharacterHandler> characterHandlers = new LinkedList<CharacterHandler>(); + + private XmlViolationPolicy contentSpacePolicy = XmlViolationPolicy.FATAL; + + private XmlViolationPolicy contentNonXmlCharPolicy = XmlViolationPolicy.FATAL; + + private XmlViolationPolicy commentPolicy = XmlViolationPolicy.FATAL; + + private XmlViolationPolicy namePolicy = XmlViolationPolicy.FATAL; + + private XmlViolationPolicy streamabilityViolationPolicy = XmlViolationPolicy.ALLOW; + + private boolean html4ModeCompatibleWithXhtml1Schemata = false; + + private boolean mappingLangToXmlLang = false; + + private XmlViolationPolicy xmlnsPolicy = XmlViolationPolicy.FATAL; + + private 
boolean reportingDoctype = true; + + private ErrorHandler treeBuilderErrorHandler = null; + + private Heuristics heuristics = Heuristics.NONE; + + private TransitionHandler transitionHandler = null; + + /** + * Constructor with default node factory and fatal XML violation policy. + */ + public HtmlBuilder() { + this(new SimpleNodeFactory(), XmlViolationPolicy.FATAL); + } + + /** + * Constructor with given node factory and fatal XML violation policy. + * @param nodeFactory the factory + */ + public HtmlBuilder(SimpleNodeFactory nodeFactory) { + this(nodeFactory, XmlViolationPolicy.FATAL); + } + + /** + * Constructor with default node factory and given XML violation policy. + * @param xmlPolicy the policy + */ + public HtmlBuilder(XmlViolationPolicy xmlPolicy) { + this(new SimpleNodeFactory(), xmlPolicy); + } + + /** + * Constructor with given node factory and given XML violation policy. + * @param nodeFactory the factory + * @param xmlPolicy the policy + */ + public HtmlBuilder(SimpleNodeFactory nodeFactory, XmlViolationPolicy xmlPolicy) { + super(); + this.simpleNodeFactory = nodeFactory; + this.treeBuilder = new XOMTreeBuilder(nodeFactory); + this.driver = null; + setXmlPolicy(xmlPolicy); + } + + private Tokenizer newTokenizer(TokenHandler handler, boolean newAttributesEachTime) { + if (errorHandler == null && transitionHandler == null + && contentNonXmlCharPolicy == XmlViolationPolicy.ALLOW) { + return new Tokenizer(handler, newAttributesEachTime); + } else { + return new ErrorReportingTokenizer(handler, newAttributesEachTime); + } + } + + /** + * This class wraps different tree builders depending on configuration. This + * method does the work of hiding this from the user of the class. 
+ */ + private void lazyInit() { + if (driver == null) { + this.driver = new Driver(newTokenizer(treeBuilder, false)); + this.driver.setErrorHandler(errorHandler); + this.driver.setTransitionHandler(transitionHandler); + this.treeBuilder.setErrorHandler(treeBuilderErrorHandler); + this.driver.setCheckingNormalization(checkingNormalization); + this.driver.setCommentPolicy(commentPolicy); + this.driver.setContentNonXmlCharPolicy(contentNonXmlCharPolicy); + this.driver.setContentSpacePolicy(contentSpacePolicy); + this.driver.setHtml4ModeCompatibleWithXhtml1Schemata(html4ModeCompatibleWithXhtml1Schemata); + this.driver.setMappingLangToXmlLang(mappingLangToXmlLang); + this.driver.setXmlnsPolicy(xmlnsPolicy); + this.driver.setHeuristics(heuristics); + for (CharacterHandler characterHandler : characterHandlers) { + this.driver.addCharacterHandler(characterHandler); + } + this.treeBuilder.setDoctypeExpectation(doctypeExpectation); + this.treeBuilder.setDocumentModeHandler(documentModeHandler); + this.treeBuilder.setScriptingEnabled(scriptingEnabled); + this.treeBuilder.setReportingDoctype(reportingDoctype); + this.treeBuilder.setNamePolicy(namePolicy); + } + } + + + private void tokenize(InputSource is) throws ParsingException, IOException, + MalformedURLException { + try { + if (is == null) { + throw new IllegalArgumentException("Null input."); + } + if (is.getByteStream() == null && is.getCharacterStream() == null) { + String systemId = is.getSystemId(); + if (systemId == null) { + throw new IllegalArgumentException( + "No byte stream, no character stream nor URI."); + } + if (entityResolver != null) { + is = entityResolver.resolveEntity(is.getPublicId(), + systemId); + } + if (is.getByteStream() == null + || is.getCharacterStream() == null) { + is = new InputSource(); + is.setSystemId(systemId); + is.setByteStream(new URL(systemId).openStream()); + } + } + driver.tokenize(is); + } catch (SAXParseException e) { + throw new ParsingException(e.getMessage(), 
e.getSystemId(), e.getLineNumber(), + e.getColumnNumber(), e); + } catch (SAXException e) { + throw new ParsingException(e.getMessage(), e); + } + } + + /** + * Parse from SAX <code>InputSource</code>. + * @param is the <code>InputSource</code> + * @return the document + * @throws ParsingException in case of an XML violation + * @throws IOException if IO goes wrang + */ + public Document build(InputSource is) throws ParsingException, IOException { + lazyInit(); + treeBuilder.setFragmentContext(null); + tokenize(is); + return treeBuilder.getDocument(); + } + + /** + * Parse a fragment from SAX <code>InputSource</code> assuming an HTML + * context. + * @param is the <code>InputSource</code> + * @param context the name of the context element (HTML namespace assumed) + * @return the fragment + * @throws ParsingException in case of an XML violation + * @throws IOException if IO goes wrang + */ + public Nodes buildFragment(InputSource is, String context) + throws IOException, ParsingException { + lazyInit(); + treeBuilder.setFragmentContext(context.intern()); + tokenize(is); + return treeBuilder.getDocumentFragment(); + } + + /** + * Parse a fragment from SAX <code>InputSource</code>. + * @param is the <code>InputSource</code> + * @param contextLocal the local name of the context element + * @parem contextNamespace the namespace of the context element + * @return the fragment + * @throws ParsingException in case of an XML violation + * @throws IOException if IO goes wrang + */ + public Nodes buildFragment(InputSource is, String contextLocal, String contextNamespace) + throws IOException, ParsingException { + lazyInit(); + treeBuilder.setFragmentContext(contextLocal.intern(), contextNamespace.intern(), null, false); + tokenize(is); + return treeBuilder.getDocumentFragment(); + } + + /** + * Parse from <code>File</code>. 
+ * @param file the file + * @return the document + * @throws ParsingException in case of an XML violation + * @throws IOException if IO goes wrang + * @see nu.xom.Builder#build(java.io.File) + */ + @Override + public Document build(File file) throws ParsingException, + ValidityException, IOException { + return build(new FileInputStream(file), file.toURI().toASCIIString()); + } + + /** + * Parse from <code>InputStream</code>. + * @param stream the stream + * @param uri the base URI + * @return the document + * @throws ParsingException in case of an XML violation + * @throws IOException if IO goes wrang + * @see nu.xom.Builder#build(java.io.InputStream, java.lang.String) + */ + @Override + public Document build(InputStream stream, String uri) + throws ParsingException, ValidityException, IOException { + InputSource is = new InputSource(stream); + is.setSystemId(uri); + return build(is); + } + + /** + * Parse from <code>InputStream</code>. + * @param stream the stream + * @return the document + * @throws ParsingException in case of an XML violation + * @throws IOException if IO goes wrang + * @see nu.xom.Builder#build(java.io.InputStream) + */ + @Override + public Document build(InputStream stream) throws ParsingException, + ValidityException, IOException { + return build(new InputSource(stream)); + } + + /** + * Parse from <code>Reader</code>. + * @param stream the reader + * @param uri the base URI + * @return the document + * @throws ParsingException in case of an XML violation + * @throws IOException if IO goes wrang + * @see nu.xom.Builder#build(java.io.Reader, java.lang.String) + */ + @Override + public Document build(Reader stream, String uri) throws ParsingException, + ValidityException, IOException { + InputSource is = new InputSource(stream); + is.setSystemId(uri); + return build(is); + } + + /** + * Parse from <code>Reader</code>. 
+ * @param stream the reader + * @return the document + * @throws ParsingException in case of an XML violation + * @throws IOException if IO goes wrang + * @see nu.xom.Builder#build(java.io.Reader) + */ + @Override + public Document build(Reader stream) throws ParsingException, + ValidityException, IOException { + return build(new InputSource(stream)); + } + + /** + * Parse from <code>String</code>. + * @param content the HTML source as string + * @param uri the base URI + * @return the document + * @throws ParsingException in case of an XML violation + * @throws IOException if IO goes wrang + * @see nu.xom.Builder#build(java.lang.String, java.lang.String) + */ + @Override + public Document build(String content, String uri) throws ParsingException, + ValidityException, IOException { + return build(new StringReader(content), uri); + } + + /** + * Parse from URI. + * @param uri the URI of the document + * @return the document + * @throws ParsingException in case of an XML violation + * @throws IOException if IO goes wrang + * @see nu.xom.Builder#build(java.lang.String) + */ + @Override + public Document build(String uri) throws ParsingException, + ValidityException, IOException { + return build(new InputSource(uri)); + } + + /** + * Gets the node factory + */ + public SimpleNodeFactory getSimpleNodeFactory() { + return simpleNodeFactory; + } + + /** + * @see org.xml.sax.XMLReader#setEntityResolver(org.xml.sax.EntityResolver) + */ + public void setEntityResolver(EntityResolver resolver) { + entityResolver = resolver; + } + + /** + * @see org.xml.sax.XMLReader#setErrorHandler(org.xml.sax.ErrorHandler) + */ + public void setErrorHandler(ErrorHandler handler) { + errorHandler = handler; + treeBuilderErrorHandler = handler; + driver = null; + } + + public void setTransitionHander(TransitionHandler handler) { + transitionHandler = handler; + driver = null; + } + + /** + * Indicates whether NFC normalization of source is being checked. 
+ * @return <code>true</code> if NFC normalization of source is being checked. + * @see nu.validator.htmlparser.impl.Tokenizer#isCheckingNormalization() + */ + public boolean isCheckingNormalization() { + return checkingNormalization; + } + + /** + * Toggles the checking of the NFC normalization of source. + * @param enable <code>true</code> to check normalization + * @see nu.validator.htmlparser.impl.Tokenizer#setCheckingNormalization(boolean) + */ + public void setCheckingNormalization(boolean enable) { + this.checkingNormalization = enable; + if (driver != null) { + driver.setCheckingNormalization(checkingNormalization); + } + } + + /** + * Sets the policy for consecutive hyphens in comments. + * @param commentPolicy the policy + * @see nu.validator.htmlparser.impl.Tokenizer#setCommentPolicy(nu.validator.htmlparser.common.XmlViolationPolicy) + */ + public void setCommentPolicy(XmlViolationPolicy commentPolicy) { + this.commentPolicy = commentPolicy; + if (driver != null) { + driver.setCommentPolicy(commentPolicy); + } + } + + /** + * Sets the policy for non-XML characters except white space. + * @param contentNonXmlCharPolicy the policy + * @see nu.validator.htmlparser.impl.Tokenizer#setContentNonXmlCharPolicy(nu.validator.htmlparser.common.XmlViolationPolicy) + */ + public void setContentNonXmlCharPolicy( + XmlViolationPolicy contentNonXmlCharPolicy) { + this.contentNonXmlCharPolicy = contentNonXmlCharPolicy; + driver = null; + } + + /** + * Sets the policy for non-XML white space. + * @param contentSpacePolicy the policy + * @see nu.validator.htmlparser.impl.Tokenizer#setContentSpacePolicy(nu.validator.htmlparser.common.XmlViolationPolicy) + */ + public void setContentSpacePolicy(XmlViolationPolicy contentSpacePolicy) { + this.contentSpacePolicy = contentSpacePolicy; + if (driver != null) { + driver.setContentSpacePolicy(contentSpacePolicy); + } + } + + /** + * Whether the parser considers scripting to be enabled for noscript treatment. 
+ * + * @return <code>true</code> if enabled + * @see nu.validator.htmlparser.impl.TreeBuilder#isScriptingEnabled() + */ + public boolean isScriptingEnabled() { + return scriptingEnabled; + } + + /** + * Sets whether the parser considers scripting to be enabled for noscript treatment. + * @param scriptingEnabled <code>true</code> to enable + * @see nu.validator.htmlparser.impl.TreeBuilder#setScriptingEnabled(boolean) + */ + public void setScriptingEnabled(boolean scriptingEnabled) { + this.scriptingEnabled = scriptingEnabled; + if (treeBuilder != null) { + treeBuilder.setScriptingEnabled(scriptingEnabled); + } + } + + /** + * Returns the doctype expectation. + * + * @return the doctypeExpectation + */ + public DoctypeExpectation getDoctypeExpectation() { + return doctypeExpectation; + } + + /** + * Sets the doctype expectation. + * + * @param doctypeExpectation + * the doctypeExpectation to set + * @see nu.validator.htmlparser.impl.TreeBuilder#setDoctypeExpectation(nu.validator.htmlparser.common.DoctypeExpectation) + */ + public void setDoctypeExpectation(DoctypeExpectation doctypeExpectation) { + this.doctypeExpectation = doctypeExpectation; + if (treeBuilder != null) { + treeBuilder.setDoctypeExpectation(doctypeExpectation); + } + } + + /** + * Returns the document mode handler. + * + * @return the documentModeHandler + */ + public DocumentModeHandler getDocumentModeHandler() { + return documentModeHandler; + } + + /** + * Sets the document mode handler. + * + * @param documentModeHandler + * the documentModeHandler to set + * @see nu.validator.htmlparser.impl.TreeBuilder#setDocumentModeHandler(nu.validator.htmlparser.common.DocumentModeHandler) + */ + public void setDocumentModeHandler(DocumentModeHandler documentModeHandler) { + this.documentModeHandler = documentModeHandler; + } + + /** + * Returns the streamabilityViolationPolicy. 
+ * + * @return the streamabilityViolationPolicy + */ + public XmlViolationPolicy getStreamabilityViolationPolicy() { + return streamabilityViolationPolicy; + } + + /** + * Sets the streamabilityViolationPolicy. + * + * @param streamabilityViolationPolicy + * the streamabilityViolationPolicy to set + */ + public void setStreamabilityViolationPolicy( + XmlViolationPolicy streamabilityViolationPolicy) { + this.streamabilityViolationPolicy = streamabilityViolationPolicy; + driver = null; + } + + /** + * Whether the HTML 4 mode reports boolean attributes in a way that repeats + * the name in the value. + * @param html4ModeCompatibleWithXhtml1Schemata + */ + public void setHtml4ModeCompatibleWithXhtml1Schemata( + boolean html4ModeCompatibleWithXhtml1Schemata) { + this.html4ModeCompatibleWithXhtml1Schemata = html4ModeCompatibleWithXhtml1Schemata; + if (driver != null) { + driver.setHtml4ModeCompatibleWithXhtml1Schemata(html4ModeCompatibleWithXhtml1Schemata); + } + } + + /** + * Returns the <code>Locator</code> during parse. + * @return the <code>Locator</code> + */ + public Locator getDocumentLocator() { + return driver.getDocumentLocator(); + } + + /** + * Whether the HTML 4 mode reports boolean attributes in a way that repeats + * the name in the value. + * + * @return the html4ModeCompatibleWithXhtml1Schemata + */ + public boolean isHtml4ModeCompatibleWithXhtml1Schemata() { + return html4ModeCompatibleWithXhtml1Schemata; + } + + /** + * Whether <code>lang</code> is mapped to <code>xml:lang</code>. + * @param mappingLangToXmlLang + * @see nu.validator.htmlparser.impl.Tokenizer#setMappingLangToXmlLang(boolean) + */ + public void setMappingLangToXmlLang(boolean mappingLangToXmlLang) { + this.mappingLangToXmlLang = mappingLangToXmlLang; + if (driver != null) { + driver.setMappingLangToXmlLang(mappingLangToXmlLang); + } + } + + /** + * Whether <code>lang</code> is mapped to <code>xml:lang</code>. 
+ * + * @return the mappingLangToXmlLang + */ + public boolean isMappingLangToXmlLang() { + return mappingLangToXmlLang; + } + + /** + * Whether the <code>xmlns</code> attribute on the root element is + * passed to through. (FATAL not allowed.) + * @param xmlnsPolicy + * @see nu.validator.htmlparser.impl.Tokenizer#setXmlnsPolicy(nu.validator.htmlparser.common.XmlViolationPolicy) + */ + public void setXmlnsPolicy(XmlViolationPolicy xmlnsPolicy) { + if (xmlnsPolicy == XmlViolationPolicy.FATAL) { + throw new IllegalArgumentException("Can't use FATAL here."); + } + this.xmlnsPolicy = xmlnsPolicy; + if (driver != null) { + driver.setXmlnsPolicy(xmlnsPolicy); + } + } + + /** + * Returns the xmlnsPolicy. + * + * @return the xmlnsPolicy + */ + public XmlViolationPolicy getXmlnsPolicy() { + return xmlnsPolicy; + } + + /** + * Returns the commentPolicy. + * + * @return the commentPolicy + */ + public XmlViolationPolicy getCommentPolicy() { + return commentPolicy; + } + + /** + * Returns the contentNonXmlCharPolicy. + * + * @return the contentNonXmlCharPolicy + */ + public XmlViolationPolicy getContentNonXmlCharPolicy() { + return contentNonXmlCharPolicy; + } + + /** + * Returns the contentSpacePolicy. + * + * @return the contentSpacePolicy + */ + public XmlViolationPolicy getContentSpacePolicy() { + return contentSpacePolicy; + } + + /** + * @param reportingDoctype + * @see nu.validator.htmlparser.impl.TreeBuilder#setReportingDoctype(boolean) + */ + public void setReportingDoctype(boolean reportingDoctype) { + this.reportingDoctype = reportingDoctype; + if (treeBuilder != null) { + treeBuilder.setReportingDoctype(reportingDoctype); + } + } + + /** + * Returns the reportingDoctype. + * + * @return the reportingDoctype + */ + public boolean isReportingDoctype() { + return reportingDoctype; + } + + /** + * The policy for non-NCName element and attribute names. 
+ * @param namePolicy + * @see nu.validator.htmlparser.impl.Tokenizer#setNamePolicy(nu.validator.htmlparser.common.XmlViolationPolicy) + */ + public void setNamePolicy(XmlViolationPolicy namePolicy) { + this.namePolicy = namePolicy; + if (driver != null) { + driver.setNamePolicy(namePolicy); + treeBuilder.setNamePolicy(namePolicy); + } + } + + /** + * Sets the encoding sniffing heuristics. + * + * @param heuristics the heuristics to set + * @see nu.validator.htmlparser.impl.Tokenizer#setHeuristics(nu.validator.htmlparser.common.Heuristics) + */ + public void setHeuristics(Heuristics heuristics) { + this.heuristics = heuristics; + if (driver != null) { + driver.setHeuristics(heuristics); + } + } + + public Heuristics getHeuristics() { + return this.heuristics; + } + + /** + * This is a catch-all convenience method for setting name, xmlns, content space, + * content non-XML char and comment policies in one go. This does not affect the + * streamability policy or doctype reporting. + * + * @param xmlPolicy + */ + public void setXmlPolicy(XmlViolationPolicy xmlPolicy) { + setNamePolicy(xmlPolicy); + setXmlnsPolicy(xmlPolicy == XmlViolationPolicy.FATAL ? XmlViolationPolicy.ALTER_INFOSET : xmlPolicy); + setContentSpacePolicy(xmlPolicy); + setContentNonXmlCharPolicy(xmlPolicy); + setCommentPolicy(xmlPolicy); + } + + /** + * The policy for non-NCName element and attribute names. + * + * @return the namePolicy + */ + public XmlViolationPolicy getNamePolicy() { + return namePolicy; + } + + /** + * Does nothing. + * @deprecated + */ + public void setBogusXmlnsPolicy( + XmlViolationPolicy bogusXmlnsPolicy) { + } + + /** + * Returns <code>XmlViolationPolicy.ALTER_INFOSET</code>. 
+ * @deprecated + * @return <code>XmlViolationPolicy.ALTER_INFOSET</code> + */ + public XmlViolationPolicy getBogusXmlnsPolicy() { + return XmlViolationPolicy.ALTER_INFOSET; + } + + public void addCharacterHandler(CharacterHandler characterHandler) { + this.characterHandlers.add(characterHandler); + if (driver != null) { + driver.addCharacterHandler(characterHandler); + } + } + + + /** + * Sets whether comment nodes appear in the tree. + * @param ignoreComments <code>true</code> to ignore comments + * @see nu.validator.htmlparser.impl.TreeBuilder#setIgnoringComments(boolean) + */ + public void setIgnoringComments(boolean ignoreComments) { + treeBuilder.setIgnoringComments(ignoreComments); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/ModalDocument.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/ModalDocument.java new file mode 100644 index 000000000..3b76b1421 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/ModalDocument.java @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.xom; + +import nu.validator.htmlparser.common.DocumentMode; +import nu.xom.Document; +import nu.xom.Element; + +/** + * Document with <code>Mode</code>. + * @version $Id$ + * @author hsivonen + */ +public class ModalDocument extends Document implements Mode { + + private DocumentMode mode = null; + + /** + * Copy constructor (<code>Mode</code>-aware). + * @param doc + */ + public ModalDocument(Document doc) { + super(doc); + if (doc instanceof Mode) { + Mode modal = (Mode) doc; + setMode(modal.getMode()); + } + } + + /** + * With root. + * + * @param elt + */ + public ModalDocument(Element elt) { + super(elt); + } + + /** + * Gets the mode. + * @see nu.validator.htmlparser.xom.Mode#getMode() + */ + public DocumentMode getMode() { + return mode; + } + + /** + * Sets the mode. 
+ * @see nu.validator.htmlparser.xom.Mode#setMode(nu.validator.htmlparser.common.DocumentMode) + */ + public void setMode(DocumentMode mode) { + this.mode = mode; + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/Mode.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/Mode.java new file mode 100644 index 000000000..bd2dcbc26 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/Mode.java @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.xom; + +import nu.validator.htmlparser.common.DocumentMode; + +/** + * Interface for attaching a <code>DocumentMode</code> on a Document. + * @version $Id$ + * @author hsivonen + */ +public interface Mode { + + /** + * Returns the mode. + * + * @return the mode + */ + public abstract DocumentMode getMode(); + + /** + * Sets the mode. 
+ * + * @param mode the mode to set + */ + public abstract void setMode(DocumentMode mode); + +}
\ No newline at end of file diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/SimpleNodeFactory.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/SimpleNodeFactory.java new file mode 100644 index 000000000..147b5d930 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/SimpleNodeFactory.java @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.htmlparser.xom; + +import nu.xom.Attribute; +import nu.xom.Comment; +import nu.xom.Document; +import nu.xom.Element; +import nu.xom.Text; +import nu.xom.Attribute.Type; + +/** + * A simpler node factory that does not use <code>Nodes</code>.. 
+ * + * @version $Id$ + * @author hsivonen + */ +public class SimpleNodeFactory { + + /** + * <code>return new Attribute(localName, uri, value, type);</code> + * @param localName + * @param uri + * @param value + * @param type + * @return + */ + public Attribute makeAttribute(String localName, String uri, String value, Type type) { + return new Attribute(localName, uri, value, type); + } + + /** + * <code>return new Text(string);</code> + * @param string + * @return + */ + public Text makeText(String string) { + return new Text(string); + } + + /** + * <code>return new Comment(string);</code> + * @param string + * @return + */ + public Comment makeComment(String string) { + return new Comment(string); + } + + /** + * <code>return new Element(name, namespace);</code> + * @param name + * @param namespace + * @return + */ + public Element makeElement(String name, String namespace) { + return new Element(name, namespace); + } + + /** + * <code>return new FormPtrElement(name, namespace, form);</code> + * @param name + * @param namespace + * @param form + * @return + */ + public Element makeElement(String name, String namespace, Element form) { + return new FormPtrElement(name, namespace, form); + } + + /** + * <code>return new ModalDocument(new Element("root", "http://www.xom.nu/fakeRoot"));</code> + * + * <p>Subclasses adviced to return an instance of <code>Mode</code>. (Not required, though.) 
+ * + * @return + */ + public Document makeDocument() { + return new ModalDocument(new Element("root", "http://www.xom.nu/fakeRoot")); + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/XOMTreeBuilder.java b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/XOMTreeBuilder.java new file mode 100644 index 000000000..623f31927 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/XOMTreeBuilder.java @@ -0,0 +1,351 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2008-2010 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +package nu.validator.htmlparser.xom; + +import nu.validator.htmlparser.common.DocumentMode; +import nu.validator.htmlparser.impl.CoalescingTreeBuilder; +import nu.validator.htmlparser.impl.HtmlAttributes; +import nu.xom.Attribute; +import nu.xom.Document; +import nu.xom.Element; +import nu.xom.Node; +import nu.xom.Nodes; +import nu.xom.ParentNode; +import nu.xom.Text; +import nu.xom.XMLException; + +import org.xml.sax.SAXException; + +class XOMTreeBuilder extends CoalescingTreeBuilder<Element> { + + private final SimpleNodeFactory nodeFactory; + + private Document document; + + private int cachedTableIndex = -1; + + private Element cachedTable = null; + + protected XOMTreeBuilder(SimpleNodeFactory nodeFactory) { + super(); + this.nodeFactory = nodeFactory; + } + + @Override + protected void addAttributesToElement(Element element, HtmlAttributes attributes) + throws SAXException { + try { + for (int i = 0; i < attributes.getLength(); i++) { + String localName = attributes.getLocalNameNoBoundsCheck(i); + String uri = attributes.getURINoBoundsCheck(i); + if (element.getAttribute(localName, uri) == null) { + element.addAttribute(nodeFactory.makeAttribute( + localName, + uri, + attributes.getValueNoBoundsCheck(i), + attributes.getTypeNoBoundsCheck(i) == "ID" ? 
Attribute.Type.ID + : Attribute.Type.CDATA)); + } + } + } catch (XMLException e) { + fatal(e); + } + } + + @Override protected void appendCharacters(Element parent, String text) + throws SAXException { + try { + int childCount = parent.getChildCount(); + Node lastChild; + if (childCount != 0 + && ((lastChild = parent.getChild(childCount - 1)) instanceof Text)) { + Text lastAsText = (Text) lastChild; + lastAsText.setValue(lastAsText.getValue() + text); + return; + } + parent.appendChild(nodeFactory.makeText(text)); + } catch (XMLException e) { + fatal(e); + } + } + + @Override + protected void appendChildrenToNewParent(Element oldParent, + Element newParent) throws SAXException { + try { + Nodes children = oldParent.removeChildren(); + for (int i = 0; i < children.size(); i++) { + newParent.appendChild(children.get(i)); + } + } catch (XMLException e) { + fatal(e); + } + } + + @Override + protected void appendComment(Element parent, String comment) throws SAXException { + try { + parent.appendChild(nodeFactory.makeComment(comment)); + } catch (XMLException e) { + fatal(e); + } + } + + @Override + protected void appendCommentToDocument(String comment) + throws SAXException { + try { + Element root = document.getRootElement(); + if ("http://www.xom.nu/fakeRoot".equals(root.getNamespaceURI())) { + document.insertChild(nodeFactory.makeComment(comment), document.indexOf(root)); + } else { + document.appendChild(nodeFactory.makeComment(comment)); + } + } catch (XMLException e) { + fatal(e); + } + } + + @Override + protected Element createElement(String ns, String name, + HtmlAttributes attributes, Element intendedParent) throws SAXException { + try { + Element rv = nodeFactory.makeElement(name, ns); + for (int i = 0; i < attributes.getLength(); i++) { + rv.addAttribute(nodeFactory.makeAttribute( + attributes.getLocalNameNoBoundsCheck(i), + attributes.getURINoBoundsCheck(i), + attributes.getValueNoBoundsCheck(i), + attributes.getTypeNoBoundsCheck(i) == "ID" ? 
Attribute.Type.ID + : Attribute.Type.CDATA)); + } + return rv; + } catch (XMLException e) { + fatal(e); + throw new RuntimeException("Unreachable"); + } + } + + @Override + protected Element createHtmlElementSetAsRoot( + HtmlAttributes attributes) throws SAXException { + try { + Element rv = nodeFactory.makeElement("html", + "http://www.w3.org/1999/xhtml"); + for (int i = 0; i < attributes.getLength(); i++) { + rv.addAttribute(nodeFactory.makeAttribute( + attributes.getLocalNameNoBoundsCheck(i), + attributes.getURINoBoundsCheck(i), + attributes.getValueNoBoundsCheck(i), + attributes.getTypeNoBoundsCheck(i) == "ID" ? Attribute.Type.ID + : Attribute.Type.CDATA)); + } + document.setRootElement(rv); + return rv; + } catch (XMLException e) { + fatal(e); + throw new RuntimeException("Unreachable"); + } + } + + @Override + protected void detachFromParent(Element element) throws SAXException { + try { + element.detach(); + } catch (XMLException e) { + fatal(e); + } + } + + @Override + protected void appendElement(Element child, + Element newParent) throws SAXException { + try { + child.detach(); + newParent.appendChild(child); + } catch (XMLException e) { + fatal(e); + } + } + + @Override + protected boolean hasChildren(Element element) throws SAXException { + try { + return element.getChildCount() != 0; + } catch (XMLException e) { + fatal(e); + throw new RuntimeException("Unreachable"); + } + } + + /** + * Returns the document. 
+ * + * @return the document + */ + Document getDocument() { + Document rv = document; + document = null; + return rv; + } + + Nodes getDocumentFragment() { + Element rootElt = document.getRootElement(); + Nodes rv = rootElt.removeChildren(); + document = null; + return rv; + } + + /** + * @see nu.validator.htmlparser.impl.TreeBuilder#createElement(String, + * java.lang.String, org.xml.sax.Attributes, java.lang.Object) + */ + @Override + protected Element createElement(String ns, String name, + HtmlAttributes attributes, Element form, Element intendedParent) throws SAXException { + try { + Element rv = nodeFactory.makeElement(name, + ns, form); + for (int i = 0; i < attributes.getLength(); i++) { + rv.addAttribute(nodeFactory.makeAttribute( + attributes.getLocalName(i), + attributes.getURINoBoundsCheck(i), + attributes.getValueNoBoundsCheck(i), + attributes.getTypeNoBoundsCheck(i) == "ID" ? Attribute.Type.ID + : Attribute.Type.CDATA)); + } + return rv; + } catch (XMLException e) { + fatal(e); + throw new RuntimeException("Unreachable"); + } + } + + /** + * @see nu.validator.htmlparser.impl.TreeBuilder#start() + */ + @Override + protected void start(boolean fragment) throws SAXException { + document = nodeFactory.makeDocument(); + cachedTableIndex = -1; + cachedTable = null; + } + + /** + * @see nu.validator.htmlparser.impl.TreeBuilder#documentMode(nu.validator.htmlparser.common.DocumentMode, + * java.lang.String, java.lang.String, boolean) + */ + @Override + protected void documentMode(DocumentMode mode, String publicIdentifier, + String systemIdentifier, boolean html4SpecificAdditionalErrorChecks) + throws SAXException { + if (document instanceof Mode) { + Mode modal = (Mode) document; + modal.setMode(mode); + } + } + + @Override + protected Element createAndInsertFosterParentedElement(String ns, String name, + HtmlAttributes attributes, Element table, Element stackParent) throws SAXException { + try { + Node parent = table.getParent(); + Element child = 
createElement(ns, name, attributes, parent != null ? (Element) parent : stackParent); + if (parent != null) { // always an element if not null + ((ParentNode) parent).insertChild(child, indexOfTable(table, stackParent)); + cachedTableIndex++; + } else { + stackParent.appendChild(child); + } + return child; + } catch (XMLException e) { + fatal(e); + throw new RuntimeException("Unreachable"); + } + } + + @Override protected void insertFosterParentedCharacters(String text, + Element table, Element stackParent) throws SAXException { + try { + Node parent = table.getParent(); + if (parent != null) { // always an element if not null + Element parentAsElt = (Element) parent; + int tableIndex = indexOfTable(table, parentAsElt); + Node prevSibling; + if (tableIndex != 0 + && ((prevSibling = parentAsElt.getChild(tableIndex - 1)) instanceof Text)) { + Text prevAsText = (Text) prevSibling; + prevAsText.setValue(prevAsText.getValue() + text); + return; + } + parentAsElt.insertChild(nodeFactory.makeText(text), tableIndex); + cachedTableIndex++; + return; + } + int childCount = stackParent.getChildCount(); + Node lastChild; + if (childCount != 0 + && ((lastChild = stackParent.getChild(childCount - 1)) instanceof Text)) { + Text lastAsText = (Text) lastChild; + lastAsText.setValue(lastAsText.getValue() + text); + return; + } + stackParent.appendChild(nodeFactory.makeText(text)); + } catch (XMLException e) { + fatal(e); + } + } + + @Override protected void insertFosterParentedChild(Element child, + Element table, Element stackParent) throws SAXException { + try { + Node parent = table.getParent(); + if (parent != null) { // always an element if not null + ((ParentNode)parent).insertChild(child, indexOfTable(table, stackParent)); + cachedTableIndex++; + } else { + stackParent.appendChild(child); + } + } catch (XMLException e) { + fatal(e); + } + } + + private int indexOfTable(Element table, Element stackParent) { + if (table == cachedTable) { + return cachedTableIndex; + } else { + 
cachedTable = table; + return (cachedTableIndex = stackParent.indexOf(table)); + } + } + + /** + * @see nu.validator.htmlparser.impl.TreeBuilder#end() + */ + @Override protected void end() throws SAXException { + cachedTableIndex = -1; + cachedTable = null; + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/package.html b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/package.html new file mode 100644 index 000000000..a936d5e3a --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/htmlparser/xom/package.html @@ -0,0 +1,29 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN"> +<html> +<head><title>Package Overview</title> +<!-- + Copyright (c) 2007 Henri Sivonen + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the "Software"), + to deal in the Software without restriction, including without limitation + the rights to use, copy, modify, merge, publish, distribute, sublicense, + and/or sell copies of the Software, and to permit persons to whom the + Software is furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. +--> +</head> +<body bgcolor="white"> +<p>This package provides an HTML5 parser that exposes the document through the XOM API.</p> +</body> +</html>
\ No newline at end of file diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/CDATA.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/CDATA.java new file mode 100644 index 000000000..f17ce3f89 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/CDATA.java @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.saxtree; + +import org.xml.sax.Locator; +import org.xml.sax.SAXException; + +/** + * A CDATA section. + * @version $Id$ + * @author hsivonen + */ +public final class CDATA extends ParentNode { + + /** + * The constructor. 
+ * @param locator the locator + */ + public CDATA(Locator locator) { + super(locator); + } + + /** + * @see nu.validator.saxtree.Node#visit(nu.validator.saxtree.TreeParser) + */ + @Override + void visit(TreeParser treeParser) throws SAXException { + treeParser.startCDATA(this); + } + + /** + * + * @throws SAXException if things go wrong + * @see nu.validator.saxtree.Node#revisit(nu.validator.saxtree.TreeParser) + */ + @Override + void revisit(TreeParser treeParser) throws SAXException { + treeParser.endCDATA(endLocator); + } + + /** + * @see nu.validator.saxtree.Node#getNodeType() + */ + @Override + public NodeType getNodeType() { + return NodeType.CDATA; + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/CharBufferNode.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/CharBufferNode.java new file mode 100644 index 000000000..55c7715f6 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/CharBufferNode.java @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.saxtree; + +import org.xml.sax.Locator; + +/** + * A common superclass for character buffer node classes. + * @version $Id$ + * @author hsivonen + */ +public abstract class CharBufferNode extends Node { + + /** + * The buffer. + */ + protected final char[] buffer; + + /** + * The constructor. + * @param locator the locator + * @param buf the buffer + * @param start the offset + * @param length the length + */ + CharBufferNode(Locator locator, char[] buf, int start, int length) { + super(locator); + this.buffer = new char[length]; + System.arraycopy(buf, start, buffer, 0, length); + } + + /** + * Returns the wrapped buffer as a string. + * + * @see java.lang.Object#toString() + */ + @Override + public String toString() { + return new String(buffer); + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/Characters.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/Characters.java new file mode 100644 index 000000000..b8cc2d6d6 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/Characters.java @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be 
included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.saxtree; + +import org.xml.sax.Locator; +import org.xml.sax.SAXException; + +/** + * A run of characters + * @version $Id$ + * @author hsivonen + */ +public final class Characters extends CharBufferNode { + + /** + * The constructor. + * @param locator the locator + * @param buf the buffer + * @param start the offset in the buffer + * @param length the length + */ + public Characters(Locator locator, char[] buf, int start, int length) { + super(locator, buf, start, length); + } + + /** + * + * @see nu.validator.saxtree.Node#visit(nu.validator.saxtree.TreeParser) + */ + @Override + void visit(TreeParser treeParser) throws SAXException { + treeParser.characters(buffer, 0, buffer.length, this); + } + + /** + * + * @see nu.validator.saxtree.Node#getNodeType() + */ + @Override + public NodeType getNodeType() { + return NodeType.CHARACTERS; + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/Comment.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/Comment.java new file mode 100644 index 000000000..f010462fb --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/Comment.java @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal 
in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.saxtree; + +import org.xml.sax.Locator; +import org.xml.sax.SAXException; + +/** + * A comment. + * + * @version $Id$ + * @author hsivonen + */ +public final class Comment extends CharBufferNode { + + /** + * The constructor. 
+ * @param locator the locator + * @param buf the buffer + * @param start the offset + * @param length the length + */ + public Comment(Locator locator, char[] buf, int start, int length) { + super(locator, buf, start, length); + } + + /** + * + * @see nu.validator.saxtree.Node#visit(nu.validator.saxtree.TreeParser) + */ + @Override + void visit(TreeParser treeParser) throws SAXException { + treeParser.comment(buffer, 0, buffer.length, this); + } + + /** + * + * @see nu.validator.saxtree.Node#getNodeType() + */ + @Override + public NodeType getNodeType() { + return NodeType.COMMENT; + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/DTD.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/DTD.java new file mode 100644 index 000000000..2169e0571 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/DTD.java @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.saxtree; + +import org.xml.sax.Locator; +import org.xml.sax.SAXException; + +/** + * A doctype. + * @version $Id$ + * @author hsivonen + */ +public final class DTD extends ParentNode { + + /** + * The name. + */ + private final String name; + + /** + * The public id. + */ + private final String publicIdentifier; + + /** + * The system id. + */ + private final String systemIdentifier; + + /** + * The constructor. + * @param locator the locator + * @param name the name + * @param publicIdentifier the public id + * @param systemIdentifier the system id + */ + public DTD(Locator locator, String name, String publicIdentifier, String systemIdentifier) { + super(locator); + this.name = name; + this.publicIdentifier = publicIdentifier; + this.systemIdentifier = systemIdentifier; + } + + /** + * + * @see nu.validator.saxtree.Node#visit(nu.validator.saxtree.TreeParser) + */ + @Override + void visit(TreeParser treeParser) throws SAXException { + treeParser.startDTD(name, publicIdentifier, systemIdentifier, this); + } + + /** + * @see nu.validator.saxtree.Node#revisit(nu.validator.saxtree.TreeParser) + */ + @Override + void revisit(TreeParser treeParser) throws SAXException { + treeParser.endDTD(endLocator); + } + + /** + * Returns the name. + * + * @return the name + */ + public String getName() { + return name; + } + + /** + * Returns the publicIdentifier. + * + * @return the publicIdentifier + */ + public String getPublicIdentifier() { + return publicIdentifier; + } + + /** + * Returns the systemIdentifier. 
+ * + * @return the systemIdentifier + */ + public String getSystemIdentifier() { + return systemIdentifier; + } + + /** + * + * @see nu.validator.saxtree.Node#getNodeType() + */ + @Override + public NodeType getNodeType() { + return NodeType.DTD; + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/Document.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/Document.java new file mode 100644 index 000000000..3bb6f09c7 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/Document.java @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.saxtree; + +import org.xml.sax.Locator; +import org.xml.sax.SAXException; + +/** + * A document. + * @version $Id$ + * @author hsivonen + */ +public final class Document extends ParentNode { + + /** + * The constructor. 
+ * @param locator the locator + */ + public Document(Locator locator) { + super(locator); + } + + /** + * + * @see nu.validator.saxtree.Node#visit(nu.validator.saxtree.TreeParser) + */ + @Override + void visit(TreeParser treeParser) throws SAXException { + treeParser.startDocument(this); + } + + /** + * @see nu.validator.saxtree.Node#revisit(nu.validator.saxtree.TreeParser) + */ + @Override + void revisit(TreeParser treeParser) throws SAXException { + treeParser.endDocument(endLocator); + } + + /** + * + * @see nu.validator.saxtree.Node#getNodeType() + */ + @Override + public NodeType getNodeType() { + return NodeType.DOCUMENT; + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/DocumentFragment.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/DocumentFragment.java new file mode 100644 index 000000000..06816932f --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/DocumentFragment.java @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.saxtree; + +import org.xml.sax.helpers.LocatorImpl; + +/** + * A document fragment. + * + * @version $Id$ + * @author hsivonen + */ +public final class DocumentFragment extends ParentNode { + + /** + * The constructor. + */ + public DocumentFragment() { + super(new LocatorImpl()); + } + + /** + * + * @see nu.validator.saxtree.Node#visit(nu.validator.saxtree.TreeParser) + */ + @Override void visit(TreeParser treeParser) { + // nothing + } + + /** + * + * @see nu.validator.saxtree.Node#getNodeType() + */ + @Override public NodeType getNodeType() { + return NodeType.DOCUMENT_FRAGMENT; + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/Element.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/Element.java new file mode 100644 index 000000000..3d33164e5 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/Element.java @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.saxtree; + +import java.util.List; + +import org.xml.sax.Attributes; +import org.xml.sax.Locator; +import org.xml.sax.SAXException; +import org.xml.sax.helpers.AttributesImpl; + +/** + * An element. + * @version $Id$ + * @author hsivonen + */ +public final class Element extends ParentNode { + + /** + * The namespace URI. + */ + private final String uri; + + /** + * The local name. + */ + private final String localName; + + /** + * The qualified name. + */ + private final String qName; + + /** + * The attributes. + */ + private final Attributes attributes; + + /** + * The namespace prefix mappings. + */ + private final List<PrefixMapping> prefixMappings; + + /** + * The constructor. + * @param locator the locator.
+ * @param uri the namespace URI + * @param localName the local name + * @param qName the qualified name + * @param atts the attributes + * @param retainAttributes <code>true</code> to retain the attributes instead of copying + * @param prefixMappings the prefix mappings + */ + public Element(Locator locator, String uri, String localName, String qName, + Attributes atts, boolean retainAttributes, + List<PrefixMapping> prefixMappings) { + super(locator); + this.uri = uri; + this.localName = localName; + this.qName = qName; + if (retainAttributes) { + this.attributes = atts; + } else { + this.attributes = new AttributesImpl(atts); + } + this.prefixMappings = prefixMappings; + } + + /** + * + * @see nu.validator.saxtree.Node#visit(nu.validator.saxtree.TreeParser) + */ + @Override + void visit(TreeParser treeParser) throws SAXException { + if (prefixMappings != null) { + for (PrefixMapping mapping : prefixMappings) { + treeParser.startPrefixMapping(mapping.getPrefix(), + mapping.getUri(), this); + } + } + treeParser.startElement(uri, localName, qName, attributes, this); + } + + /** + * @see nu.validator.saxtree.Node#revisit(nu.validator.saxtree.TreeParser) + */ + @Override + void revisit(TreeParser treeParser) throws SAXException { + treeParser.endElement(uri, localName, qName, endLocator); + if (prefixMappings != null) { + for (PrefixMapping mapping : prefixMappings) { + treeParser.endPrefixMapping(mapping.getPrefix(), endLocator); + } + } + } + + /** + * Returns the attributes. + * + * @return the attributes + */ + public Attributes getAttributes() { + return attributes; + } + + /** + * Returns the localName. + * + * @return the localName + */ + public String getLocalName() { + return localName; + } + + /** + * Returns the prefixMappings. + * + * @return the prefixMappings + */ + public List<PrefixMapping> getPrefixMappings() { + return prefixMappings; + } + + /** + * Returns the qName. 
+ * + * @return the qName + */ + public String getQName() { + return qName; + } + + /** + * Returns the uri. + * + * @return the uri + */ + public String getUri() { + return uri; + } + + /** + * + * @see nu.validator.saxtree.Node#getNodeType() + */ + @Override + public NodeType getNodeType() { + return NodeType.ELEMENT; + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/Entity.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/Entity.java new file mode 100644 index 000000000..091013736 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/Entity.java @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.saxtree; + +import org.xml.sax.Locator; +import org.xml.sax.SAXException; + +/** + * An entity. 
+ * @version $Id$ + * @author hsivonen + */ +public final class Entity extends ParentNode { + + /** + * The name. + */ + private final String name; + + /** + * The constructor. + * @param locator the locator + * @param name the name + */ + public Entity(Locator locator, String name) { + super(locator); + this.name = name; + } + + /** + * + * @see nu.validator.saxtree.Node#visit(nu.validator.saxtree.TreeParser) + */ + @Override + void visit(TreeParser treeParser) throws SAXException { + treeParser.startEntity(name, this); + } + + /** + * @see nu.validator.saxtree.Node#revisit(nu.validator.saxtree.TreeParser) + */ + @Override + void revisit(TreeParser treeParser) throws SAXException { + treeParser.endEntity(name, endLocator); + } + + /** + * + * @see nu.validator.saxtree.Node#getNodeType() + */ + @Override + public NodeType getNodeType() { + return NodeType.ENTITY; + } + + /** + * Returns the name. + * + * @return the name + */ + public String getName() { + return name; + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/IgnorableWhitespace.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/IgnorableWhitespace.java new file mode 100644 index 000000000..e5fcf350f --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/IgnorableWhitespace.java @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the 
Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.saxtree; + +import org.xml.sax.Locator; +import org.xml.sax.SAXException; + +/** + * A run of ignorable whitespace. + * @version $Id$ + * @author hsivonen + */ +public final class IgnorableWhitespace extends CharBufferNode { + + /** + * The constructor. + * @param locator the locator + * @param buf the buffer + * @param start the offset + * @param length the length + */ + public IgnorableWhitespace(Locator locator, char[] buf, int start, int length) { + super(locator, buf, start, length); + } + + /** + * + * @see nu.validator.saxtree.Node#visit(nu.validator.saxtree.TreeParser) + */ + @Override + void visit(TreeParser treeParser) throws SAXException { + treeParser.ignorableWhitespace(buffer, 0, buffer.length, this); + } + + /** + * + * @see nu.validator.saxtree.Node#getNodeType() + */ + @Override + public NodeType getNodeType() { + return NodeType.IGNORABLE_WHITESPACE; + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/LocatorImpl.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/LocatorImpl.java new file mode 100644 index 000000000..37c0c6325 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/LocatorImpl.java @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2007-2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the
Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.saxtree; + +import org.xml.sax.Locator; + +/** + * A locator implementation. + * @version $Id$ + * @author hsivonen + */ +public final class LocatorImpl implements Locator { + + /** + * The system id. + */ + private final String systemId; + + /** + * The public id. + */ + private final String publicId; + + /** + * The column. + */ + private final int column; + + /** + * The line. + */ + private final int line; + + /** + * The constructor. 
+ * @param locator the locator + */ + public LocatorImpl(Locator locator) { + if (locator == null) { + this.systemId = null; + this.publicId = null; + this.column = -1; + this.line = -1; + } else { + this.systemId = locator.getSystemId(); + this.publicId = locator.getPublicId(); + this.column = locator.getColumnNumber(); + this.line = locator.getLineNumber(); + } + } + + /** + * + * @see org.xml.sax.Locator#getColumnNumber() + */ + public int getColumnNumber() { + return column; + } + + /** + * + * @see org.xml.sax.Locator#getLineNumber() + */ + public int getLineNumber() { + return line; + } + + /** + * + * @see org.xml.sax.Locator#getPublicId() + */ + public String getPublicId() { + return publicId; + } + + /** + * + * @see org.xml.sax.Locator#getSystemId() + */ + public String getSystemId() { + return systemId; + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/Node.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/Node.java new file mode 100644 index 000000000..7aed83b75 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/Node.java @@ -0,0 +1,307 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2007-2009 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.saxtree; + +import java.util.List; + +import org.xml.sax.Attributes; +import org.xml.sax.Locator; +import org.xml.sax.SAXException; + +/** + * The common node superclass. + * @version $Id$ + * @author hsivonen + */ +public abstract class Node implements Locator { + + /** + * The system id. + */ + private final String systemId; + + /** + * The public id. + */ + private final String publicId; + + /** + * The column. + */ + private final int column; + + /** + * The line. + */ + private final int line; + + /** + * The next sibling. + */ + private Node nextSibling = null; + + /** + * The parent. + */ + private ParentNode parentNode = null; + + /** + * The constructor. 
+ * + * @param locator the locator + */ + Node(Locator locator) { + if (locator == null) { + this.systemId = null; + this.publicId = null; + this.column = -1; + this.line = -1; + } else { + this.systemId = locator.getSystemId(); + this.publicId = locator.getPublicId(); + this.column = locator.getColumnNumber(); + this.line = locator.getLineNumber(); + } + } + + /** + * + * @see org.xml.sax.Locator#getColumnNumber() + */ + public int getColumnNumber() { + return column; + } + + /** + * + * @see org.xml.sax.Locator#getLineNumber() + */ + public int getLineNumber() { + return line; + } + + /** + * + * @see org.xml.sax.Locator#getPublicId() + */ + public String getPublicId() { + return publicId; + } + + /** + * + * @see org.xml.sax.Locator#getSystemId() + */ + public String getSystemId() { + return systemId; + } + + /** + * Visit the node. + * + * @param treeParser the visitor + * @throws SAXException if stuff goes wrong + */ + abstract void visit(TreeParser treeParser) throws SAXException; + + /** + * Revisit the node. + * + * @param treeParser the visitor + * @throws SAXException if stuff goes wrong + */ + void revisit(TreeParser treeParser) throws SAXException { + return; + } + + /** + * Return the first child. + * @return the first child + */ + public Node getFirstChild() { + return null; + } + + /** + * Returns the nextSibling. + * + * @return the nextSibling + */ + public final Node getNextSibling() { + return nextSibling; + } + + /** + * Returns the previous sibling + * @return the previous sibling + */ + public final Node getPreviousSibling() { + Node prev = null; + Node next = parentNode.getFirstChild(); + for(;;) { + if (this == next) { + return prev; + } + prev = next; + next = next.nextSibling; + } + } + + /** + * Sets the nextSibling. + * + * @param nextSibling the nextSibling to set + */ + void setNextSibling(Node nextSibling) { + this.nextSibling = nextSibling; + } + + + /** + * Returns the parentNode. 
+ * + * @return the parentNode + */ + public final ParentNode getParentNode() { + return parentNode; + } + + /** + * Sets the parentNode. + * + * @param parentNode the parentNode to set + */ + void setParentNode(ParentNode parentNode) { + this.parentNode = parentNode; + } + + /** + * Return the node type. + * @return the node type + */ + public abstract NodeType getNodeType(); + + // Subclass-specific accessors that are hoisted here to + // avoid casting. + + /** + * Detach this node from its parent. + */ + public void detach() { + if (parentNode != null) { + parentNode.removeChild(this); + parentNode = null; + } + } + + /** + * Returns the name. + * + * @return the name + */ + public String getName() { + throw new UnsupportedOperationException(); + } + + /** + * Returns the publicIdentifier. + * + * @return the publicIdentifier + */ + public String getPublicIdentifier() { + throw new UnsupportedOperationException(); + } + + /** + * Returns the systemIdentifier. + * + * @return the systemIdentifier + */ + public String getSystemIdentifier() { + throw new UnsupportedOperationException(); + } + + /** + * Returns the attributes. + * + * @return the attributes + */ + public Attributes getAttributes() { + throw new UnsupportedOperationException(); + } + + /** + * Returns the localName. + * + * @return the localName + */ + public String getLocalName() { + throw new UnsupportedOperationException(); + } + + /** + * Returns the prefixMappings. + * + * @return the prefixMappings + */ + public List<PrefixMapping> getPrefixMappings() { + throw new UnsupportedOperationException(); + } + + /** + * Returns the qName. + * + * @return the qName + */ + public String getQName() { + throw new UnsupportedOperationException(); + } + + /** + * Returns the uri. + * + * @return the uri + */ + public String getUri() { + throw new UnsupportedOperationException(); + } + + /** + * Returns the data. 
+ * + * @return the data + */ + public String getData() { + throw new UnsupportedOperationException(); + } + + /** + * Returns the target. + * + * @return the target + */ + public String getTarget() { + throw new UnsupportedOperationException(); + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/NodeType.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/NodeType.java new file mode 100644 index 000000000..c3c927f0d --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/NodeType.java @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.saxtree; + +/** + * The node type. + * @version $Id$ + * @author hsivonen + */ +public enum NodeType { + /** + * A CDATA section. + */ + CDATA, + /** + * A run of characters. + */ + CHARACTERS, + /** + * A comment. 
/**
 * Enumerates the kinds of node a SAX tree can contain.
 *
 * @version $Id$
 * @author hsivonen
 */
public enum NodeType {

    /** CDATA section. */
    CDATA,

    /** Run of character data. */
    CHARACTERS,

    /** Comment. */
    COMMENT,

    /** Document root. */
    DOCUMENT,

    /** Document fragment root. */
    DOCUMENT_FRAGMENT,

    /** DTD. */
    DTD,

    /** Element. */
    ELEMENT,

    /** Entity. */
    ENTITY,

    /** Run of ignorable whitespace. */
    IGNORABLE_WHITESPACE,

    /** Processing instruction. */
    PROCESSING_INSTRUCTION,

    /** Skipped entity. */
    SKIPPED_ENTITY
}
/**
 * A <code>LexicalHandler</code> whose callbacks all ignore their arguments
 * and do nothing.
 *
 * @version $Id$
 * @author hsivonen
 */
final class NullLexicalHandler implements LexicalHandler {

    /**
     * Ignores the DTD start.
     *
     * @see org.xml.sax.ext.LexicalHandler#startDTD(java.lang.String, java.lang.String, java.lang.String)
     */
    public void startDTD(String arg0, String arg1, String arg2) throws SAXException {
        // Intentionally empty.
    }

    /**
     * Ignores the DTD end.
     *
     * @see org.xml.sax.ext.LexicalHandler#endDTD()
     */
    public void endDTD() throws SAXException {
        // Intentionally empty.
    }

    /**
     * Ignores the entity start.
     *
     * @see org.xml.sax.ext.LexicalHandler#startEntity(java.lang.String)
     */
    public void startEntity(String arg0) throws SAXException {
        // Intentionally empty.
    }

    /**
     * Ignores the entity end.
     *
     * @see org.xml.sax.ext.LexicalHandler#endEntity(java.lang.String)
     */
    public void endEntity(String arg0) throws SAXException {
        // Intentionally empty.
    }

    /**
     * Ignores the CDATA section start.
     *
     * @see org.xml.sax.ext.LexicalHandler#startCDATA()
     */
    public void startCDATA() throws SAXException {
        // Intentionally empty.
    }

    /**
     * Ignores the CDATA section end.
     *
     * @see org.xml.sax.ext.LexicalHandler#endCDATA()
     */
    public void endCDATA() throws SAXException {
        // Intentionally empty.
    }

    /**
     * Ignores the comment.
     *
     * @see org.xml.sax.ext.LexicalHandler#comment(char[], int, int)
     */
    public void comment(char[] arg0, int arg1, int arg2) throws SAXException {
        // Intentionally empty.
    }

}
sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.saxtree; + +import org.xml.sax.Locator; + +/** + * Common superclass for parent nodes. + * @version $Id$ + * @author hsivonen + */ +public abstract class ParentNode extends Node { + + /** + * The end locator. + */ + protected Locator endLocator; + + /** + * The first child. + */ + private Node firstChild = null; + + /** + * The last child (for efficiency). + */ + private Node lastChild = null; + + /** + * The constuctor. + * @param locator the locator + */ + ParentNode(Locator locator) { + super(locator); + } + + /** + * Sets the endLocator. + * + * @param endLocator the endLocator to set + */ + public void setEndLocator(Locator endLocator) { + this.endLocator = new LocatorImpl(endLocator); + } + + /** + * Copies the endLocator from another node. + * + * @param another the another node + */ + public void copyEndLocator(ParentNode another) { + this.endLocator = another.endLocator; + } + + /** + * Returns the firstChild. + * + * @return the firstChild + */ + public final Node getFirstChild() { + return firstChild; + } + + /** + * Returns the lastChild. 
+ * + * @return the lastChild + */ + public final Node getLastChild() { + return lastChild; + } + + /** + * Insert a new child before a pre-existing child and return the newly inserted child. + * @param child the new child + * @param sibling the existing child before which to insert (must be a child of this node) or <code>null</code> to append + * @return <code>child</code> + */ + public Node insertBefore(Node child, Node sibling) { + assert sibling == null || this == sibling.getParentNode(); + if (sibling == null) { + return appendChild(child); + } + child.detach(); + child.setParentNode(this); + if (firstChild == sibling) { + child.setNextSibling(sibling); + firstChild = child; + } else { + Node prev = firstChild; + Node next = firstChild.getNextSibling(); + while (next != sibling) { + prev = next; + next = next.getNextSibling(); + } + prev.setNextSibling(child); + child.setNextSibling(next); + } + return child; + } + + public Node insertBetween(Node child, Node prev, Node next) { + assert prev == null || this == prev.getParentNode(); + assert next == null || this == next.getParentNode(); + assert prev != null || next == firstChild; + assert next != null || prev == lastChild; + assert prev == null || next == null || prev.getNextSibling() == next; + if (next == null) { + return appendChild(child); + } + child.detach(); + child.setParentNode(this); + child.setNextSibling(next); + if (prev == null) { + firstChild = child; + } else { + prev.setNextSibling(child); + } + return child; + } + + /** + * Append a child to this node and return the child. + * + * @param child the child to append. + * @return <code>child</code> + */ + public Node appendChild(Node child) { + child.detach(); + child.setParentNode(this); + if (firstChild == null) { + firstChild = child; + } else { + lastChild.setNextSibling(child); + } + lastChild = child; + return child; + } + + /** + * Append the children of another node to this node removing them from the other node . 
+ * @param parent the other node whose children to append to this one + */ + public void appendChildren(Node parent) { + Node child = parent.getFirstChild(); + if (child == null) { + return; + } + ParentNode another = (ParentNode) parent; + if (firstChild == null) { + firstChild = child; + } else { + lastChild.setNextSibling(child); + } + lastChild = another.lastChild; + do { + child.setParentNode(this); + } while ((child = child.getNextSibling()) != null); + another.firstChild = null; + another.lastChild = null; + } + + /** + * Remove a child from this node. + * @param node the child to remove + */ + void removeChild(Node node) { + assert this == node.getParentNode(); + if (firstChild == node) { + firstChild = node.getNextSibling(); + if (lastChild == node) { + lastChild = null; + } + } else { + Node prev = firstChild; + Node next = firstChild.getNextSibling(); + while (next != node) { + prev = next; + next = next.getNextSibling(); + } + prev.setNextSibling(node.getNextSibling()); + if (lastChild == node) { + lastChild = prev; + } + } + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/PrefixMapping.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/PrefixMapping.java new file mode 100644 index 000000000..8ffaf4a2c --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/PrefixMapping.java @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included 
/**
 * An immutable pairing of a namespace prefix with a namespace URI.
 *
 * @version $Id$
 * @author hsivonen
 */
public final class PrefixMapping {

    /**
     * The namespace prefix.
     */
    private final String prefix;

    /**
     * The namespace URI.
     */
    private final String uri;

    /**
     * Creates a mapping from the given prefix to the given URI. Both values
     * are stored as passed.
     *
     * @param prefix the prefix
     * @param uri the URI
     */
    public PrefixMapping(final String prefix, final String uri) {
        this.prefix = prefix;
        this.uri = uri;
    }

    /**
     * Returns the prefix.
     *
     * @return the prefix given at construction
     */
    public String getPrefix() {
        return this.prefix;
    }

    /**
     * Returns the uri.
     *
     * @return the URI given at construction
     */
    public String getUri() {
        return this.uri;
    }
}
+ * + * @return the uri + */ + public String getUri() { + return uri; + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/ProcessingInstruction.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/ProcessingInstruction.java new file mode 100644 index 000000000..014e63821 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/ProcessingInstruction.java @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.saxtree; + +import org.xml.sax.Locator; +import org.xml.sax.SAXException; + +/** + * A processing instruction. + * @version $Id$ + * @author hsivonen + */ +public final class ProcessingInstruction extends Node { + + /** + * PI target. + */ + private final String target; + + /** + * PI data. + */ + private final String data; + + /** + * Constructor. 
+ * @param locator the locator + * @param target PI target + * @param data PI data + */ + public ProcessingInstruction(Locator locator, String target, String data) { + super(locator); + this.target = target; + this.data = data; + } + + /** + * + * @see nu.validator.saxtree.Node#visit(nu.validator.saxtree.TreeParser) + */ + @Override + void visit(TreeParser treeParser) throws SAXException { + treeParser.processingInstruction(target, data, this); + } + + /** + * + * @see nu.validator.saxtree.Node#getNodeType() + */ + @Override + public NodeType getNodeType() { + return NodeType.PROCESSING_INSTRUCTION; + } + + /** + * Returns the data. + * + * @return the data + */ + public String getData() { + return data; + } + + /** + * Returns the target. + * + * @return the target + */ + public String getTarget() { + return target; + } + +} diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/SkippedEntity.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/SkippedEntity.java new file mode 100644 index 000000000..01ca61490 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/SkippedEntity.java @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.saxtree; + +import org.xml.sax.Locator; +import org.xml.sax.SAXException; + +/** + * A skipped entity. + * @version $Id$ + * @author hsivonen + */ +public final class SkippedEntity extends Node { + + /** + * The name. + */ + private final String name; + + /** + * Constructor. + * @param locator the locator + * @param name the name + */ + public SkippedEntity(Locator locator, String name) { + super(locator); + this.name = name; + } + + /** + * + * @see nu.validator.saxtree.Node#visit(nu.validator.saxtree.TreeParser) + */ + @Override + void visit(TreeParser treeParser) throws SAXException { + treeParser.skippedEntity(name, this); + } + + /** + * + * @see nu.validator.saxtree.Node#getNodeType() + */ + @Override + public NodeType getNodeType() { + return NodeType.SKIPPED_ENTITY; + } + + /** + * Returns the name. 
+ * + * @return the name + */ + public String getName() { + return name; + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/TreeBuilder.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/TreeBuilder.java new file mode 100644 index 000000000..39fe236b3 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/TreeBuilder.java @@ -0,0 +1,250 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.saxtree; + +import java.util.LinkedList; +import java.util.List; + +import org.xml.sax.Attributes; +import org.xml.sax.ContentHandler; +import org.xml.sax.Locator; +import org.xml.sax.SAXException; +import org.xml.sax.ext.LexicalHandler; + +/** + * Builds a SAX Tree representation of a document or a fragment + * streamed as <code>ContentHandler</code> and + * <code>LexicalHandler</code> events. 
The start/end event matching + * is expected to adhere to the SAX API contract. Things will + * simply break if this is not the case. Fragments are expected to + * omit <code>startDocument()</code> and <code>endDocument()</code> + * calls. + * + * @version $Id$ + * @author hsivonen + */ +public class TreeBuilder implements ContentHandler, LexicalHandler { + + /** + * The locator. + */ + private Locator locator; + + /** + * The current node. + */ + private ParentNode current; + + /** + * Whether to retain attribute objects. + */ + private final boolean retainAttributes; + + /** + * The prefix mappings for the next element to be inserted. + */ + private List<PrefixMapping> prefixMappings; + + /** + * Constructs a reusable <code>TreeBuilder</code> that builds + * <code>Document</code>s and copies attributes. + */ + public TreeBuilder() { + this(false, false); + } + + /** + * The constructor. The instance will be reusabe if building a full + * document and not reusable if building a fragment. + * + * @param fragment whether this <code>TreeBuilder</code> should build + * a <code>DocumentFragment</code> instead of a <code>Document</code>. + * @param retainAttributes whether instances of the <code>Attributes</code> + * interface passed to <code>startElement</code> should be retained + * (the alternative is copying). 
+ */ + public TreeBuilder(boolean fragment, boolean retainAttributes) { + if (fragment) { + current = new DocumentFragment(); + } + this.retainAttributes = retainAttributes; + } + + /** + * + * @see org.xml.sax.ContentHandler#characters(char[], int, int) + */ + public void characters(char[] ch, int start, int length) throws SAXException { + current.appendChild(new Characters(locator, ch, start, length)); + } + + /** + * + * @see org.xml.sax.ContentHandler#endDocument() + */ + public void endDocument() throws SAXException { + current.setEndLocator(locator); + } + + /** + * + * @see org.xml.sax.ContentHandler#endElement(java.lang.String, java.lang.String, java.lang.String) + */ + public void endElement(String uri, String localName, String qName) throws SAXException { + current.setEndLocator(locator); + current = current.getParentNode(); + } + + /** + * + * @see org.xml.sax.ContentHandler#endPrefixMapping(java.lang.String) + */ + public void endPrefixMapping(String prefix) throws SAXException { + } + + /** + * + * @see org.xml.sax.ContentHandler#ignorableWhitespace(char[], int, int) + */ + public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException { + current.appendChild(new IgnorableWhitespace(locator, ch, start, length)); + } + + /** + * + * @see org.xml.sax.ContentHandler#processingInstruction(java.lang.String, java.lang.String) + */ + public void processingInstruction(String target, String data) throws SAXException { + current.appendChild(new ProcessingInstruction(locator, target, data)); + } + + /** + * + * @see org.xml.sax.ContentHandler#setDocumentLocator(org.xml.sax.Locator) + */ + public void setDocumentLocator(Locator locator) { + this.locator = locator; + } + + public void skippedEntity(String name) throws SAXException { + current.appendChild(new SkippedEntity(locator, name)); + } + + /** + * + * @see org.xml.sax.ContentHandler#startDocument() + */ + public void startDocument() throws SAXException { + current = new 
Document(locator); + } + + /** + * + * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes) + */ + public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException { + current = (ParentNode) current.appendChild(new Element(locator, uri, localName, qName, atts, retainAttributes, prefixMappings)); + prefixMappings = null; + } + + /** + * + * @see org.xml.sax.ContentHandler#startPrefixMapping(java.lang.String, java.lang.String) + */ + public void startPrefixMapping(String prefix, String uri) throws SAXException { + if (prefixMappings == null) { + prefixMappings = new LinkedList<PrefixMapping>(); + } + prefixMappings.add(new PrefixMapping(prefix, uri)); + } + + /** + * + * @see org.xml.sax.ext.LexicalHandler#comment(char[], int, int) + */ + public void comment(char[] ch, int start, int length) throws SAXException { + current.appendChild(new Comment(locator, ch, start, length)); + } + + /** + * + * @see org.xml.sax.ext.LexicalHandler#endCDATA() + */ + public void endCDATA() throws SAXException { + current.setEndLocator(locator); + current = current.getParentNode(); + } + + /** + * + * @see org.xml.sax.ext.LexicalHandler#endDTD() + */ + public void endDTD() throws SAXException { + current.setEndLocator(locator); + current = current.getParentNode(); + } + + /** + * + * @see org.xml.sax.ext.LexicalHandler#endEntity(java.lang.String) + */ + public void endEntity(String name) throws SAXException { + current.setEndLocator(locator); + current = current.getParentNode(); + } + + /** + * + * @see org.xml.sax.ext.LexicalHandler#startCDATA() + */ + public void startCDATA() throws SAXException { + current = (ParentNode) current.appendChild(new CDATA(locator)); + } + + /** + * + * @see org.xml.sax.ext.LexicalHandler#startDTD(java.lang.String, java.lang.String, java.lang.String) + */ + public void startDTD(String name, String publicId, String systemId) throws SAXException { + 
current = (ParentNode) current.appendChild(new DTD(locator, name, publicId, systemId)); + } + + /** + * + * @see org.xml.sax.ext.LexicalHandler#startEntity(java.lang.String) + */ + public void startEntity(String name) throws SAXException { + current = (ParentNode) current.appendChild(new Entity(locator, name)); + } + + /** + * Returns the root (<code>Document</code> if building a full document or + * <code>DocumentFragment</code> if building a fragment.). + * + * @return the root + */ + public ParentNode getRoot() { + return current; + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/TreeParser.java b/parser/html/java/htmlparser/src/nu/validator/saxtree/TreeParser.java new file mode 100644 index 000000000..a9d92deb0 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/TreeParser.java @@ -0,0 +1,301 @@ +/* + * Copyright (c) 2007 Henri Sivonen + * Copyright (c) 2008 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +package nu.validator.saxtree; + +import org.xml.sax.Attributes; +import org.xml.sax.ContentHandler; +import org.xml.sax.Locator; +import org.xml.sax.SAXException; +import org.xml.sax.ext.LexicalHandler; + +/** + * A tree visitor that replays a tree as SAX events. + * + * <p>The instance also acts as the <code>Locator</code> registered with the + * content handler: every package-private event method records the locator + * it was given and then forwards the event to the content handler or the + * lexical handler, and the <code>Locator</code> getters delegate to that + * recorded locator. + * + * @version $Id$ + * @author hsivonen + */ +public final class TreeParser implements Locator { + + /** + * The content handler. + */ + private final ContentHandler contentHandler; + + /** + * The lexical handler. + */ + private final LexicalHandler lexicalHandler; + + /** + * The locator supplied with the most recent event; <code>null</code> + * until an event method has stored one. + */ + private Locator locatorDelegate; + + /** + * The constructor. + * + * @param contentHandler + * must not be <code>null</code> + * @param lexicalHandler + * may be <code>null</code>, in which case a + * <code>NullLexicalHandler</code> is used in its place + * @throws IllegalArgumentException + * if <code>contentHandler</code> is <code>null</code> + */ + public TreeParser(final ContentHandler contentHandler, + final LexicalHandler lexicalHandler) { + if (contentHandler == null) { + throw new IllegalArgumentException("contentHandler was null."); + } + this.contentHandler = contentHandler; + if (lexicalHandler == null) { + this.lexicalHandler = new NullLexicalHandler(); + } else { + this.lexicalHandler = lexicalHandler; + } + } + + /** + * Causes SAX events for the tree rooted at the argument to be emitted. + * <code>startDocument()</code> and <code>endDocument()</code> are only + * emitted for a <code>Document</code> node. 
+ * + * <p>This parser is first set as the document locator of the content + * handler. The tree is then walked iteratively rather than recursively: + * each node is visited before its children and revisited after them, + * and the walk ends once the root itself has been revisited. + * + * @param node + * the root + * @throws SAXException + * if visiting or revisiting a node throws it + */ + public void parse(Node node) throws SAXException { + contentHandler.setDocumentLocator(this); + Node current = node; + Node next; + for (;;) { + current.visit(this); + if ((next = current.getFirstChild()) != null) { + current = next; + continue; + } + for (;;) { + current.revisit(this); + if (current == node) { + return; + } + if ((next = current.getNextSibling()) != null) { + current = next; + break; + } + current = current.getParentNode(); + } + } + } + + /** + * @see org.xml.sax.ContentHandler#characters(char[], int, int) + */ + void characters(char[] ch, int start, int length, Locator locator) + throws SAXException { + this.locatorDelegate = locator; + contentHandler.characters(ch, start, length); + } + + /** + * @see org.xml.sax.ContentHandler#endDocument() + */ + void endDocument(Locator locator) throws SAXException { + this.locatorDelegate = locator; + contentHandler.endDocument(); + } + + /** + * @see org.xml.sax.ContentHandler#endElement(java.lang.String, + * java.lang.String, java.lang.String) + */ + void endElement(String uri, String localName, String qName, Locator locator) + throws SAXException { + this.locatorDelegate = locator; + contentHandler.endElement(uri, localName, qName); + } + + /** + * @see org.xml.sax.ContentHandler#endPrefixMapping(java.lang.String) + */ + void endPrefixMapping(String prefix, Locator locator) throws SAXException { + this.locatorDelegate = locator; + contentHandler.endPrefixMapping(prefix); + } + + /** + * @see org.xml.sax.ContentHandler#ignorableWhitespace(char[], int, int) + */ + void ignorableWhitespace(char[] ch, int start, int length, Locator locator) + throws SAXException { + this.locatorDelegate = locator; + contentHandler.ignorableWhitespace(ch, start, length); + } + + /** + * @see org.xml.sax.ContentHandler#processingInstruction(java.lang.String, + * java.lang.String) + */ + void processingInstruction(String target, String data, Locator locator) + 
throws SAXException { + this.locatorDelegate = locator; + contentHandler.processingInstruction(target, data); + } + + /** + * @see org.xml.sax.ContentHandler#skippedEntity(java.lang.String) + */ + void skippedEntity(String name, Locator locator) throws SAXException { + this.locatorDelegate = locator; + contentHandler.skippedEntity(name); + } + + /** + * @see org.xml.sax.ContentHandler#startDocument() + */ + void startDocument(Locator locator) throws SAXException { + this.locatorDelegate = locator; + contentHandler.startDocument(); + } + + /** + * @see org.xml.sax.ContentHandler#startElement(java.lang.String, + * java.lang.String, java.lang.String, org.xml.sax.Attributes) + */ + void startElement(String uri, String localName, String qName, + Attributes atts, Locator locator) throws SAXException { + this.locatorDelegate = locator; + contentHandler.startElement(uri, localName, qName, atts); + } + + /** + * @see org.xml.sax.ContentHandler#startPrefixMapping(java.lang.String, + * java.lang.String) + */ + void startPrefixMapping(String prefix, String uri, Locator locator) + throws SAXException { + this.locatorDelegate = locator; + contentHandler.startPrefixMapping(prefix, uri); + } + + /** + * @see org.xml.sax.ext.LexicalHandler#comment(char[], int, int) + */ + void comment(char[] ch, int start, int length, Locator locator) + throws SAXException { + this.locatorDelegate = locator; + lexicalHandler.comment(ch, start, length); + } + + /** + * @see org.xml.sax.ext.LexicalHandler#endCDATA() + */ + void endCDATA(Locator locator) throws SAXException { + this.locatorDelegate = locator; + lexicalHandler.endCDATA(); + } + + /** + * @see org.xml.sax.ext.LexicalHandler#endDTD() + */ + void endDTD(Locator locator) throws SAXException { + this.locatorDelegate = locator; + lexicalHandler.endDTD(); + } + + /** + * @see org.xml.sax.ext.LexicalHandler#endEntity(java.lang.String) + */ + void endEntity(String name, Locator locator) throws SAXException { + this.locatorDelegate = locator; + 
lexicalHandler.endEntity(name); + } + + /** + * @see org.xml.sax.ext.LexicalHandler#startCDATA() + */ + void startCDATA(Locator locator) throws SAXException { + this.locatorDelegate = locator; + lexicalHandler.startCDATA(); + } + + /** + * @see org.xml.sax.ext.LexicalHandler#startDTD(java.lang.String, + * java.lang.String, java.lang.String) + */ + void startDTD(String name, String publicId, String systemId, Locator locator) + throws SAXException { + this.locatorDelegate = locator; + lexicalHandler.startDTD(name, publicId, systemId); + } + + /** + * @see org.xml.sax.ext.LexicalHandler#startEntity(java.lang.String) + */ + void startEntity(String name, Locator locator) throws SAXException { + this.locatorDelegate = locator; + lexicalHandler.startEntity(name); + } + + /** + * Returns the column number reported by the recorded locator, or + * <code>-1</code> if no locator is currently recorded. + * + * @see org.xml.sax.Locator#getColumnNumber() + */ + public int getColumnNumber() { + if (locatorDelegate == null) { + return -1; + } else { + return locatorDelegate.getColumnNumber(); + } + } + + /** + * Returns the line number reported by the recorded locator, or + * <code>-1</code> if no locator is currently recorded. + * + * @see org.xml.sax.Locator#getLineNumber() + */ + public int getLineNumber() { + if (locatorDelegate == null) { + return -1; + } else { + return locatorDelegate.getLineNumber(); + } + } + + /** + * Returns the public identifier reported by the recorded locator, or + * <code>null</code> if no locator is currently recorded. + * + * @see org.xml.sax.Locator#getPublicId() + */ + public String getPublicId() { + if (locatorDelegate == null) { + return null; + } else { + + return locatorDelegate.getPublicId(); + } + } + + /** + * Returns the system identifier reported by the recorded locator, or + * <code>null</code> if no locator is currently recorded. + * + * @see org.xml.sax.Locator#getSystemId() + */ + public String getSystemId() { + if (locatorDelegate == null) { + return null; + } else { + return locatorDelegate.getSystemId(); + } + } +} diff --git a/parser/html/java/htmlparser/src/nu/validator/saxtree/package.html b/parser/html/java/htmlparser/src/nu/validator/saxtree/package.html new file mode 100644 index 000000000..0c34dad81 --- /dev/null +++ b/parser/html/java/htmlparser/src/nu/validator/saxtree/package.html @@ -0,0 +1,46 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN"> +<html> +<head><title>Package Overview</title> +<!-- + Copyright (c) 2007 Henri 
Sivonen + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the "Software"), + to deal in the Software without restriction, including without limitation + the rights to use, copy, modify, merge, publish, distribute, sublicense, + and/or sell copies of the Software, and to permit persons to whom the + Software is furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. +--> +</head> +<body bgcolor="white"> +<p>This package provides SAX Tree: a tree model optimized for creation from SAX +events and replay as SAX events.</p> +<h2>Design Principles</h2> +<ol> +<li>Preserve information exposed through <code>ContentHandler</code>, +<code>LexicalHandler</code> <em>and</em> <code>Locator</code>. +<li>Creation from SAX events or as part of the parse of a conforming +HTML5 document should be <em>fast</em>.</li> +<li>Emitting SAX events based on the tree should be <em>fast</em>.</li> +<li>Mutations should be <em>possible</em> but should not make the above +"fast" cases slower.</li> +<li>Concurrent reads should work without locking when there are no +concurrent mutations.</li> +<li>The user of the API has the responsibility of using the API properly: +for the sake of performance, the model does not check if it is being +used properly. 
Improper use may, therefore, put the model in an +inconsistent state.</li> +</ol> +</body> +</html>
\ No newline at end of file |