diff options
Diffstat (limited to 'mailnews/intl')
-rw-r--r-- | mailnews/intl/charsetData.properties | 120 | ||||
-rw-r--r-- | mailnews/intl/charsetalias.properties | 99 | ||||
-rw-r--r-- | mailnews/intl/jar.mn | 6 | ||||
-rw-r--r-- | mailnews/intl/moz.build | 35 | ||||
-rw-r--r-- | mailnews/intl/nsCharsetAlias.cpp | 93 | ||||
-rw-r--r-- | mailnews/intl/nsCharsetAlias.h | 25 | ||||
-rw-r--r-- | mailnews/intl/nsCharsetConverterManager.cpp | 356 | ||||
-rw-r--r-- | mailnews/intl/nsCharsetConverterManager.h | 36 | ||||
-rw-r--r-- | mailnews/intl/nsCommUConvCID.h | 26 | ||||
-rw-r--r-- | mailnews/intl/nsICharsetConverterManager.idl | 108 | ||||
-rw-r--r-- | mailnews/intl/nsMUTF7ToUnicode.cpp | 14 | ||||
-rw-r--r-- | mailnews/intl/nsMUTF7ToUnicode.h | 31 | ||||
-rw-r--r-- | mailnews/intl/nsUTF7ToUnicode.cpp | 228 | ||||
-rw-r--r-- | mailnews/intl/nsUTF7ToUnicode.h | 72 | ||||
-rw-r--r-- | mailnews/intl/nsUnicodeToMUTF7.cpp | 14 | ||||
-rw-r--r-- | mailnews/intl/nsUnicodeToMUTF7.h | 31 | ||||
-rw-r--r-- | mailnews/intl/nsUnicodeToUTF7.cpp | 298 | ||||
-rw-r--r-- | mailnews/intl/nsUnicodeToUTF7.h | 78 |
18 files changed, 1670 insertions, 0 deletions
diff --git a/mailnews/intl/charsetData.properties b/mailnews/intl/charsetData.properties new file mode 100644 index 000000000..66bdacf7c --- /dev/null +++ b/mailnews/intl/charsetData.properties @@ -0,0 +1,120 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +## Rule of this file: +## 1. key should always be in lower case ascii so we can do case insensitive +## comparison in the code faster. + +## Format of this file: +## +## charset_name.isInternal = anything - specifies that this charset should +## not be exposed to web content because of the vulnerability to XSS attacks +## or some other reasons +## +## charset_name.LangGroup = +## +## charset_name.isMultibyte = multi byte charsets + +x-mac-arabic.isInternal = true +x-mac-farsi.isInternal = true +x-mac-hebrew.isInternal = true +x-imap4-modified-utf7.isInternal = true +replacement.isInternal = true + +# XXX : todo: move to something based on BCP 47 (RFC 5646); +# these should primarily specify script (and sometimes region), +# but NOT language. +# See also https://bugzilla.mozilla.org/show_bug.cgi?id=756022 +# e.g. 
x-western -> *-Latn-155 (Western Europe), +# *-Latn-151 (Eastern Europe), +# *-Latn-154 (Northern Europe), +# *-Latn-TR +# x-cyrillic -> *-Cyrl +# zh-TW -> *-Hant-TW +# zh-HK -> *-Hant-HK +# zh-CN -> *-Hans +# ja -> *-Jpan +# ko -> *-Hang +# he -> *-Hebr +# ar -> *-Arab +# etc + +big5.LangGroup = zh-TW +x-x-big5.LangGroup = zh-TW +big5-hkscs.LangGroup = zh-HK +euc-jp.LangGroup = ja +euc-kr.LangGroup = ko +gb2312.LangGroup = zh-CN +gb18030.LangGroup = zh-CN +gb18030.2000-0.LangGroup = zh-CN +gb18030.2000-1.LangGroup = zh-CN +hkscs-1.LangGroup = zh-HK +ibm866.LangGroup = x-cyrillic +ibm1125.LangGroup = x-cyrillic +ibm1131.LangGroup = x-cyrillic +iso-2022-jp.LangGroup = ja +iso-8859-1.LangGroup = x-western +iso-8859-10.LangGroup = x-western +iso-8859-14.LangGroup = x-western +iso-8859-15.LangGroup = x-western +iso-8859-2.LangGroup = x-western +iso-8859-16.LangGroup = x-western +iso-8859-3.LangGroup = x-western +iso-8859-4.LangGroup = x-western +iso-8859-13.LangGroup = x-western +iso-8859-5.LangGroup = x-cyrillic +iso-8859-6.LangGroup = ar +iso-8859-7.LangGroup = el +iso-8859-8.LangGroup = he +iso-8859-8-i.LangGroup = he +jis_0208-1983.LangGroup = ja +koi8-r.LangGroup = x-cyrillic +koi8-u.LangGroup = x-cyrillic +shift_jis.LangGroup = ja +windows-874.LangGroup = th +utf-8.LangGroup = x-unicode +utf-16.LangGroup = x-unicode +utf-16be.LangGroup = x-unicode +utf-16le.LangGroup = x-unicode +utf-7.LangGroup = x-unicode +x-imap4-modified-utf7.LangGroup = x-unicode +replacement.LangGroup = x-unicode +windows-1250.LangGroup = x-western +windows-1251.LangGroup = x-cyrillic +windows-1252.LangGroup = x-western +windows-1253.LangGroup = el +windows-1254.LangGroup = x-western +windows-1255.LangGroup = he +windows-1256.LangGroup = ar +windows-1257.LangGroup = x-western +windows-1258.LangGroup = x-western +gbk.LangGroup = zh-CN +x-mac-ce.LangGroup = x-western +x-mac-croatian.LangGroup = x-western +x-mac-cyrillic.LangGroup = x-cyrillic +x-mac-devanagari.LangGroup = x-devanagari 
+x-mac-farsi.LangGroup = ar +x-mac-greek.LangGroup = el +x-mac-gujarati.LangGroup = x-gujr +x-mac-gurmukhi.LangGroup = x-guru +x-mac-icelandic.LangGroup = x-western +macintosh.LangGroup = x-western +x-mac-turkish.LangGroup = x-western +x-mac-ukrainian.LangGroup = x-cyrillic +x-mac-romanian.LangGroup = x-western +x-user-defined.LangGroup = x-unicode +ks_c_5601-1987.LangGroup = ko +x-mac-hebrew.LangGroup = he +x-mac-arabic.LangGroup = ar + +iso-2022-jp.isMultibyte = true +shift_jis.isMultibyte = true +euc-jp.isMultibyte = true +big5.isMultibyte = true +big5-hkscs.isMultibyte = true +gb2312.isMultibyte = true +euc-kr.isMultibyte = true +utf-7.isMultibyte = true +utf-8.isMultibyte = true +replacement.isMultibyte = true diff --git a/mailnews/intl/charsetalias.properties b/mailnews/intl/charsetalias.properties new file mode 100644 index 000000000..b3edb83b9 --- /dev/null +++ b/mailnews/intl/charsetalias.properties @@ -0,0 +1,99 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# Rule of this file: +# 1. key should always be in lower case ascii so we can do case insensitive +# comparison in the code faster. +# 2. value should be the one used in unicode converter +# 3. If the charset is not used for document charset, but font charset +# (e.g. XLFD charset- such as JIS x0201, JIS x0208), don't put here +# +# This file contains mainly aliases. Actual labels for encodings are in +# labelsencodings.properties. Besides aliases it contains labels for charsets +# that are not part of the HTML5 world, but still are supported for e-mail. + +646=windows-1252 +iso-8859-1=ISO-8859-1 +utf-16=UTF-16 +utf-7=UTF-7 + +# Netscape private ... 
+x-imap4-modified-utf7=x-imap4-modified-utf7 +x-mac-ce=x-mac-ce +x-mac-turkish=x-mac-turkish +x-mac-greek=x-mac-greek +x-mac-icelandic=x-mac-icelandic +x-mac-croatian=x-mac-croatian +x-mac-romanian=x-mac-romanian +x-mac-hebrew=x-mac-hebrew +x-mac-arabic=x-mac-arabic +x-mac-farsi=x-mac-farsi +x-mac-devanagari=x-mac-devanagari +x-mac-gujarati=x-mac-gujarati +x-mac-gurmukhi=x-mac-gurmukhi +iso-10646-ucs-2=UTF-16BE +x-iso-10646-ucs-2-be=UTF-16BE +x-iso-10646-ucs-2-le=UTF-16LE + +# Aliases for ISO-8859-1 +latin1=ISO-8859-1 +iso_8859-1=ISO-8859-1 +iso8859-1=ISO-8859-1 +iso_8859-1:1987=ISO-8859-1 +iso-ir-100=ISO-8859-1 +l1=ISO-8859-1 +cp819=ISO-8859-1 +csisolatin1=ISO-8859-1 + +# Aliases for ISO-8859-8-I +iso-8859-8i=ISO-8859-8-I + +# Aliases for Shift_JIS +cp932=Shift_JIS + +# Aliases for ISO-2022-JP +# The following are not really aliases of ISO-2022-JP, but they share the same decoder +iso-2022-jp-2=ISO-2022-JP +csiso2022jp2=ISO-2022-JP + +# Aliases for Big5 +# x-x-big5 is not really an alias for Big5, add it only for MS FrontPage +# Sun Solaris + +zh_tw-big5=Big5 + +# Aliases for EUC-KR +5601=EUC-KR + +# Aliases for windows-874 +tis620=windows-874 + +# Aliases for IBM866 +cp-866=IBM866 + +# Aliases for UTF-7 +x-unicode-2-0-utf-7=UTF-7 +unicode-2-0-utf-7=UTF-7 +unicode-1-1-utf-7=UTF-7 +csunicode11utf7=UTF-7 + +# Aliases for ISO-10646-UCS-2 +csunicode=UTF-16BE +csunicode11=UTF-16BE +iso-10646-ucs-basic=UTF-16BE +csunicodeascii=UTF-16BE +iso-10646-unicode-latin1=UTF-16BE +csunicodelatin1=UTF-16BE +iso-10646=UTF-16BE +iso-10646-j-1=UTF-16BE + +# The following names appear in Unix nl_langinfo(CODESET) +# They can be compiled as platform specific if necessary +# DON'T put things here if they do not look generic enough (like hp15CN) +iso88591=ISO-8859-1 +iso885912=ISO-8859-12 +windows-936=gbk +ansi-1251=windows-1251 + +cp936=gbk diff --git a/mailnews/intl/jar.mn b/mailnews/intl/jar.mn new file mode 100644 index 000000000..ab02275d9 --- /dev/null +++ b/mailnews/intl/jar.mn @@ -0,0 +1,6
@@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +toolkit.jar: + res/charsetData.properties (charsetData.properties) diff --git a/mailnews/intl/moz.build b/mailnews/intl/moz.build new file mode 100644 index 000000000..10412b119 --- /dev/null +++ b/mailnews/intl/moz.build @@ -0,0 +1,35 @@ +# -*- Mode: python; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +XPIDL_SOURCES += [ + 'nsICharsetConverterManager.idl', +] + +UNIFIED_SOURCES += [ + 'nsCharsetAlias.cpp', + 'nsCharsetConverterManager.cpp', + 'nsMUTF7ToUnicode.cpp', + 'nsUnicodeToMUTF7.cpp', + 'nsUnicodeToUTF7.cpp', + 'nsUTF7ToUnicode.cpp', +] + +XPIDL_MODULE = 'commuconv' + +LOCAL_INCLUDES += [ + '/intl/locale', +] + +GENERATED_FILES += [ + 'charsetalias.properties.h', +] +charsetalias = GENERATED_FILES['charsetalias.properties.h'] +charsetalias.script = '../../intl/locale/props2arrays.py' +charsetalias.inputs = ['charsetalias.properties'] + +FINAL_LIBRARY = 'mail' + +JAR_MANIFESTS += ['jar.mn'] diff --git a/mailnews/intl/nsCharsetAlias.cpp b/mailnews/intl/nsCharsetAlias.cpp new file mode 100644 index 000000000..c10725596 --- /dev/null +++ b/mailnews/intl/nsCharsetAlias.cpp @@ -0,0 +1,93 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "mozilla/ArrayUtils.h" +#include "mozilla/dom/EncodingUtils.h" + +#include "nsCharsetAlias.h" + +// for NS_ERROR_UCONV_NOCONV +#include "nsCharsetConverterManager.h" + +#include "nsUConvPropertySearch.h" + +using namespace mozilla; +using namespace mozilla::dom; + +// +static const nsUConvProp kAliases[] = { +#include "charsetalias.properties.h" +}; + +//-------------------------------------------------------------- +// static +nsresult +nsCharsetAlias::GetPreferredInternal(const nsACString& aAlias, + nsACString& oResult) +{ + // First check charsetalias.properties and if there is no match, continue to + // call EncodingUtils::FindEncodingForLabel. + nsAutoCString key(aAlias); + ToLowerCase(key); + + nsresult rv = nsUConvPropertySearch::SearchPropertyValue(kAliases, + ArrayLength(kAliases), key, oResult); + if (NS_SUCCEEDED(rv)) { + return NS_OK; + } + return EncodingUtils::FindEncodingForLabel(key, oResult) ? + NS_OK: NS_ERROR_NOT_AVAILABLE; +} + +//-------------------------------------------------------------- +// static +nsresult +nsCharsetAlias::GetPreferred(const nsACString& aAlias, + nsACString& oResult) +{ + if (aAlias.IsEmpty()) return NS_ERROR_NULL_POINTER; + + nsresult res = GetPreferredInternal(aAlias, oResult); + if (NS_FAILED(res)) + return res; + + if (nsCharsetConverterManager::IsInternal(oResult)) + return NS_ERROR_UCONV_NOCONV; + + return res; +} + +//-------------------------------------------------------------- +// static +nsresult +nsCharsetAlias::Equals(const nsACString& aCharset1, + const nsACString& aCharset2, bool* oResult) +{ + nsresult res = NS_OK; + + if(aCharset1.Equals(aCharset2, nsCaseInsensitiveCStringComparator())) { + *oResult = true; + return res; + } + + if(aCharset1.IsEmpty() || aCharset2.IsEmpty()) { + *oResult = false; + return res; + } + + *oResult = false; + nsAutoCString name1; + res = GetPreferredInternal(aCharset1, name1); + if (NS_FAILED(res)) + return res; + + nsAutoCString name2; + res = 
GetPreferredInternal(aCharset2, name2); + if (NS_FAILED(res)) + return res; + + *oResult = name1.Equals(name2); + return NS_OK; +} diff --git a/mailnews/intl/nsCharsetAlias.h b/mailnews/intl/nsCharsetAlias.h new file mode 100644 index 000000000..c792d8de1 --- /dev/null +++ b/mailnews/intl/nsCharsetAlias.h @@ -0,0 +1,25 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef nsCharsetAlias_h___ +#define nsCharsetAlias_h___ + +#include "nscore.h" +#include "nsString.h" + +class nsCharsetConverterManager; +class nsScriptableUnicodeConverter; + +class nsCharsetAlias +{ + friend class nsCharsetConverterManager; + friend class nsScriptableUnicodeConverter; + static nsresult GetPreferredInternal(const nsACString& aAlias, nsACString& aResult); +public: + static nsresult GetPreferred(const nsACString& aAlias, nsACString& aResult); + static nsresult Equals(const nsACString& aCharset1, const nsACString& aCharset2, bool* aResult); +}; + +#endif /* nsCharsetAlias_h___ */ diff --git a/mailnews/intl/nsCharsetConverterManager.cpp b/mailnews/intl/nsCharsetConverterManager.cpp new file mode 100644 index 000000000..e434ecd41 --- /dev/null +++ b/mailnews/intl/nsCharsetConverterManager.cpp @@ -0,0 +1,356 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "nsCOMPtr.h" +#include "nsString.h" +#include "nsUnicharUtils.h" +#include "nsCharsetAlias.h" +#include "nsICategoryManager.h" +#include "nsICharsetConverterManager.h" +#include "nsEncoderDecoderUtils.h" +#include "nsIStringBundle.h" +#include "nsTArray.h" +#include "nsStringEnumerator.h" +#include "mozilla/Services.h" + +#include "nsComponentManagerUtils.h" +#include "nsISupportsPrimitives.h" +#include "nsServiceManagerUtils.h" + +// just for CONTRACTIDs +#include "nsCharsetConverterManager.h" + +static nsIStringBundle * sDataBundle; +static nsIStringBundle * sTitleBundle; + +// Class nsCharsetConverterManager [implementation] + +NS_IMPL_ISUPPORTS(nsCharsetConverterManager, nsICharsetConverterManager) + +nsCharsetConverterManager::nsCharsetConverterManager() +{ +} + +nsCharsetConverterManager::~nsCharsetConverterManager() +{ +} + +//static +void nsCharsetConverterManager::Shutdown() +{ + NS_IF_RELEASE(sDataBundle); + NS_IF_RELEASE(sTitleBundle); +} + +static +nsresult LoadExtensibleBundle(const char* aCategory, + nsIStringBundle ** aResult) +{ + nsCOMPtr<nsIStringBundleService> sbServ = + mozilla::services::GetStringBundleService(); + if (!sbServ) + return NS_ERROR_FAILURE; + + return sbServ->CreateExtensibleBundle(aCategory, aResult); +} + +static +nsresult GetBundleValue(nsIStringBundle * aBundle, + const char * aName, + const nsAFlatString& aProp, + char16_t ** aResult) +{ + nsAutoString key; + + key.AssignWithConversion(aName); + ToLowerCase(key); // we lowercase the main comparison key + key.Append(aProp); + + return aBundle->GetStringFromName(key.get(), aResult); +} + +static +nsresult GetBundleValue(nsIStringBundle * aBundle, + const char * aName, + const nsAFlatString& aProp, + nsAString& aResult) +{ + nsresult rv = NS_OK; + + nsXPIDLString value; + rv = GetBundleValue(aBundle, aName, aProp, getter_Copies(value)); + if (NS_FAILED(rv)) + return rv; + + aResult = value; + + return NS_OK; +} + +static +nsresult GetCharsetDataImpl(const char * 
aCharset, const char16_t * aProp, + nsAString& aResult) +{ + NS_ENSURE_ARG_POINTER(aCharset); + // aProp can be nullptr + + if (!sDataBundle) { + nsresult rv = LoadExtensibleBundle(NS_DATA_BUNDLE_CATEGORY, &sDataBundle); + if (NS_FAILED(rv)) + return rv; + } + + return GetBundleValue(sDataBundle, aCharset, nsDependentString(aProp), aResult); +} + +//static +bool nsCharsetConverterManager::IsInternal(const nsACString& aCharset) +{ + nsAutoString str; + // fully qualify to possibly avoid vtable call + nsresult rv = GetCharsetDataImpl(PromiseFlatCString(aCharset).get(), + u".isInternal", + str); + + return NS_SUCCEEDED(rv); +} + + +//----------------------------------------------------------------------------//---------------------------------------------------------------------------- +// Interface nsICharsetConverterManager [implementation] + +NS_IMETHODIMP +nsCharsetConverterManager::GetUnicodeEncoder(const char * aDest, + nsIUnicodeEncoder ** aResult) +{ + // resolve the charset first + nsAutoCString charset; + + // fully qualify to possibly avoid vtable call + nsCharsetConverterManager::GetCharsetAlias(aDest, charset); + + return nsCharsetConverterManager::GetUnicodeEncoderRaw(charset.get(), + aResult); +} + + +NS_IMETHODIMP +nsCharsetConverterManager::GetUnicodeEncoderRaw(const char * aDest, + nsIUnicodeEncoder ** aResult) +{ + *aResult= nullptr; + nsCOMPtr<nsIUnicodeEncoder> encoder; + + nsresult rv = NS_OK; + + nsAutoCString + contractid(NS_LITERAL_CSTRING(NS_UNICODEENCODER_CONTRACTID_BASE) + + nsDependentCString(aDest)); + + // Always create an instance since encoders hold state. 
+ encoder = do_CreateInstance(contractid.get(), &rv); + + if (NS_FAILED(rv)) + rv = NS_ERROR_UCONV_NOCONV; + else + { + *aResult = encoder.get(); + NS_ADDREF(*aResult); + } + return rv; +} + +NS_IMETHODIMP +nsCharsetConverterManager::GetUnicodeDecoder(const char * aSrc, + nsIUnicodeDecoder ** aResult) +{ + // resolve the charset first + nsAutoCString charset; + + // fully qualify to possibly avoid vtable call + if (NS_FAILED(nsCharsetConverterManager::GetCharsetAlias(aSrc, charset))) + return NS_ERROR_UCONV_NOCONV; + + return nsCharsetConverterManager::GetUnicodeDecoderRaw(charset.get(), + aResult); +} + +NS_IMETHODIMP +nsCharsetConverterManager::GetUnicodeDecoderInternal(const char * aSrc, + nsIUnicodeDecoder ** aResult) +{ + // resolve the charset first + nsAutoCString charset; + + nsresult rv = nsCharsetAlias::GetPreferredInternal(nsDependentCString(aSrc), + charset); + NS_ENSURE_SUCCESS(rv, rv); + + return nsCharsetConverterManager::GetUnicodeDecoderRaw(charset.get(), + aResult); +} + +NS_IMETHODIMP +nsCharsetConverterManager::GetUnicodeDecoderRaw(const char * aSrc, + nsIUnicodeDecoder ** aResult) +{ + *aResult= nullptr; + nsCOMPtr<nsIUnicodeDecoder> decoder; + + nsresult rv = NS_OK; + + NS_NAMED_LITERAL_CSTRING(contractbase, NS_UNICODEDECODER_CONTRACTID_BASE); + nsDependentCString src(aSrc); + + decoder = do_CreateInstance(PromiseFlatCString(contractbase + src).get(), + &rv); + NS_ENSURE_SUCCESS(rv, NS_ERROR_UCONV_NOCONV); + + decoder.forget(aResult); + return rv; +} + +static +nsresult GetList(const nsACString& aCategory, + const nsACString& aPrefix, + nsIUTF8StringEnumerator** aResult) +{ + NS_ENSURE_ARG_POINTER(aResult); + *aResult = nullptr; + + nsresult rv; + + nsCOMPtr<nsICategoryManager> catman = do_GetService(NS_CATEGORYMANAGER_CONTRACTID, &rv); + if (NS_FAILED(rv)) + return rv; + + nsTArray<nsCString>* array = new nsTArray<nsCString>; + if (!array) + return NS_ERROR_OUT_OF_MEMORY; + + nsCOMPtr<nsISimpleEnumerator> enumerator; + 
catman->EnumerateCategory(PromiseFlatCString(aCategory).get(), + getter_AddRefs(enumerator)); + + bool hasMore; + while (NS_SUCCEEDED(enumerator->HasMoreElements(&hasMore)) && hasMore) { + nsCOMPtr<nsISupports> supports; + if (NS_FAILED(enumerator->GetNext(getter_AddRefs(supports)))) + continue; + + nsCOMPtr<nsISupportsCString> supStr = do_QueryInterface(supports); + if (!supStr) + continue; + + nsAutoCString name; + if (NS_FAILED(supStr->GetData(name))) + continue; + + nsAutoCString fullName(aPrefix); + fullName.Append(name); + NS_ENSURE_TRUE(array->AppendElement(fullName), NS_ERROR_OUT_OF_MEMORY); + } + + return NS_NewAdoptingUTF8StringEnumerator(aResult, array); +} + +// we should change the interface so that we can just pass back a enumerator! +NS_IMETHODIMP +nsCharsetConverterManager::GetDecoderList(nsIUTF8StringEnumerator ** aResult) +{ + return GetList(NS_LITERAL_CSTRING(NS_UNICODEDECODER_NAME), + EmptyCString(), aResult); +} + +NS_IMETHODIMP +nsCharsetConverterManager::GetEncoderList(nsIUTF8StringEnumerator ** aResult) +{ + return GetList(NS_LITERAL_CSTRING(NS_UNICODEENCODER_NAME), + EmptyCString(), aResult); +} + +NS_IMETHODIMP +nsCharsetConverterManager::GetCharsetDetectorList(nsIUTF8StringEnumerator** aResult) +{ + return GetList(NS_LITERAL_CSTRING("charset-detectors"), + NS_LITERAL_CSTRING("chardet."), aResult); +} + +// XXX Improve the implementation of this method. Right now, it is build on +// top of the nsCharsetAlias service. We can make the nsCharsetAlias +// better, with its own hash table (not the StringBundle anymore) and +// a nicer file format. +NS_IMETHODIMP +nsCharsetConverterManager::GetCharsetAlias(const char * aCharset, + nsACString& aResult) +{ + NS_ENSURE_ARG_POINTER(aCharset); + + // We try to obtain the preferred name for this charset from the charset + // aliases. 
+ nsresult rv; + + rv = nsCharsetAlias::GetPreferred(nsDependentCString(aCharset), aResult); + NS_ENSURE_SUCCESS(rv, rv); + + return NS_OK; +} + + +NS_IMETHODIMP +nsCharsetConverterManager::GetCharsetTitle(const char * aCharset, + nsAString& aResult) +{ + NS_ENSURE_ARG_POINTER(aCharset); + + if (!sTitleBundle) { + nsresult rv = LoadExtensibleBundle(NS_TITLE_BUNDLE_CATEGORY, &sTitleBundle); + NS_ENSURE_SUCCESS(rv, rv); + } + + return GetBundleValue(sTitleBundle, aCharset, NS_LITERAL_STRING(".title"), aResult); +} + +NS_IMETHODIMP +nsCharsetConverterManager::GetCharsetData(const char * aCharset, + const char16_t * aProp, + nsAString& aResult) +{ + return GetCharsetDataImpl(aCharset, aProp, aResult); +} + +NS_IMETHODIMP +nsCharsetConverterManager::GetCharsetLangGroup(const char * aCharset, + nsIAtom** aResult) +{ + // resolve the charset first + nsAutoCString charset; + + nsresult rv = GetCharsetAlias(aCharset, charset); + NS_ENSURE_SUCCESS(rv, rv); + + // fully qualify to possibly avoid vtable call + return nsCharsetConverterManager::GetCharsetLangGroupRaw(charset.get(), + aResult); +} + +NS_IMETHODIMP +nsCharsetConverterManager::GetCharsetLangGroupRaw(const char * aCharset, + nsIAtom** aResult) +{ + + *aResult = nullptr; + nsAutoString langGroup; + // fully qualify to possibly avoid vtable call + nsresult rv = nsCharsetConverterManager::GetCharsetData( + aCharset, u".LangGroup", langGroup); + + if (NS_SUCCEEDED(rv)) { + ToLowerCase(langGroup); // use lowercase for all language atoms + *aResult = NS_Atomize(langGroup).take(); + } + + return rv; +} diff --git a/mailnews/intl/nsCharsetConverterManager.h b/mailnews/intl/nsCharsetConverterManager.h new file mode 100644 index 000000000..3cc1f5830 --- /dev/null +++ b/mailnews/intl/nsCharsetConverterManager.h @@ -0,0 +1,36 @@ +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#ifndef nsCharsetConverterManager_h__ +#define nsCharsetConverterManager_h__ + +#include "nsISupports.h" +#include "nsICharsetConverterManager.h" + +#define NS_DATA_BUNDLE_CATEGORY "uconv-charset-data" +#define NS_TITLE_BUNDLE_CATEGORY "uconv-charset-titles" + +class nsCharsetAlias; + +class nsCharsetConverterManager : public nsICharsetConverterManager +{ + friend class nsCharsetAlias; + + NS_DECL_THREADSAFE_ISUPPORTS + NS_DECL_NSICHARSETCONVERTERMANAGER + +public: + nsCharsetConverterManager(); + + static void Shutdown(); + +private: + virtual ~nsCharsetConverterManager(); + + static bool IsInternal(const nsACString& aCharset); +}; + +#endif // nsCharsetConverterManager_h__ + + diff --git a/mailnews/intl/nsCommUConvCID.h b/mailnews/intl/nsCommUConvCID.h new file mode 100644 index 000000000..4f1686098 --- /dev/null +++ b/mailnews/intl/nsCommUConvCID.h @@ -0,0 +1,26 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/

// Class ID for our UTF7ToUnicode charset converter
// {77CFAAF1-1CF4-11d3-8AAF-00600811A836}
#define NS_UTF7TOUNICODE_CID \
  { 0x77cfaaf1, 0x1cf4, 0x11d3, {0x8a, 0xaf, 0x0, 0x60, 0x8, 0x11, 0xa8, 0x36}}

// Class ID for our MUTF7ToUnicode charset converter
// {B57F97C1-0D70-11d3-8AAE-00600811A836}
#define NS_MUTF7TOUNICODE_CID \
  { 0xb57f97c1, 0xd70, 0x11d3, {0x8a, 0xae, 0x0, 0x60, 0x8, 0x11, 0xa8, 0x36}}

// Class ID for our UnicodeToUTF7 charset converter
// {77CFAAF2-1CF4-11d3-8AAF-00600811A836}
#define NS_UNICODETOUTF7_CID \
  { 0x77cfaaf2, 0x1cf4, 0x11d3, {0x8a, 0xaf, 0x0, 0x60, 0x8, 0x11, 0xa8, 0x36}}

// Class ID for our UnicodeToMUTF7 charset converter
// {B57F97C2-0D70-11d3-8AAE-00600811A836}
#define NS_UNICODETOMUTF7_CID \
  { 0xb57f97c2, 0xd70, 0x11d3, {0x8a, 0xae, 0x0, 0x60, 0x8, 0x11, 0xa8, 0x36}}


diff --git a/mailnews/intl/nsICharsetConverterManager.idl b/mailnews/intl/nsICharsetConverterManager.idl
new file mode 100644
index 000000000..026f0887c
--- /dev/null
+++ b/mailnews/intl/nsICharsetConverterManager.idl
@@ -0,0 +1,108 @@
/* -*- Mode: IDL; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "nsISupports.idl"
#include "nsIAtom.idl"

%{ C++
#include "nsIUnicodeDecoder.h"
#include "nsIUnicodeEncoder.h"

// XXX change to NS_CHARSETCONVERTERMANAGER_CID
#define NS_ICHARSETCONVERTERMANAGER_CID \
  {0x3c1c0163, 0x9bd0, 0x11d3, { 0x9d, 0x9, 0x0, 0x50, 0x4, 0x0, 0x7, 0xb2}}

#define NS_CHARSETCONVERTERMANAGER_CONTRACTID "@mozilla.org/charset-converter-manager;1"
%}

interface nsIUnicodeDecoder;
interface nsIUnicodeEncoder;
interface nsIUTF8StringEnumerator;

/**
 * DON'T ADD NEW USES OF THIS INTERFACE TO MOZILLA-CENTRAL. Use
 * mozilla::dom::EncodingUtils instead.
 *
 * Here charsets are identified by ASCII strings. Charset alias
 * resolution is provided by default in most methods. "Raw"
 * versions that do not need this resolution are also provided.
 *
 * @deprecated Use mozilla::dom::EncodingUtils in mozilla-central instead.
 * @created         21/Feb/2000
 * @author  Catalin Rotaru [CATA]
 */
[scriptable, uuid(a0550d46-8d9c-47dd-acc7-c083620dff12)]
interface nsICharsetConverterManager : nsISupports
{
    /**
     * Get the Unicode decoder for the given charset.
     * The "Raw" version skips charset alias resolution.
     */
    [noscript] nsIUnicodeDecoder getUnicodeDecoder(in string charset);
    [noscript] nsIUnicodeDecoder getUnicodeDecoderRaw(in string charset);
    [noscript] nsIUnicodeDecoder getUnicodeDecoderInternal(in string charset);

    /**
     * Get the Unicode encoder for the given charset.
     * The "Raw" version skips charset alias resolution.
     */
    [noscript] nsIUnicodeEncoder getUnicodeEncoder(in string charset);
    [noscript] nsIUnicodeEncoder getUnicodeEncoderRaw(in string charset);

    /**
     * A shortcut to calling nsICharsetAlias to do alias resolution.
     * @throws if aCharset is an unknown charset.
     */
    ACString getCharsetAlias(in string aCharset);

    /**
     * Get the complete list of available decoders.
     */
    nsIUTF8StringEnumerator getDecoderList();

    /**
     * Get the complete list of available encoders.
     */
    nsIUTF8StringEnumerator getEncoderList();

    /**
     * Get the complete list of available charset detectors.
     */
    nsIUTF8StringEnumerator GetCharsetDetectorList();

    /**
     * Get the human-readable name for the given charset.
     * @throws if aCharset is an unknown charset.
     */
    AString getCharsetTitle(in string aCharset);

    /**
     * Get some data about the given charset. This includes whether the
     * character encoding may be used for certain purposes, if it is
     * multi-byte, and the language code for it. See charsetData.properties
     * for the source of this data. Some known property names:
     *    LangGroup      - language code for charset, e.g. 'he' and 'zh-CN'.
     *    isMultibyte    - is this a multi-byte charset?
     *    isInternal     - not to be used in untrusted web content.
     *
     * @param aCharset name of the character encoding, e.g. 'iso-8859-15'.
     * @param aProp property desired for the character encoding.
     * @throws if aCharset is an unknown charset.
     * @return the value of the property, for the character encoding.
     */
    AString getCharsetData(in string aCharset,
                           in wstring aProp);

    /**
     * Get the language group for the given charset. This is similar to
     * calling <tt>getCharsetData</tt> with the <tt>prop</tt> "LangGroup".
     * The "Raw" version skips charset alias resolution.
     *
     * @param aCharset name of the character encoding, e.g. 'iso-8859-15'.
     * @throws if aCharset is an unknown charset.
     * @return the language code for the character encoding.
     */
    nsIAtom getCharsetLangGroup(in string aCharset);
    nsIAtom getCharsetLangGroupRaw(in string aCharset);
};
diff --git a/mailnews/intl/nsMUTF7ToUnicode.cpp b/mailnews/intl/nsMUTF7ToUnicode.cpp
new file mode 100644
index 000000000..c513e4fb0
--- /dev/null
+++ b/mailnews/intl/nsMUTF7ToUnicode.cpp
@@ -0,0 +1,14 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/ + +#include "nsMUTF7ToUnicode.h" + +//---------------------------------------------------------------------- +// Class nsMUTF7ToUnicode [implementation] + +nsMUTF7ToUnicode::nsMUTF7ToUnicode() +: nsBasicUTF7Decoder(',', '&') +{ +} diff --git a/mailnews/intl/nsMUTF7ToUnicode.h b/mailnews/intl/nsMUTF7ToUnicode.h new file mode 100644 index 000000000..1b5046e82 --- /dev/null +++ b/mailnews/intl/nsMUTF7ToUnicode.h @@ -0,0 +1,31 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef nsMUTF7ToUnicode_h___ +#define nsMUTF7ToUnicode_h___ + +#include "nsUTF7ToUnicode.h" + +//---------------------------------------------------------------------- +// Class nsMUTF7ToUnicode [declaration] + +/** + * A character set converter from Modified UTF7 to Unicode. + * + * @created 18/May/1999 + * @author Catalin Rotaru [CATA] + */ +class nsMUTF7ToUnicode : public nsBasicUTF7Decoder +{ +public: + + /** + * Class constructor. + */ + nsMUTF7ToUnicode(); + +}; + +#endif /* nsMUTF7ToUnicode_h___ */ diff --git a/mailnews/intl/nsUTF7ToUnicode.cpp b/mailnews/intl/nsUTF7ToUnicode.cpp new file mode 100644 index 000000000..201bcccb9 --- /dev/null +++ b/mailnews/intl/nsUTF7ToUnicode.cpp @@ -0,0 +1,228 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "nsUTF7ToUnicode.h" + +#define ENC_DIRECT 0 +#define ENC_BASE64 1 + +//---------------------------------------------------------------------- +// Class nsBasicUTF7Decoder [implementation] + +nsBasicUTF7Decoder::nsBasicUTF7Decoder(char aLastChar, char aEscChar) +: nsBufferDecoderSupport(1) +{ + mLastChar = aLastChar; + mEscChar = aEscChar; + Reset(); +} + +nsresult nsBasicUTF7Decoder::DecodeDirect( + const char * aSrc, + int32_t * aSrcLength, + char16_t * aDest, + int32_t * aDestLength) +{ + const char * srcEnd = aSrc + *aSrcLength; + const char * src = aSrc; + char16_t * destEnd = aDest + *aDestLength; + char16_t * dest = aDest; + nsresult res = NS_OK; + char ch; + + while (src < srcEnd) { + ch = *src; + + // stop when we meet other chars or end of direct encoded seq. + // if (!(DirectEncodable(ch)) || (ch == mEscChar)) { + // but we are decoding; so we should be lax; pass everything until escchar + if (ch == mEscChar) { + res = NS_ERROR_UDEC_ILLEGALINPUT; + break; + } + + if (dest >= destEnd) { + res = NS_OK_UDEC_MOREOUTPUT; + break; + } else { + *dest++ = ch; + src++; + } + } + + *aSrcLength = src - aSrc; + *aDestLength = dest - aDest; + return res; +} + +nsresult nsBasicUTF7Decoder::DecodeBase64( + const char * aSrc, + int32_t * aSrcLength, + char16_t * aDest, + int32_t * aDestLength) +{ + const char * srcEnd = aSrc + *aSrcLength; + const char * src = aSrc; + char16_t * destEnd = aDest + *aDestLength; + char16_t * dest = aDest; + nsresult res = NS_OK; + char ch; + uint32_t value; + + while (src < srcEnd) { + ch = *src; + + // stop when we meet other chars or end of direct encoded seq. 
+ value = CharToValue(ch); + if (value > 0xff) { + res = NS_ERROR_UDEC_ILLEGALINPUT; + break; + } + + switch (mEncStep) { + case 0: + mEncBits = value << 10; + break; + case 1: + mEncBits += value << 4; + break; + case 2: + if (dest >= destEnd) { + res = NS_OK_UDEC_MOREOUTPUT; + break; + } + mEncBits += value >> 2; + *(dest++) = (char16_t) mEncBits; + mEncBits = (value & 0x03) << 14; + break; + case 3: + mEncBits += value << 8; + break; + case 4: + mEncBits += value << 2; + break; + case 5: + if (dest >= destEnd) { + res = NS_OK_UDEC_MOREOUTPUT; + break; + } + mEncBits += value >> 4; + *(dest++) = (char16_t) mEncBits; + mEncBits = (value & 0x0f) << 12; + break; + case 6: + mEncBits += value << 6; + break; + case 7: + if (dest >= destEnd) { + res = NS_OK_UDEC_MOREOUTPUT; + break; + } + mEncBits += value; + *(dest++) = (char16_t) mEncBits; + mEncBits = 0; + break; + } + + if (res != NS_OK) break; + + src++; + (++mEncStep)%=8; + } + + *aSrcLength = src - aSrc; + *aDestLength = dest - aDest; + return res; +} + +uint32_t nsBasicUTF7Decoder::CharToValue(char aChar) { + if ((aChar>='A')&&(aChar<='Z')) + return (uint8_t)(aChar-'A'); + else if ((aChar>='a')&&(aChar<='z')) + return (uint8_t)(26+aChar-'a'); + else if ((aChar>='0')&&(aChar<='9')) + return (uint8_t)(26+26+aChar-'0'); + else if (aChar=='+') + return (uint8_t)(26+26+10); + else if (aChar==mLastChar) + return (uint8_t)(26+26+10+1); + else + return 0xffff; +} + +//---------------------------------------------------------------------- +// Subclassing of nsBufferDecoderSupport class [implementation] + +NS_IMETHODIMP nsBasicUTF7Decoder::ConvertNoBuff(const char * aSrc, + int32_t * aSrcLength, + char16_t * aDest, + int32_t * aDestLength) +{ + const char * srcEnd = aSrc + *aSrcLength; + const char * src = aSrc; + char16_t * destEnd = aDest + *aDestLength; + char16_t * dest = aDest; + int32_t bcr,bcw; + nsresult res = NS_OK; + + while (src < srcEnd) { + + // fist, attept to decode in the current mode + bcr = srcEnd - 
src; + bcw = destEnd - dest; + if (mEncoding == ENC_DIRECT) + res = DecodeDirect(src, &bcr, dest, &bcw); + else if ((mFreshBase64) && (*src == '-')) { + *dest = mEscChar; + bcr = 0; + bcw = 1; + res = NS_ERROR_UDEC_ILLEGALINPUT; + } else { + mFreshBase64 = false; + res = DecodeBase64(src, &bcr, dest, &bcw); + } + src += bcr; + dest += bcw; + + // if an illegal char was encountered, test if it is an escape seq. + if (res == NS_ERROR_UDEC_ILLEGALINPUT) { + if (mEncoding == ENC_DIRECT) { + if (*src == mEscChar) { + mEncoding = ENC_BASE64; + mFreshBase64 = true; + mEncBits = 0; + mEncStep = 0; + src++; + res = NS_OK; + } else break; + } else { + mEncoding = ENC_DIRECT; + res = NS_OK; + // absorbe end of escape sequence + if (*src == '-') src++; + } + } else if (res != NS_OK) break; + } + + *aSrcLength = src - aSrc; + *aDestLength = dest - aDest; + return res; +} + +NS_IMETHODIMP nsBasicUTF7Decoder::Reset() +{ + mEncoding = ENC_DIRECT; + mEncBits = 0; + mEncStep = 0; + return nsBufferDecoderSupport::Reset(); +} + +//---------------------------------------------------------------------- +// Class nsUTF7ToUnicode [implementation] + +nsUTF7ToUnicode::nsUTF7ToUnicode() +: nsBasicUTF7Decoder('/', '+') +{ +} diff --git a/mailnews/intl/nsUTF7ToUnicode.h b/mailnews/intl/nsUTF7ToUnicode.h new file mode 100644 index 000000000..74bf295a0 --- /dev/null +++ b/mailnews/intl/nsUTF7ToUnicode.h @@ -0,0 +1,72 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef nsUTF7ToUnicode_h___ +#define nsUTF7ToUnicode_h___ + +#include "nsUCSupport.h" + +//---------------------------------------------------------------------- +// Class nsBasicUTF7Decoder [declaration] + +/** + * Basic class for a character set converter from UTF-7 to Unicode. 
+ * + * @created 03/Jun/1999 + * @author Catalin Rotaru [CATA] + */ +class nsBasicUTF7Decoder : public nsBufferDecoderSupport +{ +public: + + /** + * Class constructor. + */ + nsBasicUTF7Decoder(char aLastChar, char aEscChar); + +protected: + + int32_t mEncoding; // current encoding + uint32_t mEncBits; + int32_t mEncStep; + char mLastChar; + char mEscChar; + bool mFreshBase64; + + nsresult DecodeDirect(const char * aSrc, int32_t * aSrcLength, + char16_t * aDest, int32_t * aDestLength); + nsresult DecodeBase64(const char * aSrc, int32_t * aSrcLength, + char16_t * aDest, int32_t * aDestLength); + uint32_t CharToValue(char aChar); + + //-------------------------------------------------------------------- + // Subclassing of nsBufferDecoderSupport class [declaration] + + NS_IMETHOD ConvertNoBuff(const char * aSrc, int32_t * aSrcLength, + char16_t * aDest, int32_t * aDestLength); + NS_IMETHOD Reset(); +}; + +//---------------------------------------------------------------------- +// Class nsUTF7ToUnicode [declaration] + +/** + * A character set converter from Modified UTF7 to Unicode. + * + * @created 18/May/1999 + * @author Catalin Rotaru [CATA] + */ +class nsUTF7ToUnicode : public nsBasicUTF7Decoder +{ +public: + + /** + * Class constructor. + */ + nsUTF7ToUnicode(); + +}; + +#endif /* nsUTF7ToUnicode_h___ */ diff --git a/mailnews/intl/nsUnicodeToMUTF7.cpp b/mailnews/intl/nsUnicodeToMUTF7.cpp new file mode 100644 index 000000000..a9c368ce8 --- /dev/null +++ b/mailnews/intl/nsUnicodeToMUTF7.cpp @@ -0,0 +1,14 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "nsUnicodeToMUTF7.h" + +//---------------------------------------------------------------------- +// Class nsUnicodeToMUTF7 [implementation] + +nsUnicodeToMUTF7::nsUnicodeToMUTF7() +: nsBasicUTF7Encoder(',', '&') +{ +} diff --git a/mailnews/intl/nsUnicodeToMUTF7.h b/mailnews/intl/nsUnicodeToMUTF7.h new file mode 100644 index 000000000..83aea6ce4 --- /dev/null +++ b/mailnews/intl/nsUnicodeToMUTF7.h @@ -0,0 +1,31 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef nsUnicodeToMUTF7_h___ +#define nsUnicodeToMUTF7_h___ + +#include "nsUnicodeToUTF7.h" + +//---------------------------------------------------------------------- +// Class nsUnicodeToMUTF7 [declaration] + +/** + * A character set converter from Unicode to Modified UTF-7. + * + * @created 18/May/1999 + * @author Catalin Rotaru [CATA] + */ +class nsUnicodeToMUTF7 : public nsBasicUTF7Encoder +{ +public: + + /** + * Class constructor. + */ + nsUnicodeToMUTF7(); + +}; + +#endif /* nsUnicodeToMUTF7_h___ */ diff --git a/mailnews/intl/nsUnicodeToUTF7.cpp b/mailnews/intl/nsUnicodeToUTF7.cpp new file mode 100644 index 000000000..5db623a17 --- /dev/null +++ b/mailnews/intl/nsUnicodeToUTF7.cpp @@ -0,0 +1,298 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "nsUnicodeToUTF7.h" +#include <string.h> + +//---------------------------------------------------------------------- +// Global functions and data [declaration] + +#define ENC_DIRECT 0 +#define ENC_BASE64 1 + +//---------------------------------------------------------------------- +// Class nsBasicUTF7Encoder [implementation] + +nsBasicUTF7Encoder::nsBasicUTF7Encoder(char aLastChar, char aEscChar) +: nsEncoderSupport(5) +{ + mLastChar = aLastChar; + mEscChar = aEscChar; + Reset(); +} + +nsresult nsBasicUTF7Encoder::ShiftEncoding(int32_t aEncoding, + char * aDest, + int32_t * aDestLength) +{ + if (aEncoding == mEncoding) { + *aDestLength = 0; + return NS_OK; + } + + nsresult res = NS_OK; + char * dest = aDest; + char * destEnd = aDest + *aDestLength; + + if (mEncStep != 0) { + if (dest >= destEnd) return NS_OK_UENC_MOREOUTPUT; + *(dest++)=ValueToChar(mEncBits); + mEncStep = 0; + mEncBits = 0; + } + + if (dest >= destEnd) { + res = NS_OK_UENC_MOREOUTPUT; + } else { + switch (aEncoding) { + case 0: + *(dest++) = '-'; + mEncStep = 0; + mEncBits = 0; + break; + case 1: + *(dest++) = mEscChar; + break; + } + mEncoding = aEncoding; + } + + *aDestLength = dest - aDest; + return res; +} + +nsresult nsBasicUTF7Encoder::EncodeDirect( + const char16_t * aSrc, + int32_t * aSrcLength, + char * aDest, + int32_t * aDestLength) +{ + nsresult res = NS_OK; + const char16_t * src = aSrc; + const char16_t * srcEnd = aSrc + *aSrcLength; + char * dest = aDest; + char * destEnd = aDest + *aDestLength; + char16_t ch; + + while (src < srcEnd) { + ch = *src; + + // stop when we reach Unicode chars + if (!DirectEncodable(ch)) break; + + if (ch == mEscChar) { + // special case for the escape char + if (destEnd - dest < 1) { + res = NS_OK_UENC_MOREOUTPUT; + break; + } else { + *dest++ = (char)ch; + *dest++ = (char)'-'; + src++; + } + } else { + //classic direct encoding + if (dest >= destEnd) { + res = NS_OK_UENC_MOREOUTPUT; + break; + } else { + *dest++ = (char)ch; + src++; + } 
+ } + } + + *aSrcLength = src - aSrc; + *aDestLength = dest - aDest; + return res; +} + +nsresult nsBasicUTF7Encoder::EncodeBase64( + const char16_t * aSrc, + int32_t * aSrcLength, + char * aDest, + int32_t * aDestLength) +{ + nsresult res = NS_OK; + const char16_t * src = aSrc; + const char16_t * srcEnd = aSrc + *aSrcLength; + char * dest = aDest; + char * destEnd = aDest + *aDestLength; + char16_t ch; + uint32_t value; + + while (src < srcEnd) { + ch = *src; + + // stop when we reach printable US-ASCII chars + if (DirectEncodable(ch)) break; + + switch (mEncStep) { + case 0: + if (destEnd - dest < 2) { + res = NS_OK_UENC_MOREOUTPUT; + break; + } + value=ch>>10; + *(dest++)=ValueToChar(value); + value=(ch>>4)&0x3f; + *(dest++)=ValueToChar(value); + mEncBits=(ch&0x0f)<<2; + break; + case 1: + if (destEnd - dest < 3) { + res = NS_OK_UENC_MOREOUTPUT; + break; + } + value=mEncBits+(ch>>14); + *(dest++)=ValueToChar(value); + value=(ch>>8)&0x3f; + *(dest++)=ValueToChar(value); + value=(ch>>2)&0x3f; + *(dest++)=ValueToChar(value); + mEncBits=(ch&0x03)<<4; + break; + case 2: + if (destEnd - dest < 3) { + res = NS_OK_UENC_MOREOUTPUT; + break; + } + value=mEncBits+(ch>>12); + *(dest++)=ValueToChar(value); + value=(ch>>6)&0x3f; + *(dest++)=ValueToChar(value); + value=ch&0x3f; + *(dest++)=ValueToChar(value); + mEncBits=0; + break; + } + + if (res != NS_OK) break; + + src++; + (++mEncStep)%=3; + } + + *aSrcLength = src - aSrc; + *aDestLength = dest - aDest; + return res; +} + +char nsBasicUTF7Encoder::ValueToChar(uint32_t aValue) { + if (aValue < 26) + return (char)('A'+aValue); + else if (aValue < 26 + 26) + return (char)('a' + aValue - 26); + else if (aValue < 26 + 26 + 10) + return (char)('0' + aValue - 26 - 26); + else if (aValue == 26 + 26 + 10) + return '+'; + else if (aValue == 26 + 26 + 10 + 1) + return mLastChar; + else + return -1; +} + +bool nsBasicUTF7Encoder::DirectEncodable(char16_t aChar) { + // spec says: printable US-ASCII chars + if ((aChar >= 0x20) && (aChar 
<= 0x7e)) return true; + else return false; +} + +//---------------------------------------------------------------------- +// Subclassing of nsEncoderSupport class [implementation] + +NS_IMETHODIMP nsBasicUTF7Encoder::ConvertNoBuffNoErr( + const char16_t * aSrc, + int32_t * aSrcLength, + char * aDest, + int32_t * aDestLength) +{ + nsresult res = NS_OK; + const char16_t * src = aSrc; + const char16_t * srcEnd = aSrc + *aSrcLength; + char * dest = aDest; + char * destEnd = aDest + *aDestLength; + int32_t bcr,bcw; + char16_t ch; + int32_t enc; + + while (src < srcEnd) { + // find the encoding for the next char + ch = *src; + if (DirectEncodable(ch)) + enc = ENC_DIRECT; + else + enc = ENC_BASE64; + + // if necessary, shift into the required encoding + bcw = destEnd - dest; + res = ShiftEncoding(enc, dest, &bcw); + dest += bcw; + if (res != NS_OK) break; + + // now encode (as much as you can) + bcr = srcEnd - src; + bcw = destEnd - dest; + if (enc == ENC_DIRECT) + res = EncodeDirect(src, &bcr, dest, &bcw); + else + res = EncodeBase64(src, &bcr, dest, &bcw); + src += bcr; + dest += bcw; + + if (res != NS_OK) break; + } + + *aSrcLength = src - aSrc; + *aDestLength = dest - aDest; + return res; +} + +NS_IMETHODIMP nsBasicUTF7Encoder::FinishNoBuff(char * aDest, + int32_t * aDestLength) +{ + return ShiftEncoding(ENC_DIRECT, aDest, aDestLength); +} + +NS_IMETHODIMP nsBasicUTF7Encoder::Reset() +{ + mEncoding = ENC_DIRECT; + mEncBits = 0; + mEncStep = 0; + return nsEncoderSupport::Reset(); +} + +//---------------------------------------------------------------------- +// Class nsUnicodeToUTF7 [implementation] + +nsUnicodeToUTF7::nsUnicodeToUTF7() +: nsBasicUTF7Encoder('/', '+') +{ +} + + +bool nsUnicodeToUTF7::DirectEncodable(char16_t aChar) { + if ((aChar >= 'A') && (aChar <= 'Z')) return true; + else if ((aChar >= 'a') && (aChar <= 'z')) return true; + else if ((aChar >= '0') && (aChar <= '9')) return true; + else if ((aChar >= 39) && (aChar <= 41)) return true; + else if 
((aChar >= 44) && (aChar <= 47)) return true; + else if (aChar == 58) return true; + else if (aChar == 63) return true; + else if (aChar == ' ') return true; + else if (aChar == 9) return true; + else if (aChar == 13) return true; + else if (aChar == 10) return true; + else if (aChar == 60) return true; // '<' + else if (aChar == 33) return true; // '!' + else if (aChar == 34) return true; // '"' + else if (aChar == 62) return true; // '>' + else if (aChar == 61) return true; // '=' + else if (aChar == 59) return true; // ';' + else if (aChar == 91) return true; // '[' + else if (aChar == 93) return true; // ']' + else return false; +} diff --git a/mailnews/intl/nsUnicodeToUTF7.h b/mailnews/intl/nsUnicodeToUTF7.h new file mode 100644 index 000000000..dc8cc678e --- /dev/null +++ b/mailnews/intl/nsUnicodeToUTF7.h @@ -0,0 +1,78 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef nsUnicodeToUTF7_h___ +#define nsUnicodeToUTF7_h___ + +#include "nsUCSupport.h" + +//---------------------------------------------------------------------- +// Class nsBasicUTF7Encoder [declaration] + +/** + * Basic class for a character set converter from Unicode to UTF-7. + * + * @created 03/Jun/1999 + * @author Catalin Rotaru [CATA] + */ +class nsBasicUTF7Encoder : public nsEncoderSupport +{ +public: + + /** + * Class constructor. 
+ */ + nsBasicUTF7Encoder(char aLastChar, char aEscChar); + +protected: + + int32_t mEncoding; // current encoding + uint32_t mEncBits; + int32_t mEncStep; + char mLastChar; + char mEscChar; + + nsresult ShiftEncoding(int32_t aEncoding, char * aDest, + int32_t * aDestLength); + nsresult EncodeDirect(const char16_t * aSrc, int32_t * aSrcLength, + char * aDest, int32_t * aDestLength); + nsresult EncodeBase64(const char16_t * aSrc, int32_t * aSrcLength, + char * aDest, int32_t * aDestLength); + char ValueToChar(uint32_t aValue); + virtual bool DirectEncodable(char16_t aChar); + + //-------------------------------------------------------------------- + // Subclassing of nsEncoderSupport class [declaration] + + NS_IMETHOD ConvertNoBuffNoErr(const char16_t * aSrc, int32_t * aSrcLength, + char * aDest, int32_t * aDestLength); + NS_IMETHOD FinishNoBuff(char * aDest, int32_t * aDestLength); + NS_IMETHOD Reset(); +}; + +//---------------------------------------------------------------------- +// Class nsUnicodeToUTF7 [declaration] + +/** + * A character set converter from Unicode to UTF-7. + * + * @created 03/Jun/1999 + * @author Catalin Rotaru [CATA] + */ +class nsUnicodeToUTF7 : public nsBasicUTF7Encoder +{ +public: + + /** + * Class constructor. + */ + nsUnicodeToUTF7(); + +protected: + + virtual bool DirectEncodable(char16_t aChar); +}; + +#endif /* nsUnicodeToUTF7_h___ */ |