summary | refs | log | tree | commit | diff | stats
path: root/intl/uconv/nsUTF8ConverterService.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'intl/uconv/nsUTF8ConverterService.cpp')
-rw-r--r-- intl/uconv/nsUTF8ConverterService.cpp 120
1 files changed, 120 insertions, 0 deletions
diff --git a/intl/uconv/nsUTF8ConverterService.cpp b/intl/uconv/nsUTF8ConverterService.cpp
new file mode 100644
index 000000000..0944dae02
--- /dev/null
+++ b/intl/uconv/nsUTF8ConverterService.cpp
@@ -0,0 +1,120 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:expandtab:shiftwidth=2:tabstop=4:
+ */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#include "nsString.h"
+#include "nsUTF8ConverterService.h"
+#include "nsEscape.h"
+#include "nsIUnicodeDecoder.h"
+#include "mozilla/dom/EncodingUtils.h"
+#include "mozilla/UniquePtr.h"
+
+using mozilla::dom::EncodingUtils;
+
+// Invokes NS_IMPL_ISUPPORTS for nsUTF8ConverterService, listing
+// nsIUTF8ConverterService as its only interface. The macro is defined
+// outside this file; presumably it generates the nsISupports boilerplate
+// (AddRef/Release/QueryInterface) -- confirm against its definition.
+NS_IMPL_ISUPPORTS(nsUTF8ConverterService, nsIUTF8ConverterService)
+
+/**
+ * Decodes aString from the encoding named by aCharset and stores the
+ * result in aResult as UTF-8.
+ *
+ * @param aString            the bytes to decode.
+ * @param aCharset           encoding label; must be non-null and non-empty.
+ * @param aAllowSubstitution when false, the decoder is switched to
+ *                           nsIUnicodeDecoder::kOnError_Signal; when true,
+ *                           the decoder's error behavior is left untouched.
+ * @param aResult            receives the UTF-8 text. It is only assigned
+ *                           when Convert() reports success.
+ * @return NS_ERROR_INVALID_ARG for a null or empty label,
+ *         NS_ERROR_UCONV_NOCONV when the label resolves to no encoding or
+ *         no decoder could be obtained, NS_ERROR_OUT_OF_MEMORY if the
+ *         scratch buffer is null, otherwise the status reported by
+ *         GetMaxLength() or Convert().
+ */
+static nsresult
+ToUTF8(const nsACString &aString, const char *aCharset,
+       bool aAllowSubstitution, nsACString &aResult)
+{
+  nsresult rv;
+  if (!aCharset || !*aCharset)
+    return NS_ERROR_INVALID_ARG;
+
+  nsDependentCString label(aCharset);
+  nsAutoCString encoding;
+  if (!EncodingUtils::FindEncodingForLabelNoReplacement(label, encoding)) {
+    return NS_ERROR_UCONV_NOCONV;
+  }
+  nsCOMPtr<nsIUnicodeDecoder> unicodeDecoder =
+    EncodingUtils::DecoderForEncoding(encoding);
+  // The decoder is dereferenced unconditionally below, so bail out instead
+  // of crashing if no instance was handed back.
+  if (!unicodeDecoder) {
+    return NS_ERROR_UCONV_NOCONV;
+  }
+
+  if (!aAllowSubstitution)
+    unicodeDecoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Signal);
+
+  int32_t srcLen = aString.Length();
+  int32_t dstLen;
+  // Convert() takes a raw char pointer, so a flat view of the input is needed.
+  const nsAFlatCString& inStr = PromiseFlatCString(aString);
+  rv = unicodeDecoder->GetMaxLength(inStr.get(), srcLen, &dstLen);
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  // Scratch UTF-16 buffer sized to the decoder's reported upper bound.
+  // NOTE(review): mozilla::MakeUnique is presumably infallible, which would
+  // make the null check below unreachable -- confirm, and consider a
+  // fallible allocation since dstLen scales with the input length.
+  auto ustr = mozilla::MakeUnique<char16_t[]>(dstLen);
+  NS_ENSURE_TRUE(ustr, NS_ERROR_OUT_OF_MEMORY);
+
+  // Convert() updates srcLen/dstLen in place; dstLen is then used as the
+  // number of char16_t units to copy out of the scratch buffer.
+  rv = unicodeDecoder->Convert(inStr.get(), &srcLen, ustr.get(), &dstLen);
+  if (NS_SUCCEEDED(rv)){
+    CopyUTF16toUTF8(Substring(ustr.get(), ustr.get() + dstLen), aResult);
+  }
+  return rv;
+}
+
+/**
+ * Converts aString to UTF-8, treating it as text in aCharset unless a
+ * cheap check shows it is already ASCII or valid UTF-8.
+ *
+ * aAllowSubstitution is only honored when aOptionalArgc is exactly 1;
+ * otherwise substitution is allowed. On success aUTF8String holds the
+ * UTF-8 text; on failure the status from ToUTF8() is returned.
+ */
+NS_IMETHODIMP
+nsUTF8ConverterService::ConvertStringToUTF8(const nsACString &aString,
+                                            const char *aCharset,
+                                            bool aSkipCheck,
+                                            bool aAllowSubstitution,
+                                            uint8_t aOptionalArgc,
+                                            nsACString &aUTF8String)
+{
+  // Substitution defaults to "allowed" unless the caller supplied the
+  // optional argument.
+  bool substitute = true;
+  if (aOptionalArgc == 1) {
+    substitute = aAllowSubstitution;
+  }
+
+  // Unless the caller opted out, pass ASCII-only or valid UTF-8 input
+  // straight through. Callers may opt out when they suspect a non-ASCII
+  // 7bit charset (ISO-2022-xx, HZ) or a charset other than UTF-8 that can
+  // be mistaken for UTF-8.
+  if (!aSkipCheck) {
+    if (IsASCII(aString) || IsUTF8(aString)) {
+      aUTF8String = aString;
+      return NS_OK;
+    }
+  }
+
+  aUTF8String.Truncate();
+
+  const nsresult status =
+    ToUTF8(aString, aCharset, substitute, aUTF8String);
+
+  // Safety net for a skipped check whose hunch was wrong: if decoding as
+  // aCharset failed but the input is valid UTF-8 after all, hand it back
+  // unchanged. ASCIIness is not re-checked, on the assumption that no
+  // supported charset is incompatible with ASCII (EBCDIC is unsupported).
+  if (!aSkipCheck || NS_SUCCEEDED(status) || !IsUTF8(aString)) {
+    return status;
+  }
+
+  aUTF8String = aString;
+  return NS_OK;
+}
+
+/**
+ * Produces a UTF-8 version of the URI spec aSpec.
+ *
+ * A spec containing raw non-ASCII bytes is returned unchanged. Otherwise
+ * escaped non-ASCII bytes are unescaped, and the result is returned as-is
+ * if it is ASCII or valid UTF-8, or decoded from aCharset via ToUTF8().
+ */
+NS_IMETHODIMP
+nsUTF8ConverterService::ConvertURISpecToUTF8(const nsACString &aSpec,
+                                             const char *aCharset,
+                                             nsACString &aUTF8Spec)
+{
+  // Unescaped non-ASCII characters are taken to mean the spec is already
+  // UTF-8; no valid spec in Mozilla would break this assumption.
+  if (!IsASCII(aSpec)) {
+    aUTF8Spec = aSpec;
+    return NS_OK;
+  }
+
+  aUTF8Spec.Truncate();
+
+  // NS_UnescapeURL only fills the output string when at least one
+  // character was unescaped; its return value reports whether it did.
+  nsAutoCString decoded;
+  const nsAFlatCString& flatSpec = PromiseFlatCString(aSpec);
+  const bool didUnescape = NS_UnescapeURL(flatSpec.get(), aSpec.Length(),
+                                          esc_OnlyNonASCII, decoded);
+
+  // Nothing was unescaped, so the original spec is the answer.
+  if (!didUnescape) {
+    aUTF8Spec = aSpec;
+    return NS_OK;
+  }
+
+  // Only text that is neither ASCII nor valid UTF-8 needs a charset
+  // conversion; substitution is always allowed here.
+  if (!IsASCII(decoded) && !IsUTF8(decoded)) {
+    return ToUTF8(decoded, aCharset, true, aUTF8Spec);
+  }
+
+  // ASCII only, or escaped UTF-8.
+  aUTF8Spec = decoded;
+  return NS_OK;
+}
+