summaryrefslogtreecommitdiffstats
path: root/intl/uconv/nsNCRFallbackEncoderWrapper.cpp
blob: 7b83b72e8efcbbbddbb8c8cf46c86e17e910f88e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "nsNCRFallbackEncoderWrapper.h"

#include "mozilla/dom/EncodingUtils.h"

// Constructs a wrapper whose encoder is whatever
// mozilla::dom::EncodingUtils::EncoderForEncoding() hands back for aEncoding.
// The result is stored without being checked here; Encode() tests mEncoder
// for null before using it.
nsNCRFallbackEncoderWrapper::nsNCRFallbackEncoderWrapper(
  const nsACString& aEncoding)
  : mEncoder(mozilla::dom::EncodingUtils::EncoderForEncoding(aEncoding))
{}

// Nothing to release by hand: the destructor body is empty and member
// destructors do all the cleanup.
nsNCRFallbackEncoderWrapper::~nsNCRFallbackEncoderWrapper() = default;

// Writes the numeric character reference "&#" + aUnmappable + ";" into aBytes
// starting at byte offset aDstWritten, and advances aDstWritten past it.
//
// aBytes is grown (fallibly) only when the reference does not fit in its
// current length; it is never shrunk here.
//
// Returns false, leaving aDstWritten unchanged, if the resulting offset would
// exceed INT32_MAX or if growing aBytes fails. Returns true otherwise.
bool
nsNCRFallbackEncoderWrapper::WriteNCR(nsACString& aBytes,
                                      uint32_t& aDstWritten,
                                      int32_t aUnmappable)
{
  // Format the reference in a scratch string so that aBytes is not shrunk and
  // then grown back by the number formatting.
  nsAutoCString reference("&#");
  reference.AppendInt(aUnmappable);
  reference.Append(';');

  const uint32_t referenceLength = reference.Length();
  const uint32_t endOffset = aDstWritten + referenceLength;
  if (endOffset > INT32_MAX) {
    return false;
  }

  if (endOffset > aBytes.Length()) {
    if (!aBytes.SetLength(endOffset, mozilla::fallible_t())) {
      return false;
    }
  }

  // Fetch the write pointer only after the possible SetLength() above.
  char* target = aBytes.BeginWriting() + aDstWritten;
  memcpy(target, reference.BeginReading(), referenceLength);
  aDstWritten = endOffset;
  return true;
}

// Encodes aUtf16 into aBytes with mEncoder, replacing every code point the
// encoder reports as unmappable with a numeric character reference written by
// WriteNCR(). Unpaired surrogates are written as the reference for U+FFFD.
//
// Output is written into aBytes starting at offset 0, so any previous content
// is overwritten. On success aBytes is set to exactly the number of bytes
// written. On failure aBytes may hold partial output.
//
// Returns false when:
//  - mEncoder is null,
//  - the input or the output would exceed INT32_MAX units,
//  - GetMaxLength() fails or a fallible allocation fails,
//  - the encoder returns an unexpected result code, or
//  - the encoder reports an unmappable character without consuming any input.
bool
nsNCRFallbackEncoderWrapper::Encode(const nsAString& aUtf16,
                                    nsACString& aBytes)
{
  // mozilla::dom::EncodingUtils::EncoderForEncoding fails during shutdown
  if (!mEncoder) {
    return false;
  }
  // nsIUnicodeEncoder uses int32_t for sizes :-(
  if (aUtf16.Length() > INT32_MAX) {
    return false;
  }
  const char16_t* src = aUtf16.BeginReading();
  const char16_t* srcEnd = aUtf16.EndReading();
  uint32_t dstWritten = 0;
  // Each iteration converts as much of the remaining input as the encoder
  // accepts. An unmappable character ends the iteration with an NCR and a
  // `continue`; otherwise the iteration finishes the output and returns.
  for (;;) {
    int32_t srcLen = srcEnd - src;
    int32_t dstLen = 0;
    nsresult rv = mEncoder->GetMaxLength(src, srcLen, &dstLen);
    if (NS_FAILED(rv)) {
      return false;
    }
    uint32_t needed = dstWritten + dstLen;
    if (needed > INT32_MAX) {
      return false;
    }
    // Behind the scenes SetLength() makes the underlying allocation not have
    // slop, so we don't need to round up here.
    if (needed > aBytes.Length() && !aBytes.SetLength(needed,
                                                      mozilla::fallible_t())) {
      return false;
    }
    // We need to re-obtain the destination pointer on every iteration, because
    // SetLength() invalidates it.
    char* dst = aBytes.BeginWriting() + dstWritten;
    dstLen = aBytes.Length() - dstWritten;
    mEncoder->Reset();
    rv = mEncoder->Convert(src, &srcLen, dst, &dstLen);
    // Update state tracking: Convert() reports the consumed input units in
    // srcLen and the produced output bytes in dstLen.
    src += srcLen;
    dstWritten += dstLen;
    if (rv == NS_OK_UENC_MOREOUTPUT) {
      MOZ_ASSERT_UNREACHABLE("GetMaxLength must have returned a bogus length.");
      return false;
    }
    if (rv == NS_ERROR_UENC_NOMAPPING) {
      int32_t unmappable;
      // The unmappable code unit or the first half of an unmappable surrogate
      // pair is consumed by the encoder.
      MOZ_ASSERT(srcLen > 0, "Encoder should have consumed some input.");
      if (srcLen <= 0) {
        // Do not rely on the assertion alone: if it is compiled out, a
        // misbehaving encoder would make src[-1] below read before the start
        // of the input (on the first iteration) and would make this loop spin
        // without consuming anything.
        return false;
      }
      char16_t codeUnit = src[-1];
      // Let's see if it is a surrogate
      size_t highBits = (codeUnit & 0xFC00);
      if (highBits == 0xD800) {
        // high surrogate
        // Let's see if we actually have a surrogate pair.
        char16_t next;
        if (src < srcEnd && NS_IS_LOW_SURROGATE((next = *src))) {
          src++; // consume the low surrogate
          unmappable = SURROGATE_TO_UCS4(codeUnit, next);
        } else {
          // unpaired surrogate.
          unmappable = 0xFFFD;
        }
      } else if (highBits == 0xDC00) {
        // low surrogate
        // This must be an unpaired surrogate.
        unmappable = 0xFFFD;
      } else {
        // not a surrogate
        unmappable = codeUnit;
      }
      // If we are encoding to ISO-2022-JP, we need to let the encoder
      // generate a transition to the ASCII state if not already there.
      dst = aBytes.BeginWriting() + dstWritten;
      dstLen = aBytes.Length() - dstWritten;
      rv = mEncoder->Finish(dst, &dstLen);
      dstWritten += dstLen;
      if (rv != NS_OK) {
        // Failures should be impossible if GetMaxLength works. Big5 is the
        // only case where Finish() may return NS_ERROR_UENC_NOMAPPING but
        // that should never happen right after Convert() has returned it.
        MOZ_ASSERT_UNREACHABLE("Broken encoder.");
        return false;
      }
      if (!WriteNCR(aBytes, dstWritten, unmappable)) {
        return false;
      }
      // Resume with the input that follows the unmappable character.
      continue;
    }
    if (!(rv == NS_OK || rv == NS_OK_UENC_MOREINPUT)) {
      return false;
    }
    MOZ_ASSERT(src == srcEnd, "Converter did not consume all input.");
    // All input has been handed to the encoder; let it flush any trailing
    // bytes.
    dst = aBytes.BeginWriting() + dstWritten;
    dstLen = aBytes.Length() - dstWritten;
    rv = mEncoder->Finish(dst, &dstLen);
    dstWritten += dstLen;
    if (rv == NS_OK_UENC_MOREOUTPUT) {
      MOZ_ASSERT_UNREACHABLE("GetMaxLength must have returned a bogus length.");
      return false;
    }
    if (rv == NS_ERROR_UENC_NOMAPPING) {
      // Big5
      if (!WriteNCR(aBytes, dstWritten, 0xFFFD)) {
        return false;
      }
    }
    // Trim the buffer to the bytes actually written.
    return aBytes.SetLength(dstWritten, mozilla::fallible_t());
  }
}