From 5f8de423f190bbb79a62f804151bc24824fa32d8 Mon Sep 17 00:00:00 2001
From: "Matt A. Tobin" <mattatobin@localhost.localdomain>
Date: Fri, 2 Feb 2018 04:16:08 -0500
Subject: Add m-esr52 at 52.6.0

---
 gfx/2d/Blur.cpp | 770 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 770 insertions(+)
 create mode 100644 gfx/2d/Blur.cpp

(limited to 'gfx/2d/Blur.cpp')
diff --git a/gfx/2d/Blur.cpp b/gfx/2d/Blur.cpp
new file mode 100644
index 000000000..f3f41c3af
--- /dev/null
+++ b/gfx/2d/Blur.cpp
@@ -0,0 +1,770 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "Blur.h"
+
+#include <algorithm>
+#include <math.h>
+#include <string.h>
+
+#include "mozilla/CheckedInt.h"
+
+#include "2D.h"
+#include "DataSurfaceHelpers.h"
+#include "Tools.h"
+
+#ifdef BUILD_ARM_NEON
+#include "mozilla/arm.h"
+#endif
+
+using namespace std;
+
+namespace mozilla {
+namespace gfx {
+
+/**
+ * Box blur involves looking at one pixel, and setting its value to the average
+ * of its neighbouring pixels.
+ * @param aInput The input buffer.
+ * @param aOutput The output buffer.
+ * @param aLeftLobe The number of pixels to blend on the left.
+ * @param aRightLobe The number of pixels to blend on the right.
+ * @param aWidth The number of columns in the buffers.
+ * @param aRows The number of rows in the buffers.
+ * @param aSkipRect An area to skip blurring in.
+ * XXX shouldn't we pass stride in separately here?
+ */
+static void
+BoxBlurHorizontal(unsigned char* aInput,
+                  unsigned char* aOutput,
+                  int32_t aLeftLobe,
+                  int32_t aRightLobe,
+                  int32_t aWidth,
+                  int32_t aRows,
+                  const IntRect& aSkipRect)
+{
+    MOZ_ASSERT(aWidth > 0);
+
+    int32_t boxSize = aLeftLobe + aRightLobe + 1;
+    bool skipRectCoversWholeRow = 0 >= aSkipRect.x &&
+                                  aWidth <= aSkipRect.XMost();
+    if (boxSize == 1) {
+        memcpy(aOutput, aInput, aWidth*aRows);
+        return;
+    }
+    uint32_t reciprocal = uint32_t((uint64_t(1) << 32) / boxSize);
+
+    for (int32_t y = 0; y < aRows; y++) {
+        // Check whether the skip rect intersects this row. If the skip
+        // rect covers the whole surface in this row, we can avoid
+        // this row entirely (and any others along the skip rect).
+        bool inSkipRectY = y >= aSkipRect.y &&
+                           y < aSkipRect.YMost();
+        if (inSkipRectY && skipRectCoversWholeRow) {
+            y = aSkipRect.YMost() - 1;
+            continue;
+        }
+
+        uint32_t alphaSum = 0;
+        for (int32_t i = 0; i < boxSize; i++) {
+            int32_t pos = i - aLeftLobe;
+            // See assertion above; if aWidth is zero, then we would have no
+            // valid position to clamp to.
+            pos = max(pos, 0);
+            pos = min(pos, aWidth - 1);
+            alphaSum += aInput[aWidth * y + pos];
+        }
+        for (int32_t x = 0; x < aWidth; x++) {
+            // Check whether we are within the skip rect. If so, go
+            // to the next point outside the skip rect.
+            if (inSkipRectY && x >= aSkipRect.x &&
+                x < aSkipRect.XMost()) {
+                x = aSkipRect.XMost();
+                if (x >= aWidth)
+                    break;
+
+                // Recalculate the neighbouring alpha values for
+                // our new point on the surface.
+                alphaSum = 0;
+                for (int32_t i = 0; i < boxSize; i++) {
+                    int32_t pos = x + i - aLeftLobe;
+                    // See assertion above; if aWidth is zero, then we would have no
+                    // valid position to clamp to.
+                    pos = max(pos, 0);
+                    pos = min(pos, aWidth - 1);
+                    alphaSum += aInput[aWidth * y + pos];
+                }
+            }
+            int32_t tmp = x - aLeftLobe;
+            int32_t last = max(tmp, 0);
+            int32_t next = min(tmp + boxSize, aWidth - 1);
+
+            aOutput[aWidth * y + x] = (uint64_t(alphaSum) * reciprocal) >> 32;
+
+            alphaSum += aInput[aWidth * y + next] -
+                        aInput[aWidth * y + last];
+        }
+    }
+}
+
+/**
+ * Identical to BoxBlurHorizontal, except it blurs top and bottom instead of
+ * left and right.
+ * XXX shouldn't we pass stride in separately here?
+ */
+static void
+BoxBlurVertical(unsigned char* aInput,
+                unsigned char* aOutput,
+                int32_t aTopLobe,
+                int32_t aBottomLobe,
+                int32_t aWidth,
+                int32_t aRows,
+                const IntRect& aSkipRect)
+{
+    MOZ_ASSERT(aRows > 0);
+
+    int32_t boxSize = aTopLobe + aBottomLobe + 1;
+    bool skipRectCoversWholeColumn = 0 >= aSkipRect.y &&
+                                     aRows <= aSkipRect.YMost();
+    if (boxSize == 1) {
+        memcpy(aOutput, aInput, aWidth*aRows);
+        return;
+    }
+    uint32_t reciprocal = uint32_t((uint64_t(1) << 32) / boxSize);
+
+    for (int32_t x = 0; x < aWidth; x++) {
+        bool inSkipRectX = x >= aSkipRect.x &&
+                           x < aSkipRect.XMost();
+        if (inSkipRectX && skipRectCoversWholeColumn) {
+            x = aSkipRect.XMost() - 1;
+            continue;
+        }
+
+        uint32_t alphaSum = 0;
+        for (int32_t i = 0; i < boxSize; i++) {
+            int32_t pos = i - aTopLobe;
+            // See assertion above; if aRows is zero, then we would have no
+            // valid position to clamp to.
+            pos = max(pos, 0);
+            pos = min(pos, aRows - 1);
+            alphaSum += aInput[aWidth * pos + x];
+        }
+        for (int32_t y = 0; y < aRows; y++) {
+            if (inSkipRectX && y >= aSkipRect.y &&
+                y < aSkipRect.YMost()) {
+                y = aSkipRect.YMost();
+                if (y >= aRows)
+                    break;
+
+                alphaSum = 0;
+                for (int32_t i = 0; i < boxSize; i++) {
+                    int32_t pos = y + i - aTopLobe;
+                    // See assertion above; if aRows is zero, then we would have no
+                    // valid position to clamp to.
+                    pos = max(pos, 0);
+                    pos = min(pos, aRows - 1);
+                    alphaSum += aInput[aWidth * pos + x];
+                }
+            }
+            int32_t tmp = y - aTopLobe;
+            int32_t last = max(tmp, 0);
+            int32_t next = min(tmp + boxSize, aRows - 1);
+
+            aOutput[aWidth * y + x] = (uint64_t(alphaSum) * reciprocal) >> 32;
+
+            alphaSum += aInput[aWidth * next + x] -
+                        aInput[aWidth * last + x];
+        }
+    }
+}
+
+static void ComputeLobes(int32_t aRadius, int32_t aLobes[3][2])
+{
+    int32_t major, minor, final;
+
+    /* See http://www.w3.org/TR/SVG/filters.html#feGaussianBlur for
+     * some notes about approximating the Gaussian blur with box-blurs.
+     * The comments below are in the terminology of that page.
+     */
+    int32_t z = aRadius / 3;
+    switch (aRadius % 3) {
+    case 0:
+        // aRadius = z*3; choose d = 2*z + 1
+        major = minor = final = z;
+        break;
+    case 1:
+        // aRadius = z*3 + 1
+        // This is a tricky case since there is no value of d which will
+        // yield a radius of exactly aRadius. If d is odd, i.e. d=2*k + 1
+        // for some integer k, then the radius will be 3*k. If d is even,
+        // i.e. d=2*k, then the radius will be 3*k - 1.
+        // So we have to choose values that don't match the standard
+        // algorithm.
+        major = z + 1;
+        minor = final = z;
+        break;
+    case 2:
+        // aRadius = z*3 + 2; choose d = 2*z + 2
+        major = final = z + 1;
+        minor = z;
+        break;
+    default:
+        // Mathematical impossibility!
+        MOZ_ASSERT(false);
+        major = minor = final = 0;
+    }
+    MOZ_ASSERT(major + minor + final == aRadius);
+
+    aLobes[0][0] = major;
+    aLobes[0][1] = minor;
+    aLobes[1][0] = minor;
+    aLobes[1][1] = major;
+    aLobes[2][0] = final;
+    aLobes[2][1] = final;
+}
+
+static void
+SpreadHorizontal(unsigned char* aInput,
+                 unsigned char* aOutput,
+                 int32_t aRadius,
+                 int32_t aWidth,
+                 int32_t aRows,
+                 int32_t aStride,
+                 const IntRect& aSkipRect)
+{
+    if (aRadius == 0) {
+        memcpy(aOutput, aInput, aStride * aRows);
+        return;
+    }
+
+    bool skipRectCoversWholeRow = 0 >= aSkipRect.x &&
+                                    aWidth <= aSkipRect.XMost();
+    for (int32_t y = 0; y < aRows; y++) {
+        // Check whether the skip rect intersects this row. If the skip
+        // rect covers the whole surface in this row, we can avoid
+        // this row entirely (and any others along the skip rect).
+        bool inSkipRectY = y >= aSkipRect.y &&
+                             y < aSkipRect.YMost();
+        if (inSkipRectY && skipRectCoversWholeRow) {
+            y = aSkipRect.YMost() - 1;
+            continue;
+        }
+
+        for (int32_t x = 0; x < aWidth; x++) {
+            // Check whether we are within the skip rect. If so, go
+            // to the next point outside the skip rect.
+            if (inSkipRectY && x >= aSkipRect.x &&
+                x < aSkipRect.XMost()) {
+                x = aSkipRect.XMost();
+                if (x >= aWidth)
+                    break;
+            }
+
+            int32_t sMin = max(x - aRadius, 0);
+            int32_t sMax = min(x + aRadius, aWidth - 1);
+            int32_t v = 0;
+            for (int32_t s = sMin; s <= sMax; ++s) {
+                v = max<int32_t>(v, aInput[aStride * y + s]);
+            }
+            aOutput[aStride * y + x] = v;
+        }
+    }
+}
+
+static void
+SpreadVertical(unsigned char* aInput,
+               unsigned char* aOutput,
+               int32_t aRadius,
+               int32_t aWidth,
+               int32_t aRows,
+               int32_t aStride,
+               const IntRect& aSkipRect)
+{
+    if (aRadius == 0) {
+        memcpy(aOutput, aInput, aStride * aRows);
+        return;
+    }
+
+    bool skipRectCoversWholeColumn = 0 >= aSkipRect.y &&
+                                     aRows <= aSkipRect.YMost();
+    for (int32_t x = 0; x < aWidth; x++) {
+        bool inSkipRectX = x >= aSkipRect.x &&
+                           x < aSkipRect.XMost();
+        if (inSkipRectX && skipRectCoversWholeColumn) {
+            x = aSkipRect.XMost() - 1;
+            continue;
+        }
+
+        for (int32_t y = 0; y < aRows; y++) {
+            // Check whether we are within the skip rect. If so, go
+            // to the next point outside the skip rect.
+            if (inSkipRectX && y >= aSkipRect.y &&
+                y < aSkipRect.YMost()) {
+                y = aSkipRect.YMost();
+                if (y >= aRows)
+                    break;
+            }
+
+            int32_t sMin = max(y - aRadius, 0);
+            int32_t sMax = min(y + aRadius, aRows - 1);
+            int32_t v = 0;
+            for (int32_t s = sMin; s <= sMax; ++s) {
+                v = max<int32_t>(v, aInput[aStride * s + x]);
+            }
+            aOutput[aStride * y + x] = v;
+        }
+    }
+}
+
+CheckedInt<int32_t>
+AlphaBoxBlur::RoundUpToMultipleOf4(int32_t aVal)
+{
+  CheckedInt<int32_t> val(aVal);
+
+  val += 3;
+  val /= 4;
+  val *= 4;
+
+  return val;
+}
+
+AlphaBoxBlur::AlphaBoxBlur(const Rect& aRect,
+                           const IntSize& aSpreadRadius,
+                           const IntSize& aBlurRadius,
+                           const Rect* aDirtyRect,
+                           const Rect* aSkipRect)
+ : mSpreadRadius(aSpreadRadius),
+   mBlurRadius(aBlurRadius),
+   mSurfaceAllocationSize(0)
+{
+  Rect rect(aRect);
+  rect.Inflate(Size(aBlurRadius + aSpreadRadius));
+  rect.RoundOut();
+
+  if (aDirtyRect) {
+    // If we get passed a dirty rect from layout, we can minimize the
+    // shadow size and make painting faster.
+    mHasDirtyRect = true;
+    mDirtyRect = *aDirtyRect;
+    Rect requiredBlurArea = mDirtyRect.Intersect(rect);
+    requiredBlurArea.Inflate(Size(aBlurRadius + aSpreadRadius));
+    rect = requiredBlurArea.Intersect(rect);
+  } else {
+    mHasDirtyRect = false;
+  }
+
+  mRect = IntRect(int32_t(rect.x), int32_t(rect.y),
+                  int32_t(rect.width), int32_t(rect.height));
+  if (mRect.IsEmpty()) {
+    return;
+  }
+
+  if (aSkipRect) {
+    // If we get passed a skip rect, we can lower the amount of
+    // blurring/spreading we need to do. We convert it to IntRect to avoid
+    // expensive int<->float conversions if we were to use Rect instead.
+    Rect skipRect = *aSkipRect;
+    skipRect.RoundIn();
+    skipRect.Deflate(Size(aBlurRadius + aSpreadRadius));
+    mSkipRect = IntRect(int32_t(skipRect.x), int32_t(skipRect.y),
+                        int32_t(skipRect.width), int32_t(skipRect.height));
+
+    mSkipRect = mSkipRect.Intersect(mRect);
+    if (mSkipRect.IsEqualInterior(mRect))
+      return;
+
+    mSkipRect -= mRect.TopLeft();
+  } else {
+    mSkipRect = IntRect(0, 0, 0, 0);
+  }
+
+  CheckedInt<int32_t> stride = RoundUpToMultipleOf4(mRect.width);
+  if (stride.isValid()) {
+    mStride = stride.value();
+
+    // We need to leave room for an additional 3 bytes for a potential overrun
+    // in our blurring code.
+    size_t size = BufferSizeFromStrideAndHeight(mStride, mRect.height, 3);
+    if (size != 0) {
+      mSurfaceAllocationSize = size;
+    }
+  }
+}
+
+AlphaBoxBlur::AlphaBoxBlur(const Rect& aRect,
+                           int32_t aStride,
+                           float aSigmaX,
+                           float aSigmaY)
+  : mRect(int32_t(aRect.x), int32_t(aRect.y),
+          int32_t(aRect.width), int32_t(aRect.height)),
+    mSpreadRadius(),
+    mBlurRadius(CalculateBlurRadius(Point(aSigmaX, aSigmaY))),
+    mStride(aStride),
+    mSurfaceAllocationSize(0)
+{
+  IntRect intRect;
+  if (aRect.ToIntRect(&intRect)) {
+    size_t minDataSize = BufferSizeFromStrideAndHeight(intRect.width, intRect.height);
+    if (minDataSize != 0) {
+      mSurfaceAllocationSize = minDataSize;
+    }
+  }
+}
+
+
+AlphaBoxBlur::~AlphaBoxBlur()
+{
+}
+
+IntSize
+AlphaBoxBlur::GetSize()
+{
+  IntSize size(mRect.width, mRect.height);
+  return size;
+}
+
+int32_t
+AlphaBoxBlur::GetStride()
+{
+  return mStride;
+}
+
+IntRect
+AlphaBoxBlur::GetRect()
+{
+  return mRect;
+}
+
+Rect*
+AlphaBoxBlur::GetDirtyRect()
+{
+  if (mHasDirtyRect) {
+    return &mDirtyRect;
+  }
+
+  return nullptr;
+}
+
+size_t
+AlphaBoxBlur::GetSurfaceAllocationSize() const
+{
+  return mSurfaceAllocationSize;
+}
+
+void
+AlphaBoxBlur::Blur(uint8_t* aData)
+{
+  if (!aData) {
+    return;
+  }
+
+  // no need to do all this if not blurring or spreading
+  if (mBlurRadius != IntSize(0,0) || mSpreadRadius != IntSize(0,0)) {
+    int32_t stride = GetStride();
+
+    IntSize size = GetSize();
+
+    if (mSpreadRadius.width > 0 || mSpreadRadius.height > 0) {
+      // No need to use CheckedInt here - we have validated it in the constructor.
+      size_t szB = stride * size.height;
+      unsigned char* tmpData = new (std::nothrow) uint8_t[szB];
+
+      if (!tmpData) {
+        return;
+      }
+
+      memset(tmpData, 0, szB);
+
+      SpreadHorizontal(aData, tmpData, mSpreadRadius.width, GetSize().width, GetSize().height, stride, mSkipRect);
+      SpreadVertical(tmpData, aData, mSpreadRadius.height, GetSize().width, GetSize().height, stride, mSkipRect);
+
+      delete [] tmpData;
+    }
+
+    int32_t horizontalLobes[3][2];
+    ComputeLobes(mBlurRadius.width, horizontalLobes);
+    int32_t verticalLobes[3][2];
+    ComputeLobes(mBlurRadius.height, verticalLobes);
+
+    // We want to allow for some extra space on the left for alignment reasons.
+    int32_t maxLeftLobe = RoundUpToMultipleOf4(horizontalLobes[0][0] + 1).value();
+
+    IntSize integralImageSize(size.width + maxLeftLobe + horizontalLobes[1][1],
+                              size.height + verticalLobes[0][0] + verticalLobes[1][1] + 1);
+
+    if ((integralImageSize.width * integralImageSize.height) > (1 << 24)) {
+      // Fallback to old blurring code when the surface is so large it may
+      // overflow our integral image!
+
+      // No need to use CheckedInt here - we have validated it in the constructor.
+      size_t szB = stride * size.height;
+      uint8_t* tmpData = new (std::nothrow) uint8_t[szB];
+      if (!tmpData) {
+        return;
+      }
+
+      memset(tmpData, 0, szB);
+
+      uint8_t* a = aData;
+      uint8_t* b = tmpData;
+      if (mBlurRadius.width > 0) {
+        BoxBlurHorizontal(a, b, horizontalLobes[0][0], horizontalLobes[0][1], stride, GetSize().height, mSkipRect);
+        BoxBlurHorizontal(b, a, horizontalLobes[1][0], horizontalLobes[1][1], stride, GetSize().height, mSkipRect);
+        BoxBlurHorizontal(a, b, horizontalLobes[2][0], horizontalLobes[2][1], stride, GetSize().height, mSkipRect);
+      } else {
+        a = tmpData;
+        b = aData;
+      }
+      // The result is in 'b' here.
+      if (mBlurRadius.height > 0) {
+        BoxBlurVertical(b, a, verticalLobes[0][0], verticalLobes[0][1], stride, GetSize().height, mSkipRect);
+        BoxBlurVertical(a, b, verticalLobes[1][0], verticalLobes[1][1], stride, GetSize().height, mSkipRect);
+        BoxBlurVertical(b, a, verticalLobes[2][0], verticalLobes[2][1], stride, GetSize().height, mSkipRect);
+      } else {
+        a = b;
+      }
+      // The result is in 'a' here.
+      if (a == tmpData) {
+        memcpy(aData, tmpData, szB);
+      }
+      delete [] tmpData;
+    } else {
+      size_t integralImageStride = GetAlignedStride<16>(integralImageSize.width, 4);
+      if (integralImageStride == 0) {
+        return;
+      }
+
+      // We need to leave room for an additional 12 bytes for a maximum overrun
+      // of 3 pixels in the blurring code.
+      size_t bufLen = BufferSizeFromStrideAndHeight(integralImageStride, integralImageSize.height, 12);
+      if (bufLen == 0) {
+        return;
+      }
+      // bufLen is a byte count, but here we want a multiple of 32-bit ints, so
+      // we divide by 4.
+      AlignedArray<uint32_t> integralImage((bufLen / 4) + ((bufLen % 4) ? 1 : 0));
+
+      if (!integralImage) {
+        return;
+      }
+
+#ifdef USE_SSE2
+      if (Factory::HasSSE2()) {
+        BoxBlur_SSE2(aData, horizontalLobes[0][0], horizontalLobes[0][1], verticalLobes[0][0],
+                     verticalLobes[0][1], integralImage, integralImageStride);
+        BoxBlur_SSE2(aData, horizontalLobes[1][0], horizontalLobes[1][1], verticalLobes[1][0],
+                     verticalLobes[1][1], integralImage, integralImageStride);
+        BoxBlur_SSE2(aData, horizontalLobes[2][0], horizontalLobes[2][1], verticalLobes[2][0],
+                     verticalLobes[2][1], integralImage, integralImageStride);
+      } else
+#endif
+#ifdef BUILD_ARM_NEON
+      if (mozilla::supports_neon()) {
+        BoxBlur_NEON(aData, horizontalLobes[0][0], horizontalLobes[0][1], verticalLobes[0][0],
+                     verticalLobes[0][1], integralImage, integralImageStride);
+        BoxBlur_NEON(aData, horizontalLobes[1][0], horizontalLobes[1][1], verticalLobes[1][0],
+                     verticalLobes[1][1], integralImage, integralImageStride);
+        BoxBlur_NEON(aData, horizontalLobes[2][0], horizontalLobes[2][1], verticalLobes[2][0],
+                     verticalLobes[2][1], integralImage, integralImageStride);
+      } else
+#endif
+      {
+#ifdef _MIPS_ARCH_LOONGSON3A
+        BoxBlur_LS3(aData, horizontalLobes[0][0], horizontalLobes[0][1], verticalLobes[0][0],
+                     verticalLobes[0][1], integralImage, integralImageStride);
+        BoxBlur_LS3(aData, horizontalLobes[1][0], horizontalLobes[1][1], verticalLobes[1][0],
+                     verticalLobes[1][1], integralImage, integralImageStride);
+        BoxBlur_LS3(aData, horizontalLobes[2][0], horizontalLobes[2][1], verticalLobes[2][0],
+                     verticalLobes[2][1], integralImage, integralImageStride);
+#else
+        BoxBlur_C(aData, horizontalLobes[0][0], horizontalLobes[0][1], verticalLobes[0][0],
+                  verticalLobes[0][1], integralImage, integralImageStride);
+        BoxBlur_C(aData, horizontalLobes[1][0], horizontalLobes[1][1], verticalLobes[1][0],
+                  verticalLobes[1][1], integralImage, integralImageStride);
+        BoxBlur_C(aData, horizontalLobes[2][0], horizontalLobes[2][1], verticalLobes[2][0],
+                  verticalLobes[2][1], integralImage, integralImageStride);
+#endif
+      }
+    }
+  }
+}
+
+MOZ_ALWAYS_INLINE void
+GenerateIntegralRow(uint32_t  *aDest, const uint8_t *aSource, uint32_t *aPreviousRow,
+                    const uint32_t &aSourceWidth, const uint32_t &aLeftInflation, const uint32_t &aRightInflation)
+{
+  uint32_t currentRowSum = 0;
+  uint32_t pixel = aSource[0];
+  for (uint32_t x = 0; x < aLeftInflation; x++) {
+    currentRowSum += pixel;
+    *aDest++ = currentRowSum + *aPreviousRow++;
+  }
+  for (uint32_t x = aLeftInflation; x < (aSourceWidth + aLeftInflation); x += 4) {
+      uint32_t alphaValues = *(uint32_t*)(aSource + (x - aLeftInflation));
+#if defined WORDS_BIGENDIAN || defined IS_BIG_ENDIAN || defined __BIG_ENDIAN__
+      currentRowSum += (alphaValues >> 24) & 0xff;
+      *aDest++ = *aPreviousRow++ + currentRowSum;
+      currentRowSum += (alphaValues >> 16) & 0xff;
+      *aDest++ = *aPreviousRow++ + currentRowSum;
+      currentRowSum += (alphaValues >> 8) & 0xff;
+      *aDest++ = *aPreviousRow++ + currentRowSum;
+      currentRowSum += alphaValues & 0xff;
+      *aDest++ = *aPreviousRow++ + currentRowSum;
+#else
+      currentRowSum += alphaValues & 0xff;
+      *aDest++ = *aPreviousRow++ + currentRowSum;
+      alphaValues >>= 8;
+      currentRowSum += alphaValues & 0xff;
+      *aDest++ = *aPreviousRow++ + currentRowSum;
+      alphaValues >>= 8;
+      currentRowSum += alphaValues & 0xff;
+      *aDest++ = *aPreviousRow++ + currentRowSum;
+      alphaValues >>= 8;
+      currentRowSum += alphaValues & 0xff;
+      *aDest++ = *aPreviousRow++ + currentRowSum;
+#endif
+  }
+  pixel = aSource[aSourceWidth - 1];
+  for (uint32_t x = (aSourceWidth + aLeftInflation); x < (aSourceWidth + aLeftInflation + aRightInflation); x++) {
+    currentRowSum += pixel;
+    *aDest++ = currentRowSum + *aPreviousRow++;
+  }
+}
+
+MOZ_ALWAYS_INLINE void
+GenerateIntegralImage_C(int32_t aLeftInflation, int32_t aRightInflation,
+                        int32_t aTopInflation, int32_t aBottomInflation,
+                        uint32_t *aIntegralImage, size_t aIntegralImageStride,
+                        uint8_t *aSource, int32_t aSourceStride, const IntSize &aSize)
+{
+  uint32_t stride32bit = aIntegralImageStride / 4;
+
+  IntSize integralImageSize(aSize.width + aLeftInflation + aRightInflation,
+                            aSize.height + aTopInflation + aBottomInflation);
+
+  memset(aIntegralImage, 0, aIntegralImageStride);
+
+  GenerateIntegralRow(aIntegralImage, aSource, aIntegralImage,
+                      aSize.width, aLeftInflation, aRightInflation);
+  for (int y = 1; y < aTopInflation + 1; y++) {
+    GenerateIntegralRow(aIntegralImage + (y * stride32bit), aSource, aIntegralImage + (y - 1) * stride32bit,
+                        aSize.width, aLeftInflation, aRightInflation);
+  }
+
+  for (int y = aTopInflation + 1; y < (aSize.height + aTopInflation); y++) {
+    GenerateIntegralRow(aIntegralImage + (y * stride32bit), aSource + aSourceStride * (y - aTopInflation),
+                        aIntegralImage + (y - 1) * stride32bit, aSize.width, aLeftInflation, aRightInflation);
+  }
+
+  if (aBottomInflation) {
+    for (int y = (aSize.height + aTopInflation); y < integralImageSize.height; y++) {
+      GenerateIntegralRow(aIntegralImage + (y * stride32bit), aSource + ((aSize.height - 1) * aSourceStride),
+                          aIntegralImage + (y - 1) * stride32bit,
+                          aSize.width, aLeftInflation, aRightInflation);
+    }
+  }
+}
+
+/**
+ * Attempt to do an in-place box blur using an integral image.
+ */
+void
+AlphaBoxBlur::BoxBlur_C(uint8_t* aData,
+                        int32_t aLeftLobe,
+                        int32_t aRightLobe,
+                        int32_t aTopLobe,
+                        int32_t aBottomLobe,
+                        uint32_t *aIntegralImage,
+                        size_t aIntegralImageStride)
+{
+  IntSize size = GetSize();
+
+  MOZ_ASSERT(size.width > 0);
+
+  // Our 'left' or 'top' lobe will include the current pixel. i.e. when
+  // looking at an integral image the value of a pixel at 'x,y' is calculated
+  // using the value of the integral image values above/below that.
+  aLeftLobe++;
+  aTopLobe++;
+  int32_t boxSize = (aLeftLobe + aRightLobe) * (aTopLobe + aBottomLobe);
+
+  MOZ_ASSERT(boxSize > 0);
+
+  if (boxSize == 1) {
+      return;
+  }
+
+  int32_t stride32bit = aIntegralImageStride / 4;
+
+  int32_t leftInflation = RoundUpToMultipleOf4(aLeftLobe).value();
+
+  GenerateIntegralImage_C(leftInflation, aRightLobe, aTopLobe, aBottomLobe,
+                          aIntegralImage, aIntegralImageStride, aData,
+                          mStride, size);
+
+  uint32_t reciprocal = uint32_t((uint64_t(1) << 32) / boxSize);
+
+  uint32_t *innerIntegral = aIntegralImage + (aTopLobe * stride32bit) + leftInflation;
+
+  // Storing these locally makes this about 30% faster! Presumably the compiler
+  // can't be sure we're not altering the member variables in this loop.
+  IntRect skipRect = mSkipRect;
+  uint8_t *data = aData;
+  int32_t stride = mStride;
+  for (int32_t y = 0; y < size.height; y++) {
+    bool inSkipRectY = y > skipRect.y && y < skipRect.YMost();
+
+    uint32_t *topLeftBase = innerIntegral + ((y - aTopLobe) * stride32bit - aLeftLobe);
+    uint32_t *topRightBase = innerIntegral + ((y - aTopLobe) * stride32bit + aRightLobe);
+    uint32_t *bottomRightBase = innerIntegral + ((y + aBottomLobe) * stride32bit + aRightLobe);
+    uint32_t *bottomLeftBase = innerIntegral + ((y + aBottomLobe) * stride32bit - aLeftLobe);
+
+    for (int32_t x = 0; x < size.width; x++) {
+      if (inSkipRectY && x > skipRect.x && x < skipRect.XMost()) {
+        x = skipRect.XMost() - 1;
+        // Trigger early jump on coming loop iterations, this will be reset
+        // next line anyway.
+        inSkipRectY = false;
+        continue;
+      }
+      int32_t topLeft = topLeftBase[x];
+      int32_t topRight = topRightBase[x];
+      int32_t bottomRight = bottomRightBase[x];
+      int32_t bottomLeft = bottomLeftBase[x];
+
+      uint32_t value = bottomRight - topRight - bottomLeft;
+      value += topLeft;
+
+      data[stride * y + x] = (uint64_t(reciprocal) * value + (uint64_t(1) << 31)) >> 32;
+    }
+  }
+}
+
+/**
+ * Compute the box blur size (which we're calling the blur radius) from
+ * the standard deviation.
+ *
+ * Much of this, the 3 * sqrt(2 * pi) / 4, is the known value for
+ * approximating a Gaussian using box blurs.  This yields quite a good
+ * approximation for a Gaussian.  Then we multiply this by 1.5 since our
+ * code wants the radius of the entire triple-box-blur kernel instead of
+ * the diameter of an individual box blur.  For more details, see:
+ *   http://www.w3.org/TR/SVG11/filters.html#feGaussianBlurElement
+ *   https://bugzilla.mozilla.org/show_bug.cgi?id=590039#c19
+ */
+static const Float GAUSSIAN_SCALE_FACTOR = Float((3 * sqrt(2 * M_PI) / 4) * 1.5);
+
+IntSize
+AlphaBoxBlur::CalculateBlurRadius(const Point& aStd)
+{
+    IntSize size(static_cast<int32_t>(floor(aStd.x * GAUSSIAN_SCALE_FACTOR + 0.5f)),
+                 static_cast<int32_t>(floor(aStd.y * GAUSSIAN_SCALE_FACTOR + 0.5f)));
+
+    return size;
+}
+
+} // namespace gfx
+} // namespace mozilla
-- 
cgit v1.2.3