/* -*- Mode: c++; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 40; -*- */ /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #include "GLUploadHelpers.h" #include "GLContext.h" #include "mozilla/gfx/2D.h" #include "gfxUtils.h" #include "mozilla/gfx/Tools.h" // For BytesPerPixel #include "nsRegion.h" #include "GfxTexturesReporter.h" #include "mozilla/gfx/Logging.h" namespace mozilla { using namespace gfx; namespace gl { static unsigned int DataOffset(const IntPoint& aPoint, int32_t aStride, SurfaceFormat aFormat) { unsigned int data = aPoint.y * aStride; data += aPoint.x * BytesPerPixel(aFormat); return data; } static GLint GetAddressAlignment(ptrdiff_t aAddress) { if (!(aAddress & 0x7)) { return 8; } else if (!(aAddress & 0x3)) { return 4; } else if (!(aAddress & 0x1)) { return 2; } else { return 1; } } // Take texture data in a given buffer and copy it into a larger buffer, // padding out the edge pixels for filtering if necessary static void CopyAndPadTextureData(const GLvoid* srcBuffer, GLvoid* dstBuffer, GLsizei srcWidth, GLsizei srcHeight, GLsizei dstWidth, GLsizei dstHeight, GLsizei stride, GLint pixelsize) { unsigned char* rowDest = static_cast<unsigned char*>(dstBuffer); const unsigned char* source = static_cast<const unsigned char*>(srcBuffer); for (GLsizei h = 0; h < srcHeight; ++h) { memcpy(rowDest, source, srcWidth * pixelsize); rowDest += dstWidth * pixelsize; source += stride; } GLsizei padHeight = srcHeight; // Pad out an extra row of pixels so that edge filtering doesn't use garbage data if (dstHeight > srcHeight) { memcpy(rowDest, source - stride, srcWidth * pixelsize); padHeight++; } // Pad out an extra column of pixels if (dstWidth > srcWidth) { rowDest = static_cast<unsigned char*>(dstBuffer) + srcWidth * pixelsize; for (GLsizei h = 0; h < padHeight; ++h) { memcpy(rowDest, rowDest - pixelsize, pixelsize); rowDest += dstWidth * pixelsize; } } } // In both of these cases (for the Adreno at least) it is impossible // to determine good or bad driver versions for POT texture uploads, // so blacklist them all. Newer drivers use a different rendering // string in the form "Adreno (TM) 200" and the drivers we've seen so // far work fine with NPOT textures, so don't blacklist those until we // have evidence of any problems with them. bool CanUploadSubTextures(GLContext* gl) { if (!gl->WorkAroundDriverBugs()) return true; // There are certain GPUs that we don't want to use glTexSubImage2D on // because that function can be very slow and/or buggy if (gl->Renderer() == GLRenderer::Adreno200 || gl->Renderer() == GLRenderer::Adreno205) { return false; } // On PowerVR glTexSubImage does a readback, so it will be slower // than just doing a glTexImage2D() directly. i.e. 26ms vs 10ms if (gl->Renderer() == GLRenderer::SGX540 || gl->Renderer() == GLRenderer::SGX530) { return false; } return true; } static void TexSubImage2DWithUnpackSubimageGLES(GLContext* gl, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLsizei stride, GLint pixelsize, GLenum format, GLenum type, const GLvoid* pixels) { gl->fPixelStorei(LOCAL_GL_UNPACK_ALIGNMENT, std::min(GetAddressAlignment((ptrdiff_t)pixels), GetAddressAlignment((ptrdiff_t)stride))); // When using GL_UNPACK_ROW_LENGTH, we need to work around a Tegra // driver crash where the driver apparently tries to read // (stride - width * pixelsize) bytes past the end of the last input // row. We only upload the first height-1 rows using GL_UNPACK_ROW_LENGTH, // and then we upload the final row separately. See bug 697990. int rowLength = stride/pixelsize; gl->fPixelStorei(LOCAL_GL_UNPACK_ROW_LENGTH, rowLength); gl->fTexSubImage2D(target, level, xoffset, yoffset, width, height-1, format, type, pixels); gl->fPixelStorei(LOCAL_GL_UNPACK_ROW_LENGTH, 0); gl->fTexSubImage2D(target, level, xoffset, yoffset+height-1, width, 1, format, type, (const unsigned char*)pixels+(height-1)*stride); gl->fPixelStorei(LOCAL_GL_UNPACK_ALIGNMENT, 4); } static void TexSubImage2DWithoutUnpackSubimage(GLContext* gl, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLsizei stride, GLint pixelsize, GLenum format, GLenum type, const GLvoid* pixels) { // Not using the whole row of texture data and GL_UNPACK_ROW_LENGTH // isn't supported. We make a copy of the texture data we're using, // such that we're using the whole row of data in the copy. This turns // out to be more efficient than uploading row-by-row; see bug 698197. // Width and height are never more than 16384. At 16Ki*16Ki, 4Bpp is 1GiB, but // if we allow 8Bpp (16-bit channels, or higher) here, that's 2GiB+, which would // overflow on 32-bit. MOZ_ASSERT(width <= 16384); MOZ_ASSERT(height <= 16384); MOZ_ASSERT(pixelsize < 8); const auto size = CheckedInt<size_t>(width) * height * pixelsize; if (!size.isValid()) { // This should never happen, but we use a defensive check. MOZ_ASSERT_UNREACHABLE("Unacceptable size calculated.!"); return; } unsigned char* newPixels = new (fallible) unsigned char[size.value()]; if (newPixels) { unsigned char* rowDest = newPixels; const unsigned char* rowSource = (const unsigned char*)pixels; for (int h = 0; h < height; h++) { memcpy(rowDest, rowSource, width*pixelsize); rowDest += width*pixelsize; rowSource += stride; } stride = width*pixelsize; gl->fPixelStorei(LOCAL_GL_UNPACK_ALIGNMENT, std::min(GetAddressAlignment((ptrdiff_t)newPixels), GetAddressAlignment((ptrdiff_t)stride))); gl->fTexSubImage2D(target, level, xoffset, yoffset, width, height, format, type, newPixels); delete [] newPixels; gl->fPixelStorei(LOCAL_GL_UNPACK_ALIGNMENT, 4); } else { // If we did not have sufficient memory for the required // temporary buffer, then fall back to uploading row-by-row. const unsigned char* rowSource = (const unsigned char*)pixels; gl->fPixelStorei(LOCAL_GL_UNPACK_ALIGNMENT, std::min(GetAddressAlignment((ptrdiff_t)pixels), GetAddressAlignment((ptrdiff_t)stride))); for (int i = 0; i < height; i++) { gl->fTexSubImage2D(target, level, xoffset, yoffset + i, width, 1, format, type, rowSource); rowSource += stride; } gl->fPixelStorei(LOCAL_GL_UNPACK_ALIGNMENT, 4); } } static void TexSubImage2DHelper(GLContext* gl, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLsizei stride, GLint pixelsize, GLenum format, GLenum type, const GLvoid* pixels) { if (gl->IsGLES()) { if (stride == width * pixelsize) { gl->fPixelStorei(LOCAL_GL_UNPACK_ALIGNMENT, std::min(GetAddressAlignment((ptrdiff_t)pixels), GetAddressAlignment((ptrdiff_t)stride))); gl->fTexSubImage2D(target, level, xoffset, yoffset, width, height, format, type, pixels); gl->fPixelStorei(LOCAL_GL_UNPACK_ALIGNMENT, 4); } else if (gl->IsExtensionSupported(GLContext::EXT_unpack_subimage)) { TexSubImage2DWithUnpackSubimageGLES(gl, target, level, xoffset, yoffset, width, height, stride, pixelsize, format, type, pixels); } else { TexSubImage2DWithoutUnpackSubimage(gl, target, level, xoffset, yoffset, width, height, stride, pixelsize, format, type, pixels); } } else { // desktop GL (non-ES) path gl->fPixelStorei(LOCAL_GL_UNPACK_ALIGNMENT, std::min(GetAddressAlignment((ptrdiff_t)pixels), GetAddressAlignment((ptrdiff_t)stride))); int rowLength = stride/pixelsize; gl->fPixelStorei(LOCAL_GL_UNPACK_ROW_LENGTH, rowLength); gl->fTexSubImage2D(target, level, xoffset, yoffset, width, height, format, type, pixels); gl->fPixelStorei(LOCAL_GL_UNPACK_ROW_LENGTH, 0); gl->fPixelStorei(LOCAL_GL_UNPACK_ALIGNMENT, 4); } } static void TexImage2DHelper(GLContext* gl, GLenum target, GLint level, GLint internalformat, GLsizei width, GLsizei height, GLsizei stride, GLint pixelsize, GLint border, GLenum format, GLenum type, const GLvoid* pixels) { if (gl->IsGLES()) { NS_ASSERTION(format == (GLenum)internalformat, "format and internalformat not the same for glTexImage2D on GLES2"); MOZ_ASSERT(width >= 0 && height >= 0); if (!CanUploadNonPowerOfTwo(gl) && (stride != width * pixelsize || !IsPowerOfTwo((uint32_t)width) || !IsPowerOfTwo((uint32_t)height))) { // Pad out texture width and height to the next power of two // as we don't support/want non power of two texture uploads GLsizei paddedWidth = RoundUpPow2((uint32_t)width); GLsizei paddedHeight = RoundUpPow2((uint32_t)height); // Width and height are never more than 16384. At 16Ki*16Ki, 4Bpp // is 1GiB, but if we allow 8Bpp (or higher) here, that's 2GiB, // which would overflow on 32-bit. MOZ_ASSERT(width <= 16384); MOZ_ASSERT(height <= 16384); MOZ_ASSERT(pixelsize < 8); const auto size = CheckedInt<size_t>(paddedWidth) * paddedHeight * pixelsize; if (!size.isValid()) { // This should never happen, but we use a defensive check. MOZ_ASSERT_UNREACHABLE("Unacceptable size calculated.!"); return; } GLvoid* paddedPixels = new unsigned char[size.value()]; // Pad out texture data to be in a POT sized buffer for uploading to // a POT sized texture CopyAndPadTextureData(pixels, paddedPixels, width, height, paddedWidth, paddedHeight, stride, pixelsize); gl->fPixelStorei(LOCAL_GL_UNPACK_ALIGNMENT, std::min(GetAddressAlignment((ptrdiff_t)paddedPixels), GetAddressAlignment((ptrdiff_t)paddedWidth * pixelsize))); gl->fTexImage2D(target, border, internalformat, paddedWidth, paddedHeight, border, format, type, paddedPixels); gl->fPixelStorei(LOCAL_GL_UNPACK_ALIGNMENT, 4); delete[] static_cast<unsigned char*>(paddedPixels); return; } if (stride == width * pixelsize) { gl->fPixelStorei(LOCAL_GL_UNPACK_ALIGNMENT, std::min(GetAddressAlignment((ptrdiff_t)pixels), GetAddressAlignment((ptrdiff_t)stride))); gl->fTexImage2D(target, border, internalformat, width, height, border, format, type, pixels); gl->fPixelStorei(LOCAL_GL_UNPACK_ALIGNMENT, 4); } else { // Use GLES-specific workarounds for GL_UNPACK_ROW_LENGTH; these are // implemented in TexSubImage2D. gl->fTexImage2D(target, border, internalformat, width, height, border, format, type, nullptr); TexSubImage2DHelper(gl, target, level, 0, 0, width, height, stride, pixelsize, format, type, pixels); } } else { // desktop GL (non-ES) path gl->fPixelStorei(LOCAL_GL_UNPACK_ALIGNMENT, std::min(GetAddressAlignment((ptrdiff_t)pixels), GetAddressAlignment((ptrdiff_t)stride))); int rowLength = stride/pixelsize; gl->fPixelStorei(LOCAL_GL_UNPACK_ROW_LENGTH, rowLength); gl->fTexImage2D(target, level, internalformat, width, height, border, format, type, pixels); gl->fPixelStorei(LOCAL_GL_UNPACK_ROW_LENGTH, 0); gl->fPixelStorei(LOCAL_GL_UNPACK_ALIGNMENT, 4); } } SurfaceFormat UploadImageDataToTexture(GLContext* gl, unsigned char* aData, int32_t aStride, SurfaceFormat aFormat, const nsIntRegion& aDstRegion, GLuint aTexture, const gfx::IntSize& aSize, size_t* aOutUploadSize, bool aNeedInit, GLenum aTextureUnit, GLenum aTextureTarget) { gl->MakeCurrent(); gl->fActiveTexture(aTextureUnit); gl->fBindTexture(aTextureTarget, aTexture); GLenum format = 0; GLenum internalFormat = 0; GLenum type = 0; int32_t pixelSize = BytesPerPixel(aFormat); SurfaceFormat surfaceFormat = gfx::SurfaceFormat::UNKNOWN; MOZ_ASSERT(gl->GetPreferredARGB32Format() == LOCAL_GL_BGRA || gl->GetPreferredARGB32Format() == LOCAL_GL_RGBA); switch (aFormat) { case SurfaceFormat::B8G8R8A8: if (gl->GetPreferredARGB32Format() == LOCAL_GL_BGRA) { format = LOCAL_GL_BGRA; surfaceFormat = SurfaceFormat::R8G8B8A8; type = LOCAL_GL_UNSIGNED_INT_8_8_8_8_REV; } else { format = LOCAL_GL_RGBA; surfaceFormat = SurfaceFormat::B8G8R8A8; type = LOCAL_GL_UNSIGNED_BYTE; } internalFormat = LOCAL_GL_RGBA; break; case SurfaceFormat::B8G8R8X8: // Treat BGRX surfaces as BGRA except for the surface // format used. if (gl->GetPreferredARGB32Format() == LOCAL_GL_BGRA) { format = LOCAL_GL_BGRA; surfaceFormat = SurfaceFormat::R8G8B8X8; type = LOCAL_GL_UNSIGNED_INT_8_8_8_8_REV; } else { format = LOCAL_GL_RGBA; surfaceFormat = SurfaceFormat::B8G8R8X8; type = LOCAL_GL_UNSIGNED_BYTE; } internalFormat = LOCAL_GL_RGBA; break; case SurfaceFormat::R8G8B8A8: if (gl->GetPreferredARGB32Format() == LOCAL_GL_BGRA) { // Upload our RGBA as BGRA, but store that the uploaded format is // BGRA. (sample from R to get B) format = LOCAL_GL_BGRA; type = LOCAL_GL_UNSIGNED_INT_8_8_8_8_REV; surfaceFormat = SurfaceFormat::B8G8R8A8; } else { format = LOCAL_GL_RGBA; type = LOCAL_GL_UNSIGNED_BYTE; surfaceFormat = SurfaceFormat::R8G8B8A8; } internalFormat = LOCAL_GL_RGBA; break; case SurfaceFormat::R8G8B8X8: // Treat RGBX surfaces as RGBA except for the surface // format used. if (gl->GetPreferredARGB32Format() == LOCAL_GL_BGRA) { format = LOCAL_GL_BGRA; type = LOCAL_GL_UNSIGNED_INT_8_8_8_8_REV; surfaceFormat = SurfaceFormat::B8G8R8X8; } else { format = LOCAL_GL_RGBA; type = LOCAL_GL_UNSIGNED_BYTE; surfaceFormat = SurfaceFormat::R8G8B8X8; } internalFormat = LOCAL_GL_RGBA; break; case SurfaceFormat::R5G6B5_UINT16: internalFormat = format = LOCAL_GL_RGB; type = LOCAL_GL_UNSIGNED_SHORT_5_6_5; surfaceFormat = SurfaceFormat::R5G6B5_UINT16; break; case SurfaceFormat::A8: internalFormat = format = LOCAL_GL_LUMINANCE; type = LOCAL_GL_UNSIGNED_BYTE; // We don't have a specific luminance shader surfaceFormat = SurfaceFormat::A8; break; default: MOZ_ASSERT_UNREACHABLE("Unhandled image surface format!"); } if (aOutUploadSize) { *aOutUploadSize = 0; } if (surfaceFormat == gfx::SurfaceFormat::UNKNOWN) { return gfx::SurfaceFormat::UNKNOWN; } if (aNeedInit || !CanUploadSubTextures(gl)) { // If the texture needs initialized, or we are unable to // upload sub textures, then initialize and upload the entire // texture. TexImage2DHelper(gl, aTextureTarget, 0, internalFormat, aSize.width, aSize.height, aStride, pixelSize, 0, format, type, aData); if (aOutUploadSize && aNeedInit) { uint32_t texelSize = GetBytesPerTexel(internalFormat, type); size_t numTexels = size_t(aSize.width) * size_t(aSize.height); *aOutUploadSize += texelSize * numTexels; } } else { // Upload each rect in the region to the texture for (auto iter = aDstRegion.RectIter(); !iter.Done(); iter.Next()) { const IntRect& rect = iter.Get(); const unsigned char* rectData = aData + DataOffset(rect.TopLeft(), aStride, aFormat); TexSubImage2DHelper(gl, aTextureTarget, 0, rect.x, rect.y, rect.width, rect.height, aStride, pixelSize, format, type, rectData); } } return surfaceFormat; } SurfaceFormat UploadSurfaceToTexture(GLContext* gl, DataSourceSurface* aSurface, const nsIntRegion& aDstRegion, GLuint aTexture, const gfx::IntSize& aSize, size_t* aOutUploadSize, bool aNeedInit, const gfx::IntPoint& aSrcPoint, GLenum aTextureUnit, GLenum aTextureTarget) { int32_t stride = aSurface->Stride(); SurfaceFormat format = aSurface->GetFormat(); unsigned char* data = aSurface->GetData() + DataOffset(aSrcPoint, stride, format); return UploadImageDataToTexture(gl, data, stride, format, aDstRegion, aTexture, aSize, aOutUploadSize, aNeedInit, aTextureUnit, aTextureTarget); } bool CanUploadNonPowerOfTwo(GLContext* gl) { if (!gl->WorkAroundDriverBugs()) return true; // Some GPUs driver crash when uploading non power of two 565 textures. return gl->Renderer() != GLRenderer::Adreno200 && gl->Renderer() != GLRenderer::Adreno205; } } // namespace gl } // namespace mozilla