changeset: 94061:73a9b24d863a tag: bilin tag: qbase tag: qtip tag: tip user: Jeff Muizelaar <jmuizelaar@mozilla.com> date: Tue May 15 18:26:16 2012 -0400 summary: Bug 754364. Add bilinear non-repeat and repeat fast paths. r=joe diff --git a/gfx/cairo/libpixman/src/pixman-fast-path.c b/gfx/cairo/libpixman/src/pixman-fast-path.c --- a/gfx/cairo/libpixman/src/pixman-fast-path.c +++ b/gfx/cairo/libpixman/src/pixman-fast-path.c @@ -1186,16 +1186,228 @@ FAST_NEAREST (8888_565_none, 8888, 0565, FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, SRC, PAD) FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, SRC, NORMAL) FAST_NEAREST (565_565_normal, 0565, 0565, uint16_t, uint16_t, SRC, NORMAL) FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, OVER, COVER) FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, OVER, NONE) FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, OVER, PAD) FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, OVER, NORMAL) +static force_inline void +scaled_bilinear_scanline_8888_565_OVER (uint16_t * dst, + const uint32_t * mask, + const uint32_t * src_top, + const uint32_t * src_bottom, + int32_t w, + int wt, + int wb, + pixman_fixed_t vx, + pixman_fixed_t unit_x, + pixman_fixed_t max_vx, + pixman_bool_t zero_src) +{ + while ((w -= 1) >= 0) + { + uint32_t tl = src_top [pixman_fixed_to_int (vx)]; + uint32_t tr = src_top [pixman_fixed_to_int (vx) + 1]; + uint32_t bl = src_bottom [pixman_fixed_to_int (vx)]; + uint32_t br = src_bottom [pixman_fixed_to_int (vx) + 1]; + uint32_t src, result; + uint16_t d; + d = *dst; + src = bilinear_interpolation (tl, tr, + bl, br, + interpolation_coord(vx), + wb >> (8 - INTERPOLATION_PRECISION_BITS)); + vx += unit_x; + result = over (src, CONVERT_0565_TO_0888 (d)); + *dst++ = CONVERT_8888_TO_0565(result); + } +} + +static force_inline void +scaled_bilinear_scanline_8888_8888_OVER (uint32_t * dst, + const uint32_t * mask, + const uint32_t * src_top, + const uint32_t * src_bottom, + int32_t w, + int wt, + int wb, + pixman_fixed_t vx, + pixman_fixed_t unit_x, + pixman_fixed_t max_vx, + pixman_bool_t zero_src) +{ + while ((w -= 1) >= 0) + { + uint32_t tl = src_top [pixman_fixed_to_int (vx)]; + uint32_t tr = src_top [pixman_fixed_to_int (vx) + 1]; + uint32_t bl = src_bottom [pixman_fixed_to_int (vx)]; + uint32_t br = src_bottom [pixman_fixed_to_int (vx) + 1]; + uint32_t src; + uint32_t d; + uint32_t result; + d = *dst; + src = bilinear_interpolation (tl, tr, + bl, br, + interpolation_coord(vx), + wb >> (8 - INTERPOLATION_PRECISION_BITS)); + vx += unit_x; + *dst++ = over (src, d); + } +} + +#if 1 + +static force_inline void +scaled_bilinear_scanline_565_565_SRC (uint16_t * dst, + const uint32_t * mask, + const uint16_t * src_top, + const uint16_t * src_bottom, + int32_t w, + int wt, + int wb, + pixman_fixed_t vx, + pixman_fixed_t unit_x, + pixman_fixed_t max_vx, + pixman_bool_t zero_src) +{ + while ((w -= 1) >= 0) + { + uint16_t tl = src_top [pixman_fixed_to_int (vx)]; + uint16_t tr = src_top [pixman_fixed_to_int (vx) + 1]; + uint16_t bl = src_bottom [pixman_fixed_to_int (vx)]; + uint16_t br = src_bottom [pixman_fixed_to_int (vx) + 1]; + uint32_t d; + d = bilinear_interpolation(CONVERT_0565_TO_8888(tl), + CONVERT_0565_TO_8888(tr), + CONVERT_0565_TO_8888(bl), + CONVERT_0565_TO_8888(br), + interpolation_coord(vx), + wb >> (8 - INTERPOLATION_PRECISION_BITS)); + vx += unit_x; + *dst++ = CONVERT_8888_TO_0565(d); + } +} + +#else + +#define SK_G16_MASK_IN_PLACE 0xfc0 + +static inline uint32_t SkExpand_rgb_16(uint16_t c) { + + return ((c & SK_G16_MASK_IN_PLACE) << 16) | (c & ~SK_G16_MASK_IN_PLACE); +} + +/** Compress an expanded value (from SkExpand_rgb_16) back down to a 16bit + color value. The computation yields only 16bits of valid data, but we claim + to return 32bits, so that the compiler won't generate extra instructions to + "clean" the top 16bits. However, the top 16 can contain garbage, so it is + up to the caller to safely ignore them. +*/ +static inline uint16_t SkCompact_rgb_16(uint32_t c) { + return ((c >> 16) & SK_G16_MASK_IN_PLACE) | (c & ~SK_G16_MASK_IN_PLACE); +} +// returns expanded * 5bits +static inline uint32_t Filter_565_Expanded(unsigned x, unsigned y, + uint32_t a00, uint32_t a01, + uint32_t a10, uint32_t a11) { + a00 = SkExpand_rgb_16(a00); + a01 = SkExpand_rgb_16(a01); + a10 = SkExpand_rgb_16(a10); + a11 = SkExpand_rgb_16(a11); + + int xy = x * y >> 3; + return a00 * (32 - 2*y - 2*x + xy) + + a01 * (2*x - xy) + + a10 * (2*y - xy) + + a11 * xy; +} + + + +static force_inline void +scaled_bilinear_scanline_565_565_SRC (uint16_t * dst, + const uint32_t * mask, + const uint16_t * src_top, + const uint16_t * src_bottom, + int32_t w, + int wt, + int wb, + pixman_fixed_t vx, + pixman_fixed_t unit_x, + pixman_fixed_t max_vx, + pixman_bool_t zero_src) +{ + while ((w -= 1) >= 0) + { + uint16_t tl = src_top [pixman_fixed_to_int (vx)]; + uint16_t tr = src_top [pixman_fixed_to_int (vx) + 1]; + uint16_t bl = src_bottom [pixman_fixed_to_int (vx)]; + uint16_t br = src_bottom [pixman_fixed_to_int (vx) + 1]; + + uint32_t tmp = Filter_565_Expanded((vx>>12)&0xf, wb>>4, tl, tr, bl, br); + vx += unit_x; + *dst++ = SkCompact_rgb_16((tmp) >> 5); + } +} + + +#endif +FAST_BILINEAR_MAINLOOP_COMMON (565_565_cover_SRC, + scaled_bilinear_scanline_565_565_SRC, + uint16_t, uint32_t, uint16_t, + COVER, FLAG_NONE) +FAST_BILINEAR_MAINLOOP_COMMON (565_565_pad_SRC, + scaled_bilinear_scanline_565_565_SRC, + uint16_t, uint32_t, uint16_t, + PAD, FLAG_NONE) +FAST_BILINEAR_MAINLOOP_COMMON (565_565_none_SRC, + scaled_bilinear_scanline_565_565_SRC, + uint16_t, uint32_t, uint16_t, + NONE, FLAG_NONE) +FAST_BILINEAR_MAINLOOP_COMMON (565_565_normal_SRC, + scaled_bilinear_scanline_565_565_SRC, + uint16_t, uint32_t, uint16_t, + NORMAL, FLAG_NONE) + +FAST_BILINEAR_MAINLOOP_COMMON (8888_565_cover_OVER, + scaled_bilinear_scanline_8888_565_OVER, + uint32_t, uint32_t, uint16_t, + COVER, FLAG_NONE) +FAST_BILINEAR_MAINLOOP_COMMON (8888_565_pad_OVER, + scaled_bilinear_scanline_8888_565_OVER, + uint32_t, uint32_t, uint16_t, + PAD, FLAG_NONE) +FAST_BILINEAR_MAINLOOP_COMMON (8888_565_none_OVER, + scaled_bilinear_scanline_8888_565_OVER, + uint32_t, uint32_t, uint16_t, + NONE, FLAG_NONE) +FAST_BILINEAR_MAINLOOP_COMMON (8888_565_normal_OVER, + scaled_bilinear_scanline_8888_565_OVER, + uint32_t, uint32_t, uint16_t, + NORMAL, FLAG_NONE) + +FAST_BILINEAR_MAINLOOP_COMMON (8888_8888_cover_OVER, + scaled_bilinear_scanline_8888_8888_OVER, + uint32_t, uint32_t, uint32_t, + COVER, FLAG_NONE) +FAST_BILINEAR_MAINLOOP_COMMON (8888_8888_pad_OVER, + scaled_bilinear_scanline_8888_8888_OVER, + uint32_t, uint32_t, uint32_t, + PAD, FLAG_NONE) +FAST_BILINEAR_MAINLOOP_COMMON (8888_8888_none_OVER, + scaled_bilinear_scanline_8888_8888_OVER, + uint32_t, uint32_t, uint32_t, + NONE, FLAG_NONE) +FAST_BILINEAR_MAINLOOP_COMMON (8888_8888_normal_OVER, + scaled_bilinear_scanline_8888_8888_OVER, + uint32_t, uint32_t, uint32_t, + NORMAL, FLAG_NONE) + #define REPEAT_MIN_WIDTH 32 static void fast_composite_tiled_repeat (pixman_implementation_t *imp, pixman_composite_info_t *info) { PIXMAN_COMPOSITE_ARGS (info); pixman_composite_func_t func; @@ -1960,16 +2172,20 @@ static const pixman_fast_path_t c_fast_p PIXMAN_any, (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | FAST_PATH_BITS_IMAGE | FAST_PATH_NORMAL_REPEAT), PIXMAN_any, 0, PIXMAN_any, FAST_PATH_STD_DEST_FLAGS, fast_composite_tiled_repeat }, + SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, r5g6b5, 565_565), + SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, r5g6b5, 8888_565), + SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, 8888_8888), + { PIXMAN_OP_NONE }, }; #ifdef WORDS_BIGENDIAN #define A1_FILL_MASK(n, offs) (((1U << (n)) - 1) << (32 - (offs) - (n))) #else #define A1_FILL_MASK(n, offs) (((1U << (n)) - 1) << (offs)) #endif diff --git a/gfx/cairo/libpixman/src/pixman-inlines.h b/gfx/cairo/libpixman/src/pixman-inlines.h --- a/gfx/cairo/libpixman/src/pixman-inlines.h +++ b/gfx/cairo/libpixman/src/pixman-inlines.h @@ -80,16 +80,21 @@ repeat (pixman_repeat_t repeat, int *c, } return TRUE; } #ifdef MOZ_GFX_OPTIMIZE_MOBILE #define LOW_QUALITY_INTERPOLATION #endif +#ifdef LOW_QUALITY_INTERPOLATION +#define INTERPOLATION_PRECISION_BITS 4 +#else +#define INTERPOLATION_PRECISION_BITS 8 +#endif static force_inline int32_t interpolation_coord(pixman_fixed_t t) { #ifdef LOW_QUALITY_INTERPOLATION return (t >> 12) & 0xf; #else return (t >> 8) & 0xff; #endif