diff options
Diffstat (limited to 'third_party/aom/av1/common/av1_loopfilter.c')
-rw-r--r-- | third_party/aom/av1/common/av1_loopfilter.c | 1056 |
1 files changed, 913 insertions, 143 deletions
diff --git a/third_party/aom/av1/common/av1_loopfilter.c b/third_party/aom/av1/common/av1_loopfilter.c index 4b27ae93b..10df7fa91 100644 --- a/third_party/aom/av1/common/av1_loopfilter.c +++ b/third_party/aom/av1/common/av1_loopfilter.c @@ -22,7 +22,262 @@ #include "av1/common/seg_common.h" +#if CONFIG_LPF_DIRECT +static void pick_filter_pixel_left(uint8_t *const src, uint8_t *const line, + int *const orig_pos, int length, int row, + int col, int width, int height, int pitch, + int pivot, int direct) { + int i; + int pos = row * pitch + col; + + for (i = 0; i < length; ++i) { + int dy = 0; + switch (direct) { + case VERT_HORZ: dy = 0; break; + case DEGREE_45: dy = 1; break; + case DEGREE_135: dy = -1; break; + } + col -= 1; + row += dy; + if (col >= 0 && col < width && row >= 0 && row < height) { + pos = row * pitch + col; + line[pivot - 1 - i] = src[pos]; + orig_pos[pivot - 1 - i] = pos; + } + } +} + +static void pick_filter_pixel_right(uint8_t *const src, uint8_t *const line, + int *const orig_pos, int length, int row, + int col, int width, int height, int pitch, + int pivot, int direct) { + int i; + int pos = row * pitch + col; + + line[pivot] = src[pos]; + orig_pos[pivot] = pos; + + for (i = 1; i < length; ++i) { + int dy = 0; + switch (direct) { + case VERT_HORZ: dy = 0; break; + case DEGREE_45: dy = -1; break; + case DEGREE_135: dy = 1; break; + } + col += 1; + row += dy; + if (col >= 0 && col < width && row >= 0 && row < height) { + pos = row * pitch + col; + line[pivot + i] = src[pos]; + orig_pos[pivot + i] = pos; + } + } +} + +static void pick_filter_pixel_above(uint8_t *const src, uint8_t *const line, + int *const orig_pos, int length, int row, + int col, int width, int height, int pitch, + int pivot, int direct) { + int i; + int pos = row * pitch + col; + + for (i = 0; i < length; ++i) { + int dx = 0; + switch (direct) { + case VERT_HORZ: dx = 0; break; + case DEGREE_45: dx = 1; break; + case DEGREE_135: dx = -1; break; + } + col += dx; + row -= 1; + if (col >= 0 && col < width && row >= 0 && row < height) { + pos = row * pitch + col; + line[pivot - 1 - i] = src[pos]; + orig_pos[pivot - 1 - i] = pos; + } + } +} + +static void pick_filter_pixel_bot(uint8_t *const src, uint8_t *const line, + int *const orig_pos, int length, int row, + int col, int width, int height, int pitch, + int pivot, int direct) { + int i; + int pos = row * pitch + col; + + line[pivot] = src[pos]; + orig_pos[pivot] = pos; + + for (i = 1; i < length; ++i) { + int dx = 0; + switch (direct) { + case VERT_HORZ: dx = 0; break; + case DEGREE_45: dx = -1; break; + case DEGREE_135: dx = 1; break; + } + col += dx; + row += 1; + if (col >= 0 && col < width && row >= 0 && row < height) { + pos = row * pitch + col; + line[pivot + i] = src[pos]; + orig_pos[pivot + i] = pos; + } + } +} + +static void pick_filter_block_vert(uint8_t *const src, uint8_t *const block, + int *const orig_pos, int length, int row, + int col, int width, int height, int pitch, + int pivot, int line_length, int unit, + int direct) { + int i; + for (i = 0; i < 8 * unit; ++i) { + pick_filter_pixel_left(src, block + i * line_length, + orig_pos + i * line_length, length, row + i, col, + width, height, pitch, pivot, direct); + pick_filter_pixel_right(src, block + i * line_length, + orig_pos + i * line_length, length, row + i, col, + width, height, pitch, pivot, direct); + } +} + +static void pick_filter_block_horz(uint8_t *const src, uint8_t *const block, + int *const orig_pos, int length, int row, + int col, int width, int height, int pitch, + int pivot, int line_length, int unit, + int direct) { + int i, j; + int num = 8 * unit; + for (i = 0; i < num; ++i) { + pick_filter_pixel_above(src, block + i * line_length, + orig_pos + i * line_length, length, row, col + i, + width, height, pitch, pivot, direct); + pick_filter_pixel_bot(src, block + i * line_length, + orig_pos + i * line_length, length, row, col + i, + width, height, pitch, pivot, direct); + } + + // rearrange block + // TODO(chengchen): make it in-place or a stand alone function + uint8_t tmp_block[256]; + int tmp_pos[256]; + for (i = 0; i < 256; ++i) { + tmp_block[i] = 0; + tmp_pos[i] = -1; + } + for (i = 0; i < num; ++i) { + for (j = 0; j < line_length; ++j) { + tmp_block[j * line_length + i] = block[i * line_length + j]; + tmp_pos[j * line_length + i] = orig_pos[i * line_length + j]; + } + } + for (i = 0; i < 256; ++i) { + block[i] = tmp_block[i]; + orig_pos[i] = tmp_pos[i]; + } +} + +static int compute_block_grad(uint8_t *const src, int length, int row, int col, + int width, int height, int pitch, int unit, + int vert_or_horz, int direct) { + int i, j; + int r0, c0, pos0, r1 = 0, c1 = 0, pos1; + int sum_grad = 0; + for (i = 0; i < 8 * unit; ++i) { + // vert_or_horz: 0 vertical edge, 1 horizontal edge + r0 = vert_or_horz ? row : row + i; + c0 = vert_or_horz ? col + i : col; + pos0 = r0 * pitch + c0; + + for (j = 0; j < length; ++j) { + if (vert_or_horz == 0) { + switch (direct) { + case VERT_HORZ: r1 = r0; break; + case DEGREE_45: r1 = r0 + 1; break; + case DEGREE_135: r1 = r0 - 1; break; + } + c1 = c0 - 1; + } else { + r1 = r0 - 1; + switch (direct) { + case VERT_HORZ: c1 = c0; break; + case DEGREE_45: c1 = c0 + 1; break; + case DEGREE_135: c1 = c0 - 1; break; + } + } + pos1 = r1 * pitch + c1; + + if (r0 >= 0 && r0 < height && c0 >= 0 && c0 < width && r1 >= 0 && + r1 < height && c1 >= 0 && c1 < width) { + sum_grad += abs(src[pos1] - src[pos0]); + } else { + sum_grad += 255; // penalize unreachable boundary + } + r0 = r1; + c0 = c1; + pos0 = pos1; + } + + r0 = vert_or_horz ? row : row + i; + c0 = vert_or_horz ? col + i : col; + pos0 = r0 * pitch + c0; + + for (j = 0; j < length - 1; ++j) { + if (vert_or_horz == 0) { + switch (direct) { + case VERT_HORZ: r1 = r0; break; + case DEGREE_45: r1 = r0 - 1; break; + case DEGREE_135: r1 = r0 + 1; break; + } + c1 = c0 + 1; + } else { + r1 = r0 + 1; + switch (direct) { + case VERT_HORZ: c1 = c0; break; + case DEGREE_45: c1 = c0 - 1; break; + case DEGREE_135: c1 = c0 + 1; break; + } + } + pos1 = r1 * pitch + c1; + + if (r0 >= 0 && r0 < height && c0 >= 0 && c0 < width && r1 >= 0 && + r1 < height && c1 >= 0 && c1 < width) { + sum_grad += abs(src[pos1] - src[pos0]); + } else { + sum_grad += 255; // penalize unreachable boundary + } + r0 = r1; + c0 = c1; + pos0 = pos1; + } + } + + return sum_grad; +} + +static int pick_min_grad_direct(uint8_t *const src, int length, int row, + int col, int width, int height, int pitch, + int unit, int vert_or_horz) { + int direct = VERT_HORZ; + int min_grad = INT_MAX, sum_grad = 0; + + int degree; + for (degree = 0; degree < FILTER_DEGREES; ++degree) { + // compute abs gradient along each line for the filter block + sum_grad = compute_block_grad(src, length, row, col, width, height, pitch, + unit, vert_or_horz, degree); + if (sum_grad < min_grad) { + min_grad = sum_grad; + direct = degree; + } + } + + return direct; +} +#endif // CONFIG_LPF_DIRECT + #define PARALLEL_DEBLOCKING_15TAPLUMAONLY 1 +#define PARALLEL_DEBLOCKING_DISABLE_15TAP 0 // 64 bit masks for left transform size. Each 1 represents a position where // we should apply a loop filter across the left border of an 8x8 block @@ -99,8 +354,8 @@ static const uint64_t above_64x64_txform_mask[TX_SIZES] = { // 00000000 // 00000000 // 00000000 -static const uint64_t left_prediction_mask[BLOCK_SIZES] = { -#if CONFIG_CB4X4 +static const uint64_t left_prediction_mask[BLOCK_SIZES_ALL] = { +#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8 0x0000000000000001ULL, // BLOCK_2X2, 0x0000000000000001ULL, // BLOCK_2X4, 0x0000000000000001ULL, // BLOCK_4X2, @@ -117,12 +372,16 @@ static const uint64_t left_prediction_mask[BLOCK_SIZES] = { 0x0000000001010101ULL, // BLOCK_32X32, 0x0101010101010101ULL, // BLOCK_32X64, 0x0000000001010101ULL, // BLOCK_64X32, - 0x0101010101010101ULL, // BLOCK_64X64 + 0x0101010101010101ULL, // BLOCK_64X64, + 0x0000000000000101ULL, // BLOCK_4X16, + 0x0000000000000001ULL, // BLOCK_16X4, + 0x0000000001010101ULL, // BLOCK_8X32, + 0x0000000000000001ULL, // BLOCK_32X8 }; // 64 bit mask to shift and set for each prediction size. -static const uint64_t above_prediction_mask[BLOCK_SIZES] = { -#if CONFIG_CB4X4 +static const uint64_t above_prediction_mask[BLOCK_SIZES_ALL] = { +#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8 0x0000000000000001ULL, // BLOCK_2X2 0x0000000000000001ULL, // BLOCK_2X4 0x0000000000000001ULL, // BLOCK_4X2 @@ -139,13 +398,17 @@ static const uint64_t above_prediction_mask[BLOCK_SIZES] = { 0x000000000000000fULL, // BLOCK_32X32, 0x000000000000000fULL, // BLOCK_32X64, 0x00000000000000ffULL, // BLOCK_64X32, - 0x00000000000000ffULL, // BLOCK_64X64 + 0x00000000000000ffULL, // BLOCK_64X64, + 0x0000000000000001ULL, // BLOCK_4X16, + 0x0000000000000003ULL, // BLOCK_16X4, + 0x0000000000000001ULL, // BLOCK_8X32, + 0x000000000000000fULL, // BLOCK_32X8 }; // 64 bit mask to shift and set for each prediction size. A bit is set for -// each 8x8 block that would be in the left most block of the given block +// each 8x8 block that would be in the top left most block of the given block // size in the 64x64 block. -static const uint64_t size_mask[BLOCK_SIZES] = { -#if CONFIG_CB4X4 +static const uint64_t size_mask[BLOCK_SIZES_ALL] = { +#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8 0x0000000000000001ULL, // BLOCK_2X2 0x0000000000000001ULL, // BLOCK_2X4 0x0000000000000001ULL, // BLOCK_4X2 @@ -162,7 +425,11 @@ static const uint64_t size_mask[BLOCK_SIZES] = { 0x000000000f0f0f0fULL, // BLOCK_32X32, 0x0f0f0f0f0f0f0f0fULL, // BLOCK_32X64, 0x00000000ffffffffULL, // BLOCK_64X32, - 0xffffffffffffffffULL, // BLOCK_64X64 + 0xffffffffffffffffULL, // BLOCK_64X64, + 0x0000000000000101ULL, // BLOCK_4X16, + 0x0000000000000003ULL, // BLOCK_16X4, + 0x0000000001010101ULL, // BLOCK_8X32, + 0x000000000000000fULL, // BLOCK_32X8 }; // These are used for masking the left and above 32x32 borders. @@ -197,8 +464,8 @@ static const uint16_t above_64x64_txform_mask_uv[TX_SIZES] = { }; // 16 bit left mask to shift and set for each uv prediction size. -static const uint16_t left_prediction_mask_uv[BLOCK_SIZES] = { -#if CONFIG_CB4X4 +static const uint16_t left_prediction_mask_uv[BLOCK_SIZES_ALL] = { +#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8 0x0001, // BLOCK_2X2, 0x0001, // BLOCK_2X4, 0x0001, // BLOCK_4X2, @@ -215,11 +482,16 @@ static const uint16_t left_prediction_mask_uv[BLOCK_SIZES] = { 0x0011, // BLOCK_32X32, 0x1111, // BLOCK_32X64 0x0011, // BLOCK_64X32, - 0x1111, // BLOCK_64X64 + 0x1111, // BLOCK_64X64, + 0x0001, // BLOCK_4X16, + 0x0001, // BLOCK_16X4, + 0x0011, // BLOCK_8X32, + 0x0001, // BLOCK_32X8 }; + // 16 bit above mask to shift and set for uv each prediction size. -static const uint16_t above_prediction_mask_uv[BLOCK_SIZES] = { -#if CONFIG_CB4X4 +static const uint16_t above_prediction_mask_uv[BLOCK_SIZES_ALL] = { +#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8 0x0001, // BLOCK_2X2 0x0001, // BLOCK_2X4 0x0001, // BLOCK_4X2 @@ -236,12 +508,16 @@ static const uint16_t above_prediction_mask_uv[BLOCK_SIZES] = { 0x0003, // BLOCK_32X32, 0x0003, // BLOCK_32X64, 0x000f, // BLOCK_64X32, - 0x000f, // BLOCK_64X64 + 0x000f, // BLOCK_64X64, + 0x0001, // BLOCK_4X16, + 0x0001, // BLOCK_16X4, + 0x0001, // BLOCK_8X32, + 0x0003, // BLOCK_32X8 }; // 64 bit mask to shift and set for each uv prediction size -static const uint16_t size_mask_uv[BLOCK_SIZES] = { -#if CONFIG_CB4X4 +static const uint16_t size_mask_uv[BLOCK_SIZES_ALL] = { +#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8 0x0001, // BLOCK_2X2 0x0001, // BLOCK_2X4 0x0001, // BLOCK_4X2 @@ -258,7 +534,11 @@ static const uint16_t size_mask_uv[BLOCK_SIZES] = { 0x0033, // BLOCK_32X32, 0x3333, // BLOCK_32X64, 0x00ff, // BLOCK_64X32, - 0xffff, // BLOCK_64X64 + 0xffff, // BLOCK_64X64, + 0x0001, // BLOCK_4X16, + 0x0001, // BLOCK_16X4, + 0x0011, // BLOCK_8X32, + 0x0003, // BLOCK_32X8 }; static const uint16_t left_border_uv = 0x1111; static const uint16_t above_border_uv = 0x000f; @@ -273,6 +553,11 @@ static const int mode_lf_lut[] = { #endif // CONFIG_ALT_INTRA 1, 1, 0, 1, // INTER_MODES (ZEROMV == 0) #if CONFIG_EXT_INTER +#if CONFIG_COMPOUND_SINGLEREF + // 1, 1, 1, 1, 1, // INTER_SINGLEREF_COMP_MODES + // NOTE(zoeliu): Remove SR_NEAREST_NEWMV + 1, 1, 1, 1, // INTER_SINGLEREF_COMP_MODES +#endif // CONFIG_COMPOUND_SINGLEREF 1, 1, 1, 1, 1, 1, 0, 1 // INTER_COMPOUND_MODES (ZERO_ZEROMV == 0) #endif // CONFIG_EXT_INTER }; @@ -602,9 +887,21 @@ static void highbd_filter_selectively_vert_row2( static void filter_selectively_horiz( uint8_t *s, int pitch, unsigned int mask_16x16, unsigned int mask_8x8, unsigned int mask_4x4, unsigned int mask_4x4_int, - const loop_filter_info_n *lfi_n, const uint8_t *lfl) { + const loop_filter_info_n *lfi_n, const uint8_t *lfl +#if CONFIG_LPF_DIRECT + , + uint8_t *const src, int mi_row, int mi_col, int idx_r, int col_step, + int width, int height, int ss_x, int ss_y +#endif + ) { unsigned int mask; int count; +#if CONFIG_LPF_DIRECT + // scale for u, v plane + width >>= ss_x; + height >>= ss_y; + int idx_c = 0; +#endif for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; mask; mask >>= count) { @@ -612,6 +909,270 @@ static void filter_selectively_horiz( count = 1; if (mask & 1) { +#if CONFIG_LPF_DIRECT + int i; + const int line_length = 16; + const int pivot = 8; + const int above_filt_len = mask_16x16 & 1 ? 8 : 4; + const int bot_filt_len = mask_16x16 & 1 ? 8 : 4; + uint8_t block[256]; // line_length * size_of(BLOCK_8X8) * two_blocks + int orig_pos[256]; + int direct; + + assert(above_filt_len == bot_filt_len); + (void)bot_filt_len; + for (i = 0; i < 256; ++i) { + block[i] = 0; + orig_pos[i] = -1; + } + + // actual position for current pixel + const int row = (mi_row + idx_r) * MI_SIZE >> ss_y; + const int col = (mi_col + idx_c) * MI_SIZE >> ss_x; + + // Next block's thresholds. + const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1); + + if (mask_16x16 & 1) { + if ((mask_16x16 & 3) == 3) { + // Could use asymmetric length in the future + direct = pick_min_grad_direct(src, above_filt_len, row, col, width, + height, pitch, 2, 1); + + pick_filter_block_horz(src, block, orig_pos, above_filt_len, row, col, + width, height, pitch, pivot, line_length, 2, + direct); + + aom_lpf_horizontal_edge_16(block + pivot * line_length, line_length, + lfi->mblim, lfi->lim, lfi->hev_thr); + count = 2; + } else { + direct = pick_min_grad_direct(src, above_filt_len, row, col, width, + height, pitch, 1, 1); + + pick_filter_block_horz(src, block, orig_pos, above_filt_len, row, col, + width, height, pitch, pivot, line_length, 1, + direct); + + aom_lpf_horizontal_edge_8(block + pivot * line_length, line_length, + lfi->mblim, lfi->lim, lfi->hev_thr); + } + + for (i = 0; i < 256; ++i) + if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i]; + } else if (mask_8x8 & 1) { + if ((mask_8x8 & 3) == 3) { + count = 2; + direct = pick_min_grad_direct(src, above_filt_len, row, col, width, + height, pitch, 2, 1); + + pick_filter_block_horz(src, block, orig_pos, above_filt_len, row, col, + width, height, pitch, pivot, line_length, 2, + direct); + + aom_lpf_horizontal_8_dual(block + pivot * line_length, line_length, + lfi->mblim, lfi->lim, lfi->hev_thr, + lfin->mblim, lfin->lim, lfin->hev_thr); + + for (i = 0; i < 256; ++i) + if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i]; + + if ((mask_4x4_int & 3) == 3) { + for (i = 0; i < 256; ++i) { + block[i] = 0; + orig_pos[i] = -1; + } + + direct = pick_min_grad_direct(src, 4, row, col, width, height, + pitch, 2, 1); + + pick_filter_block_horz(src, block, orig_pos, 4, row + 4, col, width, + height, pitch, pivot, line_length, 2, + direct); + + aom_lpf_horizontal_4_dual(block + pivot * line_length, line_length, + lfi->mblim, lfi->lim, lfi->hev_thr, + lfin->mblim, lfin->lim, lfin->hev_thr); + + for (i = 0; i < 256; ++i) + if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i]; + } else { + for (i = 0; i < 256; ++i) { + block[i] = 0; + orig_pos[i] = -1; + } + + if (mask_4x4_int & 1) { + direct = pick_min_grad_direct(src, 4, row, col, width, height, + pitch, 1, 1); + + pick_filter_block_horz(src, block, orig_pos, 4, row + 4, col, + width, height, pitch, pivot, line_length, + 1, direct); + + aom_lpf_horizontal_4(block + pivot * line_length, line_length, + lfi->mblim, lfi->lim, lfi->hev_thr); + } else if (mask_4x4_int & 2) { + direct = pick_min_grad_direct(src, 4, row, col, width, height, + pitch, 1, 1); + + pick_filter_block_horz(src, block, orig_pos, 4, row + 4, col + 8, + width, height, pitch, pivot, line_length, + 1, direct); + + aom_lpf_horizontal_4(block + pivot * line_length, line_length, + lfin->mblim, lfin->lim, lfin->hev_thr); + } + + for (i = 0; i < 256; ++i) + if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i]; + } + } else { + direct = pick_min_grad_direct(src, above_filt_len, row, col, width, + height, pitch, 1, 1); + + pick_filter_block_horz(src, block, orig_pos, above_filt_len, row, col, + width, height, pitch, pivot, line_length, 1, + direct); + + aom_lpf_horizontal_8(block + pivot * line_length, line_length, + lfi->mblim, lfi->lim, lfi->hev_thr); + + for (i = 0; i < 256; ++i) + if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i]; + + if (mask_4x4_int & 1) { + for (i = 0; i < 256; ++i) { + block[i] = 0; + orig_pos[i] = -1; + } + direct = pick_min_grad_direct(src, 4, row, col, width, height, + pitch, 1, 1); + + pick_filter_block_horz(src, block, orig_pos, 4, row + 4, col, width, + height, pitch, pivot, line_length, 1, + direct); + + aom_lpf_horizontal_4(block + pivot * line_length, line_length, + lfi->mblim, lfi->lim, lfi->hev_thr); + + for (i = 0; i < 256; ++i) + if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i]; + } + } + } else if (mask_4x4 & 1) { + if ((mask_4x4 & 3) == 3) { + count = 2; + direct = pick_min_grad_direct(src, 4, row, col, width, height, pitch, + 2, 1); + + pick_filter_block_horz(src, block, orig_pos, 4, row, col, width, + height, pitch, pivot, line_length, 2, direct); + + aom_lpf_horizontal_4_dual(block + pivot * line_length, line_length, + lfi->mblim, lfi->lim, lfi->hev_thr, + lfin->mblim, lfin->lim, lfin->hev_thr); + + for (i = 0; i < 256; ++i) + if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i]; + + if ((mask_4x4_int & 3) == 3) { + for (i = 0; i < 256; ++i) { + block[i] = 0; + orig_pos[i] = -1; + } + + direct = pick_min_grad_direct(src, 4, row, col, width, height, + pitch, 2, 1); + + pick_filter_block_horz(src, block, orig_pos, 4, row + 4, col, width, + height, pitch, pivot, line_length, 2, + direct); + + aom_lpf_horizontal_4_dual(block + pivot * line_length, line_length, + lfi->mblim, lfi->lim, lfi->hev_thr, + lfin->mblim, lfin->lim, lfin->hev_thr); + + for (i = 0; i < 256; ++i) + if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i]; + } else { + for (i = 0; i < 256; ++i) { + block[i] = 0; + orig_pos[i] = -1; + } + + if (mask_4x4_int & 1) { + direct = pick_min_grad_direct(src, 4, row, col, width, height, + pitch, 1, 1); + + pick_filter_block_horz(src, block, orig_pos, 4, row + 4, col, + width, height, pitch, pivot, line_length, + 1, direct); + + aom_lpf_horizontal_4(block + pivot * line_length, line_length, + lfi->mblim, lfi->lim, lfi->hev_thr); + } else if (mask_4x4_int & 2) { + direct = pick_min_grad_direct(src, 4, row, col, width, height, + pitch, 1, 1); + + pick_filter_block_horz(src, block, orig_pos, 4, row + 4, col + 8, + width, height, pitch, pivot, line_length, + 1, direct); + + aom_lpf_horizontal_4(block + pivot * line_length, line_length, + lfin->mblim, lfin->lim, lfin->hev_thr); + } + + for (i = 0; i < 256; ++i) + if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i]; + } + } else { + direct = pick_min_grad_direct(src, above_filt_len, row, col, width, + height, pitch, 1, 1); + + pick_filter_block_horz(src, block, orig_pos, above_filt_len, row, col, + width, height, pitch, pivot, line_length, 1, + direct); + + aom_lpf_horizontal_4(block + pivot * line_length, line_length, + lfi->mblim, lfi->lim, lfi->hev_thr); + + for (i = 0; i < 256; ++i) + if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i]; + + if (mask_4x4_int & 1) { + for (i = 0; i < 256; ++i) { + block[i] = 0; + orig_pos[i] = -1; + } + direct = pick_min_grad_direct(src, above_filt_len, row, col, width, + height, pitch, 1, 1); + + pick_filter_block_horz(src, block, orig_pos, 4, row + 4, col, width, + height, pitch, pivot, line_length, 1, + direct); + + aom_lpf_horizontal_4(block + pivot * line_length, line_length, + lfi->mblim, lfi->lim, lfi->hev_thr); + + for (i = 0; i < 256; ++i) + if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i]; + } + } + } else if (mask_4x4_int & 1) { + direct = + pick_min_grad_direct(src, 4, row, col, width, height, pitch, 1, 1); + + pick_filter_block_horz(src, block, orig_pos, 4, row + 4, col, width, + height, pitch, pivot, line_length, 1, direct); + + aom_lpf_horizontal_4(block + pivot * line_length, line_length, + lfi->mblim, lfi->lim, lfi->hev_thr); + + for (i = 0; i < 256; ++i) + if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i]; + } +#else // CONFIG_LPF_DIRECT if (mask_16x16 & 1) { if ((mask_16x16 & 3) == 3) { aom_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim, @@ -658,6 +1219,7 @@ static void filter_selectively_horiz( aom_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, lfin->mblim, lfin->lim, lfin->hev_thr); + if ((mask_4x4_int & 3) == 3) { aom_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, lfin->mblim, @@ -682,7 +1244,11 @@ static void filter_selectively_horiz( aom_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); } +#endif // CONFIG_LPF_DIRECT } +#if CONFIG_LPF_DIRECT + idx_c += col_step * count; +#endif s += 8 * count; lfl += count; mask_16x16 >>= count; @@ -1290,13 +1856,91 @@ void av1_setup_mask(AV1_COMMON *const cm, const int mi_row, const int mi_col, static void filter_selectively_vert( uint8_t *s, int pitch, unsigned int mask_16x16, unsigned int mask_8x8, unsigned int mask_4x4, unsigned int mask_4x4_int, - const loop_filter_info_n *lfi_n, const uint8_t *lfl) { + const loop_filter_info_n *lfi_n, const uint8_t *lfl +#if CONFIG_LPF_DIRECT + , + uint8_t *const src, int mi_row, int mi_col, int idx_r, int col_step, + int width, int height, int ss_x, int ss_y +#endif + ) { unsigned int mask; +#if CONFIG_LPF_DIRECT + // scale for u, v plane + width >>= ss_x; + height >>= ss_y; + int idx_c = 0; +#endif for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; mask; mask >>= 1) { const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl; +#if CONFIG_LPF_DIRECT + int i; + const int pivot = 8; + const int left_filt_len = mask_16x16 & 1 ? 8 : 4; + const int right_filt_len = mask_16x16 & 1 ? 8 : 4; + const int line_length = 16; + uint8_t block[128]; + int orig_pos[128]; + + // actual position for current pixel + const int row = (mi_row + idx_r) * MI_SIZE >> ss_y; + const int col = (mi_col + idx_c) * MI_SIZE >> ss_x; + + // Could use asymmetric length in the future + assert(left_filt_len == right_filt_len); + (void)right_filt_len; + + if ((mask_16x16 & 1) || (mask_8x8 & 1) || (mask_4x4 & 1)) { + for (i = 0; i < 128; ++i) { + block[i] = 0; + orig_pos[i] = -1; + } + + int direct = pick_min_grad_direct(src, left_filt_len, row, col, width, + height, pitch, 1, 0); + + pick_filter_block_vert(src, block, orig_pos, left_filt_len, row, col, + width, height, pitch, pivot, line_length, 1, + direct); + + // apply filtering + if (mask_16x16 & 1) { + aom_lpf_vertical_16(block + pivot, line_length, lfi->mblim, lfi->lim, + lfi->hev_thr); + } else if (mask_8x8 & 1) { + aom_lpf_vertical_8(block + pivot, line_length, lfi->mblim, lfi->lim, + lfi->hev_thr); + } else if (mask_4x4 & 1) { + aom_lpf_vertical_4(block + pivot, line_length, lfi->mblim, lfi->lim, + lfi->hev_thr); + } + + for (i = 0; i < 128; ++i) + if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i]; + } + + // filter inner 4x4 + if (mask_4x4_int & 1) { + for (i = 0; i < 128; ++i) { + block[i] = 0; + orig_pos[i] = -1; + } + + int direct = pick_min_grad_direct(src, 4, row, col + 4, width, height, + pitch, 1, 0); + + pick_filter_block_vert(src, block, orig_pos, 4, row, col + 4, width, + height, pitch, pivot, line_length, 1, direct); + + aom_lpf_vertical_4(block + pivot, line_length, lfi->mblim, lfi->lim, + lfi->hev_thr); + + for (i = 0; i < 128; ++i) + if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i]; + } +#else if (mask & 1) { if (mask_16x16 & 1) { aom_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); @@ -1308,6 +1952,10 @@ static void filter_selectively_vert( } if (mask_4x4_int & 1) aom_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); +#endif // CONFIG_LPF_DIRECT +#if CONFIG_LPF_DIRECT + idx_c += col_step; +#endif s += 8; lfl += 1; mask_16x16 >>= 1; @@ -1401,7 +2049,7 @@ static void get_filter_level_and_masks_non420( const int skip_this_r = skip_this && !block_edge_above; TX_SIZE tx_size = (plane->plane_type == PLANE_TYPE_UV) - ? get_uv_tx_size(mbmi, plane) + ? av1_get_uv_tx_size(mbmi, plane) : mbmi->tx_size; const int skip_border_4x4_c = @@ -1420,8 +2068,12 @@ static void get_filter_level_and_masks_non420( (blk_row * mi_size_high[BLOCK_8X8] << TX_UNIT_HIGH_LOG2) >> 1; const int tx_col_idx = (blk_col * mi_size_wide[BLOCK_8X8] << TX_UNIT_WIDE_LOG2) >> 1; +#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8 const BLOCK_SIZE bsize = AOMMAX(BLOCK_4X4, get_plane_block_size(mbmi->sb_type, plane)); +#else + const BLOCK_SIZE bsize = get_plane_block_size(mbmi->sb_type, plane); +#endif const TX_SIZE mb_tx_size = mbmi->inter_tx_size[tx_row_idx][tx_col_idx]; tx_size = (plane->plane_type == PLANE_TYPE_UV) ? uv_txsize_lookup[bsize][mb_tx_size][0][0] @@ -1437,19 +2089,35 @@ static void get_filter_level_and_masks_non420( #endif #if CONFIG_VAR_TX - TX_SIZE tx_size_r, tx_size_c; + TX_SIZE tx_size_horz_edge, tx_size_vert_edge; + + // filt_len_vert_edge is the length of deblocking filter for a vertical edge + // The filter direction of a vertical edge is horizontal. + // Thus, filt_len_vert_edge is determined as the minimum width of the two + // transform block sizes on the left and right (current block) side of edge + const int filt_len_vert_edge = AOMMIN( + tx_size_wide[tx_size], + tx_size_wide[cm->left_txfm_context[pl][((mi_row + idx_r) & MAX_MIB_MASK) + << TX_UNIT_HIGH_LOG2]]); - const int tx_wide = - AOMMIN(tx_size_wide[tx_size], - tx_size_wide[cm->top_txfm_context[pl][(mi_col + idx_c) + // filt_len_horz_edge is the len of deblocking filter for a horizontal edge + // The filter direction of a horizontal edge is vertical. + // Thus, filt_len_horz_edge is determined as the minimum height of the two + // transform block sizes on the top and bottom (current block) side of edge + const int filt_len_horz_edge = + AOMMIN(tx_size_high[tx_size], + tx_size_high[cm->top_txfm_context[pl][(mi_col + idx_c) << TX_UNIT_WIDE_LOG2]]); - const int tx_high = AOMMIN( - tx_size_high[tx_size], - tx_size_high[cm->left_txfm_context[pl][((mi_row + idx_r) & MAX_MIB_MASK) - << TX_UNIT_HIGH_LOG2]]); - tx_size_c = get_sqr_tx_size(tx_wide); - tx_size_r = get_sqr_tx_size(tx_high); + // transform width/height of current block + const int tx_wide_cur = tx_size_wide[tx_size]; + const int tx_high_cur = tx_size_high[tx_size]; + + // tx_size_vert_edge is square transform size for a vertical deblocking edge + // It determines the type of filter applied to the vertical edge + // Similarly, tx_size_horz_edge is for a horizontal deblocking edge + tx_size_vert_edge = get_sqr_tx_size(filt_len_vert_edge); + tx_size_horz_edge = get_sqr_tx_size(filt_len_horz_edge); memset(cm->top_txfm_context[pl] + ((mi_col + idx_c) << TX_UNIT_WIDE_LOG2), tx_size, mi_size_wide[BLOCK_8X8] << TX_UNIT_WIDE_LOG2); @@ -1457,28 +2125,32 @@ static void get_filter_level_and_masks_non420( (((mi_row + idx_r) & MAX_MIB_MASK) << TX_UNIT_HIGH_LOG2), tx_size, mi_size_high[BLOCK_8X8] << TX_UNIT_HIGH_LOG2); #else - TX_SIZE tx_size_c = txsize_horz_map[tx_size]; - TX_SIZE tx_size_r = txsize_vert_map[tx_size]; + // The length (or equally the square tx size) of deblocking filter is only + // determined by + // a) current block's width for a vertical deblocking edge + // b) current block's height for a horizontal deblocking edge + TX_SIZE tx_size_vert_edge = txsize_horz_map[tx_size]; + TX_SIZE tx_size_horz_edge = txsize_vert_map[tx_size]; (void)pl; #endif // CONFIG_VAR_TX - if (tx_size_c == TX_32X32) + if (tx_size_vert_edge == TX_32X32) tx_size_mask = 3; - else if (tx_size_c == TX_16X16) + else if (tx_size_vert_edge == TX_16X16) tx_size_mask = 1; else tx_size_mask = 0; // Build masks based on the transform size of each block // handle vertical mask - if (tx_size_c == TX_32X32) { + if (tx_size_vert_edge == TX_32X32) { if (!skip_this_c && (c_step & tx_size_mask) == 0) { if (!skip_border_4x4_c) col_masks.m16x16 |= col_mask; else col_masks.m8x8 |= col_mask; } - } else if (tx_size_c == TX_16X16) { + } else if (tx_size_vert_edge == TX_16X16) { if (!skip_this_c && (c_step & tx_size_mask) == 0) { if (!skip_border_4x4_c) col_masks.m16x16 |= col_mask; @@ -1488,33 +2160,38 @@ static void get_filter_level_and_masks_non420( } else { // force 8x8 filtering on 32x32 boundaries if (!skip_this_c && (c_step & tx_size_mask) == 0) { - if (tx_size_c == TX_8X8 || ((c >> ss_x) & 3) == 0) + if (tx_size_vert_edge == TX_8X8 || (c_step & 3) == 0) col_masks.m8x8 |= col_mask; else col_masks.m4x4 |= col_mask; } - if (!skip_this && tx_size_c < TX_8X8 && !skip_border_4x4_c && +#if CONFIG_VAR_TX + if (!skip_this && tx_wide_cur < 8 && !skip_border_4x4_c && + (c_step & tx_size_mask) == 0) +#else + if (!skip_this && tx_size_vert_edge < TX_8X8 && !skip_border_4x4_c && (c_step & tx_size_mask) == 0) +#endif // CONFIG_VAR_TX mask_4x4_int_c |= col_mask; } - if (tx_size_r == TX_32X32) + if (tx_size_horz_edge == TX_32X32) tx_size_mask = 3; - else if (tx_size_r == TX_16X16) + else if (tx_size_horz_edge == TX_16X16) tx_size_mask = 1; else tx_size_mask = 0; // set horizontal mask - if (tx_size_r == TX_32X32) { + if (tx_size_horz_edge == TX_32X32) { if (!skip_this_r && (r_step & tx_size_mask) == 0) { if (!skip_border_4x4_r) row_masks.m16x16 |= col_mask; else row_masks.m8x8 |= col_mask; } - } else if (tx_size_r == TX_16X16) { + } else if (tx_size_horz_edge == TX_16X16) { if (!skip_this_r && (r_step & tx_size_mask) == 0) { if (!skip_border_4x4_r) row_masks.m16x16 |= col_mask; @@ -1524,14 +2201,19 @@ static void get_filter_level_and_masks_non420( } else { // force 8x8 filtering on 32x32 boundaries if (!skip_this_r && (r_step & tx_size_mask) == 0) { - if (tx_size_r == TX_8X8 || (r_step & 3) == 0) + if (tx_size_horz_edge == TX_8X8 || (r_step & 3) == 0) row_masks.m8x8 |= col_mask; else row_masks.m4x4 |= col_mask; } - if (!skip_this && tx_size_r < TX_8X8 && !skip_border_4x4_r && - ((r >> ss_y) & tx_size_mask) == 0) +#if CONFIG_VAR_TX + if (!skip_this && tx_high_cur < 8 && !skip_border_4x4_r && + (r_step & tx_size_mask) == 0) +#else + if (!skip_this && tx_size_horz_edge < TX_8X8 && !skip_border_4x4_r && + (r_step & tx_size_mask) == 0) +#endif // CONFIG_VAR_TX mask_4x4_int_r |= col_mask; } } @@ -1548,6 +2230,10 @@ void av1_filter_block_plane_non420_ver(AV1_COMMON *const cm, int pl) { const int ss_y = plane->subsampling_y; const int row_step = mi_size_high[BLOCK_8X8] << ss_y; +#if CONFIG_LPF_DIRECT + const int ss_x = plane->subsampling_x; + const int col_step = mi_size_wide[BLOCK_8X8] << ss_x; +#endif struct buf_2d *const dst = &plane->dst; uint8_t *const dst0 = dst->buf; uint8_t lfl[MAX_MIB_SIZE][MAX_MIB_SIZE] = { { 0 } }; @@ -1565,8 +2251,9 @@ void av1_filter_block_plane_non420_ver(AV1_COMMON *const cm, // Disable filtering on the leftmost column or tile boundary unsigned int border_mask = ~(mi_col == 0); #if CONFIG_LOOPFILTERING_ACROSS_TILES + MODE_INFO *const mi = cm->mi + (mi_row + idx_r) * cm->mi_stride + mi_col; if (av1_disable_loopfilter_on_tile_boundary(cm) && - ((mib[0]->mbmi.boundary_info & TILE_LEFT_BOUNDARY) != 0)) { + ((mi->mbmi.boundary_info & TILE_LEFT_BOUNDARY) != 0)) { border_mask = 0xfffffffe; } #endif // CONFIG_LOOPFILTERING_ACROSS_TILES @@ -1583,7 +2270,13 @@ void av1_filter_block_plane_non420_ver(AV1_COMMON *const cm, filter_selectively_vert( dst->buf, dst->stride, col_masks.m16x16 & border_mask, col_masks.m8x8 & border_mask, col_masks.m4x4 & border_mask, - mask_4x4_int, &cm->lf_info, &lfl[r][0]); + mask_4x4_int, &cm->lf_info, &lfl[r][0] +#if CONFIG_LPF_DIRECT + , + dst->buf0, mi_row, mi_col, idx_r, col_step, cm->width, cm->height, + ss_x, ss_y +#endif // CONFIG_LPF_DIRECT + ); dst->buf += 8 * dst->stride; } @@ -1597,49 +2290,52 @@ void av1_filter_block_plane_non420_hor(AV1_COMMON *const cm, int pl) { const int ss_y = plane->subsampling_y; const int row_step = mi_size_high[BLOCK_8X8] << ss_y; +#if CONFIG_LPF_DIRECT + const int ss_x = plane->subsampling_x; + const int col_step = mi_size_wide[BLOCK_8X8] << ss_x; +#endif struct buf_2d *const dst = &plane->dst; uint8_t *const dst0 = dst->buf; - FilterMasks row_masks_array[MAX_MIB_SIZE]; - unsigned int mask_4x4_int[MAX_MIB_SIZE] = { 0 }; uint8_t lfl[MAX_MIB_SIZE][MAX_MIB_SIZE] = { { 0 } }; + int idx_r; for (idx_r = 0; idx_r < cm->mib_size && mi_row + idx_r < cm->mi_rows; idx_r += row_step) { + unsigned int mask_4x4_int; + FilterMasks row_masks; const int r = idx_r >> mi_height_log2_lookup[BLOCK_8X8]; get_filter_level_and_masks_non420(cm, plane, pl, mib, mi_row, mi_col, idx_r, - &lfl[r][0], mask_4x4_int + r, NULL, - row_masks_array + r, NULL); - } - for (idx_r = 0; idx_r < cm->mib_size && mi_row + idx_r < cm->mi_rows; - idx_r += row_step) { - const int r = idx_r >> mi_width_log2_lookup[BLOCK_8X8]; - FilterMasks row_masks; + &lfl[r][0], &mask_4x4_int, NULL, + &row_masks, NULL); #if CONFIG_LOOPFILTERING_ACROSS_TILES // Disable filtering on the abovemost row or tile boundary - const MODE_INFO *mi = cm->mi + (mi_row + r) * cm->mi_stride; + const MODE_INFO *mi = cm->mi + (mi_row + idx_r) * cm->mi_stride + mi_col; if ((av1_disable_loopfilter_on_tile_boundary(cm) && (mi->mbmi.boundary_info & TILE_ABOVE_BOUNDARY)) || - (mi_row + idx_r == 0)) { + (mi_row + idx_r == 0)) memset(&row_masks, 0, sizeof(row_masks)); #else - if (mi_row + idx_r == 0) { - memset(&row_masks, 0, sizeof(row_masks)); + if (mi_row + idx_r == 0) memset(&row_masks, 0, sizeof(row_masks)); #endif // CONFIG_LOOPFILTERING_ACROSS_TILES - } else { - memcpy(&row_masks, row_masks_array + r, sizeof(row_masks)); - } + #if CONFIG_HIGHBITDEPTH if (cm->use_highbitdepth) highbd_filter_selectively_horiz( CONVERT_TO_SHORTPTR(dst->buf), dst->stride, row_masks.m16x16, - row_masks.m8x8, row_masks.m4x4, mask_4x4_int[r], &cm->lf_info, + row_masks.m8x8, row_masks.m4x4, mask_4x4_int, &cm->lf_info, &lfl[r][0], (int)cm->bit_depth); else #endif // CONFIG_HIGHBITDEPTH filter_selectively_horiz(dst->buf, dst->stride, row_masks.m16x16, - row_masks.m8x8, row_masks.m4x4, mask_4x4_int[r], - &cm->lf_info, &lfl[r][0]); + row_masks.m8x8, row_masks.m4x4, mask_4x4_int, + &cm->lf_info, &lfl[r][0] +#if CONFIG_LPF_DIRECT + , + dst->buf0, mi_row, mi_col, idx_r, col_step, + cm->width, cm->height, ss_x, ss_y +#endif // CONFIG_LPF_DIRECT + ); dst->buf += 8 * dst->stride; } dst->buf = dst0; @@ -1725,9 +2421,11 @@ void av1_filter_block_plane_ss00_hor(AV1_COMMON *const cm, (int)cm->bit_depth); else #endif // CONFIG_HIGHBITDEPTH +#if !CONFIG_LPF_DIRECT filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r, mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info, &lfm->lfl_y[r][0]); +#endif // CONFIG_LPF_DIRECT dst->buf += MI_SIZE * dst->stride; mask_16x16 >>= MI_SIZE; @@ -1843,9 +2541,11 @@ void av1_filter_block_plane_ss11_hor(AV1_COMMON *const cm, (int)cm->bit_depth); else #endif // CONFIG_HIGHBITDEPTH +#if !CONFIG_LPF_DIRECT filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r, mask_4x4_r, mask_4x4_int_r, &cm->lf_info, &lfm->lfl_uv[r >> 1][0]); +#endif // CONFIG_LPF_DIRECT dst->buf += MI_SIZE * dst->stride; mask_16x16 >>= MI_SIZE / 2; @@ -1859,14 +2559,14 @@ void av1_filter_block_plane_ss11_hor(AV1_COMMON *const cm, #if CONFIG_PARALLEL_DEBLOCKING typedef enum EDGE_DIR { VERT_EDGE = 0, HORZ_EDGE = 1, NUM_EDGE_DIRS } EDGE_DIR; -static const uint32_t av1_prediction_masks[NUM_EDGE_DIRS][BLOCK_SIZES] = { +static const uint32_t av1_prediction_masks[NUM_EDGE_DIRS][BLOCK_SIZES_ALL] = { // mask for vertical edges filtering { -#if CONFIG_CB4X4 +#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8 2 - 1, // BLOCK_2X2 2 - 1, // BLOCK_2X4 4 - 1, // BLOCK_4X2 -#endif // CONFIG_CB4X4 +#endif // CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8 4 - 1, // BLOCK_4X4 4 - 1, // BLOCK_4X8 8 - 1, // BLOCK_8X4 @@ -1883,16 +2583,20 @@ static const uint32_t av1_prediction_masks[NUM_EDGE_DIRS][BLOCK_SIZES] = { #if CONFIG_EXT_PARTITION 64 - 1, // BLOCK_64X128 128 - 1, // BLOCK_128X64 - 128 - 1 // BLOCK_128X128 + 128 - 1, // BLOCK_128X128 #endif // CONFIG_EXT_PARTITION + 4 - 1, // BLOCK_4X16, + 16 - 1, // BLOCK_16X4, + 8 - 1, // BLOCK_8X32, + 32 - 1 // BLOCK_32X8 }, // mask for horizontal edges filtering { -#if CONFIG_CB4X4 +#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8 2 - 1, // BLOCK_2X2 4 - 1, // BLOCK_2X4 2 - 1, // BLOCK_4X2 -#endif // CONFIG_CB4X4 +#endif // CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8 4 - 1, // BLOCK_4X4 8 - 1, // BLOCK_4X8 4 - 1, // BLOCK_8X4 @@ -1909,8 +2613,12 @@ static const uint32_t av1_prediction_masks[NUM_EDGE_DIRS][BLOCK_SIZES] = { #if CONFIG_EXT_PARTITION 128 - 1, // BLOCK_64X128 64 - 1, // BLOCK_128X64 - 128 - 1 // BLOCK_128X128 + 128 - 1, // BLOCK_128X128 #endif // CONFIG_EXT_PARTITION + 16 - 1, // BLOCK_4X16, + 4 - 1, // BLOCK_16X4, + 32 - 1, // BLOCK_8X32, + 8 - 1 // BLOCK_32X8 }, }; @@ -1962,20 +2670,65 @@ static const uint32_t av1_transform_masks[NUM_EDGE_DIRS][TX_SIZES_ALL] = { }; static TX_SIZE av1_get_transform_size(const MODE_INFO *const pCurr, - const EDGE_DIR edgeDir, + const EDGE_DIR edgeDir, const int mi_row, + const int mi_col, const int plane, + const struct macroblockd_plane *pPlane, const uint32_t scaleHorz, const uint32_t scaleVert) { - const BLOCK_SIZE bs = pCurr->mbmi.sb_type; - TX_SIZE txSize; + const MB_MODE_INFO *mbmi = &pCurr->mbmi; + TX_SIZE tx_size = (plane == PLANE_TYPE_Y) ? mbmi->tx_size + : av1_get_uv_tx_size(mbmi, pPlane); + assert(tx_size < TX_SIZES_ALL); + +#if CONFIG_VAR_TX + // mi_row and mi_col is the absolute position of the MI block. + // idx_c and idx_r is the relative offset of the MI within the super block + // c and r is the relative offset of the 8x8 block within the supert block + // blk_row and block_col is the relative offset of the current 8x8 block + // within the current partition. + const int idx_c = mi_col & MAX_MIB_MASK; + const int idx_r = mi_row & MAX_MIB_MASK; + const int c = idx_c >> mi_width_log2_lookup[BLOCK_8X8]; + const int r = idx_r >> mi_height_log2_lookup[BLOCK_8X8]; + const BLOCK_SIZE sb_type = pCurr->mbmi.sb_type; + const int blk_row = r & (num_8x8_blocks_high_lookup[sb_type] - 1); + const int blk_col = c & (num_8x8_blocks_wide_lookup[sb_type] - 1); + + if (is_inter_block(mbmi) && !mbmi->skip) { + const int tx_row_idx = + (blk_row * mi_size_high[BLOCK_8X8] << TX_UNIT_HIGH_LOG2) >> 1; + const int tx_col_idx = + (blk_col * mi_size_wide[BLOCK_8X8] << TX_UNIT_WIDE_LOG2) >> 1; + +#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8 + const BLOCK_SIZE bsize = + AOMMAX(BLOCK_4X4, ss_size_lookup[sb_type][scaleHorz][scaleVert]); +#else + const BLOCK_SIZE bsize = ss_size_lookup[sb_type][scaleHorz][scaleVert]; +#endif + const TX_SIZE mb_tx_size = mbmi->inter_tx_size[tx_row_idx][tx_col_idx]; + + assert(mb_tx_size < TX_SIZES_ALL); + + tx_size = (plane == PLANE_TYPE_UV) + ? uv_txsize_lookup[bsize][mb_tx_size][0][0] + : mb_tx_size; + assert(tx_size < TX_SIZES_ALL); + } +#else + (void)mi_row; + (void)mi_col; + (void)scaleHorz; + (void)scaleVert; +#endif // CONFIG_VAR_TX + // since in case of chrominance or non-square transorm need to convert // transform size into transform size in particular direction. - txSize = uv_txsize_lookup[bs][pCurr->mbmi.tx_size][scaleHorz][scaleVert]; - if (VERT_EDGE == edgeDir) { - txSize = txsize_horz_map[txSize]; - } else { - txSize = txsize_vert_map[txSize]; - } - return txSize; + // for vertical edge, filter direction is horizontal, for horizontal + // edge, filter direction is vertical. + tx_size = (VERT_EDGE == edgeDir) ? txsize_horz_map[tx_size] + : txsize_vert_map[tx_size]; + return tx_size; } typedef struct AV1_DEBLOCKING_PARAMETERS { @@ -1989,14 +2742,13 @@ typedef struct AV1_DEBLOCKING_PARAMETERS { const uint8_t *hev_thr; } AV1_DEBLOCKING_PARAMETERS; -static void set_lpf_parameters(AV1_DEBLOCKING_PARAMETERS *const pParams, - const MODE_INFO **const ppCurr, - const ptrdiff_t modeStep, - const AV1_COMMON *const cm, - const EDGE_DIR edgeDir, const uint32_t x, - const uint32_t y, const uint32_t width, - const uint32_t height, const uint32_t scaleHorz, - const uint32_t scaleVert) { +static void set_lpf_parameters( + AV1_DEBLOCKING_PARAMETERS *const pParams, const MODE_INFO **const ppCurr, + const ptrdiff_t modeStep, const AV1_COMMON *const cm, + const EDGE_DIR edgeDir, const uint32_t x, const uint32_t y, + const uint32_t width, const uint32_t height, const int plane, + const struct macroblockd_plane *const pPlane, const uint32_t scaleHorz, + const uint32_t scaleVert) { // reset to initial values pParams->filterLength = 0; pParams->filterLengthInternal = 0; @@ -2004,41 +2756,48 @@ static void set_lpf_parameters(AV1_DEBLOCKING_PARAMETERS *const pParams, if ((width <= x) || (height <= y)) { return; } -#if CONFIG_EXT_PARTITION - // not sure if changes are required. - assert(0 && "Not yet updated"); -#endif // CONFIG_EXT_PARTITION + + const int mi_row = (y << scaleVert) >> MI_SIZE_LOG2; + const int mi_col = (x << scaleHorz) >> MI_SIZE_LOG2; + const MB_MODE_INFO *mbmi = &ppCurr[0]->mbmi; { const TX_SIZE ts = - av1_get_transform_size(ppCurr[0], edgeDir, scaleHorz, scaleVert); + av1_get_transform_size(ppCurr[0], edgeDir, mi_row, mi_col, plane, + pPlane, scaleHorz, scaleVert); + #if CONFIG_EXT_DELTA_Q - const uint32_t currLevel = - get_filter_level(cm, &cm->lf_info, &ppCurr[0]->mbmi); + const uint32_t currLevel = get_filter_level(cm, &cm->lf_info, mbmi); #else - const uint32_t currLevel = get_filter_level(&cm->lf_info, &ppCurr[0]->mbmi); + const uint32_t currLevel = get_filter_level(&cm->lf_info, mbmi); #endif // CONFIG_EXT_DELTA_Q - const int currSkipped = - ppCurr[0]->mbmi.skip && is_inter_block(&ppCurr[0]->mbmi); + const int currSkipped = mbmi->skip && is_inter_block(mbmi); const uint32_t coord = (VERT_EDGE == edgeDir) ? (x) : (y); uint32_t level = currLevel; // prepare outer edge parameters. deblock the edge if it's an edge of a TU if (coord) { #if CONFIG_LOOPFILTERING_ACROSS_TILES + MODE_INFO *const mi = cm->mi + mi_row * cm->mi_stride + mi_col; if (!av1_disable_loopfilter_on_tile_boundary(cm) || ((VERT_EDGE == edgeDir) && - (0 == (ppCurr[0]->mbmi.boundary_info & TILE_LEFT_BOUNDARY))) || + (0 == (mi->mbmi.boundary_info & TILE_LEFT_BOUNDARY))) || ((HORZ_EDGE == edgeDir) && - (0 == (ppCurr[0]->mbmi.boundary_info & TILE_ABOVE_BOUNDARY)))) + (0 == (mi->mbmi.boundary_info & TILE_ABOVE_BOUNDARY)))) #endif // CONFIG_LOOPFILTERING_ACROSS_TILES { const int32_t tuEdge = (coord & av1_transform_masks[edgeDir][ts]) ? (0) : (1); if (tuEdge) { const MODE_INFO *const pPrev = *(ppCurr - modeStep); + const int pvRow = + (VERT_EDGE == edgeDir) ? (mi_row) : (mi_row - (1 << scaleVert)); + const int pvCol = + (VERT_EDGE == edgeDir) ? (mi_col - (1 << scaleHorz)) : (mi_col); const TX_SIZE pvTs = - av1_get_transform_size(pPrev, edgeDir, scaleHorz, scaleVert); + av1_get_transform_size(pPrev, edgeDir, pvRow, pvCol, plane, + pPlane, scaleHorz, scaleVert); + #if CONFIG_EXT_DELTA_Q const uint32_t pvLvl = get_filter_level(cm, &cm->lf_info, &pPrev->mbmi); @@ -2050,14 +2809,13 @@ static void set_lpf_parameters(AV1_DEBLOCKING_PARAMETERS *const pParams, const int32_t puEdge = (coord & av1_prediction_masks[edgeDir] - [ss_size_lookup[ppCurr[0]->mbmi.sb_type] - [scaleHorz][scaleVert]]) + [ss_size_lookup[mbmi->sb_type][scaleHorz] + [scaleVert]]) ? (0) : (1); // if the current and the previous blocks are skipped, // deblock the edge if the edge belongs to a PU's edge only. if ((currLevel || pvLvl) && (!pvSkip || !currSkipped || puEdge)) { -#if CONFIG_PARALLEL_DEBLOCKING_15TAP || PARALLEL_DEBLOCKING_15TAPLUMAONLY const TX_SIZE minTs = AOMMIN(ts, pvTs); if (TX_4X4 >= minTs) { pParams->filterLength = 4; @@ -2067,15 +2825,15 @@ static void set_lpf_parameters(AV1_DEBLOCKING_PARAMETERS *const pParams, pParams->filterLength = 16; #if PARALLEL_DEBLOCKING_15TAPLUMAONLY // No wide filtering for chroma plane - if (scaleHorz || scaleVert) { + if (plane != 0) { pParams->filterLength = 8; } #endif } -#else - pParams->filterLength = (TX_4X4 >= AOMMIN(ts, pvTs)) ? (4) : (8); -#endif // CONFIG_PARALLEL_DEBLOCKING_15TAP || PARALLEL_DEBLOCKING_15TAPLUMAONLY +#if PARALLEL_DEBLOCKING_DISABLE_15TAP + pParams->filterLength = (TX_4X4 >= AOMMIN(ts, pvTs)) ? (4) : (8); +#endif // PARALLEL_DEBLOCKING_DISABLE_15TAP // update the level if the current block is skipped, // but the previous one is not @@ -2103,9 +2861,9 @@ static void set_lpf_parameters(AV1_DEBLOCKING_PARAMETERS *const pParams, } static void av1_filter_block_plane_vert(const AV1_COMMON *const cm, + const int plane, const MACROBLOCKD_PLANE *const pPlane, const MODE_INFO **ppModeInfo, - const ptrdiff_t modeStride, const uint32_t cuX, const uint32_t cuY) { const int col_step = MI_SIZE >> MI_SIZE_LOG2; @@ -2124,12 +2882,14 @@ static void av1_filter_block_plane_vert(const AV1_COMMON *const cm, // If 4x4 trasnform is used, it will then filter the internal edge // aligned with a 4x4 block const MODE_INFO **const pCurr = - ppModeInfo + (y << scaleVert) * modeStride + (x << scaleHorz); + ppModeInfo + (y << scaleVert) * cm->mi_stride + (x << scaleHorz); AV1_DEBLOCKING_PARAMETERS params; memset(¶ms, 0, sizeof(params)); + set_lpf_parameters(¶ms, pCurr, ((ptrdiff_t)1 << scaleHorz), cm, VERT_EDGE, cuX + x * MI_SIZE, cuY + y * MI_SIZE, width, - height, scaleHorz, scaleVert); + height, plane, pPlane, scaleHorz, scaleVert); + switch (params.filterLength) { // apply 4-tap filtering case 4: @@ -2155,7 +2915,6 @@ static void av1_filter_block_plane_vert(const AV1_COMMON *const cm, aom_lpf_vertical_8_c(p, dstStride, params.mblim, params.lim, params.hev_thr); break; -#if CONFIG_PARALLEL_DEBLOCKING_15TAP || PARALLEL_DEBLOCKING_15TAPLUMAONLY // apply 16-tap filtering case 16: #if CONFIG_HIGHBITDEPTH @@ -2168,7 +2927,6 @@ static void av1_filter_block_plane_vert(const AV1_COMMON *const cm, aom_lpf_vertical_16_c(p, dstStride, params.mblim, params.lim, params.hev_thr); break; -#endif // CONFIG_PARALLEL_DEBLOCKING_15TAP || PARALLEL_DEBLOCKING_15TAPLUMAONLY // no filtering default: break; } @@ -2191,9 +2949,9 @@ static void av1_filter_block_plane_vert(const AV1_COMMON *const cm, } static void av1_filter_block_plane_horz(const AV1_COMMON *const cm, + const int plane, const MACROBLOCKD_PLANE *const pPlane, const MODE_INFO **ppModeInfo, - const ptrdiff_t modeStride, const uint32_t cuX, const uint32_t cuY) { const int col_step = MI_SIZE >> MI_SIZE_LOG2; @@ -2212,12 +2970,12 @@ static void av1_filter_block_plane_horz(const AV1_COMMON *const cm, // block. If 4x4 trasnform is used, it will then filter the internal // edge aligned with a 4x4 block const MODE_INFO **const pCurr = - ppModeInfo + (y << scaleVert) * modeStride + (x << scaleHorz); + ppModeInfo + (y << scaleVert) * cm->mi_stride + (x << scaleHorz); AV1_DEBLOCKING_PARAMETERS params; memset(¶ms, 0, sizeof(params)); - set_lpf_parameters(¶ms, pCurr, (modeStride << scaleVert), cm, + set_lpf_parameters(¶ms, pCurr, (cm->mi_stride << scaleVert), cm, HORZ_EDGE, cuX + x * MI_SIZE, cuY + y * MI_SIZE, width, - height, scaleHorz, scaleVert); + height, plane, pPlane, scaleHorz, scaleVert); switch (params.filterLength) { // apply 4-tap filtering case 4: @@ -2243,7 +3001,6 @@ static void av1_filter_block_plane_horz(const AV1_COMMON *const cm, aom_lpf_horizontal_8_c(p, dstStride, params.mblim, params.lim, params.hev_thr); break; -#if CONFIG_PARALLEL_DEBLOCKING_15TAP || PARALLEL_DEBLOCKING_15TAPLUMAONLY // apply 16-tap filtering case 16: #if CONFIG_HIGHBITDEPTH @@ -2256,7 +3013,6 @@ static void av1_filter_block_plane_horz(const AV1_COMMON *const cm, aom_lpf_horizontal_edge_16_c(p, dstStride, params.mblim, params.lim, params.hev_thr); break; -#endif // CONFIG_PARALLEL_DEBLOCKING_15TAP || PARALLEL_DEBLOCKING_15TAPLUMAONLY // no filtering default: break; } @@ -2282,7 +3038,16 @@ static void av1_filter_block_plane_horz(const AV1_COMMON *const cm, void av1_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, AV1_COMMON *cm, struct macroblockd_plane planes[MAX_MB_PLANE], int start, int stop, int y_only) { +#if CONFIG_UV_LVL + // y_only no longer has its original meaning. + // Here it means which plane to filter + // when y_only = {0, 1, 2}, it means we are searching for filter level for + // Y/U/V plane individually. + const int plane_start = y_only; + const int plane_end = plane_start + 1; +#else const int num_planes = y_only ? 1 : MAX_MB_PLANE; +#endif // CONFIG_UV_LVL int mi_row, mi_col; #if CONFIG_VAR_TX || CONFIG_EXT_PARTITION || CONFIG_EXT_PARTITION_TYPES || \ @@ -2298,14 +3063,18 @@ void av1_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, AV1_COMMON *cm, #if CONFIG_VAR_TX for (int i = 0; i < MAX_MB_PLANE; ++i) memset(cm->left_txfm_context[i], TX_32X32, MAX_MIB_SIZE - << TX_UNIT_WIDE_LOG2); + << TX_UNIT_HIGH_LOG2); #endif // CONFIG_VAR_TX for (mi_col = 0; mi_col < cm->mi_cols; mi_col += cm->mib_size) { int plane; av1_setup_dst_planes(planes, cm->sb_size, frame_buffer, mi_row, mi_col); +#if CONFIG_UV_LVL + for (plane = plane_start; plane < plane_end; ++plane) { +#else for (plane = 0; plane < num_planes; ++plane) { +#endif // CONFIG_UV_LVL av1_filter_block_plane_non420_ver(cm, &planes[plane], mi + mi_col, mi_row, mi_col, plane); av1_filter_block_plane_non420_hor(cm, &planes[plane], mi + mi_col, @@ -2315,37 +3084,40 @@ void av1_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, AV1_COMMON *cm, } #else -#if CONFIG_VAR_TX || CONFIG_EXT_PARTITION || CONFIG_EXT_PARTITION_TYPES - assert(0 && "Not yet updated. ToDo as next steps"); -#endif // CONFIG_VAR_TX || CONFIG_EXT_PARTITION || CONFIG_EXT_PARTITION_TYPES - + // filter all vertical edges in every 64x64 super block for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) { MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride; for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MAX_MIB_SIZE) { av1_setup_dst_planes(planes, cm->sb_size, frame_buffer, mi_row, mi_col); - // filter all vertical edges in every 64x64 super block +#if CONFIG_UV_LVL + for (int planeIdx = plane_start; planeIdx < plane_end; ++planeIdx) { +#else for (int planeIdx = 0; planeIdx < num_planes; planeIdx += 1) { +#endif // CONFIG_UV_LVL const int32_t scaleHorz = planes[planeIdx].subsampling_x; const int32_t scaleVert = planes[planeIdx].subsampling_y; av1_filter_block_plane_vert( - cm, planes + planeIdx, (const MODE_INFO **)(mi + mi_col), - cm->mi_stride, (mi_col * MI_SIZE) >> scaleHorz, - (mi_row * MI_SIZE) >> scaleVert); + cm, planeIdx, &planes[planeIdx], (const MODE_INFO **)(mi + mi_col), + (mi_col * MI_SIZE) >> scaleHorz, (mi_row * MI_SIZE) >> scaleVert); } } } + + // filter all horizontal edges in every 64x64 super block for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) { MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride; for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MAX_MIB_SIZE) { av1_setup_dst_planes(planes, cm->sb_size, frame_buffer, mi_row, mi_col); - // filter all horizontal edges in every 64x64 super block +#if CONFIG_UV_LVL + for (int planeIdx = plane_start; planeIdx < plane_end; ++planeIdx) { +#else for (int planeIdx = 0; planeIdx < num_planes; planeIdx += 1) { +#endif // CONFIG_UV_LVL const int32_t scaleHorz = planes[planeIdx].subsampling_x; const int32_t scaleVert = planes[planeIdx].subsampling_y; av1_filter_block_plane_horz( - cm, planes + planeIdx, (const MODE_INFO **)(mi + mi_col), - cm->mi_stride, (mi_col * MI_SIZE) >> scaleHorz, - (mi_row * MI_SIZE) >> scaleVert); + cm, planeIdx, &planes[planeIdx], (const MODE_INFO **)(mi + mi_col), + (mi_col * MI_SIZE) >> scaleHorz, (mi_row * MI_SIZE) >> scaleVert); } } } @@ -2363,9 +3135,8 @@ void av1_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, AV1_COMMON *cm, const int32_t scaleHorz = planes[planeIdx].subsampling_x; const int32_t scaleVert = planes[planeIdx].subsampling_y; av1_filter_block_plane_vert( - cm, planes + planeIdx, (const MODE_INFO **)(mi + mi_col), - cm->mi_stride, (mi_col * MI_SIZE) >> scaleHorz, - (mi_row * MI_SIZE) >> scaleVert); + cm, planeIdx, planes + planeIdx, (const MODE_INFO **)(mi + mi_col), + (mi_col * MI_SIZE) >> scaleHorz, (mi_row * MI_SIZE) >> scaleVert); } } } @@ -2378,9 +3149,8 @@ void av1_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, AV1_COMMON *cm, const int32_t scaleHorz = planes[planeIdx].subsampling_x; const int32_t scaleVert = planes[planeIdx].subsampling_y; av1_filter_block_plane_horz( - cm, planes + planeIdx, (const MODE_INFO **)(mi + mi_col), - cm->mi_stride, (mi_col * MI_SIZE) >> scaleHorz, - (mi_row * MI_SIZE) >> scaleVert); + cm, planeIdx, planes + planeIdx, (const MODE_INFO **)(mi + mi_col), + (mi_col * MI_SIZE) >> scaleHorz, (mi_row * MI_SIZE) >> scaleVert); } } } |