diff options
author | Ronald S. Bultje <rbultje@google.com> | 2013-04-10 08:51:58 -0700 |
---|---|---|
committer | Gerrit Code Review <gerrit@gerrit.golo.chromium.org> | 2013-04-10 08:51:58 -0700 |
commit | 1932828d19a2b6db8f9a120570a1f4d522064cda (patch) | |
tree | 2885cdef3f6fa4f742aaef2d9edfdd973cb5b11b /vp9/encoder/vp9_encodemb.c | |
parent | 9b46e304949644001b1ea209eda642f88df3e2cb (diff) | |
parent | a3874850dd615064719a0c6cd4717d3d656628a3 (diff) | |
download | libvpx-1932828d19a2b6db8f9a120570a1f4d522064cda.tar libvpx-1932828d19a2b6db8f9a120570a1f4d522064cda.tar.gz libvpx-1932828d19a2b6db8f9a120570a1f4d522064cda.tar.bz2 libvpx-1932828d19a2b6db8f9a120570a1f4d522064cda.zip |
Merge "Make SB coding size-independent." into experimental
Diffstat (limited to 'vp9/encoder/vp9_encodemb.c')
-rw-r--r-- | vp9/encoder/vp9_encodemb.c | 721 |
1 file changed, 263 insertions, 458 deletions
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index f0c215d90..75db660cc 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -106,14 +106,16 @@ void vp9_subtract_mby_s_c(int16_t *diff, const uint8_t *src, int src_stride, } void vp9_subtract_sby_s_c(int16_t *diff, const uint8_t *src, int src_stride, - const uint8_t *pred, int dst_stride) { + const uint8_t *pred, int dst_stride, + BLOCK_SIZE_TYPE bsize) { + const int bh = 16 << mb_height_log2(bsize), bw = 16 << mb_width_log2(bsize); int r, c; - for (r = 0; r < 32; r++) { - for (c = 0; c < 32; c++) + for (r = 0; r < bh; r++) { + for (c = 0; c < bw; c++) diff[c] = src[c] - pred[c]; - diff += 32; + diff += bw; pred += dst_stride; src += src_stride; } @@ -122,69 +124,29 @@ void vp9_subtract_sby_s_c(int16_t *diff, const uint8_t *src, int src_stride, void vp9_subtract_sbuv_s_c(int16_t *diff, const uint8_t *usrc, const uint8_t *vsrc, int src_stride, const uint8_t *upred, - const uint8_t *vpred, int dst_stride) { - int16_t *udiff = diff + 1024; - int16_t *vdiff = diff + 1024 + 256; + const uint8_t *vpred, int dst_stride, + BLOCK_SIZE_TYPE bsize) { + const int bhl = mb_height_log2(bsize), bwl = mb_width_log2(bsize); + const int uoff = (16 * 16) << (bhl + bwl), voff = (uoff * 5) >> 2; + const int bw = 8 << bwl, bh = 8 << bhl; + int16_t *udiff = diff + uoff; + int16_t *vdiff = diff + voff; int r, c; - for (r = 0; r < 16; r++) { - for (c = 0; c < 16; c++) + for (r = 0; r < bh; r++) { + for (c = 0; c < bw; c++) udiff[c] = usrc[c] - upred[c]; - udiff += 16; + udiff += bw; upred += dst_stride; usrc += src_stride; } - for (r = 0; r < 16; r++) { - for (c = 0; c < 16; c++) + for (r = 0; r < bh; r++) { + for (c = 0; c < bw; c++) vdiff[c] = vsrc[c] - vpred[c]; - vdiff += 16; - vpred += dst_stride; - vsrc += src_stride; - } -} - -void vp9_subtract_sb64y_s_c(int16_t *diff, const uint8_t *src, int src_stride, - const uint8_t *pred, int dst_stride) { - int r, c; - - for (r = 0; r < 64; r++) { - 
for (c = 0; c < 64; c++) { - diff[c] = src[c] - pred[c]; - } - - diff += 64; - pred += dst_stride; - src += src_stride; - } -} - -void vp9_subtract_sb64uv_s_c(int16_t *diff, const uint8_t *usrc, - const uint8_t *vsrc, int src_stride, - const uint8_t *upred, - const uint8_t *vpred, int dst_stride) { - int16_t *udiff = diff + 4096; - int16_t *vdiff = diff + 4096 + 1024; - int r, c; - - for (r = 0; r < 32; r++) { - for (c = 0; c < 32; c++) { - udiff[c] = usrc[c] - upred[c]; - } - - udiff += 32; - upred += dst_stride; - usrc += src_stride; - } - - for (r = 0; r < 32; r++) { - for (c = 0; c < 32; c++) { - vdiff[c] = vsrc[c] - vpred[c]; - } - - vdiff += 32; + vdiff += bw; vpred += dst_stride; vsrc += src_stride; } @@ -288,164 +250,86 @@ void vp9_transform_mb_16x16(MACROBLOCK *x) { vp9_transform_mbuv_8x8(x); } -void vp9_transform_sby_32x32(MACROBLOCK *x) { - vp9_short_fdct32x32(x->src_diff, x->coeff, 64); -} - -void vp9_transform_sby_16x16(MACROBLOCK *x) { - MACROBLOCKD *const xd = &x->e_mbd; - int n; - - for (n = 0; n < 4; n++) { - const int x_idx = n & 1, y_idx = n >> 1; - const TX_TYPE tx_type = get_tx_type_16x16(xd, (y_idx * 8 + x_idx) * 4); - - if (tx_type != DCT_DCT) { - vp9_short_fht16x16(x->src_diff + y_idx * 32 * 16 + x_idx * 16, - x->coeff + n * 256, 32, tx_type); - } else { - x->fwd_txm16x16(x->src_diff + y_idx * 32 * 16 + x_idx * 16, - x->coeff + n * 256, 64); - } - } -} - -void vp9_transform_sby_8x8(MACROBLOCK *x) { - MACROBLOCKD *const xd = &x->e_mbd; - int n; - - for (n = 0; n < 16; n++) { - const int x_idx = n & 3, y_idx = n >> 2; - const TX_TYPE tx_type = get_tx_type_8x8(xd, (y_idx * 8 + x_idx) * 2); - - if (tx_type != DCT_DCT) { - vp9_short_fht8x8(x->src_diff + y_idx * 32 * 8 + x_idx * 8, - x->coeff + n * 64, 32, tx_type); - } else { - x->fwd_txm8x8(x->src_diff + y_idx * 32 * 8 + x_idx * 8, - x->coeff + n * 64, 64); - } - } -} - -void vp9_transform_sby_4x4(MACROBLOCK *x) { - MACROBLOCKD *const xd = &x->e_mbd; - int n; - - for (n = 0; n < 64; n++) { - 
const int x_idx = n & 7, y_idx = n >> 3; - const TX_TYPE tx_type = get_tx_type_4x4(xd, y_idx * 8 + x_idx); - - if (tx_type != DCT_DCT) { - vp9_short_fht4x4(x->src_diff + y_idx * 32 * 4 + x_idx * 4, - x->coeff + n * 16, 32, tx_type); - } else { - x->fwd_txm4x4(x->src_diff + y_idx * 32 * 4 + x_idx * 4, - x->coeff + n * 16, 64); - } - } -} - -void vp9_transform_sbuv_16x16(MACROBLOCK *x) { - vp9_clear_system_state(); - x->fwd_txm16x16(x->src_diff + 1024, x->coeff + 1024, 32); - x->fwd_txm16x16(x->src_diff + 1280, x->coeff + 1280, 32); -} - -void vp9_transform_sbuv_8x8(MACROBLOCK *x) { - int n; - - vp9_clear_system_state(); - for (n = 0; n < 4; n++) { - const int x_idx = n & 1, y_idx = n >> 1; - - x->fwd_txm8x8(x->src_diff + 1024 + y_idx * 16 * 8 + x_idx * 8, - x->coeff + 1024 + n * 64, 32); - x->fwd_txm8x8(x->src_diff + 1280 + y_idx * 16 * 8 + x_idx * 8, - x->coeff + 1280 + n * 64, 32); - } -} - -void vp9_transform_sbuv_4x4(MACROBLOCK *x) { - int n; - - vp9_clear_system_state(); - for (n = 0; n < 16; n++) { - const int x_idx = n & 3, y_idx = n >> 2; - - x->fwd_txm4x4(x->src_diff + 1024 + y_idx * 16 * 4 + x_idx * 4, - x->coeff + 1024 + n * 16, 32); - x->fwd_txm4x4(x->src_diff + 1280 + y_idx * 16 * 4 + x_idx * 4, - x->coeff + 1280 + n * 16, 32); - } -} - -void vp9_transform_sb64y_32x32(MACROBLOCK *x) { +void vp9_transform_sby_32x32(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { + const int bwl = mb_width_log2(bsize) - 1, bw = 1 << bwl; + const int bh = 1 << (mb_height_log2(bsize) - 1); + const int stride = 32 << bwl; int n; - for (n = 0; n < 4; n++) { - const int x_idx = n & 1, y_idx = n >> 1; + for (n = 0; n < bw * bh; n++) { + const int x_idx = n & (bw - 1), y_idx = n >> bwl; - vp9_short_fdct32x32(x->src_diff + y_idx * 64 * 32 + x_idx * 32, - x->coeff + n * 1024, 128); + vp9_short_fdct32x32(x->src_diff + y_idx * stride * 32 + x_idx * 32, + x->coeff + n * 1024, stride * 2); } } -void vp9_transform_sb64y_16x16(MACROBLOCK *x) { +void vp9_transform_sby_16x16(MACROBLOCK *x, 
BLOCK_SIZE_TYPE bsize) { + const int bwl = mb_width_log2(bsize), bw = 1 << bwl; + const int bh = 1 << mb_height_log2(bsize); + const int stride = 16 << bwl, bstride = 4 << bwl; MACROBLOCKD *const xd = &x->e_mbd; int n; - for (n = 0; n < 16; n++) { - const int x_idx = n & 3, y_idx = n >> 2; - const TX_TYPE tx_type = get_tx_type_16x16(xd, (y_idx * 16 + x_idx) * 4); + for (n = 0; n < bw * bh; n++) { + const int x_idx = n & (bw - 1), y_idx = n >> bwl; + const TX_TYPE tx_type = get_tx_type_16x16(xd, + (y_idx * bstride + x_idx) * 4); if (tx_type != DCT_DCT) { - vp9_short_fht16x16(x->src_diff + y_idx * 64 * 16 + x_idx * 16, - x->coeff + n * 256, 64, tx_type); + vp9_short_fht16x16(x->src_diff + y_idx * stride * 16 + x_idx * 16, + x->coeff + n * 256, stride, tx_type); } else { - x->fwd_txm16x16(x->src_diff + y_idx * 64 * 16 + x_idx * 16, - x->coeff + n * 256, 128); + x->fwd_txm16x16(x->src_diff + y_idx * stride * 16 + x_idx * 16, + x->coeff + n * 256, stride * 2); } } } -void vp9_transform_sb64y_8x8(MACROBLOCK *x) { +void vp9_transform_sby_8x8(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { + const int bwl = mb_width_log2(bsize) + 1, bw = 1 << bwl; + const int bh = 1 << (mb_height_log2(bsize) + 1); + const int stride = 8 << bwl, bstride = 2 << bwl; MACROBLOCKD *const xd = &x->e_mbd; int n; - for (n = 0; n < 64; n++) { - const int x_idx = n & 7, y_idx = n >> 3; - const TX_TYPE tx_type = get_tx_type_8x8(xd, (y_idx * 16 + x_idx) * 2); + for (n = 0; n < bw * bh; n++) { + const int x_idx = n & (bw - 1), y_idx = n >> bwl; + const TX_TYPE tx_type = get_tx_type_8x8(xd, (y_idx * bstride + x_idx) * 2); if (tx_type != DCT_DCT) { - vp9_short_fht8x8(x->src_diff + y_idx * 64 * 8 + x_idx * 8, - x->coeff + n * 64, 64, tx_type); + vp9_short_fht8x8(x->src_diff + y_idx * stride * 8 + x_idx * 8, + x->coeff + n * 64, stride, tx_type); } else { - x->fwd_txm8x8(x->src_diff + y_idx * 64 * 8 + x_idx * 8, - x->coeff + n * 64, 128); + x->fwd_txm8x8(x->src_diff + y_idx * stride * 8 + x_idx * 8, + x->coeff + n 
* 64, stride * 2); } } } -void vp9_transform_sb64y_4x4(MACROBLOCK *x) { +void vp9_transform_sby_4x4(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { + const int bwl = mb_width_log2(bsize) + 2, bw = 1 << bwl; + const int bh = 1 << (mb_height_log2(bsize) + 2); + const int stride = 4 << bwl; MACROBLOCKD *const xd = &x->e_mbd; int n; - for (n = 0; n < 256; n++) { - const int x_idx = n & 15, y_idx = n >> 4; - const TX_TYPE tx_type = get_tx_type_4x4(xd, y_idx * 16 + x_idx); + for (n = 0; n < bw * bh; n++) { + const int x_idx = n & (bw - 1), y_idx = n >> bwl; + const TX_TYPE tx_type = get_tx_type_4x4(xd, n); if (tx_type != DCT_DCT) { - vp9_short_fht8x8(x->src_diff + y_idx * 64 * 4 + x_idx * 4, - x->coeff + n * 16, 64, tx_type); + vp9_short_fht4x4(x->src_diff + y_idx * stride * 4 + x_idx * 4, + x->coeff + n * 16, stride, tx_type); } else { - x->fwd_txm4x4(x->src_diff + y_idx * 64 * 4 + x_idx * 4, - x->coeff + n * 16, 128); + x->fwd_txm4x4(x->src_diff + y_idx * stride * 4 + x_idx * 4, + x->coeff + n * 16, stride * 2); } } } -void vp9_transform_sb64uv_32x32(MACROBLOCK *x) { +void vp9_transform_sbuv_32x32(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { + assert(bsize == BLOCK_SIZE_SB64X64); vp9_clear_system_state(); vp9_short_fdct32x32(x->src_diff + 4096, x->coeff + 4096, 64); @@ -453,45 +337,57 @@ void vp9_transform_sb64uv_32x32(MACROBLOCK *x) { x->coeff + 4096 + 1024, 64); } -void vp9_transform_sb64uv_16x16(MACROBLOCK *x) { +void vp9_transform_sbuv_16x16(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { + const int bwl = mb_width_log2(bsize), bhl = mb_height_log2(bsize); + const int uoff = (16 * 16) << (bwl + bhl), voff = (uoff * 5) >> 2; + const int bw = 1 << (bwl - 1), bh = 1 << (bhl - 1); + const int stride = 16 << (bwl - 1); int n; vp9_clear_system_state(); - for (n = 0; n < 4; n++) { - const int x_idx = n & 1, y_idx = n >> 1; + for (n = 0; n < bw * bh; n++) { + const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1); - x->fwd_txm16x16(x->src_diff + 4096 + y_idx * 32 * 16 + x_idx * 16, - x->coeff 
+ 4096 + n * 256, 64); - x->fwd_txm16x16(x->src_diff + 4096 + 1024 + y_idx * 32 * 16 + x_idx * 16, - x->coeff + 4096 + 1024 + n * 256, 64); + x->fwd_txm16x16(x->src_diff + uoff + y_idx * stride * 16 + x_idx * 16, + x->coeff + uoff + n * 256, stride * 2); + x->fwd_txm16x16(x->src_diff + voff + y_idx * stride * 16 + x_idx * 16, + x->coeff + voff + n * 256, stride * 2); } } -void vp9_transform_sb64uv_8x8(MACROBLOCK *x) { +void vp9_transform_sbuv_8x8(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { + const int bwl = mb_width_log2(bsize) + 1, bhl = mb_height_log2(bsize) + 1; + const int uoff = (8 * 8) << (bwl + bhl), voff = (uoff * 5) >> 2; + const int bw = 1 << (bwl - 1), bh = 1 << (bhl - 1); + const int stride = 8 << (bwl - 1); int n; vp9_clear_system_state(); - for (n = 0; n < 16; n++) { - const int x_idx = n & 3, y_idx = n >> 2; + for (n = 0; n < bw * bh; n++) { + const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1); - x->fwd_txm8x8(x->src_diff + 4096 + y_idx * 32 * 8 + x_idx * 8, - x->coeff + 4096 + n * 64, 64); - x->fwd_txm8x8(x->src_diff + 4096 + 1024 + y_idx * 32 * 8 + x_idx * 8, - x->coeff + 4096 + 1024 + n * 64, 64); + x->fwd_txm8x8(x->src_diff + uoff + y_idx * stride * 8 + x_idx * 8, + x->coeff + uoff + n * 64, stride * 2); + x->fwd_txm8x8(x->src_diff + voff + y_idx * stride * 8 + x_idx * 8, + x->coeff + voff + n * 64, stride * 2); } } -void vp9_transform_sb64uv_4x4(MACROBLOCK *x) { +void vp9_transform_sbuv_4x4(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { + const int bwl = mb_width_log2(bsize) + 2, bhl = mb_height_log2(bsize) + 2; + const int uoff = (4 * 4) << (bwl + bhl), voff = (uoff * 5) >> 2; + const int bw = 1 << (bwl - 1), bh = 1 << (bhl - 1); + const int stride = 4 << (bwl - 1); int n; vp9_clear_system_state(); - for (n = 0; n < 64; n++) { - const int x_idx = n & 7, y_idx = n >> 3; + for (n = 0; n < bw * bh; n++) { + const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1); - x->fwd_txm4x4(x->src_diff + 4096 + y_idx * 32 * 4 + x_idx * 4, - x->coeff + 4096 + n * 16, 
64); - x->fwd_txm4x4(x->src_diff + 4096 + 1024 + y_idx * 32 * 4 + x_idx * 4, - x->coeff + 4096 + 1024 + n * 16, 64); + x->fwd_txm4x4(x->src_diff + uoff + y_idx * stride * 4 + x_idx * 4, + x->coeff + uoff + n * 16, stride * 2); + x->fwd_txm4x4(x->src_diff + voff + y_idx * stride * 4 + x_idx * 4, + x->coeff + voff + n * 16, stride * 2); } } @@ -968,252 +864,120 @@ static void optimize_mb_16x16(VP9_COMMON *const cm, MACROBLOCK *x) { vp9_optimize_mbuv_8x8(cm, x); } -void vp9_optimize_sby_32x32(VP9_COMMON *const cm, MACROBLOCK *x) { - ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) x->e_mbd.above_context; - ENTROPY_CONTEXT *a1 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 1); - ENTROPY_CONTEXT *l = (ENTROPY_CONTEXT *) x->e_mbd.left_context; - ENTROPY_CONTEXT *l1 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 1); - ENTROPY_CONTEXT ta, tl; - - ta = (a[0] + a[1] + a[2] + a[3] + a1[0] + a1[1] + a1[2] + a1[3]) != 0; - tl = (l[0] + l[1] + l[2] + l[3] + l1[0] + l1[1] + l1[2] + l1[3]) != 0; - optimize_b(cm, x, 0, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, - &ta, &tl, TX_32X32, 64); -} - -void vp9_optimize_sby_16x16(VP9_COMMON *const cm, MACROBLOCK *x) { - ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) x->e_mbd.above_context; - ENTROPY_CONTEXT *a1 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 1); - ENTROPY_CONTEXT *l = (ENTROPY_CONTEXT *) x->e_mbd.left_context; - ENTROPY_CONTEXT *l1 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 1); +void vp9_optimize_sby_32x32(VP9_COMMON *const cm, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize) { + const int bwl = mb_width_log2(bsize) - 1, bw = 1 << bwl; + const int bh = 1 << (mb_height_log2(bsize) - 1); ENTROPY_CONTEXT ta[2], tl[2]; int n; - ta[0] = (a[0] + a[1] + a[2] + a[3]) != 0; - ta[1] = (a1[0] + a1[1] + a1[2] + a1[3]) != 0; - tl[0] = (l[0] + l[1] + l[2] + l[3]) != 0; - tl[1] = (l1[0] + l1[1] + l1[2] + l1[3]) != 0; - for (n = 0; n < 4; n++) { - const int x_idx = n & 1, y_idx = n >> 1; - - optimize_b(cm, x, n * 16, PLANE_TYPE_Y_WITH_DC, 
x->e_mbd.block[0].dequant, - ta + x_idx, tl + y_idx, TX_16X16, 64); - } -} - -void vp9_optimize_sby_8x8(VP9_COMMON *const cm, MACROBLOCK *x) { - ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) x->e_mbd.above_context; - ENTROPY_CONTEXT *a1 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 1); - ENTROPY_CONTEXT *l = (ENTROPY_CONTEXT *) x->e_mbd.left_context; - ENTROPY_CONTEXT *l1 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 1); - ENTROPY_CONTEXT ta[4], tl[4]; - int n; - - ta[0] = (a[0] + a[1]) != 0; - ta[1] = (a[2] + a[3]) != 0; - ta[2] = (a1[0] + a1[1]) != 0; - ta[3] = (a1[2] + a1[3]) != 0; - tl[0] = (l[0] + l[1]) != 0; - tl[1] = (l[2] + l[3]) != 0; - tl[2] = (l1[0] + l1[1]) != 0; - tl[3] = (l1[2] + l1[3]) != 0; - for (n = 0; n < 16; n++) { - const int x_idx = n & 3, y_idx = n >> 2; - - optimize_b(cm, x, n * 4, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, - ta + x_idx, tl + y_idx, TX_8X8, 64); - } -} - -void vp9_optimize_sby_4x4(VP9_COMMON *const cm, MACROBLOCK *x) { - ENTROPY_CONTEXT ta[8], tl[8]; - int n; - - vpx_memcpy(ta, x->e_mbd.above_context, 4 * sizeof(ENTROPY_CONTEXT)); - vpx_memcpy(ta + 4, x->e_mbd.above_context + 1, 4 * sizeof(ENTROPY_CONTEXT)); - vpx_memcpy(tl, x->e_mbd.left_context, 4 * sizeof(ENTROPY_CONTEXT)); - vpx_memcpy(tl + 4, x->e_mbd.left_context + 1, 4 * sizeof(ENTROPY_CONTEXT)); - for (n = 0; n < 64; n++) { - const int x_idx = n & 7, y_idx = n >> 3; - - optimize_b(cm, x, n, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, - ta + x_idx, tl + y_idx, TX_4X4, 64); - } -} - -void vp9_optimize_sbuv_16x16(VP9_COMMON *const cm, MACROBLOCK *x) { - ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) x->e_mbd.above_context; - ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) x->e_mbd.left_context; - ENTROPY_CONTEXT *a, *l, *a1, *l1, above_ec, left_ec; - int b; - - for (b = 64; b < 96; b += 16) { - const int cidx = b >= 80 ? 
20 : 16; - a = ta + vp9_block2above_sb[TX_16X16][b]; - l = tl + vp9_block2left_sb[TX_16X16][b]; - a1 = a + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); - l1 = l + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); - above_ec = (a[0] + a[1] + a1[0] + a1[1]) != 0; - left_ec = (l[0] + l[1] + l1[0] + l1[1]) != 0; - optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant, - &above_ec, &left_ec, TX_16X16, 64); - } -} - -void vp9_optimize_sbuv_8x8(VP9_COMMON *const cm, MACROBLOCK *x) { - ENTROPY_CONTEXT_PLANES t_above[2], t_left[2]; - ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) t_above; - ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) t_left; - ENTROPY_CONTEXT *a, *l, above_ec, left_ec; - int b; - - vpx_memcpy(t_above, x->e_mbd.above_context, sizeof(t_above)); - vpx_memcpy(t_left, x->e_mbd.left_context, sizeof(t_left)); - for (b = 64; b < 96; b += 4) { - const int cidx = b >= 80 ? 20 : 16; - a = ta + vp9_block2above_sb[TX_8X8][b]; - l = tl + vp9_block2left_sb[TX_8X8][b]; - above_ec = (a[0] + a[1]) != 0; - left_ec = (l[0] + l[1]) != 0; - optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant, - &above_ec, &left_ec, TX_8X8, 64); - a[0] = a[1] = above_ec; - l[0] = l[1] = left_ec; + for (n = 0; n < bw; n++) { + ENTROPY_CONTEXT *a = + (ENTROPY_CONTEXT *) (x->e_mbd.above_context + n * 2 + 0); + ENTROPY_CONTEXT *a1 = + (ENTROPY_CONTEXT *) (x->e_mbd.above_context + n * 2 + 1); + ta[n] = (a[0] + a[1] + a[2] + a[3] + a1[0] + a1[1] + a1[2] + a1[3]) != 0; } -} - -void vp9_optimize_sbuv_4x4(VP9_COMMON *const cm, MACROBLOCK *x) { - ENTROPY_CONTEXT_PLANES t_above[2], t_left[2]; - ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) t_above; - ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) t_left; - ENTROPY_CONTEXT *a, *l; - int b; - - vpx_memcpy(t_above, x->e_mbd.above_context, sizeof(t_above)); - vpx_memcpy(t_left, x->e_mbd.left_context, sizeof(t_left)); - for (b = 64; b < 96; b++) { - const int cidx = b >= 80 ? 
20 : 16; - a = ta + vp9_block2above_sb[TX_4X4][b]; - l = tl + vp9_block2left_sb[TX_4X4][b]; - optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant, - a, l, TX_4X4, 64); + for (n = 0; n < bh; n++) { + ENTROPY_CONTEXT *l = + (ENTROPY_CONTEXT *) (x->e_mbd.left_context + n * 2); + ENTROPY_CONTEXT *l1 = + (ENTROPY_CONTEXT *) (x->e_mbd.left_context + n * 2 + 1); + tl[n] = (l[0] + l[1] + l[2] + l[3] + l1[0] + l1[1] + l1[2] + l1[3]) != 0; } -} -void vp9_optimize_sb64y_32x32(VP9_COMMON *const cm, MACROBLOCK *x) { - ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) x->e_mbd.above_context; - ENTROPY_CONTEXT *a1 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 1); - ENTROPY_CONTEXT *a2 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 2); - ENTROPY_CONTEXT *a3 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 3); - ENTROPY_CONTEXT *l = (ENTROPY_CONTEXT *) x->e_mbd.left_context; - ENTROPY_CONTEXT *l1 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 1); - ENTROPY_CONTEXT *l2 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 2); - ENTROPY_CONTEXT *l3 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 3); - ENTROPY_CONTEXT ta[2], tl[2]; - int n; - - ta[0] = (a[0] + a[1] + a[2] + a[3] + a1[0] + a1[1] + a1[2] + a1[3]) != 0; - ta[1] = (a2[0] + a2[1] + a2[2] + a2[3] + a3[0] + a3[1] + a3[2] + a3[3]) != 0; - tl[0] = (l[0] + l[1] + l[2] + l[3] + l1[0] + l1[1] + l1[2] + l1[3]) != 0; - tl[1] = (l2[0] + l2[1] + l2[2] + l2[3] + l3[0] + l3[1] + l3[2] + l3[3]) != 0; - for (n = 0; n < 4; n++) { - const int x_idx = n & 1, y_idx = n >> 1; + for (n = 0; n < bw * bh; n++) { + const int x_idx = n & (bw - 1), y_idx = n >> bwl; optimize_b(cm, x, n * 64, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, - ta + x_idx, tl + y_idx, TX_32X32, 256); + ta + x_idx, tl + y_idx, TX_32X32, 64 * bw * bh); } } -void vp9_optimize_sb64y_16x16(VP9_COMMON *const cm, MACROBLOCK *x) { - ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) x->e_mbd.above_context; - ENTROPY_CONTEXT *a1 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 1); - 
ENTROPY_CONTEXT *a2 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 2); - ENTROPY_CONTEXT *a3 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 3); - ENTROPY_CONTEXT *l = (ENTROPY_CONTEXT *) x->e_mbd.left_context; - ENTROPY_CONTEXT *l1 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 1); - ENTROPY_CONTEXT *l2 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 2); - ENTROPY_CONTEXT *l3 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 3); +void vp9_optimize_sby_16x16(VP9_COMMON *const cm, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize) { + const int bwl = mb_width_log2(bsize), bw = 1 << bwl; + const int bh = 1 << mb_height_log2(bsize); ENTROPY_CONTEXT ta[4], tl[4]; int n; - ta[0] = (a[0] + a[1] + a[2] + a[3]) != 0; - ta[1] = (a1[0] + a1[1] + a1[2] + a1[3]) != 0; - ta[2] = (a2[0] + a2[1] + a2[2] + a2[3]) != 0; - ta[3] = (a3[0] + a3[1] + a3[2] + a3[3]) != 0; - tl[0] = (l[0] + l[1] + l[2] + l[3]) != 0; - tl[1] = (l1[0] + l1[1] + l1[2] + l1[3]) != 0; - tl[2] = (l2[0] + l2[1] + l2[2] + l2[3]) != 0; - tl[3] = (l3[0] + l3[1] + l3[2] + l3[3]) != 0; - for (n = 0; n < 16; n++) { - const int x_idx = n & 3, y_idx = n >> 2; + for (n = 0; n < bw; n++) { + ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + n); + ta[n] = (a[0] + a[1] + a[2] + a[3]) != 0; + } + for (n = 0; n < bh; n++) { + ENTROPY_CONTEXT *l = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + n); + tl[n] = (l[0] + l[1] + l[2] + l[3]) != 0; + } + for (n = 0; n < bw * bh; n++) { + const int x_idx = n & (bw - 1), y_idx = n >> bwl; optimize_b(cm, x, n * 16, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, - ta + x_idx, tl + y_idx, TX_16X16, 256); + ta + x_idx, tl + y_idx, TX_16X16, 16 * bw * bh); } } -void vp9_optimize_sb64y_8x8(VP9_COMMON *const cm, MACROBLOCK *x) { - ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) x->e_mbd.above_context; - ENTROPY_CONTEXT *a1 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 1); - ENTROPY_CONTEXT *a2 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 2); - ENTROPY_CONTEXT *a3 = (ENTROPY_CONTEXT *) 
(x->e_mbd.above_context + 3); - ENTROPY_CONTEXT *l = (ENTROPY_CONTEXT *) x->e_mbd.left_context; - ENTROPY_CONTEXT *l1 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 1); - ENTROPY_CONTEXT *l2 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 2); - ENTROPY_CONTEXT *l3 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 3); +void vp9_optimize_sby_8x8(VP9_COMMON *const cm, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize) { + const int bwl = mb_width_log2(bsize) + 1, bw = 1 << bwl; + const int bh = 2 << mb_height_log2(bsize); ENTROPY_CONTEXT ta[8], tl[8]; int n; - ta[0] = (a[0] + a[1]) != 0; - ta[1] = (a[2] + a[3]) != 0; - ta[2] = (a1[0] + a1[1]) != 0; - ta[3] = (a1[2] + a1[3]) != 0; - ta[4] = (a2[0] + a2[1]) != 0; - ta[5] = (a2[2] + a2[3]) != 0; - ta[6] = (a3[0] + a3[1]) != 0; - ta[7] = (a3[2] + a3[3]) != 0; - tl[0] = (l[0] + l[1]) != 0; - tl[1] = (l[2] + l[3]) != 0; - tl[2] = (l1[0] + l1[1]) != 0; - tl[3] = (l1[2] + l1[3]) != 0; - tl[4] = (l2[0] + l2[1]) != 0; - tl[5] = (l2[2] + l2[3]) != 0; - tl[6] = (l3[0] + l3[1]) != 0; - tl[7] = (l3[2] + l3[3]) != 0; - for (n = 0; n < 64; n++) { - const int x_idx = n & 7, y_idx = n >> 3; + for (n = 0; n < bw; n += 2) { + ENTROPY_CONTEXT *a = + (ENTROPY_CONTEXT *) (x->e_mbd.above_context + (n >> 1)); + ta[n + 0] = (a[0] + a[1]) != 0; + ta[n + 1] = (a[2] + a[3]) != 0; + } + for (n = 0; n < bh; n += 2) { + ENTROPY_CONTEXT *l = + (ENTROPY_CONTEXT *) (x->e_mbd.left_context + (n >> 1)); + tl[n + 0] = (l[0] + l[1]) != 0; + tl[n + 1] = (l[2] + l[3]) != 0; + } + + for (n = 0; n < bw * bh; n++) { + const int x_idx = n & (bw - 1), y_idx = n >> bwl; optimize_b(cm, x, n * 4, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, - ta + x_idx, tl + y_idx, TX_8X8, 256); + ta + x_idx, tl + y_idx, TX_8X8, 4 * bw * bh); } } -void vp9_optimize_sb64y_4x4(VP9_COMMON *const cm, MACROBLOCK *x) { +void vp9_optimize_sby_4x4(VP9_COMMON *const cm, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize) { + int bwl = mb_width_log2(bsize), bw = 1 << bwl; + int bh = 1 << mb_height_log2(bsize); 
ENTROPY_CONTEXT ta[16], tl[16]; int n; - vpx_memcpy(ta, x->e_mbd.above_context, 4 * sizeof(ENTROPY_CONTEXT)); - vpx_memcpy(ta + 4, x->e_mbd.above_context + 1, 4 * sizeof(ENTROPY_CONTEXT)); - vpx_memcpy(ta + 8, x->e_mbd.above_context + 2, 4 * sizeof(ENTROPY_CONTEXT)); - vpx_memcpy(ta + 12, x->e_mbd.above_context + 3, 4 * sizeof(ENTROPY_CONTEXT)); - vpx_memcpy(tl, x->e_mbd.left_context, 4 * sizeof(ENTROPY_CONTEXT)); - vpx_memcpy(tl + 4, x->e_mbd.left_context + 1, 4 * sizeof(ENTROPY_CONTEXT)); - vpx_memcpy(tl + 8, x->e_mbd.left_context + 2, 4 * sizeof(ENTROPY_CONTEXT)); - vpx_memcpy(tl + 12, x->e_mbd.left_context + 3, 4 * sizeof(ENTROPY_CONTEXT)); - for (n = 0; n < 256; n++) { - const int x_idx = n & 15, y_idx = n >> 4; + for (n = 0; n < bw; n++) + vpx_memcpy(&ta[n * 4], x->e_mbd.above_context + n, + sizeof(ENTROPY_CONTEXT) * 4); + for (n = 0; n < bh; n++) + vpx_memcpy(&tl[n * 4], x->e_mbd.left_context + n, + sizeof(ENTROPY_CONTEXT) * 4); + bw *= 4; + bh *= 4; + bwl += 2; + + for (n = 0; n < bw * bh; n++) { + const int x_idx = n & (bw - 1), y_idx = n >> bwl; optimize_b(cm, x, n, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, - ta + x_idx, tl + y_idx, TX_4X4, 256); + ta + x_idx, tl + y_idx, TX_4X4, bh * bw); } } -void vp9_optimize_sb64uv_32x32(VP9_COMMON *const cm, MACROBLOCK *x) { +void vp9_optimize_sbuv_32x32(VP9_COMMON *const cm, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize) { ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) x->e_mbd.above_context; ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) x->e_mbd.left_context; ENTROPY_CONTEXT *a, *l, *a1, *l1, *a2, *l2, *a3, *l3, a_ec, l_ec; int b; + assert(bsize == BLOCK_SIZE_SB64X64); for (b = 256; b < 384; b += 64) { const int cidx = b >= 320 ? 
20 : 16; a = ta + vp9_block2above_sb64[TX_32X32][b]; @@ -1231,67 +995,108 @@ void vp9_optimize_sb64uv_32x32(VP9_COMMON *const cm, MACROBLOCK *x) { } } -void vp9_optimize_sb64uv_16x16(VP9_COMMON *const cm, MACROBLOCK *x) { - ENTROPY_CONTEXT_PLANES t_above[4], t_left[4]; - ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) t_above; - ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) t_left; - ENTROPY_CONTEXT *a, *l, *a1, *l1, above_ec, left_ec; - int b; +void vp9_optimize_sbuv_16x16(VP9_COMMON *const cm, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize) { + const int bwl = mb_width_log2(bsize), bhl = mb_height_log2(bsize); + const int bw = 1 << (bwl - 1); + const int bh = 1 << (bhl - 1); + int uvoff = 16 << (bwl + bhl); + ENTROPY_CONTEXT ta[2][2], tl[2][2]; + int plane, n; + + for (n = 0; n < bw; n++) { + ENTROPY_CONTEXT_PLANES *a = x->e_mbd.above_context + n * 2; + ENTROPY_CONTEXT_PLANES *a1 = x->e_mbd.above_context + n * 2 + 1; + ta[0][n] = (a->u[0] + a->u[1] + a1->u[0] + a1->u[1]) != 0; + ta[1][n] = (a->v[0] + a->v[1] + a1->v[0] + a1->v[1]) != 0; + } + for (n = 0; n < bh; n++) { + ENTROPY_CONTEXT_PLANES *l = (x->e_mbd.left_context + n * 2); + ENTROPY_CONTEXT_PLANES *l1 = (x->e_mbd.left_context + n * 2 + 1); + tl[0][n] = (l->u[0] + l->u[1] + l1->u[0] + l1->u[1]) != 0; + tl[1][n] = (l->v[0] + l->v[1] + l1->v[0] + l1->v[1]) != 0; + } - vpx_memcpy(t_above, x->e_mbd.above_context, sizeof(t_above)); - vpx_memcpy(t_left, x->e_mbd.left_context, sizeof(t_left)); - for (b = 256; b < 384; b += 16) { - const int cidx = b >= 320 ? 
20 : 16; - a = ta + vp9_block2above_sb64[TX_16X16][b]; - l = tl + vp9_block2left_sb64[TX_16X16][b]; - a1 = a + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); - l1 = l + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); - above_ec = (a[0] + a[1] + a1[0] + a1[1]) != 0; - left_ec = (l[0] + l[1] + l1[0] + l1[1]) != 0; - optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant, - &above_ec, &left_ec, TX_16X16, 256); - a[0] = a[1] = a1[0] = a1[1] = above_ec; - l[0] = l[1] = l1[0] = l1[1] = left_ec; + for (plane = 0; plane < 2; plane++) { + const int cidx = 16 + plane * 4; + for (n = 0; n < bw * bh; n++) { + const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1); + optimize_b(cm, x, uvoff + n * 16, PLANE_TYPE_UV, + x->e_mbd.block[cidx].dequant, + &ta[plane][x_idx], &tl[plane][y_idx], + TX_16X16, bh * bw * 64); + } + uvoff = (uvoff * 5) >> 2; // switch u -> v } } -void vp9_optimize_sb64uv_8x8(VP9_COMMON *const cm, MACROBLOCK *x) { - ENTROPY_CONTEXT_PLANES t_above[4], t_left[4]; - ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) t_above; - ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) t_left; - ENTROPY_CONTEXT *a, *l, above_ec, left_ec; - int b; +void vp9_optimize_sbuv_8x8(VP9_COMMON *const cm, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize) { + const int bwl = mb_width_log2(bsize) + 1, bhl = mb_height_log2(bsize) + 1; + const int bw = 1 << (bwl - 1); + const int bh = 1 << (bhl - 1); + int uvoff = 4 << (bwl + bhl); + ENTROPY_CONTEXT ta[2][4], tl[2][4]; + int plane, n; + + for (n = 0; n < bw; n++) { + ENTROPY_CONTEXT_PLANES *a = x->e_mbd.above_context + n; + ta[0][n] = (a->u[0] + a->u[1]) != 0; + ta[1][n] = (a->v[0] + a->v[1]) != 0; + } + for (n = 0; n < bh; n++) { + ENTROPY_CONTEXT_PLANES *l = x->e_mbd.left_context + n; + tl[0][n] = (l->u[0] + l->u[1]) != 0; + tl[1][n] = (l->v[0] + l->v[1]) != 0; + } - vpx_memcpy(t_above, x->e_mbd.above_context, sizeof(t_above)); - vpx_memcpy(t_left, x->e_mbd.left_context, sizeof(t_left)); - for (b = 256; b < 384; b += 4) { - const 
int cidx = b >= 320 ? 20 : 16; - a = ta + vp9_block2above_sb64[TX_8X8][b]; - l = tl + vp9_block2left_sb64[TX_8X8][b]; - above_ec = (a[0] + a[1]) != 0; - left_ec = (l[0] + l[1]) != 0; - optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant, - &above_ec, &left_ec, TX_8X8, 256); - a[0] = a[1] = above_ec; - l[0] = l[1] = left_ec; + for (plane = 0; plane < 2; plane++) { + const int cidx = 16 + plane * 4; + for (n = 0; n < bw * bh; n++) { + const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1); + optimize_b(cm, x, uvoff + n * 4, PLANE_TYPE_UV, + x->e_mbd.block[cidx].dequant, + &ta[plane][x_idx], &tl[plane][y_idx], + TX_8X8, bh * bw * 16); + } + uvoff = (uvoff * 5) >> 2; // switch u -> v } } -void vp9_optimize_sb64uv_4x4(VP9_COMMON *const cm, MACROBLOCK *x) { - ENTROPY_CONTEXT_PLANES t_above[4], t_left[4]; - ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) t_above; - ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) t_left; - ENTROPY_CONTEXT *a, *l; - int b; +void vp9_optimize_sbuv_4x4(VP9_COMMON *const cm, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize) { + const int bwl = mb_width_log2(bsize) + 2, bhl = mb_height_log2(bsize) + 2; + const int bw = 1 << (bwl - 1); + const int bh = 1 << (bhl - 1); + int uvoff = 1 << (bwl + bhl); + ENTROPY_CONTEXT ta[2][8], tl[2][8]; + int plane, n; + + for (n = 0; n < bw; n += 2) { + ENTROPY_CONTEXT_PLANES *a = x->e_mbd.above_context + (n >> 1); + ta[0][n + 0] = (a->u[0]) != 0; + ta[0][n + 1] = (a->u[1]) != 0; + ta[1][n + 0] = (a->v[0]) != 0; + ta[1][n + 1] = (a->v[1]) != 0; + } + for (n = 0; n < bh; n += 2) { + ENTROPY_CONTEXT_PLANES *l = x->e_mbd.left_context + (n >> 1); + tl[0][n + 0] = (l->u[0]) != 0; + tl[0][n + 1] = (l->u[1]) != 0; + tl[1][n + 0] = (l->v[0]) != 0; + tl[1][n + 1] = (l->v[1]) != 0; + } - vpx_memcpy(t_above, x->e_mbd.above_context, sizeof(t_above)); - vpx_memcpy(t_left, x->e_mbd.left_context, sizeof(t_left)); - for (b = 256; b < 384; b++) { - const int cidx = b >= 320 ? 
20 : 16; - a = ta + vp9_block2above_sb64[TX_4X4][b]; - l = tl + vp9_block2left_sb64[TX_4X4][b]; - optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant, - a, l, TX_4X4, 256); + for (plane = 0; plane < 2; plane++) { + const int cidx = 16 + plane * 4; + for (n = 0; n < bw * bh; n++) { + const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1); + optimize_b(cm, x, uvoff + n, PLANE_TYPE_UV, + x->e_mbd.block[cidx].dequant, + &ta[plane][x_idx], &tl[plane][y_idx], + TX_4X4, bh * bw * 4); + } + uvoff = (uvoff * 5) >> 2; // switch u -> v } } |