summary | refs | log | tree | commit | diff
path: root/vp9/encoder/vp9_encodemb.c
diff options
context:
space:
mode:
author: Ronald S. Bultje <rbultje@google.com> 2013-04-10 08:51:58 -0700
committer: Gerrit Code Review <gerrit@gerrit.golo.chromium.org> 2013-04-10 08:51:58 -0700
commit: 1932828d19a2b6db8f9a120570a1f4d522064cda (patch)
tree: 2885cdef3f6fa4f742aaef2d9edfdd973cb5b11b /vp9/encoder/vp9_encodemb.c
parent: 9b46e304949644001b1ea209eda642f88df3e2cb (diff)
parent: a3874850dd615064719a0c6cd4717d3d656628a3 (diff)
download: libvpx-1932828d19a2b6db8f9a120570a1f4d522064cda.tar
libvpx-1932828d19a2b6db8f9a120570a1f4d522064cda.tar.gz
libvpx-1932828d19a2b6db8f9a120570a1f4d522064cda.tar.bz2
libvpx-1932828d19a2b6db8f9a120570a1f4d522064cda.zip
Merge "Make SB coding size-independent." into experimental
Diffstat (limited to 'vp9/encoder/vp9_encodemb.c')
-rw-r--r-- vp9/encoder/vp9_encodemb.c 721
1 files changed, 263 insertions, 458 deletions
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index f0c215d90..75db660cc 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -106,14 +106,16 @@ void vp9_subtract_mby_s_c(int16_t *diff, const uint8_t *src, int src_stride,
}
void vp9_subtract_sby_s_c(int16_t *diff, const uint8_t *src, int src_stride,
- const uint8_t *pred, int dst_stride) {
+ const uint8_t *pred, int dst_stride,
+ BLOCK_SIZE_TYPE bsize) {
+ const int bh = 16 << mb_height_log2(bsize), bw = 16 << mb_width_log2(bsize);
int r, c;
- for (r = 0; r < 32; r++) {
- for (c = 0; c < 32; c++)
+ for (r = 0; r < bh; r++) {
+ for (c = 0; c < bw; c++)
diff[c] = src[c] - pred[c];
- diff += 32;
+ diff += bw;
pred += dst_stride;
src += src_stride;
}
@@ -122,69 +124,29 @@ void vp9_subtract_sby_s_c(int16_t *diff, const uint8_t *src, int src_stride,
void vp9_subtract_sbuv_s_c(int16_t *diff, const uint8_t *usrc,
const uint8_t *vsrc, int src_stride,
const uint8_t *upred,
- const uint8_t *vpred, int dst_stride) {
- int16_t *udiff = diff + 1024;
- int16_t *vdiff = diff + 1024 + 256;
+ const uint8_t *vpred, int dst_stride,
+ BLOCK_SIZE_TYPE bsize) {
+ const int bhl = mb_height_log2(bsize), bwl = mb_width_log2(bsize);
+ const int uoff = (16 * 16) << (bhl + bwl), voff = (uoff * 5) >> 2;
+ const int bw = 8 << bwl, bh = 8 << bhl;
+ int16_t *udiff = diff + uoff;
+ int16_t *vdiff = diff + voff;
int r, c;
- for (r = 0; r < 16; r++) {
- for (c = 0; c < 16; c++)
+ for (r = 0; r < bh; r++) {
+ for (c = 0; c < bw; c++)
udiff[c] = usrc[c] - upred[c];
- udiff += 16;
+ udiff += bw;
upred += dst_stride;
usrc += src_stride;
}
- for (r = 0; r < 16; r++) {
- for (c = 0; c < 16; c++)
+ for (r = 0; r < bh; r++) {
+ for (c = 0; c < bw; c++)
vdiff[c] = vsrc[c] - vpred[c];
- vdiff += 16;
- vpred += dst_stride;
- vsrc += src_stride;
- }
-}
-
-void vp9_subtract_sb64y_s_c(int16_t *diff, const uint8_t *src, int src_stride,
- const uint8_t *pred, int dst_stride) {
- int r, c;
-
- for (r = 0; r < 64; r++) {
- for (c = 0; c < 64; c++) {
- diff[c] = src[c] - pred[c];
- }
-
- diff += 64;
- pred += dst_stride;
- src += src_stride;
- }
-}
-
-void vp9_subtract_sb64uv_s_c(int16_t *diff, const uint8_t *usrc,
- const uint8_t *vsrc, int src_stride,
- const uint8_t *upred,
- const uint8_t *vpred, int dst_stride) {
- int16_t *udiff = diff + 4096;
- int16_t *vdiff = diff + 4096 + 1024;
- int r, c;
-
- for (r = 0; r < 32; r++) {
- for (c = 0; c < 32; c++) {
- udiff[c] = usrc[c] - upred[c];
- }
-
- udiff += 32;
- upred += dst_stride;
- usrc += src_stride;
- }
-
- for (r = 0; r < 32; r++) {
- for (c = 0; c < 32; c++) {
- vdiff[c] = vsrc[c] - vpred[c];
- }
-
- vdiff += 32;
+ vdiff += bw;
vpred += dst_stride;
vsrc += src_stride;
}
@@ -288,164 +250,86 @@ void vp9_transform_mb_16x16(MACROBLOCK *x) {
vp9_transform_mbuv_8x8(x);
}
-void vp9_transform_sby_32x32(MACROBLOCK *x) {
- vp9_short_fdct32x32(x->src_diff, x->coeff, 64);
-}
-
-void vp9_transform_sby_16x16(MACROBLOCK *x) {
- MACROBLOCKD *const xd = &x->e_mbd;
- int n;
-
- for (n = 0; n < 4; n++) {
- const int x_idx = n & 1, y_idx = n >> 1;
- const TX_TYPE tx_type = get_tx_type_16x16(xd, (y_idx * 8 + x_idx) * 4);
-
- if (tx_type != DCT_DCT) {
- vp9_short_fht16x16(x->src_diff + y_idx * 32 * 16 + x_idx * 16,
- x->coeff + n * 256, 32, tx_type);
- } else {
- x->fwd_txm16x16(x->src_diff + y_idx * 32 * 16 + x_idx * 16,
- x->coeff + n * 256, 64);
- }
- }
-}
-
-void vp9_transform_sby_8x8(MACROBLOCK *x) {
- MACROBLOCKD *const xd = &x->e_mbd;
- int n;
-
- for (n = 0; n < 16; n++) {
- const int x_idx = n & 3, y_idx = n >> 2;
- const TX_TYPE tx_type = get_tx_type_8x8(xd, (y_idx * 8 + x_idx) * 2);
-
- if (tx_type != DCT_DCT) {
- vp9_short_fht8x8(x->src_diff + y_idx * 32 * 8 + x_idx * 8,
- x->coeff + n * 64, 32, tx_type);
- } else {
- x->fwd_txm8x8(x->src_diff + y_idx * 32 * 8 + x_idx * 8,
- x->coeff + n * 64, 64);
- }
- }
-}
-
-void vp9_transform_sby_4x4(MACROBLOCK *x) {
- MACROBLOCKD *const xd = &x->e_mbd;
- int n;
-
- for (n = 0; n < 64; n++) {
- const int x_idx = n & 7, y_idx = n >> 3;
- const TX_TYPE tx_type = get_tx_type_4x4(xd, y_idx * 8 + x_idx);
-
- if (tx_type != DCT_DCT) {
- vp9_short_fht4x4(x->src_diff + y_idx * 32 * 4 + x_idx * 4,
- x->coeff + n * 16, 32, tx_type);
- } else {
- x->fwd_txm4x4(x->src_diff + y_idx * 32 * 4 + x_idx * 4,
- x->coeff + n * 16, 64);
- }
- }
-}
-
-void vp9_transform_sbuv_16x16(MACROBLOCK *x) {
- vp9_clear_system_state();
- x->fwd_txm16x16(x->src_diff + 1024, x->coeff + 1024, 32);
- x->fwd_txm16x16(x->src_diff + 1280, x->coeff + 1280, 32);
-}
-
-void vp9_transform_sbuv_8x8(MACROBLOCK *x) {
- int n;
-
- vp9_clear_system_state();
- for (n = 0; n < 4; n++) {
- const int x_idx = n & 1, y_idx = n >> 1;
-
- x->fwd_txm8x8(x->src_diff + 1024 + y_idx * 16 * 8 + x_idx * 8,
- x->coeff + 1024 + n * 64, 32);
- x->fwd_txm8x8(x->src_diff + 1280 + y_idx * 16 * 8 + x_idx * 8,
- x->coeff + 1280 + n * 64, 32);
- }
-}
-
-void vp9_transform_sbuv_4x4(MACROBLOCK *x) {
- int n;
-
- vp9_clear_system_state();
- for (n = 0; n < 16; n++) {
- const int x_idx = n & 3, y_idx = n >> 2;
-
- x->fwd_txm4x4(x->src_diff + 1024 + y_idx * 16 * 4 + x_idx * 4,
- x->coeff + 1024 + n * 16, 32);
- x->fwd_txm4x4(x->src_diff + 1280 + y_idx * 16 * 4 + x_idx * 4,
- x->coeff + 1280 + n * 16, 32);
- }
-}
-
-void vp9_transform_sb64y_32x32(MACROBLOCK *x) {
+void vp9_transform_sby_32x32(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
+ const int bwl = mb_width_log2(bsize) - 1, bw = 1 << bwl;
+ const int bh = 1 << (mb_height_log2(bsize) - 1);
+ const int stride = 32 << bwl;
int n;
- for (n = 0; n < 4; n++) {
- const int x_idx = n & 1, y_idx = n >> 1;
+ for (n = 0; n < bw * bh; n++) {
+ const int x_idx = n & (bw - 1), y_idx = n >> bwl;
- vp9_short_fdct32x32(x->src_diff + y_idx * 64 * 32 + x_idx * 32,
- x->coeff + n * 1024, 128);
+ vp9_short_fdct32x32(x->src_diff + y_idx * stride * 32 + x_idx * 32,
+ x->coeff + n * 1024, stride * 2);
}
}
-void vp9_transform_sb64y_16x16(MACROBLOCK *x) {
+void vp9_transform_sby_16x16(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
+ const int bwl = mb_width_log2(bsize), bw = 1 << bwl;
+ const int bh = 1 << mb_height_log2(bsize);
+ const int stride = 16 << bwl, bstride = 4 << bwl;
MACROBLOCKD *const xd = &x->e_mbd;
int n;
- for (n = 0; n < 16; n++) {
- const int x_idx = n & 3, y_idx = n >> 2;
- const TX_TYPE tx_type = get_tx_type_16x16(xd, (y_idx * 16 + x_idx) * 4);
+ for (n = 0; n < bw * bh; n++) {
+ const int x_idx = n & (bw - 1), y_idx = n >> bwl;
+ const TX_TYPE tx_type = get_tx_type_16x16(xd,
+ (y_idx * bstride + x_idx) * 4);
if (tx_type != DCT_DCT) {
- vp9_short_fht16x16(x->src_diff + y_idx * 64 * 16 + x_idx * 16,
- x->coeff + n * 256, 64, tx_type);
+ vp9_short_fht16x16(x->src_diff + y_idx * stride * 16 + x_idx * 16,
+ x->coeff + n * 256, stride, tx_type);
} else {
- x->fwd_txm16x16(x->src_diff + y_idx * 64 * 16 + x_idx * 16,
- x->coeff + n * 256, 128);
+ x->fwd_txm16x16(x->src_diff + y_idx * stride * 16 + x_idx * 16,
+ x->coeff + n * 256, stride * 2);
}
}
}
-void vp9_transform_sb64y_8x8(MACROBLOCK *x) {
+void vp9_transform_sby_8x8(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
+ const int bwl = mb_width_log2(bsize) + 1, bw = 1 << bwl;
+ const int bh = 1 << (mb_height_log2(bsize) + 1);
+ const int stride = 8 << bwl, bstride = 2 << bwl;
MACROBLOCKD *const xd = &x->e_mbd;
int n;
- for (n = 0; n < 64; n++) {
- const int x_idx = n & 7, y_idx = n >> 3;
- const TX_TYPE tx_type = get_tx_type_8x8(xd, (y_idx * 16 + x_idx) * 2);
+ for (n = 0; n < bw * bh; n++) {
+ const int x_idx = n & (bw - 1), y_idx = n >> bwl;
+ const TX_TYPE tx_type = get_tx_type_8x8(xd, (y_idx * bstride + x_idx) * 2);
if (tx_type != DCT_DCT) {
- vp9_short_fht8x8(x->src_diff + y_idx * 64 * 8 + x_idx * 8,
- x->coeff + n * 64, 64, tx_type);
+ vp9_short_fht8x8(x->src_diff + y_idx * stride * 8 + x_idx * 8,
+ x->coeff + n * 64, stride, tx_type);
} else {
- x->fwd_txm8x8(x->src_diff + y_idx * 64 * 8 + x_idx * 8,
- x->coeff + n * 64, 128);
+ x->fwd_txm8x8(x->src_diff + y_idx * stride * 8 + x_idx * 8,
+ x->coeff + n * 64, stride * 2);
}
}
}
-void vp9_transform_sb64y_4x4(MACROBLOCK *x) {
+void vp9_transform_sby_4x4(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
+ const int bwl = mb_width_log2(bsize) + 2, bw = 1 << bwl;
+ const int bh = 1 << (mb_height_log2(bsize) + 2);
+ const int stride = 4 << bwl;
MACROBLOCKD *const xd = &x->e_mbd;
int n;
- for (n = 0; n < 256; n++) {
- const int x_idx = n & 15, y_idx = n >> 4;
- const TX_TYPE tx_type = get_tx_type_4x4(xd, y_idx * 16 + x_idx);
+ for (n = 0; n < bw * bh; n++) {
+ const int x_idx = n & (bw - 1), y_idx = n >> bwl;
+ const TX_TYPE tx_type = get_tx_type_4x4(xd, n);
if (tx_type != DCT_DCT) {
- vp9_short_fht8x8(x->src_diff + y_idx * 64 * 4 + x_idx * 4,
- x->coeff + n * 16, 64, tx_type);
+ vp9_short_fht4x4(x->src_diff + y_idx * stride * 4 + x_idx * 4,
+ x->coeff + n * 16, stride, tx_type);
} else {
- x->fwd_txm4x4(x->src_diff + y_idx * 64 * 4 + x_idx * 4,
- x->coeff + n * 16, 128);
+ x->fwd_txm4x4(x->src_diff + y_idx * stride * 4 + x_idx * 4,
+ x->coeff + n * 16, stride * 2);
}
}
}
-void vp9_transform_sb64uv_32x32(MACROBLOCK *x) {
+void vp9_transform_sbuv_32x32(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
+ assert(bsize == BLOCK_SIZE_SB64X64);
vp9_clear_system_state();
vp9_short_fdct32x32(x->src_diff + 4096,
x->coeff + 4096, 64);
@@ -453,45 +337,57 @@ void vp9_transform_sb64uv_32x32(MACROBLOCK *x) {
x->coeff + 4096 + 1024, 64);
}
-void vp9_transform_sb64uv_16x16(MACROBLOCK *x) {
+void vp9_transform_sbuv_16x16(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
+ const int bwl = mb_width_log2(bsize), bhl = mb_height_log2(bsize);
+ const int uoff = (16 * 16) << (bwl + bhl), voff = (uoff * 5) >> 2;
+ const int bw = 1 << (bwl - 1), bh = 1 << (bhl - 1);
+ const int stride = 16 << (bwl - 1);
int n;
vp9_clear_system_state();
- for (n = 0; n < 4; n++) {
- const int x_idx = n & 1, y_idx = n >> 1;
+ for (n = 0; n < bw * bh; n++) {
+ const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1);
- x->fwd_txm16x16(x->src_diff + 4096 + y_idx * 32 * 16 + x_idx * 16,
- x->coeff + 4096 + n * 256, 64);
- x->fwd_txm16x16(x->src_diff + 4096 + 1024 + y_idx * 32 * 16 + x_idx * 16,
- x->coeff + 4096 + 1024 + n * 256, 64);
+ x->fwd_txm16x16(x->src_diff + uoff + y_idx * stride * 16 + x_idx * 16,
+ x->coeff + uoff + n * 256, stride * 2);
+ x->fwd_txm16x16(x->src_diff + voff + y_idx * stride * 16 + x_idx * 16,
+ x->coeff + voff + n * 256, stride * 2);
}
}
-void vp9_transform_sb64uv_8x8(MACROBLOCK *x) {
+void vp9_transform_sbuv_8x8(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
+ const int bwl = mb_width_log2(bsize) + 1, bhl = mb_height_log2(bsize) + 1;
+ const int uoff = (8 * 8) << (bwl + bhl), voff = (uoff * 5) >> 2;
+ const int bw = 1 << (bwl - 1), bh = 1 << (bhl - 1);
+ const int stride = 8 << (bwl - 1);
int n;
vp9_clear_system_state();
- for (n = 0; n < 16; n++) {
- const int x_idx = n & 3, y_idx = n >> 2;
+ for (n = 0; n < bw * bh; n++) {
+ const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1);
- x->fwd_txm8x8(x->src_diff + 4096 + y_idx * 32 * 8 + x_idx * 8,
- x->coeff + 4096 + n * 64, 64);
- x->fwd_txm8x8(x->src_diff + 4096 + 1024 + y_idx * 32 * 8 + x_idx * 8,
- x->coeff + 4096 + 1024 + n * 64, 64);
+ x->fwd_txm8x8(x->src_diff + uoff + y_idx * stride * 8 + x_idx * 8,
+ x->coeff + uoff + n * 64, stride * 2);
+ x->fwd_txm8x8(x->src_diff + voff + y_idx * stride * 8 + x_idx * 8,
+ x->coeff + voff + n * 64, stride * 2);
}
}
-void vp9_transform_sb64uv_4x4(MACROBLOCK *x) {
+void vp9_transform_sbuv_4x4(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
+ const int bwl = mb_width_log2(bsize) + 2, bhl = mb_height_log2(bsize) + 2;
+ const int uoff = (4 * 4) << (bwl + bhl), voff = (uoff * 5) >> 2;
+ const int bw = 1 << (bwl - 1), bh = 1 << (bhl - 1);
+ const int stride = 4 << (bwl - 1);
int n;
vp9_clear_system_state();
- for (n = 0; n < 64; n++) {
- const int x_idx = n & 7, y_idx = n >> 3;
+ for (n = 0; n < bw * bh; n++) {
+ const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1);
- x->fwd_txm4x4(x->src_diff + 4096 + y_idx * 32 * 4 + x_idx * 4,
- x->coeff + 4096 + n * 16, 64);
- x->fwd_txm4x4(x->src_diff + 4096 + 1024 + y_idx * 32 * 4 + x_idx * 4,
- x->coeff + 4096 + 1024 + n * 16, 64);
+ x->fwd_txm4x4(x->src_diff + uoff + y_idx * stride * 4 + x_idx * 4,
+ x->coeff + uoff + n * 16, stride * 2);
+ x->fwd_txm4x4(x->src_diff + voff + y_idx * stride * 4 + x_idx * 4,
+ x->coeff + voff + n * 16, stride * 2);
}
}
@@ -968,252 +864,120 @@ static void optimize_mb_16x16(VP9_COMMON *const cm, MACROBLOCK *x) {
vp9_optimize_mbuv_8x8(cm, x);
}
-void vp9_optimize_sby_32x32(VP9_COMMON *const cm, MACROBLOCK *x) {
- ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) x->e_mbd.above_context;
- ENTROPY_CONTEXT *a1 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 1);
- ENTROPY_CONTEXT *l = (ENTROPY_CONTEXT *) x->e_mbd.left_context;
- ENTROPY_CONTEXT *l1 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 1);
- ENTROPY_CONTEXT ta, tl;
-
- ta = (a[0] + a[1] + a[2] + a[3] + a1[0] + a1[1] + a1[2] + a1[3]) != 0;
- tl = (l[0] + l[1] + l[2] + l[3] + l1[0] + l1[1] + l1[2] + l1[3]) != 0;
- optimize_b(cm, x, 0, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
- &ta, &tl, TX_32X32, 64);
-}
-
-void vp9_optimize_sby_16x16(VP9_COMMON *const cm, MACROBLOCK *x) {
- ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) x->e_mbd.above_context;
- ENTROPY_CONTEXT *a1 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 1);
- ENTROPY_CONTEXT *l = (ENTROPY_CONTEXT *) x->e_mbd.left_context;
- ENTROPY_CONTEXT *l1 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 1);
+void vp9_optimize_sby_32x32(VP9_COMMON *const cm, MACROBLOCK *x,
+ BLOCK_SIZE_TYPE bsize) {
+ const int bwl = mb_width_log2(bsize) - 1, bw = 1 << bwl;
+ const int bh = 1 << (mb_height_log2(bsize) - 1);
ENTROPY_CONTEXT ta[2], tl[2];
int n;
- ta[0] = (a[0] + a[1] + a[2] + a[3]) != 0;
- ta[1] = (a1[0] + a1[1] + a1[2] + a1[3]) != 0;
- tl[0] = (l[0] + l[1] + l[2] + l[3]) != 0;
- tl[1] = (l1[0] + l1[1] + l1[2] + l1[3]) != 0;
- for (n = 0; n < 4; n++) {
- const int x_idx = n & 1, y_idx = n >> 1;
-
- optimize_b(cm, x, n * 16, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
- ta + x_idx, tl + y_idx, TX_16X16, 64);
- }
-}
-
-void vp9_optimize_sby_8x8(VP9_COMMON *const cm, MACROBLOCK *x) {
- ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) x->e_mbd.above_context;
- ENTROPY_CONTEXT *a1 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 1);
- ENTROPY_CONTEXT *l = (ENTROPY_CONTEXT *) x->e_mbd.left_context;
- ENTROPY_CONTEXT *l1 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 1);
- ENTROPY_CONTEXT ta[4], tl[4];
- int n;
-
- ta[0] = (a[0] + a[1]) != 0;
- ta[1] = (a[2] + a[3]) != 0;
- ta[2] = (a1[0] + a1[1]) != 0;
- ta[3] = (a1[2] + a1[3]) != 0;
- tl[0] = (l[0] + l[1]) != 0;
- tl[1] = (l[2] + l[3]) != 0;
- tl[2] = (l1[0] + l1[1]) != 0;
- tl[3] = (l1[2] + l1[3]) != 0;
- for (n = 0; n < 16; n++) {
- const int x_idx = n & 3, y_idx = n >> 2;
-
- optimize_b(cm, x, n * 4, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
- ta + x_idx, tl + y_idx, TX_8X8, 64);
- }
-}
-
-void vp9_optimize_sby_4x4(VP9_COMMON *const cm, MACROBLOCK *x) {
- ENTROPY_CONTEXT ta[8], tl[8];
- int n;
-
- vpx_memcpy(ta, x->e_mbd.above_context, 4 * sizeof(ENTROPY_CONTEXT));
- vpx_memcpy(ta + 4, x->e_mbd.above_context + 1, 4 * sizeof(ENTROPY_CONTEXT));
- vpx_memcpy(tl, x->e_mbd.left_context, 4 * sizeof(ENTROPY_CONTEXT));
- vpx_memcpy(tl + 4, x->e_mbd.left_context + 1, 4 * sizeof(ENTROPY_CONTEXT));
- for (n = 0; n < 64; n++) {
- const int x_idx = n & 7, y_idx = n >> 3;
-
- optimize_b(cm, x, n, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
- ta + x_idx, tl + y_idx, TX_4X4, 64);
- }
-}
-
-void vp9_optimize_sbuv_16x16(VP9_COMMON *const cm, MACROBLOCK *x) {
- ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) x->e_mbd.above_context;
- ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) x->e_mbd.left_context;
- ENTROPY_CONTEXT *a, *l, *a1, *l1, above_ec, left_ec;
- int b;
-
- for (b = 64; b < 96; b += 16) {
- const int cidx = b >= 80 ? 20 : 16;
- a = ta + vp9_block2above_sb[TX_16X16][b];
- l = tl + vp9_block2left_sb[TX_16X16][b];
- a1 = a + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
- l1 = l + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
- above_ec = (a[0] + a[1] + a1[0] + a1[1]) != 0;
- left_ec = (l[0] + l[1] + l1[0] + l1[1]) != 0;
- optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant,
- &above_ec, &left_ec, TX_16X16, 64);
- }
-}
-
-void vp9_optimize_sbuv_8x8(VP9_COMMON *const cm, MACROBLOCK *x) {
- ENTROPY_CONTEXT_PLANES t_above[2], t_left[2];
- ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) t_above;
- ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) t_left;
- ENTROPY_CONTEXT *a, *l, above_ec, left_ec;
- int b;
-
- vpx_memcpy(t_above, x->e_mbd.above_context, sizeof(t_above));
- vpx_memcpy(t_left, x->e_mbd.left_context, sizeof(t_left));
- for (b = 64; b < 96; b += 4) {
- const int cidx = b >= 80 ? 20 : 16;
- a = ta + vp9_block2above_sb[TX_8X8][b];
- l = tl + vp9_block2left_sb[TX_8X8][b];
- above_ec = (a[0] + a[1]) != 0;
- left_ec = (l[0] + l[1]) != 0;
- optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant,
- &above_ec, &left_ec, TX_8X8, 64);
- a[0] = a[1] = above_ec;
- l[0] = l[1] = left_ec;
+ for (n = 0; n < bw; n++) {
+ ENTROPY_CONTEXT *a =
+ (ENTROPY_CONTEXT *) (x->e_mbd.above_context + n * 2 + 0);
+ ENTROPY_CONTEXT *a1 =
+ (ENTROPY_CONTEXT *) (x->e_mbd.above_context + n * 2 + 1);
+ ta[n] = (a[0] + a[1] + a[2] + a[3] + a1[0] + a1[1] + a1[2] + a1[3]) != 0;
}
-}
-
-void vp9_optimize_sbuv_4x4(VP9_COMMON *const cm, MACROBLOCK *x) {
- ENTROPY_CONTEXT_PLANES t_above[2], t_left[2];
- ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) t_above;
- ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) t_left;
- ENTROPY_CONTEXT *a, *l;
- int b;
-
- vpx_memcpy(t_above, x->e_mbd.above_context, sizeof(t_above));
- vpx_memcpy(t_left, x->e_mbd.left_context, sizeof(t_left));
- for (b = 64; b < 96; b++) {
- const int cidx = b >= 80 ? 20 : 16;
- a = ta + vp9_block2above_sb[TX_4X4][b];
- l = tl + vp9_block2left_sb[TX_4X4][b];
- optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant,
- a, l, TX_4X4, 64);
+ for (n = 0; n < bh; n++) {
+ ENTROPY_CONTEXT *l =
+ (ENTROPY_CONTEXT *) (x->e_mbd.left_context + n * 2);
+ ENTROPY_CONTEXT *l1 =
+ (ENTROPY_CONTEXT *) (x->e_mbd.left_context + n * 2 + 1);
+ tl[n] = (l[0] + l[1] + l[2] + l[3] + l1[0] + l1[1] + l1[2] + l1[3]) != 0;
}
-}
-void vp9_optimize_sb64y_32x32(VP9_COMMON *const cm, MACROBLOCK *x) {
- ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) x->e_mbd.above_context;
- ENTROPY_CONTEXT *a1 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 1);
- ENTROPY_CONTEXT *a2 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 2);
- ENTROPY_CONTEXT *a3 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 3);
- ENTROPY_CONTEXT *l = (ENTROPY_CONTEXT *) x->e_mbd.left_context;
- ENTROPY_CONTEXT *l1 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 1);
- ENTROPY_CONTEXT *l2 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 2);
- ENTROPY_CONTEXT *l3 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 3);
- ENTROPY_CONTEXT ta[2], tl[2];
- int n;
-
- ta[0] = (a[0] + a[1] + a[2] + a[3] + a1[0] + a1[1] + a1[2] + a1[3]) != 0;
- ta[1] = (a2[0] + a2[1] + a2[2] + a2[3] + a3[0] + a3[1] + a3[2] + a3[3]) != 0;
- tl[0] = (l[0] + l[1] + l[2] + l[3] + l1[0] + l1[1] + l1[2] + l1[3]) != 0;
- tl[1] = (l2[0] + l2[1] + l2[2] + l2[3] + l3[0] + l3[1] + l3[2] + l3[3]) != 0;
- for (n = 0; n < 4; n++) {
- const int x_idx = n & 1, y_idx = n >> 1;
+ for (n = 0; n < bw * bh; n++) {
+ const int x_idx = n & (bw - 1), y_idx = n >> bwl;
optimize_b(cm, x, n * 64, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
- ta + x_idx, tl + y_idx, TX_32X32, 256);
+ ta + x_idx, tl + y_idx, TX_32X32, 64 * bw * bh);
}
}
-void vp9_optimize_sb64y_16x16(VP9_COMMON *const cm, MACROBLOCK *x) {
- ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) x->e_mbd.above_context;
- ENTROPY_CONTEXT *a1 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 1);
- ENTROPY_CONTEXT *a2 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 2);
- ENTROPY_CONTEXT *a3 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 3);
- ENTROPY_CONTEXT *l = (ENTROPY_CONTEXT *) x->e_mbd.left_context;
- ENTROPY_CONTEXT *l1 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 1);
- ENTROPY_CONTEXT *l2 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 2);
- ENTROPY_CONTEXT *l3 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 3);
+void vp9_optimize_sby_16x16(VP9_COMMON *const cm, MACROBLOCK *x,
+ BLOCK_SIZE_TYPE bsize) {
+ const int bwl = mb_width_log2(bsize), bw = 1 << bwl;
+ const int bh = 1 << mb_height_log2(bsize);
ENTROPY_CONTEXT ta[4], tl[4];
int n;
- ta[0] = (a[0] + a[1] + a[2] + a[3]) != 0;
- ta[1] = (a1[0] + a1[1] + a1[2] + a1[3]) != 0;
- ta[2] = (a2[0] + a2[1] + a2[2] + a2[3]) != 0;
- ta[3] = (a3[0] + a3[1] + a3[2] + a3[3]) != 0;
- tl[0] = (l[0] + l[1] + l[2] + l[3]) != 0;
- tl[1] = (l1[0] + l1[1] + l1[2] + l1[3]) != 0;
- tl[2] = (l2[0] + l2[1] + l2[2] + l2[3]) != 0;
- tl[3] = (l3[0] + l3[1] + l3[2] + l3[3]) != 0;
- for (n = 0; n < 16; n++) {
- const int x_idx = n & 3, y_idx = n >> 2;
+ for (n = 0; n < bw; n++) {
+ ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + n);
+ ta[n] = (a[0] + a[1] + a[2] + a[3]) != 0;
+ }
+ for (n = 0; n < bh; n++) {
+ ENTROPY_CONTEXT *l = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + n);
+ tl[n] = (l[0] + l[1] + l[2] + l[3]) != 0;
+ }
+ for (n = 0; n < bw * bh; n++) {
+ const int x_idx = n & (bw - 1), y_idx = n >> bwl;
optimize_b(cm, x, n * 16, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
- ta + x_idx, tl + y_idx, TX_16X16, 256);
+ ta + x_idx, tl + y_idx, TX_16X16, 16 * bw * bh);
}
}
-void vp9_optimize_sb64y_8x8(VP9_COMMON *const cm, MACROBLOCK *x) {
- ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) x->e_mbd.above_context;
- ENTROPY_CONTEXT *a1 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 1);
- ENTROPY_CONTEXT *a2 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 2);
- ENTROPY_CONTEXT *a3 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 3);
- ENTROPY_CONTEXT *l = (ENTROPY_CONTEXT *) x->e_mbd.left_context;
- ENTROPY_CONTEXT *l1 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 1);
- ENTROPY_CONTEXT *l2 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 2);
- ENTROPY_CONTEXT *l3 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 3);
+void vp9_optimize_sby_8x8(VP9_COMMON *const cm, MACROBLOCK *x,
+ BLOCK_SIZE_TYPE bsize) {
+ const int bwl = mb_width_log2(bsize) + 1, bw = 1 << bwl;
+ const int bh = 2 << mb_height_log2(bsize);
ENTROPY_CONTEXT ta[8], tl[8];
int n;
- ta[0] = (a[0] + a[1]) != 0;
- ta[1] = (a[2] + a[3]) != 0;
- ta[2] = (a1[0] + a1[1]) != 0;
- ta[3] = (a1[2] + a1[3]) != 0;
- ta[4] = (a2[0] + a2[1]) != 0;
- ta[5] = (a2[2] + a2[3]) != 0;
- ta[6] = (a3[0] + a3[1]) != 0;
- ta[7] = (a3[2] + a3[3]) != 0;
- tl[0] = (l[0] + l[1]) != 0;
- tl[1] = (l[2] + l[3]) != 0;
- tl[2] = (l1[0] + l1[1]) != 0;
- tl[3] = (l1[2] + l1[3]) != 0;
- tl[4] = (l2[0] + l2[1]) != 0;
- tl[5] = (l2[2] + l2[3]) != 0;
- tl[6] = (l3[0] + l3[1]) != 0;
- tl[7] = (l3[2] + l3[3]) != 0;
- for (n = 0; n < 64; n++) {
- const int x_idx = n & 7, y_idx = n >> 3;
+ for (n = 0; n < bw; n += 2) {
+ ENTROPY_CONTEXT *a =
+ (ENTROPY_CONTEXT *) (x->e_mbd.above_context + (n >> 1));
+ ta[n + 0] = (a[0] + a[1]) != 0;
+ ta[n + 1] = (a[2] + a[3]) != 0;
+ }
+ for (n = 0; n < bh; n += 2) {
+ ENTROPY_CONTEXT *l =
+ (ENTROPY_CONTEXT *) (x->e_mbd.left_context + (n >> 1));
+ tl[n + 0] = (l[0] + l[1]) != 0;
+ tl[n + 1] = (l[2] + l[3]) != 0;
+ }
+
+ for (n = 0; n < bw * bh; n++) {
+ const int x_idx = n & (bw - 1), y_idx = n >> bwl;
optimize_b(cm, x, n * 4, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
- ta + x_idx, tl + y_idx, TX_8X8, 256);
+ ta + x_idx, tl + y_idx, TX_8X8, 4 * bw * bh);
}
}
-void vp9_optimize_sb64y_4x4(VP9_COMMON *const cm, MACROBLOCK *x) {
+void vp9_optimize_sby_4x4(VP9_COMMON *const cm, MACROBLOCK *x,
+ BLOCK_SIZE_TYPE bsize) {
+ int bwl = mb_width_log2(bsize), bw = 1 << bwl;
+ int bh = 1 << mb_height_log2(bsize);
ENTROPY_CONTEXT ta[16], tl[16];
int n;
- vpx_memcpy(ta, x->e_mbd.above_context, 4 * sizeof(ENTROPY_CONTEXT));
- vpx_memcpy(ta + 4, x->e_mbd.above_context + 1, 4 * sizeof(ENTROPY_CONTEXT));
- vpx_memcpy(ta + 8, x->e_mbd.above_context + 2, 4 * sizeof(ENTROPY_CONTEXT));
- vpx_memcpy(ta + 12, x->e_mbd.above_context + 3, 4 * sizeof(ENTROPY_CONTEXT));
- vpx_memcpy(tl, x->e_mbd.left_context, 4 * sizeof(ENTROPY_CONTEXT));
- vpx_memcpy(tl + 4, x->e_mbd.left_context + 1, 4 * sizeof(ENTROPY_CONTEXT));
- vpx_memcpy(tl + 8, x->e_mbd.left_context + 2, 4 * sizeof(ENTROPY_CONTEXT));
- vpx_memcpy(tl + 12, x->e_mbd.left_context + 3, 4 * sizeof(ENTROPY_CONTEXT));
- for (n = 0; n < 256; n++) {
- const int x_idx = n & 15, y_idx = n >> 4;
+ for (n = 0; n < bw; n++)
+ vpx_memcpy(&ta[n * 4], x->e_mbd.above_context + n,
+ sizeof(ENTROPY_CONTEXT) * 4);
+ for (n = 0; n < bh; n++)
+ vpx_memcpy(&tl[n * 4], x->e_mbd.left_context + n,
+ sizeof(ENTROPY_CONTEXT) * 4);
+ bw *= 4;
+ bh *= 4;
+ bwl += 2;
+
+ for (n = 0; n < bw * bh; n++) {
+ const int x_idx = n & (bw - 1), y_idx = n >> bwl;
optimize_b(cm, x, n, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
- ta + x_idx, tl + y_idx, TX_4X4, 256);
+ ta + x_idx, tl + y_idx, TX_4X4, bh * bw);
}
}
-void vp9_optimize_sb64uv_32x32(VP9_COMMON *const cm, MACROBLOCK *x) {
+void vp9_optimize_sbuv_32x32(VP9_COMMON *const cm, MACROBLOCK *x,
+ BLOCK_SIZE_TYPE bsize) {
ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) x->e_mbd.above_context;
ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) x->e_mbd.left_context;
ENTROPY_CONTEXT *a, *l, *a1, *l1, *a2, *l2, *a3, *l3, a_ec, l_ec;
int b;
+ assert(bsize == BLOCK_SIZE_SB64X64);
for (b = 256; b < 384; b += 64) {
const int cidx = b >= 320 ? 20 : 16;
a = ta + vp9_block2above_sb64[TX_32X32][b];
@@ -1231,67 +995,108 @@ void vp9_optimize_sb64uv_32x32(VP9_COMMON *const cm, MACROBLOCK *x) {
}
}
-void vp9_optimize_sb64uv_16x16(VP9_COMMON *const cm, MACROBLOCK *x) {
- ENTROPY_CONTEXT_PLANES t_above[4], t_left[4];
- ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) t_above;
- ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) t_left;
- ENTROPY_CONTEXT *a, *l, *a1, *l1, above_ec, left_ec;
- int b;
+void vp9_optimize_sbuv_16x16(VP9_COMMON *const cm, MACROBLOCK *x,
+ BLOCK_SIZE_TYPE bsize) {
+ const int bwl = mb_width_log2(bsize), bhl = mb_height_log2(bsize);
+ const int bw = 1 << (bwl - 1);
+ const int bh = 1 << (bhl - 1);
+ int uvoff = 16 << (bwl + bhl);
+ ENTROPY_CONTEXT ta[2][2], tl[2][2];
+ int plane, n;
+
+ for (n = 0; n < bw; n++) {
+ ENTROPY_CONTEXT_PLANES *a = x->e_mbd.above_context + n * 2;
+ ENTROPY_CONTEXT_PLANES *a1 = x->e_mbd.above_context + n * 2 + 1;
+ ta[0][n] = (a->u[0] + a->u[1] + a1->u[0] + a1->u[1]) != 0;
+ ta[1][n] = (a->v[0] + a->v[1] + a1->v[0] + a1->v[1]) != 0;
+ }
+ for (n = 0; n < bh; n++) {
+ ENTROPY_CONTEXT_PLANES *l = (x->e_mbd.left_context + n * 2);
+ ENTROPY_CONTEXT_PLANES *l1 = (x->e_mbd.left_context + n * 2 + 1);
+ tl[0][n] = (l->u[0] + l->u[1] + l1->u[0] + l1->u[1]) != 0;
+ tl[1][n] = (l->v[0] + l->v[1] + l1->v[0] + l1->v[1]) != 0;
+ }
- vpx_memcpy(t_above, x->e_mbd.above_context, sizeof(t_above));
- vpx_memcpy(t_left, x->e_mbd.left_context, sizeof(t_left));
- for (b = 256; b < 384; b += 16) {
- const int cidx = b >= 320 ? 20 : 16;
- a = ta + vp9_block2above_sb64[TX_16X16][b];
- l = tl + vp9_block2left_sb64[TX_16X16][b];
- a1 = a + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
- l1 = l + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
- above_ec = (a[0] + a[1] + a1[0] + a1[1]) != 0;
- left_ec = (l[0] + l[1] + l1[0] + l1[1]) != 0;
- optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant,
- &above_ec, &left_ec, TX_16X16, 256);
- a[0] = a[1] = a1[0] = a1[1] = above_ec;
- l[0] = l[1] = l1[0] = l1[1] = left_ec;
+ for (plane = 0; plane < 2; plane++) {
+ const int cidx = 16 + plane * 4;
+ for (n = 0; n < bw * bh; n++) {
+ const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1);
+ optimize_b(cm, x, uvoff + n * 16, PLANE_TYPE_UV,
+ x->e_mbd.block[cidx].dequant,
+ &ta[plane][x_idx], &tl[plane][y_idx],
+ TX_16X16, bh * bw * 64);
+ }
+ uvoff = (uvoff * 5) >> 2; // switch u -> v
}
}
-void vp9_optimize_sb64uv_8x8(VP9_COMMON *const cm, MACROBLOCK *x) {
- ENTROPY_CONTEXT_PLANES t_above[4], t_left[4];
- ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) t_above;
- ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) t_left;
- ENTROPY_CONTEXT *a, *l, above_ec, left_ec;
- int b;
+void vp9_optimize_sbuv_8x8(VP9_COMMON *const cm, MACROBLOCK *x,
+ BLOCK_SIZE_TYPE bsize) {
+ const int bwl = mb_width_log2(bsize) + 1, bhl = mb_height_log2(bsize) + 1;
+ const int bw = 1 << (bwl - 1);
+ const int bh = 1 << (bhl - 1);
+ int uvoff = 4 << (bwl + bhl);
+ ENTROPY_CONTEXT ta[2][4], tl[2][4];
+ int plane, n;
+
+ for (n = 0; n < bw; n++) {
+ ENTROPY_CONTEXT_PLANES *a = x->e_mbd.above_context + n;
+ ta[0][n] = (a->u[0] + a->u[1]) != 0;
+ ta[1][n] = (a->v[0] + a->v[1]) != 0;
+ }
+ for (n = 0; n < bh; n++) {
+ ENTROPY_CONTEXT_PLANES *l = x->e_mbd.left_context + n;
+ tl[0][n] = (l->u[0] + l->u[1]) != 0;
+ tl[1][n] = (l->v[0] + l->v[1]) != 0;
+ }
- vpx_memcpy(t_above, x->e_mbd.above_context, sizeof(t_above));
- vpx_memcpy(t_left, x->e_mbd.left_context, sizeof(t_left));
- for (b = 256; b < 384; b += 4) {
- const int cidx = b >= 320 ? 20 : 16;
- a = ta + vp9_block2above_sb64[TX_8X8][b];
- l = tl + vp9_block2left_sb64[TX_8X8][b];
- above_ec = (a[0] + a[1]) != 0;
- left_ec = (l[0] + l[1]) != 0;
- optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant,
- &above_ec, &left_ec, TX_8X8, 256);
- a[0] = a[1] = above_ec;
- l[0] = l[1] = left_ec;
+ for (plane = 0; plane < 2; plane++) {
+ const int cidx = 16 + plane * 4;
+ for (n = 0; n < bw * bh; n++) {
+ const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1);
+ optimize_b(cm, x, uvoff + n * 4, PLANE_TYPE_UV,
+ x->e_mbd.block[cidx].dequant,
+ &ta[plane][x_idx], &tl[plane][y_idx],
+ TX_8X8, bh * bw * 16);
+ }
+ uvoff = (uvoff * 5) >> 2; // switch u -> v
}
}
-void vp9_optimize_sb64uv_4x4(VP9_COMMON *const cm, MACROBLOCK *x) {
- ENTROPY_CONTEXT_PLANES t_above[4], t_left[4];
- ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) t_above;
- ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) t_left;
- ENTROPY_CONTEXT *a, *l;
- int b;
+void vp9_optimize_sbuv_4x4(VP9_COMMON *const cm, MACROBLOCK *x,
+ BLOCK_SIZE_TYPE bsize) {
+ const int bwl = mb_width_log2(bsize) + 2, bhl = mb_height_log2(bsize) + 2;
+ const int bw = 1 << (bwl - 1);
+ const int bh = 1 << (bhl - 1);
+ int uvoff = 1 << (bwl + bhl);
+ ENTROPY_CONTEXT ta[2][8], tl[2][8];
+ int plane, n;
+
+ for (n = 0; n < bw; n += 2) {
+ ENTROPY_CONTEXT_PLANES *a = x->e_mbd.above_context + (n >> 1);
+ ta[0][n + 0] = (a->u[0]) != 0;
+ ta[0][n + 1] = (a->u[1]) != 0;
+ ta[1][n + 0] = (a->v[0]) != 0;
+ ta[1][n + 1] = (a->v[1]) != 0;
+ }
+ for (n = 0; n < bh; n += 2) {
+ ENTROPY_CONTEXT_PLANES *l = x->e_mbd.left_context + (n >> 1);
+ tl[0][n + 0] = (l->u[0]) != 0;
+ tl[0][n + 1] = (l->u[1]) != 0;
+ tl[1][n + 0] = (l->v[0]) != 0;
+ tl[1][n + 1] = (l->v[1]) != 0;
+ }
- vpx_memcpy(t_above, x->e_mbd.above_context, sizeof(t_above));
- vpx_memcpy(t_left, x->e_mbd.left_context, sizeof(t_left));
- for (b = 256; b < 384; b++) {
- const int cidx = b >= 320 ? 20 : 16;
- a = ta + vp9_block2above_sb64[TX_4X4][b];
- l = tl + vp9_block2left_sb64[TX_4X4][b];
- optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant,
- a, l, TX_4X4, 256);
+ for (plane = 0; plane < 2; plane++) {
+ const int cidx = 16 + plane * 4;
+ for (n = 0; n < bw * bh; n++) {
+ const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1);
+ optimize_b(cm, x, uvoff + n, PLANE_TYPE_UV,
+ x->e_mbd.block[cidx].dequant,
+ &ta[plane][x_idx], &tl[plane][y_idx],
+ TX_4X4, bh * bw * 4);
+ }
+ uvoff = (uvoff * 5) >> 2; // switch u -> v
}
}