summary refs log tree commit diff
path: root/vp9
diff options
context:
space:
mode:
Diffstat (limited to 'vp9')
-rw-r--r-- vp9/common/vp9_blockd.h 6
-rw-r--r-- vp9/common/vp9_convolve.c 56
-rw-r--r-- vp9/common/vp9_convolve.h 3
-rw-r--r-- vp9/common/vp9_filter.c 15
-rw-r--r-- vp9/common/vp9_idct.h 14
-rw-r--r-- vp9/common/vp9_idctllm.c 7
-rw-r--r-- vp9/common/vp9_invtrans.c 2
-rw-r--r-- vp9/common/vp9_mbpitch.c 11
-rw-r--r-- vp9/common/vp9_onyxc_int.h 9
-rw-r--r-- vp9/common/vp9_reconinter.c 345
-rw-r--r-- vp9/common/vp9_reconinter.h 144
-rw-r--r-- vp9/common/vp9_reconintra.h 64
-rw-r--r-- vp9/common/vp9_rtcd.c 5
-rw-r--r-- vp9/common/vp9_rtcd_defs.sh 2
-rw-r--r-- vp9/common/vp9_setupintrarecon.h 2
-rw-r--r-- vp9/common/x86/vp9_idctllm_x86.c 76
-rw-r--r-- vp9/decoder/vp9_decodemv.c 67
-rw-r--r-- vp9/decoder/vp9_decodframe.c 145
-rw-r--r-- vp9/decoder/vp9_dequantize.c 4
-rw-r--r-- vp9/decoder/vp9_detokenize.c 22
-rw-r--r-- vp9/decoder/vp9_idct_blk.c 48
-rw-r--r-- vp9/decoder/vp9_onyxd_int.h 2
-rw-r--r-- vp9/encoder/vp9_asm_enc_offsets.c 1
-rw-r--r-- vp9/encoder/vp9_block.h 13
-rw-r--r-- vp9/encoder/vp9_dct.c 604
-rw-r--r-- vp9/encoder/vp9_encodeframe.c 74
-rw-r--r-- vp9/encoder/vp9_encodeintra.c 16
-rw-r--r-- vp9/encoder/vp9_encodemb.c 32
-rw-r--r-- vp9/encoder/vp9_encodemb.h 4
-rw-r--r-- vp9/encoder/vp9_firstpass.c 6
-rw-r--r-- vp9/encoder/vp9_mbgraph.c 28
-rw-r--r-- vp9/encoder/vp9_onyx_if.c 196
-rw-r--r-- vp9/encoder/vp9_onyx_int.h 1
-rw-r--r-- vp9/encoder/vp9_quantize.c 67
-rw-r--r-- vp9/encoder/vp9_quantize.h 6
-rw-r--r-- vp9/encoder/vp9_rdopt.c 223
-rw-r--r-- vp9/encoder/vp9_rdopt.h 14
-rw-r--r-- vp9/encoder/vp9_temporal_filter.c 7
-rw-r--r-- vp9/encoder/vp9_tokenize.c 28
-rw-r--r-- vp9/vp9_common.mk 3
-rw-r--r-- vp9/vp9cx.mk 6
41 files changed, 1252 insertions, 1126 deletions
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index 5d876c15b..372a58ec4 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -276,8 +276,6 @@ typedef struct blockd {
int dst;
int dst_stride;
- int eob;
-
union b_mode_info bmi;
} BLOCKD;
@@ -292,9 +290,12 @@ struct scale_factors {
int x_num;
int x_den;
int x_offset_q4;
+ int x_step_q4;
int y_num;
int y_den;
int y_offset_q4;
+ int y_step_q4;
+ convolve_fn_t predict[2][2][2]; // horiz, vert, avg
};
typedef struct macroblockd {
@@ -302,6 +303,7 @@ typedef struct macroblockd {
DECLARE_ALIGNED(16, uint8_t, predictor[384]);
DECLARE_ALIGNED(16, int16_t, qcoeff[384]);
DECLARE_ALIGNED(16, int16_t, dqcoeff[384]);
+ DECLARE_ALIGNED(16, uint16_t, eobs[24]);
SUPERBLOCKD sb_coeff_data;
diff --git a/vp9/common/vp9_convolve.c b/vp9/common/vp9_convolve.c
index ac5d5cb3e..b062e7dc7 100644
--- a/vp9/common/vp9_convolve.c
+++ b/vp9/common/vp9_convolve.c
@@ -19,7 +19,6 @@
#define VP9_FILTER_WEIGHT 128
#define VP9_FILTER_SHIFT 7
-#define ALIGN_FILTERS_256 0
/* Assume a bank of 16 filters to choose from. There are two implementations
* for filter wrapping behavior, since we want to be able to pick which filter
@@ -34,8 +33,11 @@
* always 256 byte aligned.
*
* Implementations 2 and 3 are likely preferable, as they avoid an extra 2
- * parameters, and switching between them is trivial.
+ * parameters, and switching between them is trivial, with the
+ * ALIGN_FILTERS_256 macro, below.
*/
+ #define ALIGN_FILTERS_256 1
+
static void convolve_horiz_c(const uint8_t *src, int src_stride,
uint8_t *dst, int dst_stride,
const int16_t *filter_x0, int x_step_q4,
@@ -56,11 +58,12 @@ static void convolve_horiz_c(const uint8_t *src, int src_stride,
const int16_t *filter_x = filter_x0;
/* Initial phase offset */
- int x_q4 = (filter_x - filter_x_base) / taps;
+ int x0_q4 = (filter_x - filter_x_base) / taps;
+ int x_q4 = x0_q4;
for (x = 0; x < w; ++x) {
/* Per-pixel src offset */
- int src_x = x_q4 >> 4;
+ int src_x = (x_q4 - x0_q4) >> 4;
for (sum = 0, k = 0; k < taps; ++k) {
sum += src[src_x + k] * filter_x[k];
@@ -97,11 +100,12 @@ static void convolve_avg_horiz_c(const uint8_t *src, int src_stride,
const int16_t *filter_x = filter_x0;
/* Initial phase offset */
- int x_q4 = (filter_x - filter_x_base) / taps;
+ int x0_q4 = (filter_x - filter_x_base) / taps;
+ int x_q4 = x0_q4;
for (x = 0; x < w; ++x) {
/* Per-pixel src offset */
- int src_x = x_q4 >> 4;
+ int src_x = (x_q4 - x0_q4) >> 4;
for (sum = 0, k = 0; k < taps; ++k) {
sum += src[src_x + k] * filter_x[k];
@@ -138,11 +142,12 @@ static void convolve_vert_c(const uint8_t *src, int src_stride,
const int16_t *filter_y = filter_y0;
/* Initial phase offset */
- int y_q4 = (filter_y - filter_y_base) / taps;
+ int y0_q4 = (filter_y - filter_y_base) / taps;
+ int y_q4 = y0_q4;
for (y = 0; y < h; ++y) {
/* Per-pixel src offset */
- int src_y = y_q4 >> 4;
+ int src_y = (y_q4 - y0_q4) >> 4;
for (sum = 0, k = 0; k < taps; ++k) {
sum += src[(src_y + k) * src_stride] * filter_y[k];
@@ -179,11 +184,12 @@ static void convolve_avg_vert_c(const uint8_t *src, int src_stride,
const int16_t *filter_y = filter_y0;
/* Initial phase offset */
- int y_q4 = (filter_y - filter_y_base) / taps;
+ int y0_q4 = (filter_y - filter_y_base) / taps;
+ int y_q4 = y0_q4;
for (y = 0; y < h; ++y) {
/* Per-pixel src offset */
- int src_y = y_q4 >> 4;
+ int src_y = (y_q4 - y0_q4) >> 4;
for (sum = 0, k = 0; k < taps; ++k) {
sum += src[(src_y + k) * src_stride] * filter_y[k];
@@ -206,16 +212,25 @@ static void convolve_c(const uint8_t *src, int src_stride,
const int16_t *filter_x, int x_step_q4,
const int16_t *filter_y, int y_step_q4,
int w, int h, int taps) {
- /* Fixed size intermediate buffer places limits on parameters. */
- uint8_t temp[16 * 23];
+ /* Fixed size intermediate buffer places limits on parameters.
+ * Maximum intermediate_height is 39, for y_step_q4 == 32,
+ * h == 16, taps == 8.
+ */
+ uint8_t temp[16 * 39];
+ int intermediate_height = ((h * y_step_q4) >> 4) + taps - 1;
+
assert(w <= 16);
assert(h <= 16);
assert(taps <= 8);
+ assert(y_step_q4 <= 32);
+
+ if (intermediate_height < h)
+ intermediate_height = h;
convolve_horiz_c(src - src_stride * (taps / 2 - 1), src_stride,
temp, 16,
filter_x, x_step_q4, filter_y, y_step_q4,
- w, h + taps - 1, taps);
+ w, intermediate_height, taps);
convolve_vert_c(temp + 16 * (taps / 2 - 1), 16, dst, dst_stride,
filter_x, x_step_q4, filter_y, y_step_q4,
w, h, taps);
@@ -226,16 +241,25 @@ static void convolve_avg_c(const uint8_t *src, int src_stride,
const int16_t *filter_x, int x_step_q4,
const int16_t *filter_y, int y_step_q4,
int w, int h, int taps) {
- /* Fixed size intermediate buffer places limits on parameters. */
- uint8_t temp[16 * 23];
+ /* Fixed size intermediate buffer places limits on parameters.
+ * Maximum intermediate_height is 39, for y_step_q4 == 32,
+ * h == 16, taps == 8.
+ */
+ uint8_t temp[16 * 39];
+ int intermediate_height = ((h * y_step_q4) >> 4) + taps - 1;
+
assert(w <= 16);
assert(h <= 16);
assert(taps <= 8);
+ assert(y_step_q4 <= 32);
+
+ if (intermediate_height < h)
+ intermediate_height = h;
convolve_horiz_c(src - src_stride * (taps / 2 - 1), src_stride,
temp, 16,
filter_x, x_step_q4, filter_y, y_step_q4,
- w, h + taps - 1, taps);
+ w, intermediate_height, taps);
convolve_avg_vert_c(temp + 16 * (taps / 2 - 1), 16, dst, dst_stride,
filter_x, x_step_q4, filter_y, y_step_q4,
w, h, taps);
diff --git a/vp9/common/vp9_convolve.h b/vp9/common/vp9_convolve.h
index 46c935ab7..8c4856187 100644
--- a/vp9/common/vp9_convolve.h
+++ b/vp9/common/vp9_convolve.h
@@ -33,11 +33,8 @@ void vp9_convolve_avg(const uint8_t *src, int src_stride,
int w, int h);
struct subpix_fn_table {
- convolve_fn_t predict[2][2][2]; // horiz, vert, avg
const int16_t (*filter_x)[8];
const int16_t (*filter_y)[8];
- int x_step_q4;
- int y_step_q4;
};
#endif // VP9_COMMON_CONVOLVE_H_
diff --git a/vp9/common/vp9_filter.c b/vp9/common/vp9_filter.c
index 5e425895f..434c63e7e 100644
--- a/vp9/common/vp9_filter.c
+++ b/vp9/common/vp9_filter.c
@@ -15,7 +15,7 @@
#include "vp9_rtcd.h"
#include "vp9/common/vp9_common.h"
-DECLARE_ALIGNED(16, const int16_t, vp9_bilinear_filters[SUBPEL_SHIFTS][8]) = {
+DECLARE_ALIGNED(256, const int16_t, vp9_bilinear_filters[SUBPEL_SHIFTS][8]) = {
{ 0, 0, 0, 128, 0, 0, 0, 0 },
{ 0, 0, 0, 120, 8, 0, 0, 0 },
{ 0, 0, 0, 112, 16, 0, 0, 0 },
@@ -36,7 +36,8 @@ DECLARE_ALIGNED(16, const int16_t, vp9_bilinear_filters[SUBPEL_SHIFTS][8]) = {
#define FILTER_ALPHA 0
#define FILTER_ALPHA_SHARP 1
-DECLARE_ALIGNED(16, const int16_t, vp9_sub_pel_filters_8[SUBPEL_SHIFTS][8]) = {
+DECLARE_ALIGNED(256, const int16_t, vp9_sub_pel_filters_8[SUBPEL_SHIFTS][8])
+ = {
#if FILTER_ALPHA == 0
/* Lagrangian interpolation filter */
{ 0, 0, 0, 128, 0, 0, 0, 0},
@@ -55,6 +56,7 @@ DECLARE_ALIGNED(16, const int16_t, vp9_sub_pel_filters_8[SUBPEL_SHIFTS][8]) = {
{ -1, 3, -9, 27, 118, -13, 4, -1},
{ 0, 2, -6, 18, 122, -10, 3, -1},
{ 0, 1, -3, 8, 126, -5, 1, 0}
+
#elif FILTER_ALPHA == 50
/* Generated using MATLAB:
* alpha = 0.5;
@@ -82,7 +84,8 @@ DECLARE_ALIGNED(16, const int16_t, vp9_sub_pel_filters_8[SUBPEL_SHIFTS][8]) = {
#endif /* FILTER_ALPHA */
};
-DECLARE_ALIGNED(16, const int16_t, vp9_sub_pel_filters_8s[SUBPEL_SHIFTS][8]) = {
+DECLARE_ALIGNED(256, const int16_t, vp9_sub_pel_filters_8s[SUBPEL_SHIFTS][8])
+ = {
#if FILTER_ALPHA_SHARP == 1
/* dct based filter */
{0, 0, 0, 128, 0, 0, 0, 0},
@@ -101,6 +104,7 @@ DECLARE_ALIGNED(16, const int16_t, vp9_sub_pel_filters_8s[SUBPEL_SHIFTS][8]) = {
{-2, 5, -10, 27, 121, -17, 7, -3},
{-1, 3, -6, 17, 125, -13, 5, -2},
{0, 1, -3, 8, 127, -7, 3, -1}
+
#elif FILTER_ALPHA_SHARP == 75
/* alpha = 0.75 */
{0, 0, 0, 128, 0, 0, 0, 0},
@@ -122,7 +126,7 @@ DECLARE_ALIGNED(16, const int16_t, vp9_sub_pel_filters_8s[SUBPEL_SHIFTS][8]) = {
#endif /* FILTER_ALPHA_SHARP */
};
-DECLARE_ALIGNED(16, const int16_t,
+DECLARE_ALIGNED(256, const int16_t,
vp9_sub_pel_filters_8lp[SUBPEL_SHIFTS][8]) = {
/* 8-tap lowpass filter */
/* Hamming window */
@@ -144,7 +148,8 @@ DECLARE_ALIGNED(16, const int16_t,
{ 1, -2, -7, 37, 80, 28, -8, -1}
};
-DECLARE_ALIGNED(16, const int16_t, vp9_sub_pel_filters_6[SUBPEL_SHIFTS][8]) = {
+DECLARE_ALIGNED(256, const int16_t, vp9_sub_pel_filters_6[SUBPEL_SHIFTS][8])
+ = {
{0, 0, 0, 128, 0, 0, 0, 0},
{0, 1, -5, 125, 8, -2, 1, 0},
{0, 1, -8, 122, 17, -5, 1, 0},
diff --git a/vp9/common/vp9_idct.h b/vp9/common/vp9_idct.h
index 430cec083..3e0ee4b63 100644
--- a/vp9/common/vp9_idct.h
+++ b/vp9/common/vp9_idct.h
@@ -13,6 +13,13 @@
#include "./vpx_config.h"
+#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n) - 1))) >> (n))
+
+/* If we don't want to use ROUND_POWER_OF_TWO macro
+static INLINE int16_t round_power_of_two(int16_t value, int n) {
+ return (value + (1 << (n - 1))) >> n;
+}*/
+
// Constants and Macros used by all idct/dct functions
#define DCT_CONST_BITS 14
#define DCT_CONST_ROUNDING (1 << (DCT_CONST_BITS - 1))
@@ -61,4 +68,11 @@ static INLINE int dct_const_round_shift(int input) {
assert(INT16_MIN <= rv && rv <= INT16_MAX);
return rv;
}
+
+static INLINE int dct_32_round(int input) {
+  const int rounded = (input + DCT_CONST_ROUNDING) >> DCT_CONST_BITS;
+  assert(rounded >= -131072 && rounded <= 131071);  /* signed 18-bit range */
+  return rounded;
+}
+
#endif
diff --git a/vp9/common/vp9_idctllm.c b/vp9/common/vp9_idctllm.c
index 632dae8fd..f34823b36 100644
--- a/vp9/common/vp9_idctllm.c
+++ b/vp9/common/vp9_idctllm.c
@@ -31,13 +31,6 @@
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_idct.h"
-#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n) - 1))) >> (n))
-
-/* If we don't want to use ROUND_POWER_OF_TWO macro
-static INLINE int16_t round_power_of_two(int16_t value, int n) {
- return (value + (1 << (n - 1))) >> n;
-}*/
-
typedef void (*transform_1d)(int16_t*, int16_t*);
typedef struct {
diff --git a/vp9/common/vp9_invtrans.c b/vp9/common/vp9_invtrans.c
index d431ea24b..1311b9111 100644
--- a/vp9/common/vp9_invtrans.c
+++ b/vp9/common/vp9_invtrans.c
@@ -13,7 +13,7 @@
void vp9_inverse_transform_b_4x4(MACROBLOCKD *xd, int block, int pitch) {
BLOCKD *b = &xd->block[block];
- if (b->eob <= 1)
+ if (xd->eobs[block] <= 1)
xd->inv_txm4x4_1(b->dqcoeff, b->diff, pitch);
else
xd->inv_txm4x4(b->dqcoeff, b->diff, pitch);
diff --git a/vp9/common/vp9_mbpitch.c b/vp9/common/vp9_mbpitch.c
index b3303eb59..ed96292a4 100644
--- a/vp9/common/vp9_mbpitch.c
+++ b/vp9/common/vp9_mbpitch.c
@@ -71,17 +71,6 @@ static void setup_macroblock(MACROBLOCKD *xd, BLOCKSET bs) {
setup_block(&blockd[block + 4], stride, v, v2, stride,
((block - 16) >> 1) * 4 * stride + (block & 1) * 4, bs);
}
-
- // TODO(jkoleszar): this will move once we're actually scaling.
- xd->scale_factor[0].x_num = 1;
- xd->scale_factor[0].x_den = 1;
- xd->scale_factor[0].y_num = 1;
- xd->scale_factor[0].y_den = 1;
- xd->scale_factor[0].x_offset_q4 = 0;
- xd->scale_factor[0].y_offset_q4 = 0;
- xd->scale_factor[1]= xd->scale_factor[0];
- xd->scale_factor_uv[0] = xd->scale_factor[0];
- xd->scale_factor_uv[1] = xd->scale_factor[1];
}
void vp9_setup_block_dptrs(MACROBLOCKD *xd) {
diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h
index e952fe933..c4bb12340 100644
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -39,7 +39,11 @@ void vp9_initialize_common(void);
#define NUM_REF_FRAMES 3
#define NUM_REF_FRAMES_LG2 2
-#define NUM_YV12_BUFFERS (NUM_REF_FRAMES + 1)
+
+// 1 scratch frame for the new frame, 3 for scaled references on the encoder
+// TODO(jkoleszar): These 3 extra references could probably come from the
+// normal reference pool.
+#define NUM_YV12_BUFFERS (NUM_REF_FRAMES + 4)
#define NUM_FRAME_CONTEXTS_LG2 2
#define NUM_FRAME_CONTEXTS (1 << NUM_FRAME_CONTEXTS_LG2)
@@ -128,6 +132,8 @@ typedef struct VP9Common {
int Width;
int Height;
+ int last_width;
+ int last_height;
int horiz_scale;
int vert_scale;
@@ -145,6 +151,7 @@ typedef struct VP9Common {
*/
int active_ref_idx[3]; /* each frame can reference 3 buffers */
int new_fb_idx;
+ struct scale_factors active_ref_scale[3];
YV12_BUFFER_CONFIG post_proc_buffer;
YV12_BUFFER_CONFIG temp_scale_frame;
diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c
index 3b4b34216..30e8951af 100644
--- a/vp9/common/vp9_reconinter.c
+++ b/vp9/common/vp9_reconinter.c
@@ -17,26 +17,97 @@
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
-void vp9_setup_interp_filters(MACROBLOCKD *xd,
- INTERPOLATIONFILTERTYPE mcomp_filter_type,
- VP9_COMMON *cm) {
+void vp9_setup_scale_factors_for_frame(struct scale_factors *scale,
+ YV12_BUFFER_CONFIG *other,
+ int this_w, int this_h) {
+ int other_w, other_h;
+
+ other_h = other->y_height;
+ other_w = other->y_width;
+ scale->x_num = other_w;
+ scale->x_den = this_w;
+ scale->x_offset_q4 = 0; // calculated per-mb
+ scale->x_step_q4 = 16 * other_w / this_w;
+ scale->y_num = other_h;
+ scale->y_den = this_h;
+ scale->y_offset_q4 = 0; // calculated per-mb
+ scale->y_step_q4 = 16 * other_h / this_h;
+
// TODO(agrange): Investigate the best choice of functions to use here
// for EIGHTTAP_SMOOTH. Since it is not interpolating, need to choose what
// to do at full-pel offsets. The current selection, where the filter is
// applied in one direction only, and not at all for 0,0, seems to give the
// best quality, but it may be worth trying an additional mode that does
// do the filtering on full-pel.
- xd->subpix.predict[0][0][0] = vp9_convolve_copy;
- xd->subpix.predict[0][0][1] = vp9_convolve_avg;
- xd->subpix.predict[0][1][0] = vp9_convolve8_vert;
- xd->subpix.predict[0][1][1] = vp9_convolve8_avg_vert;
- xd->subpix.predict[1][0][0] = vp9_convolve8_horiz;
- xd->subpix.predict[1][0][1] = vp9_convolve8_avg_horiz;
- xd->subpix.predict[1][1][0] = vp9_convolve8;
- xd->subpix.predict[1][1][1] = vp9_convolve8_avg;
-
- xd->subpix.x_step_q4 = 16;
- xd->subpix.y_step_q4 = 16;
+ if (scale->x_step_q4 == 16) {
+ if (scale->y_step_q4 == 16) {
+ // No scaling in either direction.
+ scale->predict[0][0][0] = vp9_convolve_copy;
+ scale->predict[0][0][1] = vp9_convolve_avg;
+ scale->predict[0][1][0] = vp9_convolve8_vert;
+ scale->predict[0][1][1] = vp9_convolve8_avg_vert;
+ scale->predict[1][0][0] = vp9_convolve8_horiz;
+ scale->predict[1][0][1] = vp9_convolve8_avg_horiz;
+ } else {
+ // No scaling in x direction. Must always scale in the y direction.
+ scale->predict[0][0][0] = vp9_convolve8_vert;
+ scale->predict[0][0][1] = vp9_convolve8_avg_vert;
+ scale->predict[0][1][0] = vp9_convolve8_vert;
+ scale->predict[0][1][1] = vp9_convolve8_avg_vert;
+ scale->predict[1][0][0] = vp9_convolve8;
+ scale->predict[1][0][1] = vp9_convolve8_avg;
+ }
+ } else {
+ if (scale->y_step_q4 == 16) {
+ // No scaling in the y direction. Must always scale in the x direction.
+ scale->predict[0][0][0] = vp9_convolve8_horiz;
+ scale->predict[0][0][1] = vp9_convolve8_avg_horiz;
+ scale->predict[0][1][0] = vp9_convolve8;
+ scale->predict[0][1][1] = vp9_convolve8_avg;
+ scale->predict[1][0][0] = vp9_convolve8_horiz;
+ scale->predict[1][0][1] = vp9_convolve8_avg_horiz;
+ } else {
+ // Must always scale in both directions.
+ scale->predict[0][0][0] = vp9_convolve8;
+ scale->predict[0][0][1] = vp9_convolve8_avg;
+ scale->predict[0][1][0] = vp9_convolve8;
+ scale->predict[0][1][1] = vp9_convolve8_avg;
+ scale->predict[1][0][0] = vp9_convolve8;
+ scale->predict[1][0][1] = vp9_convolve8_avg;
+ }
+ }
+ // 2D subpel motion always gets filtered in both directions
+ scale->predict[1][1][0] = vp9_convolve8;
+ scale->predict[1][1][1] = vp9_convolve8_avg;
+}
+
+void vp9_setup_interp_filters(MACROBLOCKD *xd,
+ INTERPOLATIONFILTERTYPE mcomp_filter_type,
+ VP9_COMMON *cm) {
+ int i;
+
+ /* Calculate scaling factors for each of the 3 available references */
+ for (i = 0; i < 3; ++i) {
+ if (cm->active_ref_idx[i] >= NUM_YV12_BUFFERS) {
+ memset(&cm->active_ref_scale[i], 0, sizeof(cm->active_ref_scale[i]));
+ continue;
+ }
+
+ vp9_setup_scale_factors_for_frame(&cm->active_ref_scale[i],
+ &cm->yv12_fb[cm->active_ref_idx[i]],
+ cm->mb_cols * 16, cm->mb_rows * 16);
+ }
+
+ if (xd->mode_info_context) {
+ MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
+
+ set_scale_factors(xd,
+ mbmi->ref_frame - 1,
+ mbmi->second_ref_frame - 1,
+ cm->active_ref_scale);
+ }
+
+
switch (mcomp_filter_type) {
case EIGHTTAP:
case SWITCHABLE:
@@ -57,6 +128,7 @@ void vp9_setup_interp_filters(MACROBLOCKD *xd,
break;
#endif
}
+ assert(((intptr_t)xd->subpix.filter_x & 0xff) == 0);
}
void vp9_copy_mem16x16_c(const uint8_t *src,
@@ -146,30 +218,50 @@ void vp9_copy_mem8x4_c(const uint8_t *src,
}
}
-static int32_t scale_motion_vector_component(int mv,
- int num,
- int den,
- int offset_q4) {
+static void set_scaled_offsets(struct scale_factors *scale,
+                               int row, int col) {
+  // Scale the position (in q4 units) by num / den, keep the low 4 bits.
+  const int scaled_x_q4 = 16 * col * scale->x_num / scale->x_den;
+  const int scaled_y_q4 = 16 * row * scale->y_num / scale->y_den;
+  scale->x_offset_q4 = scaled_x_q4 & 0xf;
+  scale->y_offset_q4 = scaled_y_q4 & 0xf;
+}
+
+static int32_t scale_motion_vector_component_q3(int mv_q3,
+                                                int num,
+                                                int den,
+                                                int offset_q4) {
+  // Converts a q3 mv component to q4, scales it by num / den and adds
+  // offset_q4. Multiply (not <<) because shifting a negative mv is undefined.
+  const int32_t mv_q4 = mv_q3 * 2;
+  /* TODO(jkoleszar): make fixed point, or as a second multiply? */
+  return mv_q4 * num / den + offset_q4;
+}
+
+static int32_t scale_motion_vector_component_q4(int mv_q4,
+ int num,
+ int den,
+ int offset_q4) {
// returns the scaled and offset value of the mv component.
- // input and output mv have the same units -- this would work with either q3
- // or q4 motion vectors. Offset is given as a q4 fractional number.
- const int32_t mv_q4 = mv * 16;
/* TODO(jkoleszar): make fixed point, or as a second multiply? */
- return (mv_q4 * num / den + offset_q4 + 8) >> 4;
+ return mv_q4 * num / den + offset_q4;
}
-static int_mv32 scale_motion_vector(const int_mv *src_mv,
- const struct scale_factors *scale) {
+static int_mv32 scale_motion_vector_q3_to_q4(
+ const int_mv *src_mv,
+ const struct scale_factors *scale) {
// returns mv * scale + offset
int_mv32 result;
- result.as_mv.row = scale_motion_vector_component(src_mv->as_mv.row,
- scale->y_num, scale->y_den,
- scale->y_offset_q4);
- result.as_mv.col = scale_motion_vector_component(src_mv->as_mv.col,
- scale->x_num, scale->x_den,
- scale->x_offset_q4);
+ result.as_mv.row = scale_motion_vector_component_q3(src_mv->as_mv.row,
+ scale->y_num,
+ scale->y_den,
+ scale->y_offset_q4);
+ result.as_mv.col = scale_motion_vector_component_q3(src_mv->as_mv.col,
+ scale->x_num,
+ scale->x_den,
+ scale->x_offset_q4);
return result;
}
@@ -181,12 +273,13 @@ void vp9_build_inter_predictor(const uint8_t *src, int src_stride,
const struct subpix_fn_table *subpix) {
int_mv32 mv;
- mv = scale_motion_vector(mv_q3, scale);
- src = src + (mv.as_mv.row >> 3) * src_stride + (mv.as_mv.col >> 3);
- subpix->predict[!!(mv.as_mv.col & 7)][!!(mv.as_mv.row & 7)][do_avg](
+ mv = scale_motion_vector_q3_to_q4(mv_q3, scale);
+ src = src + (mv.as_mv.row >> 4) * src_stride + (mv.as_mv.col >> 4);
+
+ scale->predict[!!(mv.as_mv.col & 15)][!!(mv.as_mv.row & 15)][do_avg](
src, src_stride, dst, dst_stride,
- subpix->filter_x[(mv.as_mv.col & 7) << 1], subpix->x_step_q4,
- subpix->filter_y[(mv.as_mv.row & 7) << 1], subpix->y_step_q4,
+ subpix->filter_x[mv.as_mv.col & 15], scale->x_step_q4,
+ subpix->filter_y[mv.as_mv.row & 15], scale->y_step_q4,
w, h);
}
@@ -205,29 +298,32 @@ void vp9_build_inter_predictor_q4(const uint8_t *src, int src_stride,
const int mv_col_q4 = ((fullpel_mv_q3->as_mv.col >> 3) << 4)
+ (frac_mv_q4->as_mv.col & 0xf);
const int scaled_mv_row_q4 =
- scale_motion_vector_component(mv_row_q4, scale->y_num, scale->y_den,
- scale->y_offset_q4);
+ scale_motion_vector_component_q4(mv_row_q4, scale->y_num, scale->y_den,
+ scale->y_offset_q4);
const int scaled_mv_col_q4 =
- scale_motion_vector_component(mv_col_q4, scale->x_num, scale->x_den,
- scale->x_offset_q4);
+ scale_motion_vector_component_q4(mv_col_q4, scale->x_num, scale->x_den,
+ scale->x_offset_q4);
const int subpel_x = scaled_mv_col_q4 & 15;
const int subpel_y = scaled_mv_row_q4 & 15;
src = src + (scaled_mv_row_q4 >> 4) * src_stride + (scaled_mv_col_q4 >> 4);
- subpix->predict[!!subpel_x][!!subpel_y][do_avg](
+ scale->predict[!!subpel_x][!!subpel_y][do_avg](
src, src_stride, dst, dst_stride,
- subpix->filter_x[subpel_x], subpix->x_step_q4,
- subpix->filter_y[subpel_y], subpix->y_step_q4,
+ subpix->filter_x[subpel_x], scale->x_step_q4,
+ subpix->filter_y[subpel_y], scale->y_step_q4,
w, h);
}
static void build_2x1_inter_predictor(const BLOCKD *d0, const BLOCKD *d1,
- const struct scale_factors *scale,
+ struct scale_factors *scale,
int block_size, int stride, int which_mv,
- const struct subpix_fn_table *subpix) {
+ const struct subpix_fn_table *subpix,
+ int row, int col) {
assert(d1->predictor - d0->predictor == block_size);
assert(d1->pre == d0->pre + block_size);
+ set_scaled_offsets(&scale[which_mv], row, col);
+
if (d0->bmi.as_mv[which_mv].as_int == d1->bmi.as_mv[which_mv].as_int) {
uint8_t **base_pre = which_mv ? d0->base_second_pre : d0->base_pre;
@@ -250,6 +346,9 @@ static void build_2x1_inter_predictor(const BLOCKD *d0, const BLOCKD *d1,
&scale[which_mv],
block_size, block_size, which_mv,
subpix);
+
+ set_scaled_offsets(&scale[which_mv], row, col + block_size);
+
vp9_build_inter_predictor(*base_pre1 + d1->pre,
d1->pre_stride,
d1->predictor, stride,
@@ -261,7 +360,9 @@ static void build_2x1_inter_predictor(const BLOCKD *d0, const BLOCKD *d1,
}
/*encoder only*/
-void vp9_build_inter4x4_predictors_mbuv(MACROBLOCKD *xd) {
+void vp9_build_inter4x4_predictors_mbuv(MACROBLOCKD *xd,
+ int mb_row,
+ int mb_col) {
int i, j;
BLOCKD *blockd = xd->block;
@@ -339,13 +440,16 @@ void vp9_build_inter4x4_predictors_mbuv(MACROBLOCKD *xd) {
for (i = 16; i < 24; i += 2) {
const int use_second_ref = xd->mode_info_context->mbmi.second_ref_frame > 0;
+ const int x = 4 * (i & 1);
+ const int y = ((i - 16) >> 1) * 4;
+
int which_mv;
BLOCKD *d0 = &blockd[i];
BLOCKD *d1 = &blockd[i + 1];
for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) {
build_2x1_inter_predictor(d0, d1, xd->scale_factor_uv, 4, 8, which_mv,
- &xd->subpix);
+ &xd->subpix, mb_row * 8 + y, mb_col * 8 + x);
}
}
}
@@ -389,7 +493,9 @@ static void clamp_uvmv_to_umv_border(MV *mv, const MACROBLOCKD *xd) {
/*encoder only*/
void vp9_build_inter16x16_predictors_mby(MACROBLOCKD *xd,
uint8_t *dst_y,
- int dst_ystride) {
+ int dst_ystride,
+ int mb_row,
+ int mb_col) {
const int use_second_ref = xd->mode_info_context->mbmi.second_ref_frame > 0;
int which_mv;
@@ -399,14 +505,19 @@ void vp9_build_inter16x16_predictors_mby(MACROBLOCKD *xd,
: xd->mode_info_context->mbmi.need_to_clamp_mvs;
uint8_t *base_pre;
int_mv ymv;
+ int pre_stride;
ymv.as_int = xd->mode_info_context->mbmi.mv[which_mv].as_int;
base_pre = which_mv ? xd->second_pre.y_buffer
: xd->pre.y_buffer;
+ pre_stride = which_mv ? xd->second_pre.y_stride
+ : xd->pre.y_stride;
if (clamp_mvs)
clamp_mv_to_umv_border(&ymv.as_mv, xd);
- vp9_build_inter_predictor(base_pre, xd->block[0].pre_stride,
+ set_scaled_offsets(&xd->scale_factor[which_mv], mb_row * 16, mb_col * 16);
+
+ vp9_build_inter_predictor(base_pre, pre_stride,
dst_y, dst_ystride,
&ymv, &xd->scale_factor[which_mv],
16, 16, which_mv, &xd->subpix);
@@ -416,7 +527,9 @@ void vp9_build_inter16x16_predictors_mby(MACROBLOCKD *xd,
void vp9_build_inter16x16_predictors_mbuv(MACROBLOCKD *xd,
uint8_t *dst_u,
uint8_t *dst_v,
- int dst_uvstride) {
+ int dst_uvstride,
+ int mb_row,
+ int mb_col) {
const int use_second_ref = xd->mode_info_context->mbmi.second_ref_frame > 0;
int which_mv;
@@ -425,7 +538,8 @@ void vp9_build_inter16x16_predictors_mbuv(MACROBLOCKD *xd,
which_mv ? xd->mode_info_context->mbmi.need_to_clamp_secondmv
: xd->mode_info_context->mbmi.need_to_clamp_mvs;
uint8_t *uptr, *vptr;
- int pre_stride = xd->block[0].pre_stride;
+ int pre_stride = which_mv ? xd->second_pre.y_stride
+ : xd->pre.y_stride;
int_mv _o16x16mv;
int_mv _16x16mv;
@@ -456,6 +570,9 @@ void vp9_build_inter16x16_predictors_mbuv(MACROBLOCKD *xd,
uptr = (which_mv ? xd->second_pre.u_buffer : xd->pre.u_buffer);
vptr = (which_mv ? xd->second_pre.v_buffer : xd->pre.v_buffer);
+ set_scaled_offsets(&xd->scale_factor_uv[which_mv],
+ mb_row * 16, mb_col * 16);
+
vp9_build_inter_predictor_q4(uptr, pre_stride,
dst_u, dst_uvstride,
&_16x16mv, &_o16x16mv,
@@ -475,7 +592,9 @@ void vp9_build_inter32x32_predictors_sb(MACROBLOCKD *x,
uint8_t *dst_u,
uint8_t *dst_v,
int dst_ystride,
- int dst_uvstride) {
+ int dst_uvstride,
+ int mb_row,
+ int mb_col) {
uint8_t *y1 = x->pre.y_buffer, *u1 = x->pre.u_buffer, *v1 = x->pre.v_buffer;
uint8_t *y2 = x->second_pre.y_buffer, *u2 = x->second_pre.u_buffer,
*v2 = x->second_pre.v_buffer;
@@ -488,27 +607,43 @@ void vp9_build_inter32x32_predictors_sb(MACROBLOCKD *x,
for (n = 0; n < 4; n++) {
const int x_idx = n & 1, y_idx = n >> 1;
+ int scaled_uv_offset;
x->mb_to_top_edge = edge[0] - ((y_idx * 16) << 3);
x->mb_to_bottom_edge = edge[1] + (((1 - y_idx) * 16) << 3);
x->mb_to_left_edge = edge[2] - ((x_idx * 16) << 3);
x->mb_to_right_edge = edge[3] + (((1 - x_idx) * 16) << 3);
- x->pre.y_buffer = y1 + y_idx * 16 * x->pre.y_stride + x_idx * 16;
- x->pre.u_buffer = u1 + y_idx * 8 * x->pre.uv_stride + x_idx * 8;
- x->pre.v_buffer = v1 + y_idx * 8 * x->pre.uv_stride + x_idx * 8;
+ x->pre.y_buffer = y1 + scaled_buffer_offset(x_idx * 16,
+ y_idx * 16,
+ x->pre.y_stride,
+ &x->scale_factor[0]);
+ scaled_uv_offset = scaled_buffer_offset(x_idx * 8,
+ y_idx * 8,
+ x->pre.uv_stride,
+ &x->scale_factor_uv[0]);
+ x->pre.u_buffer = u1 + scaled_uv_offset;
+ x->pre.v_buffer = v1 + scaled_uv_offset;
if (x->mode_info_context->mbmi.second_ref_frame > 0) {
- x->second_pre.y_buffer = y2 + y_idx * 16 * x->pre.y_stride + x_idx * 16;
- x->second_pre.u_buffer = u2 + y_idx * 8 * x->pre.uv_stride + x_idx * 8;
- x->second_pre.v_buffer = v2 + y_idx * 8 * x->pre.uv_stride + x_idx * 8;
+ x->second_pre.y_buffer = y2 +
+ scaled_buffer_offset(x_idx * 16,
+ y_idx * 16,
+ x->second_pre.y_stride,
+ &x->scale_factor[1]);
+ scaled_uv_offset = scaled_buffer_offset(x_idx * 8,
+ y_idx * 8,
+ x->second_pre.uv_stride,
+ &x->scale_factor_uv[1]);
+ x->second_pre.u_buffer = u2 + scaled_uv_offset;
+ x->second_pre.v_buffer = v2 + scaled_uv_offset;
}
vp9_build_inter16x16_predictors_mb(x,
dst_y + y_idx * 16 * dst_ystride + x_idx * 16,
dst_u + y_idx * 8 * dst_uvstride + x_idx * 8,
dst_v + y_idx * 8 * dst_uvstride + x_idx * 8,
- dst_ystride, dst_uvstride);
+ dst_ystride, dst_uvstride, mb_row + y_idx, mb_col + x_idx);
}
x->mb_to_top_edge = edge[0];
@@ -539,7 +674,9 @@ void vp9_build_inter64x64_predictors_sb(MACROBLOCKD *x,
uint8_t *dst_u,
uint8_t *dst_v,
int dst_ystride,
- int dst_uvstride) {
+ int dst_uvstride,
+ int mb_row,
+ int mb_col) {
uint8_t *y1 = x->pre.y_buffer, *u1 = x->pre.u_buffer, *v1 = x->pre.v_buffer;
uint8_t *y2 = x->second_pre.y_buffer, *u2 = x->second_pre.u_buffer,
*v2 = x->second_pre.v_buffer;
@@ -552,27 +689,43 @@ void vp9_build_inter64x64_predictors_sb(MACROBLOCKD *x,
for (n = 0; n < 4; n++) {
const int x_idx = n & 1, y_idx = n >> 1;
+ int scaled_uv_offset;
x->mb_to_top_edge = edge[0] - ((y_idx * 32) << 3);
x->mb_to_bottom_edge = edge[1] + (((1 - y_idx) * 32) << 3);
x->mb_to_left_edge = edge[2] - ((x_idx * 32) << 3);
x->mb_to_right_edge = edge[3] + (((1 - x_idx) * 32) << 3);
- x->pre.y_buffer = y1 + y_idx * 32 * x->pre.y_stride + x_idx * 32;
- x->pre.u_buffer = u1 + y_idx * 16 * x->pre.uv_stride + x_idx * 16;
- x->pre.v_buffer = v1 + y_idx * 16 * x->pre.uv_stride + x_idx * 16;
+ x->pre.y_buffer = y1 + scaled_buffer_offset(x_idx * 32,
+ y_idx * 32,
+ x->pre.y_stride,
+ &x->scale_factor[0]);
+ scaled_uv_offset = scaled_buffer_offset(x_idx * 16,
+ y_idx * 16,
+ x->pre.uv_stride,
+ &x->scale_factor_uv[0]);
+ x->pre.u_buffer = u1 + scaled_uv_offset;
+ x->pre.v_buffer = v1 + scaled_uv_offset;
if (x->mode_info_context->mbmi.second_ref_frame > 0) {
- x->second_pre.y_buffer = y2 + y_idx * 32 * x->pre.y_stride + x_idx * 32;
- x->second_pre.u_buffer = u2 + y_idx * 16 * x->pre.uv_stride + x_idx * 16;
- x->second_pre.v_buffer = v2 + y_idx * 16 * x->pre.uv_stride + x_idx * 16;
+ x->second_pre.y_buffer = y2 +
+ scaled_buffer_offset(x_idx * 32,
+ y_idx * 32,
+ x->second_pre.y_stride,
+ &x->scale_factor[1]);
+ scaled_uv_offset = scaled_buffer_offset(x_idx * 16,
+ y_idx * 16,
+ x->second_pre.uv_stride,
+ &x->scale_factor_uv[1]);
+ x->second_pre.u_buffer = u2 + scaled_uv_offset;
+ x->second_pre.v_buffer = v2 + scaled_uv_offset;
}
vp9_build_inter32x32_predictors_sb(x,
dst_y + y_idx * 32 * dst_ystride + x_idx * 32,
dst_u + y_idx * 16 * dst_uvstride + x_idx * 16,
dst_v + y_idx * 16 * dst_uvstride + x_idx * 16,
- dst_ystride, dst_uvstride);
+ dst_ystride, dst_uvstride, mb_row + y_idx * 2, mb_col + x_idx * 2);
}
x->mb_to_top_edge = edge[0];
@@ -598,7 +751,8 @@ void vp9_build_inter64x64_predictors_sb(MACROBLOCKD *x,
#endif
}
-static void build_inter4x4_predictors_mb(MACROBLOCKD *xd) {
+static void build_inter4x4_predictors_mb(MACROBLOCKD *xd,
+ int mb_row, int mb_col) {
int i;
MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi;
BLOCKD *blockd = xd->block;
@@ -609,6 +763,7 @@ static void build_inter4x4_predictors_mb(MACROBLOCKD *xd) {
for (i = 0; i < 16; i += 8) {
BLOCKD *d0 = &blockd[i];
BLOCKD *d1 = &blockd[i + 2];
+ const int y = i & 8;
blockd[i + 0].bmi = xd->mode_info_context->bmi[i + 0];
blockd[i + 2].bmi = xd->mode_info_context->bmi[i + 2];
@@ -619,44 +774,25 @@ static void build_inter4x4_predictors_mb(MACROBLOCKD *xd) {
clamp_mv_to_umv_border(&blockd[i + 2].bmi.as_mv[which_mv].as_mv, xd);
}
- /* TODO(jkoleszar): Enabling this for EIGHTTAP_SMOOTH changes the
- * result slightly, for reasons that are not immediately obvious to me.
- * It probably makes sense to enable this for all filter types to be
- * consistent with the way we do 8x4 below. Leaving disabled for now.
- */
- if (mbmi->interp_filter != EIGHTTAP_SMOOTH) {
- build_2x1_inter_predictor(d0, d1, xd->scale_factor, 8, 16,
- which_mv, &xd->subpix);
- } else {
- uint8_t **base_pre0 = which_mv ? d0->base_second_pre : d0->base_pre;
- uint8_t **base_pre1 = which_mv ? d1->base_second_pre : d1->base_pre;
-
- vp9_build_inter_predictor(*base_pre0 + d0->pre,
- d0->pre_stride,
- d0->predictor, 16,
- &d0->bmi.as_mv[which_mv],
- &xd->scale_factor[which_mv],
- 8, 8, which_mv, &xd->subpix);
- vp9_build_inter_predictor(*base_pre1 + d1->pre,
- d1->pre_stride,
- d1->predictor, 16,
- &d1->bmi.as_mv[which_mv],
- &xd->scale_factor[which_mv],
- 8, 8, which_mv, &xd->subpix);
- }
+ build_2x1_inter_predictor(d0, d1, xd->scale_factor, 8, 16,
+ which_mv, &xd->subpix,
+ mb_row * 16 + y, mb_col * 16);
}
}
} else {
for (i = 0; i < 16; i += 2) {
BLOCKD *d0 = &blockd[i];
BLOCKD *d1 = &blockd[i + 1];
+ const int x = (i & 3) * 4;
+ const int y = (i >> 2) * 4;
blockd[i + 0].bmi = xd->mode_info_context->bmi[i + 0];
blockd[i + 1].bmi = xd->mode_info_context->bmi[i + 1];
for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) {
build_2x1_inter_predictor(d0, d1, xd->scale_factor, 4, 16,
- which_mv, &xd->subpix);
+ which_mv, &xd->subpix,
+ mb_row * 16 + y, mb_col * 16 + x);
}
}
}
@@ -664,10 +800,13 @@ static void build_inter4x4_predictors_mb(MACROBLOCKD *xd) {
for (i = 16; i < 24; i += 2) {
BLOCKD *d0 = &blockd[i];
BLOCKD *d1 = &blockd[i + 1];
+ const int x = 4 * (i & 1);
+ const int y = ((i - 16) >> 1) * 4;
for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) {
build_2x1_inter_predictor(d0, d1, xd->scale_factor_uv, 4, 8,
- which_mv, &xd->subpix);
+ which_mv, &xd->subpix,
+ mb_row * 8 + y, mb_col * 8 + x);
}
}
}
@@ -769,17 +908,23 @@ void vp9_build_inter16x16_predictors_mb(MACROBLOCKD *xd,
uint8_t *dst_u,
uint8_t *dst_v,
int dst_ystride,
- int dst_uvstride) {
- vp9_build_inter16x16_predictors_mby(xd, dst_y, dst_ystride);
- vp9_build_inter16x16_predictors_mbuv(xd, dst_u, dst_v, dst_uvstride);
+ int dst_uvstride,
+ int mb_row,
+ int mb_col) {
+ vp9_build_inter16x16_predictors_mby(xd, dst_y, dst_ystride, mb_row, mb_col);
+ vp9_build_inter16x16_predictors_mbuv(xd, dst_u, dst_v, dst_uvstride,
+ mb_row, mb_col);
}
-void vp9_build_inter_predictors_mb(MACROBLOCKD *xd) {
+void vp9_build_inter_predictors_mb(MACROBLOCKD *xd,
+ int mb_row,
+ int mb_col) {
if (xd->mode_info_context->mbmi.mode != SPLITMV) {
vp9_build_inter16x16_predictors_mb(xd, xd->predictor,
&xd->predictor[256],
- &xd->predictor[320], 16, 8);
+ &xd->predictor[320], 16, 8,
+ mb_row, mb_col);
#if CONFIG_COMP_INTERINTRA_PRED
if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) {
@@ -790,6 +935,6 @@ void vp9_build_inter_predictors_mb(MACROBLOCKD *xd) {
#endif
} else {
build_4x4uvmvs(xd);
- build_inter4x4_predictors_mb(xd);
+ build_inter4x4_predictors_mb(xd, mb_row, mb_col);
}
}
diff --git a/vp9/common/vp9_reconinter.h b/vp9/common/vp9_reconinter.h
index 43f716427..831ce2a73 100644
--- a/vp9/common/vp9_reconinter.h
+++ b/vp9/common/vp9_reconinter.h
@@ -16,43 +16,61 @@
struct subpix_fn_table;
-extern void vp9_build_inter16x16_predictors_mby(MACROBLOCKD *xd,
- uint8_t *dst_y,
- int dst_ystride);
-
-extern void vp9_build_inter16x16_predictors_mbuv(MACROBLOCKD *xd,
- uint8_t *dst_u,
- uint8_t *dst_v,
- int dst_uvstride);
-
-extern void vp9_build_inter16x16_predictors_mb(MACROBLOCKD *xd,
- uint8_t *dst_y,
- uint8_t *dst_u,
- uint8_t *dst_v,
- int dst_ystride,
- int dst_uvstride);
-
-extern void vp9_build_inter32x32_predictors_sb(MACROBLOCKD *x,
- uint8_t *dst_y,
- uint8_t *dst_u,
- uint8_t *dst_v,
- int dst_ystride,
- int dst_uvstride);
-
-extern void vp9_build_inter64x64_predictors_sb(MACROBLOCKD *x,
- uint8_t *dst_y,
- uint8_t *dst_u,
- uint8_t *dst_v,
- int dst_ystride,
- int dst_uvstride);
-
-extern void vp9_build_inter_predictors_mb(MACROBLOCKD *xd);
-
-extern void vp9_build_inter4x4_predictors_mbuv(MACROBLOCKD *xd);
-
-extern void vp9_setup_interp_filters(MACROBLOCKD *xd,
- INTERPOLATIONFILTERTYPE filter,
- VP9_COMMON *cm);
+void vp9_build_inter16x16_predictors_mby(MACROBLOCKD *xd,
+ uint8_t *dst_y,
+ int dst_ystride,
+ int mb_row,
+ int mb_col);
+
+void vp9_build_inter16x16_predictors_mbuv(MACROBLOCKD *xd,
+ uint8_t *dst_u,
+ uint8_t *dst_v,
+ int dst_uvstride,
+ int mb_row,
+ int mb_col);
+
+void vp9_build_inter16x16_predictors_mb(MACROBLOCKD *xd,
+ uint8_t *dst_y,
+ uint8_t *dst_u,
+ uint8_t *dst_v,
+ int dst_ystride,
+ int dst_uvstride,
+ int mb_row,
+ int mb_col);
+
+void vp9_build_inter32x32_predictors_sb(MACROBLOCKD *x,
+ uint8_t *dst_y,
+ uint8_t *dst_u,
+ uint8_t *dst_v,
+ int dst_ystride,
+ int dst_uvstride,
+ int mb_row,
+ int mb_col);
+
+void vp9_build_inter64x64_predictors_sb(MACROBLOCKD *x,
+ uint8_t *dst_y,
+ uint8_t *dst_u,
+ uint8_t *dst_v,
+ int dst_ystride,
+ int dst_uvstride,
+ int mb_row,
+ int mb_col);
+
+void vp9_build_inter_predictors_mb(MACROBLOCKD *xd,
+ int mb_row,
+ int mb_col);
+
+void vp9_build_inter4x4_predictors_mbuv(MACROBLOCKD *xd,
+ int mb_row,
+ int mb_col);
+
+void vp9_setup_interp_filters(MACROBLOCKD *xd,
+ INTERPOLATIONFILTERTYPE filter,
+ VP9_COMMON *cm);
+
+void vp9_setup_scale_factors_for_frame(struct scale_factors *scale,
+ YV12_BUFFER_CONFIG *other,
+ int this_w, int this_h);
void vp9_build_inter_predictor(const uint8_t *src, int src_stride,
uint8_t *dst, int dst_stride,
@@ -68,4 +86,56 @@ void vp9_build_inter_predictor_q4(const uint8_t *src, int src_stride,
const struct scale_factors *scale,
int w, int h, int do_avg,
const struct subpix_fn_table *subpix);
+
+static int scale_value_x(int val, const struct scale_factors *scale) {  // Scales val by the x_num / x_den ratio of *scale.
+ return val * scale->x_num / scale->x_den;  // Multiplies by x_num before dividing by x_den.
+}
+
+static int scale_value_y(int val, const struct scale_factors *scale) {  // Scales val by the y_num / y_den ratio of *scale.
+ return val * scale->y_num / scale->y_den;  // Multiplies by y_num before dividing by y_den.
+}
+
+static int scaled_buffer_offset(int x_offset,  // Returns the buffer offset of (x_offset, y_offset) after scaling both coordinates.
+ int y_offset,
+ int stride,  // Multiplied by the scaled y value; presumably the row pitch of the buffer being addressed.
+ const struct scale_factors *scale) {  // Passed to both scale_value_y and scale_value_x.
+ return scale_value_y(y_offset, scale) * stride +  // Scaled y times stride...
+ scale_value_x(x_offset, scale);  // ...plus scaled x.
+}
+
+static void setup_pred_block(YV12_BUFFER_CONFIG *dst,  // Makes *dst a copy of *src with its y/u/v pointers advanced to macroblock (mb_row, mb_col).
+ const YV12_BUFFER_CONFIG *src,
+ int mb_row, int mb_col,  // Converted to pixels below: x16 for the y plane, x8 for the u/v planes.
+ const struct scale_factors *scale,  // Used for the y offset; NULL selects the unscaled branch.
+ const struct scale_factors *scale_uv) {  // Used for the u/v offset; only read when scale is non-NULL and not NULL-checked itself.
+ const int recon_y_stride = src->y_stride;
+ const int recon_uv_stride = src->uv_stride;
+ int recon_yoffset;
+ int recon_uvoffset;
+
+ if (scale) {  // Scale factors supplied: pass the pixel position through scaled_buffer_offset.
+ recon_yoffset = scaled_buffer_offset(16 * mb_col, 16 * mb_row,
+ recon_y_stride, scale);
+ recon_uvoffset = scaled_buffer_offset(8 * mb_col, 8 * mb_row,
+ recon_uv_stride, scale_uv);
+ } else {  // No scale factors: plain row * stride + column.
+ recon_yoffset = 16 * mb_row * recon_y_stride + 16 * mb_col;
+ recon_uvoffset = 8 * mb_row * recon_uv_stride + 8 * mb_col;
+ }
+ *dst = *src;  // Copy every field first; only the three plane pointers are adjusted afterwards.
+ dst->y_buffer += recon_yoffset;
+ dst->u_buffer += recon_uvoffset;  // u and v share the same offset.
+ dst->v_buffer += recon_uvoffset;
+}
+
+static void set_scale_factors(MACROBLOCKD *xd,  // Fills xd->scale_factor[0..1] and xd->scale_factor_uv[0..1] from a table.
+ int ref0, int ref1,  // Indices into scale_factor; a negative index falls back to entry 0.
+ struct scale_factors scale_factor[MAX_REF_FRAMES]) {
+
+ xd->scale_factor[0] = scale_factor[ref0 >= 0 ? ref0 : 0];
+ xd->scale_factor[1] = scale_factor[ref1 >= 0 ? ref1 : 0];
+ xd->scale_factor_uv[0] = xd->scale_factor[0];  // The uv entries are copies of the entries just stored above.
+ xd->scale_factor_uv[1] = xd->scale_factor[1];
+}
+
#endif // VP9_COMMON_VP9_RECONINTER_H_
diff --git a/vp9/common/vp9_reconintra.h b/vp9/common/vp9_reconintra.h
index 88584ad3b..3031fb699 100644
--- a/vp9/common/vp9_reconintra.h
+++ b/vp9/common/vp9_reconintra.h
@@ -14,37 +14,43 @@
#include "vpx/vpx_integer.h"
#include "vp9/common/vp9_blockd.h"
-extern void vp9_recon_intra_mbuv(MACROBLOCKD *xd);
-extern B_PREDICTION_MODE vp9_find_dominant_direction(uint8_t *ptr,
- int stride, int n);
-extern B_PREDICTION_MODE vp9_find_bpred_context(BLOCKD *x);
+void vp9_recon_intra_mbuv(MACROBLOCKD *xd);
+
+B_PREDICTION_MODE vp9_find_dominant_direction(uint8_t *ptr,
+ int stride, int n);
+
+B_PREDICTION_MODE vp9_find_bpred_context(BLOCKD *x);
+
#if CONFIG_COMP_INTERINTRA_PRED
-extern void vp9_build_interintra_16x16_predictors_mb(MACROBLOCKD *xd,
- uint8_t *ypred,
- uint8_t *upred,
- uint8_t *vpred,
- int ystride,
- int uvstride);
-extern void vp9_build_interintra_16x16_predictors_mby(MACROBLOCKD *xd,
- uint8_t *ypred,
- int ystride);
-extern void vp9_build_interintra_16x16_predictors_mbuv(MACROBLOCKD *xd,
- uint8_t *upred,
- uint8_t *vpred,
- int uvstride);
+void vp9_build_interintra_16x16_predictors_mb(MACROBLOCKD *xd,
+ uint8_t *ypred,
+ uint8_t *upred,
+ uint8_t *vpred,
+ int ystride,
+ int uvstride);
+
+void vp9_build_interintra_16x16_predictors_mby(MACROBLOCKD *xd,
+ uint8_t *ypred,
+ int ystride);
+
+void vp9_build_interintra_16x16_predictors_mbuv(MACROBLOCKD *xd,
+ uint8_t *upred,
+ uint8_t *vpred,
+ int uvstride);
#endif // CONFIG_COMP_INTERINTRA_PRED
-extern void vp9_build_interintra_32x32_predictors_sb(MACROBLOCKD *xd,
- uint8_t *ypred,
- uint8_t *upred,
- uint8_t *vpred,
- int ystride,
- int uvstride);
-extern void vp9_build_interintra_64x64_predictors_sb(MACROBLOCKD *xd,
- uint8_t *ypred,
- uint8_t *upred,
- uint8_t *vpred,
- int ystride,
- int uvstride);
+void vp9_build_interintra_32x32_predictors_sb(MACROBLOCKD *xd,
+ uint8_t *ypred,
+ uint8_t *upred,
+ uint8_t *vpred,
+ int ystride,
+ int uvstride);
+
+void vp9_build_interintra_64x64_predictors_sb(MACROBLOCKD *xd,
+ uint8_t *ypred,
+ uint8_t *upred,
+ uint8_t *vpred,
+ int ystride,
+ int uvstride);
#endif // VP9_COMMON_VP9_RECONINTRA_H_
diff --git a/vp9/common/vp9_rtcd.c b/vp9/common/vp9_rtcd.c
index 277d5b217..72613ae07 100644
--- a/vp9/common/vp9_rtcd.c
+++ b/vp9/common/vp9_rtcd.c
@@ -12,10 +12,9 @@
#include "vp9_rtcd.h"
#include "vpx_ports/vpx_once.h"
-extern void vpx_scale_rtcd(void);
+void vpx_scale_rtcd(void);
-void vp9_rtcd()
-{
+void vp9_rtcd() {
vpx_scale_rtcd();
once(setup_rtcd_internal);
}
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh
index 700af7fa7..02a6711e5 100644
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -296,7 +296,7 @@ specialize vp9_short_iht16x16
# dct and add
prototype void vp9_dc_only_idct_add "int input_dc, uint8_t *pred_ptr, uint8_t *dst_ptr, int pitch, int stride"
-specialize vp9_dc_only_idct_add
+specialize vp9_dc_only_idct_add sse2
prototype void vp9_short_inv_walsh4x4_1_x8 "int16_t *input, int16_t *output, int pitch"
specialize vp9_short_inv_walsh4x4_1_x8
diff --git a/vp9/common/vp9_setupintrarecon.h b/vp9/common/vp9_setupintrarecon.h
index 457265528..e389f3c91 100644
--- a/vp9/common/vp9_setupintrarecon.h
+++ b/vp9/common/vp9_setupintrarecon.h
@@ -13,6 +13,6 @@
#include "vpx_scale/yv12config.h"
-extern void vp9_setup_intra_recon(YV12_BUFFER_CONFIG *ybf);
+void vp9_setup_intra_recon(YV12_BUFFER_CONFIG *ybf);
#endif // VP9_COMMON_VP9_SETUPINTRARECON_H_
diff --git a/vp9/common/x86/vp9_idctllm_x86.c b/vp9/common/x86/vp9_idctllm_x86.c
new file mode 100644
index 000000000..667f5c1d3
--- /dev/null
+++ b/vp9/common/x86/vp9_idctllm_x86.c
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include <emmintrin.h> // SSE2
+#include "./vpx_config.h"
+#include "vpx/vpx_integer.h"
+#include "vp9/common/vp9_common.h"
+#include "vp9/common/vp9_idct.h"
+
+#if HAVE_SSE2
+// To improve performance, clip the absolute diff value (|a1|) to [0, 255],
+// which allows the additions/subtractions to be done in 8 bits.
+void vp9_dc_only_idct_add_sse2(int input_dc, uint8_t *pred_ptr,
+ uint8_t *dst_ptr, int pitch, int stride) {  // pitch: row step of pred_ptr; stride: row step of dst_ptr.
+ int a1;
+ int16_t out;
+ uint8_t abs_diff;
+ __m128i p0, p1, p2, p3;
+ unsigned int extended_diff;
+ __m128i diff;
+
+ out = dct_const_round_shift(input_dc * cospi_16_64);
+ out = dct_const_round_shift(out * cospi_16_64);
+ a1 = ROUND_POWER_OF_TWO(out, 4);  // a1 is the single value applied to all 16 pixels below.
+
+ // Read four rows of prediction data, 4 bytes each (one int per row).
+ p0 = _mm_cvtsi32_si128 (*(const int *)(pred_ptr + 0 * pitch));
+ p1 = _mm_cvtsi32_si128 (*(const int *)(pred_ptr + 1 * pitch));
+ p2 = _mm_cvtsi32_si128 (*(const int *)(pred_ptr + 2 * pitch));
+ p3 = _mm_cvtsi32_si128 (*(const int *)(pred_ptr + 3 * pitch));
+
+ // Unpack prediction data, and store 4x4 array in 1 XMM register.
+ p0 = _mm_unpacklo_epi32(p0, p1);
+ p2 = _mm_unpacklo_epi32(p2, p3);
+ p0 = _mm_unpacklo_epi64(p0, p2);
+
+ // Clip dc value to [0, 255] range. Then, do addition or subtraction
+ // according to its sign.
+ if (a1 >= 0) {
+ abs_diff = (a1 > 255) ? 255 : a1;
+ extended_diff = abs_diff * 0x01010101u;  // Replicate the byte into all 4 bytes of the word.
+ diff = _mm_shuffle_epi32(_mm_cvtsi32_si128((int)extended_diff), 0);  // Broadcast the low 32 bits to all four lanes.
+
+ p1 = _mm_adds_epu8(p0, diff);  // Unsigned saturating add.
+ } else {
+ abs_diff = (a1 < -255) ? 255 : -a1;
+ extended_diff = abs_diff * 0x01010101u;  // Replicate the byte into all 4 bytes of the word.
+ diff = _mm_shuffle_epi32(_mm_cvtsi32_si128((int)extended_diff), 0);  // Broadcast the low 32 bits to all four lanes.
+
+ p1 = _mm_subs_epu8(p0, diff);  // Unsigned saturating subtract.
+ }
+
+ // Store results to dst, one 4-byte row at a time.
+ *(int *)dst_ptr = _mm_cvtsi128_si32(p1);
+ dst_ptr += stride;
+
+ p1 = _mm_srli_si128(p1, 4);  // Shift the next row into the low 4 bytes.
+ *(int *)dst_ptr = _mm_cvtsi128_si32(p1);
+ dst_ptr += stride;
+
+ p1 = _mm_srli_si128(p1, 4);  // Shift the next row into the low 4 bytes.
+ *(int *)dst_ptr = _mm_cvtsi128_si32(p1);
+ dst_ptr += stride;
+
+ p1 = _mm_srli_si128(p1, 4);  // Shift the last row into the low 4 bytes.
+ *(int *)dst_ptr = _mm_cvtsi128_si32(p1);
+}
+#endif
diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c
index a1225f1dc..5893c1132 100644
--- a/vp9/decoder/vp9_decodemv.c
+++ b/vp9/decoder/vp9_decodemv.c
@@ -12,6 +12,7 @@
#include "vp9/decoder/vp9_treereader.h"
#include "vp9/common/vp9_entropymv.h"
#include "vp9/common/vp9_entropymode.h"
+#include "vp9/common/vp9_reconinter.h"
#include "vp9/decoder/vp9_onyxd_int.h"
#include "vp9/common/vp9_findnearmv.h"
#include "vp9/common/vp9_common.h"
@@ -697,6 +698,9 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
int mb_to_top_edge;
int mb_to_bottom_edge;
const int mb_size = 1 << mi->mbmi.sb_type;
+ const int use_prev_in_find_mv_refs = cm->Width == cm->last_width &&
+ cm->Height == cm->last_height &&
+ !cm->error_resilient_mode;
mb_to_top_edge = xd->mb_to_top_edge;
mb_to_bottom_edge = xd->mb_to_bottom_edge;
@@ -749,25 +753,22 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
int_mv nearest_second, nearby_second, best_mv_second;
vp9_prob mv_ref_p [VP9_MVREFS - 1];
- int recon_y_stride, recon_yoffset;
- int recon_uv_stride, recon_uvoffset;
MV_REFERENCE_FRAME ref_frame = mbmi->ref_frame;
+ xd->scale_factor[0] = cm->active_ref_scale[mbmi->ref_frame - 1];
{
int ref_fb_idx;
+ const int use_prev_in_find_best_ref =
+ xd->scale_factor[0].x_num == xd->scale_factor[0].x_den &&
+ xd->scale_factor[0].y_num == xd->scale_factor[0].y_den &&
+ !cm->error_resilient_mode &&
+ !cm->frame_parallel_decoding_mode;
/* Select the appropriate reference frame for this MB */
ref_fb_idx = cm->active_ref_idx[ref_frame - 1];
- recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride ;
- recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
-
- recon_yoffset = (mb_row * recon_y_stride * 16) + (mb_col * 16);
- recon_uvoffset = (mb_row * recon_uv_stride * 8) + (mb_col * 8);
-
- xd->pre.y_buffer = cm->yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
- xd->pre.u_buffer = cm->yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
- xd->pre.v_buffer = cm->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
+ setup_pred_block(&xd->pre, &cm->yv12_fb[ref_fb_idx],
+ mb_row, mb_col, &xd->scale_factor[0], &xd->scale_factor_uv[0]);
#ifdef DEC_DEBUG
if (dec_debug)
@@ -776,7 +777,7 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
#endif
// if (cm->current_video_frame == 1 && mb_row == 4 && mb_col == 5)
// printf("Dello\n");
- vp9_find_mv_refs(cm, xd, mi, cm->error_resilient_mode ? 0 : prev_mi,
+ vp9_find_mv_refs(cm, xd, mi, use_prev_in_find_mv_refs ? prev_mi : NULL,
ref_frame, mbmi->ref_mvs[ref_frame],
cm->ref_frame_sign_bias);
@@ -809,10 +810,9 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
if (mbmi->mode != ZEROMV) {
vp9_find_best_ref_mvs(xd,
- pbi->common.error_resilient_mode ||
- pbi->common.frame_parallel_decoding_mode ?
- 0 : xd->pre.y_buffer,
- recon_y_stride,
+ use_prev_in_find_best_ref ?
+ xd->pre.y_buffer : NULL,
+ xd->pre.y_stride,
mbmi->ref_mvs[ref_frame],
&nearest, &nearby);
@@ -853,27 +853,31 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
mbmi->second_ref_frame = 1;
if (mbmi->second_ref_frame > 0) {
int second_ref_fb_idx;
+ int use_prev_in_find_best_ref;
+
+ xd->scale_factor[1] = cm->active_ref_scale[mbmi->second_ref_frame - 1];
+ use_prev_in_find_best_ref =
+ xd->scale_factor[1].x_num == xd->scale_factor[1].x_den &&
+ xd->scale_factor[1].y_num == xd->scale_factor[1].y_den &&
+ !cm->error_resilient_mode &&
+ !cm->frame_parallel_decoding_mode;
+
/* Select the appropriate reference frame for this MB */
second_ref_fb_idx = cm->active_ref_idx[mbmi->second_ref_frame - 1];
- xd->second_pre.y_buffer =
- cm->yv12_fb[second_ref_fb_idx].y_buffer + recon_yoffset;
- xd->second_pre.u_buffer =
- cm->yv12_fb[second_ref_fb_idx].u_buffer + recon_uvoffset;
- xd->second_pre.v_buffer =
- cm->yv12_fb[second_ref_fb_idx].v_buffer + recon_uvoffset;
+ setup_pred_block(&xd->second_pre, &cm->yv12_fb[second_ref_fb_idx],
+ mb_row, mb_col, &xd->scale_factor[1], &xd->scale_factor_uv[1]);
- vp9_find_mv_refs(cm, xd, mi, cm->error_resilient_mode ? 0 : prev_mi,
+ vp9_find_mv_refs(cm, xd, mi, use_prev_in_find_mv_refs ? prev_mi : NULL,
mbmi->second_ref_frame,
mbmi->ref_mvs[mbmi->second_ref_frame],
cm->ref_frame_sign_bias);
if (mbmi->mode != ZEROMV) {
vp9_find_best_ref_mvs(xd,
- pbi->common.error_resilient_mode ||
- pbi->common.frame_parallel_decoding_mode ?
- 0 : xd->second_pre.y_buffer,
- recon_y_stride,
+ use_prev_in_find_best_ref ?
+ xd->second_pre.y_buffer : NULL,
+ xd->second_pre.y_stride,
mbmi->ref_mvs[mbmi->second_ref_frame],
&nearest_second,
&nearby_second);
@@ -1089,7 +1093,6 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
break;
case NEWMV:
-
read_nmv(bc, &mv->as_mv, &best_mv.as_mv, nmvc);
read_nmv_fp(bc, &mv->as_mv, &best_mv.as_mv, nmvc,
xd->allow_high_precision_mv);
@@ -1230,8 +1233,12 @@ void vp9_decode_mb_mode_mv(VP9D_COMP* const pbi,
MODE_INFO *mi = xd->mode_info_context;
MODE_INFO *prev_mi = xd->prev_mode_info_context;
- if (pbi->common.frame_type == KEY_FRAME)
+ if (pbi->common.frame_type == KEY_FRAME) {
kfread_modes(pbi, mi, mb_row, mb_col, bc);
- else
+ } else {
read_mb_modes_mv(pbi, mi, &mi->mbmi, prev_mi, mb_row, mb_col, bc);
+ set_scale_factors(xd,
+ mi->mbmi.ref_frame - 1, mi->mbmi.second_ref_frame - 1,
+ pbi->common.active_ref_scale);
+ }
}
diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c
index 830b6fdfe..8e9e5ad7d 100644
--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodframe.c
@@ -126,7 +126,7 @@ static void mb_init_dequantizer(VP9D_COMP *pbi, MACROBLOCKD *xd) {
xd->inv_txm4x4_1 = vp9_short_idct4x4llm_1;
xd->inv_txm4x4 = vp9_short_idct4x4llm;
xd->itxm_add = vp9_dequant_idct_add;
- xd->dc_only_itxm_add = vp9_dc_only_idct_add_c;
+ xd->dc_only_itxm_add = vp9_dc_only_idct_add;
xd->itxm_add_y_block = vp9_dequant_idct_add_y_block;
xd->itxm_add_uv_block = vp9_dequant_idct_add_uv_block;
if (xd->lossless) {
@@ -147,7 +147,8 @@ static void mb_init_dequantizer(VP9D_COMP *pbi, MACROBLOCKD *xd) {
/* skip_recon_mb() is Modified: Instead of writing the result to predictor buffer and then copying it
* to dst buffer, we can write the result directly to dst buffer. This eliminates unnecessary copy.
*/
-static void skip_recon_mb(VP9D_COMP *pbi, MACROBLOCKD *xd) {
+static void skip_recon_mb(VP9D_COMP *pbi, MACROBLOCKD *xd,
+ int mb_row, int mb_col) {
BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) {
@@ -168,21 +169,24 @@ static void skip_recon_mb(VP9D_COMP *pbi, MACROBLOCKD *xd) {
xd->dst.u_buffer,
xd->dst.v_buffer,
xd->dst.y_stride,
- xd->dst.uv_stride);
+ xd->dst.uv_stride,
+ mb_row, mb_col);
} else if (sb_type == BLOCK_SIZE_SB32X32) {
vp9_build_inter32x32_predictors_sb(xd,
xd->dst.y_buffer,
xd->dst.u_buffer,
xd->dst.v_buffer,
xd->dst.y_stride,
- xd->dst.uv_stride);
+ xd->dst.uv_stride,
+ mb_row, mb_col);
} else {
vp9_build_inter16x16_predictors_mb(xd,
xd->dst.y_buffer,
xd->dst.u_buffer,
xd->dst.v_buffer,
xd->dst.y_stride,
- xd->dst.uv_stride);
+ xd->dst.uv_stride,
+ mb_row, mb_col);
#if CONFIG_COMP_INTERINTRA_PRED
if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) {
vp9_build_interintra_16x16_predictors_mb(xd,
@@ -222,11 +226,11 @@ static void decode_16x16(VP9D_COMP *pbi, MACROBLOCKD *xd,
vp9_ht_dequant_idct_add_16x16_c(tx_type, xd->qcoeff,
xd->block[0].dequant, xd->predictor,
xd->dst.y_buffer, 16, xd->dst.y_stride,
- xd->block[0].eob);
+ xd->eobs[0]);
} else {
vp9_dequant_idct_add_16x16(xd->qcoeff, xd->block[0].dequant,
xd->predictor, xd->dst.y_buffer,
- 16, xd->dst.y_stride, xd->block[0].eob);
+ 16, xd->dst.y_stride, xd->eobs[0]);
}
vp9_dequant_idct_add_uv_block_8x8(
xd->qcoeff + 16 * 16, xd->block[16].dequant,
@@ -268,10 +272,10 @@ static void decode_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd,
tx_type = get_tx_type_8x8(xd, &xd->block[ib]);
if (tx_type != DCT_DCT) {
vp9_ht_dequant_idct_add_8x8_c(tx_type, q, dq, pre, dst, 16, stride,
- xd->block[idx].eob);
+ xd->eobs[idx]);
} else {
vp9_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride,
- xd->block[idx].eob);
+ xd->eobs[idx]);
}
}
} else {
@@ -344,7 +348,7 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
vp9_ht_dequant_idct_add_c(tx_type, b->qcoeff,
b->dequant, b->predictor,
*(b->base_dst) + b->dst, 16,
- b->dst_stride, b->eob);
+ b->dst_stride, xd->eobs[ib + iblock[j]]);
} else {
xd->itxm_add(b->qcoeff, b->dequant, b->predictor,
*(b->base_dst) + b->dst, 16, b->dst_stride);
@@ -377,7 +381,7 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
vp9_ht_dequant_idct_add_c(tx_type, b->qcoeff,
b->dequant, b->predictor,
*(b->base_dst) + b->dst, 16, b->dst_stride,
- b->eob);
+ xd->eobs[i]);
} else {
xd->itxm_add(b->qcoeff, b->dequant, b->predictor,
*(b->base_dst) + b->dst, 16, b->dst_stride);
@@ -433,7 +437,7 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
vp9_ht_dequant_idct_add_c(tx_type, b->qcoeff,
b->dequant, b->predictor,
*(b->base_dst) + b->dst, 16,
- b->dst_stride, b->eob);
+ b->dst_stride, xd->eobs[i]);
} else {
xd->itxm_add(b->qcoeff, b->dequant, b->predictor,
*(b->base_dst) + b->dst, 16, b->dst_stride);
@@ -459,13 +463,13 @@ static void decode_16x16_sb(VP9D_COMP *pbi, MACROBLOCKD *xd,
tx_type, xd->qcoeff, xd->block[0].dequant,
xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16,
xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16,
- xd->dst.y_stride, xd->dst.y_stride, xd->block[0].eob);
+ xd->dst.y_stride, xd->dst.y_stride, xd->eobs[0]);
} else {
vp9_dequant_idct_add_16x16(
xd->qcoeff, xd->block[0].dequant,
xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16,
xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16,
- xd->dst.y_stride, xd->dst.y_stride, xd->block[0].eob);
+ xd->dst.y_stride, xd->dst.y_stride, xd->eobs[0]);
}
vp9_dequant_idct_add_uv_block_8x8_inplace_c(
xd->qcoeff + 16 * 16,
@@ -488,7 +492,6 @@ static void decode_8x8_sb(VP9D_COMP *pbi, MACROBLOCKD *xd,
int16_t *q = xd->block[idx].qcoeff;
int16_t *dq = xd->block[0].dequant;
int stride = xd->dst.y_stride;
- BLOCKD *b = &xd->block[ib];
tx_type = get_tx_type_8x8(xd, &xd->block[ib]);
if (tx_type != DCT_DCT) {
vp9_ht_dequant_idct_add_8x8_c(
@@ -497,7 +500,7 @@ static void decode_8x8_sb(VP9D_COMP *pbi, MACROBLOCKD *xd,
+ x_idx * 16 + (i & 1) * 8,
xd->dst.y_buffer + (y_idx * 16 + (i / 2) * 8) * xd->dst.y_stride
+ x_idx * 16 + (i & 1) * 8,
- stride, stride, b->eob);
+ stride, stride, xd->eobs[idx]);
} else {
vp9_dequant_idct_add_8x8_c(
q, dq,
@@ -505,7 +508,7 @@ static void decode_8x8_sb(VP9D_COMP *pbi, MACROBLOCKD *xd,
+ x_idx * 16 + (i & 1) * 8,
xd->dst.y_buffer + (y_idx * 16 + (i / 2) * 8) * xd->dst.y_stride
+ x_idx * 16 + (i & 1) * 8,
- stride, stride, b->eob);
+ stride, stride, xd->eobs[idx]);
}
}
} else {
@@ -538,7 +541,7 @@ static void decode_4x4_sb(VP9D_COMP *pbi, MACROBLOCKD *xd,
+ x_idx * 16 + (i & 3) * 4,
xd->dst.y_buffer + (y_idx * 16 + (i / 4) * 4) * xd->dst.y_stride
+ x_idx * 16 + (i & 3) * 4,
- xd->dst.y_stride, xd->dst.y_stride, b->eob);
+ xd->dst.y_stride, xd->dst.y_stride, xd->eobs[i]);
} else {
xd->itxm_add(
b->qcoeff, b->dequant,
@@ -565,7 +568,7 @@ static void decode_4x4_sb(VP9D_COMP *pbi, MACROBLOCKD *xd,
static void decode_superblock64(VP9D_COMP *pbi, MACROBLOCKD *xd,
int mb_row, unsigned int mb_col,
BOOL_DECODER* const bc) {
- int i, n, eobtotal;
+ int n, eobtotal;
TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size;
VP9_COMMON *const pc = &pbi->common;
MODE_INFO *orig_mi = xd->mode_info_context;
@@ -599,7 +602,7 @@ static void decode_superblock64(VP9D_COMP *pbi, MACROBLOCKD *xd,
/* Special case: Force the loopfilter to skip when eobtotal and
* mb_skip_coeff are zero.
*/
- skip_recon_mb(pbi, xd);
+ skip_recon_mb(pbi, xd, mb_row, mb_col);
return;
}
@@ -610,7 +613,8 @@ static void decode_superblock64(VP9D_COMP *pbi, MACROBLOCKD *xd,
} else {
vp9_build_inter64x64_predictors_sb(xd, xd->dst.y_buffer,
xd->dst.u_buffer, xd->dst.v_buffer,
- xd->dst.y_stride, xd->dst.uv_stride);
+ xd->dst.y_stride, xd->dst.uv_stride,
+ mb_row, mb_col);
}
/* dequantization and idct */
@@ -642,7 +646,7 @@ static void decode_superblock64(VP9D_COMP *pbi, MACROBLOCKD *xd,
xd->dst.y_buffer + x_idx * 32 +
xd->dst.y_stride * y_idx * 32,
xd->dst.y_stride, xd->dst.y_stride,
- xd->block[0].eob);
+ xd->eobs[0]);
vp9_dequant_idct_add_uv_block_16x16_c(xd->sb_coeff_data.qcoeff + 1024,
xd->block[16].dequant,
xd->dst.u_buffer + x_idx * 16 +
@@ -662,9 +666,6 @@ static void decode_superblock64(VP9D_COMP *pbi, MACROBLOCKD *xd,
xd->above_context = pc->above_context + mb_col + x_idx;
xd->left_context = pc->left_context + y_idx;
xd->mode_info_context = orig_mi + x_idx + y_idx * mis;
- for (i = 0; i < 24; i++) {
- xd->block[i].eob = 0;
- }
eobtotal = vp9_decode_mb_tokens(pbi, xd, bc);
if (eobtotal == 0) { // skip loopfilter
@@ -690,7 +691,7 @@ static void decode_superblock64(VP9D_COMP *pbi, MACROBLOCKD *xd,
static void decode_superblock32(VP9D_COMP *pbi, MACROBLOCKD *xd,
int mb_row, unsigned int mb_col,
BOOL_DECODER* const bc) {
- int i, n, eobtotal;
+ int n, eobtotal;
TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size;
VP9_COMMON *const pc = &pbi->common;
MODE_INFO *orig_mi = xd->mode_info_context;
@@ -720,7 +721,7 @@ static void decode_superblock32(VP9D_COMP *pbi, MACROBLOCKD *xd,
/* Special case: Force the loopfilter to skip when eobtotal and
* mb_skip_coeff are zero.
*/
- skip_recon_mb(pbi, xd);
+ skip_recon_mb(pbi, xd, mb_row, mb_col);
return;
}
@@ -731,7 +732,8 @@ static void decode_superblock32(VP9D_COMP *pbi, MACROBLOCKD *xd,
} else {
vp9_build_inter32x32_predictors_sb(xd, xd->dst.y_buffer,
xd->dst.u_buffer, xd->dst.v_buffer,
- xd->dst.y_stride, xd->dst.uv_stride);
+ xd->dst.y_stride, xd->dst.uv_stride,
+ mb_row, mb_col);
}
/* dequantization and idct */
@@ -750,7 +752,7 @@ static void decode_superblock32(VP9D_COMP *pbi, MACROBLOCKD *xd,
vp9_dequant_idct_add_32x32(xd->sb_coeff_data.qcoeff, xd->block[0].dequant,
xd->dst.y_buffer, xd->dst.y_buffer,
xd->dst.y_stride, xd->dst.y_stride,
- xd->block[0].eob);
+ xd->eobs[0]);
vp9_dequant_idct_add_uv_block_16x16_c(xd->sb_coeff_data.qcoeff + 1024,
xd->block[16].dequant,
xd->dst.u_buffer, xd->dst.v_buffer,
@@ -766,9 +768,6 @@ static void decode_superblock32(VP9D_COMP *pbi, MACROBLOCKD *xd,
xd->above_context = pc->above_context + mb_col + x_idx;
xd->left_context = pc->left_context + y_idx + (mb_row & 2);
xd->mode_info_context = orig_mi + x_idx + y_idx * mis;
- for (i = 0; i < 24; i++) {
- xd->block[i].eob = 0;
- }
eobtotal = vp9_decode_mb_tokens(pbi, xd, bc);
if (eobtotal == 0) { // skip loopfilter
@@ -796,7 +795,6 @@ static void decode_macroblock(VP9D_COMP *pbi, MACROBLOCKD *xd,
BOOL_DECODER* const bc) {
int eobtotal = 0;
MB_PREDICTION_MODE mode;
- int i;
int tx_size;
assert(!xd->mode_info_context->mbmi.sb_type);
@@ -811,9 +809,6 @@ static void decode_macroblock(VP9D_COMP *pbi, MACROBLOCKD *xd,
if (xd->mode_info_context->mbmi.mb_skip_coeff) {
vp9_reset_mb_tokens_context(xd);
} else if (!bool_error(bc)) {
- for (i = 0; i < 24; i++) {
- xd->block[i].eob = 0;
- }
if (mode != B_PRED) {
eobtotal = vp9_decode_mb_tokens(pbi, xd, bc);
}
@@ -832,7 +827,7 @@ static void decode_macroblock(VP9D_COMP *pbi, MACROBLOCKD *xd,
/* Special case: Force the loopfilter to skip when eobtotal and
mb_skip_coeff are zero. */
xd->mode_info_context->mbmi.mb_skip_coeff = 1;
- skip_recon_mb(pbi, xd);
+ skip_recon_mb(pbi, xd, mb_row, mb_col);
return;
}
#ifdef DEC_DEBUG
@@ -859,7 +854,7 @@ static void decode_macroblock(VP9D_COMP *pbi, MACROBLOCKD *xd,
xd->mode_info_context->mbmi.mode, tx_size,
xd->mode_info_context->mbmi.interp_filter);
#endif
- vp9_build_inter_predictors_mb(xd);
+ vp9_build_inter_predictors_mb(xd, mb_row, mb_col);
}
if (tx_size == TX_16X16) {
@@ -966,18 +961,14 @@ static void set_refs(VP9D_COMP *pbi, int block_size,
MB_MODE_INFO *const mbmi = &mi->mbmi;
if (mbmi->ref_frame > INTRA_FRAME) {
- int ref_fb_idx, ref_yoffset, ref_uvoffset, ref_y_stride, ref_uv_stride;
+ int ref_fb_idx;
/* Select the appropriate reference frame for this MB */
ref_fb_idx = cm->active_ref_idx[mbmi->ref_frame - 1];
-
- ref_y_stride = cm->yv12_fb[ref_fb_idx].y_stride;
- ref_yoffset = mb_row * 16 * ref_y_stride + 16 * mb_col;
- xd->pre.y_buffer = cm->yv12_fb[ref_fb_idx].y_buffer + ref_yoffset;
- ref_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
- ref_uvoffset = mb_row * 8 * ref_uv_stride + 8 * mb_col;
- xd->pre.u_buffer = cm->yv12_fb[ref_fb_idx].u_buffer + ref_uvoffset;
- xd->pre.v_buffer = cm->yv12_fb[ref_fb_idx].v_buffer + ref_uvoffset;
+ xd->scale_factor[0] = cm->active_ref_scale[mbmi->ref_frame - 1];
+ xd->scale_factor_uv[0] = cm->active_ref_scale[mbmi->ref_frame - 1];
+ setup_pred_block(&xd->pre, &cm->yv12_fb[ref_fb_idx], mb_row, mb_col,
+ &xd->scale_factor[0], &xd->scale_factor_uv[0]);
/* propagate errors from reference frames */
xd->corrupted |= cm->yv12_fb[ref_fb_idx].corrupted;
@@ -988,12 +979,9 @@ static void set_refs(VP9D_COMP *pbi, int block_size,
/* Select the appropriate reference frame for this MB */
second_ref_fb_idx = cm->active_ref_idx[mbmi->second_ref_frame - 1];
- xd->second_pre.y_buffer =
- cm->yv12_fb[second_ref_fb_idx].y_buffer + ref_yoffset;
- xd->second_pre.u_buffer =
- cm->yv12_fb[second_ref_fb_idx].u_buffer + ref_uvoffset;
- xd->second_pre.v_buffer =
- cm->yv12_fb[second_ref_fb_idx].v_buffer + ref_uvoffset;
+ setup_pred_block(&xd->second_pre, &cm->yv12_fb[second_ref_fb_idx],
+ mb_row, mb_col,
+ &xd->scale_factor[1], &xd->scale_factor_uv[1]);
/* propagate errors from reference frames */
xd->corrupted |= cm->yv12_fb[second_ref_fb_idx].corrupted;
@@ -1204,6 +1192,26 @@ static void read_coef_probs(VP9D_COMP *pbi, BOOL_DECODER* const bc) {
}
}
+static void update_frame_size(VP9D_COMP *pbi) {  // Recomputes macroblock counts and mode-info pointers from cm->Width / cm->Height.
+ VP9_COMMON *cm = &pbi->common;
+
+ /* our internal buffers are always multiples of 16 */
+ int width = (cm->Width + 15) & ~15;  // Round up to the next multiple of 16.
+ int height = (cm->Height + 15) & ~15;
+
+ cm->mb_rows = height >> 4;  // Dimensions in units of 16 pixels.
+ cm->mb_cols = width >> 4;
+ cm->MBs = cm->mb_rows * cm->mb_cols;
+ cm->mode_info_stride = cm->mb_cols + 1;  // One more entry per row than there are macroblock columns.
+ memset(cm->mip, 0,
+ (cm->mb_cols + 1) * (cm->mb_rows + 1) * sizeof(MODE_INFO));  // Zeroes (mb_cols + 1) x (mb_rows + 1) MODE_INFO entries.
+ vp9_update_mode_info_border(cm, cm->mip);
+
+ cm->mi = cm->mip + cm->mode_info_stride + 1;  // Skips one full row plus one entry of mip.
+ cm->prev_mi = cm->prev_mip + cm->mode_info_stride + 1;  // Same offset applied to prev_mip.
+ vp9_update_mode_info_in_image(cm, cm->mi);
+}
+
int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) {
BOOL_DECODER header_bc, residual_bc;
VP9_COMMON *const pc = &pbi->common;
@@ -1281,9 +1289,25 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) {
"Invalid frame height");
}
- if (vp9_alloc_frame_buffers(pc, pc->Width, pc->Height))
- vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR,
- "Failed to allocate frame buffers");
+ if (!pbi->initial_width || !pbi->initial_height) {
+ if (vp9_alloc_frame_buffers(pc, pc->Width, pc->Height))
+ vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR,
+ "Failed to allocate frame buffers");
+ pbi->initial_width = pc->Width;
+ pbi->initial_height = pc->Height;
+ }
+
+ if (pc->Width > pbi->initial_width) {
+ vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
+ "Frame width too large");
+ }
+
+ if (pc->Height > pbi->initial_height) {
+ vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
+ "Frame height too large");
+ }
+
+ update_frame_size(pbi);
}
}
}
@@ -1295,6 +1319,11 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) {
init_frame(pbi);
+ /* Reset the frame pointers to the current frame size */
+ vp8_yv12_realloc_frame_buffer(&pc->yv12_fb[pc->new_fb_idx],
+ pc->mb_cols * 16, pc->mb_rows * 16,
+ VP9BORDERINPIXELS);
+
if (vp9_start_decode(&header_bc, data,
(unsigned int)first_partition_length_in_bytes))
vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR,
@@ -1727,6 +1756,10 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) {
}
corrupt_tokens |= xd->corrupted;
+ // keep track of the last coded dimensions
+ pc->last_width = pc->Width;
+ pc->last_height = pc->Height;
+
/* Collect information about decoder corruption. */
/* 1. Check first boolean decoder for errors. */
pc->yv12_fb[pc->new_fb_idx].corrupted = bool_error(&header_bc);
diff --git a/vp9/decoder/vp9_dequantize.c b/vp9/decoder/vp9_dequantize.c
index 1d6c66afd..9485abf3a 100644
--- a/vp9/decoder/vp9_dequantize.c
+++ b/vp9/decoder/vp9_dequantize.c
@@ -345,7 +345,7 @@ void vp9_dequant_idct_add_uv_block_16x16_c(int16_t *q, const int16_t *dq,
int stride,
MACROBLOCKD *xd) {
vp9_dequant_idct_add_16x16_c(q, dq, dstu, dstu, stride, stride,
- xd->block[16].eob);
+ xd->eobs[16]);
vp9_dequant_idct_add_16x16_c(q + 256, dq, dstv, dstv, stride, stride,
- xd->block[20].eob);
+ xd->eobs[20]);
}
diff --git a/vp9/decoder/vp9_detokenize.c b/vp9/decoder/vp9_detokenize.c
index 91042c4fe..d3fb25ace 100644
--- a/vp9/decoder/vp9_detokenize.c
+++ b/vp9/decoder/vp9_detokenize.c
@@ -110,15 +110,12 @@ static int decode_coefs(VP9D_COMP *dx, const MACROBLOCKD *xd,
case TX_8X8:
coef_probs = fc->coef_probs_8x8;
coef_counts = fc->coef_counts_8x8;
-#if CONFIG_CNVCONTEXT
above_ec = (A0[aidx] + A0[aidx + 1]) != 0;
left_ec = (L0[lidx] + L0[lidx + 1]) != 0;
-#endif
break;
case TX_16X16:
coef_probs = fc->coef_probs_16x16;
coef_counts = fc->coef_counts_16x16;
-#if CONFIG_CNVCONTEXT
if (type == PLANE_TYPE_UV) {
ENTROPY_CONTEXT *A1 = (ENTROPY_CONTEXT *) (xd->above_context + 1);
ENTROPY_CONTEXT *L1 = (ENTROPY_CONTEXT *) (xd->left_context + 1);
@@ -128,12 +125,10 @@ static int decode_coefs(VP9D_COMP *dx, const MACROBLOCKD *xd,
above_ec = (A0[aidx] + A0[aidx + 1] + A0[aidx + 2] + A0[aidx + 3]) != 0;
left_ec = (L0[lidx] + L0[lidx + 1] + L0[lidx + 2] + L0[lidx + 3]) != 0;
}
-#endif
break;
case TX_32X32:
coef_probs = fc->coef_probs_32x32;
coef_counts = fc->coef_counts_32x32;
-#if CONFIG_CNVCONTEXT
if (type == PLANE_TYPE_UV) {
ENTROPY_CONTEXT *A1 = (ENTROPY_CONTEXT *) (xd->above_context + 1);
ENTROPY_CONTEXT *L1 = (ENTROPY_CONTEXT *) (xd->left_context + 1);
@@ -153,7 +148,6 @@ static int decode_coefs(VP9D_COMP *dx, const MACROBLOCKD *xd,
left_ec = (L0[lidx] + L0[lidx + 1] + L0[lidx + 2] + L0[lidx + 3] +
L1[lidx] + L1[lidx + 1] + L1[lidx + 2] + L1[lidx + 3]) != 0;
}
-#endif
break;
}
@@ -285,7 +279,7 @@ int vp9_decode_sb_tokens(VP9D_COMP* const pbi,
DCT_DCT, get_eob(xd, segment_id, 1024),
xd->sb_coeff_data.qcoeff,
vp9_default_zig_zag1d_32x32, TX_32X32);
- xd->block[0].eob = c;
+ xd->eobs[0] = c;
eobtotal += c;
// 16x16 chroma blocks
@@ -294,7 +288,7 @@ int vp9_decode_sb_tokens(VP9D_COMP* const pbi,
c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, DCT_DCT, seg_eob,
xd->sb_coeff_data.qcoeff + 1024 + (i - 16) * 64,
vp9_default_zig_zag1d_16x16, TX_16X16);
- xd->block[i].eob = c;
+ xd->eobs[i] = c;
eobtotal += c;
}
@@ -312,7 +306,7 @@ static int vp9_decode_mb_tokens_16x16(VP9D_COMP* const pbi,
get_tx_type(xd, &xd->block[0]),
get_eob(xd, segment_id, 256),
xd->qcoeff, vp9_default_zig_zag1d_16x16, TX_16X16);
- xd->block[0].eob = c;
+ xd->eobs[0] = c;
eobtotal += c;
// 8x8 chroma blocks
@@ -321,7 +315,7 @@ static int vp9_decode_mb_tokens_16x16(VP9D_COMP* const pbi,
c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV,
DCT_DCT, seg_eob, xd->block[i].qcoeff,
vp9_default_zig_zag1d_8x8, TX_8X8);
- xd->block[i].eob = c;
+ xd->eobs[i] = c;
eobtotal += c;
}
return eobtotal;
@@ -340,7 +334,7 @@ static int vp9_decode_mb_tokens_8x8(VP9D_COMP* const pbi,
get_tx_type(xd, xd->block + i),
seg_eob, xd->block[i].qcoeff,
vp9_default_zig_zag1d_8x8, TX_8X8);
- xd->block[i].eob = c;
+ xd->eobs[i] = c;
eobtotal += c;
}
@@ -353,7 +347,7 @@ static int vp9_decode_mb_tokens_8x8(VP9D_COMP* const pbi,
c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV,
DCT_DCT, seg_eob, xd->block[i].qcoeff,
vp9_default_zig_zag1d_4x4, TX_4X4);
- xd->block[i].eob = c;
+ xd->eobs[i] = c;
eobtotal += c;
}
} else {
@@ -361,7 +355,7 @@ static int vp9_decode_mb_tokens_8x8(VP9D_COMP* const pbi,
c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV,
DCT_DCT, seg_eob, xd->block[i].qcoeff,
vp9_default_zig_zag1d_8x8, TX_8X8);
- xd->block[i].eob = c;
+ xd->eobs[i] = c;
eobtotal += c;
}
}
@@ -375,7 +369,7 @@ static int decode_coefs_4x4(VP9D_COMP *dx, MACROBLOCKD *xd,
TX_TYPE tx_type, const int *scan) {
int c = decode_coefs(dx, xd, bc, i, type, tx_type, seg_eob,
xd->block[i].qcoeff, scan, TX_4X4);
- xd->block[i].eob = c;
+ xd->eobs[i] = c;
return c;
}
diff --git a/vp9/decoder/vp9_idct_blk.c b/vp9/decoder/vp9_idct_blk.c
index 6e55e45ae..0ec5036e4 100644
--- a/vp9/decoder/vp9_idct_blk.c
+++ b/vp9/decoder/vp9_idct_blk.c
@@ -21,7 +21,7 @@ void vp9_dequant_idct_add_y_block_4x4_inplace_c(int16_t *q,
for (i = 0; i < 4; i++) {
for (j = 0; j < 4; j++) {
- if (xd->block[i * 4 + j].eob > 1) {
+ if (xd->eobs[i * 4 + j] > 1) {
xd->itxm_add(q, dq, dst, dst, stride, stride);
} else {
xd->dc_only_itxm_add(q[0]*dq[0], dst, dst, stride, stride);
@@ -44,10 +44,10 @@ void vp9_dequant_idct_add_y_block_c(int16_t *q, const int16_t *dq,
for (i = 0; i < 4; i++) {
for (j = 0; j < 4; j++) {
- if (xd->block[i * 4 + j].eob > 1)
+ if (xd->eobs[i * 4 + j] > 1)
vp9_dequant_idct_add_c(q, dq, pre, dst, 16, stride);
else {
- vp9_dc_only_idct_add_c(q[0]*dq[0], pre, dst, 16, stride);
+ vp9_dc_only_idct_add(q[0]*dq[0], pre, dst, 16, stride);
((int *)q)[0] = 0;
}
@@ -69,10 +69,10 @@ void vp9_dequant_idct_add_uv_block_c(int16_t *q, const int16_t *dq,
for (i = 0; i < 2; i++) {
for (j = 0; j < 2; j++) {
- if (xd->block[16 + i * 2 + j].eob > 1)
+ if (xd->eobs[16 + i * 2 + j] > 1)
vp9_dequant_idct_add_c(q, dq, pre, dstu, 8, stride);
else {
- vp9_dc_only_idct_add_c(q[0]*dq[0], pre, dstu, 8, stride);
+ vp9_dc_only_idct_add(q[0]*dq[0], pre, dstu, 8, stride);
((int *)q)[0] = 0;
}
@@ -87,10 +87,10 @@ void vp9_dequant_idct_add_uv_block_c(int16_t *q, const int16_t *dq,
for (i = 0; i < 2; i++) {
for (j = 0; j < 2; j++) {
- if (xd->block[20 + i * 2 + j].eob > 1)
+ if (xd->eobs[20 + i * 2 + j] > 1)
vp9_dequant_idct_add_c(q, dq, pre, dstv, 8, stride);
else {
- vp9_dc_only_idct_add_c(q[0]*dq[0], pre, dstv, 8, stride);
+ vp9_dc_only_idct_add(q[0]*dq[0], pre, dstv, 8, stride);
((int *)q)[0] = 0;
}
@@ -113,7 +113,7 @@ void vp9_dequant_idct_add_uv_block_4x4_inplace_c(int16_t *q, const int16_t *dq,
for (i = 0; i < 2; i++) {
for (j = 0; j < 2; j++) {
- if (xd->block[16 + i * 2 + j].eob > 1) {
+ if (xd->eobs[16 + i * 2 + j] > 1) {
xd->itxm_add(q, dq, dstu, dstu, stride, stride);
} else {
xd->dc_only_itxm_add(q[0]*dq[0], dstu, dstu, stride, stride);
@@ -129,7 +129,7 @@ void vp9_dequant_idct_add_uv_block_4x4_inplace_c(int16_t *q, const int16_t *dq,
for (i = 0; i < 2; i++) {
for (j = 0; j < 2; j++) {
- if (xd->block[20 + i * 2 + j].eob > 1) {
+ if (xd->eobs[20 + i * 2 + j] > 1) {
xd->itxm_add(q, dq, dstv, dstv, stride, stride);
} else {
xd->dc_only_itxm_add(q[0]*dq[0], dstv, dstv, stride, stride);
@@ -149,18 +149,18 @@ void vp9_dequant_idct_add_y_block_8x8_inplace_c(int16_t *q,
uint8_t *dst,
int stride,
MACROBLOCKD *xd) {
- vp9_dequant_idct_add_8x8_c(q, dq, dst, dst, stride, stride, xd->block[0].eob);
+ vp9_dequant_idct_add_8x8_c(q, dq, dst, dst, stride, stride, xd->eobs[0]);
vp9_dequant_idct_add_8x8_c(&q[64], dq, dst + 8,
- dst + 8, stride, stride, xd->block[4].eob);
+ dst + 8, stride, stride, xd->eobs[4]);
vp9_dequant_idct_add_8x8_c(&q[128], dq, dst + 8 * stride,
dst + 8 * stride, stride, stride,
- xd->block[8].eob);
+ xd->eobs[8]);
vp9_dequant_idct_add_8x8_c(&q[192], dq, dst + 8 * stride + 8,
dst + 8 * stride + 8, stride, stride,
- xd->block[12].eob);
+ xd->eobs[12]);
}
void vp9_dequant_idct_add_y_block_8x8_c(int16_t *q, const int16_t *dq,
@@ -170,15 +170,15 @@ void vp9_dequant_idct_add_y_block_8x8_c(int16_t *q, const int16_t *dq,
uint8_t *origdest = dst;
uint8_t *origpred = pre;
- vp9_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride, xd->block[0].eob);
+ vp9_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride, xd->eobs[0]);
vp9_dequant_idct_add_8x8_c(&q[64], dq, origpred + 8,
- origdest + 8, 16, stride, xd->block[4].eob);
+ origdest + 8, 16, stride, xd->eobs[4]);
vp9_dequant_idct_add_8x8_c(&q[128], dq, origpred + 8 * 16,
origdest + 8 * stride, 16, stride,
- xd->block[8].eob);
+ xd->eobs[8]);
vp9_dequant_idct_add_8x8_c(&q[192], dq, origpred + 8 * 16 + 8,
origdest + 8 * stride + 8, 16, stride,
- xd->block[12].eob);
+ xd->eobs[12]);
}
void vp9_dequant_idct_add_uv_block_8x8_c(int16_t *q, const int16_t *dq,
@@ -186,12 +186,12 @@ void vp9_dequant_idct_add_uv_block_8x8_c(int16_t *q, const int16_t *dq,
uint8_t *dstu,
uint8_t *dstv,
int stride, MACROBLOCKD *xd) {
- vp9_dequant_idct_add_8x8_c(q, dq, pre, dstu, 8, stride, xd->block[16].eob);
+ vp9_dequant_idct_add_8x8_c(q, dq, pre, dstu, 8, stride, xd->eobs[16]);
q += 64;
pre += 64;
- vp9_dequant_idct_add_8x8_c(q, dq, pre, dstv, 8, stride, xd->block[20].eob);
+ vp9_dequant_idct_add_8x8_c(q, dq, pre, dstv, 8, stride, xd->eobs[20]);
}
void vp9_dequant_idct_add_uv_block_8x8_inplace_c(int16_t *q, const int16_t *dq,
@@ -200,11 +200,11 @@ void vp9_dequant_idct_add_uv_block_8x8_inplace_c(int16_t *q, const int16_t *dq,
int stride,
MACROBLOCKD *xd) {
vp9_dequant_idct_add_8x8_c(q, dq, dstu, dstu, stride, stride,
- xd->block[16].eob);
+ xd->eobs[16]);
q += 64;
vp9_dequant_idct_add_8x8_c(q, dq, dstv, dstv, stride, stride,
- xd->block[20].eob);
+ xd->eobs[20]);
}
@@ -216,7 +216,7 @@ void vp9_dequant_idct_add_y_block_lossless_c(int16_t *q, const int16_t *dq,
for (i = 0; i < 4; i++) {
for (j = 0; j < 4; j++) {
- if (xd->block[i * 4 + j].eob > 1)
+ if (xd->eobs[i * 4 + j] > 1)
vp9_dequant_idct_add_lossless_c(q, dq, pre, dst, 16, stride);
else {
vp9_dc_only_inv_walsh_add_c(q[0]*dq[0], pre, dst, 16, stride);
@@ -243,7 +243,7 @@ void vp9_dequant_idct_add_uv_block_lossless_c(int16_t *q, const int16_t *dq,
for (i = 0; i < 2; i++) {
for (j = 0; j < 2; j++) {
- if (xd->block[16 + i * 2 + j].eob > 1)
+ if (xd->eobs[16 + i * 2 + j] > 1)
vp9_dequant_idct_add_lossless_c(q, dq, pre, dstu, 8, stride);
else {
vp9_dc_only_inv_walsh_add_c(q[0]*dq[0], pre, dstu, 8, stride);
@@ -261,7 +261,7 @@ void vp9_dequant_idct_add_uv_block_lossless_c(int16_t *q, const int16_t *dq,
for (i = 0; i < 2; i++) {
for (j = 0; j < 2; j++) {
- if (xd->block[20 + i * 2 + j].eob > 1)
+ if (xd->eobs[20 + i * 2 + j] > 1)
vp9_dequant_idct_add_lossless_c(q, dq, pre, dstv, 8, stride);
else {
vp9_dc_only_inv_walsh_add_c(q[0]*dq[0], pre, dstv, 8, stride);
diff --git a/vp9/decoder/vp9_onyxd_int.h b/vp9/decoder/vp9_onyxd_int.h
index 8c1f76e73..0e6d059af 100644
--- a/vp9/decoder/vp9_onyxd_int.h
+++ b/vp9/decoder/vp9_onyxd_int.h
@@ -38,6 +38,8 @@ typedef struct VP9Decompressor {
int decoded_key_frame;
+ int initial_width;
+ int initial_height;
} VP9D_COMP;
int vp9_decode_frame(VP9D_COMP *cpi, const unsigned char **p_data_end);
diff --git a/vp9/encoder/vp9_asm_enc_offsets.c b/vp9/encoder/vp9_asm_enc_offsets.c
index 71fad2e07..e174a894a 100644
--- a/vp9/encoder/vp9_asm_enc_offsets.c
+++ b/vp9/encoder/vp9_asm_enc_offsets.c
@@ -32,7 +32,6 @@ DEFINE(vp9_block_quant_shift, offsetof(BLOCK, quant_shift));
DEFINE(vp9_blockd_qcoeff, offsetof(BLOCKD, qcoeff));
DEFINE(vp9_blockd_dequant, offsetof(BLOCKD, dequant));
DEFINE(vp9_blockd_dqcoeff, offsetof(BLOCKD, dqcoeff));
-DEFINE(vp9_blockd_eob, offsetof(BLOCKD, eob));
END
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index d888b6fe1..79a021cfb 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -88,7 +88,8 @@ typedef struct superblock {
DECLARE_ALIGNED(16, int16_t, coeff[32*32+16*16*2]);
} SUPERBLOCK;
-typedef struct macroblock {
+typedef struct macroblock MACROBLOCK;
+struct macroblock {
DECLARE_ALIGNED(16, int16_t, src_diff[384]); // 16x16 Y 8x8 U 8x8 V
DECLARE_ALIGNED(16, int16_t, coeff[384]); // 16x16 Y 8x8 U 8x8 V
// 16 Y blocks, 4 U blocks, 4 V blocks,
@@ -171,10 +172,10 @@ typedef struct macroblock {
void (*fwd_txm8x4)(int16_t *input, int16_t *output, int pitch);
void (*fwd_txm8x8)(int16_t *input, int16_t *output, int pitch);
void (*fwd_txm16x16)(int16_t *input, int16_t *output, int pitch);
- void (*quantize_b_4x4)(BLOCK *b, BLOCKD *d);
- void (*quantize_b_4x4_pair)(BLOCK *b1, BLOCK *b2, BLOCKD *d0, BLOCKD *d1);
- void (*quantize_b_16x16)(BLOCK *b, BLOCKD *d);
- void (*quantize_b_8x8)(BLOCK *b, BLOCKD *d);
-} MACROBLOCK;
+ void (*quantize_b_4x4)(MACROBLOCK *x, int b_idx);
+ void (*quantize_b_4x4_pair)(MACROBLOCK *x, int b_idx1, int b_idx2);
+ void (*quantize_b_16x16)(MACROBLOCK *x, int b_idx);
+ void (*quantize_b_8x8)(MACROBLOCK *x, int b_idx);
+};
#endif // VP9_ENCODER_VP9_BLOCK_H_
diff --git a/vp9/encoder/vp9_dct.c b/vp9/encoder/vp9_dct.c
index 9c2203dea..080f4a70b 100644
--- a/vp9/encoder/vp9_dct.c
+++ b/vp9/encoder/vp9_dct.c
@@ -741,412 +741,9 @@ void vp9_short_fht16x16_c(int16_t *input, int16_t *output,
}
}
-#define TEST_INT_32x32_DCT 1
-
-#if !TEST_INT_32x32_DCT
-
-static void dct32_1d(double *input, double *output, int stride) {
- static const double C1 = 0.998795456205; // cos(pi * 1 / 64)
- static const double C2 = 0.995184726672; // cos(pi * 2 / 64)
- static const double C3 = 0.989176509965; // cos(pi * 3 / 64)
- static const double C4 = 0.980785280403; // cos(pi * 4 / 64)
- static const double C5 = 0.970031253195; // cos(pi * 5 / 64)
- static const double C6 = 0.956940335732; // cos(pi * 6 / 64)
- static const double C7 = 0.941544065183; // cos(pi * 7 / 64)
- static const double C8 = 0.923879532511; // cos(pi * 8 / 64)
- static const double C9 = 0.903989293123; // cos(pi * 9 / 64)
- static const double C10 = 0.881921264348; // cos(pi * 10 / 64)
- static const double C11 = 0.857728610000; // cos(pi * 11 / 64)
- static const double C12 = 0.831469612303; // cos(pi * 12 / 64)
- static const double C13 = 0.803207531481; // cos(pi * 13 / 64)
- static const double C14 = 0.773010453363; // cos(pi * 14 / 64)
- static const double C15 = 0.740951125355; // cos(pi * 15 / 64)
- static const double C16 = 0.707106781187; // cos(pi * 16 / 64)
- static const double C17 = 0.671558954847; // cos(pi * 17 / 64)
- static const double C18 = 0.634393284164; // cos(pi * 18 / 64)
- static const double C19 = 0.595699304492; // cos(pi * 19 / 64)
- static const double C20 = 0.555570233020; // cos(pi * 20 / 64)
- static const double C21 = 0.514102744193; // cos(pi * 21 / 64)
- static const double C22 = 0.471396736826; // cos(pi * 22 / 64)
- static const double C23 = 0.427555093430; // cos(pi * 23 / 64)
- static const double C24 = 0.382683432365; // cos(pi * 24 / 64)
- static const double C25 = 0.336889853392; // cos(pi * 25 / 64)
- static const double C26 = 0.290284677254; // cos(pi * 26 / 64)
- static const double C27 = 0.242980179903; // cos(pi * 27 / 64)
- static const double C28 = 0.195090322016; // cos(pi * 28 / 64)
- static const double C29 = 0.146730474455; // cos(pi * 29 / 64)
- static const double C30 = 0.098017140330; // cos(pi * 30 / 64)
- static const double C31 = 0.049067674327; // cos(pi * 31 / 64)
-
- double step[32];
-
- // Stage 1
- step[0] = input[stride*0] + input[stride*(32 - 1)];
- step[1] = input[stride*1] + input[stride*(32 - 2)];
- step[2] = input[stride*2] + input[stride*(32 - 3)];
- step[3] = input[stride*3] + input[stride*(32 - 4)];
- step[4] = input[stride*4] + input[stride*(32 - 5)];
- step[5] = input[stride*5] + input[stride*(32 - 6)];
- step[6] = input[stride*6] + input[stride*(32 - 7)];
- step[7] = input[stride*7] + input[stride*(32 - 8)];
- step[8] = input[stride*8] + input[stride*(32 - 9)];
- step[9] = input[stride*9] + input[stride*(32 - 10)];
- step[10] = input[stride*10] + input[stride*(32 - 11)];
- step[11] = input[stride*11] + input[stride*(32 - 12)];
- step[12] = input[stride*12] + input[stride*(32 - 13)];
- step[13] = input[stride*13] + input[stride*(32 - 14)];
- step[14] = input[stride*14] + input[stride*(32 - 15)];
- step[15] = input[stride*15] + input[stride*(32 - 16)];
- step[16] = -input[stride*16] + input[stride*(32 - 17)];
- step[17] = -input[stride*17] + input[stride*(32 - 18)];
- step[18] = -input[stride*18] + input[stride*(32 - 19)];
- step[19] = -input[stride*19] + input[stride*(32 - 20)];
- step[20] = -input[stride*20] + input[stride*(32 - 21)];
- step[21] = -input[stride*21] + input[stride*(32 - 22)];
- step[22] = -input[stride*22] + input[stride*(32 - 23)];
- step[23] = -input[stride*23] + input[stride*(32 - 24)];
- step[24] = -input[stride*24] + input[stride*(32 - 25)];
- step[25] = -input[stride*25] + input[stride*(32 - 26)];
- step[26] = -input[stride*26] + input[stride*(32 - 27)];
- step[27] = -input[stride*27] + input[stride*(32 - 28)];
- step[28] = -input[stride*28] + input[stride*(32 - 29)];
- step[29] = -input[stride*29] + input[stride*(32 - 30)];
- step[30] = -input[stride*30] + input[stride*(32 - 31)];
- step[31] = -input[stride*31] + input[stride*(32 - 32)];
-
- // Stage 2
- output[stride*0] = step[0] + step[16 - 1];
- output[stride*1] = step[1] + step[16 - 2];
- output[stride*2] = step[2] + step[16 - 3];
- output[stride*3] = step[3] + step[16 - 4];
- output[stride*4] = step[4] + step[16 - 5];
- output[stride*5] = step[5] + step[16 - 6];
- output[stride*6] = step[6] + step[16 - 7];
- output[stride*7] = step[7] + step[16 - 8];
- output[stride*8] = -step[8] + step[16 - 9];
- output[stride*9] = -step[9] + step[16 - 10];
- output[stride*10] = -step[10] + step[16 - 11];
- output[stride*11] = -step[11] + step[16 - 12];
- output[stride*12] = -step[12] + step[16 - 13];
- output[stride*13] = -step[13] + step[16 - 14];
- output[stride*14] = -step[14] + step[16 - 15];
- output[stride*15] = -step[15] + step[16 - 16];
-
- output[stride*16] = step[16];
- output[stride*17] = step[17];
- output[stride*18] = step[18];
- output[stride*19] = step[19];
-
- output[stride*20] = (-step[20] + step[27])*C16;
- output[stride*21] = (-step[21] + step[26])*C16;
- output[stride*22] = (-step[22] + step[25])*C16;
- output[stride*23] = (-step[23] + step[24])*C16;
-
- output[stride*24] = (step[24] + step[23])*C16;
- output[stride*25] = (step[25] + step[22])*C16;
- output[stride*26] = (step[26] + step[21])*C16;
- output[stride*27] = (step[27] + step[20])*C16;
-
- output[stride*28] = step[28];
- output[stride*29] = step[29];
- output[stride*30] = step[30];
- output[stride*31] = step[31];
-
- // Stage 3
- step[0] = output[stride*0] + output[stride*(8 - 1)];
- step[1] = output[stride*1] + output[stride*(8 - 2)];
- step[2] = output[stride*2] + output[stride*(8 - 3)];
- step[3] = output[stride*3] + output[stride*(8 - 4)];
- step[4] = -output[stride*4] + output[stride*(8 - 5)];
- step[5] = -output[stride*5] + output[stride*(8 - 6)];
- step[6] = -output[stride*6] + output[stride*(8 - 7)];
- step[7] = -output[stride*7] + output[stride*(8 - 8)];
- step[8] = output[stride*8];
- step[9] = output[stride*9];
- step[10] = (-output[stride*10] + output[stride*13])*C16;
- step[11] = (-output[stride*11] + output[stride*12])*C16;
- step[12] = (output[stride*12] + output[stride*11])*C16;
- step[13] = (output[stride*13] + output[stride*10])*C16;
- step[14] = output[stride*14];
- step[15] = output[stride*15];
-
- step[16] = output[stride*16] + output[stride*23];
- step[17] = output[stride*17] + output[stride*22];
- step[18] = output[stride*18] + output[stride*21];
- step[19] = output[stride*19] + output[stride*20];
- step[20] = -output[stride*20] + output[stride*19];
- step[21] = -output[stride*21] + output[stride*18];
- step[22] = -output[stride*22] + output[stride*17];
- step[23] = -output[stride*23] + output[stride*16];
- step[24] = -output[stride*24] + output[stride*31];
- step[25] = -output[stride*25] + output[stride*30];
- step[26] = -output[stride*26] + output[stride*29];
- step[27] = -output[stride*27] + output[stride*28];
- step[28] = output[stride*28] + output[stride*27];
- step[29] = output[stride*29] + output[stride*26];
- step[30] = output[stride*30] + output[stride*25];
- step[31] = output[stride*31] + output[stride*24];
-
- // Stage 4
- output[stride*0] = step[0] + step[3];
- output[stride*1] = step[1] + step[2];
- output[stride*2] = -step[2] + step[1];
- output[stride*3] = -step[3] + step[0];
- output[stride*4] = step[4];
- output[stride*5] = (-step[5] + step[6])*C16;
- output[stride*6] = (step[6] + step[5])*C16;
- output[stride*7] = step[7];
- output[stride*8] = step[8] + step[11];
- output[stride*9] = step[9] + step[10];
- output[stride*10] = -step[10] + step[9];
- output[stride*11] = -step[11] + step[8];
- output[stride*12] = -step[12] + step[15];
- output[stride*13] = -step[13] + step[14];
- output[stride*14] = step[14] + step[13];
- output[stride*15] = step[15] + step[12];
-
- output[stride*16] = step[16];
- output[stride*17] = step[17];
- output[stride*18] = step[18]*-C8 + step[29]*C24;
- output[stride*19] = step[19]*-C8 + step[28]*C24;
- output[stride*20] = step[20]*-C24 + step[27]*-C8;
- output[stride*21] = step[21]*-C24 + step[26]*-C8;
- output[stride*22] = step[22];
- output[stride*23] = step[23];
- output[stride*24] = step[24];
- output[stride*25] = step[25];
- output[stride*26] = step[26]*C24 + step[21]*-C8;
- output[stride*27] = step[27]*C24 + step[20]*-C8;
- output[stride*28] = step[28]*C8 + step[19]*C24;
- output[stride*29] = step[29]*C8 + step[18]*C24;
- output[stride*30] = step[30];
- output[stride*31] = step[31];
-
- // Stage 5
- step[0] = (output[stride*0] + output[stride*1]) * C16;
- step[1] = (-output[stride*1] + output[stride*0]) * C16;
- step[2] = output[stride*2]*C24 + output[stride*3] * C8;
- step[3] = output[stride*3]*C24 - output[stride*2] * C8;
- step[4] = output[stride*4] + output[stride*5];
- step[5] = -output[stride*5] + output[stride*4];
- step[6] = -output[stride*6] + output[stride*7];
- step[7] = output[stride*7] + output[stride*6];
- step[8] = output[stride*8];
- step[9] = output[stride*9]*-C8 + output[stride*14]*C24;
- step[10] = output[stride*10]*-C24 + output[stride*13]*-C8;
- step[11] = output[stride*11];
- step[12] = output[stride*12];
- step[13] = output[stride*13]*C24 + output[stride*10]*-C8;
- step[14] = output[stride*14]*C8 + output[stride*9]*C24;
- step[15] = output[stride*15];
-
- step[16] = output[stride*16] + output[stride*19];
- step[17] = output[stride*17] + output[stride*18];
- step[18] = -output[stride*18] + output[stride*17];
- step[19] = -output[stride*19] + output[stride*16];
- step[20] = -output[stride*20] + output[stride*23];
- step[21] = -output[stride*21] + output[stride*22];
- step[22] = output[stride*22] + output[stride*21];
- step[23] = output[stride*23] + output[stride*20];
- step[24] = output[stride*24] + output[stride*27];
- step[25] = output[stride*25] + output[stride*26];
- step[26] = -output[stride*26] + output[stride*25];
- step[27] = -output[stride*27] + output[stride*24];
- step[28] = -output[stride*28] + output[stride*31];
- step[29] = -output[stride*29] + output[stride*30];
- step[30] = output[stride*30] + output[stride*29];
- step[31] = output[stride*31] + output[stride*28];
-
- // Stage 6
- output[stride*0] = step[0];
- output[stride*1] = step[1];
- output[stride*2] = step[2];
- output[stride*3] = step[3];
- output[stride*4] = step[4]*C28 + step[7]*C4;
- output[stride*5] = step[5]*C12 + step[6]*C20;
- output[stride*6] = step[6]*C12 + step[5]*-C20;
- output[stride*7] = step[7]*C28 + step[4]*-C4;
- output[stride*8] = step[8] + step[9];
- output[stride*9] = -step[9] + step[8];
- output[stride*10] = -step[10] + step[11];
- output[stride*11] = step[11] + step[10];
- output[stride*12] = step[12] + step[13];
- output[stride*13] = -step[13] + step[12];
- output[stride*14] = -step[14] + step[15];
- output[stride*15] = step[15] + step[14];
-
- output[stride*16] = step[16];
- output[stride*17] = step[17]*-C4 + step[30]*C28;
- output[stride*18] = step[18]*-C28 + step[29]*-C4;
- output[stride*19] = step[19];
- output[stride*20] = step[20];
- output[stride*21] = step[21]*-C20 + step[26]*C12;
- output[stride*22] = step[22]*-C12 + step[25]*-C20;
- output[stride*23] = step[23];
- output[stride*24] = step[24];
- output[stride*25] = step[25]*C12 + step[22]*-C20;
- output[stride*26] = step[26]*C20 + step[21]*C12;
- output[stride*27] = step[27];
- output[stride*28] = step[28];
- output[stride*29] = step[29]*C28 + step[18]*-C4;
- output[stride*30] = step[30]*C4 + step[17]*C28;
- output[stride*31] = step[31];
-
- // Stage 7
- step[0] = output[stride*0];
- step[1] = output[stride*1];
- step[2] = output[stride*2];
- step[3] = output[stride*3];
- step[4] = output[stride*4];
- step[5] = output[stride*5];
- step[6] = output[stride*6];
- step[7] = output[stride*7];
- step[8] = output[stride*8]*C30 + output[stride*15]*C2;
- step[9] = output[stride*9]*C14 + output[stride*14]*C18;
- step[10] = output[stride*10]*C22 + output[stride*13]*C10;
- step[11] = output[stride*11]*C6 + output[stride*12]*C26;
- step[12] = output[stride*12]*C6 + output[stride*11]*-C26;
- step[13] = output[stride*13]*C22 + output[stride*10]*-C10;
- step[14] = output[stride*14]*C14 + output[stride*9]*-C18;
- step[15] = output[stride*15]*C30 + output[stride*8]*-C2;
-
- step[16] = output[stride*16] + output[stride*17];
- step[17] = -output[stride*17] + output[stride*16];
- step[18] = -output[stride*18] + output[stride*19];
- step[19] = output[stride*19] + output[stride*18];
- step[20] = output[stride*20] + output[stride*21];
- step[21] = -output[stride*21] + output[stride*20];
- step[22] = -output[stride*22] + output[stride*23];
- step[23] = output[stride*23] + output[stride*22];
- step[24] = output[stride*24] + output[stride*25];
- step[25] = -output[stride*25] + output[stride*24];
- step[26] = -output[stride*26] + output[stride*27];
- step[27] = output[stride*27] + output[stride*26];
- step[28] = output[stride*28] + output[stride*29];
- step[29] = -output[stride*29] + output[stride*28];
- step[30] = -output[stride*30] + output[stride*31];
- step[31] = output[stride*31] + output[stride*30];
-
- // Final stage --- outputs indices are bit-reversed.
- output[stride*0] = step[0];
- output[stride*16] = step[1];
- output[stride*8] = step[2];
- output[stride*24] = step[3];
- output[stride*4] = step[4];
- output[stride*20] = step[5];
- output[stride*12] = step[6];
- output[stride*28] = step[7];
- output[stride*2] = step[8];
- output[stride*18] = step[9];
- output[stride*10] = step[10];
- output[stride*26] = step[11];
- output[stride*6] = step[12];
- output[stride*22] = step[13];
- output[stride*14] = step[14];
- output[stride*30] = step[15];
-
- output[stride*1] = step[16]*C31 + step[31]*C1;
- output[stride*17] = step[17]*C15 + step[30]*C17;
- output[stride*9] = step[18]*C23 + step[29]*C9;
- output[stride*25] = step[19]*C7 + step[28]*C25;
- output[stride*5] = step[20]*C27 + step[27]*C5;
- output[stride*21] = step[21]*C11 + step[26]*C21;
- output[stride*13] = step[22]*C19 + step[25]*C13;
- output[stride*29] = step[23]*C3 + step[24]*C29;
- output[stride*3] = step[24]*C3 + step[23]*-C29;
- output[stride*19] = step[25]*C19 + step[22]*-C13;
- output[stride*11] = step[26]*C11 + step[21]*-C21;
- output[stride*27] = step[27]*C27 + step[20]*-C5;
- output[stride*7] = step[28]*C7 + step[19]*-C25;
- output[stride*23] = step[29]*C23 + step[18]*-C9;
- output[stride*15] = step[30]*C15 + step[17]*-C17;
- output[stride*31] = step[31]*C31 + step[16]*-C1;
-}
-
-void vp9_short_fdct32x32_c(int16_t *input, int16_t *out, int pitch) {
- vp9_clear_system_state(); // Make it simd safe : __asm emms;
- {
- int shortpitch = pitch >> 1;
- int i, j;
- double output[1024];
- // First transform columns
- for (i = 0; i < 32; i++) {
- double temp_in[32], temp_out[32];
- for (j = 0; j < 32; j++)
- temp_in[j] = input[j*shortpitch + i];
- dct32_1d(temp_in, temp_out, 1);
- for (j = 0; j < 32; j++)
- output[j*32 + i] = temp_out[j];
- }
- // Then transform rows
- for (i = 0; i < 32; ++i) {
- double temp_in[32], temp_out[32];
- for (j = 0; j < 32; ++j)
- temp_in[j] = output[j + i*32];
- dct32_1d(temp_in, temp_out, 1);
- for (j = 0; j < 32; ++j)
- output[j + i*32] = temp_out[j];
- }
- // Scale by some magic number
- for (i = 0; i < 1024; i++) {
- out[i] = (short)round(output[i]/4);
- }
- }
-
- vp9_clear_system_state(); // Make it simd safe : __asm emms;
-}
-
-#else
-
-#define RIGHT_SHIFT 13
-#define ROUNDING (1 << (RIGHT_SHIFT - 1))
-
-static void dct32_1d(int *input, int *output, int last_shift_bits) {
- static const int16_t C1 = 8182; // 2^13
- static const int16_t C2 = 8153;
- static const int16_t C3 = 8103;
- static const int16_t C4 = 8035;
- static const int16_t C5 = 7946;
- static const int16_t C6 = 7839;
- static const int16_t C7 = 7713;
- static const int16_t C8 = 7568;
- static const int16_t C9 = 7405;
- static const int16_t C10 = 7225;
- static const int16_t C11 = 7027;
- static const int16_t C12 = 6811;
- static const int16_t C13 = 6580;
- static const int16_t C14 = 6333;
- static const int16_t C15 = 6070;
- static const int16_t C16 = 5793;
- static const int16_t C17 = 5501;
- static const int16_t C18 = 5197;
- static const int16_t C19 = 4880;
- static const int16_t C20 = 4551;
- static const int16_t C21 = 4212;
- static const int16_t C22 = 3862;
- static const int16_t C23 = 3503;
- static const int16_t C24 = 3135;
- static const int16_t C25 = 2760;
- static const int16_t C26 = 2378;
- static const int16_t C27 = 1990;
- static const int16_t C28 = 1598;
- static const int16_t C29 = 1202;
- static const int16_t C30 = 803;
- static const int16_t C31 = 402;
+static void dct32_1d(int *input, int *output) {
int step[32];
-
- int last_rounding = 0;
- int final_shift = RIGHT_SHIFT;
- int final_rounding = 0;
-
- if (last_shift_bits > 0)
- last_rounding = 1 << (last_shift_bits - 1);
-
- final_shift += last_shift_bits;
- if (final_shift > 0)
- final_rounding = 1 << (final_shift - 1);
-
// Stage 1
step[0] = input[0] + input[(32 - 1)];
step[1] = input[1] + input[(32 - 2)];
@@ -1204,15 +801,15 @@ static void dct32_1d(int *input, int *output, int last_shift_bits) {
output[18] = step[18];
output[19] = step[19];
- output[20] = ((-step[20] + step[27]) * C16 + ROUNDING) >> RIGHT_SHIFT;
- output[21] = ((-step[21] + step[26]) * C16 + ROUNDING) >> RIGHT_SHIFT;
- output[22] = ((-step[22] + step[25]) * C16 + ROUNDING) >> RIGHT_SHIFT;
- output[23] = ((-step[23] + step[24]) * C16 + ROUNDING) >> RIGHT_SHIFT;
+ output[20] = dct_32_round((-step[20] + step[27]) * cospi_16_64);
+ output[21] = dct_32_round((-step[21] + step[26]) * cospi_16_64);
+ output[22] = dct_32_round((-step[22] + step[25]) * cospi_16_64);
+ output[23] = dct_32_round((-step[23] + step[24]) * cospi_16_64);
- output[24] = ((step[24] + step[23]) * C16 + ROUNDING) >> RIGHT_SHIFT;
- output[25] = ((step[25] + step[22]) * C16 + ROUNDING) >> RIGHT_SHIFT;
- output[26] = ((step[26] + step[21]) * C16 + ROUNDING) >> RIGHT_SHIFT;
- output[27] = ((step[27] + step[20]) * C16 + ROUNDING) >> RIGHT_SHIFT;
+ output[24] = dct_32_round((step[24] + step[23]) * cospi_16_64);
+ output[25] = dct_32_round((step[25] + step[22]) * cospi_16_64);
+ output[26] = dct_32_round((step[26] + step[21]) * cospi_16_64);
+ output[27] = dct_32_round((step[27] + step[20]) * cospi_16_64);
output[28] = step[28];
output[29] = step[29];
@@ -1230,10 +827,10 @@ static void dct32_1d(int *input, int *output, int last_shift_bits) {
step[7] = -output[7] + output[(8 - 8)];
step[8] = output[8];
step[9] = output[9];
- step[10] = ((-output[10] + output[13]) * C16 + ROUNDING) >> RIGHT_SHIFT;
- step[11] = ((-output[11] + output[12]) * C16 + ROUNDING) >> RIGHT_SHIFT;
- step[12] = ((output[12] + output[11]) * C16 + ROUNDING) >> RIGHT_SHIFT;
- step[13] = ((output[13] + output[10]) * C16 + ROUNDING) >> RIGHT_SHIFT;
+ step[10] = dct_32_round((-output[10] + output[13]) * cospi_16_64);
+ step[11] = dct_32_round((-output[11] + output[12]) * cospi_16_64);
+ step[12] = dct_32_round((output[12] + output[11]) * cospi_16_64);
+ step[13] = dct_32_round((output[13] + output[10]) * cospi_16_64);
step[14] = output[14];
step[15] = output[15];
@@ -1260,8 +857,8 @@ static void dct32_1d(int *input, int *output, int last_shift_bits) {
output[2] = -step[2] + step[1];
output[3] = -step[3] + step[0];
output[4] = step[4];
- output[5] = ((-step[5] + step[6]) * C16 + ROUNDING) >> RIGHT_SHIFT;
- output[6] = ((step[6] + step[5]) * C16 + ROUNDING) >> RIGHT_SHIFT;
+ output[5] = dct_32_round((-step[5] + step[6]) * cospi_16_64);
+ output[6] = dct_32_round((step[6] + step[5]) * cospi_16_64);
output[7] = step[7];
output[8] = step[8] + step[11];
output[9] = step[9] + step[10];
@@ -1274,37 +871,37 @@ static void dct32_1d(int *input, int *output, int last_shift_bits) {
output[16] = step[16];
output[17] = step[17];
- output[18] = (step[18] * -C8 + step[29] * C24 + ROUNDING) >> RIGHT_SHIFT;
- output[19] = (step[19] * -C8 + step[28] * C24 + ROUNDING) >> RIGHT_SHIFT;
- output[20] = (step[20] * -C24 + step[27] * -C8 + ROUNDING) >> RIGHT_SHIFT;
- output[21] = (step[21] * -C24 + step[26] * -C8 + ROUNDING) >> RIGHT_SHIFT;
+ output[18] = dct_32_round(step[18] * -cospi_8_64 + step[29] * cospi_24_64);
+ output[19] = dct_32_round(step[19] * -cospi_8_64 + step[28] * cospi_24_64);
+ output[20] = dct_32_round(step[20] * -cospi_24_64 + step[27] * -cospi_8_64);
+ output[21] = dct_32_round(step[21] * -cospi_24_64 + step[26] * -cospi_8_64);
output[22] = step[22];
output[23] = step[23];
output[24] = step[24];
output[25] = step[25];
- output[26] = (step[26] * C24 + step[21] * -C8 + ROUNDING) >> RIGHT_SHIFT;
- output[27] = (step[27] * C24 + step[20] * -C8 + ROUNDING) >> RIGHT_SHIFT;
- output[28] = (step[28] * C8 + step[19] * C24 + ROUNDING) >> RIGHT_SHIFT;
- output[29] = (step[29] * C8 + step[18] * C24 + ROUNDING) >> RIGHT_SHIFT;
+ output[26] = dct_32_round(step[26] * cospi_24_64 + step[21] * -cospi_8_64);
+ output[27] = dct_32_round(step[27] * cospi_24_64 + step[20] * -cospi_8_64);
+ output[28] = dct_32_round(step[28] * cospi_8_64 + step[19] * cospi_24_64);
+ output[29] = dct_32_round(step[29] * cospi_8_64 + step[18] * cospi_24_64);
output[30] = step[30];
output[31] = step[31];
// Stage 5
- step[0] = ((output[0] + output[1]) * C16 + ROUNDING) >> RIGHT_SHIFT;
- step[1] = ((-output[1] + output[0]) * C16 + ROUNDING) >> RIGHT_SHIFT;
- step[2] = (output[2] * C24 + output[3] * C8 + ROUNDING) >> RIGHT_SHIFT;
- step[3] = (output[3] * C24 - output[2] * C8 + ROUNDING) >> RIGHT_SHIFT;
+ step[0] = dct_32_round((output[0] + output[1]) * cospi_16_64);
+ step[1] = dct_32_round((-output[1] + output[0]) * cospi_16_64);
+ step[2] = dct_32_round(output[2] * cospi_24_64 + output[3] * cospi_8_64);
+ step[3] = dct_32_round(output[3] * cospi_24_64 - output[2] * cospi_8_64);
step[4] = output[4] + output[5];
step[5] = -output[5] + output[4];
step[6] = -output[6] + output[7];
step[7] = output[7] + output[6];
step[8] = output[8];
- step[9] = (output[9] * -C8 + output[14] * C24 + ROUNDING) >> RIGHT_SHIFT;
- step[10] = (output[10] * -C24 + output[13] * -C8 + ROUNDING) >> RIGHT_SHIFT;
+ step[9] = dct_32_round(output[9] * -cospi_8_64 + output[14] * cospi_24_64);
+ step[10] = dct_32_round(output[10] * -cospi_24_64 + output[13] * -cospi_8_64);
step[11] = output[11];
step[12] = output[12];
- step[13] = (output[13] * C24 + output[10] * -C8 + ROUNDING) >> RIGHT_SHIFT;
- step[14] = (output[14] * C8 + output[9] * C24 + ROUNDING) >> RIGHT_SHIFT;
+ step[13] = dct_32_round(output[13] * cospi_24_64 + output[10] * -cospi_8_64);
+ step[14] = dct_32_round(output[14] * cospi_8_64 + output[9] * cospi_24_64);
step[15] = output[15];
step[16] = output[16] + output[19];
@@ -1329,10 +926,10 @@ static void dct32_1d(int *input, int *output, int last_shift_bits) {
output[1] = step[1];
output[2] = step[2];
output[3] = step[3];
- output[4] = (step[4] * C28 + step[7] * C4 + ROUNDING) >> RIGHT_SHIFT;
- output[5] = (step[5] * C12 + step[6] * C20 + ROUNDING) >> RIGHT_SHIFT;
- output[6] = (step[6] * C12 + step[5] * -C20 + ROUNDING) >> RIGHT_SHIFT;
- output[7] = (step[7] * C28 + step[4] * -C4 + ROUNDING) >> RIGHT_SHIFT;
+ output[4] = dct_32_round(step[4] * cospi_28_64 + step[7] * cospi_4_64);
+ output[5] = dct_32_round(step[5] * cospi_12_64 + step[6] * cospi_20_64);
+ output[6] = dct_32_round(step[6] * cospi_12_64 + step[5] * -cospi_20_64);
+ output[7] = dct_32_round(step[7] * cospi_28_64 + step[4] * -cospi_4_64);
output[8] = step[8] + step[9];
output[9] = -step[9] + step[8];
output[10] = -step[10] + step[11];
@@ -1343,20 +940,20 @@ static void dct32_1d(int *input, int *output, int last_shift_bits) {
output[15] = step[15] + step[14];
output[16] = step[16];
- output[17] = (step[17] * -C4 + step[30] * C28 + ROUNDING) >> RIGHT_SHIFT;
- output[18] = (step[18] * -C28 + step[29] * -C4 + ROUNDING) >> RIGHT_SHIFT;
+ output[17] = dct_32_round(step[17] * -cospi_4_64 + step[30] * cospi_28_64);
+ output[18] = dct_32_round(step[18] * -cospi_28_64 + step[29] * -cospi_4_64);
output[19] = step[19];
output[20] = step[20];
- output[21] = (step[21] * -C20 + step[26] * C12 + ROUNDING) >> RIGHT_SHIFT;
- output[22] = (step[22] * -C12 + step[25] * -C20 + ROUNDING) >> RIGHT_SHIFT;
+ output[21] = dct_32_round(step[21] * -cospi_20_64 + step[26] * cospi_12_64);
+ output[22] = dct_32_round(step[22] * -cospi_12_64 + step[25] * -cospi_20_64);
output[23] = step[23];
output[24] = step[24];
- output[25] = (step[25] * C12 + step[22] * -C20 + ROUNDING) >> RIGHT_SHIFT;
- output[26] = (step[26] * C20 + step[21] * C12 + ROUNDING) >> RIGHT_SHIFT;
+ output[25] = dct_32_round(step[25] * cospi_12_64 + step[22] * -cospi_20_64);
+ output[26] = dct_32_round(step[26] * cospi_20_64 + step[21] * cospi_12_64);
output[27] = step[27];
output[28] = step[28];
- output[29] = (step[29] * C28 + step[18] * -C4 + ROUNDING) >> RIGHT_SHIFT;
- output[30] = (step[30] * C4 + step[17] * C28 + ROUNDING) >> RIGHT_SHIFT;
+ output[29] = dct_32_round(step[29] * cospi_28_64 + step[18] * -cospi_4_64);
+ output[30] = dct_32_round(step[30] * cospi_4_64 + step[17] * cospi_28_64);
output[31] = step[31];
// Stage 7
@@ -1368,14 +965,14 @@ static void dct32_1d(int *input, int *output, int last_shift_bits) {
step[5] = output[5];
step[6] = output[6];
step[7] = output[7];
- step[8] = (output[8] * C30 + output[15] * C2 + ROUNDING) >> RIGHT_SHIFT;
- step[9] = (output[9] * C14 + output[14] * C18 + ROUNDING) >> RIGHT_SHIFT;
- step[10] = (output[10] * C22 + output[13] * C10 + ROUNDING) >> RIGHT_SHIFT;
- step[11] = (output[11] * C6 + output[12] * C26 + ROUNDING) >> RIGHT_SHIFT;
- step[12] = (output[12] * C6 + output[11] * -C26 + ROUNDING) >> RIGHT_SHIFT;
- step[13] = (output[13] * C22 + output[10] * -C10 + ROUNDING) >> RIGHT_SHIFT;
- step[14] = (output[14] * C14 + output[9] * -C18 + ROUNDING) >> RIGHT_SHIFT;
- step[15] = (output[15] * C30 + output[8] * -C2 + ROUNDING) >> RIGHT_SHIFT;
+ step[8] = dct_32_round(output[8] * cospi_30_64 + output[15] * cospi_2_64);
+ step[9] = dct_32_round(output[9] * cospi_14_64 + output[14] * cospi_18_64);
+ step[10] = dct_32_round(output[10] * cospi_22_64 + output[13] * cospi_10_64);
+ step[11] = dct_32_round(output[11] * cospi_6_64 + output[12] * cospi_26_64);
+ step[12] = dct_32_round(output[12] * cospi_6_64 + output[11] * -cospi_26_64);
+ step[13] = dct_32_round(output[13] * cospi_22_64 + output[10] * -cospi_10_64);
+ step[14] = dct_32_round(output[14] * cospi_14_64 + output[9] * -cospi_18_64);
+ step[15] = dct_32_round(output[15] * cospi_30_64 + output[8] * -cospi_2_64);
step[16] = output[16] + output[17];
step[17] = -output[17] + output[16];
@@ -1395,62 +992,40 @@ static void dct32_1d(int *input, int *output, int last_shift_bits) {
step[31] = output[31] + output[30];
// Final stage --- outputs indices are bit-reversed.
- output[0] = (step[0] + last_rounding) >> last_shift_bits;
- output[16] = (step[1] + last_rounding) >> last_shift_bits;
- output[8] = (step[2] + last_rounding) >> last_shift_bits;
- output[24] = (step[3] + last_rounding) >> last_shift_bits;
- output[4] = (step[4] + last_rounding) >> last_shift_bits;
- output[20] = (step[5] + last_rounding) >> last_shift_bits;
- output[12] = (step[6] + last_rounding) >> last_shift_bits;
- output[28] = (step[7] + last_rounding) >> last_shift_bits;
- output[2] = (step[8] + last_rounding) >> last_shift_bits;
- output[18] = (step[9] + last_rounding) >> last_shift_bits;
- output[10] = (step[10] + last_rounding) >> last_shift_bits;
- output[26] = (step[11] + last_rounding) >> last_shift_bits;
- output[6] = (step[12] + last_rounding) >> last_shift_bits;
- output[22] = (step[13] + last_rounding) >> last_shift_bits;
- output[14] = (step[14] + last_rounding) >> last_shift_bits;
- output[30] = (step[15] + last_rounding) >> last_shift_bits;
-
- output[1] = (step[16] * C31 + step[31] * C1 + final_rounding) >> final_shift;
- output[17] = (step[17] * C15 + step[30] * C17 + final_rounding)
- >> final_shift;
- output[9] = (step[18] * C23 + step[29] * C9 + final_rounding) >> final_shift;
- output[25] = (step[19] * C7 + step[28] * C25 + final_rounding) >> final_shift;
- output[5] = (step[20] * C27 + step[27] * C5 + final_rounding) >> final_shift;
- output[21] = (step[21] * C11 + step[26] * C21 + final_rounding)
- >> final_shift;
- output[13] = (step[22] * C19 + step[25] * C13 + final_rounding)
- >> final_shift;
- output[29] = (step[23] * C3 + step[24] * C29 + final_rounding) >> final_shift;
- output[3] = (step[24] * C3 + step[23] * -C29 + final_rounding) >> final_shift;
- output[19] = (step[25] * C19 + step[22] * -C13 + final_rounding)
- >> final_shift;
- output[11] = (step[26] * C11 + step[21] * -C21 + final_rounding)
- >> final_shift;
- output[27] = (step[27] * C27 + step[20] * -C5 + final_rounding)
- >> final_shift;
- output[7] = (step[28] * C7 + step[19] * -C25 + final_rounding) >> final_shift;
- output[23] = (step[29] * C23 + step[18] * -C9 + final_rounding)
- >> final_shift;
- output[15] = (step[30] * C15 + step[17] * -C17 + final_rounding)
- >> final_shift;
- output[31] = (step[31] * C31 + step[16] * -C1 + final_rounding)
- >> final_shift;
-
- // Clamp to fit 16-bit.
- if (last_shift_bits > 0) {
- int i;
-
- for (i = 0; i < 32; i++)
- if (output[i] < -32768)
- output[i] = -32768;
- else if (output[i] > 32767)
- output[i] = 32767;
- }
+ output[0] = step[0];
+ output[16] = step[1];
+ output[8] = step[2];
+ output[24] = step[3];
+ output[4] = step[4];
+ output[20] = step[5];
+ output[12] = step[6];
+ output[28] = step[7];
+ output[2] = step[8];
+ output[18] = step[9];
+ output[10] = step[10];
+ output[26] = step[11];
+ output[6] = step[12];
+ output[22] = step[13];
+ output[14] = step[14];
+ output[30] = step[15];
+
+ output[1] = dct_32_round(step[16] * cospi_31_64 + step[31] * cospi_1_64);
+ output[17] = dct_32_round(step[17] * cospi_15_64 + step[30] * cospi_17_64);
+ output[9] = dct_32_round(step[18] * cospi_23_64 + step[29] * cospi_9_64);
+ output[25] = dct_32_round(step[19] * cospi_7_64 + step[28] * cospi_25_64);
+ output[5] = dct_32_round(step[20] * cospi_27_64 + step[27] * cospi_5_64);
+ output[21] = dct_32_round(step[21] * cospi_11_64 + step[26] * cospi_21_64);
+ output[13] = dct_32_round(step[22] * cospi_19_64 + step[25] * cospi_13_64);
+ output[29] = dct_32_round(step[23] * cospi_3_64 + step[24] * cospi_29_64);
+ output[3] = dct_32_round(step[24] * cospi_3_64 + step[23] * -cospi_29_64);
+ output[19] = dct_32_round(step[25] * cospi_19_64 + step[22] * -cospi_13_64);
+ output[11] = dct_32_round(step[26] * cospi_11_64 + step[21] * -cospi_21_64);
+ output[27] = dct_32_round(step[27] * cospi_27_64 + step[20] * -cospi_5_64);
+ output[7] = dct_32_round(step[28] * cospi_7_64 + step[19] * -cospi_25_64);
+ output[23] = dct_32_round(step[29] * cospi_23_64 + step[18] * -cospi_9_64);
+ output[15] = dct_32_round(step[30] * cospi_15_64 + step[17] * -cospi_17_64);
+ output[31] = dct_32_round(step[31] * cospi_31_64 + step[16] * -cospi_1_64);
}
-#undef RIGHT_SHIFT
-#undef ROUNDING
void vp9_short_fdct32x32_c(int16_t *input, int16_t *out, int pitch) {
int shortpitch = pitch >> 1;
@@ -1460,10 +1035,10 @@ void vp9_short_fdct32x32_c(int16_t *input, int16_t *out, int pitch) {
for (i = 0; i < 32; i++) {
int temp_in[32], temp_out[32];
for (j = 0; j < 32; j++)
- temp_in[j] = input[j * shortpitch + i];
- dct32_1d(temp_in, temp_out, 0);
+ temp_in[j] = input[j * shortpitch + i] << 2;
+ dct32_1d(temp_in, temp_out);
for (j = 0; j < 32; j++)
- output[j * 32 + i] = temp_out[j];
+ output[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2;
}
// Then transform rows
@@ -1471,10 +1046,9 @@ void vp9_short_fdct32x32_c(int16_t *input, int16_t *out, int pitch) {
int temp_in[32], temp_out[32];
for (j = 0; j < 32; ++j)
temp_in[j] = output[j + i * 32];
- dct32_1d(temp_in, temp_out, 2);
+ dct32_1d(temp_in, temp_out);
for (j = 0; j < 32; ++j)
- out[j + i * 32] = temp_out[j];
+ out[j + i * 32] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2;
}
}
-#endif
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index fec5a7c61..c0fe5ac76 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -654,7 +654,7 @@ static void set_offsets(VP9_COMP *cpi,
// Set up destination pointers
setup_pred_block(&xd->dst,
&cm->yv12_fb[dst_fb_idx],
- mb_row, mb_col);
+ mb_row, mb_col, NULL, NULL);
/* Set up limit values for MV components to prevent them from
* extending beyond the UMV borders assuming 16x16 block size */
@@ -679,7 +679,7 @@ static void set_offsets(VP9_COMP *cpi,
xd->right_available = (mb_col + block_size < cm->cur_tile_mb_col_end);
/* set up source buffers */
- setup_pred_block(&x->src, cpi->Source, mb_row, mb_col);
+ setup_pred_block(&x->src, cpi->Source, mb_row, mb_col, NULL, NULL);
/* R/D setup */
x->rddiv = cpi->RDDIV;
@@ -1187,7 +1187,7 @@ static void init_encode_frame_mb_context(VP9_COMP *cpi) {
// Copy data over into macro block data structures.
x->src = *cpi->Source;
- xd->pre = cm->yv12_fb[cm->active_ref_idx[cpi->lst_fb_idx]];
+ xd->pre = cm->yv12_fb[cm->ref_frame_map[cpi->lst_fb_idx]];
xd->dst = cm->yv12_fb[cm->new_fb_idx];
// set up frame for intra coded blocks
@@ -1272,9 +1272,6 @@ static void encode_frame_internal(VP9_COMP *cpi) {
totalrate = 0;
- // Functions setup for all frame types so we can use MC in AltRef
- vp9_setup_interp_filters(xd, cm->mcomp_filter_type, cm);
-
// Reset frame count of inter 0,0 motion vector usage.
cpi->inter_zz_count = 0;
@@ -2092,33 +2089,35 @@ static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t,
assert(cm->frame_type != KEY_FRAME);
if (mbmi->ref_frame == LAST_FRAME)
- ref_fb_idx = cpi->common.active_ref_idx[cpi->lst_fb_idx];
+ ref_fb_idx = cpi->common.ref_frame_map[cpi->lst_fb_idx];
else if (mbmi->ref_frame == GOLDEN_FRAME)
- ref_fb_idx = cpi->common.active_ref_idx[cpi->gld_fb_idx];
+ ref_fb_idx = cpi->common.ref_frame_map[cpi->gld_fb_idx];
else
- ref_fb_idx = cpi->common.active_ref_idx[cpi->alt_fb_idx];
+ ref_fb_idx = cpi->common.ref_frame_map[cpi->alt_fb_idx];
setup_pred_block(&xd->pre,
&cpi->common.yv12_fb[ref_fb_idx],
- mb_row, mb_col);
+ mb_row, mb_col,
+ &xd->scale_factor[0], &xd->scale_factor_uv[0]);
if (mbmi->second_ref_frame > 0) {
int second_ref_fb_idx;
if (mbmi->second_ref_frame == LAST_FRAME)
- second_ref_fb_idx = cpi->common.active_ref_idx[cpi->lst_fb_idx];
+ second_ref_fb_idx = cpi->common.ref_frame_map[cpi->lst_fb_idx];
else if (mbmi->second_ref_frame == GOLDEN_FRAME)
- second_ref_fb_idx = cpi->common.active_ref_idx[cpi->gld_fb_idx];
+ second_ref_fb_idx = cpi->common.ref_frame_map[cpi->gld_fb_idx];
else
- second_ref_fb_idx = cpi->common.active_ref_idx[cpi->alt_fb_idx];
+ second_ref_fb_idx = cpi->common.ref_frame_map[cpi->alt_fb_idx];
setup_pred_block(&xd->second_pre,
&cpi->common.yv12_fb[second_ref_fb_idx],
- mb_row, mb_col);
+ mb_row, mb_col,
+ &xd->scale_factor[1], &xd->scale_factor_uv[1]);
}
if (!x->skip) {
- vp9_encode_inter16x16(x);
+ vp9_encode_inter16x16(x, mb_row, mb_col);
// Clear mb_skip_coeff if mb_no_coeff_skip is not set
if (!cpi->common.mb_no_coeff_skip)
@@ -2130,7 +2129,8 @@ static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t,
xd->dst.u_buffer,
xd->dst.v_buffer,
xd->dst.y_stride,
- xd->dst.uv_stride);
+ xd->dst.uv_stride,
+ mb_row, mb_col);
#if CONFIG_COMP_INTERINTRA_PRED
if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) {
vp9_build_interintra_16x16_predictors_mb(xd,
@@ -2319,34 +2319,37 @@ static void encode_superblock32(VP9_COMP *cpi, TOKENEXTRA **t,
assert(cm->frame_type != KEY_FRAME);
if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
- ref_fb_idx = cpi->common.active_ref_idx[cpi->lst_fb_idx];
+ ref_fb_idx = cpi->common.ref_frame_map[cpi->lst_fb_idx];
else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
- ref_fb_idx = cpi->common.active_ref_idx[cpi->gld_fb_idx];
+ ref_fb_idx = cpi->common.ref_frame_map[cpi->gld_fb_idx];
else
- ref_fb_idx = cpi->common.active_ref_idx[cpi->alt_fb_idx];
+ ref_fb_idx = cpi->common.ref_frame_map[cpi->alt_fb_idx];
setup_pred_block(&xd->pre,
&cpi->common.yv12_fb[ref_fb_idx],
- mb_row, mb_col);
+ mb_row, mb_col,
+ &xd->scale_factor[0], &xd->scale_factor_uv[0]);
if (xd->mode_info_context->mbmi.second_ref_frame > 0) {
int second_ref_fb_idx;
if (xd->mode_info_context->mbmi.second_ref_frame == LAST_FRAME)
- second_ref_fb_idx = cpi->common.active_ref_idx[cpi->lst_fb_idx];
+ second_ref_fb_idx = cpi->common.ref_frame_map[cpi->lst_fb_idx];
else if (xd->mode_info_context->mbmi.second_ref_frame == GOLDEN_FRAME)
- second_ref_fb_idx = cpi->common.active_ref_idx[cpi->gld_fb_idx];
+ second_ref_fb_idx = cpi->common.ref_frame_map[cpi->gld_fb_idx];
else
- second_ref_fb_idx = cpi->common.active_ref_idx[cpi->alt_fb_idx];
+ second_ref_fb_idx = cpi->common.ref_frame_map[cpi->alt_fb_idx];
setup_pred_block(&xd->second_pre,
&cpi->common.yv12_fb[second_ref_fb_idx],
- mb_row, mb_col);
+ mb_row, mb_col,
+ &xd->scale_factor[1], &xd->scale_factor_uv[1]);
}
vp9_build_inter32x32_predictors_sb(xd, xd->dst.y_buffer,
xd->dst.u_buffer, xd->dst.v_buffer,
- xd->dst.y_stride, xd->dst.uv_stride);
+ xd->dst.y_stride, xd->dst.uv_stride,
+ mb_row, mb_col);
}
if (xd->mode_info_context->mbmi.txfm_size == TX_32X32) {
@@ -2545,34 +2548,37 @@ static void encode_superblock64(VP9_COMP *cpi, TOKENEXTRA **t,
assert(cm->frame_type != KEY_FRAME);
if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
- ref_fb_idx = cpi->common.active_ref_idx[cpi->lst_fb_idx];
+ ref_fb_idx = cpi->common.ref_frame_map[cpi->lst_fb_idx];
else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
- ref_fb_idx = cpi->common.active_ref_idx[cpi->gld_fb_idx];
+ ref_fb_idx = cpi->common.ref_frame_map[cpi->gld_fb_idx];
else
- ref_fb_idx = cpi->common.active_ref_idx[cpi->alt_fb_idx];
+ ref_fb_idx = cpi->common.ref_frame_map[cpi->alt_fb_idx];
setup_pred_block(&xd->pre,
&cpi->common.yv12_fb[ref_fb_idx],
- mb_row, mb_col);
+ mb_row, mb_col,
+ &xd->scale_factor[0], &xd->scale_factor_uv[0]);
if (xd->mode_info_context->mbmi.second_ref_frame > 0) {
int second_ref_fb_idx;
if (xd->mode_info_context->mbmi.second_ref_frame == LAST_FRAME)
- second_ref_fb_idx = cpi->common.active_ref_idx[cpi->lst_fb_idx];
+ second_ref_fb_idx = cpi->common.ref_frame_map[cpi->lst_fb_idx];
else if (xd->mode_info_context->mbmi.second_ref_frame == GOLDEN_FRAME)
- second_ref_fb_idx = cpi->common.active_ref_idx[cpi->gld_fb_idx];
+ second_ref_fb_idx = cpi->common.ref_frame_map[cpi->gld_fb_idx];
else
- second_ref_fb_idx = cpi->common.active_ref_idx[cpi->alt_fb_idx];
+ second_ref_fb_idx = cpi->common.ref_frame_map[cpi->alt_fb_idx];
setup_pred_block(&xd->second_pre,
&cpi->common.yv12_fb[second_ref_fb_idx],
- mb_row, mb_col);
+ mb_row, mb_col,
+ &xd->scale_factor[1], &xd->scale_factor_uv[1]);
}
vp9_build_inter64x64_predictors_sb(xd, xd->dst.y_buffer,
xd->dst.u_buffer, xd->dst.v_buffer,
- xd->dst.y_stride, xd->dst.uv_stride);
+ xd->dst.y_stride, xd->dst.uv_stride,
+ mb_row, mb_col);
}
if (xd->mode_info_context->mbmi.txfm_size == TX_32X32) {
diff --git a/vp9/encoder/vp9_encodeintra.c b/vp9/encoder/vp9_encodeintra.c
index 43bb4640c..076e1ec0d 100644
--- a/vp9/encoder/vp9_encodeintra.c
+++ b/vp9/encoder/vp9_encodeintra.c
@@ -55,11 +55,11 @@ void vp9_encode_intra4x4block(MACROBLOCK *x, int ib) {
tx_type = get_tx_type_4x4(&x->e_mbd, b);
if (tx_type != DCT_DCT) {
vp9_short_fht4x4(be->src_diff, be->coeff, 16, tx_type);
- vp9_ht_quantize_b_4x4(be, b, tx_type);
+ vp9_ht_quantize_b_4x4(x, ib, tx_type);
vp9_short_iht4x4(b->dqcoeff, b->diff, 16, tx_type);
} else {
x->fwd_txm4x4(be->src_diff, be->coeff, 32);
- x->quantize_b_4x4(be, b) ;
+ x->quantize_b_4x4(x, ib);
vp9_inverse_transform_b_4x4(&x->e_mbd, ib, 32);
}
@@ -150,12 +150,12 @@ void vp9_encode_intra8x8(MACROBLOCK *x, int ib) {
tx_type = get_tx_type_8x8(xd, &xd->block[ib]);
if (tx_type != DCT_DCT) {
vp9_short_fht8x8(be->src_diff, (x->block + idx)->coeff, 16, tx_type);
- x->quantize_b_8x8(x->block + idx, xd->block + idx);
+ x->quantize_b_8x8(x, idx);
vp9_short_iht8x8(xd->block[idx].dqcoeff, xd->block[ib].diff,
16, tx_type);
} else {
x->fwd_txm8x8(be->src_diff, (x->block + idx)->coeff, 32);
- x->quantize_b_8x8(x->block + idx, xd->block + idx);
+ x->quantize_b_8x8(x, idx);
vp9_short_idct8x8(xd->block[idx].dqcoeff, xd->block[ib].diff, 32);
}
} else {
@@ -165,17 +165,17 @@ void vp9_encode_intra8x8(MACROBLOCK *x, int ib) {
tx_type = get_tx_type_4x4(xd, b);
if (tx_type != DCT_DCT) {
vp9_short_fht4x4(be->src_diff, be->coeff, 16, tx_type);
- vp9_ht_quantize_b_4x4(be, b, tx_type);
+ vp9_ht_quantize_b_4x4(x, ib + iblock[i], tx_type);
vp9_short_iht4x4(b->dqcoeff, b->diff, 16, tx_type);
} else if (!(i & 1) && get_tx_type_4x4(xd, b + 1) == DCT_DCT) {
x->fwd_txm8x4(be->src_diff, be->coeff, 32);
- x->quantize_b_4x4_pair(be, be + 1, b, b + 1);
+ x->quantize_b_4x4_pair(x, ib + iblock[i], ib + iblock[i] + 1);
vp9_inverse_transform_b_4x4(xd, ib + iblock[i], 32);
vp9_inverse_transform_b_4x4(xd, ib + iblock[i] + 1, 32);
i++;
} else {
x->fwd_txm4x4(be->src_diff, be->coeff, 32);
- x->quantize_b_4x4(be, b);
+ x->quantize_b_4x4(x, ib + iblock[i]);
vp9_inverse_transform_b_4x4(xd, ib + iblock[i], 32);
}
}
@@ -208,7 +208,7 @@ static void encode_intra_uv4x4(MACROBLOCK *x, int ib,
vp9_subtract_b(be, b, 8);
x->fwd_txm4x4(be->src_diff, be->coeff, 16);
- x->quantize_b_4x4(be, b);
+ x->quantize_b_4x4(x, ib);
vp9_inverse_transform_b_4x4(&x->e_mbd, ib, 16);
vp9_recon_uv_b_c(b->predictor, b->diff, *(b->base_dst) + b->dst,
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index a753bf40f..edb301895 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -315,14 +315,15 @@ static void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type,
ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
int tx_size) {
const int ref = mb->e_mbd.mode_info_context->mbmi.ref_frame != INTRA_FRAME;
+ MACROBLOCKD *const xd = &mb->e_mbd;
BLOCK *b = &mb->block[i];
- BLOCKD *d = &mb->e_mbd.block[i];
+ BLOCKD *d = &xd->block[i];
vp9_token_state tokens[257][2];
unsigned best_index[257][2];
const int16_t *dequant_ptr = d->dequant, *coeff_ptr = b->coeff;
int16_t *qcoeff_ptr = d->qcoeff;
int16_t *dqcoeff_ptr = d->dqcoeff;
- int eob = d->eob, final_eob, sz = 0;
+ int eob = xd->eobs[i], final_eob, sz = 0;
const int i0 = 0;
int rc, x, next;
int64_t rdmult, rddiv, rd_cost0, rd_cost1;
@@ -527,8 +528,8 @@ static void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type,
}
final_eob++;
- d->eob = final_eob;
- *a = *l = (d->eob > 0);
+ xd->eobs[d - xd->block] = final_eob;
+ *a = *l = (final_eob > 0);
}
void vp9_optimize_mby_4x4(MACROBLOCK *x) {
@@ -597,13 +598,8 @@ void vp9_optimize_mby_8x8(MACROBLOCK *x) {
for (b = 0; b < 16; b += 4) {
ENTROPY_CONTEXT *const a = ta + vp9_block2above[TX_8X8][b];
ENTROPY_CONTEXT *const l = tl + vp9_block2left[TX_8X8][b];
-#if CONFIG_CNVCONTEXT
ENTROPY_CONTEXT above_ec = (a[0] + a[1]) != 0;
ENTROPY_CONTEXT left_ec = (l[0] + l[1]) != 0;
-#else
- ENTROPY_CONTEXT above_ec = a[0];
- ENTROPY_CONTEXT left_ec = l[0];
-#endif
optimize_b(x, b, PLANE_TYPE_Y_WITH_DC, &above_ec, &left_ec, TX_8X8);
a[1] = a[0] = above_ec;
l[1] = l[0] = left_ec;
@@ -621,13 +617,8 @@ void vp9_optimize_mbuv_8x8(MACROBLOCK *x) {
for (b = 16; b < 24; b += 4) {
ENTROPY_CONTEXT *const a = ta + vp9_block2above[TX_8X8][b];
ENTROPY_CONTEXT *const l = tl + vp9_block2left[TX_8X8][b];
-#if CONFIG_CNVCONTEXT
ENTROPY_CONTEXT above_ec = (a[0] + a[1]) != 0;
ENTROPY_CONTEXT left_ec = (l[0] + l[1]) != 0;
-#else
- ENTROPY_CONTEXT above_ec = a[0];
- ENTROPY_CONTEXT left_ec = l[0];
-#endif
optimize_b(x, b, PLANE_TYPE_UV, &above_ec, &left_ec, TX_8X8);
}
}
@@ -645,13 +636,8 @@ void vp9_optimize_mby_16x16(MACROBLOCK *x) {
if (!t_above || !t_left)
return;
-#if CONFIG_CNVCONTEXT
ta = (t_above->y1[0] + t_above->y1[1] + t_above->y1[2] + t_above->y1[3]) != 0;
tl = (t_left->y1[0] + t_left->y1[1] + t_left->y1[2] + t_left->y1[3]) != 0;
-#else
- ta = t_above->y1[0];
- tl = t_left->y1[0];
-#endif
optimize_b(x, 0, PLANE_TYPE_Y_WITH_DC, &ta, &tl, TX_16X16);
}
@@ -699,21 +685,21 @@ void vp9_fidct_mb(MACROBLOCK *x) {
}
}
-void vp9_encode_inter16x16(MACROBLOCK *x) {
+void vp9_encode_inter16x16(MACROBLOCK *x, int mb_row, int mb_col) {
MACROBLOCKD *const xd = &x->e_mbd;
- vp9_build_inter_predictors_mb(xd);
+ vp9_build_inter_predictors_mb(xd, mb_row, mb_col);
subtract_mb(x);
vp9_fidct_mb(x);
vp9_recon_mb(xd);
}
/* this function is used by first pass only */
-void vp9_encode_inter16x16y(MACROBLOCK *x) {
+void vp9_encode_inter16x16y(MACROBLOCK *x, int mb_row, int mb_col) {
MACROBLOCKD *xd = &x->e_mbd;
BLOCK *b = &x->block[0];
- vp9_build_inter16x16_predictors_mby(xd, xd->predictor, 16);
+ vp9_build_inter16x16_predictors_mby(xd, xd->predictor, 16, mb_row, mb_col);
vp9_subtract_mby(x->src_diff, *(b->base_src), xd->predictor, b->src_stride);
diff --git a/vp9/encoder/vp9_encodemb.h b/vp9/encoder/vp9_encodemb.h
index f3c679227..6356df215 100644
--- a/vp9/encoder/vp9_encodemb.h
+++ b/vp9/encoder/vp9_encodemb.h
@@ -23,14 +23,14 @@ typedef struct {
#include "vp9/encoder/vp9_onyx_int.h"
struct VP9_ENCODER_RTCD;
-void vp9_encode_inter16x16(MACROBLOCK *x);
+void vp9_encode_inter16x16(MACROBLOCK *x, int mb_row, int mb_col);
void vp9_transform_mbuv_4x4(MACROBLOCK *x);
void vp9_transform_mby_4x4(MACROBLOCK *x);
void vp9_optimize_mby_4x4(MACROBLOCK *x);
void vp9_optimize_mbuv_4x4(MACROBLOCK *x);
-void vp9_encode_inter16x16y(MACROBLOCK *x);
+void vp9_encode_inter16x16y(MACROBLOCK *x, int mb_row, int mb_col);
void vp9_transform_mb_8x8(MACROBLOCK *mb);
void vp9_transform_mby_8x8(MACROBLOCK *x);
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index 6aee9ef0d..4d0a299e8 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -436,10 +436,10 @@ void vp9_first_pass(VP9_COMP *cpi) {
int recon_yoffset, recon_uvoffset;
YV12_BUFFER_CONFIG *lst_yv12 =
- &cm->yv12_fb[cm->active_ref_idx[cpi->lst_fb_idx]];
+ &cm->yv12_fb[cm->ref_frame_map[cpi->lst_fb_idx]];
YV12_BUFFER_CONFIG *new_yv12 = &cm->yv12_fb[cm->new_fb_idx];
YV12_BUFFER_CONFIG *gld_yv12 =
- &cm->yv12_fb[cm->active_ref_idx[cpi->gld_fb_idx]];
+ &cm->yv12_fb[cm->ref_frame_map[cpi->gld_fb_idx]];
int recon_y_stride = lst_yv12->y_stride;
int recon_uv_stride = lst_yv12->uv_stride;
int64_t intra_error = 0;
@@ -613,7 +613,7 @@ void vp9_first_pass(VP9_COMP *cpi) {
this_error = motion_error;
vp9_set_mbmode_and_mvs(x, NEWMV, &mv);
xd->mode_info_context->mbmi.txfm_size = TX_4X4;
- vp9_encode_inter16x16y(x);
+ vp9_encode_inter16x16y(x, mb_row, mb_col);
sum_mvr += mv.as_mv.row;
sum_mvr_abs += abs(mv.as_mv.row);
sum_mvc += mv.as_mv.col;
diff --git a/vp9/encoder/vp9_mbgraph.c b/vp9/encoder/vp9_mbgraph.c
index 40823f60c..d6644c2aa 100644
--- a/vp9/encoder/vp9_mbgraph.c
+++ b/vp9/encoder/vp9_mbgraph.c
@@ -20,7 +20,9 @@
static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi,
int_mv *ref_mv,
- int_mv *dst_mv) {
+ int_mv *dst_mv,
+ int mb_row,
+ int mb_col) {
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
BLOCK *b = &x->block[0];
@@ -72,7 +74,7 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi,
}
vp9_set_mbmode_and_mvs(x, NEWMV, dst_mv);
- vp9_build_inter16x16_predictors_mby(xd, xd->predictor, 16);
+ vp9_build_inter16x16_predictors_mby(xd, xd->predictor, 16, mb_row, mb_col);
best_err = vp9_sad16x16(xd->dst.y_buffer, xd->dst.y_stride,
xd->predictor, 16, INT_MAX);
@@ -93,8 +95,9 @@ static int do_16x16_motion_search
YV12_BUFFER_CONFIG *buf,
int buf_mb_y_offset,
YV12_BUFFER_CONFIG *ref,
- int mb_y_offset
-) {
+ int mb_y_offset,
+ int mb_row,
+ int mb_col) {
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
unsigned int err, tmp_err;
@@ -124,7 +127,7 @@ static int do_16x16_motion_search
// Test last reference frame using the previous best mv as the
// starting point (best reference) for the search
- tmp_err = do_16x16_motion_iteration(cpi, ref_mv, &tmp_mv);
+ tmp_err = do_16x16_motion_iteration(cpi, ref_mv, &tmp_mv, mb_row, mb_col);
if (tmp_err < err) {
err = tmp_err;
dst_mv->as_int = tmp_mv.as_int;
@@ -136,7 +139,8 @@ static int do_16x16_motion_search
int_mv zero_ref_mv, tmp_mv;
zero_ref_mv.as_int = 0;
- tmp_err = do_16x16_motion_iteration(cpi, &zero_ref_mv, &tmp_mv);
+ tmp_err = do_16x16_motion_iteration(cpi, &zero_ref_mv, &tmp_mv,
+ mb_row, mb_col);
if (tmp_err < err) {
dst_mv->as_int = tmp_mv.as_int;
err = tmp_err;
@@ -229,7 +233,9 @@ static void update_mbgraph_mb_stats
int gld_y_offset,
YV12_BUFFER_CONFIG *alt_ref,
int_mv *prev_alt_ref_mv,
- int arf_y_offset
+ int arf_y_offset,
+ int mb_row,
+ int mb_col
) {
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
@@ -249,7 +255,8 @@ static void update_mbgraph_mb_stats
int g_motion_error = do_16x16_motion_search(cpi, prev_golden_ref_mv,
&stats->ref[GOLDEN_FRAME].m.mv,
buf, mb_y_offset,
- golden_ref, gld_y_offset);
+ golden_ref, gld_y_offset,
+ mb_row, mb_col);
stats->ref[GOLDEN_FRAME].err = g_motion_error;
} else {
stats->ref[GOLDEN_FRAME].err = INT_MAX;
@@ -326,7 +333,8 @@ static void update_mbgraph_frame_stats
update_mbgraph_mb_stats(cpi, mb_stats, buf, mb_y_in_offset,
golden_ref, &gld_left_mv, gld_y_in_offset,
- alt_ref, &arf_left_mv, arf_y_in_offset);
+ alt_ref, &arf_left_mv, arf_y_in_offset,
+ mb_row, mb_col);
arf_left_mv.as_int = mb_stats->ref[ALTREF_FRAME].m.mv.as_int;
gld_left_mv.as_int = mb_stats->ref[GOLDEN_FRAME].m.mv.as_int;
if (mb_col == 0) {
@@ -437,7 +445,7 @@ void vp9_update_mbgraph_stats
VP9_COMMON *const cm = &cpi->common;
int i, n_frames = vp9_lookahead_depth(cpi->lookahead);
YV12_BUFFER_CONFIG *golden_ref =
- &cm->yv12_fb[cm->active_ref_idx[cpi->gld_fb_idx]];
+ &cm->yv12_fb[cm->ref_frame_map[cpi->gld_fb_idx]];
// we need to look ahead beyond where the ARF transitions into
// being a GF - so exit if we don't look ahead beyond that
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index feb1e36c0..ced6eddca 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -10,6 +10,7 @@
#include "vpx_config.h"
+#include "vp9/common/vp9_filter.h"
#include "vp9/common/vp9_onyxc_int.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/encoder/vp9_onyx_int.h"
@@ -832,7 +833,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
}
{
- int y_stride = cm->yv12_fb[cm->active_ref_idx[cpi->lst_fb_idx]].y_stride;
+ int y_stride = cm->yv12_fb[cm->ref_frame_map[cpi->lst_fb_idx]].y_stride;
if (cpi->sf.search_method == NSTEP) {
vp9_init3smotion_compensation(&cpi->mb, y_stride);
@@ -1753,7 +1754,7 @@ void vp9_remove_compressor(VP9_PTR *ptr) {
#endif
if (cpi->b_calculate_psnr) {
YV12_BUFFER_CONFIG *lst_yv12 =
- &cpi->common.yv12_fb[cpi->common.active_ref_idx[cpi->lst_fb_idx]];
+ &cpi->common.yv12_fb[cpi->common.ref_frame_map[cpi->lst_fb_idx]];
double samples = 3.0 / 2 * cpi->count * lst_yv12->y_width * lst_yv12->y_height;
double total_psnr = vp9_mse2psnr(samples, 255.0, cpi->total_sq_error);
double total_psnr2 = vp9_mse2psnr(samples, 255.0, cpi->total_sq_error2);
@@ -2098,11 +2099,11 @@ int vp9_get_reference_enc(VP9_PTR ptr, VP9_REFFRAME ref_frame_flag,
int ref_fb_idx;
if (ref_frame_flag == VP9_LAST_FLAG)
- ref_fb_idx = cm->active_ref_idx[cpi->lst_fb_idx];
+ ref_fb_idx = cm->ref_frame_map[cpi->lst_fb_idx];
else if (ref_frame_flag == VP9_GOLD_FLAG)
- ref_fb_idx = cm->active_ref_idx[cpi->gld_fb_idx];
+ ref_fb_idx = cm->ref_frame_map[cpi->gld_fb_idx];
else if (ref_frame_flag == VP9_ALT_FLAG)
- ref_fb_idx = cm->active_ref_idx[cpi->alt_fb_idx];
+ ref_fb_idx = cm->ref_frame_map[cpi->alt_fb_idx];
else
return -1;
@@ -2119,11 +2120,11 @@ int vp9_set_reference_enc(VP9_PTR ptr, VP9_REFFRAME ref_frame_flag,
int ref_fb_idx;
if (ref_frame_flag == VP9_LAST_FLAG)
- ref_fb_idx = cm->active_ref_idx[cpi->lst_fb_idx];
+ ref_fb_idx = cm->ref_frame_map[cpi->lst_fb_idx];
else if (ref_frame_flag == VP9_GOLD_FLAG)
- ref_fb_idx = cm->active_ref_idx[cpi->gld_fb_idx];
+ ref_fb_idx = cm->ref_frame_map[cpi->gld_fb_idx];
else if (ref_frame_flag == VP9_ALT_FLAG)
- ref_fb_idx = cm->active_ref_idx[cpi->alt_fb_idx];
+ ref_fb_idx = cm->ref_frame_map[cpi->alt_fb_idx];
else
return -1;
@@ -2198,6 +2199,69 @@ void vp9_write_yuv_rec_frame(VP9_COMMON *cm) {
}
#endif
+static void scale_and_extend_frame(YV12_BUFFER_CONFIG *src_fb,  // Resamples src_fb into dst_fb tile by tile with vp9_convolve8,
+ YV12_BUFFER_CONFIG *dst_fb) {  // then extends dst_fb's borders. Sizes are taken from the two buffers.
+ const int in_w = src_fb->y_width;  // source luma dimensions
+ const int in_h = src_fb->y_height;
+ const int out_w = dst_fb->y_width;  // destination luma dimensions
+ const int out_h = dst_fb->y_height;
+ int x, y;
+
+ for (y = 0; y < out_h; y += 16) {  // walk the destination in 16x16 luma tiles
+ for (x = 0; x < out_w; x += 16) {
+ int x_q4 = x * 16 * in_w / out_w;  // tile origin mapped into the source, times 16; low 4 bits pick the filter below
+ int y_q4 = y * 16 * in_h / out_h;
+ uint8_t *src, *dst;
+ int src_stride, dst_stride;
+
+
+ src = src_fb->y_buffer +  // luma: whole-pixel source position for this tile
+ y * in_h / out_h * src_fb->y_stride +
+ x * in_w / out_w;
+ dst = dst_fb->y_buffer +
+ y * dst_fb->y_stride +
+ x;
+ src_stride = src_fb->y_stride;
+ dst_stride = dst_fb->y_stride;
+
+ vp9_convolve8(src, src_stride, dst, dst_stride,  // 16x16 luma tile
+ vp9_sub_pel_filters_8[x_q4 & 0xf], 16 * in_w / out_w,  // presumably (start filter, step in 1/16 pel) - confirm against vp9_convolve8
+ vp9_sub_pel_filters_8[y_q4 & 0xf], 16 * in_h / out_h,
+ 16, 16);
+
+ x_q4 >>= 1;  // chroma is addressed at x / 2, y / 2 below, so halve the scaled position
+ y_q4 >>= 1;
+ src_stride = src_fb->uv_stride;  // both chroma planes share uv_stride
+ dst_stride = dst_fb->uv_stride;
+
+ src = src_fb->u_buffer +  // U plane: same mapping as luma at half coordinates
+ y / 2 * in_h / out_h * src_fb->uv_stride +
+ x / 2 * in_w / out_w;
+ dst = dst_fb->u_buffer +
+ y / 2 * dst_fb->uv_stride +
+ x / 2;
+ vp9_convolve8(src, src_stride, dst, dst_stride,  // 8x8 U tile
+ vp9_sub_pel_filters_8[x_q4 & 0xf], 16 * in_w / out_w,
+ vp9_sub_pel_filters_8[y_q4 & 0xf], 16 * in_h / out_h,
+ 8, 8);
+
+ src = src_fb->v_buffer +  // V plane: identical to the U plane apart from the buffers
+ y / 2 * in_h / out_h * src_fb->uv_stride +
+ x / 2 * in_w / out_w;
+ dst = dst_fb->v_buffer +
+ y / 2 * dst_fb->uv_stride +
+ x / 2;
+ vp9_convolve8(src, src_stride, dst, dst_stride,  // 8x8 V tile
+ vp9_sub_pel_filters_8[x_q4 & 0xf], 16 * in_w / out_w,
+ vp9_sub_pel_filters_8[y_q4 & 0xf], 16 * in_h / out_h,
+ 8, 8);
+ }
+ }
+
+ vp8_yv12_extend_frame_borders(dst_fb);  // fill the border area once every tile has been written
+}
+
+
static void update_alt_ref_frame_stats(VP9_COMP *cpi) {
VP9_COMMON *cm = &cpi->common;
@@ -2416,9 +2480,9 @@ static void update_reference_frames(VP9_COMP * const cpi) {
// If any buffer copy / swapping is signaled it should be done here.
if (cm->frame_type == KEY_FRAME) {
ref_cnt_fb(cm->fb_idx_ref_cnt,
- &cm->active_ref_idx[cpi->gld_fb_idx], cm->new_fb_idx);
+ &cm->ref_frame_map[cpi->gld_fb_idx], cm->new_fb_idx);
ref_cnt_fb(cm->fb_idx_ref_cnt,
- &cm->active_ref_idx[cpi->alt_fb_idx], cm->new_fb_idx);
+ &cm->ref_frame_map[cpi->alt_fb_idx], cm->new_fb_idx);
} else if (cpi->refresh_golden_frame && !cpi->refresh_alt_ref_frame) {
/* Preserve the previously existing golden frame and update the frame in
* the alt ref slot instead. This is highly specific to the current use of
@@ -2432,7 +2496,7 @@ static void update_reference_frames(VP9_COMP * const cpi) {
int tmp;
ref_cnt_fb(cm->fb_idx_ref_cnt,
- &cm->active_ref_idx[cpi->alt_fb_idx], cm->new_fb_idx);
+ &cm->ref_frame_map[cpi->alt_fb_idx], cm->new_fb_idx);
tmp = cpi->alt_fb_idx;
cpi->alt_fb_idx = cpi->gld_fb_idx;
@@ -2440,18 +2504,18 @@ static void update_reference_frames(VP9_COMP * const cpi) {
} else { /* For non key/golden frames */
if (cpi->refresh_alt_ref_frame) {
ref_cnt_fb(cm->fb_idx_ref_cnt,
- &cm->active_ref_idx[cpi->alt_fb_idx], cm->new_fb_idx);
+ &cm->ref_frame_map[cpi->alt_fb_idx], cm->new_fb_idx);
}
if (cpi->refresh_golden_frame) {
ref_cnt_fb(cm->fb_idx_ref_cnt,
- &cm->active_ref_idx[cpi->gld_fb_idx], cm->new_fb_idx);
+ &cm->ref_frame_map[cpi->gld_fb_idx], cm->new_fb_idx);
}
}
if (cpi->refresh_last_frame) {
ref_cnt_fb(cm->fb_idx_ref_cnt,
- &cm->active_ref_idx[cpi->lst_fb_idx], cm->new_fb_idx);
+ &cm->ref_frame_map[cpi->lst_fb_idx], cm->new_fb_idx);
}
}
@@ -2535,6 +2599,38 @@ static void select_interintra_mode(VP9_COMP *cpi) {
}
#endif
+static void scale_references(VP9_COMP *cpi) {  // Fills cpi->scaled_ref_idx[0..2] with one frame-buffer index per reference slot.
+ VP9_COMMON *cm = &cpi->common;
+ int i;
+
+ for (i = 0; i < 3; i++) {  // NOTE(review): indexes ref_frame_map by 0..2, not via lst/gld/alt_fb_idx - confirm intended
+ YV12_BUFFER_CONFIG *ref = &cm->yv12_fb[cm->ref_frame_map[i]];
+
+ if (ref->y_width != cm->Width || ref->y_height != cm->Height) {  // size mismatch: build a rescaled copy
+ int new_fb = get_free_fb(cm);  // NOTE(review): used unchecked; presumably returned already holding one reference - confirm
+
+ vp8_yv12_realloc_frame_buffer(&cm->yv12_fb[new_fb],
+ cm->mb_cols * 16,  // NOTE(review): sized in whole macroblocks but tested against cm->Width/Height above - confirm for non-multiple-of-16 sizes
+ cm->mb_rows * 16,
+ VP9BORDERINPIXELS);
+ scale_and_extend_frame(ref, &cm->yv12_fb[new_fb]);
+ cpi->scaled_ref_idx[i] = new_fb;
+ } else {
+ cpi->scaled_ref_idx[i] = cm->ref_frame_map[i];  // same size: reuse the reference buffer itself
+ cm->fb_idx_ref_cnt[cm->ref_frame_map[i]]++;  // take a reference; release_scaled_references() decrements it
+ }
+ }
+}
+
+static void release_scaled_references(VP9_COMP *cpi) {  // Decrements the ref count of each buffer listed in cpi->scaled_ref_idx[0..2].
+ VP9_COMMON *cm = &cpi->common;
+ int i;
+
+ for (i = 0; i < 3; i++) {
+ cm->fb_idx_ref_cnt[cpi->scaled_ref_idx[i]]--;  // pairs with the count taken per slot in scale_references()
+ }
+}
+
static void encode_frame_to_data_rate(VP9_COMP *cpi,
unsigned long *size,
unsigned char *dest,
@@ -2583,6 +2679,17 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
int mcomp_filter_index = 0;
int64_t mcomp_filter_cost[4];
+ /* Scale the source buffer, if required */
+ if (cm->Width != cpi->un_scaled_source->y_width ||
+ cm->Height != cpi->un_scaled_source->y_height) {
+ scale_and_extend_frame(cpi->un_scaled_source, &cpi->scaled_source);
+ cpi->Source = &cpi->scaled_source;
+ } else {
+ cpi->Source = cpi->un_scaled_source;
+ }
+
+ scale_references(cpi);
+
// Clear down mmx registers to allow floating point in what follows
vp9_clear_system_state();
@@ -3231,6 +3338,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
update_reference_segmentation_map(cpi);
}
+ release_scaled_references(cpi);
update_reference_frames(cpi);
vp9_copy(cpi->common.fc.coef_counts_4x4, cpi->coef_counts_4x4);
vp9_copy(cpi->common.fc.coef_counts_8x8, cpi->coef_counts_8x8);
@@ -3373,7 +3481,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
if (cpi->twopass.total_left_stats->coded_error != 0.0)
fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d"
"%7.2f %7.2f %7.2f %7.2f %7.2f %7.2f %7.2f"
- "%6d %5d %5d %5d %8d %8.2f %10d %10.3f"
+ "%6d %5d %5d %5d %8.2f %10d %10.3f"
"%10.3f %8d %10d %10d %10d\n",
cpi->common.current_video_frame, cpi->this_frame_target,
cpi->projected_frame_size, 0, //loop_size_estimate,
@@ -3400,7 +3508,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
else
fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d"
"%7.2f %7.2f %7.2f %7.2f %7.2f %7.2f %7.2f"
- "%5d %5d %8d %8.2f %10d %10.3f"
+ "%5d %5d %8d %8d %8.2f %10d %10.3f"
"%8d %10d %10d %10d\n",
cpi->common.current_video_frame,
cpi->this_frame_target, cpi->projected_frame_size,
@@ -3516,6 +3624,9 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
xd->update_mb_segmentation_data = 0;
xd->mode_ref_lf_delta_update = 0;
+ // keep track of the last coded dimensions
+ cm->last_width = cm->Width;
+ cm->last_height = cm->Height;
// Dont increment frame counters if this was an altref buffer update not a real frame
if (cm->show_frame) {
@@ -3533,8 +3644,8 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
FILE *recon_file;
sprintf(filename, "enc%04d.yuv", (int) cm->current_video_frame);
recon_file = fopen(filename, "wb");
- fwrite(cm->yv12_fb[cm->active_ref_idx[cpi->lst_fb_idx]].buffer_alloc,
- cm->yv12_fb[cm->active_ref_idx[cpi->lst_fb_idx]].frame_size,
+ fwrite(cm->yv12_fb[cm->ref_frame_map[cpi->lst_fb_idx]].buffer_alloc,
+ cm->yv12_fb[cm->ref_frame_map[cpi->lst_fb_idx]].frame_size,
1, recon_file);
fclose(recon_file);
}
@@ -3756,28 +3867,16 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags,
cm->fb_idx_ref_cnt[cm->new_fb_idx]--;
cm->new_fb_idx = get_free_fb(cm);
+ /* Get the mapping of L/G/A to the reference buffer pool */
+ cm->active_ref_idx[0] = cm->ref_frame_map[cpi->lst_fb_idx];
+ cm->active_ref_idx[1] = cm->ref_frame_map[cpi->gld_fb_idx];
+ cm->active_ref_idx[2] = cm->ref_frame_map[cpi->alt_fb_idx];
+
/* Reset the frame pointers to the current frame size */
vp8_yv12_realloc_frame_buffer(&cm->yv12_fb[cm->new_fb_idx],
cm->mb_cols * 16, cm->mb_rows * 16,
VP9BORDERINPIXELS);
- /* Disable any references that have different size */
- if ((cm->yv12_fb[cm->active_ref_idx[cpi->lst_fb_idx]].y_width !=
- cm->yv12_fb[cm->new_fb_idx].y_width) ||
- (cm->yv12_fb[cm->active_ref_idx[cpi->lst_fb_idx]].y_height !=
- cm->yv12_fb[cm->new_fb_idx].y_height))
- cpi->ref_frame_flags &= ~VP9_LAST_FLAG;
- if ((cm->yv12_fb[cm->active_ref_idx[cpi->gld_fb_idx]].y_width !=
- cm->yv12_fb[cm->new_fb_idx].y_width) ||
- (cm->yv12_fb[cm->active_ref_idx[cpi->gld_fb_idx]].y_height !=
- cm->yv12_fb[cm->new_fb_idx].y_height))
- cpi->ref_frame_flags &= ~VP9_GOLD_FLAG;
- if ((cm->yv12_fb[cm->active_ref_idx[cpi->alt_fb_idx]].y_width !=
- cm->yv12_fb[cm->new_fb_idx].y_width) ||
- (cm->yv12_fb[cm->active_ref_idx[cpi->alt_fb_idx]].y_height !=
- cm->yv12_fb[cm->new_fb_idx].y_height))
- cpi->ref_frame_flags &= ~VP9_ALT_FLAG;
-
vp9_setup_interp_filters(&cpi->mb.e_mbd, DEFAULT_INTERP_FILTER, cm);
if (cpi->pass == 1) {
Pass1Encode(cpi, size, dest, frame_flags);
@@ -4027,18 +4126,31 @@ int vp9_set_active_map(VP9_PTR comp, unsigned char *map,
int vp9_set_internal_size(VP9_PTR comp,  // Sets the horizontal/vertical scaling modes; returns 0, or -1 if either mode is above ONETWO.
VPX_SCALING horiz_mode, VPX_SCALING vert_mode) {
VP9_COMP *cpi = (VP9_COMP *) comp;
+ VP9_COMMON *cm = &cpi->common;
- if (horiz_mode <= ONETWO)
- cpi->horiz_scale = horiz_mode;
- else
+ if (horiz_mode > ONETWO)  // reject out-of-range modes before touching any state
return -1;
- if (vert_mode <= ONETWO)
- cpi->vert_scale = vert_mode;
- else
+ if (vert_mode > ONETWO)
return -1;
- vp9_change_config(comp, &cpi->oxcf);
+ if (cm->horiz_scale != horiz_mode || cm->vert_scale != vert_mode) {  // recompute the coded size only when a mode changes
+ int UNINITIALIZED_IS_SAFE(hr), UNINITIALIZED_IS_SAFE(hs);  // written by Scale2Ratio() below before being read
+ int UNINITIALIZED_IS_SAFE(vr), UNINITIALIZED_IS_SAFE(vs);
+
+ cm->horiz_scale = horiz_mode;
+ cm->vert_scale = vert_mode;
+
+ Scale2Ratio(cm->horiz_scale, &hr, &hs);  // hr/hs and vr/vs are applied as numerator/denominator below
+ Scale2Ratio(cm->vert_scale, &vr, &vs);
+
+ // always go to the next whole number
+ cm->Width = (hs - 1 + cpi->oxcf.Width * hr) / hs;  // adding (divisor - 1) before dividing rounds up
+ cm->Height = (vs - 1 + cpi->oxcf.Height * vr) / vs;
+ }
+ assert(cm->Width <= cpi->initial_width);  // checked on every call, whether or not the modes changed
+ assert(cm->Height <= cpi->initial_height);
+ update_frame_size(cpi);
return 0;
}
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index 9b509ea0b..02a371964 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -332,6 +332,7 @@ typedef struct VP9_COMP {
int alt_is_last; // Alt reference frame same as last ( short circuit altref search)
int gold_is_alt; // don't do both alt and gold search ( just do gold).
+ int scaled_ref_idx[3];
int lst_fb_idx;
int gld_fb_idx;
int alt_fb_idx;
diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c
index 8ae53e60e..399e8ecda 100644
--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c
@@ -21,7 +21,10 @@
extern int enc_debug;
#endif
-void vp9_ht_quantize_b_4x4(BLOCK *b, BLOCKD *d, TX_TYPE tx_type) {
+void vp9_ht_quantize_b_4x4(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type) {
+ MACROBLOCKD *const xd = &mb->e_mbd;
+ BLOCK *const b = &mb->block[b_idx];
+ BLOCKD *const d = &xd->block[b_idx];
int i, rc, eob;
int zbin;
int x, y, z, sz;
@@ -84,10 +87,13 @@ void vp9_ht_quantize_b_4x4(BLOCK *b, BLOCKD *d, TX_TYPE tx_type) {
}
}
- d->eob = eob + 1;
+ xd->eobs[b_idx] = eob + 1;
}
-void vp9_regular_quantize_b_4x4(BLOCK *b, BLOCKD *d) {
+void vp9_regular_quantize_b_4x4(MACROBLOCK *mb, int b_idx) {
+ MACROBLOCKD *const xd = &mb->e_mbd;
+ BLOCK *const b = &mb->block[b_idx];
+ BLOCKD *const d = &xd->block[b_idx];
int i, rc, eob;
int zbin;
int x, y, z, sz;
@@ -135,7 +141,7 @@ void vp9_regular_quantize_b_4x4(BLOCK *b, BLOCKD *d) {
}
}
- d->eob = eob + 1;
+ xd->eobs[b_idx] = eob + 1;
}
void vp9_quantize_mby_4x4_c(MACROBLOCK *x) {
@@ -144,9 +150,9 @@ void vp9_quantize_mby_4x4_c(MACROBLOCK *x) {
for (i = 0; i < 16; i++) {
TX_TYPE tx_type = get_tx_type_4x4(&x->e_mbd, &x->e_mbd.block[i]);
if (tx_type != DCT_DCT) {
- vp9_ht_quantize_b_4x4(&x->block[i], &x->e_mbd.block[i], tx_type);
+ vp9_ht_quantize_b_4x4(x, i, tx_type);
} else {
- x->quantize_b_4x4(&x->block[i], &x->e_mbd.block[i]);
+ x->quantize_b_4x4(x, i);
}
}
}
@@ -155,7 +161,7 @@ void vp9_quantize_mbuv_4x4_c(MACROBLOCK *x) {
int i;
for (i = 16; i < 24; i++)
- x->quantize_b_4x4(&x->block[i], &x->e_mbd.block[i]);
+ x->quantize_b_4x4(x, i);
}
void vp9_quantize_mb_4x4_c(MACROBLOCK *x) {
@@ -163,7 +169,10 @@ void vp9_quantize_mb_4x4_c(MACROBLOCK *x) {
vp9_quantize_mbuv_4x4_c(x);
}
-void vp9_regular_quantize_b_8x8(BLOCK *b, BLOCKD *d) {
+void vp9_regular_quantize_b_8x8(MACROBLOCK *mb, int b_idx) {
+ MACROBLOCKD *const xd = &mb->e_mbd;
+ BLOCK *const b = &mb->block[b_idx];
+ BLOCKD *const d = &xd->block[b_idx];
int16_t *qcoeff_ptr = d->qcoeff;
int16_t *dqcoeff_ptr = d->dqcoeff;
@@ -236,30 +245,25 @@ void vp9_regular_quantize_b_8x8(BLOCK *b, BLOCKD *d) {
}
}
}
- d->eob = eob + 1;
+ xd->eobs[b_idx] = eob + 1;
} else {
- d->eob = 0;
+ xd->eobs[b_idx] = 0;
}
}
void vp9_quantize_mby_8x8(MACROBLOCK *x) {  // Calls x->quantize_b_8x8 for block indices 0, 4, 8 and 12.
int i;
- for (i = 0; i < 16; i ++) {
- x->e_mbd.block[i].eob = 0;
- }
for (i = 0; i < 16; i += 4) {
- x->quantize_b_8x8(&x->block[i], &x->e_mbd.block[i]);
+ x->quantize_b_8x8(x, i);  // NOTE(review): the removed loop cleared eob for all 16 blocks; confirm nothing reads the indices not visited here
}
}
}
void vp9_quantize_mbuv_8x8(MACROBLOCK *x) {  // Calls x->quantize_b_8x8 for block indices 16 and 20.
int i;
- for (i = 16; i < 24; i ++)
- x->e_mbd.block[i].eob = 0;
for (i = 16; i < 24; i += 4)
- x->quantize_b_8x8(&x->block[i], &x->e_mbd.block[i]);
+ x->quantize_b_8x8(x, i);  // NOTE(review): eob for indices 17-19 and 21-23 is no longer cleared here - confirm no reader depends on it
}
void vp9_quantize_mb_8x8(MACROBLOCK *x) {
@@ -268,11 +272,7 @@ void vp9_quantize_mb_8x8(MACROBLOCK *x) {
}
void vp9_quantize_mby_16x16(MACROBLOCK *x) {  // Calls x->quantize_b_16x16 for block index 0.
- int i;
-
- for (i = 0; i < 16; i++)
- x->e_mbd.block[i].eob = 0;
- x->quantize_b_16x16(&x->block[0], &x->e_mbd.block[0]);
+ x->quantize_b_16x16(x, 0);  // NOTE(review): eob for indices 1-15 is no longer cleared here - confirm no reader depends on it
}
void vp9_quantize_mb_16x16(MACROBLOCK *x) {
@@ -286,7 +286,7 @@ static void quantize(int16_t *zbin_boost_orig_ptr,
uint8_t *quant_shift_ptr,
int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr,
int16_t *dequant_ptr, int zbin_oq_value,
- int *eob_ptr, const int *scan, int mul) {
+ uint16_t *eob_ptr, const int *scan, int mul) {
int i, rc, eob;
int zbin;
int x, y, z, sz;
@@ -328,7 +328,10 @@ static void quantize(int16_t *zbin_boost_orig_ptr,
*eob_ptr = eob + 1;
}
-void vp9_regular_quantize_b_16x16(BLOCK *b, BLOCKD *d) {
+void vp9_regular_quantize_b_16x16(MACROBLOCK *mb, int b_idx) {
+ MACROBLOCKD *const xd = &mb->e_mbd;
+ BLOCK *const b = &mb->block[b_idx];
+ BLOCKD *const d = &xd->block[b_idx];
quantize(b->zrun_zbin_boost,
b->coeff,
256, b->skip_block,
@@ -337,7 +340,7 @@ void vp9_regular_quantize_b_16x16(BLOCK *b, BLOCKD *d) {
d->dqcoeff,
d->dequant,
b->zbin_extra,
- &d->eob, vp9_default_zig_zag1d_16x16, 1);
+ &xd->eobs[b_idx], vp9_default_zig_zag1d_16x16, 1);
}
void vp9_quantize_sby_32x32(MACROBLOCK *x) {
@@ -345,7 +348,6 @@ void vp9_quantize_sby_32x32(MACROBLOCK *x) {
BLOCK *b = &x->block[0];
BLOCKD *d = &xd->block[0];
- d->eob = 0;
quantize(b->zrun_zbin_boost,
x->sb_coeff_data.coeff,
1024, b->skip_block,
@@ -355,7 +357,7 @@ void vp9_quantize_sby_32x32(MACROBLOCK *x) {
xd->sb_coeff_data.dqcoeff,
d->dequant,
b->zbin_extra,
- &d->eob,
+ &xd->eobs[0],
vp9_default_zig_zag1d_32x32, 2);
}
@@ -363,8 +365,6 @@ void vp9_quantize_sbuv_16x16(MACROBLOCK *x) {
int i;
MACROBLOCKD *xd = &x->e_mbd;
- xd->block[16].eob = 0;
- xd->block[20].eob = 0;
for (i = 16; i < 24; i += 4)
quantize(x->block[i].zrun_zbin_boost,
x->sb_coeff_data.coeff + 1024 + (i - 16) * 64,
@@ -375,7 +375,7 @@ void vp9_quantize_sbuv_16x16(MACROBLOCK *x) {
xd->sb_coeff_data.dqcoeff + 1024 + (i - 16) * 64,
xd->block[i].dequant,
x->block[i].zbin_extra,
- &xd->block[i].eob,
+ &xd->eobs[i],
vp9_default_zig_zag1d_16x16, 1);
}
@@ -383,10 +383,9 @@ void vp9_quantize_sbuv_16x16(MACROBLOCK *x) {
* these two C functions if corresponding optimized routine is not available.
* NEON optimized version implements currently the fast quantization for pair
* of blocks. */
-void vp9_regular_quantize_b_4x4_pair(BLOCK *b1, BLOCK *b2,
- BLOCKD *d1, BLOCKD *d2) {
- vp9_regular_quantize_b_4x4(b1, d1);
- vp9_regular_quantize_b_4x4(b2, d2);
+void vp9_regular_quantize_b_4x4_pair(MACROBLOCK *x, int b_idx1, int b_idx2) {  // C fallback: quantizes the two indexed 4x4 blocks one after the other.
+ vp9_regular_quantize_b_4x4(x, b_idx1);  // first block of the pair
+ vp9_regular_quantize_b_4x4(x, b_idx2);  // second block of the pair
}
static void invert_quant(int16_t *quant,
diff --git a/vp9/encoder/vp9_quantize.h b/vp9/encoder/vp9_quantize.h
index 9a8e35d2c..d338e620a 100644
--- a/vp9/encoder/vp9_quantize.h
+++ b/vp9/encoder/vp9_quantize.h
@@ -14,10 +14,10 @@
#include "vp9/encoder/vp9_block.h"
#define prototype_quantize_block(sym) \
- void (sym)(BLOCK *b,BLOCKD *d)
+ void (sym)(MACROBLOCK *mb, int b_idx)
#define prototype_quantize_block_pair(sym) \
- void (sym)(BLOCK *b1, BLOCK *b2, BLOCKD *d1, BLOCKD *d2)
+ void (sym)(MACROBLOCK *mb, int b_idx1, int b_idx2)
#define prototype_quantize_mb(sym) \
void (sym)(MACROBLOCK *x)
@@ -27,7 +27,7 @@
#endif
#define prototype_quantize_block_type(sym) \
- void (sym)(BLOCK *b, BLOCKD *d, TX_TYPE type)
+ void (sym)(MACROBLOCK *mb, int b_ix, TX_TYPE type)
extern prototype_quantize_block_type(vp9_ht_quantize_b_4x4);
#ifndef vp9_quantize_quantb_4x4
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 6e1122f3e..496be950c 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -379,7 +379,6 @@ int vp9_uvsse(MACROBLOCK *x) {
sse2 += sse1;
}
return sse2;
-
}
static INLINE int cost_coeffs(MACROBLOCK *mb,
@@ -388,9 +387,9 @@ static INLINE int cost_coeffs(MACROBLOCK *mb,
ENTROPY_CONTEXT *l,
TX_SIZE tx_size) {
int pt;
- const int eob = b->eob;
- MACROBLOCKD *xd = &mb->e_mbd;
+ MACROBLOCKD *const xd = &mb->e_mbd;
const int ib = (int)(b - xd->block);
+ const int eob = xd->eobs[ib];
int c = 0;
int cost = 0, seg_eob;
const int segment_id = xd->mode_info_context->mbmi.segment_id;
@@ -402,12 +401,10 @@ static INLINE int cost_coeffs(MACROBLOCK *mb,
unsigned int (*token_costs)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
mb->token_costs[tx_size][type][ref];
ENTROPY_CONTEXT a_ec = *a, l_ec = *l;
-#if CONFIG_CNVCONTEXT
ENTROPY_CONTEXT *const a1 = a +
sizeof(ENTROPY_CONTEXT_PLANES)/sizeof(ENTROPY_CONTEXT);
ENTROPY_CONTEXT *const l1 = l +
sizeof(ENTROPY_CONTEXT_PLANES)/sizeof(ENTROPY_CONTEXT);
-#endif
switch (tx_size) {
case TX_4X4:
@@ -422,10 +419,8 @@ static INLINE int cost_coeffs(MACROBLOCK *mb,
}
break;
case TX_8X8:
-#if CONFIG_CNVCONTEXT
a_ec = (a[0] + a[1]) != 0;
l_ec = (l[0] + l[1]) != 0;
-#endif
scan = vp9_default_zig_zag1d_8x8;
seg_eob = 64;
break;
@@ -435,27 +430,21 @@ static INLINE int cost_coeffs(MACROBLOCK *mb,
if (type == PLANE_TYPE_UV) {
const int uv_idx = ib - 16;
qcoeff_ptr = xd->sb_coeff_data.qcoeff + 1024 + 64 * uv_idx;
-#if CONFIG_CNVCONTEXT
a_ec = (a[0] + a[1] + a1[0] + a1[1]) != 0;
l_ec = (l[0] + l[1] + l1[0] + l1[1]) != 0;
} else {
a_ec = (a[0] + a[1] + a[2] + a[3]) != 0;
l_ec = (l[0] + l[1] + l[2] + l[3]) != 0;
-#endif
}
break;
case TX_32X32:
scan = vp9_default_zig_zag1d_32x32;
seg_eob = 1024;
qcoeff_ptr = xd->sb_coeff_data.qcoeff;
-#if CONFIG_CNVCONTEXT
- a_ec = a[0] + a[1] + a[2] + a[3] +
- a1[0] + a1[1] + a1[2] + a1[3];
- l_ec = l[0] + l[1] + l[2] + l[3] +
- l1[0] + l1[1] + l1[2] + l1[3];
- a_ec = a_ec != 0;
- l_ec = l_ec != 0;
-#endif
+ a_ec = (a[0] + a[1] + a[2] + a[3] +
+ a1[0] + a1[1] + a1[2] + a1[3]) != 0;
+ l_ec = (l[0] + l[1] + l[2] + l[3] +
+ l1[0] + l1[1] + l1[2] + l1[3]) != 0;
break;
default:
abort();
@@ -484,7 +473,6 @@ static INLINE int cost_coeffs(MACROBLOCK *mb,
// is eob first coefficient;
pt = (c > 0);
*a = *l = pt;
-#if CONFIG_CNVCONTEXT
if (tx_size >= TX_8X8) {
a[1] = l[1] = pt;
if (tx_size >= TX_16X16) {
@@ -499,7 +487,6 @@ static INLINE int cost_coeffs(MACROBLOCK *mb,
}
}
}
-#endif
return cost;
}
@@ -1055,10 +1042,10 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be,
tx_type = get_tx_type_4x4(xd, b);
if (tx_type != DCT_DCT) {
vp9_short_fht4x4(be->src_diff, be->coeff, 16, tx_type);
- vp9_ht_quantize_b_4x4(be, b, tx_type);
+ vp9_ht_quantize_b_4x4(x, be - x->block, tx_type);
} else {
x->fwd_txm4x4(be->src_diff, be->coeff, 32);
- x->quantize_b_4x4(be, b);
+ x->quantize_b_4x4(x, be - x->block);
}
tempa = ta;
@@ -1354,7 +1341,7 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
vp9_short_fht8x8(be->src_diff, (x->block + idx)->coeff, 16, tx_type);
else
x->fwd_txm8x8(be->src_diff, (x->block + idx)->coeff, 32);
- x->quantize_b_8x8(x->block + idx, xd->block + idx);
+ x->quantize_b_8x8(x, idx);
// compute quantization mse of 8x8 block
distortion = vp9_block_error_c((x->block + idx)->coeff,
@@ -1391,14 +1378,14 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
tx_type = get_tx_type_4x4(xd, b);
if (tx_type != DCT_DCT) {
vp9_short_fht4x4(be->src_diff, be->coeff, 16, tx_type);
- vp9_ht_quantize_b_4x4(be, b, tx_type);
+ vp9_ht_quantize_b_4x4(x, ib + iblock[i], tx_type);
} else if (!(i & 1) && get_tx_type_4x4(xd, b + 1) == DCT_DCT) {
x->fwd_txm8x4(be->src_diff, be->coeff, 32);
- x->quantize_b_4x4_pair(be, be + 1, b, b + 1);
+ x->quantize_b_4x4_pair(x, ib + iblock[i], ib + iblock[i] + 1);
do_two = 1;
} else {
x->fwd_txm4x4(be->src_diff, be->coeff, 32);
- x->quantize_b_4x4(be, b);
+ x->quantize_b_4x4(x, ib + iblock[i]);
}
distortion += vp9_block_error_c(be->coeff, b->dqcoeff, 16 << do_two);
rate_t += cost_coeffs(x, b, PLANE_TYPE_Y_WITH_DC,
@@ -1683,8 +1670,9 @@ static int64_t rd_inter64x64_uv(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
}
static int64_t rd_inter4x4_uv(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
- int *distortion, int *skip, int fullpixel) {
- vp9_build_inter4x4_predictors_mbuv(&x->e_mbd);
+ int *distortion, int *skip, int fullpixel,
+ int mb_row, int mb_col) {
+ vp9_build_inter4x4_predictors_mbuv(&x->e_mbd, mb_row, mb_col);
vp9_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer,
x->e_mbd.predictor, x->src.uv_stride);
return rd_inter16x16_uv_4x4(cpi, x, rate, distortion, fullpixel, skip, 1);
@@ -2179,7 +2167,7 @@ static int64_t encode_inter_mb_segment(MACROBLOCK *x,
vp9_subtract_b(be, bd, 16);
x->fwd_txm4x4(be->src_diff, be->coeff, 32);
- x->quantize_b_4x4(be, bd);
+ x->quantize_b_4x4(x, i);
thisdistortion = vp9_block_error(be->coeff, bd->dqcoeff, 16);
*distortion += thisdistortion;
*labelyrate += cost_coeffs(x, bd, PLANE_TYPE_Y_WITH_DC,
@@ -2242,7 +2230,7 @@ static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x,
if (xd->mode_info_context->mbmi.txfm_size == TX_4X4) {
if (otherrd) {
x->fwd_txm8x8(be->src_diff, be2->coeff, 32);
- x->quantize_b_8x8(be2, bd2);
+ x->quantize_b_8x8(x, idx);
thisdistortion = vp9_block_error_c(be2->coeff, bd2->dqcoeff, 64);
otherdist += thisdistortion;
othercost += cost_coeffs(x, bd2, PLANE_TYPE_Y_WITH_DC,
@@ -2254,7 +2242,7 @@ static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x,
bd = &xd->block[ib + iblock[j]];
be = &x->block[ib + iblock[j]];
x->fwd_txm8x4(be->src_diff, be->coeff, 32);
- x->quantize_b_4x4_pair(be, be + 1, bd, bd + 1);
+ x->quantize_b_4x4_pair(x, ib + iblock[j], ib + iblock[j] + 1);
thisdistortion = vp9_block_error_c(be->coeff, bd->dqcoeff, 32);
*distortion += thisdistortion;
*labelyrate += cost_coeffs(x, bd, PLANE_TYPE_Y_WITH_DC,
@@ -2272,7 +2260,7 @@ static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x,
BLOCKD *bd = &xd->block[ib + iblock[j]];
BLOCK *be = &x->block[ib + iblock[j]];
x->fwd_txm8x4(be->src_diff, be->coeff, 32);
- x->quantize_b_4x4_pair(be, be + 1, bd, bd + 1);
+ x->quantize_b_4x4_pair(x, ib + iblock[j], ib + iblock[j] + 1);  // pair is this block and the next one, matching the removed (be, be + 1, bd, bd + 1)
thisdistortion = vp9_block_error_c(be->coeff, bd->dqcoeff, 32);
otherdist += thisdistortion;
othercost += cost_coeffs(x, bd, PLANE_TYPE_Y_WITH_DC,
@@ -2286,7 +2274,7 @@ static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x,
}
}
x->fwd_txm8x8(be->src_diff, be2->coeff, 32);
- x->quantize_b_8x8(be2, bd2);
+ x->quantize_b_8x8(x, idx);
thisdistortion = vp9_block_error_c(be2->coeff, bd2->dqcoeff, 64);
*distortion += thisdistortion;
*labelyrate += cost_coeffs(x, bd2, PLANE_TYPE_Y_WITH_DC,
@@ -2549,13 +2537,13 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
if (x->e_mbd.mode_info_context->mbmi.txfm_size == TX_4X4) {
for (j = 0; j < 16; j++)
if (labels[j] == i)
- best_eobs[j] = x->e_mbd.block[j].eob;
+ best_eobs[j] = x->e_mbd.eobs[j];
} else {
for (j = 0; j < 4; j++) {
int ib = vp9_i8x8_block[j], idx = j * 4;
if (labels[ib] == i)
- best_eobs[idx] = x->e_mbd.block[idx].eob;
+ best_eobs[idx] = x->e_mbd.eobs[idx];
}
}
if (other_rd < best_other_rd)
@@ -2830,7 +2818,7 @@ static int rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x,
bd->bmi.as_mv[0].as_int = bsi.mvs[i].as_int;
if (mbmi->second_ref_frame > 0)
bd->bmi.as_mv[1].as_int = bsi.second_mvs[i].as_int;
- bd->eob = bsi.eobs[i];
+ x->e_mbd.eobs[i] = bsi.eobs[i];
}
*returntotrate = bsi.r;
@@ -3120,26 +3108,45 @@ static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x,
int_mv frame_nearest_mv[MAX_REF_FRAMES],
int_mv frame_near_mv[MAX_REF_FRAMES],
int frame_mdcounts[4][4],
- YV12_BUFFER_CONFIG yv12_mb[4]) {
- YV12_BUFFER_CONFIG *yv12 = &cpi->common.yv12_fb[idx];
+ YV12_BUFFER_CONFIG yv12_mb[4],
+ struct scale_factors scale[MAX_REF_FRAMES]) {
+ VP9_COMMON *cm = &cpi->common;
+ YV12_BUFFER_CONFIG *yv12 = &cm->yv12_fb[cpi->common.ref_frame_map[idx]];
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
+ int use_prev_in_find_mv_refs, use_prev_in_find_best_ref;
+
+ // set up scaling factors
+ scale[frame_type] = cpi->common.active_ref_scale[frame_type - 1];
+ scale[frame_type].x_offset_q4 =
+ (mb_col * 16 * scale[frame_type].x_num / scale[frame_type].x_den) & 0xf;
+ scale[frame_type].y_offset_q4 =
+ (mb_row * 16 * scale[frame_type].y_num / scale[frame_type].y_den) & 0xf;
- setup_pred_block(&yv12_mb[frame_type], yv12, mb_row, mb_col);
+ // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this
+ // use the UV scaling factors.
+ setup_pred_block(&yv12_mb[frame_type], yv12, mb_row, mb_col,
+ &scale[frame_type], &scale[frame_type]);
// Gets an initial list of candidate vectors from neighbours and orders them
+ use_prev_in_find_mv_refs = cm->Width == cm->last_width &&
+ cm->Height == cm->last_height &&
+ !cpi->common.error_resilient_mode;
vp9_find_mv_refs(&cpi->common, xd, xd->mode_info_context,
- cpi->common.error_resilient_mode ?
- 0 : xd->prev_mode_info_context,
+ use_prev_in_find_mv_refs ? xd->prev_mode_info_context : NULL,
frame_type,
mbmi->ref_mvs[frame_type],
cpi->common.ref_frame_sign_bias);
// Candidate refinement carried out at encoder and decoder
+ use_prev_in_find_best_ref =
+ scale[frame_type].x_num == scale[frame_type].x_den &&
+ scale[frame_type].y_num == scale[frame_type].y_den &&
+ !cm->error_resilient_mode &&
+ !cm->frame_parallel_decoding_mode;
vp9_find_best_ref_mvs(xd,
- cpi->common.error_resilient_mode ||
- cpi->common.frame_parallel_decoding_mode ?
- 0 : yv12_mb[frame_type].y_buffer,
+ use_prev_in_find_best_ref ?
+ yv12_mb[frame_type].y_buffer : NULL,
yv12->y_stride,
mbmi->ref_mvs[frame_type],
&frame_nearest_mv[frame_type],
@@ -3210,7 +3217,9 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
int mode_index,
INTERPOLATIONFILTERTYPE *best_filter,
int_mv frame_mv[MB_MODE_COUNT]
- [MAX_REF_FRAMES]) {
+ [MAX_REF_FRAMES],
+ YV12_BUFFER_CONFIG *scaled_ref_frame,
+ int mb_row, int mb_col) {
VP9_COMMON *cm = &cpi->common;
MACROBLOCKD *xd = &x->e_mbd;
MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
@@ -3254,6 +3263,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
x->nmvjointcost, x->mvcost, 96,
x->e_mbd.allow_high_precision_mv);
} else {
+ YV12_BUFFER_CONFIG backup_yv12 = xd->pre;
int bestsme = INT_MAX;
int further_steps, step_param = cpi->sf.first_step;
int sadpb = x->sadperbit16;
@@ -3265,6 +3275,16 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
int tmp_row_min = x->mv_row_min;
int tmp_row_max = x->mv_row_max;
+ if (scaled_ref_frame) {
+ // Swap out the reference frame for a version that's been scaled to
+ // match the resolution of the current frame, allowing the existing
+ // motion search code to be used without additional modifications.
+ xd->pre = *scaled_ref_frame;
+ xd->pre.y_buffer += mb_row * 16 * xd->pre.y_stride + mb_col * 16;
+ xd->pre.u_buffer += mb_row * 8 * xd->pre.uv_stride + mb_col * 8;
+ xd->pre.v_buffer += mb_row * 8 * xd->pre.uv_stride + mb_col * 8;
+ }
+
vp9_clamp_mv_min_max(x, &ref_mv[0]);
// mvp_full.as_int = ref_mv[0].as_int;
@@ -3307,6 +3327,11 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
*rate2 += vp9_mv_bit_cost(&tmp_mv, &ref_mv[0],
x->nmvjointcost, x->mvcost,
96, xd->allow_high_precision_mv);
+
+ // restore the predictor, if required
+ if (scaled_ref_frame) {
+ xd->pre = backup_yv12;
+ }
}
break;
case NEARMV:
@@ -3388,7 +3413,8 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
xd->dst.u_buffer,
xd->dst.v_buffer,
xd->dst.y_stride,
- xd->dst.uv_stride);
+ xd->dst.uv_stride,
+ mb_row, mb_col);
var = vp9_variance64x64(*(b->base_src), b->src_stride,
xd->dst.y_buffer, xd->dst.y_stride, &sse);
// Note our transform coeffs are 8 times an orthogonal transform.
@@ -3472,7 +3498,8 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
xd->dst.u_buffer,
xd->dst.v_buffer,
xd->dst.y_stride,
- xd->dst.uv_stride);
+ xd->dst.uv_stride,
+ mb_row, mb_col);
var = vp9_variance32x32(*(b->base_src), b->src_stride,
xd->dst.y_buffer, xd->dst.y_stride, &sse);
// Note our transform coeffs are 8 times an orthogonal transform.
@@ -3554,7 +3581,8 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
int tmp_dist_y, tmp_dist_u, tmp_dist_v;
// TODO(jkoleszar): these 2 y/uv should be replaced with one call to
// vp9_build_interintra_16x16_predictors_mb().
- vp9_build_inter16x16_predictors_mby(xd, xd->predictor, 16);
+ vp9_build_inter16x16_predictors_mby(xd, xd->predictor, 16,
+ mb_row, mb_col);
#if CONFIG_COMP_INTERINTRA_PRED
if (is_comp_interintra_pred) {
@@ -3563,7 +3591,8 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
#endif
vp9_build_inter16x16_predictors_mbuv(xd, xd->predictor + 256,
- xd->predictor + 320, 8);
+ xd->predictor + 320, 8,
+ mb_row, mb_col);
#if CONFIG_COMP_INTERINTRA_PRED
if (is_comp_interintra_pred) {
@@ -3659,25 +3688,29 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
xd->dst.u_buffer,
xd->dst.v_buffer,
xd->dst.y_stride,
- xd->dst.uv_stride);
+ xd->dst.uv_stride,
+ mb_row, mb_col);
} else if (block_size == BLOCK_32X32) {
vp9_build_inter32x32_predictors_sb(xd,
xd->dst.y_buffer,
xd->dst.u_buffer,
xd->dst.v_buffer,
xd->dst.y_stride,
- xd->dst.uv_stride);
+ xd->dst.uv_stride,
+ mb_row, mb_col);
} else {
// TODO(jkoleszar): These y/uv fns can be replaced with their mb
// equivalent
- vp9_build_inter16x16_predictors_mby(xd, xd->predictor, 16);
+ vp9_build_inter16x16_predictors_mby(xd, xd->predictor, 16,
+ mb_row, mb_col);
#if CONFIG_COMP_INTERINTRA_PRED
if (is_comp_interintra_pred) {
vp9_build_interintra_16x16_predictors_mby(xd, xd->predictor, 16);
}
#endif
vp9_build_inter16x16_predictors_mbuv(xd, &xd->predictor[256],
- &xd->predictor[320], 8);
+ &xd->predictor[320], 8,
+ mb_row, mb_col);
#if CONFIG_COMP_INTERINTRA_PRED
if (is_comp_interintra_pred) {
vp9_build_interintra_16x16_predictors_mbuv(xd, &xd->predictor[256],
@@ -3872,6 +3905,8 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
int intra_cost_penalty = 20 * vp9_dc_quant(cpi->common.base_qindex,
cpi->common.y1dc_delta_q);
+ struct scale_factors scale_factor[4];
+
vpx_memset(mode8x8, 0, sizeof(mode8x8));
vpx_memset(&frame_mv, 0, sizeof(frame_mv));
vpx_memset(&best_mbmode, 0, sizeof(best_mbmode));
@@ -3895,24 +3930,24 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
}
if (cpi->ref_frame_flags & VP9_LAST_FLAG) {
- setup_buffer_inter(cpi, x, cpi->common.active_ref_idx[cpi->lst_fb_idx],
+ setup_buffer_inter(cpi, x, cpi->lst_fb_idx,
LAST_FRAME, BLOCK_16X16, mb_row, mb_col,
frame_mv[NEARESTMV], frame_mv[NEARMV],
- frame_mdcounts, yv12_mb);
+ frame_mdcounts, yv12_mb, scale_factor);
}
if (cpi->ref_frame_flags & VP9_GOLD_FLAG) {
- setup_buffer_inter(cpi, x, cpi->common.active_ref_idx[cpi->gld_fb_idx],
+ setup_buffer_inter(cpi, x, cpi->gld_fb_idx,
GOLDEN_FRAME, BLOCK_16X16, mb_row, mb_col,
frame_mv[NEARESTMV], frame_mv[NEARMV],
- frame_mdcounts, yv12_mb);
+ frame_mdcounts, yv12_mb, scale_factor);
}
if (cpi->ref_frame_flags & VP9_ALT_FLAG) {
- setup_buffer_inter(cpi, x, cpi->common.active_ref_idx[cpi->alt_fb_idx],
+ setup_buffer_inter(cpi, x, cpi->alt_fb_idx,
ALTREF_FRAME, BLOCK_16X16, mb_row, mb_col,
frame_mv[NEARESTMV], frame_mv[NEARMV],
- frame_mdcounts, yv12_mb);
+ frame_mdcounts, yv12_mb, scale_factor);
}
*returnintra = INT64_MAX;
@@ -3951,6 +3986,7 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
#endif
int mode_excluded = 0;
int64_t txfm_cache[NB_TXFM_MODES] = { 0 };
+ YV12_BUFFER_CONFIG *scaled_ref_frame;
// These variables hold are rolling total cost and distortion for this mode
rate2 = 0;
@@ -3967,6 +4003,10 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
mbmi->second_ref_frame = vp9_mode_order[mode_index].second_ref_frame;
mbmi->interp_filter = cm->mcomp_filter_type;
+
+ set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame,
+ scale_factor);
+
vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
// Test best rd so far against threshold for trying this mode.
@@ -3982,6 +4022,18 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
!(cpi->ref_frame_flags & flag_list[mbmi->second_ref_frame]))
continue;
+ // References whose size differs from the frame are only tried with ZEROMV.
+ if (mbmi->ref_frame > 0 &&
+ (yv12_mb[mbmi->ref_frame].y_width != cm->mb_cols * 16 ||
+ yv12_mb[mbmi->ref_frame].y_height != cm->mb_rows * 16) &&
+ this_mode != ZEROMV)
+ continue;
+ if (mbmi->second_ref_frame > 0 &&
+ (yv12_mb[mbmi->second_ref_frame].y_width != cm->mb_cols * 16 ||
+ yv12_mb[mbmi->second_ref_frame].y_height != cm->mb_rows * 16) &&
+ this_mode != ZEROMV)
+ continue;
+
// current coding mode under rate-distortion optimization test loop
#if CONFIG_COMP_INTERINTRA_PRED
mbmi->interintra_mode = (MB_PREDICTION_MODE)(DC_PRED - 1);
@@ -4014,12 +4066,25 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
}
/* everything but intra */
+ scaled_ref_frame = NULL;
if (mbmi->ref_frame) {
int ref = mbmi->ref_frame;
+ int fb;
xd->pre = yv12_mb[ref];
best_ref_mv = mbmi->ref_mvs[ref][0];
vpx_memcpy(mdcounts, frame_mdcounts[ref], sizeof(mdcounts));
+
+ if (mbmi->ref_frame == LAST_FRAME) {
+ fb = cpi->lst_fb_idx;
+ } else if (mbmi->ref_frame == GOLDEN_FRAME) {
+ fb = cpi->gld_fb_idx;
+ } else {
+ fb = cpi->alt_fb_idx;
+ }
+
+ if (cpi->scaled_ref_idx[fb] != cm->ref_frame_map[fb])
+ scaled_ref_frame = &cm->yv12_fb[cpi->scaled_ref_idx[fb]];
}
if (mbmi->second_ref_frame > 0) {
@@ -4300,7 +4365,7 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
int uv_skippable;
rd_inter4x4_uv(cpi, x, &rate_uv, &distortion_uv, &uv_skippable,
- cpi->common.full_pixel);
+ cpi->common.full_pixel, mb_row, mb_col);
rate2 += rate_uv;
distortion2 += distortion_uv;
skippable = skippable && uv_skippable;
@@ -4342,7 +4407,8 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
&rate_y, &distortion,
&rate_uv, &distortion_uv,
&mode_excluded, &disable_skip,
- mode_index, &tmp_best_filter, frame_mv);
+ mode_index, &tmp_best_filter, frame_mv,
+ scaled_ref_frame, mb_row, mb_col);
if (this_rd == INT64_MAX)
continue;
}
@@ -4593,6 +4659,8 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
mbmi->mb_skip_coeff =
(cpi->common.mb_no_coeff_skip) ? 1 : 0;
mbmi->partitioning = 0;
+ set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame,
+ scale_factor);
vpx_memset(best_pred_diff, 0, sizeof(best_pred_diff));
vpx_memset(best_txfm_diff, 0, sizeof(best_txfm_diff));
@@ -4645,6 +4713,8 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
}
end:
+ set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame,
+ scale_factor);
store_coding_context(x, &x->mb_context[xd->sb_index][xd->mb_index],
best_mode_index, &best_partition,
&mbmi->ref_mvs[mbmi->ref_frame][0],
@@ -4858,9 +4928,9 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
VP9_ALT_FLAG };
int idx_list[4] = {0,
- cpi->common.active_ref_idx[cpi->lst_fb_idx],
- cpi->common.active_ref_idx[cpi->gld_fb_idx],
- cpi->common.active_ref_idx[cpi->alt_fb_idx]};
+ cpi->lst_fb_idx,
+ cpi->gld_fb_idx,
+ cpi->alt_fb_idx};
int mdcounts[4];
int near_sadidx[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
int saddone = 0;
@@ -4887,6 +4957,7 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
int rate_uv_16x16 = 0, rate_uv_tokenonly_16x16 = 0;
int dist_uv_16x16 = 0, uv_skip_16x16 = 0;
MB_PREDICTION_MODE mode_uv_16x16 = NEARESTMV;
+ struct scale_factors scale_factor[4];
xd->mode_info_context->mbmi.segment_id = segment_id;
estimate_ref_frame_costs(cpi, segment_id, ref_costs);
@@ -4902,7 +4973,7 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
setup_buffer_inter(cpi, x, idx_list[ref_frame], ref_frame, block_size,
mb_row, mb_col, frame_mv[NEARESTMV],
frame_mv[NEARMV], frame_mdcounts,
- yv12_mb);
+ yv12_mb, scale_factor);
}
frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
frame_mv[ZEROMV][ref_frame].as_int = 0;
@@ -4981,6 +5052,8 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
}
mbmi->ref_frame = ref_frame;
mbmi->second_ref_frame = vp9_mode_order[mode_index].second_ref_frame;
+ set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame,
+ scale_factor);
comp_pred = mbmi->second_ref_frame > INTRA_FRAME;
mbmi->mode = this_mode;
mbmi->uv_mode = DC_PRED;
@@ -4988,6 +5061,7 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
mbmi->interintra_mode = (MB_PREDICTION_MODE)(DC_PRED - 1);
mbmi->interintra_uv_mode = (MB_PREDICTION_MODE)(DC_PRED - 1);
#endif
+
// Evaluate all sub-pel filters irrespective of whether we can use
// them for this frame.
mbmi->interp_filter = cm->mcomp_filter_type;
@@ -5012,6 +5086,8 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
if (!(cpi->ref_frame_flags & flag_list[second_ref]))
continue;
mbmi->second_ref_frame = second_ref;
+ set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame,
+ scale_factor);
xd->second_pre = yv12_mb[second_ref];
mode_excluded =
@@ -5089,6 +5165,20 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
rate2 = rate_y + x->mbmode_cost[cm->frame_type][mbmi->mode] + rate_uv;
distortion2 = distortion_y + distortion_uv;
} else {
+ YV12_BUFFER_CONFIG *scaled_ref_frame = NULL;
+ int fb;
+
+ if (mbmi->ref_frame == LAST_FRAME) {
+ fb = cpi->lst_fb_idx;
+ } else if (mbmi->ref_frame == GOLDEN_FRAME) {
+ fb = cpi->gld_fb_idx;
+ } else {
+ fb = cpi->alt_fb_idx;
+ }
+
+ if (cpi->scaled_ref_idx[fb] != cm->ref_frame_map[fb])
+ scaled_ref_frame = &cm->yv12_fb[cpi->scaled_ref_idx[fb]];
+
#if CONFIG_COMP_INTERINTRA_PRED
if (mbmi->second_ref_frame == INTRA_FRAME) {
if (best_intra16_mode == DC_PRED - 1) continue;
@@ -5110,7 +5200,8 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
&rate_y, &distortion_y,
&rate_uv, &distortion_uv,
&mode_excluded, &disable_skip,
- mode_index, &tmp_best_filter, frame_mv);
+ mode_index, &tmp_best_filter, frame_mv,
+ scaled_ref_frame, mb_row, mb_col);
if (this_rd == INT64_MAX)
continue;
}
@@ -5363,6 +5454,8 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
}
end:
+ set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame,
+ scale_factor);
{
PICK_MODE_CONTEXT *p = (block_size == BLOCK_32X32) ?
&x->sb32_context[xd->sb_index] :
diff --git a/vp9/encoder/vp9_rdopt.h b/vp9/encoder/vp9_rdopt.h
index 710ae58fe..01b156044 100644
--- a/vp9/encoder/vp9_rdopt.h
+++ b/vp9/encoder/vp9_rdopt.h
@@ -45,18 +45,4 @@ extern void vp9_init_me_luts();
extern void vp9_set_mbmode_and_mvs(MACROBLOCK *x,
MB_PREDICTION_MODE mb, int_mv *mv);
-static void setup_pred_block(YV12_BUFFER_CONFIG *dst,
- const YV12_BUFFER_CONFIG *src,
- int mb_row, int mb_col) {
- const int recon_y_stride = src->y_stride;
- const int recon_uv_stride = src->uv_stride;
- const int recon_yoffset = 16 * mb_row * recon_y_stride + 16 * mb_col;
- const int recon_uvoffset = 8 * mb_row * recon_uv_stride + 8 * mb_col;
-
- *dst = *src;
- dst->y_buffer += recon_yoffset;
- dst->u_buffer += recon_uvoffset;
- dst->v_buffer += recon_uvoffset;
-}
-
#endif // VP9_ENCODER_VP9_RDOPT_H_
diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c
index 39c02e6ad..a6cd1c0c3 100644
--- a/vp9/encoder/vp9_temporal_filter.c
+++ b/vp9/encoder/vp9_temporal_filter.c
@@ -456,6 +456,13 @@ void vp9_temporal_filter_prepare(VP9_COMP *cpi, int distance) {
, start_frame);
#endif
+ // Set up scaling factors. Scaling on each of the ARNR frames is not supported
+ vp9_setup_scale_factors_for_frame(&cpi->mb.e_mbd.scale_factor[0],
+ &cpi->common.yv12_fb[cpi->common.new_fb_idx],
+ 16 * cpi->common.mb_cols,
+ 16 * cpi->common.mb_rows);
+ cpi->mb.e_mbd.scale_factor_uv[0] = cpi->mb.e_mbd.scale_factor[0];
+
// Setup frame pointers, NULL indicates frame not included in filter
vpx_memset(cpi->frames, 0, max_frames * sizeof(YV12_BUFFER_CONFIG *));
for (frame = 0; frame < frames_to_blur; frame++) {
diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c
index 8efc97697..95a2e1227 100644
--- a/vp9/encoder/vp9_tokenize.c
+++ b/vp9/encoder/vp9_tokenize.c
@@ -105,7 +105,7 @@ static void tokenize_b(VP9_COMP *cpi,
int c = 0;
int recent_energy = 0;
const BLOCKD * const b = xd->block + ib;
- const int eob = b->eob; /* one beyond last nonzero coeff */
+ const int eob = xd->eobs[ib]; /* one beyond last nonzero coeff */
TOKENEXTRA *t = *tp; /* store tokens starting here */
int16_t *qcoeff_ptr = b->qcoeff;
int seg_eob;
@@ -145,17 +145,14 @@ static void tokenize_b(VP9_COMP *cpi,
probs = cpi->common.fc.coef_probs_4x4;
break;
case TX_8X8:
-#if CONFIG_CNVCONTEXT
a_ec = (a[0] + a[1]) != 0;
l_ec = (l[0] + l[1]) != 0;
-#endif
seg_eob = 64;
scan = vp9_default_zig_zag1d_8x8;
counts = cpi->coef_counts_8x8;
probs = cpi->common.fc.coef_probs_8x8;
break;
case TX_16X16:
-#if CONFIG_CNVCONTEXT
if (type != PLANE_TYPE_UV) {
a_ec = (a[0] + a[1] + a[2] + a[3]) != 0;
l_ec = (l[0] + l[1] + l[2] + l[3]) != 0;
@@ -163,7 +160,6 @@ static void tokenize_b(VP9_COMP *cpi,
a_ec = (a[0] + a[1] + a1[0] + a1[1]) != 0;
l_ec = (l[0] + l[1] + l1[0] + l1[1]) != 0;
}
-#endif
seg_eob = 256;
scan = vp9_default_zig_zag1d_16x16;
counts = cpi->coef_counts_16x16;
@@ -174,14 +170,12 @@ static void tokenize_b(VP9_COMP *cpi,
}
break;
case TX_32X32:
-#if CONFIG_CNVCONTEXT
a_ec = a[0] + a[1] + a[2] + a[3] +
a1[0] + a1[1] + a1[2] + a1[3];
l_ec = l[0] + l[1] + l[2] + l[3] +
l1[0] + l1[1] + l1[2] + l1[3];
a_ec = a_ec != 0;
l_ec = l_ec != 0;
-#endif
seg_eob = 1024;
scan = vp9_default_zig_zag1d_32x32;
counts = cpi->coef_counts_32x32;
@@ -251,7 +245,7 @@ int vp9_mby_is_skippable_4x4(MACROBLOCKD *xd) {
int i = 0;
for (i = 0; i < 16; i++)
- skip &= (!xd->block[i].eob);
+ skip &= (!xd->eobs[i]);
return skip;
}
@@ -261,7 +255,7 @@ int vp9_mbuv_is_skippable_4x4(MACROBLOCKD *xd) {
int i;
for (i = 16; i < 24; i++)
- skip &= (!xd->block[i].eob);
+ skip &= (!xd->eobs[i]);
return skip;
}
@@ -275,13 +269,13 @@ int vp9_mby_is_skippable_8x8(MACROBLOCKD *xd) {
int i = 0;
for (i = 0; i < 16; i += 4)
- skip &= (!xd->block[i].eob);
+ skip &= (!xd->eobs[i]);
return skip;
}
int vp9_mbuv_is_skippable_8x8(MACROBLOCKD *xd) {
- return (!xd->block[16].eob) & (!xd->block[20].eob);
+ return (!xd->eobs[16]) & (!xd->eobs[20]);
}
static int mb_is_skippable_8x8(MACROBLOCKD *xd) {
@@ -296,7 +290,7 @@ static int mb_is_skippable_8x8_4x4uv(MACROBLOCKD *xd) {
int vp9_mby_is_skippable_16x16(MACROBLOCKD *xd) {
int skip = 1;
- skip &= !xd->block[0].eob;
+ skip &= !xd->eobs[0];
return skip;
}
@@ -306,12 +300,12 @@ static int mb_is_skippable_16x16(MACROBLOCKD *xd) {
int vp9_sby_is_skippable_32x32(MACROBLOCKD *xd) {
int skip = 1;
- skip &= !xd->block[0].eob;
+ skip &= !xd->eobs[0];
return skip;
}
int vp9_sbuv_is_skippable_16x16(MACROBLOCKD *xd) {
- return (!xd->block[16].eob) & (!xd->block[20].eob);
+ return (!xd->eobs[16]) & (!xd->eobs[20]);
}
static int sb_is_skippable_32x32(MACROBLOCKD *xd) {
@@ -635,15 +629,12 @@ static INLINE void stuff_b(VP9_COMP *cpi,
probs = cpi->common.fc.coef_probs_4x4;
break;
case TX_8X8:
-#if CONFIG_CNVCONTEXT
a_ec = (a[0] + a[1]) != 0;
l_ec = (l[0] + l[1]) != 0;
-#endif
counts = cpi->coef_counts_8x8;
probs = cpi->common.fc.coef_probs_8x8;
break;
case TX_16X16:
-#if CONFIG_CNVCONTEXT
if (type != PLANE_TYPE_UV) {
a_ec = (a[0] + a[1] + a[2] + a[3]) != 0;
l_ec = (l[0] + l[1] + l[2] + l[3]) != 0;
@@ -651,19 +642,16 @@ static INLINE void stuff_b(VP9_COMP *cpi,
a_ec = (a[0] + a[1] + a1[0] + a1[1]) != 0;
l_ec = (l[0] + l[1] + l1[0] + l1[1]) != 0;
}
-#endif
counts = cpi->coef_counts_16x16;
probs = cpi->common.fc.coef_probs_16x16;
break;
case TX_32X32:
-#if CONFIG_CNVCONTEXT
a_ec = a[0] + a[1] + a[2] + a[3] +
a1[0] + a1[1] + a1[2] + a1[3];
l_ec = l[0] + l[1] + l[2] + l[3] +
l1[0] + l1[1] + l1[2] + l1[3];
a_ec = a_ec != 0;
l_ec = l_ec != 0;
-#endif
counts = cpi->coef_counts_32x32;
probs = cpi->common.fc.coef_probs_32x32;
break;
diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk
index eb152f521..f330b464a 100644
--- a/vp9/vp9_common.mk
+++ b/vp9/vp9_common.mk
@@ -110,10 +110,13 @@ VP9_COMMON_SRCS-yes += common/vp9_maskingmv.c
VP9_COMMON_SRCS-$(HAVE_SSE3) += common/x86/vp9_mask_sse3.asm
endif
+VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_idctllm_x86.c
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_sadmxn_x86.c
ifeq ($(HAVE_SSE2),yes)
+vp9/common/x86/vp9_idctllm_x86.c.o: CFLAGS += -msse2
vp9/common/x86/vp9_loopfilter_x86.c.o: CFLAGS += -msse2
vp9/common/x86/vp9_sadmxn_x86.c.o: CFLAGS += -msse2
+vp9/common/x86/vp9_idctllm_x86.c.d: CFLAGS += -msse2
vp9/common/x86/vp9_loopfilter_x86.c.d: CFLAGS += -msse2
vp9/common/x86/vp9_sadmxn_x86.c.d: CFLAGS += -msse2
endif
diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk
index 6e57e67ef..e89381083 100644
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk
@@ -97,16 +97,16 @@ VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_sad_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_sad_sse2_yasm.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_sad4d_sse2_yasm.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_fwalsh_sse2.asm
-VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_quantize_sse2.asm
+#VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_quantize_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_subtract_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_temporal_filter_apply_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE3) += encoder/x86/vp9_sad_sse3.asm
VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_sad_ssse3.asm
VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_variance_ssse3.c
VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_variance_impl_ssse3.asm
-VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_quantize_ssse3.asm
+#VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_quantize_ssse3.asm
VP9_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/vp9_sad_sse4.asm
-VP9_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/vp9_quantize_sse4.asm
+#VP9_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/vp9_quantize_sse4.asm
VP9_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/vp9_quantize_mmx.asm
VP9_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/vp9_encodeopt.asm
VP9_CX_SRCS-$(ARCH_X86_64) += encoder/x86/vp9_ssim_opt.asm