summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- test/fdct4x4_test.cc 20
-rw-r--r-- test/fdct8x8_test.cc 31
-rw-r--r-- vp9/common/vp9_blockd.h 74
-rw-r--r-- vp9/common/vp9_reconintra.c 21
-rw-r--r-- vp9/common/vp9_reconintra.h 2
-rw-r--r-- vp9/decoder/vp9_decodframe.c 1
-rw-r--r-- vp9/encoder/vp9_encodeframe.c 5
-rw-r--r-- vp9/encoder/vp9_encodeframe.h 2
-rw-r--r-- vp9/encoder/vp9_encodemb.c 2
-rw-r--r-- vp9/encoder/vp9_firstpass.c 2
-rw-r--r-- vp9/encoder/vp9_onyx_int.h 1
-rw-r--r-- vp9/encoder/vp9_rdopt.c 160
-rw-r--r-- vp9/encoder/x86/vp9_dct_sse2.c 10
13 files changed, 153 insertions, 178 deletions
diff --git a/test/fdct4x4_test.cc b/test/fdct4x4_test.cc
index 4a788edc0..3538c7bd9 100644
--- a/test/fdct4x4_test.cc
+++ b/test/fdct4x4_test.cc
@@ -20,23 +20,24 @@ extern "C" {
#include "acm_random.h"
#include "vpx/vpx_integer.h"
+#include "vpx_ports/mem.h"
using libvpx_test::ACMRandom;
namespace {
-void fdct4x4(int16_t *in, int16_t *out, uint8_t */*dst*/,
+void fdct4x4(int16_t *in, int16_t *out, uint8_t* /*dst*/,
int stride, int /*tx_type*/) {
vp9_short_fdct4x4_c(in, out, stride);
}
-void idct4x4_add(int16_t */*in*/, int16_t *out, uint8_t *dst,
+void idct4x4_add(int16_t* /*in*/, int16_t *out, uint8_t *dst,
int stride, int /*tx_type*/) {
vp9_short_idct4x4_add_c(out, dst, stride >> 1);
}
-void fht4x4(int16_t *in, int16_t *out, uint8_t */*dst*/,
+void fht4x4(int16_t *in, int16_t *out, uint8_t* /*dst*/,
int stride, int tx_type) {
vp9_short_fht4x4_c(in, out, stride >> 1, tx_type);
}
-void iht4x4_add(int16_t */*in*/, int16_t *out, uint8_t *dst,
+void iht4x4_add(int16_t* /*in*/, int16_t *out, uint8_t *dst,
int stride, int tx_type) {
vp9_short_iht4x4_add_c(out, dst, stride >> 1, tx_type);
}
@@ -77,8 +78,8 @@ class FwdTrans4x4Test : public ::testing::TestWithParam<int> {
TEST_P(FwdTrans4x4Test, SignBiasCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
- int16_t test_input_block[16];
- int16_t test_output_block[16];
+ DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 16);
+ DECLARE_ALIGNED_ARRAY(16, int16_t, test_output_block, 16);
const int pitch = 8;
int count_sign_block[16][2];
const int count_test_block = 1000000;
@@ -140,9 +141,10 @@ TEST_P(FwdTrans4x4Test, RoundTripErrorCheck) {
double total_error = 0;
const int count_test_block = 1000000;
for (int i = 0; i < count_test_block; ++i) {
- int16_t test_input_block[16];
- int16_t test_temp_block[16];
- uint8_t dst[16], src[16];
+ DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 16);
+ DECLARE_ALIGNED_ARRAY(16, int16_t, test_temp_block, 16);
+ DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, 16);
+ DECLARE_ALIGNED_ARRAY(16, uint8_t, src, 16);
for (int j = 0; j < 16; ++j) {
src[j] = rnd.Rand8();
diff --git a/test/fdct8x8_test.cc b/test/fdct8x8_test.cc
index 03301a31b..eeae208f2 100644
--- a/test/fdct8x8_test.cc
+++ b/test/fdct8x8_test.cc
@@ -13,6 +13,7 @@
#include <string.h>
#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "vpx_ports/mem.h"
extern "C" {
#include "vp9_rtcd.h"
@@ -25,14 +26,16 @@ void vp9_short_idct8x8_add_c(short *input, uint8_t *output, int pitch);
using libvpx_test::ACMRandom;
namespace {
-void fdct8x8(int16_t *in, int16_t *out, uint8_t *dst, int stride, int tx_type) {
+void fdct8x8(int16_t *in, int16_t *out, uint8_t* /*dst*/,
+ int stride, int /*tx_type*/) {
vp9_short_fdct8x8_c(in, out, stride);
}
-void idct8x8_add(int16_t *in, int16_t *out, uint8_t *dst,
- int stride, int tx_type) {
+void idct8x8_add(int16_t* /*in*/, int16_t *out, uint8_t *dst,
+ int stride, int /*tx_type*/) {
vp9_short_idct8x8_add_c(out, dst, stride >> 1);
}
-void fht8x8(int16_t *in, int16_t *out, uint8_t *dst, int stride, int tx_type) {
+void fht8x8(int16_t *in, int16_t *out, uint8_t* /*dst*/,
+ int stride, int tx_type) {
// TODO(jingning): need to refactor this to test both _c and _sse2 functions,
// when we have all inverse dct functions done sse2.
#if HAVE_SSE2
@@ -41,7 +44,7 @@ void fht8x8(int16_t *in, int16_t *out, uint8_t *dst, int stride, int tx_type) {
vp9_short_fht8x8_c(in, out, stride >> 1, tx_type);
#endif
}
-void iht8x8_add(int16_t *in, int16_t *out, uint8_t *dst,
+void iht8x8_add(int16_t* /*in*/, int16_t *out, uint8_t *dst,
int stride, int tx_type) {
vp9_short_iht8x8_add_c(out, dst, stride >> 1, tx_type);
}
@@ -79,8 +82,8 @@ class FwdTrans8x8Test : public ::testing::TestWithParam<int> {
TEST_P(FwdTrans8x8Test, SignBiasCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
- int16_t test_input_block[64];
- int16_t test_output_block[64];
+ DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 64);
+ DECLARE_ALIGNED_ARRAY(16, int16_t, test_output_block, 64);
const int pitch = 16;
int count_sign_block[64][2];
const int count_test_block = 100000;
@@ -150,9 +153,10 @@ TEST_P(FwdTrans8x8Test, RoundTripErrorCheck) {
double total_error = 0;
const int count_test_block = 100000;
for (int i = 0; i < count_test_block; ++i) {
- int16_t test_input_block[64];
- int16_t test_temp_block[64];
- uint8_t dst[64], src[64];
+ DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 64);
+ DECLARE_ALIGNED_ARRAY(16, int16_t, test_temp_block, 64);
+ DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, 64);
+ DECLARE_ALIGNED_ARRAY(16, uint8_t, src, 64);
for (int j = 0; j < 64; ++j) {
src[j] = rnd.Rand8();
@@ -200,9 +204,10 @@ TEST_P(FwdTrans8x8Test, ExtremalCheck) {
double total_error = 0;
const int count_test_block = 100000;
for (int i = 0; i < count_test_block; ++i) {
- int16_t test_input_block[64];
- int16_t test_temp_block[64];
- uint8_t dst[64], src[64];
+ DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 64);
+ DECLARE_ALIGNED_ARRAY(16, int16_t, test_temp_block, 64);
+ DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, 64);
+ DECLARE_ALIGNED_ARRAY(16, uint8_t, src, 64);
for (int j = 0; j < 64; ++j) {
src[j] = rnd.Rand8() % 2 ? 255 : 0;
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index a09f33ed9..0f197e330 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -338,6 +338,7 @@ typedef struct macroblockd {
signed char last_ref_lf_deltas[MAX_REF_LF_DELTAS];
/* 0 = Intra, Last, GF, ARF */
signed char ref_lf_deltas[MAX_REF_LF_DELTAS];
+
/* 0 = ZERO_MV, MV */
signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS];
/* 0 = ZERO_MV, MV */
@@ -404,34 +405,15 @@ static INLINE void update_partition_context(MACROBLOCKD *xd,
int bwl = b_width_log2(sb_type);
int bhl = b_height_log2(sb_type);
int boffset = b_width_log2(BLOCK_SIZE_SB64X64) - bsl;
- int i;
+ char pcvalue[2] = {~(0xe << boffset), ~(0xf <<boffset)};
+
+ assert(MAX(bwl, bhl) <= bsl);
// update the partition context at the end notes. set partition bits
// of block sizes larger than the current one to be one, and partition
// bits of smaller block sizes to be zero.
- if ((bwl == bsl) && (bhl == bsl)) {
- for (i = 0; i < bs; i++)
- xd->left_seg_context[i] = ~(0xf << boffset);
- for (i = 0; i < bs; i++)
- xd->above_seg_context[i] = ~(0xf << boffset);
- } else if ((bwl == bsl) && (bhl < bsl)) {
- for (i = 0; i < bs; i++)
- xd->left_seg_context[i] = ~(0xe << boffset);
- for (i = 0; i < bs; i++)
- xd->above_seg_context[i] = ~(0xf << boffset);
- } else if ((bwl < bsl) && (bhl == bsl)) {
- for (i = 0; i < bs; i++)
- xd->left_seg_context[i] = ~(0xf << boffset);
- for (i = 0; i < bs; i++)
- xd->above_seg_context[i] = ~(0xe << boffset);
- } else if ((bwl < bsl) && (bhl < bsl)) {
- for (i = 0; i < bs; i++)
- xd->left_seg_context[i] = ~(0xe << boffset);
- for (i = 0; i < bs; i++)
- xd->above_seg_context[i] = ~(0xe << boffset);
- } else {
- assert(0);
- }
+ vpx_memset(xd->above_seg_context, pcvalue[bwl == bsl], bs);
+ vpx_memset(xd->left_seg_context, pcvalue[bhl == bsl], bs);
}
static INLINE int partition_plane_context(MACROBLOCKD *xd,
@@ -504,53 +486,25 @@ static BLOCK_SIZE_TYPE get_subsize(BLOCK_SIZE_TYPE bsize,
return subsize;
}
-// transform mapping
-static TX_TYPE txfm_map(MB_PREDICTION_MODE bmode) {
- switch (bmode) {
- case TM_PRED :
- case D135_PRED :
- return ADST_ADST;
-
- case V_PRED :
- case D117_PRED :
- case D63_PRED:
- return ADST_DCT;
-
- case H_PRED :
- case D153_PRED :
- case D27_PRED :
- return DCT_ADST;
+extern const TX_TYPE mode2txfm_map[MB_MODE_COUNT];
- default:
- return DCT_DCT;
- }
-}
-
-static TX_TYPE get_tx_type_4x4(const MACROBLOCKD *xd, int ib) {
+static INLINE TX_TYPE get_tx_type_4x4(const MACROBLOCKD *xd, int ib) {
MODE_INFO *const mi = xd->mode_info_context;
MB_MODE_INFO *const mbmi = &mi->mbmi;
if (xd->lossless || mbmi->ref_frame[0] != INTRA_FRAME)
return DCT_DCT;
- if (mbmi->sb_type < BLOCK_SIZE_SB8X8) {
- return txfm_map(mi->bmi[ib].as_mode.first);
- } else {
- assert(mbmi->mode <= TM_PRED);
- return txfm_map(mbmi->mode);
- }
+ return mode2txfm_map[mbmi->sb_type < BLOCK_SIZE_SB8X8 ?
+ mi->bmi[ib].as_mode.first : mbmi->mode];
}
-static TX_TYPE get_tx_type_8x8(const MACROBLOCKD *xd) {
- return xd->mode_info_context->mbmi.mode <= TM_PRED
- ? txfm_map(xd->mode_info_context->mbmi.mode)
- : DCT_DCT;
+static INLINE TX_TYPE get_tx_type_8x8(const MACROBLOCKD *xd) {
+ return mode2txfm_map[xd->mode_info_context->mbmi.mode];
}
-static TX_TYPE get_tx_type_16x16(const MACROBLOCKD *xd) {
- return xd->mode_info_context->mbmi.mode <= TM_PRED
- ? txfm_map(xd->mode_info_context->mbmi.mode)
- : DCT_DCT;
+static INLINE TX_TYPE get_tx_type_16x16(const MACROBLOCKD *xd) {
+ return mode2txfm_map[xd->mode_info_context->mbmi.mode];
}
void vp9_setup_block_dptrs(MACROBLOCKD *xd,
diff --git a/vp9/common/vp9_reconintra.c b/vp9/common/vp9_reconintra.c
index 4086bf0e2..2989b9ccc 100644
--- a/vp9/common/vp9_reconintra.c
+++ b/vp9/common/vp9_reconintra.c
@@ -16,6 +16,24 @@
#include "vp9/common/vp9_onyxc_int.h"
#include "vpx_mem/vpx_mem.h"
+const TX_TYPE mode2txfm_map[MB_MODE_COUNT] = {
+ DCT_DCT, // DC
+ ADST_DCT, // V
+ DCT_ADST, // H
+ DCT_DCT, // D45
+ ADST_ADST, // D135
+ ADST_DCT, // D117
+ DCT_ADST, // D153
+ DCT_ADST, // D27
+ ADST_DCT, // D63
+ ADST_ADST, // TM
+ DCT_DCT, // NEARESTMV
+ DCT_DCT, // NEARMV
+ DCT_DCT, // ZEROMV
+ DCT_DCT // NEWMV
+};
+
+
static void d27_predictor(uint8_t *ypred_ptr, int y_stride,
int bw, int bh,
uint8_t *yabove_row, uint8_t *yleft_col) {
@@ -300,6 +318,7 @@ void vp9_predict_intra_block(MACROBLOCKD *xd,
int bwl_in,
TX_SIZE tx_size,
int mode,
+ uint8_t *reference, int ref_stride,
uint8_t *predictor, int pre_stride) {
const int bwl = bwl_in - tx_size;
const int wmask = (1 << bwl) - 1;
@@ -309,7 +328,7 @@ void vp9_predict_intra_block(MACROBLOCKD *xd,
const int txfm_block_size = 4 << tx_size;
assert(bwl >= 0);
- vp9_build_intra_predictors(predictor, pre_stride,
+ vp9_build_intra_predictors(reference, ref_stride,
predictor, pre_stride,
mode,
txfm_block_size,
diff --git a/vp9/common/vp9_reconintra.h b/vp9/common/vp9_reconintra.h
index f5f5f42c4..e369a7192 100644
--- a/vp9/common/vp9_reconintra.h
+++ b/vp9/common/vp9_reconintra.h
@@ -25,6 +25,6 @@ void vp9_predict_intra_block(MACROBLOCKD *xd,
int block_idx,
int bwl_in,
TX_SIZE tx_size,
- int mode,
+ int mode, uint8_t *ref, int ref_stride,
uint8_t *predictor, int pre_stride);
#endif // VP9_COMMON_VP9_RECONINTRA_H_
diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c
index a87cfd3c5..ac8404001 100644
--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodframe.c
@@ -261,6 +261,7 @@ static void decode_block_intra(int plane, int block, BLOCK_SIZE_TYPE bsize,
plane_b_size = b_width_log2(bsize) - pd->subsampling_x;
vp9_predict_intra_block(xd, tx_ib, plane_b_size, tx_size, b_mode,
+ dst, pd->dst.stride,
dst, pd->dst.stride);
// Early exit if there are no coefficients
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index e800582dc..4b1ff103a 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -1531,8 +1531,6 @@ static void init_encode_frame_mb_context(VP9_COMP *cpi) {
0, 0, NULL, NULL );
setup_dst_planes(xd, &cm->yv12_fb[cm->new_fb_idx], 0, 0);
- vp9_build_block_offsets(x);
-
vp9_setup_block_dptrs(&x->e_mbd, cm->subsampling_x, cm->subsampling_y);
xd->mode_info_context->mbmi.mode = DC_PRED;
@@ -2006,9 +2004,6 @@ void vp9_encode_frame(VP9_COMP *cpi) {
}
-void vp9_build_block_offsets(MACROBLOCK *x) {
-}
-
static void sum_intra_stats(VP9_COMP *cpi, MACROBLOCK *x) {
const MACROBLOCKD *xd = &x->e_mbd;
const MB_PREDICTION_MODE m = xd->mode_info_context->mbmi.mode;
diff --git a/vp9/encoder/vp9_encodeframe.h b/vp9/encoder/vp9_encodeframe.h
index d37bdca36..399196927 100644
--- a/vp9/encoder/vp9_encodeframe.h
+++ b/vp9/encoder/vp9_encodeframe.h
@@ -15,8 +15,6 @@
struct macroblock;
struct yv12_buffer_config;
-void vp9_build_block_offsets(struct macroblock *x);
-
void vp9_setup_src_planes(struct macroblock *x,
const struct yv12_buffer_config *src,
int mb_row, int mb_col);
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index ccd84b39c..e13ffbdcd 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -78,7 +78,6 @@ void vp9_subtract_sb(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
#define RDTRUNC(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF )
-#define RDTRUNC_8x8(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF )
typedef struct vp9_token_state vp9_token_state;
struct vp9_token_state {
@@ -643,6 +642,7 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE_TYPE bsize,
plane_b_size = b_width_log2(bsize) - pd->subsampling_x;
vp9_predict_intra_block(xd, tx_ib, plane_b_size, tx_size, b_mode,
+ dst, pd->dst.stride,
dst, pd->dst.stride);
vp9_subtract_block(txfm_b_size, txfm_b_size, src_diff, bw,
src, p->src.stride, dst, pd->dst.stride);
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index 522f89982..d25d78178 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -521,8 +521,6 @@ void vp9_first_pass(VP9_COMP *cpi) {
xd->mode_info_context = cm->mi;
- vp9_build_block_offsets(x);
-
vp9_setup_block_dptrs(&x->e_mbd, cm->subsampling_x, cm->subsampling_y);
vp9_frame_init_quantizer(cpi);
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index 1204ce092..22fd87d1b 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -216,6 +216,7 @@ typedef struct {
int static_segmentation;
int comp_inter_joint_search_thresh;
int adpative_rd_thresh;
+ int skip_encode_sb;
int use_lastframe_partitioning;
int use_largest_txform;
int use_8tap_always;
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index dc3536387..833dfff57 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -495,23 +495,26 @@ static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
static int64_t block_error_sby(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize,
int shift) {
- const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize);
+ struct macroblockd_plane *p = &x->e_mbd.plane[0];
+ const int bw = plane_block_width(bsize, p);
+ const int bh = plane_block_height(bsize, p);
return vp9_block_error(x->plane[0].coeff, x->e_mbd.plane[0].dqcoeff,
- 16 << (bwl + bhl)) >> shift;
+ bw * bh) >> shift;
}
static int64_t block_error_sbuv(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize,
int shift) {
- const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize);
int64_t sum = 0;
int plane;
for (plane = 1; plane < MAX_MB_PLANE; plane++) {
- const int subsampling = x->e_mbd.plane[plane].subsampling_x +
- x->e_mbd.plane[plane].subsampling_y;
+ struct macroblockd_plane *p = &x->e_mbd.plane[plane];
+ const int bw = plane_block_width(bsize, p);
+ const int bh = plane_block_height(bsize, p);
sum += vp9_block_error(x->plane[plane].coeff, x->e_mbd.plane[plane].dqcoeff,
- 16 << (bwl + bhl - subsampling));
+ bw * bh);
}
+
return sum >> shift;
}
@@ -645,7 +648,9 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
int rate = 0;
int64_t distortion;
VP9_COMMON *const cm = &cpi->common;
- const int src_stride = x->plane[0].src.stride;
+ struct macroblock_plane *p = &x->plane[0];
+ struct macroblockd_plane *pd = &xd->plane[0];
+ const int src_stride = p->src.stride;
uint8_t *src, *dst;
int16_t *src_diff, *coeff;
@@ -679,18 +684,20 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
block = ib + idy * 2 + idx;
xd->mode_info_context->bmi[block].as_mode.first = mode;
src = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block,
- x->plane[0].src.buf, src_stride);
+ p->src.buf, src_stride);
src_diff = raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, block,
- x->plane[0].src_diff);
+ p->src_diff);
coeff = BLOCK_OFFSET(x->plane[0].coeff, block, 16);
dst = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block,
- xd->plane[0].dst.buf,
- xd->plane[0].dst.stride);
+ pd->dst.buf,
+ pd->dst.stride);
vp9_predict_intra_block(xd, block, b_width_log2(BLOCK_SIZE_SB8X8),
- TX_4X4, mode, dst, xd->plane[0].dst.stride);
+ TX_4X4, mode,
+ dst, pd->dst.stride,
+ dst, pd->dst.stride);
vp9_subtract_block(4, 4, src_diff, 8,
src, src_stride,
- dst, xd->plane[0].dst.stride);
+ dst, pd->dst.stride);
tx_type = get_tx_type_4x4(xd, block);
if (tx_type != DCT_DCT) {
@@ -703,15 +710,15 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
ratey += cost_coeffs(cm, x, 0, block, PLANE_TYPE_Y_WITH_DC,
tempa + idx, templ + idy, TX_4X4, 16);
- distortion += vp9_block_error(coeff, BLOCK_OFFSET(xd->plane[0].dqcoeff,
- block, 16), 16) >> 2;
+ distortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff,
+ block, 16), 16) >> 2;
if (best_tx_type != DCT_DCT)
- vp9_short_iht4x4_add(BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16),
- dst, xd->plane[0].dst.stride, best_tx_type);
+ vp9_short_iht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block, 16),
+ dst, pd->dst.stride, best_tx_type);
else
- xd->inv_txm4x4_add(BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16),
- dst, xd->plane[0].dst.stride);
+ xd->inv_txm4x4_add(BLOCK_OFFSET(pd->dqcoeff, block, 16),
+ dst, pd->dst.stride);
}
}
@@ -731,7 +738,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
for (idx = 0; idx < bw; ++idx) {
block = ib + idy * 2 + idx;
vpx_memcpy(best_dqcoeff[idy * 2 + idx],
- BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16),
+ BLOCK_OFFSET(pd->dqcoeff, block, 16),
sizeof(best_dqcoeff[0]));
}
}
@@ -743,18 +750,19 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
block = ib + idy * 2 + idx;
xd->mode_info_context->bmi[block].as_mode.first = *best_mode;
dst = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block,
- xd->plane[0].dst.buf,
- xd->plane[0].dst.stride);
+ pd->dst.buf,
+ pd->dst.stride);
vp9_predict_intra_block(xd, block, b_width_log2(BLOCK_SIZE_SB8X8), TX_4X4,
- *best_mode, dst, xd->plane[0].dst.stride);
+ *best_mode, dst, pd->dst.stride,
+ dst, pd->dst.stride);
// inverse transform
if (best_tx_type != DCT_DCT)
vp9_short_iht4x4_add(best_dqcoeff[idy * 2 + idx], dst,
- xd->plane[0].dst.stride, best_tx_type);
+ pd->dst.stride, best_tx_type);
else
xd->inv_txm4x4_add(best_dqcoeff[idy * 2 + idx], dst,
- xd->plane[0].dst.stride);
+ pd->dst.stride);
}
}
@@ -1093,25 +1101,22 @@ static int64_t encode_inter_mb_segment(VP9_COMMON *const cm,
int k;
MACROBLOCKD *xd = &x->e_mbd;
BLOCK_SIZE_TYPE bsize = xd->mode_info_context->mbmi.sb_type;
- int bwl = b_width_log2(bsize), bw = 1 << bwl;
- int bhl = b_height_log2(bsize), bh = 1 << bhl;
+ const int bw = plane_block_width(bsize, &xd->plane[0]);
+ const int bh = plane_block_height(bsize, &xd->plane[0]);
int idx, idy;
const int src_stride = x->plane[0].src.stride;
- uint8_t* const src =
- raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i,
- x->plane[0].src.buf, src_stride);
- int16_t* src_diff =
- raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, i,
- x->plane[0].src_diff);
+ uint8_t* const src = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i,
+ x->plane[0].src.buf,
+ src_stride);
+ int16_t* src_diff = raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, i,
+ x->plane[0].src_diff);
int16_t* coeff = BLOCK_OFFSET(x->plane[0].coeff, 16, i);
- uint8_t* const pre =
- raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i,
- xd->plane[0].pre[0].buf,
- xd->plane[0].pre[0].stride);
- uint8_t* const dst =
- raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i,
- xd->plane[0].dst.buf,
- xd->plane[0].dst.stride);
+ uint8_t* const pre = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i,
+ xd->plane[0].pre[0].buf,
+ xd->plane[0].pre[0].stride);
+ uint8_t* const dst = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i,
+ xd->plane[0].dst.buf,
+ xd->plane[0].dst.stride);
int64_t thisdistortion = 0;
int thisrate = 0;
@@ -1124,7 +1129,7 @@ static int64_t encode_inter_mb_segment(VP9_COMMON *const cm,
xd->plane[0].dst.stride,
&xd->mode_info_context->bmi[i].as_mv[0],
&xd->scale_factor[0],
- 4 * bw, 4 * bh, 0 /* no avg */, &xd->subpix,
+ bw, bh, 0 /* no avg */, &xd->subpix,
MV_PRECISION_Q3);
// TODO(debargha): Make this work properly with the
@@ -1138,17 +1143,17 @@ static int64_t encode_inter_mb_segment(VP9_COMMON *const cm,
vp9_build_inter_predictor(second_pre, xd->plane[0].pre[1].stride,
dst, xd->plane[0].dst.stride,
&xd->mode_info_context->bmi[i].as_mv[1],
- &xd->scale_factor[1], 4 * bw, 4 * bh, 1,
+ &xd->scale_factor[1], bw, bh, 1,
&xd->subpix, MV_PRECISION_Q3);
}
- vp9_subtract_block(4 * bh, 4 * bw, src_diff, 8,
+ vp9_subtract_block(bh, bw, src_diff, 8,
src, src_stride,
dst, xd->plane[0].dst.stride);
k = i;
- for (idy = 0; idy < bh; ++idy) {
- for (idx = 0; idx < bw; ++idx) {
+ for (idy = 0; idy < bh / 4; ++idy) {
+ for (idx = 0; idx < bw / 4; ++idx) {
k += (idy * 2 + idx);
src_diff = raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, k,
x->plane[0].src_diff);
@@ -2231,13 +2236,8 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
int_mv *frame_mv,
int mi_row, int mi_col,
int_mv single_newmv[MAX_REF_FRAMES]) {
- const int bw = 1 << mi_width_log2(bsize), bh = 1 << mi_height_log2(bsize);
-
VP9_COMMON *cm = &cpi->common;
MACROBLOCKD *xd = &x->e_mbd;
- const enum BlockSize block_size = get_plane_block_size(bsize, &xd->plane[0]);
- const enum BlockSize uv_block_size = get_plane_block_size(bsize,
- &xd->plane[1]);
MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
const int is_comp_pred = (mbmi->ref_frame[1] > 0);
const int num_refs = is_comp_pred ? 2 : 1;
@@ -2368,13 +2368,14 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
int p;
for (p = 0; p < MAX_MB_PLANE; p++) {
- const int y = (MI_SIZE * bh) >> xd->plane[p].subsampling_y;
- const int x = (MI_SIZE * bw) >> xd->plane[p].subsampling_x;
+ struct macroblockd_plane *pd = &xd->plane[p];
+ const int bw = plane_block_width(bsize, pd);
+ const int bh = plane_block_height(bsize, pd);
int i;
- for (i = 0; i < y; i++)
- vpx_memcpy(&tmp_buf[p][64 * i],
- xd->plane[p].dst.buf + i * xd->plane[p].dst.stride, x);
+ for (i = 0; i < bh; i++)
+ vpx_memcpy(&tmp_buf[p][64 * i], pd->dst.buf + i * pd->dst.stride,
+ bw);
}
pred_exists = 1;
}
@@ -2392,13 +2393,13 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
int p;
for (p = 0; p < MAX_MB_PLANE; p++) {
- const int y = (MI_SIZE * bh) >> xd->plane[p].subsampling_y;
- const int x = (MI_SIZE * bw) >> xd->plane[p].subsampling_x;
+ struct macroblockd_plane *pd = &xd->plane[p];
+ const int bw = plane_block_width(bsize, pd);
+ const int bh = plane_block_height(bsize, pd);
int i;
- for (i = 0; i < y; i++)
- vpx_memcpy(xd->plane[p].dst.buf + i * xd->plane[p].dst.stride,
- &tmp_buf[p][64 * i], x);
+ for (i = 0; i < bh; i++)
+ vpx_memcpy(pd->dst.buf + i * pd->dst.stride, &tmp_buf[p][64 * i], bw);
}
} else {
// Handles the special case when a filter that is not in the
@@ -2412,36 +2413,37 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
if (cpi->active_map_enabled && x->active_ptr[0] == 0)
x->skip = 1;
else if (x->encode_breakout) {
+ const enum BlockSize y_size = get_plane_block_size(bsize, &xd->plane[0]);
+ const enum BlockSize uv_size = get_plane_block_size(bsize, &xd->plane[1]);
+
unsigned int var, sse;
- int threshold = (xd->plane[0].dequant[1]
- * xd->plane[0].dequant[1] >> 4);
+ int threshold = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1] >> 4);
+
if (threshold < x->encode_breakout)
threshold = x->encode_breakout;
- var = cpi->fn_ptr[block_size].vf(x->plane[0].src.buf,
- x->plane[0].src.stride,
- xd->plane[0].dst.buf,
- xd->plane[0].dst.stride,
- &sse);
+ var = cpi->fn_ptr[y_size].vf(x->plane[0].src.buf, x->plane[0].src.stride,
+ xd->plane[0].dst.buf, xd->plane[0].dst.stride,
+ &sse);
if ((int)sse < threshold) {
unsigned int q2dc = xd->plane[0].dequant[0];
- /* If there is no codeable 2nd order dc
- or a very small uniform pixel change change */
+ // If there is no codeable 2nd order dc
+ // or a very small uniform pixel change
if ((sse - var < q2dc * q2dc >> 4) ||
(sse / 2 > var && sse - var < 64)) {
// Check u and v to make sure skip is ok
int sse2;
unsigned int sse2u, sse2v;
- var = cpi->fn_ptr[uv_block_size].vf(x->plane[1].src.buf,
- x->plane[1].src.stride,
- xd->plane[1].dst.buf,
- xd->plane[1].dst.stride, &sse2u);
- var = cpi->fn_ptr[uv_block_size].vf(x->plane[2].src.buf,
- x->plane[1].src.stride,
- xd->plane[2].dst.buf,
- xd->plane[1].dst.stride, &sse2v);
+ var = cpi->fn_ptr[uv_size].vf(x->plane[1].src.buf,
+ x->plane[1].src.stride,
+ xd->plane[1].dst.buf,
+ xd->plane[1].dst.stride, &sse2u);
+ var = cpi->fn_ptr[uv_size].vf(x->plane[2].src.buf,
+ x->plane[2].src.stride,
+ xd->plane[2].dst.buf,
+ xd->plane[2].dst.stride, &sse2v);
sse2 = sse2u + sse2v;
if (sse2 * 2 < threshold) {
@@ -2449,7 +2451,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
*distortion = sse + sse2;
*rate2 = 500;
- /* for best_yrd calculation */
+ // for best_yrd calculation
*rate_uv = 0;
*distortion_uv = sse2;
diff --git a/vp9/encoder/x86/vp9_dct_sse2.c b/vp9/encoder/x86/vp9_dct_sse2.c
index 484afce73..cc7d45243 100644
--- a/vp9/encoder/x86/vp9_dct_sse2.c
+++ b/vp9/encoder/x86/vp9_dct_sse2.c
@@ -375,7 +375,7 @@ void vp9_short_fdct8x8_sse2(int16_t *input, int16_t *output, int pitch) {
}
// load 8x8 array
-static INLINE void load_buffer_8x8(int16_t *input, __m128i in[8], int stride) {
+static INLINE void load_buffer_8x8(int16_t *input, __m128i *in, int stride) {
in[0] = _mm_load_si128((__m128i *)(input + 0 * stride));
in[1] = _mm_load_si128((__m128i *)(input + 1 * stride));
in[2] = _mm_load_si128((__m128i *)(input + 2 * stride));
@@ -396,7 +396,7 @@ static INLINE void load_buffer_8x8(int16_t *input, __m128i in[8], int stride) {
}
// write 8x8 array
-static INLINE void write_buffer_8x8(int16_t *output, __m128i res[8]) {
+static INLINE void write_buffer_8x8(int16_t *output, __m128i *res) {
__m128i sign0 = _mm_srai_epi16(res[0], 15);
__m128i sign1 = _mm_srai_epi16(res[1], 15);
__m128i sign2 = _mm_srai_epi16(res[2], 15);
@@ -435,7 +435,7 @@ static INLINE void write_buffer_8x8(int16_t *output, __m128i res[8]) {
}
// perform in-place transpose
-static INLINE void array_transpose_8x8(__m128i res[8]) {
+static INLINE void array_transpose_8x8(__m128i *res) {
const __m128i tr0_0 = _mm_unpacklo_epi16(res[0], res[1]);
const __m128i tr0_1 = _mm_unpacklo_epi16(res[2], res[3]);
const __m128i tr0_2 = _mm_unpackhi_epi16(res[0], res[1]);
@@ -486,7 +486,7 @@ static INLINE void array_transpose_8x8(__m128i res[8]) {
// 07 17 27 37 47 57 67 77
}
-void fdct8_1d_sse2(__m128i in[8]) {
+void fdct8_1d_sse2(__m128i *in) {
// constants
const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64);
const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
@@ -626,7 +626,7 @@ void fdct8_1d_sse2(__m128i in[8]) {
array_transpose_8x8(in);
}
-void fadst8_1d_sse2(__m128i in[8]) {
+void fadst8_1d_sse2(__m128i *in) {
// Constants
const __m128i k__cospi_p02_p30 = pair_set_epi16(cospi_2_64, cospi_30_64);
const __m128i k__cospi_p30_m02 = pair_set_epi16(cospi_30_64, -cospi_2_64);