summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- vp9/encoder/vp9_avg.c 6
-rw-r--r-- vp9/encoder/vp9_encodeframe.c 2
-rw-r--r-- vp9/encoder/vp9_encodemb.c 12
-rw-r--r-- vp9/encoder/vp9_quantize.c 49
-rw-r--r-- vp9/encoder/vp9_quantize.h 6
-rw-r--r-- vp9/encoder/x86/vp9_avg_intrin_sse2.c 15
6 files changed, 60 insertions, 30 deletions
diff --git a/vp9/encoder/vp9_avg.c b/vp9/encoder/vp9_avg.c
index 50c8bca0b..90d113c32 100644
--- a/vp9/encoder/vp9_avg.c
+++ b/vp9/encoder/vp9_avg.c
@@ -32,12 +32,13 @@ unsigned int vp9_avg_4x4_c(const uint8_t *s, int p) {
void vp9_int_pro_row_c(int16_t *hbuf, uint8_t const *ref,
const int ref_stride, const int height) {
int idx;
+ const int norm_factor = MAX(8, height >> 1);
for (idx = 0; idx < 16; ++idx) {
int i;
hbuf[idx] = 0;
for (i = 0; i < height; ++i)
hbuf[idx] += ref[i * ref_stride];
- hbuf[idx] /= 32;
+ hbuf[idx] /= norm_factor;
++ref;
}
}
@@ -45,9 +46,10 @@ void vp9_int_pro_row_c(int16_t *hbuf, uint8_t const *ref,
int16_t vp9_int_pro_col_c(uint8_t const *ref, const int width) {
int idx;
int16_t sum = 0;
+ const int norm_factor = MAX(8, width >> 1);
for (idx = 0; idx < width; ++idx)
sum += ref[idx];
- return sum / 32;
+ return sum / norm_factor;
}
int vp9_vector_var_c(int16_t const *ref, int16_t const *src,
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 65d8eaebf..2bdb9915c 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -3914,7 +3914,7 @@ static void encode_frame_internal(VP9_COMP *cpi) {
// Special case: set prev_mi to NULL when the previous mode info
// context cannot be used.
cm->prev_mi = cm->use_prev_frame_mvs ?
- cm->prev_mip + cm->mi_stride + 1 : NULL;
+ cm->prev_mip + cm->mi_stride + 1 : NULL;
x->quant_fp = cpi->sf.use_quant_fp;
vp9_zero(x->skip_txfm);
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index 70b804e31..65e299793 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -476,19 +476,19 @@ void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block,
break;
case TX_16X16:
vp9_highbd_fdct16x16_1(src_diff, coeff, diff_stride);
- vp9_highbd_quantize_dc(coeff, x->skip_block, p->round,
+ vp9_highbd_quantize_dc(coeff, 256, x->skip_block, p->round,
p->quant_fp[0], qcoeff, dqcoeff,
pd->dequant[0], eob);
break;
case TX_8X8:
vp9_highbd_fdct8x8_1(src_diff, coeff, diff_stride);
- vp9_highbd_quantize_dc(coeff, x->skip_block, p->round,
+ vp9_highbd_quantize_dc(coeff, 64, x->skip_block, p->round,
p->quant_fp[0], qcoeff, dqcoeff,
pd->dequant[0], eob);
break;
case TX_4X4:
x->fwd_txm4x4(src_diff, coeff, diff_stride);
- vp9_highbd_quantize_dc(coeff, x->skip_block, p->round,
+ vp9_highbd_quantize_dc(coeff, 16, x->skip_block, p->round,
p->quant_fp[0], qcoeff, dqcoeff,
pd->dequant[0], eob);
break;
@@ -508,19 +508,19 @@ void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block,
break;
case TX_16X16:
vp9_fdct16x16_1(src_diff, coeff, diff_stride);
- vp9_quantize_dc(coeff, x->skip_block, p->round,
+ vp9_quantize_dc(coeff, 256, x->skip_block, p->round,
p->quant_fp[0], qcoeff, dqcoeff,
pd->dequant[0], eob);
break;
case TX_8X8:
vp9_fdct8x8_1(src_diff, coeff, diff_stride);
- vp9_quantize_dc(coeff, x->skip_block, p->round,
+ vp9_quantize_dc(coeff, 64, x->skip_block, p->round,
p->quant_fp[0], qcoeff, dqcoeff,
pd->dequant[0], eob);
break;
case TX_4X4:
x->fwd_txm4x4(src_diff, coeff, diff_stride);
- vp9_quantize_dc(coeff, x->skip_block, p->round,
+ vp9_quantize_dc(coeff, 16, x->skip_block, p->round,
p->quant_fp[0], qcoeff, dqcoeff,
pd->dequant[0], eob);
break;
diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c
index 7143987d4..2523d1ea3 100644
--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c
@@ -19,7 +19,8 @@
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_rd.h"
-void vp9_quantize_dc(const tran_low_t *coeff_ptr, int skip_block,
+void vp9_quantize_dc(const tran_low_t *coeff_ptr,
+ int n_coeffs, int skip_block,
const int16_t *round_ptr, const int16_t quant,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t dequant_ptr, uint16_t *eob_ptr) {
@@ -29,6 +30,9 @@ void vp9_quantize_dc(const tran_low_t *coeff_ptr, int skip_block,
const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
int tmp, eob = -1;
+ vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+ vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+
if (!skip_block) {
tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
tmp = (tmp * quant) >> 16;
@@ -41,12 +45,16 @@ void vp9_quantize_dc(const tran_low_t *coeff_ptr, int skip_block,
}
#if CONFIG_VP9_HIGHBITDEPTH
-void vp9_highbd_quantize_dc(const tran_low_t *coeff_ptr, int skip_block,
+void vp9_highbd_quantize_dc(const tran_low_t *coeff_ptr,
+ int n_coeffs, int skip_block,
const int16_t *round_ptr, const int16_t quant,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t dequant_ptr, uint16_t *eob_ptr) {
int eob = -1;
+ vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+ vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+
if (!skip_block) {
const int rc = 0;
const int coeff = coeff_ptr[rc];
@@ -69,15 +77,20 @@ void vp9_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
const int16_t *round_ptr, const int16_t quant,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t dequant_ptr, uint16_t *eob_ptr) {
+ const int n_coeffs = 1024;
const int rc = 0;
const int coeff = coeff_ptr[rc];
const int coeff_sign = (coeff >> 31);
const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
int tmp, eob = -1;
+ vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+ vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+
if (!skip_block) {
- tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
+ tmp = clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1),
+ INT16_MIN, INT16_MAX);
tmp = (tmp * quant) >> 15;
qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr / 2;
@@ -96,8 +109,12 @@ void vp9_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr,
tran_low_t *dqcoeff_ptr,
const int16_t dequant_ptr,
uint16_t *eob_ptr) {
+ const int n_coeffs = 1024;
int eob = -1;
+ vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+ vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+
if (!skip_block) {
const int rc = 0;
const int coeff = coeff_ptr[rc];
@@ -105,8 +122,8 @@ void vp9_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr,
const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
const int64_t tmp =
- (clamp(abs_coeff + round_ptr[rc != 0], INT32_MIN, INT32_MAX) *
- quant) >> 15;
+ (clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1),
+ INT32_MIN, INT32_MAX) * quant) >> 15;
qcoeff_ptr[rc] = (tran_low_t)((tmp ^ coeff_sign) - coeff_sign);
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr / 2;
if (tmp)
@@ -521,21 +538,21 @@ void vp9_regular_quantize_b_4x4(MACROBLOCK *x, int plane, int block,
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
vp9_highbd_quantize_b(BLOCK_OFFSET(p->coeff, block),
- 16, x->skip_block,
- p->zbin, p->round, p->quant, p->quant_shift,
- BLOCK_OFFSET(p->qcoeff, block),
- BLOCK_OFFSET(pd->dqcoeff, block),
- pd->dequant, &p->eobs[block],
- scan, iscan);
+ 16, x->skip_block,
+ p->zbin, p->round, p->quant, p->quant_shift,
+ BLOCK_OFFSET(p->qcoeff, block),
+ BLOCK_OFFSET(pd->dqcoeff, block),
+ pd->dequant, &p->eobs[block],
+ scan, iscan);
return;
}
#endif
vp9_quantize_b(BLOCK_OFFSET(p->coeff, block),
- 16, x->skip_block,
- p->zbin, p->round, p->quant, p->quant_shift,
- BLOCK_OFFSET(p->qcoeff, block),
- BLOCK_OFFSET(pd->dqcoeff, block),
- pd->dequant, &p->eobs[block], scan, iscan);
+ 16, x->skip_block,
+ p->zbin, p->round, p->quant, p->quant_shift,
+ BLOCK_OFFSET(p->qcoeff, block),
+ BLOCK_OFFSET(pd->dqcoeff, block),
+ pd->dequant, &p->eobs[block], scan, iscan);
}
static void invert_quant(int16_t *quant, int16_t *shift, int d) {
diff --git a/vp9/encoder/vp9_quantize.h b/vp9/encoder/vp9_quantize.h
index de2839f5b..55e546944 100644
--- a/vp9/encoder/vp9_quantize.h
+++ b/vp9/encoder/vp9_quantize.h
@@ -37,7 +37,8 @@ typedef struct {
DECLARE_ALIGNED(16, int16_t, uv_round[QINDEX_RANGE][8]);
} QUANTS;
-void vp9_quantize_dc(const tran_low_t *coeff_ptr, int skip_block,
+void vp9_quantize_dc(const tran_low_t *coeff_ptr,
+ int n_coeffs, int skip_block,
const int16_t *round_ptr, const int16_t quant_ptr,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t dequant_ptr, uint16_t *eob_ptr);
@@ -49,7 +50,8 @@ void vp9_regular_quantize_b_4x4(MACROBLOCK *x, int plane, int block,
const int16_t *scan, const int16_t *iscan);
#if CONFIG_VP9_HIGHBITDEPTH
-void vp9_highbd_quantize_dc(const tran_low_t *coeff_ptr, int skip_block,
+void vp9_highbd_quantize_dc(const tran_low_t *coeff_ptr,
+ int n_coeffs, int skip_block,
const int16_t *round_ptr, const int16_t quant_ptr,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t dequant_ptr, uint16_t *eob_ptr);
diff --git a/vp9/encoder/x86/vp9_avg_intrin_sse2.c b/vp9/encoder/x86/vp9_avg_intrin_sse2.c
index 482fa3da3..f49949940 100644
--- a/vp9/encoder/x86/vp9_avg_intrin_sse2.c
+++ b/vp9/encoder/x86/vp9_avg_intrin_sse2.c
@@ -90,8 +90,16 @@ void vp9_int_pro_row_sse2(int16_t *hbuf, uint8_t const*ref,
s0 = _mm_adds_epu16(s0, t0);
s1 = _mm_adds_epu16(s1, t1);
- s0 = _mm_srai_epi16(s0, 5);
- s1 = _mm_srai_epi16(s1, 5);
+ if (height == 64) {
+ s0 = _mm_srai_epi16(s0, 5);
+ s1 = _mm_srai_epi16(s1, 5);
+ } else if (height == 32) {
+ s0 = _mm_srai_epi16(s0, 4);
+ s1 = _mm_srai_epi16(s1, 4);
+ } else {
+ s0 = _mm_srai_epi16(s0, 3);
+ s1 = _mm_srai_epi16(s1, 3);
+ }
_mm_store_si128((__m128i *)hbuf, s0);
hbuf += 8;
@@ -104,6 +112,7 @@ int16_t vp9_int_pro_col_sse2(uint8_t const *ref, const int width) {
__m128i s0 = _mm_sad_epu8(src_line, zero);
__m128i s1;
int i;
+ const int norm_factor = 3 + (width >> 5);
for (i = 16; i < width; i += 16) {
ref += 16;
@@ -115,7 +124,7 @@ int16_t vp9_int_pro_col_sse2(uint8_t const *ref, const int width) {
s1 = _mm_srli_si128(s0, 8);
s0 = _mm_adds_epu16(s0, s1);
- return (_mm_extract_epi16(s0, 0)) >> 5;
+ return _mm_extract_epi16(s0, 0) >> norm_factor;
}
int vp9_vector_var_sse2(int16_t const *ref, int16_t const *src,