Diffstat (limited to 'vp9')
 vp9/common/arm/neon/vp9_reconintra_neon.c | 207
 vp9/common/vp9_filter.c                   |  28
 vp9/common/vp9_reconintra.c               |   4
 vp9/common/vp9_rtcd_defs.pl               |  20
 vp9/encoder/vp9_aq_cyclicrefresh.c        |  55
 vp9/encoder/vp9_aq_cyclicrefresh.h        |   2
 vp9/encoder/vp9_avg.c                     |  12
 vp9/encoder/vp9_encodeframe.c             |  60
 vp9/encoder/vp9_encoder.c                 |  30
 vp9/encoder/vp9_encoder.h                 |   6
 vp9/encoder/vp9_mcomp.c                   |  30
 vp9/encoder/vp9_ratectrl.c                |  90
 vp9/encoder/vp9_ratectrl.h                |   2
13 files changed, 431 insertions(+), 115 deletions(-)
diff --git a/vp9/common/arm/neon/vp9_reconintra_neon.c b/vp9/common/arm/neon/vp9_reconintra_neon.c
index 499c42ac3..13c46a57e 100644
--- a/vp9/common/arm/neon/vp9_reconintra_neon.c
+++ b/vp9/common/arm/neon/vp9_reconintra_neon.c
@@ -15,6 +15,75 @@
#include "vpx/vpx_integer.h"
//------------------------------------------------------------------------------
+// DC 4x4
+
+// 'do_above' and 'do_left' facilitate branch removal when inlined.
+static INLINE void dc_4x4(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left,
+ int do_above, int do_left) {
+ uint16x8_t sum_top;
+ uint16x8_t sum_left;
+ uint8x8_t dc0;
+
+ if (do_above) {
+ const uint8x8_t A = vld1_u8(above); // top row
+ const uint16x4_t p0 = vpaddl_u8(A); // cascading summation of the top
+ const uint16x4_t p1 = vpadd_u16(p0, p0);
+ sum_top = vcombine_u16(p1, p1);
+ }
+
+ if (do_left) {
+ const uint8x8_t L = vld1_u8(left); // left border
+ const uint16x4_t p0 = vpaddl_u8(L); // cascading summation of the left
+ const uint16x4_t p1 = vpadd_u16(p0, p0);
+ sum_left = vcombine_u16(p1, p1);
+ }
+
+ if (do_above && do_left) {
+ const uint16x8_t sum = vaddq_u16(sum_left, sum_top);
+ dc0 = vrshrn_n_u16(sum, 3);
+ } else if (do_above) {
+ dc0 = vrshrn_n_u16(sum_top, 2);
+ } else if (do_left) {
+ dc0 = vrshrn_n_u16(sum_left, 2);
+ } else {
+ dc0 = vdup_n_u8(0x80);
+ }
+
+ {
+ const uint8x8_t dc = vdup_lane_u8(dc0, 0);
+ int i;
+ for (i = 0; i < 4; ++i) {
+ vst1_lane_u32((uint32_t*)(dst + i * stride), vreinterpret_u32_u8(dc), 0);
+ }
+ }
+}
+
+void vp9_dc_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ dc_4x4(dst, stride, above, left, 1, 1);
+}
+
+void vp9_dc_left_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)above;
+ dc_4x4(dst, stride, NULL, left, 0, 1);
+}
+
+void vp9_dc_top_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)left;
+ dc_4x4(dst, stride, above, NULL, 1, 0);
+}
+
+void vp9_dc_128_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)above;
+ (void)left;
+ dc_4x4(dst, stride, NULL, NULL, 0, 0);
+}
+
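[Note: vld1_u8 loads eight border bytes, but only lane 0 of the pairwise reduction (the sum of the first four pixels) feeds the final shift, and vrshrn_n_u16 is a rounding narrowing shift. A scalar model of what dc_4x4 computes, as an illustrative sketch only (not part of the patch; assumes <stdint.h> and <stddef.h>):

    /* Scalar model of dc_4x4 (illustrative sketch, not part of the patch). */
    static void dc_4x4_ref(uint8_t *dst, ptrdiff_t stride,
                           const uint8_t *above, const uint8_t *left,
                           int do_above, int do_left) {
      int i, j, dc;
      if (do_above && do_left) {
        int sum = 0;
        for (i = 0; i < 4; ++i) sum += above[i] + left[i];
        dc = (sum + 4) >> 3;   /* round-to-nearest /8: vrshrn_n_u16(sum, 3) */
      } else if (do_above || do_left) {
        const uint8_t *border = do_above ? above : left;
        int sum = 0;
        for (i = 0; i < 4; ++i) sum += border[i];
        dc = (sum + 2) >> 2;   /* round-to-nearest /4: vrshrn_n_u16(sum, 2) */
      } else {
        dc = 0x80;             /* no neighbors available: mid-gray */
      }
      for (i = 0; i < 4; ++i)
        for (j = 0; j < 4; ++j)
          dst[i * stride + j] = (uint8_t)dc;
    }
]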
+//------------------------------------------------------------------------------
// DC 8x8
// 'do_above' and 'do_left' facilitate branch removal when inlined.
@@ -161,6 +230,144 @@ void vp9_dc_128_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride,
dc_16x16(dst, stride, NULL, NULL, 0, 0);
}
+//------------------------------------------------------------------------------
+// DC 32x32
+
+// 'do_above' and 'do_left' facilitate branch removal when inlined.
+static INLINE void dc_32x32(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left,
+ int do_above, int do_left) {
+ uint16x8_t sum_top;
+ uint16x8_t sum_left;
+ uint8x8_t dc0;
+
+ if (do_above) {
+ const uint8x16_t A0 = vld1q_u8(above); // top row
+ const uint8x16_t A1 = vld1q_u8(above + 16);
+ const uint16x8_t p0 = vpaddlq_u8(A0); // cascading summation of the top
+ const uint16x8_t p1 = vpaddlq_u8(A1);
+ const uint16x8_t p2 = vaddq_u16(p0, p1);
+ const uint16x4_t p3 = vadd_u16(vget_low_u16(p2), vget_high_u16(p2));
+ const uint16x4_t p4 = vpadd_u16(p3, p3);
+ const uint16x4_t p5 = vpadd_u16(p4, p4);
+ sum_top = vcombine_u16(p5, p5);
+ }
+
+ if (do_left) {
+ const uint8x16_t L0 = vld1q_u8(left); // left column
+ const uint8x16_t L1 = vld1q_u8(left + 16);
+ const uint16x8_t p0 = vpaddlq_u8(L0); // cascading summation of the left
+ const uint16x8_t p1 = vpaddlq_u8(L1);
+ const uint16x8_t p2 = vaddq_u16(p0, p1);
+ const uint16x4_t p3 = vadd_u16(vget_low_u16(p2), vget_high_u16(p2));
+ const uint16x4_t p4 = vpadd_u16(p3, p3);
+ const uint16x4_t p5 = vpadd_u16(p4, p4);
+ sum_left = vcombine_u16(p5, p5);
+ }
+
+ if (do_above && do_left) {
+ const uint16x8_t sum = vaddq_u16(sum_left, sum_top);
+ dc0 = vrshrn_n_u16(sum, 6);
+ } else if (do_above) {
+ dc0 = vrshrn_n_u16(sum_top, 5);
+ } else if (do_left) {
+ dc0 = vrshrn_n_u16(sum_left, 5);
+ } else {
+ dc0 = vdup_n_u8(0x80);
+ }
+
+ {
+ const uint8x16_t dc = vdupq_lane_u8(dc0, 0);
+ int i;
+ for (i = 0; i < 32; ++i) {
+ vst1q_u8(dst + i * stride, dc);
+ vst1q_u8(dst + i * stride + 16, dc);
+ }
+ }
+}
+
+void vp9_dc_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ dc_32x32(dst, stride, above, left, 1, 1);
+}
+
+void vp9_dc_left_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above,
+ const uint8_t *left) {
+ (void)above;
+ dc_32x32(dst, stride, NULL, left, 0, 1);
+}
+
+void vp9_dc_top_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above,
+ const uint8_t *left) {
+ (void)left;
+ dc_32x32(dst, stride, above, NULL, 1, 0);
+}
+
+void vp9_dc_128_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above,
+ const uint8_t *left) {
+ (void)above;
+ (void)left;
+ dc_32x32(dst, stride, NULL, NULL, 0, 0);
+}
+
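[Note: the cascading summation widens as it folds: vpaddlq_u8 turns 16 bytes into eight 16-bit pairwise sums, the vaddq_u16/vadd_u16 steps merge the halves, and the two vpadd_u16 steps leave the full 32-pixel border sum in every lane; lane 0 then feeds vrshrn_n_u16 with shift 6 (64 samples) or 5 (32 samples), both round-to-nearest divisions. A scalar model of the resulting DC value, as an illustrative sketch only:

    /* Scalar model of the 32x32 DC value (illustrative sketch). */
    static int dc_value_32x32(const uint8_t *above, const uint8_t *left,
                              int do_above, int do_left) {
      int sum = 0, n = 0, i;
      if (do_above) { for (i = 0; i < 32; ++i) sum += above[i]; n += 32; }
      if (do_left)  { for (i = 0; i < 32; ++i) sum += left[i];  n += 32; }
      return n ? (sum + n / 2) / n : 0x80;  /* rounded average, or mid-gray */
    }
]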
+// -----------------------------------------------------------------------------
+
+void vp9_d45_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ const uint64x1_t A0 = vreinterpret_u64_u8(vld1_u8(above)); // top row
+ const uint64x1_t A1 = vshr_n_u64(A0, 8);
+ const uint64x1_t A2 = vshr_n_u64(A0, 16);
+ const uint8x8_t ABCDEFGH = vreinterpret_u8_u64(A0);
+ const uint8x8_t BCDEFGH0 = vreinterpret_u8_u64(A1);
+ const uint8x8_t CDEFGH00 = vreinterpret_u8_u64(A2);
+ const uint8x8_t avg1 = vhadd_u8(ABCDEFGH, CDEFGH00);
+ const uint8x8_t avg2 = vrhadd_u8(avg1, BCDEFGH0);
+ const uint64x1_t avg2_u64 = vreinterpret_u64_u8(avg2);
+ const uint32x2_t r0 = vreinterpret_u32_u8(avg2);
+ const uint32x2_t r1 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 8));
+ const uint32x2_t r2 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 16));
+ const uint32x2_t r3 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 24));
+ (void)left;
+ vst1_lane_u32((uint32_t *)(dst + 0 * stride), r0, 0);
+ vst1_lane_u32((uint32_t *)(dst + 1 * stride), r1, 0);
+ vst1_lane_u32((uint32_t *)(dst + 2 * stride), r2, 0);
+ vst1_lane_u32((uint32_t *)(dst + 3 * stride), r3, 0);
+ dst[3 * stride + 3] = above[7];
+}
+
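[Note: the vhadd_u8/vrhadd_u8 pair is not an approximation of the three-tap filter. vhadd computes (a + c) >> 1, vrhadd computes (avg1 + b + 1) >> 1, and for 8-bit inputs the combination equals ROUND_POWER_OF_TWO(a + 2 * b + c, 2) for every input triple: the bit truncated from a + c can never move the final floor across an integer boundary. The d135 predictor below relies on the same identity. A standalone brute-force check (a sketch, assuming a hosted C environment):

    #include <assert.h>

    /* Exhaustively confirm that a truncating average of (a, c) followed by a
     * rounding average with b equals the rounded three-tap filter
     * (a + 2*b + c + 2) >> 2 for all 8-bit inputs. */
    int main(void) {
      int a, b, c;
      for (a = 0; a < 256; ++a)
        for (b = 0; b < 256; ++b)
          for (c = 0; c < 256; ++c) {
            const int avg1 = (a + c) >> 1;         /* vhadd_u8 */
            const int avg2 = (avg1 + b + 1) >> 1;  /* vrhadd_u8 */
            assert(avg2 == ((a + 2 * b + c + 2) >> 2));
          }
      return 0;
    }
]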
+// -----------------------------------------------------------------------------
+
+void vp9_d135_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ const uint8x8_t XABCD_u8 = vld1_u8(above - 1);
+ const uint64x1_t XABCD = vreinterpret_u64_u8(XABCD_u8);
+ const uint64x1_t ____XABC = vshl_n_u64(XABCD, 32);
+ const uint32x2_t zero = vdup_n_u32(0);
+ const uint32x2_t IJKL = vld1_lane_u32((const uint32_t *)left, zero, 0);
+ const uint8x8_t IJKL_u8 = vreinterpret_u8_u32(IJKL);
+ const uint64x1_t LKJI____ = vreinterpret_u64_u8(vrev32_u8(IJKL_u8));
+ const uint64x1_t LKJIXABC = vorr_u64(LKJI____, ____XABC);
+ const uint8x8_t KJIXABC_ = vreinterpret_u8_u64(vshr_n_u64(LKJIXABC, 8));
+ const uint8x8_t JIXABC__ = vreinterpret_u8_u64(vshr_n_u64(LKJIXABC, 16));
+ const uint8_t D = vget_lane_u8(XABCD_u8, 4);
+ const uint8x8_t JIXABCD_ = vset_lane_u8(D, JIXABC__, 6);
+ const uint8x8_t LKJIXABC_u8 = vreinterpret_u8_u64(LKJIXABC);
+ const uint8x8_t avg1 = vhadd_u8(JIXABCD_, LKJIXABC_u8);
+ const uint8x8_t avg2 = vrhadd_u8(avg1, KJIXABC_);
+ const uint64x1_t avg2_u64 = vreinterpret_u64_u8(avg2);
+ const uint32x2_t r3 = vreinterpret_u32_u8(avg2);
+ const uint32x2_t r2 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 8));
+ const uint32x2_t r1 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 16));
+ const uint32x2_t r0 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 24));
+ vst1_lane_u32((uint32_t *)(dst + 0 * stride), r0, 0);
+ vst1_lane_u32((uint32_t *)(dst + 1 * stride), r1, 0);
+ vst1_lane_u32((uint32_t *)(dst + 2 * stride), r2, 0);
+ vst1_lane_u32((uint32_t *)(dst + 3 * stride), r3, 0);
+}
+
#if !HAVE_NEON_ASM
void vp9_v_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride,
diff --git a/vp9/common/vp9_filter.c b/vp9/common/vp9_filter.c
index b256d4af5..14654a5ab 100644
--- a/vp9/common/vp9_filter.c
+++ b/vp9/common/vp9_filter.c
@@ -12,8 +12,8 @@
#include "vp9/common/vp9_filter.h"
-DECLARE_ALIGNED(256, const InterpKernel,
- vp9_bilinear_filters[SUBPEL_SHIFTS]) = {
+DECLARE_ALIGNED(256, static const InterpKernel,
+ bilinear_filters[SUBPEL_SHIFTS]) = {
{ 0, 0, 0, 128, 0, 0, 0, 0 },
{ 0, 0, 0, 120, 8, 0, 0, 0 },
{ 0, 0, 0, 112, 16, 0, 0, 0 },
@@ -33,8 +33,8 @@ DECLARE_ALIGNED(256, const InterpKernel,
};
// Lagrangian interpolation filter
-DECLARE_ALIGNED(256, const InterpKernel,
- vp9_sub_pel_filters_8[SUBPEL_SHIFTS]) = {
+DECLARE_ALIGNED(256, static const InterpKernel,
+ sub_pel_filters_8[SUBPEL_SHIFTS]) = {
{ 0, 0, 0, 128, 0, 0, 0, 0},
{ 0, 1, -5, 126, 8, -3, 1, 0},
{ -1, 3, -10, 122, 18, -6, 2, 0},
@@ -54,8 +54,8 @@ DECLARE_ALIGNED(256, const InterpKernel,
};
// DCT based filter
-DECLARE_ALIGNED(256, const InterpKernel,
- vp9_sub_pel_filters_8s[SUBPEL_SHIFTS]) = {
+DECLARE_ALIGNED(256, static const InterpKernel,
+ sub_pel_filters_8s[SUBPEL_SHIFTS]) = {
{0, 0, 0, 128, 0, 0, 0, 0},
{-1, 3, -7, 127, 8, -3, 1, 0},
{-2, 5, -13, 125, 17, -6, 3, -1},
@@ -75,8 +75,8 @@ DECLARE_ALIGNED(256, const InterpKernel,
};
// freqmultiplier = 0.5
-DECLARE_ALIGNED(256, const InterpKernel,
- vp9_sub_pel_filters_8lp[SUBPEL_SHIFTS]) = {
+DECLARE_ALIGNED(256, static const InterpKernel,
+ sub_pel_filters_8lp[SUBPEL_SHIFTS]) = {
{ 0, 0, 0, 128, 0, 0, 0, 0},
{-3, -1, 32, 64, 38, 1, -3, 0},
{-2, -2, 29, 63, 41, 2, -3, 0},
@@ -96,15 +96,15 @@ DECLARE_ALIGNED(256, const InterpKernel,
};
-static const InterpKernel* vp9_filter_kernels[4] = {
- vp9_sub_pel_filters_8,
- vp9_sub_pel_filters_8lp,
- vp9_sub_pel_filters_8s,
- vp9_bilinear_filters
+static const InterpKernel* filter_kernels[4] = {
+ sub_pel_filters_8,
+ sub_pel_filters_8lp,
+ sub_pel_filters_8s,
+ bilinear_filters
};
const InterpKernel *vp9_get_interp_kernel(INTERP_FILTER filter) {
assert(filter != SWITCHABLE);
- return vp9_filter_kernels[filter];
+ return filter_kernels[filter];
}
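[Note: with the kernel tables now static, vp9_get_interp_kernel() is the only cross-file access path. The filter_kernels order matches the INTERP_FILTER enum (EIGHTTAP, EIGHTTAP_SMOOTH, EIGHTTAP_SHARP, BILINEAR), so callers migrate along these lines (usage sketch):

    /* Before: a direct table reference such as vp9_sub_pel_filters_8[...].
     * After: go through the accessor. */
    const InterpKernel *kernel = vp9_get_interp_kernel(EIGHTTAP);
    const int16_t *half_pel = kernel[SUBPEL_SHIFTS / 2];  /* half-pel 8-tap kernel */
]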
diff --git a/vp9/common/vp9_reconintra.c b/vp9/common/vp9_reconintra.c
index 650f4adde..1e9acb8d5 100644
--- a/vp9/common/vp9_reconintra.c
+++ b/vp9/common/vp9_reconintra.c
@@ -533,8 +533,8 @@ static INLINE void d117_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
}
intra_pred_no_4x4(d117)
-void vp9_d135_predictor_4x4(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
+void vp9_d135_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
const int I = left[0];
const int J = left[1];
const int K = left[2];
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index 2a3b054ad..dac142397 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -60,7 +60,7 @@ add_proto qw/void vp9_d207_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, co
specialize qw/vp9_d207_predictor_4x4/, "$ssse3_x86inc";
add_proto qw/void vp9_d45_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_d45_predictor_4x4/, "$ssse3_x86inc";
+specialize qw/vp9_d45_predictor_4x4 neon/, "$ssse3_x86inc";
add_proto qw/void vp9_d63_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d63_predictor_4x4/, "$ssse3_x86inc";
@@ -72,7 +72,7 @@ add_proto qw/void vp9_d117_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, co
specialize qw/vp9_d117_predictor_4x4/;
add_proto qw/void vp9_d135_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_d135_predictor_4x4/;
+specialize qw/vp9_d135_predictor_4x4 neon/;
add_proto qw/void vp9_d153_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d153_predictor_4x4/, "$ssse3_x86inc";
@@ -84,16 +84,16 @@ add_proto qw/void vp9_tm_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, cons
specialize qw/vp9_tm_predictor_4x4 neon dspr2 msa/, "$sse_x86inc";
add_proto qw/void vp9_dc_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_predictor_4x4 dspr2 msa/, "$sse_x86inc";
+specialize qw/vp9_dc_predictor_4x4 dspr2 msa neon/, "$sse_x86inc";
add_proto qw/void vp9_dc_top_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_top_predictor_4x4 msa/, "$sse_x86inc";
+specialize qw/vp9_dc_top_predictor_4x4 msa neon/, "$sse_x86inc";
add_proto qw/void vp9_dc_left_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_left_predictor_4x4 msa/, "$sse_x86inc";
+specialize qw/vp9_dc_left_predictor_4x4 msa neon/, "$sse_x86inc";
add_proto qw/void vp9_dc_128_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_128_predictor_4x4 msa/, "$sse_x86inc";
+specialize qw/vp9_dc_128_predictor_4x4 msa neon/, "$sse_x86inc";
add_proto qw/void vp9_d207_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d207_predictor_8x8/, "$ssse3_x86inc";
@@ -201,16 +201,16 @@ add_proto qw/void vp9_tm_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, co
specialize qw/vp9_tm_predictor_32x32 neon msa/, "$sse2_x86_64";
add_proto qw/void vp9_dc_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_predictor_32x32 msa/, "$sse2_x86inc";
+specialize qw/vp9_dc_predictor_32x32 msa neon/, "$sse2_x86inc";
add_proto qw/void vp9_dc_top_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_top_predictor_32x32 msa/, "$sse2_x86inc";
+specialize qw/vp9_dc_top_predictor_32x32 msa neon/, "$sse2_x86inc";
add_proto qw/void vp9_dc_left_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_left_predictor_32x32 msa/, "$sse2_x86inc";
+specialize qw/vp9_dc_left_predictor_32x32 msa neon/, "$sse2_x86inc";
add_proto qw/void vp9_dc_128_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_128_predictor_32x32 msa/, "$sse2_x86inc";
+specialize qw/vp9_dc_128_predictor_32x32 msa neon/, "$sse2_x86inc";
#
# Loopfilter
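[Note: adding neon to a specialize line makes the RTCD generator emit a _neon prototype and wire it into run-time dispatch. The generated vp9_rtcd.h takes roughly this shape on ARM (an illustrative sketch, not verbatim generator output):

    void vp9_dc_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride,
                                const uint8_t *above, const uint8_t *left);
    void vp9_dc_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride,
                                   const uint8_t *above, const uint8_t *left);
    RTCD_EXTERN void (*vp9_dc_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride,
                                             const uint8_t *above,
                                             const uint8_t *left);

    static void setup_rtcd_internal(void) {
      int flags = arm_cpu_caps();
      vp9_dc_predictor_4x4 = vp9_dc_predictor_4x4_c;
      if (flags & HAS_NEON) vp9_dc_predictor_4x4 = vp9_dc_predictor_4x4_neon;
    }
]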
diff --git a/vp9/encoder/vp9_aq_cyclicrefresh.c b/vp9/encoder/vp9_aq_cyclicrefresh.c
index df70d48f8..0e4d8638b 100644
--- a/vp9/encoder/vp9_aq_cyclicrefresh.c
+++ b/vp9/encoder/vp9_aq_cyclicrefresh.c
@@ -39,6 +39,8 @@ struct CYCLIC_REFRESH {
int rdmult;
// Cyclic refresh map.
signed char *map;
+ // Map of the last q a block was coded at.
+ uint8_t *last_coded_q_map;
// Thresholds applied to the projected rate/distortion of the coding block,
// when deciding whether block should be refreshed.
int64_t thresh_rate_sb;
@@ -51,11 +53,11 @@ struct CYCLIC_REFRESH {
// Boost factor for rate target ratio, for segment CR_SEGMENT_ID_BOOST2.
double rate_boost_fac;
double low_content_avg;
- int qindex_delta_seg1;
- int qindex_delta_seg2;
+ int qindex_delta[3];
};
CYCLIC_REFRESH *vp9_cyclic_refresh_alloc(int mi_rows, int mi_cols) {
+ size_t last_coded_q_map_size;
CYCLIC_REFRESH *const cr = vpx_calloc(1, sizeof(*cr));
if (cr == NULL)
return NULL;
@@ -65,12 +67,21 @@ CYCLIC_REFRESH *vp9_cyclic_refresh_alloc(int mi_rows, int mi_cols) {
vpx_free(cr);
return NULL;
}
+ last_coded_q_map_size = mi_rows * mi_cols * sizeof(*cr->last_coded_q_map);
+ cr->last_coded_q_map = vpx_malloc(last_coded_q_map_size);
+ if (cr->last_coded_q_map == NULL) {
+ vpx_free(cr->map);  // also release the map allocated above, to avoid a leak
+ vpx_free(cr);
+ return NULL;
+ }
+ assert(MAXQ <= 255);
+ memset(cr->last_coded_q_map, MAXQ, last_coded_q_map_size);
return cr;
}
void vp9_cyclic_refresh_free(CYCLIC_REFRESH *cr) {
vpx_free(cr->map);
+ vpx_free(cr->last_coded_q_map);
vpx_free(cr);
}
@@ -159,11 +170,11 @@ int vp9_cyclic_refresh_estimate_bits_at_q(const VP9_COMP *cpi,
correction_factor, cm->bit_depth) +
weight_segment1 *
vp9_estimate_bits_at_q(cm->frame_type,
- cm->base_qindex + cr->qindex_delta_seg1, mbs,
+ cm->base_qindex + cr->qindex_delta[1], mbs,
correction_factor, cm->bit_depth) +
weight_segment2 *
vp9_estimate_bits_at_q(cm->frame_type,
- cm->base_qindex + cr->qindex_delta_seg2, mbs,
+ cm->base_qindex + cr->qindex_delta[2], mbs,
correction_factor, cm->bit_depth));
return estimated_bits;
}
@@ -248,9 +259,16 @@ void vp9_cyclic_refresh_update_segment(VP9_COMP *const cpi,
// copy mbmi->segment_id into global segmentation map.
for (y = 0; y < ymis; y++)
for (x = 0; x < xmis; x++) {
- cr->map[block_index + y * cm->mi_cols + x] = new_map_value;
- cpi->segmentation_map[block_index + y * cm->mi_cols + x] =
- mbmi->segment_id;
+ int map_offset = block_index + y * cm->mi_cols + x;
+ cr->map[map_offset] = new_map_value;
+ cpi->segmentation_map[map_offset] = mbmi->segment_id;
+ // Inter skip blocks were clearly not coded at the current qindex, so
+ // don't update the map for them. For cases where motion is non-zero or
+ // the reference frame isn't the previous frame, the previous value in
+ // the map for this spatial location is not entirely correct.
+ if (!is_inter_block(mbmi) || !skip)
+ cr->last_coded_q_map[map_offset] = clamp(
+ cm->base_qindex + cr->qindex_delta[mbmi->segment_id], 0, MAXQ);
}
}
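[Note: folding the two named deltas into qindex_delta[3] lets the clamp above index directly by segment id (CR_SEGMENT_ID_BASE = 0, CR_SEGMENT_ID_BOOST1 = 1, CR_SEGMENT_ID_BOOST2 = 2); qindex_delta[0] stays zero because cr comes from vpx_calloc(). With hypothetical values, for illustration only:

    int base_qindex = 60;
    int qindex_delta[3] = { 0, -20, -36 };   /* BASE, BOOST1, BOOST2 */
    int segment_id = CR_SEGMENT_ID_BOOST1;
    int q = clamp(base_qindex + qindex_delta[segment_id], 0, MAXQ);  /* 40 */
]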
@@ -384,6 +402,10 @@ static void cyclic_refresh_update_map(VP9_COMP *const cpi) {
int sb_col_index = i - sb_row_index * sb_cols;
int mi_row = sb_row_index * MI_BLOCK_SIZE;
int mi_col = sb_col_index * MI_BLOCK_SIZE;
+ int qindex_thresh =
+ cpi->oxcf.content == VP9E_CONTENT_SCREEN
+ ? vp9_get_qindex(&cm->seg, CR_SEGMENT_ID_BOOST2, cm->base_qindex)
+ : 0;
assert(mi_row >= 0 && mi_row < cm->mi_rows);
assert(mi_col >= 0 && mi_col < cm->mi_cols);
bl_index = mi_row * cm->mi_cols + mi_col;
@@ -399,7 +421,8 @@ static void cyclic_refresh_update_map(VP9_COMP *const cpi) {
// for possible boost/refresh (segment 1). The segment id may get
// reset to 0 later if block gets coded anything other than ZEROMV.
if (cr->map[bl_index2] == 0) {
- sum_map++;
+ if (cr->last_coded_q_map[bl_index2] > qindex_thresh)
+ sum_map++;
} else if (cr->map[bl_index2] < 0) {
cr->map[bl_index2]++;
}
@@ -466,8 +489,11 @@ void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) {
unsigned char *const seg_map = cpi->segmentation_map;
memset(seg_map, 0, cm->mi_rows * cm->mi_cols);
vp9_disable_segmentation(&cm->seg);
- if (cm->frame_type == KEY_FRAME)
+ if (cm->frame_type == KEY_FRAME) {
+ memset(cr->last_coded_q_map, MAXQ,
+ cm->mi_rows * cm->mi_cols * sizeof(*cr->last_coded_q_map));
cr->sb_index = 0;
+ }
return;
} else {
int qindex_delta = 0;
@@ -505,7 +531,7 @@ void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) {
// Set the q delta for segment BOOST1.
qindex_delta = compute_deltaq(cpi, cm->base_qindex, cr->rate_ratio_qdelta);
- cr->qindex_delta_seg1 = qindex_delta;
+ cr->qindex_delta[1] = qindex_delta;
// Compute rd-mult for segment BOOST1.
qindex2 = clamp(cm->base_qindex + cm->y_dc_delta_q + qindex_delta, 0, MAXQ);
@@ -518,7 +544,7 @@ void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) {
qindex_delta = compute_deltaq(cpi, cm->base_qindex,
MIN(CR_MAX_RATE_TARGET_RATIO,
cr->rate_boost_fac * cr->rate_ratio_qdelta));
- cr->qindex_delta_seg2 = qindex_delta;
+ cr->qindex_delta[2] = qindex_delta;
vp9_set_segdata(seg, CR_SEGMENT_ID_BOOST2, SEG_LVL_ALT_Q, qindex_delta);
// Update the segmentation and refresh map.
@@ -529,3 +555,10 @@ void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) {
int vp9_cyclic_refresh_get_rdmult(const CYCLIC_REFRESH *cr) {
return cr->rdmult;
}
+
+void vp9_cyclic_refresh_reset_resize(VP9_COMP *const cpi) {
+ const VP9_COMMON *const cm = &cpi->common;
+ CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
+ memset(cr->map, 0, cm->mi_rows * cm->mi_cols);
+ cr->sb_index = 0;
+}
diff --git a/vp9/encoder/vp9_aq_cyclicrefresh.h b/vp9/encoder/vp9_aq_cyclicrefresh.h
index 99bb98ec8..29d2a91bc 100644
--- a/vp9/encoder/vp9_aq_cyclicrefresh.h
+++ b/vp9/encoder/vp9_aq_cyclicrefresh.h
@@ -75,6 +75,8 @@ void vp9_cyclic_refresh_setup(struct VP9_COMP *const cpi);
int vp9_cyclic_refresh_get_rdmult(const CYCLIC_REFRESH *cr);
+void vp9_cyclic_refresh_reset_resize(struct VP9_COMP *const cpi);
+
static INLINE int cyclic_refresh_segment_id_boosted(int segment_id) {
return segment_id == CR_SEGMENT_ID_BOOST1 ||
segment_id == CR_SEGMENT_ID_BOOST2;
diff --git a/vp9/encoder/vp9_avg.c b/vp9/encoder/vp9_avg.c
index 437356681..3ef3882d2 100644
--- a/vp9/encoder/vp9_avg.c
+++ b/vp9/encoder/vp9_avg.c
@@ -115,33 +115,41 @@ void vp9_hadamard_16x16_c(int16_t const *src_diff, int src_stride,
}
}
+// coeff: 16 bits, dynamic range [-32640, 32640].
+// length: value range {16, 64, 256, 1024}.
int16_t vp9_satd_c(const int16_t *coeff, int length) {
int i;
int satd = 0;
for (i = 0; i < length; ++i)
satd += abs(coeff[i]);
+ // satd: 26 bits, dynamic range [-32640 * 1024, 32640 * 1024]
return (int16_t)satd;
}
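[Note: the new range comments make the narrowing in the return explicit. With length = 1024 the worst case is |satd| <= 32640 * 1024 = 33,423,360, which needs 26 bits, while the int16_t return type tops out at INT16_MAX = 32,767, so the cast silently truncates sums that large; the comments document the assumption that real residual blocks stay far below that bound.]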
// Integer projection onto row vectors.
-void vp9_int_pro_row_c(int16_t *hbuf, uint8_t const *ref,
+// height: value range {16, 32, 64}.
+void vp9_int_pro_row_c(int16_t hbuf[16], uint8_t const *ref,
const int ref_stride, const int height) {
int idx;
- const int norm_factor = MAX(8, height >> 1);
+ const int norm_factor = height >> 1;
for (idx = 0; idx < 16; ++idx) {
int i;
hbuf[idx] = 0;
+ // hbuf[idx]: 14 bit, dynamic range [0, 16320].
for (i = 0; i < height; ++i)
hbuf[idx] += ref[i * ref_stride];
+ // hbuf[idx]: 9 bit, dynamic range [0, 510].
hbuf[idx] /= norm_factor;
++ref;
}
}
+// width: value range {16, 32, 64}.
int16_t vp9_int_pro_col_c(uint8_t const *ref, const int width) {
int idx;
int16_t sum = 0;
+ // sum: 14 bit, dynamic range [0, 16320]
for (idx = 0; idx < width; ++idx)
sum += ref[idx];
return sum;
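[Note: dropping the MAX(8, ...) clamp is justified by the documented range: for height in {16, 32, 64}, height >> 1 is {8, 16, 32}, so MAX(8, height >> 1) always equaled height >> 1 and the clamp was dead code. The post-division bound is also height-independent: (height * 255) / (height >> 1) = 510, matching the 9-bit comment.]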
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 5299244d9..dcddefc28 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -2220,66 +2220,6 @@ static void rd_auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile,
*max_block_size = max_size;
}
-static void auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile,
- MACROBLOCKD *const xd,
- int mi_row, int mi_col,
- BLOCK_SIZE *min_block_size,
- BLOCK_SIZE *max_block_size) {
- VP9_COMMON *const cm = &cpi->common;
- MODE_INFO **mi_8x8 = xd->mi;
- const int left_in_image = xd->left_available && mi_8x8[-1];
- const int above_in_image = xd->up_available && mi_8x8[-xd->mi_stride];
- int row8x8_remaining = tile->mi_row_end - mi_row;
- int col8x8_remaining = tile->mi_col_end - mi_col;
- int bh, bw;
- BLOCK_SIZE min_size = BLOCK_32X32;
- BLOCK_SIZE max_size = BLOCK_8X8;
- int bsl = mi_width_log2_lookup[BLOCK_64X64];
- const int search_range_ctrl = (((mi_row + mi_col) >> bsl) +
- get_chessboard_index(cm->current_video_frame)) & 0x1;
- // Trap case where we do not have a prediction.
- if (search_range_ctrl &&
- (left_in_image || above_in_image || cm->frame_type != KEY_FRAME)) {
- int block;
- MODE_INFO **mi;
- BLOCK_SIZE sb_type;
-
- // Find the min and max partition sizes used in the left SB64.
- if (left_in_image) {
- MODE_INFO *cur_mi;
- mi = &mi_8x8[-1];
- for (block = 0; block < MI_BLOCK_SIZE; ++block) {
- cur_mi = mi[block * xd->mi_stride];
- sb_type = cur_mi ? cur_mi->mbmi.sb_type : 0;
- min_size = MIN(min_size, sb_type);
- max_size = MAX(max_size, sb_type);
- }
- }
- // Find the min and max partition sizes used in the above SB64.
- if (above_in_image) {
- mi = &mi_8x8[-xd->mi_stride * MI_BLOCK_SIZE];
- for (block = 0; block < MI_BLOCK_SIZE; ++block) {
- sb_type = mi[block] ? mi[block]->mbmi.sb_type : 0;
- min_size = MIN(min_size, sb_type);
- max_size = MAX(max_size, sb_type);
- }
- }
-
- min_size = min_partition_size[min_size];
- max_size = find_partition_size(max_size, row8x8_remaining, col8x8_remaining,
- &bh, &bw);
- min_size = MIN(min_size, max_size);
- min_size = MAX(min_size, BLOCK_8X8);
- max_size = MIN(max_size, BLOCK_32X32);
- } else {
- min_size = BLOCK_8X8;
- max_size = BLOCK_32X32;
- }
-
- *min_block_size = min_size;
- *max_block_size = max_size;
-}
-
// TODO(jingning) refactor functions setting partition search range
static void set_partition_range(VP9_COMMON *cm, MACROBLOCKD *xd,
int mi_row, int mi_col, BLOCK_SIZE bsize,
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index ecd60d772..d708b8319 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -1596,6 +1596,9 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
sizeof(*cm->frame_contexts)));
cpi->use_svc = 0;
+ cpi->resize_state = 0;
+ cpi->resize_avg_qp = 0;
+ cpi->resize_buffer_underflow = 0;
cpi->common.buffer_pool = pool;
init_config(cpi, oxcf);
@@ -3032,6 +3035,31 @@ static void set_frame_size(VP9_COMP *cpi) {
oxcf->scaled_frame_height);
}
+ if (oxcf->pass == 0 &&
+ oxcf->rc_mode == VPX_CBR &&
+ !cpi->use_svc &&
+ oxcf->resize_mode == RESIZE_DYNAMIC) {
+ if (cpi->resize_state == 1) {
+ oxcf->scaled_frame_width =
+ (cm->width * cpi->resize_scale_num) / cpi->resize_scale_den;
+ oxcf->scaled_frame_height =
(cm->height * cpi->resize_scale_num) / cpi->resize_scale_den;
+ } else if (cpi->resize_state == -1) {
+ // Go back up to original size.
+ oxcf->scaled_frame_width = oxcf->width;
+ oxcf->scaled_frame_height = oxcf->height;
+ }
+ if (cpi->resize_state != 0) {
+ // There has been a change in frame size.
+ vp9_set_size_literal(cpi,
+ oxcf->scaled_frame_width,
+ oxcf->scaled_frame_height);
+
+ // TODO(agrange) Scale cpi->max_mv_magnitude if frame-size has changed.
+ set_mv_search_params(cpi);
+ }
+ }
+
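[Note: a worked example with hypothetical numbers: with resize_scale_num/resize_scale_den = 1/2 and a 640x480 source, resize_state == 1 gives scaled_frame_width = (640 * 1) / 2 = 320 and scaled_frame_height = (480 * 1) / 2 = 240, while resize_state == -1 restores the original oxcf->width x oxcf->height before vp9_set_size_literal() re-applies the size.]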
if ((oxcf->pass == 2) &&
(!cpi->use_svc ||
(is_two_pass_svc(cpi) &&
@@ -3962,7 +3990,6 @@ static void check_src_altref(VP9_COMP *cpi,
extern double vp9_get_blockiness(const unsigned char *img1, int img1_pitch,
const unsigned char *img2, int img2_pitch,
int width, int height);
static void adjust_image_stat(double y, double u, double v, double all,
ImageStat *s) {
@@ -3972,6 +3999,7 @@ static void adjust_image_stat(double y, double u, double v, double all,
s->stat[ALL] += all;
s->worst = MIN(s->worst, all);
}
+#endif // CONFIG_INTERNAL_STATS
int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
size_t *size, uint8_t *dest,
diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h
index 4d2a186e8..2b0da103f 100644
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -479,6 +479,12 @@ typedef struct VP9_COMP {
#endif
int resize_pending;
+ int resize_state;
+ int resize_scale_num;
+ int resize_scale_den;
+ int resize_avg_qp;
+ int resize_buffer_underflow;
+ int resize_count;
// VAR_BASED_PARTITION thresholds
// 0 - threshold_64x64; 1 - threshold_32x32;
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index 234272697..081b99f9f 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -286,20 +286,20 @@ static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
bestmv->row *= 8; \
bestmv->col *= 8;
-static INLINE unsigned int setup_center_error(const MACROBLOCKD *xd,
- const MV *bestmv,
- const MV *ref_mv,
- int error_per_bit,
- const vp9_variance_fn_ptr_t *vfp,
- const uint8_t *const src,
- const int src_stride,
- const uint8_t *const y,
- int y_stride,
- const uint8_t *second_pred,
- int w, int h, int offset,
- int *mvjcost, int *mvcost[2],
- unsigned int *sse1,
- int *distortion) {
+static unsigned int setup_center_error(const MACROBLOCKD *xd,
+ const MV *bestmv,
+ const MV *ref_mv,
+ int error_per_bit,
+ const vp9_variance_fn_ptr_t *vfp,
+ const uint8_t *const src,
+ const int src_stride,
+ const uint8_t *const y,
+ int y_stride,
+ const uint8_t *second_pred,
+ int w, int h, int offset,
+ int *mvjcost, int *mvcost[2],
+ unsigned int *sse1,
+ int *distortion) {
unsigned int besterr;
#if CONFIG_VP9_HIGHBITDEPTH
if (second_pred != NULL) {
@@ -610,7 +610,7 @@ int vp9_find_best_sub_pixel_tree_pruned(const MACROBLOCK *x,
return besterr;
}
-const MV search_step_table[12] = {
+static const MV search_step_table[12] = {
// left, right, up, down
{0, -4}, {0, 4}, {-4, 0}, {4, 0},
{0, -2}, {0, 2}, {-2, 0}, {2, 0},
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index 32682fe74..425073fcd 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -1596,6 +1596,7 @@ void vp9_rc_get_one_pass_cbr_params(VP9_COMP *cpi) {
target = calc_pframe_target_size_one_pass_cbr(cpi);
vp9_rc_set_frame_target(cpi, target);
+ cpi->resize_state = vp9_resize_one_pass_cbr(cpi);
}
int vp9_compute_qdelta(const RATE_CONTROL *rc, double qstart, double qtarget,
@@ -1756,3 +1757,92 @@ void vp9_set_target_rate(VP9_COMP *cpi) {
vbr_rate_correction(cpi, &target_rate);
vp9_rc_set_frame_target(cpi, target_rate);
}
+
+// Check if we should resize, based on the average QP over a recent window
+// of frames. For now at most one scale-down step is allowed; the scaling
+// factor is 2 in each dimension.
+int vp9_resize_one_pass_cbr(VP9_COMP *cpi) {
+ const VP9_COMMON *const cm = &cpi->common;
+ RATE_CONTROL *const rc = &cpi->rc;
+ int resize_now = 0;
+ cpi->resize_scale_num = 1;
+ cpi->resize_scale_den = 1;
+ // Don't resize on key frames; just reset the counters.
+ if (cm->frame_type == KEY_FRAME) {
+ cpi->resize_avg_qp = 0;
+ cpi->resize_count = 0;
+ return 0;
+ }
+ // Resize based on average QP over some window.
+ // Ignore samples close to key frame, since QP is usually high after key.
+ if (cpi->rc.frames_since_key > 2 * cpi->framerate) {
+ const int window = (int)(5 * cpi->framerate);
+ cpi->resize_avg_qp += cm->base_qindex;
+ if (cpi->rc.buffer_level < 0)
+ ++cpi->resize_buffer_underflow;
+ ++cpi->resize_count;
+ // Check for resize action every "window" frames.
+ if (cpi->resize_count == window) {
+ int avg_qp = cpi->resize_avg_qp / cpi->resize_count;
+ // Resize down if the buffer level has underflowed a sufficient number of
+ // times over the past window and we are at the original resolution.
+ // Resize back up if the average QP is low and we are currently in a
+ // resized-down state.
+ if (cpi->resize_state == 0 &&
+ cpi->resize_buffer_underflow > (cpi->resize_count >> 3)) {
+ resize_now = 1;
+ } else if (cpi->resize_state == 1 &&
+ avg_qp < 40 * cpi->rc.worst_quality / 100) {
+ resize_now = -1;
+ }
+ // Reset for next window measurement.
+ cpi->resize_avg_qp = 0;
+ cpi->resize_count = 0;
+ cpi->resize_buffer_underflow = 0;
+ }
+ }
+ // If the decision is to resize, reset some quantities and check whether we
+ // should reduce the rate correction factor.
+ if (resize_now != 0) {
+ int target_bits_per_frame;
+ int active_worst_quality;
+ int qindex;
+ int tot_scale_change;
+ // For now, resize is by 1/2 x 1/2.
+ cpi->resize_scale_num = 1;
+ cpi->resize_scale_den = 2;
+ tot_scale_change = (cpi->resize_scale_den * cpi->resize_scale_den) /
+ (cpi->resize_scale_num * cpi->resize_scale_num);
+ // Reset buffer level to optimal, update target size.
+ rc->buffer_level = rc->optimal_buffer_level;
+ rc->bits_off_target = rc->optimal_buffer_level;
+ rc->this_frame_target = calc_pframe_target_size_one_pass_cbr(cpi);
+ // Reset cyclic refresh parameters.
+ if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled)
+ vp9_cyclic_refresh_reset_resize(cpi);
+ // Get the projected qindex, based on the scaled target frame size (scaled
+ // so that target_bits_per_mb in vp9_rc_regulate_q will be the correct
+ // target).
+ target_bits_per_frame = (resize_now == 1) ?
+ rc->this_frame_target * tot_scale_change :
+ rc->this_frame_target / tot_scale_change;
+ active_worst_quality = calc_active_worst_quality_one_pass_cbr(cpi);
+ qindex = vp9_rc_regulate_q(cpi,
+ target_bits_per_frame,
+ rc->best_quality,
+ active_worst_quality);
+ // If resizing down, check if the projected q index is close to
+ // worst_quality, and if so reduce the rate correction factor (since we can
+ // likely afford a lower q for the resized frame).
+ if (resize_now == 1 &&
+ qindex > 90 * cpi->rc.worst_quality / 100) {
+ rc->rate_correction_factors[INTER_NORMAL] *= 0.85;
+ }
+ // If resizing back up, check if the projected q index is too far above the
+ // current base_qindex, and if so reduce the rate correction factor (since
+ // we prefer to keep the q for the resized frame close to the previous q).
+ if (resize_now == -1 &&
+ qindex > 130 * cm->base_qindex / 100) {
+ rc->rate_correction_factors[INTER_NORMAL] *= 0.9;
+ }
+ }
+ return resize_now;
+}
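[Note: worked through with hypothetical numbers at 30 fps: QP samples are skipped for the first 2 * 30 = 60 frames after a key frame; the window is 5 * 30 = 150 frames; a down-resize requires more than 150 >> 3 = 18 underflow frames within the window at the original resolution; and an up-resize requires the window-average qindex to fall below 40 * worst_quality / 100 (102 when worst_quality is MAXQ = 255). After a down-resize, tot_scale_change = (2 * 2) / (1 * 1) = 4, so the projected q is regulated against this_frame_target * 4, i.e. the same bits spread over a quarter as many macroblocks.]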
diff --git a/vp9/encoder/vp9_ratectrl.h b/vp9/encoder/vp9_ratectrl.h
index e12d200be..a10836c74 100644
--- a/vp9/encoder/vp9_ratectrl.h
+++ b/vp9/encoder/vp9_ratectrl.h
@@ -245,6 +245,8 @@ void vp9_rc_set_gf_interval_range(const struct VP9_COMP *const cpi,
void vp9_set_target_rate(struct VP9_COMP *cpi);
+int vp9_resize_one_pass_cbr(struct VP9_COMP *cpi);
+
#ifdef __cplusplus
} // extern "C"
#endif