Diffstat (limited to 'vp9')
-rw-r--r--  vp9/common/vp9_idct.c                         3
-rw-r--r--  vp9/common/vp9_rtcd_defs.pl                  36
-rw-r--r--  vp9/encoder/arm/neon/vp9_avg_neon.c         160
-rw-r--r--  vp9/encoder/mips/msa/vp9_avg_msa.c           56
-rw-r--r--  vp9/encoder/vp9_aq_cyclicrefresh.c           20
-rw-r--r--  vp9/encoder/vp9_aq_cyclicrefresh.h            5
-rw-r--r--  vp9/encoder/vp9_avg.c                       230
-rw-r--r--  vp9/encoder/vp9_denoiser.c                   35
-rw-r--r--  vp9/encoder/vp9_denoiser.h                    6
-rw-r--r--  vp9/encoder/vp9_encodeframe.c               119
-rw-r--r--  vp9/encoder/vp9_encoder.c                    30
-rw-r--r--  vp9/encoder/vp9_mcomp.c                      18
-rw-r--r--  vp9/encoder/vp9_noise_estimate.c             52
-rw-r--r--  vp9/encoder/vp9_noise_estimate.h              2
-rw-r--r--  vp9/encoder/vp9_pickmode.c                   91
-rw-r--r--  vp9/encoder/vp9_ratectrl.c                  100
-rw-r--r--  vp9/encoder/vp9_ratectrl.h                    1
-rw-r--r--  vp9/encoder/vp9_rdopt.c                      24
-rw-r--r--  vp9/encoder/vp9_skin_detection.c             14
-rw-r--r--  vp9/encoder/vp9_skin_detection.h              3
-rw-r--r--  vp9/encoder/vp9_speed_features.c              2
-rw-r--r--  vp9/encoder/vp9_svc_layercontext.c           19
-rw-r--r--  vp9/encoder/vp9_svc_layercontext.h            3
-rw-r--r--  vp9/encoder/vp9_temporal_filter.c           151
-rw-r--r--  vp9/encoder/x86/vp9_avg_intrin_sse2.c       424
-rw-r--r--  vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm    121
-rw-r--r--  vp9/vp9cx.mk                                  5
27 files changed, 490 insertions(+), 1240 deletions(-)
diff --git a/vp9/common/vp9_idct.c b/vp9/common/vp9_idct.c
index d12cd76db..1b420143b 100644
--- a/vp9/common/vp9_idct.c
+++ b/vp9/common/vp9_idct.c
@@ -174,6 +174,9 @@ void vp9_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride,
else if (eob <= 34)
// non-zero coeff only in upper-left 8x8
vpx_idct32x32_34_add(input, dest, stride);
+ else if (eob <= 135)
+ // non-zero coeff only in upper-left 16x16
+ vpx_idct32x32_135_add(input, dest, stride);
else
vpx_idct32x32_1024_add(input, dest, stride);
}
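
The cutoffs track the 32x32 scan order: the first 34 scan positions lie inside the upper-left 8x8 and the first 135 inside the upper-left 16x16, so whenever eob is at or below a cutoff, a reduced inverse transform already covers every possibly-nonzero input. A minimal sketch of the resulting dispatch, assuming the vpx_dsp reduced-IDCT entry points (the eob == 1 DC-only case sits just above the context shown):

#include "./vpx_dsp_rtcd.h"

/* Sketch: choose the cheapest idct32x32 variant that still covers all
 * coefficients up to the end-of-block position. */
static void idct32x32_dispatch_sketch(const tran_low_t *input, uint8_t *dest,
                                      int stride, int eob) {
  if (eob == 1)
    vpx_idct32x32_1_add(input, dest, stride);     /* DC only */
  else if (eob <= 34)
    vpx_idct32x32_34_add(input, dest, stride);    /* upper-left 8x8 */
  else if (eob <= 135)
    vpx_idct32x32_135_add(input, dest, stride);   /* upper-left 16x16 */
  else
    vpx_idct32x32_1024_add(input, dest, stride);  /* full 32x32 */
}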
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index 890b63821..d6c86fe5f 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -194,42 +194,6 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
#
if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") {
-add_proto qw/unsigned int vp9_avg_8x8/, "const uint8_t *, int p";
-specialize qw/vp9_avg_8x8 sse2 neon msa/;
-
-add_proto qw/unsigned int vp9_avg_4x4/, "const uint8_t *, int p";
-specialize qw/vp9_avg_4x4 sse2 msa/;
-
-add_proto qw/void vp9_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
-specialize qw/vp9_minmax_8x8 sse2/;
-
-add_proto qw/void vp9_hadamard_8x8/, "int16_t const *src_diff, int src_stride, int16_t *coeff";
-specialize qw/vp9_hadamard_8x8 sse2/, "$ssse3_x86_64_x86inc";
-
-add_proto qw/void vp9_hadamard_16x16/, "int16_t const *src_diff, int src_stride, int16_t *coeff";
-specialize qw/vp9_hadamard_16x16 sse2/;
-
-add_proto qw/int16_t vp9_satd/, "const int16_t *coeff, int length";
-specialize qw/vp9_satd sse2/;
-
-add_proto qw/void vp9_int_pro_row/, "int16_t *hbuf, uint8_t const *ref, const int ref_stride, const int height";
-specialize qw/vp9_int_pro_row sse2 neon/;
-
-add_proto qw/int16_t vp9_int_pro_col/, "uint8_t const *ref, const int width";
-specialize qw/vp9_int_pro_col sse2 neon/;
-
-add_proto qw/int vp9_vector_var/, "int16_t const *ref, int16_t const *src, const int bwl";
-specialize qw/vp9_vector_var neon sse2/;
-
-if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
- add_proto qw/unsigned int vp9_highbd_avg_8x8/, "const uint8_t *, int p";
- specialize qw/vp9_highbd_avg_8x8/;
- add_proto qw/unsigned int vp9_highbd_avg_4x4/, "const uint8_t *, int p";
- specialize qw/vp9_highbd_avg_4x4/;
- add_proto qw/void vp9_highbd_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
- specialize qw/vp9_highbd_minmax_8x8/;
-}
-
# ENCODEMB INVOKE
#
diff --git a/vp9/encoder/arm/neon/vp9_avg_neon.c b/vp9/encoder/arm/neon/vp9_avg_neon.c
deleted file mode 100644
index d569ec95d..000000000
--- a/vp9/encoder/arm/neon/vp9_avg_neon.c
+++ /dev/null
@@ -1,160 +0,0 @@
-/*
- * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <arm_neon.h>
-#include <assert.h>
-
-#include "./vp9_rtcd.h"
-#include "./vpx_config.h"
-
-#include "vpx/vpx_integer.h"
-
-static INLINE unsigned int horizontal_add_u16x8(const uint16x8_t v_16x8) {
- const uint32x4_t a = vpaddlq_u16(v_16x8);
- const uint64x2_t b = vpaddlq_u32(a);
- const uint32x2_t c = vadd_u32(vreinterpret_u32_u64(vget_low_u64(b)),
- vreinterpret_u32_u64(vget_high_u64(b)));
- return vget_lane_u32(c, 0);
-}
-
-unsigned int vp9_avg_8x8_neon(const uint8_t *s, int p) {
- uint8x8_t v_s0 = vld1_u8(s);
- const uint8x8_t v_s1 = vld1_u8(s + p);
- uint16x8_t v_sum = vaddl_u8(v_s0, v_s1);
-
- v_s0 = vld1_u8(s + 2 * p);
- v_sum = vaddw_u8(v_sum, v_s0);
-
- v_s0 = vld1_u8(s + 3 * p);
- v_sum = vaddw_u8(v_sum, v_s0);
-
- v_s0 = vld1_u8(s + 4 * p);
- v_sum = vaddw_u8(v_sum, v_s0);
-
- v_s0 = vld1_u8(s + 5 * p);
- v_sum = vaddw_u8(v_sum, v_s0);
-
- v_s0 = vld1_u8(s + 6 * p);
- v_sum = vaddw_u8(v_sum, v_s0);
-
- v_s0 = vld1_u8(s + 7 * p);
- v_sum = vaddw_u8(v_sum, v_s0);
-
- return (horizontal_add_u16x8(v_sum) + 32) >> 6;
-}
-
-void vp9_int_pro_row_neon(int16_t hbuf[16], uint8_t const *ref,
- const int ref_stride, const int height) {
- int i;
- uint16x8_t vec_sum_lo = vdupq_n_u16(0);
- uint16x8_t vec_sum_hi = vdupq_n_u16(0);
- const int shift_factor = ((height >> 5) + 3) * -1;
- const int16x8_t vec_shift = vdupq_n_s16(shift_factor);
-
- for (i = 0; i < height; i += 8) {
- const uint8x16_t vec_row1 = vld1q_u8(ref);
- const uint8x16_t vec_row2 = vld1q_u8(ref + ref_stride);
- const uint8x16_t vec_row3 = vld1q_u8(ref + ref_stride * 2);
- const uint8x16_t vec_row4 = vld1q_u8(ref + ref_stride * 3);
- const uint8x16_t vec_row5 = vld1q_u8(ref + ref_stride * 4);
- const uint8x16_t vec_row6 = vld1q_u8(ref + ref_stride * 5);
- const uint8x16_t vec_row7 = vld1q_u8(ref + ref_stride * 6);
- const uint8x16_t vec_row8 = vld1q_u8(ref + ref_stride * 7);
-
- vec_sum_lo = vaddw_u8(vec_sum_lo, vget_low_u8(vec_row1));
- vec_sum_hi = vaddw_u8(vec_sum_hi, vget_high_u8(vec_row1));
-
- vec_sum_lo = vaddw_u8(vec_sum_lo, vget_low_u8(vec_row2));
- vec_sum_hi = vaddw_u8(vec_sum_hi, vget_high_u8(vec_row2));
-
- vec_sum_lo = vaddw_u8(vec_sum_lo, vget_low_u8(vec_row3));
- vec_sum_hi = vaddw_u8(vec_sum_hi, vget_high_u8(vec_row3));
-
- vec_sum_lo = vaddw_u8(vec_sum_lo, vget_low_u8(vec_row4));
- vec_sum_hi = vaddw_u8(vec_sum_hi, vget_high_u8(vec_row4));
-
- vec_sum_lo = vaddw_u8(vec_sum_lo, vget_low_u8(vec_row5));
- vec_sum_hi = vaddw_u8(vec_sum_hi, vget_high_u8(vec_row5));
-
- vec_sum_lo = vaddw_u8(vec_sum_lo, vget_low_u8(vec_row6));
- vec_sum_hi = vaddw_u8(vec_sum_hi, vget_high_u8(vec_row6));
-
- vec_sum_lo = vaddw_u8(vec_sum_lo, vget_low_u8(vec_row7));
- vec_sum_hi = vaddw_u8(vec_sum_hi, vget_high_u8(vec_row7));
-
- vec_sum_lo = vaddw_u8(vec_sum_lo, vget_low_u8(vec_row8));
- vec_sum_hi = vaddw_u8(vec_sum_hi, vget_high_u8(vec_row8));
-
- ref += ref_stride * 8;
- }
-
- vec_sum_lo = vshlq_u16(vec_sum_lo, vec_shift);
- vec_sum_hi = vshlq_u16(vec_sum_hi, vec_shift);
-
- vst1q_s16(hbuf, vreinterpretq_s16_u16(vec_sum_lo));
- hbuf += 8;
- vst1q_s16(hbuf, vreinterpretq_s16_u16(vec_sum_hi));
-}
-
-int16_t vp9_int_pro_col_neon(uint8_t const *ref, const int width) {
- int i;
- uint16x8_t vec_sum = vdupq_n_u16(0);
-
- for (i = 0; i < width; i += 16) {
- const uint8x16_t vec_row = vld1q_u8(ref);
- vec_sum = vaddw_u8(vec_sum, vget_low_u8(vec_row));
- vec_sum = vaddw_u8(vec_sum, vget_high_u8(vec_row));
- ref += 16;
- }
-
- return horizontal_add_u16x8(vec_sum);
-}
-
-// ref, src = [0, 510] - max diff = 16-bits
-// bwl = {2, 3, 4}, width = {16, 32, 64}
-int vp9_vector_var_neon(int16_t const *ref, int16_t const *src, const int bwl) {
- int width = 4 << bwl;
- int32x4_t sse = vdupq_n_s32(0);
- int16x8_t total = vdupq_n_s16(0);
-
- assert(width >= 8);
- assert((width % 8) == 0);
-
- do {
- const int16x8_t r = vld1q_s16(ref);
- const int16x8_t s = vld1q_s16(src);
- const int16x8_t diff = vsubq_s16(r, s); // [-510, 510], 10 bits.
- const int16x4_t diff_lo = vget_low_s16(diff);
- const int16x4_t diff_hi = vget_high_s16(diff);
- sse = vmlal_s16(sse, diff_lo, diff_lo); // dynamic range 26 bits.
- sse = vmlal_s16(sse, diff_hi, diff_hi);
- total = vaddq_s16(total, diff); // dynamic range 16 bits.
-
- ref += 8;
- src += 8;
- width -= 8;
- } while (width != 0);
-
- {
- // Note: 'total''s pairwise addition could be implemented similarly to
- // horizontal_add_u16x8(), but one less vpaddl with 'total' when paired
- // with the summation of 'sse' performed better on a Cortex-A15.
- const int32x4_t t0 = vpaddlq_s16(total); // cascading summation of 'total'
- const int32x2_t t1 = vadd_s32(vget_low_s32(t0), vget_high_s32(t0));
- const int32x2_t t2 = vpadd_s32(t1, t1);
- const int t = vget_lane_s32(t2, 0);
- const int64x2_t s0 = vpaddlq_s32(sse); // cascading summation of 'sse'.
- const int32x2_t s1 = vadd_s32(vreinterpret_s32_s64(vget_low_s64(s0)),
- vreinterpret_s32_s64(vget_high_s64(s0)));
- const int s = vget_lane_s32(s1, 0);
- const int shift_factor = bwl + 2;
- return s - ((t * t) >> shift_factor);
- }
-}
diff --git a/vp9/encoder/mips/msa/vp9_avg_msa.c b/vp9/encoder/mips/msa/vp9_avg_msa.c
deleted file mode 100644
index 611adb1a2..000000000
--- a/vp9/encoder/mips/msa/vp9_avg_msa.c
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "./vp9_rtcd.h"
-#include "vpx_dsp/mips/macros_msa.h"
-
-uint32_t vp9_avg_8x8_msa(const uint8_t *src, int32_t src_stride) {
- uint32_t sum_out;
- v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
- v8u16 sum0, sum1, sum2, sum3, sum4, sum5, sum6, sum7;
- v4u32 sum = { 0 };
-
- LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
- HADD_UB4_UH(src0, src1, src2, src3, sum0, sum1, sum2, sum3);
- HADD_UB4_UH(src4, src5, src6, src7, sum4, sum5, sum6, sum7);
- ADD4(sum0, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum0, sum2, sum4, sum6);
- ADD2(sum0, sum2, sum4, sum6, sum0, sum4);
- sum0 += sum4;
-
- sum = __msa_hadd_u_w(sum0, sum0);
- sum0 = (v8u16)__msa_pckev_h((v8i16)sum, (v8i16)sum);
- sum = __msa_hadd_u_w(sum0, sum0);
- sum = (v4u32)__msa_srari_w((v4i32)sum, 6);
- sum_out = __msa_copy_u_w((v4i32)sum, 0);
-
- return sum_out;
-}
-
-uint32_t vp9_avg_4x4_msa(const uint8_t *src, int32_t src_stride) {
- uint32_t sum_out;
- uint32_t src0, src1, src2, src3;
- v16u8 vec = { 0 };
- v8u16 sum0;
- v4u32 sum1;
- v2u64 sum2;
-
- LW4(src, src_stride, src0, src1, src2, src3);
- INSERT_W4_UB(src0, src1, src2, src3, vec);
-
- sum0 = __msa_hadd_u_h(vec, vec);
- sum1 = __msa_hadd_u_w(sum0, sum0);
- sum0 = (v8u16)__msa_pckev_h((v8i16)sum1, (v8i16)sum1);
- sum1 = __msa_hadd_u_w(sum0, sum0);
- sum2 = __msa_hadd_u_d(sum1, sum1);
- sum1 = (v4u32)__msa_srari_w((v4i32)sum2, 4);
- sum_out = __msa_copy_u_w((v4i32)sum1, 0);
-
- return sum_out;
-}
diff --git a/vp9/encoder/vp9_aq_cyclicrefresh.c b/vp9/encoder/vp9_aq_cyclicrefresh.c
index 0def2cf1f..63db214d1 100644
--- a/vp9/encoder/vp9_aq_cyclicrefresh.c
+++ b/vp9/encoder/vp9_aq_cyclicrefresh.c
@@ -191,7 +191,8 @@ void vp9_cyclic_refresh_update_segment(VP9_COMP *const cpi,
BLOCK_SIZE bsize,
int64_t rate,
int64_t dist,
- int skip) {
+ int skip,
+ struct macroblock_plane *const p) {
const VP9_COMMON *const cm = &cpi->common;
CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
const int bw = num_8x8_blocks_wide_lookup[bsize];
@@ -199,12 +200,25 @@ void vp9_cyclic_refresh_update_segment(VP9_COMP *const cpi,
const int xmis = VPXMIN(cm->mi_cols - mi_col, bw);
const int ymis = VPXMIN(cm->mi_rows - mi_row, bh);
const int block_index = mi_row * cm->mi_cols + mi_col;
- const int refresh_this_block = candidate_refresh_aq(cr, mbmi, rate, dist,
- bsize);
+ int refresh_this_block = candidate_refresh_aq(cr, mbmi, rate, dist, bsize);
// Default is to not update the refresh map.
int new_map_value = cr->map[block_index];
int x = 0; int y = 0;
+ int is_skin = 0;
+ if (refresh_this_block == 0 &&
+ bsize <= BLOCK_16X16 &&
+ cpi->oxcf.content != VP9E_CONTENT_SCREEN) {
+ is_skin = vp9_compute_skin_block(p[0].src.buf,
+ p[1].src.buf,
+ p[2].src.buf,
+ p[0].src.stride,
+ p[1].src.stride,
+ bsize);
+ if (is_skin)
+ refresh_this_block = 1;
+ }
+
// If this block is labeled for refresh, check if we should reset the
// segment_id.
if (cyclic_refresh_segment_id_boosted(mbmi->segment_id)) {
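
The new macroblock_plane argument exists only to hand the source plane pointers and strides to the skin test. vp9_compute_skin_block is defined at the end of this patch (and an equivalent open-coded version is removed from vp9_denoiser.c below); a sketch of what it computes, reconstructed from that deleted denoiser code:

/* Sketch: classify a block as skin from its center pixel. The Y center
 * offset is half the block width/height; the chroma offsets are halved
 * again for 4:2:0 subsampling. */
static int compute_skin_block_sketch(const uint8_t *y, const uint8_t *u,
                                     const uint8_t *v, int stride,
                                     int strideuv, int bsize) {
  const int y_w = (4 << b_width_log2_lookup[bsize]) >> 1;
  const int y_h = (4 << b_height_log2_lookup[bsize]) >> 1;
  const int uv_w = y_w >> 1;
  const int uv_h = y_h >> 1;
  return vp9_skin_pixel(y[y_h * stride + y_w],
                        u[uv_h * strideuv + uv_w],
                        v[uv_h * strideuv + uv_w]);
}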
diff --git a/vp9/encoder/vp9_aq_cyclicrefresh.h b/vp9/encoder/vp9_aq_cyclicrefresh.h
index a5b38138b..edf0a973e 100644
--- a/vp9/encoder/vp9_aq_cyclicrefresh.h
+++ b/vp9/encoder/vp9_aq_cyclicrefresh.h
@@ -14,6 +14,8 @@
#include "vpx/vpx_integer.h"
#include "vp9/common/vp9_blockd.h"
+#include "vp9/encoder/vp9_block.h"
+#include "vp9/encoder/vp9_skin_detection.h"
#ifdef __cplusplus
extern "C" {
@@ -93,7 +95,8 @@ int vp9_cyclic_refresh_rc_bits_per_mb(const struct VP9_COMP *cpi, int i,
void vp9_cyclic_refresh_update_segment(struct VP9_COMP *const cpi,
MB_MODE_INFO *const mbmi,
int mi_row, int mi_col, BLOCK_SIZE bsize,
- int64_t rate, int64_t dist, int skip);
+ int64_t rate, int64_t dist, int skip,
+ struct macroblock_plane *const p);
void vp9_cyclic_refresh_update_sb_postencode(struct VP9_COMP *const cpi,
const MB_MODE_INFO *const mbmi,
diff --git a/vp9/encoder/vp9_avg.c b/vp9/encoder/vp9_avg.c
deleted file mode 100644
index a9a4c3050..000000000
--- a/vp9/encoder/vp9_avg.c
+++ /dev/null
@@ -1,230 +0,0 @@
-/*
- * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-#include "./vp9_rtcd.h"
-#include "vp9/common/vp9_common.h"
-#include "vpx_ports/mem.h"
-
-unsigned int vp9_avg_8x8_c(const uint8_t *s, int p) {
- int i, j;
- int sum = 0;
- for (i = 0; i < 8; ++i, s+=p)
- for (j = 0; j < 8; sum += s[j], ++j) {}
-
- return (sum + 32) >> 6;
-}
-
-unsigned int vp9_avg_4x4_c(const uint8_t *s, int p) {
- int i, j;
- int sum = 0;
- for (i = 0; i < 4; ++i, s+=p)
- for (j = 0; j < 4; sum += s[j], ++j) {}
-
- return (sum + 8) >> 4;
-}
-
-// src_diff: first pass, 9 bit, dynamic range [-255, 255]
-// second pass, 12 bit, dynamic range [-2040, 2040]
-static void hadamard_col8(const int16_t *src_diff, int src_stride,
- int16_t *coeff) {
- int16_t b0 = src_diff[0 * src_stride] + src_diff[1 * src_stride];
- int16_t b1 = src_diff[0 * src_stride] - src_diff[1 * src_stride];
- int16_t b2 = src_diff[2 * src_stride] + src_diff[3 * src_stride];
- int16_t b3 = src_diff[2 * src_stride] - src_diff[3 * src_stride];
- int16_t b4 = src_diff[4 * src_stride] + src_diff[5 * src_stride];
- int16_t b5 = src_diff[4 * src_stride] - src_diff[5 * src_stride];
- int16_t b6 = src_diff[6 * src_stride] + src_diff[7 * src_stride];
- int16_t b7 = src_diff[6 * src_stride] - src_diff[7 * src_stride];
-
- int16_t c0 = b0 + b2;
- int16_t c1 = b1 + b3;
- int16_t c2 = b0 - b2;
- int16_t c3 = b1 - b3;
- int16_t c4 = b4 + b6;
- int16_t c5 = b5 + b7;
- int16_t c6 = b4 - b6;
- int16_t c7 = b5 - b7;
-
- coeff[0] = c0 + c4;
- coeff[7] = c1 + c5;
- coeff[3] = c2 + c6;
- coeff[4] = c3 + c7;
- coeff[2] = c0 - c4;
- coeff[6] = c1 - c5;
- coeff[1] = c2 - c6;
- coeff[5] = c3 - c7;
-}
-
-void vp9_hadamard_8x8_c(int16_t const *src_diff, int src_stride,
- int16_t *coeff) {
- int idx;
- int16_t buffer[64];
- int16_t *tmp_buf = &buffer[0];
- for (idx = 0; idx < 8; ++idx) {
- hadamard_col8(src_diff, src_stride, tmp_buf); // src_diff: 9 bit
- // dynamic range [-255, 255]
- tmp_buf += 8;
- ++src_diff;
- }
-
- tmp_buf = &buffer[0];
- for (idx = 0; idx < 8; ++idx) {
- hadamard_col8(tmp_buf, 8, coeff); // tmp_buf: 12 bit
- // dynamic range [-2040, 2040]
- coeff += 8; // coeff: 15 bit
- // dynamic range [-16320, 16320]
- ++tmp_buf;
- }
-}
-
-// In place 16x16 2D Hadamard transform
-void vp9_hadamard_16x16_c(int16_t const *src_diff, int src_stride,
- int16_t *coeff) {
- int idx;
- for (idx = 0; idx < 4; ++idx) {
- // src_diff: 9 bit, dynamic range [-255, 255]
- int16_t const *src_ptr = src_diff + (idx >> 1) * 8 * src_stride
- + (idx & 0x01) * 8;
- vp9_hadamard_8x8_c(src_ptr, src_stride, coeff + idx * 64);
- }
-
- // coeff: 15 bit, dynamic range [-16320, 16320]
- for (idx = 0; idx < 64; ++idx) {
- int16_t a0 = coeff[0];
- int16_t a1 = coeff[64];
- int16_t a2 = coeff[128];
- int16_t a3 = coeff[192];
-
- int16_t b0 = (a0 + a1) >> 1; // (a0 + a1): 16 bit, [-32640, 32640]
- int16_t b1 = (a0 - a1) >> 1; // b0-b3: 15 bit, dynamic range
- int16_t b2 = (a2 + a3) >> 1; // [-16320, 16320]
- int16_t b3 = (a2 - a3) >> 1;
-
- coeff[0] = b0 + b2; // 16 bit, [-32640, 32640]
- coeff[64] = b1 + b3;
- coeff[128] = b0 - b2;
- coeff[192] = b1 - b3;
-
- ++coeff;
- }
-}
-
-// coeff: 16 bits, dynamic range [-32640, 32640].
-// length: value range {16, 64, 256, 1024}.
-int16_t vp9_satd_c(const int16_t *coeff, int length) {
- int i;
- int satd = 0;
- for (i = 0; i < length; ++i)
- satd += abs(coeff[i]);
-
- // satd: 26 bits, dynamic range [-32640 * 1024, 32640 * 1024]
- return (int16_t)satd;
-}
-
-// Integer projection onto row vectors.
-// height: value range {16, 32, 64}.
-void vp9_int_pro_row_c(int16_t hbuf[16], uint8_t const *ref,
- const int ref_stride, const int height) {
- int idx;
- const int norm_factor = height >> 1;
- for (idx = 0; idx < 16; ++idx) {
- int i;
- hbuf[idx] = 0;
- // hbuf[idx]: 14 bit, dynamic range [0, 16320].
- for (i = 0; i < height; ++i)
- hbuf[idx] += ref[i * ref_stride];
- // hbuf[idx]: 9 bit, dynamic range [0, 510].
- hbuf[idx] /= norm_factor;
- ++ref;
- }
-}
-
-// width: value range {16, 32, 64}.
-int16_t vp9_int_pro_col_c(uint8_t const *ref, const int width) {
- int idx;
- int16_t sum = 0;
- // sum: 14 bit, dynamic range [0, 16320]
- for (idx = 0; idx < width; ++idx)
- sum += ref[idx];
- return sum;
-}
-
-// ref: [0 - 510]
-// src: [0 - 510]
-// bwl: {2, 3, 4}
-int vp9_vector_var_c(int16_t const *ref, int16_t const *src,
- const int bwl) {
- int i;
- int width = 4 << bwl;
- int sse = 0, mean = 0, var;
-
- for (i = 0; i < width; ++i) {
- int diff = ref[i] - src[i]; // diff: dynamic range [-510, 510], 10 bits.
- mean += diff; // mean: dynamic range 16 bits.
- sse += diff * diff; // sse: dynamic range 26 bits.
- }
-
- // (mean * mean): dynamic range 31 bits.
- var = sse - ((mean * mean) >> (bwl + 2));
- return var;
-}
-
-void vp9_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp,
- int *min, int *max) {
- int i, j;
- *min = 255;
- *max = 0;
- for (i = 0; i < 8; ++i, s += p, d += dp) {
- for (j = 0; j < 8; ++j) {
- int diff = abs(s[j]-d[j]);
- *min = diff < *min ? diff : *min;
- *max = diff > *max ? diff : *max;
- }
- }
-}
-
-#if CONFIG_VP9_HIGHBITDEPTH
-unsigned int vp9_highbd_avg_8x8_c(const uint8_t *s8, int p) {
- int i, j;
- int sum = 0;
- const uint16_t* s = CONVERT_TO_SHORTPTR(s8);
- for (i = 0; i < 8; ++i, s+=p)
- for (j = 0; j < 8; sum += s[j], ++j) {}
-
- return (sum + 32) >> 6;
-}
-
-unsigned int vp9_highbd_avg_4x4_c(const uint8_t *s8, int p) {
- int i, j;
- int sum = 0;
- const uint16_t* s = CONVERT_TO_SHORTPTR(s8);
- for (i = 0; i < 4; ++i, s+=p)
- for (j = 0; j < 4; sum += s[j], ++j) {}
-
- return (sum + 8) >> 4;
-}
-
-void vp9_highbd_minmax_8x8_c(const uint8_t *s8, int p, const uint8_t *d8,
- int dp, int *min, int *max) {
- int i, j;
- const uint16_t* s = CONVERT_TO_SHORTPTR(s8);
- const uint16_t* d = CONVERT_TO_SHORTPTR(d8);
- *min = 255;
- *max = 0;
- for (i = 0; i < 8; ++i, s += p, d += dp) {
- for (j = 0; j < 8; ++j) {
- int diff = abs(s[j]-d[j]);
- *min = diff < *min ? diff : *min;
- *max = diff > *max ? diff : *max;
- }
- }
-}
-#endif // CONFIG_VP9_HIGHBITDEPTH
-
-
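
The three deleted files (this C reference version plus the NEON and MSA versions above) are relocated to vpx_dsp rather than dropped; the call sites updated throughout the rest of this patch pick up the same kernels under a vpx_ prefix via the vpx_dsp RTCD table. A small sketch of the renamed calling convention, assuming the generated vpx_dsp_rtcd.h header:

#include "./vpx_dsp_rtcd.h"

/* Sketch: same signatures, new prefix and RTCD table. */
static int block_energy_sketch(const uint8_t *src, int stride,
                               const int16_t *src_diff, int diff_stride,
                               int16_t *coeff) {
  const unsigned int avg = vpx_avg_8x8(src, stride); /* was vp9_avg_8x8 */
  (void)avg;
  vpx_hadamard_8x8(src_diff, diff_stride, coeff);    /* was vp9_hadamard_8x8 */
  return vpx_satd(coeff, 64);                        /* was vp9_satd */
}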
diff --git a/vp9/encoder/vp9_denoiser.c b/vp9/encoder/vp9_denoiser.c
index e87a12e44..6533902b3 100644
--- a/vp9/encoder/vp9_denoiser.c
+++ b/vp9/encoder/vp9_denoiser.c
@@ -194,7 +194,8 @@ static VP9_DENOISER_DECISION perform_motion_compensation(VP9_DENOISER *denoiser,
int mi_col,
PICK_MODE_CONTEXT *ctx,
int motion_magnitude,
- int is_skin) {
+ int is_skin,
+ int *zeromv_filter) {
int mv_col, mv_row;
int sse_diff = ctx->zeromv_sse - ctx->newmv_sse;
MV_REFERENCE_FRAME frame;
@@ -237,6 +238,7 @@ static VP9_DENOISER_DECISION perform_motion_compensation(VP9_DENOISER *denoiser,
mbmi->mv[0].as_int = 0;
ctx->best_sse_inter_mode = ZEROMV;
ctx->best_sse_mv.as_int = 0;
+ *zeromv_filter = 1;
}
if (ctx->newmv_sse > sse_thresh(bs, increase_denoising)) {
@@ -316,9 +318,11 @@ static VP9_DENOISER_DECISION perform_motion_compensation(VP9_DENOISER *denoiser,
void vp9_denoiser_denoise(VP9_DENOISER *denoiser, MACROBLOCK *mb,
int mi_row, int mi_col, BLOCK_SIZE bs,
- PICK_MODE_CONTEXT *ctx) {
+ PICK_MODE_CONTEXT *ctx,
+ VP9_DENOISER_DECISION *denoiser_decision) {
int mv_col, mv_row;
int motion_magnitude = 0;
+ int zeromv_filter = 0;
VP9_DENOISER_DECISION decision = COPY_BLOCK;
YV12_BUFFER_CONFIG avg = denoiser->running_avg_y[INTRA_FRAME];
YV12_BUFFER_CONFIG mc_avg = denoiser->mc_running_avg_y;
@@ -329,20 +333,12 @@ void vp9_denoiser_denoise(VP9_DENOISER *denoiser, MACROBLOCK *mb,
int is_skin = 0;
if (bs <= BLOCK_16X16 && denoiser->denoising_level >= kDenLow) {
- // Take center pixel in block to determine is_skin.
- const int y_width_shift = (4 << b_width_log2_lookup[bs]) >> 1;
- const int y_height_shift = (4 << b_height_log2_lookup[bs]) >> 1;
- const int uv_width_shift = y_width_shift >> 1;
- const int uv_height_shift = y_height_shift >> 1;
- const int stride = mb->plane[0].src.stride;
- const int strideuv = mb->plane[1].src.stride;
- const uint8_t ysource =
- mb->plane[0].src.buf[y_height_shift * stride + y_width_shift];
- const uint8_t usource =
- mb->plane[1].src.buf[uv_height_shift * strideuv + uv_width_shift];
- const uint8_t vsource =
- mb->plane[2].src.buf[uv_height_shift * strideuv + uv_width_shift];
- is_skin = vp9_skin_pixel(ysource, usource, vsource);
+ is_skin = vp9_compute_skin_block(mb->plane[0].src.buf,
+ mb->plane[1].src.buf,
+ mb->plane[2].src.buf,
+ mb->plane[0].src.stride,
+ mb->plane[1].src.stride,
+ bs);
}
mv_col = ctx->best_sse_mv.as_mv.col;
@@ -359,7 +355,8 @@ void vp9_denoiser_denoise(VP9_DENOISER *denoiser, MACROBLOCK *mb,
denoiser->increase_denoising,
mi_row, mi_col, ctx,
motion_magnitude,
- is_skin);
+ is_skin,
+ &zeromv_filter);
if (decision == FILTER_BLOCK) {
decision = vp9_denoiser_filter(src.buf, src.stride,
@@ -380,6 +377,9 @@ void vp9_denoiser_denoise(VP9_DENOISER *denoiser, MACROBLOCK *mb,
num_4x4_blocks_wide_lookup[bs] << 2,
num_4x4_blocks_high_lookup[bs] << 2);
}
+ *denoiser_decision = decision;
+ if (decision == FILTER_BLOCK && zeromv_filter == 1)
+ *denoiser_decision = FILTER_ZEROMV_BLOCK;
}
static void copy_frame(YV12_BUFFER_CONFIG * const dest,
@@ -458,6 +458,7 @@ void vp9_denoiser_update_frame_info(VP9_DENOISER *denoiser,
void vp9_denoiser_reset_frame_stats(PICK_MODE_CONTEXT *ctx) {
ctx->zeromv_sse = UINT_MAX;
ctx->newmv_sse = UINT_MAX;
+ ctx->zeromv_lastref_sse = UINT_MAX;
}
void vp9_denoiser_update_frame_stats(MB_MODE_INFO *mbmi, unsigned int sse,
diff --git a/vp9/encoder/vp9_denoiser.h b/vp9/encoder/vp9_denoiser.h
index bc676e925..d07056b45 100644
--- a/vp9/encoder/vp9_denoiser.h
+++ b/vp9/encoder/vp9_denoiser.h
@@ -23,7 +23,8 @@ extern "C" {
typedef enum vp9_denoiser_decision {
COPY_BLOCK,
- FILTER_BLOCK
+ FILTER_BLOCK,
+ FILTER_ZEROMV_BLOCK
} VP9_DENOISER_DECISION;
typedef enum vp9_denoiser_level {
@@ -54,7 +55,8 @@ void vp9_denoiser_update_frame_info(VP9_DENOISER *denoiser,
void vp9_denoiser_denoise(VP9_DENOISER *denoiser, MACROBLOCK *mb,
int mi_row, int mi_col, BLOCK_SIZE bs,
- PICK_MODE_CONTEXT *ctx);
+ PICK_MODE_CONTEXT *ctx,
+ VP9_DENOISER_DECISION *denoiser_decision);
void vp9_denoiser_reset_frame_stats(PICK_MODE_CONTEXT *ctx);
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index f9c28f6a9..c07eee969 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -401,7 +401,6 @@ static int set_vt_partitioning(VP9_COMP *cpi,
variance_node vt;
const int block_width = num_8x8_blocks_wide_lookup[bsize];
const int block_height = num_8x8_blocks_high_lookup[bsize];
- const int low_res = (cm->width <= 352 && cm->height <= 288);
assert(block_height == block_width);
tree_to_node(data, bsize, &vt);
@@ -414,7 +413,7 @@ static int set_vt_partitioning(VP9_COMP *cpi,
// No check for vert/horiz split as too few samples for variance.
if (bsize == bsize_min) {
// Variance already computed to set the force_split.
- if (low_res || cm->frame_type == KEY_FRAME)
+ if (cm->frame_type == KEY_FRAME)
get_variance(&vt.part_variances->none);
if (mi_col + block_width / 2 < cm->mi_cols &&
mi_row + block_height / 2 < cm->mi_rows &&
@@ -425,7 +424,7 @@ static int set_vt_partitioning(VP9_COMP *cpi,
return 0;
} else if (bsize > bsize_min) {
// Variance already computed to set the force_split.
- if (low_res || cm->frame_type == KEY_FRAME)
+ if (cm->frame_type == KEY_FRAME)
get_variance(&vt.part_variances->none);
// For key frame: take split for bsize above 32X32 or very high variance.
if (cm->frame_type == KEY_FRAME &&
@@ -489,13 +488,16 @@ static void set_vbp_thresholds(VP9_COMP *cpi, int64_t thresholds[], int q) {
thresholds[2] = threshold_base >> 2;
thresholds[3] = threshold_base << 2;
} else {
- // Increase base variance threshold if estimated noise level is high.
+ // Increase base variance threshold based on estimated noise level.
if (cpi->noise_estimate.enabled) {
- if (cpi->noise_estimate.level == kHigh)
+ NOISE_LEVEL noise_level = vp9_noise_estimate_extract_level(
+ &cpi->noise_estimate);
+ if (noise_level == kHigh)
threshold_base = 3 * threshold_base;
- else
- if (cpi->noise_estimate.level == kMedium)
- threshold_base = threshold_base << 1;
+ else if (noise_level == kMedium)
+ threshold_base = threshold_base << 1;
+ else if (noise_level < kLow)
+ threshold_base = (7 * threshold_base) >> 3;
}
if (cm->width <= 352 && cm->height <= 288) {
thresholds[0] = threshold_base >> 3;
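
Given the NOISE_LEVEL ordering in vp9_noise_estimate.h (assumed here: kLowLow < kLow < kMedium < kHigh), the final 'noise_level < kLow' arm can only match kLowLow, so the scaling works out to:

/* Sketch of the base-threshold scaling by extracted noise level. */
if (noise_level == kHigh)
  threshold_base = 3 * threshold_base;          /* 3x   */
else if (noise_level == kMedium)
  threshold_base <<= 1;                         /* 2x   */
else if (noise_level == kLowLow)                /* the '< kLow' case */
  threshold_base = (7 * threshold_base) >> 3;   /* 7/8x */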
@@ -556,16 +558,16 @@ static int compute_minmax_8x8(const uint8_t *s, int sp, const uint8_t *d,
if (x8_idx < pixels_wide && y8_idx < pixels_high) {
#if CONFIG_VP9_HIGHBITDEPTH
if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
- vp9_highbd_minmax_8x8(s + y8_idx * sp + x8_idx, sp,
+ vpx_highbd_minmax_8x8(s + y8_idx * sp + x8_idx, sp,
d + y8_idx * dp + x8_idx, dp,
&min, &max);
} else {
- vp9_minmax_8x8(s + y8_idx * sp + x8_idx, sp,
+ vpx_minmax_8x8(s + y8_idx * sp + x8_idx, sp,
d + y8_idx * dp + x8_idx, dp,
&min, &max);
}
#else
- vp9_minmax_8x8(s + y8_idx * sp + x8_idx, sp,
+ vpx_minmax_8x8(s + y8_idx * sp + x8_idx, sp,
d + y8_idx * dp + x8_idx, dp,
&min, &max);
#endif
@@ -597,18 +599,18 @@ static void fill_variance_4x4avg(const uint8_t *s, int sp, const uint8_t *d,
int d_avg = 128;
#if CONFIG_VP9_HIGHBITDEPTH
if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
- s_avg = vp9_highbd_avg_4x4(s + y4_idx * sp + x4_idx, sp);
+ s_avg = vpx_highbd_avg_4x4(s + y4_idx * sp + x4_idx, sp);
if (!is_key_frame)
- d_avg = vp9_highbd_avg_4x4(d + y4_idx * dp + x4_idx, dp);
+ d_avg = vpx_highbd_avg_4x4(d + y4_idx * dp + x4_idx, dp);
} else {
- s_avg = vp9_avg_4x4(s + y4_idx * sp + x4_idx, sp);
+ s_avg = vpx_avg_4x4(s + y4_idx * sp + x4_idx, sp);
if (!is_key_frame)
- d_avg = vp9_avg_4x4(d + y4_idx * dp + x4_idx, dp);
+ d_avg = vpx_avg_4x4(d + y4_idx * dp + x4_idx, dp);
}
#else
- s_avg = vp9_avg_4x4(s + y4_idx * sp + x4_idx, sp);
+ s_avg = vpx_avg_4x4(s + y4_idx * sp + x4_idx, sp);
if (!is_key_frame)
- d_avg = vp9_avg_4x4(d + y4_idx * dp + x4_idx, dp);
+ d_avg = vpx_avg_4x4(d + y4_idx * dp + x4_idx, dp);
#endif
sum = s_avg - d_avg;
sse = sum * sum;
@@ -636,18 +638,18 @@ static void fill_variance_8x8avg(const uint8_t *s, int sp, const uint8_t *d,
int d_avg = 128;
#if CONFIG_VP9_HIGHBITDEPTH
if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
- s_avg = vp9_highbd_avg_8x8(s + y8_idx * sp + x8_idx, sp);
+ s_avg = vpx_highbd_avg_8x8(s + y8_idx * sp + x8_idx, sp);
if (!is_key_frame)
- d_avg = vp9_highbd_avg_8x8(d + y8_idx * dp + x8_idx, dp);
+ d_avg = vpx_highbd_avg_8x8(d + y8_idx * dp + x8_idx, dp);
} else {
- s_avg = vp9_avg_8x8(s + y8_idx * sp + x8_idx, sp);
+ s_avg = vpx_avg_8x8(s + y8_idx * sp + x8_idx, sp);
if (!is_key_frame)
- d_avg = vp9_avg_8x8(d + y8_idx * dp + x8_idx, dp);
+ d_avg = vpx_avg_8x8(d + y8_idx * dp + x8_idx, dp);
}
#else
- s_avg = vp9_avg_8x8(s + y8_idx * sp + x8_idx, sp);
+ s_avg = vpx_avg_8x8(s + y8_idx * sp + x8_idx, sp);
if (!is_key_frame)
- d_avg = vp9_avg_8x8(d + y8_idx * dp + x8_idx, dp);
+ d_avg = vpx_avg_8x8(d + y8_idx * dp + x8_idx, dp);
#endif
sum = s_avg - d_avg;
sse = sum * sum;
@@ -668,6 +670,8 @@ static int choose_partitioning(VP9_COMP *cpi,
v64x64 vt;
v16x16 vt2[16];
int force_split[21];
+ int avg_32x32;
+ int avg_16x16[4];
uint8_t *s;
const uint8_t *d;
int sp;
@@ -676,9 +680,13 @@ static int choose_partitioning(VP9_COMP *cpi,
int64_t thresholds[4] = {cpi->vbp_thresholds[0], cpi->vbp_thresholds[1],
cpi->vbp_thresholds[2], cpi->vbp_thresholds[3]};
+ // For the variance computation under SVC mode, we treat the frame as key if
+ // the reference (base layer frame) is a key frame (i.e., is_key_frame == 1).
+ const int is_key_frame = (cm->frame_type == KEY_FRAME ||
+ (is_one_pass_cbr_svc(cpi) &&
+ cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame));
// Always use 4x4 partition for key frame.
- const int is_key_frame = (cm->frame_type == KEY_FRAME);
- const int use_4x4_partition = is_key_frame;
+ const int use_4x4_partition = cm->frame_type == KEY_FRAME;
const int low_res = (cm->width <= 352 && cm->height <= 288);
int variance4x4downsample[16];
@@ -704,8 +712,7 @@ static int choose_partitioning(VP9_COMP *cpi,
s = x->plane[0].src.buf;
sp = x->plane[0].src.stride;
- if (!is_key_frame && !(is_one_pass_cbr_svc(cpi) &&
- cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)) {
+ if (!is_key_frame) {
// In the case of spatial/temporal scalable coding, the assumption here is
// that the temporal reference frame will always be of type LAST_FRAME.
// TODO(marpan): If that assumption is broken, we need to revisit this code.
@@ -819,6 +826,7 @@ static int choose_partitioning(VP9_COMP *cpi,
const int y32_idx = ((i >> 1) << 5);
const int i2 = i << 2;
force_split[i + 1] = 0;
+ avg_16x16[i] = 0;
for (j = 0; j < 4; j++) {
const int x16_idx = x32_idx + ((j & 1) << 4);
const int y16_idx = y32_idx + ((j >> 1) << 4);
@@ -836,6 +844,7 @@ static int choose_partitioning(VP9_COMP *cpi,
is_key_frame);
fill_variance_tree(&vt.split[i].split[j], BLOCK_16X16);
get_variance(&vt.split[i].split[j].part_variances.none);
+ avg_16x16[i] += vt.split[i].split[j].part_variances.none.variance;
if (vt.split[i].split[j].part_variances.none.variance >
thresholds[2]) {
// 16X16 variance is above threshold for split, so force split to 8x8
@@ -843,7 +852,8 @@ static int choose_partitioning(VP9_COMP *cpi,
force_split[split_index] = 1;
force_split[i + 1] = 1;
force_split[0] = 1;
- } else if (vt.split[i].split[j].part_variances.none.variance >
+ } else if (cpi->oxcf.speed < 8 &&
+ vt.split[i].split[j].part_variances.none.variance >
thresholds[1] &&
!cyclic_refresh_segment_id_boosted(segment_id)) {
// We have some nominal amount of 16x16 variance (based on average),
@@ -861,9 +871,7 @@ static int choose_partitioning(VP9_COMP *cpi,
}
}
}
- // TODO(marpan): There is an issue with variance based on 4x4 average in
- // svc mode, don't allow it for now.
- if (is_key_frame || (low_res && !cpi->use_svc &&
+ if (is_key_frame || (low_res &&
vt.split[i].split[j].part_variances.none.variance >
(thresholds[1] << 1))) {
force_split[split_index] = 0;
@@ -885,8 +893,8 @@ static int choose_partitioning(VP9_COMP *cpi,
}
}
}
-
// Fill the rest of the variance tree by summing split partition values.
+ avg_32x32 = 0;
for (i = 0; i < 4; i++) {
const int i2 = i << 2;
for (j = 0; j < 4; j++) {
@@ -896,22 +904,41 @@ static int choose_partitioning(VP9_COMP *cpi,
for (m = 0; m < 4; m++)
fill_variance_tree(&vtemp->split[m], BLOCK_8X8);
fill_variance_tree(vtemp, BLOCK_16X16);
+ // If variance of this 16x16 block is above the threshold, force block
+ // to split. This also forces a split on the upper levels.
+ get_variance(&vtemp->part_variances.none);
+ if (vtemp->part_variances.none.variance > thresholds[2]) {
+ force_split[5 + i2 + j] = 1;
+ force_split[i + 1] = 1;
+ force_split[0] = 1;
+ }
}
}
fill_variance_tree(&vt.split[i], BLOCK_32X32);
- // If variance of this 32x32 block is above the threshold, force the block
- // to split. This also forces a split on the upper (64x64) level.
+ // If variance of this 32x32 block is above the threshold, or if it's above
+ // (some threshold of) the average variance over the sub-16x16 blocks, then
+ // force this block to split. This also forces a split on the upper
+ // (64x64) level.
if (!force_split[i + 1]) {
get_variance(&vt.split[i].part_variances.none);
- if (vt.split[i].part_variances.none.variance > thresholds[1]) {
+ if (vt.split[i].part_variances.none.variance > thresholds[1] ||
+ (!is_key_frame &&
+ vt.split[i].part_variances.none.variance > (thresholds[1] >> 1) &&
+ vt.split[i].part_variances.none.variance > (avg_16x16[i] >> 1))) {
force_split[i + 1] = 1;
force_split[0] = 1;
}
+ avg_32x32 += vt.split[i].part_variances.none.variance;
}
}
if (!force_split[0]) {
fill_variance_tree(&vt, BLOCK_64X64);
get_variance(&vt.part_variances.none);
+ // If variance of this 64x64 block is above (some threshold of) the average
+ // variance over the sub-32x32 blocks, then force this block to split.
+ if (!is_key_frame &&
+ vt.part_variances.none.variance > (5 * avg_32x32) >> 4)
+ force_split[0] = 1;
}
// Now go through the entire structure, splitting every block size until
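
Spelled out, the two relative tests added here work as follows: avg_16x16[i] accumulates the sum of the four 16x16 variances, so 'variance > (avg_16x16[i] >> 1)' fires when the 32x32 variance exceeds twice their mean; likewise avg_32x32 is the sum over the four 32x32 blocks, so '(5 * avg_32x32) >> 4' is five quarters of their mean. A sketch of the arithmetic, with sum16/sum32 naming the accumulated values:

/* Sketch: relative-variance split tests for inter frames. */
static int force_split_32_sketch(int64_t var32, int64_t thresh1,
                                 int64_t sum16, int is_key_frame) {
  if (var32 > thresh1) return 1;           /* absolute threshold      */
  return !is_key_frame &&
         var32 > (thresh1 >> 1) &&
         var32 > (sum16 >> 1);             /* > 2x the 16x16 mean     */
}

static int force_split_64_sketch(int64_t var64, int64_t sum32,
                                 int is_key_frame) {
  return !is_key_frame &&
         var64 > ((5 * sum32) >> 4);       /* > 5/4 of the 32x32 mean */
}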
@@ -1018,7 +1045,7 @@ static void update_state(VP9_COMP *cpi, ThreadData *td,
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
vp9_cyclic_refresh_update_segment(cpi, &xd->mi[0]->mbmi, mi_row,
mi_col, bsize, ctx->rate, ctx->dist,
- x->skip);
+ x->skip, p);
}
}
@@ -1678,6 +1705,7 @@ static void update_state_rt(VP9_COMP *cpi, ThreadData *td,
MACROBLOCKD *const xd = &x->e_mbd;
MODE_INFO *const mi = xd->mi[0];
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ struct macroblock_plane *const p = x->plane;
const struct segmentation *const seg = &cm->seg;
const int bw = num_8x8_blocks_wide_lookup[mi->mbmi.sb_type];
const int bh = num_8x8_blocks_high_lookup[mi->mbmi.sb_type];
@@ -1698,7 +1726,7 @@ static void update_state_rt(VP9_COMP *cpi, ThreadData *td,
} else {
// Setting segmentation map for cyclic_refresh.
vp9_cyclic_refresh_update_segment(cpi, mbmi, mi_row, mi_col, bsize,
- ctx->rate, ctx->dist, x->skip);
+ ctx->rate, ctx->dist, x->skip, p);
}
vp9_init_plane_quantizers(cpi, x);
}
@@ -1746,16 +1774,6 @@ static void encode_b_rt(VP9_COMP *cpi, ThreadData *td,
set_offsets(cpi, tile, x, mi_row, mi_col, bsize);
update_state_rt(cpi, td, ctx, mi_row, mi_col, bsize);
-#if CONFIG_VP9_TEMPORAL_DENOISING
- if (cpi->oxcf.noise_sensitivity > 0 &&
- output_enabled &&
- cpi->common.frame_type != KEY_FRAME &&
- cpi->resize_pending == 0) {
- vp9_denoiser_denoise(&cpi->denoiser, x, mi_row, mi_col,
- VPXMAX(BLOCK_8X8, bsize), ctx);
- }
-#endif
-
encode_superblock(cpi, td, tp, output_enabled, mi_row, mi_col, bsize, ctx);
update_stats(&cpi->common, td);
@@ -2432,8 +2450,15 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
if (cpi->sf.use_square_partition_only &&
bsize > cpi->sf.use_square_only_threshold) {
+ if (cpi->use_svc) {
+ if (!vp9_active_h_edge(cpi, mi_row, mi_step) || x->e_mbd.lossless)
+ partition_horz_allowed &= force_horz_split;
+ if (!vp9_active_v_edge(cpi, mi_row, mi_step) || x->e_mbd.lossless)
+ partition_vert_allowed &= force_vert_split;
+ } else {
partition_horz_allowed &= force_horz_split;
partition_vert_allowed &= force_vert_split;
+ }
}
save_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index a57cf8725..e4681f601 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -1478,7 +1478,11 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
cpi->td.mb.e_mbd.bd = (int)cm->bit_depth;
#endif // CONFIG_VP9_HIGHBITDEPTH
- rc->baseline_gf_interval = (MIN_GF_INTERVAL + MAX_GF_INTERVAL) / 2;
+ if ((oxcf->pass == 0) && (oxcf->rc_mode == VPX_Q)) {
+ rc->baseline_gf_interval = FIXED_GF_INTERVAL;
+ } else {
+ rc->baseline_gf_interval = (MIN_GF_INTERVAL + MAX_GF_INTERVAL) / 2;
+ }
cpi->refresh_golden_frame = 0;
cpi->refresh_last_frame = 1;
@@ -2793,6 +2797,22 @@ void vp9_update_reference_frames(VP9_COMP *cpi) {
cpi->resize_pending);
}
#endif
+ if (is_one_pass_cbr_svc(cpi)) {
+ // Keep track of frame index for each reference frame.
+ SVC *const svc = &cpi->svc;
+ if (cm->frame_type == KEY_FRAME) {
+ svc->ref_frame_index[cpi->lst_fb_idx] = svc->current_superframe;
+ svc->ref_frame_index[cpi->gld_fb_idx] = svc->current_superframe;
+ svc->ref_frame_index[cpi->alt_fb_idx] = svc->current_superframe;
+ } else {
+ if (cpi->refresh_last_frame)
+ svc->ref_frame_index[cpi->lst_fb_idx] = svc->current_superframe;
+ if (cpi->refresh_golden_frame)
+ svc->ref_frame_index[cpi->gld_fb_idx] = svc->current_superframe;
+ if (cpi->refresh_alt_ref_frame)
+ svc->ref_frame_index[cpi->alt_fb_idx] = svc->current_superframe;
+ }
+ }
}
static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) {
@@ -3682,12 +3702,16 @@ YV12_BUFFER_CONFIG *vp9_scale_if_required(VP9_COMMON *cm,
if (cm->mi_cols * MI_SIZE != unscaled->y_width ||
cm->mi_rows * MI_SIZE != unscaled->y_height) {
#if CONFIG_VP9_HIGHBITDEPTH
- if (use_normative_scaler)
+ if (use_normative_scaler &&
+ unscaled->y_width <= (scaled->y_width << 1) &&
+ unscaled->y_height <= (scaled->y_height << 1))
scale_and_extend_frame(unscaled, scaled, (int)cm->bit_depth);
else
scale_and_extend_frame_nonnormative(unscaled, scaled, (int)cm->bit_depth);
#else
- if (use_normative_scaler)
+ if (use_normative_scaler &&
+ unscaled->y_width <= (scaled->y_width << 1) &&
+ unscaled->y_height <= (scaled->y_height << 1))
scale_and_extend_frame(unscaled, scaled);
else
scale_and_extend_frame_nonnormative(unscaled, scaled);
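
Both branches gain the same guard: the normative scaler is now used only when the source is at most twice the target size in each dimension, i.e. down-scaling ratios up to 2:1, and anything steeper falls back to the non-normative scaler. So 1280x720 to 640x360 still takes scale_and_extend_frame, while 1280x720 to 320x180 no longer does. A sketch of the predicate:

/* Sketch: normative scaling only up to a 2:1 ratio per dimension. */
static int can_use_normative_scaler(const YV12_BUFFER_CONFIG *unscaled,
                                    const YV12_BUFFER_CONFIG *scaled) {
  return unscaled->y_width <= (scaled->y_width << 1) &&
         unscaled->y_height <= (scaled->y_height << 1);
}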
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index 327ac1985..a84202bb4 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -1755,7 +1755,7 @@ static int vector_match(int16_t *ref, int16_t *src, int bwl) {
int center, offset = 0;
int bw = 4 << bwl; // redundant variable, to be changed in the experiments.
for (d = 0; d <= bw; d += 16) {
- this_sad = vp9_vector_var(&ref[d], src, bwl);
+ this_sad = vpx_vector_var(&ref[d], src, bwl);
if (this_sad < best_sad) {
best_sad = this_sad;
offset = d;
@@ -1768,7 +1768,7 @@ static int vector_match(int16_t *ref, int16_t *src, int bwl) {
// check limit
if (this_pos < 0 || this_pos > bw)
continue;
- this_sad = vp9_vector_var(&ref[this_pos], src, bwl);
+ this_sad = vpx_vector_var(&ref[this_pos], src, bwl);
if (this_sad < best_sad) {
best_sad = this_sad;
center = this_pos;
@@ -1781,7 +1781,7 @@ static int vector_match(int16_t *ref, int16_t *src, int bwl) {
// check limit
if (this_pos < 0 || this_pos > bw)
continue;
- this_sad = vp9_vector_var(&ref[this_pos], src, bwl);
+ this_sad = vpx_vector_var(&ref[this_pos], src, bwl);
if (this_sad < best_sad) {
best_sad = this_sad;
center = this_pos;
@@ -1794,7 +1794,7 @@ static int vector_match(int16_t *ref, int16_t *src, int bwl) {
// check limit
if (this_pos < 0 || this_pos > bw)
continue;
- this_sad = vp9_vector_var(&ref[this_pos], src, bwl);
+ this_sad = vpx_vector_var(&ref[this_pos], src, bwl);
if (this_sad < best_sad) {
best_sad = this_sad;
center = this_pos;
@@ -1807,7 +1807,7 @@ static int vector_match(int16_t *ref, int16_t *src, int bwl) {
// check limit
if (this_pos < 0 || this_pos > bw)
continue;
- this_sad = vp9_vector_var(&ref[this_pos], src, bwl);
+ this_sad = vpx_vector_var(&ref[this_pos], src, bwl);
if (this_sad < best_sad) {
best_sad = this_sad;
center = this_pos;
@@ -1876,25 +1876,25 @@ unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x,
// Set up prediction 1-D reference set
ref_buf = xd->plane[0].pre[0].buf - (bw >> 1);
for (idx = 0; idx < search_width; idx += 16) {
- vp9_int_pro_row(&hbuf[idx], ref_buf, ref_stride, bh);
+ vpx_int_pro_row(&hbuf[idx], ref_buf, ref_stride, bh);
ref_buf += 16;
}
ref_buf = xd->plane[0].pre[0].buf - (bh >> 1) * ref_stride;
for (idx = 0; idx < search_height; ++idx) {
- vbuf[idx] = vp9_int_pro_col(ref_buf, bw) >> norm_factor;
+ vbuf[idx] = vpx_int_pro_col(ref_buf, bw) >> norm_factor;
ref_buf += ref_stride;
}
// Set up src 1-D reference set
for (idx = 0; idx < bw; idx += 16) {
src_buf = x->plane[0].src.buf + idx;
- vp9_int_pro_row(&src_hbuf[idx], src_buf, src_stride, bh);
+ vpx_int_pro_row(&src_hbuf[idx], src_buf, src_stride, bh);
}
src_buf = x->plane[0].src.buf;
for (idx = 0; idx < bh; ++idx) {
- src_vbuf[idx] = vp9_int_pro_col(src_buf, bw) >> norm_factor;
+ src_vbuf[idx] = vpx_int_pro_col(src_buf, bw) >> norm_factor;
src_buf += src_stride;
}
diff --git a/vp9/encoder/vp9_noise_estimate.c b/vp9/encoder/vp9_noise_estimate.c
index b41ffd0a3..008a40afc 100644
--- a/vp9/encoder/vp9_noise_estimate.c
+++ b/vp9/encoder/vp9_noise_estimate.c
@@ -25,7 +25,7 @@ void vp9_noise_estimate_init(NOISE_ESTIMATE *const ne,
int width,
int height) {
ne->enabled = 0;
- ne->level = kLow;
+ ne->level = kLowLow;
ne->value = 0;
ne->count = 0;
ne->thresh = 90;
@@ -82,6 +82,21 @@ static void copy_frame(YV12_BUFFER_CONFIG * const dest,
}
}
+NOISE_LEVEL vp9_noise_estimate_extract_level(NOISE_ESTIMATE *const ne) {
+ int noise_level = kLowLow;
+ if (ne->value > (ne->thresh << 1)) {
+ noise_level = kHigh;
+ } else {
+ if (ne->value > ne->thresh)
+ noise_level = kMedium;
+ else if (ne->value > (ne->thresh >> 1))
+ noise_level = kLow;
+ else
+ noise_level = kLowLow;
+ }
+ return noise_level;
+}
+
void vp9_update_noise_estimate(VP9_COMP *const cpi) {
const VP9_COMMON *const cm = &cpi->common;
CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
@@ -130,10 +145,6 @@ void vp9_update_noise_estimate(VP9_COMP *const cpi) {
const uint8_t *src_u = cpi->Source->u_buffer;
const uint8_t *src_v = cpi->Source->v_buffer;
const int src_uvstride = cpi->Source->uv_stride;
- const int y_width_shift = (4 << b_width_log2_lookup[bsize]) >> 1;
- const int y_height_shift = (4 << b_height_log2_lookup[bsize]) >> 1;
- const int uv_width_shift = y_width_shift >> 1;
- const int uv_height_shift = y_height_shift >> 1;
int mi_row, mi_col;
int num_low_motion = 0;
int frame_low_motion = 1;
@@ -158,13 +169,12 @@ void vp9_update_noise_estimate(VP9_COMP *const cpi) {
// been encoded as zero/low motion x (= thresh_consec_zeromv) frames
// in a row. consec_zero_mv[] defined for 8x8 blocks, so consider all
// 4 sub-blocks for 16x16 block. Also, avoid skin blocks.
- const uint8_t ysource =
- src_y[y_height_shift * src_ystride + y_width_shift];
- const uint8_t usource =
- src_u[uv_height_shift * src_uvstride + uv_width_shift];
- const uint8_t vsource =
- src_v[uv_height_shift * src_uvstride + uv_width_shift];
- int is_skin = vp9_skin_pixel(ysource, usource, vsource);
+ int is_skin = vp9_compute_skin_block(src_y,
+ src_u,
+ src_v,
+ src_ystride,
+ src_uvstride,
+ bsize);
if (frame_low_motion &&
cr->consec_zero_mv[bl_index] > thresh_consec_zeromv &&
cr->consec_zero_mv[bl_index1] > thresh_consec_zeromv &&
@@ -220,22 +230,16 @@ void vp9_update_noise_estimate(VP9_COMP *const cpi) {
// Reset counter and check noise level condition.
ne->num_frames_estimate = 30;
ne->count = 0;
- if (ne->value > (ne->thresh << 1))
- ne->level = kHigh;
- else
- if (ne->value > ne->thresh)
- ne->level = kMedium;
- else if (ne->value > (ne->thresh >> 1))
- ne->level = kLow;
- else
- ne->level = kLowLow;
+ ne->level = vp9_noise_estimate_extract_level(ne);
+#if CONFIG_VP9_TEMPORAL_DENOISING
+ if (cpi->oxcf.noise_sensitivity > 0)
+ vp9_denoiser_set_noise_level(&cpi->denoiser, ne->level);
+#endif
}
}
}
#if CONFIG_VP9_TEMPORAL_DENOISING
- if (cpi->oxcf.noise_sensitivity > 0) {
+ if (cpi->oxcf.noise_sensitivity > 0)
copy_frame(&cpi->denoiser.last_source, cpi->Source);
- vp9_denoiser_set_noise_level(&cpi->denoiser, ne->level);
- }
#endif
}
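
With the default ne->thresh of 90 set in vp9_noise_estimate_init, the extracted bands are: above 180 maps to kHigh, 91-180 to kMedium, 46-90 to kLow, and 45 or below to kLowLow. A small self-check sketch of those boundaries, assuming the NOISE_ESTIMATE fields shown in this patch:

#include <assert.h>
#include "vp9/encoder/vp9_noise_estimate.h"

static void noise_band_sketch(NOISE_ESTIMATE *ne) {
  ne->thresh = 90;  /* default from vp9_noise_estimate_init() */
  ne->value = 200; assert(vp9_noise_estimate_extract_level(ne) == kHigh);
  ne->value = 120; assert(vp9_noise_estimate_extract_level(ne) == kMedium);
  ne->value = 60;  assert(vp9_noise_estimate_extract_level(ne) == kLow);
  ne->value = 30;  assert(vp9_noise_estimate_extract_level(ne) == kLowLow);
}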
diff --git a/vp9/encoder/vp9_noise_estimate.h b/vp9/encoder/vp9_noise_estimate.h
index 0d22ef042..826d125b5 100644
--- a/vp9/encoder/vp9_noise_estimate.h
+++ b/vp9/encoder/vp9_noise_estimate.h
@@ -47,6 +47,8 @@ void vp9_noise_estimate_init(NOISE_ESTIMATE *const ne,
int width,
int height);
+NOISE_LEVEL vp9_noise_estimate_extract_level(NOISE_ESTIMATE *const ne);
+
void vp9_update_noise_estimate(struct VP9_COMP *const cpi);
#ifdef __cplusplus
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index 8aafae1d4..b929758ca 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -619,14 +619,14 @@ static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *dist,
scan_order->scan, scan_order->iscan);
break;
case TX_16X16:
- vp9_hadamard_16x16(src_diff, diff_stride, (int16_t *)coeff);
+ vpx_hadamard_16x16(src_diff, diff_stride, (int16_t *)coeff);
vp9_quantize_fp(coeff, 256, x->skip_block, p->zbin, p->round_fp,
p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, eob,
scan_order->scan, scan_order->iscan);
break;
case TX_8X8:
- vp9_hadamard_8x8(src_diff, diff_stride, (int16_t *)coeff);
+ vpx_hadamard_8x8(src_diff, diff_stride, (int16_t *)coeff);
vp9_quantize_fp(coeff, 64, x->skip_block, p->zbin, p->round_fp,
p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, eob,
@@ -673,7 +673,7 @@ static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *dist,
if (*eob == 1)
*rate += (int)abs(qcoeff[0]);
else if (*eob > 1)
- *rate += (int)vp9_satd((const int16_t *)qcoeff, step << 4);
+ *rate += vpx_satd((const int16_t *)qcoeff, step << 4);
*dist += vp9_block_error_fp(coeff, dqcoeff, step << 4) >> shift;
}
@@ -1094,6 +1094,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) {
VP9_COMMON *const cm = &cpi->common;
SPEED_FEATURES *const sf = &cpi->sf;
+ const SVC *const svc = &cpi->svc;
TileInfo *const tile_info = &tile_data->tile_info;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
@@ -1143,6 +1144,10 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
int best_pred_sad = INT_MAX;
int best_early_term = 0;
int ref_frame_cost[MAX_REF_FRAMES];
+ int svc_force_zero_mode[3] = {0};
+#if CONFIG_VP9_TEMPORAL_DENOISING
+ int64_t zero_last_cost_orig = INT64_MAX;
+#endif
init_ref_frame_cost(cm, xd, ref_frame_cost);
@@ -1193,6 +1198,17 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
} else {
usable_ref_frame = GOLDEN_FRAME;
}
+
+ // If the reference is temporally aligned with current superframe
+ // (e.g., spatial reference within superframe), constrain the inter mode:
+ // for now only test zero motion.
+ if (cpi->use_svc && svc->force_zero_mode_spatial_ref) {
+ if (svc->ref_frame_index[cpi->lst_fb_idx] == svc->current_superframe)
+ svc_force_zero_mode[LAST_FRAME - 1] = 1;
+ if (svc->ref_frame_index[cpi->gld_fb_idx] == svc->current_superframe)
+ svc_force_zero_mode[GOLDEN_FRAME - 1] = 1;
+ }
+
for (ref_frame = LAST_FRAME; ref_frame <= usable_ref_frame; ++ref_frame) {
const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
@@ -1245,8 +1261,13 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
continue;
ref_frame = ref_mode_set[idx].ref_frame;
- if (cpi->use_svc)
+ if (cpi->use_svc) {
ref_frame = ref_mode_set_svc[idx].ref_frame;
+ if (svc_force_zero_mode[ref_frame - 1] &&
+ frame_mv[this_mode][ref_frame].as_int != 0)
+ continue;
+ }
+
if (!(cpi->ref_frame_flags & flag_list[ref_frame]))
continue;
if (const_motion[ref_frame] && this_mode == NEARMV)
@@ -1524,8 +1545,12 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
}
#if CONFIG_VP9_TEMPORAL_DENOISING
- if (cpi->oxcf.noise_sensitivity > 0)
+ if (cpi->oxcf.noise_sensitivity > 0) {
vp9_denoiser_update_frame_stats(mbmi, sse_y, this_mode, ctx);
+ // Keep track of zero_last cost.
+ if (ref_frame == LAST_FRAME && frame_mv[this_mode][ref_frame].as_int == 0)
+ zero_last_cost_orig = this_rdc.rdcost;
+ }
#else
(void)ctx;
#endif
@@ -1683,6 +1708,62 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
}
}
+#if CONFIG_VP9_TEMPORAL_DENOISING
+ if (cpi->oxcf.noise_sensitivity > 0 &&
+ cpi->resize_pending == 0) {
+ VP9_DENOISER_DECISION decision = COPY_BLOCK;
+ vp9_denoiser_denoise(&cpi->denoiser, x, mi_row, mi_col,
+ VPXMAX(BLOCK_8X8, bsize), ctx, &decision);
+ // If INTRA or GOLDEN reference was selected, re-evaluate ZEROMV on denoised
+ // result. Only do this under noise conditions, and if rdcost of ZEROMV on
+ // original source is not significantly higher than rdcost of best mode.
+ if (((best_ref_frame == INTRA_FRAME && decision >= FILTER_BLOCK) ||
+ (best_ref_frame == GOLDEN_FRAME && decision == FILTER_ZEROMV_BLOCK)) &&
+ cpi->noise_estimate.enabled &&
+ cpi->noise_estimate.level > kLow &&
+ zero_last_cost_orig < (best_rdc.rdcost << 3)) {
+ // Check if we should pick ZEROMV on denoised signal.
+ int rate = 0;
+ int64_t dist = 0;
+ mbmi->mode = ZEROMV;
+ mbmi->ref_frame[0] = LAST_FRAME;
+ mbmi->ref_frame[1] = NONE;
+ mbmi->mv[0].as_int = 0;
+ mbmi->interp_filter = EIGHTTAP;
+ xd->plane[0].pre[0] = yv12_mb[LAST_FRAME][0];
+ vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
+ model_rd_for_sb_y(cpi, bsize, x, xd, &rate, &dist, &var_y, &sse_y);
+ this_rdc.rate = rate + ref_frame_cost[LAST_FRAME] +
+ cpi->inter_mode_cost[x->mbmi_ext->mode_context[LAST_FRAME]]
+ [INTER_OFFSET(ZEROMV)];
+ this_rdc.dist = dist;
+ this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, rate, dist);
+ // Switch to ZEROMV if the rdcost for ZEROMV on denoised source
+ // is lower than best_ref mode (on original source).
+ if (this_rdc.rdcost > best_rdc.rdcost) {
+ this_rdc = best_rdc;
+ mbmi->mode = best_mode;
+ mbmi->ref_frame[0] = best_ref_frame;
+ mbmi->interp_filter = best_pred_filter;
+ if (best_ref_frame == INTRA_FRAME)
+ mbmi->mv[0].as_int = INVALID_MV;
+ else if (best_ref_frame == GOLDEN_FRAME) {
+ mbmi->mv[0].as_int = frame_mv[best_mode][best_ref_frame].as_int;
+ if (reuse_inter_pred) {
+ xd->plane[0].pre[0] = yv12_mb[GOLDEN_FRAME][0];
+ vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
+ }
+ }
+ mbmi->tx_size = best_tx_size;
+ x->skip_txfm[0] = best_mode_skip_txfm;
+ } else {
+ best_ref_frame = LAST_FRAME;
+ best_rdc = this_rdc;
+ }
+ }
+ }
+#endif
+
if (cpi->sf.adaptive_rd_thresh) {
THR_MODES best_mode_idx = mode_idx[best_ref_frame][mode_offset(mbmi->mode)];
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index fdff36315..2579c6005 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -833,10 +833,16 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi,
ASSIGN_MINQ_TABLE(cm->bit_depth, inter_minq);
if (frame_is_intra_only(cm)) {
- // Handle the special case for key frames forced when we have reached
- // the maximum key frame interval. Here force the Q to a range
- // based on the ambient Q to reduce the risk of popping.
- if (rc->this_key_frame_forced) {
+ if (oxcf->rc_mode == VPX_Q) {
+ int qindex = cq_level;
+ double q = vp9_convert_qindex_to_q(qindex, cm->bit_depth);
+ int delta_qindex = vp9_compute_qdelta(rc, q, q * 0.25,
+ cm->bit_depth);
+ active_best_quality = VPXMAX(qindex + delta_qindex, rc->best_quality);
+ } else if (rc->this_key_frame_forced) {
+ // Handle the special case for key frames forced when we have reached
+ // the maximum key frame interval. Here force the Q to a range
+ // based on the ambient Q to reduce the risk of popping.
int qindex = rc->last_boosted_qindex;
double last_boosted_q = vp9_convert_qindex_to_q(qindex, cm->bit_depth);
int delta_qindex = vp9_compute_qdelta(rc, last_boosted_q,
@@ -886,17 +892,28 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi,
active_best_quality = active_best_quality * 15 / 16;
} else if (oxcf->rc_mode == VPX_Q) {
- if (!cpi->refresh_alt_ref_frame) {
- active_best_quality = cq_level;
- } else {
- active_best_quality = get_gf_active_quality(rc, q, cm->bit_depth);
- }
+ int qindex = cq_level;
+ double q = vp9_convert_qindex_to_q(qindex, cm->bit_depth);
+ int delta_qindex;
+ if (cpi->refresh_alt_ref_frame)
+ delta_qindex = vp9_compute_qdelta(rc, q, q * 0.40, cm->bit_depth);
+ else
+ delta_qindex = vp9_compute_qdelta(rc, q, q * 0.50, cm->bit_depth);
+ active_best_quality = VPXMAX(qindex + delta_qindex, rc->best_quality);
} else {
active_best_quality = get_gf_active_quality(rc, q, cm->bit_depth);
}
} else {
if (oxcf->rc_mode == VPX_Q) {
- active_best_quality = cq_level;
+ int qindex = cq_level;
+ double q = vp9_convert_qindex_to_q(qindex, cm->bit_depth);
+ double delta_rate[FIXED_GF_INTERVAL] =
+ {0.50, 1.0, 0.85, 1.0, 0.70, 1.0, 0.85, 1.0};
+ int delta_qindex =
+ vp9_compute_qdelta(rc, q,
+ q * delta_rate[cm->current_video_frame %
+ FIXED_GF_INTERVAL], cm->bit_depth);
+ active_best_quality = VPXMAX(qindex + delta_qindex, rc->best_quality);
} else {
// Use the lower of active_worst_quality and recent/average Q.
if (cm->current_video_frame > 1)
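
All three VPX_Q branches now share one pattern: convert cq_level to a linear q, scale it down, and convert back through vp9_compute_qdelta. Key frames target 0.25x q, alt-refs 0.40x, golden updates 0.50x, and plain inter frames cycle through delta_rate so that within the 8-frame fixed GF interval, frames 0 and 4 (factors 0.50 and 0.70) get the strongest quality boost and serve as the better references. A sketch of the shared helper this amounts to:

/* Sketch: active_best qindex that targets scale * q, clamped to the
 * configured best quality. */
static int fixed_q_active_best_sketch(const RATE_CONTROL *rc, int cq_level,
                                      double scale, vpx_bit_depth_t bd) {
  const double q = vp9_convert_qindex_to_q(cq_level, bd);
  const int delta_qindex = vp9_compute_qdelta(rc, q, q * scale, bd);
  return VPXMAX(cq_level + delta_qindex, rc->best_quality);
}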
@@ -1075,7 +1092,7 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi,
if (!cpi->refresh_alt_ref_frame) {
active_best_quality = cq_level;
} else {
- active_best_quality = get_gf_active_quality(rc, q, cm->bit_depth);
+ active_best_quality = get_gf_active_quality(rc, q, cm->bit_depth);
// Modify best quality for second level arfs. For mode VPX_Q this
// becomes the baseline frame q.
@@ -1257,8 +1274,12 @@ static void update_golden_frame_stats(VP9_COMP *cpi) {
rc->frames_since_golden = 0;
// If we are not using alt ref in the up and coming group clear the arf
- // active flag.
- if (!rc->source_alt_ref_pending) {
+ // active flag. In multi arf group case, if the index is not 0 then
+ // we are overlaying a mid group arf so should not reset the flag.
+ if (cpi->oxcf.pass == 2) {
+ if (!rc->source_alt_ref_pending && (cpi->twopass.gf_group.index == 0))
+ rc->source_alt_ref_active = 0;
+ } else if (!rc->source_alt_ref_pending) {
rc->source_alt_ref_active = 0;
}
@@ -1309,9 +1330,9 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
}
}
} else {
- if (rc->is_src_frame_alt_ref ||
- !(cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) ||
- (cpi->use_svc && oxcf->rc_mode == VPX_CBR)) {
+ if ((cpi->use_svc && oxcf->rc_mode == VPX_CBR) ||
+ (!rc->is_src_frame_alt_ref &&
+ !(cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame))) {
rc->last_q[INTER_FRAME] = qindex;
rc->avg_frame_qindex[INTER_FRAME] =
ROUND_POWER_OF_TWO(3 * rc->avg_frame_qindex[INTER_FRAME] + qindex, 2);
@@ -1718,29 +1739,36 @@ void vp9_rc_set_gf_interval_range(const VP9_COMP *const cpi,
RATE_CONTROL *const rc) {
const VP9EncoderConfig *const oxcf = &cpi->oxcf;
- // Set Maximum gf/arf interval
- rc->max_gf_interval = oxcf->max_gf_interval;
- rc->min_gf_interval = oxcf->min_gf_interval;
- if (rc->min_gf_interval == 0)
- rc->min_gf_interval = vp9_rc_get_default_min_gf_interval(
- oxcf->width, oxcf->height, cpi->framerate);
- if (rc->max_gf_interval == 0)
- rc->max_gf_interval = vp9_rc_get_default_max_gf_interval(
- cpi->framerate, rc->min_gf_interval);
+ // Special case code for 1 pass fixed Q mode tests
+ if ((oxcf->pass == 0) && (oxcf->rc_mode == VPX_Q)) {
+ rc->max_gf_interval = FIXED_GF_INTERVAL;
+ rc->min_gf_interval = FIXED_GF_INTERVAL;
+ rc->static_scene_max_gf_interval = FIXED_GF_INTERVAL;
+ } else {
+ // Set Maximum gf/arf interval
+ rc->max_gf_interval = oxcf->max_gf_interval;
+ rc->min_gf_interval = oxcf->min_gf_interval;
+ if (rc->min_gf_interval == 0)
+ rc->min_gf_interval = vp9_rc_get_default_min_gf_interval(
+ oxcf->width, oxcf->height, cpi->framerate);
+ if (rc->max_gf_interval == 0)
+ rc->max_gf_interval = vp9_rc_get_default_max_gf_interval(
+ cpi->framerate, rc->min_gf_interval);
+
+ // Extended interval for genuinely static scenes
+ rc->static_scene_max_gf_interval = MAX_LAG_BUFFERS * 2;
+
+ if (is_altref_enabled(cpi)) {
+ if (rc->static_scene_max_gf_interval > oxcf->lag_in_frames - 1)
+ rc->static_scene_max_gf_interval = oxcf->lag_in_frames - 1;
+ }
- // Extended interval for genuinely static scenes
- rc->static_scene_max_gf_interval = MAX_LAG_BUFFERS * 2;
+ if (rc->max_gf_interval > rc->static_scene_max_gf_interval)
+ rc->max_gf_interval = rc->static_scene_max_gf_interval;
- if (is_altref_enabled(cpi)) {
- if (rc->static_scene_max_gf_interval > oxcf->lag_in_frames - 1)
- rc->static_scene_max_gf_interval = oxcf->lag_in_frames - 1;
+ // Clamp min to max
+ rc->min_gf_interval = VPXMIN(rc->min_gf_interval, rc->max_gf_interval);
}
-
- if (rc->max_gf_interval > rc->static_scene_max_gf_interval)
- rc->max_gf_interval = rc->static_scene_max_gf_interval;
-
- // Clamp min to max
- rc->min_gf_interval = VPXMIN(rc->min_gf_interval, rc->max_gf_interval);
}
void vp9_rc_update_framerate(VP9_COMP *cpi) {
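
[Editor's note] In the non-fixed-Q branch above the clamp order matters: the static-scene cap is applied to max_gf_interval first, and min_gf_interval is clamped last so min <= max always holds. A compact sketch:

    /* Apply the caps in the same order as the patch: static-scene limit
     * first, then force min <= max (VPXMIN in the real code). */
    static void clamp_gf_intervals(int *min_gf, int *max_gf,
                                   int static_scene_max_gf) {
      if (*max_gf > static_scene_max_gf)
        *max_gf = static_scene_max_gf;
      if (*min_gf > *max_gf)
        *min_gf = *max_gf;
    }
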
diff --git a/vp9/encoder/vp9_ratectrl.h b/vp9/encoder/vp9_ratectrl.h
index 136fd3e78..3df909cb1 100644
--- a/vp9/encoder/vp9_ratectrl.h
+++ b/vp9/encoder/vp9_ratectrl.h
@@ -26,6 +26,7 @@ extern "C" {
#define MIN_GF_INTERVAL 4
#define MAX_GF_INTERVAL 16
+#define FIXED_GF_INTERVAL 8 // Used in some testing modes only
#define ONEHALFONLY_RESIZE 0
typedef enum {
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 2a6b70703..bcd8f013f 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -1349,11 +1349,25 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
const InterpKernel *kernel = vp9_filter_kernels[mi->mbmi.interp_filter];
for (ref = 0; ref < 1 + is_compound; ++ref) {
- const uint8_t *pre = &pd->pre[ref].buf[vp9_raster_block_offset(BLOCK_8X8, i,
- pd->pre[ref].stride)];
+ const int bw = b_width_log2_lookup[BLOCK_8X8];
+ const int h = 4 * (i >> bw);
+ const int w = 4 * (i & ((1 << bw) - 1));
+ const struct scale_factors *sf = &xd->block_refs[ref]->sf;
+ int y_stride = pd->pre[ref].stride;
+ uint8_t *pre = pd->pre[ref].buf + (h * pd->pre[ref].stride + w);
+
+ if (vp9_is_scaled(sf)) {
+ const int x_start = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x));
+ const int y_start = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y));
+
+ y_stride = xd->block_refs[ref]->buf->y_stride;
+ pre = xd->block_refs[ref]->buf->y_buffer;
+ pre += scaled_buffer_offset(x_start + w, y_start + h,
+ y_stride, sf);
+ }
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- vp9_highbd_build_inter_predictor(pre, pd->pre[ref].stride,
+ vp9_highbd_build_inter_predictor(pre, y_stride,
dst, pd->dst.stride,
&mi->bmi[i].as_mv[ref].as_mv,
&xd->block_refs[ref]->sf, width, height,
@@ -1361,7 +1375,7 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
mi_col * MI_SIZE + 4 * (i % 2),
mi_row * MI_SIZE + 4 * (i / 2), xd->bd);
} else {
- vp9_build_inter_predictor(pre, pd->pre[ref].stride,
+ vp9_build_inter_predictor(pre, y_stride,
dst, pd->dst.stride,
&mi->bmi[i].as_mv[ref].as_mv,
&xd->block_refs[ref]->sf, width, height, ref,
@@ -1370,7 +1384,7 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
mi_row * MI_SIZE + 4 * (i / 2));
}
#else
- vp9_build_inter_predictor(pre, pd->pre[ref].stride,
+ vp9_build_inter_predictor(pre, y_stride,
dst, pd->dst.stride,
&mi->bmi[i].as_mv[ref].as_mv,
&xd->block_refs[ref]->sf, width, height, ref,
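
[Editor's note] The fix above matters for scaled references: the old code always offset into pd->pre[ref] with vp9_raster_block_offset(), which is only valid when the reference has the same resolution as the coded frame. For BLOCK_8X8, b_width_log2_lookup[] is 1, so sub-block i = 0..3 maps to 4x4 cells at (0,0), (4,0), (0,4), (4,4); in the scaled case the patch instead recomputes the offset in the reference's own coordinate space via scaled_buffer_offset(). A sketch of the unscaled addressing only:

    #include <stdint.h>

    /* Unscaled case only: address the 4x4 sub-block i inside an 8x8 block. */
    static const uint8_t *subblock_pre(const uint8_t *pre_buf, int stride,
                                       int i, int bw_log2 /* 1 for 8x8 */) {
      const int h = 4 * (i >> bw_log2);
      const int w = 4 * (i & ((1 << bw_log2) - 1));
      return pre_buf + h * stride + w;
    }
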
diff --git a/vp9/encoder/vp9_skin_detection.c b/vp9/encoder/vp9_skin_detection.c
index c2763b7da..0ca166536 100644
--- a/vp9/encoder/vp9_skin_detection.c
+++ b/vp9/encoder/vp9_skin_detection.c
@@ -48,6 +48,20 @@ int vp9_skin_pixel(const uint8_t y, const uint8_t cb, const uint8_t cr) {
return (evaluate_skin_color_difference(cb, cr) < skin_threshold);
}
+int vp9_compute_skin_block(const uint8_t *y, const uint8_t *u, const uint8_t *v,
+ int stride, int strideuv, int bsize) {
+ // Take the center pixel of the block to determine is_skin.
+ const int y_width_shift = (4 << b_width_log2_lookup[bsize]) >> 1;
+ const int y_height_shift = (4 << b_height_log2_lookup[bsize]) >> 1;
+ const int uv_width_shift = y_width_shift >> 1;
+ const int uv_height_shift = y_height_shift >> 1;
+ const uint8_t ysource = y[y_height_shift * stride + y_width_shift];
+ const uint8_t usource = u[uv_height_shift * strideuv + uv_width_shift];
+ const uint8_t vsource = v[uv_height_shift * strideuv + uv_width_shift];
+ return vp9_skin_pixel(ysource, usource, vsource);
+}
+
+
#ifdef OUTPUT_YUV_SKINMAP
// For viewing skin map on input source.
void vp9_compute_skin_map(VP9_COMP *const cpi, FILE *yuv_skinmap_file) {
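
[Editor's note] A hedged usage sketch of the new helper: for a 16x16 block (b_width_log2_lookup[] == 2) the sampled luma pixel sits at offset (8, 8), and with 4:2:0 subsampling the chroma pixel sits at (4, 4). The wrapper below is hypothetical and assumes the vp9 headers that declare BLOCK_16X16 and vp9_compute_skin_block():

    #include <stdint.h>

    /* Hypothetical caller: flag a 16x16 source block as skin so later
     * mode/partition decisions can be biased toward preserving it. */
    static int block_is_skin_16x16(const uint8_t *y, const uint8_t *u,
                                   const uint8_t *v, int stride,
                                   int strideuv) {
      /* Center samples: y[8 * stride + 8] and u/v[4 * strideuv + 4]. */
      return vp9_compute_skin_block(y, u, v, stride, strideuv, BLOCK_16X16);
    }
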
diff --git a/vp9/encoder/vp9_skin_detection.h b/vp9/encoder/vp9_skin_detection.h
index 0a87ef9f4..73f7c39d9 100644
--- a/vp9/encoder/vp9_skin_detection.h
+++ b/vp9/encoder/vp9_skin_detection.h
@@ -23,6 +23,9 @@ struct VP9_COMP;
int vp9_skin_pixel(const uint8_t y, const uint8_t cb, const uint8_t cr);
+int vp9_compute_skin_block(const uint8_t *y, const uint8_t *u, const uint8_t *v,
+ int stride, int strideuv, int bsize);
+
#ifdef OUTPUT_YUV_SKINMAP
// For viewing skin map on input source.
void vp9_compute_skin_map(struct VP9_COMP *const cpi, FILE *yuv_skinmap_file);
diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c
index 318d8100c..c5f0bad8f 100644
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -394,7 +394,7 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
sf->intra_y_mode_bsize_mask[i] = INTRA_DC_TM_H_V;
} else {
for (i = 0; i < BLOCK_SIZES; ++i)
- if (i >= BLOCK_16X16)
+ if (i > BLOCK_16X16)
sf->intra_y_mode_bsize_mask[i] = INTRA_DC;
else
// Use H and V intra mode for block sizes <= 16X16.
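
[Editor's note] The >= to > change moves BLOCK_16X16 itself into the "<= 16X16" branch, so 16x16 blocks keep the H and V intra modes instead of being restricted to DC, which is what the existing comment already promised. A minimal illustration with hypothetical stand-in masks:

    enum { MASK_DC = 1, MASK_DC_H_V = 7 };  /* stand-ins for the INTRA_* masks */

    static int intra_mask_for_bsize(int bsize, int bsize_16x16) {
      /* After the fix, bsize == 16x16 takes the richer H/V mask. */
      return (bsize > bsize_16x16) ? MASK_DC : MASK_DC_H_V;
    }
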
diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c
index 13da155c7..30a7d1013 100644
--- a/vp9/encoder/vp9_svc_layercontext.c
+++ b/vp9/encoder/vp9_svc_layercontext.c
@@ -25,13 +25,23 @@ void vp9_init_layer_context(VP9_COMP *const cpi) {
const VP9EncoderConfig *const oxcf = &cpi->oxcf;
int mi_rows = cpi->common.mi_rows;
int mi_cols = cpi->common.mi_cols;
- int sl, tl;
+ int sl, tl, i;
int alt_ref_idx = svc->number_spatial_layers;
svc->spatial_layer_id = 0;
svc->temporal_layer_id = 0;
svc->first_spatial_layer_to_encode = 0;
svc->rc_drop_superframe = 0;
+ svc->force_zero_mode_spatial_ref = 0;
+ svc->current_superframe = 0;
+ for (i = 0; i < REF_FRAMES; ++i)
+ svc->ref_frame_index[i] = -1;
+ for (sl = 0; sl < oxcf->ss_number_layers; ++sl) {
+ cpi->svc.ext_frame_flags[sl] = 0;
+ cpi->svc.ext_lst_fb_idx[sl] = 0;
+ cpi->svc.ext_gld_fb_idx[sl] = 1;
+ cpi->svc.ext_alt_fb_idx[sl] = 2;
+ }
if (cpi->oxcf.error_resilient_mode == 0 && cpi->oxcf.pass == 2) {
if (vpx_realloc_frame_buffer(&cpi->svc.empty_frame.img,
@@ -279,7 +289,7 @@ void vp9_restore_layer_context(VP9_COMP *const cpi) {
// Reset the frames_since_key and frames_to_key counters to their values
// before the layer restore. Keep these defined for the stream (not layer).
if (cpi->svc.number_temporal_layers > 1 ||
- cpi->svc.number_spatial_layers > 1) {
+ (cpi->svc.number_spatial_layers > 1 && !is_two_pass_svc(cpi))) {
cpi->rc.frames_since_key = old_frame_since_key;
cpi->rc.frames_to_key = old_frame_to_key;
}
@@ -353,6 +363,8 @@ void vp9_inc_frame_in_layer(VP9_COMP *const cpi) {
cpi->svc.number_temporal_layers];
++lc->current_video_frame_in_layer;
++lc->frames_from_key_frame;
+ if (cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1)
+ ++cpi->svc.current_superframe;
}
int vp9_is_upper_layer_key_frame(const VP9_COMP *const cpi) {
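
[Editor's note] Two pieces of new SVC state are worth connecting: ref_frame_index[] and the ext_*_fb_idx defaults are reset per stream in vp9_init_layer_context() (LAST/GOLDEN/ALTREF map to buffers 0/1/2 until the application overrides them), and current_superframe advances exactly once per superframe, when the top spatial layer finishes. A sketch of the counting rule:

    /* Increment once per superframe: only the last spatial layer bumps the
     * counter, regardless of how many temporal layers were coded. */
    static void maybe_inc_superframe(int spatial_layer_id,
                                     int number_spatial_layers,
                                     int *current_superframe) {
      if (spatial_layer_id == number_spatial_layers - 1)
        ++*current_superframe;
    }
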
@@ -542,6 +554,7 @@ static void set_flags_and_fb_idx_for_temporal_mode_noLayering(
int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) {
int width = 0, height = 0;
LAYER_CONTEXT *lc = NULL;
+ cpi->svc.force_zero_mode_spatial_ref = 1;
if (cpi->svc.temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_0212) {
set_flags_and_fb_idx_for_temporal_mode3(cpi);
@@ -559,6 +572,8 @@ int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) {
// Note that the check (cpi->ext_refresh_frame_flags_pending == 0) is
// needed to support the case where the frame flags may be passed in via
// vpx_codec_encode(), which can be used for the temporal-only svc case.
+ // TODO(marpan): Consider adding an enc_config parameter to better handle
+ // this case.
if (cpi->ext_refresh_frame_flags_pending == 0) {
int sl;
cpi->svc.spatial_layer_id = cpi->svc.spatial_layer_to_encode;
diff --git a/vp9/encoder/vp9_svc_layercontext.h b/vp9/encoder/vp9_svc_layercontext.h
index 5dbf9b418..1f446d743 100644
--- a/vp9/encoder/vp9_svc_layercontext.h
+++ b/vp9/encoder/vp9_svc_layercontext.h
@@ -83,6 +83,9 @@ typedef struct {
int ext_lst_fb_idx[VPX_MAX_LAYERS];
int ext_gld_fb_idx[VPX_MAX_LAYERS];
int ext_alt_fb_idx[VPX_MAX_LAYERS];
+ int ref_frame_index[REF_FRAMES];
+ int force_zero_mode_spatial_ref;
+ int current_superframe;
} SVC;
struct VP9_COMP;
diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c
index 16f9c8573..015dbc0ca 100644
--- a/vp9/encoder/vp9_temporal_filter.c
+++ b/vp9/encoder/vp9_temporal_filter.c
@@ -135,15 +135,38 @@ void vp9_temporal_filter_apply_c(uint8_t *frame1,
for (i = 0, k = 0; i < block_height; i++) {
for (j = 0; j < block_width; j++, k++) {
- int src_byte = frame1[byte];
- int pixel_value = *frame2++;
-
- modifier = src_byte - pixel_value;
- // This is an integer approximation of:
- // float coeff = (3.0 * modifer * modifier) / pow(2, strength);
- // modifier = (int)roundf(coeff > 16 ? 0 : 16-coeff);
- modifier *= modifier;
- modifier *= 3;
+ int pixel_value = *frame2;
+
+      // non-local means approach
+ int diff_sse[9] = { 0 };
+ int idx, idy, index = 0;
+
+ for (idy = -1; idy <= 1; ++idy) {
+ for (idx = -1; idx <= 1; ++idx) {
+ int row = i + idy;
+ int col = j + idx;
+
+ if (row >= 0 && row < (int)block_height &&
+ col >= 0 && col < (int)block_width) {
+ int diff = frame1[byte + idy * (int)stride + idx] -
+ frame2[idy * (int)block_width + idx];
+ diff_sse[index] = diff * diff;
+ ++index;
+ }
+ }
+ }
+
+ assert(index > 0);
+
+ modifier = 0;
+ for (idx = 0; idx < 9; ++idx)
+ modifier += diff_sse[idx];
+
+ modifier *= 3;
+ modifier /= index;
+
+ ++frame2;
+
modifier += rounding;
modifier >>= strength;
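
[Editor's note] The filter update replaces the single-pixel squared difference with a non-local-means style weight: the modifier becomes the mean squared difference over the valid 3x3 neighborhood (index counts the in-bounds taps, 4 at a corner, 9 in the interior), scaled by 3 and shifted by the strength. A standalone sketch of that computation; per the surrounding code, rounding is typically 1 << (strength - 1):

    /* diff_sse[] holds the squared differences of the in-bounds neighbors,
     * packed at the front; slots past 'index' stay zero in the real code. */
    static int nlm_modifier(const int diff_sse[9], int index, int strength) {
      const int rounding = strength > 0 ? 1 << (strength - 1) : 0;
      int i, modifier = 0;
      for (i = 0; i < 9; ++i)
        modifier += diff_sse[i];
      modifier *= 3;
      modifier /= index;  /* mean over the 4..9 valid taps */
      modifier += rounding;
      return modifier >> strength;
    }
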
@@ -182,15 +205,34 @@ void vp9_highbd_temporal_filter_apply_c(uint8_t *frame1_8,
for (i = 0, k = 0; i < block_height; i++) {
for (j = 0; j < block_width; j++, k++) {
- int src_byte = frame1[byte];
- int pixel_value = *frame2++;
-
- modifier = src_byte - pixel_value;
- // This is an integer approximation of:
- // float coeff = (3.0 * modifer * modifier) / pow(2, strength);
- // modifier = (int)roundf(coeff > 16 ? 0 : 16-coeff);
- modifier *= modifier;
+ int pixel_value = *frame2;
+ int diff_sse[9] = { 0 };
+ int idx, idy, index = 0;
+
+ for (idy = -1; idy <= 1; ++idy) {
+ for (idx = -1; idx <= 1; ++idx) {
+ int row = i + idy;
+ int col = j + idx;
+
+ if (row >= 0 && row < (int)block_height &&
+ col >= 0 && col < (int)block_width) {
+ int diff = frame1[byte + idy * (int)stride + idx] -
+ frame2[idy * (int)block_width + idx];
+ diff_sse[index] = diff * diff;
+ ++index;
+ }
+ }
+ }
+ assert(index > 0);
+
+ modifier = 0;
+ for (idx = 0; idx < 9; ++idx)
+ modifier += diff_sse[idx];
+
modifier *= 3;
+ modifier /= index;
+
+ ++frame2;
modifier += rounding;
modifier >>= strength;
@@ -383,55 +425,58 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi,
if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
int adj_strength = strength + 2 * (mbd->bd - 8);
// Apply the filter (YUV)
- vp9_highbd_temporal_filter_apply(f->y_buffer + mb_y_offset,
- f->y_stride,
- predictor, 16, 16, adj_strength,
- filter_weight,
- accumulator, count);
- vp9_highbd_temporal_filter_apply(f->u_buffer + mb_uv_offset,
- f->uv_stride, predictor + 256,
- mb_uv_width, mb_uv_height,
- adj_strength,
- filter_weight, accumulator + 256,
- count + 256);
- vp9_highbd_temporal_filter_apply(f->v_buffer + mb_uv_offset,
- f->uv_stride, predictor + 512,
- mb_uv_width, mb_uv_height,
- adj_strength, filter_weight,
- accumulator + 512, count + 512);
+ vp9_highbd_temporal_filter_apply_c(f->y_buffer + mb_y_offset,
+ f->y_stride,
+ predictor, 16, 16, adj_strength,
+ filter_weight,
+ accumulator, count);
+ vp9_highbd_temporal_filter_apply_c(f->u_buffer + mb_uv_offset,
+ f->uv_stride, predictor + 256,
+ mb_uv_width, mb_uv_height,
+ adj_strength,
+ filter_weight, accumulator + 256,
+ count + 256);
+ vp9_highbd_temporal_filter_apply_c(f->v_buffer + mb_uv_offset,
+ f->uv_stride, predictor + 512,
+ mb_uv_width, mb_uv_height,
+ adj_strength, filter_weight,
+ accumulator + 512, count + 512);
} else {
// Apply the filter (YUV)
- vp9_temporal_filter_apply(f->y_buffer + mb_y_offset, f->y_stride,
+ vp9_temporal_filter_apply_c(f->y_buffer + mb_y_offset, f->y_stride,
+ predictor, 16, 16,
+ strength, filter_weight,
+ accumulator, count);
+ vp9_temporal_filter_apply_c(f->u_buffer + mb_uv_offset,
+ f->uv_stride,
+ predictor + 256,
+ mb_uv_width, mb_uv_height, strength,
+ filter_weight, accumulator + 256,
+ count + 256);
+ vp9_temporal_filter_apply_c(f->v_buffer + mb_uv_offset,
+ f->uv_stride,
+ predictor + 512,
+ mb_uv_width, mb_uv_height, strength,
+ filter_weight, accumulator + 512,
+ count + 512);
+ }
+#else
+ // Apply the filter (YUV)
+ // TODO(jingning): Need SIMD optimization for this.
+ vp9_temporal_filter_apply_c(f->y_buffer + mb_y_offset, f->y_stride,
predictor, 16, 16,
strength, filter_weight,
accumulator, count);
- vp9_temporal_filter_apply(f->u_buffer + mb_uv_offset, f->uv_stride,
+ vp9_temporal_filter_apply_c(f->u_buffer + mb_uv_offset, f->uv_stride,
predictor + 256,
mb_uv_width, mb_uv_height, strength,
filter_weight, accumulator + 256,
count + 256);
- vp9_temporal_filter_apply(f->v_buffer + mb_uv_offset, f->uv_stride,
+ vp9_temporal_filter_apply_c(f->v_buffer + mb_uv_offset, f->uv_stride,
predictor + 512,
mb_uv_width, mb_uv_height, strength,
filter_weight, accumulator + 512,
count + 512);
- }
-#else
- // Apply the filter (YUV)
- vp9_temporal_filter_apply(f->y_buffer + mb_y_offset, f->y_stride,
- predictor, 16, 16,
- strength, filter_weight,
- accumulator, count);
- vp9_temporal_filter_apply(f->u_buffer + mb_uv_offset, f->uv_stride,
- predictor + 256,
- mb_uv_width, mb_uv_height, strength,
- filter_weight, accumulator + 256,
- count + 256);
- vp9_temporal_filter_apply(f->v_buffer + mb_uv_offset, f->uv_stride,
- predictor + 512,
- mb_uv_width, mb_uv_height, strength,
- filter_weight, accumulator + 512,
- count + 512);
#endif // CONFIG_VP9_HIGHBITDEPTH
}
}
diff --git a/vp9/encoder/x86/vp9_avg_intrin_sse2.c b/vp9/encoder/x86/vp9_avg_intrin_sse2.c
deleted file mode 100644
index 4531d794a..000000000
--- a/vp9/encoder/x86/vp9_avg_intrin_sse2.c
+++ /dev/null
@@ -1,424 +0,0 @@
-/*
- * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <emmintrin.h>
-
-#include "./vp9_rtcd.h"
-#include "vpx_ports/mem.h"
-
-void vp9_minmax_8x8_sse2(const uint8_t *s, int p, const uint8_t *d, int dp,
- int *min, int *max) {
- __m128i u0, s0, d0, diff, maxabsdiff, minabsdiff, negdiff, absdiff0, absdiff;
- u0 = _mm_setzero_si128();
- // Row 0
- s0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s)), u0);
- d0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(d)), u0);
- diff = _mm_subs_epi16(s0, d0);
- negdiff = _mm_subs_epi16(u0, diff);
- absdiff0 = _mm_max_epi16(diff, negdiff);
- // Row 1
- s0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + p)), u0);
- d0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(d + dp)), u0);
- diff = _mm_subs_epi16(s0, d0);
- negdiff = _mm_subs_epi16(u0, diff);
- absdiff = _mm_max_epi16(diff, negdiff);
- maxabsdiff = _mm_max_epi16(absdiff0, absdiff);
- minabsdiff = _mm_min_epi16(absdiff0, absdiff);
- // Row 2
- s0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 2 * p)), u0);
- d0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(d + 2 * dp)), u0);
- diff = _mm_subs_epi16(s0, d0);
- negdiff = _mm_subs_epi16(u0, diff);
- absdiff = _mm_max_epi16(diff, negdiff);
- maxabsdiff = _mm_max_epi16(maxabsdiff, absdiff);
- minabsdiff = _mm_min_epi16(minabsdiff, absdiff);
- // Row 3
- s0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 3 * p)), u0);
- d0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(d + 3 * dp)), u0);
- diff = _mm_subs_epi16(s0, d0);
- negdiff = _mm_subs_epi16(u0, diff);
- absdiff = _mm_max_epi16(diff, negdiff);
- maxabsdiff = _mm_max_epi16(maxabsdiff, absdiff);
- minabsdiff = _mm_min_epi16(minabsdiff, absdiff);
- // Row 4
- s0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 4 * p)), u0);
- d0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(d + 4 * dp)), u0);
- diff = _mm_subs_epi16(s0, d0);
- negdiff = _mm_subs_epi16(u0, diff);
- absdiff = _mm_max_epi16(diff, negdiff);
- maxabsdiff = _mm_max_epi16(maxabsdiff, absdiff);
- minabsdiff = _mm_min_epi16(minabsdiff, absdiff);
- // Row 5
- s0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 5 * p)), u0);
- d0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(d + 5 * dp)), u0);
- diff = _mm_subs_epi16(s0, d0);
- negdiff = _mm_subs_epi16(u0, diff);
- absdiff = _mm_max_epi16(diff, negdiff);
- maxabsdiff = _mm_max_epi16(maxabsdiff, absdiff);
- minabsdiff = _mm_min_epi16(minabsdiff, absdiff);
- // Row 6
- s0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 6 * p)), u0);
- d0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(d + 6 * dp)), u0);
- diff = _mm_subs_epi16(s0, d0);
- negdiff = _mm_subs_epi16(u0, diff);
- absdiff = _mm_max_epi16(diff, negdiff);
- maxabsdiff = _mm_max_epi16(maxabsdiff, absdiff);
- minabsdiff = _mm_min_epi16(minabsdiff, absdiff);
- // Row 7
- s0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 7 * p)), u0);
- d0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(d + 7 * dp)), u0);
- diff = _mm_subs_epi16(s0, d0);
- negdiff = _mm_subs_epi16(u0, diff);
- absdiff = _mm_max_epi16(diff, negdiff);
- maxabsdiff = _mm_max_epi16(maxabsdiff, absdiff);
- minabsdiff = _mm_min_epi16(minabsdiff, absdiff);
-
- maxabsdiff = _mm_max_epi16(maxabsdiff, _mm_srli_si128(maxabsdiff, 8));
- maxabsdiff = _mm_max_epi16(maxabsdiff, _mm_srli_epi64(maxabsdiff, 32));
- maxabsdiff = _mm_max_epi16(maxabsdiff, _mm_srli_epi64(maxabsdiff, 16));
- *max = _mm_extract_epi16(maxabsdiff, 0);
-
- minabsdiff = _mm_min_epi16(minabsdiff, _mm_srli_si128(minabsdiff, 8));
- minabsdiff = _mm_min_epi16(minabsdiff, _mm_srli_epi64(minabsdiff, 32));
- minabsdiff = _mm_min_epi16(minabsdiff, _mm_srli_epi64(minabsdiff, 16));
- *min = _mm_extract_epi16(minabsdiff, 0);
-}
-
-unsigned int vp9_avg_8x8_sse2(const uint8_t *s, int p) {
- __m128i s0, s1, u0;
- unsigned int avg = 0;
- u0 = _mm_setzero_si128();
- s0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s)), u0);
- s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + p)), u0);
- s0 = _mm_adds_epu16(s0, s1);
- s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 2 * p)), u0);
- s0 = _mm_adds_epu16(s0, s1);
- s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 3 * p)), u0);
- s0 = _mm_adds_epu16(s0, s1);
- s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 4 * p)), u0);
- s0 = _mm_adds_epu16(s0, s1);
- s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 5 * p)), u0);
- s0 = _mm_adds_epu16(s0, s1);
- s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 6 * p)), u0);
- s0 = _mm_adds_epu16(s0, s1);
- s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 7 * p)), u0);
- s0 = _mm_adds_epu16(s0, s1);
-
- s0 = _mm_adds_epu16(s0, _mm_srli_si128(s0, 8));
- s0 = _mm_adds_epu16(s0, _mm_srli_epi64(s0, 32));
- s0 = _mm_adds_epu16(s0, _mm_srli_epi64(s0, 16));
- avg = _mm_extract_epi16(s0, 0);
- return (avg + 32) >> 6;
-}
-
-unsigned int vp9_avg_4x4_sse2(const uint8_t *s, int p) {
- __m128i s0, s1, u0;
- unsigned int avg = 0;
- u0 = _mm_setzero_si128();
- s0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s)), u0);
- s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + p)), u0);
- s0 = _mm_adds_epu16(s0, s1);
- s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 2 * p)), u0);
- s0 = _mm_adds_epu16(s0, s1);
- s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 3 * p)), u0);
- s0 = _mm_adds_epu16(s0, s1);
-
- s0 = _mm_adds_epu16(s0, _mm_srli_si128(s0, 4));
- s0 = _mm_adds_epu16(s0, _mm_srli_epi64(s0, 16));
- avg = _mm_extract_epi16(s0, 0);
- return (avg + 8) >> 4;
-}
-
-static void hadamard_col8_sse2(__m128i *in, int iter) {
- __m128i a0 = in[0];
- __m128i a1 = in[1];
- __m128i a2 = in[2];
- __m128i a3 = in[3];
- __m128i a4 = in[4];
- __m128i a5 = in[5];
- __m128i a6 = in[6];
- __m128i a7 = in[7];
-
- __m128i b0 = _mm_add_epi16(a0, a1);
- __m128i b1 = _mm_sub_epi16(a0, a1);
- __m128i b2 = _mm_add_epi16(a2, a3);
- __m128i b3 = _mm_sub_epi16(a2, a3);
- __m128i b4 = _mm_add_epi16(a4, a5);
- __m128i b5 = _mm_sub_epi16(a4, a5);
- __m128i b6 = _mm_add_epi16(a6, a7);
- __m128i b7 = _mm_sub_epi16(a6, a7);
-
- a0 = _mm_add_epi16(b0, b2);
- a1 = _mm_add_epi16(b1, b3);
- a2 = _mm_sub_epi16(b0, b2);
- a3 = _mm_sub_epi16(b1, b3);
- a4 = _mm_add_epi16(b4, b6);
- a5 = _mm_add_epi16(b5, b7);
- a6 = _mm_sub_epi16(b4, b6);
- a7 = _mm_sub_epi16(b5, b7);
-
- if (iter == 0) {
- b0 = _mm_add_epi16(a0, a4);
- b7 = _mm_add_epi16(a1, a5);
- b3 = _mm_add_epi16(a2, a6);
- b4 = _mm_add_epi16(a3, a7);
- b2 = _mm_sub_epi16(a0, a4);
- b6 = _mm_sub_epi16(a1, a5);
- b1 = _mm_sub_epi16(a2, a6);
- b5 = _mm_sub_epi16(a3, a7);
-
- a0 = _mm_unpacklo_epi16(b0, b1);
- a1 = _mm_unpacklo_epi16(b2, b3);
- a2 = _mm_unpackhi_epi16(b0, b1);
- a3 = _mm_unpackhi_epi16(b2, b3);
- a4 = _mm_unpacklo_epi16(b4, b5);
- a5 = _mm_unpacklo_epi16(b6, b7);
- a6 = _mm_unpackhi_epi16(b4, b5);
- a7 = _mm_unpackhi_epi16(b6, b7);
-
- b0 = _mm_unpacklo_epi32(a0, a1);
- b1 = _mm_unpacklo_epi32(a4, a5);
- b2 = _mm_unpackhi_epi32(a0, a1);
- b3 = _mm_unpackhi_epi32(a4, a5);
- b4 = _mm_unpacklo_epi32(a2, a3);
- b5 = _mm_unpacklo_epi32(a6, a7);
- b6 = _mm_unpackhi_epi32(a2, a3);
- b7 = _mm_unpackhi_epi32(a6, a7);
-
- in[0] = _mm_unpacklo_epi64(b0, b1);
- in[1] = _mm_unpackhi_epi64(b0, b1);
- in[2] = _mm_unpacklo_epi64(b2, b3);
- in[3] = _mm_unpackhi_epi64(b2, b3);
- in[4] = _mm_unpacklo_epi64(b4, b5);
- in[5] = _mm_unpackhi_epi64(b4, b5);
- in[6] = _mm_unpacklo_epi64(b6, b7);
- in[7] = _mm_unpackhi_epi64(b6, b7);
- } else {
- in[0] = _mm_add_epi16(a0, a4);
- in[7] = _mm_add_epi16(a1, a5);
- in[3] = _mm_add_epi16(a2, a6);
- in[4] = _mm_add_epi16(a3, a7);
- in[2] = _mm_sub_epi16(a0, a4);
- in[6] = _mm_sub_epi16(a1, a5);
- in[1] = _mm_sub_epi16(a2, a6);
- in[5] = _mm_sub_epi16(a3, a7);
- }
-}
-
-void vp9_hadamard_8x8_sse2(int16_t const *src_diff, int src_stride,
- int16_t *coeff) {
- __m128i src[8];
- src[0] = _mm_load_si128((const __m128i *)src_diff);
- src[1] = _mm_load_si128((const __m128i *)(src_diff += src_stride));
- src[2] = _mm_load_si128((const __m128i *)(src_diff += src_stride));
- src[3] = _mm_load_si128((const __m128i *)(src_diff += src_stride));
- src[4] = _mm_load_si128((const __m128i *)(src_diff += src_stride));
- src[5] = _mm_load_si128((const __m128i *)(src_diff += src_stride));
- src[6] = _mm_load_si128((const __m128i *)(src_diff += src_stride));
- src[7] = _mm_load_si128((const __m128i *)(src_diff += src_stride));
-
- hadamard_col8_sse2(src, 0);
- hadamard_col8_sse2(src, 1);
-
- _mm_store_si128((__m128i *)coeff, src[0]);
- coeff += 8;
- _mm_store_si128((__m128i *)coeff, src[1]);
- coeff += 8;
- _mm_store_si128((__m128i *)coeff, src[2]);
- coeff += 8;
- _mm_store_si128((__m128i *)coeff, src[3]);
- coeff += 8;
- _mm_store_si128((__m128i *)coeff, src[4]);
- coeff += 8;
- _mm_store_si128((__m128i *)coeff, src[5]);
- coeff += 8;
- _mm_store_si128((__m128i *)coeff, src[6]);
- coeff += 8;
- _mm_store_si128((__m128i *)coeff, src[7]);
-}
-
-void vp9_hadamard_16x16_sse2(int16_t const *src_diff, int src_stride,
- int16_t *coeff) {
- int idx;
- for (idx = 0; idx < 4; ++idx) {
- int16_t const *src_ptr = src_diff + (idx >> 1) * 8 * src_stride
- + (idx & 0x01) * 8;
- vp9_hadamard_8x8_sse2(src_ptr, src_stride, coeff + idx * 64);
- }
-
- for (idx = 0; idx < 64; idx += 8) {
- __m128i coeff0 = _mm_load_si128((const __m128i *)coeff);
- __m128i coeff1 = _mm_load_si128((const __m128i *)(coeff + 64));
- __m128i coeff2 = _mm_load_si128((const __m128i *)(coeff + 128));
- __m128i coeff3 = _mm_load_si128((const __m128i *)(coeff + 192));
-
- __m128i b0 = _mm_add_epi16(coeff0, coeff1);
- __m128i b1 = _mm_sub_epi16(coeff0, coeff1);
- __m128i b2 = _mm_add_epi16(coeff2, coeff3);
- __m128i b3 = _mm_sub_epi16(coeff2, coeff3);
-
- b0 = _mm_srai_epi16(b0, 1);
- b1 = _mm_srai_epi16(b1, 1);
- b2 = _mm_srai_epi16(b2, 1);
- b3 = _mm_srai_epi16(b3, 1);
-
- coeff0 = _mm_add_epi16(b0, b2);
- coeff1 = _mm_add_epi16(b1, b3);
- _mm_store_si128((__m128i *)coeff, coeff0);
- _mm_store_si128((__m128i *)(coeff + 64), coeff1);
-
- coeff2 = _mm_sub_epi16(b0, b2);
- coeff3 = _mm_sub_epi16(b1, b3);
- _mm_store_si128((__m128i *)(coeff + 128), coeff2);
- _mm_store_si128((__m128i *)(coeff + 192), coeff3);
-
- coeff += 8;
- }
-}
-
-int16_t vp9_satd_sse2(const int16_t *coeff, int length) {
- int i;
- __m128i sum = _mm_load_si128((const __m128i *)coeff);
- __m128i sign = _mm_srai_epi16(sum, 15);
- __m128i val = _mm_xor_si128(sum, sign);
- sum = _mm_sub_epi16(val, sign);
- coeff += 8;
-
- for (i = 8; i < length; i += 8) {
- __m128i src_line = _mm_load_si128((const __m128i *)coeff);
- sign = _mm_srai_epi16(src_line, 15);
- val = _mm_xor_si128(src_line, sign);
- val = _mm_sub_epi16(val, sign);
- sum = _mm_add_epi16(sum, val);
- coeff += 8;
- }
-
- val = _mm_srli_si128(sum, 8);
- sum = _mm_add_epi16(sum, val);
- val = _mm_srli_epi64(sum, 32);
- sum = _mm_add_epi16(sum, val);
- val = _mm_srli_epi32(sum, 16);
- sum = _mm_add_epi16(sum, val);
-
- return _mm_extract_epi16(sum, 0);
-}
-
-void vp9_int_pro_row_sse2(int16_t *hbuf, uint8_t const*ref,
- const int ref_stride, const int height) {
- int idx;
- __m128i zero = _mm_setzero_si128();
- __m128i src_line = _mm_loadu_si128((const __m128i *)ref);
- __m128i s0 = _mm_unpacklo_epi8(src_line, zero);
- __m128i s1 = _mm_unpackhi_epi8(src_line, zero);
- __m128i t0, t1;
- int height_1 = height - 1;
- ref += ref_stride;
-
- for (idx = 1; idx < height_1; idx += 2) {
- src_line = _mm_loadu_si128((const __m128i *)ref);
- t0 = _mm_unpacklo_epi8(src_line, zero);
- t1 = _mm_unpackhi_epi8(src_line, zero);
- s0 = _mm_adds_epu16(s0, t0);
- s1 = _mm_adds_epu16(s1, t1);
- ref += ref_stride;
-
- src_line = _mm_loadu_si128((const __m128i *)ref);
- t0 = _mm_unpacklo_epi8(src_line, zero);
- t1 = _mm_unpackhi_epi8(src_line, zero);
- s0 = _mm_adds_epu16(s0, t0);
- s1 = _mm_adds_epu16(s1, t1);
- ref += ref_stride;
- }
-
- src_line = _mm_loadu_si128((const __m128i *)ref);
- t0 = _mm_unpacklo_epi8(src_line, zero);
- t1 = _mm_unpackhi_epi8(src_line, zero);
- s0 = _mm_adds_epu16(s0, t0);
- s1 = _mm_adds_epu16(s1, t1);
-
- if (height == 64) {
- s0 = _mm_srai_epi16(s0, 5);
- s1 = _mm_srai_epi16(s1, 5);
- } else if (height == 32) {
- s0 = _mm_srai_epi16(s0, 4);
- s1 = _mm_srai_epi16(s1, 4);
- } else {
- s0 = _mm_srai_epi16(s0, 3);
- s1 = _mm_srai_epi16(s1, 3);
- }
-
- _mm_storeu_si128((__m128i *)hbuf, s0);
- hbuf += 8;
- _mm_storeu_si128((__m128i *)hbuf, s1);
-}
-
-int16_t vp9_int_pro_col_sse2(uint8_t const *ref, const int width) {
- __m128i zero = _mm_setzero_si128();
- __m128i src_line = _mm_load_si128((const __m128i *)ref);
- __m128i s0 = _mm_sad_epu8(src_line, zero);
- __m128i s1;
- int i;
-
- for (i = 16; i < width; i += 16) {
- ref += 16;
- src_line = _mm_load_si128((const __m128i *)ref);
- s1 = _mm_sad_epu8(src_line, zero);
- s0 = _mm_adds_epu16(s0, s1);
- }
-
- s1 = _mm_srli_si128(s0, 8);
- s0 = _mm_adds_epu16(s0, s1);
-
- return _mm_extract_epi16(s0, 0);
-}
-
-int vp9_vector_var_sse2(int16_t const *ref, int16_t const *src,
- const int bwl) {
- int idx;
- int width = 4 << bwl;
- int16_t mean;
- __m128i v0 = _mm_loadu_si128((const __m128i *)ref);
- __m128i v1 = _mm_load_si128((const __m128i *)src);
- __m128i diff = _mm_subs_epi16(v0, v1);
- __m128i sum = diff;
- __m128i sse = _mm_madd_epi16(diff, diff);
-
- ref += 8;
- src += 8;
-
- for (idx = 8; idx < width; idx += 8) {
- v0 = _mm_loadu_si128((const __m128i *)ref);
- v1 = _mm_load_si128((const __m128i *)src);
- diff = _mm_subs_epi16(v0, v1);
-
- sum = _mm_add_epi16(sum, diff);
- v0 = _mm_madd_epi16(diff, diff);
- sse = _mm_add_epi32(sse, v0);
-
- ref += 8;
- src += 8;
- }
-
- v0 = _mm_srli_si128(sum, 8);
- sum = _mm_add_epi16(sum, v0);
- v0 = _mm_srli_epi64(sum, 32);
- sum = _mm_add_epi16(sum, v0);
- v0 = _mm_srli_epi32(sum, 16);
- sum = _mm_add_epi16(sum, v0);
-
- v1 = _mm_srli_si128(sse, 8);
- sse = _mm_add_epi32(sse, v1);
- v1 = _mm_srli_epi64(sse, 32);
- sse = _mm_add_epi32(sse, v1);
-
- mean = _mm_extract_epi16(sum, 0);
-
- return _mm_cvtsi128_si32(sse) - ((mean * mean) >> (bwl + 2));
-}
diff --git a/vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm b/vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm
deleted file mode 100644
index 74c52df19..000000000
--- a/vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm
+++ /dev/null
@@ -1,121 +0,0 @@
-;
-; Copyright (c) 2014 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-%define private_prefix vp9
-
-%include "third_party/x86inc/x86inc.asm"
-
-; This file provides SSSE3 version of the forward transformation. Part
-; of the macro definitions are originally derived from the ffmpeg project.
-; The current version applies to x86 64-bit only.
-
-SECTION .text
-
-%if ARCH_X86_64
-; matrix transpose
-%macro INTERLEAVE_2X 4
- punpckh%1 m%4, m%2, m%3
- punpckl%1 m%2, m%3
- SWAP %3, %4
-%endmacro
-
-%macro TRANSPOSE8X8 9
- INTERLEAVE_2X wd, %1, %2, %9
- INTERLEAVE_2X wd, %3, %4, %9
- INTERLEAVE_2X wd, %5, %6, %9
- INTERLEAVE_2X wd, %7, %8, %9
-
- INTERLEAVE_2X dq, %1, %3, %9
- INTERLEAVE_2X dq, %2, %4, %9
- INTERLEAVE_2X dq, %5, %7, %9
- INTERLEAVE_2X dq, %6, %8, %9
-
- INTERLEAVE_2X qdq, %1, %5, %9
- INTERLEAVE_2X qdq, %3, %7, %9
- INTERLEAVE_2X qdq, %2, %6, %9
- INTERLEAVE_2X qdq, %4, %8, %9
-
- SWAP %2, %5
- SWAP %4, %7
-%endmacro
-
-%macro HMD8_1D 0
- psubw m8, m0, m1
- psubw m9, m2, m3
- paddw m0, m1
- paddw m2, m3
- SWAP 1, 8
- SWAP 3, 9
- psubw m8, m4, m5
- psubw m9, m6, m7
- paddw m4, m5
- paddw m6, m7
- SWAP 5, 8
- SWAP 7, 9
-
- psubw m8, m0, m2
- psubw m9, m1, m3
- paddw m0, m2
- paddw m1, m3
- SWAP 2, 8
- SWAP 3, 9
- psubw m8, m4, m6
- psubw m9, m5, m7
- paddw m4, m6
- paddw m5, m7
- SWAP 6, 8
- SWAP 7, 9
-
- psubw m8, m0, m4
- psubw m9, m1, m5
- paddw m0, m4
- paddw m1, m5
- SWAP 4, 8
- SWAP 5, 9
- psubw m8, m2, m6
- psubw m9, m3, m7
- paddw m2, m6
- paddw m3, m7
- SWAP 6, 8
- SWAP 7, 9
-%endmacro
-
-INIT_XMM ssse3
-cglobal hadamard_8x8, 3, 5, 10, input, stride, output
- lea r3, [2 * strideq]
- lea r4, [4 * strideq]
-
- mova m0, [inputq]
- mova m1, [inputq + r3]
- lea inputq, [inputq + r4]
- mova m2, [inputq]
- mova m3, [inputq + r3]
- lea inputq, [inputq + r4]
- mova m4, [inputq]
- mova m5, [inputq + r3]
- lea inputq, [inputq + r4]
- mova m6, [inputq]
- mova m7, [inputq + r3]
-
- HMD8_1D
- TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9
- HMD8_1D
-
- mova [outputq + 0], m0
- mova [outputq + 16], m1
- mova [outputq + 32], m2
- mova [outputq + 48], m3
- mova [outputq + 64], m4
- mova [outputq + 80], m5
- mova [outputq + 96], m6
- mova [outputq + 112], m7
-
- RET
-%endif
diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk
index 5918240e2..de688bf48 100644
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk
@@ -17,7 +17,6 @@ VP9_CX_SRCS_REMOVE-no += $(VP9_COMMON_SRCS_REMOVE-no)
VP9_CX_SRCS-yes += vp9_cx_iface.c
-VP9_CX_SRCS-yes += encoder/vp9_avg.c
VP9_CX_SRCS-yes += encoder/vp9_bitstream.c
VP9_CX_SRCS-yes += encoder/vp9_context_tree.c
VP9_CX_SRCS-yes += encoder/vp9_context_tree.h
@@ -93,7 +92,6 @@ VP9_CX_SRCS-yes += encoder/vp9_temporal_filter.h
VP9_CX_SRCS-yes += encoder/vp9_mbgraph.c
VP9_CX_SRCS-yes += encoder/vp9_mbgraph.h
-VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_avg_intrin_sse2.c
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_temporal_filter_apply_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_quantize_sse2.c
VP9_CX_SRCS-$(HAVE_AVX) += encoder/x86/vp9_diamond_search_sad_avx.c
@@ -114,7 +112,6 @@ endif
ifeq ($(ARCH_X86_64),yes)
ifeq ($(CONFIG_USE_X86INC),yes)
VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_quantize_ssse3_x86_64.asm
-VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_dct_ssse3_x86_64.asm
endif
endif
@@ -131,10 +128,8 @@ ifneq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
VP9_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp9_dct_neon.c
VP9_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp9_error_neon.c
endif
-VP9_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp9_avg_neon.c
VP9_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp9_quantize_neon.c
-VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_avg_msa.c
VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_error_msa.c
VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_fdct4x4_msa.c
VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_fdct8x8_msa.c