summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- test/external_frame_buffer_test.cc 1
-rw-r--r-- test/test.mk 1
-rw-r--r-- test/vp9_intrapred_test.cc 284
-rw-r--r-- vp8/common/postproc.c 12
-rw-r--r-- vp8/encoder/arm/neon/vp8_mse16x16_neon.c 4
-rw-r--r-- vp9/common/vp9_common.h 12
-rw-r--r-- vp9/common/vp9_frame_buffers.c 4
-rw-r--r-- vp9/common/vp9_quant_common.c 179
-rw-r--r-- vp9/common/vp9_quant_common.h 5
-rw-r--r-- vp9/common/vp9_reconintra.c 451
-rw-r--r-- vp9/common/vp9_rtcd_defs.pl 232
-rw-r--r-- vp9/common/x86/vp9_high_intrapred_sse2.asm 476
-rw-r--r-- vp9/decoder/vp9_decodeframe.c 8
-rw-r--r-- vp9/decoder/vp9_detokenize.c 150
-rw-r--r-- vp9/encoder/vp9_aq_complexity.c 11
-rw-r--r-- vp9/encoder/vp9_aq_cyclicrefresh.c 5
-rw-r--r-- vp9/encoder/vp9_aq_variance.c 5
-rw-r--r-- vp9/encoder/vp9_encoder.c 6
-rw-r--r-- vp9/encoder/vp9_firstpass.c 31
-rw-r--r-- vp9/encoder/vp9_picklpf.c 2
-rw-r--r-- vp9/encoder/vp9_pickmode.c 2
-rw-r--r-- vp9/encoder/vp9_quantize.c 292
-rw-r--r-- vp9/encoder/vp9_ratectrl.c 235
-rw-r--r-- vp9/encoder/vp9_ratectrl.h 11
-rw-r--r-- vp9/encoder/vp9_rd.c 112
-rw-r--r-- vp9/encoder/vp9_rdopt.c 6
-rw-r--r-- vp9/encoder/vp9_temporal_filter.c 4
-rw-r--r-- vp9/vp9_common.mk 4
-rw-r--r-- vpx/vpx_frame_buffer.h 18
-rw-r--r-- vpx_scale/generic/yv12config.c 5
-rw-r--r-- vpxdec.c 2
31 files changed, 2288 insertions, 282 deletions
diff --git a/test/external_frame_buffer_test.cc b/test/external_frame_buffer_test.cc
index 44eba3317..70b300928 100644
--- a/test/external_frame_buffer_test.cc
+++ b/test/external_frame_buffer_test.cc
@@ -71,6 +71,7 @@ class ExternalFrameBufferList {
if (ext_fb_list_[idx].size < min_size) {
delete [] ext_fb_list_[idx].data;
ext_fb_list_[idx].data = new uint8_t[min_size];
+ memset(ext_fb_list_[idx].data, 0, min_size);
ext_fb_list_[idx].size = min_size;
}
diff --git a/test/test.mk b/test/test.mk
index b92b6da73..abf815cc9 100644
--- a/test/test.mk
+++ b/test/test.mk
@@ -128,6 +128,7 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct4x4_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += variance_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_subtract_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP9) += vp9_intrapred_test.cc
ifeq ($(CONFIG_VP9_ENCODER),yes)
LIBVPX_TEST_SRCS-$(CONFIG_SPATIAL_SVC) += svc_test.cc
diff --git a/test/vp9_intrapred_test.cc b/test/vp9_intrapred_test.cc
new file mode 100644
index 000000000..7d08d9ee4
--- /dev/null
+++ b/test/vp9_intrapred_test.cc
@@ -0,0 +1,284 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <string>
+
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "./vpx_config.h"
+#include "./vp9_rtcd.h"
+#include "vp9/common/vp9_blockd.h"
+#include "vp9/common/vp9_pred_common.h"
+#include "vpx_mem/vpx_mem.h"
+#include "test/util.h"
+
+namespace {
+
+using libvpx_test::ACMRandom;
+
+const int count_test_block = 100000;
+
+// Shared logic for the VP9 high-bitdepth intra prediction tests: fills the
+// prediction edges, invokes Predict() and compares dst_ against ref_dst_.
+class VP9IntraPredBase {
+ public:
+  virtual ~VP9IntraPredBase() { libvpx_test::ClearSystemState(); }
+
+ protected:
+  // Implemented by the fixture: runs the predictors on the current edges.
+  virtual void Predict(PREDICTION_MODE mode) = 0;
+
+  // Adds one to *error_count for every pixel of the block where dst_ and
+  // ref_dst_ disagree. While the running count equals one, the pixel pair is
+  // also asserted on so the failure message carries the test case number.
+  void CheckPrediction(int test_case_number, int *error_count) const {
+    for (int row = 0; row < block_size_; ++row) {
+      for (int col = 0; col < block_size_; ++col) {
+        const ptrdiff_t offset = col + row * stride_;
+        if (ref_dst_[offset] != dst_[offset]) {
+          ++*error_count;
+        }
+        if (*error_count == 1) {
+          ASSERT_EQ(ref_dst_[offset], dst_[offset])
+              << " Failed on Test Case Number " << test_case_number;
+        }
+      }
+    }
+  }
+
+  // Runs count_test_block iterations over the supplied buffers. The first
+  // iteration uses saturated edge values (mask_); every later one draws the
+  // edges from a deterministically seeded random generator.
+  void RunTest(uint16_t* left_col, uint16_t* above_data,
+               uint16_t* dst, uint16_t* ref_dst) {
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    left_col_ = left_col;
+    dst_ = dst;
+    ref_dst_ = ref_dst;
+    // Offset by 16 entries so that above_row_[-1] stays inside above_data.
+    above_row_ = above_data + 16;
+    int error_count = 0;
+    for (int iter = 0; iter < count_test_block; ++iter) {
+      const bool saturate = (iter == 0);
+      // Above edge, including the top-left pixel at index -1.
+      for (int x = -1; x <= 2 * block_size_; ++x) {
+        above_row_[x] = saturate ? mask_ : (rnd.Rand16() & mask_);
+      }
+      // Left edge.
+      for (int y = 0; y < block_size_; ++y) {
+        left_col_[y] = saturate ? mask_ : (rnd.Rand16() & mask_);
+      }
+      Predict(DC_PRED);
+      CheckPrediction(iter, &error_count);
+    }
+    ASSERT_EQ(0, error_count);
+  }
+
+  int block_size_;
+  uint16_t *above_row_;
+  uint16_t *left_col_;
+  uint16_t *dst_;
+  uint16_t *ref_dst_;
+  ptrdiff_t stride_;
+  int mask_;
+};
+
+// Signature shared by the intra predictors exercised by this test.
+typedef void (*intra_pred_fn_t)(
+    uint16_t *dst, ptrdiff_t stride, const uint16_t *above,
+    const uint16_t *left, int bps);
+// Test parameters: (function under test, reference function, block size,
+// bit depth).
+typedef std::tr1::tuple<intra_pred_fn_t,
+                        intra_pred_fn_t, int, int> intra_pred_params_t;
+
+// Parameterized fixture that runs a predictor and its reference side by side.
+class VP9IntraPredTest
+    : public VP9IntraPredBase,
+      public ::testing::TestWithParam<intra_pred_params_t> {
+
+  // Unpacks the test parameters, then derives the buffer stride (three block
+  // widths) and the pixel mask for the selected bit depth.
+  virtual void SetUp() {
+    pred_fn_ = GET_PARAM(0);
+    ref_fn_ = GET_PARAM(1);
+    block_size_ = GET_PARAM(2);
+    bit_depth_ = GET_PARAM(3);
+    stride_ = 3 * block_size_;
+    mask_ = (1 << bit_depth_) - 1;
+  }
+
+  // Writes the reference output to ref_dst_ and the output of the function
+  // under test to dst_, using the same edges. `mode` is not consulted.
+  virtual void Predict(PREDICTION_MODE mode) {
+    const uint16_t *const above = above_row_;
+    const uint16_t *const left = left_col_;
+    ref_fn_(ref_dst_, stride_, above, left, bit_depth_);
+    ASM_REGISTER_STATE_CHECK(
+        pred_fn_(dst_, stride_, above, left, bit_depth_));
+  }
+
+  intra_pred_fn_t pred_fn_;
+  intra_pred_fn_t ref_fn_;
+  int bit_depth_;
+};
+
+TEST_P(VP9IntraPredTest, IntraPredTests) {  // Runs RunTest() on local buffers.
+ // max block size is 32
+ DECLARE_ALIGNED_ARRAY(16, uint16_t, left_col, 2*32);  // left edge
+ DECLARE_ALIGNED_ARRAY(16, uint16_t, above_data, 2*32+32);  // above edge; RunTest() offsets it by 16
+ DECLARE_ALIGNED_ARRAY(16, uint16_t, dst, 3 * 32 * 32);  // output of the function under test
+ DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_dst, 3 * 32 * 32);  // output of the reference function
+ RunTest(left_col, above_data, dst, ref_dst);
+}
+
+using std::tr1::make_tuple;
+
+#if HAVE_SSE2
+#if CONFIG_VP9_HIGHBITDEPTH
+#if ARCH_X86_64
+INSTANTIATE_TEST_CASE_P(SSE2_TO_C_8, VP9IntraPredTest,
+ ::testing::Values(
+ make_tuple(&vp9_high_dc_predictor_32x32_sse2,
+ &vp9_high_dc_predictor_32x32_c, 32, 8),
+ make_tuple(&vp9_high_tm_predictor_16x16_sse2,
+ &vp9_high_tm_predictor_16x16_c, 16, 8),
+ make_tuple(&vp9_high_tm_predictor_32x32_sse2,
+ &vp9_high_tm_predictor_32x32_c, 32, 8),
+ make_tuple(&vp9_high_dc_predictor_4x4_sse,
+ &vp9_high_dc_predictor_4x4_c, 4, 8),
+ make_tuple(&vp9_high_dc_predictor_8x8_sse2,
+ &vp9_high_dc_predictor_8x8_c, 8, 8),
+ make_tuple(&vp9_high_dc_predictor_16x16_sse2,
+ &vp9_high_dc_predictor_16x16_c, 16, 8),
+ make_tuple(&vp9_high_v_predictor_4x4_sse,
+ &vp9_high_v_predictor_4x4_c, 4, 8),
+ make_tuple(&vp9_high_v_predictor_8x8_sse2,
+ &vp9_high_v_predictor_8x8_c, 8, 8),
+ make_tuple(&vp9_high_v_predictor_16x16_sse2,
+ &vp9_high_v_predictor_16x16_c, 16, 8),
+ make_tuple(&vp9_high_v_predictor_32x32_sse2,
+ &vp9_high_v_predictor_32x32_c, 32, 8),
+ make_tuple(&vp9_high_tm_predictor_4x4_sse,
+ &vp9_high_tm_predictor_4x4_c, 4, 8),
+ make_tuple(&vp9_high_tm_predictor_8x8_sse2,
+ &vp9_high_tm_predictor_8x8_c, 8, 8)));
+#else
+INSTANTIATE_TEST_CASE_P(SSE2_TO_C_8, VP9IntraPredTest,
+ ::testing::Values(
+ make_tuple(&vp9_high_dc_predictor_4x4_sse,
+ &vp9_high_dc_predictor_4x4_c, 4, 8),
+ make_tuple(&vp9_high_dc_predictor_8x8_sse2,
+ &vp9_high_dc_predictor_8x8_c, 8, 8),
+ make_tuple(&vp9_high_dc_predictor_16x16_sse2,
+ &vp9_high_dc_predictor_16x16_c, 16, 8),
+ make_tuple(&vp9_high_v_predictor_4x4_sse,
+ &vp9_high_v_predictor_4x4_c, 4, 8),
+ make_tuple(&vp9_high_v_predictor_8x8_sse2,
+ &vp9_high_v_predictor_8x8_c, 8, 8),
+ make_tuple(&vp9_high_v_predictor_16x16_sse2,
+ &vp9_high_v_predictor_16x16_c, 16, 8),
+ make_tuple(&vp9_high_v_predictor_32x32_sse2,
+ &vp9_high_v_predictor_32x32_c, 32, 8),
+ make_tuple(&vp9_high_tm_predictor_4x4_sse,
+ &vp9_high_tm_predictor_4x4_c, 4, 8),
+ make_tuple(&vp9_high_tm_predictor_8x8_sse2,
+ &vp9_high_tm_predictor_8x8_c, 8, 8)));
+#endif
+#if ARCH_X86_64
+INSTANTIATE_TEST_CASE_P(SSE2_TO_C_10, VP9IntraPredTest,
+ ::testing::Values(
+ make_tuple(&vp9_high_dc_predictor_32x32_sse2,
+ &vp9_high_dc_predictor_32x32_c, 32, 10),
+ make_tuple(&vp9_high_tm_predictor_16x16_sse2,
+ &vp9_high_tm_predictor_16x16_c, 16, 10),
+ make_tuple(&vp9_high_tm_predictor_32x32_sse2,
+ &vp9_high_tm_predictor_32x32_c, 32, 10),
+ make_tuple(&vp9_high_dc_predictor_4x4_sse,
+ &vp9_high_dc_predictor_4x4_c, 4, 10),
+ make_tuple(&vp9_high_dc_predictor_8x8_sse2,
+ &vp9_high_dc_predictor_8x8_c, 8, 10),
+ make_tuple(&vp9_high_dc_predictor_16x16_sse2,
+ &vp9_high_dc_predictor_16x16_c, 16, 10),
+ make_tuple(&vp9_high_v_predictor_4x4_sse,
+ &vp9_high_v_predictor_4x4_c, 4, 10),
+ make_tuple(&vp9_high_v_predictor_8x8_sse2,
+ &vp9_high_v_predictor_8x8_c, 8, 10),
+ make_tuple(&vp9_high_v_predictor_16x16_sse2,
+ &vp9_high_v_predictor_16x16_c, 16, 10),
+ make_tuple(&vp9_high_v_predictor_32x32_sse2,
+ &vp9_high_v_predictor_32x32_c, 32, 10),
+ make_tuple(&vp9_high_tm_predictor_4x4_sse,
+ &vp9_high_tm_predictor_4x4_c, 4, 10),
+ make_tuple(&vp9_high_tm_predictor_8x8_sse2,
+ &vp9_high_tm_predictor_8x8_c, 8, 10)));
+#else
+INSTANTIATE_TEST_CASE_P(SSE2_TO_C_10, VP9IntraPredTest,
+ ::testing::Values(
+ make_tuple(&vp9_high_dc_predictor_4x4_sse,
+ &vp9_high_dc_predictor_4x4_c, 4, 10),
+ make_tuple(&vp9_high_dc_predictor_8x8_sse2,
+ &vp9_high_dc_predictor_8x8_c, 8, 10),
+ make_tuple(&vp9_high_dc_predictor_16x16_sse2,
+ &vp9_high_dc_predictor_16x16_c, 16, 10),
+ make_tuple(&vp9_high_v_predictor_4x4_sse,
+ &vp9_high_v_predictor_4x4_c, 4, 10),
+ make_tuple(&vp9_high_v_predictor_8x8_sse2,
+ &vp9_high_v_predictor_8x8_c, 8, 10),
+ make_tuple(&vp9_high_v_predictor_16x16_sse2,
+ &vp9_high_v_predictor_16x16_c, 16, 10),
+ make_tuple(&vp9_high_v_predictor_32x32_sse2,
+ &vp9_high_v_predictor_32x32_c, 32, 10),
+ make_tuple(&vp9_high_tm_predictor_4x4_sse,
+ &vp9_high_tm_predictor_4x4_c, 4, 10),
+ make_tuple(&vp9_high_tm_predictor_8x8_sse2,
+ &vp9_high_tm_predictor_8x8_c, 8, 10)));
+#endif
+
+#if ARCH_X86_64
+INSTANTIATE_TEST_CASE_P(SSE2_TO_C_12, VP9IntraPredTest,
+ ::testing::Values(
+ make_tuple(&vp9_high_dc_predictor_32x32_sse2,
+ &vp9_high_dc_predictor_32x32_c, 32, 12),
+ make_tuple(&vp9_high_tm_predictor_16x16_sse2,
+ &vp9_high_tm_predictor_16x16_c, 16, 12),
+ make_tuple(&vp9_high_tm_predictor_32x32_sse2,
+ &vp9_high_tm_predictor_32x32_c, 32, 12),
+ make_tuple(&vp9_high_dc_predictor_4x4_sse,
+ &vp9_high_dc_predictor_4x4_c, 4, 12),
+ make_tuple(&vp9_high_dc_predictor_8x8_sse2,
+ &vp9_high_dc_predictor_8x8_c, 8, 12),
+ make_tuple(&vp9_high_dc_predictor_16x16_sse2,
+ &vp9_high_dc_predictor_16x16_c, 16, 12),
+ make_tuple(&vp9_high_v_predictor_4x4_sse,
+ &vp9_high_v_predictor_4x4_c, 4, 12),
+ make_tuple(&vp9_high_v_predictor_8x8_sse2,
+ &vp9_high_v_predictor_8x8_c, 8, 12),
+ make_tuple(&vp9_high_v_predictor_16x16_sse2,
+ &vp9_high_v_predictor_16x16_c, 16, 12),
+ make_tuple(&vp9_high_v_predictor_32x32_sse2,
+ &vp9_high_v_predictor_32x32_c, 32, 12),
+ make_tuple(&vp9_high_tm_predictor_4x4_sse,
+ &vp9_high_tm_predictor_4x4_c, 4, 12),
+ make_tuple(&vp9_high_tm_predictor_8x8_sse2,
+ &vp9_high_tm_predictor_8x8_c, 8, 12)));
+#else
+INSTANTIATE_TEST_CASE_P(SSE2_TO_C_12, VP9IntraPredTest,
+ ::testing::Values(
+ make_tuple(&vp9_high_dc_predictor_4x4_sse,
+ &vp9_high_dc_predictor_4x4_c, 4, 12),
+ make_tuple(&vp9_high_dc_predictor_8x8_sse2,
+ &vp9_high_dc_predictor_8x8_c, 8, 12),
+ make_tuple(&vp9_high_dc_predictor_16x16_sse2,
+ &vp9_high_dc_predictor_16x16_c, 16, 12),
+ make_tuple(&vp9_high_v_predictor_4x4_sse,
+ &vp9_high_v_predictor_4x4_c, 4, 12),
+ make_tuple(&vp9_high_v_predictor_8x8_sse2,
+ &vp9_high_v_predictor_8x8_c, 8, 12),
+ make_tuple(&vp9_high_v_predictor_16x16_sse2,
+ &vp9_high_v_predictor_16x16_c, 16, 12),
+ make_tuple(&vp9_high_v_predictor_32x32_sse2,
+ &vp9_high_v_predictor_32x32_c, 32, 12),
+ make_tuple(&vp9_high_tm_predictor_4x4_sse,
+ &vp9_high_tm_predictor_4x4_c, 4, 12),
+ make_tuple(&vp9_high_tm_predictor_8x8_sse2,
+ &vp9_high_tm_predictor_8x8_c, 8, 12)));
+#endif
+#endif // CONFIG_VP9_HIGHBITDEPTH
+#endif // HAVE_SSE2
+} // namespace
diff --git a/vp8/common/postproc.c b/vp8/common/postproc.c
index e50d3935f..769c2de6b 100644
--- a/vp8/common/postproc.c
+++ b/vp8/common/postproc.c
@@ -214,6 +214,7 @@ static int q2mbl(int x)
x = 50 + (x - 50) * 10 / 8;
return x * x / 3;
}
+
void vp8_mbpost_proc_across_ip_c(unsigned char *src, int pitch, int rows, int cols, int flimit)
{
int r, c, i;
@@ -226,14 +227,14 @@ void vp8_mbpost_proc_across_ip_c(unsigned char *src, int pitch, int rows, int co
int sumsq = 0;
int sum = 0;
- for (i = -8; i<0; i++)
+ for (i = -8; i < 0; i++)
s[i]=s[0];
/* 17 avoids valgrind warning - we buffer values in c in d
* and only write them when we've read 8 ahead...
*/
- for (i = cols; i<cols+17; i++)
- s[i]=s[cols-1];
+ for (i = 0; i < 17; i++)
+ s[i+cols]=s[cols-1];
for (i = -8; i <= 6; i++)
{
@@ -264,7 +265,6 @@ void vp8_mbpost_proc_across_ip_c(unsigned char *src, int pitch, int rows, int co
}
}
-
void vp8_mbpost_proc_down_c(unsigned char *dst, int pitch, int rows, int cols, int flimit)
{
int r, c, i;
@@ -284,8 +284,8 @@ void vp8_mbpost_proc_down_c(unsigned char *dst, int pitch, int rows, int cols, i
/* 17 avoids valgrind warning - we buffer values in c in d
* and only write them when we've read 8 ahead...
*/
- for (i = rows; i < rows+17; i++)
- s[i*pitch]=s[(rows-1)*pitch];
+ for (i = 0; i < 17; i++)
+ s[(i+rows)*pitch]=s[(rows-1)*pitch];
for (i = -8; i <= 6; i++)
{
diff --git a/vp8/encoder/arm/neon/vp8_mse16x16_neon.c b/vp8/encoder/arm/neon/vp8_mse16x16_neon.c
index 06e4f9479..f806809df 100644
--- a/vp8/encoder/arm/neon/vp8_mse16x16_neon.c
+++ b/vp8/encoder/arm/neon/vp8_mse16x16_neon.c
@@ -18,7 +18,7 @@ unsigned int vp8_mse16x16_neon(
unsigned int *sse) {
int i;
int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16;
- int64_t d0s64;
+ int64x1_t d0s64;
uint8x16_t q0u8, q1u8, q2u8, q3u8;
int32x4_t q7s32, q8s32, q9s32, q10s32;
uint16x8_t q11u16, q12u16, q13u16, q14u16;
@@ -82,7 +82,7 @@ unsigned int vp8_get4x4sse_cs_neon(
const unsigned char *ref_ptr,
int recon_stride) {
int16x4_t d22s16, d24s16, d26s16, d28s16;
- int64_t d0s64;
+ int64x1_t d0s64;
uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8;
int32x4_t q7s32, q8s32, q9s32, q10s32;
uint16x8_t q11u16, q12u16, q13u16, q14u16;
diff --git a/vp9/common/vp9_common.h b/vp9/common/vp9_common.h
index 5587192e8..8305e7fa6 100644
--- a/vp9/common/vp9_common.h
+++ b/vp9/common/vp9_common.h
@@ -65,6 +65,18 @@ static INLINE int get_unsigned_bits(unsigned int num_values) {
}
#if CONFIG_VP9_HIGHBITDEPTH
+// Clamps val to the pixel range of bit depth bd: [0, 1023] for 10,
+// [0, 4095] for 12, and [0, 255] for 8 or any other value.
+static INLINE uint16_t clip_pixel_high(int val, int bd) {
+  int max_pixel = 255;
+  if (bd == 10) {
+    max_pixel = 1023;
+  } else if (bd == 12) {
+    max_pixel = 4095;
+  }
+  return (uint16_t)clamp(val, 0, max_pixel);
+}
+
#define CONVERT_TO_SHORTPTR(x) ((uint16_t*)(((uintptr_t)x) << 1))
#define CONVERT_TO_BYTEPTR(x) ((uint8_t*)(((uintptr_t)x) >> 1 ))
#endif // CONFIG_VP9_HIGHBITDEPTH
diff --git a/vp9/common/vp9_frame_buffers.c b/vp9/common/vp9_frame_buffers.c
index 733b3a927..34795b74e 100644
--- a/vp9/common/vp9_frame_buffers.c
+++ b/vp9/common/vp9_frame_buffers.c
@@ -61,6 +61,10 @@ int vp9_get_frame_buffer(void *cb_priv, size_t min_size,
if (!int_fb_list->int_fb[i].data)
return -1;
+ // This memset is needed to fix a valgrind error from the C loop filter,
+ // which accesses uninitialized memory in the frame border. It could be
+ // removed if the border is removed entirely.
+ vpx_memset(int_fb_list->int_fb[i].data, 0, min_size);
int_fb_list->int_fb[i].size = min_size;
}
diff --git a/vp9/common/vp9_quant_common.c b/vp9/common/vp9_quant_common.c
index 3332e58e6..564a3eb0c 100644
--- a/vp9/common/vp9_quant_common.c
+++ b/vp9/common/vp9_quant_common.c
@@ -47,6 +47,78 @@ static const int16_t dc_qlookup[QINDEX_RANGE] = {
1022, 1058, 1098, 1139, 1184, 1232, 1282, 1336,
};
+#if CONFIG_VP9_HIGHBITDEPTH
+static const int16_t dc_qlookup_10[QINDEX_RANGE] = {
+ 4, 9, 10, 13, 15, 17, 20, 22,
+ 25, 28, 31, 34, 37, 40, 43, 47,
+ 50, 53, 57, 60, 64, 68, 71, 75,
+ 78, 82, 86, 90, 93, 97, 101, 105,
+ 109, 113, 116, 120, 124, 128, 132, 136,
+ 140, 143, 147, 151, 155, 159, 163, 166,
+ 170, 174, 178, 182, 185, 189, 193, 197,
+ 200, 204, 208, 212, 215, 219, 223, 226,
+ 230, 233, 237, 241, 244, 248, 251, 255,
+ 259, 262, 266, 269, 273, 276, 280, 283,
+ 287, 290, 293, 297, 300, 304, 307, 310,
+ 314, 317, 321, 324, 327, 331, 334, 337,
+ 343, 350, 356, 362, 369, 375, 381, 387,
+ 394, 400, 406, 412, 418, 424, 430, 436,
+ 442, 448, 454, 460, 466, 472, 478, 484,
+ 490, 499, 507, 516, 525, 533, 542, 550,
+ 559, 567, 576, 584, 592, 601, 609, 617,
+ 625, 634, 644, 655, 666, 676, 687, 698,
+ 708, 718, 729, 739, 749, 759, 770, 782,
+ 795, 807, 819, 831, 844, 856, 868, 880,
+ 891, 906, 920, 933, 947, 961, 975, 988,
+ 1001, 1015, 1030, 1045, 1061, 1076, 1090, 1105,
+ 1120, 1137, 1153, 1170, 1186, 1202, 1218, 1236,
+ 1253, 1271, 1288, 1306, 1323, 1342, 1361, 1379,
+ 1398, 1416, 1436, 1456, 1476, 1496, 1516, 1537,
+ 1559, 1580, 1601, 1624, 1647, 1670, 1692, 1717,
+ 1741, 1766, 1791, 1817, 1844, 1871, 1900, 1929,
+ 1958, 1990, 2021, 2054, 2088, 2123, 2159, 2197,
+ 2236, 2276, 2319, 2363, 2410, 2458, 2508, 2561,
+ 2616, 2675, 2737, 2802, 2871, 2944, 3020, 3102,
+ 3188, 3280, 3375, 3478, 3586, 3702, 3823, 3953,
+ 4089, 4236, 4394, 4559, 4737, 4929, 5130, 5347,
+};
+
+static const int16_t dc_qlookup_12[QINDEX_RANGE] = {
+ 4, 12, 18, 25, 33, 41, 50, 60,
+ 70, 80, 91, 103, 115, 127, 140, 153,
+ 166, 180, 194, 208, 222, 237, 251, 266,
+ 281, 296, 312, 327, 343, 358, 374, 390,
+ 405, 421, 437, 453, 469, 484, 500, 516,
+ 532, 548, 564, 580, 596, 611, 627, 643,
+ 659, 674, 690, 706, 721, 737, 752, 768,
+ 783, 798, 814, 829, 844, 859, 874, 889,
+ 904, 919, 934, 949, 964, 978, 993, 1008,
+ 1022, 1037, 1051, 1065, 1080, 1094, 1108, 1122,
+ 1136, 1151, 1165, 1179, 1192, 1206, 1220, 1234,
+ 1248, 1261, 1275, 1288, 1302, 1315, 1329, 1342,
+ 1368, 1393, 1419, 1444, 1469, 1494, 1519, 1544,
+ 1569, 1594, 1618, 1643, 1668, 1692, 1717, 1741,
+ 1765, 1789, 1814, 1838, 1862, 1885, 1909, 1933,
+ 1957, 1992, 2027, 2061, 2096, 2130, 2165, 2199,
+ 2233, 2267, 2300, 2334, 2367, 2400, 2434, 2467,
+ 2499, 2532, 2575, 2618, 2661, 2704, 2746, 2788,
+ 2830, 2872, 2913, 2954, 2995, 3036, 3076, 3127,
+ 3177, 3226, 3275, 3324, 3373, 3421, 3469, 3517,
+ 3565, 3621, 3677, 3733, 3788, 3843, 3897, 3951,
+ 4005, 4058, 4119, 4181, 4241, 4301, 4361, 4420,
+ 4479, 4546, 4612, 4677, 4742, 4807, 4871, 4942,
+ 5013, 5083, 5153, 5222, 5291, 5367, 5442, 5517,
+ 5591, 5665, 5745, 5825, 5905, 5984, 6063, 6149,
+ 6234, 6319, 6404, 6495, 6587, 6678, 6769, 6867,
+ 6966, 7064, 7163, 7269, 7376, 7483, 7599, 7715,
+ 7832, 7958, 8085, 8214, 8352, 8492, 8635, 8788,
+ 8945, 9104, 9275, 9450, 9639, 9832, 10031, 10245,
+ 10465, 10702, 10946, 11210, 11482, 11776, 12081, 12409,
+ 12750, 13118, 13501, 13913, 14343, 14807, 15290, 15812,
+ 16356, 16943, 17575, 18237, 18949, 19718, 20521, 21387,
+};
+#endif
+
static const int16_t ac_qlookup[QINDEX_RANGE] = {
4, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22,
@@ -82,15 +154,116 @@ static const int16_t ac_qlookup[QINDEX_RANGE] = {
1597, 1628, 1660, 1692, 1725, 1759, 1793, 1828,
};
-int16_t vp9_dc_quant(int qindex, int delta) {
+#if CONFIG_VP9_HIGHBITDEPTH
+static const int16_t ac_qlookup_10[QINDEX_RANGE] = {
+ 4, 9, 11, 13, 16, 18, 21, 24,
+ 27, 30, 33, 37, 40, 44, 48, 51,
+ 55, 59, 63, 67, 71, 75, 79, 83,
+ 88, 92, 96, 100, 105, 109, 114, 118,
+ 122, 127, 131, 136, 140, 145, 149, 154,
+ 158, 163, 168, 172, 177, 181, 186, 190,
+ 195, 199, 204, 208, 213, 217, 222, 226,
+ 231, 235, 240, 244, 249, 253, 258, 262,
+ 267, 271, 275, 280, 284, 289, 293, 297,
+ 302, 306, 311, 315, 319, 324, 328, 332,
+ 337, 341, 345, 349, 354, 358, 362, 367,
+ 371, 375, 379, 384, 388, 392, 396, 401,
+ 409, 417, 425, 433, 441, 449, 458, 466,
+ 474, 482, 490, 498, 506, 514, 523, 531,
+ 539, 547, 555, 563, 571, 579, 588, 596,
+ 604, 616, 628, 640, 652, 664, 676, 688,
+ 700, 713, 725, 737, 749, 761, 773, 785,
+ 797, 809, 825, 841, 857, 873, 889, 905,
+ 922, 938, 954, 970, 986, 1002, 1018, 1038,
+ 1058, 1078, 1098, 1118, 1138, 1158, 1178, 1198,
+ 1218, 1242, 1266, 1290, 1314, 1338, 1362, 1386,
+ 1411, 1435, 1463, 1491, 1519, 1547, 1575, 1603,
+ 1631, 1663, 1695, 1727, 1759, 1791, 1823, 1859,
+ 1895, 1931, 1967, 2003, 2039, 2079, 2119, 2159,
+ 2199, 2239, 2283, 2327, 2371, 2415, 2459, 2507,
+ 2555, 2603, 2651, 2703, 2755, 2807, 2859, 2915,
+ 2971, 3027, 3083, 3143, 3203, 3263, 3327, 3391,
+ 3455, 3523, 3591, 3659, 3731, 3803, 3876, 3952,
+ 4028, 4104, 4184, 4264, 4348, 4432, 4516, 4604,
+ 4692, 4784, 4876, 4972, 5068, 5168, 5268, 5372,
+ 5476, 5584, 5692, 5804, 5916, 6032, 6148, 6268,
+ 6388, 6512, 6640, 6768, 6900, 7036, 7172, 7312,
+};
+
+static const int16_t ac_qlookup_12[QINDEX_RANGE] = {
+ 4, 13, 19, 27, 35, 44, 54, 64,
+ 75, 87, 99, 112, 126, 139, 154, 168,
+ 183, 199, 214, 230, 247, 263, 280, 297,
+ 314, 331, 349, 366, 384, 402, 420, 438,
+ 456, 475, 493, 511, 530, 548, 567, 586,
+ 604, 623, 642, 660, 679, 698, 716, 735,
+ 753, 772, 791, 809, 828, 846, 865, 884,
+ 902, 920, 939, 957, 976, 994, 1012, 1030,
+ 1049, 1067, 1085, 1103, 1121, 1139, 1157, 1175,
+ 1193, 1211, 1229, 1246, 1264, 1282, 1299, 1317,
+ 1335, 1352, 1370, 1387, 1405, 1422, 1440, 1457,
+ 1474, 1491, 1509, 1526, 1543, 1560, 1577, 1595,
+ 1627, 1660, 1693, 1725, 1758, 1791, 1824, 1856,
+ 1889, 1922, 1954, 1987, 2020, 2052, 2085, 2118,
+ 2150, 2183, 2216, 2248, 2281, 2313, 2346, 2378,
+ 2411, 2459, 2508, 2556, 2605, 2653, 2701, 2750,
+ 2798, 2847, 2895, 2943, 2992, 3040, 3088, 3137,
+ 3185, 3234, 3298, 3362, 3426, 3491, 3555, 3619,
+ 3684, 3748, 3812, 3876, 3941, 4005, 4069, 4149,
+ 4230, 4310, 4390, 4470, 4550, 4631, 4711, 4791,
+ 4871, 4967, 5064, 5160, 5256, 5352, 5448, 5544,
+ 5641, 5737, 5849, 5961, 6073, 6185, 6297, 6410,
+ 6522, 6650, 6778, 6906, 7034, 7162, 7290, 7435,
+ 7579, 7723, 7867, 8011, 8155, 8315, 8475, 8635,
+ 8795, 8956, 9132, 9308, 9484, 9660, 9836, 10028,
+ 10220, 10412, 10604, 10812, 11020, 11228, 11437, 11661,
+ 11885, 12109, 12333, 12573, 12813, 13053, 13309, 13565,
+ 13821, 14093, 14365, 14637, 14925, 15213, 15502, 15806,
+ 16110, 16414, 16734, 17054, 17390, 17726, 18062, 18414,
+ 18766, 19134, 19502, 19886, 20270, 20670, 21070, 21486,
+ 21902, 22334, 22766, 23214, 23662, 24126, 24590, 25070,
+ 25551, 26047, 26559, 27071, 27599, 28143, 28687, 29247,
+};
+#endif
+
+int16_t vp9_dc_quant(int qindex, int delta, vpx_bit_depth_t bit_depth) {  // DC table entry at qindex + delta, clamped to [0, MAXQ].
+#if CONFIG_VP9_HIGHBITDEPTH
+ switch (bit_depth) {  // One lookup table per supported bit depth.
+ case VPX_BITS_8:
+ return dc_qlookup[clamp(qindex + delta, 0, MAXQ)];
+ case VPX_BITS_10:
+ return dc_qlookup_10[clamp(qindex + delta, 0, MAXQ)];
+ case VPX_BITS_12:
+ return dc_qlookup_12[clamp(qindex + delta, 0, MAXQ)];
+ default:  // Unsupported depth: the assert fires unless NDEBUG is set; otherwise -1 is returned.
+ assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
+ return -1;
+ }
+#else
+ (void) bit_depth;  // The 10- and 12-bit tables are compiled out; always use the 8-bit table.
return dc_qlookup[clamp(qindex + delta, 0, MAXQ)];
+#endif
}
-int16_t vp9_ac_quant(int qindex, int delta) {
+int16_t vp9_ac_quant(int qindex, int delta, vpx_bit_depth_t bit_depth) {  // AC table entry at qindex + delta, clamped to [0, MAXQ].
+#if CONFIG_VP9_HIGHBITDEPTH
+ switch (bit_depth) {  // One lookup table per supported bit depth.
+ case VPX_BITS_8:
+ return ac_qlookup[clamp(qindex + delta, 0, MAXQ)];
+ case VPX_BITS_10:
+ return ac_qlookup_10[clamp(qindex + delta, 0, MAXQ)];
+ case VPX_BITS_12:
+ return ac_qlookup_12[clamp(qindex + delta, 0, MAXQ)];
+ default:  // Unsupported depth: the assert fires unless NDEBUG is set; otherwise -1 is returned.
+ assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
+ return -1;
+ }
+#else
+ (void) bit_depth;  // The 10- and 12-bit tables are compiled out; always use the 8-bit table.
return ac_qlookup[clamp(qindex + delta, 0, MAXQ)];
+#endif
}
-
int vp9_get_qindex(const struct segmentation *seg, int segment_id,
int base_qindex) {
if (vp9_segfeature_active(seg, segment_id, SEG_LVL_ALT_Q)) {
diff --git a/vp9/common/vp9_quant_common.h b/vp9/common/vp9_quant_common.h
index d1545d93c..b6266059d 100644
--- a/vp9/common/vp9_quant_common.h
+++ b/vp9/common/vp9_quant_common.h
@@ -11,6 +11,7 @@
#ifndef VP9_COMMON_VP9_QUANT_COMMON_H_
#define VP9_COMMON_VP9_QUANT_COMMON_H_
+#include "vpx/vpx_codec.h"
#include "vp9/common/vp9_blockd.h"
#ifdef __cplusplus
@@ -22,8 +23,8 @@ extern "C" {
#define QINDEX_RANGE (MAXQ - MINQ + 1)
#define QINDEX_BITS 8
-int16_t vp9_dc_quant(int qindex, int delta);
-int16_t vp9_ac_quant(int qindex, int delta);
+int16_t vp9_dc_quant(int qindex, int delta, vpx_bit_depth_t bit_depth);
+int16_t vp9_ac_quant(int qindex, int delta, vpx_bit_depth_t bit_depth);
int vp9_get_qindex(const struct segmentation *seg, int segment_id,
int base_qindex);
diff --git a/vp9/common/vp9_reconintra.c b/vp9/common/vp9_reconintra.c
index 471929aea..7ebd2ea87 100644
--- a/vp9/common/vp9_reconintra.c
+++ b/vp9/common/vp9_reconintra.c
@@ -40,11 +40,289 @@ const TX_TYPE intra_mode_to_tx_type_lookup[INTRA_MODES] = {
type##_predictor(dst, stride, size, above, left); \
}
+#if CONFIG_VP9_HIGHBITDEPTH
+#define intra_pred_high_sized(type, size) \
+ void vp9_high_##type##_predictor_##size##x##size##_c( \
+ uint16_t *dst, ptrdiff_t stride, const uint16_t *above, \
+ const uint16_t *left, int bd) { \
+ high_##type##_predictor(dst, stride, size, above, left, bd); \
+ }
+
+#define intra_pred_allsizes(type) \
+ intra_pred_sized(type, 4) \
+ intra_pred_sized(type, 8) \
+ intra_pred_sized(type, 16) \
+ intra_pred_sized(type, 32) \
+ intra_pred_high_sized(type, 4) \
+ intra_pred_high_sized(type, 8) \
+ intra_pred_high_sized(type, 16) \
+ intra_pred_high_sized(type, 32)
+
+#else
+
#define intra_pred_allsizes(type) \
intra_pred_sized(type, 4) \
intra_pred_sized(type, 8) \
intra_pred_sized(type, 16) \
intra_pred_sized(type, 32)
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+#if CONFIG_VP9_HIGHBITDEPTH
+// 16-bit D207 predictor for a bs x bs block. Only the left column is read:
+// column 0 uses weights (1,1), column 1 uses weights (1,2,1) (both rounded
+// through ROUND_POWER_OF_TWO), and every other pixel copies the pixel two
+// columns to the left on the row below. `above` and `bd` are unused.
+static INLINE void high_d207_predictor(uint16_t *dst, ptrdiff_t stride, int bs,
+                                       const uint16_t *above,
+                                       const uint16_t *left, int bd) {
+  int row, col;
+  (void) above;
+  (void) bd;
+
+  // Column 0: each left pixel combined with the one below it.
+  for (row = 0; row < bs - 1; ++row) {
+    dst[row * stride] = ROUND_POWER_OF_TWO(left[row] + left[row + 1], 1);
+  }
+  dst[(bs - 1) * stride] = left[bs - 1];
+  ++dst;
+
+  // Column 1: three-tap combination; the last two rows are special-cased
+  // because fewer left pixels remain below them.
+  for (row = 0; row < bs - 2; ++row) {
+    dst[row * stride] = ROUND_POWER_OF_TWO(
+        left[row] + left[row + 1] * 2 + left[row + 2], 2);
+  }
+  dst[(bs - 2) * stride] =
+      ROUND_POWER_OF_TWO(left[bs - 2] + left[bs - 1] * 3, 2);
+  dst[(bs - 1) * stride] = left[bs - 1];
+  ++dst;
+
+  // Remainder of the bottom row repeats the last left pixel.
+  for (col = 0; col < bs - 2; ++col) {
+    dst[(bs - 1) * stride + col] = left[bs - 1];
+  }
+
+  // Fill the rest bottom-up from already written pixels.
+  for (row = bs - 2; row >= 0; --row) {
+    for (col = 0; col < bs - 2; ++col) {
+      dst[row * stride + col] = dst[(row + 1) * stride + col - 2];
+    }
+  }
+}
+
+// 16-bit D63 predictor for a bs x bs block. Only the above row is read,
+// starting at offset row / 2: even rows use weights (1,1), odd rows use
+// weights (1,2,1), both rounded through ROUND_POWER_OF_TWO. `left` and `bd`
+// are unused.
+static INLINE void high_d63_predictor(uint16_t *dst, ptrdiff_t stride, int bs,
+                                      const uint16_t *above,
+                                      const uint16_t *left, int bd) {
+  int row, col;
+  (void) left;
+  (void) bd;
+  for (row = 0; row < bs; ++row) {
+    const uint16_t *const src = above + row / 2;
+    if (row & 1) {
+      for (col = 0; col < bs; ++col) {
+        dst[col] = ROUND_POWER_OF_TWO(
+            src[col] + src[col + 1] * 2 + src[col + 2], 2);
+      }
+    } else {
+      for (col = 0; col < bs; ++col) {
+        dst[col] = ROUND_POWER_OF_TWO(src[col] + src[col + 1], 1);
+      }
+    }
+    dst += stride;
+  }
+}
+
+// 16-bit D45 predictor for a bs x bs block. Pixel (row, col) combines
+// above[row + col .. row + col + 2] with weights (1,2,1) while that window
+// stays below index 2 * bs; past that it repeats above[2 * bs - 1].
+// `left` and `bd` are unused.
+static INLINE void high_d45_predictor(uint16_t *dst, ptrdiff_t stride, int bs,
+                                      const uint16_t *above,
+                                      const uint16_t *left, int bd) {
+  int row, col;
+  (void) left;
+  (void) bd;
+  for (row = 0; row < bs; ++row) {
+    for (col = 0; col < bs; ++col) {
+      const int idx = row + col;
+      if (idx + 2 < bs * 2) {
+        dst[col] = ROUND_POWER_OF_TWO(
+            above[idx] + above[idx + 1] * 2 + above[idx + 2], 2);
+      } else {
+        dst[col] = above[bs * 2 - 1];
+      }
+    }
+    dst += stride;
+  }
+}
+
+// 16-bit D117 predictor for a bs x bs block. Rows 0 and 1 are built from the
+// above row (including above[-1]), column 0 of the remaining rows from the
+// left column, and every other pixel copies the pixel one column to the left
+// two rows up. `bd` is unused.
+static INLINE void high_d117_predictor(uint16_t *dst, ptrdiff_t stride,
+                                       int bs, const uint16_t *above,
+                                       const uint16_t *left, int bd) {
+  int row, col;
+  (void) bd;
+
+  // Row 0: weights (1,1) over neighbouring above pixels.
+  for (col = 0; col < bs; ++col) {
+    dst[col] = ROUND_POWER_OF_TWO(above[col - 1] + above[col], 1);
+  }
+  dst += stride;
+
+  // Row 1: weights (1,2,1); the first pixel also involves left[0].
+  dst[0] = ROUND_POWER_OF_TWO(left[0] + above[-1] * 2 + above[0], 2);
+  for (col = 1; col < bs; ++col) {
+    dst[col] = ROUND_POWER_OF_TWO(
+        above[col - 2] + above[col - 1] * 2 + above[col], 2);
+  }
+  dst += stride;
+
+  // Column 0 of rows 2 and below (dst now points at row 2).
+  dst[0] = ROUND_POWER_OF_TWO(above[-1] + left[0] * 2 + left[1], 2);
+  for (row = 1; row < bs - 2; ++row) {
+    dst[row * stride] = ROUND_POWER_OF_TWO(
+        left[row - 1] + left[row] * 2 + left[row + 1], 2);
+  }
+
+  // Remaining pixels: copy from two rows up, one column to the left.
+  for (row = 2; row < bs; ++row) {
+    for (col = 1; col < bs; ++col) {
+      dst[col] = dst[-2 * stride + col - 1];
+    }
+    dst += stride;
+  }
+}
+
+// 16-bit D135 predictor for a bs x bs block. Row 0 and column 0 are built
+// with weights (1,2,1) from the above row, above[-1] and the left column;
+// every other pixel copies its upper-left neighbour. `bd` is unused.
+static INLINE void high_d135_predictor(uint16_t *dst, ptrdiff_t stride, int bs,
+                                       const uint16_t *above,
+                                       const uint16_t *left, int bd) {
+  int row, col;
+  (void) bd;
+
+  // Row 0.
+  dst[0] = ROUND_POWER_OF_TWO(left[0] + above[-1] * 2 + above[0], 2);
+  for (col = 1; col < bs; ++col) {
+    dst[col] = ROUND_POWER_OF_TWO(
+        above[col - 2] + above[col - 1] * 2 + above[col], 2);
+  }
+
+  // Column 0 of rows 1 and below.
+  dst[stride] = ROUND_POWER_OF_TWO(above[-1] + left[0] * 2 + left[1], 2);
+  for (row = 2; row < bs; ++row) {
+    dst[row * stride] = ROUND_POWER_OF_TWO(
+        left[row - 2] + left[row - 1] * 2 + left[row], 2);
+  }
+
+  // Remaining pixels: copy the upper-left neighbour.
+  dst += stride;
+  for (row = 1; row < bs; ++row) {
+    for (col = 1; col < bs; ++col) {
+      dst[col] = dst[-stride + col - 1];
+    }
+    dst += stride;
+  }
+}
+
+// 16-bit D153 predictor for a bs x bs block. Column 0 uses weights (1,1) and
+// column 1 weights (1,2,1) over above[-1] and the left column; the rest of
+// row 0 uses weights (1,2,1) over the above row; every other pixel copies the
+// pixel two columns to the left on the row above. `bd` is unused.
+static INLINE void high_d153_predictor(uint16_t *dst, ptrdiff_t stride, int bs,
+                                       const uint16_t *above,
+                                       const uint16_t *left, int bd) {
+  int row, col;
+  (void) bd;
+
+  // Column 0.
+  dst[0] = ROUND_POWER_OF_TWO(above[-1] + left[0], 1);
+  for (row = 1; row < bs; ++row) {
+    dst[row * stride] = ROUND_POWER_OF_TWO(left[row - 1] + left[row], 1);
+  }
+  ++dst;
+
+  // Column 1.
+  dst[0] = ROUND_POWER_OF_TWO(left[0] + above[-1] * 2 + above[0], 2);
+  dst[stride] = ROUND_POWER_OF_TWO(above[-1] + left[0] * 2 + left[1], 2);
+  for (row = 2; row < bs; ++row) {
+    dst[row * stride] = ROUND_POWER_OF_TWO(
+        left[row - 2] + left[row - 1] * 2 + left[row], 2);
+  }
+  ++dst;
+
+  // Rest of row 0 (dst now points at column 2).
+  for (col = 0; col < bs - 2; ++col) {
+    dst[col] = ROUND_POWER_OF_TWO(
+        above[col - 1] + above[col] * 2 + above[col + 1], 2);
+  }
+  dst += stride;
+
+  // Remaining pixels: copy from the row above, two columns to the left.
+  for (row = 1; row < bs; ++row) {
+    for (col = 0; col < bs - 2; ++col) {
+      dst[col] = dst[-stride + col - 2];
+    }
+    dst += stride;
+  }
+}
+
+// 16-bit vertical predictor: copies the first bs entries of `above` into
+// each of the bs rows of dst. `left` and `bd` are unused.
+static INLINE void high_v_predictor(uint16_t *dst, ptrdiff_t stride, int bs,
+                                    const uint16_t *above,
+                                    const uint16_t *left, int bd) {
+  int row;
+  (void) left;
+  (void) bd;
+  for (row = 0; row < bs; ++row, dst += stride) {
+    vpx_memcpy(dst, above, bs * sizeof(uint16_t));
+  }
+}
+
+// 16-bit horizontal predictor: fills row r of dst with left[r] via
+// vpx_memset16. `above` and `bd` are unused.
+static INLINE void high_h_predictor(uint16_t *dst, ptrdiff_t stride, int bs,
+                                    const uint16_t *above, const uint16_t *left,
+                                    int bd) {
+  int row;
+  (void) above;
+  (void) bd;
+  for (row = 0; row < bs; ++row, dst += stride) {
+    vpx_memset16(dst, left[row], bs);
+  }
+}
+
+// 16-bit TM predictor for a bs x bs block: pixel (r, c) is
+// left[r] + above[c] - above[-1], clipped by clip_pixel_high() for bit
+// depth bd.
+static INLINE void high_tm_predictor(uint16_t *dst, ptrdiff_t stride, int bs,
+                                     const uint16_t *above,
+                                     const uint16_t *left, int bd) {
+  int r, c;
+  const int ytop_left = above[-1];
+
+  // bd is a live argument here (it selects the clipping range), so it is not
+  // marked as unused.
+  for (r = 0; r < bs; r++) {
+    for (c = 0; c < bs; c++)
+      dst[c] = clip_pixel_high(left[r] + above[c] - ytop_left, bd);
+    dst += stride;
+  }
+}
+
+// 16-bit DC predictor used without edge data: fills the bs x bs block with
+// 128 << (bd - 8). `above` and `left` are unused.
+static INLINE void high_dc_128_predictor(uint16_t *dst, ptrdiff_t stride,
+                                         int bs, const uint16_t *above,
+                                         const uint16_t *left, int bd) {
+  int row;
+  (void) above;
+  (void) left;
+
+  for (row = 0; row < bs; ++row, dst += stride) {
+    vpx_memset16(dst, 128 << (bd - 8), bs);
+  }
+}
+
+// 16-bit DC predictor using only the left column: fills the bs x bs block
+// with the rounded mean of left[0 .. bs - 1]. `above` and `bd` are unused.
+static INLINE void high_dc_left_predictor(uint16_t *dst, ptrdiff_t stride,
+                                          int bs, const uint16_t *above,
+                                          const uint16_t *left, int bd) {
+  int idx, row, dc_value, total = 0;
+  (void) above;
+  (void) bd;
+
+  for (idx = 0; idx < bs; ++idx) {
+    total += left[idx];
+  }
+  // Adding half the divisor rounds to nearest.
+  dc_value = (total + (bs >> 1)) / bs;
+
+  for (row = 0; row < bs; ++row, dst += stride) {
+    vpx_memset16(dst, dc_value, bs);
+  }
+}
+
+// 16-bit DC predictor using only the above row: fills the bs x bs block
+// with the rounded mean of above[0 .. bs - 1]. `left` and `bd` are unused.
+static INLINE void high_dc_top_predictor(uint16_t *dst, ptrdiff_t stride,
+                                         int bs, const uint16_t *above,
+                                         const uint16_t *left, int bd) {
+  int idx, row, dc_value, total = 0;
+  (void) left;
+  (void) bd;
+
+  for (idx = 0; idx < bs; ++idx) {
+    total += above[idx];
+  }
+  // Adding half the divisor rounds to nearest.
+  dc_value = (total + (bs >> 1)) / bs;
+
+  for (row = 0; row < bs; ++row, dst += stride) {
+    vpx_memset16(dst, dc_value, bs);
+  }
+}
+
+// 16-bit DC predictor: fills the bs x bs block with the rounded mean of the
+// first bs entries of both `above` and `left`. `bd` is unused.
+static INLINE void high_dc_predictor(uint16_t *dst, ptrdiff_t stride,
+                                     int bs, const uint16_t *above,
+                                     const uint16_t *left, int bd) {
+  int idx, row, dc_value, total = 0;
+  const int num_samples = 2 * bs;
+  (void) bd;
+
+  for (idx = 0; idx < bs; ++idx) {
+    total += above[idx];
+    total += left[idx];
+  }
+
+  // Adding half the divisor rounds to nearest.
+  dc_value = (total + (num_samples >> 1)) / num_samples;
+
+  for (row = 0; row < bs; ++row, dst += stride) {
+    vpx_memset16(dst, dc_value, bs);
+  }
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
static INLINE void d207_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
const uint8_t *above, const uint8_t *left) {
@@ -293,6 +571,14 @@ typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride,
static intra_pred_fn pred[INTRA_MODES][TX_SIZES];
static intra_pred_fn dc_pred[2][2][TX_SIZES];
+#if CONFIG_VP9_HIGHBITDEPTH
+// Signature shared by the high-bitdepth intra predictors: samples are
+// uint16_t and the last argument is the bit depth.
+typedef void (*intra_high_pred_fn)(uint16_t *dst, ptrdiff_t stride,
+                                   const uint16_t *above, const uint16_t *left,
+                                   int bd);
+// High-bitdepth dispatch tables, sized like pred[] / dc_pred[] above.
+// pred_high is indexed [mode][tx_size]; dc_pred_high is indexed
+// [left_available][up_available][tx_size].
+static intra_high_pred_fn pred_high[INTRA_MODES][TX_SIZES];
+static intra_high_pred_fn dc_pred_high[2][2][TX_SIZES];
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
void vp9_init_intra_predictors() {
#define INIT_ALL_SIZES(p, type) \
p[TX_4X4] = vp9_##type##_predictor_4x4; \
@@ -315,8 +601,163 @@ void vp9_init_intra_predictors() {
INIT_ALL_SIZES(dc_pred[1][0], dc_left);
INIT_ALL_SIZES(dc_pred[1][1], dc);
-#undef INIT_ALL_SIZES
+#if CONFIG_VP9_HIGHBITDEPTH
+ INIT_ALL_SIZES(pred_high[V_PRED], high_v);
+ INIT_ALL_SIZES(pred_high[H_PRED], high_h);
+ INIT_ALL_SIZES(pred_high[D207_PRED], high_d207);
+ INIT_ALL_SIZES(pred_high[D45_PRED], high_d45);
+ INIT_ALL_SIZES(pred_high[D63_PRED], high_d63);
+ INIT_ALL_SIZES(pred_high[D117_PRED], high_d117);
+ INIT_ALL_SIZES(pred_high[D135_PRED], high_d135);
+ INIT_ALL_SIZES(pred_high[D153_PRED], high_d153);
+ INIT_ALL_SIZES(pred_high[TM_PRED], high_tm);
+
+ INIT_ALL_SIZES(dc_pred_high[0][0], high_dc_128);
+ INIT_ALL_SIZES(dc_pred_high[0][1], high_dc_top);
+ INIT_ALL_SIZES(dc_pred_high[1][0], high_dc_left);
+ INIT_ALL_SIZES(dc_pred_high[1][1], high_dc);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+  // Release the helper macro defined at the top of this function so it does
+  // not leak into the rest of the translation unit.
+#undef INIT_ALL_SIZES
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+// High-bitdepth counterpart of build_intra_predictors().
+//
+// Collects the left column (bs samples) and the above row (2 * bs samples
+// plus the above-left corner at index -1) for a bs x bs transform block, then
+// calls the predictor selected by mode / tx_size.
+//
+//  - ref8 / dst8 are converted with CONVERT_TO_SHORTPTR and accessed as
+//    uint16_t samples.
+//  - up_available / left_available / right_available say which neighbours
+//    may be read; missing neighbours are replaced by constants derived from
+//    the bit depth bd (base - 1 above, base + 1 to the left).
+//  - x, y are added to the macroblock position to locate the block inside
+//    the frame, so that rows/columns beyond the frame edge are filled by
+//    replicating the last sample that is inside the frame.
+static void build_intra_predictors_high(const MACROBLOCKD *xd,
+                                        const uint8_t *ref8,
+                                        int ref_stride,
+                                        uint8_t *dst8,
+                                        int dst_stride,
+                                        PREDICTION_MODE mode,
+                                        TX_SIZE tx_size,
+                                        int up_available,
+                                        int left_available,
+                                        int right_available,
+                                        int x, int y,
+                                        int plane, int bd) {
+  int i;
+  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
+  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
+  DECLARE_ALIGNED_ARRAY(16, uint16_t, left_col, 64);
+  DECLARE_ALIGNED_ARRAY(16, uint16_t, above_data, 128 + 16);
+  // Leave room in front of the row so that above_row[-1] is addressable.
+  uint16_t *above_row = above_data + 16;
+  const uint16_t *const_above_row = above_row;
+  const int bs = 4 << tx_size;
+  int frame_width, frame_height;
+  int x0, y0;
+  const struct macroblockd_plane *const pd = &xd->plane[plane];
+  // Mid-range sample value for this bit depth (128 when bd == 8).
+  int base = 128 << (bd - 8);
+  // Layout of the neighbourhood, shown with the 8-bit values; at other bit
+  // depths 127 becomes base - 1 and 129 becomes base + 1:
+  // 127 127 127 .. 127 127 127 127 127 127
+  // 129  A   B  ..  Y   Z
+  // 129  C   D  ..  W   X
+  // 129  E   F  ..  U   V
+  // 129  G   H  ..  S   T   T   T   T   T
+
+  // Get current frame pointer, width and height.
+  if (plane == 0) {
+    frame_width = xd->cur_buf->y_width;
+    frame_height = xd->cur_buf->y_height;
+  } else {
+    frame_width = xd->cur_buf->uv_width;
+    frame_height = xd->cur_buf->uv_height;
+  }
+
+  // Get block position in current frame.
+  x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x;
+  y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y;
+
+  // left
+  if (left_available) {
+    if (xd->mb_to_bottom_edge < 0) {
+      /* slower path if the block needs border extension */
+      if (y0 + bs <= frame_height) {
+        for (i = 0; i < bs; ++i)
+          left_col[i] = ref[i * ref_stride - 1];
+      } else {
+        // Only the first extend_bottom rows are inside the frame; repeat the
+        // last of them for the remaining rows.
+        const int extend_bottom = frame_height - y0;
+        for (i = 0; i < extend_bottom; ++i)
+          left_col[i] = ref[i * ref_stride - 1];
+        for (; i < bs; ++i)
+          left_col[i] = ref[(extend_bottom - 1) * ref_stride - 1];
+      }
+    } else {
+      /* faster path if the block does not need extension */
+      for (i = 0; i < bs; ++i)
+        left_col[i] = ref[i * ref_stride - 1];
+    }
+  } else {
+    // TODO(Peter): this value should probably change for high bitdepth
+    vpx_memset16(left_col, base + 1, bs);
+  }
+
+  // TODO(hkuang) do not extend 2*bs pixels for all modes.
+  // above
+  if (up_available) {
+    const uint16_t *above_ref = ref - ref_stride;
+    if (xd->mb_to_right_edge < 0) {
+      /* slower path if the block needs border extension */
+      if (x0 + 2 * bs <= frame_width) {
+        if (right_available && bs == 4) {
+          vpx_memcpy(above_row, above_ref, 2 * bs * sizeof(uint16_t));
+        } else {
+          vpx_memcpy(above_row, above_ref, bs * sizeof(uint16_t));
+          vpx_memset16(above_row + bs, above_row[bs - 1], bs);
+        }
+      } else if (x0 + bs <= frame_width) {
+        const int r = frame_width - x0;
+        if (right_available && bs == 4) {
+          vpx_memcpy(above_row, above_ref, r * sizeof(uint16_t));
+          vpx_memset16(above_row + r, above_row[r - 1],
+                       x0 + 2 * bs - frame_width);
+        } else {
+          vpx_memcpy(above_row, above_ref, bs * sizeof(uint16_t));
+          vpx_memset16(above_row + bs, above_row[bs - 1], bs);
+        }
+      } else if (x0 <= frame_width) {
+        // Fewer than bs samples are inside the frame: copy those and
+        // replicate the last one.
+        // NOTE(review): when r == 0 this reads above_row[-1] before it is
+        // assigned below; confirm whether that case can occur.
+        const int r = frame_width - x0;
+        vpx_memcpy(above_row, above_ref, r * sizeof(uint16_t));
+        vpx_memset16(above_row + r, above_row[r - 1],
+                     x0 + 2 * bs - frame_width);
+      }
+      // TODO(Peter) this value should probably change for high bitdepth
+      above_row[-1] = left_available ? above_ref[-1] : (base + 1);
+    } else {
+      /* faster path if the block does not need extension */
+      if (bs == 4 && right_available && left_available) {
+        // Everything needed is in the reference row; read it in place.
+        const_above_row = above_ref;
+      } else {
+        vpx_memcpy(above_row, above_ref, bs * sizeof(uint16_t));
+        if (bs == 4 && right_available)
+          vpx_memcpy(above_row + bs, above_ref + bs, bs * sizeof(uint16_t));
+        else
+          vpx_memset16(above_row + bs, above_row[bs - 1], bs);
+        // TODO(Peter): this value should probably change for high bitdepth
+        above_row[-1] = left_available ? above_ref[-1] : (base + 1);
+      }
+    }
+  } else {
+    vpx_memset16(above_row, base - 1, bs * 2);
+    // TODO(Peter): this value should probably change for high bitdepth
+    above_row[-1] = base - 1;
+  }
+
+  // predict
+  if (mode == DC_PRED) {
+    dc_pred_high[left_available][up_available][tx_size](dst, dst_stride,
+                                                        const_above_row,
+                                                        left_col, bd);
+  } else {
+    pred_high[mode][tx_size](dst, dst_stride, const_above_row, left_col, bd);
+  }
 }
+#endif // CONFIG_VP9_HIGHBITDEPTH
static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref,
int ref_stride, uint8_t *dst, int dst_stride,
@@ -454,6 +895,14 @@ void vp9_predict_intra_block(const MACROBLOCKD *xd, int block_idx, int bwl_in,
const int y = loff * 4;
assert(bwl >= 0);
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ build_intra_predictors_high(xd, ref, ref_stride, dst, dst_stride, mode,
+ tx_size, have_top, have_left, have_right,
+ x, y, plane, xd->bd);
+ return;
+ }
+#endif
build_intra_predictors(xd, ref, ref_stride, dst, dst_stride, mode, tx_size,
have_top, have_left, have_right, x, y, plane);
}
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index c695a5dc0..b75ea64f0 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -445,61 +445,219 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vp9_iwht4x4_16_add/;
}
-
# High bitdepth functions
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
-#
-# dct
-#
-add_proto qw/void vp9_high_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
-specialize qw/vp9_high_idct4x4_1_add/;
+ #
+ # Intra prediction
+ #
+ add_proto qw/void vp9_high_d207_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bps";
+ specialize qw/vp9_high_d207_predictor_4x4/;
+
+ add_proto qw/void vp9_high_d45_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bps";
+ specialize qw/vp9_high_d45_predictor_4x4/;
+
+ add_proto qw/void vp9_high_d63_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bps";
+ specialize qw/vp9_high_d63_predictor_4x4/;
+
+ add_proto qw/void vp9_high_h_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bps";
+ specialize qw/vp9_high_h_predictor_4x4/;
+
+ add_proto qw/void vp9_high_d117_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bps";
+ specialize qw/vp9_high_d117_predictor_4x4/;
+
+ add_proto qw/void vp9_high_d135_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bps";
+ specialize qw/vp9_high_d135_predictor_4x4/;
+
+ add_proto qw/void vp9_high_d153_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bps";
+ specialize qw/vp9_high_d153_predictor_4x4/;
+
+  # Only an x86 assembly version of this predictor is provided alongside
+  # these prototypes; listing neon would make the generated dispatch refer
+  # to a vp9_high_v_predictor_4x4_neon symbol that this change never adds.
+  add_proto qw/void vp9_high_v_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bps";
+  specialize qw/vp9_high_v_predictor_4x4/, "$sse_x86inc";
+
+ add_proto qw/void vp9_high_tm_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bps";
+ specialize qw/vp9_high_tm_predictor_4x4/, "$sse_x86inc";
+
+ add_proto qw/void vp9_high_dc_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bps";
+ specialize qw/vp9_high_dc_predictor_4x4/, "$sse_x86inc";
+
+ add_proto qw/void vp9_high_dc_top_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bps";
+ specialize qw/vp9_high_dc_top_predictor_4x4/;
+
+ add_proto qw/void vp9_high_dc_left_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bps";
+ specialize qw/vp9_high_dc_left_predictor_4x4/;
+
+ add_proto qw/void vp9_high_dc_128_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bps";
+ specialize qw/vp9_high_dc_128_predictor_4x4/;
+
+ add_proto qw/void vp9_high_d207_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bps";
+ specialize qw/vp9_high_d207_predictor_8x8/;
+
+ add_proto qw/void vp9_high_d45_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bps";
+ specialize qw/vp9_high_d45_predictor_8x8/;
+
+ add_proto qw/void vp9_high_d63_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bps";
+ specialize qw/vp9_high_d63_predictor_8x8/;
+
+ add_proto qw/void vp9_high_h_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bps";
+ specialize qw/vp9_high_h_predictor_8x8/;
+
+ add_proto qw/void vp9_high_d117_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bps";
+ specialize qw/vp9_high_d117_predictor_8x8/;
+
+ add_proto qw/void vp9_high_d135_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bps";
+ specialize qw/vp9_high_d135_predictor_8x8/;
+
+ add_proto qw/void vp9_high_d153_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bps";
+ specialize qw/vp9_high_d153_predictor_8x8/;
+
+ add_proto qw/void vp9_high_v_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bps";
+ specialize qw/vp9_high_v_predictor_8x8/, "$sse2_x86inc";
+
+ add_proto qw/void vp9_high_tm_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bps";
+ specialize qw/vp9_high_tm_predictor_8x8/, "$sse2_x86inc";
+
+  # DC predictor for 8x8 high-bitdepth blocks; SSE2 version via x86inc.
+  add_proto qw/void vp9_high_dc_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bps";
+  specialize qw/vp9_high_dc_predictor_8x8/, "$sse2_x86inc";
+
+ add_proto qw/void vp9_high_dc_top_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bps";
+ specialize qw/vp9_high_dc_top_predictor_8x8/;
+
+ add_proto qw/void vp9_high_dc_left_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bps";
+ specialize qw/vp9_high_dc_left_predictor_8x8/;
+
+ add_proto qw/void vp9_high_dc_128_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bps";
+ specialize qw/vp9_high_dc_128_predictor_8x8/;
-add_proto qw/void vp9_high_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
-specialize qw/vp9_high_idct4x4_16_add/;
+ add_proto qw/void vp9_high_d207_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bps";
+ specialize qw/vp9_high_d207_predictor_16x16/;
-add_proto qw/void vp9_high_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
-specialize qw/vp9_high_idct8x8_1_add/;
+ add_proto qw/void vp9_high_d45_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bps";
+ specialize qw/vp9_high_d45_predictor_16x16/;
-add_proto qw/void vp9_high_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
-specialize qw/vp9_high_idct8x8_64_add/;
+ add_proto qw/void vp9_high_d63_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bps";
+ specialize qw/vp9_high_d63_predictor_16x16/;
-add_proto qw/void vp9_high_idct8x8_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
-specialize qw/vp9_high_idct8x8_10_add/;
+ add_proto qw/void vp9_high_h_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bps";
+ specialize qw/vp9_high_h_predictor_16x16/;
-add_proto qw/void vp9_high_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
-specialize qw/vp9_high_idct16x16_1_add/;
+ add_proto qw/void vp9_high_d117_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bps";
+ specialize qw/vp9_high_d117_predictor_16x16/;
-add_proto qw/void vp9_high_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
-specialize qw/vp9_high_idct16x16_256_add/;
+ add_proto qw/void vp9_high_d135_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bps";
+ specialize qw/vp9_high_d135_predictor_16x16/;
-add_proto qw/void vp9_high_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
-specialize qw/vp9_high_idct16x16_10_add/;
+ add_proto qw/void vp9_high_d153_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bps";
+ specialize qw/vp9_high_d153_predictor_16x16/;
-add_proto qw/void vp9_high_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
-specialize qw/vp9_high_idct32x32_1024_add/;
+  # Only an x86 assembly version of this predictor is provided alongside
+  # these prototypes; listing neon would make the generated dispatch refer
+  # to a vp9_high_v_predictor_16x16_neon symbol that this change never adds.
+  add_proto qw/void vp9_high_v_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bps";
+  specialize qw/vp9_high_v_predictor_16x16/, "$sse2_x86inc";
-add_proto qw/void vp9_high_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
-specialize qw/vp9_high_idct32x32_34_add/;
+ add_proto qw/void vp9_high_tm_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bps";
+ specialize qw/vp9_high_tm_predictor_16x16/, "$sse2_x86_64";
-add_proto qw/void vp9_high_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
-specialize qw/vp9_high_idct32x32_1_add/;
+ add_proto qw/void vp9_high_dc_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bps";
+ specialize qw/vp9_high_dc_predictor_16x16/, "$sse2_x86inc";
-add_proto qw/void vp9_high_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd";
-specialize qw/vp9_high_iht4x4_16_add/;
+ add_proto qw/void vp9_high_dc_top_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bps";
+ specialize qw/vp9_high_dc_top_predictor_16x16/;
-add_proto qw/void vp9_high_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd";
-specialize qw/vp9_high_iht8x8_64_add/;
+ add_proto qw/void vp9_high_dc_left_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bps";
+ specialize qw/vp9_high_dc_left_predictor_16x16/;
-add_proto qw/void vp9_high_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type, int bd";
-specialize qw/vp9_high_iht16x16_256_add/;
+ add_proto qw/void vp9_high_dc_128_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bps";
+ specialize qw/vp9_high_dc_128_predictor_16x16/;
-# dct and add
+ add_proto qw/void vp9_high_d207_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bps";
+ specialize qw/vp9_high_d207_predictor_32x32/;
+
+ add_proto qw/void vp9_high_d45_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bps";
+ specialize qw/vp9_high_d45_predictor_32x32/;
+
+ add_proto qw/void vp9_high_d63_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bps";
+ specialize qw/vp9_high_d63_predictor_32x32/;
+
+ add_proto qw/void vp9_high_h_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bps";
+ specialize qw/vp9_high_h_predictor_32x32/;
+
+ add_proto qw/void vp9_high_d117_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bps";
+ specialize qw/vp9_high_d117_predictor_32x32/;
+
+ add_proto qw/void vp9_high_d135_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bps";
+ specialize qw/vp9_high_d135_predictor_32x32/;
+
+ add_proto qw/void vp9_high_d153_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bps";
+ specialize qw/vp9_high_d153_predictor_32x32/;
+
+ add_proto qw/void vp9_high_v_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bps";
+ specialize qw/vp9_high_v_predictor_32x32/, "$sse2_x86inc";
+
+ add_proto qw/void vp9_high_tm_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bps";
+ specialize qw/vp9_high_tm_predictor_32x32/, "$sse2_x86_64";
+
+ add_proto qw/void vp9_high_dc_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bps";
+ specialize qw/vp9_high_dc_predictor_32x32/, "$sse2_x86_64";
+
+ add_proto qw/void vp9_high_dc_top_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bps";
+ specialize qw/vp9_high_dc_top_predictor_32x32/;
+
+ add_proto qw/void vp9_high_dc_left_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bps";
+ specialize qw/vp9_high_dc_left_predictor_32x32/;
+
+ add_proto qw/void vp9_high_dc_128_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bps";
+ specialize qw/vp9_high_dc_128_predictor_32x32/;
+
+ #
+ # dct
+ #
+ add_proto qw/void vp9_high_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+ specialize qw/vp9_high_idct4x4_1_add/;
+
+ add_proto qw/void vp9_high_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+ specialize qw/vp9_high_idct4x4_16_add/;
+
+ add_proto qw/void vp9_high_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+ specialize qw/vp9_high_idct8x8_1_add/;
+
+ add_proto qw/void vp9_high_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+ specialize qw/vp9_high_idct8x8_64_add/;
+
+ add_proto qw/void vp9_high_idct8x8_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+ specialize qw/vp9_high_idct8x8_10_add/;
+
+ add_proto qw/void vp9_high_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+ specialize qw/vp9_high_idct16x16_1_add/;
+
+ add_proto qw/void vp9_high_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+ specialize qw/vp9_high_idct16x16_256_add/;
+
+ add_proto qw/void vp9_high_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+ specialize qw/vp9_high_idct16x16_10_add/;
+
+ add_proto qw/void vp9_high_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+ specialize qw/vp9_high_idct32x32_1024_add/;
+
+ add_proto qw/void vp9_high_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+ specialize qw/vp9_high_idct32x32_34_add/;
+
+ add_proto qw/void vp9_high_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+ specialize qw/vp9_high_idct32x32_1_add/;
+
+ add_proto qw/void vp9_high_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd";
+ specialize qw/vp9_high_iht4x4_16_add/;
+
+ add_proto qw/void vp9_high_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd";
+ specialize qw/vp9_high_iht8x8_64_add/;
+
+ add_proto qw/void vp9_high_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type, int bd";
+ specialize qw/vp9_high_iht16x16_256_add/;
+
+ # dct and add
-add_proto qw/void vp9_high_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
-specialize qw/vp9_high_iwht4x4_1_add/;
+ add_proto qw/void vp9_high_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+ specialize qw/vp9_high_iwht4x4_1_add/;
-add_proto qw/void vp9_high_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
-specialize qw/vp9_high_iwht4x4_16_add/;
+ add_proto qw/void vp9_high_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+ specialize qw/vp9_high_iwht4x4_16_add/;
}
#
diff --git a/vp9/common/x86/vp9_high_intrapred_sse2.asm b/vp9/common/x86/vp9_high_intrapred_sse2.asm
new file mode 100644
index 000000000..ff450711e
--- /dev/null
+++ b/vp9/common/x86/vp9_high_intrapred_sse2.asm
@@ -0,0 +1,476 @@
+;
+; Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+%include "third_party/x86inc/x86inc.asm"
+
+SECTION_RODATA
+pw_4: times 8 dw 4
+pw_8: times 8 dw 8
+pw_16: times 4 dd 16
+pw_32: times 4 dd 32
+
+SECTION .text
+INIT_MMX sse ; high_dc_predictor_4x4: fill a 4x4 block of words with (sum(above[0..3]) + sum(left[0..3]) + 4) >> 3
+cglobal high_dc_predictor_4x4, 4, 5, 4, dst, stride, above, left, goffset
+ GET_GOT goffsetq
+ ; Load the four above and four left samples (one word each).
+ movq m0, [aboveq]
+ movq m2, [leftq]
+ DEFINE_ARGS dst, stride, one ; above has been read; its register is reused as "one"
+ mov oned, 0x0001
+ pxor m1, m1 ; m1 = 0
+ movd m3, oned
+ pshufw m3, m3, 0x0 ; m3 = word 1 in every lane
+ paddw m0, m2 ; per-lane above + left
+ pmaddwd m0, m3 ; multiply by 1 and add adjacent word pairs
+ packssdw m0, m1 ; pack the partial sums back to words, upper half zero
+ pmaddwd m0, m3 ; low dword now holds the sum of all eight samples
+ paddw m0, [GLOBAL(pw_4)] ; rounding term
+ psraw m0, 3 ; divide by 8
+ pshufw m0, m0, 0x0 ; broadcast the low word to all four lanes
+ movq [dstq ], m0 ; row 0
+ movq [dstq+strideq*2], m0 ; row 1 (stride scaled by 2; presumably stride counts 16-bit samples)
+ lea dstq, [dstq+strideq*4]
+ movq [dstq ], m0 ; row 2
+ movq [dstq+strideq*2], m0 ; row 3
+
+ RESTORE_GOT
+ RET
+ ; high_dc_predictor_8x8: fill an 8x8 block of words with (sum(above[0..7]) + sum(left[0..7]) + 8) >> 4
+INIT_XMM sse2
+cglobal high_dc_predictor_8x8, 4, 5, 4, dst, stride, above, left, goffset
+ GET_GOT goffsetq
+ ; Load the eight above and eight left samples (one word each).
+ pxor m1, m1 ; m1 = 0
+ mova m0, [aboveq]
+ mova m2, [leftq]
+ DEFINE_ARGS dst, stride, stride3, one ; above/left have been read; their registers are reused
+ mov oned, 0x00010001 ; two 16-bit ones packed in a dword
+ lea stride3q, [strideq*3]
+ movd m3, oned
+ pshufd m3, m3, 0x0 ; m3 = word 1 in every lane
+ paddw m0, m2 ; per-lane above + left
+ pmaddwd m0, m3 ; add adjacent word pairs -> 4 dwords
+ packssdw m0, m1 ; back to words, upper half zero
+ pmaddwd m0, m3 ; add adjacent pairs again -> 2 partial sums
+ packssdw m0, m1
+ pmaddwd m0, m3 ; low dword now holds the sum of all sixteen samples
+ paddw m0, [GLOBAL(pw_8)] ; rounding term
+ psrlw m0, 4 ; divide by 16
+ pshuflw m0, m0, 0x0 ; broadcast the low word across the low qword
+ punpcklqdq m0, m0 ; ... and across the whole register
+ mova [dstq ], m0 ; row 0
+ mova [dstq+strideq*2 ], m0 ; row 1
+ mova [dstq+strideq*4 ], m0 ; row 2
+ mova [dstq+stride3q*2], m0 ; row 3
+ lea dstq, [dstq+strideq*8] ; advance four rows
+ mova [dstq ], m0 ; row 4
+ mova [dstq+strideq*2 ], m0 ; row 5
+ mova [dstq+strideq*4 ], m0 ; row 6
+ mova [dstq+stride3q*2], m0 ; row 7
+
+ RESTORE_GOT
+ RET
+ ; high_dc_predictor_16x16: fill a 16x16 block of words with (sum(above[0..15]) + sum(left[0..15]) + 16) >> 5
+INIT_XMM sse2
+cglobal high_dc_predictor_16x16, 4, 5, 5, dst, stride, above, left, goffset
+ GET_GOT goffsetq
+ ; Load 16 above and 16 left samples (two registers each).
+ pxor m1, m1 ; m1 = 0, used for zero extension below
+ mova m0, [aboveq]
+ mova m3, [aboveq+16]
+ mova m2, [leftq]
+ mova m4, [leftq+16]
+ DEFINE_ARGS dst, stride, stride3, lines4 ; above/left have been read; their registers are reused
+ lea stride3q, [strideq*3]
+ mov lines4d, 4 ; loop counter: 4 iterations of 4 rows
+ paddw m0, m2 ; accumulate all four inputs lane by lane
+ paddw m0, m3
+ paddw m0, m4
+ movhlps m2, m0 ; fold the upper four words onto the lower four
+ paddw m0, m2
+ punpcklwd m0, m1 ; widen the four word sums to dwords
+ movhlps m2, m0
+ paddd m0, m2 ; two dword partial sums
+ punpckldq m0, m1
+ movhlps m2, m0
+ paddd m0, m2 ; low dword = sum of all 32 samples
+ paddd m0, [GLOBAL(pw_16)] ; rounding term (pw_16 is stored as dwords)
+ psrad m0, 5 ; divide by 32
+ pshuflw m0, m0, 0x0 ; broadcast the low word across the low qword
+ punpcklqdq m0, m0 ; ... and across the whole register
+.loop:
+ mova [dstq ], m0 ; row 0, samples 0-7
+ mova [dstq +16], m0 ; row 0, samples 8-15
+ mova [dstq+strideq*2 ], m0 ; row 1
+ mova [dstq+strideq*2 +16], m0
+ mova [dstq+strideq*4 ], m0 ; row 2
+ mova [dstq+strideq*4 +16], m0
+ mova [dstq+stride3q*2 ], m0 ; row 3
+ mova [dstq+stride3q*2+16], m0
+ lea dstq, [dstq+strideq*8] ; advance four rows
+ dec lines4d
+ jnz .loop
+
+ RESTORE_GOT
+ REP_RET
+
+%if ARCH_X86_64
+INIT_XMM sse2
+cglobal high_dc_predictor_32x32, 4, 5, 9, dst, stride, above, left, goffset
+ GET_GOT goffsetq
+
+ pxor m1, m1
+ mova m0, [aboveq]
+ mova m2, [aboveq+16]
+ mova m3, [aboveq+32]
+ mova m4, [aboveq+48]
+ mova m5, [leftq]
+ mova m6, [leftq+16]
+ mova m7, [leftq+32]
+ mova m8, [leftq+48]
+ DEFINE_ARGS dst, stride, stride3, lines4
+ lea stride3q, [strideq*3]
+ mov lines4d, 8
+ paddw m0, m2
+ paddw m0, m3
+ paddw m0, m4
+ paddw m0, m5
+ paddw m0, m6
+ paddw m0, m7
+ paddw m0, m8
+ movhlps m2, m0
+ paddw m0, m2
+ punpcklwd m0, m1
+ movhlps m2, m0
+ paddd m0, m2
+ punpckldq m0, m1
+ movhlps m2, m0
+ paddd m0, m2
+ paddd m0, [GLOBAL(pw_32)]
+ psrad m0, 6
+ pshuflw m0, m0, 0x0
+ punpcklqdq m0, m0
+.loop:
+ mova [dstq ], m0
+ mova [dstq +16 ], m0
+ mova [dstq +32 ], m0
+ mova [dstq +48 ], m0
+ mova [dstq+strideq*2 ], m0
+ mova [dstq+strideq*2+16 ], m0
+ mova [dstq+strideq*2+32 ], m0
+ mova [dstq+strideq*2+48 ], m0
+ mova [dstq+strideq*4 ], m0
+ mova [dstq+strideq*4+16 ], m0
+ mova [dstq+strideq*4+32 ], m0
+ mova [dstq+strideq*4+48 ], m0
+ mova [dstq+stride3q*2 ], m0
+ mova [dstq+stride3q*2 +16], m0
+ mova [dstq+stride3q*2 +32], m0
+ mova [dstq+stride3q*2 +48], m0
+ lea dstq, [dstq+strideq*8]
+ dec lines4d
+ jnz .loop
+
+ RESTORE_GOT
+ REP_RET
+%endif
+ ; high_v_predictor_4x4: copy the four above samples into each of the four rows
+INIT_MMX sse
+cglobal high_v_predictor_4x4, 3, 3, 1, dst, stride, above
+ movq m0, [aboveq] ; m0 = above[0..3] (four words)
+ movq [dstq ], m0 ; row 0
+ movq [dstq+strideq*2], m0 ; row 1 (stride scaled by 2; presumably stride counts 16-bit samples)
+ lea dstq, [dstq+strideq*4] ; advance two rows
+ movq [dstq ], m0 ; row 2
+ movq [dstq+strideq*2], m0 ; row 3
+ RET
+ ; high_v_predictor_8x8: copy the eight above samples into each of the eight rows
+INIT_XMM sse2
+cglobal high_v_predictor_8x8, 3, 3, 1, dst, stride, above
+ mova m0, [aboveq] ; m0 = above[0..7] (eight words)
+ DEFINE_ARGS dst, stride, stride3 ; above has been read; its register is reused
+ lea stride3q, [strideq*3]
+ mova [dstq ], m0 ; row 0
+ mova [dstq+strideq*2 ], m0 ; row 1
+ mova [dstq+strideq*4 ], m0 ; row 2
+ mova [dstq+stride3q*2], m0 ; row 3
+ lea dstq, [dstq+strideq*8] ; advance four rows
+ mova [dstq ], m0 ; row 4
+ mova [dstq+strideq*2 ], m0 ; row 5
+ mova [dstq+strideq*4 ], m0 ; row 6
+ mova [dstq+stride3q*2], m0 ; row 7
+ RET
+
+INIT_XMM sse2
+cglobal high_v_predictor_16x16, 3, 4, 2, dst, stride, above
+ mova m0, [aboveq]
+ mova m1, [aboveq+16]
+ DEFINE_ARGS dst, stride, stride3, nlines4
+ lea stride3q, [strideq*3]
+ mov nlines4d, 4
+.loop:
+ mova [dstq ], m0
+ mova [dstq +16], m1
+ mova [dstq+strideq*2 ], m0
+ mova [dstq+strideq*2 +16], m1
+ mova [dstq+strideq*4 ], m0
+ mova [dstq+strideq*4 +16], m1
+ mova [dstq+stride3q*2 ], m0
+ mova [dstq+stride3q*2+16], m1
+ lea dstq, [dstq+strideq*8]
+ dec nlines4d
+ jnz .loop
+ REP_RET
+
+INIT_XMM sse2
+cglobal high_v_predictor_32x32, 3, 4, 4, dst, stride, above
+ mova m0, [aboveq]
+ mova m1, [aboveq+16]
+ mova m2, [aboveq+32]
+ mova m3, [aboveq+48]
+ DEFINE_ARGS dst, stride, stride3, nlines4
+ lea stride3q, [strideq*3]
+ mov nlines4d, 8
+.loop:
+ mova [dstq ], m0
+ mova [dstq +16], m1
+ mova [dstq +32], m2
+ mova [dstq +48], m3
+ mova [dstq+strideq*2 ], m0
+ mova [dstq+strideq*2 +16], m1
+ mova [dstq+strideq*2 +32], m2
+ mova [dstq+strideq*2 +48], m3
+ mova [dstq+strideq*4 ], m0
+ mova [dstq+strideq*4 +16], m1
+ mova [dstq+strideq*4 +32], m2
+ mova [dstq+strideq*4 +48], m3
+ mova [dstq+stride3q*2 ], m0
+ mova [dstq+stride3q*2 +16], m1
+ mova [dstq+stride3q*2 +32], m2
+ mova [dstq+stride3q*2 +48], m3
+ lea dstq, [dstq+strideq*8]
+ dec nlines4d
+ jnz .loop
+ REP_RET
+ ; high_tm_predictor_4x4: dst[r][c] = clamp(left[r] + above[c] - above[-1], 0, (1 << bps) - 1)
+INIT_MMX sse
+cglobal high_tm_predictor_4x4, 5, 6, 5, dst, stride, above, left, bps, one
+ movd m1, [aboveq-2] ; word just before above[0] (the top-left sample)
+ movq m0, [aboveq] ; m0 = above[0..3]
+ pshufw m1, m1, 0x0 ; broadcast top-left to all lanes
+ ; Build the clamp limits: max = (1 << bps) - 1 in every lane, min = 0
+ mov oned, 1
+ movd m3, oned
+ movd m4, bpsd ; shift count
+ pshufw m3, m3, 0x0 ; m3 = word 1 in every lane
+ DEFINE_ARGS dst, stride, line, left ; above has been read; its register becomes the loop counter
+ mov lineq, -2 ; two iterations, two rows each
+ mova m2, m3
+ psllw m3, m4 ; 1 << bps
+ add leftq, 8 ; point past left[3]; indexed below with negative lineq
+ psubw m3, m2 ; max possible value
+ pxor m4, m4 ; min possible value
+ psubw m0, m1 ; m0 = above[c] - top-left
+.loop:
+ movq m1, [leftq+lineq*4] ; low word = left sample for the first row of the pair
+ movq m2, [leftq+lineq*4+2] ; low word = left sample for the second row
+ pshufw m1, m1, 0x0 ; broadcast each left sample
+ pshufw m2, m2, 0x0
+ paddw m1, m0 ; left[r] + above[c] - top-left
+ paddw m2, m0
+ ; Clamp to [0, max] for the bit depth
+ pminsw m1, m3
+ pminsw m2, m3
+ pmaxsw m1, m4
+ pmaxsw m2, m4
+ ; Store the two rows
+ movq [dstq ], m1
+ movq [dstq+strideq*2], m2
+ lea dstq, [dstq+strideq*4] ; advance two rows
+ inc lineq
+ jnz .loop
+ REP_RET
+ ; high_tm_predictor_8x8: dst[r][c] = clamp(left[r] + above[c] - above[-1], 0, (1 << bps) - 1)
+INIT_XMM sse2
+cglobal high_tm_predictor_8x8, 5, 6, 5, dst, stride, above, left, bps, one
+ movd m1, [aboveq-2] ; word just before above[0] (the top-left sample)
+ mova m0, [aboveq] ; m0 = above[0..7]
+ pshuflw m1, m1, 0x0 ; broadcast top-left across the low qword
+ ; Build the clamp limits: max = (1 << bps) - 1 in every lane, min = 0
+ mov oned, 1
+ pxor m3, m3
+ pxor m4, m4
+ pinsrw m3, oned, 0 ; m3 low word = 1
+ pinsrw m4, bpsd, 0 ; m4 low word = shift count
+ pshuflw m3, m3, 0x0
+ DEFINE_ARGS dst, stride, line, left ; above has been read; its register becomes the loop counter
+ punpcklqdq m3, m3 ; m3 = word 1 in every lane
+ mov lineq, -4 ; four iterations, two rows each
+ mova m2, m3
+ punpcklqdq m1, m1 ; top-left now in all eight lanes
+ psllw m3, m4 ; 1 << bps
+ add leftq, 16 ; point past left[7]; indexed below with negative lineq
+ psubw m3, m2 ; max possible value
+ pxor m4, m4 ; min possible value
+ psubw m0, m1 ; m0 = above[c] - top-left
+.loop:
+ movd m1, [leftq+lineq*4] ; low word = left sample for the first row of the pair
+ movd m2, [leftq+lineq*4+2] ; low word = left sample for the second row
+ pshuflw m1, m1, 0x0 ; broadcast each left sample to all eight lanes
+ pshuflw m2, m2, 0x0
+ punpcklqdq m1, m1
+ punpcklqdq m2, m2
+ paddw m1, m0 ; left[r] + above[c] - top-left
+ paddw m2, m0
+ ; Clamp to [0, max] for the bit depth
+ pminsw m1, m3
+ pminsw m2, m3
+ pmaxsw m1, m4
+ pmaxsw m2, m4
+ ; Store the two rows
+ mova [dstq ], m1
+ mova [dstq+strideq*2], m2
+ lea dstq, [dstq+strideq*4] ; advance two rows
+ inc lineq
+ jnz .loop
+ REP_RET
+
+%if ARCH_X86_64
+INIT_XMM sse2
+cglobal high_tm_predictor_16x16, 5, 6, 8, dst, stride, above, left, bps, one
+ movd m2, [aboveq-2]
+ mova m0, [aboveq]
+ mova m1, [aboveq+16]
+ pshuflw m2, m2, 0x0
+ ; Get the values to compute the maximum value at this bit depth
+ mov oned, 1
+ pxor m7, m7
+ pxor m8, m8
+ pinsrw m7, oned, 0
+ pinsrw m8, bpsd, 0
+ pshuflw m7, m7, 0x0
+ DEFINE_ARGS dst, stride, line, left
+ punpcklqdq m7, m7
+ mov lineq, -8
+ mova m5, m7
+ punpcklqdq m2, m2
+ psllw m7, m8
+ add leftq, 32
+ psubw m7, m5 ; max possible value
+ pxor m8, m8 ; min possible value
+ psubw m0, m2
+ psubw m1, m2
+.loop:
+ movd m2, [leftq+lineq*4]
+ movd m3, [leftq+lineq*4+2]
+ pshuflw m2, m2, 0x0
+ pshuflw m3, m3, 0x0
+ punpcklqdq m2, m2
+ punpcklqdq m3, m3
+ paddw m4, m2, m0
+ paddw m5, m3, m0
+ paddw m2, m1
+ paddw m3, m1
+ ;Clamp to the bit-depth
+ pminsw m4, m7
+ pminsw m5, m7
+ pminsw m2, m7
+ pminsw m3, m7
+ pmaxsw m4, m8
+ pmaxsw m5, m8
+ pmaxsw m2, m8
+ pmaxsw m3, m8
+ ;Store the values
+ mova [dstq ], m4
+ mova [dstq+strideq*2 ], m5
+ mova [dstq +16], m2
+ mova [dstq+strideq*2+16], m3
+ lea dstq, [dstq+strideq*4]
+ inc lineq
+ jnz .loop
+ REP_RET
+
+INIT_XMM sse2
+cglobal high_tm_predictor_32x32, 5, 6, 12, dst, stride, above, left, bps, one
+ movd m0, [aboveq-2]
+ mova m1, [aboveq]
+ mova m2, [aboveq+16]
+ mova m3, [aboveq+32]
+ mova m4, [aboveq+48]
+ pshuflw m0, m0, 0x0
+ ; Get the values to compute the maximum value at this bit depth
+ mov oned, 1
+ pxor m10, m10
+ pxor m11, m11
+ pinsrw m10, oned, 0
+ pinsrw m11, bpsd, 0
+ pshuflw m10, m10, 0x0
+ DEFINE_ARGS dst, stride, line, left
+ punpcklqdq m10, m10
+ mov lineq, -16
+ mova m5, m10
+ punpcklqdq m0, m0
+ psllw m10, m11
+ add leftq, 64
+ psubw m10, m5 ; max possible value
+ pxor m11, m11 ; min possible value
+ psubw m1, m0
+ psubw m2, m0
+ psubw m3, m0
+ psubw m4, m0
+.loop:
+ movd m5, [leftq+lineq*4]
+ movd m6, [leftq+lineq*4+2]
+ pshuflw m5, m5, 0x0
+ pshuflw m6, m6, 0x0
+ punpcklqdq m5, m5
+ punpcklqdq m6, m6
+ paddw m7, m5, m1
+ paddw m8, m5, m2
+ paddw m9, m5, m3
+ paddw m5, m4
+ ;Clamp these values to the bit-depth
+ pminsw m7, m10
+ pminsw m8, m10
+ pminsw m9, m10
+ pminsw m5, m10
+ pmaxsw m7, m11
+ pmaxsw m8, m11
+ pmaxsw m9, m11
+ pmaxsw m5, m11
+ ;Store these values
+ mova [dstq ], m7
+ mova [dstq +16], m8
+ mova [dstq +32], m9
+ mova [dstq +48], m5
+ paddw m7, m6, m1
+ paddw m8, m6, m2
+ paddw m9, m6, m3
+ paddw m6, m4
+ ;Clamp these values to the bit-depth
+ pminsw m7, m10
+ pminsw m8, m10
+ pminsw m9, m10
+ pminsw m6, m10
+ pmaxsw m7, m11
+ pmaxsw m8, m11
+ pmaxsw m9, m11
+ pmaxsw m6, m11
+ ;Store these values
+ mova [dstq+strideq*2 ], m7
+ mova [dstq+strideq*2+16], m8
+ mova [dstq+strideq*2+32], m9
+ mova [dstq+strideq*2+48], m6
+ lea dstq, [dstq+strideq*4]
+ inc lineq
+ jnz .loop
+ REP_RET
+%endif
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index 7615cddda..f99fa7a58 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -1366,11 +1366,11 @@ void vp9_init_dequantizer(VP9_COMMON *cm) {
int q;
for (q = 0; q < QINDEX_RANGE; q++) {
- cm->y_dequant[q][0] = vp9_dc_quant(q, cm->y_dc_delta_q);
- cm->y_dequant[q][1] = vp9_ac_quant(q, 0);
+ cm->y_dequant[q][0] = vp9_dc_quant(q, cm->y_dc_delta_q, cm->bit_depth);
+ cm->y_dequant[q][1] = vp9_ac_quant(q, 0, cm->bit_depth);
- cm->uv_dequant[q][0] = vp9_dc_quant(q, cm->uv_dc_delta_q);
- cm->uv_dequant[q][1] = vp9_ac_quant(q, cm->uv_ac_delta_q);
+ cm->uv_dequant[q][0] = vp9_dc_quant(q, cm->uv_dc_delta_q, cm->bit_depth);
+ cm->uv_dequant[q][1] = vp9_ac_quant(q, cm->uv_ac_delta_q, cm->bit_depth);
}
}
diff --git a/vp9/decoder/vp9_detokenize.c b/vp9/decoder/vp9_detokenize.c
index 76ca1ae8f..df46f64e7 100644
--- a/vp9/decoder/vp9_detokenize.c
+++ b/vp9/decoder/vp9_detokenize.c
@@ -13,6 +13,7 @@
#include "vp9/common/vp9_blockd.h"
#include "vp9/common/vp9_common.h"
+#include "vp9/common/vp9_entropy.h"
#include "vp9/decoder/vp9_detokenize.h"
@@ -31,29 +32,31 @@
#define INCREMENT_COUNT(token) \
do { \
if (!cm->frame_parallel_decoding_mode) \
- ++coef_counts[band][ctx][token]; \
+ ++coef_counts[band][ctx][token]; \
} while (0)
-#define WRITE_COEF_CONTINUE(val, token) \
- { \
- v = (val * dqv) >> dq_shift; \
- dqcoeff[scan[c]] = vp9_read_bit(r) ? -v : v; \
- token_cache[scan[c]] = vp9_pt_energy_class[token]; \
- ++c; \
- ctx = get_coef_context(nb, token_cache, c); \
- dqv = dq[1]; \
- continue; \
- }
+// Decodes an n-bit unsigned value from reader r, most significant bit first.
+// The k-th bit read (k = 0 .. n - 1) is decoded with probability probs[k].
+// Returns 0 without touching the reader when n <= 0.
+static INLINE int read_coeff(const vp9_prob *probs, int n, vp9_reader *r) {
+  const vp9_prob *p = probs;
+  int bits = 0;
+  for (; n > 0; --n) {
+    const int bit = vp9_read(r, *p++);
+    bits = (bits << 1) | bit;
+  }
+  return bits;
+}
-#define ADJUST_COEF(prob, bits_count) \
- do { \
- val += (vp9_read(r, prob) << bits_count); \
- } while (0)
+// Token tree handed to vp9_read_tree() by decode_coefs() once the
+// ONE_CONTEXT_NODE read has ruled out ONE_TOKEN; its leaves are TWO_TOKEN
+// through CATEGORY6_TOKEN. The trailing comment on each row gives that row's
+// node number and name.
+// NOTE(review): presumably uses the usual vp9_tree encoding (positive entry =
+// offset of the next node pair, negated token = leaf) -- confirm against
+// vp9_read_tree().
+static const vp9_tree_index coeff_subtree_high[TREE_SIZE(ENTROPY_TOKENS)] = {
+ 2, 6, /* 0 = LOW_VAL */
+ -TWO_TOKEN, 4, /* 1 = TWO */
+ -THREE_TOKEN, -FOUR_TOKEN, /* 2 = THREE */
+ 8, 10, /* 3 = HIGH_LOW */
+ -CATEGORY1_TOKEN, -CATEGORY2_TOKEN, /* 4 = CAT_ONE */
+ 12, 14, /* 5 = CAT_THREEFOUR */
+ -CATEGORY3_TOKEN, -CATEGORY4_TOKEN, /* 6 = CAT_THREE */
+ -CATEGORY5_TOKEN, -CATEGORY6_TOKEN /* 7 = CAT_FIVE */
+};
static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, PLANE_TYPE type,
- tran_low_t *dqcoeff, TX_SIZE tx_size, const int16_t *dq,
- int ctx, const int16_t *scan, const int16_t *nb,
- vp9_reader *r) {
+ tran_low_t *dqcoeff, TX_SIZE tx_size, const int16_t *dq,
+ int ctx, const int16_t *scan, const int16_t *nb,
+ vp9_reader *r) {
const int max_eob = 16 << (tx_size << 1);
const FRAME_CONTEXT *const fc = &cm->fc;
FRAME_COUNTS *const counts = &cm->counts;
@@ -69,11 +72,11 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, PLANE_TYPE type,
uint8_t token_cache[32 * 32];
const uint8_t *band_translate = get_band_translate(tx_size);
const int dq_shift = (tx_size == TX_32X32);
- int v;
+ int v, token;
int16_t dqv = dq[0];
while (c < max_eob) {
- int val;
+ int val = -1;
band = *band_translate++;
prob = coef_probs[band][ctx];
if (!cm->frame_parallel_decoding_mode)
@@ -95,81 +98,46 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, PLANE_TYPE type,
prob = coef_probs[band][ctx];
}
- // ONE_CONTEXT_NODE_0_
if (!vp9_read(r, prob[ONE_CONTEXT_NODE])) {
INCREMENT_COUNT(ONE_TOKEN);
- WRITE_COEF_CONTINUE(1, ONE_TOKEN);
- }
-
- INCREMENT_COUNT(TWO_TOKEN);
-
- prob = vp9_pareto8_full[prob[PIVOT_NODE] - 1];
-
- if (!vp9_read(r, prob[LOW_VAL_CONTEXT_NODE])) {
- if (!vp9_read(r, prob[TWO_CONTEXT_NODE])) {
- WRITE_COEF_CONTINUE(2, TWO_TOKEN);
+ token = ONE_TOKEN;
+ val = 1;
+ } else {
+ INCREMENT_COUNT(TWO_TOKEN);
+ token = vp9_read_tree(r, coeff_subtree_high,
+ vp9_pareto8_full[prob[PIVOT_NODE] - 1]);
+ switch (token) {
+ case TWO_TOKEN:
+ case THREE_TOKEN:
+ case FOUR_TOKEN:
+ val = token;
+ break;
+ case CATEGORY1_TOKEN:
+ val = CAT1_MIN_VAL + read_coeff(vp9_cat1_prob, 1, r);
+ break;
+ case CATEGORY2_TOKEN:
+ val = CAT2_MIN_VAL + read_coeff(vp9_cat2_prob, 2, r);
+ break;
+ case CATEGORY3_TOKEN:
+ val = CAT3_MIN_VAL + read_coeff(vp9_cat3_prob, 3, r);
+ break;
+ case CATEGORY4_TOKEN:
+ val = CAT4_MIN_VAL + read_coeff(vp9_cat4_prob, 4, r);
+ break;
+ case CATEGORY5_TOKEN:
+ val = CAT5_MIN_VAL + read_coeff(vp9_cat5_prob, 5, r);
+ break;
+ case CATEGORY6_TOKEN:
+ val = CAT6_MIN_VAL + read_coeff(vp9_cat6_prob, 14, r);
+ break;
}
- if (!vp9_read(r, prob[THREE_CONTEXT_NODE])) {
- WRITE_COEF_CONTINUE(3, THREE_TOKEN);
- }
- WRITE_COEF_CONTINUE(4, FOUR_TOKEN);
}
-
- if (!vp9_read(r, prob[HIGH_LOW_CONTEXT_NODE])) {
- if (!vp9_read(r, prob[CAT_ONE_CONTEXT_NODE])) {
- val = CAT1_MIN_VAL;
- ADJUST_COEF(vp9_cat1_prob[0], 0);
- WRITE_COEF_CONTINUE(val, CATEGORY1_TOKEN);
- }
- val = CAT2_MIN_VAL;
- ADJUST_COEF(vp9_cat2_prob[0], 1);
- ADJUST_COEF(vp9_cat2_prob[1], 0);
- WRITE_COEF_CONTINUE(val, CATEGORY2_TOKEN);
- }
-
- if (!vp9_read(r, prob[CAT_THREEFOUR_CONTEXT_NODE])) {
- if (!vp9_read(r, prob[CAT_THREE_CONTEXT_NODE])) {
- val = CAT3_MIN_VAL;
- ADJUST_COEF(vp9_cat3_prob[0], 2);
- ADJUST_COEF(vp9_cat3_prob[1], 1);
- ADJUST_COEF(vp9_cat3_prob[2], 0);
- WRITE_COEF_CONTINUE(val, CATEGORY3_TOKEN);
- }
- val = CAT4_MIN_VAL;
- ADJUST_COEF(vp9_cat4_prob[0], 3);
- ADJUST_COEF(vp9_cat4_prob[1], 2);
- ADJUST_COEF(vp9_cat4_prob[2], 1);
- ADJUST_COEF(vp9_cat4_prob[3], 0);
- WRITE_COEF_CONTINUE(val, CATEGORY4_TOKEN);
- }
-
- if (!vp9_read(r, prob[CAT_FIVE_CONTEXT_NODE])) {
- val = CAT5_MIN_VAL;
- ADJUST_COEF(vp9_cat5_prob[0], 4);
- ADJUST_COEF(vp9_cat5_prob[1], 3);
- ADJUST_COEF(vp9_cat5_prob[2], 2);
- ADJUST_COEF(vp9_cat5_prob[3], 1);
- ADJUST_COEF(vp9_cat5_prob[4], 0);
- WRITE_COEF_CONTINUE(val, CATEGORY5_TOKEN);
- }
- val = 0;
- val = (val << 1) | vp9_read(r, vp9_cat6_prob[0]);
- val = (val << 1) | vp9_read(r, vp9_cat6_prob[1]);
- val = (val << 1) | vp9_read(r, vp9_cat6_prob[2]);
- val = (val << 1) | vp9_read(r, vp9_cat6_prob[3]);
- val = (val << 1) | vp9_read(r, vp9_cat6_prob[4]);
- val = (val << 1) | vp9_read(r, vp9_cat6_prob[5]);
- val = (val << 1) | vp9_read(r, vp9_cat6_prob[6]);
- val = (val << 1) | vp9_read(r, vp9_cat6_prob[7]);
- val = (val << 1) | vp9_read(r, vp9_cat6_prob[8]);
- val = (val << 1) | vp9_read(r, vp9_cat6_prob[9]);
- val = (val << 1) | vp9_read(r, vp9_cat6_prob[10]);
- val = (val << 1) | vp9_read(r, vp9_cat6_prob[11]);
- val = (val << 1) | vp9_read(r, vp9_cat6_prob[12]);
- val = (val << 1) | vp9_read(r, vp9_cat6_prob[13]);
- val += CAT6_MIN_VAL;
-
- WRITE_COEF_CONTINUE(val, CATEGORY6_TOKEN);
+ v = (val * dqv) >> dq_shift;
+ dqcoeff[scan[c]] = vp9_read_bit(r) ? -v : v;
+ token_cache[scan[c]] = vp9_pt_energy_class[token];
+ ++c;
+ ctx = get_coef_context(nb, token_cache, c);
+ dqv = dq[1];
}
return c;
diff --git a/vp9/encoder/vp9_aq_complexity.c b/vp9/encoder/vp9_aq_complexity.c
index 33f92393c..f7fca0cde 100644
--- a/vp9/encoder/vp9_aq_complexity.c
+++ b/vp9/encoder/vp9_aq_complexity.c
@@ -23,9 +23,9 @@ static const double aq_c_q_adj_factor[AQ_C_STRENGTHS][AQ_C_SEGMENTS] =
static const double aq_c_transitions[AQ_C_STRENGTHS][AQ_C_SEGMENTS] =
{{1.0, 1.0, 1.0}, {1.0, 0.25, 0.0}, {1.0, 0.5, 0.25}};
-static int get_aq_c_strength(int q_index) {
+static int get_aq_c_strength(int q_index, vpx_bit_depth_t bit_depth) {
// Approximate base quatizer (truncated to int)
- int base_quant = vp9_ac_quant(q_index, 0) / 4;
+ const int base_quant = vp9_ac_quant(q_index, 0, bit_depth) / 4;
return (base_quant > 20) + (base_quant > 45);
}
@@ -40,7 +40,7 @@ void vp9_setup_in_frame_q_adj(VP9_COMP *cpi) {
cpi->refresh_alt_ref_frame ||
(cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) {
int segment;
- const int aq_strength = get_aq_c_strength(cm->base_qindex);
+ const int aq_strength = get_aq_c_strength(cm->base_qindex, cm->bit_depth);
const int active_segments = aq_c_active_segments[aq_strength];
// Clear down the segment map.
@@ -70,7 +70,8 @@ void vp9_setup_in_frame_q_adj(VP9_COMP *cpi) {
for (segment = 1; segment < active_segments; ++segment) {
int qindex_delta =
vp9_compute_qdelta_by_rate(&cpi->rc, cm->frame_type, cm->base_qindex,
- aq_c_q_adj_factor[aq_strength][segment]);
+ aq_c_q_adj_factor[aq_strength][segment],
+ cm->bit_depth);
// For AQ complexity mode, we dont allow Q0 in a segment if the base
// Q is not 0. Q0 (lossless) implies 4x4 only and in AQ mode 2 a segment
@@ -115,7 +116,7 @@ void vp9_select_in_frame_q_segment(VP9_COMP *cpi,
// It is converted to bits * 256 units.
const int target_rate = (cpi->rc.sb64_target_rate * xmis * ymis * 256) /
(bw * bh);
- const int aq_strength = get_aq_c_strength(cm->base_qindex);
+ const int aq_strength = get_aq_c_strength(cm->base_qindex, cm->bit_depth);
const int active_segments = aq_c_active_segments[aq_strength];
// The number of segments considered and the transition points used to
diff --git a/vp9/encoder/vp9_aq_cyclicrefresh.c b/vp9/encoder/vp9_aq_cyclicrefresh.c
index e7f0daac3..514ff7a52 100644
--- a/vp9/encoder/vp9_aq_cyclicrefresh.c
+++ b/vp9/encoder/vp9_aq_cyclicrefresh.c
@@ -200,7 +200,7 @@ void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) {
// Rate target ratio to set q delta.
const float rate_ratio_qdelta = 2.0;
- const double q = vp9_convert_qindex_to_q(cm->base_qindex);
+ const double q = vp9_convert_qindex_to_q(cm->base_qindex, cm->bit_depth);
vp9_clear_system_state();
// Some of these parameters may be set via codec-control function later.
cr->max_sbs_perframe = 10;
@@ -242,7 +242,8 @@ void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) {
// Set the q delta for segment 1.
qindex_delta = vp9_compute_qdelta_by_rate(rc, cm->frame_type,
cm->base_qindex,
- rate_ratio_qdelta);
+ rate_ratio_qdelta,
+ cm->bit_depth);
// TODO(marpan): Incorporate the actual-vs-target rate over/undershoot from
// previous encoded frame.
if (-qindex_delta > cr->max_qdelta_perc * cm->base_qindex / 100)
diff --git a/vp9/encoder/vp9_aq_variance.c b/vp9/encoder/vp9_aq_variance.c
index 56db95eb7..b96f00fd1 100644
--- a/vp9/encoder/vp9_aq_variance.c
+++ b/vp9/encoder/vp9_aq_variance.c
@@ -75,7 +75,7 @@ void vp9_vaq_init() {
void vp9_vaq_frame_setup(VP9_COMP *cpi) {
VP9_COMMON *cm = &cpi->common;
struct segmentation *seg = &cm->seg;
- const double base_q = vp9_convert_qindex_to_q(cm->base_qindex);
+ const double base_q = vp9_convert_qindex_to_q(cm->base_qindex, cm->bit_depth);
const int base_rdmult = vp9_compute_rd_mult(cpi, cm->base_qindex +
cm->y_dc_delta_q);
int i;
@@ -99,7 +99,8 @@ void vp9_vaq_frame_setup(VP9_COMP *cpi) {
continue;
}
- qindex_delta = vp9_compute_qdelta(&cpi->rc, base_q, base_q * Q_RATIO(i));
+ qindex_delta = vp9_compute_qdelta(&cpi->rc, base_q, base_q * Q_RATIO(i),
+ cm->bit_depth);
vp9_set_segdata(seg, SEGMENT_ID(i), SEG_LVL_ALT_Q, qindex_delta);
vp9_enable_segfeature(seg, SEGMENT_ID(i), SEG_LVL_ALT_Q);
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index e16b0b356..9545ba0f3 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -330,7 +330,8 @@ static void configure_static_seg_features(VP9_COMP *cpi) {
seg->update_map = 1;
seg->update_data = 1;
- qi_delta = vp9_compute_qdelta(rc, rc->avg_q, rc->avg_q * 0.875);
+ qi_delta = vp9_compute_qdelta(rc, rc->avg_q, rc->avg_q * 0.875,
+ cm->bit_depth);
vp9_set_segdata(seg, 1, SEG_LVL_ALT_Q, qi_delta - 2);
vp9_set_segdata(seg, 1, SEG_LVL_ALT_LF, -2);
@@ -351,7 +352,8 @@ static void configure_static_seg_features(VP9_COMP *cpi) {
seg->update_data = 1;
seg->abs_delta = SEGMENT_DELTADATA;
- qi_delta = vp9_compute_qdelta(rc, rc->avg_q, rc->avg_q * 1.125);
+ qi_delta = vp9_compute_qdelta(rc, rc->avg_q, rc->avg_q * 1.125,
+ cm->bit_depth);
vp9_set_segdata(seg, 1, SEG_LVL_ALT_Q, qi_delta + 2);
vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_Q);
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index 54b57cf88..df82be5ec 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -62,8 +62,8 @@ static void swap_yv12(YV12_BUFFER_CONFIG *a, YV12_BUFFER_CONFIG *b) {
*b = temp;
}
-static int gfboost_qadjust(int qindex) {
- const double q = vp9_convert_qindex_to_q(qindex);
+static int gfboost_qadjust(int qindex, vpx_bit_depth_t bit_depth) {
+ const double q = vp9_convert_qindex_to_q(qindex, bit_depth);
return (int)((0.00000828 * q * q * q) +
(-0.0055 * q * q) +
(1.32 * q) + 79.3);
@@ -360,11 +360,11 @@ static BLOCK_SIZE get_bsize(const VP9_COMMON *cm, int mb_row, int mb_col) {
}
}
-static int find_fp_qindex() {
+static int find_fp_qindex(vpx_bit_depth_t bit_depth) {
int i;
for (i = 0; i < QINDEX_RANGE; ++i)
- if (vp9_convert_qindex_to_q(i) >= 30.0)
+ if (vp9_convert_qindex_to_q(i, bit_depth) >= 30.0)
break;
if (i == QINDEX_RANGE)
@@ -434,7 +434,7 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) {
vp9_clear_system_state();
set_first_pass_params(cpi);
- vp9_set_quantizer(cm, find_fp_qindex());
+ vp9_set_quantizer(cm, find_fp_qindex(cm->bit_depth));
if (lc != NULL) {
twopass = &lc->twopass;
@@ -935,12 +935,13 @@ static double calc_correction_factor(double err_per_mb,
double err_divisor,
double pt_low,
double pt_high,
- int q) {
+ int q,
+ vpx_bit_depth_t bit_depth) {
const double error_term = err_per_mb / err_divisor;
// Adjustment based on actual quantizer to power term.
- const double power_term = MIN(vp9_convert_qindex_to_q(q) * 0.0125 + pt_low,
- pt_high);
+ const double power_term =
+ MIN(vp9_convert_qindex_to_q(q, bit_depth) * 0.0125 + pt_low, pt_high);
// Calculate correction factor.
if (power_term < 1.0)
@@ -975,9 +976,11 @@ static int get_twopass_worst_quality(const VP9_COMP *cpi,
const double factor =
calc_correction_factor(err_per_mb, ERR_DIVISOR,
is_svc_upper_layer ? SVC_FACTOR_PT_LOW :
- FACTOR_PT_LOW, FACTOR_PT_HIGH, q);
+ FACTOR_PT_LOW, FACTOR_PT_HIGH, q,
+ cpi->common.bit_depth);
const int bits_per_mb = vp9_rc_bits_per_mb(INTER_FRAME, q,
- factor * speed_term);
+ factor * speed_term,
+ cpi->common.bit_depth);
if (bits_per_mb <= target_norm_bits_per_mb)
break;
}
@@ -1594,7 +1597,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// At high Q when there are few bits to spare we are better with a longer
// interval to spread the cost of the GF.
active_max_gf_interval =
- 12 + ((int)vp9_convert_qindex_to_q(rc->last_q[INTER_FRAME]) >> 5);
+ 12 + ((int)vp9_convert_qindex_to_q(rc->last_q[INTER_FRAME],
+ cpi->common.bit_depth) >> 5);
if (active_max_gf_interval > rc->max_gf_interval)
active_max_gf_interval = rc->max_gf_interval;
@@ -1736,7 +1740,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// Calculate the extra bits to be used for boosted frame(s)
{
int q = rc->last_q[INTER_FRAME];
- int boost = (rc->gfu_boost * gfboost_qadjust(q)) / 100;
+ int boost =
+ (rc->gfu_boost * gfboost_qadjust(q, cpi->common.bit_depth)) / 100;
// Set max and minimum boost and hence minimum allocation.
boost = clamp(boost, 125, (rc->baseline_gf_interval + 1) * 200);
@@ -2227,7 +2232,7 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
section_target_bandwidth);
twopass->active_worst_quality = tmp_q;
rc->ni_av_qi = tmp_q;
- rc->avg_q = vp9_convert_qindex_to_q(tmp_q);
+ rc->avg_q = vp9_convert_qindex_to_q(tmp_q, cm->bit_depth);
}
vp9_zero(this_frame);
if (EOF == input_stats(twopass, &this_frame))
diff --git a/vp9/encoder/vp9_picklpf.c b/vp9/encoder/vp9_picklpf.c
index 5557d7fe7..2fc05e7fe 100644
--- a/vp9/encoder/vp9_picklpf.c
+++ b/vp9/encoder/vp9_picklpf.c
@@ -142,7 +142,7 @@ void vp9_pick_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi,
} else if (method >= LPF_PICK_FROM_Q) {
const int min_filter_level = 0;
const int max_filter_level = get_max_filter_level(cpi);
- const int q = vp9_ac_quant(cm->base_qindex, 0);
+ const int q = vp9_ac_quant(cm->base_qindex, 0, cm->bit_depth);
// These values were determined by linear fitting the result of the
// searched level, filt_guess = q * 0.316206 + 3.87252
int filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 1015158, 18);
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index a97d77831..2edd52bae 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -440,7 +440,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
unsigned int sse_y = UINT_MAX;
const int intra_cost_penalty =
- 20 * vp9_dc_quant(cm->base_qindex, cm->y_dc_delta_q);
+ 20 * vp9_dc_quant(cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
const int64_t inter_mode_thresh = RDCOST(x->rdmult, x->rddiv,
intra_cost_penalty, 0);
const int intra_mode_cost = 50;
diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c
index d49eb956f..2f225d74e 100644
--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c
@@ -40,6 +40,31 @@ void vp9_quantize_dc(const tran_low_t *coeff_ptr, int skip_block,
*eob_ptr = eob + 1;
}
+#if CONFIG_VP9_HIGHBITDEPTH
+// Quantizes only the DC coefficient (index 0) of a high-bit-depth block.
+//
+// When skip_block is zero, |coeff_ptr[0]| is rounded with round_ptr[0],
+// multiplied by quant and shifted right by 16; the result is stored in
+// qcoeff_ptr[0] with the original sign restored, and dqcoeff_ptr[0] receives
+// that value times dequant_ptr. *eob_ptr is set to 1 when the quantized
+// magnitude is non-zero and to 0 otherwise. When skip_block is non-zero no
+// coefficient is written and *eob_ptr is set to 0.
+void vp9_high_quantize_dc(const tran_low_t *coeff_ptr, int skip_block,
+                          const int16_t *round_ptr, const int16_t quant,
+                          tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+                          const int16_t dequant_ptr, uint16_t *eob_ptr) {
+  int eob = -1;
+
+  if (!skip_block) {
+    const int coeff = coeff_ptr[0];
+    const int coeff_sign = (coeff >> 31);
+    const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+    // Widen BEFORE adding and multiplying. The previous form multiplied the
+    // int returned by clamp() by quant in 32-bit arithmetic, which can
+    // overflow for high-bit-depth coefficients before the result was ever
+    // stored in the 64-bit temporary.
+    const int64_t rounded = (int64_t)abs_coeff + round_ptr[0];
+    const int64_t tmp = (rounded * quant) >> 16;
+
+    qcoeff_ptr[0] = (tran_low_t)((tmp ^ coeff_sign) - coeff_sign);
+    dqcoeff_ptr[0] = qcoeff_ptr[0] * dequant_ptr;
+    if (tmp)
+      eob = 0;
+  }
+  *eob_ptr = eob + 1;
+}
+#endif
+
void vp9_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
const int16_t *round_ptr, const int16_t quant,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
@@ -62,6 +87,31 @@ void vp9_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
*eob_ptr = eob + 1;
}
+#if CONFIG_VP9_HIGHBITDEPTH
+// Quantizes only the DC coefficient (index 0) of a high-bit-depth 32x32
+// block.
+//
+// Same procedure as the non-32x32 DC quantizer except that the product is
+// shifted right by 15 instead of 16 and the dequantized value is halved.
+// When skip_block is non-zero no coefficient is written. *eob_ptr is set to
+// 1 when the quantized magnitude is non-zero and to 0 otherwise.
+void vp9_high_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
+                                const int16_t *round_ptr, const int16_t quant,
+                                tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+                                const int16_t dequant_ptr, uint16_t *eob_ptr) {
+  int eob = -1;
+
+  if (!skip_block) {
+    const int coeff = coeff_ptr[0];
+    const int coeff_sign = (coeff >> 31);
+    const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+    // Widen BEFORE adding and multiplying. The previous form multiplied the
+    // int returned by clamp() by quant in 32-bit arithmetic, which can
+    // overflow for high-bit-depth coefficients before the result was ever
+    // stored in the 64-bit temporary.
+    const int64_t rounded = (int64_t)abs_coeff + round_ptr[0];
+    const int64_t tmp = (rounded * quant) >> 15;
+
+    qcoeff_ptr[0] = (tran_low_t)((tmp ^ coeff_sign) - coeff_sign);
+    dqcoeff_ptr[0] = qcoeff_ptr[0] * dequant_ptr / 2;
+    if (tmp)
+      eob = 0;
+  }
+  *eob_ptr = eob + 1;
+}
+#endif
+
void vp9_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
int skip_block,
const int16_t *zbin_ptr, const int16_t *round_ptr,
@@ -103,6 +153,51 @@ void vp9_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
*eob_ptr = eob + 1;
}
+#if CONFIG_VP9_HIGHBITDEPTH
+// "Fast path" quantizer for high-bit-depth blocks (no zero-bin test).
+//
+// Clears qcoeff_ptr and dqcoeff_ptr for `count` coefficients. Unless
+// skip_block is set, every coefficient is then visited in scan order:
+// its magnitude is rounded with round_ptr[], multiplied by quant_ptr[] and
+// shifted right by 16, the sign is restored into qcoeff_ptr, and
+// dqcoeff_ptr receives the quantized value times dequant_ptr[]. Index 0 of
+// the round/quant/dequant arrays is used for raster position 0 and index 1
+// for all other positions. *eob_ptr is set to one past the scan index of the
+// last coefficient that quantized to a non-zero value (0 if there is none).
+// zbin_ptr, quant_shift_ptr, zbin_oq_value and iscan are unused.
+void vp9_high_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t count,
+                            int skip_block, const int16_t *zbin_ptr,
+                            const int16_t *round_ptr, const int16_t *quant_ptr,
+                            const int16_t *quant_shift_ptr,
+                            tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+                            const int16_t *dequant_ptr,
+                            int zbin_oq_value, uint16_t *eob_ptr,
+                            const int16_t *scan, const int16_t *iscan) {
+  int i;
+  int eob = -1;
+  // TODO(jingning) Decide the need of these arguments after the
+  // quantization process is completed.
+  (void)zbin_ptr;
+  (void)quant_shift_ptr;
+  (void)zbin_oq_value;
+  (void)iscan;
+
+  vpx_memset(qcoeff_ptr, 0, count * sizeof(*qcoeff_ptr));
+  vpx_memset(dqcoeff_ptr, 0, count * sizeof(*dqcoeff_ptr));
+
+  if (!skip_block) {
+    // Quantization pass: every coefficient is processed, in scan order.
+    for (i = 0; i < count; i++) {
+      const int rc = scan[i];
+      const int coeff = coeff_ptr[rc];
+      const int coeff_sign = (coeff >> 31);
+      const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+      // Widen BEFORE adding and multiplying. The previous form multiplied
+      // the int returned by clamp() by quant_ptr[] in 32-bit arithmetic,
+      // which can overflow for high-bit-depth coefficients before the
+      // result was ever stored in the 64-bit temporary.
+      const int64_t rounded = (int64_t)abs_coeff + round_ptr[rc != 0];
+      const int64_t tmp = (rounded * quant_ptr[rc != 0]) >> 16;
+
+      qcoeff_ptr[rc] = (tran_low_t)((tmp ^ coeff_sign) - coeff_sign);
+      dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0];
+
+      if (tmp)
+        eob = i;
+    }
+  }
+  *eob_ptr = eob + 1;
+}
+#endif
+
// TODO(jingning) Refactor this file and combine functions with similar
// operations.
void vp9_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
@@ -146,6 +241,51 @@ void vp9_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
*eob_ptr = eob + 1;
}
+#if CONFIG_VP9_HIGHBITDEPTH
+// "Fast path" quantizer for high-bit-depth 32x32 blocks.
+//
+// Both output buffers are zeroed first. Unless skip_block is set, each
+// coefficient is visited in scan order; those whose magnitude is below a
+// quarter of the matching dequant step are left at zero, the rest are rounded
+// with half of round_ptr[], scaled by quant_ptr[] (>> 15) and written with
+// their sign restored, with dqcoeff_ptr receiving half of the dequantized
+// value. *eob_ptr ends up as one past the scan index of the last coefficient
+// that quantized to a non-zero value (0 if there is none).
+// zbin_ptr, quant_shift_ptr, zbin_oq_value and iscan are unused.
+void vp9_high_quantize_fp_32x32_c(const tran_low_t *coeff_ptr,
+                                  intptr_t n_coeffs, int skip_block,
+                                  const int16_t *zbin_ptr,
+                                  const int16_t *round_ptr,
+                                  const int16_t *quant_ptr,
+                                  const int16_t *quant_shift_ptr,
+                                  tran_low_t *qcoeff_ptr,
+                                  tran_low_t *dqcoeff_ptr,
+                                  const int16_t *dequant_ptr,
+                                  int zbin_oq_value, uint16_t *eob_ptr,
+                                  const int16_t *scan, const int16_t *iscan) {
+  int pos;
+  int last_nonzero = -1;
+  (void)zbin_ptr;
+  (void)quant_shift_ptr;
+  (void)zbin_oq_value;
+  (void)iscan;
+
+  vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+  vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+
+  if (skip_block) {
+    *eob_ptr = 0;
+    return;
+  }
+
+  for (pos = 0; pos < n_coeffs; pos++) {
+    const int rc = scan[pos];
+    const int is_ac = (rc != 0);
+    const int coeff = coeff_ptr[rc];
+    const int sign_mask = (coeff >> 31);
+    const int magnitude = (coeff ^ sign_mask) - sign_mask;
+    int64_t level;
+
+    // Too small to survive quantization: outputs stay zero.
+    if (magnitude < (dequant_ptr[is_ac] >> 2))
+      continue;
+
+    level = clamp(magnitude + ROUND_POWER_OF_TWO(round_ptr[is_ac], 1),
+                  INT32_MIN, INT32_MAX);
+    level = (level * quant_ptr[is_ac]) >> 15;
+    qcoeff_ptr[rc] = (level ^ sign_mask) - sign_mask;
+    dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[is_ac] / 2;
+
+    if (level)
+      last_nonzero = pos;
+  }
+  *eob_ptr = last_nonzero + 1;
+}
+#endif
+
void vp9_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
int skip_block,
const int16_t *zbin_ptr, const int16_t *round_ptr,
@@ -199,6 +339,62 @@ void vp9_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
*eob_ptr = eob + 1;
}
+#if CONFIG_VP9_HIGHBITDEPTH
+// Regular (zero-bin) quantizer for high-bit-depth blocks.
+//
+// Clears both output buffers, then (unless skip_block is set) quantizes the
+// coefficients in scan order. Index 0 of the zbin/round/quant/quant_shift/
+// dequant arrays applies to raster position 0, index 1 to every other
+// position. *eob_ptr is set to one past the scan index of the last
+// coefficient that quantized to a non-zero value (0 if there is none).
+// iscan is unused.
+void vp9_high_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+ int skip_block, const int16_t *zbin_ptr,
+ const int16_t *round_ptr, const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr,
+ tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+ const int16_t *dequant_ptr, int zbin_oq_value,
+ uint16_t *eob_ptr, const int16_t *scan,
+ const int16_t *iscan) {
+ int i, non_zero_count = (int)n_coeffs, eob = -1;
+ // Zero-bin thresholds, widened by zbin_oq_value, and their negations.
+ const int zbins[2] = { zbin_ptr[0] + zbin_oq_value,
+ zbin_ptr[1] + zbin_oq_value };
+ const int nzbins[2] = { zbins[0] * -1,
+ zbins[1] * -1 };
+ (void)iscan;
+
+ vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+ vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+
+ if (!skip_block) {
+ // Pre-scan pass: walk the scan order backwards and drop the trailing run
+ // of coefficients that lie strictly inside (-zbin, zbin); stop at the
+ // first one that does not.
+ for (i = (int)n_coeffs - 1; i >= 0; i--) {
+ const int rc = scan[i];
+ const int coeff = coeff_ptr[rc];
+
+ if (coeff < zbins[rc != 0] && coeff > nzbins[rc != 0])
+ non_zero_count--;
+ else
+ break;
+ }
+
+ // Quantization pass: All coefficients with index >= non_zero_count are
+ // skippable. Note: non_zero_count can be zero.
+ for (i = 0; i < non_zero_count; i++) {
+ const int rc = scan[i];
+ const int coeff = coeff_ptr[rc];
+ // coeff_sign is 0 for non-negative and -1 for negative coefficients, so
+ // (x ^ coeff_sign) - coeff_sign negates x exactly when coeff < 0.
+ const int coeff_sign = (coeff >> 31);
+ const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+
+ if (abs_coeff >= zbins[rc != 0]) {
+ // tmp is 64-bit before the multiplications below, so the products are
+ // evaluated in 64-bit arithmetic.
+ int64_t tmp = clamp(abs_coeff + round_ptr[rc != 0],
+ INT32_MIN, INT32_MAX);
+ tmp = ((((tmp * quant_ptr[rc != 0]) >> 16) + tmp) *
+ quant_shift_ptr[rc != 0]) >> 16; // quantization
+ qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
+ dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0];
+
+ if (tmp)
+ eob = i;
+ }
+ }
+ }
+ *eob_ptr = eob + 1;
+}
+#endif
+
void vp9_quantize_b_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
int skip_block,
const int16_t *zbin_ptr, const int16_t *round_ptr,
@@ -255,12 +451,84 @@ void vp9_quantize_b_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
*eob_ptr = eob + 1;
}
+#if CONFIG_VP9_HIGHBITDEPTH
+// Regular (zero-bin) quantizer for high-bit-depth 32x32 blocks.
+//
+// Clears both output buffers, then (unless skip_block is set) collects the
+// scan positions whose coefficient falls outside the zero bin and quantizes
+// only those. Compared with the non-32x32 variant the zero-bin and rounding
+// values pass through ROUND_POWER_OF_TWO(x, 1), the final shift is 15 rather
+// than 16, and the dequantized value is divided by 2. *eob_ptr is set to one
+// past the scan index of the last coefficient that quantized to a non-zero
+// value (0 if there is none). iscan is unused.
+void vp9_high_quantize_b_32x32_c(const tran_low_t *coeff_ptr,
+ intptr_t n_coeffs, int skip_block,
+ const int16_t *zbin_ptr,
+ const int16_t *round_ptr,
+ const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr,
+ tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr,
+ const int16_t *dequant_ptr,
+ int zbin_oq_value, uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan) {
+ const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0] + zbin_oq_value, 1),
+ ROUND_POWER_OF_TWO(zbin_ptr[1] + zbin_oq_value, 1) };
+ const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 };
+
+ // idx_arr records the scan indices selected by the pre-scan pass; idx is
+ // how many have been recorded.
+ // NOTE(review): capacity is 1024 entries, so this assumes n_coeffs <= 1024
+ // -- confirm against callers.
+ int idx = 0;
+ int idx_arr[1024];
+ int i, eob = -1;
+ (void)iscan;
+
+ vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+ vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+
+ if (!skip_block) {
+ // Pre-scan pass
+ for (i = 0; i < n_coeffs; i++) {
+ const int rc = scan[i];
+ const int coeff = coeff_ptr[rc];
+
+ // If the coefficient is out of the base ZBIN range, keep it for
+ // quantization.
+ if (coeff >= zbins[rc != 0] || coeff <= nzbins[rc != 0])
+ idx_arr[idx++] = i;
+ }
+
+ // Quantization pass: only process the coefficients selected in
+ // pre-scan pass. Note: idx can be zero.
+ for (i = 0; i < idx; i++) {
+ const int rc = scan[idx_arr[i]];
+ const int coeff = coeff_ptr[rc];
+ // coeff_sign is 0 for non-negative and -1 for negative coefficients, so
+ // (x ^ coeff_sign) - coeff_sign negates x exactly when coeff < 0.
+ const int coeff_sign = (coeff >> 31);
+ const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+ // tmp is 64-bit before the multiplications below, so the products are
+ // evaluated in 64-bit arithmetic.
+ int64_t tmp = clamp(abs_coeff +
+ ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1),
+ INT32_MIN, INT32_MAX);
+ tmp = ((((tmp * quant_ptr[rc != 0]) >> 16) + tmp) *
+ quant_shift_ptr[rc != 0]) >> 15;
+
+ qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
+ dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2;
+
+ // idx_arr is filled in increasing scan order, so the last assignment
+ // is the highest scan index with a non-zero quantized value.
+ if (tmp)
+ eob = idx_arr[i];
+ }
+ }
+ *eob_ptr = eob + 1;
+}
+#endif
+
void vp9_regular_quantize_b_4x4(MACROBLOCK *x, int plane, int block,
const int16_t *scan, const int16_t *iscan) {
MACROBLOCKD *const xd = &x->e_mbd;
struct macroblock_plane *p = &x->plane[plane];
struct macroblockd_plane *pd = &xd->plane[plane];
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ vp9_high_quantize_b(BLOCK_OFFSET(p->coeff, block),
+ 16, x->skip_block,
+ p->zbin, p->round, p->quant, p->quant_shift,
+ BLOCK_OFFSET(p->qcoeff, block),
+ BLOCK_OFFSET(pd->dqcoeff, block),
+ pd->dequant, p->zbin_extra, &p->eobs[block],
+ scan, iscan);
+ return;
+ }
+#endif
vp9_quantize_b(BLOCK_OFFSET(p->coeff, block),
16, x->skip_block,
p->zbin, p->round, p->quant, p->quant_shift,
@@ -281,9 +549,23 @@ static void invert_quant(int16_t *quant, int16_t *shift, int d) {
}
static int get_qzbin_factor(int q, vpx_bit_depth_t bit_depth) {
- int quant = vp9_dc_quant(q, 0);
+ const int quant = vp9_dc_quant(q, 0, bit_depth);
+#if CONFIG_VP9_HIGHBITDEPTH
+ switch (bit_depth) {
+ case VPX_BITS_8:
+ return q == 0 ? 64 : (quant < 148 ? 84 : 80);
+ case VPX_BITS_10:
+ return q == 0 ? 64 : (quant < 592 ? 84 : 80);
+ case VPX_BITS_12:
+ return q == 0 ? 64 : (quant < 2368 ? 84 : 80);
+ default:
+ assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
+ return -1;
+ }
+#else
(void) bit_depth;
return q == 0 ? 64 : (quant < 148 ? 84 : 80);
+#endif
}
void vp9_init_quantizer(VP9_COMP *cpi) {
@@ -301,8 +583,8 @@ void vp9_init_quantizer(VP9_COMP *cpi) {
qrounding_factor_fp = 64;
// y
- quant = i == 0 ? vp9_dc_quant(q, cm->y_dc_delta_q)
- : vp9_ac_quant(q, 0);
+ quant = i == 0 ? vp9_dc_quant(q, cm->y_dc_delta_q, cm->bit_depth)
+ : vp9_ac_quant(q, 0, cm->bit_depth);
invert_quant(&quants->y_quant[q][i], &quants->y_quant_shift[q][i], quant);
quants->y_quant_fp[q][i] = (1 << 16) / quant;
quants->y_round_fp[q][i] = (qrounding_factor_fp * quant) >> 7;
@@ -311,8 +593,8 @@ void vp9_init_quantizer(VP9_COMP *cpi) {
cm->y_dequant[q][i] = quant;
// uv
- quant = i == 0 ? vp9_dc_quant(q, cm->uv_dc_delta_q)
- : vp9_ac_quant(q, cm->uv_ac_delta_q);
+ quant = i == 0 ? vp9_dc_quant(q, cm->uv_dc_delta_q, cm->bit_depth)
+ : vp9_ac_quant(q, cm->uv_ac_delta_q, cm->bit_depth);
invert_quant(&quants->uv_quant[q][i],
&quants->uv_quant_shift[q][i], quant);
quants->uv_quant_fp[q][i] = (1 << 16) / quant;
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index b607c8559..94c0b64dd 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -42,13 +42,56 @@
#define FRAME_OVERHEAD_BITS 200
+// ASSIGN_MINQ_TABLE(bit_depth, name) assigns to the variable `name` the table
+// name##_8, name##_10 or name##_12 that matches bit_depth. With
+// CONFIG_VP9_HIGHBITDEPTH an unsupported bit depth trips the assert and sets
+// `name` to NULL. Without it, bit_depth is ignored and the _8 table is always
+// selected.
+#if CONFIG_VP9_HIGHBITDEPTH
+#define ASSIGN_MINQ_TABLE(bit_depth, name) \
+ do { \
+ switch (bit_depth) { \
+ case VPX_BITS_8: \
+ name = name##_8; \
+ break; \
+ case VPX_BITS_10: \
+ name = name##_10; \
+ break; \
+ case VPX_BITS_12: \
+ name = name##_12; \
+ break; \
+ default: \
+ assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10" \
+ " or VPX_BITS_12"); \
+ name = NULL; \
+ } \
+ } while (0)
+#else
+#define ASSIGN_MINQ_TABLE(bit_depth, name) \
+ do { \
+ (void) bit_depth; \
+ name = name##_8; \
+ } while (0)
+#endif
+
// Tables relating active max Q to active min Q
-static int kf_low_motion_minq[QINDEX_RANGE];
-static int kf_high_motion_minq[QINDEX_RANGE];
-static int arfgf_low_motion_minq[QINDEX_RANGE];
-static int arfgf_high_motion_minq[QINDEX_RANGE];
-static int inter_minq[QINDEX_RANGE];
-static int rtc_minq[QINDEX_RANGE];
+static int kf_low_motion_minq_8[QINDEX_RANGE];
+static int kf_high_motion_minq_8[QINDEX_RANGE];
+static int arfgf_low_motion_minq_8[QINDEX_RANGE];
+static int arfgf_high_motion_minq_8[QINDEX_RANGE];
+static int inter_minq_8[QINDEX_RANGE];
+static int rtc_minq_8[QINDEX_RANGE];
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static int kf_low_motion_minq_10[QINDEX_RANGE];
+static int kf_high_motion_minq_10[QINDEX_RANGE];
+static int arfgf_low_motion_minq_10[QINDEX_RANGE];
+static int arfgf_high_motion_minq_10[QINDEX_RANGE];
+static int inter_minq_10[QINDEX_RANGE];
+static int rtc_minq_10[QINDEX_RANGE];
+static int kf_low_motion_minq_12[QINDEX_RANGE];
+static int kf_high_motion_minq_12[QINDEX_RANGE];
+static int arfgf_low_motion_minq_12[QINDEX_RANGE];
+static int arfgf_high_motion_minq_12[QINDEX_RANGE];
+static int inter_minq_12[QINDEX_RANGE];
+static int rtc_minq_12[QINDEX_RANGE];
+#endif
+
static int gf_high = 2000;
static int gf_low = 400;
static int kf_high = 5000;
@@ -58,7 +101,8 @@ static int kf_low = 400;
// formulaic approach to facilitate easier adjustment of the Q tables.
// The formulae were derived from computing a 3rd order polynomial best
// fit to the original data (after plotting real maxq vs minq (not q index))
-static int get_minq_index(double maxq, double x3, double x2, double x1) {
+static int get_minq_index(double maxq, double x3, double x2, double x1,
+ vpx_bit_depth_t bit_depth) {
int i;
const double minqtarget = MIN(((x3 * maxq + x2) * maxq + x1) * maxq,
maxq);
@@ -68,38 +112,69 @@ static int get_minq_index(double maxq, double x3, double x2, double x1) {
if (minqtarget <= 2.0)
return 0;
- for (i = 0; i < QINDEX_RANGE; i++)
- if (minqtarget <= vp9_convert_qindex_to_q(i))
+ for (i = 0; i < QINDEX_RANGE; i++) {
+ if (minqtarget <= vp9_convert_qindex_to_q(i, bit_depth))
return i;
+ }
return QINDEX_RANGE - 1;
}
-void vp9_rc_init_minq_luts() {
+static void init_minq_luts(int *kf_low_m, int *kf_high_m,
+ int *arfgf_low, int *arfgf_high,
+ int *inter, int *rtc, vpx_bit_depth_t bit_depth) {
int i;
-
for (i = 0; i < QINDEX_RANGE; i++) {
- const double maxq = vp9_convert_qindex_to_q(i);
- kf_low_motion_minq[i] = get_minq_index(maxq, 0.000001, -0.0004, 0.125);
- kf_high_motion_minq[i] = get_minq_index(maxq, 0.000002, -0.0012, 0.50);
- arfgf_low_motion_minq[i] = get_minq_index(maxq, 0.0000015, -0.0009, 0.30);
- arfgf_high_motion_minq[i] = get_minq_index(maxq, 0.0000021, -0.00125, 0.50);
- inter_minq[i] = get_minq_index(maxq, 0.00000271, -0.00113, 0.90);
- rtc_minq[i] = get_minq_index(maxq, 0.00000271, -0.00113, 0.70);
+ const double maxq = vp9_convert_qindex_to_q(i, bit_depth);
+ kf_low_m[i] = get_minq_index(maxq, 0.000001, -0.0004, 0.125, bit_depth);
+ kf_high_m[i] = get_minq_index(maxq, 0.000002, -0.0012, 0.50, bit_depth);
+ arfgf_low[i] = get_minq_index(maxq, 0.0000015, -0.0009, 0.30, bit_depth);
+ arfgf_high[i] = get_minq_index(maxq, 0.0000021, -0.00125, 0.50, bit_depth);
+ inter[i] = get_minq_index(maxq, 0.00000271, -0.00113, 0.90, bit_depth);
+ rtc[i] = get_minq_index(maxq, 0.00000271, -0.00113, 0.70, bit_depth);
}
}
+// Populates the six active-max-Q to active-min-Q lookup tables by calling
+// init_minq_luts() once per supported bit depth: always for 8-bit, and
+// additionally for 10-bit and 12-bit when CONFIG_VP9_HIGHBITDEPTH is set.
+void vp9_rc_init_minq_luts() {
+ init_minq_luts(kf_low_motion_minq_8, kf_high_motion_minq_8,
+ arfgf_low_motion_minq_8, arfgf_high_motion_minq_8,
+ inter_minq_8, rtc_minq_8, VPX_BITS_8);
+#if CONFIG_VP9_HIGHBITDEPTH
+ init_minq_luts(kf_low_motion_minq_10, kf_high_motion_minq_10,
+ arfgf_low_motion_minq_10, arfgf_high_motion_minq_10,
+ inter_minq_10, rtc_minq_10, VPX_BITS_10);
+ init_minq_luts(kf_low_motion_minq_12, kf_high_motion_minq_12,
+ arfgf_low_motion_minq_12, arfgf_high_motion_minq_12,
+ inter_minq_12, rtc_minq_12, VPX_BITS_12);
+#endif
+}
+
// These functions use formulaic calculations to make playing with the
// quantizer tables easier. If necessary they can be replaced by lookup
// tables if and when things settle down in the experimental bitstream
-double vp9_convert_qindex_to_q(int qindex) {
+double vp9_convert_qindex_to_q(int qindex, vpx_bit_depth_t bit_depth) {
// Convert the index to a real Q value (scaled down to match old Q values)
- return vp9_ac_quant(qindex, 0) / 4.0;
+#if CONFIG_VP9_HIGHBITDEPTH
+ switch (bit_depth) {
+ case VPX_BITS_8:
+ return vp9_ac_quant(qindex, 0, bit_depth) / 4.0;
+ case VPX_BITS_10:
+ return vp9_ac_quant(qindex, 0, bit_depth) / 16.0;
+ case VPX_BITS_12:
+ return vp9_ac_quant(qindex, 0, bit_depth) / 64.0;
+ default:
+ assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
+ return -1.0;
+ }
+#else
+ return vp9_ac_quant(qindex, 0, bit_depth) / 4.0;
+#endif
}
int vp9_rc_bits_per_mb(FRAME_TYPE frame_type, int qindex,
- double correction_factor) {
- const double q = vp9_convert_qindex_to_q(qindex);
+ double correction_factor,
+ vpx_bit_depth_t bit_depth) {
+ const double q = vp9_convert_qindex_to_q(qindex, bit_depth);
int enumerator = frame_type == KEY_FRAME ? 3300000 : 2250000;
// q based adjustment to baseline enumerator
@@ -108,8 +183,10 @@ int vp9_rc_bits_per_mb(FRAME_TYPE frame_type, int qindex,
}
static int estimate_bits_at_q(FRAME_TYPE frame_type, int q, int mbs,
- double correction_factor) {
- const int bpm = (int)(vp9_rc_bits_per_mb(frame_type, q, correction_factor));
+ double correction_factor,
+ vpx_bit_depth_t bit_depth) {
+ const int bpm = (int)(vp9_rc_bits_per_mb(frame_type, q, correction_factor,
+ bit_depth));
return ((uint64_t)bpm * mbs) >> BPER_MB_NORMBITS;
}
@@ -227,7 +304,7 @@ void vp9_rc_init(const VP9EncoderConfig *oxcf, int pass, RATE_CONTROL *rc) {
rc->ni_frames = 0;
rc->tot_q = 0.0;
- rc->avg_q = vp9_convert_qindex_to_q(oxcf->worst_allowed_q);
+ rc->avg_q = vp9_convert_qindex_to_q(oxcf->worst_allowed_q, oxcf->bit_depth);
for (i = 0; i < RATE_FACTOR_LEVELS; ++i) {
rc->rate_correction_factors[i] = 1.0;
@@ -330,7 +407,8 @@ void vp9_rc_update_rate_correction_factors(VP9_COMP *cpi, int damp_var) {
// Stay in double to avoid int overflow when values are large
projected_size_based_on_q = estimate_bits_at_q(cm->frame_type,
cm->base_qindex, cm->MBs,
- rate_correction_factor);
+ rate_correction_factor,
+ cm->bit_depth);
// Work out a size correction factor.
if (projected_size_based_on_q > 0)
correction_factor = (100 * cpi->rc.projected_frame_size) /
@@ -392,7 +470,8 @@ int vp9_rc_regulate_q(const VP9_COMP *cpi, int target_bits_per_frame,
do {
const int bits_per_mb_at_this_q = (int)vp9_rc_bits_per_mb(cm->frame_type, i,
- correction_factor);
+ correction_factor,
+ cm->bit_depth);
if (bits_per_mb_at_this_q <= target_bits_per_mb) {
if ((target_bits_per_mb - bits_per_mb_at_this_q) <= last_error)
@@ -424,12 +503,22 @@ static int get_active_quality(int q, int gfu_boost, int low, int high,
}
}
-static int get_kf_active_quality(const RATE_CONTROL *const rc, int q) {
+static int get_kf_active_quality(const RATE_CONTROL *const rc, int q,
+ vpx_bit_depth_t bit_depth) {
+ int *kf_low_motion_minq;
+ int *kf_high_motion_minq;
+ ASSIGN_MINQ_TABLE(bit_depth, kf_low_motion_minq);
+ ASSIGN_MINQ_TABLE(bit_depth, kf_high_motion_minq);
return get_active_quality(q, rc->kf_boost, kf_low, kf_high,
kf_low_motion_minq, kf_high_motion_minq);
}
-static int get_gf_active_quality(const RATE_CONTROL *const rc, int q) {
+static int get_gf_active_quality(const RATE_CONTROL *const rc, int q,
+ vpx_bit_depth_t bit_depth) {
+ int *arfgf_low_motion_minq;
+ int *arfgf_high_motion_minq;
+ ASSIGN_MINQ_TABLE(bit_depth, arfgf_low_motion_minq);
+ ASSIGN_MINQ_TABLE(bit_depth, arfgf_high_motion_minq);
return get_active_quality(q, rc->gfu_boost, gf_low, gf_high,
arfgf_low_motion_minq, arfgf_high_motion_minq);
}
@@ -516,6 +605,8 @@ static int rc_pick_q_and_bounds_one_pass_cbr(const VP9_COMP *cpi,
int active_best_quality;
int active_worst_quality = calc_active_worst_quality_one_pass_cbr(cpi);
int q;
+ int *rtc_minq;
+ ASSIGN_MINQ_TABLE(cm->bit_depth, rtc_minq);
if (frame_is_intra_only(cm)) {
active_best_quality = rc->best_quality;
@@ -524,9 +615,10 @@ static int rc_pick_q_and_bounds_one_pass_cbr(const VP9_COMP *cpi,
// based on the ambient Q to reduce the risk of popping.
if (rc->this_key_frame_forced) {
int qindex = rc->last_boosted_qindex;
- double last_boosted_q = vp9_convert_qindex_to_q(qindex);
+ double last_boosted_q = vp9_convert_qindex_to_q(qindex, cm->bit_depth);
int delta_qindex = vp9_compute_qdelta(rc, last_boosted_q,
- (last_boosted_q * 0.75));
+ (last_boosted_q * 0.75),
+ cm->bit_depth);
active_best_quality = MAX(qindex + delta_qindex, rc->best_quality);
} else if (cm->current_video_frame > 0) {
// not first frame of one pass and kf_boost is set
@@ -534,7 +626,8 @@ static int rc_pick_q_and_bounds_one_pass_cbr(const VP9_COMP *cpi,
double q_val;
active_best_quality =
- get_kf_active_quality(rc, rc->avg_frame_qindex[KEY_FRAME]);
+ get_kf_active_quality(rc, rc->avg_frame_qindex[KEY_FRAME],
+ cm->bit_depth);
// Allow somewhat lower kf minq with small image formats.
if ((cm->width * cm->height) <= (352 * 288)) {
@@ -543,9 +636,10 @@ static int rc_pick_q_and_bounds_one_pass_cbr(const VP9_COMP *cpi,
// Convert the adjustment factor to a qindex delta
// on active_best_quality.
- q_val = vp9_convert_qindex_to_q(active_best_quality);
+ q_val = vp9_convert_qindex_to_q(active_best_quality, cm->bit_depth);
active_best_quality += vp9_compute_qdelta(rc, q_val,
- q_val * q_adj_factor);
+ q_val * q_adj_factor,
+ cm->bit_depth);
}
} else if (!rc->is_src_frame_alt_ref &&
!cpi->use_svc &&
@@ -559,7 +653,7 @@ static int rc_pick_q_and_bounds_one_pass_cbr(const VP9_COMP *cpi,
} else {
q = active_worst_quality;
}
- active_best_quality = get_gf_active_quality(rc, q);
+ active_best_quality = get_gf_active_quality(rc, q, cm->bit_depth);
} else {
// Use the lower of active_worst_quality and recent/average Q.
if (cm->current_video_frame > 1) {
@@ -592,7 +686,8 @@ static int rc_pick_q_and_bounds_one_pass_cbr(const VP9_COMP *cpi,
int qdelta = 0;
vp9_clear_system_state();
qdelta = vp9_compute_qdelta_by_rate(&cpi->rc, cm->frame_type,
- active_worst_quality, 2.0);
+ active_worst_quality, 2.0,
+ cm->bit_depth);
*top_index = active_worst_quality + qdelta;
*top_index = (*top_index > *bottom_index) ? *top_index : *bottom_index;
}
@@ -644,6 +739,8 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi,
int active_best_quality;
int active_worst_quality = calc_active_worst_quality_one_pass_vbr(cpi);
int q;
+ int *inter_minq;
+ ASSIGN_MINQ_TABLE(cm->bit_depth, inter_minq);
if (frame_is_intra_only(cm)) {
@@ -652,9 +749,10 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi,
// based on the ambient Q to reduce the risk of popping.
if (rc->this_key_frame_forced) {
int qindex = rc->last_boosted_qindex;
- double last_boosted_q = vp9_convert_qindex_to_q(qindex);
+ double last_boosted_q = vp9_convert_qindex_to_q(qindex, cm->bit_depth);
int delta_qindex = vp9_compute_qdelta(rc, last_boosted_q,
- last_boosted_q * 0.75);
+ last_boosted_q * 0.75,
+ cm->bit_depth);
active_best_quality = MAX(qindex + delta_qindex, rc->best_quality);
} else {
// not first frame of one pass and kf_boost is set
@@ -662,7 +760,8 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi,
double q_val;
active_best_quality =
- get_kf_active_quality(rc, rc->avg_frame_qindex[KEY_FRAME]);
+ get_kf_active_quality(rc, rc->avg_frame_qindex[KEY_FRAME],
+ cm->bit_depth);
// Allow somewhat lower kf minq with small image formats.
if ((cm->width * cm->height) <= (352 * 288)) {
@@ -671,9 +770,10 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi,
// Convert the adjustment factor to a qindex delta
// on active_best_quality.
- q_val = vp9_convert_qindex_to_q(active_best_quality);
+ q_val = vp9_convert_qindex_to_q(active_best_quality, cm->bit_depth);
active_best_quality += vp9_compute_qdelta(rc, q_val,
- q_val * q_adj_factor);
+ q_val * q_adj_factor,
+ cm->bit_depth);
}
} else if (!rc->is_src_frame_alt_ref &&
(cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
@@ -691,7 +791,7 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi,
if (q < cq_level)
q = cq_level;
- active_best_quality = get_gf_active_quality(rc, q);
+ active_best_quality = get_gf_active_quality(rc, q, cm->bit_depth);
// Constrained quality use slightly lower active best.
active_best_quality = active_best_quality * 15 / 16;
@@ -700,10 +800,10 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi,
if (!cpi->refresh_alt_ref_frame) {
active_best_quality = cq_level;
} else {
- active_best_quality = get_gf_active_quality(rc, q);
+ active_best_quality = get_gf_active_quality(rc, q, cm->bit_depth);
}
} else {
- active_best_quality = get_gf_active_quality(rc, q);
+ active_best_quality = get_gf_active_quality(rc, q, cm->bit_depth);
}
} else {
if (oxcf->rc_mode == VPX_Q) {
@@ -742,11 +842,13 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi,
!rc->this_key_frame_forced &&
!(cm->current_video_frame == 0)) {
qdelta = vp9_compute_qdelta_by_rate(&cpi->rc, cm->frame_type,
- active_worst_quality, 2.0);
+ active_worst_quality, 2.0,
+ cm->bit_depth);
} else if (!rc->is_src_frame_alt_ref &&
(cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
qdelta = vp9_compute_qdelta_by_rate(&cpi->rc, cm->frame_type,
- active_worst_quality, 1.75);
+ active_worst_quality, 1.75,
+ cm->bit_depth);
}
*top_index = active_worst_quality + qdelta;
*top_index = (*top_index > *bottom_index) ? *top_index : *bottom_index;
@@ -788,6 +890,8 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi,
int active_best_quality;
int active_worst_quality = cpi->twopass.active_worst_quality;
int q;
+ int *inter_minq;
+ ASSIGN_MINQ_TABLE(cm->bit_depth, inter_minq);
if (frame_is_intra_only(cm) || vp9_is_upper_layer_key_frame(cpi)) {
// Handle the special case for key frames forced when we have reached
@@ -795,16 +899,18 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi,
// based on the ambient Q to reduce the risk of popping.
if (rc->this_key_frame_forced) {
int qindex = rc->last_boosted_qindex;
- double last_boosted_q = vp9_convert_qindex_to_q(qindex);
+ double last_boosted_q = vp9_convert_qindex_to_q(qindex, cm->bit_depth);
int delta_qindex = vp9_compute_qdelta(rc, last_boosted_q,
- last_boosted_q * 0.75);
+ last_boosted_q * 0.75,
+ cm->bit_depth);
active_best_quality = MAX(qindex + delta_qindex, rc->best_quality);
} else {
// Not forced keyframe.
double q_adj_factor = 1.0;
double q_val;
// Baseline value derived from cpi->active_worst_quality and kf boost.
- active_best_quality = get_kf_active_quality(rc, active_worst_quality);
+ active_best_quality = get_kf_active_quality(rc, active_worst_quality,
+ cm->bit_depth);
// Allow somewhat lower kf minq with small image formats.
if ((cm->width * cm->height) <= (352 * 288)) {
@@ -816,9 +922,10 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi,
// Convert the adjustment factor to a qindex delta
// on active_best_quality.
- q_val = vp9_convert_qindex_to_q(active_best_quality);
+ q_val = vp9_convert_qindex_to_q(active_best_quality, cm->bit_depth);
active_best_quality += vp9_compute_qdelta(rc, q_val,
- q_val * q_adj_factor);
+ q_val * q_adj_factor,
+ cm->bit_depth);
}
} else if (!rc->is_src_frame_alt_ref &&
(cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
@@ -836,7 +943,7 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi,
if (q < cq_level)
q = cq_level;
- active_best_quality = get_gf_active_quality(rc, q);
+ active_best_quality = get_gf_active_quality(rc, q, cm->bit_depth);
// Constrained quality use slightly lower active best.
active_best_quality = active_best_quality * 15 / 16;
@@ -845,10 +952,10 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi,
if (!cpi->refresh_alt_ref_frame) {
active_best_quality = cq_level;
} else {
- active_best_quality = get_gf_active_quality(rc, q);
+ active_best_quality = get_gf_active_quality(rc, q, cm->bit_depth);
}
} else {
- active_best_quality = get_gf_active_quality(rc, q);
+ active_best_quality = get_gf_active_quality(rc, q, cm->bit_depth);
}
} else {
if (oxcf->rc_mode == VPX_Q) {
@@ -888,7 +995,8 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi,
const double rate_factor =
rate_factor_deltas[gf_group->rf_level[gf_group->index]];
int qdelta = vp9_compute_qdelta_by_rate(&cpi->rc, cm->frame_type,
- active_worst_quality, rate_factor);
+ active_worst_quality, rate_factor,
+ cm->bit_depth);
*top_index = active_worst_quality + qdelta;
*top_index = (*top_index > *bottom_index) ? *top_index : *bottom_index;
}
@@ -1038,7 +1146,7 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
rc->avg_frame_qindex[INTER_FRAME] =
ROUND_POWER_OF_TWO(3 * rc->avg_frame_qindex[INTER_FRAME] + qindex, 2);
rc->ni_frames++;
- rc->tot_q += vp9_convert_qindex_to_q(qindex);
+ rc->tot_q += vp9_convert_qindex_to_q(qindex, cm->bit_depth);
rc->avg_q = rc->tot_q / rc->ni_frames;
// Calculate the average Q for normal inter frames (not key or GFU
// frames).
@@ -1294,7 +1402,8 @@ void vp9_rc_get_one_pass_cbr_params(VP9_COMP *cpi) {
rc->baseline_gf_interval = INT_MAX;
}
-int vp9_compute_qdelta(const RATE_CONTROL *rc, double qstart, double qtarget) {
+int vp9_compute_qdelta(const RATE_CONTROL *rc, double qstart, double qtarget,
+ vpx_bit_depth_t bit_depth) {
int start_index = rc->worst_quality;
int target_index = rc->worst_quality;
int i;
@@ -1302,14 +1411,14 @@ int vp9_compute_qdelta(const RATE_CONTROL *rc, double qstart, double qtarget) {
// Convert the average q value to an index.
for (i = rc->best_quality; i < rc->worst_quality; ++i) {
start_index = i;
- if (vp9_convert_qindex_to_q(i) >= qstart)
+ if (vp9_convert_qindex_to_q(i, bit_depth) >= qstart)
break;
}
// Convert the q target to an index
for (i = rc->best_quality; i < rc->worst_quality; ++i) {
target_index = i;
- if (vp9_convert_qindex_to_q(i) >= qtarget)
+ if (vp9_convert_qindex_to_q(i, bit_depth) >= qtarget)
break;
}
@@ -1317,12 +1426,14 @@ int vp9_compute_qdelta(const RATE_CONTROL *rc, double qstart, double qtarget) {
}
int vp9_compute_qdelta_by_rate(const RATE_CONTROL *rc, FRAME_TYPE frame_type,
- int qindex, double rate_target_ratio) {
+ int qindex, double rate_target_ratio,
+ vpx_bit_depth_t bit_depth) {
int target_index = rc->worst_quality;
int i;
// Look up the current projected bits per block for the base index
- const int base_bits_per_mb = vp9_rc_bits_per_mb(frame_type, qindex, 1.0);
+ const int base_bits_per_mb = vp9_rc_bits_per_mb(frame_type, qindex, 1.0,
+ bit_depth);
// Find the target bits per mb based on the base value and given ratio.
const int target_bits_per_mb = (int)(rate_target_ratio * base_bits_per_mb);
@@ -1330,7 +1441,7 @@ int vp9_compute_qdelta_by_rate(const RATE_CONTROL *rc, FRAME_TYPE frame_type,
// Convert the q target to an index
for (i = rc->best_quality; i < rc->worst_quality; ++i) {
target_index = i;
- if (vp9_rc_bits_per_mb(frame_type, i, 1.0) <= target_bits_per_mb )
+ if (vp9_rc_bits_per_mb(frame_type, i, 1.0, bit_depth) <= target_bits_per_mb)
break;
}
diff --git a/vp9/encoder/vp9_ratectrl.h b/vp9/encoder/vp9_ratectrl.h
index 456daf48d..2ced8e6dd 100644
--- a/vp9/encoder/vp9_ratectrl.h
+++ b/vp9/encoder/vp9_ratectrl.h
@@ -12,6 +12,7 @@
#ifndef VP9_ENCODER_VP9_RATECTRL_H_
#define VP9_ENCODER_VP9_RATECTRL_H_
+#include "vpx/vpx_codec.h"
#include "vpx/vpx_integer.h"
#include "vp9/common/vp9_blockd.h"
@@ -104,7 +105,7 @@ struct VP9EncoderConfig;
void vp9_rc_init(const struct VP9EncoderConfig *oxcf, int pass,
RATE_CONTROL *rc);
-double vp9_convert_qindex_to_q(int qindex);
+double vp9_convert_qindex_to_q(int qindex, vpx_bit_depth_t bit_depth);
void vp9_rc_init_minq_luts();
@@ -167,7 +168,7 @@ int vp9_rc_regulate_q(const struct VP9_COMP *cpi, int target_bits_per_frame,
// Estimates bits per mb for a given qindex and correction factor.
int vp9_rc_bits_per_mb(FRAME_TYPE frame_type, int qindex,
- double correction_factor);
+ double correction_factor, vpx_bit_depth_t bit_depth);
// Clamping utilities for bitrate targets for iframes and pframes.
int vp9_rc_clamp_iframe_target_size(const struct VP9_COMP *const cpi,
@@ -180,12 +181,14 @@ void vp9_rc_set_frame_target(struct VP9_COMP *cpi, int target);
// Computes a q delta (in "q index" terms) to get from a starting q value
// to a target q value
-int vp9_compute_qdelta(const RATE_CONTROL *rc, double qstart, double qtarget);
+int vp9_compute_qdelta(const RATE_CONTROL *rc, double qstart, double qtarget,
+ vpx_bit_depth_t bit_depth);
// Computes a q delta (in "q index" terms) to get from a starting q value
// to a value that should equate to the given rate ratio.
int vp9_compute_qdelta_by_rate(const RATE_CONTROL *rc, FRAME_TYPE frame_type,
- int qindex, double rate_target_ratio);
+ int qindex, double rate_target_ratio,
+ vpx_bit_depth_t bit_depth);
void vp9_rc_update_framerate(struct VP9_COMP *cpi);
diff --git a/vp9/encoder/vp9_rd.c b/vp9/encoder/vp9_rd.c
index 1dd44b4aa..8b7066b13 100644
--- a/vp9/encoder/vp9_rd.c
+++ b/vp9/encoder/vp9_rd.c
@@ -93,34 +93,69 @@ static void fill_token_costs(vp9_coeff_cost *c,
}
// Values are now correlated to quantizer.
-static int sad_per_bit16lut[QINDEX_RANGE];
-static int sad_per_bit4lut[QINDEX_RANGE];
-
-void vp9_init_me_luts() {
+static int sad_per_bit16lut_8[QINDEX_RANGE];
+static int sad_per_bit4lut_8[QINDEX_RANGE];
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static int sad_per_bit16lut_10[QINDEX_RANGE];
+static int sad_per_bit4lut_10[QINDEX_RANGE];
+static int sad_per_bit16lut_12[QINDEX_RANGE];
+static int sad_per_bit4lut_12[QINDEX_RANGE];
+#endif
+
+static void init_me_luts_bd(int *bit16lut, int *bit4lut, int range,
+ vpx_bit_depth_t bit_depth) {
int i;
-
// Initialize the sad lut tables using a formulaic calculation for now.
// This is to make it easier to resolve the impact of experimental changes
// to the quantizer tables.
- for (i = 0; i < QINDEX_RANGE; ++i) {
- const double q = vp9_convert_qindex_to_q(i);
- sad_per_bit16lut[i] = (int)(0.0418 * q + 2.4107);
- sad_per_bit4lut[i] = (int)(0.063 * q + 2.742);
+ for (i = 0; i < range; i++) {
+ const double q = vp9_convert_qindex_to_q(i, bit_depth);
+ bit16lut[i] = (int)(0.0418 * q + 2.4107);
+ bit4lut[i] = (int)(0.063 * q + 2.742);
}
}
+void vp9_init_me_luts() {
+ init_me_luts_bd(sad_per_bit16lut_8, sad_per_bit4lut_8, QINDEX_RANGE,
+ VPX_BITS_8);
+#if CONFIG_VP9_HIGHBITDEPTH
+ init_me_luts_bd(sad_per_bit16lut_10, sad_per_bit4lut_10, QINDEX_RANGE,
+ VPX_BITS_10);
+ init_me_luts_bd(sad_per_bit16lut_12, sad_per_bit4lut_12, QINDEX_RANGE,
+ VPX_BITS_12);
+#endif
+}
+
static const int rd_boost_factor[16] = {
64, 32, 32, 32, 24, 16, 12, 12,
8, 8, 4, 4, 2, 2, 1, 0
};
static const int rd_frame_type_factor[FRAME_UPDATE_TYPES] = {
-128, 144, 128, 128, 144
+ 128, 144, 128, 128, 144
};
int vp9_compute_rd_mult(const VP9_COMP *cpi, int qindex) {
- const int q = vp9_dc_quant(qindex, 0);
+ const int q = vp9_dc_quant(qindex, 0, cpi->common.bit_depth);
+#if CONFIG_VP9_HIGHBITDEPTH
+ int rdmult = 0;
+ switch (cpi->common.bit_depth) {
+ case VPX_BITS_8:
+ rdmult = 88 * q * q / 24;
+ break;
+ case VPX_BITS_10:
+ rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 4);
+ break;
+ case VPX_BITS_12:
+ rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 8);
+ break;
+ default:
+ assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
+ return -1;
+ }
+#else
int rdmult = 88 * q * q / 24;
-
+#endif
if (cpi->oxcf.pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
const FRAME_UPDATE_TYPE frame_type = gf_group->update_type[gf_group->index];
@@ -132,15 +167,53 @@ int vp9_compute_rd_mult(const VP9_COMP *cpi, int qindex) {
return rdmult;
}
-static int compute_rd_thresh_factor(int qindex) {
+static int compute_rd_thresh_factor(int qindex, vpx_bit_depth_t bit_depth) {
+ double q;
+#if CONFIG_VP9_HIGHBITDEPTH
+ switch (bit_depth) {
+ case VPX_BITS_8:
+ q = vp9_dc_quant(qindex, 0, VPX_BITS_8) / 4.0;
+ break;
+ case VPX_BITS_10:
+ q = vp9_dc_quant(qindex, 0, VPX_BITS_10) / 16.0;
+ break;
+ case VPX_BITS_12:
+ q = vp9_dc_quant(qindex, 0, VPX_BITS_12) / 64.0;
+ break;
+ default:
+ assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
+ return -1;
+ }
+#else
+ (void) bit_depth;
+ q = vp9_dc_quant(qindex, 0, VPX_BITS_8) / 4.0;
+#endif
// TODO(debargha): Adjust the function below.
- const int q = (int)(pow(vp9_dc_quant(qindex, 0) / 4.0, RD_THRESH_POW) * 5.12);
- return MAX(q, 8);
+ return MAX((int)(pow(q, RD_THRESH_POW) * 5.12), 8);
}
void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
- cpi->mb.sadperbit16 = sad_per_bit16lut[qindex];
- cpi->mb.sadperbit4 = sad_per_bit4lut[qindex];
+#if CONFIG_VP9_HIGHBITDEPTH
+ switch (cpi->common.bit_depth) {
+ case VPX_BITS_8:
+ cpi->mb.sadperbit16 = sad_per_bit16lut_8[qindex];
+ cpi->mb.sadperbit4 = sad_per_bit4lut_8[qindex];
+ break;
+ case VPX_BITS_10:
+ cpi->mb.sadperbit16 = sad_per_bit16lut_10[qindex];
+ cpi->mb.sadperbit4 = sad_per_bit4lut_10[qindex];
+ break;
+ case VPX_BITS_12:
+ cpi->mb.sadperbit16 = sad_per_bit16lut_12[qindex];
+ cpi->mb.sadperbit4 = sad_per_bit4lut_12[qindex];
+ break;
+ default:
+ assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
+ }
+#else
+ cpi->mb.sadperbit16 = sad_per_bit16lut_8[qindex];
+ cpi->mb.sadperbit4 = sad_per_bit4lut_8[qindex];
+#endif
}
static void set_block_thresholds(const VP9_COMMON *cm, RD_OPT *rd) {
@@ -149,9 +222,8 @@ static void set_block_thresholds(const VP9_COMMON *cm, RD_OPT *rd) {
for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
const int qindex =
clamp(vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex) +
- cm->y_dc_delta_q,
- 0, MAXQ);
- const int q = compute_rd_thresh_factor(qindex);
+ cm->y_dc_delta_q, 0, MAXQ);
+ const int q = compute_rd_thresh_factor(qindex, cm->bit_depth);
for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
// Threshold here seems unnecessarily harsh but fine given actual
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 7be557df9..bf27ba682 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -2582,7 +2582,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
int64_t dist_uv[TX_SIZES];
int skip_uv[TX_SIZES];
PREDICTION_MODE mode_uv[TX_SIZES];
- int intra_cost_penalty = 20 * vp9_dc_quant(cm->base_qindex, cm->y_dc_delta_q);
+ const int intra_cost_penalty =
+ 20 * vp9_dc_quant(cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
int best_skip2 = 0;
uint8_t ref_frame_skip_mask[2] = { 0 };
uint16_t mode_skip_mask[MAX_REF_FRAMES] = { 0 };
@@ -3312,7 +3313,8 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
int64_t dist_uv;
int skip_uv;
PREDICTION_MODE mode_uv = DC_PRED;
- int intra_cost_penalty = 20 * vp9_dc_quant(cm->base_qindex, cm->y_dc_delta_q);
+ const int intra_cost_penalty =
+ 20 * vp9_dc_quant(cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
int_mv seg_mvs[4][MAX_REF_FRAMES];
b_mode_info best_bmodes[4];
int best_skip2 = 0;
diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c
index 51d6f766b..ff026666b 100644
--- a/vp9/encoder/vp9_temporal_filter.c
+++ b/vp9/encoder/vp9_temporal_filter.c
@@ -389,10 +389,10 @@ static void adjust_arnr_filter(VP9_COMP *cpi,
// Adjust the strength based on active max q.
if (cpi->common.current_video_frame > 1)
q = ((int)vp9_convert_qindex_to_q(
- cpi->rc.avg_frame_qindex[INTER_FRAME]));
+ cpi->rc.avg_frame_qindex[INTER_FRAME], cpi->common.bit_depth));
else
q = ((int)vp9_convert_qindex_to_q(
- cpi->rc.avg_frame_qindex[KEY_FRAME]));
+ cpi->rc.avg_frame_qindex[KEY_FRAME], cpi->common.bit_depth));
if (q > 16) {
strength = oxcf->arnr_strength;
} else {
diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk
index 90f03426b..e88060c64 100644
--- a/vp9/vp9_common.mk
+++ b/vp9/vp9_common.mk
@@ -89,6 +89,10 @@ VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_intrapred_sse2.asm
VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_intrapred_ssse3.asm
endif
+ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
+VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_high_intrapred_sse2.asm
+endif
+
# common (c)
VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_common_dspr2.h
VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_convolve2_avg_dspr2.c
diff --git a/vpx/vpx_frame_buffer.h b/vpx/vpx_frame_buffer.h
index e69df4bc8..41038b10d 100644
--- a/vpx/vpx_frame_buffer.h
+++ b/vpx/vpx_frame_buffer.h
@@ -43,15 +43,15 @@ typedef struct vpx_codec_frame_buffer {
*
* This callback is invoked by the decoder to retrieve data for the frame
* buffer in order for the decode call to complete. The callback must
- * allocate at least min_size in bytes and assign it to fb->data. Then the
- * callback must set fb->size to the allocated size. The application does not
- * need to align the allocated data. The callback is triggered when the
- * decoder needs a frame buffer to decode a compressed image into. This
- * function may be called more than once for every call to vpx_codec_decode.
- * The application may set fb->priv to some data which will be passed
- * back in the ximage and the release function call. |fb| is guaranteed to
- * not be NULL. On success the callback must return 0. Any failure the
- * callback must return a value less than 0.
+ * allocate at least min_size in bytes and assign it to fb->data. The callback
+ * must zero out all the data allocated. Then the callback must set fb->size
+ * to the allocated size. The application does not need to align the allocated
+ * data. The callback is triggered when the decoder needs a frame buffer to
+ * decode a compressed image into. This function may be called more than once
+ * for every call to vpx_codec_decode. The application may set fb->priv to
+ * some data which will be passed back in the ximage and the release function
+ * call. |fb| is guaranteed to not be NULL. On success the callback must
+ * return 0. On any failure the callback must return a value less than 0.
*
* \param[in] priv Callback's private data
* \param[in] new_size Size in bytes needed by the buffer
diff --git a/vpx_scale/generic/yv12config.c b/vpx_scale/generic/yv12config.c
index 70d7ac0c8..475d231e1 100644
--- a/vpx_scale/generic/yv12config.c
+++ b/vpx_scale/generic/yv12config.c
@@ -199,11 +199,6 @@ int vp9_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf,
if (fb->data == NULL || fb->size < external_frame_size)
return -1;
- // This memset is needed for fixing valgrind error from C loop filter
- // due to access uninitialized memory in frame border. It could be
- // removed if border is totally removed.
- vpx_memset(fb->data, 0, fb->size);
-
ybf->buffer_alloc = (uint8_t *)yv12_align_addr(fb->data, 32);
} else if (frame_size > (size_t)ybf->buffer_alloc_sz) {
// Allocation to hold larger frame, or first allocation.
diff --git a/vpxdec.c b/vpxdec.c
index 091522f06..cf23c295e 100644
--- a/vpxdec.c
+++ b/vpxdec.c
@@ -384,7 +384,7 @@ int get_vp9_frame_buffer(void *cb_priv, size_t min_size,
if (ext_fb_list->ext_fb[i].size < min_size) {
free(ext_fb_list->ext_fb[i].data);
- ext_fb_list->ext_fb[i].data = (uint8_t *)malloc(min_size);
+ ext_fb_list->ext_fb[i].data = (uint8_t *)calloc(min_size, sizeof(uint8_t));
if (!ext_fb_list->ext_fb[i].data)
return -1;