summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSai Deng <sdeng@google.com>2018-12-08 18:43:55 +0000
committerGerrit Code Review <noreply-gerritcodereview@google.com>2018-12-08 18:43:55 +0000
commit0b7e4af7446bd2ed962776010812393d2a3dcf09 (patch)
tree8850b71065374b4695ffede95256f9a260ed02e5
parent673ebe8d2b1417eadaa769436d14f51de6258e3f (diff)
parentf6a002f2a67579cce9d53a4314f0676228517c12 (diff)
downloadlibvpx-0b7e4af7446bd2ed962776010812393d2a3dcf09.tar
libvpx-0b7e4af7446bd2ed962776010812393d2a3dcf09.tar.gz
libvpx-0b7e4af7446bd2ed962776010812393d2a3dcf09.tar.bz2
libvpx-0b7e4af7446bd2ed962776010812393d2a3dcf09.zip
Merge "Add satd avx2 implementation"
-rw-r--r--test/acm_random.h12
-rw-r--r--test/avg_test.cc118
-rw-r--r--vp9/encoder/vp9_encoder.c5
-rw-r--r--vpx_dsp/avg.c13
-rw-r--r--vpx_dsp/vpx_dsp_rtcd_defs.pl3
-rw-r--r--vpx_dsp/x86/avg_intrin_avx2.c23
6 files changed, 150 insertions, 24 deletions
diff --git a/test/acm_random.h b/test/acm_random.h
index 5a4e6c392..ccfa20681 100644
--- a/test/acm_random.h
+++ b/test/acm_random.h
@@ -34,6 +34,18 @@ class ACMRandom {
return (value >> 15) & 0xffff;
}
+ int32_t Rand20Signed(void) {
+ // Use 20 bits: values between 524287 and -524288.
+ const uint32_t value = random_.Generate(1048576);
+ return static_cast<int32_t>(value) - 524288;
+ }
+
+ int16_t Rand16Signed(void) {
+ // Use 16 bits: values between 32767 and -32768.
+ const uint32_t value = random_.Generate(65536);
+ return static_cast<int16_t>(value) - 32768;
+ }
+
int16_t Rand13Signed(void) {
// Use 13 bits: values between 4095 and -4096.
const uint32_t value = random_.Generate(8192);
diff --git a/test/avg_test.cc b/test/avg_test.cc
index 9c242c24f..3d24f1cdb 100644
--- a/test/avg_test.cc
+++ b/test/avg_test.cc
@@ -251,12 +251,7 @@ class SatdTest : public ::testing::Test,
for (int i = 0; i < satd_size_; ++i) src_[i] = val;
}
- void FillRandom() {
- for (int i = 0; i < satd_size_; ++i) {
- const int16_t tmp = rnd_.Rand16();
- src_[i] = (tran_low_t)tmp;
- }
- }
+ virtual void FillRandom() = 0;
void Check(const int expected) {
int total;
@@ -267,11 +262,21 @@ class SatdTest : public ::testing::Test,
tran_low_t *GetCoeff() const { return src_; }
int satd_size_;
+ ACMRandom rnd_;
+ tran_low_t *src_;
private:
- tran_low_t *src_;
SatdFunc satd_func_;
- ACMRandom rnd_;
+};
+
+class SatdLowbdTest : public SatdTest {
+ protected:
+ virtual void FillRandom() {
+ for (int i = 0; i < satd_size_; ++i) {
+ const int16_t tmp = rnd_.Rand16Signed();
+ src_[i] = (tran_low_t)tmp;
+ }
+ }
};
typedef int64_t (*BlockErrorFunc)(const tran_low_t *coeff,
@@ -403,27 +408,82 @@ TEST_P(IntProColTest, Random) {
RunComparison();
}
-TEST_P(SatdTest, MinValue) {
+TEST_P(SatdLowbdTest, MinValue) {
const int kMin = -32640;
const int expected = -kMin * satd_size_;
FillConstant(kMin);
Check(expected);
}
-TEST_P(SatdTest, MaxValue) {
+TEST_P(SatdLowbdTest, MaxValue) {
const int kMax = 32640;
const int expected = kMax * satd_size_;
FillConstant(kMax);
Check(expected);
}
-TEST_P(SatdTest, Random) {
+TEST_P(SatdLowbdTest, Random) {
+ int expected;
+ switch (satd_size_) {
+ case 16: expected = 263252; break;
+ case 64: expected = 1105420; break;
+ case 256: expected = 4252250; break;
+ case 1024: expected = 16876840; break;
+ default:
+ FAIL() << "Invalid satd size (" << satd_size_
+ << ") valid: 16/64/256/1024";
+ }
+ FillRandom();
+ Check(expected);
+}
+
+TEST_P(SatdLowbdTest, DISABLED_Speed) {
+ const int kCountSpeedTestBlock = 20000;
+ vpx_usec_timer timer;
+ const int blocksize = GET_PARAM(0);
+ FillRandom();
+ tran_low_t *coeff = GetCoeff();
+
+ vpx_usec_timer_start(&timer);
+ for (int i = 0; i < kCountSpeedTestBlock; ++i) {
+ GET_PARAM(1)(coeff, blocksize);
+ }
+ vpx_usec_timer_mark(&timer);
+ const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer));
+ printf("blocksize: %4d time: %4d us\n", blocksize, elapsed_time);
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+class SatdHighbdTest : public SatdTest {
+ protected:
+ virtual void FillRandom() {
+ for (int i = 0; i < satd_size_; ++i) {
+ src_[i] = rnd_.Rand20Signed();
+ }
+ }
+};
+
+TEST_P(SatdHighbdTest, MinValue) {
+ const int kMin = -524280;
+ const int expected = -kMin * satd_size_;
+ FillConstant(kMin);
+ Check(expected);
+}
+
+TEST_P(SatdHighbdTest, MaxValue) {
+ const int kMax = 524280;
+ const int expected = kMax * satd_size_;
+ FillConstant(kMax);
+ Check(expected);
+}
+
+TEST_P(SatdHighbdTest, Random) {
int expected;
switch (satd_size_) {
- case 16: expected = 205298; break;
- case 64: expected = 1113950; break;
- case 256: expected = 4268415; break;
- case 1024: expected = 16954082; break;
+ case 16: expected = 5249712; break;
+ case 64: expected = 18362120; break;
+ case 256: expected = 66100520; break;
+ case 1024: expected = 266094734; break;
default:
FAIL() << "Invalid satd size (" << satd_size_
<< ") valid: 16/64/256/1024";
@@ -432,7 +492,7 @@ TEST_P(SatdTest, Random) {
Check(expected);
}
-TEST_P(SatdTest, DISABLED_Speed) {
+TEST_P(SatdHighbdTest, DISABLED_Speed) {
const int kCountSpeedTestBlock = 20000;
vpx_usec_timer timer;
const int blocksize = GET_PARAM(0);
@@ -447,6 +507,7 @@ TEST_P(SatdTest, DISABLED_Speed) {
const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer));
printf("blocksize: %4d time: %4d us\n", blocksize, elapsed_time);
}
+#endif // CONFIG_VP9_HIGHBITDEPTH
TEST_P(BlockErrorTestFP, MinValue) {
const int64_t kMin = -32640;
@@ -513,9 +574,15 @@ INSTANTIATE_TEST_CASE_P(
::testing::Values(make_tuple(16, 16, 1, 8, &vpx_highbd_avg_8x8_sse2),
make_tuple(16, 16, 1, 4, &vpx_highbd_avg_4x4_sse2)));
#endif // HAVE_SSE2
+
+INSTANTIATE_TEST_CASE_P(C, SatdHighbdTest,
+ ::testing::Values(make_tuple(16, &vpx_satd_c),
+ make_tuple(64, &vpx_satd_c),
+ make_tuple(256, &vpx_satd_c),
+ make_tuple(1024, &vpx_satd_c)));
#endif // CONFIG_VP9_HIGHBITDEPTH
-INSTANTIATE_TEST_CASE_P(C, SatdTest,
+INSTANTIATE_TEST_CASE_P(C, SatdLowbdTest,
::testing::Values(make_tuple(16, &vpx_satd_c),
make_tuple(64, &vpx_satd_c),
make_tuple(256, &vpx_satd_c),
@@ -552,7 +619,7 @@ INSTANTIATE_TEST_CASE_P(
make_tuple(64, &vpx_int_pro_col_sse2,
&vpx_int_pro_col_c)));
-INSTANTIATE_TEST_CASE_P(SSE2, SatdTest,
+INSTANTIATE_TEST_CASE_P(SSE2, SatdLowbdTest,
::testing::Values(make_tuple(16, &vpx_satd_sse2),
make_tuple(64, &vpx_satd_sse2),
make_tuple(256, &vpx_satd_sse2),
@@ -567,12 +634,21 @@ INSTANTIATE_TEST_CASE_P(
#endif // HAVE_SSE2
#if HAVE_AVX2
-INSTANTIATE_TEST_CASE_P(AVX2, SatdTest,
+INSTANTIATE_TEST_CASE_P(AVX2, SatdLowbdTest,
::testing::Values(make_tuple(16, &vpx_satd_avx2),
make_tuple(64, &vpx_satd_avx2),
make_tuple(256, &vpx_satd_avx2),
make_tuple(1024, &vpx_satd_avx2)));
+#if CONFIG_VP9_HIGHBITDEPTH
+INSTANTIATE_TEST_CASE_P(
+ AVX2, SatdHighbdTest,
+ ::testing::Values(make_tuple(16, &vpx_highbd_satd_avx2),
+ make_tuple(64, &vpx_highbd_satd_avx2),
+ make_tuple(256, &vpx_highbd_satd_avx2),
+ make_tuple(1024, &vpx_highbd_satd_avx2)));
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
INSTANTIATE_TEST_CASE_P(
AVX2, BlockErrorTestFP,
::testing::Values(make_tuple(16, &vp9_block_error_fp_avx2),
@@ -605,7 +681,7 @@ INSTANTIATE_TEST_CASE_P(
make_tuple(64, &vpx_int_pro_col_neon,
&vpx_int_pro_col_c)));
-INSTANTIATE_TEST_CASE_P(NEON, SatdTest,
+INSTANTIATE_TEST_CASE_P(NEON, SatdLowbdTest,
::testing::Values(make_tuple(16, &vpx_satd_neon),
make_tuple(64, &vpx_satd_neon),
make_tuple(256, &vpx_satd_neon),
@@ -650,7 +726,7 @@ INSTANTIATE_TEST_CASE_P(
// TODO(jingning): Remove the highbitdepth flag once the SIMD functions are
// in place.
#if !CONFIG_VP9_HIGHBITDEPTH
-INSTANTIATE_TEST_CASE_P(MSA, SatdTest,
+INSTANTIATE_TEST_CASE_P(MSA, SatdLowbdTest,
::testing::Values(make_tuple(16, &vpx_satd_msa),
make_tuple(64, &vpx_satd_msa),
make_tuple(256, &vpx_satd_msa),
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index 425db009e..96f17388f 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -5972,8 +5972,7 @@ void mode_estimation(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
vpx_highbd_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst,
dst_stride, xd->bd);
highbd_wht_fwd_txfm(src_diff, bw, coeff, tx_size);
- // TODO(sdeng): Implement SIMD based high bit-depth satd.
- intra_cost = vpx_satd_c(coeff, pix_num);
+ intra_cost = vpx_highbd_satd(coeff, pix_num);
} else {
vpx_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst,
dst_stride);
@@ -6019,7 +6018,7 @@ void mode_estimation(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
bh, bw, src_diff, bw, xd->cur_buf->y_buffer + mb_y_offset,
xd->cur_buf->y_stride, &predictor[0], bw, xd->bd);
highbd_wht_fwd_txfm(src_diff, bw, coeff, tx_size);
- inter_cost = vpx_satd_c(coeff, pix_num);
+ inter_cost = vpx_highbd_satd(coeff, pix_num);
} else {
vp9_build_inter_predictor(
ref_frame[rf_idx]->y_buffer + mb_y_offset,
diff --git a/vpx_dsp/avg.c b/vpx_dsp/avg.c
index 41a6fd828..1c45e8a73 100644
--- a/vpx_dsp/avg.c
+++ b/vpx_dsp/avg.c
@@ -314,6 +314,19 @@ void vpx_hadamard_32x32_c(const int16_t *src_diff, ptrdiff_t src_stride,
}
}
+#if CONFIG_VP9_HIGHBITDEPTH
+// coeff: dynamic range 20 bit.
+// length: value range {16, 64, 256, 1024}.
+int vpx_highbd_satd_c(const tran_low_t *coeff, int length) {
+ int i;
+ int satd = 0;
+ for (i = 0; i < length; ++i) satd += abs(coeff[i]);
+
+ // satd: 30 bits
+ return satd;
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
// coeff: 16 bits, dynamic range [-32640, 32640].
// length: value range {16, 64, 256, 1024}.
int vpx_satd_c(const tran_low_t *coeff, int length) {
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl
index 6dc317630..7e2d922ab 100644
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -796,6 +796,9 @@ if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") {
add_proto qw/int vpx_satd/, "const tran_low_t *coeff, int length";
specialize qw/vpx_satd avx2 sse2 neon/;
+
+ add_proto qw/int vpx_highbd_satd/, "const tran_low_t *coeff, int length";
+ specialize qw/vpx_highbd_satd avx2/;
} else {
add_proto qw/void vpx_hadamard_8x8/, "const int16_t *src_diff, ptrdiff_t src_stride, int16_t *coeff";
specialize qw/vpx_hadamard_8x8 sse2 neon msa vsx/, "$ssse3_x86_64";
diff --git a/vpx_dsp/x86/avg_intrin_avx2.c b/vpx_dsp/x86/avg_intrin_avx2.c
index f39210b6a..3f4f577a2 100644
--- a/vpx_dsp/x86/avg_intrin_avx2.c
+++ b/vpx_dsp/x86/avg_intrin_avx2.c
@@ -457,3 +457,26 @@ int vpx_satd_avx2(const tran_low_t *coeff, int length) {
return _mm_cvtsi128_si32(accum_128);
}
}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+int vpx_highbd_satd_avx2(const tran_low_t *coeff, int length) {
+ __m256i accum = _mm256_setzero_si256();
+ int i;
+
+ for (i = 0; i < length; i += 8, coeff += 8) {
+ const __m256i src_line = _mm256_loadu_si256((const __m256i *)coeff);
+ const __m256i abs = _mm256_abs_epi32(src_line);
+ accum = _mm256_add_epi32(accum, abs);
+ }
+
+ { // 32 bit horizontal add
+ const __m256i a = _mm256_srli_si256(accum, 8);
+ const __m256i b = _mm256_add_epi32(accum, a);
+ const __m256i c = _mm256_srli_epi64(b, 32);
+ const __m256i d = _mm256_add_epi32(b, c);
+ const __m128i accum_128 = _mm_add_epi32(_mm256_castsi256_si128(d),
+ _mm256_extractf128_si256(d, 1));
+ return _mm_cvtsi128_si32(accum_128);
+ }
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH