diff options
-rw-r--r-- | test/lpf_8_test.cc | 24 | ||||
-rw-r--r-- | vp10/common/loopfilter.c | 8 | ||||
-rw-r--r-- | vp9/common/vp9_loopfilter.c | 8 | ||||
-rw-r--r-- | vpx_dsp/loopfilter.c | 22 | ||||
-rw-r--r-- | vpx_dsp/vpx_dsp_rtcd_defs.pl | 7 | ||||
-rw-r--r-- | vpx_dsp/x86/highbd_loopfilter_sse2.c | 45 |
6 files changed, 58 insertions, 56 deletions
diff --git a/test/lpf_8_test.cc b/test/lpf_8_test.cc index 9ddbf71cb..13a4c476f 100644 --- a/test/lpf_8_test.cc +++ b/test/lpf_8_test.cc @@ -475,10 +475,10 @@ INSTANTIATE_TEST_CASE_P( &wrapper_nc<vpx_highbd_lpf_vertical_4_c>, 8, 1), make_tuple(&wrapper_nc<vpx_highbd_lpf_horizontal_8_sse2>, &wrapper_nc<vpx_highbd_lpf_horizontal_8_c>, 8, 1), - make_tuple(&vpx_highbd_lpf_horizontal_16_sse2, - &vpx_highbd_lpf_horizontal_16_c, 8, 1), - make_tuple(&vpx_highbd_lpf_horizontal_16_sse2, - &vpx_highbd_lpf_horizontal_16_c, 8, 2), + make_tuple(&wrapper_nc<vpx_highbd_lpf_horizontal_edge_8_sse2>, + &wrapper_nc<vpx_highbd_lpf_horizontal_edge_8_c>, 8, 1), + make_tuple(&wrapper_nc<vpx_highbd_lpf_horizontal_edge_16_sse2>, + &wrapper_nc<vpx_highbd_lpf_horizontal_edge_16_c>, 8, 1), make_tuple(&wrapper_nc<vpx_highbd_lpf_vertical_8_sse2>, &wrapper_nc<vpx_highbd_lpf_vertical_8_c>, 8, 1), make_tuple(&wrapper_nc<vpx_highbd_lpf_vertical_16_sse2>, @@ -489,10 +489,10 @@ INSTANTIATE_TEST_CASE_P( &wrapper_nc<vpx_highbd_lpf_vertical_4_c>, 10, 1), make_tuple(&wrapper_nc<vpx_highbd_lpf_horizontal_8_sse2>, &wrapper_nc<vpx_highbd_lpf_horizontal_8_c>, 10, 1), - make_tuple(&vpx_highbd_lpf_horizontal_16_sse2, - &vpx_highbd_lpf_horizontal_16_c, 10, 1), - make_tuple(&vpx_highbd_lpf_horizontal_16_sse2, - &vpx_highbd_lpf_horizontal_16_c, 10, 2), + make_tuple(&wrapper_nc<vpx_highbd_lpf_horizontal_edge_8_sse2>, + &wrapper_nc<vpx_highbd_lpf_horizontal_edge_8_c>, 10, 1), + make_tuple(&wrapper_nc<vpx_highbd_lpf_horizontal_edge_16_sse2>, + &wrapper_nc<vpx_highbd_lpf_horizontal_edge_16_c>, 10, 1), make_tuple(&wrapper_nc<vpx_highbd_lpf_vertical_8_sse2>, &wrapper_nc<vpx_highbd_lpf_vertical_8_c>, 10, 1), make_tuple(&wrapper_nc<vpx_highbd_lpf_vertical_16_sse2>, @@ -503,10 +503,10 @@ INSTANTIATE_TEST_CASE_P( &wrapper_nc<vpx_highbd_lpf_vertical_4_c>, 12, 1), make_tuple(&wrapper_nc<vpx_highbd_lpf_horizontal_8_sse2>, &wrapper_nc<vpx_highbd_lpf_horizontal_8_c>, 12, 1), - make_tuple(&vpx_highbd_lpf_horizontal_16_sse2, - &vpx_highbd_lpf_horizontal_16_c, 12, 1), - make_tuple(&vpx_highbd_lpf_horizontal_16_sse2, - &vpx_highbd_lpf_horizontal_16_c, 12, 2), + make_tuple(&wrapper_nc<vpx_highbd_lpf_horizontal_edge_8_sse2>, + &wrapper_nc<vpx_highbd_lpf_horizontal_edge_8_c>, 12, 1), + make_tuple(&wrapper_nc<vpx_highbd_lpf_horizontal_edge_16_sse2>, + &wrapper_nc<vpx_highbd_lpf_horizontal_edge_16_c>, 12, 1), make_tuple(&wrapper_nc<vpx_highbd_lpf_vertical_8_sse2>, &wrapper_nc<vpx_highbd_lpf_vertical_8_c>, 12, 1), make_tuple(&wrapper_nc<vpx_highbd_lpf_vertical_16_sse2>, diff --git a/vp10/common/loopfilter.c b/vp10/common/loopfilter.c index 3fe316db8..11dfe6d70 100644 --- a/vp10/common/loopfilter.c +++ b/vp10/common/loopfilter.c @@ -609,12 +609,12 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch, if (mask & 1) { if (mask_16x16 & 1) { if ((mask_16x16 & 3) == 3) { - vpx_highbd_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 2, bd); + vpx_highbd_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, bd); count = 2; } else { - vpx_highbd_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1, bd); + vpx_highbd_lpf_horizontal_edge_8(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, bd); } } else if (mask_8x8 & 1) { if ((mask_8x8 & 3) == 3) { diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c index aae0a33fa..ee20cc557 100644 --- a/vp9/common/vp9_loopfilter.c +++ b/vp9/common/vp9_loopfilter.c @@ -609,12 +609,12 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch, if (mask & 1) { if (mask_16x16 & 1) { if ((mask_16x16 & 3) == 3) { - vpx_highbd_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 2, bd); + vpx_highbd_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, bd); count = 2; } else { - vpx_highbd_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1, bd); + vpx_highbd_lpf_horizontal_edge_8(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, bd); } } else if (mask_8x8 & 1) { if ((mask_8x8 & 3) == 3) { diff --git a/vpx_dsp/loopfilter.c b/vpx_dsp/loopfilter.c index f866a3dcf..46ef64617 100644 --- a/vpx_dsp/loopfilter.c +++ b/vpx_dsp/loopfilter.c @@ -669,9 +669,11 @@ static INLINE void highbd_filter16(int8_t mask, uint8_t thresh, } } -void vpx_highbd_lpf_horizontal_16_c(uint16_t *s, int p, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh, - int count, int bd) { +static void highbd_mb_lpf_horizontal_edge_w(uint16_t *s, int p, + const uint8_t *blimit, + const uint8_t *limit, + const uint8_t *thresh, + int count, int bd) { int i; // loop filter designed to work using chars so that we can make maximum use @@ -703,6 +705,20 @@ void vpx_highbd_lpf_horizontal_16_c(uint16_t *s, int p, const uint8_t *blimit, } } +void vpx_highbd_lpf_horizontal_edge_8_c(uint16_t *s, int p, + const uint8_t *blimit, + const uint8_t *limit, + const uint8_t *thresh, int bd) { + highbd_mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 1, bd); +} + +void vpx_highbd_lpf_horizontal_edge_16_c(uint16_t *s, int p, + const uint8_t *blimit, + const uint8_t *limit, + const uint8_t *thresh, int bd) { + highbd_mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 2, bd); +} + static void highbd_mb_lpf_vertical_edge_w(uint16_t *s, int p, const uint8_t *blimit, const uint8_t *limit, diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl index 557b4c55f..d7835f4a7 100644 --- a/vpx_dsp/vpx_dsp_rtcd_defs.pl +++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl @@ -588,8 +588,11 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { add_proto qw/void vpx_highbd_lpf_vertical_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd"; specialize qw/vpx_highbd_lpf_vertical_4_dual sse2/; - add_proto qw/void vpx_highbd_lpf_horizontal_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd"; - specialize qw/vpx_highbd_lpf_horizontal_16 sse2/; + add_proto qw/void vpx_highbd_lpf_horizontal_edge_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd"; + specialize qw/vpx_highbd_lpf_horizontal_edge_8 sse2/; + + add_proto qw/void vpx_highbd_lpf_horizontal_edge_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd"; + specialize qw/vpx_highbd_lpf_horizontal_edge_16 sse2/; add_proto qw/void vpx_highbd_lpf_horizontal_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd"; specialize qw/vpx_highbd_lpf_horizontal_8 sse2/; diff --git a/vpx_dsp/x86/highbd_loopfilter_sse2.c b/vpx_dsp/x86/highbd_loopfilter_sse2.c index 73deb733f..72e42adc9 100644 --- a/vpx_dsp/x86/highbd_loopfilter_sse2.c +++ b/vpx_dsp/x86/highbd_loopfilter_sse2.c @@ -51,12 +51,10 @@ static INLINE __m128i signed_char_clamp_bd_sse2(__m128i value, int bd) { // TODO(debargha, peter): Break up large functions into smaller ones // in this file. -static void highbd_mb_lpf_horizontal_edge_w_sse2_8(uint16_t *s, - int p, - const uint8_t *_blimit, - const uint8_t *_limit, - const uint8_t *_thresh, - int bd) { +void vpx_highbd_lpf_horizontal_edge_8_sse2(uint16_t *s, int p, + const uint8_t *_blimit, + const uint8_t *_limit, + const uint8_t *_thresh, int bd) { const __m128i zero = _mm_set1_epi16(0); const __m128i one = _mm_set1_epi16(1); __m128i blimit, limit, thresh; @@ -496,27 +494,12 @@ static void highbd_mb_lpf_horizontal_edge_w_sse2_8(uint16_t *s, _mm_store_si128((__m128i *)(s - 0 * p), q0); } -static void highbd_mb_lpf_horizontal_edge_w_sse2_16(uint16_t *s, - int p, - const uint8_t *_blimit, - const uint8_t *_limit, - const uint8_t *_thresh, - int bd) { - highbd_mb_lpf_horizontal_edge_w_sse2_8(s, p, _blimit, _limit, _thresh, bd); - highbd_mb_lpf_horizontal_edge_w_sse2_8(s + 8, p, _blimit, _limit, _thresh, - bd); -} - -// TODO(yunqingwang): remove count and call these 2 functions(8 or 16) directly. -void vpx_highbd_lpf_horizontal_16_sse2(uint16_t *s, int p, - const uint8_t *_blimit, - const uint8_t *_limit, - const uint8_t *_thresh, - int count, int bd) { - if (count == 1) - highbd_mb_lpf_horizontal_edge_w_sse2_8(s, p, _blimit, _limit, _thresh, bd); - else - highbd_mb_lpf_horizontal_edge_w_sse2_16(s, p, _blimit, _limit, _thresh, bd); +void vpx_highbd_lpf_horizontal_edge_16_sse2(uint16_t *s, int p, + const uint8_t *_blimit, + const uint8_t *_limit, + const uint8_t *_thresh, int bd) { + vpx_highbd_lpf_horizontal_edge_8_sse2(s, p, _blimit, _limit, _thresh, bd); + vpx_highbd_lpf_horizontal_edge_8_sse2(s + 8, p, _blimit, _limit, _thresh, bd); } void vpx_highbd_lpf_horizontal_8_sse2(uint16_t *s, int p, @@ -1171,8 +1154,8 @@ void vpx_highbd_lpf_vertical_16_sse2(uint16_t *s, int p, highbd_transpose(src, p, dst, 8, 2); // Loop filtering - highbd_mb_lpf_horizontal_edge_w_sse2_8(t_dst + 8 * 8, 8, blimit, limit, - thresh, bd); + vpx_highbd_lpf_horizontal_edge_8_sse2(t_dst + 8 * 8, 8, blimit, limit, + thresh, bd); src[0] = t_dst; src[1] = t_dst + 8 * 8; dst[0] = s - 8; @@ -1195,8 +1178,8 @@ void vpx_highbd_lpf_vertical_16_dual_sse2(uint16_t *s, highbd_transpose8x16(s, s + 8 * p, p, t_dst + 8 * 16, 16); // Loop filtering - highbd_mb_lpf_horizontal_edge_w_sse2_16(t_dst + 8 * 16, 16, blimit, limit, - thresh, bd); + vpx_highbd_lpf_horizontal_edge_16_sse2(t_dst + 8 * 16, 16, blimit, limit, + thresh, bd); // Transpose back highbd_transpose8x16(t_dst, t_dst + 8 * 16, 16, s - 8, p); |