diff options
-rw-r--r-- | test/test_intra_pred_speed.cc | 9 | ||||
-rw-r--r-- | vp9/encoder/vp9_encodeframe.c | 24 | ||||
-rw-r--r-- | vp9/encoder/vp9_noise_estimate.c | 4 | ||||
-rw-r--r-- | vp9/encoder/vp9_noise_estimate.h | 1 | ||||
-rw-r--r-- | vpx_dsp/vpx_dsp_rtcd_defs.pl | 2 | ||||
-rw-r--r-- | vpx_dsp/x86/intrapred_sse2.asm | 43 |
6 files changed, 45 insertions, 38 deletions
diff --git a/test/test_intra_pred_speed.cc b/test/test_intra_pred_speed.cc index 4064ea645..3e65fecfb 100644 --- a/test/test_intra_pred_speed.cc +++ b/test/test_intra_pred_speed.cc @@ -337,21 +337,12 @@ INTRA_PRED_TEST(C, TestIntraPred32, vpx_dc_predictor_32x32_c, vpx_d63_predictor_32x32_c, vpx_tm_predictor_32x32_c) #if HAVE_SSE2 && CONFIG_USE_X86INC -#if ARCH_X86_64 INTRA_PRED_TEST(SSE2, TestIntraPred32, vpx_dc_predictor_32x32_sse2, vpx_dc_left_predictor_32x32_sse2, vpx_dc_top_predictor_32x32_sse2, vpx_dc_128_predictor_32x32_sse2, vpx_v_predictor_32x32_sse2, vpx_h_predictor_32x32_sse2, NULL, NULL, NULL, NULL, NULL, NULL, vpx_tm_predictor_32x32_sse2) -#else -INTRA_PRED_TEST(SSE2, TestIntraPred32, vpx_dc_predictor_32x32_sse2, - vpx_dc_left_predictor_32x32_sse2, - vpx_dc_top_predictor_32x32_sse2, - vpx_dc_128_predictor_32x32_sse2, vpx_v_predictor_32x32_sse2, - vpx_h_predictor_32x32_sse2, NULL, NULL, NULL, NULL, NULL, - NULL, NULL) -#endif // ARCH_X86_64 #endif // HAVE_SSE2 && CONFIG_USE_X86INC #if HAVE_SSSE3 && CONFIG_USE_X86INC diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index fc5eb1bbe..afb0d1496 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -496,6 +496,8 @@ static void set_vbp_thresholds(VP9_COMP *cpi, int64_t thresholds[], int q) { threshold_base = 3 * threshold_base; else if (noise_level == kMedium) threshold_base = threshold_base << 1; + else if (noise_level == kLowLow) + threshold_base = (7 * threshold_base) >> 3; } if (cm->width <= 352 && cm->height <= 288) { thresholds[0] = threshold_base >> 3; @@ -668,6 +670,8 @@ static int choose_partitioning(VP9_COMP *cpi, v64x64 vt; v16x16 vt2[16]; int force_split[21]; + int avg_32x32; + int avg_16x16[4]; uint8_t *s; const uint8_t *d; int sp; @@ -819,6 +823,7 @@ static int choose_partitioning(VP9_COMP *cpi, const int y32_idx = ((i >> 1) << 5); const int i2 = i << 2; force_split[i + 1] = 0; + avg_16x16[i] = 0; for (j = 0; j < 4; j++) { const int x16_idx = x32_idx + ((j & 1) << 4); const int y16_idx = y32_idx + ((j >> 1) << 4); @@ -836,6 +841,7 @@ static int choose_partitioning(VP9_COMP *cpi, is_key_frame); fill_variance_tree(&vt.split[i].split[j], BLOCK_16X16); get_variance(&vt.split[i].split[j].part_variances.none); + avg_16x16[i] += vt.split[i].split[j].part_variances.none.variance; if (vt.split[i].split[j].part_variances.none.variance > thresholds[2]) { // 16X16 variance is above threshold for split, so force split to 8x8 @@ -888,6 +894,7 @@ static int choose_partitioning(VP9_COMP *cpi, } // Fill the rest of the variance tree by summing split partition values. + avg_32x32 = 0; for (i = 0; i < 4; i++) { const int i2 = i << 2; for (j = 0; j < 4; j++) { @@ -908,19 +915,30 @@ static int choose_partitioning(VP9_COMP *cpi, } } fill_variance_tree(&vt.split[i], BLOCK_32X32); - // If variance of this 32x32 block is above the threshold, force the block - // to split. This also forces a split on the upper (64x64) level. + // If variance of this 32x32 block is above the threshold, or if its above + // (some threshold of) the average variance over the sub-16x16 blocks, then + // force this block to split. This also forces a split on the upper + // (64x64) level. if (!force_split[i + 1]) { get_variance(&vt.split[i].part_variances.none); - if (vt.split[i].part_variances.none.variance > thresholds[1]) { + if (vt.split[i].part_variances.none.variance > thresholds[1] || + (!is_key_frame && + vt.split[i].part_variances.none.variance > (thresholds[1] >> 1) && + vt.split[i].part_variances.none.variance > (avg_16x16[i] >> 1))) { force_split[i + 1] = 1; force_split[0] = 1; } + avg_32x32 += vt.split[i].part_variances.none.variance; } } if (!force_split[0]) { fill_variance_tree(&vt, BLOCK_64X64); get_variance(&vt.part_variances.none); + // If variance of this 64x64 block is above (some threshold of) the average + // variance over the sub-32x32 blocks, then force this block to split. + if (!is_key_frame && + vt.part_variances.none.variance > (3 * avg_32x32) >> 3) + force_split[0] = 1; } // Now go through the entire structure, splitting every block size until diff --git a/vp9/encoder/vp9_noise_estimate.c b/vp9/encoder/vp9_noise_estimate.c index 4befbb066..6e717e53c 100644 --- a/vp9/encoder/vp9_noise_estimate.c +++ b/vp9/encoder/vp9_noise_estimate.c @@ -25,7 +25,7 @@ void vp9_noise_estimate_init(NOISE_ESTIMATE *const ne, int width, int height) { ne->enabled = 0; - ne->level = kLowLow; + ne->level = kUnknown; ne->value = 0; ne->count = 0; ne->thresh = 90; @@ -83,7 +83,7 @@ static void copy_frame(YV12_BUFFER_CONFIG * const dest, } NOISE_LEVEL vp9_noise_estimate_extract_level(NOISE_ESTIMATE *const ne) { - int noise_level = kLowLow; + int noise_level = kUnknown; if (ne->value > (ne->thresh << 1)) { noise_level = kHigh; } else { diff --git a/vp9/encoder/vp9_noise_estimate.h b/vp9/encoder/vp9_noise_estimate.h index 826d125b5..266326286 100644 --- a/vp9/encoder/vp9_noise_estimate.h +++ b/vp9/encoder/vp9_noise_estimate.h @@ -24,6 +24,7 @@ extern "C" { #endif typedef enum noise_level { + kUnknown, kLowLow, kLow, kMedium, diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl index bbedba070..f71769918 100644 --- a/vpx_dsp/vpx_dsp_rtcd_defs.pl +++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl @@ -241,7 +241,7 @@ add_proto qw/void vpx_v_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, con specialize qw/vpx_v_predictor_32x32 neon msa/, "$sse2_x86inc"; add_proto qw/void vpx_tm_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_tm_predictor_32x32 neon msa/, "$sse2_x86_64_x86inc"; +specialize qw/vpx_tm_predictor_32x32 neon msa/, "$sse2_x86inc"; add_proto qw/void vpx_dc_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_dc_predictor_32x32 msa neon/, "$sse2_x86inc"; diff --git a/vpx_dsp/x86/intrapred_sse2.asm b/vpx_dsp/x86/intrapred_sse2.asm index 6f924a779..c24d53686 100644 --- a/vpx_dsp/x86/intrapred_sse2.asm +++ b/vpx_dsp/x86/intrapred_sse2.asm @@ -700,9 +700,8 @@ cglobal tm_predictor_16x16, 4, 5, 8, dst, stride, above, left jnz .loop REP_RET -%if ARCH_X86_64 INIT_XMM sse2 -cglobal tm_predictor_32x32, 4, 4, 10, dst, stride, above, left +cglobal tm_predictor_32x32, 4, 4, 8, dst, stride, above, left pxor m1, m1 movd m2, [aboveq-1] mova m0, [aboveq] @@ -723,31 +722,29 @@ cglobal tm_predictor_32x32, 4, 4, 10, dst, stride, above, left psubw m5, m2 .loop: movd m2, [leftq+lineq*2] - movd m6, [leftq+lineq*2+1] + pxor m1, m1 punpcklbw m2, m1 - punpcklbw m6, m1 + pshuflw m7, m2, 0x55 pshuflw m2, m2, 0x0 - pshuflw m6, m6, 0x0 punpcklqdq m2, m2 - punpcklqdq m6, m6 - paddw m7, m2, m0 - paddw m8, m2, m3 - paddw m9, m2, m4 - paddw m2, m5 - packuswb m7, m8 - packuswb m9, m2 - paddw m2, m6, m0 - paddw m8, m6, m3 - mova [dstq ], m7 - paddw m7, m6, m4 - paddw m6, m5 - mova [dstq +16], m9 - packuswb m2, m8 - packuswb m7, m6 - mova [dstq+strideq ], m2 - mova [dstq+strideq+16], m7 + punpcklqdq m7, m7 + paddw m6, m2, m3 + paddw m1, m2, m0 + packuswb m1, m6 + mova [dstq ], m1 + paddw m6, m2, m5 + paddw m1, m2, m4 + packuswb m1, m6 + mova [dstq+16 ], m1 + paddw m6, m7, m3 + paddw m1, m7, m0 + packuswb m1, m6 + mova [dstq+strideq ], m1 + paddw m6, m7, m5 + paddw m1, m7, m4 + packuswb m1, m6 + mova [dstq+strideq+16], m1 lea dstq, [dstq+strideq*2] inc lineq jnz .loop REP_RET -%endif |