summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- test/test_intra_pred_speed.cc 9
-rw-r--r-- vp9/encoder/vp9_encodeframe.c 24
-rw-r--r-- vp9/encoder/vp9_noise_estimate.c 4
-rw-r--r-- vp9/encoder/vp9_noise_estimate.h 1
-rw-r--r-- vpx_dsp/vpx_dsp_rtcd_defs.pl 2
-rw-r--r-- vpx_dsp/x86/intrapred_sse2.asm 43
6 files changed, 45 insertions, 38 deletions
diff --git a/test/test_intra_pred_speed.cc b/test/test_intra_pred_speed.cc
index 4064ea645..3e65fecfb 100644
--- a/test/test_intra_pred_speed.cc
+++ b/test/test_intra_pred_speed.cc
@@ -337,21 +337,12 @@ INTRA_PRED_TEST(C, TestIntraPred32, vpx_dc_predictor_32x32_c,
vpx_d63_predictor_32x32_c, vpx_tm_predictor_32x32_c)
#if HAVE_SSE2 && CONFIG_USE_X86INC
-#if ARCH_X86_64
INTRA_PRED_TEST(SSE2, TestIntraPred32, vpx_dc_predictor_32x32_sse2,
vpx_dc_left_predictor_32x32_sse2,
vpx_dc_top_predictor_32x32_sse2,
vpx_dc_128_predictor_32x32_sse2, vpx_v_predictor_32x32_sse2,
vpx_h_predictor_32x32_sse2, NULL, NULL, NULL, NULL, NULL,
NULL, vpx_tm_predictor_32x32_sse2)
-#else
-INTRA_PRED_TEST(SSE2, TestIntraPred32, vpx_dc_predictor_32x32_sse2,
- vpx_dc_left_predictor_32x32_sse2,
- vpx_dc_top_predictor_32x32_sse2,
- vpx_dc_128_predictor_32x32_sse2, vpx_v_predictor_32x32_sse2,
- vpx_h_predictor_32x32_sse2, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL)
-#endif // ARCH_X86_64
#endif // HAVE_SSE2 && CONFIG_USE_X86INC
#if HAVE_SSSE3 && CONFIG_USE_X86INC
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index fc5eb1bbe..afb0d1496 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -496,6 +496,8 @@ static void set_vbp_thresholds(VP9_COMP *cpi, int64_t thresholds[], int q) {
threshold_base = 3 * threshold_base;
else if (noise_level == kMedium)
threshold_base = threshold_base << 1;
+ else if (noise_level == kLowLow)
+ threshold_base = (7 * threshold_base) >> 3;
}
if (cm->width <= 352 && cm->height <= 288) {
thresholds[0] = threshold_base >> 3;
@@ -668,6 +670,8 @@ static int choose_partitioning(VP9_COMP *cpi,
v64x64 vt;
v16x16 vt2[16];
int force_split[21];
+ int avg_32x32;
+ int avg_16x16[4];
uint8_t *s;
const uint8_t *d;
int sp;
@@ -819,6 +823,7 @@ static int choose_partitioning(VP9_COMP *cpi,
const int y32_idx = ((i >> 1) << 5);
const int i2 = i << 2;
force_split[i + 1] = 0;
+ avg_16x16[i] = 0;
for (j = 0; j < 4; j++) {
const int x16_idx = x32_idx + ((j & 1) << 4);
const int y16_idx = y32_idx + ((j >> 1) << 4);
@@ -836,6 +841,7 @@ static int choose_partitioning(VP9_COMP *cpi,
is_key_frame);
fill_variance_tree(&vt.split[i].split[j], BLOCK_16X16);
get_variance(&vt.split[i].split[j].part_variances.none);
+ avg_16x16[i] += vt.split[i].split[j].part_variances.none.variance;
if (vt.split[i].split[j].part_variances.none.variance >
thresholds[2]) {
// 16X16 variance is above threshold for split, so force split to 8x8
@@ -888,6 +894,7 @@ static int choose_partitioning(VP9_COMP *cpi,
}
// Fill the rest of the variance tree by summing split partition values.
+ avg_32x32 = 0;
for (i = 0; i < 4; i++) {
const int i2 = i << 2;
for (j = 0; j < 4; j++) {
@@ -908,19 +915,30 @@ static int choose_partitioning(VP9_COMP *cpi,
}
}
fill_variance_tree(&vt.split[i], BLOCK_32X32);
- // If variance of this 32x32 block is above the threshold, force the block
- // to split. This also forces a split on the upper (64x64) level.
+ // If variance of this 32x32 block is above the threshold, or if it's above
+ // (some threshold of) the average variance over the sub-16x16 blocks, then
+ // force this block to split. This also forces a split on the upper
+ // (64x64) level.
if (!force_split[i + 1]) {
get_variance(&vt.split[i].part_variances.none);
- if (vt.split[i].part_variances.none.variance > thresholds[1]) {
+ if (vt.split[i].part_variances.none.variance > thresholds[1] ||
+ (!is_key_frame &&
+ vt.split[i].part_variances.none.variance > (thresholds[1] >> 1) &&
+ vt.split[i].part_variances.none.variance > (avg_16x16[i] >> 1))) {
force_split[i + 1] = 1;
force_split[0] = 1;
}
+ avg_32x32 += vt.split[i].part_variances.none.variance;
}
}
if (!force_split[0]) {
fill_variance_tree(&vt, BLOCK_64X64);
get_variance(&vt.part_variances.none);
+ // If variance of this 64x64 block is above (some threshold of) the average
+ // variance over the sub-32x32 blocks, then force this block to split.
+ if (!is_key_frame &&
+ vt.part_variances.none.variance > (3 * avg_32x32) >> 3)
+ force_split[0] = 1;
}
// Now go through the entire structure, splitting every block size until
diff --git a/vp9/encoder/vp9_noise_estimate.c b/vp9/encoder/vp9_noise_estimate.c
index 4befbb066..6e717e53c 100644
--- a/vp9/encoder/vp9_noise_estimate.c
+++ b/vp9/encoder/vp9_noise_estimate.c
@@ -25,7 +25,7 @@ void vp9_noise_estimate_init(NOISE_ESTIMATE *const ne,
int width,
int height) {
ne->enabled = 0;
- ne->level = kLowLow;
+ ne->level = kUnknown;
ne->value = 0;
ne->count = 0;
ne->thresh = 90;
@@ -83,7 +83,7 @@ static void copy_frame(YV12_BUFFER_CONFIG * const dest,
}
NOISE_LEVEL vp9_noise_estimate_extract_level(NOISE_ESTIMATE *const ne) {
- int noise_level = kLowLow;
+ int noise_level = kUnknown;
if (ne->value > (ne->thresh << 1)) {
noise_level = kHigh;
} else {
diff --git a/vp9/encoder/vp9_noise_estimate.h b/vp9/encoder/vp9_noise_estimate.h
index 826d125b5..266326286 100644
--- a/vp9/encoder/vp9_noise_estimate.h
+++ b/vp9/encoder/vp9_noise_estimate.h
@@ -24,6 +24,7 @@ extern "C" {
#endif
typedef enum noise_level {
+ kUnknown,
kLowLow,
kLow,
kMedium,
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl
index bbedba070..f71769918 100644
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -241,7 +241,7 @@ add_proto qw/void vpx_v_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, con
specialize qw/vpx_v_predictor_32x32 neon msa/, "$sse2_x86inc";
add_proto qw/void vpx_tm_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_tm_predictor_32x32 neon msa/, "$sse2_x86_64_x86inc";
+specialize qw/vpx_tm_predictor_32x32 neon msa/, "$sse2_x86inc";
add_proto qw/void vpx_dc_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_dc_predictor_32x32 msa neon/, "$sse2_x86inc";
diff --git a/vpx_dsp/x86/intrapred_sse2.asm b/vpx_dsp/x86/intrapred_sse2.asm
index 6f924a779..c24d53686 100644
--- a/vpx_dsp/x86/intrapred_sse2.asm
+++ b/vpx_dsp/x86/intrapred_sse2.asm
@@ -700,9 +700,8 @@ cglobal tm_predictor_16x16, 4, 5, 8, dst, stride, above, left
jnz .loop
REP_RET
-%if ARCH_X86_64
INIT_XMM sse2
-cglobal tm_predictor_32x32, 4, 4, 10, dst, stride, above, left
+cglobal tm_predictor_32x32, 4, 4, 8, dst, stride, above, left
pxor m1, m1
movd m2, [aboveq-1]
mova m0, [aboveq]
@@ -723,31 +722,29 @@ cglobal tm_predictor_32x32, 4, 4, 10, dst, stride, above, left
psubw m5, m2
.loop:
movd m2, [leftq+lineq*2]
- movd m6, [leftq+lineq*2+1]
+ pxor m1, m1
punpcklbw m2, m1
- punpcklbw m6, m1
+ pshuflw m7, m2, 0x55
pshuflw m2, m2, 0x0
- pshuflw m6, m6, 0x0
punpcklqdq m2, m2
- punpcklqdq m6, m6
- paddw m7, m2, m0
- paddw m8, m2, m3
- paddw m9, m2, m4
- paddw m2, m5
- packuswb m7, m8
- packuswb m9, m2
- paddw m2, m6, m0
- paddw m8, m6, m3
- mova [dstq ], m7
- paddw m7, m6, m4
- paddw m6, m5
- mova [dstq +16], m9
- packuswb m2, m8
- packuswb m7, m6
- mova [dstq+strideq ], m2
- mova [dstq+strideq+16], m7
+ punpcklqdq m7, m7
+ paddw m6, m2, m3
+ paddw m1, m2, m0
+ packuswb m1, m6
+ mova [dstq ], m1
+ paddw m6, m2, m5
+ paddw m1, m2, m4
+ packuswb m1, m6
+ mova [dstq+16 ], m1
+ paddw m6, m7, m3
+ paddw m1, m7, m0
+ packuswb m1, m6
+ mova [dstq+strideq ], m1
+ paddw m6, m7, m5
+ paddw m1, m7, m4
+ packuswb m1, m6
+ mova [dstq+strideq+16], m1
lea dstq, [dstq+strideq*2]
inc lineq
jnz .loop
REP_RET
-%endif