summary refs log tree commit diff
path: root/vp8
diff options
context:
space:
mode:
author Fritz Koenig <frkoenig@google.com> 2011-08-19 08:51:27 -0700
committer Fritz Koenig <frkoenig@google.com> 2011-08-19 08:51:27 -0700
commit 01376858cd184d820ff4c2d8390361a8679c0e87 (patch)
tree 8cbfe5149b080df8a18863136e76b603734e0e57 /vp8
parent 744f4823503a7220999103111c569c00ffe0d09c (diff)
download libvpx-01376858cd184d820ff4c2d8390361a8679c0e87.tar
libvpx-01376858cd184d820ff4c2d8390361a8679c0e87.tar.gz
libvpx-01376858cd184d820ff4c2d8390361a8679c0e87.tar.bz2
libvpx-01376858cd184d820ff4c2d8390361a8679c0e87.zip
Reclassify optimized ssim calculations as SSE2.
The calculations were incorrectly classified as either SSE3 or SSSE3, but they only use SSE2 instructions. Clean up function names and make non-RTCD code work as well. Change-Id: I29f5c2ead342b2086a468029c15e2c1d948b5d97
Diffstat (limited to 'vp8')
-rw-r--r-- vp8/encoder/generic/csystemdependent.c 9
-rw-r--r-- vp8/encoder/ssim.c 19
-rw-r--r-- vp8/encoder/variance.h 16
-rw-r--r-- vp8/encoder/x86/ssim_opt.asm 12
-rw-r--r-- vp8/encoder/x86/variance_x86.h 10
-rw-r--r-- vp8/encoder/x86/x86_csystemdependent.c 38
6 files changed, 41 insertions, 63 deletions
diff --git a/vp8/encoder/generic/csystemdependent.c b/vp8/encoder/generic/csystemdependent.c
index 990610554..a14843a80 100644
--- a/vp8/encoder/generic/csystemdependent.c
+++ b/vp8/encoder/generic/csystemdependent.c
@@ -94,16 +94,15 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi)
#if !(CONFIG_REALTIME_ONLY)
cpi->rtcd.temporal.apply = vp8_temporal_filter_apply_c;
#endif
+#if CONFIG_INTERNAL_STATS
+ cpi->rtcd.variance.ssimpf_8x8 = vp8_ssim_parms_8x8_c;
+ cpi->rtcd.variance.ssimpf_16x16 = vp8_ssim_parms_16x16_c;
+#endif
#endif
// Pure C:
vp8_yv12_copy_partial_frame_ptr = vp8_yv12_copy_partial_frame;
-#if CONFIG_INTERNAL_STATS
- cpi->rtcd.variance.ssimpf_8x8 = ssim_parms_8x8_c;
- cpi->rtcd.variance.ssimpf = ssim_parms_c;
-#endif
-
#if ARCH_X86 || ARCH_X86_64
vp8_arch_x86_encoder_init(cpi);
#endif
diff --git a/vp8/encoder/ssim.c b/vp8/encoder/ssim.c
index fea756f7b..d0f8e490a 100644
--- a/vp8/encoder/ssim.c
+++ b/vp8/encoder/ssim.c
@@ -9,18 +9,9 @@
*/
-#include "vpx_scale/yv12config.h"
-#include "math.h"
#include "onyx_int.h"
-#if CONFIG_RUNTIME_CPU_DETECT
-#define IF_RTCD(x) (x)
-#else
-#define IF_RTCD(x) NULL
-#endif
-
-
-void ssim_parms_c
+void vp8_ssim_parms_16x16_c
(
unsigned char *s,
int sp,
@@ -46,7 +37,7 @@ void ssim_parms_c
}
}
}
-void ssim_parms_8x8_c
+void vp8_ssim_parms_8x8_c
(
unsigned char *s,
int sp,
@@ -107,14 +98,14 @@ static double ssim_16x16(unsigned char *s,int sp, unsigned char *r,int rp,
const vp8_variance_rtcd_vtable_t *rtcd)
{
unsigned long sum_s=0,sum_r=0,sum_sq_s=0,sum_sq_r=0,sum_sxr=0;
- rtcd->ssimpf(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr);
+ SSIMPF_INVOKE(rtcd,16x16)(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr);
return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, 256);
}
static double ssim_8x8(unsigned char *s,int sp, unsigned char *r,int rp,
const vp8_variance_rtcd_vtable_t *rtcd)
{
unsigned long sum_s=0,sum_r=0,sum_sq_s=0,sum_sq_r=0,sum_sxr=0;
- rtcd->ssimpf_8x8(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr);
+ SSIMPF_INVOKE(rtcd,8x8)(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr);
return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, 64);
}
@@ -134,7 +125,7 @@ long dssim(unsigned char *s,int sp, unsigned char *r,int rp,
c1 = cc1*16;
c2 = cc2*16;
- rtcd->ssimpf(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr);
+ SSIMPF_INVOKE(rtcd,16x16)(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr);
ssim_n1 = (2*sum_s*sum_r+ c1);
ssim_n2 =((int64_t) 2*256*sum_sxr-(int64_t) 2*sum_s*sum_r+c2);
diff --git a/vp8/encoder/variance.h b/vp8/encoder/variance.h
index 5fd6d3ae0..0f35152e3 100644
--- a/vp8/encoder/variance.h
+++ b/vp8/encoder/variance.h
@@ -320,16 +320,16 @@ extern prototype_variance(vp8_variance_mse16x16);
#endif
extern prototype_get16x16prederror(vp8_variance_get4x4sse_cs);
-#ifndef vp8_ssimpf
-#define vp8_ssimpf ssim_parms_c
-#endif
-extern prototype_ssimpf(vp8_ssimpf)
-
#ifndef vp8_ssimpf_8x8
-#define vp8_ssimpf_8x8 ssim_parms_8x8_c
+#define vp8_ssimpf_8x8 vp8_ssim_parms_8x8_c
#endif
extern prototype_ssimpf(vp8_ssimpf_8x8)
+#ifndef vp8_ssimpf_16x16
+#define vp8_ssimpf_16x16 vp8_ssim_parms_16x16_c
+#endif
+extern prototype_ssimpf(vp8_ssimpf_16x16)
+
typedef prototype_sad(*vp8_sad_fn_t);
typedef prototype_sad_multi_same_address(*vp8_sad_multi_fn_t);
typedef prototype_sad_multi_same_address_1(*vp8_sad_multi1_fn_t);
@@ -394,7 +394,7 @@ typedef struct
#if CONFIG_INTERNAL_STATS
vp8_ssimpf_fn_t ssimpf_8x8;
- vp8_ssimpf_fn_t ssimpf;
+ vp8_ssimpf_fn_t ssimpf_16x16;
#endif
} vp8_variance_rtcd_vtable_t;
@@ -417,8 +417,10 @@ typedef struct
#if CONFIG_RUNTIME_CPU_DETECT
#define VARIANCE_INVOKE(ctx,fn) (ctx)->fn
+#define SSIMPF_INVOKE(ctx,fn) (ctx)->fn
#else
#define VARIANCE_INVOKE(ctx,fn) vp8_variance_##fn
+#define SSIMPF_INVOKE(ctx,fn) vp8_ssimpf_##fn
#endif
#endif
diff --git a/vp8/encoder/x86/ssim_opt.asm b/vp8/encoder/x86/ssim_opt.asm
index d5d267a69..8af4b4533 100644
--- a/vp8/encoder/x86/ssim_opt.asm
+++ b/vp8/encoder/x86/ssim_opt.asm
@@ -44,7 +44,7 @@
paddd %1, xmm1
SUM_ACROSS_Q %1
%endmacro
-;void ssim_parms_sse3(
+;void ssim_parms_sse2(
; unsigned char *s,
; int sp,
; unsigned char *r,
@@ -61,8 +61,8 @@
; or pavgb At this point this is just meant to be first pass for calculating
; all the parms needed for 16x16 ssim so we can play with dssim as distortion
; in mode selection code.
-global sym(vp8_ssim_parms_16x16_sse3)
-sym(vp8_ssim_parms_16x16_sse3):
+global sym(vp8_ssim_parms_16x16_sse2)
+sym(vp8_ssim_parms_16x16_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 9
@@ -134,7 +134,7 @@ NextRow:
pop rbp
ret
-;void ssim_parms_sse3(
+;void ssim_parms_sse2(
; unsigned char *s,
; int sp,
; unsigned char *r,
@@ -151,8 +151,8 @@ NextRow:
; or pavgb At this point this is just meant to be first pass for calculating
; all the parms needed for 16x16 ssim so we can play with dssim as distortion
; in mode selection code.
-global sym(vp8_ssim_parms_8x8_sse3)
-sym(vp8_ssim_parms_8x8_sse3):
+global sym(vp8_ssim_parms_8x8_sse2)
+sym(vp8_ssim_parms_8x8_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 9
diff --git a/vp8/encoder/x86/variance_x86.h b/vp8/encoder/x86/variance_x86.h
index af6c4d27e..4b41b5436 100644
--- a/vp8/encoder/x86/variance_x86.h
+++ b/vp8/encoder/x86/variance_x86.h
@@ -140,6 +140,8 @@ extern prototype_getmbss(vp8_get_mb_ss_sse2);
extern prototype_variance(vp8_mse16x16_wmt);
extern prototype_variance2(vp8_get8x8var_sse2);
extern prototype_variance2(vp8_get16x16var_sse2);
+extern prototype_ssimpf(vp8_ssim_parms_8x8_sse2)
+extern prototype_ssimpf(vp8_ssim_parms_16x16_sse2)
#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_variance_sad4x4
@@ -208,6 +210,14 @@ extern prototype_variance2(vp8_get16x16var_sse2);
#undef vp8_variance_mse16x16
#define vp8_variance_mse16x16 vp8_mse16x16_wmt
+#if ARCH_X86_64
+#undef vp8_ssimpf_8x8
+#define vp8_ssimpf_8x8 vp8_ssim_parms_8x8_sse2
+
+#undef vp8_ssimpf_16x16
+#define vp8_ssimpf_16x16 vp8_ssim_parms_16x16_sse2
+#endif
+
#endif
#endif
diff --git a/vp8/encoder/x86/x86_csystemdependent.c b/vp8/encoder/x86/x86_csystemdependent.c
index badb9f044..36b7b7194 100644
--- a/vp8/encoder/x86/x86_csystemdependent.c
+++ b/vp8/encoder/x86/x86_csystemdependent.c
@@ -111,29 +111,6 @@ void vp8_subtract_b_sse2(BLOCK *be, BLOCKD *bd, int pitch)
#endif
-#if HAVE_SSSE3
-#if CONFIG_INTERNAL_STATS
-#if ARCH_X86_64
-typedef void ssimpf
-(
- unsigned char *s,
- int sp,
- unsigned char *r,
- int rp,
- unsigned long *sum_s,
- unsigned long *sum_r,
- unsigned long *sum_sq_s,
- unsigned long *sum_sq_r,
- unsigned long *sum_sxr
-);
-
-extern ssimpf vp8_ssim_parms_16x16_sse3;
-extern ssimpf vp8_ssim_parms_8x8_sse3;
-#endif
-#endif
-#endif
-
-
void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
{
#if CONFIG_RUNTIME_CPU_DETECT
@@ -246,6 +223,13 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
#if !(CONFIG_REALTIME_ONLY)
cpi->rtcd.temporal.apply = vp8_temporal_filter_apply_sse2;
#endif
+
+#if CONFIG_INTERNAL_STATS
+#if ARCH_X86_64
+ cpi->rtcd.variance.ssimpf_8x8 = vp8_ssim_parms_8x8_sse2;
+ cpi->rtcd.variance.ssimpf_16x16 = vp8_ssim_parms_16x16_sse2;
+#endif
+#endif
}
#endif
@@ -280,14 +264,6 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_ssse3;
cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_ssse3;
-
-#if CONFIG_INTERNAL_STATS
-#if ARCH_X86_64
- cpi->rtcd.variance.ssimpf_8x8 = vp8_ssim_parms_8x8_sse3;
- cpi->rtcd.variance.ssimpf = vp8_ssim_parms_16x16_sse3;
-#endif
-#endif
-
}
#endif