diff options
author | Ronald S. Bultje <rbultje@google.com> | 2013-07-01 11:36:07 -0700 |
---|---|---|
committer | Ronald S. Bultje <rbultje@google.com> | 2013-07-01 11:36:07 -0700 |
commit | 7353ceab9d6b5c9d1bbdae18e509207c5afb2b19 (patch) | |
tree | 20de3c17fae4e02bbe4b1fccbe3a5ef5a10b8817 /vp9/encoder/vp9_onyx_int.h | |
parent | 632289b31fd11229c875c116f4281e3ab6f42115 (diff) | |
download | libvpx-7353ceab9d6b5c9d1bbdae18e509207c5afb2b19.tar libvpx-7353ceab9d6b5c9d1bbdae18e509207c5afb2b19.tar.gz libvpx-7353ceab9d6b5c9d1bbdae18e509207c5afb2b19.tar.bz2 libvpx-7353ceab9d6b5c9d1bbdae18e509207c5afb2b19.zip |
Quantize (64-bit only, for now) SSSE3 SIMD.
Total encoding time for the first 50 frames of bus (speed 0) @ 1500kbps
goes from 2min34.8 to 2min14.4, i.e. a 10.4% overall speedup. The code is
x86-64 only; it needs some minor modifications to be 32-bit compatible
because it uses 15 xmm registers, whereas 32-bit x86 only has 8.
Change-Id: I2df53770c2e850813ffa713e1a91b45b0082b904
Diffstat (limited to 'vp9/encoder/vp9_onyx_int.h')
-rw-r--r-- | vp9/encoder/vp9_onyx_int.h | 24 |
1 files changed, 12 insertions, 12 deletions
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h index 980095ba6..ba11a9344 100644 --- a/vp9/encoder/vp9_onyx_int.h +++ b/vp9/encoder/vp9_onyx_int.h @@ -253,21 +253,21 @@ enum BlockSize { typedef struct VP9_COMP { - DECLARE_ALIGNED(16, short, y_quant[QINDEX_RANGE][16]); - DECLARE_ALIGNED(16, unsigned char, y_quant_shift[QINDEX_RANGE][16]); - DECLARE_ALIGNED(16, short, y_zbin[QINDEX_RANGE][16]); - DECLARE_ALIGNED(16, short, y_round[QINDEX_RANGE][16]); + DECLARE_ALIGNED(16, int16_t, y_quant[QINDEX_RANGE][8]); + DECLARE_ALIGNED(16, int16_t, y_quant_shift[QINDEX_RANGE][8]); + DECLARE_ALIGNED(16, int16_t, y_zbin[QINDEX_RANGE][8]); + DECLARE_ALIGNED(16, int16_t, y_round[QINDEX_RANGE][8]); - DECLARE_ALIGNED(16, short, uv_quant[QINDEX_RANGE][16]); - DECLARE_ALIGNED(16, unsigned char, uv_quant_shift[QINDEX_RANGE][16]); - DECLARE_ALIGNED(16, short, uv_zbin[QINDEX_RANGE][16]); - DECLARE_ALIGNED(16, short, uv_round[QINDEX_RANGE][16]); + DECLARE_ALIGNED(16, int16_t, uv_quant[QINDEX_RANGE][8]); + DECLARE_ALIGNED(16, int16_t, uv_quant_shift[QINDEX_RANGE][8]); + DECLARE_ALIGNED(16, int16_t, uv_zbin[QINDEX_RANGE][8]); + DECLARE_ALIGNED(16, int16_t, uv_round[QINDEX_RANGE][8]); #if CONFIG_ALPHA - DECLARE_ALIGNED(16, short, a_quant[QINDEX_RANGE][16]); - DECLARE_ALIGNED(16, unsigned char, a_quant_shift[QINDEX_RANGE][16]); - DECLARE_ALIGNED(16, short, a_zbin[QINDEX_RANGE][16]); - DECLARE_ALIGNED(16, short, a_round[QINDEX_RANGE][16]); + DECLARE_ALIGNED(16, int16_t, a_quant[QINDEX_RANGE][8]); + DECLARE_ALIGNED(16, int16_t, a_quant_shift[QINDEX_RANGE][8]); + DECLARE_ALIGNED(16, int16_t, a_zbin[QINDEX_RANGE][8]); + DECLARE_ALIGNED(16, int16_t, a_round[QINDEX_RANGE][8]); #endif MACROBLOCK mb; |